bio-1.4.3.0001/0000755000004100000410000000000012200110570012663 5ustar www-datawww-databio-1.4.3.0001/sample/0000755000004100000410000000000012200110570014144 5ustar www-datawww-databio-1.4.3.0001/sample/demo_kegg_compound.rb0000644000004100000410000000236512200110570020324 0ustar www-datawww-data# # = sample/demo_kegg_compound.rb - demonstration of Bio::KEGG::COMPOUND # # Copyright:: Copyright (C) 2001, 2002, 2004, 2007 Toshiaki Katayama # Copyright:: Copyright (C) 2009 Kozo Nishida # License:: The Ruby License # # # == Description # # Demonstration of Bio::KEGG::COMPOUND, a parser class for the KEGG COMPOUND # chemical structure database. # # == Usage # # Specify files containing KEGG COMPOUND data. # # $ ruby demo_kegg_compound.rb files... # # Example usage using test data: # # $ ruby -Ilib sample/demo_kegg_compound.rb test/data/KEGG/C00025.compound # # == Development information # # The code was moved from lib/bio/db/kegg/compound.rb and modified. # require 'bio' Bio::FlatFile.foreach(Bio::KEGG::COMPOUND, ARGF) do |cpd| puts "### cpd = Bio::KEGG::COMPOUND.new(str)" puts "# cpd.entry_id" p cpd.entry_id puts "# cpd.names" p cpd.names puts "# cpd.name" p cpd.name puts "# cpd.formula" p cpd.formula puts "# cpd.mass" p cpd.mass puts "# cpd.reactions" p cpd.reactions puts "# cpd.rpairs" p cpd.rpairs puts "# cpd.pathways" p cpd.pathways puts "# cpd.enzymes" p cpd.enzymes puts "# cpd.dblinks" p cpd.dblinks puts "# cpd.kcf" p cpd.kcf puts "=" * 78 end bio-1.4.3.0001/sample/dbget0000755000004100000410000000225412200110570015162 0ustar www-datawww-data#!/usr/bin/env ruby # # dbget - DBGET client # # Interface to GenomeNet DBGET system - http://www.genome.jp/dbget/ # # Copyright (C) 2001 KATAYAMA Toshiaki # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: dbget,v 1.7 2004/08/24 00:09:24 k Exp $ # require "bio/io/dbget" # DBGET command com = File.basename($0) # e.g. $PATH/bget db entry com = ARGV.shift if com == "dbget" # e.g. $PATH/dbget bget db entry # DBGET query strings arg = ARGV.join(" ") # DBGET result print Bio::DBGET.dbget(com, arg) bio-1.4.3.0001/sample/demo_das.rb0000644000004100000410000000435012200110570016246 0ustar www-datawww-data# # = sample/demo_go.rb - demonstration of Bio::DAS, BioDAS access module # # Copyright:: Copyright (C) 2003, 2004, 2007 # Shuichi Kawashima , # Toshiaki Katayama # License:: The Ruby License # # # == Description # # Demonstration of Bio::GO, BioDAS access module. # # == Requirements # # Internet connection is needed. # # == Usage # # Simply run this script. # # $ ruby demo_das.rb # # == Notes # # Demo using the WormBase DAS server is temporarily disabled because # it does not work well possibly because of the server trouble. # # == Development information # # The code was moved from lib/bio/io/das.rb and modified as below: # # * Demo codes using UCSC DAS server is added. 
# require 'bio' # begin # require 'pp' # alias p pp # rescue LoadError # end if false #disabled puts "### WormBase" wormbase = Bio::DAS.new('http://www.wormbase.org/db/') puts ">>> test get_dsn" p wormbase.get_dsn puts ">>> create segment obj Bio::DAS::SEGMENT.region('I', 1, 1000)" seg = Bio::DAS::SEGMENT.region('I', 1, 1000) p seg puts ">>> test get_dna" p wormbase.get_dna('elegans', seg) puts "### test get_features" p wormbase.get_features('elegans', seg) end #if false #disabled if true #enabled puts "### UCSC" ucsc = Bio::DAS.new('http://genome.ucsc.edu/cgi-bin/') puts ">>> test get_dsn" p ucsc.get_dsn puts ">>> test get_entry_points('hg19')" p ucsc.get_entry_points('hg19') puts ">>> test get_types('hg19')" p ucsc.get_types('hg19') len = rand(50) * 10 + 100 pos = rand(243199373 - len) puts ">>> create segment obj Bio::DAS::SEGMENT.region('2', #{pos}, #{pos + len - 1})" seg2 = Bio::DAS::SEGMENT.region('2', pos, pos + len - 1) p seg2 puts ">>> test get_dna" p ucsc.get_dna('hg19', seg2) puts "### test get_features" p ucsc.get_features('hg19', seg2) end #if true #enabled if true #enabled puts "### KEGG DAS" kegg_das = Bio::DAS.new("http://das.hgc.jp/cgi-bin/") dsn_list = kegg_das.get_dsn org_list = dsn_list.collect {|x| x.source} puts ">>> dsn : entry_points" org_list.each do |org| print "#{org} : " list = kegg_das.get_entry_points(org) list.segments.each do |seg| print " #{seg.entry_id}" end puts end end #if true #enabled bio-1.4.3.0001/sample/demo_kegg_drug.rb0000644000004100000410000000242412200110570017435 0ustar www-datawww-data# # = sample/demo_kegg_drug.rb - demonstration of Bio::KEGG::DRUG # # Copyright:: Copyright (C) 2007 Toshiaki Katayama # License:: The Ruby License # # # == Description # # Demonstration of Bio::KEGG::DRUG, a parser class for the KEGG DRUG # drug database entry. # # == Usage # # Specify files containing KEGG DRUG data. # # $ ruby demo_kegg_drug.rb files... # # == Example of running this script # # Download test data. 
# # $ ruby -Ilib bin/br_biofetch.rb dr D00001 > D00001.drug # $ ruby -Ilib bin/br_biofetch.rb dr D00002 > D00002.drug # # Run this script. # # $ ruby -Ilib sample/demo_kegg_drug.rb D00001.drug D00002.drug # # == Development information # # The code was moved from lib/bio/db/kegg/drug.rb and modified. # require 'bio' Bio::FlatFile.foreach(Bio::KEGG::DRUG, ARGF) do |dr| #entry = ARGF.read # dr:D00001 #dr = Bio::KEGG::DRUG.new(entry) puts "### dr = Bio::KEGG::DRUG.new(str)" puts "# dr.entry_id" p dr.entry_id puts "# dr.names" p dr.names puts "# dr.name" p dr.name puts "# dr.formula" p dr.formula puts "# dr.mass" p dr.mass puts "# dr.activity" p dr.activity puts "# dr.remark" p dr.remark puts "# dr.comment" p dr.comment puts "# dr.dblinks" p dr.dblinks puts "# dr.kcf" p dr.kcf puts "=" * 78 end bio-1.4.3.0001/sample/biofetch.rb0000755000004100000410000003025412200110570016263 0ustar www-datawww-data#!/usr/proj/bioruby/bin/ruby # # biofetch.rb : BioFetch server (interface to GenomeNet/DBGET via KEGG API) # # Copyright (C) 2002-2004 KATAYAMA Toshiaki # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# $Id: biofetch.rb,v 1.16 2005/08/07 10:02:41 k Exp $
#

require 'cgi'
require 'html/template'
require 'bio/io/keggapi'

# Largest number of IDs accepted in a single request (also the answer to
# the info=maxids query).
MAX_ID_NUM = 50

# Plain-text error responses. Every errorN method prints its message as a
# text/plain page and terminates the process via print_text_page.
module BioFetchError

  # Prints +str+ as a text/plain CGI response, then exits.
  def print_text_page(str)
    print "Content-type: text/plain; charset=UTF-8\n\n"
    puts str
    exit
  end

  def error1(db)
    str = "ERROR 1 Unknown database [#{db}]."
    print_text_page(str)
  end

  def error2(style)
    str = "ERROR 2 Unknown style [#{style}]."
    print_text_page(str)
  end

  def error3(format, db)
    str = "ERROR 3 Format [#{format}] not known for database [#{db}]."
    print_text_page(str)
  end

  def error4(entry_id, db)
    str = "ERROR 4 ID [#{entry_id}] not found in database [#{db}]."
    print_text_page(str)
  end

  def error5(count)
    str = "ERROR 5 Too many IDs [#{count}]. Max [#{MAX_ID_NUM}] allowed."
    print_text_page(str)
  end

  def error6(info)
    str = "ERROR 6 Illegal information request [#{info}]."
    print_text_page(str)
  end

end


# Thin helpers around Bio::KEGG::API.
module KeggAPI

  include BioFetchError

  # Returns the entry_id of every database reported by the KEGG API.
  def list_databases
    serv = Bio::KEGG::API.new
    results = serv.list_databases
    results.collect {|x| x.entry_id}
  end

  # Fetches each ID of +id_list+ from +db+, one request per ID, and returns
  # the concatenated entries. An ID that yields an empty result ends the
  # request with error 4. +format+ is accepted but not used here.
  def bget(db, id_list, format)
    serv = Bio::KEGG::API.new
    results = ''
    id_list.each do |query_id|
      entry_id = "#{db}:#{query_id}"
      result = serv.get_entries([entry_id])
      if result.empty?
        error4(query_id, db)
      else
        results << result
      end
    end
    return results
  end

end


# Handles an entry retrieval request: validates the parameters, then either
# redirects to the DBGET HTML page or prints the entries as plain text.
class BioFetch

  include BioFetchError
  include KeggAPI

  # Accepted values, matched against the whole parameter so that e.g.
  # "rawx" or "xfasta" are rejected instead of slipping through.
  STYLE_PATTERN = /\A(?:html|raw)\z/
  FORMAT_PATTERN = /\A(?:fasta|default)\z/

  def initialize(db, id_list, style, format)
    check_style(style)
    check_format(format, db)
    check_number_of_id(id_list.length)
    check_dbname(db)

    if style == 'html'
      goto_html_style_page(db, id_list, format)
    end

    entries = bget(db, id_list, format)

    if format == 'fasta' && entries
      entries = convert_to_fasta_format(entries, db)
    end

    print_text_page(entries)
  end

  private

  # Parses +str+ with Bio::FlatFile.auto and returns the entries that expose
  # a sequence (seq, aaseq or naseq) as FASTA text, 60 columns wide.
  def convert_to_fasta_format(str, db)
    require 'bio'
    require 'stringio'

    fasta = []

    entries = StringIO.new(str)
    Bio::FlatFile.auto(entries) do |ff|
      ff.each do |entry|
        seq = nil
        if entry.respond_to?(:seq)
          seq = entry.seq
        elsif entry.respond_to?(:aaseq)
          seq = entry.aaseq
        elsif entry.respond_to?(:naseq)
          seq = entry.naseq
        end
        if seq
          entry_id = entry.respond_to?(:entry_id) ? entry.entry_id : ''
          definition = entry.respond_to?(:definition) ? entry.definition : ''
          fasta << seq.to_fasta("#{db}:#{entry_id} #{definition}", 60)
        end
      end
    end
    return fasta.join
  end

  # Redirects the client to the DBGET www_bget page and exits.
  def goto_html_style_page(db, id_list, format)
    url = "http://www.genome.jp/dbget-bin/www_bget"
    opt = format == 'fasta' ? '-f+' : ''
    ids = id_list.join('%2B')
    print "Location: #{url}?#{opt}#{db}+#{ids}\n\n"
    exit
  end

  def check_style(style)
    error2(style) unless STYLE_PATTERN.match(style)
  end

  def check_format(format, db)
    error3(format, db) if format && !FORMAT_PATTERN.match(format)
  end

  def check_number_of_id(num)
    error5(num) if num > MAX_ID_NUM
  end

  def check_dbname(db)
    error1(db) unless list_databases.include?(db)
  end

end


# Answers the "info=..." server information requests.
class BioFetchInfo

  include BioFetchError
  include KeggAPI

  # The only information requests served. +info+ comes straight from the
  # query string, so it must never be dispatched unchecked: Object#send
  # would otherwise run any zero-argument method (exit, sleep, abort, ...).
  INFO_REQUESTS = %w[dbs formats maxids].freeze

  def initialize(info, db)
    @db = db
    error6(info) unless INFO_REQUESTS.include?(info)
    begin
      send(info)
    rescue
      # A failing request is reported as error 6, as before. The exit raised
      # inside print_text_page is a SystemExit and is not caught here.
      error6(info)
    end
  end

  private

  def dbs
    str = list_databases.sort.join(' ')
    print_text_page(str)
  end

  def formats
    fasta = " fasta" if check_fasta_ok
    str = "default#{fasta}"
    print_text_page(str)
  end

  def maxids
    str = MAX_ID_NUM.to_s
    print_text_page(str)
  end

  def check_fasta_ok
    # sequence databases supported by Bio::FlatFile.auto
    /genes|gb|genbank|genpept|rs|refseq|emb|sp|swiss|pir/.match(@db)
  end

end


# CGI front end: decides between the query form, an info response and an
# entry retrieval from the request parameters.
class BioFetchCGI

  def initialize(cgi)
    @cgi = cgi
    show_page
  end

  private

  def show_page
    if info.empty?
      if id_list.empty?
        show_query_page
      else
        show_result_page(db, id_list, style, format)
      end
    else
      show_info_page(info, db)
    end
  end

  # Renders the HTML form stored after __END__.
  def show_query_page
    html = HTML::Template.new
    html.set_html(DATA.read)
    html.param('max_id_num' => MAX_ID_NUM)
    @cgi.out do
      html.output
    end
  end

  def show_result_page(db, id_list, style, format)
    BioFetch.new(db, id_list, style, format)
  end

  def show_info_page(info, db)
    BioFetchInfo.new(info, db)
  end

  def info
    @cgi['info'].downcase
  end

  def db
    @cgi['db'].downcase
  end

  # IDs may be separated by any non-word character, not only ','. Empty
  # fragments produced by doubled or leading separators are dropped.
  def id_list
    @cgi['id'].split(/\W/).reject {|id| id.empty?}
  end

  def style
    s = @cgi['style'].downcase
    return s.empty? ? "html" : s
  end

  def format
    f = @cgi['format'].downcase
    return f.empty? ? "default" : f
  end

end


BioFetchCGI.new(CGI.new)


=begin

This program was created during BioHackathon 2002, Tucson and updated
in Cape Town :)

Rewritten in 2004 to use KEGG API as the bioruby.org server left from Kyoto
University (where DBGET runs) and the old version could not run without
having internally accessible DBGET server.

=end


__END__
BioFetch interface to GenomeNet/DBGET

BioFetch interface to GenomeNet/DBGET

This page allows you to retrieve up to entries at a time from various up-to-date biological databases.



Direct access

http://bioruby.org/cgi-bin/biofetch.rb?format=(default|fasta|...);style=(html|raw);db=(genbank|embl|...);id=ID[,ID,ID,...]

(NOTE: the option separator ';' can be '&')

format (optional)
default|fasta|...
style (required)
html|raw
db (required)
genbank|refseq|embl|swissprot|pir|prf|pdb|pdbstr|epd|transfac|prosite|pmd|litdb|omim|ligand|pathway|brite|genes|genome|linkdb|aaindex|...
id (required)
comma separated list of IDs

See the BioFetch specification for more details.

Server information

What databases are available?
http://bioruby.org/cgi-bin/biofetch.rb?info=dbs
What formats does the database X have?
http://bioruby.org/cgi-bin/biofetch.rb?info=formats;db=embl
How many entries can be retrieved simultaneously?
http://bioruby.org/cgi-bin/biofetch.rb?info=maxids

Examples

gb:AJ617376 (default/raw)
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genbank;id=AJ617376
gb:AJ617376 (fasta/raw)
http://bioruby.org/cgi-bin/biofetch.rb?format=fasta;style=raw;db=genbank;id=AJ617376
gb:AJ617376 (default/html)
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=html;db=genbank;id=AJ617376
gb:AJ617376,AJ617377 (default/raw, multiple)
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genbank;id=AJ617376,AJ617377
embl:BUM (default/raw)
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=embl;id=BUM
sp:CYC_BOVIN (default/raw)
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=swissprot;id=CYC_BOVIN
sp:CYC_BOVIN (fasta/raw)
http://bioruby.org/cgi-bin/biofetch.rb?format=fasta;style=raw;db=swissprot;id=CYC_BOVIN
genes:b0015 (default/raw)
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genes;id=b0015
ps:PS00028 (default/raw)
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=prosite;id=PS00028

Errors

Error1 sample : DB not found
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=nonexistent;id=AJ617376
Error2 sample : unknown style
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=nonexistent;db=genbank;id=AJ617376
Error3 sample : unknown format
http://bioruby.org/cgi-bin/biofetch.rb?format=nonexistent;style=raw;db=genbank;id=AJ617376
Error4 sample : ID not found
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genbank;id=nonexistent
Error5 sample : too many IDs
http://bioruby.org/cgi-bin/biofetch.rb?style=raw;db=genes;id=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51
Error6 sample : unknown info
http://bioruby.org/cgi-bin/biofetch.rb?info=nonexistent

Other BioFetch implementations


staff@BioRuby.org

bio-1.4.3.0001/sample/demo_blast_report.rb0000644000004100000410000002412512200110570020201 0ustar www-datawww-data# # = sample/demo_blast_report.rb - demonstration of Bio::Blast::Report, Bio::Blast::Default::Report, and Bio::Blast::WU::Report # # Copyright:: Copyright (C) 2003 Toshiaki Katayama # Copyright:: Copyright (C) 2003-2006,2008-2009 Naohisa Goto # License:: The Ruby License # # # == Description # # Demonstration of Bio::Blast::Report (NCBI BLAST XML format parser), # Bio::Blast::Default::Report (NCBI BLAST default (-m 0) format parser), # and Bio::Blast::WU::Report (WU-BLAST default format parser). # # == Usage # # Specify files containing BLAST results. # # $ ruby demo_blast_report.rb files... # # Example usage using test data: # # $ ruby -Ilib sample/demo_blast_report.rb test/data/blast/b0002.faa.m7 # $ ruby -Ilib sample/demo_blast_report.rb test/data/blast/b0002.faa.m0 # # == Development information # # The code was moved from lib/bio/appl/blast/report.rb, # lib/bio/appl/blast/format0.rb, and lib/bio/appl/blast/wublast.rb, # and modified. # require 'bio' # dummpy class to return specific object class Dummy def initialize(obj) @obj = obj end def size @obj end def inspect @obj.inspect end end #class Dummy # wrapper class to ignore error class Wrapper def initialize(obj) @obj = obj end def class @obj.class end def respond_to?(*arg) @obj.respond_to?(*arg) end def method_missing(meth, *arg, &block) begin @obj.__send__(meth, *arg, &block) rescue NoMethodError => evar Dummy.new(evar) end end end #class Wrapper def wrap(obj) Wrapper.new(obj) end # -m0: not defined in Bio::Blast::Default::Report ??? # +m0: newly added in Bio::Blast::Default::Report ??? # -WU: not defined in Bio::Blast::WU::Report ??? # +WU: newly added in Bio::Blast::WU::Report ??? Bio::FlatFile.open(ARGF) do |ff| puts "Detected file format: #{ff.dbclass}" unless ff.dbclass then ff.dbclass = Bio::Blast::Report puts "Input data may be tab-delimited format (-m 8)." 
end ff.each do |rep| rep = wrap(rep) #print "# === Bio::Blast::Default::Report\n" print "# === #{rep.class}\n" puts print " rep.program #=> "; p rep.program print " rep.version #=> "; p rep.version print " rep.reference #=> "; p rep.reference print " rep.notice [WU] #=> "; p rep.notice #+WU print " rep.db #=> "; p rep.db print " rep.query_id #=> "; p rep.query_id #-m0,-WU print " rep.query_def #=> "; p rep.query_def print " rep.query_len #=> "; p rep.query_len #puts print " rep.version_number #=> "; p rep.version_number #+m0,+WU print " rep.version_date #=> "; p rep.version_date #+m0,+WU puts print "# === Parameters\n" #puts print " rep.parameters #=> "; p rep.parameters #-m0 puts print " rep.matrix #=> "; p rep.matrix #-WU print " rep.expect #=> "; p rep.expect print " rep.inclusion #=> "; p rep.inclusion #-m0,-WU print " rep.sc_match #=> "; p rep.sc_match #-WU print " rep.sc_mismatch #=> "; p rep.sc_mismatch #-WU print " rep.gap_open #=> "; p rep.gap_open #-WU print " rep.gap_extend #=> "; p rep.gap_extend #-WU print " rep.filter #=> "; p rep.filter #-m0,-WU print " rep.pattern #=> "; p rep.pattern #-WU print " rep.entrez_query #=> "; p rep.entrez_query #-m0 #puts print " rep.pattern_positions #=> "; p rep.pattern_positions #+m0 puts print "# === Statistics (last iteration's)\n" #puts print " rep.statistics #=> "; p rep.statistics #-m0,-WU puts print " rep.db_num #=> "; p rep.db_num print " rep.db_len #=> "; p rep.db_len print " rep.hsp_len #=> "; p rep.hsp_len #-m0,-WU print " rep.eff_space #=> "; p rep.eff_space #-WU print " rep.kappa #=> "; p rep.kappa #-WU print " rep.lambda #=> "; p rep.lambda #-WU print " rep.entropy #=> "; p rep.entropy #-WU puts print " rep.num_hits #=> "; p rep.num_hits #+m0 print " rep.gapped_kappa #=> "; p rep.gapped_kappa #+m0 print " rep.gapped_lambda #=> "; p rep.gapped_lambda #+m0 print " rep.gapped_entropy #=> "; p rep.gapped_entropy #+m0 print " rep.posted_date #=> "; p rep.posted_date #+m0 puts print "# === Message (last 
iteration's)\n" puts print " rep.message #=> "; p rep.message #-WU #puts print " rep.converged? #=> "; p rep.converged? #+m0 puts print "# === Warning messages\n" print " rep.warnings [WU] #=> "; p rep.warnings #+WU print "# === Iterations\n" puts print " rep.itrerations.each do |itr|\n" puts rep.iterations.each do |itr| itr = wrap(itr) #print "# --- Bio::Blast::Default::Report::Iteration\n" print "# --- #{itr.class}\n" puts print " itr.num #=> "; p itr.num print " itr.statistics #=> "; p itr.statistics #-m0,-WU print " itr.warnings [WU] #=> "; p itr.warnings #+WU print " itr.message #=> "; p itr.message print " itr.hits.size #=> "; p itr.hits.size #puts print " itr.hits_newly_found.size #=> "; p((itr.hits_newly_found.size rescue nil)); #+m0 print " itr.hits_found_again.size #=> "; p((itr.hits_found_again.size rescue nil)); #+m0 if itr.respond_to?(:hits_for_pattern) and itr.hits_for_pattern then #+m0 itr.hits_for_pattern.each_with_index do |hp, hpi| print " itr.hits_for_pattern[#{hpi}].size #=> "; p hp.size; end end print " itr.converged? #=> "; p itr.converged? #+m0,+WU puts print " itr.hits.each do |hit|\n" puts itr.hits.each_with_index do |hit, i| hit = wrap(hit) #print "# --- Bio::Blast::Default::Report::Hit" print "# --- #{hit.class}" print " ([#{i}])\n" puts print " hit.num #=> "; p hit.num #-m0,-WU print " hit.hit_id #=> "; p hit.hit_id #-m0,-WU print " hit.len #=> "; p hit.len print " hit.definition #=> "; p hit.definition print " hit.accession #=> "; p hit.accession #-m0,-WU #puts print " hit.found_again? #=> "; p hit.found_again? 
#+m0,+WU print " hit.score [WU] #=> "; p hit.score #+WU print " hit.pvalue [WU] #=> "; p hit.pvalue #+WU print " hit.n_number [WU] #=> "; p hit.n_number #+WU print " --- compatible/shortcut ---\n" print " hit.query_id #=> "; p hit.query_id #-m0,-WU print " hit.query_def #=> "; p hit.query_def #-m0,-WU print " hit.query_len #=> "; p hit.query_len #-m0,-WU print " hit.target_id #=> "; p hit.target_id #-m0,-WU print " hit.target_def #=> "; p hit.target_def print " hit.target_len #=> "; p hit.target_len print " --- first HSP's values (shortcut) ---\n" print " hit.evalue #=> "; p hit.evalue print " hit.bit_score #=> "; p hit.bit_score print " hit.identity #=> "; p hit.identity print " hit.overlap #=> "; p hit.overlap #-m0,-WU print " hit.query_seq #=> "; p hit.query_seq print " hit.midline #=> "; p hit.midline print " hit.target_seq #=> "; p hit.target_seq print " hit.query_start #=> "; p hit.query_start print " hit.query_end #=> "; p hit.query_end print " hit.target_start #=> "; p hit.target_start print " hit.target_end #=> "; p hit.target_end print " hit.lap_at #=> "; p hit.lap_at print " --- first HSP's vaules (shortcut) ---\n" print " --- compatible/shortcut ---\n" puts print " hit.hsps.size #=> "; p hit.hsps.size if hit.hsps.size == 0 then puts " (HSP not found: please see blastall's -b and -v options)" puts else puts print " hit.hsps.each do |hsp|\n" puts hit.hsps.each_with_index do |hsp, j| hsp = wrap(hsp) #print "# --- Bio::Blast::Default::Report::Hsp" print "# --- #{hsp.class}" print " ([#{j}])\n" puts print " hsp.num #=> "; p hsp.num #-m0,-WU print " hsp.bit_score #=> "; p hsp.bit_score print " hsp.score #=> "; p hsp.score print " hsp.evalue #=> "; p hsp.evalue print " hsp.identity #=> "; p hsp.identity print " hsp.gaps #=> "; p hsp.gaps print " hsp.positive #=> "; p hsp.positive print " hsp.align_len #=> "; p hsp.align_len print " hsp.density #=> "; p hsp.density #-m0,-WU print " hsp.pvalue [WU]#=> "; p hsp.pvalue #+WU print " hsp.p_sum_n [WU]#=> "; p 
hsp.p_sum_n #+WU print " hsp.query_frame #=> "; p hsp.query_frame print " hsp.query_from #=> "; p hsp.query_from print " hsp.query_to #=> "; p hsp.query_to print " hsp.hit_frame #=> "; p hsp.hit_frame print " hsp.hit_from #=> "; p hsp.hit_from print " hsp.hit_to #=> "; p hsp.hit_to print " hsp.pattern_from#=> "; p hsp.pattern_from #-m0,-WU print " hsp.pattern_to #=> "; p hsp.pattern_to #-m0,-WU print " hsp.qseq #=> "; p hsp.qseq print " hsp.midline #=> "; p hsp.midline print " hsp.hseq #=> "; p hsp.hseq puts print " hsp.percent_identity #=> "; p hsp.percent_identity print " hsp.mismatch_count #=> "; p hsp.mismatch_count #-m0,-WU # print " hsp.query_strand #=> "; p hsp.query_strand #+m0,+WU print " hsp.hit_strand #=> "; p hsp.hit_strand #+m0,+WU print " hsp.percent_positive #=> "; p hsp.percent_positive #+m0,+WU print " hsp.percent_gaps #=> "; p hsp.percent_gaps #+m0,+WU puts end #each end #if hit.hsps.size == 0 end end end #ff.each end #Bio::FlatFile.open bio-1.4.3.0001/sample/demo_genscan_report.rb0000644000004100000410000001406212200110570020511 0ustar www-datawww-data# # = sample/demo_genscan_report.rb - demonstration of Bio::Genscan::Report # # Copyright:: Copyright (C) 2003 # Mitsuteru C. Nakao # License:: The Ruby License # # # == Description # # Demonstration of Bio::Genscan::Report, parser class for Genscan output. # # == Usage # # Usage 1: Without arguments, demonstrates using preset sample data. # # $ ruby demo_genscan.rb # # Usage 2: When a "-" is specified as the argument, read data from stdin. # # $ cat testdata | ruby demo_genscan.rb - # # Usage 3: Specify a file containing a Genscan output. # # $ ruby demo_genscan.rb file # # Example usage using test data: # # $ ruby -Ilib sample/demo_genscan.rb test/data/genscan/sample.report # # == Development information # # The code was moved from lib/bio/appl/genscan/report.rb and modified: # * Changed the way to read preset sample data. # require 'bio' #if __FILE__ == $0 if ARGV.empty? 
then report = DATA.read elsif ARGV.size == 1 and ARGV[0] == '-' then ARGV.shift report = $<.read else report = ARGF.read end puts "= class Bio::Genscan::Report " report = Bio::Genscan::Report.new(report) print " report.genscan_version #=> " p report.genscan_version print " report.date_run #=> " p report.date_run print " report.time #=> " p report.time print " report.query_name #=> " p report.query_name print " report.length #=> " p report.length print " report.gccontent #=> " p report.gccontent print " report.isochore #=> " p report.isochore print " report.matrix #=> " p report.matrix puts " report.predictions (Array of Bio::Genscan::Report::Gene) " print " report.predictions.size #=> " p report.predictions.size report.predictions.each {|gene| puts "\n== class Bio::Genscan::Report::Gene " print " gene.number #=> " p gene.number print " gene.aaseq (Bio::FastaFormat) #=> " p gene.aaseq print " gene.naseq (Bio::FastaFormat) #=> " p gene.naseq print " ene.promoter (Bio::Genscan::Report::Exon) #=> " p gene.promoter print " gene.polyA (Bio::Genscan::Report::Exon) #=> " p gene.polyA puts " gene.exons (Array of Bio::Genscan::Report::Exon) " print " gene.exons.size #=> " p gene.exons.size gene.exons.each {|exon| puts "\n== class Bio::Genscan::Report::Exon " print " exon.number #=> " p exon.number print " exon.exon_type #=> " p exon.exon_type print " exon.exon_type_long #=> " p exon.exon_type_long print " exon.strand #=> " p exon.strand print " exon.first #=> " p exon.first print " exon.last #=> " p exon.last print " exon.range (Range) #=> " p exon.range print " exon.frame #=> " p exon.frame print " exon.phase #=> " p exon.phase print " exon.acceptor_score #=> " p exon.acceptor_score print " exon.donor_score #=> " p exon.donor_score print " exon.initiation_score #=> " p exon.initiation_score print " exon.termination_score #=> " p exon.termination_score print " exon.score #=> " p exon.score print " exon.p_value #=> " p exon.p_value print " exon.t_score #=> " p exon.t_score 
puts } puts } #end ### Sample Genscan report is attached below. ### The lines after the "__END__" can be accessed by using "DATA". __END__ GENSCAN 1.0 Date run: 30-May-103 Time: 14:06:28 Sequence HUMRASH : 12942 bp : 68.17% C+G : Isochore 4 (57 - 100 C+G%) Parameter matrix: HumanIso.smat Predicted genes/exons: Gn.Ex Type S .Begin ...End .Len Fr Ph I/Ac Do/T CodRg P.... Tscr.. ----- ---- - ------ ------ ---- -- -- ---- ---- ----- ----- ------ 1.01 Init + 1664 1774 111 1 0 94 83 212 0.997 21.33 1.02 Intr + 2042 2220 179 1 2 104 66 408 0.997 40.12 1.03 Intr + 2374 2533 160 1 1 89 94 302 0.999 32.08 1.04 Term + 3231 3350 120 2 0 115 48 202 0.980 18.31 1.05 PlyA + 3722 3727 6 -5.80 2.00 Prom + 6469 6508 40 -7.92 2.01 Init + 8153 8263 111 1 0 94 83 212 0.998 21.33 2.02 Intr + 8531 8709 179 1 2 104 66 408 0.997 40.12 2.03 Intr + 8863 9022 160 1 1 89 94 302 0.999 32.08 2.04 Term + 9720 9839 120 2 0 115 48 202 0.961 18.31 Predicted peptide sequence(s): Predicted coding sequence(s): >HUMRASH|GENSCAN_predicted_peptide_1|189_aa MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAG QEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDL AARTVESRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQHKLRKLNPPDESGPG CMSCKCVLS >HUMRASH|GENSCAN_predicted_CDS_1|570_bp atgacggaatataagctggtggtggtgggcgccggcggtgtgggcaagagtgcgctgacc atccagctgatccagaaccattttgtggacgaatacgaccccactatagaggattcctac cggaagcaggtggtcattgatggggagacgtgcctgttggacatcctggataccgccggc caggaggagtacagcgccatgcgggaccagtacatgcgcaccggggagggcttcctgtgt gtgtttgccatcaacaacaccaagtcttttgaggacatccaccagtacagggagcagatc aaacgggtgaaggactcggatgacgtgcccatggtgctggtggggaacaagtgtgacctg gctgcacgcactgtggaatctcggcaggctcaggacctcgcccgaagctacggcatcccc tacatcgagacctcggccaagacccggcagggagtggaggatgccttctacacgttggtg cgtgagatccggcagcacaagctgcggaagctgaaccctcctgatgagagtggccccggc tgcatgagctgcaagtgtgtgctctcctga >HUMRASH|GENSCAN_predicted_peptide_2|189_aa MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAG 
QEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDL AARTVESRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQHKLRKLNPPDESGPG CMSCKCVLS >HUMRASH|GENSCAN_predicted_CDS_2|570_bp atgacggaatataagctggtggtggtgggcgccggcggtgtgggcaagagtgcgctgacc atccagctgatccagaaccattttgtggacgaatacgaccccactatagaggattcctac cggaagcaggtggtcattgatggggagacgtgcctgttggacatcctggataccgccggc caggaggagtacagcgccatgcgggaccagtacatgcgcaccggggagggcttcctgtgt gtgtttgccatcaacaacaccaagtcttttgaggacatccaccagtacagggagcagatc aaacgggtgaaggactcggatgacgtgcccatggtgctggtggggaacaagtgtgacctg gctgcacgcactgtggaatctcggcaggctcaggacctcgcccgaagctacggcatcccc tacatcgagacctcggccaagacccggcagggagtggaggatgccttctacacgttggtg cgtgagatccggcagcacaagctgcggaagctgaaccctcctgatgagagtggccccggc tgcatgagctgcaagtgtgtgctctcctga bio-1.4.3.0001/sample/demo_kegg_genome.rb0000644000004100000410000000267612200110570017757 0ustar www-datawww-data# # = sample/demo_kegg_genome.rb - demonstration of Bio::KEGG::GENOME # # Copyright:: Copyright (C) 2001, 2002, 2007 Toshiaki Katayama # License:: The Ruby License # # # == Description # # Demonstration of Bio::KEGG::GENOME, a parser class for the KEGG/GENOME # genome database. # # == Usage # # Specify files containing KEGG GENOME data. # # $ ruby demo_kegg_genome.rb files... # # == Example of running this script # # Download test data. # # $ ruby -Ilib bin/br_biofetch.rb genome eco > eco.genome # $ ruby -Ilib bin/br_biofetch.rb genome hsa > hsa.genome # # Run this script. # # $ ruby -Ilib sample/demo_kegg_genome.rb eco.genome hsa.genome # # == Development information # # The code was moved from lib/bio/db/kegg/genome.rb and modified. 
# require 'bio' #if __FILE__ == $0 begin require 'pp' def p(arg); pp(arg); end rescue LoadError end #require 'bio/io/flatfile' ff = Bio::FlatFile.new(Bio::KEGG::GENOME, ARGF) ff.each do |genome| puts "### Tags" p genome.tags [ %w( ENTRY entry_id ), %w( NAME name ), %w( DEFINITION definition ), %w( TAXONOMY taxonomy taxid lineage ), %w( REFERENCE references ), %w( CHROMOSOME chromosomes ), %w( PLASMID plasmids ), %w( STATISTICS statistics nalen num_gene num_rna ), ].each do |x| puts "### " + x.shift x.each do |m| p genome.__send__(m) end end puts "=" * 78 end #end bio-1.4.3.0001/sample/demo_go.rb0000644000004100000410000000373412200110570016111 0ustar www-datawww-data# # = sample/demo_go.rb - demonstration of Bio::GO, classes for Gene Ontology # # Copyright:: Copyright (C) 2003 # Mitsuteru C. Nakao # License:: The Ruby License # # # == Description # # Demonstration of Bio::GO, classes for Gene Ontology. # # == Requirement # # Internet connection is needed. # # == Usage # # Simply run this script. # # $ ruby demo_go.rb # # == Note # # The code was originally written in 2003, and it can only parse GO format # that is deprecated and no new data is available after August 2009. # # == Development information # # The code was moved from lib/bio/db/go.rb. 
# require 'bio' #if __FILE__ == $0 def wget(url) Bio::Command.read_uri(url) end go_c_url = 'http://www.geneontology.org/ontology/component.ontology' ga_url = 'http://www.geneontology.org/gene-associations/gene_association.sgd.gz' e2g_url = 'http://www.geneontology.org/external2go/spkw2go' puts "\n #==> Bio::GO::Ontology" p go_c_url component_ontology = wget(go_c_url) comp = Bio::GO::Ontology.new(component_ontology) [['0003673', '0005632'], ['0003673', '0005619'], ['0003673', '0004649']].each {|pair| puts p pair p [:pair, pair.map {|i| [comp.id2term[i], comp.goid2term(i)] }] puts "\n #==> comp.bfs_shortest_path(pair[0], pair[1])" p comp.bfs_shortest_path(pair[0], pair[1]) } puts "\n #==> Bio::GO::External2go" p e2g_url spkw2go = Bio::GO::External2go.parser(wget(e2g_url)) puts "\n #==> spkw2go.dbs" p spkw2go.dbs puts "\n #==> spkw2go[1]" p spkw2go[1] require 'zlib' puts "\n #==> Bio::GO::GeenAssociation" p ga_url # # The workaround (Zlib::MAX_WBITS + 32) is taken from: # http://d.hatena.ne.jp/ksef-3go/20070924/1190563143 # ga = Zlib::Inflate.new(Zlib::MAX_WBITS + 32).inflate(wget(ga_url)) #ga = Zlib::Inflate.inflate(wget(ga_url)) ga = Bio::GO::GeneAssociation.parser(ga) puts "\n #==> ga.size" p ga.size puts "\n #==> ga[100]" p ga[100] #end bio-1.4.3.0001/sample/demo_psort.rb0000644000004100000410000000657112200110570016655 0ustar www-datawww-data# # = sample/demo_psort.rb - demonstration of Bio::PSORT, client for PSORT WWW server # # Copyright:: Copyright (C) 2003-2006 # Mitsuteru C. Nakao # License:: The Ruby License # # # == Description # # Demonstration of Bio::PSORT, client for PSORT (protein sorting site # prediction systems) WWW server. # # == Requirements # # Internet connection is needed. # # == Usage # # Simply run this script. # # $ ruby demo_psort.rb # # == Development information # # The code was moved from lib/bio/appl/psort.rb. 
# require 'bio' #if __FILE__ == $0 #begin # require 'psort/report.rb' #rescue LoadError #end seq = ">hoge mit MALEPIDYTT RDEDDLDENE LLMKISNAAG SSRVNDNNDD LTFVENDKII ARYSIQTSSK QQGKASTPPV EEAEEAAPQL PSRSSAAPPP PPRRATPEKK DVKDLKSKFE GLAASEKEEE EMENKFAPPP KKSEPTIISP KPFSKPQEPV FKGYHVQVTA HSREIDAEYL KIVRGSDPDT TWLIISPNAK KEYEPESTGS KKSFTPSKSP APVSKKEPVK TPSPAPAAKI PKENPWATAE YDYDAAEDNE NIEFVDDDWW LGELEKDGSK GLFPSNYVSL LPSRNVASGA PVQKEEPEQE SFHDFLQLFD ETKVQYGLAR RKAKQNSGNA ETKAEAPKPE VPEDEPEGEP DDWNEPELKE RDFDQAPLKP NQSSYKPIGK IDLQKVIAEE KAKEDPRLVQ DYKKIGNPLP GMHIEADNEE EPEENDDDWD DDEDEAAQPP ANFAAVANNL KPTAAGSKID DDKVIKGFRN EKSPAQLWAE VSPPGSDVEK IIIIGWCPDS APLKTRASFA PSSDIANLKN ESKLKRDSEF NSFLGTTKPP SMTESSLKND KAEEAEQPKT EIAPSLPSRN SIPAPKQEEA PEQAPEEEIE GN " Seq1 = ">hgoe LTFVENDKII NI " puts "\n Bio::PSORT::PSORT" puts "\n ==> p serv = Bio::PSORT::PSORT.imsut" p serv = Bio::PSORT::PSORT1.imsut puts "\n ==> p serv.class " p serv.class puts "\n ==> p serv.title = 'Query_title_splited_by_white space'" p serv.title = 'Query_title_splited_by_white space' puts "\n ==> p serv.exec(seq, false) " p serv.exec(seq, false) puts "\n ==> p serv.exec(seq) " p serv.exec(seq) puts "\n ==> p report = serv.exec(Bio::FastaFormat.new(seq)) " p report = serv.exec(Bio::FastaFormat.new(seq)) puts "\n ==> p report.class" p report.class puts "\n ==> p report_raw = serv.exec(Bio::FastaFormat.new(seq), false) " p report_raw = serv.exec(Bio::FastaFormat.new(seq), false) puts "\n ==> p report_raw.class" p report_raw.class puts "\n ==> p report.methods" p report.methods methods = ['entry_id', 'origin', 'title', 'sequence','result_info', 'reasoning', 'final_result', 'raw'] methods.each do |method| puts "\n ==> p report.#{method}" p eval("report.#{method}") end puts "\n Bio::PSORT::PSORT2" puts "\n ==> p serv = Bio::PSORT::PSORT2.imsut" p serv = Bio::PSORT::PSORT2.imsut puts "\n ==> p serv.class " p serv.class puts "\n ==> p seq " p seq puts "\n ==> p serv.title = 'Query_title_splited_by_white space'" 
p serv.title = 'Query_title_splited_by_white space' puts "\n ==> p serv.exec(seq) # parsed report" p serv.exec(seq) puts "\n ==> p report = serv.exec(Bio::FastaFormat.new(seq)) # parsed report" p report = serv.exec(Bio::FastaFormat.new(seq)) puts "\n ==> p serv.exec(seq, false) # report in plain text" p serv.exec(seq, false) puts "\n ==> p report_raw = serv.exec(Bio::FastaFormat.new(seq), false) # report in plain text" p report_raw = serv.exec(Bio::FastaFormat.new(seq), false) puts "\n ==> p report.methods" p report.methods methods = ['entry_id', 'scl', 'definition', 'seq', 'features', 'prob', 'pred', 'k', 'raw'] methods.each do |method| puts "\n ==> p report.#{method}" p eval("report.#{method}") end #end bio-1.4.3.0001/sample/na2aa.rb0000755000004100000410000000173612200110570015465 0ustar www-datawww-data#!/usr/bin/env ruby # # translate.rb - translate any NA input into AA FASTA format # # Copyright (C) 2008 KATAYAMA Toshiaki & Pjotr Prins # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # $Id: na2aa.rb,v 1.1 2008/02/06 16:25:53 pjotr Exp $ # require 'bio' require 'pp' include Bio ARGV.each do | fn | Bio::FlatFile.auto(fn).each do | item | seq = Sequence::NA.new(item.data) aa = seq.translate aa.gsub!(/X/,'-') rec = Bio::FastaFormat.new('> '+item.definition+"\n"+aa) print rec end end bio-1.4.3.0001/sample/pmfetch.rb0000755000004100000410000000254312200110570016126 0ustar www-datawww-data#!/usr/bin/env ruby # # pmfetch.rb - generate BibTeX format reference list by PubMed ID list # # Copyright (C) 2002 KATAYAMA Toshiaki # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id:$ # require 'bio' Bio::NCBI.default_email = 'staff@bioruby.org' if ARGV[0] =~ /\A\-f/ ARGV.shift form = ARGV.shift else form = 'bibtex' end ARGV.each do |id| entries = Bio::PubMed.efetch(id) if entries and entries.size == 1 then entry = entries[0] else # dummy entry if not found or possibly incorrect result entry = 'PMID- ' end case form when 'medline' puts entry else puts Bio::MEDLINE.new(entry).reference.__send__(form.intern) end print "\n" end bio-1.4.3.0001/sample/genome2rb.rb0000755000004100000410000000161612200110570016360 0ustar www-datawww-data#!/usr/bin/env ruby # # genome2rb.rb - used to generate contents of the bio/data/keggorg.rb # # Usage: # # % genome2rb.rb genome | sort # # Copyright (C) 2002 KATAYAMA Toshiaki # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # $Id: genome2rb.rb,v 1.1 2002/03/04 08:14:45 katayama Exp $ # require 'bio' Bio::FlatFile.new(Bio::KEGG::GENOME,ARGF).each do |x| puts " '#{x.entry_id}' => [ '#{x.name}', '#{x.definition}' ]," end bio-1.4.3.0001/sample/demo_ncbi_rest.rb0000644000004100000410000000675312200110570017460 0ustar www-datawww-data# # = sample/demo_ncbi_rest.rb - demonstration of Bio::NCBI::REST, NCBI E-Utilities client # # Copyright:: Copyright (C) 2008 Toshiaki Katayama # License:: The Ruby License # # # == Description # # Demonstration of Bio::NCBI::REST, NCBI E-Utilities client. # # == Requirements # # Internet connection is needed. # # == Usage # # Simply run this script. # # $ ruby demo_ncbi_rest.rb # # == Development information # # The code was moved from lib/bio/io/ncbirest.rb. # require 'bio' Bio::NCBI.default_email = 'staff@bioruby.org' #if __FILE__ == $0 gbopts = {"db"=>"nuccore", "rettype"=>"gb"} pmopts = {"db"=>"pubmed", "rettype"=>"medline"} count = {"rettype" => "count"} xml = {"retmode"=>"xml"} max = {"retmax"=>5} puts "=== class methods ===" puts "--- Search NCBI by E-Utils ---" puts Time.now puts "# count of 'tardigrada' in nuccore" puts Bio::NCBI::REST.esearch("tardigrada", gbopts.merge(count)) puts Time.now puts "# max 5 'tardigrada' entries in nuccore" puts Bio::NCBI::REST.esearch("tardigrada", gbopts.merge(max)) puts Time.now puts "# count of 'yeast kinase' in nuccore" puts Bio::NCBI::REST.esearch("yeast kinase", gbopts.merge(count)) puts Time.now puts "# max 5 'yeast kinase' entries in nuccore (XML)" puts Bio::NCBI::REST.esearch("yeast kinase", gbopts.merge(xml).merge(max)) puts Time.now puts "# count of 'genome&analysis|bioinformatics' in pubmed" puts Bio::NCBI::REST.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(count)) puts Time.now puts "# max 5 'genome&analysis|bioinformatics' entries in pubmed (XML)" puts Bio::NCBI::REST.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(xml).merge(max)) puts Time.now 
Bio::NCBI::REST.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(max)).each do |x| puts "# each of 5 'genome&analysis|bioinformatics' entries in pubmed" puts x end puts "--- Retrieve NCBI entry by E-Utils ---" puts Time.now puts "# '185041' entry in nuccore" puts Bio::NCBI::REST.efetch("185041", gbopts) puts Time.now puts "# 'J00231' entry in nuccore (XML)" puts Bio::NCBI::REST.efetch("J00231", gbopts.merge(xml)) puts Time.now puts "# 16381885 entry in pubmed" puts Bio::NCBI::REST.efetch(16381885, pmopts) puts Time.now puts "# '16381885' entry in pubmed" puts Bio::NCBI::REST.efetch("16381885", pmopts) puts Time.now puts "# [10592173,14693808] entries in pubmed" puts Bio::NCBI::REST.efetch([10592173, 14693808], pmopts) puts Time.now puts "# [10592173,14693808] entries in pubmed (XML)" puts Bio::NCBI::REST.efetch([10592173, 14693808], pmopts.merge(xml)) puts "=== instance methods ===" ncbi = Bio::NCBI::REST.new puts "--- Search NCBI by E-Utils ---" puts Time.now puts "# count of 'genome&analysis|bioinformatics' in pubmed" puts ncbi.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(count)) puts Time.now puts "# max 5 'genome&analysis|bioinformatics' entries in pubmed" puts ncbi.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(max)) puts Time.now ncbi.esearch("(genome AND analysis) OR bioinformatics", pmopts).each do |x| puts "# each 'genome&analysis|bioinformatics' entries in pubmed" puts x end puts "--- Retrieve NCBI entry by E-Utils ---" puts Time.now puts "# 16381885 entry in pubmed" puts ncbi.efetch(16381885, pmopts) puts Time.now puts "# [10592173,14693808] entries in pubmed" puts ncbi.efetch([10592173, 14693808], pmopts) #end bio-1.4.3.0001/sample/demo_prosite.rb0000644000004100000410000000416612200110570017171 0ustar www-datawww-data# # = sample/demo_prosite.rb - demonstration of Bio::PROSITE # # Copyright:: Copyright (C) 2001 Toshiaki Katayama # License:: The Ruby License # # # == Description # # Demonstration of 
Bio::PROSITE, parser class for PROSITE database entry. # # == Usage # # Specify files containing PROSITE data. # # $ ruby demo_prosite.rb files... # # Example usage using test data: # # $ ruby -Ilib sample/demo_prosite.rb test/data/prosite/prosite.dat # # == Development information # # The code was moved from lib/bio/db/prosite.rb. # require 'bio' begin require 'pp' alias p pp rescue LoadError end Bio::FlatFile.foreach(Bio::PROSITE, ARGF) do |ps| puts "### ps = Bio::PROSITE.new(str)" list = %w( name division ac entry_id dt date de definition pa pattern ma profile ru rule nr statistics release swissprot_release_number swissprot_release_sequences total total_hits total_sequences positive positive_hits positive_sequences unknown unknown_hits unknown_sequences false_pos false_positive_hits false_positive_sequences false_neg false_negative_hits partial cc comment max_repeat site skip_flag dr sp_xref pdb_xref pdoc_xref ) list.each do |method| puts ">>> #{method}" p ps.__send__(method) end puts ">>> taxon_range" p ps.taxon_range puts ">>> taxon_range(expand)" p ps.taxon_range(true) puts ">>> list_truepositive" p ps.list_truepositive puts ">>> list_truepositive(by_name)" p ps.list_truepositive(true) puts ">>> list_falsenegative" p ps.list_falsenegative puts ">>> list_falsenegative(by_name)" p ps.list_falsenegative(true) puts ">>> list_falsepositive" p ps.list_falsepositive puts ">>> list_falsepositive(by_name)" p ps.list_falsepositive(true) puts ">>> list_potentialhit" p ps.list_potentialhit puts ">>> list_potentialhit(by_name)" p ps.list_potentialhit(true) puts ">>> list_unknown" p ps.list_unknown puts ">>> list_unknown(by_name)" p ps.list_unknown(true) puts "=" * 78 end bio-1.4.3.0001/sample/gt2fasta.rb0000755000004100000410000000227712200110570016217 0ustar www-datawww-data#!/usr/bin/env ruby # # gt2fasta.rb - convert GenBank translations into FASTA format (pep) # # Copyright (C) 2001 KATAYAMA Toshiaki # # This program is free software; you can redistribute it and/or 
modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # $Id: gt2fasta.rb,v 0.3 2002/04/15 03:06:17 k Exp $ # require 'bio/io/flatfile' require 'bio/feature' require 'bio/db/genbank' include Bio ff = FlatFile.new(GenBank, ARGF) while gb = ff.next_entry orf = 0 gb.features.each do |f| f = f.assoc if aaseq = f['translation'] orf += 1 gene = [ f['gene'], f['product'], f['note'], f['function'] ].compact.join(', ') definition = "gp:#{gb.entry_id}_#{orf} #{gene} [#{gb.organism}]" print aaseq.to_fasta(definition, 70) end end end bio-1.4.3.0001/sample/demo_sosui_report.rb0000644000004100000410000000357212200110570020241 0ustar www-datawww-data# # = sample/demo_sosui_report.rb - demonstration of Bio::SOSUI::Report # # Copyright:: Copyright (C) 2003 # Mitsuteru C. Nakao # License:: The Ruby License # # # == Description # # Demonstration of Bio::SOSUI::Report, SOSUI output parser. # # SOSUI performs classification and secondary structures prediction # of membrane proteins. # # == Usage # # Usage 1: Without arguments, runs demo using preset example data. # # $ ruby demo_sosui_report.rb # # Usage 2: Specify files containing SOSUI reports. # # $ ruby demo_sosui_report.rb files... # # Example usage using test data: # # $ ruby -Ilib sample/demo_sosui_report.rb test/data/SOSUI/sample.report # # == References # # * http://bp.nuap.nagoya-u.ac.jp/sosui/ # # == Development information # # The code was moved from lib/bio/appl/sosui/report.rb, and modified as below: # * Disables internal sample data when arguments are specified. # * Method name is changed. # * Bug fix about tmhs demo. 
require 'bio' begin require 'pp' alias p pp rescue LoadError end sample = <HOGE1 MEMBRANE PROTEIN NUMBER OF TM HELIX = 6 TM 1 12- 34 SECONDARY LLVPILLPEKCYDQLFVQWDLLH TM 2 36- 58 PRIMARY PCLKILLSKGLGLGIVAGSLLVK TM 3 102- 124 SECONDARY SWGEALFLMLQTITICFLVMHYR TM 4 126- 148 PRIMARY QTVKGVAFLACYGLVLLVLLSPL TM 5 152- 174 SECONDARY TVVTLLQASNVPAVVVGRLLQAA TM 6 214- 236 SECONDARY AGTFVVSSLCNGLIAAQLLFYWN >HOGE2 SOLUBLE PROTEIN HOGE def demo_sosui_report(ent) puts '===' puts ent puts '===' sosui = Bio::SOSUI::Report.new(ent) p [:entry_id, sosui.entry_id] p [:prediction, sosui.prediction] p [:tmhs, sosui.tmhs] end if ARGV.empty? then sample.split(/#{Bio::SOSUI::Report::DELIMITER}/).each {|ent| demo_sosui_report(ent) } else while ent = $<.gets(Bio::SOSUI::Report::DELIMITER) demo_sosui_report(ent) end end bio-1.4.3.0001/sample/fastasort.rb0000755000004100000410000000337312200110570016510 0ustar www-datawww-data#!/usr/bin/env ruby # # fastasort: Sorts a FASTA file (in fact it can use any flat file input supported # by BIORUBY) while modifying the definition of each record in the # process so it is suitable for processing with (for example) pal2nal # and PAML. # # Copyright (C) 2008 KATAYAMA Toshiaki & Pjotr Prins # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # $Id: fastasort.rb,v 1.2 2008/05/19 12:22:05 pjotr Exp $ # require 'bio' include Bio table = Hash.new # table to sort objects ARGV.each do | fn | Bio::FlatFile.auto(fn).each do | item | # Some procession of the definition for external programs (just # an example): # strip JALView extension from definition e.g. .../1-212 if item.definition =~ /\/\d+-\d+$/ item.definition = $` end # substitute slashes: definition = item.definition.gsub(/\//,'-') # substitute quotes and ampersands: definition = item.definition.gsub(/['"&]/,'x') # prefix letters if the first position is a number: definition = 'seq'+definition if definition =~ /^\d/ # Now add the data to the sort table table[definition] = item.data end end # Output sorted table table.sort.each do | definition, data | rec = Bio::FastaFormat.new('> '+definition.strip+"\n"+data) print rec end bio-1.4.3.0001/sample/psortplot_html.rb0000644000004100000410000001171212200110570017565 0ustar www-datawww-data#!/usr/bin/env ruby # # psortplot_html.rb - A KEGG API demo script. Generates a HTML file of # genes marked by PSORT II predictions onto a # KEGG/PATHWAY map. # # Usage: # # % ruby psortplot_html.rb # % cat sce00010_psort2.html # % ruby psortplot_html.rb path:eco00010 # % cat eco00010_psort2.html # # Copyright:: Copyright (C) 2005 # Mitsuteru C. 
Nakao # License:: The Ruby License # # $Id: psortplot_html.rb,v 1.3 2007/04/05 23:35:42 trevor Exp $ # require 'bio' class KEGG DBGET_BASEURI = 'http://kegg.com/dbget-bin' WWW_BGET_BASEURI = DBGET_BASEURI + '/www_bget' WWW_PATHWAY_BASEURI = DBGET_BASEURI + '/get_pathway' # path := path:sce00010 def self.link_pathway(path0) path, path = path0.split(':') org_name = path.scan(/(^\w{3})/).to_s mapno = path.sub(org_name, '') str = "#{path0}" end # ec_num := ec:1.2.3.4 def self.link_ec(ec_num) ec = ec_num.sub(/^ec:/, '') str = "#{ec_num}" return str end # gene := eco:b0002 def self.link_genes(gene) org_name, gene_name = gene.split(':') str = "#{gene}" return str end end class PSORT COLOR_Palette = { 'csk' => "#FF0000", # 'cytoskeletal' 'cyt' => "#FF8000", # 'cytoplasmic' 'nuc' => "#FFFF00", # 'nuclear' 'mit' => "#80FF00", # 'mitochondrial' 'ves' => "#00FF00", # 'vesicles of secretory system' 'end' => "#00FF80", # 'endoplasmic reticulum' 'gol' => "#00FFFF", # 'Golgi' 'vac' => "#0080FF", # 'vacuolar' 'pla' => "#0000FF", # 'plasma membrane' 'pox' => "#8000FF", # 'peroxisomal' 'exc' => "#FF00FF", # 'extracellular, including cell wall' '---' => "#FF0080" # 'other' } end keggapi = Bio::KEGG::API.new psort2serv = Bio::PSORT::PSORT2.imsut # Obtains a list of genes on specified pathway pathway = ARGV.shift || "path:sce00010" genes = keggapi.get_genes_by_pathway(pathway) scl = Hash.new # protein subcelluler localizations ec = Hash.new # EC numbers serial = 0 sync_default = $stdout.sync $stdout.sync = true genes.each do |gene| print "#{(serial += 1).to_s.rjust(genes.size.to_s.size)}\t#{gene}\t" # Obtains amino acid sequence from KEGG GENES entry aaseq = keggapi.get_aaseqs([gene]) # Predicts protein subcellualr localization result = psort2serv.exec(aaseq) scl[gene] = result.pred print "#{scl[gene]}\t" # Obtains the EC number from KEGG GENES entry ec[gene] = keggapi.get_enzymes_by_gene(gene) puts "#{ec[gene].inspect}" end $stdout.sync = sync_default fg_list = Array.new bg_list = 
Array.new genes.each do |gene| fg_list << "#FF0000" bg_list << PSORT::COLOR_Palette[scl[gene]] end # coloring KEGG pathway according to gene's localization url = keggapi.color_pathway_by_objects(pathway, genes, fg_list, bg_list) puts "#{url} downloaded." # remove "path:" prefix from pathway_id path_code = pathway.sub(/^path:/, '') # save the result image image_file = "#{path_code}_psort2.gif" begin keggapi.save_image(url, image_file) end # create html with a color palette html = < PSORT II prediction protein subcellular localization map of KEGG/PATHWAY (#{pathway})

  • PSORT II prediction protein subcellular localization map of KEGG/PATHWAY (#{KEGG.link_pathway(pathway)})
  • END # generate gene table with localization names = Bio::PSORT::PSORT2::SclNames multi_genes = Hash.new(0) ec.values.flatten.sort.uniq.each do |ec_num| ec.find_all {|x| x[1].include?(ec_num) }.each do |gene| gene = gene[0] loc = scl[gene] color = PSORT::COLOR_Palette[loc] name = names[loc] multi_genes[gene] += 1 html += < END end end html += <
    EC Gene Localization
    #{multi_genes[gene]} #{KEGG.link_ec(ec_num)} #{KEGG.link_genes(gene)} #{name}
    END # generate color code table also PSORT::COLOR_Palette.sort.each do |code, color| html += < END end html += <
    Code Color
    #{code} #{names[code]}

    END # save generated HTML file html_file = "#{path_code}_psort2.html" File.open(html_file, "w+") do |file| file.puts html end puts "Open #{html_file}" bio-1.4.3.0001/sample/fasta2tab.rb0000755000004100000410000000452512200110570016351 0ustar www-datawww-data#!/usr/bin/env ruby # # fasta2tab.rb - convert FASTA (-m 6) output into tab delimited data for MySQL # # Usage: # # % fasta2tab.rb FASTA-output-file[s] > fasta_results.tab # % mysql < fasta_results.sql (use sample at the end of this file) # # Format accepted: # # % fasta3[3][_t] -Q -H -m 6 query.f target.f ktup > FASTA-output-file # # Copyright (C) 2001 KATAYAMA Toshiaki # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # $Id: fasta2tab.rb,v 0.1 2001/06/21 08:21:58 katayama Exp $ # while gets # query if /^\S+: (\d+) aa$/ q_len = $1 end # each hit if /^>>([^>]\S+).*\((\d+) aa\)$/ target = $1 t_len = $2 # d = dummy variable d, d, initn, d, init1, d, opt, d, zscore, d, bits, d, evalue = gets.split(/\s+/) d, d, sw, ident, d, ugident, d, d, overlap, d, d, lap = gets.split(/\s+/) # query-hit pair print "#{$FILENAME}\t#{q_len}\t#{target}\t#{t_len}" # pick up values ary = [ initn, init1, opt, zscore, bits, evalue, sw, ident, ugident, overlap, lap ] # print values for i in ary i.tr!('^0-9.:e\-','') print "\t#{i}" end print "\n" end end =begin MySQL fasta_results.sql sample CREATE DATABASE IF NOT EXISTS db_name; CREATE TABLE IF NOT EXISTS db_name.table_name ( query varchar(25) not NULL, q_len integer unsigned default 0, target varchar(25) not NULL, t_len integer unsigned default 0, initn integer unsigned default 0, init1 integer unsigned default 0, opt integer unsigned default 0, zscore float default 0.0, bits float default 0.0, evalue float default 0.0, sw integer unsigned default 0, ident float default 0.0, ugident float default 0.0, overlap integer unsigned default 0, lap_at varchar(25) default NULL ); LOAD DATA LOCAL INFILE 'fasta_results.tab' INTO TABLE db_name.table_name; =end bio-1.4.3.0001/sample/demo_gff1.rb0000644000004100000410000000227012200110570016321 0ustar www-datawww-data# # = sample/demo_gff1.rb - very simple demonstration of Bio::GFF # # Copyright:: Copyright (C) 2003, 2005 # Toshiaki Katayama # 2006 Jan Aerts # 2008 Naohisa Goto # License:: The Ruby License # # # == Description # # Very simple demonstration of Bio::GFF, parser classes for GFF formatted # text. # # == Usage # # Simply run this script. # # $ ruby demo_gff1.rb # # == To do # # Bio::GFF and related classes have many functions, and we should write # more example and/or demonstration codes. # # == Development information # # The code was moved from lib/bio/db/gff.rb. 
# require 'bio' #if __FILE__ == $0 begin require 'pp' alias p pp rescue LoadError end this_gff = "SEQ1\tEMBL\tatg\t103\t105\t.\t+\t0\n" this_gff << "SEQ1\tEMBL\texon\t103\t172\t.\t+\t0\n" this_gff << "SEQ1\tEMBL\tsplice5\t172\t173\t.\t+\t.\n" this_gff << "SEQ1\tnetgene\tsplice5\t172\t173\t0.94\t+\t.\n" this_gff << "SEQ1\tgenie\tsp5-20\t163\t182\t2.3\t+\t.\n" this_gff << "SEQ1\tgenie\tsp5-10\t168\t177\t2.1\t+\t.\n" this_gff << "SEQ1\tgrail\tATG\t17\t19\t2.1\t-\t0\n" p Bio::GFF.new(this_gff) #end bio-1.4.3.0001/sample/test_phyloxml_big.rb0000644000004100000410000001367312200110570020237 0ustar www-datawww-data# # = sample/test_phyloxml_big.rb - Tests for Bio::PhyloXML. Testing very big files. # # Copyright:: Copyright (C) 2009 # Diana Jaunzeikare # Naohisa Goto # License:: The Ruby License # # libraries needed for the tests require 'libxml' require 'pathname' require 'test/unit' require 'digest/sha1' require 'bio/command' require 'bio/db/phyloxml/phyloxml_parser' require 'bio/db/phyloxml/phyloxml_writer' PhyloXMLBigDataPath = ARGV.shift if !PhyloXMLBigDataPath then exit_code = 0 elsif !File.directory?(PhyloXMLBigDataPath) then exit_code = 1 else exit_code = false end if exit_code then puts "Usage: #{$0} path_to_data (test options...)" puts "" puts "Requirements:" puts " - Write permission to the path_to_data" puts " - Internet connection for downloading test data" puts " - unzip command to extract downloaded test data" puts "" puts "You may want to run Ruby with -rubygems and -I." 
puts "" puts "Example of usage using /tmp:" puts " $ mkdir /tmp/phyloxml" puts " $ ruby -rubygems -I lib #{$0} /tmp/phyloxml -v" puts "" exit(exit_code) end module TestPhyloXMLBigData module_function def metazoa_xml #puts "Metazoa 30MB" filename = 'ncbi_taxonomy_metazoa.xml' uri = "http://www.phylosoft.org/archaeopteryx/examples/data/ncbi_taxonomy_metazoa.xml.zip" download_and_unzip_if_not_found(filename, uri, "1M", "33M") end def metazoa_test_xml #puts "writing Metazoa 30MB" File.join PhyloXMLBigDataPath, 'writer_test_ncbi_taxonomy_metazoa.xml' end def metazoa_roundtrip_xml #puts "writing Metazoa 30MB roundtrip" File.join PhyloXMLBigDataPath, 'roundtrip_test_ncbi_taxonomy_metazoa.xml' end def mollusca_xml #puts "Mollusca 1.5MB" filename = 'ncbi_taxonomy_mollusca.xml' uri = "http://www.phylosoft.org/archaeopteryx/examples/data/ncbi_taxonomy_mollusca.xml.zip" download_and_unzip_if_not_found(filename, uri, "67K", "1.5M") end def mollusca_test_xml #puts "Writing Mollusca 1.5MB" File.join PhyloXMLBigDataPath, 'writer_test_ncbi_taxonomy_mollusca.xml' end def mollusca_roundtrip_xml #puts "Writing Mollusca 1.5MB roundtrip" File.join PhyloXMLBigDataPath, 'roundtrip_test_ncbi_taxonomy_mollusca.xml' end def life_xml #Right now this file is not compatible with xsd 1.10 filename = 'tol_life_on_earth_1.xml' uri = "http://www.phylosoft.org/archaeopteryx/examples/data/tol_life_on_earth_1.xml.zip" download_and_unzip_if_not_found(filename, uri, '10M', '45M') end def life_test_xml File.join PhyloXMLBigDataPath, 'writer_test_tol_life_on_earth_1.xml' end def life_roundtrip_xml File.join PhyloXMLBigDataPath, 'roundtrip_test_tol_life_on_earth_1.xml' end def unzip_file(file, target_dir) flag = system('unzip', "#{file}.zip", "-d", target_dir) unless flag then raise "Failed to unzip #{file}.zip" end file end def download_and_unzip_if_not_found(basename, uri, zipsize, origsize) file = File.join PhyloXMLBigDataPath, basename return file if File.exists?(file) if File.exists?("#{file}.zip") 
unzip_file(file, PhyloXMLBigDataPath) return file end puts "File #{basename} does not exist. Do you want to download it? (If yes, ~#{zipsize}B zip file will be downloaded and extracted (to #{origsize}B), if no, the test will be skipped.) y/n?" res = gets if res.to_s.chomp.downcase == "y" File.open("#{file}.zip", "wb") do |f| f.write(Bio::Command.read_uri(uri)) end puts "File downloaded." self.unzip_file(file, PhyloXMLBigDataPath) return file else return nil #return File.join PHYLOXML_TEST_DATA, "#{basename}.stub" end end end #end module TestPhyloXMLBigData module Bio class TestPhyloXMLBig < Test::Unit::TestCase def do_test_next_tree(readfilename) raise "the test is skipped" unless readfilename filesizeMB = File.size(readfilename) / 1048576.0 printf "Reading %s (%2.1f MB)\n", readfilename, filesizeMB begin phyloxml = Bio::PhyloXML::Parser.open(readfilename) rescue NoMethodError phyloxml = Bio::PhyloXML::Parser.new(readfilename) end tree = nil assert_nothing_raised { tree = phyloxml.next_tree } tree end private :do_test_next_tree def do_test_write(tree, writefilename) printf "Writing to %s\n", writefilename writer = Bio::PhyloXML::Writer.new(writefilename) assert_nothing_raised { writer.write(tree) } # checks file size and sha1sum str = File.open(writefilename, 'rb') { |f| f.read } sha1 = Digest::SHA1.hexdigest(str) puts "Wrote #{str.length} bytes." puts "sha1: #{sha1}" end private :do_test_write def test_mollusca tree = do_test_next_tree(TestPhyloXMLBigData.mollusca_xml) do_test_write(tree, TestPhyloXMLBigData.mollusca_test_xml) tree2 = do_test_next_tree(TestPhyloXMLBigData.mollusca_test_xml) do_test_write(tree2, TestPhyloXMLBigData.mollusca_roundtrip_xml) end def test_metazoa tree = do_test_next_tree(TestPhyloXMLBigData.metazoa_xml) do_test_write(tree, TestPhyloXMLBigData.metazoa_test_xml) tree2 = do_test_next_tree(TestPhyloXMLBigData.metazoa_test_xml) do_test_write(tree2, TestPhyloXMLBigData.metazoa_roundtrip_xml) end if false # Disabled because of the error. 
# LibXML::XML::Error: Fatal error: Input is not proper UTF-8, # indicate encoding ! # Bytes: 0xE9 0x6B 0x65 0x73 at tol_life_on_earth_1.xml:132170. # def test_life tree = do_test_next_tree(TestPhyloXMLBigData.life_xml) do_test_write(tree, TestPhyloXMLBigData.life_test_xml) tree2 = do_test_next_tree(TestPhyloXMLBigData.life_test_xml) do_test_write(tree2, TestPhyloXMLBigData.life_roundtrip_xml) end end #if false end end bio-1.4.3.0001/sample/demo_keggapi.rb0000644000004100000410000003503012200110570017105 0ustar www-datawww-data# # = sample/demo_keggapi.rb - demonstration of Bio::KEGG::API web service client # # Copyright:: Copyright (C) 2003, 2004 Toshiaki Katayama # License:: The Ruby License # # # == Description # # Demonstration of Bio::KEGG::API, the KEGG API web service client via # SOAP/WSDL. # # == Requirements # # Internet connection is needed. # # == Usage # # Simply run this script. # # $ ruby demo_keggapi.rb # # == Notes # # * It may take long time to run this script. # * It can not be run with Ruby 1.9 because SOAP4R (SOAP support for Ruby) # currently does not support Ruby 1.9. # # == Development information # # The code was moved from lib/bio/io/keggapi.rb, and modified as below: # # * Commented out deprecated methods: get_neighbors_by_gene, # get_similarity_between_genes, get_ko_members, get_oc_members_by_gene, # get_pc_members_by_gene. # * Commented out some methods internally using the deprecated methods: # get_all_neighbors_by_gene, get_all_oc_members_by_gene, # get_all_pc_members_by_gene. 
#
# Demonstration script for Bio::KEGG::API (KEGG SOAP/WSDL client).
# Each section prints a label naming the call, performs the call against the
# live service, and prints the result. Calls that the service has deprecated
# or removed are kept below as commented-out code for reference.
#

require 'bio'

#if __FILE__ == $0

# Prefer pretty-printed output when 'pp' is available; otherwise keep Kernel#p.
begin
  require 'pp'
  alias p pp
rescue LoadError
end

puts ">>> KEGG API"
serv = Bio::KEGG::API.new
# serv.log = STDERR

puts "# * parameters"
puts " wsdl : #{serv.wsdl}"
puts " log : #{serv.log}"
puts " start : #{serv.start}"
puts " max_results : #{serv.max_results}"

puts "=== META"

puts "### list_databases"
list = serv.list_databases
list.each do |db|
  print db.entry_id, "\t", db.definition, "\n"
end

puts "### list_organisms"
list = serv.list_organisms
list.each do |org|
  print org.entry_id, "\t", org.definition, "\n"
end

puts "### list_pathways('map') : reference pathway"
list = serv.list_pathways("map")
list.each do |path|
  print path.entry_id, "\t", path.definition, "\n"
end

puts "### list_pathways('eco') : E. coli pathway"
list = serv.list_pathways("eco")
list.each do |path|
  print path.entry_id, "\t", path.definition, "\n"
end

puts "=== DBGET"

puts "### binfo('all')"
puts serv.binfo("all")

puts "### binfo('genbank')"
puts serv.binfo("genbank")

puts "### bfind('genbank kinase cell cycle human')"
puts serv.bfind("genbank kinase cell cycle human")

puts "### bget('gb:AJ617376')"
puts serv.bget("gb:AJ617376")

puts "### bget('eco:b0002 eco:b0003')"
puts serv.bget("eco:b0002 eco:b0003")

puts "### btit('eco:b0002 eco:b0003')"
puts serv.btit("eco:b0002 eco:b0003")

puts "# * get_entries(['eco:b0002', 'eco:b0003'])"
puts serv.get_entries(["eco:b0002", "eco:b0003"])

puts "# * get_aaseqs(['eco:b0002', 'eco:b0003'])"
puts serv.get_aaseqs(["eco:b0002", "eco:b0003"])

puts "# * get_naseqs(['eco:b0002', 'eco:b0003'])"
puts serv.get_naseqs(["eco:b0002", "eco:b0003"])

puts "# * get_definitions(['eco:b0002', 'eco:b0003'])"
puts serv.get_definitions(["eco:b0002", "eco:b0003"])

puts "# * get_definitions(('eco:b0001'..'eco:b0200').to_a)"
puts serv.get_definitions(("eco:b0001".."eco:b0200").to_a)

puts "=== LinkDB"

puts "### get_linkdb_by_entry('eco:b0002', 'pathway', 1, 5)"
list = serv.get_linkdb_by_entry("eco:b0002", "pathway", 1, 5)
list.each do |link|
  puts [ link.entry_id1, link.entry_id2, link.type, link.path ].join("\t")
end

puts "# * get_all_linkdb_by_entry('eco:b0002', 'pathway')"
list = serv.get_all_linkdb_by_entry("eco:b0002", "pathway")
list.each do |link|
  puts [ link.entry_id1, link.entry_id2, link.type, link.path ].join("\t")
end

puts "=== SSDB"

# The method "get_neighbors_by_gene" is deprecated in 2005-02-20.
#
#puts "### get_neighbors_by_gene('eco:b0002', 'all', 1, 5)"
#list = serv.get_neighbors_by_gene("eco:b0002", "all", 1, 5)
#list.each do |hit|
#  puts [ hit.genes_id1, hit.genes_id2, hit.sw_score ].join("\t")
#end

# The method "get_all_neighbors_by_gene" can not be used because
# it internally uses the deprecated "get_neighbors_by_gene" method.
#
#puts "# * get_all_neighbors_by_gene('eco:b0002', 'bsu')"
#list = serv.get_all_neighbors_by_gene("eco:b0002", "bsu")
#list.each do |hit|
#  puts [ hit.genes_id1, hit.genes_id2, hit.sw_score ].join("\t")
#end

puts "### get_best_best_neighbors_by_gene('eco:b0002', 1, 5)"
list = serv.get_best_best_neighbors_by_gene("eco:b0002", 1, 5)
list.each do |hit|
  puts [ hit.genes_id1, hit.genes_id2, hit.sw_score ].join("\t")
end

puts "# * get_all_best_best_neighbors_by_gene('eco:b0002')"
list = serv.get_all_best_best_neighbors_by_gene("eco:b0002")
list.each do |hit|
  puts [ hit.genes_id1, hit.genes_id2, hit.sw_score ].join("\t")
end

puts "### get_best_neighbors_by_gene('eco:b0002', 1, 5)"
list = serv.get_best_neighbors_by_gene("eco:b0002", 1, 5)
list.each do |hit|
  puts [ hit.genes_id1, hit.genes_id2, hit.sw_score ].join("\t")
end

puts "# * get_all_best_neighbors_by_gene('eco:b0002')"
list = serv.get_all_best_neighbors_by_gene("eco:b0002")
list.each do |hit|
  puts [ hit.genes_id1, hit.genes_id2, hit.sw_score ].join("\t")
end

puts "### get_reverse_best_neighbors_by_gene('eco:b0002', 1, 5)"
list = serv.get_reverse_best_neighbors_by_gene("eco:b0002", 1, 5)
list.each do |hit|
  puts [ hit.genes_id1, hit.genes_id2, hit.sw_score ].join("\t")
end

puts "# * get_all_reverse_best_neighbors_by_gene('eco:b0002')"
list = serv.get_all_reverse_best_neighbors_by_gene("eco:b0002")
list.each do |hit|
  puts [ hit.genes_id1, hit.genes_id2, hit.sw_score ].join("\t")
end

puts "### get_paralogs_by_gene('eco:b0002', 1, 5)"
list = serv.get_paralogs_by_gene("eco:b0002", 1, 5)
list.each do |hit|
  puts [ hit.genes_id1, hit.genes_id2, hit.sw_score ].join("\t")
end

puts "# * get_all_paralogs_by_gene('eco:b0002')"
list = serv.get_all_paralogs_by_gene("eco:b0002")
list.each do |hit|
  puts [ hit.genes_id1, hit.genes_id2, hit.sw_score ].join("\t")
end

# The method "get_similarity_between_genes" is deprecated in 2005-02-20.
#
#puts "### get_similarity_between_genes('eco:b0002', 'bsu:BG10350')"
#relation = serv.get_similarity_between_genes("eco:b0002", "bsu:BG10350")
#puts " genes_id1 : #{relation.genes_id1}" # string
#puts " genes_id2 : #{relation.genes_id2}" # string
#puts " sw_score : #{relation.sw_score}" # int
#puts " bit_score : #{relation.bit_score}" # float
#puts " identity : #{relation.identity}" # float
#puts " overlap : #{relation.overlap}" # int
#puts " start_position1 : #{relation.start_position1}" # int
#puts " end_position1 : #{relation.end_position1}" # int
#puts " start_position2 : #{relation.start_position2}" # int
#puts " end_position2 : #{relation.end_position2}" # int
#puts " best_flag_1to2 : #{relation.best_flag_1to2}" # boolean
#puts " best_flag_2to1 : #{relation.best_flag_2to1}" # boolean
#puts " definition1 : #{relation.definition1}" # string
#puts " definition2 : #{relation.definition2}" # string
#puts " length1 : #{relation.length1}" # int
#puts " length2 : #{relation.length2}" # int

puts "=== MOTIF"

# The motif loops are guarded with "if list" so a nil result is skipped
# instead of raising NoMethodError.

puts "### get_motifs_by_gene('eco:b0002', 'pfam')"
list = serv.get_motifs_by_gene("eco:b0002", "pfam")
list.each do |motif|
  puts motif.motif_id
end if list

puts "### get_motifs_by_gene('eco:b0002', 'tfam')"
list = serv.get_motifs_by_gene("eco:b0002", "tfam")
list.each do |motif|
  puts motif.motif_id
end if list

puts "### get_motifs_by_gene('eco:b0002', 'pspt')"
list = serv.get_motifs_by_gene("eco:b0002", "pspt")
list.each do |motif|
  puts motif.motif_id
end if list

puts "### get_motifs_by_gene('eco:b0002', 'pspf')"
list = serv.get_motifs_by_gene("eco:b0002", "pspf")
list.each do |motif|
  puts motif.motif_id
end if list

puts "### get_motifs_by_gene('eco:b0002', 'all')"
list = serv.get_motifs_by_gene("eco:b0002", "all")
# Same nil guard as the per-database queries above (it was missing here).
list.each do |motif|
  puts "# * motif result"
  puts " motif_id : #{motif.motif_id}"
  puts " definition : #{motif.definition}"
  puts " genes_id : #{motif.genes_id}"
  puts " start_position : #{motif.start_position}"
  puts " end_position : #{motif.end_position}"
  puts " score : #{motif.score}"
  puts " evalue : #{motif.evalue}"
end if list

puts "### get_genes_by_motifs(['pf:ACT', 'ps:ASPARTOKINASE'], 1, 5)"
list = serv.get_genes_by_motifs(["pf:ACT", "ps:ASPARTOKINASE"], 1, 5)
list.each do |gene|
  puts [ gene.entry_id, gene.definition ].join("\t")
end

puts "# * get_all_genes_by_motifs(['pf:ACT', 'ps:ASPARTOKINASE'])"
list = serv.get_all_genes_by_motifs(["pf:ACT", "ps:ASPARTOKINASE"])
list.each do |gene|
  puts [ gene.entry_id, gene.definition ].join("\t")
end

puts "=== KO, OC, PC"

puts "### get_ko_by_gene('eco:b0002')"
list = serv.get_ko_by_gene("eco:b0002")
list.each do |ko|
  puts ko
end

# The method "get_ko_members" is removed in 2005-06-01.
#
#puts "### get_ko_members('ko:K00003')"
#list = serv.get_ko_members("ko:K00003")
#list.each do |gene|
#  puts gene
#end

# The method "get_oc_members_by_gene" is removed in 2006-10-04.
#
#puts "### get_oc_members_by_gene('eco:b0002', 1, 5)"
#list = serv.get_oc_members_by_gene("eco:b0002", 1, 5)
#list.each do |gene|
#  puts gene
#end

# The method "get_all_oc_members_by_gene" can not be used because
# it internally uses the deprecated "get_oc_members_by_gene" method.
#
#puts "# * get_all_oc_members_by_gene('eco:b0002')"
#list = serv.get_all_oc_members_by_gene("eco:b0002")
#list.each do |gene|
#  puts gene
#end

# The method "get_pc_members_by_gene" is removed in 2006-10-04.
#
#puts "### get_pc_members_by_gene('eco:b0002', 1, 5)"
#list = serv.get_pc_members_by_gene("eco:b0002", 1, 5)
#list.each do |gene|
#  puts gene
#end

# The method "get_all_pc_members_by_gene" can not be used because
# it internally uses the deprecated "get_pc_members_by_gene" method.
#
#puts "# * get_all_pc_members_by_gene('eco:b0002')"
#list = serv.get_all_pc_members_by_gene("eco:b0002")
#list.each do |gene|
#  puts gene
#end

puts "=== PATHWAY"

puts "==== coloring pathway"

puts "### mark_pathway_by_objects('path:eco00260', obj_list)"
puts " obj_list = ['eco:b0002', 'cpd:C00263']"
obj_list = ["eco:b0002", "cpd:C00263"]
url = serv.mark_pathway_by_objects("path:eco00260", obj_list)
puts url

puts "### color_pathway_by_objects('path:eco00053', obj_list, fg_list, bg_list)"
puts " obj_list = ['eco:b0207', 'eco:b1300']"
puts " fg_list = ['blue', '#00ff00']"
puts " bg_list = ['#ff0000', 'yellow']"
obj_list = ["eco:b0207", "eco:b1300"]
fg_list = ["blue", "#00ff00"]
bg_list = ["#ff0000", "yellow"]
url = serv.color_pathway_by_objects("path:eco00053", obj_list, fg_list, bg_list)
puts url

#puts "# * save_image(#{url})"
#filename = serv.save_image(url, "test.gif")
#filename = serv.save_image(url)
#puts filename

puts "==== objects on pathway"

puts "### get_genes_by_pathway('path:map00010')"
list = serv.get_genes_by_pathway("path:map00010")
list.each do |gene|
  puts gene
end

puts "### get_genes_by_pathway('path:eco00010')"
list = serv.get_genes_by_pathway("path:eco00010")
list.each do |gene|
  puts gene
end

puts "### get_enzymes_by_pathway('path:map00010')"
list = serv.get_enzymes_by_pathway("path:map00010")
list.each do |enzyme|
  puts enzyme
end

puts "### get_enzymes_by_pathway('path:eco00010')"
list = serv.get_enzymes_by_pathway("path:eco00010")
list.each do |enzyme|
  puts enzyme
end

puts "### get_compounds_by_pathway('path:map00010')"
list = serv.get_compounds_by_pathway("path:map00010")
list.each do |compound|
  puts compound
end

puts "### get_compounds_by_pathway('path:eco00010')"
list = serv.get_compounds_by_pathway("path:eco00010")
list.each do |compound|
  puts compound
end

puts "### get_reactions_by_pathway('path:map00010')"
list = serv.get_reactions_by_pathway("path:map00010")
list.each do |reaction|
  puts reaction
end

puts "### get_reactions_by_pathway('path:eco00010')"
list = serv.get_reactions_by_pathway("path:eco00010")
list.each do |reaction|
  puts reaction
end

puts "==== pathway by objects"

puts "### get_pathways_by_genes(['eco:b0756', 'eco:b1002'])"
list = serv.get_pathways_by_genes(["eco:b0756", "eco:b1002"])
list.each do |path|
  puts path
end

puts "### get_pathways_by_enzymes(['ec:5.1.3.3', 'ec:3.1.3.10'])"
list = serv.get_pathways_by_enzymes(["ec:5.1.3.3", "ec:3.1.3.10"])
list.each do |path|
  puts path
end

puts "### get_pathways_by_compounds(['cpd:C00221', 'cpd:C00267'])"
list = serv.get_pathways_by_compounds(["cpd:C00221", "cpd:C00267"])
list.each do |path|
  puts path
end

puts "### get_pathways_by_reactions(['rn:R00014', 'rn:R00710'])"
list = serv.get_pathways_by_reactions(["rn:R00014", "rn:R00710"])
list.each do |path|
  puts path
end

puts "==== relation between objects"

puts "### get_linked_pathways('path:eco00620')"
list = serv.get_linked_pathways('path:eco00620')
list.each do |path|
  puts path
end

puts "### get_genes_by_enzyme('ec:1.1.1.1', 'eco')"
list = serv.get_genes_by_enzyme("ec:1.1.1.1", "eco")
list.each do |gene|
  puts gene
end

puts "### get_enzymes_by_gene('eco:b0002')"
list = serv.get_enzymes_by_gene("eco:b0002")
list.each do |enzyme|
  puts enzyme
end

puts "### get_enzymes_by_compound('cpd:C00345')"
list = serv.get_enzymes_by_compound("cpd:C00345")
list.each do |enzyme|
  puts enzyme
end

puts "### get_enzymes_by_reaction('rn:R00100')"
list = serv.get_enzymes_by_reaction("rn:R00100")
list.each do |enzyme|
  puts enzyme
end

puts "### get_compounds_by_enzyme('ec:2.7.1.12')"
list = serv.get_compounds_by_enzyme("ec:2.7.1.12")
list.each do |compound|
  puts compound
end

puts "### get_compounds_by_reaction('rn:R00100')"
list = serv.get_compounds_by_reaction("rn:R00100")
list.each do |compound|
  puts compound
end

puts "### get_reactions_by_enzyme('ec:2.7.1.12')"
list = serv.get_reactions_by_enzyme("ec:2.7.1.12")
list.each do |reaction|
  puts reaction
end

puts "### get_reactions_by_compound('cpd:C00199')"
list = serv.get_reactions_by_compound("cpd:C00199")
list.each do |reaction|
  puts reaction
end

puts "=== GENES"

puts "### get_genes_by_organism('mge', 1, 5)"
list = serv.get_genes_by_organism("mge", 1, 5)
list.each do |gene|
  puts gene
end

puts "# * get_all_genes_by_organism('mge')"
list = serv.get_all_genes_by_organism("mge")
list.each do |gene|
  puts gene
end

puts "=== GENOME"

puts "### get_number_of_genes_by_organism(org)"
puts serv.get_number_of_genes_by_organism("mge")

#end
#
# Demonstration of Bio::KEGG::ORTHOLOGY, the parser class for the KEGG
# ORTHOLOGY database entry.
#
# Usage: specify files containing KEGG ORTHOLOGY data.
#
#   $ ruby demo_kegg_orthology.rb files...
#
# For every entry read from the given files (ARGF), each accessor is shown
# as a "# ko.<method>" label followed by the inspected return value.
#

require 'bio'

Bio::FlatFile.foreach(Bio::KEGG::ORTHOLOGY, ARGF) do |ko|
  puts "### ko = Bio::KEGG::ORTHOLOGY.new(str)"
  # The label names the method that is actually called (entry_id); it used
  # to read "ko.ko_id", which does not match the call below.
  puts "# ko.entry_id"
  p ko.entry_id
  puts "# ko.name"
  p ko.name
  puts "# ko.names"
  p ko.names
  puts "# ko.definition"
  p ko.definition
  puts "# ko.keggclass"
  p ko.keggclass
  puts "# ko.keggclasses"
  p ko.keggclasses
  puts "# ko.pathways"
  p ko.pathways
  puts "# ko.dblinks"
  p ko.dblinks
  puts "# ko.genes"
  p ko.genes
  puts "=" * 78
end
# require 'bio' f_str = <sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST] MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEG VPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYME GIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNL KLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGC IFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFP QWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES >sce:YBR274W CHK1; probable serine/threonine-protein kinase [EC:2.7.1.-] [SP:KB9S_YEAST] MSLSQVSPLPHIKDVVLGDTVGQGAFACVKNAHLQMDPSIILAVKFIHVP TCKKMGLSDKDITKEVVLQSKCSKHPNVLRLIDCNVSKEYMWIILEMADG GDLFDKIEPDVGVDSDVAQFYFQQLVSAINYLHVECGVAHRDIKPENILL DKNGNLKLADFGLASQFRRKDGTLRVSMDQRGSPPYMAPEVLYSEEGYYA DRTDIWSIGILLFVLLTGQTPWELPSLENEDFVFFIENDGNLNWGPWSKI EFTHLNLLRKILQPDPNKRVTLKALKLHPWVLRRASFSGDDGLCNDPELL AKKLFSHLKVSLSNENYLKFTQDTNSNNRYISTQPIGNELAELEHDSMHF QTVSNTQRAFTSYDSNTNYNSGTGMTQEAKWTQFISYDIAALQFHSDEND CNELVKRHLQFNPNKLTKFYTLQPMDVLLPILEKALNLSQIRVKPDLFAN FERLCELLGYDNVFPLIINIKTKSNGGYQLCGSISIIKIEEELKSVGFER KTGDPLEWRRLFKKISTICRDIILIPN END f = Bio::FastaFormat.new(f_str) puts "### FastaFormat" puts "# entry" puts f.entry puts "# entry_id" p f.entry_id puts "# definition" p f.definition puts "# data" p f.data puts "# seq" p f.seq puts "# seq.type" p f.seq.type puts "# length" p f.length puts "# aaseq" p f.aaseq puts "# aaseq.type" p f.aaseq.type puts "# aaseq.composition" p f.aaseq.composition puts "# aalen" p f.aalen puts n_str = <CRA3575282.F 24 15 23 29 20 13 20 21 21 23 22 25 13 22 17 15 25 27 32 26 32 29 29 25 END n = Bio::FastaNumericFormat.new(n_str) puts "### FastaNumericFormat" puts "# entry" puts n.entry puts "# entry_id" p n.entry_id puts "# definition" p n.definition puts "# data" p n.data puts "# length" p n.length #puts "# percent to ratio by yield" #n.each do |x| # p x/100.0 #end puts "# first three" p n[0] p n[1] p n[2] puts "# last one" p n[-1] bio-1.4.3.0001/sample/demo_sirna.rb0000644000004100000410000000257312200110570016620 0ustar 
#
# = sample/demo_sirna.rb - demonstration of Bio::SiRNA
#
# Copyright::   Copyright (C) 2004, 2005
#               Itoshi NIKAIDO
# License::     The Ruby License
#
#
# == Description
#
# Demonstration of Bio::SiRNA, class for designing small inhibitory RNAs.
#
# == Usage
#
# Specify files containing nucleotide sequences.
#
#  $ ruby demo_sirna.rb files...
#
# Example usage using test data:
#
#  $ ruby -Ilib sample/demo_sirna.rb test/data/fasta/example1.txt
#
# == Development information
#
# The code was moved from lib/bio/util/sirna.rb, and modified for reading
# normal sequence files.
#

require 'bio'

# Nothing to do without input files: print the usage text and stop.
if ARGV.empty?
  puts "Demonstration of designing SiRNA for each sequence."
  puts "Usage: #{$0} files..."
  exit(0)
end

ARGV.each do |path|
  Bio::FlatFile.foreach(path) do |record|
    puts "##entry.entry_id: #{record.entry_id}"
    puts "##entry.definition: #{record.definition}"

    nucleotides = record.naseq
    puts "##entry.naseq.length: #{nucleotides.length}"

    designer = Bio::SiRNA.new(nucleotides)
    # or .design('uitei') or .uitei or .reynolds
    designer.design.each do |candidate|
      puts candidate.report

      hairpin = Bio::SiRNA::ShRNA.new(candidate)
      hairpin.design # or .design('BLOCK-iT') or .block_it
      puts hairpin.report

      puts "# as DNA"
      puts hairpin.top_strand.dna
      puts hairpin.bottom_strand.dna
    end

    puts "=" * 78
  end # Bio::FlatFile.foreach
end # ARGV.each
# Prints a walkthrough of one parsed GenBank entry to standard output.
#
# gb:: the entry object; every accessor named below is called on it once
#      (basecount is called with no argument, then with 'a' and 'A').
#
# Output is a "### GenBank" banner, one "## SECTION" heading per record
# section with the inspected accessor values beneath it, and a closing
# rule of 78 "=" characters.
def demo_genbank(gb)
  puts "### GenBank"

  # LOCUS fields get an individual "# GenBank.<name>" label each.
  puts "## LOCUS"
  %w[locus entry_id nalen strand natype circular division date].each do |field|
    puts "# GenBank.#{field}"
    # __send__ rather than public_send so the demo also runs on old Rubies.
    p gb.__send__(field)
  end

  # Remaining sections: one heading, then the listed accessors in order.
  [
    ["DEFINITION", %w[definition]],
    ["ACCESSION",  %w[accession]],
    ["VERSION",    %w[versions version gi]],
    ["NID",        %w[nid]],
    ["KEYWORDS",   %w[keywords]],
    ["SEGMENT",    %w[segment]],
    ["SOURCE",     %w[source common_name vernacular_name organism taxonomy]],
    ["REFERENCE",  %w[references]],
    ["COMMENT",    %w[comment]],
    ["FEATURES",   %w[features]]
  ].each do |heading, readers|
    puts "## #{heading}"
    readers.each { |reader| p gb.__send__(reader) }
  end

  # basecount is shown in total and per base, in both letter cases.
  puts "## BASE COUNT"
  p gb.basecount
  %w[a A].each { |base| p gb.basecount(base) }

  puts "## ORIGIN"
  p gb.origin
  p gb.naseq

  puts "=" * 78
end
#!/usr/bin/env ruby
#
# gb2tab.rb - convert GenBank into tab delimited data for MySQL
#
#   Usage:
#
#     % gb2tab.rb gb*.seq
#
#   Copyright (C) 2001 KATAYAMA Toshiaki
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  $Id: gb2tab.rb,v 0.11 2002/04/22 09:10:10 k Exp $
#
# For every input file FILE, four tab-delimited files are written next to it:
# FILE.ent.tab (one row per entry), FILE.ft.tab (one row per feature
# qualifier), FILE.ref.tab (one row per reference) and FILE.seq.tab (the
# sequence, split into chunks of at most 16,000,000 bases).
#

require 'bio'

$stderr.puts Time.now

ARGV.each do |gbkfile|
  # Every handle opened for this input is tracked here so that the ensure
  # clause can close all of them even if parsing or writing raises.
  handles = []

  begin
    # File.open (not Kernel#open) so that a file name starting with "|" is
    # never interpreted as a command to run.
    gbk, ent, ft, ref, seq = [
      [gbkfile, "r"],
      ["#{gbkfile}.ent.tab", "w"],
      ["#{gbkfile}.ft.tab", "w"],
      ["#{gbkfile}.ref.tab", "w"],
      ["#{gbkfile}.seq.tab", "w"]
    ].map do |path, mode|
      io = File.open(path, mode)
      handles << io
      io
    end

    while (entry = gbk.gets(Bio::GenBank::DELIMITER))

      gb = Bio::GenBank.new(entry)

      ### MAIN BODY

      ary = [
        gb.entry_id,
        gb.nalen,
        gb.strand,
        gb.natype,
        gb.circular,
        gb.division,
        gb.date,
        gb.definition,
        gb.accession,
        gb.versions.inspect,
        gb.keywords.inspect,
        gb.segment.inspect,
        gb.common_name,
        gb.organism,
        gb.taxonomy,
        gb.comment,
        gb.basecount.inspect,
        gb.origin,
      ]

      ent.puts ary.join("\t")

      ### FEATURES

      num = 0

      gb.features.each do |f|
        num += 1

        span_min, span_max = f.locations.span

        if f.qualifiers.empty?
          # A feature without qualifiers still gets one row, with empty
          # qualifier/value columns.
          ary = [
            gb.entry_id,
            num,
            f.feature,
            f.position,
            span_min,
            span_max,
            '',
            '',
          ]
          ft.puts ary.join("\t")
        else
          f.each do |q|
            ary = [
              gb.entry_id,
              num,
              f.feature,
              f.position,
              span_min,
              span_max,
              q.qualifier,
              q.value,
            ]
            ft.puts ary.join("\t")
          end
        end
      end

      ### REFERENCE

      num = 0

      gb.references.each do |r|
        num += 1

        ary = [
          gb.entry_id,
          num,
          r.authors.inspect,
          r.title,
          r.journal,
          r.medline,
          r.pubmed,
        ]

        ref.puts ary.join("\t")
      end

      ### SEQUENCE

      # Sequences are written in numbered chunks of at most maxlen bases.
      maxlen = 16 * 10 ** 6

      num = 0

      0.step(gb.nalen, maxlen) do |i|
        num += 1

        ary = [
          gb.entry_id,
          num,
          gb.naseq[i, maxlen]
        ]

        seq.puts ary.join("\t")
      end

    end
  ensure
    handles.each { |io| io.close }
  end
end

$stderr.puts Time.now

=begin

Example usage in zsh:

  % gb2tab.rb *.seq
  % for i in *.seq
  > do
  >   base=`basename $i .seq`
  >   ruby -pe "gsub(/%HOGE%/,'$base')" gb2tab.sql | mysql
  > done

gb2tab.sql:

  CREATE DATABASE IF NOT EXISTS genbank;
  USE genbank;

  CREATE TABLE IF NOT EXISTS %HOGE% (
    id varchar(16) NOT NULL PRIMARY KEY,
    nalen integer,
    strand varchar(5),
    natype varchar(5),
    circular varchar(10),
    division varchar(5),
    date varchar(12),
    definition varchar(255),
    accession varchar(30),
    versions varchar(30),
    keywords varchar(255),
    segment varchar(255),
    source varchar(255),
    organism varchar(255),
    taxonomy varchar(255),
    comment text,
    basecount varchar(255),
    origin varchar(255),
    KEY (nalen),
    KEY (division),
    KEY (accession),
    KEY (organism),
    KEY (taxonomy)
  );
  LOAD DATA LOCAL INFILE '%HOGE%.seq.ent.tab' INTO TABLE %HOGE%;

  CREATE TABLE IF NOT EXISTS %HOGE%ft (
    id varchar(16) NOT NULL,
    num integer,
    feature varchar(30),
    position text,
    span_min integer,
    span_max integer,
    qualifier varchar(30),
    value text,
    KEY (id),
    KEY (num),
    KEY (feature),
    KEY (span_min),
    KEY (span_max),
    KEY (qualifier)
  );
  LOAD DATA LOCAL INFILE '%HOGE%.seq.ft.tab' INTO TABLE %HOGE%ft;

  CREATE TABLE IF NOT EXISTS %HOGE%ref (
    id varchar(16) NOT NULL,
    num integer,
    authors text,
    title text,
    journal text,
    medline varchar(255),
    pubmed varchar(255),
    KEY (id),
    KEY (medline),
    KEY (pubmed)
  );
  LOAD DATA LOCAL INFILE '%HOGE%.seq.ref.tab' INTO TABLE %HOGE%ref;

  CREATE TABLE IF NOT EXISTS %HOGE%seq (
    id varchar(16) NOT NULL,
    num integer,
    naseq mediumtext,
    KEY (id)
  );
  LOAD DATA LOCAL INFILE '%HOGE%.seq.seq.tab' INTO TABLE %HOGE%seq;

gbmerge.sql sample:

  CREATE TABLE IF NOT EXISTS ent (
    id varchar(16) NOT NULL PRIMARY KEY,
    nalen integer,
    strand varchar(5),
    natype varchar(5),
    circular varchar(10),
    division varchar(5),
    date varchar(12),
    definition varchar(255),
    accession varchar(30),
    versions varchar(30),
    keywords varchar(255),
    segment varchar(255),
    source varchar(255),
    organism varchar(255),
    taxonomy varchar(255),
    comment text,
    basecount varchar(255),
    origin varchar(255),
    KEY (nalen),
    KEY (division),
    KEY (accession),
    KEY (organism),
    KEY (taxonomy)
  ) TYPE=MERGE UNION=(
    gbbct1,
    gbbct2,
    ...,          # list up all tables by yourself
    gbvrt
  );

  CREATE TABLE IF NOT EXISTS ft (
    id varchar(16) NOT NULL,
    num integer,
    feature varchar(30),
    position text,
    span_min integer,
    span_max integer,
    qualifier varchar(30),
    value text,
    KEY (id),
    KEY (num),
    KEY (feature),
    KEY (span_min),
    KEY (span_max),
    KEY (qualifier)
  ) TYPE=MERGE UNION=(
    gbbct1ft,
    gbbct2ft,
    ...,          # list up all ft tables by yourself
    gbvrtft
  );

  CREATE TABLE IF NOT EXISTS ref (
    id varchar(16) NOT NULL,
    num integer,
    authors text,
    title text,
    journal text,
    medline varchar(255),
    pubmed varchar(255),
    KEY (id),
    KEY (medline),
    KEY (pubmed)
  ) TYPE=MERGE UNION=(
    gbbct1ref,
    gbbct2ref,
    ...,          # list up all ref tables by yourself
    gbvrtref
  );

  CREATE TABLE IF NOT EXISTS seq (
    id varchar(16) NOT NULL,
    num integer,
    naseq mediumtext,
    KEY (id)
  ) TYPE=MERGE UNION=(
    gbbct1seq,
    gbbct2seq,
    ...,          # list up all seq tables by yourself
    gbvrtseq
  );

=end
# # Usage: # # % ruby color_scheme_na.rb > cs-seq-fna.html # # % cat seq.fna # >DNA_sequence # acgtgtgtcatgctagtcgatcgtactagtcgtagctagtca # % ruby color_scheme_na.rb seq.fna > colored-seq-fna.html # # # Copyright:: Copyright (C) 2005 # Mitsuteru C. Nakao # License:: The Ruby License # # $Id: color_scheme_na.rb,v 1.3 2007/04/05 23:35:42 trevor Exp $ # require 'bio' # returns folded sequence with
    . def br(i, width = 80) return "" if i % width == 0 "" end # returns sequence html doc def display(seq, cs) html = '

    ' postfix = '' i = 0 seq.each_byte do |c| color = cs[c.chr] prefix = %Q() html += prefix + c.chr + postfix html += br(i += 1) end html + '

    ' end # returns scheme wise html doc def display_scheme(scheme, naseq, aaseq) html = '' cs = eval("Bio::ColorScheme::#{scheme}") [naseq, aaseq].each do |seq| html += display(seq, cs) end return ['
    ', "

    #{cs}

    ", html, '
    '] end if fna = ARGV.shift naseq = Bio::FastaFormat.new(File.open(fna, 'r').read).naseq aaseq = naseq.translate else naseq = Bio::Sequence::NA.new('acgtu' * 20).randomize aaseq = naseq.translate end title = 'Bio::ColorScheme for DNA sequences' doc = ['', '
    ', '', title, '', '
    ', '', '

    ', title, '

    '] doc << ['
    ', '

    ', 'Simple colors', '

    '] ['Nucleotide'].each do |scheme| doc << display_scheme(scheme, naseq, "") end doc << ['
    '] ['Zappo', 'Taylor' ].each do |scheme| doc << display_scheme(scheme, "", aaseq) end doc << [''] doc << ['
    ', '

    ', 'Score colors', '

    '] ['Buried', 'Helix', 'Hydropathy', 'Strand', 'Turn'].each do |score| doc << display_scheme(score, "", aaseq) end doc << ['
    '] puts doc + ['',''] bio-1.4.3.0001/sample/demo_pathway.rb0000644000004100000410000001140212200110570017150 0ustar www-datawww-data# # = sample/demo_pathway.rb - demonstration of Bio::Pathway # # Copyright: Copyright (C) 2001 # Toshiaki Katayama , # Shuichi Kawashima # License:: The Ruby License # # # == Description # # Demonstration of Bio::Pathway, an implementation of the graph data structure # and graph algorithms. # # == Usage # # Simply run this script. # # $ ruby demo_pathway.rb # # == Development information # # The code was moved from lib/bio/pathway.rb. # require 'bio' #if __FILE__ == $0 puts "--- Test === method true/false" r1 = Bio::Relation.new('a', 'b', 1) r2 = Bio::Relation.new('b', 'a', 1) r3 = Bio::Relation.new('b', 'a', 2) r4 = Bio::Relation.new('a', 'b', 1) p r1 === r2 p r1 === r3 p r1 === r4 p [ r1, r2, r3, r4 ].uniq p r1.eql?(r2) p r3.eql?(r2) # Sample Graph : # +----------------+ # | | # v | # +---------(q)-->(t)------->(y)<----(r) # | | | ^ | # v | v | | # +--(s)<--+ | (x)<---+ (u)<-----+ # | | | | | # v | | v | # (v)----->(w)<---+ (z)----+ data = [ [ 'q', 's', 1, ], [ 'q', 't', 1, ], [ 'q', 'w', 1, ], [ 'r', 'u', 1, ], [ 'r', 'y', 1, ], [ 's', 'v', 1, ], [ 't', 'x', 1, ], [ 't', 'y', 1, ], [ 'u', 'y', 1, ], [ 'v', 'w', 1, ], [ 'w', 's', 1, ], [ 'x', 'z', 1, ], [ 'y', 'q', 1, ], [ 'z', 'x', 1, ], ] ary = [] puts "--- List of relations" data.each do |x| ary << Bio::Relation.new(*x) end p ary puts "--- Generate graph from list of relations" graph = Bio::Pathway.new(ary) p graph puts "--- Test to_matrix method" p graph.to_matrix puts "--- Test dump_matrix method" puts graph.dump_matrix(0) puts "--- Test dump_list method" puts graph.dump_list puts "--- Labeling some nodes" hash = { 'q' => "L1", 's' => "L2", 'v' => "L3", 'w' => "L4" } graph.label = hash p graph puts "--- Extract subgraph by label" p graph.subgraph puts "--- Extract subgraph by list" p graph.subgraph(['q', 't', 'x', 'y', 'z']) puts "--- Test cliquishness of the node 'q'" p 
graph.cliquishness('q') puts "--- Test cliquishness of the node 'q' (undirected)" u_graph = Bio::Pathway.new(ary, 'undirected') p u_graph.cliquishness('q') puts "--- Test small_world histgram" p graph.small_world puts "--- Test breadth_first_search method" distance, predecessor = graph.breadth_first_search('q') p distance p predecessor puts "--- Test bfs_shortest_path method" step, path = graph.bfs_shortest_path('y', 'w') p step p path puts "--- Test depth_first_search method" timestamp, tree, back, cross, forward = graph.depth_first_search p timestamp print "tree edges : "; p tree print "back edges : "; p back print "cross edges : "; p cross print "forward edges : "; p forward puts "--- Test dfs_topological_sort method" # # Professor Bumstead topologically sorts his clothing when getting dressed. # # "undershorts" "socks" # | | | # v | v "watch" # "pants" --+-------> "shoes" # | # v # "belt" <----- "shirt" ----> "tie" ----> "jacket" # | ^ # `---------------------------------------' # dag = Bio::Pathway.new([ Bio::Relation.new("undeershorts", "pants", true), Bio::Relation.new("undeershorts", "shoes", true), Bio::Relation.new("socks", "shoes", true), Bio::Relation.new("watch", "watch", true), Bio::Relation.new("pants", "belt", true), Bio::Relation.new("pants", "shoes", true), Bio::Relation.new("shirt", "belt", true), Bio::Relation.new("shirt", "tie", true), Bio::Relation.new("tie", "jacket", true), Bio::Relation.new("belt", "jacket", true), ]) p dag.dfs_topological_sort puts "--- Test dijkstra method" distance, predecessor = graph.dijkstra('q') p distance p predecessor puts "--- Test dijkstra method by weighted graph" # # 'a' --> 'b' # | 1 | 3 # |5 v # `----> 'c' # r1 = Bio::Relation.new('a', 'b', 1) r2 = Bio::Relation.new('a', 'c', 5) r3 = Bio::Relation.new('b', 'c', 3) w_graph = Bio::Pathway.new([r1, r2, r3]) p w_graph p w_graph.dijkstra('a') puts "--- Test bellman_ford method by negative weighted graph" # # ,-- 'a' --> 'b' # | | 1 | 3 # | |5 v # | `----> 'c' # | 
^ # |2 | -5 # `--> 'd' ----' # r4 = Bio::Relation.new('a', 'd', 2) r5 = Bio::Relation.new('d', 'c', -5) w_graph.append(r4) w_graph.append(r5) p w_graph.bellman_ford('a') p graph.bellman_ford('q') #end bio-1.4.3.0001/sample/ssearch2tab.rb0000755000004100000410000000436312200110570016703 0ustar www-datawww-data#!/usr/bin/env ruby # # ssearch2tab.rb - convert SSEARCH output into tab delimited data for MySQL # # Usage: # # % ssearch2tab.rb SSEARCH-output-file[s] > ssearch_results.tab # % mysql < ssearch_results.sql (use sample at the end of this file) # # Format accepted: # # % ssearch3[3][_t] -Q -H -m 6 query.f target.f > SSEARCH-output-file # # Copyright (C) 2001 KATAYAMA Toshiaki # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # $Id: ssearch2tab.rb,v 0.1 2001/06/21 08:25:58 katayama Exp $ # while gets # query if /^\S+: (\d+) aa$/ q_len = $1 end # each hit if /^>>([^>]\S+).*\((\d+) aa\)$/ target = $1 t_len = $2 # d = dummy variable d, d, d, swopt, d, zscore, d, bits, d, evalue = gets.split(/\s+/) d, d, sw, ident, d, ugident, d, d, overlap, d, d, lap = gets.split(/\s+/) # query-hit pair print "#{$FILENAME}\t#{q_len}\t#{target}\t#{t_len}" # pick up values ary = [ swopt, zscore, bits, evalue, sw, ident, ugident, overlap, lap ] # print values for i in ary i.tr!('^0-9.:e\-','') print "\t#{i}" end print "\n" end end =begin MySQL ssearch_results.sql sample CREATE DATABASE IF NOT EXISTS db_name; CREATE TABLE IF NOT EXISTS db_name.table_name ( query varchar(25) not NULL, q_len integer unsigned default 0, target varchar(25) not NULL, t_len integer unsigned default 0, swopt integer unsigned default 0, zscore float default 0.0, bits float default 0.0, evalue float default 0.0, sw integer unsigned default 0, ident float default 0.0, ugident float default 0.0, overlap integer unsigned default 0, lap_at varchar(25) default NULL ); LOAD DATA LOCAL INFILE 'ssearch_results.tab' INTO TABLE db_name.table_name; =end bio-1.4.3.0001/sample/demo_bl2seq_report.rb0000644000004100000410000001710612200110570020265 0ustar www-datawww-data# # = sample/demo_bl2seq_report.rb - demo of bl2seq (BLAST 2 sequences) parser # # Copyright:: Copyright (C) 2005 Naohisa Goto # License:: The Ruby License # # == Description # # Demonstration of Bio::Blast::Bl2seq::Report, bl2seq (BLAST 2 sequences) # parser class. # # == Usage # # Run this script with specifying filename(s) containing bl2seq result(s). # # $ ruby demo_bl2seq_report.rb files... # # Example usage using test data: # # $ ruby -I lib sample/demo_bl2seq_report.rb test/data/bl2seq/cd8a_cd8b_blastp.bl2seq # # == Development information # # The code was moved from lib/bio/appl/bl2seq/report.rb # require 'bio' if ARGV.empty? 
then puts "Demonstration of bl2seq (BLAST 2 sequences) parser." puts "Usage: #{$0} files..." exit(0) end Bio::FlatFile.open(Bio::Blast::Bl2seq::Report, ARGF) do |ff| ff.each do |rep| print "# === Bio::Blast::Bl2seq::Report\n" puts #@#print " rep.program #=> "; p rep.program #@#print " rep.version #=> "; p rep.version #@#print " rep.reference #=> "; p rep.reference #@#print " rep.db #=> "; p rep.db #print " rep.query_id #=> "; p rep.query_id print " rep.query_def #=> "; p rep.query_def print " rep.query_len #=> "; p rep.query_len #puts #@#print " rep.version_number #=> "; p rep.version_number #@#print " rep.version_date #=> "; p rep.version_date puts print "# === Parameters\n" #puts #print " rep.parameters #=> "; p rep.parameters puts print " rep.matrix #=> "; p rep.matrix print " rep.expect #=> "; p rep.expect #print " rep.inclusion #=> "; p rep.inclusion print " rep.sc_match #=> "; p rep.sc_match print " rep.sc_mismatch #=> "; p rep.sc_mismatch print " rep.gap_open #=> "; p rep.gap_open print " rep.gap_extend #=> "; p rep.gap_extend #print " rep.filter #=> "; p rep.filter #@#print " rep.pattern #=> "; p rep.pattern #print " rep.entrez_query #=> "; p rep.entrez_query #puts #@#print " rep.pattern_positions #=> "; p rep.pattern_positions puts print "# === Statistics (last iteration's)\n" #puts #print " rep.statistics #=> "; p rep.statistics puts print " rep.db_num #=> "; p rep.db_num print " rep.db_len #=> "; p rep.db_len #print " rep.hsp_len #=> "; p rep.hsp_len print " rep.eff_space #=> "; p rep.eff_space print " rep.kappa #=> "; p rep.kappa print " rep.lambda #=> "; p rep.lambda print " rep.entropy #=> "; p rep.entropy puts print " rep.num_hits #=> "; p rep.num_hits print " rep.gapped_kappa #=> "; p rep.gapped_kappa print " rep.gapped_lambda #=> "; p rep.gapped_lambda print " rep.gapped_entropy #=> "; p rep.gapped_entropy print " rep.posted_date #=> "; p rep.posted_date puts #@#print "# === Message (last iteration's)\n" #@#puts #@#print " rep.message #=> "; p 
rep.message #puts #@#print " rep.converged? #=> "; p rep.converged? #@#puts print "# === Iterations\n" puts print " rep.itrerations.each do |itr|\n" puts rep.iterations.each do |itr| print "# --- Bio::Blast::Bl2seq::Report::Iteration\n" puts print " itr.num #=> "; p itr.num #print " itr.statistics #=> "; p itr.statistics #@#print " itr.message #=> "; p itr.message print " itr.hits.size #=> "; p itr.hits.size #puts #@#print " itr.hits_newly_found.size #=> "; p itr.hits_newly_found.size; #@#print " itr.hits_found_again.size #=> "; p itr.hits_found_again.size; #@#if itr.hits_for_pattern then #@#itr.hits_for_pattern.each_with_index do |hp, hpi| #@#print " itr.hits_for_pattern[#{hpi}].size #=> "; p hp.size; #@#end #@#end #@#print " itr.converged? #=> "; p itr.converged? puts print " itr.hits.each do |hit|\n" puts itr.hits.each_with_index do |hit, i| print "# --- Bio::Blast::Bl2seq::Default::Report::Hit" print " ([#{i}])\n" puts #print " hit.num #=> "; p hit.num #print " hit.hit_id #=> "; p hit.hit_id print " hit.len #=> "; p hit.len print " hit.definition #=> "; p hit.definition #print " hit.accession #=> "; p hit.accession #puts print " hit.found_again? #=> "; p hit.found_again? 
print " --- compatible/shortcut ---\n" #print " hit.query_id #=> "; p hit.query_id #print " hit.query_def #=> "; p hit.query_def #print " hit.query_len #=> "; p hit.query_len #print " hit.target_id #=> "; p hit.target_id print " hit.target_def #=> "; p hit.target_def print " hit.target_len #=> "; p hit.target_len print " --- first HSP's values (shortcut) ---\n" print " hit.evalue #=> "; p hit.evalue print " hit.bit_score #=> "; p hit.bit_score print " hit.identity #=> "; p hit.identity #print " hit.overlap #=> "; p hit.overlap print " hit.query_seq #=> "; p hit.query_seq print " hit.midline #=> "; p hit.midline print " hit.target_seq #=> "; p hit.target_seq print " hit.query_start #=> "; p hit.query_start print " hit.query_end #=> "; p hit.query_end print " hit.target_start #=> "; p hit.target_start print " hit.target_end #=> "; p hit.target_end print " hit.lap_at #=> "; p hit.lap_at print " --- first HSP's vaules (shortcut) ---\n" print " --- compatible/shortcut ---\n" puts print " hit.hsps.size #=> "; p hit.hsps.size if hit.hsps.size == 0 then puts " (HSP not found: please see blastall's -b and -v options)" puts else puts print " hit.hsps.each do |hsp|\n" puts hit.hsps.each_with_index do |hsp, j| print "# --- Bio::Blast::Default::Report::HSP (Bio::Blast::Bl2seq::Report::HSP)" print " ([#{j}])\n" puts #print " hsp.num #=> "; p hsp.num print " hsp.bit_score #=> "; p hsp.bit_score print " hsp.score #=> "; p hsp.score print " hsp.evalue #=> "; p hsp.evalue print " hsp.identity #=> "; p hsp.identity print " hsp.gaps #=> "; p hsp.gaps print " hsp.positive #=> "; p hsp.positive print " hsp.align_len #=> "; p hsp.align_len #print " hsp.density #=> "; p hsp.density print " hsp.query_frame #=> "; p hsp.query_frame print " hsp.query_from #=> "; p hsp.query_from print " hsp.query_to #=> "; p hsp.query_to print " hsp.hit_frame #=> "; p hsp.hit_frame print " hsp.hit_from #=> "; p hsp.hit_from print " hsp.hit_to #=> "; p hsp.hit_to #print " hsp.pattern_from#=> "; p 
hsp.pattern_from #print " hsp.pattern_to #=> "; p hsp.pattern_to print " hsp.qseq #=> "; p hsp.qseq print " hsp.midline #=> "; p hsp.midline print " hsp.hseq #=> "; p hsp.hseq puts print " hsp.percent_identity #=> "; p hsp.percent_identity #print " hsp.mismatch_count #=> "; p hsp.mismatch_count # print " hsp.query_strand #=> "; p hsp.query_strand print " hsp.hit_strand #=> "; p hsp.hit_strand print " hsp.percent_positive #=> "; p hsp.percent_positive print " hsp.percent_gaps #=> "; p hsp.percent_gaps puts end #each end #if hit.hsps.size == 0 end end end #ff.each end #FlatFile.open bio-1.4.3.0001/sample/demo_codontable.rb0000644000004100000410000000553212200110570017614 0ustar www-datawww-data# # = sample/demo_codontable.rb - demonstration of Bio::CodonTable # # Copyright:: Copyright (C) 2001, 2004 # Toshiaki Katayama # License:: The Ruby License # # # == Description # # Demonstration of Bio::CodonTable. # # == Usage # # Simply run this script. # # $ ruby demo_codontable.rb # # == Development information # # The code was moved from lib/bio/data/codontable.rb. 
# require 'bio' #if __FILE__ == $0 begin require 'pp' alias p pp rescue LoadError end puts "### Bio::CodonTable[1]" p ct1 = Bio::CodonTable[1] puts ">>> Bio::CodonTable#table" p ct1.table puts ">>> Bio::CodonTable#each" ct1.each do |codon, aa| puts "#{codon} -- #{aa}" end puts ">>> Bio::CodonTable#definition" p ct1.definition puts ">>> Bio::CodonTable#['atg']" p ct1['atg'] puts ">>> Bio::CodonTable#revtrans('A')" p ct1.revtrans('A') puts ">>> Bio::CodonTable#start_codon?('atg')" p ct1.start_codon?('atg') puts ">>> Bio::CodonTable#start_codon?('aaa')" p ct1.start_codon?('aaa') puts ">>> Bio::CodonTable#stop_codon?('tag')" p ct1.stop_codon?('tag') puts ">>> Bio::CodonTable#stop_codon?('aaa')" p ct1.stop_codon?('aaa') puts ">>> ct1_copy = Bio::CodonTable.copy(1)" p ct1_copy = Bio::CodonTable.copy(1) puts ">>> ct1_copy['tga'] = 'U'" p ct1_copy['tga'] = 'U' puts " orig : #{ct1['tga']}" puts " copy : #{ct1_copy['tga']}" puts "### ct = Bio::CodonTable.new(hash, definition)" hash = { 'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C', 'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C', 'tta' => 'L', 'tca' => 'S', 'taa' => '*', 'tga' => 'U', 'ttg' => 'L', 'tcg' => 'S', 'tag' => '*', 'tgg' => 'W', 'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R', 'ctc' => 'L', 'ccc' => 'P', 'cac' => 'H', 'cgc' => 'R', 'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R', 'ctg' => 'L', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R', 'att' => 'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S', 'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S', 'ata' => 'I', 'aca' => 'T', 'aaa' => 'K', 'aga' => 'R', 'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => 'R', 'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G', 'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G', 'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G', 'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G', } my_ct = Bio::CodonTable.new(hash, "my codon table") puts ">>> ct.definition" puts my_ct.definition puts ">>> 
ct.definition=(str)" my_ct.definition = "selenoproteins (Eukaryote)" puts my_ct.definition puts ">>> ct['tga']" puts my_ct['tga'] puts ">>> ct.revtrans('U')" puts my_ct.revtrans('U') puts ">>> ct.stop_codon?('tga')" puts my_ct.stop_codon?('tga') puts ">>> ct.stop_codon?('tag')" puts my_ct.stop_codon?('tag') #end bio-1.4.3.0001/sample/tdiary.rb0000644000004100000410000001120712200110570015766 0ustar www-datawww-data# # tDiary : plugin/bio.rb # # Copyright (C) 2003 KATAYAMA Toshiaki # Mitsuteru C. Nakao # Itoshi NIKAIDO # Takeya KASUKAWA # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: tdiary.rb,v 1.3 2003/03/17 04:24:47 k Exp $ # =begin == What's this? This is a plugin for the (()) to create various links for biological resources from your diary. tDiary is an extensible web diary application written in Ruby. == How to install Just copy this file under the tDiary's plugin directory as bio.rb. == Usage --- pubmed(pmid, comment = nil) Create a link to NCBI Entrez reference database by using PubMed ID. See (()) for more information. * tDiary style * <%= pubmed 12345 %> * <%= pubmed 12345, 'hogehoge' %> * RD style * ((% pubmed 12345 %)) * ((% pubmed 12345, 'hogehoge' %)) --- biofetch(db, entry_id) Create a link to the BioFetch detabase entry retrieval system. See (()) for more information. 
* tDiary style * <%= biofetch 'genbank', 'AA2CG' %> * RD style * ((% biofetch 'genbank', 'AA2CG' %)) --- amigo(go_id, comment = nil) Create a link to the AmiGO GO term browser by using GO ID. See (()) for more information. * tDiary style * <%= amigo '0003673' %> * <%= amigo '0003673', 'The root of GO' %> * RD style * ((% amigo 0003673 %)) * ((% amigo 0003673, 'The root of GO' %)) --- fantom(id, comment = nil) Create a link to FANTOM database by using Clone ID. You can use RIKEN clone ID, Rearray ID, Seq ID and Accession Number. See (()) for more information. * tDiary style * <%= fantom 12345 %> * <%= fantom 12345, 'hogehoge' %> * RD style * ((% fantom 12345 %)) * ((% fantom 12345, 'hogehoge' %)) --- rtps(id, comment = nil) Create a link to FANTOM RTPS database by using Clone ID. You can use only RTPS ID. See (()) for more information. * tDiary style * <%= rtps 12345 %> * <%= rtps 12345, 'hogehoge' %> * RD style * ((% rtps 12345 %)) * ((% rtps 12345, 'hogehoge' %)) == References * Analysis of the mouse transcriptome based on functional annotation of 60,770 full-length cDNAs, The FANTOM Consortium and the RIKEN Genome Exploration Research Group Phase I & II Team, Nature 420:563-573, 2002 * Functional annotation of a full-length mouse cDNA collection, The RIKEN Genome Exploration Research Group Phase II Team and the FANTOM Consortium, Nature 409:685-690, 2001 =end def pubmed(pmid, comment = nil) pmid = pmid.to_s.strip url = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi" url << "?cmd=Retrieve&db=PubMed&dopt=Abstract&list_uids=#{pmid}" if comment %Q[#{comment.to_s.strip}] else %Q[PMID:#{pmid}] end end def biofetch(db, entry_id) url = "http://biofetch.bioruby.org/" %Q[#{db}:#{entry_id}] end def amigo(go_id = '0003673', comment = nil) go_id = go_id.to_s.strip url = "http://www.godatabase.org/cgi-bin/go.cgi?query=#{go_id};view=query;action=query;search_constraint=terms" comment = "AmiGO:#{go_id}" unless comment %Q[#{comment}] end def fantom(id, comment = nil) id = 
id.to_s.strip url = "http://fantom2.gsc.riken.go.jp/db/link/id.cgi" url << "?id=#{id}" if comment %Q[#{comment.to_s.strip}] else %Q[FANTOM DB:#{id}] end end def rtps(id, comment = nil) id = id.to_s.strip url = "http://fantom2.gsc.riken.go.jp/RTPS/link/id.cgi" url << "?id=#{id}" if comment %Q[#{comment.to_s.strip}] else %Q[FANTOM RTPS DB:#{id}] end end bio-1.4.3.0001/sample/vs-genes.rb0000755000004100000410000001351412200110570016227 0ustar www-datawww-data#!/usr/bin/env ruby # # vs-genes.rb - homology/motif search wrapper # # FASTA/BLAST/Pfam interface for the multiple query in the FASTA format # # Copyright (C) 2001 KATAYAMA Toshiaki # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # $Id: vs-genes.rb,v 0.1 2001/06/21 08:26:31 katayama Exp $ # def usage(cpu, ktup, skip, resultdir, verbose) print <<-END Usage: % #{$0} -p PROG -q QUERY -t TARGET [-c #] [-k #] [-s #] [-d DIR] [-v on] options -p PROG : (fasta3|ssearch3|tfasta3|fastx3|tfastx3)[3] or (blastp|blastn|blastx|tblastn|tblastx) or (hmmpfam|hmmpfam_n) -q QUERY : query nucleotide or peptide sequences in the FASTA format -t TARGET : target DB (FASTA or BLAST2 formatdb or Pfam format) optional arguments -c num : number of CPUs (for the SMP machines, default is #{cpu}) -k num : FASTA ktup value (2 for pep, 6 for nuc, default is #{ktup}) -s num : skip query (for the resume session, default is #{skip}) -d DIR : result output directory (default is "#{resultdir}") -v on/off : verbose output of processing if on (default is "#{verbose}") END exit 1 end ### initialize def init arg = {} # default values arg['c'] = 1 # num of CPUs arg['k'] = 2 # ktup value for FASTA arg['s'] = 0 # skip query arg['d'] = "./result" # result directory arg['v'] = 'off' # verbose mode # parse options ARGV.join(' ').scan(/-(\w) (\S+)/).each do |key, val| arg[key] = val end # check program, query, target or print usage unless arg['p'] and arg['q'] and arg['t'] usage(arg['c'], arg['k'], arg['s'], arg['d'], arg['v']) end # create result output directory unless test(?d, "#{arg['d']}") Dir.mkdir("#{arg['d']}", 0755) end # print status if arg['v'] != 'off' puts "PROG : #{arg['p']}" puts " ktup : #{arg['k']}" if arg['p'] =~ /fast/ puts "QUERY : #{arg['q']}" puts " skip : #{arg['s']}" puts "TARGET : #{arg['t']}" puts "RESULT : #{arg['d']}" end return arg end ### generate command line def cmd_line(arg, orf) # program with default command line options # query -> target DB opt = { # FASTA : "-b n" for best n scores, "-d n" for best n alignment 'fasta3' => "fasta3 -Q -H -m 6", # pep -> pep or nuc -> nuc 'ssearch3' => "ssearch3 -Q -H -m 6", # pep -> pep or nuc -> nuc 'tfasta3' => "tfasta3 -Q -H -m 6", # pep -> nuc 'fastx3' => "fastx3 
-Q -H -m 6", # nuc -> pep 'tfastx3' => "tfastx3 -Q -H -m 6", # pep -> nuc (with frameshifts) 'fasta33' => "fasta33 -Q -H -m 6", # pep -> pep or nuc -> nuc 'ssearch33' => "ssearch33 -Q -H -m 6", # pep -> pep or nuc -> nuc 'tfasta33' => "tfasta33 -Q -H -m 6", # pep -> nuc 'fastx33' => "fastx33 -Q -H -m 6", # nuc -> pep 'tfastx33' => "tfastx33 -Q -H -m 6", # pep -> nuc (with frameshifts) # BLAST : outputs XML 'blastp' => "blastall -m 7 -p blastp -d", # pep -> pep 'blastn' => "blastall -m 7 -p blastn -d", # nuc -> nuc 'blastx' => "blastall -m 7 -p blastx -d", # nuc -> pep 'tblastn' => "blastall -m 7 -p tblastn -d", # pep -> nuc 'tblastx' => "blastall -m 7 -p tblastx -d", # nuc -> nuc (by trans) # Pfam : "-A n" for best n alignment, "-E n" for E value cutoff etc. 'hmmpfam' => "hmmpfam", # pep -> Pfam DB 'hmmpfam_n' => "hmmpfam -n", # nuc -> Pfam DB } # arguments used in the command line cpu = arg['c'].to_i ktup = arg['k'] target = arg['t'] query = arg['d'] + "/query." + orf result = arg['d'] + "/" + orf prog = opt[arg['p']] if cpu > 1 # use multiple CPUs case arg['p'] when /(fast|ssearch)/ prog += " -T #{cpu}" prog.sub!(' ', '_t ') # rename program with "_t" when /pfam/ prog += " --cpu #{cpu}" end end # generate complete command line to execute case arg['p'] when /fast/ command = "#{prog} #{query} #{target} #{ktup} > #{result}" when /ssearch/ command = "#{prog} #{query} #{target} > #{result}" when /blast/ command = "#{prog} #{target} -i #{query} > #{result}" when /pfam/ command = "#{prog} #{target} #{query} > #{result}" end return command end ### main begin arg = init count = 0 open(arg['q'], "r") do |f| while seq = f.gets("\n>") count += 1 # skip (-s option) next unless count > arg['s'].to_i # clean up seq.sub!(/^>?[ \t]*/, '') # delete '>' and SPACEs or TABs at the head seq.sub!(/>$/, '') # delete '>' at the tail (separator) # get ORF name if seq[/^$/] # no definition (e.g. 
">\nSEQ>" or ">\n>") next # -> useless for the multiple query else orf = seq[/^\S+/] # the first word in the definition line end # KEGG uses ">DB:ENTRY" format in the definition line if orf =~ /:/ db,orf = orf.split(/:/) end # add time if the same ORF name was already used if test(?f, "#{arg['d']}/#{orf}") orf = "#{orf}.#{Time.now.to_f.to_s}" end # create temporal file of the query open("#{arg['d']}/query.#{orf}", "w+") do |tmp| tmp.print(">#{seq}") end command = cmd_line(arg, orf) # print status if arg['v'] != 'off' puts "#{count} : #{orf} ..." puts " #{command}" end # execute system("#{command}") # remove temporal file File.delete("#{arg['d']}/query.#{orf}") end end end bio-1.4.3.0001/sample/genes2nuc.rb0000755000004100000410000000167312200110570016374 0ustar www-datawww-data#!/usr/bin/env ruby # # genes2nuc.rb - convert KEGG/GENES entry into FASTA format (nuc) # # Copyright (C) 2001 KATAYAMA Toshiaki # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # $Id: genes2nuc.rb,v 0.4 2002/06/23 20:21:56 k Exp $ # require 'bio/db/kegg/genes' require 'bio/extend' include Bio while gets(KEGG::GENES::DELIMITER) genes = KEGG::GENES.new($_) next if genes.nalen == 0 puts ">#{genes.entry_id} #{genes.definition}" puts genes.naseq.fold(60+12, 12) end bio-1.4.3.0001/sample/demo_litdb.rb0000644000004100000410000000150012200110570016567 0ustar www-datawww-data# # = sample/demo_litdb.rb - demonstration of Bio::LITDB # # Copyright:: Copyright (C) 2001 Toshiaki Katayama # License:: The Ruby License # # # == Description # # Demonstration of Bio::LITDB, LITDB literature database parser class. # # == Requirements # # Internet connection and/or OBDA (Open Bio Database Access) configuration. # # == Usage # # Simply run this script. # # $ ruby demo_litdb.rb # # == Development information # # The code was moved from lib/bio/db/litdb.rb. # require 'bio' #if __FILE__ == $0 entry = Bio::Fetch.query('litdb', '0308004') puts entry p Bio::LITDB.new(entry).reference entry = Bio::Fetch.query('litdb', '0309094') puts entry p Bio::LITDB.new(entry).reference entry = Bio::Fetch.query('litdb', '0309093') puts entry p Bio::LITDB.new(entry).reference #end bio-1.4.3.0001/sample/demo_psort_report.rb0000644000004100000410000000266012200110570020243 0ustar www-datawww-data# # = sample/demo_psort_report.rb - demonstration of Bio::PSORT::PSORT2::Report # # Copyright:: Copyright (C) 2003 # Mitsuteru C. Nakao # License:: The Ruby License # # # == IMPORTANT NOTE # # The sample may not work because it has not been tested for a long time. # # == Description # # Demonstration of Bio::PSORT::PSORT2::Report, parser class for the PSORT # systems output. # # == Usage # # Specify a file containing PSORT2 output. # # $ ruby demo_psort_report.rb # # == Development information # # The code was moved from lib/bio/appl/psort/report.rb. 
# require 'bio' # testing code #if __FILE__ == $0 while entry = $<.gets(Bio::PSORT::PSORT2::Report::DELIMITER) puts "\n ==> a = Bio::PSORT::PSORT2::Report.parser(entry)" a = Bio::PSORT::PSORT2::Report.parser(entry) puts "\n ==> a.entry_id " p a.entry_id puts "\n ==> a.scl " p a.scl puts "\n ==> a.pred " p a.pred puts "\n ==> a.prob " p a.prob p a.prob.keys.sort.map {|k| k.rjust(4)}.inspect.gsub('"','') p a.prob.keys.sort.map {|k| a.prob[k].to_s.rjust(4) }.inspect.gsub('"','') puts "\n ==> a.k " p a.k puts "\n ==> a.definition" p a.definition puts "\n ==> a.seq" p a.seq puts "\n ==> a.features.keys.sort " p a.features.keys.sort a.features.keys.sort.each do |key| puts "\n ==> a.features['#{key}'] " puts a.features[key] end end #end bio-1.4.3.0001/sample/fastagrep.rb0000755000004100000410000000344112200110570016452 0ustar www-datawww-data#!/usr/bin/env ruby # # fastagrep: Greps a FASTA file (in fact it can use any flat file input supported # by BIORUBY) and outputs sorted FASTA # # Copyright (C) 2008 KATAYAMA Toshiaki & Pjotr Prins # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # $Id: fastagrep.rb,v 1.1 2008/05/19 12:22:05 pjotr Exp $ # require 'bio' include Bio usage = < reduced.fasta As the result is a FASTA stream you could pipe it for sorting: fastagrep.rb "/Arabidopsis|Drosophila/i" *.seq | fastasort.rb USAGE if ARGV.size == 0 print usage exit 1 end skip = (ARGV[0] == '-v') ARGV.shift if skip # ---- Valid regular expression - if it is not a file regex = ARGV[0] if regex=~/^\// and !File.exist?(regex) ARGV.shift else print usage exit 1 end ARGV.each do | fn | Bio::FlatFile.auto(fn).each do | item | if skip next if eval("item.definition =~ #{regex}") else next if eval("item.definition !~ #{regex}") end rec = Bio::FastaFormat.new('> '+item.definition.strip+"\n"+item.data) print rec end end bio-1.4.3.0001/sample/seqdatabase.ini0000644000004100000410000000664012200110570017130 0ustar www-datawww-dataVERSION=1.00 [embl] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=embl [embl-upd] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=embl-upd [embl_biofetch] protocol=biofetch location=http://www.ebi.ac.uk/cgi-bin/dbfetch dbname=embl [embl_biosql] protocol=biosql location=localhost dbname=biosql driver=postgres user=hack pass= biodbname=embl [embl_biocorba] protocol=bsane-corba location=sqldbsrv.ior [embl_xembl] protocol=xembl location=http://www.ebi.ac.uk/xembl/XEMBL.wsdl format=Bsml [embl_flat] protcol=flat location=/export/database/ dbname=embl [genbank_bdb] protcol=flat location=/export/database/ dbname=genbank [swissprot] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=swissprot [swissprot-upd] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=swissprot-upd [swissprot_biofetch] protocol=biofetch location=http://www.ebi.ac.uk/cgi-bin/dbfetch dbname=swall [swissprot_biosql] protocol=biosql location=db.bioruby.org dbname=biosql driver=mysql user=root pass= biodbname=sp [genbank] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb 
dbname=genbank [genbank-upd] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=genbank-upd [genbank_biosql] protocol=biosql location=db.bioruby.org dbname=biosql driver=mysql user=root pass= biodbname=gb [refseq] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=refseq [refseq_biosql] protocol=biosql location=db.bioruby.org dbname=biosql driver=mysql user= pass= biodbname=rs [kegg-pathway] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=pathway [kegg-genome] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=genome [kegg-genes] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=genes [kegg-vgenes] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=vgenes [aaindex] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=aaindex [blocks] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=blocks [enzyme] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=enzyme [epd] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=epd [litdb] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=litdb [omim] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=omim [pdb] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=pdb [pdbstr] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=pdbstr [pfam] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=pfam [pir] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=pir [pmd] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=pmd [prf] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=prf [prints] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=prints [prodom] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb 
dbname=prodom [prosdoc] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=prosdoc [prosite] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=prosite [transfac] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb dbname=transfac bio-1.4.3.0001/sample/demo_locations.rb0000644000004100000410000000563412200110570017500 0ustar www-datawww-data# # = sample/demo_locations.rb - demonstration of Bio::Locations # # Copyright:: Copyright (C) 2001, 2005 Toshiaki Katayama # 2006 Jan Aerts # 2008 Naohisa Goto # License:: The Ruby License # # == Description # # Demonstration of Bio::Locations, a parser class for the location string # used in the INSDC Feature Table. # # == Usage # # Simply run this script. # # $ ruby demo_locations.rb # # == Development information # # The code was moved from lib/bio/location.rb. # require 'bio' #if __FILE__ == $0 puts "Test new & span methods" [ '450', '500..600', 'join(500..550, 600..625)', 'complement(join(500..550, 600..625))', 'join(complement(500..550), 600..625)', '754^755', 'complement(53^54)', 'replace(4792^4793,"a")', 'replace(1905^1906,"acaaagacaccgccctacgcc")', '157..(800.806)', '(67.68)..(699.703)', '(45934.45974)..46135', '<180..(731.761)', '(88.89)..>1122', 'complement((1700.1708)..(1715.1721))', 'complement(<22..(255.275))', 'complement((64.74)..1525)', 'join((8298.8300)..10206,1..855)', 'replace((651.655)..(651.655),"")', 'one-of(898,900)..983', 'one-of(5971..6308,5971..6309)', '8050..one-of(10731,10758,10905,11242)', 'one-of(623,627,632)..one-of(628,633,637)', 'one-of(845,953,963,1078,1104)..1354', 'join(2035..2050,complement(1775..1818),13..345,414..992,1232..1253,1024..1157)', 'join(complement(1..61),complement(AP000007.1:252907..253505))', 'complement(join(71606..71829,75327..75446,76039..76203))', 'order(3..26,complement(964..987))', 'order(L44135.1:(454.445)..>538,<1..181)', '<200001..<318389', ].each do |pos| p pos # p Bio::Locations.new(pos) # p 
Bio::Locations.new(pos).span # p Bio::Locations.new(pos).range Bio::Locations.new(pos).each do |location| puts "class=" + location.class.to_s puts "start=" + location.from.to_s + "\tend=" + location.to.to_s + "\tstrand=" + location.strand.to_s end end puts "Test rel2abs/abs2rel method" [ '6..15', 'join(6..10,16..30)', 'complement(join(6..10,16..30))', 'join(complement(6..10),complement(16..30))', 'join(6..10,complement(16..30))', ].each do |pos| loc = Bio::Locations.new(pos) p pos # p loc (1..21).each do |x| print "absolute(#{x}) #=> ", y = loc.absolute(x), "\n" print "relative(#{y}) #=> ", y ? loc.relative(y) : y, "\n" print "absolute(#{x}, :aa) #=> ", y = loc.absolute(x, :aa), "\n" print "relative(#{y}, :aa) #=> ", y ? loc.relative(y, :aa) : y, "\n" end end pos = 'join(complement(6..10),complement(16..30))' loc = Bio::Locations.new(pos) print "pos : "; p pos print "`- loc[1] : "; p loc[1] print " `- range : "; p loc[1].range puts Bio::Location.new('5').<=>(Bio::Location.new('3')) #end bio-1.4.3.0001/sample/gbtab2mysql.rb0000755000004100000410000000776112200110570016736 0ustar www-datawww-data#!/usr/bin/env ruby # # gbtab2mysql.rb - load tab delimited GenBank data files into MySQL # # Copyright (C) 2002 KATAYAMA Toshiaki # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: gbtab2mysql.rb,v 1.3 2002/06/25 19:30:26 k Exp $ # require 'dbi' $schema_ent = < 20 unless dbh.tables.include?(table) create_table(dbh, table) tables.push(table) end load_tab(dbh, base, table) end merge_table(dbh, tables) end $stderr.puts Time.now bio-1.4.3.0001/sample/demo_hmmer_report.rb0000644000004100000410000000733512200110570020210 0ustar www-datawww-data# # = sample/demo_hmmer_report.rb - demonstration of Bio::HMMER::Report # # Copyright:: Copyright (C) 2002 # Hiroshi Suga , # Copyright:: Copyright (C) 2005 # Masashi Fujita # License:: The Ruby License # # # == Description # # Demonstration of Bio::HMMER::Report (HMMER output parser). # # Note that it (and Bio::HMMER::Report) supports HMMER 2.x. # HMMER 3.x is currently not supported. # # == Usage # # Specify a file containing a HMMER result. # # $ ruby demo_hmmer_report.rb file # # Example usage using test data: # # $ ruby -Ilib sample/demo_hmmer_report.rb test/data/HMMER/hmmsearch.out # $ ruby -Ilib sample/demo_blast_report.rb test/data/HMMER/hmmpfam.out # # == Development information # # The code was moved from lib/bio/appl/hmmer/report.rb. 
#

require 'bio'

#if __FILE__ == $0

=begin
#
# for multiple reports in a single output file (hmmpfam)
#
Bio::HMMER.reports(ARGF.read) do |report|
  report.hits.each do |hit|
    hit.hsps.each do |hsp|
    end
  end
end
=end

# Inspect values with pp when it can be loaded; otherwise the stock p is kept.
begin
  require 'pp'
  alias p pp
rescue LoadError
end

# Prints +label+ right-justified to +width+ columns (without a newline) and
# then inspects the value of the given block with +p+.  The block is evaluated
# only after the label has been written, the same order as a plain
# "print label; p value" pair.
def show_field(label, width)
  print label.rjust(width)
  p yield
end

# Parse the whole input (files named on the command line, or stdin) as a
# single HMMER report and dump the parsed object.
rep = Bio::HMMER::Report.new(ARGF.read)
p rep

indent = 18

puts "### hmmer result"
show_field("name : ", indent)           { rep.program['name'] }
show_field("version : ", indent)        { rep.program['version'] }
show_field("copyright : ", indent)      { rep.program['copyright'] }
show_field("license : ", indent)        { rep.program['license'] }

show_field("HMM file : ", indent)       { rep.parameter['HMM file'] }
show_field("Sequence file : ", indent)  { rep.parameter['Sequence file'] }

show_field("Query sequence : ", indent) { rep.query_info['Query sequence'] }
show_field("Accession : ", indent)      { rep.query_info['Accession'] }
show_field("Description : ", indent)    { rep.query_info['Description'] }

rep.each do |hit|
  puts "## each hit"
  show_field("accession : ", indent)   { [ hit.accession, hit.target_id, hit.hit_id, hit.entry_id ] }
  show_field("description : ", indent) { [ hit.description, hit.definition ] }
  show_field("target_def : ", indent)  { hit.target_def }
  show_field("score : ", indent)       { [ hit.score, hit.bit_score ] }
  show_field("evalue : ", indent)      { hit.evalue }
  show_field("num : ", indent)         { hit.num }

  hit.each do |hsp|
    puts "## each hsp"
    show_field("accession : ", indent)    { [ hsp.accession, hsp.target_id ] }
    show_field("domain : ", indent)       { hsp.domain }
    show_field("seq_f : ", indent)        { hsp.seq_f }
    show_field("seq_t : ", indent)        { hsp.seq_t }
    show_field("seq_ft : ", indent)       { hsp.seq_ft }
    show_field("hmm_f : ", indent)        { hsp.hmm_f }
    show_field("hmm_t : ", indent)        { hsp.hmm_t }
    show_field("hmm_ft : ", indent)       { hsp.hmm_ft }
    show_field("score : ", indent)        { [ hsp.score, hsp.bit_score ] }
    show_field("evalue : ", indent)       { hsp.evalue }
    show_field("midline : ", indent)      { hsp.midline }
    show_field("hmmseq : ", indent)       { hsp.hmmseq }
    show_field("flatseq : ", indent)      { hsp.flatseq }
    show_field("query_frame : ", indent)  { hsp.query_frame }
    show_field("target_frame : ", indent) { hsp.target_frame }

    show_field("query_seq : ", indent)    { hsp.query_seq }    # hmmseq, flatseq
    show_field("target_seq : ", indent)   { hsp.target_seq }   # flatseq, hmmseq
    show_field("target_from : ", indent)  { hsp.target_from }  # seq_f, hmm_f
    show_field("target_to : ", indent)    { hsp.target_to }    # seq_t, hmm_t
    show_field("query_from : ", indent)   { hsp.query_from }   # hmm_f, seq_f
    show_field("query_to : ", indent)     { hsp.query_to }     # hmm_t, seq_t
  end
end

#end bio-1.4.3.0001/sample/enzymes.rb0000755000004100000410000000400512200110570016165 0ustar www-datawww-data#!/usr/bin/env ruby # # enzymes.rb - cut input file using enzyme on command line # # Copyright (C) 2006 Pjotr Prins and Trevor Wennblom # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # $Id: enzymes.rb,v 1.1 2006/03/03 15:31:06 pjotr Exp $ # require 'bio/io/flatfile' require 'bio/util/restriction_enzyme' include Bio usage = < '+entry.definition+"\n" print frag.primary,"\n" end end end bio-1.4.3.0001/sample/demo_aaindex.rb0000644000004100000410000000251612200110570017112 0ustar www-datawww-data# # = sample/demo_aaindex.rb - demonstration of Bio::AAindex1 and AAindex2 # # Copyright:: Copyright (C) 2001 # KAWASHIMA Shuichi # Copyright:: Copyright (C) 2006 # Mitsuteru C. Nakao # License:: The Ruby License # # # == Description # # Demonstration of Bio::AAindex1 and Bio::AAindex2. # # == Requirements # # Internet connection and/or OBDA (Open Bio Database Access) configuration.
#
# == Usage
#
# Simply run this script.
#
#  $ ruby demo_aaindex.rb
#
# == Development information
#
# The code was moved from lib/bio/db/aaindex.rb.
#

require 'bio'

#if __FILE__ == $0

# --- AAindex1 -------------------------------------------------------------
# Retrieve entry PRAM900102 in raw format through Bio::Fetch, parse it with
# Bio::AAindex1 and print each accessor in turn, then the whole object.
puts "### AAindex1 (PRAM900102)"
aax1 = Bio::AAindex1.new(Bio::Fetch.query('aaindex', 'PRAM900102', 'raw'))
p aax1.entry_id
p aax1.definition
p aax1.dblinks
p aax1.author
p aax1.title
p aax1.journal
p aax1.comment
p aax1.correlation_coefficient
p aax1.index
p aax1

# --- AAindex2 -------------------------------------------------------------
# Same walk-through for entry DAYM780301 parsed with Bio::AAindex2, followed
# by its row/column labels and a few operations on the parsed matrix.
puts "### AAindex2 (DAYM780301)"
aax2 = Bio::AAindex2.new(Bio::Fetch.query('aaindex', 'DAYM780301', 'raw'))
p aax2.entry_id
p aax2.definition
p aax2.dblinks
p aax2.author
p aax2.title
p aax2.journal
# Was "p aax1.comment": this section must show the AAindex2 entry's own
# comment, not repeat the one from the AAindex1 entry above.
p aax2.comment
p aax2.rows
p aax2.cols
p aax2.matrix
p aax2.matrix[2,2]
p aax2.matrix[2,3]
p aax2.matrix[4,3]
p aax2.matrix.determinant
p aax2.matrix.rank
p aax2.matrix.transpose
p aax2

#end bio-1.4.3.0001/sample/demo_ddbjxml.rb0000644000004100000410000001257412200110570017132 0ustar www-datawww-data# # = sample/demo_ddbjxml.rb - demonstration of Bio::DDBJ::XML, DDBJ SOAP access # # Copyright:: Copyright (C) 2003, 2004 # Toshiaki Katayama # License:: The Ruby License # # # # == Description # # Demonstration of Bio::DDBJ::XML, DDBJ SOAP server access class. # # == Requirements # # Internet connection is needed. # # == Usage # # Simply run this script. # # $ ruby demo_ddbjxml.rb # # == Notes # # It can not be run with Ruby 1.9 because SOAP4R (SOAP support for Ruby) # currently does not support Ruby 1.9. # # == Development information # # The code was moved from lib/bio/io/ddbjxml.rb.
#

require 'bio'

#if __FILE__ == $0

# Inspect values with pp when it can be loaded; otherwise the stock p is kept.
begin
  require 'pp'
  alias p pp
rescue LoadError
end

# Every section below follows the same pattern: create the client object for
# one Bio::DDBJ::XML service, then for each remote call print a "### ..."
# label that shows the call, followed by whatever the call returns.

puts ">>> Bio::DDBJ::XML::Blast"
serv = Bio::DDBJ::XML::Blast.new
# serv.log = STDERR

query = "MSSRIARALALVVTLLHLTRLALSTCPAACHCPLEAPKCAPGVGLVRDGCGCCKVCAKQL"

puts "### searchSimple('blastp', 'SWISS', query)"
puts serv.searchSimple('blastp', 'SWISS', query)

puts "### searchParam('tblastn', 'ddbjvrl', query, '-m 8')"
puts serv.searchParam('tblastn', 'ddbjvrl', query, '-m 8')


puts ">>> Bio::DDBJ::XML::ClustalW"
serv = Bio::DDBJ::XML::ClustalW.new

# Three FASTA-style records used as the alignment input.
# NOTE(review): the heredoc opener and the line breaks inside it were lost in
# this copy of the file ("query = < RABSTOUT ... END"); the opener is restored
# as <<END and each ">" header starts its own line.  The exact original
# spacing inside the records could not be recovered -- confirm against the
# upstream sample if it matters.
query = <<END
> RABSTOUT rabbit Guinness receptor
LKMHLMGHLKMGLKMGLKGMHLMHLKHMHLMTYTYTTYRRWPLWMWLPDFGHAS
ADSCVCAHGFAVCACFAHFDVCFGAVCFHAVCFAHVCFAAAVCFAVCAC
> MUSNOSE mouse nose drying factor
mhkmmhkgmkhmhgmhmhglhmkmhlkmgkhmgkmkytytytryrwtqtqwtwyt
fdgfdsgafdagfdgfsagdfavdfdvgavfsvfgvdfsvdgvagvfdv
> HSHEAVEN human Guinness receptor repeat
mhkmmhkgmkhmhgmhmhg lhmkmhlkmgkhmgkmk ytytytryrwtqtqwtwyt
fdgfdsgafdagfdgfsag dfavdfdvgavfsvfgv dfsvdgvagvfdv
mhkmmhkgmkhmhgmhmhg lhmkmhlkmgkhmgkmk ytytytryrwtqtqwtwyt
fdgfdsgafdagfdgfsag dfavdfdvgavfsvfgv dfsvdgvagvfdv
END

puts "### analyzeSimple(query)"
puts serv.analyzeSimple(query)

puts "### analyzeParam(query, '-align -matrix=blosum')"
puts serv.analyzeParam(query, '-align -matrix=blosum')


puts ">>> Bio::DDBJ::XML::DDBJ"
serv = Bio::DDBJ::XML::DDBJ.new

puts "### getFFEntry('AB000050')"
puts serv.getFFEntry('AB000050')

puts "### getXMLEntry('AB000050')"
puts serv.getXMLEntry('AB000050')

puts "### getFeatureInfo('AB000050', 'cds')"
puts serv.getFeatureInfo('AB000050', 'cds')

puts "### getAllFeatures('AB000050')"
puts serv.getAllFeatures('AB000050')

puts "### getRelatedFeatures('AL121903', '59000', '64000')"
puts serv.getRelatedFeatures('AL121903', '59000', '64000')

puts "### getRelatedFeaturesSeq('AL121903', '59000', '64000')"
puts serv.getRelatedFeaturesSeq('AL121903', '59000', '64000')


puts ">>> Bio::DDBJ::XML::Fasta"
serv = Bio::DDBJ::XML::Fasta.new

query = ">Test\nMSDGAVQPDG GQPAVRNERA TGSGNGSGGG GGGGSGGVGI"

puts "### searchSimple('fasta34', 'PDB', query)"
puts serv.searchSimple('fasta34', 'PDB', query)

query = ">Test\nAGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"

puts "### searchParam('fastx34_t', 'PDB', query, '-n')"
puts serv.searchParam('fastx34_t', 'PDB', query, '-n')


puts ">>> Bio::DDBJ::XML::GetEntry"
serv = Bio::DDBJ::XML::GetEntry.new

puts "### getDDBJEntry('AB000050')"
puts serv.getDDBJEntry('AB000050')

puts "### getPDBEntry('1AAR')"
puts serv.getPDBEntry('1AAR')


puts ">>> Bio::DDBJ::XML::Gib"
serv = Bio::DDBJ::XML::Gib.new

puts "### getOrganismList"
puts serv.getOrganismList

puts "### getChIDList"
puts serv.getChIDList

puts "### getOrganismNameFromChid('Sent_CT18:')"
puts serv.getOrganismNameFromChid('Sent_CT18:')

puts "### getChIDFromOrganismName('Aquifex aeolicus VF5')"
puts serv.getChIDFromOrganismName('Aquifex aeolicus VF5')

puts "### getAccession('Ecol_K12_MG1655:')"
puts serv.getAccession('Ecol_K12_MG1655:')

puts "### getPieceNumber('Mgen_G37:')"
puts serv.getPieceNumber('Mgen_G37:')

puts "### getDivision('Mgen_G37:')"
puts serv.getDivision('Mgen_G37:')

puts "### getType('Mgen_G37:')"
puts serv.getType('Mgen_G37:')

puts "### getCDS('Aaeo_VF5:ece1')"
puts serv.getCDS('Aaeo_VF5:ece1')

puts "### getFlatFile('Nost_PCC7120:pCC7120zeta')"
puts serv.getFlatFile('Nost_PCC7120:pCC7120zeta')

# The label now shows the second argument that is actually passed.
puts "### getFastaFile('Nost_PCC7120:pCC7120zeta', 'cdsaa')"
puts serv.getFastaFile('Nost_PCC7120:pCC7120zeta', 'cdsaa')


puts ">>> Bio::DDBJ::XML::Gtop"
serv = Bio::DDBJ::XML::Gtop.new

puts "### getOrganismList"
puts serv.getOrganismList

# The label now shows the arguments that are actually passed.
puts "### getMasterInfo('thrA', 'ecol0')"
puts serv.getMasterInfo('thrA', 'ecol0')


# puts ">>> Bio::DDBJ::XML::PML"
# serv = Bio::DDBJ::XML::PML.new
#
# puts "### getVariation('1')"
# puts serv.getVariation('1')


puts ">>> Bio::DDBJ::XML::SRS"
serv = Bio::DDBJ::XML::SRS.new

puts "### searchSimple('[pathway-des:sugar]')"
puts serv.searchSimple('[pathway-des:sugar]')

puts "### searchParam('[swissprot-des:cohesin]', '-f seq -sf fasta')"
puts serv.searchParam('[swissprot-des:cohesin]', '-f seq -sf fasta')


puts ">>> Bio::DDBJ::XML::TxSearch"
serv = Bio::DDBJ::XML::TxSearch.new

puts "### searchSimple('*coli')"
puts serv.searchSimple('*coli')

puts "### searchSimple('*tardigrada*')"
puts serv.searchSimple('*tardigrada*')

puts "### getTxId('Escherichia coli')"
puts serv.getTxId('Escherichia coli')

puts "### getTxName('562')"
puts serv.getTxName('562')

# Both arguments are newline-separated lists passed as single strings.
query = "Campylobacter coli\nEscherichia coli"
rank = "family\ngenus"

puts "### searchLineage(query, rank, 'Bacteria')"
puts serv.searchLineage(query, rank, 'Bacteria')

#end bio-1.4.3.0001/sample/demo_kegg_taxonomy.rb0000644000004100000410000000371612200110570020357 0ustar www-datawww-data# # = sample/demo_kegg_taxonomy.rb - demonstration of Bio::KEGG::Taxonomy # # Copyright:: Copyright (C) 2007 Toshiaki Katayama # License:: The Ruby License # # # == Description # # IMPORTANT NOTE: currently, this sample does not work! # # Demonstration of Bio::KEGG::Taxonomy. # # == Usage # # Specify a file containing KEGG Taxonomy data. # # $ ruby demo_kegg_taxonomy.rb file # # Optionally, when a file containing organisms list (1 line per 1 organism) # is specified after the file, only the specified organisms are shown. # # $ ruby demo_kegg_taxonomy.rb kegg_taxonomy_file org_list_file # # == Example of running this script # # Download test data. # # $ wget ftp://ftp.genome.jp/pub/kegg/genes/taxonomy # # The downloaded filename is "taxonomy". # # Run this script. # # $ ruby -Ilib sample/demo_kegg_taxonomy.rb taxonomy # # == Development information # # The code was moved from lib/bio/db/kegg/taxonomy.rb.
#

require 'bio'

#if __FILE__ == $0

# Usage:
#   % wget ftp://ftp.genome.jp/pub/kegg/genes/taxonomy
#   % ruby taxonomy.rb taxonomy | less -S

# First argument: the taxonomy file.  Optional second argument: a file that
# lists one organism per line.
taxonomy = ARGV.shift
org_list = ARGV.shift

# Stop with a usage line when the taxonomy file argument is missing, instead
# of passing nil on to Bio::KEGG::Taxonomy.new.
abort "Usage: ruby demo_kegg_taxonomy.rb kegg_taxonomy_file [org_list_file]" unless taxonomy

# Organism names with surrounding whitespace stripped, or nil when no list
# file was given.
orgs = org_list ? File.readlines(org_list).map { |line| line.strip } : nil

tree = Bio::KEGG::Taxonomy.new(taxonomy, orgs)

puts ">>> tree - original"
puts tree

puts ">>> tree - after compact"
tree.compact
puts tree

puts ">>> tree - after reduce"
tree.reduce
puts tree

puts ">>> path - sorted"
tree.path.sort.each do |path|
  puts path.join("/")
end

puts ">>> group : orgs"
tree.dfs(tree.root) do |parent, _children|
  # A separate local is used here: the original assigned to +orgs+ inside the
  # condition, which overwrote the organism filter read from the command line.
  members = tree.organisms(parent)
  next unless members
  puts "#{parent.ljust(30)} (#{members.size})\t#{members.join(', ')}"
end

puts ">>> group : subgroups"
tree.dfs_with_level(tree.root) do |parent, children, level|
  subgroups = children.keys.sort
  indent = " " * level
  label = "#{indent} #{level} #{parent}"
  puts "#{label.ljust(35)}\t#{subgroups.join(', ')}"
end

#end bio-1.4.3.0001/sample/demo_nucleicacid.rb0000644000004100000410000000167212200110570017746 0ustar www-datawww-data# # = sample/demo_nucleicacid.rb - demonstration of Bio::NucleicAcid # # Copyright:: Copyright (C) 2001, 2005 # Toshiaki Katayama # License:: The Ruby License # # # == Description # # Demonstration of Bio::NucleicAcid, data related to nucleic acids. # # == Usage # # Simply run this script. # # $ ruby demo_nucleicacid.rb # # == Development information # # The code was moved from lib/bio/data/na.rb.
#

require 'bio'

#if __FILE__ == $0

puts "### na = Bio::NucleicAcid.new"
na = Bio::NucleicAcid.new

# Each entry pairs the label to print with a deferred call.  The same method
# is exercised first on the instance and then on the class; the call is made
# only after its label has been written.
[
  ["# na.to_re('yrwskmbdhvnatgc')",
   lambda { na.to_re('yrwskmbdhvnatgc') }],
  ["# Bio::NucleicAcid.to_re('yrwskmbdhvnatgc')",
   lambda { Bio::NucleicAcid.to_re('yrwskmbdhvnatgc') }],
  ["# na.weight('A')",
   lambda { na.weight('A') }],
  ["# Bio::NucleicAcid.weight('A')",
   lambda { Bio::NucleicAcid.weight('A') }],
  ["# na.weight('atgc')",
   lambda { na.weight('atgc') }],
  ["# Bio::NucleicAcid.weight('atgc')",
   lambda { Bio::NucleicAcid.weight('atgc') }]
].each do |label, call|
  puts label
  p call.call
end

#end bio-1.4.3.0001/sample/test_restriction_enzyme_long.rb0000644000004100000410000062704712200110570022523 0ustar www-datawww-data# # = sample/test_restriction_enzyme_long.rb - Benchmark tests for Bio::RestrictionEnzyme::Analysis.cut for long sequences # # Copyright:: Copyright (C) 2011 # Naohisa Goto # License:: The Ruby License # # Acknowledgements: The idea of the test is based on the issue report # https://github.com/bioruby/bioruby/issues/10 # posted by ray1729 (https://github.com/ray1729). # require 'test/unit' require 'benchmark' require 'bio' entry = Bio::TogoWS::REST.entry('genbank', 'BA000007.2') EcoliO157H7Seq = Bio::GenBank.new(entry).naseq.freeze module TestRestrictionEnzymeAnalysisCutLong # dummy benchmarch class class DummyBench def report(str); yield; end end module HelperMethods def _truncate_cut_ranges(cut_ranges, len) limit = len - 1 ret = cut_ranges.collect do |a| if a[0] > limit || a[2] > limit then nil else a.collect { |pos| pos > limit ? limit : pos } end end ret.compact!
if last_a = ret[-1] then last_a[1] = limit last_a[3] = limit end ret end def _collect_cut_ranges(cuts) cuts.collect do |f| [ f.p_left, f.p_right, f.c_left, f.c_right ] end end def _test_by_size(len, bench = DummyBench.new) cuts = nil bench.report("#{self.class::TestLabel} #{len}") { cuts = _cut(self.class::SampleSequence[0, len]) } cut_ranges = _collect_cut_ranges(cuts) expected = _truncate_cut_ranges(self.class::SampleCutRanges, len) assert_equal(expected, cut_ranges) end def test_10k_to_100k $stderr.print "\n" Benchmark.bm(26) do |bench| 10_000.step(100_000, 10_000) do |len| _test_by_size(len, bench) end end end def test_100k_to_1M $stderr.print "\n" Benchmark.bm(26) do |bench| 100_000.step(1_000_000, 100_000) do |len| _test_by_size(len, bench) end end end def test_1M_to_5M_and_whole $stderr.print "\n" Benchmark.bm(26) do |bench| 1_000_000.step(5_000_000, 1_000_000) do |len| _test_by_size(len, bench) end _test_by_size(self.class::SampleSequence.length, bench) end end if defined? Bio::RestrictionEnzyme::SortedNumArray def disabled_test_whole cuts = _cut(self.class::SampleSequence) cut_ranges = _collect_cut_ranges(cuts) cut_ranges.each do |a| $stderr.print " [ ", a.join(", "), " ], \n" end assert_equal(self.class::SampleCutRanges, cut_ranges) end end #module HelperMethods class TestEcoliO157H7_BstEII < Test::Unit::TestCase include HelperMethods TestLabel = 'BstEII' SampleSequence = EcoliO157H7Seq SampleCutRanges = BstEII_WHOLE = [ [ 0, 79, 0, 84 ], [ 80, 4612, 85, 4617 ], [ 4613, 13483, 4618, 13488 ], [ 13484, 15984, 13489, 15989 ], [ 15985, 21462, 15990, 21467 ], [ 21463, 27326, 21468, 27331 ], [ 27327, 30943, 27332, 30948 ], [ 30944, 34888, 30949, 34893 ], [ 34889, 35077, 34894, 35082 ], [ 35078, 35310, 35083, 35315 ], [ 35311, 36254, 35316, 36259 ], [ 36255, 41885, 36260, 41890 ], [ 41886, 43070, 41891, 43075 ], [ 43071, 45689, 43076, 45694 ], [ 45690, 52325, 45695, 52330 ], [ 52326, 55703, 52331, 55708 ], [ 55704, 58828, 55709, 58833 ], [ 58829, 59178, 58834, 
59183 ], [ 59179, 72610, 59184, 72615 ], [ 72611, 72739, 72616, 72744 ], [ 72740, 73099, 72745, 73104 ], [ 73100, 75123, 73105, 75128 ], [ 75124, 77366, 75129, 77371 ], [ 77367, 77810, 77372, 77815 ], [ 77811, 78740, 77816, 78745 ], [ 78741, 79717, 78746, 79722 ], [ 79718, 82250, 79723, 82255 ], [ 82251, 84604, 82256, 84609 ], [ 84605, 95491, 84610, 95496 ], [ 95492, 95785, 95497, 95790 ], [ 95786, 95794, 95791, 95799 ], [ 95795, 96335, 95800, 96340 ], [ 96336, 102044, 96341, 102049 ], [ 102045, 102541, 102050, 102546 ], [ 102542, 103192, 102547, 103197 ], [ 103193, 104722, 103198, 104727 ], [ 104723, 110883, 104728, 110888 ], [ 110884, 120090, 110889, 120095 ], [ 120091, 120657, 120096, 120662 ], [ 120658, 128308, 120663, 128313 ], [ 128309, 138305, 128314, 138310 ], [ 138306, 141147, 138311, 141152 ], [ 141148, 143724, 141153, 143729 ], [ 143725, 143838, 143730, 143843 ], [ 143839, 144303, 143844, 144308 ], [ 144304, 148199, 144309, 148204 ], [ 148200, 149577, 148205, 149582 ], [ 149578, 149731, 149583, 149736 ], [ 149732, 156115, 149737, 156120 ], [ 156116, 161126, 156121, 161131 ], [ 161127, 162856, 161132, 162861 ], [ 162857, 170693, 162862, 170698 ], [ 170694, 170944, 170699, 170949 ], [ 170945, 171201, 170950, 171206 ], [ 171202, 173241, 171207, 173246 ], [ 173242, 177283, 173247, 177288 ], [ 177284, 178177, 177289, 178182 ], [ 178178, 178781, 178183, 178786 ], [ 178782, 181610, 178787, 181615 ], [ 181611, 181706, 181616, 181711 ], [ 181707, 185661, 181712, 185666 ], [ 185662, 193407, 185667, 193412 ], [ 193408, 195511, 193413, 195516 ], [ 195512, 195754, 195517, 195759 ], [ 195755, 197247, 195760, 197252 ], [ 197248, 200659, 197253, 200664 ], [ 200660, 201820, 200665, 201825 ], [ 201821, 202300, 201826, 202305 ], [ 202301, 202686, 202306, 202691 ], [ 202687, 206289, 202692, 206294 ], [ 206290, 206466, 206295, 206471 ], [ 206467, 207011, 206472, 207016 ], [ 207012, 208159, 207017, 208164 ], [ 208160, 209976, 208165, 209981 ], [ 209977, 210078, 209982, 210083 
], [ 210079, 211485, 210084, 211490 ], [ 211486, 212377, 211491, 212382 ], [ 212378, 213569, 212383, 213574 ], [ 213570, 216005, 213575, 216010 ], [ 216006, 220098, 216011, 220103 ], [ 220099, 224063, 220104, 224068 ], [ 224064, 228604, 224069, 228609 ], [ 228605, 239993, 228610, 239998 ], [ 239994, 247914, 239999, 247919 ], [ 247915, 251579, 247920, 251584 ], [ 251580, 257092, 251585, 257097 ], [ 257093, 261621, 257098, 261626 ], [ 261622, 263030, 261627, 263035 ], [ 263031, 265084, 263036, 265089 ], [ 265085, 265243, 265090, 265248 ], [ 265244, 265534, 265249, 265539 ], [ 265535, 266117, 265540, 266122 ], [ 266118, 274428, 266123, 274433 ], [ 274429, 282285, 274434, 282290 ], [ 282286, 286948, 282291, 286953 ], [ 286949, 292547, 286954, 292552 ], [ 292548, 297678, 292553, 297683 ], [ 297679, 308161, 297684, 308166 ], [ 308162, 308706, 308167, 308711 ], [ 308707, 313482, 308712, 313487 ], [ 313483, 337118, 313488, 337123 ], [ 337119, 337935, 337124, 337940 ], [ 337936, 338781, 337941, 338786 ], [ 338782, 339493, 338787, 339498 ], [ 339494, 341025, 339499, 341030 ], [ 341026, 344424, 341031, 344429 ], [ 344425, 348384, 344430, 348389 ], [ 348385, 354781, 348390, 354786 ], [ 354782, 356692, 354787, 356697 ], [ 356693, 357008, 356698, 357013 ], [ 357009, 357305, 357014, 357310 ], [ 357306, 357328, 357311, 357333 ], [ 357329, 358126, 357334, 358131 ], [ 358127, 359472, 358132, 359477 ], [ 359473, 362160, 359478, 362165 ], [ 362161, 365395, 362166, 365400 ], [ 365396, 365704, 365401, 365709 ], [ 365705, 381746, 365710, 381751 ], [ 381747, 381994, 381752, 381999 ], [ 381995, 383335, 382000, 383340 ], [ 383336, 385141, 383341, 385146 ], [ 385142, 390171, 385147, 390176 ], [ 390172, 392764, 390177, 392769 ], [ 392765, 394338, 392770, 394343 ], [ 394339, 394686, 394344, 394691 ], [ 394687, 398703, 394692, 398708 ], [ 398704, 404095, 398709, 404100 ], [ 404096, 408361, 404101, 408366 ], [ 408362, 413032, 408367, 413037 ], [ 413033, 414563, 413038, 414568 ], [ 414564, 
416901, 414569, 416906 ], [ 416902, 417419, 416907, 417424 ], [ 417420, 421777, 417425, 421782 ], [ 421778, 423748, 421783, 423753 ], [ 423749, 431903, 423754, 431908 ], [ 431904, 440000, 431909, 440005 ], [ 440001, 448040, 440006, 448045 ], [ 448041, 452994, 448046, 452999 ], [ 452995, 453075, 453000, 453080 ], [ 453076, 454950, 453081, 454955 ], [ 454951, 455888, 454956, 455893 ], [ 455889, 460160, 455894, 460165 ], [ 460161, 463076, 460166, 463081 ], [ 463077, 465003, 463082, 465008 ], [ 465004, 466828, 465009, 466833 ], [ 466829, 467686, 466834, 467691 ], [ 467687, 468596, 467692, 468601 ], [ 468597, 479953, 468602, 479958 ], [ 479954, 480538, 479959, 480543 ], [ 480539, 482869, 480544, 482874 ], [ 482870, 489378, 482875, 489383 ], [ 489379, 492241, 489384, 492246 ], [ 492242, 495406, 492247, 495411 ], [ 495407, 495712, 495412, 495717 ], [ 495713, 497829, 495718, 497834 ], [ 497830, 501698, 497835, 501703 ], [ 501699, 504565, 501704, 504570 ], [ 504566, 505105, 504571, 505110 ], [ 505106, 508452, 505111, 508457 ], [ 508453, 515947, 508458, 515952 ], [ 515948, 519141, 515953, 519146 ], [ 519142, 519398, 519147, 519403 ], [ 519399, 521386, 519404, 521391 ], [ 521387, 526115, 521392, 526120 ], [ 526116, 526729, 526121, 526734 ], [ 526730, 527018, 526735, 527023 ], [ 527019, 528059, 527024, 528064 ], [ 528060, 532689, 528065, 532694 ], [ 532690, 534702, 532695, 534707 ], [ 534703, 535272, 534708, 535277 ], [ 535273, 538668, 535278, 538673 ], [ 538669, 543939, 538674, 543944 ], [ 543940, 547429, 543945, 547434 ], [ 547430, 553890, 547435, 553895 ], [ 553891, 554678, 553896, 554683 ], [ 554679, 555452, 554684, 555457 ], [ 555453, 556296, 555458, 556301 ], [ 556297, 559341, 556302, 559346 ], [ 559342, 559991, 559347, 559996 ], [ 559992, 563242, 559997, 563247 ], [ 563243, 576432, 563248, 576437 ], [ 576433, 582431, 576438, 582436 ], [ 582432, 582959, 582437, 582964 ], [ 582960, 583475, 582965, 583480 ], [ 583476, 583589, 583481, 583594 ], [ 583590, 583670, 583595, 
583675 ], [ 583671, 583901, 583676, 583906 ], [ 583902, 584198, 583907, 584203 ], [ 584199, 584633, 584204, 584638 ], [ 584634, 585704, 584639, 585709 ], [ 585705, 585746, 585710, 585751 ], [ 585747, 586175, 585752, 586180 ], [ 586176, 586301, 586181, 586306 ], [ 586302, 586643, 586307, 586648 ], [ 586644, 586775, 586649, 586780 ], [ 586776, 587072, 586781, 587077 ], [ 587073, 587214, 587078, 587219 ], [ 587215, 587540, 587220, 587545 ], [ 587541, 587969, 587546, 587974 ], [ 587970, 588095, 587975, 588100 ], [ 588096, 588437, 588101, 588442 ], [ 588438, 588569, 588443, 588574 ], [ 588570, 589008, 588575, 589013 ], [ 589009, 589166, 589014, 589171 ], [ 589167, 590366, 589172, 590371 ], [ 590367, 590792, 590372, 590797 ], [ 590793, 591077, 590798, 591082 ], [ 591078, 591263, 591083, 591268 ], [ 591264, 591863, 591269, 591868 ], [ 591864, 592058, 591869, 592063 ], [ 592059, 592160, 592064, 592165 ], [ 592161, 592568, 592166, 592573 ], [ 592569, 592760, 592574, 592765 ], [ 592761, 593060, 592766, 593065 ], [ 593061, 593186, 593066, 593191 ], [ 593187, 593366, 593192, 593371 ], [ 593367, 593957, 593372, 593962 ], [ 593958, 594827, 593963, 594832 ], [ 594828, 594980, 594833, 594985 ], [ 594981, 595649, 594986, 595654 ], [ 595650, 595893, 595655, 595898 ], [ 595894, 596057, 595899, 596062 ], [ 596058, 596159, 596063, 596164 ], [ 596160, 596351, 596165, 596356 ], [ 596352, 596660, 596357, 596665 ], [ 596661, 596960, 596666, 596965 ], [ 596961, 597102, 596966, 597107 ], [ 597103, 597155, 597108, 597160 ], [ 597156, 597257, 597161, 597262 ], [ 597258, 599957, 597263, 599962 ], [ 599958, 611038, 599963, 611043 ], [ 611039, 612202, 611044, 612207 ], [ 612203, 614051, 612208, 614056 ], [ 614052, 614134, 614057, 614139 ], [ 614135, 614787, 614140, 614792 ], [ 614788, 616272, 614793, 616277 ], [ 616273, 617737, 616278, 617742 ], [ 617738, 627339, 617743, 627344 ], [ 627340, 628902, 627345, 628907 ], [ 628903, 636523, 628908, 636528 ], [ 636524, 637529, 636529, 637534 ], [ 637530, 
647713, 637535, 647718 ], [ 647714, 648684, 647719, 648689 ], [ 648685, 653543, 648690, 653548 ], [ 653544, 659030, 653549, 659035 ], [ 659031, 662241, 659036, 662246 ], [ 662242, 671781, 662247, 671786 ], [ 671782, 672048, 671787, 672053 ], [ 672049, 673788, 672054, 673793 ], [ 673789, 674707, 673794, 674712 ], [ 674708, 674998, 674713, 675003 ], [ 674999, 675157, 675004, 675162 ], [ 675158, 688595, 675163, 688600 ], [ 688596, 693309, 688601, 693314 ], [ 693310, 697406, 693315, 697411 ], [ 697407, 702676, 697412, 702681 ], [ 702677, 707382, 702682, 707387 ], [ 707383, 708604, 707388, 708609 ], [ 708605, 710046, 708610, 710051 ], [ 710047, 711630, 710052, 711635 ], [ 711631, 711696, 711636, 711701 ], [ 711697, 712329, 711702, 712334 ], [ 712330, 716461, 712335, 716466 ], [ 716462, 720238, 716467, 720243 ], [ 720239, 720374, 720244, 720379 ], [ 720375, 724200, 720380, 724205 ], [ 724201, 725687, 724206, 725692 ], [ 725688, 730067, 725693, 730072 ], [ 730068, 730574, 730073, 730579 ], [ 730575, 730699, 730580, 730704 ], [ 730700, 732726, 730705, 732731 ], [ 732727, 738597, 732732, 738602 ], [ 738598, 743326, 738603, 743331 ], [ 743327, 744992, 743332, 744997 ], [ 744993, 745843, 744998, 745848 ], [ 745844, 751518, 745849, 751523 ], [ 751519, 752431, 751524, 752436 ], [ 752432, 752549, 752437, 752554 ], [ 752550, 766036, 752555, 766041 ], [ 766037, 768968, 766042, 768973 ], [ 768969, 770151, 768974, 770156 ], [ 770152, 771158, 770157, 771163 ], [ 771159, 771405, 771164, 771410 ], [ 771406, 781958, 771411, 781963 ], [ 781959, 784226, 781964, 784231 ], [ 784227, 786945, 784232, 786950 ], [ 786946, 787203, 786951, 787208 ], [ 787204, 789251, 787209, 789256 ], [ 789252, 791218, 789257, 791223 ], [ 791219, 793716, 791224, 793721 ], [ 793717, 795003, 793722, 795008 ], [ 795004, 795521, 795009, 795526 ], [ 795522, 804514, 795527, 804519 ], [ 804515, 805238, 804520, 805243 ], [ 805239, 805887, 805244, 805892 ], [ 805888, 808461, 805893, 808466 ], [ 808462, 809805, 808467, 
809810 ], [ 809806, 810086, 809811, 810091 ], [ 810087, 810726, 810092, 810731 ], [ 810727, 820111, 810732, 820116 ], [ 820112, 821326, 820117, 821331 ], [ 821327, 821647, 821332, 821652 ], [ 821648, 824277, 821653, 824282 ], [ 824278, 825750, 824283, 825755 ], [ 825751, 828770, 825756, 828775 ], [ 828771, 828924, 828776, 828929 ], [ 828925, 830194, 828930, 830199 ], [ 830195, 830786, 830200, 830791 ], [ 830787, 832788, 830792, 832793 ], [ 832789, 833306, 832794, 833311 ], [ 833307, 835656, 833312, 835661 ], [ 835657, 841180, 835662, 841185 ], [ 841181, 842112, 841186, 842117 ], [ 842113, 843973, 842118, 843978 ], [ 843974, 843990, 843979, 843995 ], [ 843991, 852882, 843996, 852887 ], [ 852883, 854392, 852888, 854397 ], [ 854393, 857721, 854398, 857726 ], [ 857722, 857961, 857727, 857966 ], [ 857962, 862783, 857967, 862788 ], [ 862784, 878953, 862789, 878958 ], [ 878954, 885194, 878959, 885199 ], [ 885195, 886313, 885200, 886318 ], [ 886314, 886460, 886319, 886465 ], [ 886461, 890233, 886466, 890238 ], [ 890234, 890346, 890239, 890351 ], [ 890347, 890379, 890352, 890384 ], [ 890380, 899676, 890385, 899681 ], [ 899677, 903962, 899682, 903967 ], [ 903963, 904236, 903968, 904241 ], [ 904237, 908130, 904242, 908135 ], [ 908131, 916611, 908136, 916616 ], [ 916612, 916803, 916617, 916808 ], [ 916804, 920531, 916809, 920536 ], [ 920532, 928505, 920537, 928510 ], [ 928506, 936947, 928511, 936952 ], [ 936948, 937240, 936953, 937245 ], [ 937241, 939698, 937246, 939703 ], [ 939699, 939711, 939704, 939716 ], [ 939712, 941642, 939717, 941647 ], [ 941643, 949052, 941648, 949057 ], [ 949053, 949800, 949058, 949805 ], [ 949801, 951412, 949806, 951417 ], [ 951413, 951810, 951418, 951815 ], [ 951811, 952386, 951816, 952391 ], [ 952387, 953295, 952392, 953300 ], [ 953296, 953894, 953301, 953899 ], [ 953895, 958753, 953900, 958758 ], [ 958754, 964476, 958759, 964481 ], [ 964477, 967468, 964482, 967473 ], [ 967469, 969631, 967474, 969636 ], [ 969632, 970966, 969637, 970971 ], [ 970967, 
971138, 970972, 971143 ], [ 971139, 974185, 971144, 974190 ], [ 974186, 974365, 974191, 974370 ], [ 974366, 975256, 974371, 975261 ], [ 975257, 976794, 975262, 976799 ], [ 976795, 987406, 976800, 987411 ], [ 987407, 988132, 987412, 988137 ], [ 988133, 992809, 988138, 992814 ], [ 992810, 1000225, 992815, 1000230 ], [ 1000226, 1001626, 1000231, 1001631 ], [ 1001627, 1007354, 1001632, 1007359 ], [ 1007355, 1011910, 1007360, 1011915 ], [ 1011911, 1012377, 1011916, 1012382 ], [ 1012378, 1017328, 1012383, 1017333 ], [ 1017329, 1020891, 1017334, 1020896 ], [ 1020892, 1021340, 1020897, 1021345 ], [ 1021341, 1024845, 1021346, 1024850 ], [ 1024846, 1025853, 1024851, 1025858 ], [ 1025854, 1030691, 1025859, 1030696 ], [ 1030692, 1032676, 1030697, 1032681 ], [ 1032677, 1037847, 1032682, 1037852 ], [ 1037848, 1039473, 1037853, 1039478 ], [ 1039474, 1044241, 1039479, 1044246 ], [ 1044242, 1045920, 1044247, 1045925 ], [ 1045921, 1053286, 1045926, 1053291 ], [ 1053287, 1053309, 1053292, 1053314 ], [ 1053310, 1054643, 1053315, 1054648 ], [ 1054644, 1056527, 1054649, 1056532 ], [ 1056528, 1058682, 1056533, 1058687 ], [ 1058683, 1059297, 1058688, 1059302 ], [ 1059298, 1060416, 1059303, 1060421 ], [ 1060417, 1064234, 1060422, 1064239 ], [ 1064235, 1064848, 1064240, 1064853 ], [ 1064849, 1065434, 1064854, 1065439 ], [ 1065435, 1075642, 1065440, 1075647 ], [ 1075643, 1076325, 1075648, 1076330 ], [ 1076326, 1076534, 1076331, 1076539 ], [ 1076535, 1078866, 1076540, 1078871 ], [ 1078867, 1080537, 1078872, 1080542 ], [ 1080538, 1082144, 1080543, 1082149 ], [ 1082145, 1085746, 1082150, 1085751 ], [ 1085747, 1087087, 1085752, 1087092 ], [ 1087088, 1088273, 1087093, 1088278 ], [ 1088274, 1093062, 1088279, 1093067 ], [ 1093063, 1096867, 1093068, 1096872 ], [ 1096868, 1102488, 1096873, 1102493 ], [ 1102489, 1106371, 1102494, 1106376 ], [ 1106372, 1108123, 1106377, 1108128 ], [ 1108124, 1113311, 1108129, 1113316 ], [ 1113312, 1114557, 1113317, 1114562 ], [ 1114558, 1120566, 1114563, 1120571 ], [ 
1120567, 1121004, 1120572, 1121009 ], [ 1121005, 1122501, 1121010, 1122506 ], [ 1122502, 1130582, 1122507, 1130587 ], [ 1130583, 1132170, 1130588, 1132175 ], [ 1132171, 1140126, 1132176, 1140131 ], [ 1140127, 1143361, 1140132, 1143366 ], [ 1143362, 1149205, 1143367, 1149210 ], [ 1149206, 1149331, 1149211, 1149336 ], [ 1149332, 1156272, 1149337, 1156277 ], [ 1156273, 1161624, 1156278, 1161629 ], [ 1161625, 1171353, 1161630, 1171358 ], [ 1171354, 1171934, 1171359, 1171939 ], [ 1171935, 1172114, 1171940, 1172119 ], [ 1172115, 1185368, 1172120, 1185373 ], [ 1185369, 1193993, 1185374, 1193998 ], [ 1193994, 1194272, 1193999, 1194277 ], [ 1194273, 1197920, 1194278, 1197925 ], [ 1197921, 1200373, 1197926, 1200378 ], [ 1200374, 1200597, 1200379, 1200602 ], [ 1200598, 1200714, 1200603, 1200719 ], [ 1200715, 1203674, 1200720, 1203679 ], [ 1203675, 1204865, 1203680, 1204870 ], [ 1204866, 1205330, 1204871, 1205335 ], [ 1205331, 1210727, 1205336, 1210732 ], [ 1210728, 1211881, 1210733, 1211886 ], [ 1211882, 1214283, 1211887, 1214288 ], [ 1214284, 1216981, 1214289, 1216986 ], [ 1216982, 1223522, 1216987, 1223527 ], [ 1223523, 1228205, 1223528, 1228210 ], [ 1228206, 1236067, 1228211, 1236072 ], [ 1236068, 1236265, 1236073, 1236270 ], [ 1236266, 1239969, 1236271, 1239974 ], [ 1239970, 1240641, 1239975, 1240646 ], [ 1240642, 1244738, 1240647, 1244743 ], [ 1244739, 1244821, 1244744, 1244826 ], [ 1244822, 1272971, 1244827, 1272976 ], [ 1272972, 1276524, 1272977, 1276529 ], [ 1276525, 1290344, 1276530, 1290349 ], [ 1290345, 1292253, 1290350, 1292258 ], [ 1292254, 1293482, 1292259, 1293487 ], [ 1293483, 1295919, 1293488, 1295924 ], [ 1295920, 1302834, 1295925, 1302839 ], [ 1302835, 1303464, 1302840, 1303469 ], [ 1303465, 1309308, 1303470, 1309313 ], [ 1309309, 1311482, 1309314, 1311487 ], [ 1311483, 1312493, 1311488, 1312498 ], [ 1312494, 1316488, 1312499, 1316493 ], [ 1316489, 1318127, 1316494, 1318132 ], [ 1318128, 1325643, 1318133, 1325648 ], [ 1325644, 1328313, 1325649, 1328318 ], [ 
1328314, 1345348, 1328319, 1345353 ], [ 1345349, 1347480, 1345354, 1347485 ], [ 1347481, 1348458, 1347486, 1348463 ], [ 1348459, 1350595, 1348464, 1350600 ], [ 1350596, 1350770, 1350601, 1350775 ], [ 1350771, 1351954, 1350776, 1351959 ], [ 1351955, 1356474, 1351960, 1356479 ], [ 1356475, 1362756, 1356480, 1362761 ], [ 1362757, 1368544, 1362762, 1368549 ], [ 1368545, 1377993, 1368550, 1377998 ], [ 1377994, 1379610, 1377999, 1379615 ], [ 1379611, 1391551, 1379616, 1391556 ], [ 1391552, 1395841, 1391557, 1395846 ], [ 1395842, 1401721, 1395847, 1401726 ], [ 1401722, 1406871, 1401727, 1406876 ], [ 1406872, 1411041, 1406877, 1411046 ], [ 1411042, 1417851, 1411047, 1417856 ], [ 1417852, 1419058, 1417857, 1419063 ], [ 1419059, 1428120, 1419064, 1428125 ], [ 1428121, 1428584, 1428126, 1428589 ], [ 1428585, 1430700, 1428590, 1430705 ], [ 1430701, 1438278, 1430706, 1438283 ], [ 1438279, 1443084, 1438284, 1443089 ], [ 1443085, 1444668, 1443090, 1444673 ], [ 1444669, 1444866, 1444674, 1444871 ], [ 1444867, 1444914, 1444872, 1444919 ], [ 1444915, 1445093, 1444920, 1445098 ], [ 1445094, 1446216, 1445099, 1446221 ], [ 1446217, 1448518, 1446222, 1448523 ], [ 1448519, 1452860, 1448524, 1452865 ], [ 1452861, 1454246, 1452866, 1454251 ], [ 1454247, 1455414, 1454252, 1455419 ], [ 1455415, 1460976, 1455420, 1460981 ], [ 1460977, 1461164, 1460982, 1461169 ], [ 1461165, 1463675, 1461170, 1463680 ], [ 1463676, 1465339, 1463681, 1465344 ], [ 1465340, 1469872, 1465345, 1469877 ], [ 1469873, 1471479, 1469878, 1471484 ], [ 1471480, 1472745, 1471485, 1472750 ], [ 1472746, 1479208, 1472751, 1479213 ], [ 1479209, 1480831, 1479214, 1480836 ], [ 1480832, 1485359, 1480837, 1485364 ], [ 1485360, 1485530, 1485365, 1485535 ], [ 1485531, 1486004, 1485536, 1486009 ], [ 1486005, 1487314, 1486010, 1487319 ], [ 1487315, 1491008, 1487320, 1491013 ], [ 1491009, 1492068, 1491014, 1492073 ], [ 1492069, 1493001, 1492074, 1493006 ], [ 1493002, 1495524, 1493007, 1495529 ], [ 1495525, 1498599, 1495530, 1498604 ], [ 
1498600, 1499384, 1498605, 1499389 ], [ 1499385, 1500494, 1499390, 1500499 ], [ 1500495, 1504828, 1500500, 1504833 ], [ 1504829, 1509790, 1504834, 1509795 ], [ 1509791, 1512050, 1509796, 1512055 ], [ 1512051, 1514922, 1512056, 1514927 ], [ 1514923, 1515140, 1514928, 1515145 ], [ 1515141, 1515194, 1515146, 1515199 ], [ 1515195, 1515647, 1515200, 1515652 ], [ 1515648, 1516602, 1515653, 1516607 ], [ 1516603, 1517689, 1516608, 1517694 ], [ 1517690, 1519324, 1517695, 1519329 ], [ 1519325, 1524288, 1519330, 1524293 ], [ 1524289, 1524809, 1524294, 1524814 ], [ 1524810, 1525934, 1524815, 1525939 ], [ 1525935, 1526325, 1525940, 1526330 ], [ 1526326, 1527046, 1526331, 1527051 ], [ 1527047, 1528800, 1527052, 1528805 ], [ 1528801, 1529067, 1528806, 1529072 ], [ 1529068, 1529127, 1529073, 1529132 ], [ 1529128, 1536262, 1529133, 1536267 ], [ 1536263, 1543858, 1536268, 1543863 ], [ 1543859, 1554015, 1543864, 1554020 ], [ 1554016, 1555315, 1554021, 1555320 ], [ 1555316, 1558476, 1555321, 1558481 ], [ 1558477, 1560403, 1558482, 1560408 ], [ 1560404, 1564152, 1560409, 1564157 ], [ 1564153, 1565868, 1564158, 1565873 ], [ 1565869, 1566075, 1565874, 1566080 ], [ 1566076, 1572715, 1566081, 1572720 ], [ 1572716, 1575566, 1572721, 1575571 ], [ 1575567, 1575840, 1575572, 1575845 ], [ 1575841, 1575957, 1575846, 1575962 ], [ 1575958, 1578588, 1575963, 1578593 ], [ 1578589, 1587557, 1578594, 1587562 ], [ 1587558, 1588891, 1587563, 1588896 ], [ 1588892, 1597227, 1588897, 1597232 ], [ 1597228, 1597262, 1597233, 1597267 ], [ 1597263, 1606974, 1597268, 1606979 ], [ 1606975, 1613512, 1606980, 1613517 ], [ 1613513, 1613900, 1613518, 1613905 ], [ 1613901, 1614931, 1613906, 1614936 ], [ 1614932, 1620971, 1614937, 1620976 ], [ 1620972, 1625931, 1620977, 1625936 ], [ 1625932, 1635578, 1625937, 1635583 ], [ 1635579, 1636949, 1635584, 1636954 ], [ 1636950, 1642076, 1636955, 1642081 ], [ 1642077, 1643227, 1642082, 1643232 ], [ 1643228, 1643451, 1643233, 1643456 ], [ 1643452, 1643568, 1643457, 1643573 ], [ 
1643569, 1651406, 1643574, 1651411 ], [ 1651407, 1651474, 1651412, 1651479 ], [ 1651475, 1660688, 1651480, 1660693 ], [ 1660689, 1665846, 1660694, 1665851 ], [ 1665847, 1667026, 1665852, 1667031 ], [ 1667027, 1675465, 1667032, 1675470 ], [ 1675466, 1679164, 1675471, 1679169 ], [ 1679165, 1681962, 1679170, 1681967 ], [ 1681963, 1688016, 1681968, 1688021 ], [ 1688017, 1690659, 1688022, 1690664 ], [ 1690660, 1692872, 1690665, 1692877 ], [ 1692873, 1697102, 1692878, 1697107 ], [ 1697103, 1698132, 1697108, 1698137 ], [ 1698133, 1703429, 1698138, 1703434 ], [ 1703430, 1706057, 1703435, 1706062 ], [ 1706058, 1708683, 1706063, 1708688 ], [ 1708684, 1720884, 1708689, 1720889 ], [ 1720885, 1721218, 1720890, 1721223 ], [ 1721219, 1725289, 1721224, 1725294 ], [ 1725290, 1726495, 1725295, 1726500 ], [ 1726496, 1728646, 1726501, 1728651 ], [ 1728647, 1729060, 1728652, 1729065 ], [ 1729061, 1732801, 1729066, 1732806 ], [ 1732802, 1733308, 1732807, 1733313 ], [ 1733309, 1734471, 1733314, 1734476 ], [ 1734472, 1740942, 1734477, 1740947 ], [ 1740943, 1744762, 1740948, 1744767 ], [ 1744763, 1746379, 1744768, 1746384 ], [ 1746380, 1747144, 1746385, 1747149 ], [ 1747145, 1753062, 1747150, 1753067 ], [ 1753063, 1754367, 1753068, 1754372 ], [ 1754368, 1763444, 1754373, 1763449 ], [ 1763445, 1777420, 1763450, 1777425 ], [ 1777421, 1782626, 1777426, 1782631 ], [ 1782627, 1784342, 1782632, 1784347 ], [ 1784343, 1784549, 1784348, 1784554 ], [ 1784550, 1791189, 1784555, 1791194 ], [ 1791190, 1793878, 1791195, 1793883 ], [ 1793879, 1794152, 1793884, 1794157 ], [ 1794153, 1794269, 1794158, 1794274 ], [ 1794270, 1794972, 1794275, 1794977 ], [ 1794973, 1796163, 1794978, 1796168 ], [ 1796164, 1802296, 1796169, 1802301 ], [ 1802297, 1805729, 1802302, 1805734 ], [ 1805730, 1806305, 1805735, 1806310 ], [ 1806306, 1810512, 1806311, 1810517 ], [ 1810513, 1816402, 1810518, 1816407 ], [ 1816403, 1826227, 1816408, 1826232 ], [ 1826228, 1826701, 1826233, 1826706 ], [ 1826702, 1827720, 1826707, 1827725 ], [ 
1827721, 1836707, 1827726, 1836712 ], [ 1836708, 1836926, 1836713, 1836931 ], [ 1836927, 1838667, 1836932, 1838672 ], [ 1838668, 1843220, 1838673, 1843225 ], [ 1843221, 1843829, 1843226, 1843834 ], [ 1843830, 1846577, 1843835, 1846582 ], [ 1846578, 1849125, 1846583, 1849130 ], [ 1849126, 1850237, 1849131, 1850242 ], [ 1850238, 1851708, 1850243, 1851713 ], [ 1851709, 1853436, 1851714, 1853441 ], [ 1853437, 1853475, 1853442, 1853480 ], [ 1853476, 1853493, 1853481, 1853498 ], [ 1853494, 1854900, 1853499, 1854905 ], [ 1854901, 1861797, 1854906, 1861802 ], [ 1861798, 1862267, 1861803, 1862272 ], [ 1862268, 1866445, 1862273, 1866450 ], [ 1866446, 1866700, 1866451, 1866705 ], [ 1866701, 1870143, 1866706, 1870148 ], [ 1870144, 1870675, 1870149, 1870680 ], [ 1870676, 1881704, 1870681, 1881709 ], [ 1881705, 1882659, 1881710, 1882664 ], [ 1882660, 1884008, 1882665, 1884013 ], [ 1884009, 1885076, 1884014, 1885081 ], [ 1885077, 1897857, 1885082, 1897862 ], [ 1897858, 1931549, 1897863, 1931554 ], [ 1931550, 1931660, 1931555, 1931665 ], [ 1931661, 1936680, 1931666, 1936685 ], [ 1936681, 1938835, 1936686, 1938840 ], [ 1938836, 1939367, 1938841, 1939372 ], [ 1939368, 1944718, 1939373, 1944723 ], [ 1944719, 1949924, 1944724, 1949929 ], [ 1949925, 1951640, 1949930, 1951645 ], [ 1951641, 1951847, 1951646, 1951852 ], [ 1951848, 1958495, 1951853, 1958500 ], [ 1958496, 1961184, 1958501, 1961189 ], [ 1961185, 1961458, 1961190, 1961463 ], [ 1961459, 1963223, 1961464, 1963228 ], [ 1963224, 1964535, 1963229, 1964540 ], [ 1964536, 1964578, 1964541, 1964583 ], [ 1964579, 1965726, 1964584, 1965731 ], [ 1965727, 1975723, 1965732, 1975728 ], [ 1975724, 1983495, 1975729, 1983500 ], [ 1983496, 1989041, 1983501, 1989046 ], [ 1989042, 1991939, 1989047, 1991944 ], [ 1991940, 1994134, 1991945, 1994139 ], [ 1994135, 2006390, 1994140, 2006395 ], [ 2006391, 2006681, 2006396, 2006686 ], [ 2006682, 2012753, 2006687, 2012758 ], [ 2012754, 2020299, 2012759, 2020304 ], [ 2020300, 2021594, 2020305, 2021599 ], [ 
2021595, 2035653, 2021600, 2035658 ], [ 2035654, 2043961, 2035659, 2043966 ], [ 2043962, 2044411, 2043967, 2044416 ], [ 2044412, 2045320, 2044417, 2045325 ], [ 2045321, 2046593, 2045326, 2046598 ], [ 2046594, 2058014, 2046599, 2058019 ], [ 2058015, 2058262, 2058020, 2058267 ], [ 2058263, 2061616, 2058268, 2061621 ], [ 2061617, 2067334, 2061622, 2067339 ], [ 2067335, 2069059, 2067340, 2069064 ], [ 2069060, 2073142, 2069065, 2073147 ], [ 2073143, 2074555, 2073148, 2074560 ], [ 2074556, 2074634, 2074561, 2074639 ], [ 2074635, 2076422, 2074640, 2076427 ], [ 2076423, 2081937, 2076428, 2081942 ], [ 2081938, 2082042, 2081943, 2082047 ], [ 2082043, 2082408, 2082048, 2082413 ], [ 2082409, 2094661, 2082414, 2094666 ], [ 2094662, 2105556, 2094667, 2105561 ], [ 2105557, 2106153, 2105562, 2106158 ], [ 2106154, 2113282, 2106159, 2113287 ], [ 2113283, 2114197, 2113288, 2114202 ], [ 2114198, 2124245, 2114203, 2124250 ], [ 2124246, 2126629, 2124251, 2126634 ], [ 2126630, 2127367, 2126635, 2127372 ], [ 2127368, 2131854, 2127373, 2131859 ], [ 2131855, 2138481, 2131860, 2138486 ], [ 2138482, 2140084, 2138487, 2140089 ], [ 2140085, 2151397, 2140090, 2151402 ], [ 2151398, 2154116, 2151403, 2154121 ], [ 2154117, 2164531, 2154122, 2164536 ], [ 2164532, 2164999, 2164537, 2165004 ], [ 2165000, 2166190, 2165005, 2166195 ], [ 2166191, 2168535, 2166196, 2168540 ], [ 2168536, 2168652, 2168541, 2168657 ], [ 2168653, 2168876, 2168658, 2168881 ], [ 2168877, 2175197, 2168882, 2175202 ], [ 2175198, 2176568, 2175203, 2176573 ], [ 2176569, 2185419, 2176574, 2185424 ], [ 2185420, 2198074, 2185425, 2198079 ], [ 2198075, 2205716, 2198080, 2205721 ], [ 2205717, 2206482, 2205722, 2206487 ], [ 2206483, 2214819, 2206488, 2214824 ], [ 2214820, 2215255, 2214825, 2215260 ], [ 2215256, 2216910, 2215261, 2216915 ], [ 2216911, 2219477, 2216916, 2219482 ], [ 2219478, 2219751, 2219483, 2219756 ], [ 2219752, 2222602, 2219757, 2222607 ], [ 2222603, 2224016, 2222608, 2224021 ], [ 2224017, 2229253, 2224022, 2229258 ], [ 
2229254, 2229460, 2229259, 2229465 ], [ 2229461, 2231176, 2229466, 2231181 ], [ 2231177, 2236382, 2231182, 2236387 ], [ 2236383, 2245581, 2236388, 2245586 ], [ 2245582, 2245719, 2245587, 2245724 ], [ 2245720, 2245761, 2245725, 2245766 ], [ 2245762, 2249902, 2245767, 2249907 ], [ 2249903, 2254722, 2249908, 2254727 ], [ 2254723, 2262668, 2254728, 2262673 ], [ 2262669, 2276333, 2262674, 2276338 ], [ 2276334, 2278349, 2276339, 2278354 ], [ 2278350, 2278595, 2278355, 2278600 ], [ 2278596, 2282039, 2278601, 2282044 ], [ 2282040, 2309292, 2282045, 2309297 ], [ 2309293, 2309737, 2309298, 2309742 ], [ 2309738, 2314845, 2309743, 2314850 ], [ 2314846, 2315016, 2314851, 2315021 ], [ 2315017, 2320047, 2315022, 2320052 ], [ 2320048, 2320645, 2320053, 2320650 ], [ 2320646, 2330437, 2320651, 2330442 ], [ 2330438, 2338082, 2330443, 2338087 ], [ 2338083, 2345465, 2338088, 2345470 ], [ 2345466, 2347233, 2345471, 2347238 ], [ 2347234, 2348720, 2347239, 2348725 ], [ 2348721, 2351324, 2348726, 2351329 ], [ 2351325, 2352448, 2351330, 2352453 ], [ 2352449, 2353999, 2352454, 2354004 ], [ 2354000, 2359046, 2354005, 2359051 ], [ 2359047, 2361149, 2359052, 2361154 ], [ 2361150, 2374039, 2361155, 2374044 ], [ 2374040, 2385349, 2374045, 2385354 ], [ 2385350, 2388585, 2385355, 2388590 ], [ 2388586, 2391734, 2388591, 2391739 ], [ 2391735, 2392141, 2391740, 2392146 ], [ 2392142, 2393939, 2392147, 2393944 ], [ 2393940, 2395026, 2393945, 2395031 ], [ 2395027, 2395860, 2395032, 2395865 ], [ 2395861, 2398211, 2395866, 2398216 ], [ 2398212, 2398326, 2398217, 2398331 ], [ 2398327, 2402303, 2398332, 2402308 ], [ 2402304, 2408154, 2402309, 2408159 ], [ 2408155, 2409936, 2408160, 2409941 ], [ 2409937, 2410353, 2409942, 2410358 ], [ 2410354, 2411021, 2410359, 2411026 ], [ 2411022, 2419571, 2411027, 2419576 ], [ 2419572, 2424488, 2419577, 2424493 ], [ 2424489, 2427895, 2424494, 2427900 ], [ 2427896, 2433794, 2427901, 2433799 ], [ 2433795, 2434280, 2433800, 2434285 ], [ 2434281, 2436129, 2434286, 2436134 ], [ 
2436130, 2446339, 2436135, 2446344 ], [ 2446340, 2446355, 2446345, 2446360 ], [ 2446356, 2447550, 2446361, 2447555 ], [ 2447551, 2456375, 2447556, 2456380 ], [ 2456376, 2459685, 2456381, 2459690 ], [ 2459686, 2467707, 2459691, 2467712 ], [ 2467708, 2489626, 2467713, 2489631 ], [ 2489627, 2490030, 2489632, 2490035 ], [ 2490031, 2494181, 2490036, 2494186 ], [ 2494182, 2494578, 2494187, 2494583 ], [ 2494579, 2498330, 2494584, 2498335 ], [ 2498331, 2501619, 2498336, 2501624 ], [ 2501620, 2502774, 2501625, 2502779 ], [ 2502775, 2505440, 2502780, 2505445 ], [ 2505441, 2507840, 2505446, 2507845 ], [ 2507841, 2513953, 2507846, 2513958 ], [ 2513954, 2518482, 2513959, 2518487 ], [ 2518483, 2518510, 2518488, 2518515 ], [ 2518511, 2519154, 2518516, 2519159 ], [ 2519155, 2521663, 2519160, 2521668 ], [ 2521664, 2522690, 2521669, 2522695 ], [ 2522691, 2535156, 2522696, 2535161 ], [ 2535157, 2536302, 2535162, 2536307 ], [ 2536303, 2539683, 2536308, 2539688 ], [ 2539684, 2540838, 2539689, 2540843 ], [ 2540839, 2542542, 2540844, 2542547 ], [ 2542543, 2549711, 2542548, 2549716 ], [ 2549712, 2549979, 2549717, 2549984 ], [ 2549980, 2550376, 2549985, 2550381 ], [ 2550377, 2550442, 2550382, 2550447 ], [ 2550443, 2552498, 2550448, 2552503 ], [ 2552499, 2556237, 2552504, 2556242 ], [ 2556238, 2561281, 2556243, 2561286 ], [ 2561282, 2562381, 2561287, 2562386 ], [ 2562382, 2571576, 2562387, 2571581 ], [ 2571577, 2573918, 2571582, 2573923 ], [ 2573919, 2575854, 2573924, 2575859 ], [ 2575855, 2579045, 2575860, 2579050 ], [ 2579046, 2588728, 2579051, 2588733 ], [ 2588729, 2591930, 2588734, 2591935 ], [ 2591931, 2601304, 2591936, 2601309 ], [ 2601305, 2614812, 2601310, 2614817 ], [ 2614813, 2614839, 2614818, 2614844 ], [ 2614840, 2622328, 2614845, 2622333 ], [ 2622329, 2627903, 2622334, 2627908 ], [ 2627904, 2648431, 2627909, 2648436 ], [ 2648432, 2651846, 2648437, 2651851 ], [ 2651847, 2660586, 2651852, 2660591 ], [ 2660587, 2663434, 2660592, 2663439 ], [ 2663435, 2674481, 2663440, 2674486 ], [ 
2674482, 2674949, 2674487, 2674954 ], [ 2674950, 2676096, 2674955, 2676101 ], [ 2676097, 2676139, 2676102, 2676144 ], [ 2676140, 2678485, 2676145, 2678490 ], [ 2678486, 2678602, 2678491, 2678607 ], [ 2678603, 2678826, 2678608, 2678831 ], [ 2678827, 2681278, 2678832, 2681283 ], [ 2681279, 2684926, 2681284, 2684931 ], [ 2684927, 2685205, 2684932, 2685210 ], [ 2685206, 2692493, 2685211, 2692498 ], [ 2692494, 2714956, 2692499, 2714961 ], [ 2714957, 2716135, 2714962, 2716140 ], [ 2716136, 2716930, 2716141, 2716935 ], [ 2716931, 2717350, 2716936, 2717355 ], [ 2717351, 2717774, 2717356, 2717779 ], [ 2717775, 2718590, 2717780, 2718595 ], [ 2718591, 2720379, 2718596, 2720384 ], [ 2720380, 2723960, 2720385, 2723965 ], [ 2723961, 2725658, 2723966, 2725663 ], [ 2725659, 2731980, 2725664, 2731985 ], [ 2731981, 2738172, 2731986, 2738177 ], [ 2738173, 2748433, 2738178, 2748438 ], [ 2748434, 2748596, 2748439, 2748601 ], [ 2748597, 2749729, 2748602, 2749734 ], [ 2749730, 2752944, 2749735, 2752949 ], [ 2752945, 2753953, 2752950, 2753958 ], [ 2753954, 2760599, 2753959, 2760604 ], [ 2760600, 2761236, 2760605, 2761241 ], [ 2761237, 2763346, 2761242, 2763351 ], [ 2763347, 2764218, 2763352, 2764223 ], [ 2764219, 2773348, 2764224, 2773353 ], [ 2773349, 2779707, 2773354, 2779712 ], [ 2779708, 2792111, 2779713, 2792116 ], [ 2792112, 2794418, 2792117, 2794423 ], [ 2794419, 2795818, 2794424, 2795823 ], [ 2795819, 2796261, 2795824, 2796266 ], [ 2796262, 2798929, 2796267, 2798934 ], [ 2798930, 2799454, 2798935, 2799459 ], [ 2799455, 2813416, 2799460, 2813421 ], [ 2813417, 2813479, 2813422, 2813484 ], [ 2813480, 2814107, 2813485, 2814112 ], [ 2814108, 2825357, 2814113, 2825362 ], [ 2825358, 2826820, 2825363, 2826825 ], [ 2826821, 2828096, 2826826, 2828101 ], [ 2828097, 2830088, 2828102, 2830093 ], [ 2830089, 2834601, 2830094, 2834606 ], [ 2834602, 2836621, 2834607, 2836626 ], [ 2836622, 2836884, 2836627, 2836889 ], [ 2836885, 2837913, 2836890, 2837918 ], [ 2837914, 2840393, 2837919, 2840398 ], [ 
2840394, 2843391, 2840399, 2843396 ], [ 2843392, 2843629, 2843397, 2843634 ], [ 2843630, 2844665, 2843635, 2844670 ], [ 2844666, 2847821, 2844671, 2847826 ], [ 2847822, 2849601, 2847827, 2849606 ], [ 2849602, 2853189, 2849607, 2853194 ], [ 2853190, 2860428, 2853195, 2860433 ], [ 2860429, 2862152, 2860434, 2862157 ], [ 2862153, 2862729, 2862158, 2862734 ], [ 2862730, 2869033, 2862735, 2869038 ], [ 2869034, 2869157, 2869039, 2869162 ], [ 2869158, 2882082, 2869163, 2882087 ], [ 2882083, 2894091, 2882088, 2894096 ], [ 2894092, 2895090, 2894097, 2895095 ], [ 2895091, 2900119, 2895096, 2900124 ], [ 2900120, 2900555, 2900125, 2900560 ], [ 2900556, 2902167, 2900561, 2902172 ], [ 2902168, 2902210, 2902173, 2902215 ], [ 2902211, 2904556, 2902216, 2904561 ], [ 2904557, 2904673, 2904562, 2904678 ], [ 2904674, 2904897, 2904679, 2904902 ], [ 2904898, 2907797, 2904903, 2907802 ], [ 2907798, 2910456, 2907803, 2910461 ], [ 2910457, 2911608, 2910462, 2911613 ], [ 2911609, 2914744, 2911614, 2914749 ], [ 2914745, 2914779, 2914750, 2914784 ], [ 2914780, 2918124, 2914785, 2918129 ], [ 2918125, 2921020, 2918130, 2921025 ], [ 2921021, 2921458, 2921026, 2921463 ], [ 2921459, 2926580, 2921464, 2926585 ], [ 2926581, 2930570, 2926586, 2930575 ], [ 2930571, 2934873, 2930576, 2934878 ], [ 2934874, 2942883, 2934879, 2942888 ], [ 2942884, 2950823, 2942889, 2950828 ], [ 2950824, 2952204, 2950829, 2952209 ], [ 2952205, 2954108, 2952210, 2954113 ], [ 2954109, 2958788, 2954114, 2958793 ], [ 2958789, 2962338, 2958794, 2962343 ], [ 2962339, 2962634, 2962344, 2962639 ], [ 2962635, 2963567, 2962640, 2963572 ], [ 2963568, 2965512, 2963573, 2965517 ], [ 2965513, 2965715, 2965518, 2965720 ], [ 2965716, 2969537, 2965721, 2969542 ], [ 2969538, 2969667, 2969543, 2969672 ], [ 2969668, 2971097, 2969673, 2971102 ], [ 2971098, 2971329, 2971103, 2971334 ], [ 2971330, 2971874, 2971335, 2971879 ], [ 2971875, 2972098, 2971880, 2972103 ], [ 2972099, 2978342, 2972104, 2978347 ], [ 2978343, 2984060, 2978348, 2984065 ], [ 
2984061, 2988924, 2984066, 2988929 ], [ 2988925, 2994739, 2988930, 2994744 ], [ 2994740, 3002088, 2994745, 3002093 ], [ 3002089, 3009887, 3002094, 3009892 ], [ 3009888, 3014827, 3009893, 3014832 ], [ 3014828, 3020885, 3014833, 3020890 ], [ 3020886, 3022261, 3020891, 3022266 ], [ 3022262, 3029543, 3022267, 3029548 ], [ 3029544, 3030265, 3029549, 3030270 ], [ 3030266, 3032363, 3030271, 3032368 ], [ 3032364, 3033161, 3032369, 3033166 ], [ 3033162, 3042175, 3033167, 3042180 ], [ 3042176, 3042389, 3042181, 3042394 ], [ 3042390, 3049663, 3042395, 3049668 ], [ 3049664, 3050210, 3049669, 3050215 ], [ 3050211, 3051389, 3050216, 3051394 ], [ 3051390, 3052128, 3051395, 3052133 ], [ 3052129, 3052883, 3052134, 3052888 ], [ 3052884, 3054679, 3052889, 3054684 ], [ 3054680, 3055955, 3054685, 3055960 ], [ 3055956, 3056024, 3055961, 3056029 ], [ 3056025, 3062859, 3056030, 3062864 ], [ 3062860, 3063276, 3062865, 3063281 ], [ 3063277, 3064101, 3063282, 3064106 ], [ 3064102, 3065575, 3064107, 3065580 ], [ 3065576, 3065710, 3065581, 3065715 ], [ 3065711, 3066590, 3065716, 3066595 ], [ 3066591, 3075292, 3066596, 3075297 ], [ 3075293, 3076853, 3075298, 3076858 ], [ 3076854, 3080566, 3076859, 3080571 ], [ 3080567, 3080707, 3080572, 3080712 ], [ 3080708, 3080747, 3080713, 3080752 ], [ 3080748, 3080916, 3080753, 3080921 ], [ 3080917, 3082200, 3080922, 3082205 ], [ 3082201, 3085986, 3082206, 3085991 ], [ 3085987, 3086111, 3085992, 3086116 ], [ 3086112, 3088509, 3086117, 3088514 ], [ 3088510, 3091108, 3088515, 3091113 ], [ 3091109, 3094298, 3091114, 3094303 ], [ 3094299, 3098254, 3094304, 3098259 ], [ 3098255, 3100958, 3098260, 3100963 ], [ 3100959, 3101111, 3100964, 3101116 ], [ 3101112, 3101710, 3101117, 3101715 ], [ 3101711, 3107070, 3101716, 3107075 ], [ 3107071, 3108550, 3107076, 3108555 ], [ 3108551, 3116752, 3108556, 3116757 ], [ 3116753, 3119656, 3116758, 3119661 ], [ 3119657, 3120436, 3119662, 3120441 ], [ 3120437, 3122936, 3120442, 3122941 ], [ 3122937, 3125746, 3122942, 3125751 ], [ 
3125747, 3126385, 3125752, 3126390 ], [ 3126386, 3129525, 3126391, 3129530 ], [ 3129526, 3129570, 3129531, 3129575 ], [ 3129571, 3129752, 3129576, 3129757 ], [ 3129753, 3132349, 3129758, 3132354 ], [ 3132350, 3139904, 3132355, 3139909 ], [ 3139905, 3143832, 3139910, 3143837 ], [ 3143833, 3144573, 3143838, 3144578 ], [ 3144574, 3148252, 3144579, 3148257 ], [ 3148253, 3150053, 3148258, 3150058 ], [ 3150054, 3152136, 3150059, 3152141 ], [ 3152137, 3161578, 3152142, 3161583 ], [ 3161579, 3164826, 3161584, 3164831 ], [ 3164827, 3164943, 3164832, 3164948 ], [ 3164944, 3167563, 3164949, 3167568 ], [ 3167564, 3168258, 3167569, 3168263 ], [ 3168259, 3170295, 3168264, 3170300 ], [ 3170296, 3172533, 3170301, 3172538 ], [ 3172534, 3176792, 3172539, 3176797 ], [ 3176793, 3177637, 3176798, 3177642 ], [ 3177638, 3178306, 3177643, 3178311 ], [ 3178307, 3186857, 3178312, 3186862 ], [ 3186858, 3196386, 3186863, 3196391 ], [ 3196387, 3197840, 3196392, 3197845 ], [ 3197841, 3210828, 3197846, 3210833 ], [ 3210829, 3212953, 3210834, 3212958 ], [ 3212954, 3226292, 3212959, 3226297 ], [ 3226293, 3226762, 3226298, 3226767 ], [ 3226763, 3230485, 3226768, 3230490 ], [ 3230486, 3233743, 3230491, 3233748 ], [ 3233744, 3234567, 3233749, 3234572 ], [ 3234568, 3235194, 3234573, 3235199 ], [ 3235195, 3235499, 3235200, 3235504 ], [ 3235500, 3243889, 3235505, 3243894 ], [ 3243890, 3259251, 3243895, 3259256 ], [ 3259252, 3262432, 3259257, 3262437 ], [ 3262433, 3262672, 3262438, 3262677 ], [ 3262673, 3266686, 3262678, 3266691 ], [ 3266687, 3269046, 3266692, 3269051 ], [ 3269047, 3269620, 3269052, 3269625 ], [ 3269621, 3272036, 3269626, 3272041 ], [ 3272037, 3272516, 3272042, 3272521 ], [ 3272517, 3272796, 3272522, 3272801 ], [ 3272797, 3278978, 3272802, 3278983 ], [ 3278979, 3279467, 3278984, 3279472 ], [ 3279468, 3280283, 3279473, 3280288 ], [ 3280284, 3282531, 3280289, 3282536 ], [ 3282532, 3286305, 3282537, 3286310 ], [ 3286306, 3286606, 3286311, 3286611 ], [ 3286607, 3287302, 3286612, 3287307 ], [ 
3287303, 3290776, 3287308, 3290781 ], [ 3290777, 3294305, 3290782, 3294310 ], [ 3294306, 3298010, 3294311, 3298015 ], [ 3298011, 3298799, 3298016, 3298804 ], [ 3298800, 3299744, 3298805, 3299749 ], [ 3299745, 3302402, 3299750, 3302407 ], [ 3302403, 3314750, 3302408, 3314755 ], [ 3314751, 3314777, 3314756, 3314782 ], [ 3314778, 3315543, 3314783, 3315548 ], [ 3315544, 3319495, 3315549, 3319500 ], [ 3319496, 3321744, 3319501, 3321749 ], [ 3321745, 3324685, 3321750, 3324690 ], [ 3324686, 3331241, 3324691, 3331246 ], [ 3331242, 3345624, 3331247, 3345629 ], [ 3345625, 3349729, 3345630, 3349734 ], [ 3349730, 3350579, 3349735, 3350584 ], [ 3350580, 3353994, 3350585, 3353999 ], [ 3353995, 3355238, 3354000, 3355243 ], [ 3355239, 3360120, 3355244, 3360125 ], [ 3360121, 3368828, 3360126, 3368833 ], [ 3368829, 3376335, 3368834, 3376340 ], [ 3376336, 3380722, 3376341, 3380727 ], [ 3380723, 3381309, 3380728, 3381314 ], [ 3381310, 3382080, 3381315, 3382085 ], [ 3382081, 3384674, 3382086, 3384679 ], [ 3384675, 3385188, 3384680, 3385193 ], [ 3385189, 3388713, 3385194, 3388718 ], [ 3388714, 3392079, 3388719, 3392084 ], [ 3392080, 3394131, 3392085, 3394136 ], [ 3394132, 3394381, 3394137, 3394386 ], [ 3394382, 3395362, 3394387, 3395367 ], [ 3395363, 3396615, 3395368, 3396620 ], [ 3396616, 3399453, 3396621, 3399458 ], [ 3399454, 3400585, 3399459, 3400590 ], [ 3400586, 3405494, 3400591, 3405499 ], [ 3405495, 3413040, 3405500, 3413045 ], [ 3413041, 3413955, 3413046, 3413960 ], [ 3413956, 3413965, 3413961, 3413970 ], [ 3413966, 3415450, 3413971, 3415455 ], [ 3415451, 3415543, 3415456, 3415548 ], [ 3415544, 3415693, 3415549, 3415698 ], [ 3415694, 3416866, 3415699, 3416871 ], [ 3416867, 3421885, 3416872, 3421890 ], [ 3421886, 3425142, 3421891, 3425147 ], [ 3425143, 3437815, 3425148, 3437820 ], [ 3437816, 3440231, 3437821, 3440236 ], [ 3440232, 3442385, 3440237, 3442390 ], [ 3442386, 3449766, 3442391, 3449771 ], [ 3449767, 3455799, 3449772, 3455804 ], [ 3455800, 3473047, 3455805, 3473052 ], [ 
3473048, 3483375, 3473053, 3483380 ], [ 3483376, 3505500, 3483381, 3505505 ], [ 3505501, 3514082, 3505506, 3514087 ], [ 3514083, 3522092, 3514088, 3522097 ], [ 3522093, 3522111, 3522098, 3522116 ], [ 3522112, 3522767, 3522117, 3522772 ], [ 3522768, 3531627, 3522773, 3531632 ], [ 3531628, 3537780, 3531633, 3537785 ], [ 3537781, 3540644, 3537786, 3540649 ], [ 3540645, 3543017, 3540650, 3543022 ], [ 3543018, 3543842, 3543023, 3543847 ], [ 3543843, 3545546, 3543848, 3545551 ], [ 3545547, 3547077, 3545552, 3547082 ], [ 3547078, 3547149, 3547083, 3547154 ], [ 3547150, 3548533, 3547155, 3548538 ], [ 3548534, 3549829, 3548539, 3549834 ], [ 3549830, 3549935, 3549835, 3549940 ], [ 3549936, 3550203, 3549941, 3550208 ], [ 3550204, 3550227, 3550209, 3550232 ], [ 3550228, 3551480, 3550233, 3551485 ], [ 3551481, 3552210, 3551486, 3552215 ], [ 3552211, 3562847, 3552216, 3562852 ], [ 3562848, 3563068, 3562853, 3563073 ], [ 3563069, 3563973, 3563074, 3563978 ], [ 3563974, 3565130, 3563979, 3565135 ], [ 3565131, 3565565, 3565136, 3565570 ], [ 3565566, 3566132, 3565571, 3566137 ], [ 3566133, 3567018, 3566138, 3567023 ], [ 3567019, 3567627, 3567024, 3567632 ], [ 3567628, 3570838, 3567633, 3570843 ], [ 3570839, 3574124, 3570844, 3574129 ], [ 3574125, 3575098, 3574130, 3575103 ], [ 3575099, 3575586, 3575104, 3575591 ], [ 3575587, 3576602, 3575592, 3576607 ], [ 3576603, 3590383, 3576608, 3590388 ], [ 3590384, 3594697, 3590389, 3594702 ], [ 3594698, 3597307, 3594703, 3597312 ], [ 3597308, 3598837, 3597313, 3598842 ], [ 3598838, 3599839, 3598843, 3599844 ], [ 3599840, 3603578, 3599845, 3603583 ], [ 3603579, 3609322, 3603584, 3609327 ], [ 3609323, 3614549, 3609328, 3614554 ], [ 3614550, 3618664, 3614555, 3618669 ], [ 3618665, 3619899, 3618670, 3619904 ], [ 3619900, 3624824, 3619905, 3624829 ], [ 3624825, 3628159, 3624830, 3628164 ], [ 3628160, 3628423, 3628165, 3628428 ], [ 3628424, 3630285, 3628429, 3630290 ], [ 3630286, 3630486, 3630291, 3630491 ], [ 3630487, 3634237, 3630492, 3634242 ], [ 
3634238, 3639209, 3634243, 3639214 ], [ 3639210, 3652188, 3639215, 3652193 ], [ 3652189, 3659027, 3652194, 3659032 ], [ 3659028, 3659079, 3659033, 3659084 ], [ 3659080, 3659207, 3659085, 3659212 ], [ 3659208, 3667507, 3659213, 3667512 ], [ 3667508, 3667741, 3667513, 3667746 ], [ 3667742, 3668429, 3667747, 3668434 ], [ 3668430, 3670470, 3668435, 3670475 ], [ 3670471, 3673148, 3670476, 3673153 ], [ 3673149, 3674083, 3673154, 3674088 ], [ 3674084, 3678214, 3674089, 3678219 ], [ 3678215, 3680677, 3678220, 3680682 ], [ 3680678, 3684205, 3680683, 3684210 ], [ 3684206, 3690753, 3684211, 3690758 ], [ 3690754, 3698350, 3690759, 3698355 ], [ 3698351, 3699287, 3698356, 3699292 ], [ 3699288, 3702298, 3699293, 3702303 ], [ 3702299, 3706224, 3702304, 3706229 ], [ 3706225, 3706948, 3706230, 3706953 ], [ 3706949, 3709050, 3706954, 3709055 ], [ 3709051, 3725109, 3709056, 3725114 ], [ 3725110, 3731514, 3725115, 3731519 ], [ 3731515, 3736239, 3731520, 3736244 ], [ 3736240, 3736361, 3736245, 3736366 ], [ 3736362, 3738698, 3736367, 3738703 ], [ 3738699, 3745350, 3738704, 3745355 ], [ 3745351, 3747516, 3745356, 3747521 ], [ 3747517, 3748248, 3747522, 3748253 ], [ 3748249, 3750382, 3748254, 3750387 ], [ 3750383, 3761090, 3750388, 3761095 ], [ 3761091, 3762084, 3761096, 3762089 ], [ 3762085, 3762616, 3762090, 3762621 ], [ 3762617, 3769672, 3762622, 3769677 ], [ 3769673, 3769809, 3769678, 3769814 ], [ 3769810, 3769962, 3769815, 3769967 ], [ 3769963, 3774893, 3769968, 3774898 ], [ 3774894, 3776322, 3774899, 3776327 ], [ 3776323, 3778089, 3776328, 3778094 ], [ 3778090, 3784214, 3778095, 3784219 ], [ 3784215, 3799015, 3784220, 3799020 ], [ 3799016, 3806065, 3799021, 3806070 ], [ 3806066, 3807768, 3806071, 3807773 ], [ 3807769, 3816682, 3807774, 3816687 ], [ 3816683, 3817044, 3816688, 3817049 ], [ 3817045, 3817768, 3817050, 3817773 ], [ 3817769, 3822167, 3817774, 3822172 ], [ 3822168, 3824032, 3822173, 3824037 ], [ 3824033, 3828542, 3824038, 3828547 ], [ 3828543, 3832118, 3828548, 3832123 ], [ 
3832119, 3832453, 3832124, 3832458 ], [ 3832454, 3837573, 3832459, 3837578 ], [ 3837574, 3842658, 3837579, 3842663 ], [ 3842659, 3844599, 3842664, 3844604 ], [ 3844600, 3844788, 3844605, 3844793 ], [ 3844789, 3850826, 3844794, 3850831 ], [ 3850827, 3855395, 3850832, 3855400 ], [ 3855396, 3860530, 3855401, 3860535 ], [ 3860531, 3873056, 3860536, 3873061 ], [ 3873057, 3880197, 3873062, 3880202 ], [ 3880198, 3882820, 3880203, 3882825 ], [ 3882821, 3883310, 3882826, 3883315 ], [ 3883311, 3885744, 3883316, 3885749 ], [ 3885745, 3891218, 3885750, 3891223 ], [ 3891219, 3891354, 3891224, 3891359 ], [ 3891355, 3894419, 3891360, 3894424 ], [ 3894420, 3900500, 3894425, 3900505 ], [ 3900501, 3907313, 3900506, 3907318 ], [ 3907314, 3908919, 3907319, 3908924 ], [ 3908920, 3909807, 3908925, 3909812 ], [ 3909808, 3909977, 3909813, 3909982 ], [ 3909978, 3930585, 3909983, 3930590 ], [ 3930586, 3933363, 3930591, 3933368 ], [ 3933364, 3935176, 3933369, 3935181 ], [ 3935177, 3936871, 3935182, 3936876 ], [ 3936872, 3945198, 3936877, 3945203 ], [ 3945199, 3946390, 3945204, 3946395 ], [ 3946391, 3946986, 3946396, 3946991 ], [ 3946987, 3956408, 3946992, 3956413 ], [ 3956409, 3958333, 3956414, 3958338 ], [ 3958334, 3959031, 3958339, 3959036 ], [ 3959032, 3960932, 3959037, 3960937 ], [ 3960933, 3964190, 3960938, 3964195 ], [ 3964191, 3969413, 3964196, 3969418 ], [ 3969414, 3972146, 3969419, 3972151 ], [ 3972147, 3972344, 3972152, 3972349 ], [ 3972345, 3978065, 3972350, 3978070 ], [ 3978066, 3981977, 3978071, 3981982 ], [ 3981978, 3984768, 3981983, 3984773 ], [ 3984769, 3984918, 3984774, 3984923 ], [ 3984919, 3985704, 3984924, 3985709 ], [ 3985705, 3995454, 3985710, 3995459 ], [ 3995455, 3997410, 3995460, 3997415 ], [ 3997411, 4000982, 3997416, 4000987 ], [ 4000983, 4002514, 4000988, 4002519 ], [ 4002515, 4005689, 4002520, 4005694 ], [ 4005690, 4016078, 4005695, 4016083 ], [ 4016079, 4017237, 4016084, 4017242 ], [ 4017238, 4018412, 4017243, 4018417 ], [ 4018413, 4018717, 4018418, 4018722 ], [ 
4018718, 4023281, 4018723, 4023286 ], [ 4023282, 4032076, 4023287, 4032081 ], [ 4032077, 4041164, 4032082, 4041169 ], [ 4041165, 4041295, 4041170, 4041300 ], [ 4041296, 4041920, 4041301, 4041925 ], [ 4041921, 4046747, 4041926, 4046752 ], [ 4046748, 4052606, 4046753, 4052611 ], [ 4052607, 4054469, 4052612, 4054474 ], [ 4054470, 4054629, 4054475, 4054634 ], [ 4054630, 4054968, 4054635, 4054973 ], [ 4054969, 4055333, 4054974, 4055338 ], [ 4055334, 4056058, 4055339, 4056063 ], [ 4056059, 4059084, 4056064, 4059089 ], [ 4059085, 4062508, 4059090, 4062513 ], [ 4062509, 4065383, 4062514, 4065388 ], [ 4065384, 4065643, 4065389, 4065648 ], [ 4065644, 4069949, 4065649, 4069954 ], [ 4069950, 4078343, 4069955, 4078348 ], [ 4078344, 4083896, 4078349, 4083901 ], [ 4083897, 4085884, 4083902, 4085889 ], [ 4085885, 4090152, 4085890, 4090157 ], [ 4090153, 4093908, 4090158, 4093913 ], [ 4093909, 4094118, 4093914, 4094123 ], [ 4094119, 4095249, 4094124, 4095254 ], [ 4095250, 4097811, 4095255, 4097816 ], [ 4097812, 4101896, 4097817, 4101901 ], [ 4101897, 4107088, 4101902, 4107093 ], [ 4107089, 4107551, 4107094, 4107556 ], [ 4107552, 4107580, 4107557, 4107585 ], [ 4107581, 4109180, 4107586, 4109185 ], [ 4109181, 4110537, 4109186, 4110542 ], [ 4110538, 4116491, 4110543, 4116496 ], [ 4116492, 4117909, 4116497, 4117914 ], [ 4117910, 4118469, 4117915, 4118474 ], [ 4118470, 4123661, 4118475, 4123666 ], [ 4123662, 4123865, 4123667, 4123870 ], [ 4123866, 4125127, 4123871, 4125132 ], [ 4125128, 4129187, 4125133, 4129192 ], [ 4129188, 4132972, 4129193, 4132977 ], [ 4132973, 4134272, 4132978, 4134277 ], [ 4134273, 4135470, 4134278, 4135475 ], [ 4135471, 4136837, 4135476, 4136842 ], [ 4136838, 4146292, 4136843, 4146297 ], [ 4146293, 4146443, 4146298, 4146448 ], [ 4146444, 4148935, 4146449, 4148940 ], [ 4148936, 4162265, 4148941, 4162270 ], [ 4162266, 4164781, 4162271, 4164786 ], [ 4164782, 4170898, 4164787, 4170903 ], [ 4170899, 4175870, 4170904, 4175875 ], [ 4175871, 4175960, 4175876, 4175965 ], [ 
4175961, 4180869, 4175966, 4180874 ], [ 4180870, 4181479, 4180875, 4181484 ], [ 4181480, 4187785, 4181485, 4187790 ], [ 4187786, 4192423, 4187791, 4192428 ], [ 4192424, 4192625, 4192429, 4192630 ], [ 4192626, 4194747, 4192631, 4194752 ], [ 4194748, 4195600, 4194753, 4195605 ], [ 4195601, 4195842, 4195606, 4195847 ], [ 4195843, 4197634, 4195848, 4197639 ], [ 4197635, 4198036, 4197640, 4198041 ], [ 4198037, 4205660, 4198042, 4205665 ], [ 4205661, 4213510, 4205666, 4213515 ], [ 4213511, 4217238, 4213516, 4217243 ], [ 4217239, 4221217, 4217244, 4221222 ], [ 4221218, 4225159, 4221223, 4225164 ], [ 4225160, 4226183, 4225165, 4226188 ], [ 4226184, 4228901, 4226189, 4228906 ], [ 4228902, 4229774, 4228907, 4229779 ], [ 4229775, 4230559, 4229780, 4230564 ], [ 4230560, 4235572, 4230565, 4235577 ], [ 4235573, 4242029, 4235578, 4242034 ], [ 4242030, 4242765, 4242035, 4242770 ], [ 4242766, 4244054, 4242771, 4244059 ], [ 4244055, 4245334, 4244060, 4245339 ], [ 4245335, 4249019, 4245340, 4249024 ], [ 4249020, 4249100, 4249025, 4249105 ], [ 4249101, 4256705, 4249106, 4256710 ], [ 4256706, 4259509, 4256711, 4259514 ], [ 4259510, 4261410, 4259515, 4261415 ], [ 4261411, 4270525, 4261416, 4270530 ], [ 4270526, 4274109, 4270531, 4274114 ], [ 4274110, 4281849, 4274115, 4281854 ], [ 4281850, 4284762, 4281855, 4284767 ], [ 4284763, 4303042, 4284768, 4303047 ], [ 4303043, 4303258, 4303048, 4303263 ], [ 4303259, 4306632, 4303264, 4306637 ], [ 4306633, 4312216, 4306638, 4312221 ], [ 4312217, 4314890, 4312222, 4314895 ], [ 4314891, 4316460, 4314896, 4316465 ], [ 4316461, 4316626, 4316466, 4316631 ], [ 4316627, 4318092, 4316632, 4318097 ], [ 4318093, 4318604, 4318098, 4318609 ], [ 4318605, 4318772, 4318610, 4318777 ], [ 4318773, 4321424, 4318778, 4321429 ], [ 4321425, 4321489, 4321430, 4321494 ], [ 4321490, 4327535, 4321495, 4327540 ], [ 4327536, 4329548, 4327541, 4329553 ], [ 4329549, 4331509, 4329554, 4331514 ], [ 4331510, 4332147, 4331515, 4332152 ], [ 4332148, 4334445, 4332153, 4334450 ], [ 
4334446, 4338820, 4334451, 4338825 ], [ 4338821, 4339739, 4338826, 4339744 ], [ 4339740, 4343682, 4339745, 4343687 ], [ 4343683, 4348264, 4343688, 4348269 ], [ 4348265, 4352206, 4348270, 4352211 ], [ 4352207, 4356621, 4352212, 4356626 ], [ 4356622, 4366184, 4356627, 4366189 ], [ 4366185, 4374134, 4366190, 4374139 ], [ 4374135, 4374749, 4374140, 4374754 ], [ 4374750, 4388049, 4374755, 4388054 ], [ 4388050, 4388548, 4388055, 4388553 ], [ 4388549, 4395214, 4388554, 4395219 ], [ 4395215, 4397725, 4395220, 4397730 ], [ 4397726, 4401917, 4397731, 4401922 ], [ 4401918, 4401962, 4401923, 4401967 ], [ 4401963, 4404470, 4401968, 4404475 ], [ 4404471, 4404603, 4404476, 4404608 ], [ 4404604, 4404677, 4404609, 4404682 ], [ 4404678, 4407845, 4404683, 4407850 ], [ 4407846, 4415693, 4407851, 4415698 ], [ 4415694, 4415957, 4415699, 4415962 ], [ 4415958, 4426334, 4415963, 4426339 ], [ 4426335, 4427143, 4426340, 4427148 ], [ 4427144, 4432989, 4427149, 4432994 ], [ 4432990, 4433640, 4432995, 4433645 ], [ 4433641, 4435236, 4433646, 4435241 ], [ 4435237, 4450981, 4435242, 4450986 ], [ 4450982, 4452121, 4450987, 4452126 ], [ 4452122, 4454417, 4452127, 4454422 ], [ 4454418, 4455164, 4454423, 4455169 ], [ 4455165, 4459226, 4455170, 4459231 ], [ 4459227, 4462863, 4459232, 4462868 ], [ 4462864, 4469714, 4462869, 4469719 ], [ 4469715, 4471008, 4469720, 4471013 ], [ 4471009, 4473114, 4471014, 4473119 ], [ 4473115, 4477852, 4473120, 4477857 ], [ 4477853, 4477874, 4477858, 4477879 ], [ 4477875, 4482921, 4477880, 4482926 ], [ 4482922, 4489809, 4482927, 4489814 ], [ 4489810, 4490912, 4489815, 4490917 ], [ 4490913, 4491974, 4490918, 4491979 ], [ 4491975, 4492157, 4491980, 4492162 ], [ 4492158, 4493614, 4492163, 4493619 ], [ 4493615, 4496829, 4493620, 4496834 ], [ 4496830, 4497697, 4496835, 4497702 ], [ 4497698, 4499157, 4497703, 4499162 ], [ 4499158, 4502248, 4499163, 4502253 ], [ 4502249, 4504493, 4502254, 4504498 ], [ 4504494, 4505336, 4504499, 4505341 ], [ 4505337, 4506680, 4505342, 4506685 ], [ 
4506681, 4506961, 4506686, 4506966 ], [ 4506962, 4507601, 4506967, 4507606 ], [ 4507602, 4513351, 4507607, 4513356 ], [ 4513352, 4516356, 4513357, 4516361 ], [ 4516357, 4520650, 4516362, 4520655 ], [ 4520651, 4528820, 4520656, 4528825 ], [ 4528821, 4535971, 4528826, 4535976 ], [ 4535972, 4540172, 4535977, 4540177 ], [ 4540173, 4551230, 4540178, 4551235 ], [ 4551231, 4552997, 4551236, 4553002 ], [ 4552998, 4555491, 4553003, 4555496 ], [ 4555492, 4558033, 4555497, 4558038 ], [ 4558034, 4562123, 4558039, 4562128 ], [ 4562124, 4563100, 4562129, 4563105 ], [ 4563101, 4564639, 4563106, 4564644 ], [ 4564640, 4566006, 4564645, 4566011 ], [ 4566007, 4575457, 4566012, 4575462 ], [ 4575458, 4575809, 4575463, 4575814 ], [ 4575810, 4576253, 4575815, 4576258 ], [ 4576254, 4579647, 4576259, 4579652 ], [ 4579648, 4588823, 4579653, 4588828 ], [ 4588824, 4589254, 4588829, 4589259 ], [ 4589255, 4598670, 4589260, 4598675 ], [ 4598671, 4601105, 4598676, 4601110 ], [ 4601106, 4602566, 4601111, 4602571 ], [ 4602567, 4612068, 4602572, 4612073 ], [ 4612069, 4615603, 4612074, 4615608 ], [ 4615604, 4627887, 4615609, 4627892 ], [ 4627888, 4631394, 4627893, 4631399 ], [ 4631395, 4631631, 4631400, 4631636 ], [ 4631632, 4635963, 4631637, 4635968 ], [ 4635964, 4641129, 4635969, 4641134 ], [ 4641130, 4642980, 4641135, 4642985 ], [ 4642981, 4643635, 4642986, 4643640 ], [ 4643636, 4644147, 4643641, 4644152 ], [ 4644148, 4649332, 4644153, 4649337 ], [ 4649333, 4649464, 4649338, 4649469 ], [ 4649465, 4656366, 4649470, 4656371 ], [ 4656367, 4656864, 4656372, 4656869 ], [ 4656865, 4656933, 4656870, 4656938 ], [ 4656934, 4660056, 4656939, 4660061 ], [ 4660057, 4665881, 4660062, 4665886 ], [ 4665882, 4668837, 4665887, 4668842 ], [ 4668838, 4672873, 4668843, 4672878 ], [ 4672874, 4681462, 4672879, 4681467 ], [ 4681463, 4696368, 4681468, 4696373 ], [ 4696369, 4699474, 4696374, 4699479 ], [ 4699475, 4704523, 4699480, 4704528 ], [ 4704524, 4706008, 4704529, 4706013 ], [ 4706009, 4706510, 4706014, 4706515 ], [ 
4706511, 4711295, 4706516, 4711300 ], [ 4711296, 4711543, 4711301, 4711548 ], [ 4711544, 4711935, 4711549, 4711940 ], [ 4711936, 4712790, 4711941, 4712795 ], [ 4712791, 4713126, 4712796, 4713131 ], [ 4713127, 4713730, 4713132, 4713735 ], [ 4713731, 4717619, 4713736, 4717624 ], [ 4717620, 4724224, 4717625, 4724229 ], [ 4724225, 4725868, 4724230, 4725873 ], [ 4725869, 4727653, 4725874, 4727658 ], [ 4727654, 4729069, 4727659, 4729074 ], [ 4729070, 4730833, 4729075, 4730838 ], [ 4730834, 4733099, 4730839, 4733104 ], [ 4733100, 4733576, 4733105, 4733581 ], [ 4733577, 4736754, 4733582, 4736759 ], [ 4736755, 4741684, 4736760, 4741689 ], [ 4741685, 4744830, 4741690, 4744835 ], [ 4744831, 4746768, 4744836, 4746773 ], [ 4746769, 4749037, 4746774, 4749042 ], [ 4749038, 4749801, 4749043, 4749806 ], [ 4749802, 4749864, 4749807, 4749869 ], [ 4749865, 4750966, 4749870, 4750971 ], [ 4750967, 4752965, 4750972, 4752970 ], [ 4752966, 4754237, 4752971, 4754242 ], [ 4754238, 4757191, 4754243, 4757196 ], [ 4757192, 4762052, 4757197, 4762057 ], [ 4762053, 4764164, 4762058, 4764169 ], [ 4764165, 4766341, 4764170, 4766346 ], [ 4766342, 4767519, 4766347, 4767524 ], [ 4767520, 4769451, 4767525, 4769456 ], [ 4769452, 4770366, 4769457, 4770371 ], [ 4770367, 4774504, 4770372, 4774509 ], [ 4774505, 4779310, 4774510, 4779315 ], [ 4779311, 4784713, 4779316, 4784718 ], [ 4784714, 4784960, 4784719, 4784965 ], [ 4784961, 4789181, 4784966, 4789186 ], [ 4789182, 4792894, 4789187, 4792899 ], [ 4792895, 4804321, 4792900, 4804326 ], [ 4804322, 4807780, 4804327, 4807785 ], [ 4807781, 4808367, 4807786, 4808372 ], [ 4808368, 4811025, 4808373, 4811030 ], [ 4811026, 4813062, 4811031, 4813067 ], [ 4813063, 4822160, 4813068, 4822165 ], [ 4822161, 4833156, 4822166, 4833161 ], [ 4833157, 4839621, 4833162, 4839626 ], [ 4839622, 4853316, 4839627, 4853321 ], [ 4853317, 4862268, 4853322, 4862273 ], [ 4862269, 4862689, 4862274, 4862694 ], [ 4862690, 4863453, 4862695, 4863458 ], [ 4863454, 4863657, 4863459, 4863662 ], [ 
4863658, 4867215, 4863663, 4867220 ], [ 4867216, 4867943, 4867221, 4867948 ], [ 4867944, 4870367, 4867949, 4870372 ], [ 4870368, 4871260, 4870373, 4871265 ], [ 4871261, 4871925, 4871266, 4871930 ], [ 4871926, 4872824, 4871931, 4872829 ], [ 4872825, 4879935, 4872830, 4879940 ], [ 4879936, 4881593, 4879941, 4881598 ], [ 4881594, 4882087, 4881599, 4882092 ], [ 4882088, 4889351, 4882093, 4889356 ], [ 4889352, 4890443, 4889357, 4890448 ], [ 4890444, 4898485, 4890449, 4898490 ], [ 4898486, 4901057, 4898491, 4901062 ], [ 4901058, 4904245, 4901063, 4904250 ], [ 4904246, 4904668, 4904251, 4904673 ], [ 4904669, 4904984, 4904674, 4904989 ], [ 4904985, 4914224, 4904990, 4914229 ], [ 4914225, 4916537, 4914230, 4916542 ], [ 4916538, 4919908, 4916543, 4919913 ], [ 4919909, 4926663, 4919914, 4926668 ], [ 4926664, 4929329, 4926669, 4929334 ], [ 4929330, 4930673, 4929335, 4930678 ], [ 4930674, 4930954, 4930679, 4930959 ], [ 4930955, 4931594, 4930960, 4931599 ], [ 4931595, 4937282, 4931600, 4937287 ], [ 4937283, 4939667, 4937288, 4939672 ], [ 4939668, 4941837, 4939673, 4941842 ], [ 4941838, 4947030, 4941843, 4947035 ], [ 4947031, 4951071, 4947036, 4951076 ], [ 4951072, 4953999, 4951077, 4954004 ], [ 4954000, 4955481, 4954005, 4955486 ], [ 4955482, 4959224, 4955487, 4959229 ], [ 4959225, 4974624, 4959230, 4974629 ], [ 4974625, 4977429, 4974630, 4977434 ], [ 4977430, 4984448, 4977435, 4984453 ], [ 4984449, 4986670, 4984454, 4986675 ], [ 4986671, 4992038, 4986676, 4992043 ], [ 4992039, 4993811, 4992044, 4993816 ], [ 4993812, 4995631, 4993817, 4995636 ], [ 4995632, 4996624, 4995637, 4996629 ], [ 4996625, 4996668, 4996630, 4996673 ], [ 4996669, 4998818, 4996674, 4998823 ], [ 4998819, 5004186, 4998824, 5004191 ], [ 5004187, 5013598, 5004192, 5013603 ], [ 5013599, 5016180, 5013604, 5016185 ], [ 5016181, 5018455, 5016186, 5018460 ], [ 5018456, 5026770, 5018461, 5026775 ], [ 5026771, 5028841, 5026776, 5028846 ], [ 5028842, 5031862, 5028847, 5031867 ], [ 5031863, 5036331, 5031868, 5036336 ], [ 
5036332, 5037861, 5036337, 5037866 ], [ 5037862, 5038887, 5037867, 5038892 ], [ 5038888, 5040440, 5038893, 5040445 ], [ 5040441, 5042902, 5040446, 5042907 ], [ 5042903, 5044827, 5042908, 5044832 ], [ 5044828, 5050524, 5044833, 5050529 ], [ 5050525, 5053866, 5050530, 5053871 ], [ 5053867, 5054707, 5053872, 5054712 ], [ 5054708, 5055021, 5054713, 5055026 ], [ 5055022, 5057873, 5055027, 5057878 ], [ 5057874, 5059734, 5057879, 5059739 ], [ 5059735, 5061548, 5059740, 5061553 ], [ 5061549, 5063342, 5061554, 5063347 ], [ 5063343, 5064119, 5063348, 5064124 ], [ 5064120, 5064638, 5064125, 5064643 ], [ 5064639, 5068774, 5064644, 5068779 ], [ 5068775, 5069157, 5068780, 5069162 ], [ 5069158, 5069375, 5069163, 5069380 ], [ 5069376, 5071533, 5069381, 5071538 ], [ 5071534, 5072259, 5071539, 5072264 ], [ 5072260, 5072332, 5072265, 5072337 ], [ 5072333, 5074288, 5072338, 5074293 ], [ 5074289, 5087508, 5074294, 5087513 ], [ 5087509, 5088409, 5087514, 5088414 ], [ 5088410, 5093963, 5088415, 5093968 ], [ 5093964, 5098261, 5093969, 5098266 ], [ 5098262, 5116037, 5098267, 5116042 ], [ 5116038, 5116647, 5116043, 5116652 ], [ 5116648, 5119282, 5116653, 5119287 ], [ 5119283, 5132940, 5119288, 5132945 ], [ 5132941, 5133405, 5132946, 5133410 ], [ 5133406, 5134558, 5133411, 5134563 ], [ 5134559, 5138432, 5134564, 5138437 ], [ 5138433, 5138944, 5138438, 5138949 ], [ 5138945, 5139157, 5138950, 5139162 ], [ 5139158, 5139587, 5139163, 5139592 ], [ 5139588, 5142617, 5139593, 5142622 ], [ 5142618, 5148183, 5142623, 5148188 ], [ 5148184, 5148672, 5148189, 5148677 ], [ 5148673, 5150053, 5148678, 5150058 ], [ 5150054, 5151087, 5150059, 5151092 ], [ 5151088, 5153217, 5151093, 5153222 ], [ 5153218, 5154383, 5153223, 5154388 ], [ 5154384, 5155016, 5154389, 5155021 ], [ 5155017, 5156599, 5155022, 5156604 ], [ 5156600, 5157802, 5156605, 5157807 ], [ 5157803, 5157970, 5157808, 5157975 ], [ 5157971, 5160625, 5157976, 5160630 ], [ 5160626, 5162852, 5160631, 5162857 ], [ 5162853, 5164824, 5162858, 5164829 ], [ 
5164825, 5171077, 5164830, 5171082 ], [ 5171078, 5180591, 5171083, 5180596 ], [ 5180592, 5188235, 5180597, 5188240 ], [ 5188236, 5194013, 5188241, 5194018 ], [ 5194014, 5200363, 5194019, 5200368 ], [ 5200364, 5200380, 5200369, 5200385 ], [ 5200381, 5212416, 5200386, 5212421 ], [ 5212417, 5214189, 5212422, 5214194 ], [ 5214190, 5218448, 5214195, 5218453 ], [ 5218449, 5221514, 5218454, 5221519 ], [ 5221515, 5222405, 5221520, 5222410 ], [ 5222406, 5223121, 5222411, 5223126 ], [ 5223122, 5225062, 5223127, 5225067 ], [ 5225063, 5227034, 5225068, 5227039 ], [ 5227035, 5237062, 5227040, 5237067 ], [ 5237063, 5238549, 5237068, 5238554 ], [ 5238550, 5239941, 5238555, 5239946 ], [ 5239942, 5241160, 5239947, 5241165 ], [ 5241161, 5245009, 5241166, 5245014 ], [ 5245010, 5245420, 5245015, 5245425 ], [ 5245421, 5246665, 5245426, 5246670 ], [ 5246666, 5246882, 5246671, 5246887 ], [ 5246883, 5252321, 5246888, 5252326 ], [ 5252322, 5261182, 5252327, 5261187 ], [ 5261183, 5273111, 5261188, 5273116 ], [ 5273112, 5273132, 5273117, 5273137 ], [ 5273133, 5273680, 5273138, 5273685 ], [ 5273681, 5274282, 5273686, 5274287 ], [ 5274283, 5277485, 5274288, 5277490 ], [ 5277486, 5278602, 5277491, 5278607 ], [ 5278603, 5286668, 5278608, 5286673 ], [ 5286669, 5288844, 5286674, 5288849 ], [ 5288845, 5295426, 5288850, 5295431 ], [ 5295427, 5299331, 5295432, 5299336 ], [ 5299332, 5299740, 5299337, 5299745 ], [ 5299741, 5302074, 5299746, 5302079 ], [ 5302075, 5304927, 5302080, 5304932 ], [ 5304928, 5316196, 5304933, 5316201 ], [ 5316197, 5321884, 5316202, 5321889 ], [ 5321885, 5327443, 5321890, 5327448 ], [ 5327444, 5333009, 5327449, 5333014 ], [ 5333010, 5335558, 5333015, 5335563 ], [ 5335559, 5337934, 5335564, 5337939 ], [ 5337935, 5340296, 5337940, 5340301 ], [ 5340297, 5342510, 5340302, 5342515 ], [ 5342511, 5343834, 5342516, 5343839 ], [ 5343835, 5359283, 5343840, 5359288 ], [ 5359284, 5362544, 5359289, 5362549 ], [ 5362545, 5375729, 5362550, 5375734 ], [ 5375730, 5375790, 5375735, 5375795 ], [ 
5375791, 5377253, 5375796, 5377258 ], [ 5377254, 5378696, 5377259, 5378701 ], [ 5378697, 5382060, 5378702, 5382065 ], [ 5382061, 5388224, 5382066, 5388229 ], [ 5388225, 5391633, 5388230, 5391638 ], [ 5391634, 5401417, 5391639, 5401422 ], [ 5401418, 5406537, 5401423, 5406542 ], [ 5406538, 5408637, 5406543, 5408642 ], [ 5408638, 5417270, 5408643, 5417275 ], [ 5417271, 5419353, 5417276, 5419358 ], [ 5419354, 5420144, 5419359, 5420149 ], [ 5420145, 5420216, 5420150, 5420221 ], [ 5420217, 5420740, 5420222, 5420745 ], [ 5420741, 5427477, 5420746, 5427482 ], [ 5427478, 5429323, 5427483, 5429328 ], [ 5429324, 5441634, 5429329, 5441639 ], [ 5441635, 5448663, 5441640, 5448668 ], [ 5448664, 5452231, 5448669, 5452236 ], [ 5452232, 5458274, 5452237, 5458279 ], [ 5458275, 5459524, 5458280, 5459529 ], [ 5459525, 5468509, 5459530, 5468514 ], [ 5468510, 5469773, 5468515, 5469778 ], [ 5469774, 5475379, 5469779, 5475384 ], [ 5475380, 5476063, 5475385, 5476068 ], [ 5476064, 5477860, 5476069, 5477865 ], [ 5477861, 5478124, 5477866, 5478129 ], [ 5478125, 5478577, 5478130, 5478582 ], [ 5478578, 5479176, 5478583, 5479181 ], [ 5479177, 5483012, 5479182, 5483017 ], [ 5483013, 5483809, 5483018, 5483814 ], [ 5483810, 5495234, 5483815, 5495239 ], [ 5495235, 5498449, 5495240, 5498449 ] ] def _cut(seq) cuts = Bio::RestrictionEnzyme::Analysis.cut(seq, "BstEII", {:view_ranges => true}) end def test_BstEII_edge_cases (13481..13492).each do |len| _test_by_size(len) end end end # class TestEcoliO157H7_BstEII class TestEcoliO157H7_3enzymes < Test::Unit::TestCase include HelperMethods TestLabel = 'SacI+EcoRI+BstEII' SampleSequence = EcoliO157H7Seq SampleCutRanges = The3Enzymes_WHOLE = [ [ 0, 79, 0, 84 ], [ 80, 3858, 85, 3862 ], [ 3859, 4612, 3863, 4617 ], [ 4613, 5619, 4618, 5623 ], [ 5620, 7472, 5624, 7468 ], [ 7473, 12905, 7469, 12909 ], [ 12906, 13483, 12910, 13488 ], [ 13484, 14551, 13489, 14547 ], [ 14552, 15984, 14548, 15989 ], [ 15985, 20045, 15990, 20049 ], [ 20046, 21462, 20050, 21467 ], [ 
21463, 27326, 21468, 27331 ], [ 27327, 30943, 27332, 30948 ], [ 30944, 34888, 30949, 34893 ], [ 34889, 35077, 34894, 35082 ], [ 35078, 35310, 35083, 35315 ], [ 35311, 36254, 35316, 36259 ], [ 36255, 36648, 36260, 36652 ], [ 36649, 36918, 36653, 36922 ], [ 36919, 41885, 36923, 41890 ], [ 41886, 43070, 41891, 43075 ], [ 43071, 45689, 43076, 45694 ], [ 45690, 48588, 45695, 48584 ], [ 48589, 52325, 48585, 52330 ], [ 52326, 54650, 52331, 54654 ], [ 54651, 54728, 54655, 54732 ], [ 54729, 55703, 54733, 55708 ], [ 55704, 58828, 55709, 58833 ], [ 58829, 59178, 58834, 59183 ], [ 59179, 59800, 59184, 59796 ], [ 59801, 61256, 59797, 61260 ], [ 61257, 72610, 61261, 72615 ], [ 72611, 72739, 72616, 72744 ], [ 72740, 73099, 72745, 73104 ], [ 73100, 75123, 73105, 75128 ], [ 75124, 77366, 75129, 77371 ], [ 77367, 77810, 77372, 77815 ], [ 77811, 78740, 77816, 78745 ], [ 78741, 79717, 78746, 79722 ], [ 79718, 82250, 79723, 82255 ], [ 82251, 84604, 82256, 84609 ], [ 84605, 95491, 84610, 95496 ], [ 95492, 95785, 95497, 95790 ], [ 95786, 95794, 95791, 95799 ], [ 95795, 96335, 95800, 96340 ], [ 96336, 96489, 96341, 96493 ], [ 96490, 101464, 96494, 101468 ], [ 101465, 102044, 101469, 102049 ], [ 102045, 102541, 102050, 102546 ], [ 102542, 103192, 102547, 103197 ], [ 103193, 103397, 103198, 103393 ], [ 103398, 104722, 103394, 104727 ], [ 104723, 106365, 104728, 106369 ], [ 106366, 106896, 106370, 106900 ], [ 106897, 107735, 106901, 107739 ], [ 107736, 110020, 107740, 110024 ], [ 110021, 110883, 110025, 110888 ], [ 110884, 112524, 110889, 112528 ], [ 112525, 113324, 112529, 113328 ], [ 113325, 115867, 113329, 115871 ], [ 115868, 117723, 115872, 117727 ], [ 117724, 118742, 117728, 118738 ], [ 118743, 120090, 118739, 120095 ], [ 120091, 120657, 120096, 120662 ], [ 120658, 128060, 120663, 128064 ], [ 128061, 128308, 128065, 128313 ], [ 128309, 136112, 128314, 136116 ], [ 136113, 138305, 136117, 138310 ], [ 138306, 138996, 138311, 139000 ], [ 138997, 139146, 139001, 139142 ], [ 139147, 141147, 
139143, 141152 ], [ 141148, 143724, 141153, 143729 ], [ 143725, 143838, 143730, 143843 ], [ 143839, 144303, 143844, 144308 ], [ 144304, 148199, 144309, 148204 ], [ 148200, 149577, 148205, 149582 ], [ 149578, 149731, 149583, 149736 ], [ 149732, 152137, 149737, 152141 ], [ 152138, 156115, 152142, 156120 ], [ 156116, 161126, 156121, 161131 ], [ 161127, 162856, 161132, 162861 ], [ 162857, 168965, 162862, 168961 ], [ 168966, 170693, 168962, 170698 ], [ 170694, 170944, 170699, 170949 ], [ 170945, 171201, 170950, 171206 ], [ 171202, 173241, 171207, 173246 ], [ 173242, 177283, 173247, 177288 ], [ 177284, 178048, 177289, 178052 ], [ 178049, 178177, 178053, 178182 ], [ 178178, 178781, 178183, 178786 ], [ 178782, 181610, 178787, 181615 ], [ 181611, 181706, 181616, 181711 ], [ 181707, 185355, 181712, 185351 ], [ 185356, 185661, 185352, 185666 ], [ 185662, 193407, 185667, 193412 ], [ 193408, 194141, 193413, 194145 ], [ 194142, 194876, 194146, 194880 ], [ 194877, 195511, 194881, 195516 ], [ 195512, 195754, 195517, 195759 ], [ 195755, 197005, 195760, 197009 ], [ 197006, 197247, 197010, 197252 ], [ 197248, 200659, 197253, 200664 ], [ 200660, 201820, 200665, 201825 ], [ 201821, 202300, 201826, 202305 ], [ 202301, 202686, 202306, 202691 ], [ 202687, 206289, 202692, 206294 ], [ 206290, 206466, 206295, 206471 ], [ 206467, 207011, 206472, 207016 ], [ 207012, 208159, 207017, 208164 ], [ 208160, 209976, 208165, 209981 ], [ 209977, 210078, 209982, 210083 ], [ 210079, 211485, 210084, 211490 ], [ 211486, 212377, 211491, 212382 ], [ 212378, 213569, 212383, 213574 ], [ 213570, 214316, 213575, 214312 ], [ 214317, 216005, 214313, 216010 ], [ 216006, 217226, 216011, 217222 ], [ 217227, 220098, 217223, 220103 ], [ 220099, 221476, 220104, 221480 ], [ 221477, 221641, 221481, 221645 ], [ 221642, 224063, 221646, 224068 ], [ 224064, 227774, 224069, 227778 ], [ 227775, 228604, 227779, 228609 ], [ 228605, 229453, 228610, 229449 ], [ 229454, 229931, 229450, 229935 ], [ 229932, 232247, 229936, 232251 ], [ 
232248, 235221, 232252, 235225 ], [ 235222, 237291, 235226, 237295 ], [ 237292, 239035, 237296, 239039 ], [ 239036, 239993, 239040, 239998 ], [ 239994, 240624, 239999, 240628 ], [ 240625, 240887, 240629, 240891 ], [ 240888, 242089, 240892, 242093 ], [ 242090, 243880, 242094, 243884 ], [ 243881, 245321, 243885, 245325 ], [ 245322, 247914, 245326, 247919 ], [ 247915, 251579, 247920, 251584 ], [ 251580, 257092, 251585, 257097 ], [ 257093, 259887, 257098, 259891 ], [ 259888, 260535, 259892, 260539 ], [ 260536, 261621, 260540, 261626 ], [ 261622, 263030, 261627, 263035 ], [ 263031, 264258, 263036, 264262 ], [ 264259, 265004, 264263, 265008 ], [ 265005, 265084, 265009, 265089 ], [ 265085, 265243, 265090, 265248 ], [ 265244, 265534, 265249, 265539 ], [ 265535, 266117, 265540, 266122 ], [ 266118, 274428, 266123, 274433 ], [ 274429, 275235, 274434, 275231 ], [ 275236, 276946, 275232, 276950 ], [ 276947, 277457, 276951, 277461 ], [ 277458, 279137, 277462, 279133 ], [ 279138, 282285, 279134, 282290 ], [ 282286, 286948, 282291, 286953 ], [ 286949, 288342, 286954, 288338 ], [ 288343, 289897, 288339, 289901 ], [ 289898, 292547, 289902, 292552 ], [ 292548, 297678, 292553, 297683 ], [ 297679, 303902, 297684, 303906 ], [ 303903, 304580, 303907, 304584 ], [ 304581, 307362, 304585, 307366 ], [ 307363, 307931, 307367, 307935 ], [ 307932, 308161, 307936, 308166 ], [ 308162, 308706, 308167, 308711 ], [ 308707, 313482, 308712, 313487 ], [ 313483, 316025, 313488, 316021 ], [ 316026, 324159, 316022, 324163 ], [ 324160, 326130, 324164, 326134 ], [ 326131, 331620, 326135, 331624 ], [ 331621, 336338, 331625, 336342 ], [ 336339, 336873, 336343, 336877 ], [ 336874, 337118, 336878, 337123 ], [ 337119, 337935, 337124, 337940 ], [ 337936, 338781, 337941, 338786 ], [ 338782, 339493, 338787, 339498 ], [ 339494, 341025, 339499, 341030 ], [ 341026, 343919, 341031, 343923 ], [ 343920, 344424, 343924, 344429 ], [ 344425, 348384, 344430, 348389 ], [ 348385, 348408, 348390, 348404 ], [ 348409, 353417, 
348405, 353413 ], [ 353418, 354781, 353414, 354786 ], [ 354782, 356692, 354787, 356697 ], [ 356693, 357008, 356698, 357013 ], [ 357009, 357305, 357014, 357310 ], [ 357306, 357328, 357311, 357333 ], [ 357329, 358126, 357334, 358131 ], [ 358127, 359472, 358132, 359477 ], [ 359473, 362160, 359478, 362165 ], [ 362161, 365395, 362166, 365400 ], [ 365396, 365704, 365401, 365709 ], [ 365705, 368663, 365710, 368667 ], [ 368664, 368841, 368668, 368845 ], [ 368842, 370589, 368846, 370593 ], [ 370590, 371148, 370594, 371152 ], [ 371149, 373639, 371153, 373643 ], [ 373640, 377393, 373644, 377397 ], [ 377394, 381068, 377398, 381072 ], [ 381069, 381692, 381073, 381688 ], [ 381693, 381746, 381689, 381751 ], [ 381747, 381994, 381752, 381999 ], [ 381995, 383335, 382000, 383340 ], [ 383336, 385141, 383341, 385146 ], [ 385142, 389399, 385147, 389403 ], [ 389400, 390171, 389404, 390176 ], [ 390172, 392340, 390177, 392344 ], [ 392341, 392764, 392345, 392769 ], [ 392765, 394338, 392770, 394343 ], [ 394339, 394686, 394344, 394691 ], [ 394687, 397592, 394692, 397596 ], [ 397593, 398703, 397597, 398708 ], [ 398704, 404095, 398709, 404100 ], [ 404096, 408361, 404101, 408366 ], [ 408362, 409029, 408367, 409025 ], [ 409030, 413032, 409026, 413037 ], [ 413033, 414563, 413038, 414568 ], [ 414564, 416901, 414569, 416906 ], [ 416902, 417419, 416907, 417424 ], [ 417420, 420057, 417425, 420061 ], [ 420058, 421129, 420062, 421125 ], [ 421130, 421777, 421126, 421782 ], [ 421778, 423748, 421783, 423753 ], [ 423749, 431903, 423754, 431908 ], [ 431904, 432852, 431909, 432848 ], [ 432853, 440000, 432849, 440005 ], [ 440001, 440754, 440006, 440750 ], [ 440755, 444226, 440751, 444222 ], [ 444227, 448040, 444223, 448045 ], [ 448041, 452994, 448046, 452999 ], [ 452995, 453075, 453000, 453080 ], [ 453076, 454654, 453081, 454658 ], [ 454655, 454950, 454659, 454955 ], [ 454951, 455888, 454956, 455893 ], [ 455889, 460160, 455894, 460165 ], [ 460161, 462319, 460166, 462323 ], [ 462320, 462650, 462324, 462654 ], [ 
462651, 463076, 462655, 463081 ], [ 463077, 465003, 463082, 465008 ], [ 465004, 466828, 465009, 466833 ], [ 466829, 467686, 466834, 467691 ], [ 467687, 468596, 467692, 468601 ], [ 468597, 475083, 468602, 475087 ], [ 475084, 479953, 475088, 479958 ], [ 479954, 480538, 479959, 480543 ], [ 480539, 482480, 480544, 482484 ], [ 482481, 482869, 482485, 482874 ], [ 482870, 483410, 482875, 483414 ], [ 483411, 489378, 483415, 489383 ], [ 489379, 492112, 489384, 492116 ], [ 492113, 492241, 492117, 492246 ], [ 492242, 493791, 492247, 493795 ], [ 493792, 495406, 493796, 495411 ], [ 495407, 495712, 495412, 495717 ], [ 495713, 497829, 495718, 497834 ], [ 497830, 501698, 497835, 501703 ], [ 501699, 503304, 501704, 503308 ], [ 503305, 504565, 503309, 504570 ], [ 504566, 505105, 504571, 505110 ], [ 505106, 508452, 505111, 508457 ], [ 508453, 514353, 508458, 514357 ], [ 514354, 515947, 514358, 515952 ], [ 515948, 519141, 515953, 519146 ], [ 519142, 519398, 519147, 519403 ], [ 519399, 519662, 519404, 519666 ], [ 519663, 521386, 519667, 521391 ], [ 521387, 521935, 521392, 521939 ], [ 521936, 523114, 521940, 523118 ], [ 523115, 524176, 523119, 524180 ], [ 524177, 524521, 524181, 524525 ], [ 524522, 524936, 524526, 524932 ], [ 524937, 526115, 524933, 526120 ], [ 526116, 526729, 526121, 526734 ], [ 526730, 527018, 526735, 527023 ], [ 527019, 528059, 527024, 528064 ], [ 528060, 532689, 528065, 532694 ], [ 532690, 534193, 532695, 534189 ], [ 534194, 534702, 534190, 534707 ], [ 534703, 535272, 534708, 535277 ], [ 535273, 538638, 535278, 538642 ], [ 538639, 538668, 538643, 538673 ], [ 538669, 543939, 538674, 543944 ], [ 543940, 547429, 543945, 547434 ], [ 547430, 547624, 547435, 547628 ], [ 547625, 550898, 547629, 550902 ], [ 550899, 553890, 550903, 553895 ], [ 553891, 554678, 553896, 554683 ], [ 554679, 555452, 554684, 555457 ], [ 555453, 556296, 555458, 556301 ], [ 556297, 557116, 556302, 557120 ], [ 557117, 559341, 557121, 559346 ], [ 559342, 559991, 559347, 559996 ], [ 559992, 563242, 
559997, 563247 ], [ 563243, 563390, 563248, 563394 ], [ 563391, 566071, 563395, 566075 ], [ 566072, 566857, 566076, 566861 ], [ 566858, 571925, 566862, 571929 ], [ 571926, 576432, 571930, 576437 ], [ 576433, 582431, 576438, 582436 ], [ 582432, 582959, 582437, 582964 ], [ 582960, 583475, 582965, 583480 ], [ 583476, 583589, 583481, 583594 ], [ 583590, 583670, 583595, 583675 ], [ 583671, 583901, 583676, 583906 ], [ 583902, 584198, 583907, 584203 ], [ 584199, 584633, 584204, 584638 ], [ 584634, 585704, 584639, 585709 ], [ 585705, 585746, 585710, 585751 ], [ 585747, 586175, 585752, 586180 ], [ 586176, 586301, 586181, 586306 ], [ 586302, 586643, 586307, 586648 ], [ 586644, 586775, 586649, 586780 ], [ 586776, 587072, 586781, 587077 ], [ 587073, 587214, 587078, 587219 ], [ 587215, 587540, 587220, 587545 ], [ 587541, 587969, 587546, 587974 ], [ 587970, 588095, 587975, 588100 ], [ 588096, 588437, 588101, 588442 ], [ 588438, 588569, 588443, 588574 ], [ 588570, 589008, 588575, 589013 ], [ 589009, 589166, 589014, 589171 ], [ 589167, 590366, 589172, 590371 ], [ 590367, 590792, 590372, 590797 ], [ 590793, 591077, 590798, 591082 ], [ 591078, 591263, 591083, 591268 ], [ 591264, 591863, 591269, 591868 ], [ 591864, 592058, 591869, 592063 ], [ 592059, 592160, 592064, 592165 ], [ 592161, 592568, 592166, 592573 ], [ 592569, 592760, 592574, 592765 ], [ 592761, 593060, 592766, 593065 ], [ 593061, 593186, 593066, 593191 ], [ 593187, 593366, 593192, 593371 ], [ 593367, 593957, 593372, 593962 ], [ 593958, 594827, 593963, 594832 ], [ 594828, 594980, 594833, 594985 ], [ 594981, 595649, 594986, 595654 ], [ 595650, 595893, 595655, 595898 ], [ 595894, 596057, 595899, 596062 ], [ 596058, 596159, 596063, 596164 ], [ 596160, 596351, 596165, 596356 ], [ 596352, 596660, 596357, 596665 ], [ 596661, 596960, 596666, 596965 ], [ 596961, 597102, 596966, 597107 ], [ 597103, 597155, 597108, 597160 ], [ 597156, 597257, 597161, 597262 ], [ 597258, 599957, 597263, 599962 ], [ 599958, 604182, 599963, 604186 ], [ 
604183, 611038, 604187, 611043 ], [ 611039, 612202, 611044, 612207 ], [ 612203, 614051, 612208, 614056 ], [ 614052, 614134, 614057, 614139 ], [ 614135, 614787, 614140, 614792 ], [ 614788, 616272, 614793, 616277 ], [ 616273, 616867, 616278, 616871 ], [ 616868, 617737, 616872, 617742 ], [ 617738, 627339, 617743, 627344 ], [ 627340, 628902, 627345, 628907 ], [ 628903, 629142, 628908, 629146 ], [ 629143, 629458, 629147, 629454 ], [ 629459, 636523, 629455, 636528 ], [ 636524, 637529, 636529, 637534 ], [ 637530, 639061, 637535, 639065 ], [ 639062, 647713, 639066, 647718 ], [ 647714, 648684, 647719, 648689 ], [ 648685, 652752, 648690, 652748 ], [ 652753, 653543, 652749, 653548 ], [ 653544, 654406, 653549, 654410 ], [ 654407, 658188, 654411, 658192 ], [ 658189, 659030, 658193, 659035 ], [ 659031, 662241, 659036, 662246 ], [ 662242, 670896, 662247, 670900 ], [ 670897, 671781, 670901, 671786 ], [ 671782, 672048, 671787, 672053 ], [ 672049, 673788, 672054, 673793 ], [ 673789, 674707, 673794, 674712 ], [ 674708, 674998, 674713, 675003 ], [ 674999, 675157, 675004, 675162 ], [ 675158, 688595, 675163, 688600 ], [ 688596, 693309, 688601, 693314 ], [ 693310, 693523, 693315, 693527 ], [ 693524, 696514, 693528, 696518 ], [ 696515, 697406, 696519, 697411 ], [ 697407, 702676, 697412, 702681 ], [ 702677, 707208, 702682, 707212 ], [ 707209, 707382, 707213, 707387 ], [ 707383, 708604, 707388, 708609 ], [ 708605, 710046, 708610, 710051 ], [ 710047, 711630, 710052, 711635 ], [ 711631, 711696, 711636, 711701 ], [ 711697, 712329, 711702, 712334 ], [ 712330, 714099, 712335, 714103 ], [ 714100, 716461, 714104, 716466 ], [ 716462, 720238, 716467, 720243 ], [ 720239, 720374, 720244, 720379 ], [ 720375, 720471, 720380, 720475 ], [ 720472, 721463, 720476, 721467 ], [ 721464, 723143, 721468, 723147 ], [ 723144, 723348, 723148, 723352 ], [ 723349, 724200, 723353, 724205 ], [ 724201, 725464, 724206, 725468 ], [ 725465, 725687, 725469, 725692 ], [ 725688, 729467, 725693, 729471 ], [ 729468, 730067, 
729472, 730072 ], [ 730068, 730406, 730073, 730410 ], [ 730407, 730574, 730411, 730579 ], [ 730575, 730699, 730580, 730704 ], [ 730700, 732726, 730705, 732731 ], [ 732727, 734363, 732732, 734359 ], [ 734364, 738597, 734360, 738602 ], [ 738598, 738869, 738603, 738873 ], [ 738870, 739571, 738874, 739575 ], [ 739572, 742040, 739576, 742044 ], [ 742041, 742350, 742045, 742346 ], [ 742351, 743326, 742347, 743331 ], [ 743327, 743557, 743332, 743561 ], [ 743558, 743966, 743562, 743970 ], [ 743967, 744992, 743971, 744997 ], [ 744993, 745843, 744998, 745848 ], [ 745844, 751518, 745849, 751523 ], [ 751519, 752431, 751524, 752436 ], [ 752432, 752549, 752437, 752554 ], [ 752550, 758622, 752555, 758626 ], [ 758623, 760978, 758627, 760982 ], [ 760979, 761319, 760983, 761315 ], [ 761320, 766036, 761316, 766041 ], [ 766037, 768968, 766042, 768973 ], [ 768969, 770151, 768974, 770156 ], [ 770152, 771158, 770157, 771163 ], [ 771159, 771405, 771164, 771410 ], [ 771406, 771429, 771411, 771433 ], [ 771430, 774384, 771434, 774388 ], [ 774385, 781958, 774389, 781963 ], [ 781959, 784226, 781964, 784231 ], [ 784227, 784572, 784232, 784576 ], [ 784573, 784806, 784577, 784810 ], [ 784807, 786886, 784811, 786890 ], [ 786887, 786945, 786891, 786950 ], [ 786946, 787203, 786951, 787208 ], [ 787204, 789251, 787209, 789256 ], [ 789252, 791218, 789257, 791223 ], [ 791219, 793716, 791224, 793721 ], [ 793717, 795003, 793722, 795008 ], [ 795004, 795521, 795009, 795526 ], [ 795522, 800659, 795527, 800663 ], [ 800660, 802360, 800664, 802364 ], [ 802361, 804514, 802365, 804519 ], [ 804515, 805238, 804520, 805243 ], [ 805239, 805887, 805244, 805892 ], [ 805888, 807288, 805893, 807292 ], [ 807289, 808461, 807293, 808466 ], [ 808462, 808692, 808467, 808696 ], [ 808693, 809805, 808697, 809810 ], [ 809806, 810086, 809811, 810091 ], [ 810087, 810726, 810092, 810731 ], [ 810727, 813170, 810732, 813174 ], [ 813171, 813863, 813175, 813867 ], [ 813864, 820111, 813868, 820116 ], [ 820112, 821326, 820117, 821331 ], [ 
821327, 821647, 821332, 821652 ], [ 821648, 824277, 821653, 824282 ], [ 824278, 825750, 824283, 825755 ], [ 825751, 828770, 825756, 828775 ], [ 828771, 828924, 828776, 828929 ], [ 828925, 830194, 828930, 830199 ], [ 830195, 830786, 830200, 830791 ], [ 830787, 831245, 830792, 831241 ], [ 831246, 832788, 831242, 832793 ], [ 832789, 833306, 832794, 833311 ], [ 833307, 835264, 833312, 835260 ], [ 835265, 835656, 835261, 835661 ], [ 835657, 841180, 835662, 841185 ], [ 841181, 842112, 841186, 842117 ], [ 842113, 842524, 842118, 842528 ], [ 842525, 843973, 842529, 843978 ], [ 843974, 843990, 843979, 843995 ], [ 843991, 851267, 843996, 851271 ], [ 851268, 852882, 851272, 852887 ], [ 852883, 854392, 852888, 854397 ], [ 854393, 857721, 854398, 857726 ], [ 857722, 857961, 857727, 857966 ], [ 857962, 859112, 857967, 859116 ], [ 859113, 862783, 859117, 862788 ], [ 862784, 869922, 862789, 869926 ], [ 869923, 878953, 869927, 878958 ], [ 878954, 885194, 878959, 885199 ], [ 885195, 886313, 885200, 886318 ], [ 886314, 886460, 886319, 886465 ], [ 886461, 888041, 886466, 888045 ], [ 888042, 890161, 888046, 890165 ], [ 890162, 890233, 890166, 890238 ], [ 890234, 890346, 890239, 890351 ], [ 890347, 890379, 890352, 890384 ], [ 890380, 895074, 890385, 895078 ], [ 895075, 897876, 895079, 897880 ], [ 897877, 897943, 897881, 897947 ], [ 897944, 899572, 897948, 899576 ], [ 899573, 899676, 899577, 899681 ], [ 899677, 903962, 899682, 903967 ], [ 903963, 904236, 903968, 904241 ], [ 904237, 908130, 904242, 908135 ], [ 908131, 912131, 908136, 912127 ], [ 912132, 916611, 912128, 916616 ], [ 916612, 916803, 916617, 916808 ], [ 916804, 920531, 916809, 920536 ], [ 920532, 923592, 920537, 923596 ], [ 923593, 927519, 923597, 927523 ], [ 927520, 928181, 927524, 928185 ], [ 928182, 928505, 928186, 928510 ], [ 928506, 928644, 928511, 928640 ], [ 928645, 934935, 928641, 934931 ], [ 934936, 935911, 934932, 935915 ], [ 935912, 936947, 935916, 936952 ], [ 936948, 937240, 936953, 937245 ], [ 937241, 939698, 
937246, 939703 ], [ 939699, 939711, 939704, 939716 ], [ 939712, 941642, 939717, 941647 ], [ 941643, 949052, 941648, 949057 ], [ 949053, 949800, 949058, 949805 ], [ 949801, 949851, 949806, 949855 ], [ 949852, 951412, 949856, 951417 ], [ 951413, 951810, 951418, 951815 ], [ 951811, 952386, 951816, 952391 ], [ 952387, 953295, 952392, 953300 ], [ 953296, 953894, 953301, 953899 ], [ 953895, 955768, 953900, 955772 ], [ 955769, 955953, 955773, 955957 ], [ 955954, 956045, 955958, 956041 ], [ 956046, 958753, 956042, 958758 ], [ 958754, 964476, 958759, 964481 ], [ 964477, 967468, 964482, 967473 ], [ 967469, 969625, 967474, 969629 ], [ 969626, 969631, 969630, 969636 ], [ 969632, 970966, 969637, 970971 ], [ 970967, 971138, 970972, 971143 ], [ 971139, 974185, 971144, 974190 ], [ 974186, 974365, 974191, 974370 ], [ 974366, 975256, 974371, 975261 ], [ 975257, 976794, 975262, 976799 ], [ 976795, 980694, 976800, 980698 ], [ 980695, 987406, 980699, 987411 ], [ 987407, 988132, 987412, 988137 ], [ 988133, 992809, 988138, 992814 ], [ 992810, 996307, 992815, 996311 ], [ 996308, 999121, 996312, 999125 ], [ 999122, 1000225, 999126, 1000230 ], [ 1000226, 1001626, 1000231, 1001631 ], [ 1001627, 1005050, 1001632, 1005054 ], [ 1005051, 1007354, 1005055, 1007359 ], [ 1007355, 1011910, 1007360, 1011915 ], [ 1011911, 1012377, 1011916, 1012382 ], [ 1012378, 1015175, 1012383, 1015179 ], [ 1015176, 1017328, 1015180, 1017333 ], [ 1017329, 1020891, 1017334, 1020896 ], [ 1020892, 1021340, 1020897, 1021345 ], [ 1021341, 1024845, 1021346, 1024850 ], [ 1024846, 1025853, 1024851, 1025858 ], [ 1025854, 1030691, 1025859, 1030696 ], [ 1030692, 1032676, 1030697, 1032681 ], [ 1032677, 1037847, 1032682, 1037852 ], [ 1037848, 1039473, 1037853, 1039478 ], [ 1039474, 1039858, 1039479, 1039854 ], [ 1039859, 1044241, 1039855, 1044246 ], [ 1044242, 1045920, 1044247, 1045925 ], [ 1045921, 1049805, 1045926, 1049809 ], [ 1049806, 1050388, 1049810, 1050392 ], [ 1050389, 1053286, 1050393, 1053291 ], [ 1053287, 1053309, 
1053292, 1053314 ], [ 1053310, 1054643, 1053315, 1054648 ], [ 1054644, 1056252, 1054649, 1056256 ], [ 1056253, 1056527, 1056257, 1056532 ], [ 1056528, 1056947, 1056533, 1056943 ], [ 1056948, 1058682, 1056944, 1058687 ], [ 1058683, 1059297, 1058688, 1059302 ], [ 1059298, 1060416, 1059303, 1060421 ], [ 1060417, 1061894, 1060422, 1061898 ], [ 1061895, 1064234, 1061899, 1064239 ], [ 1064235, 1064848, 1064240, 1064853 ], [ 1064849, 1065434, 1064854, 1065439 ], [ 1065435, 1075046, 1065440, 1075042 ], [ 1075047, 1075642, 1075043, 1075647 ], [ 1075643, 1076325, 1075648, 1076330 ], [ 1076326, 1076534, 1076331, 1076539 ], [ 1076535, 1078135, 1076540, 1078139 ], [ 1078136, 1078866, 1078140, 1078871 ], [ 1078867, 1079914, 1078872, 1079910 ], [ 1079915, 1080537, 1079911, 1080542 ], [ 1080538, 1082144, 1080543, 1082149 ], [ 1082145, 1083416, 1082150, 1083420 ], [ 1083417, 1085746, 1083421, 1085751 ], [ 1085747, 1087087, 1085752, 1087092 ], [ 1087088, 1088273, 1087093, 1088278 ], [ 1088274, 1092093, 1088279, 1092097 ], [ 1092094, 1093062, 1092098, 1093067 ], [ 1093063, 1096867, 1093068, 1096872 ], [ 1096868, 1097288, 1096873, 1097292 ], [ 1097289, 1102488, 1097293, 1102493 ], [ 1102489, 1102751, 1102494, 1102747 ], [ 1102752, 1104366, 1102748, 1104362 ], [ 1104367, 1106371, 1104363, 1106376 ], [ 1106372, 1108123, 1106377, 1108128 ], [ 1108124, 1112263, 1108129, 1112259 ], [ 1112264, 1113311, 1112260, 1113316 ], [ 1113312, 1114557, 1113317, 1114562 ], [ 1114558, 1117715, 1114563, 1117719 ], [ 1117716, 1118552, 1117720, 1118556 ], [ 1118553, 1120566, 1118557, 1120571 ], [ 1120567, 1121004, 1120572, 1121009 ], [ 1121005, 1121076, 1121010, 1121080 ], [ 1121077, 1121609, 1121081, 1121613 ], [ 1121610, 1121694, 1121614, 1121698 ], [ 1121695, 1122501, 1121699, 1122506 ], [ 1122502, 1130582, 1122507, 1130587 ], [ 1130583, 1132170, 1130588, 1132175 ], [ 1132171, 1135259, 1132176, 1135263 ], [ 1135260, 1136119, 1135264, 1136123 ], [ 1136120, 1137316, 1136124, 1137320 ], [ 1137317, 1140126, 
1137321, 1140131 ], [ 1140127, 1142998, 1140132, 1143002 ], [ 1142999, 1143361, 1143003, 1143366 ], [ 1143362, 1143637, 1143367, 1143633 ], [ 1143638, 1143644, 1143634, 1143648 ], [ 1143645, 1146618, 1143649, 1146622 ], [ 1146619, 1149205, 1146623, 1149210 ], [ 1149206, 1149331, 1149211, 1149336 ], [ 1149332, 1152263, 1149337, 1152259 ], [ 1152264, 1152809, 1152260, 1152805 ], [ 1152810, 1154382, 1152806, 1154386 ], [ 1154383, 1156272, 1154387, 1156277 ], [ 1156273, 1159968, 1156278, 1159972 ], [ 1159969, 1161624, 1159973, 1161629 ], [ 1161625, 1163044, 1161630, 1163048 ], [ 1163045, 1164030, 1163049, 1164026 ], [ 1164031, 1166628, 1164027, 1166632 ], [ 1166629, 1167897, 1166633, 1167901 ], [ 1167898, 1171353, 1167902, 1171358 ], [ 1171354, 1171934, 1171359, 1171939 ], [ 1171935, 1172114, 1171940, 1172119 ], [ 1172115, 1173373, 1172120, 1173369 ], [ 1173374, 1175421, 1173370, 1175417 ], [ 1175422, 1179850, 1175418, 1179854 ], [ 1179851, 1181462, 1179855, 1181466 ], [ 1181463, 1185368, 1181467, 1185373 ], [ 1185369, 1193993, 1185374, 1193998 ], [ 1193994, 1194272, 1193999, 1194277 ], [ 1194273, 1195941, 1194278, 1195945 ], [ 1195942, 1197920, 1195946, 1197925 ], [ 1197921, 1199011, 1197926, 1199007 ], [ 1199012, 1200079, 1199008, 1200075 ], [ 1200080, 1200373, 1200076, 1200378 ], [ 1200374, 1200419, 1200379, 1200423 ], [ 1200420, 1200597, 1200424, 1200602 ], [ 1200598, 1200714, 1200603, 1200719 ], [ 1200715, 1203674, 1200720, 1203679 ], [ 1203675, 1204865, 1203680, 1204870 ], [ 1204866, 1205330, 1204871, 1205335 ], [ 1205331, 1206317, 1205336, 1206321 ], [ 1206318, 1210727, 1206322, 1210732 ], [ 1210728, 1211881, 1210733, 1211886 ], [ 1211882, 1214283, 1211887, 1214288 ], [ 1214284, 1215542, 1214289, 1215546 ], [ 1215543, 1216981, 1215547, 1216986 ], [ 1216982, 1220754, 1216987, 1220750 ], [ 1220755, 1221554, 1220751, 1221550 ], [ 1221555, 1222528, 1221551, 1222532 ], [ 1222529, 1223522, 1222533, 1223527 ], [ 1223523, 1223938, 1223528, 1223942 ], [ 1223939, 1226908, 
1223943, 1226912 ], [ 1226909, 1227062, 1226913, 1227066 ], [ 1227063, 1228205, 1227067, 1228210 ], [ 1228206, 1229934, 1228211, 1229938 ], [ 1229935, 1236067, 1229939, 1236072 ], [ 1236068, 1236265, 1236073, 1236270 ], [ 1236266, 1239904, 1236271, 1239908 ], [ 1239905, 1239969, 1239909, 1239974 ], [ 1239970, 1240641, 1239975, 1240646 ], [ 1240642, 1244451, 1240647, 1244447 ], [ 1244452, 1244738, 1244448, 1244743 ], [ 1244739, 1244821, 1244744, 1244826 ], [ 1244822, 1247339, 1244827, 1247343 ], [ 1247340, 1248889, 1247344, 1248885 ], [ 1248890, 1250671, 1248886, 1250675 ], [ 1250672, 1254643, 1250676, 1254647 ], [ 1254644, 1255112, 1254648, 1255108 ], [ 1255113, 1257527, 1255109, 1257523 ], [ 1257528, 1264487, 1257524, 1264491 ], [ 1264488, 1269317, 1264492, 1269321 ], [ 1269318, 1272971, 1269322, 1272976 ], [ 1272972, 1276524, 1272977, 1276529 ], [ 1276525, 1281881, 1276530, 1281877 ], [ 1281882, 1281933, 1281878, 1281937 ], [ 1281934, 1287297, 1281938, 1287301 ], [ 1287298, 1287557, 1287302, 1287561 ], [ 1287558, 1290344, 1287562, 1290349 ], [ 1290345, 1292253, 1290350, 1292258 ], [ 1292254, 1293482, 1292259, 1293487 ], [ 1293483, 1295919, 1293488, 1295924 ], [ 1295920, 1302576, 1295925, 1302580 ], [ 1302577, 1302834, 1302581, 1302839 ], [ 1302835, 1302920, 1302840, 1302924 ], [ 1302921, 1303091, 1302925, 1303095 ], [ 1303092, 1303464, 1303096, 1303469 ], [ 1303465, 1306801, 1303470, 1306805 ], [ 1306802, 1307555, 1306806, 1307559 ], [ 1307556, 1309308, 1307560, 1309313 ], [ 1309309, 1311482, 1309314, 1311487 ], [ 1311483, 1312493, 1311488, 1312498 ], [ 1312494, 1316488, 1312499, 1316493 ], [ 1316489, 1318127, 1316494, 1318132 ], [ 1318128, 1325643, 1318133, 1325648 ], [ 1325644, 1328313, 1325649, 1328318 ], [ 1328314, 1329159, 1328319, 1329155 ], [ 1329160, 1332129, 1329156, 1332133 ], [ 1332130, 1332245, 1332134, 1332249 ], [ 1332246, 1332269, 1332250, 1332265 ], [ 1332270, 1332837, 1332266, 1332841 ], [ 1332838, 1334227, 1332842, 1334223 ], [ 1334228, 1345348, 
1334224, 1345353 ], [ 1345349, 1346662, 1345354, 1346666 ], [ 1346663, 1347480, 1346667, 1347485 ], [ 1347481, 1348458, 1347486, 1348463 ], [ 1348459, 1350595, 1348464, 1350600 ], [ 1350596, 1350770, 1350601, 1350775 ], [ 1350771, 1351954, 1350776, 1351959 ], [ 1351955, 1356474, 1351960, 1356479 ], [ 1356475, 1359703, 1356480, 1359699 ], [ 1359704, 1361176, 1359700, 1361180 ], [ 1361177, 1362756, 1361181, 1362761 ], [ 1362757, 1368544, 1362762, 1368549 ], [ 1368545, 1370442, 1368550, 1370438 ], [ 1370443, 1373317, 1370439, 1373321 ], [ 1373318, 1374484, 1373322, 1374488 ], [ 1374485, 1377993, 1374489, 1377998 ], [ 1377994, 1379610, 1377999, 1379615 ], [ 1379611, 1380792, 1379616, 1380796 ], [ 1380793, 1381209, 1380797, 1381205 ], [ 1381210, 1386893, 1381206, 1386897 ], [ 1386894, 1391551, 1386898, 1391556 ], [ 1391552, 1393799, 1391557, 1393803 ], [ 1393800, 1395841, 1393804, 1395846 ], [ 1395842, 1397239, 1395847, 1397243 ], [ 1397240, 1401721, 1397244, 1401726 ], [ 1401722, 1403822, 1401727, 1403818 ], [ 1403823, 1406871, 1403819, 1406876 ], [ 1406872, 1407926, 1406877, 1407922 ], [ 1407927, 1408482, 1407923, 1408486 ], [ 1408483, 1409484, 1408487, 1409480 ], [ 1409485, 1410252, 1409481, 1410256 ], [ 1410253, 1411041, 1410257, 1411046 ], [ 1411042, 1417851, 1411047, 1417856 ], [ 1417852, 1419058, 1417857, 1419063 ], [ 1419059, 1419370, 1419064, 1419366 ], [ 1419371, 1419429, 1419367, 1419433 ], [ 1419430, 1426518, 1419434, 1426522 ], [ 1426519, 1428120, 1426523, 1428125 ], [ 1428121, 1428584, 1428126, 1428589 ], [ 1428585, 1430135, 1428590, 1430139 ], [ 1430136, 1430700, 1430140, 1430705 ], [ 1430701, 1436904, 1430706, 1436908 ], [ 1436905, 1438278, 1436909, 1438283 ], [ 1438279, 1441717, 1438284, 1441721 ], [ 1441718, 1443084, 1441722, 1443089 ], [ 1443085, 1444668, 1443090, 1444673 ], [ 1444669, 1444866, 1444674, 1444871 ], [ 1444867, 1444914, 1444872, 1444919 ], [ 1444915, 1445093, 1444920, 1445098 ], [ 1445094, 1446216, 1445099, 1446221 ], [ 1446217, 1448333, 
1446222, 1448329 ], [ 1448334, 1448518, 1448330, 1448523 ], [ 1448519, 1449362, 1448524, 1449358 ], [ 1449363, 1449444, 1449359, 1449440 ], [ 1449445, 1452860, 1449441, 1452865 ], [ 1452861, 1454246, 1452866, 1454251 ], [ 1454247, 1455021, 1454252, 1455017 ], [ 1455022, 1455414, 1455018, 1455419 ], [ 1455415, 1460976, 1455420, 1460981 ], [ 1460977, 1461164, 1460982, 1461169 ], [ 1461165, 1461294, 1461170, 1461298 ], [ 1461295, 1463675, 1461299, 1463680 ], [ 1463676, 1463710, 1463681, 1463714 ], [ 1463711, 1465339, 1463715, 1465344 ], [ 1465340, 1469872, 1465345, 1469877 ], [ 1469873, 1471479, 1469878, 1471484 ], [ 1471480, 1471922, 1471485, 1471926 ], [ 1471923, 1472450, 1471927, 1472454 ], [ 1472451, 1472745, 1472455, 1472750 ], [ 1472746, 1479208, 1472751, 1479213 ], [ 1479209, 1480831, 1479214, 1480836 ], [ 1480832, 1483483, 1480837, 1483479 ], [ 1483484, 1485359, 1483480, 1485364 ], [ 1485360, 1485530, 1485365, 1485535 ], [ 1485531, 1485675, 1485536, 1485679 ], [ 1485676, 1486004, 1485680, 1486009 ], [ 1486005, 1487314, 1486010, 1487319 ], [ 1487315, 1491008, 1487320, 1491013 ], [ 1491009, 1492068, 1491014, 1492073 ], [ 1492069, 1492190, 1492074, 1492194 ], [ 1492191, 1493001, 1492195, 1493006 ], [ 1493002, 1495524, 1493007, 1495529 ], [ 1495525, 1498599, 1495530, 1498604 ], [ 1498600, 1499384, 1498605, 1499389 ], [ 1499385, 1500494, 1499390, 1500499 ], [ 1500495, 1504828, 1500500, 1504833 ], [ 1504829, 1506224, 1504834, 1506228 ], [ 1506225, 1506798, 1506229, 1506802 ], [ 1506799, 1508452, 1506803, 1508456 ], [ 1508453, 1509790, 1508457, 1509795 ], [ 1509791, 1512050, 1509796, 1512055 ], [ 1512051, 1514922, 1512056, 1514927 ], [ 1514923, 1515140, 1514928, 1515145 ], [ 1515141, 1515194, 1515146, 1515199 ], [ 1515195, 1515647, 1515200, 1515652 ], [ 1515648, 1516602, 1515653, 1516607 ], [ 1516603, 1517689, 1516608, 1517694 ], [ 1517690, 1519324, 1517695, 1519329 ], [ 1519325, 1524288, 1519330, 1524293 ], [ 1524289, 1524809, 1524294, 1524814 ], [ 1524810, 1524838, 
1524815, 1524842 ], [ 1524839, 1525934, 1524843, 1525939 ], [ 1525935, 1526325, 1525940, 1526330 ], [ 1526326, 1527046, 1526331, 1527051 ], [ 1527047, 1527437, 1527052, 1527441 ], [ 1527438, 1528800, 1527442, 1528805 ], [ 1528801, 1529067, 1528806, 1529072 ], [ 1529068, 1529127, 1529073, 1529132 ], [ 1529128, 1532112, 1529133, 1532108 ], [ 1532113, 1533431, 1532109, 1533435 ], [ 1533432, 1536262, 1533436, 1536267 ], [ 1536263, 1543858, 1536268, 1543863 ], [ 1543859, 1547883, 1543864, 1547887 ], [ 1547884, 1550923, 1547888, 1550927 ], [ 1550924, 1550941, 1550928, 1550945 ], [ 1550942, 1551904, 1550946, 1551900 ], [ 1551905, 1554015, 1551901, 1554020 ], [ 1554016, 1554903, 1554021, 1554907 ], [ 1554904, 1555315, 1554908, 1555320 ], [ 1555316, 1558476, 1555321, 1558481 ], [ 1558477, 1560403, 1558482, 1560408 ], [ 1560404, 1564152, 1560409, 1564157 ], [ 1564153, 1565868, 1564158, 1565873 ], [ 1565869, 1566075, 1565874, 1566080 ], [ 1566076, 1572396, 1566081, 1572392 ], [ 1572397, 1572715, 1572393, 1572720 ], [ 1572716, 1573897, 1572721, 1573901 ], [ 1573898, 1575566, 1573902, 1575571 ], [ 1575567, 1575840, 1575572, 1575845 ], [ 1575841, 1575957, 1575846, 1575962 ], [ 1575958, 1577744, 1575963, 1577748 ], [ 1577745, 1578588, 1577749, 1578593 ], [ 1578589, 1580138, 1578594, 1580134 ], [ 1580139, 1582760, 1580135, 1582764 ], [ 1582761, 1587557, 1582765, 1587562 ], [ 1587558, 1588891, 1587563, 1588896 ], [ 1588892, 1590824, 1588897, 1590828 ], [ 1590825, 1591786, 1590829, 1591790 ], [ 1591787, 1597227, 1591791, 1597232 ], [ 1597228, 1597262, 1597233, 1597267 ], [ 1597263, 1606974, 1597268, 1606979 ], [ 1606975, 1608871, 1606980, 1608875 ], [ 1608872, 1613363, 1608876, 1613367 ], [ 1613364, 1613512, 1613368, 1613517 ], [ 1613513, 1613900, 1613518, 1613905 ], [ 1613901, 1614931, 1613906, 1614936 ], [ 1614932, 1616478, 1614937, 1616482 ], [ 1616479, 1620090, 1616483, 1620094 ], [ 1620091, 1620971, 1620095, 1620976 ], [ 1620972, 1625931, 1620977, 1625936 ], [ 1625932, 1635578, 
1625937, 1635583 ], [ 1635579, 1636949, 1635584, 1636954 ], [ 1636950, 1642076, 1636955, 1642081 ], [ 1642077, 1643227, 1642082, 1643232 ], [ 1643228, 1643451, 1643233, 1643456 ], [ 1643452, 1643568, 1643457, 1643573 ], [ 1643569, 1651406, 1643574, 1651411 ], [ 1651407, 1651474, 1651412, 1651479 ], [ 1651475, 1656665, 1651480, 1656669 ], [ 1656666, 1660688, 1656670, 1660693 ], [ 1660689, 1662867, 1660694, 1662871 ], [ 1662868, 1665846, 1662872, 1665851 ], [ 1665847, 1667026, 1665852, 1667031 ], [ 1667027, 1669021, 1667032, 1669025 ], [ 1669022, 1669975, 1669026, 1669979 ], [ 1669976, 1675465, 1669980, 1675470 ], [ 1675466, 1679164, 1675471, 1679169 ], [ 1679165, 1681962, 1679170, 1681967 ], [ 1681963, 1688016, 1681968, 1688021 ], [ 1688017, 1690659, 1688022, 1690664 ], [ 1690660, 1692872, 1690665, 1692877 ], [ 1692873, 1697102, 1692878, 1697107 ], [ 1697103, 1698132, 1697108, 1698137 ], [ 1698133, 1698208, 1698138, 1698212 ], [ 1698209, 1703429, 1698213, 1703434 ], [ 1703430, 1705101, 1703435, 1705105 ], [ 1705102, 1705881, 1705106, 1705885 ], [ 1705882, 1706057, 1705886, 1706062 ], [ 1706058, 1708138, 1706063, 1708142 ], [ 1708139, 1708683, 1708143, 1708688 ], [ 1708684, 1712200, 1708689, 1712204 ], [ 1712201, 1720884, 1712205, 1720889 ], [ 1720885, 1721218, 1720890, 1721223 ], [ 1721219, 1725289, 1721224, 1725294 ], [ 1725290, 1726034, 1725295, 1726038 ], [ 1726035, 1726495, 1726039, 1726500 ], [ 1726496, 1728646, 1726501, 1728651 ], [ 1728647, 1729060, 1728652, 1729065 ], [ 1729061, 1732801, 1729066, 1732806 ], [ 1732802, 1733308, 1732807, 1733313 ], [ 1733309, 1734471, 1733314, 1734476 ], [ 1734472, 1738054, 1734477, 1738058 ], [ 1738055, 1738256, 1738059, 1738260 ], [ 1738257, 1740942, 1738261, 1740947 ], [ 1740943, 1744762, 1740948, 1744767 ], [ 1744763, 1746379, 1744768, 1746384 ], [ 1746380, 1746672, 1746385, 1746668 ], [ 1746673, 1747144, 1746669, 1747149 ], [ 1747145, 1751310, 1747150, 1751306 ], [ 1751311, 1753062, 1751307, 1753067 ], [ 1753063, 1754367, 
1753068, 1754372 ], [ 1754368, 1758860, 1754373, 1758864 ], [ 1758861, 1763444, 1758865, 1763449 ], [ 1763445, 1769087, 1763450, 1769091 ], [ 1769088, 1769105, 1769092, 1769109 ], [ 1769106, 1769947, 1769110, 1769951 ], [ 1769948, 1772285, 1769952, 1772289 ], [ 1772286, 1774404, 1772290, 1774408 ], [ 1774405, 1776003, 1774409, 1776007 ], [ 1776004, 1777420, 1776008, 1777425 ], [ 1777421, 1782626, 1777426, 1782631 ], [ 1782627, 1784342, 1782632, 1784347 ], [ 1784343, 1784549, 1784348, 1784554 ], [ 1784550, 1785362, 1784555, 1785366 ], [ 1785363, 1791189, 1785367, 1791194 ], [ 1791190, 1792209, 1791195, 1792213 ], [ 1792210, 1793878, 1792214, 1793883 ], [ 1793879, 1794152, 1793884, 1794157 ], [ 1794153, 1794269, 1794158, 1794274 ], [ 1794270, 1794972, 1794275, 1794977 ], [ 1794973, 1796163, 1794978, 1796168 ], [ 1796164, 1800687, 1796169, 1800683 ], [ 1800688, 1802296, 1800684, 1802301 ], [ 1802297, 1804730, 1802302, 1804734 ], [ 1804731, 1805422, 1804735, 1805426 ], [ 1805423, 1805729, 1805427, 1805734 ], [ 1805730, 1806305, 1805735, 1806310 ], [ 1806306, 1806755, 1806311, 1806759 ], [ 1806756, 1806879, 1806760, 1806875 ], [ 1806880, 1810512, 1806876, 1810517 ], [ 1810513, 1816402, 1810518, 1816407 ], [ 1816403, 1826227, 1816408, 1826232 ], [ 1826228, 1826701, 1826233, 1826706 ], [ 1826702, 1827720, 1826707, 1827725 ], [ 1827721, 1833845, 1827726, 1833849 ], [ 1833846, 1836707, 1833850, 1836712 ], [ 1836708, 1836926, 1836713, 1836931 ], [ 1836927, 1838667, 1836932, 1838672 ], [ 1838668, 1838935, 1838673, 1838939 ], [ 1838936, 1842071, 1838940, 1842075 ], [ 1842072, 1842664, 1842076, 1842668 ], [ 1842665, 1843220, 1842669, 1843225 ], [ 1843221, 1843829, 1843226, 1843834 ], [ 1843830, 1845044, 1843835, 1845048 ], [ 1845045, 1846577, 1845049, 1846582 ], [ 1846578, 1848717, 1846583, 1848721 ], [ 1848718, 1849125, 1848722, 1849130 ], [ 1849126, 1850237, 1849131, 1850242 ], [ 1850238, 1851708, 1850243, 1851713 ], [ 1851709, 1853436, 1851714, 1853441 ], [ 1853437, 1853475, 
1853442, 1853480 ], [ 1853476, 1853493, 1853481, 1853498 ], [ 1853494, 1854900, 1853499, 1854905 ], [ 1854901, 1854987, 1854906, 1854991 ], [ 1854988, 1861797, 1854992, 1861802 ], [ 1861798, 1862267, 1861803, 1862272 ], [ 1862268, 1866445, 1862273, 1866450 ], [ 1866446, 1866700, 1866451, 1866705 ], [ 1866701, 1870035, 1866706, 1870039 ], [ 1870036, 1870143, 1870040, 1870148 ], [ 1870144, 1870675, 1870149, 1870680 ], [ 1870676, 1871498, 1870681, 1871502 ], [ 1871499, 1873369, 1871503, 1873373 ], [ 1873370, 1877824, 1873374, 1877828 ], [ 1877825, 1880920, 1877829, 1880916 ], [ 1880921, 1881704, 1880917, 1881709 ], [ 1881705, 1882659, 1881710, 1882664 ], [ 1882660, 1884008, 1882665, 1884013 ], [ 1884009, 1885076, 1884014, 1885081 ], [ 1885077, 1888274, 1885082, 1888270 ], [ 1888275, 1897857, 1888271, 1897862 ], [ 1897858, 1907112, 1897863, 1907116 ], [ 1907113, 1907770, 1907117, 1907774 ], [ 1907771, 1912119, 1907775, 1912123 ], [ 1912120, 1913812, 1912124, 1913816 ], [ 1913813, 1913938, 1913817, 1913942 ], [ 1913939, 1922086, 1913943, 1922090 ], [ 1922087, 1924874, 1922091, 1924878 ], [ 1924875, 1929326, 1924879, 1929322 ], [ 1929327, 1931549, 1929323, 1931554 ], [ 1931550, 1931660, 1931555, 1931665 ], [ 1931661, 1936680, 1931666, 1936685 ], [ 1936681, 1938633, 1936686, 1938637 ], [ 1938634, 1938835, 1938638, 1938840 ], [ 1938836, 1939367, 1938841, 1939372 ], [ 1939368, 1941696, 1939373, 1941700 ], [ 1941697, 1943301, 1941701, 1943305 ], [ 1943302, 1944718, 1943306, 1944723 ], [ 1944719, 1949924, 1944724, 1949929 ], [ 1949925, 1951640, 1949930, 1951645 ], [ 1951641, 1951847, 1951646, 1951852 ], [ 1951848, 1952660, 1951853, 1952664 ], [ 1952661, 1953731, 1952665, 1953735 ], [ 1953732, 1958495, 1953736, 1958500 ], [ 1958496, 1959515, 1958501, 1959519 ], [ 1959516, 1961184, 1959520, 1961189 ], [ 1961185, 1961458, 1961190, 1961463 ], [ 1961459, 1963223, 1961464, 1963228 ], [ 1963224, 1964535, 1963229, 1964540 ], [ 1964536, 1964578, 1964541, 1964583 ], [ 1964579, 1965726, 
1964584, 1965731 ], [ 1965727, 1967936, 1965732, 1967940 ], [ 1967937, 1970524, 1967941, 1970528 ], [ 1970525, 1971307, 1970529, 1971303 ], [ 1971308, 1971952, 1971304, 1971956 ], [ 1971953, 1971961, 1971957, 1971965 ], [ 1971962, 1974876, 1971966, 1974880 ], [ 1974877, 1975723, 1974881, 1975728 ], [ 1975724, 1977141, 1975729, 1977137 ], [ 1977142, 1983495, 1977138, 1983500 ], [ 1983496, 1985266, 1983501, 1985270 ], [ 1985267, 1989041, 1985271, 1989046 ], [ 1989042, 1991504, 1989047, 1991508 ], [ 1991505, 1991939, 1991509, 1991944 ], [ 1991940, 1994134, 1991945, 1994139 ], [ 1994135, 1997719, 1994140, 1997723 ], [ 1997720, 1997804, 1997724, 1997800 ], [ 1997805, 2002856, 1997801, 2002860 ], [ 2002857, 2006390, 2002861, 2006395 ], [ 2006391, 2006681, 2006396, 2006686 ], [ 2006682, 2012753, 2006687, 2012758 ], [ 2012754, 2020299, 2012759, 2020304 ], [ 2020300, 2021594, 2020305, 2021599 ], [ 2021595, 2023023, 2021600, 2023027 ], [ 2023024, 2026978, 2023028, 2026974 ], [ 2026979, 2035653, 2026975, 2035658 ], [ 2035654, 2035969, 2035659, 2035973 ], [ 2035970, 2038524, 2035974, 2038528 ], [ 2038525, 2040783, 2038529, 2040787 ], [ 2040784, 2042430, 2040788, 2042434 ], [ 2042431, 2043961, 2042435, 2043966 ], [ 2043962, 2044411, 2043967, 2044416 ], [ 2044412, 2045320, 2044417, 2045325 ], [ 2045321, 2045652, 2045326, 2045648 ], [ 2045653, 2046593, 2045649, 2046598 ], [ 2046594, 2058014, 2046599, 2058019 ], [ 2058015, 2058163, 2058020, 2058167 ], [ 2058164, 2058262, 2058168, 2058267 ], [ 2058263, 2059250, 2058268, 2059246 ], [ 2059251, 2061616, 2059247, 2061621 ], [ 2061617, 2067334, 2061622, 2067339 ], [ 2067335, 2069059, 2067340, 2069064 ], [ 2069060, 2073142, 2069065, 2073147 ], [ 2073143, 2074555, 2073148, 2074560 ], [ 2074556, 2074634, 2074561, 2074639 ], [ 2074635, 2076234, 2074640, 2076238 ], [ 2076235, 2076422, 2076239, 2076427 ], [ 2076423, 2080648, 2076428, 2080652 ], [ 2080649, 2081937, 2080653, 2081942 ], [ 2081938, 2082042, 2081943, 2082047 ], [ 2082043, 2082408, 
2082048, 2082413 ], [ 2082409, 2086395, 2082414, 2086399 ], [ 2086396, 2088062, 2086400, 2088066 ], [ 2088063, 2088207, 2088067, 2088211 ], [ 2088208, 2092297, 2088212, 2092301 ], [ 2092298, 2093453, 2092302, 2093449 ], [ 2093454, 2094661, 2093450, 2094666 ], [ 2094662, 2098386, 2094667, 2098390 ], [ 2098387, 2105404, 2098391, 2105408 ], [ 2105405, 2105556, 2105409, 2105561 ], [ 2105557, 2106153, 2105562, 2106158 ], [ 2106154, 2107284, 2106159, 2107288 ], [ 2107285, 2110444, 2107289, 2110448 ], [ 2110445, 2110523, 2110449, 2110527 ], [ 2110524, 2113282, 2110528, 2113287 ], [ 2113283, 2114197, 2113288, 2114202 ], [ 2114198, 2119617, 2114203, 2119621 ], [ 2119618, 2124245, 2119622, 2124250 ], [ 2124246, 2126629, 2124251, 2126634 ], [ 2126630, 2127367, 2126635, 2127372 ], [ 2127368, 2131057, 2127373, 2131061 ], [ 2131058, 2131854, 2131062, 2131859 ], [ 2131855, 2134278, 2131860, 2134282 ], [ 2134279, 2134456, 2134283, 2134460 ], [ 2134457, 2137761, 2134461, 2137765 ], [ 2137762, 2138481, 2137766, 2138486 ], [ 2138482, 2139541, 2138487, 2139545 ], [ 2139542, 2140084, 2139546, 2140089 ], [ 2140085, 2151397, 2140090, 2151402 ], [ 2151398, 2154116, 2151403, 2154121 ], [ 2154117, 2158877, 2154122, 2158881 ], [ 2158878, 2158886, 2158882, 2158890 ], [ 2158887, 2160314, 2158891, 2160318 ], [ 2160315, 2164531, 2160319, 2164536 ], [ 2164532, 2164999, 2164537, 2165004 ], [ 2165000, 2166190, 2165005, 2166195 ], [ 2166191, 2168535, 2166196, 2168540 ], [ 2168536, 2168652, 2168541, 2168657 ], [ 2168653, 2168876, 2168658, 2168881 ], [ 2168877, 2169179, 2168882, 2169175 ], [ 2169180, 2170247, 2169176, 2170243 ], [ 2170248, 2175197, 2170244, 2175202 ], [ 2175198, 2176568, 2175203, 2176573 ], [ 2176569, 2185419, 2176574, 2185424 ], [ 2185420, 2187124, 2185425, 2187128 ], [ 2187125, 2188632, 2187129, 2188636 ], [ 2188633, 2194633, 2188637, 2194637 ], [ 2194634, 2196521, 2194638, 2196525 ], [ 2196522, 2197400, 2196526, 2197396 ], [ 2197401, 2198074, 2197397, 2198079 ], [ 2198075, 2200597, 
2198080, 2200601 ], [ 2200598, 2202376, 2200602, 2202380 ], [ 2202377, 2203542, 2202381, 2203546 ], [ 2203543, 2205716, 2203547, 2205721 ], [ 2205717, 2206482, 2205722, 2206487 ], [ 2206483, 2209774, 2206488, 2209778 ], [ 2209775, 2214819, 2209779, 2214824 ], [ 2214820, 2215255, 2214825, 2215260 ], [ 2215256, 2216910, 2215261, 2216915 ], [ 2216911, 2219477, 2216916, 2219482 ], [ 2219478, 2219751, 2219483, 2219756 ], [ 2219752, 2221421, 2219757, 2221425 ], [ 2221422, 2222602, 2221426, 2222607 ], [ 2222603, 2222929, 2222608, 2222925 ], [ 2222930, 2224016, 2222926, 2224021 ], [ 2224017, 2228441, 2224022, 2228445 ], [ 2228442, 2229253, 2228446, 2229258 ], [ 2229254, 2229460, 2229259, 2229465 ], [ 2229461, 2231176, 2229466, 2231181 ], [ 2231177, 2236382, 2231182, 2236387 ], [ 2236383, 2238087, 2236388, 2238091 ], [ 2238088, 2239596, 2238092, 2239600 ], [ 2239597, 2242924, 2239601, 2242920 ], [ 2242925, 2243879, 2242921, 2243883 ], [ 2243880, 2243897, 2243884, 2243901 ], [ 2243898, 2245581, 2243902, 2245586 ], [ 2245582, 2245719, 2245587, 2245724 ], [ 2245720, 2245761, 2245725, 2245766 ], [ 2245762, 2249902, 2245767, 2249907 ], [ 2249903, 2254722, 2249908, 2254727 ], [ 2254723, 2254803, 2254728, 2254807 ], [ 2254804, 2257149, 2254808, 2257153 ], [ 2257150, 2262668, 2257154, 2262673 ], [ 2262669, 2267507, 2262674, 2267503 ], [ 2267508, 2270370, 2267504, 2270366 ], [ 2270371, 2276333, 2270367, 2276338 ], [ 2276334, 2277071, 2276339, 2277075 ], [ 2277072, 2278349, 2277076, 2278354 ], [ 2278350, 2278595, 2278355, 2278600 ], [ 2278596, 2281303, 2278601, 2281307 ], [ 2281304, 2282039, 2281308, 2282044 ], [ 2282040, 2286122, 2282045, 2286126 ], [ 2286123, 2295986, 2286127, 2295990 ], [ 2295987, 2296911, 2295991, 2296907 ], [ 2296912, 2300006, 2296908, 2300010 ], [ 2300007, 2309292, 2300011, 2309297 ], [ 2309293, 2309737, 2309298, 2309742 ], [ 2309738, 2312320, 2309743, 2312324 ], [ 2312321, 2314845, 2312325, 2314850 ], [ 2314846, 2315016, 2314851, 2315021 ], [ 2315017, 2320047, 
2315022, 2320052 ], [ 2320048, 2320645, 2320053, 2320650 ], [ 2320646, 2326925, 2320651, 2326921 ], [ 2326926, 2330437, 2326922, 2330442 ], [ 2330438, 2335656, 2330443, 2335660 ], [ 2335657, 2338082, 2335661, 2338087 ], [ 2338083, 2343729, 2338088, 2343725 ], [ 2343730, 2345465, 2343726, 2345470 ], [ 2345466, 2345517, 2345471, 2345521 ], [ 2345518, 2347136, 2345522, 2347140 ], [ 2347137, 2347233, 2347141, 2347238 ], [ 2347234, 2348720, 2347239, 2348725 ], [ 2348721, 2351324, 2348726, 2351329 ], [ 2351325, 2352448, 2351330, 2352453 ], [ 2352449, 2353999, 2352454, 2354004 ], [ 2354000, 2354134, 2354005, 2354138 ], [ 2354135, 2359046, 2354139, 2359051 ], [ 2359047, 2361149, 2359052, 2361154 ], [ 2361150, 2374039, 2361155, 2374044 ], [ 2374040, 2382502, 2374045, 2382506 ], [ 2382503, 2385349, 2382507, 2385354 ], [ 2385350, 2388585, 2385355, 2388590 ], [ 2388586, 2391734, 2388591, 2391739 ], [ 2391735, 2392141, 2391740, 2392146 ], [ 2392142, 2393939, 2392147, 2393944 ], [ 2393940, 2395026, 2393945, 2395031 ], [ 2395027, 2395860, 2395032, 2395865 ], [ 2395861, 2398211, 2395866, 2398216 ], [ 2398212, 2398326, 2398217, 2398331 ], [ 2398327, 2400696, 2398332, 2400700 ], [ 2400697, 2402303, 2400701, 2402308 ], [ 2402304, 2405335, 2402309, 2405339 ], [ 2405336, 2408154, 2405340, 2408159 ], [ 2408155, 2409936, 2408160, 2409941 ], [ 2409937, 2410353, 2409942, 2410358 ], [ 2410354, 2411021, 2410359, 2411026 ], [ 2411022, 2414614, 2411027, 2414618 ], [ 2414615, 2419571, 2414619, 2419576 ], [ 2419572, 2421744, 2419577, 2421748 ], [ 2421745, 2424488, 2421749, 2424493 ], [ 2424489, 2427895, 2424494, 2427900 ], [ 2427896, 2430604, 2427901, 2430600 ], [ 2430605, 2433794, 2430601, 2433799 ], [ 2433795, 2434280, 2433800, 2434285 ], [ 2434281, 2435465, 2434286, 2435461 ], [ 2435466, 2436129, 2435462, 2436134 ], [ 2436130, 2446339, 2436135, 2446344 ], [ 2446340, 2446355, 2446345, 2446360 ], [ 2446356, 2447550, 2446361, 2447555 ], [ 2447551, 2447589, 2447556, 2447593 ], [ 2447590, 2451279, 
2447594, 2451283 ], [ 2451280, 2456375, 2451284, 2456380 ], [ 2456376, 2458676, 2456381, 2458672 ], [ 2458677, 2459075, 2458673, 2459079 ], [ 2459076, 2459685, 2459080, 2459690 ], [ 2459686, 2467707, 2459691, 2467712 ], [ 2467708, 2474920, 2467713, 2474924 ], [ 2474921, 2483809, 2474925, 2483813 ], [ 2483810, 2487345, 2483814, 2487349 ], [ 2487346, 2489626, 2487350, 2489631 ], [ 2489627, 2490030, 2489632, 2490035 ], [ 2490031, 2493086, 2490036, 2493090 ], [ 2493087, 2494181, 2493091, 2494186 ], [ 2494182, 2494578, 2494187, 2494583 ], [ 2494579, 2498330, 2494584, 2498335 ], [ 2498331, 2501619, 2498336, 2501624 ], [ 2501620, 2502148, 2501625, 2502152 ], [ 2502149, 2502774, 2502153, 2502779 ], [ 2502775, 2503405, 2502780, 2503409 ], [ 2503406, 2505440, 2503410, 2505445 ], [ 2505441, 2507840, 2505446, 2507845 ], [ 2507841, 2513737, 2507846, 2513741 ], [ 2513738, 2513953, 2513742, 2513958 ], [ 2513954, 2516708, 2513959, 2516712 ], [ 2516709, 2518482, 2516713, 2518487 ], [ 2518483, 2518510, 2518488, 2518515 ], [ 2518511, 2519154, 2518516, 2519159 ], [ 2519155, 2521663, 2519160, 2521668 ], [ 2521664, 2522690, 2521669, 2522695 ], [ 2522691, 2533188, 2522696, 2533184 ], [ 2533189, 2535156, 2533185, 2535161 ], [ 2535157, 2536302, 2535162, 2536307 ], [ 2536303, 2536525, 2536308, 2536521 ], [ 2536526, 2539683, 2536522, 2539688 ], [ 2539684, 2540838, 2539689, 2540843 ], [ 2540839, 2542188, 2540844, 2542192 ], [ 2542189, 2542542, 2542193, 2542547 ], [ 2542543, 2543529, 2542548, 2543533 ], [ 2543530, 2543833, 2543534, 2543829 ], [ 2543834, 2549711, 2543830, 2549716 ], [ 2549712, 2549979, 2549717, 2549984 ], [ 2549980, 2550376, 2549985, 2550381 ], [ 2550377, 2550442, 2550382, 2550447 ], [ 2550443, 2552498, 2550448, 2552503 ], [ 2552499, 2556237, 2552504, 2556242 ], [ 2556238, 2557871, 2556243, 2557875 ], [ 2557872, 2561281, 2557876, 2561286 ], [ 2561282, 2562381, 2561287, 2562386 ], [ 2562382, 2571576, 2562387, 2571581 ], [ 2571577, 2573918, 2571582, 2573923 ], [ 2573919, 2575854, 
2573924, 2575859 ], [ 2575855, 2575961, 2575860, 2575965 ], [ 2575962, 2576170, 2575966, 2576166 ], [ 2576171, 2579045, 2576167, 2579050 ], [ 2579046, 2587793, 2579051, 2587797 ], [ 2587794, 2588693, 2587798, 2588689 ], [ 2588694, 2588728, 2588690, 2588733 ], [ 2588729, 2589643, 2588734, 2589647 ], [ 2589644, 2591930, 2589648, 2591935 ], [ 2591931, 2592492, 2591936, 2592496 ], [ 2592493, 2595383, 2592497, 2595387 ], [ 2595384, 2601304, 2595388, 2601309 ], [ 2601305, 2610396, 2601310, 2610400 ], [ 2610397, 2613914, 2610401, 2613918 ], [ 2613915, 2614812, 2613919, 2614817 ], [ 2614813, 2614839, 2614818, 2614844 ], [ 2614840, 2622328, 2614845, 2622333 ], [ 2622329, 2624051, 2622334, 2624055 ], [ 2624052, 2627903, 2624056, 2627908 ], [ 2627904, 2633758, 2627909, 2633762 ], [ 2633759, 2640020, 2633763, 2640024 ], [ 2640021, 2648431, 2640025, 2648436 ], [ 2648432, 2651846, 2648437, 2651851 ], [ 2651847, 2658412, 2651852, 2658416 ], [ 2658413, 2660586, 2658417, 2660591 ], [ 2660587, 2660643, 2660592, 2660647 ], [ 2660644, 2662994, 2660648, 2662998 ], [ 2662995, 2663434, 2662999, 2663439 ], [ 2663435, 2664288, 2663440, 2664292 ], [ 2664289, 2666257, 2664293, 2666261 ], [ 2666258, 2668305, 2666262, 2668301 ], [ 2668306, 2668367, 2668302, 2668363 ], [ 2668368, 2669373, 2668364, 2669377 ], [ 2669374, 2674481, 2669378, 2674486 ], [ 2674482, 2674949, 2674487, 2674954 ], [ 2674950, 2676096, 2674955, 2676101 ], [ 2676097, 2676139, 2676102, 2676144 ], [ 2676140, 2678485, 2676145, 2678490 ], [ 2678486, 2678602, 2678491, 2678607 ], [ 2678603, 2678826, 2678608, 2678831 ], [ 2678827, 2679129, 2678832, 2679125 ], [ 2679130, 2680196, 2679126, 2680192 ], [ 2680197, 2681278, 2680193, 2681283 ], [ 2681279, 2683258, 2681284, 2683262 ], [ 2683259, 2684926, 2683263, 2684931 ], [ 2684927, 2685205, 2684932, 2685210 ], [ 2685206, 2691894, 2685211, 2691898 ], [ 2691895, 2692493, 2691899, 2692498 ], [ 2692494, 2695364, 2692499, 2695368 ], [ 2695365, 2696872, 2695369, 2696876 ], [ 2696873, 2701148, 
2696877, 2701152 ], [ 2701149, 2701165, 2701153, 2701169 ], [ 2701166, 2703577, 2701170, 2703581 ], [ 2703578, 2709273, 2703582, 2709269 ], [ 2709274, 2710251, 2709270, 2710255 ], [ 2710252, 2714956, 2710256, 2714961 ], [ 2714957, 2716135, 2714962, 2716140 ], [ 2716136, 2716265, 2716141, 2716261 ], [ 2716266, 2716930, 2716262, 2716935 ], [ 2716931, 2717350, 2716936, 2717355 ], [ 2717351, 2717774, 2717356, 2717779 ], [ 2717775, 2718590, 2717780, 2718595 ], [ 2718591, 2720379, 2718596, 2720384 ], [ 2720380, 2721786, 2720385, 2721790 ], [ 2721787, 2723960, 2721791, 2723965 ], [ 2723961, 2725032, 2723966, 2725036 ], [ 2725033, 2725167, 2725037, 2725171 ], [ 2725168, 2725658, 2725172, 2725663 ], [ 2725659, 2731050, 2725664, 2731054 ], [ 2731051, 2731980, 2731055, 2731985 ], [ 2731981, 2738172, 2731986, 2738177 ], [ 2738173, 2738501, 2738178, 2738505 ], [ 2738502, 2741368, 2738506, 2741372 ], [ 2741369, 2741523, 2741373, 2741527 ], [ 2741524, 2743821, 2741528, 2743817 ], [ 2743822, 2743903, 2743818, 2743899 ], [ 2743904, 2746696, 2743900, 2746700 ], [ 2746697, 2748433, 2746701, 2748438 ], [ 2748434, 2748596, 2748439, 2748601 ], [ 2748597, 2749549, 2748602, 2749553 ], [ 2749550, 2749729, 2749554, 2749734 ], [ 2749730, 2750963, 2749735, 2750967 ], [ 2750964, 2751613, 2750968, 2751609 ], [ 2751614, 2752944, 2751610, 2752949 ], [ 2752945, 2753953, 2752950, 2753958 ], [ 2753954, 2754044, 2753959, 2754040 ], [ 2754045, 2755376, 2754041, 2755380 ], [ 2755377, 2760599, 2755381, 2760604 ], [ 2760600, 2761236, 2760605, 2761241 ], [ 2761237, 2763346, 2761242, 2763351 ], [ 2763347, 2764218, 2763352, 2764223 ], [ 2764219, 2767629, 2764224, 2767633 ], [ 2767630, 2771002, 2767634, 2771006 ], [ 2771003, 2773348, 2771007, 2773353 ], [ 2773349, 2778556, 2773354, 2778560 ], [ 2778557, 2779108, 2778561, 2779112 ], [ 2779109, 2779707, 2779113, 2779712 ], [ 2779708, 2780639, 2779713, 2780643 ], [ 2780640, 2781795, 2780644, 2781791 ], [ 2781796, 2782076, 2781792, 2782080 ], [ 2782077, 2788770, 
2782081, 2788774 ], [ 2788771, 2792111, 2788775, 2792116 ], [ 2792112, 2794418, 2792117, 2794423 ], [ 2794419, 2795818, 2794424, 2795823 ], [ 2795819, 2796261, 2795824, 2796266 ], [ 2796262, 2798929, 2796267, 2798934 ], [ 2798930, 2799454, 2798935, 2799459 ], [ 2799455, 2811616, 2799460, 2811620 ], [ 2811617, 2813416, 2811621, 2813421 ], [ 2813417, 2813479, 2813422, 2813484 ], [ 2813480, 2814107, 2813485, 2814112 ], [ 2814108, 2818929, 2814113, 2818933 ], [ 2818930, 2825357, 2818934, 2825362 ], [ 2825358, 2826820, 2825363, 2826825 ], [ 2826821, 2828096, 2826826, 2828101 ], [ 2828097, 2828932, 2828102, 2828936 ], [ 2828933, 2830088, 2828937, 2830093 ], [ 2830089, 2830687, 2830094, 2830683 ], [ 2830688, 2834601, 2830684, 2834606 ], [ 2834602, 2836621, 2834607, 2836626 ], [ 2836622, 2836884, 2836627, 2836889 ], [ 2836885, 2837913, 2836890, 2837918 ], [ 2837914, 2840393, 2837919, 2840398 ], [ 2840394, 2841225, 2840399, 2841229 ], [ 2841226, 2843391, 2841230, 2843396 ], [ 2843392, 2843629, 2843397, 2843634 ], [ 2843630, 2844665, 2843635, 2844670 ], [ 2844666, 2845263, 2844671, 2845267 ], [ 2845264, 2847821, 2845268, 2847826 ], [ 2847822, 2848450, 2847827, 2848454 ], [ 2848451, 2849601, 2848455, 2849606 ], [ 2849602, 2853189, 2849607, 2853194 ], [ 2853190, 2853798, 2853195, 2853802 ], [ 2853799, 2860428, 2853803, 2860433 ], [ 2860429, 2862152, 2860434, 2862157 ], [ 2862153, 2862729, 2862158, 2862734 ], [ 2862730, 2869033, 2862735, 2869038 ], [ 2869034, 2869157, 2869039, 2869162 ], [ 2869158, 2872699, 2869163, 2872703 ], [ 2872700, 2882082, 2872704, 2882087 ], [ 2882083, 2888775, 2882088, 2888779 ], [ 2888776, 2894091, 2888780, 2894096 ], [ 2894092, 2895090, 2894097, 2895095 ], [ 2895091, 2895718, 2895096, 2895722 ], [ 2895719, 2896669, 2895723, 2896665 ], [ 2896670, 2900119, 2896666, 2900124 ], [ 2900120, 2900555, 2900125, 2900560 ], [ 2900556, 2902167, 2900561, 2902172 ], [ 2902168, 2902210, 2902173, 2902215 ], [ 2902211, 2904556, 2902216, 2904561 ], [ 2904557, 2904673, 
2904562, 2904678 ], [ 2904674, 2904897, 2904679, 2904902 ], [ 2904898, 2905200, 2904903, 2905196 ], [ 2905201, 2906268, 2905197, 2906264 ], [ 2906269, 2907797, 2906265, 2907802 ], [ 2907798, 2910456, 2907803, 2910461 ], [ 2910457, 2911608, 2910462, 2911613 ], [ 2911609, 2914587, 2911614, 2914591 ], [ 2914588, 2914744, 2914592, 2914749 ], [ 2914745, 2914779, 2914750, 2914784 ], [ 2914780, 2918124, 2914785, 2918129 ], [ 2918125, 2921020, 2918130, 2921025 ], [ 2921021, 2921458, 2921026, 2921463 ], [ 2921459, 2923338, 2921464, 2923342 ], [ 2923339, 2926580, 2923343, 2926585 ], [ 2926581, 2930570, 2926586, 2930575 ], [ 2930571, 2931452, 2930576, 2931456 ], [ 2931453, 2934873, 2931457, 2934878 ], [ 2934874, 2940294, 2934879, 2940298 ], [ 2940295, 2942883, 2940299, 2942888 ], [ 2942884, 2943246, 2942889, 2943250 ], [ 2943247, 2946109, 2943251, 2946105 ], [ 2946110, 2950823, 2946106, 2950828 ], [ 2950824, 2952204, 2950829, 2952209 ], [ 2952205, 2952842, 2952210, 2952846 ], [ 2952843, 2954108, 2952847, 2954113 ], [ 2954109, 2958788, 2954114, 2958793 ], [ 2958789, 2959652, 2958794, 2959656 ], [ 2959653, 2962338, 2959657, 2962343 ], [ 2962339, 2962634, 2962344, 2962639 ], [ 2962635, 2963567, 2962640, 2963572 ], [ 2963568, 2965512, 2963573, 2965517 ], [ 2965513, 2965542, 2965518, 2965546 ], [ 2965543, 2965715, 2965547, 2965720 ], [ 2965716, 2969537, 2965721, 2969542 ], [ 2969538, 2969667, 2969543, 2969672 ], [ 2969668, 2971097, 2969673, 2971102 ], [ 2971098, 2971329, 2971103, 2971334 ], [ 2971330, 2971441, 2971335, 2971437 ], [ 2971442, 2971588, 2971438, 2971584 ], [ 2971589, 2971874, 2971585, 2971879 ], [ 2971875, 2972098, 2971880, 2972103 ], [ 2972099, 2974355, 2972104, 2974359 ], [ 2974356, 2978342, 2974360, 2978347 ], [ 2978343, 2984060, 2978348, 2984065 ], [ 2984061, 2985968, 2984066, 2985972 ], [ 2985969, 2987338, 2985973, 2987342 ], [ 2987339, 2988924, 2987343, 2988929 ], [ 2988925, 2991267, 2988930, 2991271 ], [ 2991268, 2994739, 2991272, 2994744 ], [ 2994740, 3002088, 
2994745, 3002093 ], [ 3002089, 3009887, 3002094, 3009892 ], [ 3009888, 3014827, 3009893, 3014832 ], [ 3014828, 3017608, 3014833, 3017612 ], [ 3017609, 3020196, 3017613, 3020192 ], [ 3020197, 3020885, 3020193, 3020890 ], [ 3020886, 3022261, 3020891, 3022266 ], [ 3022262, 3029543, 3022267, 3029548 ], [ 3029544, 3030265, 3029549, 3030270 ], [ 3030266, 3032363, 3030271, 3032368 ], [ 3032364, 3033161, 3032369, 3033166 ], [ 3033162, 3042175, 3033167, 3042180 ], [ 3042176, 3042389, 3042181, 3042394 ], [ 3042390, 3043311, 3042395, 3043315 ], [ 3043312, 3045343, 3043316, 3045347 ], [ 3045344, 3049663, 3045348, 3049668 ], [ 3049664, 3050210, 3049669, 3050215 ], [ 3050211, 3051389, 3050216, 3051394 ], [ 3051390, 3052128, 3051395, 3052133 ], [ 3052129, 3052883, 3052134, 3052888 ], [ 3052884, 3054679, 3052889, 3054684 ], [ 3054680, 3055955, 3054685, 3055960 ], [ 3055956, 3056024, 3055961, 3056029 ], [ 3056025, 3062859, 3056030, 3062864 ], [ 3062860, 3063276, 3062865, 3063281 ], [ 3063277, 3064101, 3063282, 3064106 ], [ 3064102, 3065135, 3064107, 3065131 ], [ 3065136, 3065575, 3065132, 3065580 ], [ 3065576, 3065710, 3065581, 3065715 ], [ 3065711, 3066590, 3065716, 3066595 ], [ 3066591, 3070621, 3066596, 3070625 ], [ 3070622, 3075292, 3070626, 3075297 ], [ 3075293, 3076853, 3075298, 3076858 ], [ 3076854, 3078695, 3076859, 3078699 ], [ 3078696, 3080566, 3078700, 3080571 ], [ 3080567, 3080707, 3080572, 3080712 ], [ 3080708, 3080747, 3080713, 3080752 ], [ 3080748, 3080916, 3080753, 3080921 ], [ 3080917, 3082200, 3080922, 3082205 ], [ 3082201, 3085986, 3082206, 3085991 ], [ 3085987, 3086111, 3085992, 3086116 ], [ 3086112, 3088509, 3086117, 3088514 ], [ 3088510, 3090889, 3088515, 3090893 ], [ 3090890, 3091108, 3090894, 3091113 ], [ 3091109, 3094298, 3091114, 3094303 ], [ 3094299, 3095542, 3094304, 3095546 ], [ 3095543, 3098254, 3095547, 3098259 ], [ 3098255, 3100958, 3098260, 3100963 ], [ 3100959, 3101111, 3100964, 3101116 ], [ 3101112, 3101710, 3101117, 3101715 ], [ 3101711, 3102184, 
3101716, 3102180 ], [ 3102185, 3107070, 3102181, 3107075 ], [ 3107071, 3108550, 3107076, 3108555 ], [ 3108551, 3109638, 3108556, 3109642 ], [ 3109639, 3110010, 3109643, 3110014 ], [ 3110011, 3110596, 3110015, 3110592 ], [ 3110597, 3114705, 3110593, 3114709 ], [ 3114706, 3116752, 3114710, 3116757 ], [ 3116753, 3118943, 3116758, 3118947 ], [ 3118944, 3118963, 3118948, 3118967 ], [ 3118964, 3119656, 3118968, 3119661 ], [ 3119657, 3120436, 3119662, 3120441 ], [ 3120437, 3122936, 3120442, 3122941 ], [ 3122937, 3125746, 3122942, 3125751 ], [ 3125747, 3126385, 3125752, 3126390 ], [ 3126386, 3126792, 3126391, 3126796 ], [ 3126793, 3129487, 3126797, 3129491 ], [ 3129488, 3129525, 3129492, 3129530 ], [ 3129526, 3129570, 3129531, 3129575 ], [ 3129571, 3129752, 3129576, 3129757 ], [ 3129753, 3132349, 3129758, 3132354 ], [ 3132350, 3133454, 3132355, 3133458 ], [ 3133455, 3135493, 3133459, 3135497 ], [ 3135494, 3139904, 3135498, 3139909 ], [ 3139905, 3143832, 3139910, 3143837 ], [ 3143833, 3144573, 3143838, 3144578 ], [ 3144574, 3146787, 3144579, 3146783 ], [ 3146788, 3148252, 3146784, 3148257 ], [ 3148253, 3149799, 3148258, 3149803 ], [ 3149800, 3150053, 3149804, 3150058 ], [ 3150054, 3152136, 3150059, 3152141 ], [ 3152137, 3159529, 3152142, 3159533 ], [ 3159530, 3161578, 3159534, 3161583 ], [ 3161579, 3162165, 3161584, 3162169 ], [ 3162166, 3162816, 3162170, 3162820 ], [ 3162817, 3164826, 3162821, 3164831 ], [ 3164827, 3164943, 3164832, 3164948 ], [ 3164944, 3167365, 3164949, 3167369 ], [ 3167366, 3167563, 3167370, 3167568 ], [ 3167564, 3168258, 3167569, 3168263 ], [ 3168259, 3170295, 3168264, 3170300 ], [ 3170296, 3172533, 3170301, 3172538 ], [ 3172534, 3176792, 3172539, 3176797 ], [ 3176793, 3177637, 3176798, 3177642 ], [ 3177638, 3178306, 3177643, 3178311 ], [ 3178307, 3180657, 3178312, 3180661 ], [ 3180658, 3181485, 3180662, 3181489 ], [ 3181486, 3182629, 3181490, 3182633 ], [ 3182630, 3186857, 3182634, 3186862 ], [ 3186858, 3190084, 3186863, 3190088 ], [ 3190085, 3194127, 
3190089, 3194131 ], [ 3194128, 3196386, 3194132, 3196391 ], [ 3196387, 3196711, 3196392, 3196715 ], [ 3196712, 3197840, 3196716, 3197845 ], [ 3197841, 3201756, 3197846, 3201760 ], [ 3201757, 3208054, 3201761, 3208058 ], [ 3208055, 3210828, 3208059, 3210833 ], [ 3210829, 3212953, 3210834, 3212958 ], [ 3212954, 3224927, 3212959, 3224931 ], [ 3224928, 3226292, 3224932, 3226297 ], [ 3226293, 3226762, 3226298, 3226767 ], [ 3226763, 3228540, 3226768, 3228544 ], [ 3228541, 3229224, 3228545, 3229228 ], [ 3229225, 3230485, 3229229, 3230490 ], [ 3230486, 3233260, 3230491, 3233264 ], [ 3233261, 3233743, 3233265, 3233748 ], [ 3233744, 3234567, 3233749, 3234572 ], [ 3234568, 3235194, 3234573, 3235199 ], [ 3235195, 3235499, 3235200, 3235504 ], [ 3235500, 3243889, 3235505, 3243894 ], [ 3243890, 3245053, 3243895, 3245057 ], [ 3245054, 3245941, 3245058, 3245945 ], [ 3245942, 3255842, 3245946, 3255838 ], [ 3255843, 3259251, 3255839, 3259256 ], [ 3259252, 3260019, 3259257, 3260023 ], [ 3260020, 3262432, 3260024, 3262437 ], [ 3262433, 3262672, 3262438, 3262677 ], [ 3262673, 3266221, 3262678, 3266225 ], [ 3266222, 3266686, 3266226, 3266691 ], [ 3266687, 3269046, 3266692, 3269051 ], [ 3269047, 3269620, 3269052, 3269625 ], [ 3269621, 3272036, 3269626, 3272041 ], [ 3272037, 3272516, 3272042, 3272521 ], [ 3272517, 3272796, 3272522, 3272801 ], [ 3272797, 3278978, 3272802, 3278983 ], [ 3278979, 3279467, 3278984, 3279472 ], [ 3279468, 3280283, 3279473, 3280288 ], [ 3280284, 3282531, 3280289, 3282536 ], [ 3282532, 3283074, 3282537, 3283078 ], [ 3283075, 3283850, 3283079, 3283854 ], [ 3283851, 3286305, 3283855, 3286310 ], [ 3286306, 3286606, 3286311, 3286611 ], [ 3286607, 3287302, 3286612, 3287307 ], [ 3287303, 3290573, 3287308, 3290569 ], [ 3290574, 3290776, 3290570, 3290781 ], [ 3290777, 3294305, 3290782, 3294310 ], [ 3294306, 3295201, 3294311, 3295205 ], [ 3295202, 3298010, 3295206, 3298015 ], [ 3298011, 3298799, 3298016, 3298804 ], [ 3298800, 3299744, 3298805, 3299749 ], [ 3299745, 3302402, 
3299750, 3302407 ], [ 3302403, 3308399, 3302408, 3308403 ], [ 3308400, 3313629, 3308404, 3313633 ], [ 3313630, 3314750, 3313634, 3314755 ], [ 3314751, 3314777, 3314756, 3314782 ], [ 3314778, 3315543, 3314783, 3315548 ], [ 3315544, 3317191, 3315549, 3317195 ], [ 3317192, 3319495, 3317196, 3319500 ], [ 3319496, 3321744, 3319501, 3321749 ], [ 3321745, 3324685, 3321750, 3324690 ], [ 3324686, 3331190, 3324691, 3331194 ], [ 3331191, 3331241, 3331195, 3331246 ], [ 3331242, 3334147, 3331247, 3334151 ], [ 3334148, 3334546, 3334152, 3334550 ], [ 3334547, 3339219, 3334551, 3339215 ], [ 3339220, 3339452, 3339216, 3339456 ], [ 3339453, 3345624, 3339457, 3345629 ], [ 3345625, 3349642, 3345630, 3349646 ], [ 3349643, 3349729, 3349647, 3349734 ], [ 3349730, 3350579, 3349735, 3350584 ], [ 3350580, 3351752, 3350585, 3351756 ], [ 3351753, 3351958, 3351757, 3351962 ], [ 3351959, 3353994, 3351963, 3353999 ], [ 3353995, 3355238, 3354000, 3355243 ], [ 3355239, 3356216, 3355244, 3356220 ], [ 3356217, 3360120, 3356221, 3360125 ], [ 3360121, 3360807, 3360126, 3360811 ], [ 3360808, 3362361, 3360812, 3362365 ], [ 3362362, 3363476, 3362366, 3363480 ], [ 3363477, 3364333, 3363481, 3364337 ], [ 3364334, 3368828, 3364338, 3368833 ], [ 3368829, 3370235, 3368834, 3370239 ], [ 3370236, 3371627, 3370240, 3371631 ], [ 3371628, 3375963, 3371632, 3375967 ], [ 3375964, 3376335, 3375968, 3376340 ], [ 3376336, 3380722, 3376341, 3380727 ], [ 3380723, 3381309, 3380728, 3381314 ], [ 3381310, 3382080, 3381315, 3382085 ], [ 3382081, 3384674, 3382086, 3384679 ], [ 3384675, 3385188, 3384680, 3385193 ], [ 3385189, 3388713, 3385194, 3388718 ], [ 3388714, 3392079, 3388719, 3392084 ], [ 3392080, 3394131, 3392085, 3394136 ], [ 3394132, 3394381, 3394137, 3394386 ], [ 3394382, 3395362, 3394387, 3395367 ], [ 3395363, 3395419, 3395368, 3395423 ], [ 3395420, 3396615, 3395424, 3396620 ], [ 3396616, 3399453, 3396621, 3399458 ], [ 3399454, 3400585, 3399459, 3400590 ], [ 3400586, 3401981, 3400591, 3401977 ], [ 3401982, 3405494, 
3401978, 3405499 ], [ 3405495, 3408302, 3405500, 3408306 ], [ 3408303, 3413040, 3408307, 3413045 ], [ 3413041, 3413955, 3413046, 3413960 ], [ 3413956, 3413965, 3413961, 3413970 ], [ 3413966, 3414900, 3413971, 3414904 ], [ 3414901, 3415450, 3414905, 3415455 ], [ 3415451, 3415543, 3415456, 3415548 ], [ 3415544, 3415693, 3415549, 3415698 ], [ 3415694, 3416832, 3415699, 3416828 ], [ 3416833, 3416866, 3416829, 3416871 ], [ 3416867, 3420715, 3416872, 3420719 ], [ 3420716, 3421885, 3420720, 3421890 ], [ 3421886, 3425027, 3421891, 3425031 ], [ 3425028, 3425142, 3425032, 3425147 ], [ 3425143, 3429164, 3425148, 3429168 ], [ 3429165, 3431958, 3429169, 3431962 ], [ 3431959, 3437815, 3431963, 3437820 ], [ 3437816, 3439239, 3437821, 3439243 ], [ 3439240, 3440231, 3439244, 3440236 ], [ 3440232, 3440415, 3440237, 3440411 ], [ 3440416, 3442385, 3440412, 3442390 ], [ 3442386, 3448532, 3442391, 3448536 ], [ 3448533, 3449018, 3448537, 3449014 ], [ 3449019, 3449766, 3449015, 3449771 ], [ 3449767, 3450597, 3449772, 3450601 ], [ 3450598, 3451510, 3450602, 3451514 ], [ 3451511, 3455799, 3451515, 3455804 ], [ 3455800, 3457236, 3455805, 3457240 ], [ 3457237, 3463488, 3457241, 3463492 ], [ 3463489, 3473047, 3463493, 3473052 ], [ 3473048, 3479777, 3473053, 3479773 ], [ 3479778, 3483375, 3479774, 3483380 ], [ 3483376, 3484793, 3483381, 3484797 ], [ 3484794, 3486395, 3484798, 3486399 ], [ 3486396, 3490246, 3486400, 3490242 ], [ 3490247, 3494295, 3490243, 3494299 ], [ 3494296, 3495597, 3494300, 3495601 ], [ 3495598, 3496721, 3495602, 3496725 ], [ 3496722, 3499522, 3496726, 3499526 ], [ 3499523, 3503195, 3499527, 3503199 ], [ 3503196, 3505500, 3503200, 3505505 ], [ 3505501, 3510463, 3505506, 3510467 ], [ 3510464, 3514082, 3510468, 3514087 ], [ 3514083, 3520619, 3514088, 3520623 ], [ 3520620, 3521049, 3520624, 3521053 ], [ 3521050, 3522092, 3521054, 3522097 ], [ 3522093, 3522111, 3522098, 3522116 ], [ 3522112, 3522311, 3522117, 3522315 ], [ 3522312, 3522767, 3522316, 3522772 ], [ 3522768, 3531627, 
3522773, 3531632 ], [ 3531628, 3537780, 3531633, 3537785 ], [ 3537781, 3538989, 3537786, 3538993 ], [ 3538990, 3540393, 3538994, 3540397 ], [ 3540394, 3540644, 3540398, 3540649 ], [ 3540645, 3543017, 3540650, 3543022 ], [ 3543018, 3543842, 3543023, 3543847 ], [ 3543843, 3545546, 3543848, 3545551 ], [ 3545547, 3546913, 3545552, 3546917 ], [ 3546914, 3547077, 3546918, 3547082 ], [ 3547078, 3547149, 3547083, 3547154 ], [ 3547150, 3548533, 3547155, 3548538 ], [ 3548534, 3549829, 3548539, 3549834 ], [ 3549830, 3549935, 3549835, 3549940 ], [ 3549936, 3550203, 3549941, 3550208 ], [ 3550204, 3550227, 3550209, 3550232 ], [ 3550228, 3551480, 3550233, 3551485 ], [ 3551481, 3552210, 3551486, 3552215 ], [ 3552211, 3561260, 3552216, 3561264 ], [ 3561261, 3562847, 3561265, 3562852 ], [ 3562848, 3563068, 3562853, 3563073 ], [ 3563069, 3563973, 3563074, 3563978 ], [ 3563974, 3565130, 3563979, 3565135 ], [ 3565131, 3565565, 3565136, 3565570 ], [ 3565566, 3566132, 3565571, 3566137 ], [ 3566133, 3567018, 3566138, 3567023 ], [ 3567019, 3567627, 3567024, 3567632 ], [ 3567628, 3570838, 3567633, 3570843 ], [ 3570839, 3574124, 3570844, 3574129 ], [ 3574125, 3575098, 3574130, 3575103 ], [ 3575099, 3575586, 3575104, 3575591 ], [ 3575587, 3576602, 3575592, 3576607 ], [ 3576603, 3590383, 3576608, 3590388 ], [ 3590384, 3594697, 3590389, 3594702 ], [ 3594698, 3596771, 3594703, 3596775 ], [ 3596772, 3597307, 3596776, 3597312 ], [ 3597308, 3598184, 3597313, 3598188 ], [ 3598185, 3598837, 3598189, 3598842 ], [ 3598838, 3599839, 3598843, 3599844 ], [ 3599840, 3603578, 3599845, 3603583 ], [ 3603579, 3605362, 3603584, 3605366 ], [ 3605363, 3609322, 3605367, 3609327 ], [ 3609323, 3614549, 3609328, 3614554 ], [ 3614550, 3618664, 3614555, 3618669 ], [ 3618665, 3619899, 3618670, 3619904 ], [ 3619900, 3624824, 3619905, 3624829 ], [ 3624825, 3627309, 3624830, 3627313 ], [ 3627310, 3628159, 3627314, 3628164 ], [ 3628160, 3628423, 3628165, 3628428 ], [ 3628424, 3629521, 3628429, 3629525 ], [ 3629522, 3629584, 
3629526, 3629588 ], [ 3629585, 3629833, 3629589, 3629837 ], [ 3629834, 3630285, 3629838, 3630290 ], [ 3630286, 3630486, 3630291, 3630491 ], [ 3630487, 3634237, 3630492, 3634242 ], [ 3634238, 3639209, 3634243, 3639214 ], [ 3639210, 3639649, 3639215, 3639645 ], [ 3639650, 3647161, 3639646, 3647165 ], [ 3647162, 3648227, 3647166, 3648231 ], [ 3648228, 3652188, 3648232, 3652193 ], [ 3652189, 3659027, 3652194, 3659032 ], [ 3659028, 3659079, 3659033, 3659084 ], [ 3659080, 3659207, 3659085, 3659212 ], [ 3659208, 3660377, 3659213, 3660381 ], [ 3660378, 3667507, 3660382, 3667512 ], [ 3667508, 3667741, 3667513, 3667746 ], [ 3667742, 3668429, 3667747, 3668434 ], [ 3668430, 3670470, 3668435, 3670475 ], [ 3670471, 3673148, 3670476, 3673153 ], [ 3673149, 3674083, 3673154, 3674088 ], [ 3674084, 3678214, 3674089, 3678219 ], [ 3678215, 3680677, 3678220, 3680682 ], [ 3680678, 3684205, 3680683, 3684210 ], [ 3684206, 3688887, 3684211, 3688883 ], [ 3688888, 3690753, 3688884, 3690758 ], [ 3690754, 3698350, 3690759, 3698355 ], [ 3698351, 3699287, 3698356, 3699292 ], [ 3699288, 3700655, 3699293, 3700659 ], [ 3700656, 3702298, 3700660, 3702303 ], [ 3702299, 3706224, 3702304, 3706229 ], [ 3706225, 3706948, 3706230, 3706953 ], [ 3706949, 3709050, 3706954, 3709055 ], [ 3709051, 3710401, 3709056, 3710405 ], [ 3710402, 3713151, 3710406, 3713155 ], [ 3713152, 3714587, 3713156, 3714591 ], [ 3714588, 3714794, 3714592, 3714798 ], [ 3714795, 3725109, 3714799, 3725114 ], [ 3725110, 3725822, 3725115, 3725818 ], [ 3725823, 3726413, 3725819, 3726409 ], [ 3726414, 3731356, 3726410, 3731352 ], [ 3731357, 3731514, 3731353, 3731519 ], [ 3731515, 3736239, 3731520, 3736244 ], [ 3736240, 3736361, 3736245, 3736366 ], [ 3736362, 3738698, 3736367, 3738703 ], [ 3738699, 3742314, 3738704, 3742318 ], [ 3742315, 3745350, 3742319, 3745355 ], [ 3745351, 3747516, 3745356, 3747521 ], [ 3747517, 3747787, 3747522, 3747791 ], [ 3747788, 3748248, 3747792, 3748253 ], [ 3748249, 3750382, 3748254, 3750387 ], [ 3750383, 3752531, 
3750388, 3752527 ], [ 3752532, 3753792, 3752528, 3753788 ], [ 3753793, 3755999, 3753789, 3756003 ], [ 3756000, 3761090, 3756004, 3761095 ], [ 3761091, 3762084, 3761096, 3762089 ], [ 3762085, 3762616, 3762090, 3762621 ], [ 3762617, 3769672, 3762622, 3769677 ], [ 3769673, 3769809, 3769678, 3769814 ], [ 3769810, 3769962, 3769815, 3769967 ], [ 3769963, 3770651, 3769968, 3770655 ], [ 3770652, 3773164, 3770656, 3773168 ], [ 3773165, 3774893, 3773169, 3774898 ], [ 3774894, 3776296, 3774899, 3776300 ], [ 3776297, 3776322, 3776301, 3776327 ], [ 3776323, 3778089, 3776328, 3778094 ], [ 3778090, 3783192, 3778095, 3783188 ], [ 3783193, 3784214, 3783189, 3784219 ], [ 3784215, 3795080, 3784220, 3795084 ], [ 3795081, 3799015, 3795085, 3799020 ], [ 3799016, 3805176, 3799021, 3805180 ], [ 3805177, 3806065, 3805181, 3806070 ], [ 3806066, 3807768, 3806071, 3807773 ], [ 3807769, 3808991, 3807774, 3808995 ], [ 3808992, 3809153, 3808996, 3809157 ], [ 3809154, 3809882, 3809158, 3809886 ], [ 3809883, 3813249, 3809887, 3813253 ], [ 3813250, 3816682, 3813254, 3816687 ], [ 3816683, 3817044, 3816688, 3817049 ], [ 3817045, 3817768, 3817050, 3817773 ], [ 3817769, 3821121, 3817774, 3821125 ], [ 3821122, 3822167, 3821126, 3822172 ], [ 3822168, 3824032, 3822173, 3824037 ], [ 3824033, 3827315, 3824038, 3827311 ], [ 3827316, 3828542, 3827312, 3828547 ], [ 3828543, 3829917, 3828548, 3829921 ], [ 3829918, 3832118, 3829922, 3832123 ], [ 3832119, 3832453, 3832124, 3832458 ], [ 3832454, 3837573, 3832459, 3837578 ], [ 3837574, 3842658, 3837579, 3842663 ], [ 3842659, 3843673, 3842664, 3843677 ], [ 3843674, 3844593, 3843678, 3844597 ], [ 3844594, 3844599, 3844598, 3844604 ], [ 3844600, 3844788, 3844605, 3844793 ], [ 3844789, 3850826, 3844794, 3850831 ], [ 3850827, 3854490, 3850832, 3854494 ], [ 3854491, 3855395, 3854495, 3855400 ], [ 3855396, 3855755, 3855401, 3855751 ], [ 3855756, 3860530, 3855752, 3860535 ], [ 3860531, 3860887, 3860536, 3860883 ], [ 3860888, 3862913, 3860884, 3862917 ], [ 3862914, 3872850, 
3862918, 3872854 ], [ 3872851, 3873056, 3872855, 3873061 ], [ 3873057, 3875254, 3873062, 3875258 ], [ 3875255, 3880197, 3875259, 3880202 ], [ 3880198, 3882479, 3880203, 3882483 ], [ 3882480, 3882820, 3882484, 3882825 ], [ 3882821, 3883310, 3882826, 3883315 ], [ 3883311, 3885744, 3883316, 3885749 ], [ 3885745, 3890942, 3885750, 3890946 ], [ 3890943, 3891218, 3890947, 3891223 ], [ 3891219, 3891354, 3891224, 3891359 ], [ 3891355, 3893483, 3891360, 3893487 ], [ 3893484, 3894419, 3893488, 3894424 ], [ 3894420, 3900500, 3894425, 3900505 ], [ 3900501, 3901493, 3900506, 3901489 ], [ 3901494, 3903140, 3901490, 3903136 ], [ 3903141, 3907313, 3903137, 3907318 ], [ 3907314, 3908755, 3907319, 3908759 ], [ 3908756, 3908919, 3908760, 3908924 ], [ 3908920, 3909807, 3908925, 3909812 ], [ 3909808, 3909977, 3909813, 3909982 ], [ 3909978, 3911959, 3909983, 3911955 ], [ 3911960, 3914009, 3911956, 3914005 ], [ 3914010, 3922759, 3914006, 3922763 ], [ 3922760, 3930585, 3922764, 3930590 ], [ 3930586, 3931083, 3930591, 3931087 ], [ 3931084, 3933248, 3931088, 3933252 ], [ 3933249, 3933363, 3933253, 3933368 ], [ 3933364, 3935176, 3933369, 3935181 ], [ 3935177, 3936871, 3935182, 3936876 ], [ 3936872, 3937525, 3936877, 3937529 ], [ 3937526, 3945198, 3937530, 3945203 ], [ 3945199, 3946390, 3945204, 3946395 ], [ 3946391, 3946986, 3946396, 3946991 ], [ 3946987, 3952348, 3946992, 3952344 ], [ 3952349, 3956408, 3952345, 3956413 ], [ 3956409, 3958333, 3956414, 3958338 ], [ 3958334, 3959031, 3958339, 3959036 ], [ 3959032, 3960932, 3959037, 3960937 ], [ 3960933, 3964190, 3960938, 3964195 ], [ 3964191, 3969413, 3964196, 3969418 ], [ 3969414, 3972146, 3969419, 3972151 ], [ 3972147, 3972344, 3972152, 3972349 ], [ 3972345, 3978065, 3972350, 3978070 ], [ 3978066, 3981977, 3978071, 3981982 ], [ 3981978, 3984768, 3981983, 3984773 ], [ 3984769, 3984918, 3984774, 3984923 ], [ 3984919, 3985704, 3984924, 3985709 ], [ 3985705, 3986544, 3985710, 3986548 ], [ 3986545, 3995454, 3986549, 3995459 ], [ 3995455, 3997410, 
3995460, 3997415 ], [ 3997411, 4000982, 3997416, 4000987 ], [ 4000983, 4002297, 4000988, 4002301 ], [ 4002298, 4002514, 4002302, 4002519 ], [ 4002515, 4005689, 4002520, 4005694 ], [ 4005690, 4008445, 4005695, 4008449 ], [ 4008446, 4008591, 4008450, 4008595 ], [ 4008592, 4016078, 4008596, 4016083 ], [ 4016079, 4017237, 4016084, 4017242 ], [ 4017238, 4018412, 4017243, 4018417 ], [ 4018413, 4018717, 4018418, 4018722 ], [ 4018718, 4018835, 4018723, 4018839 ], [ 4018836, 4021930, 4018840, 4021934 ], [ 4021931, 4023281, 4021935, 4023286 ], [ 4023282, 4032076, 4023287, 4032081 ], [ 4032077, 4041164, 4032082, 4041169 ], [ 4041165, 4041295, 4041170, 4041300 ], [ 4041296, 4041920, 4041301, 4041925 ], [ 4041921, 4046747, 4041926, 4046752 ], [ 4046748, 4049001, 4046753, 4049005 ], [ 4049002, 4050345, 4049006, 4050349 ], [ 4050346, 4052606, 4050350, 4052611 ], [ 4052607, 4054469, 4052612, 4054474 ], [ 4054470, 4054629, 4054475, 4054634 ], [ 4054630, 4054968, 4054635, 4054973 ], [ 4054969, 4055333, 4054974, 4055338 ], [ 4055334, 4056058, 4055339, 4056063 ], [ 4056059, 4057967, 4056064, 4057963 ], [ 4057968, 4059084, 4057964, 4059089 ], [ 4059085, 4062508, 4059090, 4062513 ], [ 4062509, 4065383, 4062514, 4065388 ], [ 4065384, 4065588, 4065389, 4065592 ], [ 4065589, 4065643, 4065593, 4065648 ], [ 4065644, 4065670, 4065649, 4065674 ], [ 4065671, 4069949, 4065675, 4069954 ], [ 4069950, 4073053, 4069955, 4073057 ], [ 4073054, 4076608, 4073058, 4076612 ], [ 4076609, 4078343, 4076613, 4078348 ], [ 4078344, 4083896, 4078349, 4083901 ], [ 4083897, 4085884, 4083902, 4085889 ], [ 4085885, 4090152, 4085890, 4090157 ], [ 4090153, 4093908, 4090158, 4093913 ], [ 4093909, 4094118, 4093914, 4094123 ], [ 4094119, 4095249, 4094124, 4095254 ], [ 4095250, 4095382, 4095255, 4095386 ], [ 4095383, 4095456, 4095387, 4095460 ], [ 4095457, 4097811, 4095461, 4097816 ], [ 4097812, 4101896, 4097817, 4101901 ], [ 4101897, 4102182, 4101902, 4102186 ], [ 4102183, 4104214, 4102187, 4104218 ], [ 4104215, 4107088, 
4104219, 4107093 ], [ 4107089, 4107551, 4107094, 4107556 ], [ 4107552, 4107580, 4107557, 4107585 ], [ 4107581, 4107671, 4107586, 4107675 ], [ 4107672, 4109180, 4107676, 4109185 ], [ 4109181, 4110537, 4109186, 4110542 ], [ 4110538, 4116188, 4110543, 4116192 ], [ 4116189, 4116491, 4116193, 4116496 ], [ 4116492, 4117909, 4116497, 4117914 ], [ 4117910, 4118469, 4117915, 4118474 ], [ 4118470, 4123661, 4118475, 4123666 ], [ 4123662, 4123865, 4123667, 4123870 ], [ 4123866, 4125127, 4123871, 4125132 ], [ 4125128, 4125135, 4125133, 4125139 ], [ 4125136, 4129187, 4125140, 4129192 ], [ 4129188, 4132972, 4129193, 4132977 ], [ 4132973, 4134272, 4132978, 4134277 ], [ 4134273, 4135470, 4134278, 4135475 ], [ 4135471, 4136837, 4135476, 4136842 ], [ 4136838, 4146292, 4136843, 4146297 ], [ 4146293, 4146443, 4146298, 4146448 ], [ 4146444, 4148935, 4146449, 4148940 ], [ 4148936, 4150723, 4148941, 4150727 ], [ 4150724, 4156926, 4150728, 4156930 ], [ 4156927, 4160939, 4156931, 4160943 ], [ 4160940, 4161425, 4160944, 4161421 ], [ 4161426, 4162265, 4161422, 4162270 ], [ 4162266, 4163096, 4162271, 4163100 ], [ 4163097, 4163991, 4163101, 4163995 ], [ 4163992, 4164781, 4163996, 4164786 ], [ 4164782, 4167637, 4164787, 4167633 ], [ 4167638, 4170898, 4167634, 4170903 ], [ 4170899, 4175178, 4170904, 4175174 ], [ 4175179, 4175563, 4175175, 4175567 ], [ 4175564, 4175870, 4175568, 4175875 ], [ 4175871, 4175960, 4175876, 4175965 ], [ 4175961, 4178040, 4175966, 4178044 ], [ 4178041, 4179795, 4178045, 4179799 ], [ 4179796, 4180869, 4179800, 4180874 ], [ 4180870, 4181479, 4180875, 4181484 ], [ 4181480, 4184808, 4181485, 4184812 ], [ 4184809, 4185316, 4184813, 4185320 ], [ 4185317, 4187692, 4185321, 4187696 ], [ 4187693, 4187785, 4187697, 4187790 ], [ 4187786, 4188293, 4187791, 4188289 ], [ 4188294, 4189984, 4188290, 4189980 ], [ 4189985, 4191368, 4189981, 4191372 ], [ 4191369, 4191989, 4191373, 4191993 ], [ 4191990, 4192187, 4191994, 4192191 ], [ 4192188, 4192423, 4192192, 4192428 ], [ 4192424, 4192625, 
4192429, 4192630 ], [ 4192626, 4194747, 4192631, 4194752 ], [ 4194748, 4195600, 4194753, 4195605 ], [ 4195601, 4195842, 4195606, 4195847 ], [ 4195843, 4196615, 4195848, 4196619 ], [ 4196616, 4197634, 4196620, 4197639 ], [ 4197635, 4198036, 4197640, 4198041 ], [ 4198037, 4198447, 4198042, 4198451 ], [ 4198448, 4198840, 4198452, 4198836 ], [ 4198841, 4203137, 4198837, 4203141 ], [ 4203138, 4203155, 4203142, 4203159 ], [ 4203156, 4203840, 4203160, 4203844 ], [ 4203841, 4205660, 4203845, 4205665 ], [ 4205661, 4213510, 4205666, 4213515 ], [ 4213511, 4217238, 4213516, 4217243 ], [ 4217239, 4219123, 4217244, 4219127 ], [ 4219124, 4221217, 4219128, 4221222 ], [ 4221218, 4225159, 4221223, 4225164 ], [ 4225160, 4226183, 4225165, 4226188 ], [ 4226184, 4228901, 4226189, 4228906 ], [ 4228902, 4229490, 4228907, 4229494 ], [ 4229491, 4229774, 4229495, 4229779 ], [ 4229775, 4230559, 4229780, 4230564 ], [ 4230560, 4231316, 4230565, 4231312 ], [ 4231317, 4235572, 4231313, 4235577 ], [ 4235573, 4242029, 4235578, 4242034 ], [ 4242030, 4242765, 4242035, 4242770 ], [ 4242766, 4242869, 4242771, 4242865 ], [ 4242870, 4244054, 4242866, 4244059 ], [ 4244055, 4245120, 4244060, 4245116 ], [ 4245121, 4245334, 4245117, 4245339 ], [ 4245335, 4245565, 4245340, 4245569 ], [ 4245566, 4247741, 4245570, 4247745 ], [ 4247742, 4249019, 4247746, 4249024 ], [ 4249020, 4249100, 4249025, 4249105 ], [ 4249101, 4256705, 4249106, 4256710 ], [ 4256706, 4258303, 4256711, 4258307 ], [ 4258304, 4259509, 4258308, 4259514 ], [ 4259510, 4261410, 4259515, 4261415 ], [ 4261411, 4269352, 4261416, 4269356 ], [ 4269353, 4270525, 4269357, 4270530 ], [ 4270526, 4274109, 4270531, 4274114 ], [ 4274110, 4275137, 4274115, 4275141 ], [ 4275138, 4281838, 4275142, 4281842 ], [ 4281839, 4281849, 4281843, 4281854 ], [ 4281850, 4283260, 4281855, 4283264 ], [ 4283261, 4284055, 4283265, 4284059 ], [ 4284056, 4284552, 4284060, 4284556 ], [ 4284553, 4284762, 4284557, 4284767 ], [ 4284763, 4287988, 4284768, 4287992 ], [ 4287989, 4294545, 
4287993, 4294549 ], [ 4294546, 4303042, 4294550, 4303047 ], [ 4303043, 4303258, 4303048, 4303263 ], [ 4303259, 4303753, 4303264, 4303757 ], [ 4303754, 4306632, 4303758, 4306637 ], [ 4306633, 4310014, 4306638, 4310018 ], [ 4310015, 4312216, 4310019, 4312221 ], [ 4312217, 4314890, 4312222, 4314895 ], [ 4314891, 4316460, 4314896, 4316465 ], [ 4316461, 4316626, 4316466, 4316631 ], [ 4316627, 4318092, 4316632, 4318097 ], [ 4318093, 4318604, 4318098, 4318609 ], [ 4318605, 4318772, 4318610, 4318777 ], [ 4318773, 4321424, 4318778, 4321429 ], [ 4321425, 4321489, 4321430, 4321494 ], [ 4321490, 4324980, 4321495, 4324984 ], [ 4324981, 4327535, 4324985, 4327540 ], [ 4327536, 4329548, 4327541, 4329553 ], [ 4329549, 4331509, 4329554, 4331514 ], [ 4331510, 4332147, 4331515, 4332152 ], [ 4332148, 4334445, 4332153, 4334450 ], [ 4334446, 4338820, 4334451, 4338825 ], [ 4338821, 4339739, 4338826, 4339744 ], [ 4339740, 4343682, 4339745, 4343687 ], [ 4343683, 4348264, 4343688, 4348269 ], [ 4348265, 4351770, 4348270, 4351774 ], [ 4351771, 4352206, 4351775, 4352211 ], [ 4352207, 4356621, 4352212, 4356626 ], [ 4356622, 4362927, 4356627, 4362923 ], [ 4362928, 4366184, 4362924, 4366189 ], [ 4366185, 4374134, 4366190, 4374139 ], [ 4374135, 4374749, 4374140, 4374754 ], [ 4374750, 4386564, 4374755, 4386568 ], [ 4386565, 4386786, 4386569, 4386790 ], [ 4386787, 4388049, 4386791, 4388054 ], [ 4388050, 4388548, 4388055, 4388553 ], [ 4388549, 4395145, 4388554, 4395141 ], [ 4395146, 4395214, 4395142, 4395219 ], [ 4395215, 4396301, 4395220, 4396297 ], [ 4396302, 4397725, 4396298, 4397730 ], [ 4397726, 4400829, 4397731, 4400833 ], [ 4400830, 4401917, 4400834, 4401922 ], [ 4401918, 4401962, 4401923, 4401967 ], [ 4401963, 4404470, 4401968, 4404475 ], [ 4404471, 4404603, 4404476, 4404608 ], [ 4404604, 4404677, 4404609, 4404682 ], [ 4404678, 4407845, 4404683, 4407850 ], [ 4407846, 4408723, 4407851, 4408727 ], [ 4408724, 4414102, 4408728, 4414106 ], [ 4414103, 4415693, 4414107, 4415698 ], [ 4415694, 4415957, 
4415699, 4415962 ], [ 4415958, 4418305, 4415963, 4418309 ], [ 4418306, 4426334, 4418310, 4426339 ], [ 4426335, 4427143, 4426340, 4427148 ], [ 4427144, 4432989, 4427149, 4432994 ], [ 4432990, 4433640, 4432995, 4433645 ], [ 4433641, 4435236, 4433646, 4435241 ], [ 4435237, 4436966, 4435242, 4436962 ], [ 4436967, 4447677, 4436963, 4447681 ], [ 4447678, 4449373, 4447682, 4449377 ], [ 4449374, 4450981, 4449378, 4450986 ], [ 4450982, 4452121, 4450987, 4452126 ], [ 4452122, 4453471, 4452127, 4453475 ], [ 4453472, 4454417, 4453476, 4454422 ], [ 4454418, 4455164, 4454423, 4455169 ], [ 4455165, 4459226, 4455170, 4459231 ], [ 4459227, 4460531, 4459232, 4460535 ], [ 4460532, 4462863, 4460536, 4462868 ], [ 4462864, 4469714, 4462869, 4469719 ], [ 4469715, 4469976, 4469720, 4469980 ], [ 4469977, 4471008, 4469981, 4471013 ], [ 4471009, 4473114, 4471014, 4473119 ], [ 4473115, 4477852, 4473120, 4477857 ], [ 4477853, 4477874, 4477858, 4477879 ], [ 4477875, 4482921, 4477880, 4482926 ], [ 4482922, 4489809, 4482927, 4489814 ], [ 4489810, 4490912, 4489815, 4490917 ], [ 4490913, 4491974, 4490918, 4491979 ], [ 4491975, 4492157, 4491980, 4492162 ], [ 4492158, 4493614, 4492163, 4493619 ], [ 4493615, 4496829, 4493620, 4496834 ], [ 4496830, 4497697, 4496835, 4497702 ], [ 4497698, 4499157, 4497703, 4499162 ], [ 4499158, 4502248, 4499163, 4502253 ], [ 4502249, 4504493, 4502254, 4504498 ], [ 4504494, 4505336, 4504499, 4505341 ], [ 4505337, 4505567, 4505342, 4505571 ], [ 4505568, 4506680, 4505572, 4506685 ], [ 4506681, 4506961, 4506686, 4506966 ], [ 4506962, 4507601, 4506967, 4507606 ], [ 4507602, 4509484, 4507607, 4509488 ], [ 4509485, 4509536, 4509489, 4509532 ], [ 4509537, 4513351, 4509533, 4513356 ], [ 4513352, 4516356, 4513357, 4516361 ], [ 4516357, 4520650, 4516362, 4520655 ], [ 4520651, 4522948, 4520656, 4522952 ], [ 4522949, 4527592, 4522953, 4527596 ], [ 4527593, 4528820, 4527597, 4528825 ], [ 4528821, 4533216, 4528826, 4533212 ], [ 4533217, 4535971, 4533213, 4535976 ], [ 4535972, 4536320, 
4535977, 4536324 ], [ 4536321, 4539874, 4536325, 4539878 ], [ 4539875, 4540172, 4539879, 4540177 ], [ 4540173, 4541756, 4540178, 4541752 ], [ 4541757, 4543862, 4541753, 4543858 ], [ 4543863, 4545833, 4543859, 4545829 ], [ 4545834, 4551230, 4545830, 4551235 ], [ 4551231, 4552997, 4551236, 4553002 ], [ 4552998, 4555491, 4553003, 4555496 ], [ 4555492, 4557748, 4555497, 4557744 ], [ 4557749, 4558033, 4557745, 4558038 ], [ 4558034, 4561276, 4558039, 4561280 ], [ 4561277, 4562123, 4561281, 4562128 ], [ 4562124, 4562284, 4562129, 4562288 ], [ 4562285, 4563100, 4562289, 4563105 ], [ 4563101, 4564382, 4563106, 4564386 ], [ 4564383, 4564639, 4564387, 4564644 ], [ 4564640, 4566006, 4564645, 4566011 ], [ 4566007, 4575457, 4566012, 4575462 ], [ 4575458, 4575809, 4575463, 4575814 ], [ 4575810, 4576253, 4575815, 4576258 ], [ 4576254, 4579647, 4576259, 4579652 ], [ 4579648, 4582407, 4579653, 4582411 ], [ 4582408, 4588823, 4582412, 4588828 ], [ 4588824, 4589254, 4588829, 4589259 ], [ 4589255, 4589933, 4589260, 4589929 ], [ 4589934, 4591334, 4589930, 4591338 ], [ 4591335, 4598670, 4591339, 4598675 ], [ 4598671, 4599152, 4598676, 4599156 ], [ 4599153, 4600707, 4599157, 4600703 ], [ 4600708, 4601105, 4600704, 4601110 ], [ 4601106, 4601445, 4601111, 4601449 ], [ 4601446, 4602566, 4601450, 4602571 ], [ 4602567, 4606392, 4602572, 4606396 ], [ 4606393, 4607895, 4606397, 4607899 ], [ 4607896, 4612068, 4607900, 4612073 ], [ 4612069, 4615603, 4612074, 4615608 ], [ 4615604, 4618026, 4615609, 4618030 ], [ 4618027, 4621061, 4618031, 4621065 ], [ 4621062, 4627887, 4621066, 4627892 ], [ 4627888, 4631394, 4627893, 4631399 ], [ 4631395, 4631631, 4631400, 4631636 ], [ 4631632, 4635963, 4631637, 4635968 ], [ 4635964, 4641129, 4635969, 4641134 ], [ 4641130, 4642188, 4641135, 4642192 ], [ 4642189, 4642980, 4642193, 4642985 ], [ 4642981, 4643635, 4642986, 4643640 ], [ 4643636, 4644147, 4643641, 4644152 ], [ 4644148, 4644814, 4644153, 4644818 ], [ 4644815, 4649017, 4644819, 4649021 ], [ 4649018, 4649332, 
4649022, 4649337 ], [ 4649333, 4649464, 4649338, 4649469 ], [ 4649465, 4650584, 4649470, 4650588 ], [ 4650585, 4653836, 4650589, 4653840 ], [ 4653837, 4654913, 4653841, 4654917 ], [ 4654914, 4656366, 4654918, 4656371 ], [ 4656367, 4656864, 4656372, 4656869 ], [ 4656865, 4656933, 4656870, 4656938 ], [ 4656934, 4660056, 4656939, 4660061 ], [ 4660057, 4665881, 4660062, 4665886 ], [ 4665882, 4666341, 4665887, 4666345 ], [ 4666342, 4668837, 4666346, 4668842 ], [ 4668838, 4669896, 4668843, 4669900 ], [ 4669897, 4670841, 4669901, 4670845 ], [ 4670842, 4672873, 4670846, 4672878 ], [ 4672874, 4681462, 4672879, 4681467 ], [ 4681463, 4682181, 4681468, 4682185 ], [ 4682182, 4691924, 4682186, 4691928 ], [ 4691925, 4696368, 4691929, 4696373 ], [ 4696369, 4699474, 4696374, 4699479 ], [ 4699475, 4702530, 4699480, 4702534 ], [ 4702531, 4704523, 4702535, 4704528 ], [ 4704524, 4704899, 4704529, 4704903 ], [ 4704900, 4706008, 4704904, 4706013 ], [ 4706009, 4706120, 4706014, 4706124 ], [ 4706121, 4706510, 4706125, 4706515 ], [ 4706511, 4710334, 4706516, 4710330 ], [ 4710335, 4710765, 4710331, 4710769 ], [ 4710766, 4711295, 4710770, 4711300 ], [ 4711296, 4711543, 4711301, 4711548 ], [ 4711544, 4711935, 4711549, 4711940 ], [ 4711936, 4712790, 4711941, 4712795 ], [ 4712791, 4713126, 4712796, 4713131 ], [ 4713127, 4713287, 4713132, 4713291 ], [ 4713288, 4713730, 4713292, 4713735 ], [ 4713731, 4717619, 4713736, 4717624 ], [ 4717620, 4718789, 4717625, 4718785 ], [ 4718790, 4722736, 4718786, 4722740 ], [ 4722737, 4724224, 4722741, 4724229 ], [ 4724225, 4725868, 4724230, 4725873 ], [ 4725869, 4727653, 4725874, 4727658 ], [ 4727654, 4729069, 4727659, 4729074 ], [ 4729070, 4730833, 4729075, 4730838 ], [ 4730834, 4733099, 4730839, 4733104 ], [ 4733100, 4733576, 4733105, 4733581 ], [ 4733577, 4735011, 4733582, 4735015 ], [ 4735012, 4735924, 4735016, 4735928 ], [ 4735925, 4736754, 4735929, 4736759 ], [ 4736755, 4737511, 4736760, 4737507 ], [ 4737512, 4737991, 4737508, 4737995 ], [ 4737992, 4740307, 
4737996, 4740311 ], [ 4740308, 4741684, 4740312, 4741689 ], [ 4741685, 4744830, 4741690, 4744835 ], [ 4744831, 4746768, 4744836, 4746773 ], [ 4746769, 4749037, 4746774, 4749042 ], [ 4749038, 4749801, 4749043, 4749806 ], [ 4749802, 4749864, 4749807, 4749869 ], [ 4749865, 4750090, 4749870, 4750094 ], [ 4750091, 4750966, 4750095, 4750971 ], [ 4750967, 4751355, 4750972, 4751359 ], [ 4751356, 4751493, 4751360, 4751497 ], [ 4751494, 4752345, 4751498, 4752349 ], [ 4752346, 4752965, 4752350, 4752970 ], [ 4752966, 4753059, 4752971, 4753063 ], [ 4753060, 4754115, 4753064, 4754119 ], [ 4754116, 4754237, 4754120, 4754242 ], [ 4754238, 4757191, 4754243, 4757196 ], [ 4757192, 4761010, 4757197, 4761014 ], [ 4761011, 4762052, 4761015, 4762057 ], [ 4762053, 4762604, 4762058, 4762600 ], [ 4762605, 4764164, 4762601, 4764169 ], [ 4764165, 4766341, 4764170, 4766346 ], [ 4766342, 4767519, 4766347, 4767524 ], [ 4767520, 4769451, 4767525, 4769456 ], [ 4769452, 4770366, 4769457, 4770371 ], [ 4770367, 4774504, 4770372, 4774509 ], [ 4774505, 4777167, 4774510, 4777171 ], [ 4777168, 4778234, 4777172, 4778238 ], [ 4778235, 4779310, 4778239, 4779315 ], [ 4779311, 4784157, 4779316, 4784161 ], [ 4784158, 4784713, 4784162, 4784718 ], [ 4784714, 4784960, 4784719, 4784965 ], [ 4784961, 4789181, 4784966, 4789186 ], [ 4789182, 4792300, 4789187, 4792304 ], [ 4792301, 4792894, 4792305, 4792899 ], [ 4792895, 4804321, 4792900, 4804326 ], [ 4804322, 4807780, 4804327, 4807785 ], [ 4807781, 4808367, 4807786, 4808372 ], [ 4808368, 4811025, 4808373, 4811030 ], [ 4811026, 4811936, 4811031, 4811940 ], [ 4811937, 4812883, 4811941, 4812887 ], [ 4812884, 4813062, 4812888, 4813067 ], [ 4813063, 4813117, 4813068, 4813121 ], [ 4813118, 4822160, 4813122, 4822165 ], [ 4822161, 4830957, 4822166, 4830961 ], [ 4830958, 4831410, 4830962, 4831414 ], [ 4831411, 4832326, 4831415, 4832330 ], [ 4832327, 4833156, 4832331, 4833161 ], [ 4833157, 4834005, 4833162, 4834001 ], [ 4834006, 4834485, 4834002, 4834489 ], [ 4834486, 4839621, 
4834490, 4839626 ], [ 4839622, 4844592, 4839627, 4844588 ], [ 4844593, 4853316, 4844589, 4853321 ], [ 4853317, 4853434, 4853322, 4853438 ], [ 4853435, 4853704, 4853439, 4853708 ], [ 4853705, 4862268, 4853709, 4862273 ], [ 4862269, 4862689, 4862274, 4862694 ], [ 4862690, 4863453, 4862695, 4863458 ], [ 4863454, 4863657, 4863459, 4863662 ], [ 4863658, 4865285, 4863663, 4865289 ], [ 4865286, 4867215, 4865290, 4867220 ], [ 4867216, 4867943, 4867221, 4867948 ], [ 4867944, 4870367, 4867949, 4870372 ], [ 4870368, 4871260, 4870373, 4871265 ], [ 4871261, 4871925, 4871266, 4871930 ], [ 4871926, 4872824, 4871931, 4872829 ], [ 4872825, 4877259, 4872830, 4877263 ], [ 4877260, 4877520, 4877264, 4877524 ], [ 4877521, 4879935, 4877525, 4879940 ], [ 4879936, 4880330, 4879941, 4880334 ], [ 4880331, 4881593, 4880335, 4881598 ], [ 4881594, 4882087, 4881599, 4882092 ], [ 4882088, 4889351, 4882093, 4889356 ], [ 4889352, 4890443, 4889357, 4890448 ], [ 4890444, 4892812, 4890449, 4892816 ], [ 4892813, 4893056, 4892817, 4893052 ], [ 4893057, 4893088, 4893053, 4893092 ], [ 4893089, 4897454, 4893093, 4897458 ], [ 4897455, 4898485, 4897459, 4898490 ], [ 4898486, 4901057, 4898491, 4901062 ], [ 4901058, 4904245, 4901063, 4904250 ], [ 4904246, 4904281, 4904251, 4904285 ], [ 4904282, 4904668, 4904286, 4904673 ], [ 4904669, 4904984, 4904674, 4904989 ], [ 4904985, 4907736, 4904990, 4907740 ], [ 4907737, 4908072, 4907741, 4908076 ], [ 4908073, 4908096, 4908077, 4908100 ], [ 4908097, 4908561, 4908101, 4908565 ], [ 4908562, 4913327, 4908566, 4913323 ], [ 4913328, 4914224, 4913324, 4914229 ], [ 4914225, 4916537, 4914230, 4916542 ], [ 4916538, 4918099, 4916543, 4918103 ], [ 4918100, 4919908, 4918104, 4919913 ], [ 4919909, 4926663, 4919914, 4926668 ], [ 4926664, 4929329, 4926669, 4929334 ], [ 4929330, 4929560, 4929335, 4929564 ], [ 4929561, 4930673, 4929565, 4930678 ], [ 4930674, 4930954, 4930679, 4930959 ], [ 4930955, 4931594, 4930960, 4931599 ], [ 4931595, 4932928, 4931600, 4932932 ], [ 4932929, 4937282, 
4932933, 4937287 ], [ 4937283, 4939667, 4937288, 4939672 ], [ 4939668, 4940551, 4939673, 4940555 ], [ 4940552, 4940908, 4940556, 4940912 ], [ 4940909, 4941837, 4940913, 4941842 ], [ 4941838, 4946673, 4941843, 4946677 ], [ 4946674, 4947030, 4946678, 4947035 ], [ 4947031, 4948839, 4947036, 4948843 ], [ 4948840, 4951071, 4948844, 4951076 ], [ 4951072, 4953999, 4951077, 4954004 ], [ 4954000, 4955481, 4954005, 4955486 ], [ 4955482, 4959224, 4955487, 4959229 ], [ 4959225, 4967229, 4959230, 4967233 ], [ 4967230, 4972603, 4967234, 4972607 ], [ 4972604, 4974094, 4972608, 4974098 ], [ 4974095, 4974624, 4974099, 4974629 ], [ 4974625, 4975683, 4974630, 4975687 ], [ 4975684, 4976599, 4975688, 4976603 ], [ 4976600, 4977429, 4976604, 4977434 ], [ 4977430, 4978186, 4977435, 4978182 ], [ 4978187, 4978666, 4978183, 4978670 ], [ 4978667, 4984448, 4978671, 4984453 ], [ 4984449, 4986670, 4984454, 4986675 ], [ 4986671, 4987571, 4986676, 4987575 ], [ 4987572, 4989614, 4987576, 4989618 ], [ 4989615, 4990698, 4989619, 4990702 ], [ 4990699, 4992038, 4990703, 4992043 ], [ 4992039, 4993566, 4992044, 4993570 ], [ 4993567, 4993811, 4993571, 4993816 ], [ 4993812, 4994767, 4993817, 4994771 ], [ 4994768, 4995631, 4994772, 4995636 ], [ 4995632, 4996624, 4995637, 4996629 ], [ 4996625, 4996668, 4996630, 4996673 ], [ 4996669, 4997203, 4996674, 4997207 ], [ 4997204, 4998818, 4997208, 4998823 ], [ 4998819, 5004186, 4998824, 5004191 ], [ 5004187, 5004432, 5004192, 5004436 ], [ 5004433, 5008758, 5004437, 5008754 ], [ 5008759, 5013598, 5008755, 5013603 ], [ 5013599, 5016180, 5013604, 5016185 ], [ 5016181, 5016267, 5016186, 5016271 ], [ 5016268, 5017625, 5016272, 5017629 ], [ 5017626, 5018455, 5017630, 5018460 ], [ 5018456, 5019212, 5018461, 5019208 ], [ 5019213, 5019692, 5019209, 5019696 ], [ 5019693, 5026663, 5019697, 5026667 ], [ 5026664, 5026770, 5026668, 5026775 ], [ 5026771, 5028841, 5026776, 5028846 ], [ 5028842, 5030486, 5028847, 5030490 ], [ 5030487, 5031862, 5030491, 5031867 ], [ 5031863, 5036331, 
5031868, 5036336 ], [ 5036332, 5037861, 5036337, 5037866 ], [ 5037862, 5038887, 5037867, 5038892 ], [ 5038888, 5040440, 5038893, 5040445 ], [ 5040441, 5042902, 5040446, 5042907 ], [ 5042903, 5044827, 5042908, 5044832 ], [ 5044828, 5050524, 5044833, 5050529 ], [ 5050525, 5053866, 5050530, 5053871 ], [ 5053867, 5054707, 5053872, 5054712 ], [ 5054708, 5055021, 5054713, 5055026 ], [ 5055022, 5057873, 5055027, 5057878 ], [ 5057874, 5059653, 5057879, 5059657 ], [ 5059654, 5059734, 5059658, 5059739 ], [ 5059735, 5061548, 5059740, 5061553 ], [ 5061549, 5063342, 5061554, 5063347 ], [ 5063343, 5064119, 5063348, 5064124 ], [ 5064120, 5064638, 5064125, 5064643 ], [ 5064639, 5066316, 5064644, 5066320 ], [ 5066317, 5068774, 5066321, 5068779 ], [ 5068775, 5069157, 5068780, 5069162 ], [ 5069158, 5069375, 5069163, 5069380 ], [ 5069376, 5071533, 5069381, 5071538 ], [ 5071534, 5072259, 5071539, 5072264 ], [ 5072260, 5072332, 5072265, 5072337 ], [ 5072333, 5074288, 5072338, 5074293 ], [ 5074289, 5078241, 5074294, 5078237 ], [ 5078242, 5084929, 5078238, 5084933 ], [ 5084930, 5087508, 5084934, 5087513 ], [ 5087509, 5088409, 5087514, 5088414 ], [ 5088410, 5093963, 5088415, 5093968 ], [ 5093964, 5097754, 5093969, 5097750 ], [ 5097755, 5098261, 5097751, 5098266 ], [ 5098262, 5100144, 5098267, 5100148 ], [ 5100145, 5102713, 5100149, 5102709 ], [ 5102714, 5105308, 5102710, 5105312 ], [ 5105309, 5110195, 5105313, 5110199 ], [ 5110196, 5116037, 5110200, 5116042 ], [ 5116038, 5116647, 5116043, 5116652 ], [ 5116648, 5119282, 5116653, 5119287 ], [ 5119283, 5121619, 5119288, 5121623 ], [ 5121620, 5122889, 5121624, 5122893 ], [ 5122890, 5125691, 5122894, 5125695 ], [ 5125692, 5125943, 5125696, 5125947 ], [ 5125944, 5132940, 5125948, 5132945 ], [ 5132941, 5133068, 5132946, 5133072 ], [ 5133069, 5133405, 5133073, 5133410 ], [ 5133406, 5134558, 5133411, 5134563 ], [ 5134559, 5138091, 5134564, 5138095 ], [ 5138092, 5138432, 5138096, 5138437 ], [ 5138433, 5138944, 5138438, 5138949 ], [ 5138945, 5139157, 
5138950, 5139162 ], [ 5139158, 5139587, 5139163, 5139592 ], [ 5139588, 5142617, 5139593, 5142622 ], [ 5142618, 5148183, 5142623, 5148188 ], [ 5148184, 5148672, 5148189, 5148677 ], [ 5148673, 5150053, 5148678, 5150058 ], [ 5150054, 5151087, 5150059, 5151092 ], [ 5151088, 5153217, 5151093, 5153222 ], [ 5153218, 5154383, 5153223, 5154388 ], [ 5154384, 5154947, 5154389, 5154951 ], [ 5154948, 5155016, 5154952, 5155021 ], [ 5155017, 5156599, 5155022, 5156604 ], [ 5156600, 5157802, 5156605, 5157807 ], [ 5157803, 5157970, 5157808, 5157975 ], [ 5157971, 5160625, 5157976, 5160630 ], [ 5160626, 5162852, 5160631, 5162857 ], [ 5162853, 5164824, 5162858, 5164829 ], [ 5164825, 5171077, 5164830, 5171082 ], [ 5171078, 5176566, 5171083, 5176570 ], [ 5176567, 5180104, 5176571, 5180108 ], [ 5180105, 5180591, 5180109, 5180596 ], [ 5180592, 5184142, 5180597, 5184138 ], [ 5184143, 5188235, 5184139, 5188240 ], [ 5188236, 5190430, 5188241, 5190434 ], [ 5190431, 5194013, 5190435, 5194018 ], [ 5194014, 5194634, 5194019, 5194638 ], [ 5194635, 5199338, 5194639, 5199342 ], [ 5199339, 5200363, 5199343, 5200368 ], [ 5200364, 5200380, 5200369, 5200385 ], [ 5200381, 5207919, 5200386, 5207923 ], [ 5207920, 5210484, 5207924, 5210480 ], [ 5210485, 5211327, 5210481, 5211331 ], [ 5211328, 5212227, 5211332, 5212231 ], [ 5212228, 5212416, 5212232, 5212421 ], [ 5212417, 5214189, 5212422, 5214194 ], [ 5214190, 5218448, 5214195, 5218453 ], [ 5218449, 5221514, 5218454, 5221519 ], [ 5221515, 5222166, 5221520, 5222170 ], [ 5222167, 5222235, 5222171, 5222239 ], [ 5222236, 5222405, 5222240, 5222410 ], [ 5222406, 5223121, 5222411, 5223126 ], [ 5223122, 5225062, 5223127, 5225067 ], [ 5225063, 5227034, 5225068, 5227039 ], [ 5227035, 5230060, 5227040, 5230064 ], [ 5230061, 5235911, 5230065, 5235915 ], [ 5235912, 5237062, 5235916, 5237067 ], [ 5237063, 5238549, 5237068, 5238554 ], [ 5238550, 5239941, 5238555, 5239946 ], [ 5239942, 5241160, 5239947, 5241165 ], [ 5241161, 5243887, 5241166, 5243891 ], [ 5243888, 5244364, 
5243892, 5244368 ], [ 5244365, 5245009, 5244369, 5245014 ], [ 5245010, 5245420, 5245015, 5245425 ], [ 5245421, 5246665, 5245426, 5246670 ], [ 5246666, 5246882, 5246671, 5246887 ], [ 5246883, 5252321, 5246888, 5252326 ], [ 5252322, 5258113, 5252327, 5258117 ], [ 5258114, 5259300, 5258118, 5259304 ], [ 5259301, 5261182, 5259305, 5261187 ], [ 5261183, 5261507, 5261188, 5261503 ], [ 5261508, 5268860, 5261504, 5268864 ], [ 5268861, 5269941, 5268865, 5269945 ], [ 5269942, 5270945, 5269946, 5270949 ], [ 5270946, 5271167, 5270950, 5271171 ], [ 5271168, 5273111, 5271172, 5273116 ], [ 5273112, 5273132, 5273117, 5273137 ], [ 5273133, 5273680, 5273138, 5273685 ], [ 5273681, 5274282, 5273686, 5274287 ], [ 5274283, 5277485, 5274288, 5277490 ], [ 5277486, 5278602, 5277491, 5278607 ], [ 5278603, 5279312, 5278608, 5279316 ], [ 5279313, 5279965, 5279317, 5279969 ], [ 5279966, 5286668, 5279970, 5286673 ], [ 5286669, 5288844, 5286674, 5288849 ], [ 5288845, 5292505, 5288850, 5292509 ], [ 5292506, 5295145, 5292510, 5295149 ], [ 5295146, 5295426, 5295150, 5295431 ], [ 5295427, 5296411, 5295432, 5296407 ], [ 5296412, 5299331, 5296408, 5299336 ], [ 5299332, 5299740, 5299337, 5299745 ], [ 5299741, 5302074, 5299746, 5302079 ], [ 5302075, 5304927, 5302080, 5304932 ], [ 5304928, 5309929, 5304933, 5309925 ], [ 5309930, 5311027, 5309926, 5311023 ], [ 5311028, 5313142, 5311024, 5313138 ], [ 5313143, 5313612, 5313139, 5313616 ], [ 5313613, 5316196, 5313617, 5316201 ], [ 5316197, 5320424, 5316202, 5320428 ], [ 5320425, 5321884, 5320429, 5321889 ], [ 5321885, 5327443, 5321890, 5327448 ], [ 5327444, 5333009, 5327449, 5333014 ], [ 5333010, 5335558, 5333015, 5335563 ], [ 5335559, 5337934, 5335564, 5337939 ], [ 5337935, 5340296, 5337940, 5340301 ], [ 5340297, 5341322, 5340302, 5341326 ], [ 5341323, 5342510, 5341327, 5342515 ], [ 5342511, 5343834, 5342516, 5343839 ], [ 5343835, 5351733, 5343840, 5351737 ], [ 5351734, 5355838, 5351738, 5355842 ], [ 5355839, 5359283, 5355843, 5359288 ], [ 5359284, 5362430, 
5359289, 5362434 ], [ 5362431, 5362544, 5362435, 5362549 ], [ 5362545, 5364468, 5362550, 5364472 ], [ 5364469, 5369245, 5364473, 5369241 ], [ 5369246, 5375729, 5369242, 5375734 ], [ 5375730, 5375790, 5375735, 5375795 ], [ 5375791, 5377253, 5375796, 5377258 ], [ 5377254, 5378696, 5377259, 5378701 ], [ 5378697, 5382060, 5378702, 5382065 ], [ 5382061, 5388003, 5382066, 5388007 ], [ 5388004, 5388224, 5388008, 5388229 ], [ 5388225, 5389155, 5388230, 5389159 ], [ 5389156, 5391633, 5389160, 5391638 ], [ 5391634, 5399712, 5391639, 5399716 ], [ 5399713, 5401417, 5399717, 5401422 ], [ 5401418, 5406297, 5401423, 5406301 ], [ 5406298, 5406537, 5406302, 5406542 ], [ 5406538, 5406850, 5406543, 5406854 ], [ 5406851, 5408637, 5406855, 5408642 ], [ 5408638, 5410093, 5408643, 5410097 ], [ 5410094, 5417270, 5410098, 5417275 ], [ 5417271, 5419353, 5417276, 5419358 ], [ 5419354, 5420144, 5419359, 5420149 ], [ 5420145, 5420216, 5420150, 5420221 ], [ 5420217, 5420740, 5420222, 5420745 ], [ 5420741, 5423081, 5420746, 5423085 ], [ 5423082, 5424075, 5423086, 5424079 ], [ 5424076, 5425137, 5424080, 5425141 ], [ 5425138, 5427477, 5425142, 5427482 ], [ 5427478, 5429323, 5427483, 5429328 ], [ 5429324, 5429631, 5429329, 5429635 ], [ 5429632, 5432849, 5429636, 5432853 ], [ 5432850, 5432874, 5432854, 5432878 ], [ 5432875, 5440584, 5432879, 5440588 ], [ 5440585, 5440833, 5440589, 5440837 ], [ 5440834, 5441634, 5440838, 5441639 ], [ 5441635, 5446602, 5441640, 5446606 ], [ 5446603, 5448663, 5446607, 5448668 ], [ 5448664, 5452231, 5448669, 5452236 ], [ 5452232, 5453350, 5452237, 5453354 ], [ 5453351, 5455164, 5453355, 5455168 ], [ 5455165, 5458274, 5455169, 5458279 ], [ 5458275, 5459524, 5458280, 5459529 ], [ 5459525, 5468509, 5459530, 5468514 ], [ 5468510, 5469773, 5468515, 5469778 ], [ 5469774, 5474040, 5469779, 5474044 ], [ 5474041, 5475379, 5474045, 5475384 ], [ 5475380, 5476063, 5475385, 5476068 ], [ 5476064, 5477860, 5476069, 5477865 ], [ 5477861, 5478124, 5477866, 5478129 ], [ 5478125, 5478577, 
5478130, 5478582 ], [ 5478578, 5479176, 5478583, 5479181 ], [ 5479177, 5483012, 5479182, 5483017 ], [ 5483013, 5483809, 5483018, 5483814 ], [ 5483810, 5490967, 5483815, 5490963 ], [ 5490968, 5491739, 5490964, 5491743 ], [ 5491740, 5495234, 5491744, 5495239 ], [ 5495235, 5498449, 5495240, 5498449 ] ] def _cut(seq) cuts = Bio::RestrictionEnzyme::Analysis.cut(seq, "SacI", "EcoRI", "BstEII", {:view_ranges => true}) end end #TestEcoliO157H7_3enzymes end #module TestRestrictionEnzymeAnalysisCutLong bio-1.4.3.0001/sample/demo_kegg_reaction.rb0000644000004100000410000000274412200110570020305 0ustar www-datawww-data# # = sample/demo_kegg_reaction.rb - demonstration of Bio::KEGG::REACTION # # Copyright:: Copyright (C) 2004 Toshiaki Katayama # Copyright:: Copyright (C) 2009 Kozo Nishida # License:: The Ruby License # # # == Description # # Demonstration of Bio::KEGG::REACTION, the parser class for the KEGG # REACTION biochemical reaction database. # # == Usage # # Specify files containing KEGG REACTION data. # # $ ruby demo_kegg_reaction.rb files... # # Example usage using test data: # # $ ruby -Ilib sample/demo_kegg_reaction.rb test/data/KEGG/R00006.reaction # # == Example of running this script # # Download test data. # # $ ruby -Ilib bin/br_biofetch.rb reaction R00259 > R00259.reaction # $ ruby -Ilib bin/br_biofetch.rb reaction R02282 > R02282.reaction # # Run this script. # # $ ruby -Ilib sample/demo_kegg_reaction.rb R00259.reaction R02282.reaction # # == Development information # # The code was moved from lib/bio/db/kegg/reaction.rb and modified. 
# require 'bio' Bio::FlatFile.foreach(Bio::KEGG::REACTION, ARGF) do |rn| puts "### rn = Bio::KEGG::REACTION.new(str)" puts "# rn.entry_id" p rn.entry_id puts "# rn.name" p rn.name puts "# rn.definition" p rn.definition puts "# rn.equation" p rn.equation puts "# rn.rpairs" p rn.rpairs puts "# rn.pathways" p rn.pathways puts "# rn.enzymes" p rn.enzymes puts "# rn.orthologs" p rn.orthologs puts "# rn.orthologs_as_hash" p rn.orthologs_as_hash puts "=" * 78 end bio-1.4.3.0001/sample/goslim.rb0000755000004100000410000001516712200110570016000 0ustar www-datawww-data#!/usr/bin/env ruby # # goslim.rb - making a GO slim histgram # # Usage: # # % goslim.rb -p process.ontology -f function.ontology \ # -c component.ontology -s goslim_goa.2002 -g gene_association.mgi \ # -o mgi -r # % R < mgi.R # % gv mgi.pdf # # Copyright:: Copyright (C) 2003 # Mitsuteru C. Nakao # License:: The Ruby License # # $Id: goslim.rb,v 1.5 2007/04/05 23:35:42 trevor Exp $ # SCRIPT_VERSION = '$Id: goslim.rb,v 1.5 2007/04/05 23:35:42 trevor Exp $' USAGE = "${__FILE__} - GO slim Usage: #{__FILE__} -p process.ontology -f function.ontology \ -c component.ontolgy -g gene_association.mgi -s goslim_goa.2002 \ -o goslim.uniqued.out -r #{__FILE__} -p process.ontology -f function.ontology \ -c component.ontolgy -l gene_association.list -s goslim_goa.2002 \ -o mgi.out -r #{__FILE__} -p process.ontology -f function.ontology \ -c component.ontolgy -g gene_association.mgi -s goslim_goa.2002 >\ go_goslit.paired.list Options; -p,--process -f,--function -c,--component -g,--ga -l,--galist -s,--goslim -o,--output -- output file name. -r,--r_script -- Writing a R script in .R to plot a barplot. -h,--help -v,--version Format: GO ID list: /^GO:\d{7}/ for each line Mitsuteru C. 
Nakao " require 'getoptlong' parser = GetoptLong.new parser.set_options( ['--process', '-p', GetoptLong::REQUIRED_ARGUMENT], ['--function', '-f', GetoptLong::REQUIRED_ARGUMENT], ['--component', '-c', GetoptLong::REQUIRED_ARGUMENT], ['--ga', '-g', GetoptLong::REQUIRED_ARGUMENT], ['--galist', '-l', GetoptLong::REQUIRED_ARGUMENT], ['--goslim', '-s', GetoptLong::REQUIRED_ARGUMENT], ['--output', '-o', GetoptLong::REQUIRED_ARGUMENT], ['--r_script', '-r', GetoptLong::NO_ARGUMENT], ['--help', '-h', GetoptLong::NO_ARGUMENT], ['--version', '-v', GetoptLong::NO_ARGUMENT]) begin parser.each_option do |name, arg| eval "$OPT_#{name.sub(/^--/, '').gsub(/-/, '_').upcase} = '#{arg}'" end rescue exit(1) end if $OPT_VERSION puts SCRIPT_VERSION exit(0) end if $OPT_HELP or !($OPT_PROCESS or $OPT_FUNCTION or $OPT_COMPONENT or ($OPT_GA or $OPT_GALIST)) puts USAGE exit(0) end # subroutines def slim2r(datname) tmp = "# usage: % R --vanilla < #{datname}.R data <- read.delim2('#{datname}') dat <- data$count names(dat) <- paste(data$GO.Term, dat) # set graphc format pdf('#{datname}.pdf') #postscript('#{datname}.ps') # outside margins par(mai = c(1,2.8,1,0.7)) barplot(dat, cex.names = 0.6, # row names font size las = 2, # set horizontal row names horiz = T, # set horizontal main = 'GO slim', # main title # set color schema, proc, blue(3); func, red(2); comp, green(4) col = cbind(c(data$aspect == 'process'), c(data$aspect == 'function'), c(data$aspect == 'component')) %*% c(4,2,3)) # color dev.off() " end # build GOslim uniqued list def slim(ontology, slim_ids, tmp, ga, aspect) tmp[aspect] = Hash.new(0) slim_ids.each {|slim_id| term = ontology.goid2term(slim_id) if term tmp[aspect][term] = 0 else next end ga.each {|gaid| begin res = ontology.bfs_shortest_path(slim_id, gaid) tmp[aspect][term] += 1 if res[0] rescue NameError $stderr.puts "Warnning: GO:#{slim_id} (#{term}) doesn't exist in the #{aspect}.ontology." 
tmp[aspect].delete(term) break end } } end # build GO-GOslim uniqued list def slim2(ontology, slim_ids, tmp, ga, aspect) tmp[aspect] = Hash.new slim_ids.each {|slim_id| term = ontology.goid2term(slim_id) if term begin unless tmp[aspect][term]['GOslim'].index(slim_id) tmp[aspect][term]['GOslim'] << slim_id end rescue NameError tmp[aspect][term] = {'GOslim'=>[slim_id], 'GO'=>[]} end else next end ga.each {|gaid| begin res = ontology.bfs_shortest_path(slim_id, gaid) tmp[aspect][term]['GO'] << gaid if res[0] rescue NameError break end } } end # # main # require 'bio/db/go' aspects = ['process', 'function', 'component'] rootids = { 'process' => '0008150', 'function' => '0003674', 'component' => '0005575'} # files open ios = {} files = { 'process' => $OPT_PROCESS, 'function' => $OPT_FUNCTION, 'component' => $OPT_COMPONENT, 'ga' => $OPT_GA, # gene-association 'list' => $OPT_GALIST, # gene-association list 'slim' => $OPT_GOSLIM} # GO slim files.each {|k, file_name| next if file_name == nil ios[k] = File.open(file_name) } if $OPT_OUTPUT ios['output'] = File.new($OPT_OUTPUT, "w+") ios['r_script'] = File.new("#{$OPT_OUTPUT}.R", "w+") else ios['r_script'] = ios['output'] = $stdout end # start # ontology ontology = {} aspects.each {|aspect| ontology[aspect] = Bio::GO::Ontology.new(ios[aspect].read) } # GO slim goslim = Bio::GO::Ontology.new(ios['slim'].read) # assign a aspect to terms in the GO slim. 
slim_ids = Hash.new([]) goslim.to_list.map {|ent| ent.node }.flatten.uniq.each {|goid| rootids.each {|aspect, rootid| begin a,b = ontology[aspect].bfs_shortest_path(rootid, goid) slim_ids[aspect] << goid rescue NameError $stderr.puts "Error: (#{rootid}, #{goid})" end } } # gene-associations ga_ids = [] if $OPT_GA ga = Bio::GO::GeneAssociation.parser(ios['ga'].read) ga_ids = ga.map {|ent| ent.goid } elsif $OPT_GALIST while line = ios['list'].gets if /^GO:(\d{7})/ =~ line goid = $1 ga_ids << goid end end else puts "Error: -l or -g options" exit end # count number count = Hash.new(0) aspects.each {|aspect| slim2(ontology[aspect], slim_ids[aspect], count, ga_ids, aspect) } # output if $OPT_R_SCRIPT and $OPT_OUTPUT tmp = [['aspect', 'count', 'GO Term'].join("\t")] else tmp = [['aspect', 'GO ID', 'GOslim Term', 'GOslim ID'].join("\t")] end ['component','function','process'].each {|aspect| count[aspect].sort {|a, b| b[1]['GO'].size <=> a[1]['GO'].size }.each {|term, value| next if term == "" if $OPT_R_SCRIPT and $OPT_OUTPUT tmp << [aspect, value['GO'].size, term].join("\t") else value['GO'].each {|goid| tmp << [aspect, "GO:#{goid}", term, value['GOslim'].map {|e| "GO:#{e}" }.join(' ')].join("\t") } end } } ios['output'].puts tmp.join("\n") if $OPT_R_SCRIPT and $OPT_OUTPUT ios['r_script'].puts slim2r($OPT_OUTPUT) end # bio-1.4.3.0001/sample/gb2fasta.rb0000755000004100000410000000163412200110570016171 0ustar www-datawww-data#!/usr/bin/env ruby # # gb2fasta.rb - convert GenBank entry into FASTA format (nuc) # # Copyright (C) 2001 KATAYAMA Toshiaki # Copyright (C) 2002 Yoshinori K. Okuji # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # $Id: gb2fasta.rb,v 0.6 2008/02/05 12:11:11 pjotr Exp $ # require 'bio' include Bio ff = FlatFile.new(GenBank, ARGF) while gb = ff.next_entry print gb.seq.to_fasta("gb:#{gb.entry_id} #{gb.definition}", 70) end bio-1.4.3.0001/sample/genes2pep.rb0000755000004100000410000000167312200110570016373 0ustar www-datawww-data#!/usr/bin/env ruby # # genes2nuc.rb - convert KEGG/GENES entry into FASTA format (nuc) # # Copyright (C) 2001 KATAYAMA Toshiaki # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # $Id: genes2pep.rb,v 0.4 2002/06/23 20:21:56 k Exp $ # require 'bio/db/kegg/genes' require 'bio/extend' include Bio while gets(KEGG::GENES::DELIMITER) genes = KEGG::GENES.new($_) next if genes.aalen == 0 puts ">#{genes.entry_id} #{genes.definition}" puts genes.aaseq.fold(60+12, 12) end bio-1.4.3.0001/sample/demo_kegg_glycan.rb0000644000004100000410000000277612200110570017763 0ustar www-datawww-data# # = sample/demo_kegg_glycan.rb - demonstration of Bio::KEGG::GLYCAN # # Copyright:: Copyright (C) 2004 Toshiaki Katayama # License:: The Ruby License # # # == Description # # Demonstration of Bio::KEGG::GLYCAN, a parser class for the KEGG GLYCAN # glycome informatics database. # # == Usage # # Specify files containing KEGG GLYCAN data. # # $ ruby demo_kegg_glycan.rb files... 
# # == Example of running this script # # Download test data. # # $ ruby -Ilib bin/br_biofetch.rb glycan G00001 > G00001.glycan # $ ruby -Ilib bin/br_biofetch.rb glycan G00024 > G00024.glycan # # Run this script. # # $ ruby -Ilib sample/demo_kegg_glycan.rb G00001.glycan G00024.glycan # # == Development information # # The code was moved from lib/bio/db/kegg/glycan.rb and modified. # require 'bio' Bio::FlatFile.foreach(Bio::KEGG::GLYCAN, ARGF) do |gl| #entry = ARGF.read # gl:G00024 #gl = Bio::KEGG::GLYCAN.new(entry) puts "### gl = Bio::KEGG::GLYCAN.new(str)" puts "# gl.entry_id" p gl.entry_id puts "# gl.name" p gl.name puts "# gl.composition" p gl.composition puts "# gl.mass" p gl.mass puts "# gl.keggclass" p gl.keggclass #puts "# gl.bindings" #p gl.bindings puts "# gl.compounds" p gl.compounds puts "# gl.reactions" p gl.reactions puts "# gl.pathways" p gl.pathways puts "# gl.enzymes" p gl.enzymes puts "# gl.orthologs" p gl.orthologs puts "# gl.references" p gl.references puts "# gl.dblinks" p gl.dblinks puts "# gl.kcf" p gl.kcf puts "=" * 78 end bio-1.4.3.0001/sample/any2fasta.rb0000755000004100000410000000252312200110570016366 0ustar www-datawww-data#!/usr/bin/env ruby # # any2fasta.rb - convert input file into FASTA format using a regex # filter # # Copyright (C) 2006 Pjotr Prins # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # $Id: any2fasta.rb,v 1.1 2006/02/17 14:59:27 pjotr Exp $ # require 'bio/io/flatfile' include Bio usage = < reduced.fasta USAGE if ARGV.size == 0 print usage exit 1 end # ---- Valid regular expression - if it is not a file regex = ARGV[0] if regex=~/^\// and !File.exist?(regex) ARGV.shift else regex = nil end ARGV.each do | fn | ff = Bio::FlatFile.auto(fn) ff.each_entry do |entry| if regex != nil next if eval("entry.seq !~ #{regex}") end print entry.seq.to_fasta(entry.definition,70) end end bio-1.4.3.0001/sample/fsplit.rb0000755000004100000410000000216212200110570015776 0ustar www-datawww-data#!/usr/bin/env ruby # # fsplit.rb - split FASTA file by each n entries # # Copyright (C) 2001 KATAYAMA Toshiaki # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # $Id: fsplit.rb,v 0.1 2001/06/21 08:22:29 katayama Exp $ # if ARGV.length != 2 print <<-USAGE fsplit.rb - split FASTA file by each n entries Usage : % ./fsplit.rb 2000 seq.f This will produce seq.f.1, seq.f.2, ... with containing 2000 sequences in each file. 
USAGE exit 1 end count = ARGV.shift.to_i i = -1 while gets if /^>/ i += 1 if i % count == 0 n = i / count out = File.new("#{$FILENAME}.#{n+1}", "w+") end end out.print end bio-1.4.3.0001/sample/tfastx2tab.rb0000755000004100000410000000462112200110570016561 0ustar www-datawww-data#!/usr/bin/env ruby # # tfastx2tab.rb - convert TFASTX (-m 6) output into tab delimited data for MySQL # # Usage: # # % tfastx2tab.rb TFASTX-output-file[s] > tfastx_results.tab # % mysql < tfastx_results.sql (use sample at the end of this file) # # Format accepted: # # % tfastx3[3][_t] -Q -H -m 6 query.f target.f ktup > TFASTX-output-file # # Copyright (C) 2001 KATAYAMA Toshiaki # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
#
#  $Id: tfastx2tab.rb,v 0.1 2001/06/21 08:26:14 katayama Exp $
#

# Reads TFASTX reports from the files named on the command line (or stdin)
# and prints one tab-delimited row per hit.
while line = gets

  # query: remember the query length for the hits that follow
  if line =~ /^\S+: (\d+) aa$/
    q_len = $1
  end

  # each hit
  if line =~ /^>>([^>]\S+).*\((\d+) aa\)$/
    target = $1
    t_len  = $2

    # The two lines after a hit header carry the scores.  Stop cleanly when
    # the report is truncated instead of calling split on nil.
    score_line   = gets
    overlap_line = gets
    break unless score_line && overlap_line

    # _ = dummy variable (labels between the values)
    _, frame, _, initn, _, init1, _, opt, _, zscore, _, bits, _, evalue =
      score_line.split(/\s+/)
    _, _, sw, ident, _, ugident, _, _, overlap, _, _, lap =
      overlap_line.split(/\s+/)

    # query-hit pair
    print "#{$FILENAME}\t#{q_len}\t#{target}\t#{t_len}"

    # pick up values
    values = [
      initn, init1, opt, zscore, bits, evalue,
      sw, ident, ugident, overlap, lap
    ]

    # print values, keeping only digits and the characters . : e -
    # (to_s lets a field missing from a short line print as empty)
    values.each do |value|
      print "\t#{value.to_s.tr('^0-9.:e\-', '')}"
    end

    print "\t#{frame}\n"

  end
end

=begin

MySQL tfastx_results.sql sample

CREATE DATABASE IF NOT EXISTS db_name;
CREATE TABLE IF NOT EXISTS db_name.table_name (
  query    varchar(25) not NULL,
  q_len    integer unsigned default 0,
  target   varchar(25) not NULL,
  t_len    integer unsigned default 0,
  initn    integer unsigned default 0,
  init1    integer unsigned default 0,
  opt      integer unsigned default 0,
  zscore   float default 0.0,
  bits     float default 0.0,
  evalue   float default 0.0,
  sw       integer unsigned default 0,
  ident    float default 0.0,
  ugident  float default 0.0,
  overlap  integer unsigned default 0,
  lap_at   varchar(25) default NULL,
  frame    varchar(5) default NULL
);
LOAD DATA LOCAL INFILE 'tfastx_results.tab' INTO TABLE db_name.table_name;

=end

# bio-1.4.3.0001/sample/demo_fasta_remote.rb0000644000004100000410000000221612200110570020147 0ustar www-datawww-data
#
# = sample/demo_fasta_remote.rb - demonstration of FASTA execution using GenomeNet web service
#
# Copyright::  Copyright (C) 2001, 2002 Toshiaki Katayama
# License::    The Ruby License
#
# == Description
#
# Demonstration of Bio::Fasta.remote, wrapper class for FASTA execution using
# GenomeNet fasta.genome.jp web service.
#
# == Requirements
#
# * Internet connection
#
# == Usage
#
# Specify a file containing a nucleic acid sequence.
# The file format should be the fasta format.
#
#  $ ruby demo_fasta_remote.rb file.fst
#
# Example usage using test data:
#
#  $ ruby -Ilib sample/demo_fasta_remote.rb test/data/blast/b0002.faa
#
# Note that it may take very long time. Please wait for 3 to 5 minutes.
#
# == Development information
#
# The code was moved from lib/bio/appl/fasta.rb.
#

require 'bio'

#if __FILE__ == $0
  # Pretty-print results when the pp library can be loaded.  A failed require
  # raises LoadError, which a bare "rescue" (StandardError only) would not
  # catch, so it is named explicitly.
  begin
    require 'pp'
    alias p pp
  rescue LoadError
  end

# serv = Bio::Fasta.local('fasta34', 'hoge.nuc')
# serv = Bio::Fasta.local('fasta34', 'hoge.pep')
# serv = Bio::Fasta.local('ssearch34', 'hoge.pep')

  # This may take 3 minutes or so.
  serv = Bio::Fasta.remote('fasta', 'genes')
  p serv.query(ARGF.read)
#end

# bio-1.4.3.0001/sample/demo_pubmed.rb0000644000004100000410000000576612200110570016767 0ustar www-datawww-data
#
# = sample/demo_pubmed.rb - demonstration of Bio::PubMed
#
# Copyright::  Copyright (C) 2001, 2007, 2008 Toshiaki Katayama
# Copyright::  Copyright (C) 2006 Jan Aerts
# License::    The Ruby License
#
#
# == Description
#
# Demonstration of Bio::PubMed, NCBI Entrez/PubMed client module.
#
# == Requirements
#
# Internet connection is needed.
#
# == Usage
#
# Simply run this script.
#
#  $ ruby demo_pubmed.rb
#
# == Development information
#
# The code was moved from lib/bio/io/pubmed.rb and modified as below:
# * Codes using Entrez CGI are disabled.
require 'bio'

Bio::NCBI.default_email = 'staff@bioruby.org'

# Runs the demonstration calls (esearch, efetch, pmfetch) against +client+
# and prints each result.  +client+ is either a Bio::PubMed instance or the
# Bio::PubMed class itself; the same sequence of calls is made on both.
def run_pubmed_demo(client)
  query = "(genome AND analysis) OR bioinformatics"

  puts "--- Search PubMed by E-Utils ---"
  opts = {"rettype" => "count"}
  # the count query is issued three times, with a timestamp before each
  3.times do
    puts Time.now
    puts client.esearch(query, opts)
  end
  puts Time.now
  client.esearch(query).each do |x|
    puts x
  end

  puts "--- Retrieve PubMed entry by E-Utils ---"
  # the same entry is requested by Integer id and then by String id
  [16381885, "16381885", "16381885"].each do |id|
    puts Time.now
    puts client.efetch(id)
  end
  puts Time.now
  opts = {"retmode" => "xml"}
  puts client.efetch([10592173, 14693808], opts)
  puts Time.now
  puts client.efetch(["10592173", "14693808"], opts)

  #puts "--- Search PubMed by Entrez CGI ---"
  #client.search("(genome AND analysis) OR bioinformatics").each do |x|
  #  p x
  #end

  #puts "--- Retrieve PubMed entry by Entrez CGI ---"
  #puts client.query("16381885")

  puts "--- Retrieve PubMed entry by PMfetch ---"
  puts client.pmfetch("16381885")
end

#if __FILE__ == $0

  puts "=== instance methods ==="
  run_pubmed_demo(Bio::PubMed.new)

  puts "=== class methods ==="
  run_pubmed_demo(Bio::PubMed)

#end

# bio-1.4.3.0001/sample/demo_tmhmm_report.rb0000644000004100000410000000256212200110570020217 0ustar www-datawww-data
#
# = sample/demo_tmhmm_report.rb - demonstration of Bio::TMHMM::Report
#
# Copyright::  Copyright (C) 2003
#              Mitsuteru C. Nakao
# License::    The Ruby License
#
#
# == Description
#
# Demonstration of Bio::TMHMM::Report, TMHMM output parser.
#
# == Usage
#
# Specify files containing TMHMM reports.
#
#  $ ruby demo_tmhmm_report.rb files...
#
# Example usage using test data:
#
#  $ ruby -Ilib sample/demo_tmhmm_report.rb test/data/TMHMM/sample.report
#
# == References
#
# * http://www.cbs.dtu.dk/services/TMHMM/
#
# == Development information
#
# The code was moved from lib/bio/appl/tmhmm/report.rb.
# require 'bio' #if __FILE__ == $0 begin require 'pp' alias p pp rescue LoadError end Bio::TMHMM.reports(ARGF.read) do |ent| puts '==>' puts ent.to_s pp ent p [:entry_id, ent.entry_id] p [:query_len, ent.query_len] p [:predicted_tmhs, ent.predicted_tmhs] p [:tmhs_size, ent.tmhs.size] p [:exp_aas_in_tmhs, ent.exp_aas_in_tmhs] p [:exp_first_60aa, ent.exp_first_60aa] p [:total_prob_of_N_in, ent.total_prob_of_N_in] ent.tmhs.each do |t| p t p [:entry_id, t.entry_id] p [:version, t.version] p [:status, t.status] p [:range, t.range] p [:pos, t.pos] end p [:helix, ent.helix] p ent.tmhs.map {|t| t if t.status == 'TMhelix' }.compact end #end bio-1.4.3.0001/sample/pmsearch.rb0000755000004100000410000000251112200110570016275 0ustar www-datawww-data#!/usr/bin/env ruby # # pmsearch.rb - generate BibTeX format reference list by PubMed keyword search # # Copyright (C) 2002 KATAYAMA Toshiaki # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id:$ # require 'bio' Bio::NCBI.default_email = 'staff@bioruby.org' if ARGV[0] =~ /\A\-f/ ARGV.shift form = ARGV.shift else form = 'bibtex' end keywords = ARGV.join(' ') uids = Bio::PubMed.esearch(keywords) if uids and !uids.empty? 
then entries = Bio::PubMed.efetch(uids) else entries = [] end entries.each do |entry| case form when 'medline' puts entry else puts Bio::MEDLINE.new(entry).reference.__send__(form.intern) end print "\n" end bio-1.4.3.0001/sample/genes2tab.rb0000755000004100000410000000335212200110570016351 0ustar www-datawww-data#!/usr/bin/env ruby # # genes2tab.rb - convert KEGG/GENES into tab delimited data for MySQL # # Usage: # # % genes2tab.rb /bio/db/kegg/genes/e.coli > genes_eco.tab # # Copyright (C) 2001 KATAYAMA Toshiaki # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # $Id: genes2tab.rb,v 0.5 2002/06/23 20:21:56 k Exp $ # require 'bio/db/kegg/genes' include Bio while entry = gets(KEGG::GENES::DELIMITER) genes = KEGG::GENES.new(entry) db = genes.dblinks.inspect if genes.codon_usage.length == 64 cu = genes.codon_usage.join(' ') else cu = '\N' end ary = [ genes.entry_id, genes.division, genes.organism, genes.name, genes.definition, genes.keggclass, genes.position, db, cu, genes.aalen, genes.aaseq, genes.nalen, genes.naseq ] puts ary.join("\t") end =begin CREATE DATABASE IF NOT EXISTS db_name; CREATE TABLE IF NOT EXISTS db_name.genes ( id varchar(30) not NULL, # ENTRY ID division varchar(30), # CDS, tRNA etc. 
organism varchar(255), gene varchar(255), definition varchar(255), keggclass varchar(255), position varchar(255), dblinks varchar(255), codon_usage text, aalen integer, aaseq text, nalen integer, naseq text ); LOAD DATA LOCAL INFILE 'genes.tab' INTO TABLE db_name.genes; =end bio-1.4.3.0001/sample/demo_sequence.rb0000644000004100000410000001036612200110570017313 0ustar www-datawww-data# # = sample/demo_sequence.rb - demonstration of sequence manipulation # # Copyright:: Copyright (C) 2000-2006 # Toshiaki Katayama , # Mitsuteru C. Nakao # License:: The Ruby License # # $Id:$ # # == Description # # Demonstration of biological sequence manipulation. # # == Usage # # Simply run this script. # # $ ruby demo_sequence.rb # # == Development information # # The code was moved from lib/bio/sequence.rb. # require 'bio' #if __FILE__ == $0 puts "== Test Bio::Sequence::NA.new" p Bio::Sequence::NA.new('') p na = Bio::Sequence::NA.new('atgcatgcATGCATGCAAAA') p rna = Bio::Sequence::NA.new('augcaugcaugcaugcaaaa') puts "\n== Test Bio::Sequence::AA.new" p Bio::Sequence::AA.new('') p aa = Bio::Sequence::AA.new('ACDEFGHIKLMNPQRSTVWYU') puts "\n== Test Bio::Sequence#to_s" p na.to_s p aa.to_s puts "\n== Test Bio::Sequence#subseq(2,6)" p na p na.subseq(2,6) puts "\n== Test Bio::Sequence#[2,6]" p na p na[2,6] puts "\n== Test Bio::Sequence#to_fasta('hoge', 8)" puts na.to_fasta('hoge', 8) puts "\n== Test Bio::Sequence#window_search(15)" p na na.window_search(15) {|x| p x} puts "\n== Test Bio::Sequence#total({'a'=>0.1,'t'=>0.2,'g'=>0.3,'c'=>0.4})" p na.total({'a'=>0.1,'t'=>0.2,'g'=>0.3,'c'=>0.4}) puts "\n== Test Bio::Sequence#composition" p na p na.composition p rna p rna.composition puts "\n== Test Bio::Sequence::NA#splicing('complement(join(1..5,16..20))')" p na p na.splicing("complement(join(1..5,16..20))") p rna p rna.splicing("complement(join(1..5,16..20))") puts "\n== Test Bio::Sequence::NA#complement" p na.complement p rna.complement p Bio::Sequence::NA.new('tacgyrkmhdbvswn').complement 
p Bio::Sequence::NA.new('uacgyrkmhdbvswn').complement puts "\n== Test Bio::Sequence::NA#translate" p na p na.translate p rna p rna.translate puts "\n== Test Bio::Sequence::NA#gc_percent" p na.gc_percent p rna.gc_percent puts "\n== Test Bio::Sequence::NA#illegal_bases" p na.illegal_bases p Bio::Sequence::NA.new('tacgyrkmhdbvswn').illegal_bases p Bio::Sequence::NA.new('abcdefghijklmnopqrstuvwxyz-!%#$@').illegal_bases puts "\n== Test Bio::Sequence::NA#molecular_weight" p na p na.molecular_weight p rna p rna.molecular_weight puts "\n== Test Bio::Sequence::NA#to_re" p Bio::Sequence::NA.new('atgcrymkdhvbswn') p Bio::Sequence::NA.new('atgcrymkdhvbswn').to_re p Bio::Sequence::NA.new('augcrymkdhvbswn') p Bio::Sequence::NA.new('augcrymkdhvbswn').to_re puts "\n== Test Bio::Sequence::NA#names" p na.names puts "\n== Test Bio::Sequence::NA#pikachu" p na.pikachu puts "\n== Test Bio::Sequence::NA#randomize" print "Orig : "; p na print "Rand : "; p na.randomize print "Rand : "; p na.randomize print "Rand : "; p na.randomize.randomize print "Block : "; na.randomize do |x| print x end; puts print "Orig : "; p rna print "Rand : "; p rna.randomize print "Rand : "; p rna.randomize print "Rand : "; p rna.randomize.randomize print "Block : "; rna.randomize do |x| print x end; puts puts "\n== Test Bio::Sequence::NA.randomize(counts)" print "Count : "; p counts = {'a'=>10,'c'=>20,'g'=>30,'t'=>40} print "Rand : "; p Bio::Sequence::NA.randomize(counts) print "Count : "; p counts = {'a'=>10,'c'=>20,'g'=>30,'u'=>40} print "Rand : "; p Bio::Sequence::NA.randomize(counts) print "Block : "; Bio::Sequence::NA.randomize(counts) {|x| print x}; puts puts "\n== Test Bio::Sequence::AA#codes" p aa p aa.codes puts "\n== Test Bio::Sequence::AA#names" p aa p aa.names puts "\n== Test Bio::Sequence::AA#molecular_weight" p aa.subseq(1,20) p aa.subseq(1,20).molecular_weight puts "\n== Test Bio::Sequence::AA#randomize" aaseq = 'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA' s = 
Bio::Sequence::AA.new(aaseq) print "Orig : "; p s print "Rand : "; p s.randomize print "Rand : "; p s.randomize print "Rand : "; p s.randomize.randomize print "Block : "; s.randomize {|x| print x}; puts puts "\n== Test Bio::Sequence::AA.randomize(counts)" print "Count : "; p counts = s.composition print "Rand : "; puts Bio::Sequence::AA.randomize(counts) print "Block : "; Bio::Sequence::AA.randomize(counts) {|x| print x}; puts #end bio-1.4.3.0001/sample/genome2tab.rb0000755000004100000410000000323612200110570016523 0ustar www-datawww-data#!/usr/bin/env ruby # # genome2tab.rb - convert KEGG/GENOME into tab delimited data for MySQL # # Usage: # # % genome2tab.rb /bio/db/kegg/genome/genome > genome.tab # # Copyright (C) 2001 KATAYAMA Toshiaki # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # $Id: genome2tab.rb,v 0.5 2002/06/23 20:21:56 k Exp $ # require 'bio/db/kegg/genome' include Bio while entry = gets(KEGG::GENOME::DELIMITER) genome = KEGG::GENOME.new(entry) ref = genome.references.inspect chr = genome.chromosomes.inspect ary = [ genome.entry_id, genome.name, genome.definition, genome.taxid, genome.taxonomy, genome.comment, ref, chr, genome.nalen, genome.num_gene, genome.num_rna, genome.gc, genome.genomemap, ] puts ary.join("\t") end =begin CREATE DATABASE IF NOT EXISTS db_name; CREATE TABLE IF NOT EXISTS db_name.genome ( id varchar(30) not NULL, name varchar(80), definition varchar(255), taxid varchar(30), taxonomy varchar(255), comment varchar(255), reference text, chromosome text, nalen integer, num_gene integer, num_rna integer, gc float, genomemap varchar(30), ); LOAD DATA LOCAL INFILE 'genome.tab' INTO TABLE db_name.genome; =end bio-1.4.3.0001/sample/demo_aminoacid.rb0000644000004100000410000000565112200110570017430 0ustar www-datawww-data# # = sample/demo_aminoacid.rb - demonstration of Bio::AminoAcid # # Copyright:: Copyright (C) 2001, 2005 # Toshiaki Katayama # License:: The Ruby License # # # == Description # # Demonstration of Bio::AminoAcid, the class for amino acid data. # # == Usage # # Simply run this script. # # $ ruby demo_aminoacid.rb # # == Development information # # The code was moved from lib/bio/data/aa.rb. 
# require 'bio' #if __FILE__ == $0 puts "### aa = Bio::AminoAcid.new" aa = Bio::AminoAcid.new puts "# Bio::AminoAcid['A']" p Bio::AminoAcid['A'] puts "# aa['A']" p aa['A'] puts "# Bio::AminoAcid.name('A'), Bio::AminoAcid.name('Ala')" p Bio::AminoAcid.name('A'), Bio::AminoAcid.name('Ala') puts "# aa.name('A'), aa.name('Ala')" p aa.name('A'), aa.name('Ala') puts "# Bio::AminoAcid.to_1('alanine'), Bio::AminoAcid.one('alanine')" p Bio::AminoAcid.to_1('alanine'), Bio::AminoAcid.one('alanine') puts "# aa.to_1('alanine'), aa.one('alanine')" p aa.to_1('alanine'), aa.one('alanine') puts "# Bio::AminoAcid.to_1('Ala'), Bio::AminoAcid.one('Ala')" p Bio::AminoAcid.to_1('Ala'), Bio::AminoAcid.one('Ala') puts "# aa.to_1('Ala'), aa.one('Ala')" p aa.to_1('Ala'), aa.one('Ala') puts "# Bio::AminoAcid.to_1('A'), Bio::AminoAcid.one('A')" p Bio::AminoAcid.to_1('A'), Bio::AminoAcid.one('A') puts "# aa.to_1('A'), aa.one('A')" p aa.to_1('A'), aa.one('A') puts "# Bio::AminoAcid.to_3('alanine'), Bio::AminoAcid.three('alanine')" p Bio::AminoAcid.to_3('alanine'), Bio::AminoAcid.three('alanine') puts "# aa.to_3('alanine'), aa.three('alanine')" p aa.to_3('alanine'), aa.three('alanine') puts "# Bio::AminoAcid.to_3('Ala'), Bio::AminoAcid.three('Ala')" p Bio::AminoAcid.to_3('Ala'), Bio::AminoAcid.three('Ala') puts "# aa.to_3('Ala'), aa.three('Ala')" p aa.to_3('Ala'), aa.three('Ala') puts "# Bio::AminoAcid.to_3('A'), Bio::AminoAcid.three('A')" p Bio::AminoAcid.to_3('A'), Bio::AminoAcid.three('A') puts "# aa.to_3('A'), aa.three('A')" p aa.to_3('A'), aa.three('A') puts "# Bio::AminoAcid.one2three('A')" p Bio::AminoAcid.one2three('A') puts "# aa.one2three('A')" p aa.one2three('A') puts "# Bio::AminoAcid.three2one('Ala')" p Bio::AminoAcid.three2one('Ala') puts "# aa.three2one('Ala')" p aa.three2one('Ala') puts "# Bio::AminoAcid.one2name('A')" p Bio::AminoAcid.one2name('A') puts "# aa.one2name('A')" p aa.one2name('A') puts "# Bio::AminoAcid.name2one('alanine')" p Bio::AminoAcid.name2one('alanine') puts 
"# aa.name2one('alanine')" p aa.name2one('alanine') puts "# Bio::AminoAcid.three2name('Ala')" p Bio::AminoAcid.three2name('Ala') puts "# aa.three2name('Ala')" p aa.three2name('Ala') puts "# Bio::AminoAcid.name2three('alanine')" p Bio::AminoAcid.name2three('alanine') puts "# aa.name2three('alanine')" p aa.name2three('alanine') puts "# Bio::AminoAcid.to_re('BZACDEFGHIKLMNPQRSTVWYU')" p Bio::AminoAcid.to_re('BZACDEFGHIKLMNPQRSTVWYU') #end bio-1.4.3.0001/GPL0000644000004100000410000004313112200110570013232 0ustar www-datawww-data GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. 
These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. 
(Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. 
c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. 
The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Also add information on how to contact you by electronic and paper mail. 
If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License. bio-1.4.3.0001/extconf.rb0000644000004100000410000000005212200110570014653 0ustar www-datawww-datarequire 'mkmf' create_makefile("bioruby") bio-1.4.3.0001/LEGAL0000644000004100000410000001064112200110570013434 0ustar www-datawww-dataLEGAL NOTICE INFORMATION ------------------------ All the files in this distribution are covered under either the Ruby's license (see the file COPYING) or public-domain except some files mentioned below. setup.rb: Copyright (c) 2000-2006 Minero Aoki This program is free software. You can distribute/modify this program under the terms of the GNU LGPL, Lesser General Public License version 2.1. 
sample/any2fasta.rb: Copyright (C) 2006 Pjotr Prins This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. sample/biofetch.rb: Copyright (C) 2002-2004 KATAYAMA Toshiaki This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. sample/enzymes.rb: Copyright (C) 2006 Pjotr Prins and Trevor Wennblom This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. sample/fasta2tab.rb: sample/fsplit.rb: sample/gb2tab.rb: sample/genes2nuc.rb: sample/genes2pep.rb: sample/genes2tab.rb: sample/genome2tab.rb: sample/gt2fasta.rb: sample/ssearch2tab.rb: sample/tfastx2tab.rb: sample/vs-genes.rb: Copyright (C) 2001 KATAYAMA Toshiaki This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. sample/fastagrep.rb: sample/fastasort.rb: sample/na2aa.rb: Copyright (C) 2008 KATAYAMA Toshiaki & Pjotr Prins This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. sample/gb2fasta.rb: Copyright (C) 2001 KATAYAMA Toshiaki Copyright (C) 2002 Yoshinori K. 
Okuji This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. sample/gbtab2mysql.rb: sample/genome2rb.rb: sample/pmfetch.rb: sample/pmsearch.rb: Copyright (C) 2002 KATAYAMA Toshiaki This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. sample/tdiary.rb: Copyright (C) 2003 KATAYAMA Toshiaki Mitsuteru C. Nakao Itoshi NIKAIDO Takeya KASUKAWA This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. test/data/uniprot/p53_human.uniprot: This Swiss-Prot entry is copyright. It is produced through a collaboration between the Swiss Institute of Bioinformatics and the EMBL outstation - the European Bioinformatics Institute. There are no restrictions on its use as long as its content is in no way modified and this statement is not removed. GPL: Copyright (C) 1989, 1991 Free Software Foundation, Inc. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. LGPL: Copyright (C) 1991, 1999 Free Software Foundation, Inc. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. 
bio-1.4.3.0001/.travis.yml0000644000004100000410000000366512200110570015006 0ustar www-datawww-datalanguage: ruby rvm: - 1.9.3 - 1.8.7 - rbx-18mode gemfile: - gemfiles/Gemfile.travis-ruby1.9 - gemfiles/Gemfile.travis-ruby1.8 before_install: - mkdir /tmp/bioruby - ruby gemfiles/modify-Gemfile.rb - ruby gemfiles/prepare-gemspec.rb matrix: include: - rvm: 1.9.2 gemfile: gemfiles/Gemfile.travis-ruby1.9 - rvm: rbx-19mode gemfile: gemfiles/Gemfile.travis-jruby1.9 - rvm: jruby-19mode gemfile: gemfiles/Gemfile.travis-jruby1.9 env: TMPDIR=/tmp/bioruby - rvm: 1.9.3 gemfile: gemfiles/Gemfile.travis-ruby1.9 env: BIORUBY_RAKE_DEFAULT_TASK=tar-integration-test - rvm: 1.9.3 gemfile: gemfiles/Gemfile.travis-ruby1.9 env: BIORUBY_RAKE_DEFAULT_TASK=gem-test - rvm: jruby-18mode gemfile: gemfiles/Gemfile.travis-jruby1.8 env: TMPDIR=/tmp/bioruby BIORUBY_RAKE_DEFAULT_TASK=tar-integration-test - rvm: jruby-18mode gemfile: gemfiles/Gemfile.travis-jruby1.8 env: TMPDIR=/tmp/bioruby BIORUBY_RAKE_DEFAULT_TASK=gem-test exclude: - rvm: 1.8.7 gemfile: gemfiles/Gemfile.travis-ruby1.9 - rvm: 1.9.3 gemfile: gemfiles/Gemfile.travis-ruby1.8 - rvm: rbx-18mode gemfile: gemfiles/Gemfile.travis-ruby1.9 allow_failures: - rvm: rbx-19mode gemfile: gemfiles/Gemfile.travis-jruby1.9 - rvm: jruby-19mode gemfile: gemfiles/Gemfile.travis-jruby1.9 env: TMPDIR=/tmp/bioruby - rvm: jruby-18mode gemfile: gemfiles/Gemfile.travis-jruby1.8 env: TMPDIR=/tmp/bioruby BIORUBY_RAKE_DEFAULT_TASK=tar-integration-test - rvm: jruby-18mode gemfile: gemfiles/Gemfile.travis-jruby1.8 env: TMPDIR=/tmp/bioruby BIORUBY_RAKE_DEFAULT_TASK=gem-test # uncomment this line if your project needs to run something other than `rake`: # script: bundle exec rspec spec #before_install: # - sudo apt-get update # - sudo apt-get install libxml2-dev libexpat1-dev # whitelist branches branches: only: - master bio-1.4.3.0001/test/0000755000004100000410000000000012200110570013642 5ustar 
www-datawww-databio-1.4.3.0001/test/bioruby_test_helper.rb0000644000004100000410000000450312200110570020242 0ustar www-datawww-data# # test/bioruby_test_helper.rb - Helper module for testing bioruby # # Copyright:: Copyright (C) 2009 Naohisa Goto # License:: The Ruby License # require 'pathname' unless defined? BioRubyTestDebug then BioRubyTestDebug = ENV['BIORUBY_TEST_DEBUG'].to_s.empty? ? false : true if BioRubyTestDebug then $stderr.puts "BioRuby test debug enabled." end end #BioRubyTestDebug unless defined? BioRubyTestGem then gem_version = ENV['BIORUBY_TEST_GEM'] if gem_version then $stderr.puts 'require "rubygems"' if BioRubyTestDebug require "rubygems" if gem_version.empty? then $stderr.puts "gem 'bio'" if BioRubyTestDebug gem 'bio' else $stderr.puts "gem 'bio', #{gem_version.inspect}" if BioRubyTestDebug gem 'bio', gem_version end end BioRubyTestGem = gem_version end unless defined? BioRubyTestLibPath then libpath = ENV['BIORUBY_TEST_LIB'] unless libpath then libpath = Pathname.new(File.join(File.dirname(__FILE__), "..", "lib")).cleanpath.to_s end # do not add path to $: if BIORUBY_TEST_LIB is empty string # or BioRubyTestGem is true. if (libpath and libpath.empty?) or BioRubyTestGem then libpath = nil end if libpath then libpath.freeze unless $:[0] == libpath then $:.unshift(libpath) if BioRubyTestDebug then $stderr.puts "Added #{libpath.inspect} to $:." end else if BioRubyTestDebug then $stderr.puts "NOT added #{libpath.inspect} to $:. because it is already on the top of $:." end end end # (String or nil) Path to be added to $:. # It may or may not be the path of bioruby. BioRubyTestLibPath = libpath if BioRubyTestDebug then $stderr.print "$: = [", "\n" $stderr.puts($:.collect { |x| "\t#{x.inspect}" }.join(",\n")) $stderr.print "]", "\n" end end #BioRubyTestLibPath unless defined? BioRubyTestDataPath and BioRubyTestDataPath datapath = ENV['BIORUBY_TEST_DATA'] if datapath.to_s.empty? 
then datapath = Pathname.new(File.join(File.dirname(__FILE__), "data")).cleanpath.to_s end datapath.freeze # (String) Path to the test data. BioRubyTestDataPath = datapath if BioRubyTestDebug then $stderr.print "DataPath = ", BioRubyTestDataPath.inspect, "\n" end end bio-1.4.3.0001/test/runner.rb0000644000004100000410000000212012200110570015473 0ustar www-datawww-data#!/usr/bin/env ruby # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' unit_test = File.join(File.dirname($0), "unit") func_test = File.join(File.dirname($0), "functional") if !defined?(Test::Unit::AutoRunner) then # Ruby 1.9.1 does not have Test::Unit::AutoRunner Test::Unit.setup_argv do |files| [ unit_test, func_test ] end # tests called when exiting the program elsif defined?(Test::Unit::Color) then # workaround for test-unit-2.0.x r = Test::Unit::AutoRunner.new(true) r.to_run.push unit_test r.to_run.push func_test r.process_args(ARGV) exit r.run elsif RUBY_VERSION > "1.8.2" then r = Test::Unit::AutoRunner.new(true) do |ar| ar.to_run.push unit_test ar.to_run.push func_test [ unit_test, func_test ] end r.process_args(ARGV) exit r.run else # old Test::Unit -- Ruby 1.8.2 or older raise "Ruby version too old. Please use newer version of Ruby." 
end bio-1.4.3.0001/test/network/0000755000004100000410000000000012200110570015333 5ustar www-datawww-databio-1.4.3.0001/test/network/bio/0000755000004100000410000000000012200110570016104 5ustar www-datawww-databio-1.4.3.0001/test/network/bio/io/0000755000004100000410000000000012200110570016513 5ustar www-datawww-databio-1.4.3.0001/test/network/bio/io/test_ddbjrest.rb0000644000004100000410000000205112200110570021676 0ustar www-datawww-data# # test/functional/bio/io/test_ddbjrest.rb - Functional test for Bio::DDBJ::REST # # Copyright:: Copyright (C) 2011 # Naohisa Goto # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'bio/io/ddbjrest' require 'test/unit' module Bio module NetTestDDBJREST class TestDDBJ < Test::Unit::TestCase def setup @obj = Bio::DDBJ::REST::DDBJ.new end def test_countBasePair text = @obj.countBasePair("AF237819") expected = { "a" => 47, "t" => 38, "g" => 48, "c" => 38 } h = {} text.each_line do |line| base, count, percent = line.split(/\t/) count = count.to_i h[base] = count if count > 0 end assert_equal(expected, h) end end #class TestDDBJ end #module NetTestDDBJREST end #module Bio bio-1.4.3.0001/test/network/bio/io/test_ensembl.rb0000644000004100000410000001543112200110570021530 0ustar www-datawww-data# # test/functional/bio/io/test_ensembl.rb - Functional test for Bio::Ensembl # # Copyright:: Copyright (C) 2007 # Mitsuteru C. 
Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/io/ensembl' module Bio class FuncTestEnsembl < Test::Unit::TestCase def setup @serv = Bio::Ensembl.new('Homo_sapiens') end def test_class assert_equal(Bio::Ensembl, @serv.class) end end class FuncTestEnsemblHuman < Test::Unit::TestCase def setup @serv = Bio::Ensembl.human end def test_organism assert_equal("Homo_sapiens", @serv.organism) end def test_server assert_equal("http://www.ensembl.org", @serv.server) end end class FuncTestEnsemblHumanExportView < Test::Unit::TestCase def setup @serv = Bio::Ensembl.new('Homo_sapiens', 'http://jul2008.archive.ensembl.org') end def test_fna_exportview seq = ">4 dna:chromosome chromosome:NCBI36:4:1149206:1149209:1\nGAGA\n" fna = @serv.exportview(4, 1149206, 1149209) assert_equal(seq, fna) end def test_fasta_exportview_with_hash_4th_params fna = @serv.exportview(4, 1149206, 1149209, :upstream => 10) fna10 = @serv.exportview(4, 1149196, 1149209) assert_equal(fna10, fna) end def test_fna_exportview_with_named_args seq = ">4 dna:chromosome chromosome:NCBI36:4:1149206:1149209:1\nGAGA\n" fna = @serv.exportview(:seq_region_name => 4, :anchor1 => 1149206, :anchor2 => 1149209) assert_equal(seq, fna) end def test_fasta_exportview_with_named_args_and_hash_4th_params fna = @serv.exportview(:seq_region_name => 4, :anchor1 => 1149206, :anchor2 => 1149209, :upstream => 10) fna10 = @serv.exportview(:seq_region_name => 4, :anchor1 => 1149196, :anchor2 => 1149209) assert_equal(fna10, fna) end def test_gff_exportview_for_empty_result gff = @serv.exportview(4, 1149206, 1149209, ['gene']) assert_equal('', gff) end def test_gff_exportview # OR1A1 (Olfactory receptor 1A1) lines = [ [ "17", "Ensembl", "Gene", "3065665", "3066594", ".", "+", "1", "gene_id=ENSG00000172146; 
transcript_id=ENST00000304094; exon_id=ENSE00001137815; gene_type=KNOWN_protein_coding" ], [ "17", "Vega", "Gene", "3065665", "3066594", ".", "+", "1", "gene_id=OTTHUMG00000090637; transcript_id=OTTHUMT00000207292; exon_id=OTTHUME00001080001; gene_type=KNOWN_protein_coding" ] ] line = lines.collect { |x| x.join("\t") + "\n" }.join('') gff = @serv.exportview(17, 3065665, 3066594, ['gene']) assert_equal(line, gff) end def test_gff_exportview_with_named_args_for_empty_result gff = @serv.exportview(:seq_region_name => 4, :anchor1 => 1149206, :anchor2 => 1149209, :options => ['gene']) assert_equal('', gff) end def test_gff_exportview_with_named_args # OR1A1 (Olfactory receptor 1A1) lines = [ [ "17", "Ensembl", "Gene", "3065665", "3066594", ".", "+", "1", "gene_id=ENSG00000172146; transcript_id=ENST00000304094; exon_id=ENSE00001137815; gene_type=KNOWN_protein_coding" ], [ "17", "Vega", "Gene", "3065665", "3066594", ".", "+", "1", "gene_id=OTTHUMG00000090637; transcript_id=OTTHUMT00000207292; exon_id=OTTHUME00001080001; gene_type=KNOWN_protein_coding" ] ] line = lines.collect { |x| x.join("\t") + "\n" }.join('') gff = @serv.exportview(:seq_region_name => 17, :anchor1 => 3065665, :anchor2 => 3066594, :options => ['gene']) assert_equal(line, gff) end def test_tab_exportview_with_named_args_for_empty_result line = ["seqname", "source", "feature", "start", "end", "score", "strand", "frame", "gene_id", "transcript_id", "exon_id", "gene_type"].join("\t") + "\n" gff = @serv.exportview(:seq_region_name => 4, :anchor1 => 1149206, :anchor2 => 1149209, :options => ['gene'], :format => 'tab') assert_equal(line, gff) end def test_tab_exportview_with_named_args # OR1A1 (Olfactory receptor 1A1) lines = [ [ "seqname", "source", "feature", "start", "end", "score", "strand", "frame", "gene_id", "transcript_id", "exon_id", "gene_type" ], [ "17", "Ensembl", "Gene", "3065665", "3066594", ".", "+", "1", "ENSG00000172146", "ENST00000304094", "ENSE00001137815", "KNOWN_protein_coding" ], [ "17", 
"Vega", "Gene", "3065665", "3066594", ".", "+", "1", "OTTHUMG00000090637", "OTTHUMT00000207292", "OTTHUME00001080001", "KNOWN_protein_coding" ] ] line = lines.collect { |x| x.join("\t") + "\n" }.join('') gff = @serv.exportview(:seq_region_name => 17, :anchor1 => 3065665, :anchor2 => 3066594, :options => ['gene'], :format => 'tab') assert_equal(line, gff) end end end # module Bio bio-1.4.3.0001/test/network/bio/io/test_soapwsdl.rb0000644000004100000410000000204312200110570021732 0ustar www-datawww-data# # test/functional/bio/io/test_soapwsdl.rb - Functional test for SOAP/WSDL # # Copyright:: Copyright (C) 2005,2007 # Mitsuteru C. Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/io/soapwsdl' module Bio class FuncTestSOAPWSDL < Test::Unit::TestCase def setup @wsdl = 'http://www.ebi.ac.uk/xembl/XEMBL.wsdl' @obj = Bio::SOAPWSDL.new(@wsdl) end def test_wsdl assert_equal(@wsdl, @obj.wsdl) end def test_set_wsdl @obj.wsdl = 'http://soap.genome.jp/KEGG.wsdl' assert_equal('http://soap.genome.jp/KEGG.wsdl', @obj.wsdl) end def test_log assert_equal(nil, @obj.log) end def test_set_log require 'stringio' io = StringIO.new @obj.log = io assert_equal(StringIO, @obj.log.class) end end end bio-1.4.3.0001/test/network/bio/io/test_togows.rb0000644000004100000410000001551412200110570021427 0ustar www-datawww-data# # test/functional/bio/io/test_togows.rb - Functional test for Bio::TogoWS # # Copyright:: Copyright (C) 2009 # Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'uri' require 'bio/version' require 'bio/io/togows' require 
'bio/db/genbank/genbank' require 'test/unit' module Bio # common tests for both instance methods and class methods module FuncTestTogoWSRESTcommon TestData = BioRubyTestDataPath def test_entry result = nil acc = 'AF237819' assert_nothing_raised { result = @togows.entry('nucleotide', acc) } assert(!result.to_s.strip.empty?) gb = Bio::GenBank.new(result) assert(gb.accessions.include?(acc)) end def test_entry_multi result = nil accs = [ 'AF237819' ,'AB302966', 'AY582120' ] assert_nothing_raised { result = @togows.entry('nucleotide', accs) } assert(!result.to_s.strip.empty?) count = 0 accs.each do |x| assert_match(Regexp.new(x), result) count += 1 end assert_equal(accs.size, count) # argument is a string accs2 = accs.join(',') result2 = nil assert_nothing_raised { result2 = @togows.entry('nucleotide', accs2) } assert(result2 == result) end def test_entry_with_format result = nil acc = 'AF237819' assert_nothing_raised { result = @togows.entry('nucleotide', acc, 'fasta') } assert(!result.to_s.strip.empty?) assert_match(/^\>/, result) end def test_entry_with_key result = nil assert_nothing_raised { result = @togows.entry('pubmed', '16381885', nil, 'authors') } assert(!result.to_s.strip.empty?) end def test_entry_with_format_and_key result = nil assert_nothing_raised { result = @togows.entry('pubmed', '16381885', 'json', 'authors') } assert(!result.to_s.strip.empty?) end def test_search result = nil assert_nothing_raised { result = @togows.search('nuccore', 'Milnesium tardigradum') } assert(!result.to_s.strip.empty?) end def test_search_with_offset_limit result = nil assert_nothing_raised { result = @togows.search('nuccore', 'Milnesium tardigradum', 2, 3) } assert(!result.to_s.strip.empty?) ary = result.chomp.split(/\n/) assert_equal(3, ary.size) end def test_search_with_offset_limit_format result = nil assert_nothing_raised { result = @togows.search('nuccore', 'Milnesium tardigradum', 2, 3, 'json') } assert(!result.to_s.strip.empty?) 
end def test_convert data = File.read(File.join(TestData, 'blast', 'b0002.faa.m0')) result = nil assert_nothing_raised { result = @togows.convert(data, 'blast', 'gff') } assert(!result.to_s.strip.empty?) end def test_retrieve result = nil assert_nothing_raised { result = @togows.retrieve('AF237819') } assert(!result.to_s.strip.empty?) end def test_retrieve_1id_1db result = nil assert_nothing_raised { result = @togows.retrieve('hsa:124', :database => 'kegg-genes', :field => 'entry_id', :format => 'json') } assert(!result.to_s.strip.empty?) end def test_retrieve_1id_2db result = nil assert_nothing_raised { result = @togows.retrieve('1.1.1.1', :database => [ 'kegg-genes', 'kegg-enzyme' ]) } assert(!result.to_s.strip.empty?) end def test_retrieve_2id_2db result = nil assert_nothing_raised { result = @togows.retrieve([ '1.1.1.1', 'hsa:124' ], :database => [ 'kegg-genes', 'kegg-enzyme' ]) } assert(!result.to_s.strip.empty?) end def test_entry_database_list result = nil assert_nothing_raised { result = @togows.entry_database_list } assert_kind_of(Array, result) assert(!result.empty?) end def test_search_database_list result = nil assert_nothing_raised { result = @togows.search_database_list } assert_kind_of(Array, result) assert(!result.empty?) end end #FuncTestTogoWSRESTcommon # functional test for Bio::TogoWS::REST class FuncTestTogoWSREST < Test::Unit::TestCase include FuncTestTogoWSRESTcommon def setup @togows = Bio::TogoWS::REST.new end end #class FuncTestTogoWSREST # functional test for Bio::TogoWS::REST private methods class FuncTestTogoWSRESTprivate < Test::Unit::TestCase def setup @togows = Bio::TogoWS::REST.new end def test_get response = nil acc = 'AF237819' assert_nothing_raised { response = @togows.instance_eval { get('entry', 'nucleotide', acc, 'entry_id') } } assert_kind_of(Net::HTTPResponse, response) assert_equal("200", response.code) result = response.body assert(!result.to_s.strip.empty?) 
end def test_get_dir response = nil assert_nothing_raised { response = @togows.instance_eval { get_dir('search') } } assert_kind_of(Net::HTTPResponse, response) assert_equal("200", response.code) result = response.body assert(!result.to_s.strip.empty?) end def test_post_data data = File.read(File.join(Bio::FuncTestTogoWSRESTcommon::TestData, 'blast', 'b0002.faa.m0')) response = nil assert_nothing_raised { response = @togows.instance_eval { post_data(data, 'convert', 'blast.gff') } } assert_kind_of(Net::HTTPResponse, response) assert_equal("200", response.code) result = response.body assert(!result.to_s.strip.empty?) end def test_database_list result = nil assert_nothing_raised { result = @togows.instance_eval { database_list('entry') } } assert_kind_of(Array, result) assert(!result.empty?) end end #class FuncTestTogoWSRESTprivate if false # DISABLED because of the server load and execution time # functional test for Bio::TogoWS::REST class methods class FuncTestTogoWSRESTclassMethod < Test::Unit::TestCase include FuncTestTogoWSRESTcommon def setup @togows = Bio::TogoWS::REST end end #class FuncTestTogoWSRESTclassMethod end #if false end #module Bio bio-1.4.3.0001/test/network/bio/io/test_pubmed.rb0000644000004100000410000000736512200110570021366 0ustar www-datawww-data# # test/functional/bio/io/test_pubmed.rb - Functional test for Bio::PubMed # # Copyright:: Copyright (C) 2009 # Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/io/pubmed' require 'bio/db/medline' module Bio module FuncTestPubmedCommon def test_esearch a = @pm.esearch('agile bioinformatics') assert_kind_of(Array, a) assert_operator(a.size, :>=, 3, 'The failure may be caused by changes of NCBI PubMed.') a.each do |x| assert_kind_of(String, x) assert_equal(x.strip, 
x.to_i.to_s, 'PMID is not an integer value. This suggests that NCBI have changed the PMID policy.') end end def test_esearch_count a = @pm.esearch('agile bioinformatics', { "rettype"=>"count" }) assert_kind_of(Integer, a) assert_operator(a, :>=, 3, 'The failure may be caused by changes of NCBI PubMed.') end def test_esearch_retmax_retstart a = @pm.esearch('p53', { "retmax" => 10, "retstart" => 20 }) assert_equal(10, a.size, 'The failure may be caused by changes of NCBI PubMed.') a.each do |x| assert_kind_of(String, x) assert_equal(x.strip, x.to_i.to_s, 'PMID is not an integer value. This suggests that NCBI have changed the PMID policy.') end a1 = @pm.esearch('p53', { "retmax" => 15, "retstart" => 35 }) a2 = @pm.esearch('p53', { "retmax" => 10, "retstart" => 0 }) assert_equal(35, (a + a1 + a2).sort.uniq.size, 'The failure may be caused by changes of NCBI PubMed.') a3 = @pm.esearch('p53', { "retmax" => 10 }) assert_equal(a2.sort, a3.sort, 'The failure may be caused by changes of NCBI PubMed.') end def check_pubmed_entry(pmid, str) m = Bio::MEDLINE.new(str) assert_equal(pmid.to_s, m.pmid) end private :check_pubmed_entry def do_efetch_single(pmid) a = @pm.efetch(pmid) assert_kind_of(Array, a) assert_equal(1, a.size) check_pubmed_entry(pmid, a[0]) end private :do_efetch_single def test_efetch do_efetch_single(12368254) end def test_efetch_str do_efetch_single("16734914") end def test_efetch_multiple arg = [ 12368254, 18689808, 19304878 ] a = @pm.efetch(arg) assert_kind_of(Array, a) assert_equal(3, a.size) a.each do |str| check_pubmed_entry(arg.shift, str) end end def test_efetch_single_xml arg = 12368254 str = @pm.efetch(arg, { "retmode" => 'xml' }) assert_kind_of(String, str) assert(str.index(/\/)) end def test_efetch_multiple_xml arg = [ "16734914", 16381885, "10592173" ] str = @pm.efetch(arg, { "retmode" => 'xml' }) assert_kind_of(String, str) assert(str.index(/\/)) end end #module FuncTestPubmedCommon class FuncTestPubmed < Test::Unit::TestCase include 
FuncTestPubmedCommon def setup Bio::NCBI.default_email = 'staff@bioruby.org' #$stderr.puts Bio::NCBI.default_tool @pm = Bio::PubMed.new end end #class FuncTestPubmed class FuncTestPubmedClassMethod < Test::Unit::TestCase include FuncTestPubmedCommon def setup Bio::NCBI.default_email = 'staff@bioruby.org' #$stderr.puts Bio::NCBI.default_tool @pm = Bio::PubMed end end #class FuncTestPubmedClassMethod end bio-1.4.3.0001/test/network/bio/test_command.rb0000644000004100000410000000144712200110570021114 0ustar www-datawww-data# # test/network/bio/test_command.rb - Functional test for network connection methods in Bio::Command # # Copyright:: Copyright (C) 2008, 2011 # Naohisa Goto # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 2, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/command' module Bio class FuncTestCommandNet < Test::Unit::TestCase def test_read_uri assert_nothing_raised { Bio::Command.read_uri("http://bioruby.open-bio.org/") } end def test_start_http end def test_new_http end def test_post_form end end #class FuncTestCommandNet end bio-1.4.3.0001/test/network/bio/appl/0000755000004100000410000000000012200110570017040 5ustar www-datawww-databio-1.4.3.0001/test/network/bio/appl/test_blast.rb0000644000004100000410000000327412200110570021537 0ustar www-datawww-data# # = test/functional/bio/appl/test_blast.rb - Unit test for Bio::Blast with network connection # # Copyright:: Copyright (C) 2011 # Naohisa Goto # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/sequence' require 'bio/appl/blast' module Bio module FunctTestBlast module NetTestBlastCommonProteinQuery filename = 
File.join(BioRubyTestDataPath, 'fasta', 'EFTU_BACSU.fasta') QuerySequence = File.read(filename).freeze def test_query report = nil assert_nothing_raised { report = @blast.query(QuerySequence) } assert(report.hits.size > 0) end end #module NetTestBlastCommonProteinQuery class NetTestBlast_GenomeNet < Test::Unit::TestCase include NetTestBlastCommonProteinQuery def setup @blast = Bio::Blast.new('blastp', 'mine-aa eco', [ '-e', '1e-10', '-v', '10', '-b', '10' ], 'genomenet') end end #class NetTestBlast_GenomeNet class NetTestBlast_DDBJ < Test::Unit::TestCase include NetTestBlastCommonProteinQuery def setup @blast = Bio::Blast.new('blastp', 'SWISS', ['-e', '1e-10', '-v', '10', '-b', '10' ], 'ddbj') end end #class NetTestBlast_DDBJ end #module FuncTestBlast end #module Bio bio-1.4.3.0001/test/network/bio/appl/blast/0000755000004100000410000000000012200110570020145 5ustar www-datawww-databio-1.4.3.0001/test/network/bio/appl/blast/test_remote.rb0000644000004100000410000000517512200110570023034 0ustar www-datawww-data# # = test/functional/bio/appl/blast/test_remote.rb - Unit test for Bio::Blast::Remote::Genomenet and Bio::Blast::Remote::DDBJ with network connection # # Copyright:: Copyright (C) 2011 # Naohisa Goto # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/sequence' require 'bio/appl/blast' module Bio module FuncTestBlastRemote module NetTestBlastRemoteCommon Programs = %w( blastn tblastn tblastx blastp blastx ).freeze Programs.each { |x| x.freeze } def test_databases Programs.each do |prog| databases = nil assert_nothing_raised { databases = @klass.databases(prog) } assert_kind_of(Array, databases, "wrong data type for #{prog}") assert(!databases.empty?, "no database found for #{prog}") end end # sampling test for blastn database def 
test_databases_blastn databases = @klass.databases("blastn") self.class::BLASTN_DBNAME_KEYWORDS.each do |re| assert(databases.find { |x| re =~ x }) end end # sampling test for blastp database def test_databases_blastp databases = @klass.databases("blastp") self.class::BLASTP_DBNAME_KEYWORDS.each do |re| assert(databases.find { |x| re =~ x }) end end def test_database_description Programs.each do |prog| @klass.databases(prog).each do |db| assert_kind_of(String, @klass.database_description(prog, db)) end end end end #module NetTestBlastRemoteCommon # This test class only contains tests for meta information. # BLAST execution tests are written in ../test_blast.rb class NetTestBlastRemoteGenomeNet < Test::Unit::TestCase include NetTestBlastRemoteCommon BLASTN_DBNAME_KEYWORDS = [ /genes/, /nt/ ] BLASTP_DBNAME_KEYWORDS = [ /genes/, /uniprot/, /nr/ ] def setup @klass = Bio::Blast::Remote::GenomeNet end end #class NetTestBlastRemoteGenomeNet # This test class only contains tests for meta information. 
# BLAST execution tests are written in ../test_blast.rb class NetTestBlastRemoteDDBJ < Test::Unit::TestCase include NetTestBlastRemoteCommon BLASTN_DBNAME_KEYWORDS = [ /ddbj/i, /nt/i ] BLASTP_DBNAME_KEYWORDS = [ /uniprot/i, /pdb/i ] def setup @klass = Bio::Blast::Remote::DDBJ end end #class NetTestBlastRemoteDDBJ end #module FuncTestBlastRemote end #module Bio bio-1.4.3.0001/test/network/bio/appl/test_pts1.rb0000644000004100000410000000466612200110570021327 0ustar www-datawww-data# # = test/functional/bio/appl/test_pts1.rb - Unit test for Bio::PTS1 with network connection # # Copyright:: Copyright (C) 2006 # Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/pts1' module Bio class FuncTestPTS1 < Test::Unit::TestCase def setup @seq =<AB000464 MRTGGDNAGPSHSHIKRLPTSGLSTWLQGTQTCVLHLPTGTRPPAHHPLLGYSSRRSYRL LENPAAGCWARFSFCQGAAWDWDLEGVQWLRALAGGVSTAPSAPPGNLVFLSVSIFLCGS LLLETCPAYFSSLDPD* END @serv = Bio::PTS1.new end def test_function_set @serv.function("GENERAL") assert_equal("GENERAL", @serv.function) end def test_function_show assert_equal("METAZOA-specific", @serv.function) end def test_function_set_number_1 @serv.function(1) assert_equal("METAZOA-specific", @serv.function) end def test_function_set_number_2 @serv.function(2) assert_equal("FUNGI-specific", @serv.function) end def test_function_set_number_3 @serv.function(3) assert_equal("GENERAL", @serv.function) end def test_exec report = @serv.exec(@seq) assert_equal(Bio::PTS1::Report, report.class) end def test_exec_with_faa report = @serv.exec(Bio::FastaFormat.new(@seq)) assert_equal(Bio::PTS1::Report, report.class) end end class FuncTestPTS1Report < Test::Unit::TestCase def setup serv = Bio::PTS1.new seq = ">hoge\nAVSFLSMRRARL\n" @report = serv.exec(seq) end #def 
test_output_size # assert_equal(1634, @report.output.size) #end def test_entry_id assert_equal("hoge", @report.entry_id) end def test_prediction assert_equal("Targeted", @report.prediction) end def test_cterm assert_equal("AVSFLSMRRARL", @report.cterm) end def test_score assert_equal("7.559", @report.score) end def test_fp assert_equal("2.5e-04", @report.fp) end def test_sppta assert_equal("-5.833", @report.sppta) end def test_spptna assert_equal("-1.698", @report.spptna) end def test_profile assert_equal("15.091", @report.profile) end end end bio-1.4.3.0001/test/functional/0000755000004100000410000000000012200110570016004 5ustar www-datawww-databio-1.4.3.0001/test/functional/bio/0000755000004100000410000000000012200110570016555 5ustar www-datawww-databio-1.4.3.0001/test/functional/bio/sequence/0000755000004100000410000000000012200110570020365 5ustar www-datawww-databio-1.4.3.0001/test/functional/bio/sequence/test_output_embl.rb0000644000004100000410000001250612200110570024314 0ustar www-datawww-data# # test/functional/bio/sequence/test_output_embl.rb - Functional test for Bio::Sequence#output(:embl) # # Copyright:: Copyright (C) 2008 # Jan Aerts # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/sequence' module Bio class FuncTestSequenceOutputEMBL < Test::Unit::TestCase def setup @seq = 
Bio::Sequence.auto('aattaaaacgccacgcaaggcgattctaggaaatcaaaacgacacgaaatgtggggtgggtgtttgggtaggaaagacagttgtcaacatcagggatttggattgaatcaaaaaaaaagtccttagatttcataaaagctaatcacgcctcaaaactggggcctatctcttcttttttgtcgcttcctgtcggtccttctctatttcttctccaacccctcatttttgaatatttacataacaaaccgttttactttctttggtcaaaattagacccaaaattctatattagtttaagatatgtggtctgtaatttattgttgtattgatataaaaattagttataagcgattatatttttatgctcaagtaactggtgttagttaactatattccaccacgataacctgattacataaaatatgattttaatcattttagtaaaccatatcgcacgttggatgattaattttaacggtttaataacacgtgattaaattatttttagaatgattatttacaaacggaaaagctatatgtgacacaataactcgtgcagtattgttagtttgaaaagtgtatttggtttcttatatttggcctcgattttcagtttatgtgctttttacaaagttttattttcgttatctgtttaacgcgacatttgttgtatggctttaccgatttgagaataaaatcatattacctttatgtagccatgtgtggtgtaatatataataatggtccttctacgaaaaaagcagatcacaattgaaataaagggtgaaatttggtgtcccttttcttcgtcgaaataacagaactaaataaaagaaagtgttatagtatattacgtccgaagaataatccatattcctgaaatacagtcaacatattatatatttagtactttatataaagttaggaattaaatcatatgttttatcgaccatattaagtcacaactttatcataaattaatctgtaattagaattccaagttcgccaccgaatttcgtaacctaatctacatataatagataaaatatatatatgtagagtaattatgatatctatgtatgtagtcatggtatatgaattttgaaattggcaaggtaacattgacggatcgtaacccaacaaataatattaattacaaaatgggtgggcgggaatagtatacaactcataattccactcactttttgtattattaggatatgaaataagagtaatcaacatgcataataaagatgtataatttcttcatcttaaaaaacataactacatggtttaatacacaattttaccttttatcaaaaaagtatttcacaattcactcgcaaattacgaaatgatggctagtgcttcaactccaaatttcgaatattttaaatcacgatgtgtagaaccttttatttactggatactaatcactagtttattgagccaaccaattagttaaatagaacaatcaatattatagccagatattttttcctttaaaaatatttaaaagaggggccagaaaagaaccagagagggaggccatgagacattattatcactagtcaaaaacaacaaaccctccttttgctttttcatataaattattatattttattttgcaggtttcttctcttcttcttcttcttcttcttcttcttcctcttggctgctttctttcatcatccataaagtgaaagctaacgcatagagagagccatatcgtcccaaaaaaagcaaaagtccaaaaaaaaacaactccaaaacattctctcttagctctttactctttagtttctctctctctctctgcctttctctttgttgaagttcatggatgctacgaagtggactcaggtacgtaaaaagatatctctctgctatatctgtttgtttgtagcttctccccgactctcacgctctctctctctctctctctctctttgtgtatctctctactcacataaatatata
catgtgtgtgtatgcatgtttatatgtatgtatgaaaccagtagtggttatacagatagtctatatagagatatcaatatgatgtgttttaatttagactttttatatatccgtttgaaacttccgaagttctcgaatggagttaaggaagttttgttctctacaagttcaatttttcttgtcattaattataaaactctgataactaatggataaaaaaggtatgctttgttagttaccttttgttcttggtgctcaggtcttaccatttttttcctaaattttaattagtctcctttctttaattaattttatgttaacgcactgacgatttaacgttaacaaaaaaacctagattctttttcttttcaatagagcataattattacttcaatttcatttatctcacactaaaccctaatcttggcgaaattccttttatatatataaatttaattaatttttccacaatcttggcggaattcaggactcggttttgcttgttattgttctctcttttaatttgacatggttagggaatacttaaagtatgtcttaattttatagggttttcaagaaatgataaacgtaaagccaatggagcaaatgatttctagcaccaacaacaacacaccgcaacaacaaccaacattcatcgccaccaacacaaggccaaacgccaccgcatccaatggtggctccggaggaaataccaacaacacggctacgatggaaactagaaaggcgaggccacaagagaaagtaaattgtccaagatgcaactcaacaaacacaaagttctgttattacaacaactacagtctcacgcaaccaagatacttctgcaaaggttgtcgaaggtattggaccgaaggtggctctcttcgtaacgtcccagtcggaggtagctcaagaaagaacaagagatcctctacacctttagcttcaccttctaatcccaaacttccagatctaaacccaccgattcttttctcaagccaaatccctaataagtcaaataaagatctcaacttgctatctttcccggtcatgcaagatcatcatcatcatggtatgtctcatttttttcatatgcccaagatagagaacaacaatacttcatcctcaatctatgcttcatcatctcctgtctcagctcttgagcttctaagatccaatggagtctcttcaagaggcatgaacacgttcttgcctggtcaaatgatggattcaaactcagtcctgtactcatctttagggtttccaacaatgcctgattacaaacagagtaataacaacctttcattctccattgatcatcatcaagggattggacataacaccatcaacagtaaccaaagagctcaagataacaatgatgacatgaatggagcaagtagggttttgttccctttttcagacatgaaagagctttcaagcacaacccaagagaagagtcatggtaataatacatattggaatgggatgttcagtaatacaggaggatcttcatggtgaaaaaaggttaaaaagagctcatgaactatcagctttcttctctttttctgtttttttctcctattttattatagtttttactttgatgatcttttgttttttctcacatggggaactttacttaaagttgtcagaacttagtttacagattgtctttttattccttctttctggttttccttttttcctttttttatcagtctttttaaaatatgtatttcataattgggtttgatcattcatatttattagtatcaaaatagagtctatgttcatgagggagtgttaaggggtgtgagggtagaagaataagtgaatacgggggcccg') @seq.entry_id = 'AJ224122' @seq.sequence_version = 3 @seq.topology = 'linear' @seq.molecule_type = 'genomic DNA' @seq.data_class = 'STD' 
@seq.division = 'PLN' @seq.primary_accession = 'AJ224122' @seq.secondary_accessions = [] @seq.date_created = '27-FEB-1998 (Rel. 54, Created)' @seq.date_modified = '14-NOV-2006 (Rel. 89, Last updated, Version 6)' @seq.definition = 'Arabidopsis thaliana DAG1 gene' @seq.keywords = ['BBFa gene', 'transcription factor'] @seq.species = 'Arabidopsis thaliana (thale cress)' @seq.classification = ['Eukaryota', 'Viridiplantae', 'Streptophyta', 'Embryophyta', 'Tracheophyta', 'Spermatophyta', 'Magnoliophyta', 'eudicotyledons', 'core eudicotyledons', 'rosids', 'eurosids II', 'Brassicales', 'Brassicaceae', 'Arabidopsis'] end def test_output_embl assert_nothing_raised { @seq.output(:embl) } end def test_output_fasta assert_nothing_raised { @seq.output(:fasta) } end end #class FuncTestSequenceOutputEMBL end #module Bio bio-1.4.3.0001/test/functional/bio/test_command.rb0000644000004100000410000002430112200110570021557 0ustar www-datawww-data# # test/unit/bio/test_command.rb - Functional test for external command execution methods in Bio::Command # # Copyright:: Copyright (C) 2008 # Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 2, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'tempfile' require 'bio/command' module Bio module FuncTestCommandCallCommon def windows_platform? Bio::Command.module_eval { windows_platform? } end module_function :windows_platform? def setup_cmd if windows_platform? 
then [ File.expand_path(File.join(BioRubyTestDataPath, 'command', 'echoarg2.bat')) ] else [ "/bin/sh", "/bin/echo" ].each do |cmd| unless FileTest.executable?(cmd) then raise "Unsupported environment: #{cmd} not found" end end [ "/bin/sh", File.expand_path(File.join(BioRubyTestDataPath, 'command', 'echoarg2.sh')) ] end end private :setup_cmd def test_call_command ret = Bio::Command.call_command(@arg) do |io| io.close_write io.read end assert_equal(@expected, ret.to_s.strip) end def test_call_command_popen ret = Bio::Command.call_command_popen(@arg) do |io| io.close_write io.read end assert_equal(@expected, ret.to_s.strip) end def test_call_command_fork return unless Thread.respond_to?(:critical) begin ret = Bio::Command.call_command_fork(@arg) do |io| io.close_write io.read end rescue Errno::ENOENT, NotImplementedError # fork() not supported return end assert_equal(@expected, ret.to_s.strip) end def test_call_command_open3 begin ret = Bio::Command.call_command_open3(@arg) do |pin, pout, perr| t = Thread.start { perr.read } begin pin.close output = pout.read ensure t.join end output end rescue NotImplementedError # fork() not supported return end assert_equal(@expected, ret.to_s.strip) end end #module FuncTestCommandCallCommon class FuncTestCommandCallSimple < Test::Unit::TestCase include FuncTestCommandCallCommon def setup @arg = setup_cmd @arg.concat [ "first", "second", "third" ] @expected = "second" end end #class FuncTestCommandCallSimple class FuncTestCommandCallWithSpace < Test::Unit::TestCase include FuncTestCommandCallCommon def setup @arg = setup_cmd @arg.concat [ "this is", "a test for", "escape of space" ] if windows_platform? then @expected = '"a test for"' else @expected = "a test for" end end end #class FuncTestCommandCallWithSpace class FuncTestCommandCallMisc1 < Test::Unit::TestCase include FuncTestCommandCallCommon def setup @arg = setup_cmd @arg.concat [ 'test (a) *.* \'argument 1\'', '\'test\' (b) *.* argument 2', 'arg3' ] if windows_platform? 
then @expected = '"\'test\' (b) *.* argument 2"' else @expected = '\'test\' (b) *.* argument 2' end end end #class FuncTestCommandCallMisc1 class FuncTestCommandQuery < Test::Unit::TestCase def setup @data = [ "987", "123", "567", "456", "345" ] @sorted = @data.sort if Bio::Command.module_eval { windows_platform? } then @sort = "sort" @data = @data.join("\r\n") + "\r\n" else @sort = `which sort`.chomp if @sort.empty? or !FileTest.executable?(@sort) then raise "Unsupported environment: sort not found in PATH" end @data = @data.join("\n") + "\n" end end def test_query_command ary = [ @sort ] assert_equal('', Bio::Command.query_command(ary).to_s.strip) str = Bio::Command.query_command(ary, @data).to_s assert_equal(@sorted, str.strip.split(/\s+/)) end def test_query_command_popen ary = [ @sort ] assert_equal('', Bio::Command.query_command_popen(ary).to_s.strip) str = Bio::Command.query_command_popen(ary, @data).to_s assert_equal(@sorted, str.strip.split(/\s+/)) end def test_query_command_fork return unless Thread.respond_to?(:critical) ary = [ @sort ] begin str = Bio::Command.query_command_fork(ary).to_s rescue Errno::ENOENT, NotImplementedError # fork() not supported return end assert_equal('', str.strip) str = Bio::Command.query_command_fork(ary, @data).to_s assert_equal(@sorted, str.strip.split(/\s+/)) end def test_query_command_open3 ary = [ @sort ] begin str, err = Bio::Command.query_command_open3(ary) rescue NotImplementedError # fork() not supported return end assert_equal('', str.to_s.strip) str, err = Bio::Command.query_command_open3(ary, @data) assert_equal(@sorted, str.to_s.strip.split(/\s+/)) end end #class FuncTestCommandQuery class FuncTestCommandChdir < Test::Unit::TestCase def setup if Bio::Command.module_eval { windows_platform? 
} then @arg = [ 'dir', '/B', '/-P' ] else cmd = '/bin/ls' @arg = [ cmd ] unless FileTest.executable?(cmd) then raise "Unsupported environment: #{cmd} not found" end end @tempfile = Tempfile.new('chdir') @tempfile.close(false) @filename = File.basename(@tempfile.path) @dirname = File.dirname(@tempfile.path) end def teardown @tempfile.close(true) end def test_call_command_chdir str = nil Bio::Command.call_command(@arg, { :chdir => @dirname }) do |io| io.close_write str = io.read end assert(str.index(@filename)) end def test_call_command_popen_chdir str = nil Bio::Command.call_command_popen(@arg, { :chdir => @dirname }) do |io| io.close_write str = io.read end assert(str.index(@filename)) end def test_call_command_fork_chdir return unless Thread.respond_to?(:critical) str = nil begin Bio::Command.call_command_fork(@arg, { :chdir => @dirname }) do |io| io.close_write str = io.read end rescue Errno::ENOENT, NotImplementedError # fork() not supported return end assert(str.index(@filename)) end def test_query_command_chdir str = Bio::Command.query_command(@arg, nil, { :chdir => @dirname }).to_s assert(str.index(@filename)) end def test_query_command_popen_chdir str = Bio::Command.query_command_popen(@arg, nil, { :chdir => @dirname }).to_s assert(str.index(@filename)) end def test_query_command_fork_chdir return unless Thread.respond_to?(:critical) begin str = Bio::Command.query_command_fork(@arg, nil, { :chdir => @dirname }).to_s rescue Errno::ENOENT, NotImplementedError # fork() not supported return end assert(str.index(@filename)) end end #class FuncTestCommandChdir class FuncTestCommandBackports < Test::Unit::TestCase def setup if RUBY_VERSION < "1.8.3" @notest = true else @notest = false end end def test_remove_entry_secure return if @notest begin tempfile = Tempfile.new('removed') tempfile.close(false) assert(File.exist?(tempfile.path)) Bio::Command.remove_entry_secure(tempfile.path) assert_equal(false, File.exist?(tempfile.path)) ensure tempfile.close(true) if 
tempfile end end def test_mktmpdir_with_block return if @notest tmpdirpath = nil Bio::Command.mktmpdir('bioruby') do |path| tmpdirpath = path assert(File.directory?(path)) assert_nothing_raised { File.open(File.join(path, 'test'), 'w') do |w| w.print "This is test." end } end assert_equal(false, File.directory?(tmpdirpath)) end def test_mktmpdir_without_block return if @notest path = nil begin assert_nothing_raised { path = Bio::Command.mktmpdir('bioruby') } assert(File.directory?(path)) assert_nothing_raised { File.open(File.join(path, 'test'), 'w') do |w| w.print "This is test." end } ensure Bio::Command.remove_entry_secure(path) if path end end end #class FuncTestCommandBackports class FuncTestCommandTmpdir < Test::Unit::TestCase def setup if RUBY_VERSION < "1.8.3" @notest = true else @notest = false end end def test_initialize return if @notest tmpdir = Bio::Command::Tmpdir.new('bioruby') assert_instance_of(Bio::Command::Tmpdir, tmpdir) assert(File.directory?(tmpdir.path)) assert_nothing_raised { # creates a dummy file File.open(File.join(tmpdir.path, 'test'), 'w') do |w| w.print "This is test." end } end def test_path return if @notest tmpdir = Bio::Command::Tmpdir.new('bioruby') assert_kind_of(String, tmpdir.path) assert(File.directory?(tmpdir.path)) end def test_close! return if @notest tmpdir = Bio::Command::Tmpdir.new('bioruby') path = tmpdir.path # creates a dummy file File.open(File.join(tmpdir.path, 'test'), 'w') do |w| w.print "This is test." end assert_nothing_raised { tmpdir.close! } assert_equal(false, File.directory?(path)) end def test_path_after_close return if @notest tmpdir = Bio::Command::Tmpdir.new('bioruby') tmpdir.close! 
assert_raise(IOError) { tmpdir.path } end end #class FuncTestCommandTmpdir end #module Bio bio-1.4.3.0001/test/data/0000755000004100000410000000000012200110570014553 5ustar www-datawww-databio-1.4.3.0001/test/data/medline/0000755000004100000410000000000012200110570016170 5ustar www-datawww-databio-1.4.3.0001/test/data/medline/20146148_modified.medline0000644000004100000410000000301312200110570022275 0ustar www-datawww-dataPMID- 20146148 OWN - NLM STAT- MEDLINE DA - 20100210 DCOM- 20100316 IS - 1538-957X (Electronic) IS - 1538-9588 (Linking) VI - 11 IP - 1 DP - 2010 Feb TI - With power comes responsibility: motorcycle engine power and power-to-weight ratio in relation to accident risk. PG - 87-95 AB - (The abstract is omitted to avoid copyright issues. Please find the abstract at http://www.ncbi.nlm.nih.gov/pubmed/20146148. We believe that other information in this entry is within public domain, according to "Copyright and Disclaimers" in http://www.ncbi.nlm.nih.gov/About/disclaimer.html.) AD - Traffic Research Unit, Department of Psychology, University of Helsinki, Finland. markus.mattsson@helsinki.fi FAU - Mattsson, Markus AU - Mattsson M FAU - Summala, Heikki AU - Summala H LA - eng PT - Journal Article PT - Research Support, Non-U.S. Gov't PL - England TA - Traffic Inj Prev JT - Traffic injury prevention JID - 101144385 SB - IM MH - Accidents, Traffic/mortality/*statistics & numerical data MH - Adult MH - Age Distribution MH - Body Weight MH - Female MH - Finland/epidemiology MH - Humans MH - Linear Models MH - Male MH - Motorcycles/classification/legislation & jurisprudence/*statistics & numerical data MH - Questionnaires MH - Risk MH - Social Responsibility MH - Young Adult EDAT- 2010/02/11 06:00 MHDA- 2010/03/17 06:00 CRDT- 2010/02/11 06:00 AID - 919158438 [pii] AID - 10.1080/15389580903471126 [doi] PST - ppublish SO - Traffic Inj Prev. 2010 Feb;11(1):87-95. 
bio-1.4.3.0001/test/data/sim4/0000755000004100000410000000000012200110570015427 5ustar www-datawww-databio-1.4.3.0001/test/data/sim4/simple2-A4.sim40000644000004100000410000000124012200110570020037 0ustar www-datawww-data seq1 = sample10-1.fst, 96 bp seq2 = sample10-2.fst (genome1), 599 bp >mrna1 >genome1 1-42 (143-184) 100% <- 43-96 (404-457) 94% 0 . : . : . : . : . : 1 AGTTGTTTCCGTCGCTGGTTATTGTCTAGAACGCAAAAATAG ||||||||||||||||||||||||||||||||||||||||||<<<...<< 143 AGTTGTTTCCGTCGCTGGTTATTGTCTAGAACGCAAAAATAGCTG...TA 50 . : . : . : . : . : 43 TCTACACATCACTAGCGTGGGTGGGCGGAAAGAGCAGCTCGCCACT CA <|||||||||||||||| |||||||||||||-|||||||||||||||-|| 403 CTCTACACATCACTAGCCTGGGTGGGCGGAA GAGCAGCTCGCCACTTCA 100 . 91 AGCTAA |||||| 452 AGCTAA bio-1.4.3.0001/test/data/sim4/simple-A4.sim40000644000004100000410000000122512200110570017760 0ustar www-datawww-data seq1 = sample10-1.fst, 94 bp seq2 = sample10-2.fst (genome1), 599 bp >mrna1 >genome1 1-40 (145-184) 100% <- 41-94 (404-457) 94% 0 . : . : . : . : . : 1 TTGTTTCCGTCGCTGGTTATTGTCTAGAACGCAAAAATAG T ||||||||||||||||||||||||||||||||||||||||<<<...<<<| 145 TTGTTTCCGTCGCTGGTTATTGTCTAGAACGCAAAAATAGCTG...TACT 50 . : . : . : . : . : 42 CTACACATCACTAGCGTGGGTGGGCGGAAAGAGCAGCTCGCCACT CAAG ||||||||||||||| |||||||||||||-|||||||||||||||-|||| 405 CTACACATCACTAGCCTGGGTGGGCGGAA GAGCAGCTCGCCACTTCAAG 100 91 CTAA |||| 454 CTAA bio-1.4.3.0001/test/data/sim4/complement-A4.sim40000644000004100000410000000244212200110570020634 0ustar www-datawww-data seq1 = sample41-1c.fst, 284 bp seq2 = sample40-2.fst (genome4), 770 bp >mrna4c >genome4 (complement) 1-72 (351-424) 89% -> 73-142 (563-630) 95% == 213-284 (700-770) 95% 0 . : . : . : . : . : 1 TTTTAGCCGGCACGAGATTG AGCGTATGATCACGCGCGCGGCCTCCT C ||||||||||||||||||||-||||-||||||||||||||||||||||-| 351 TTTTAGCCGGCACGAGATTGCAGCG ATGATCACGCGCGCGGCCTCCTAC 50 . : . : . : . : . 
: 49 AGAGTGATGCATGATACAACTT AT ATATGTACTTAGCTG -|||| ||||||||||||||||- |->>>...>>>|||||||||||||-| 400 GAGTCATGCATGATACAACTTCTTGGTT...GATATATGTACTTAGC G 100 . : . : . : . : . : 88 GCAACCGAGATTTACTTTCGAAGCACTGTGATGAACCCGCGGCCCTTTGA ||||||||||||||||||||||| |||||||||||||||||-|||||||| 577 GCAACCGAGATTTACTTTCGAAGGACTGTGATGAACCCGCG CCCTTTGA 150 . 138 GCGCT ||||| 626 GCGCT 0 . : . : . : . : . : 213 TATATATGTACTTAGCGG ACACCGAGATTTACTTTCGAAGGACTGTGGA ||||||||||||||||||-|-|||||||||||||||||||||||||||-| 700 TATATATGTACTTAGCGGCA ACCGAGATTTACTTTCGAAGGACTGTG A 50 . : . : 262 TGAACCCGCGCCCTTTGAGCGCT ||||||||||||||||||||||| 748 TGAACCCGCGCCCTTTGAGCGCT bio-1.4.3.0001/test/data/prosite/0000755000004100000410000000000012200110570016240 5ustar www-datawww-databio-1.4.3.0001/test/data/prosite/prosite.dat0000644000004100000410000051527312200110570020434 0ustar www-datawww-dataID G_PROTEIN_RECEP_F1_1; PATTERN. AC PS00237; DT APR-1990 (CREATED); NOV-1997 (DATA UPDATE); JUL-1998 (INFO UPDATE). DE G-protein coupled receptors family 1 signature. PA [GSTALIVMFYWC]-[GSTANCPDE]-{EDPKRH}-x(2)-[LIVMNQGA]-x(2)-[LIVMFT]- PA [GSTANC]-[LIVMFYWSTAC]-[DENH]-R-[FYWCSH]-x(2)-[LIVM]. 
NR /RELEASE=40.7,103373; NR /TOTAL=1121(1121); /POSITIVE=1057(1057); /UNKNOWN=0(0); /FALSE_POS=64(64); NR /FALSE_NEG=112; /PARTIAL=48; CC /TAXO-RANGE=??E?V; /MAX-REPEAT=1; DR O42385, 5H1A_FUGRU, T; P08908, 5H1A_HUMAN, T; Q64264, 5H1A_MOUSE, T; DR P19327, 5H1A_RAT , T; O08892, 5H1B_CAVPO, T; P46636, 5H1B_CRIGR, T; DR P35404, 5H1B_DIDMA, T; O42384, 5H1B_FUGRU, T; P28222, 5H1B_HUMAN, T; DR P28334, 5H1B_MOUSE, T; P49144, 5H1B_RABIT, T; P28564, 5H1B_RAT , T; DR P56496, 5H1B_SPAEH, T; P11614, 5H1D_CANFA, T; Q60484, 5H1D_CAVPO, T; DR P79748, 5H1D_FUGRU, T; P28221, 5H1D_HUMAN, T; Q61224, 5H1D_MOUSE, T; DR P79400, 5H1D_PIG , T; P49145, 5H1D_RABIT, T; P28565, 5H1D_RAT , T; DR P28566, 5H1E_HUMAN, T; Q29003, 5H1E_PIG , T; O08890, 5H1F_CAVPO, T; DR P30939, 5H1F_HUMAN, T; Q02284, 5H1F_MOUSE, T; P30940, 5H1F_RAT , T; DR O46635, 5H2A_CANFA, T; P35382, 5H2A_CAVPO, T; P18599, 5H2A_CRIGR, T; DR P28223, 5H2A_HUMAN, T; P50128, 5H2A_MACMU, T; P35363, 5H2A_MOUSE, T; DR P50129, 5H2A_PIG , T; P14842, 5H2A_RAT , T; P41595, 5H2B_HUMAN, T; DR Q02152, 5H2B_MOUSE, T; Q29005, 5H2B_PIG , T; P30994, 5H2B_RAT , T; DR P28335, 5H2C_HUMAN, T; P34968, 5H2C_MOUSE, T; P08909, 5H2C_RAT , T; DR O70528, 5H4_CAVPO , T; Q13639, 5H4_HUMAN , T; P97288, 5H4_MOUSE , T; DR Q62758, 5H4_RAT , T; P47898, 5H5A_HUMAN, T; P30966, 5H5A_MOUSE, T; DR P35364, 5H5A_RAT , T; P31387, 5H5B_MOUSE, T; P35365, 5H5B_RAT , T; DR P50406, 5H6_HUMAN , T; Q9R1C8, 5H6_MOUSE , T; P31388, 5H6_RAT , T; DR P50407, 5H7_CAVPO , T; P34969, 5H7_HUMAN , T; P32304, 5H7_MOUSE , T; DR P32305, 5H7_RAT , T; Q91559, 5H7_XENLA , T; P20905, 5HT1_DROME, T; DR Q17239, 5HT_BOMMO , T; Q25190, 5HT_HELVI , T; Q25414, 5HT_LYMST , T; DR P28285, 5HTA_DROME, T; P28286, 5HTB_DROME, T; P18130, A1AA_BOVIN, T; DR O77621, A1AA_CANFA, T; Q9WU25, A1AA_CAVPO, T; P35348, A1AA_HUMAN, T; DR Q91175, A1AA_ORYLA, T; O02824, A1AA_RABIT, T; P43140, A1AA_RAT , T; DR P11615, A1AB_CANFA, T; P35368, A1AB_HUMAN, T; P18841, A1AB_MESAU, T; DR P97717, A1AB_MOUSE, T; P15823, A1AB_RAT , T; 
P25100, A1AD_HUMAN, T; DR P97714, A1AD_MOUSE, T; O02666, A1AD_RABIT, T; P23944, A1AD_RAT , T; DR Q28838, A2AA_BOVIN, T; Q60474, A2AA_CAVPO, T; P08913, A2AA_HUMAN, T; DR Q01338, A2AA_MOUSE, T; P18871, A2AA_PIG , T; P22909, A2AA_RAT , T; DR O18935, A2AB_AMBHO, T; Q60475, A2AB_CAVPO, T; O77715, A2AB_DIDMA, T; DR O77713, A2AB_DUGDU, T; O77723, A2AB_ECHTE, T; O19014, A2AB_ELEMA, T; DR O19012, A2AB_ERIEU, T; O77721, A2AB_HORSE, T; P18089, A2AB_HUMAN, T; DR O19025, A2AB_MACPR, T; P30545, A2AB_MOUSE, T; O19032, A2AB_ORYAF, T; DR O19054, A2AB_PROHA, T; O77830, A2AB_RABIT, T; P19328, A2AB_RAT , T; DR O19091, A2AB_TALEU, T; Q60476, A2AC_CAVPO, T; P35405, A2AC_DIDMA, T; DR P18825, A2AC_HUMAN, T; Q01337, A2AC_MOUSE, T; P22086, A2AC_RAT , T; DR P35369, A2AD_HUMAN, T; P32251, A2AR_CARAU, T; Q91081, A2AR_LABOS, T; DR P28190, AA1R_BOVIN, T; P11616, AA1R_CANFA, T; P47745, AA1R_CAVPO, T; DR P49892, AA1R_CHICK, T; P30542, AA1R_HUMAN, T; P34970, AA1R_RABIT, T; DR P25099, AA1R_RAT , T; P11617, AA2A_CANFA, T; P46616, AA2A_CAVPO, T; DR P29274, AA2A_HUMAN, T; Q60613, AA2A_MOUSE, T; P30543, AA2A_RAT , T; DR O13076, AA2B_CHICK, T; P29275, AA2B_HUMAN, T; Q60614, AA2B_MOUSE, T; DR P29276, AA2B_RAT , T; Q28309, AA3R_CANFA, T; P33765, AA3R_HUMAN, T; DR O02667, AA3R_RABIT, T; P28647, AA3R_RAT , T; P35342, AA3R_SHEEP, T; DR P16395, ACM1_DROME, T; P11229, ACM1_HUMAN, T; P56489, ACM1_MACMU, T; DR P12657, ACM1_MOUSE, T; P04761, ACM1_PIG , T; P08482, ACM1_RAT , T; DR P30372, ACM2_CHICK, T; P08172, ACM2_HUMAN, T; Q9ERZ4, ACM2_MOUSE, T; DR P06199, ACM2_PIG , T; P10980, ACM2_RAT , T; P41984, ACM3_BOVIN, T; DR P49578, ACM3_CHICK, T; Q9N2A3, ACM3_GORGO, T; P20309, ACM3_HUMAN, T; DR Q9ERZ3, ACM3_MOUSE, T; Q9N2A4, ACM3_PANTR, T; P11483, ACM3_PIG , T; DR Q9N2A2, ACM3_PONPY, T; P08483, ACM3_RAT , T; P17200, ACM4_CHICK, T; DR P08173, ACM4_HUMAN, T; P32211, ACM4_MOUSE, T; P08485, ACM4_RAT , T; DR P08912, ACM5_HUMAN, T; P56490, ACM5_MACMU, T; P08911, ACM5_RAT , T; DR P34974, ACTR_BOVIN, T; Q9Z1S9, ACTR_CAVPO, T; 
Q01718, ACTR_HUMAN, T; DR P70115, ACTR_MESAU, T; Q64326, ACTR_MOUSE, T; Q28928, ACTR_PAPHA, T; DR Q9TU77, ACTR_SHEEP, T; O15218, ADMR_HUMAN, T; P43142, ADMR_MOUSE, T; DR P31392, ADMR_RAT , T; P50052, AG22_HUMAN, T; Q9Z0Z6, AG22_MERUN, T; DR P35374, AG22_MOUSE, T; P35351, AG22_RAT , T; Q28929, AG22_SHEEP, T; DR P25104, AG2R_BOVIN, T; P43240, AG2R_CANFA, T; Q9WV26, AG2R_CAVPO, T; DR P79785, AG2R_CHICK, T; P30556, AG2R_HUMAN, T; P33396, AG2R_MELGA, T; DR O35210, AG2R_MERUN, T; P29754, AG2R_MOUSE, T; P30555, AG2R_PIG , T; DR P34976, AG2R_RABIT, T; P25095, AG2R_RAT , T; O77590, AG2R_SHEEP, T; DR P32303, AG2R_XENLA, T; Q13725, AG2S_HUMAN, T; P29755, AG2S_MOUSE, T; DR P29089, AG2S_RAT , T; P35373, AG2S_XENLA, T; P34977, AG2T_RAT , T; DR P35414, APJ_HUMAN , T; O97666, APJ_MACMU , T; Q9WV08, APJ_MOUSE , T; DR Q90352, AVT_CATCO , T; Q9TT96, B1AR_BOVIN, T; P79148, B1AR_CANFA, T; DR Q9TST6, B1AR_FELCA, T; P08588, B1AR_HUMAN, T; P47899, B1AR_MACMU, T; DR P07700, B1AR_MELGA, T; P34971, B1AR_MOUSE, T; Q28998, B1AR_PIG , T; DR P18090, B1AR_RAT , T; Q28927, B1AR_SHEEP, T; O42574, B1AR_XENLA, T; DR Q28044, B2AR_BOVIN, T; P54833, B2AR_CANFA, T; Q9TST5, B2AR_FELCA, T; DR P07550, B2AR_HUMAN, T; Q28509, B2AR_MACMU, T; O70431, B2AR_MERUN, T; DR P04274, B2AR_MESAU, T; P18762, B2AR_MOUSE, T; Q28997, B2AR_PIG , T; DR P10608, B2AR_RAT , T; P46626, B3AR_BOVIN, T; O02662, B3AR_CANFA, T; DR Q9XT57, B3AR_CAPHI, T; Q60483, B3AR_CAVPO, T; Q9TST4, B3AR_FELCA, T; DR P13945, B3AR_HUMAN, T; Q28524, B3AR_MACMU, T; P25962, B3AR_MOUSE, T; DR P26255, B3AR_RAT , T; Q9XT58, B3AR_SHEEP, T; P43141, B4AR_MELGA, T; DR O70526, BRB2_CAVPO, T; P30411, BRB2_HUMAN, T; P32299, BRB2_MOUSE, T; DR Q9GLX8, BRB2_PIG , T; Q28642, BRB2_RABIT, T; P25023, BRB2_RAT , T; DR P35371, BRS3_CAVPO, T; P32247, BRS3_HUMAN, T; O54798, BRS3_MOUSE, T; DR O97967, BRS3_SHEEP, T; P47751, BRS4_BOMOR, T; O88680, C3AR_CAVPO, T; DR Q16581, C3AR_HUMAN, T; O09047, C3AR_MOUSE, T; O55197, C3AR_RAT , T; DR P49238, C3X1_HUMAN, T; Q9Z0D9, C3X1_MOUSE, 
T; P35411, C3X1_RAT , T; DR P30992, C5AR_CANFA, T; P79175, C5AR_GORGO, T; P21730, C5AR_HUMAN, T; DR P79188, C5AR_MACMU, T; P30993, C5AR_MOUSE, T; P79240, C5AR_PANTR, T; DR P79234, C5AR_PONPY, T; Q9TUE1, C5AR_RABIT, T; P97520, C5AR_RAT , T; DR Q98894, CB1A_FUGRU, T; Q98895, CB1B_FUGRU, T; O02777, CB1R_FELCA, T; DR P21554, CB1R_HUMAN, T; P47746, CB1R_MOUSE, T; P56971, CB1R_POEGU, T; DR P20272, CB1R_RAT , T; Q9PUI7, CB1R_TARGR, T; P34972, CB2R_HUMAN, T; DR P47936, CB2R_MOUSE, T; Q9QZN9, CB2R_RAT , T; Q63931, CCKR_CAVPO, T; DR P32238, CCKR_HUMAN, T; O08786, CCKR_MOUSE, T; O97772, CCKR_RABIT, T; DR P30551, CCKR_RAT , T; P70031, CCKR_XENLA, T; P49682, CCR3_HUMAN, T; DR O88410, CCR3_MOUSE, T; P25930, CCR4_BOVIN, T; O62747, CCR4_CERTO, T; DR P56498, CCR4_FELCA, T; P30991, CCR4_HUMAN, T; Q28474, CCR4_MACFA, T; DR P79394, CCR4_MACMU, T; P70658, CCR4_MOUSE, T; P56491, CCR4_PAPAN, T; DR O08565, CCR4_RAT , T; Q28553, CCR4_SHEEP, T; P32302, CCR5_HUMAN, T; DR Q04683, CCR5_MOUSE, T; P34997, CCR5_RAT , T; O18983, CCR6_CERAE, T; DR O00574, CCR6_HUMAN, T; Q9XT45, CCR6_MACMU, T; O19024, CCR6_MACNE, T; DR P32246, CKR1_HUMAN, T; P56482, CKR1_MACMU, T; P51675, CKR1_MOUSE, T; DR P41597, CKR2_HUMAN, T; O18793, CKR2_MACMU, T; P51683, CKR2_MOUSE, T; DR O55193, CKR2_RAT , T; Q9Z2I3, CKR3_CAVPO, T; P56492, CKR3_CERAE, T; DR P51677, CKR3_HUMAN, T; P56483, CKR3_MACMU, T; P51678, CKR3_MOUSE, T; DR O54814, CKR3_RAT , T; P51679, CKR4_HUMAN, T; P51680, CKR4_MOUSE, T; DR P56493, CKR5_CERAE, T; O62743, CKR5_CERTO, T; P56439, CKR5_GORGO, T; DR P51681, CKR5_HUMAN, T; O97883, CKR5_HYLLE, T; P79436, CKR5_MACMU, T; DR P51682, CKR5_MOUSE, T; P56440, CKR5_PANTR, T; P56441, CKR5_PAPHA, T; DR O97881, CKR5_PONPY, T; O97880, CKR5_PYGBI, T; O97882, CKR5_PYGNE, T; DR O08556, CKR5_RAT , T; O97878, CKR5_TRAFR, T; O97879, CKR5_TRAPH, T; DR P51684, CKR6_HUMAN, T; O54689, CKR6_MOUSE, T; P32248, CKR7_HUMAN, T; DR P47774, CKR7_MOUSE, T; P51685, CKR8_HUMAN, T; O97665, CKR8_MACMU, T; DR P56484, CKR8_MOUSE, T; P51686, 
CKR9_HUMAN, T; Q9WUT7, CKR9_MOUSE, T; DR P46092, CKRA_HUMAN, T; Q9JL21, CKRA_MOUSE, T; P35350, CKRB_BOVIN, T; DR Q9NPB9, CKRB_HUMAN, T; P51676, CKRV_MOUSE, T; Q99788, CML1_HUMAN, T; DR P97468, CML1_MOUSE, T; O35786, CML1_RAT , T; Q99527, CML2_HUMAN, T; DR O08878, CML2_RAT , T; P46094, CXC1_HUMAN, T; Q9R0M1, CXC1_MOUSE, T; DR P35406, D1DR_CARAU, T; P53452, D1DR_FUGRU, T; P47800, D1DR_OREMO, T; DR P24628, D2D1_XENLA, T; P34973, D2D2_XENLA, T; P20288, D2DR_BOVIN, T; DR P52702, D2DR_CERAE, T; P53453, D2DR_FUGRU, T; P14416, D2DR_HUMAN, T; DR P13953, D2DR_MOUSE, T; P52703, D3DR_CERAE, T; P35462, D3DR_HUMAN, T; DR P30728, D3DR_MOUSE, T; P19020, D3DR_RAT , T; P21917, D4DR_HUMAN, T; DR P51436, D4DR_MOUSE, T; P30729, D4DR_RAT , T; P53454, D5DR_FUGRU, T; DR P42288, DADR_DIDMA, T; P21728, DADR_HUMAN, T; O77680, DADR_MACMU, T; DR P50130, DADR_PIG , T; O02664, DADR_RABIT, T; P18901, DADR_RAT , T; DR P42289, DADR_XENLA, T; P21918, DBDR_HUMAN, T; P25115, DBDR_RAT , T; DR P42290, DBDR_XENLA, T; P42291, DCDR_XENLA, T; P41596, DOP1_DROME, T; DR Q24563, DOP2_DROME, T; P32249, EBI2_HUMAN, T; P21453, EDG1_HUMAN, T; DR O08530, EDG1_MOUSE, T; P48303, EDG1_RAT , T; Q28031, EDG2_BOVIN, T; DR Q92633, EDG2_HUMAN, T; Q61130, EDG2_MOUSE, T; P46628, EDG2_SHEEP, T; DR Q99500, EDG3_HUMAN, T; P21450, ET1R_BOVIN, T; P25101, ET1R_HUMAN, T; DR Q61614, ET1R_MOUSE, T; Q29010, ET1R_PIG , T; P26684, ET1R_RAT , T; DR P28088, ETBR_BOVIN, T; P56497, ETBR_CANFA, T; Q90328, ETBR_COTJA, T; DR O62709, ETBR_HORSE, T; P24530, ETBR_HUMAN, T; Q28468, ETBR_MACFA, T; DR P48302, ETBR_MOUSE, T; P35463, ETBR_PIG , T; P21451, ETBR_RAT , T; DR P79177, FML1_GORGO, T; P25090, FML1_HUMAN, T; P79190, FML1_MACMU, T; DR O08790, FML1_MOUSE, T; P79242, FML1_PANTR, T; P79236, FML1_PONPY, T; DR P79178, FML2_GORGO, T; P25089, FML2_HUMAN, T; P79191, FML2_MACMU, T; DR P79243, FML2_PANTR, T; P79237, FML2_PONPY, T; P79176, FMLR_GORGO, T; DR P21462, FMLR_HUMAN, T; P33766, FMLR_MOUSE, T; Q05394, FMLR_RABIT, T; DR P35376, FSHR_BOVIN, T; 
P79763, FSHR_CHICK, T; Q95179, FSHR_EQUAS, T; DR P47799, FSHR_HORSE, T; P23945, FSHR_HUMAN, T; P32212, FSHR_MACFA, T; DR P35378, FSHR_MOUSE, T; P49059, FSHR_PIG , T; P20395, FSHR_RAT , T; DR P35379, FSHR_SHEEP, T; P47211, GALR_HUMAN, T; P56479, GALR_MOUSE, T; DR Q62805, GALR_RAT , T; O43603, GALS_HUMAN, T; O88854, GALS_MOUSE, T; DR O08726, GALS_RAT , T; O60755, GALT_HUMAN, T; O88853, GALT_MOUSE, T; DR O88626, GALT_RAT , T; P79266, GASR_BOVIN, T; P30552, GASR_CANFA, T; DR P32239, GASR_HUMAN, T; P56481, GASR_MOUSE, T; P30796, GASR_PRANA, T; DR P46627, GASR_RABIT, T; P30553, GASR_RAT , T; Q92847, GHSR_HUMAN, T; DR Q95254, GHSR_PIG , T; O08725, GHSR_RAT , T; P35409, GLHR_ANTEL, T; DR O43193, GP38_HUMAN, T; O43194, GP39_HUMAN, T; O14843, GP41_HUMAN, T; DR O15529, GP42_HUMAN, T; O15552, GP43_HUMAN, T; Q9Y5Y4, GP44_HUMAN, T; DR Q9Z2J6, GP44_MOUSE, T; Q9Y2T5, GP52_HUMAN, T; Q15743, GP68_HUMAN, T; DR Q9TTQ9, GP72_CANFA, T; Q9NYM4, GP72_HUMAN, T; P30731, GP72_MOUSE, T; DR P46090, GPR1_RAT , T; P46089, GPR3_HUMAN, T; P35413, GPR3_MOUSE, T; DR P46093, GPR4_HUMAN, T; P50132, GPR4_PIG , T; P46095, GPR6_HUMAN, T; DR P51651, GPR6_RAT , T; P48145, GPR7_HUMAN, T; P49681, GPR7_MOUSE, T; DR P48146, GPR8_HUMAN, T; P49683, GPRA_HUMAN, T; Q64121, GPRA_RAT , T; DR P47775, GPRC_HUMAN, T; P35412, GPRC_MOUSE, T; P30951, GPRC_RAT , T; DR O18982, GPRF_CERAE, T; P49685, GPRF_HUMAN, T; O97663, GPRF_MACMU, T; DR P56412, GPRF_MACNE, T; Q13304, GPRH_HUMAN, T; Q14330, GPRI_HUMAN, T; DR Q99678, GPRK_HUMAN, T; Q99679, GPRL_HUMAN, T; O00155, GPRP_HUMAN, T; DR O00270, GPRV_HUMAN, T; O75388, GPRW_HUMAN, T; Q91178, GPRX_ORYLA, T; DR Q9UPC5, GPRY_HUMAN, T; Q9R1K6, GPRY_MOUSE, T; Q9HC97, GPRZ_HUMAN, T; DR Q9ES90, GPRZ_MOUSE, T; Q93126, GRE1_BALAM, T; Q93127, GRE2_BALAM, T; DR P32236, GRHR_BOVIN, T; O42329, GRHR_CLAGA, T; O18821, GRHR_HORSE, T; DR P30968, GRHR_HUMAN, T; Q01776, GRHR_MOUSE, T; P49922, GRHR_PIG , T; DR P30969, GRHR_RAT , T; P32237, GRHR_SHEEP, T; P30550, GRPR_HUMAN, T; DR P21729, GRPR_MOUSE, 
T; P52500, GRPR_RAT , T; P35894, GU01_RAT , T; DR P35895, GU03_RAT , T; P34987, GU27_RAT , T; P35897, GU38_RAT , T; DR P35898, GU45_RAT , T; P35899, GU58_RAT , T; O14626, H963_HUMAN, T; DR P30546, HH1R_BOVIN, T; P31389, HH1R_CAVPO, T; P35367, HH1R_HUMAN, T; DR P70174, HH1R_MOUSE, T; P31390, HH1R_RAT , T; P17124, HH2R_CANFA, T; DR P47747, HH2R_CAVPO, T; P25021, HH2R_HUMAN, T; P97292, HH2R_MOUSE, T; DR P25102, HH2R_RAT , T; Q9JI35, HH3R_CAVPO, T; Q9Y5N1, HH3R_HUMAN, T; DR P58406, HH3R_MOUSE, T; Q9QYN8, HH3R_RAT , T; Q9H3N8, HH4R_HUMAN, T; DR P49019, HM74_HUMAN, T; P55919, IL8A_GORGO, T; P25024, IL8A_HUMAN, T; DR P55920, IL8A_PANTR, T; P21109, IL8A_RABIT, T; P70612, IL8A_RAT , T; DR Q28003, IL8B_BOVIN, T; O97571, IL8B_CANFA, T; Q28422, IL8B_GORGO, T; DR P25025, IL8B_HUMAN, T; Q28519, IL8B_MACMU, T; P35343, IL8B_MOUSE, T; DR Q28807, IL8B_PANTR, T; P35344, IL8B_RABIT, T; P35407, IL8B_RAT , T; DR Q90334, ITR_CATCO , T; Q28005, LSHR_BOVIN, T; O02721, LSHR_CALJA, T; DR Q90674, LSHR_CHICK, T; P22888, LSHR_HUMAN, T; P30730, LSHR_MOUSE, T; DR P16582, LSHR_PIG , T; P16235, LSHR_RAT , T; Q28585, LSHR_SHEEP, T; DR P04201, MAS_HUMAN , T; P30554, MAS_MOUSE , T; P12526, MAS_RAT , T; DR P41968, MC3R_HUMAN, T; P33033, MC3R_MOUSE, T; P32244, MC3R_RAT , T; DR Q9GLJ8, MC4R_BOVIN, T; P32245, MC4R_HUMAN, T; P56450, MC4R_MOUSE, T; DR O97504, MC4R_PIG , T; P70596, MC4R_RAT , T; P56451, MC5R_BOVIN, T; DR P33032, MC5R_HUMAN, T; P41149, MC5R_MOUSE, T; Q9TT23, MC5R_PANTR, T; DR Q9MZV8, MC5R_PIG , T; P35345, MC5R_RAT , T; P41983, MC5R_SHEEP, T; DR O02769, ML1A_BOVIN, T; P49285, ML1A_CHICK, T; P48039, ML1A_HUMAN, T; DR Q61184, ML1A_MOUSE, T; P49217, ML1A_PHOSU, T; O02781, ML1A_PIG , T; DR P48040, ML1A_SHEEP, T; P51050, ML1B_CHICK, T; P49286, ML1B_HUMAN, T; DR P49288, ML1C_CHICK, T; P49219, ML1C_XENLA, T; Q13585, ML1X_HUMAN, T; DR O88495, ML1X_MOUSE, T; Q28558, ML1X_SHEEP, T; P35410, MRG_HUMAN , T; DR P56442, MSHR_ALCAA, T; P47798, MSHR_BOVIN, T; O77616, MSHR_CANFA, T; DR P56443, MSHR_CAPCA, T; 
P56444, MSHR_CAPHI, T; P56445, MSHR_CEREL, T; DR P55167, MSHR_CHICK, T; P56446, MSHR_DAMDA, T; P79166, MSHR_HORSE, T; DR Q01726, MSHR_HUMAN, T; Q01727, MSHR_MOUSE, T; P56447, MSHR_OVIMO, T; DR Q9TUK4, MSHR_PANTR, T; Q9TU05, MSHR_PIG , T; P56448, MSHR_RANTA, T; DR O19037, MSHR_SHEEP, T; Q29154, MSHR_VULVU, T; Q90252, MTR_BUFMA , T; DR Q9GZQ6, NFF1_HUMAN, T; Q9EP86, NFF1_RAT , T; Q9Y5X5, NFF2_HUMAN, T; DR Q9EQD2, NFF2_RAT , T; P30547, NK1R_CAVPO, T; P25103, NK1R_HUMAN, T; DR P30548, NK1R_MOUSE, T; Q98982, NK1R_RANCA, T; P14600, NK1R_RAT , T; DR P05363, NK2R_BOVIN, T; Q64077, NK2R_CAVPO, T; P21452, NK2R_HUMAN, T; DR P51144, NK2R_MESAU, T; P30549, NK2R_MOUSE, T; P79218, NK2R_RABIT, T; DR P16610, NK2R_RAT , T; P29371, NK3R_HUMAN, T; P47937, NK3R_MOUSE, T; DR O97512, NK3R_RABIT, T; P16177, NK3R_RAT , T; P30098, NK4R_HUMAN, T; DR P28336, NMBR_HUMAN, T; O54799, NMBR_MOUSE, T; P24053, NMBR_RAT , T; DR P30989, NTR1_HUMAN, T; O88319, NTR1_MOUSE, T; P20789, NTR1_RAT , T; DR O95665, NTR2_HUMAN, T; P70310, NTR2_MOUSE, T; Q63384, NTR2_RAT , T; DR O02813, NY1R_CANFA, T; Q9WVD0, NY1R_CAVPO, T; P25929, NY1R_HUMAN, T; DR Q04573, NY1R_MOUSE, T; O02835, NY1R_PIG , T; P21555, NY1R_RAT , T; DR P34992, NY1R_XENLA, T; P79113, NY2R_BOVIN, T; Q9Z2D5, NY2R_CAVPO, T; DR Q9DDN6, NY2R_CHICK, T; P49146, NY2R_HUMAN, T; Q9GK74, NY2R_MACMU, T; DR P97295, NY2R_MOUSE, T; O02836, NY2R_PIG , T; P79211, NY2R_SHEEP, T; DR P50391, NY4R_HUMAN, T; Q61041, NY4R_MOUSE, T; Q63447, NY4R_RAT , T; DR Q61212, NY6R_MOUSE, T; P79217, NY6R_RABIT, T; P25931, NYR_DROME , T; DR Q15619, O1C1_HUMAN, T; P34982, O1D2_HUMAN, T; P47884, O1D4_HUMAN, T; DR P30953, O1E1_HUMAN, T; P47887, O1E2_HUMAN, T; Q9UM60, O1E5_HUMAN, T; DR O60431, O1I1_HUMAN, T; Q15612, O1Q1_HUMAN, T; Q9GZK3, O2B2_HUMAN, T; DR O76000, O2B3_HUMAN, T; P58173, O2B6_HUMAN, T; O95371, O2C1_HUMAN, T; DR Q13607, O2F1_HUMAN, T; O95006, O2F2_HUMAN, T; Q9GZK4, O2H1_HUMAN, T; DR O95918, O2H2_HUMAN, T; Q15062, O2H3_HUMAN, T; O76002, O2J2_HUMAN, T; DR O76001, O2J3_HUMAN, 
T; Q9NQN1, O2S2_HUMAN, T; O43869, O2T1_HUMAN, T; DR Q9Y3N9, O2W1_HUMAN, T; P47883, O3A4_HUMAN, T; Q15615, O4D1_HUMAN, T; DR O95013, O4F3_HUMAN, T; Q9UP62, O5D4_HUMAN, T; Q13606, O5I1_HUMAN, T; DR Q9UGF5, O5U1_HUMAN, T; Q9UGF6, O5V1_HUMAN, T; O95007, O6B1_HUMAN, T; DR Q15622, O7A5_HUMAN, T; O76100, O7AA_HUMAN, T; O14581, O7AH_HUMAN, T; DR O76099, O7C1_HUMAN, T; O60412, O7C2_HUMAN, T; Q15620, O8B8_HUMAN, T; DR Q9H209, OAA4_HUMAN, T; Q9H207, OAA5_HUMAN, T; P30954, OAJ1_HUMAN, T; DR Q25321, OAR1_LOCMI, T; O77408, OAR1_LYMST, T; Q25322, OAR2_LOCMI, T; DR Q17232, OAR_BOMMO , T; P22270, OAR_DROME , T; Q25188, OAR_HELVI , T; DR Q9GZK7, OBA1_HUMAN, T; Q9UGF7, OCD3_HUMAN, T; P34984, OL13_MOUSE, T; DR P23275, OL15_MOUSE, T; P47892, OL1L_HUMAN, T; P34983, OL7B_MOUSE, T; DR P23269, OLF0_RAT , T; Q95154, OLF1_CANFA, T; P37067, OLF1_CHICK, T; DR P23274, OLF1_RAT , T; Q95155, OLF2_CANFA, T; P37068, OLF2_CHICK, T; DR Q95156, OLF3_CANFA, T; P37069, OLF3_CHICK, T; P23265, OLF3_RAT , T; DR Q95157, OLF4_CANFA, T; P37070, OLF4_CHICK, T; P23273, OLF4_RAT , T; DR P37071, OLF5_CHICK, T; P23266, OLF5_RAT , T; P34986, OLF6_MOUSE, T; DR P23267, OLF6_RAT , T; P23270, OLF7_RAT , T; P23271, OLF8_RAT , T; DR P23272, OLF9_RAT , T; P30955, OLFD_CANFA, T; Q9H1Y3, OPN3_HUMAN, T; DR Q9WUK7, OPN3_MOUSE, T; Q9UHM6, OPN4_HUMAN, T; Q9QXZ9, OPN4_MOUSE, T; DR P41143, OPRD_HUMAN, T; P32300, OPRD_MOUSE, T; P79291, OPRD_PIG , T; DR P33533, OPRD_RAT , T; P41144, OPRK_CAVPO, T; P41145, OPRK_HUMAN, T; DR P33534, OPRK_MOUSE, T; P34975, OPRK_RAT , T; P79350, OPRM_BOVIN, T; DR P97266, OPRM_CAVPO, T; P35372, OPRM_HUMAN, T; P42866, OPRM_MOUSE, T; DR Q95247, OPRM_PIG , T; P33535, OPRM_RAT , T; P47748, OPRX_CAVPO, T; DR P41146, OPRX_HUMAN, T; P35377, OPRX_MOUSE, T; P79292, OPRX_PIG , T; DR P35370, OPRX_RAT , T; P22269, OPS1_CALVI, T; P06002, OPS1_DROME, T; DR P28678, OPS1_DROPS, T; Q25157, OPS1_HEMSA, T; P35360, OPS1_LIMPO, T; DR O15973, OPS1_PATYE, T; Q94741, OPS1_SCHGR, T; P08099, OPS2_DROME, T; DR P28679, OPS2_DROPS, 
T; Q25158, OPS2_HEMSA, T; P35361, OPS2_LIMPO, T; DR Q26495, OPS2_SCHGR, T; P04950, OPS3_DROME, T; P28680, OPS3_DROPS, T; DR P08255, OPS4_DROME, T; P29404, OPS4_DROPS, T; P91657, OPS5_DROME, T; DR O01668, OPS6_DROME, T; P51471, OPSB_ANOCA, T; P51472, OPSB_ASTFA, T; DR P51490, OPSB_BOVIN, T; P32310, OPSB_CARAU, T; P28682, OPSB_CHICK, T; DR O13227, OPSB_CONCO, T; P35357, OPSB_GECGE, T; P03999, OPSB_HUMAN, T; DR P51491, OPSB_MOUSE, T; P87365, OPSB_ORYLA, T; Q63652, OPSB_RAT , T; DR O13092, OPSB_SAIBB, T; O42294, OPSD_ABYKO, T; P52202, OPSD_ALLMI, T; DR Q90245, OPSD_AMBTI, T; Q90214, OPSD_ANGAN, T; P41591, OPSD_ANOCA, T; DR Q17053, OPSD_APIME, T; Q9YGZ1, OPSD_ATHBO, T; O42300, OPSD_BATMU, T; DR O42301, OPSD_BATNI, T; P02699, OPSD_BOVIN, T; P56514, OPSD_BUFBU, T; DR P56515, OPSD_BUFMA, T; Q17292, OPSD_CAMAB, T; O18312, OPSD_CAMHU, T; DR O16017, OPSD_CAMLU, T; O18315, OPSD_CAMMA, T; O16018, OPSD_CAMSC, T; DR P32308, OPSD_CANFA, T; P32309, OPSD_CARAU, T; Q17296, OPSD_CATBO, T; DR Q9YGZ8, OPSD_CHELB, T; P22328, OPSD_CHICK, T; O42327, OPSD_COMDY, T; DR O42307, OPSD_COTBO, T; O42328, OPSD_COTGR, T; O42330, OPSD_COTIN, T; DR Q90373, OPSD_COTKE, T; P28681, OPSD_CRIGR, T; P51488, OPSD_CYPCA, T; DR O62791, OPSD_DELDE, T; Q9YGZ4, OPSD_DICLA, T; Q9YH05, OPSD_DIPAN, T; DR Q9YH04, OPSD_DIPVU, T; O93441, OPSD_GALML, T; P79756, OPSD_GAMAF, T; DR O62792, OPSD_GLOME, T; Q9YGZ2, OPSD_GOBNI, T; P08100, OPSD_HUMAN, T; DR O42268, OPSD_ICTPU, T; P22671, OPSD_LAMJA, T; O42427, OPSD_LIMBE, T; DR Q9YH00, OPSD_LITMO, T; Q9YGZ6, OPSD_LIZAU, T; Q9YGZ7, OPSD_LIZSA, T; DR P24603, OPSD_LOLFO, T; Q17094, OPSD_LOLSU, T; Q28886, OPSD_MACFA, T; DR O62793, OPSD_MESBI, T; P15409, OPSD_MOUSE, T; Q9YGZ9, OPSD_MUGCE, T; DR Q9YH01, OPSD_MULSU, T; P79798, OPSD_MYRBE, T; P79807, OPSD_MYRVI, T; DR P79808, OPSD_NEOAR, T; P79809, OPSD_NEOAU, T; P79812, OPSD_NEOSA, T; DR P09241, OPSD_OCTDO, T; O18481, OPSD_ORCAU, T; O16019, OPSD_ORCVI, T; DR P87369, OPSD_ORYLA, T; O42452, OPSD_PARKN, T; Q98980, OPSD_PETMA, T; DR 
O62795, OPSD_PHOGR, T; O62794, OPSD_PHOVI, T; O18766, OPSD_PIG , T; DR P79848, OPSD_POERE, T; P35403, OPSD_POMMI, T; P35356, OPSD_PROCL, T; DR O42451, OPSD_PROJE, T; O16020, OPSD_PROML, T; O18485, OPSD_PROOR, T; DR O18486, OPSD_PROSE, T; P49912, OPSD_RABIT, T; P79863, OPSD_RAJER, T; DR P51470, OPSD_RANCA, T; P31355, OPSD_RANPI, T; P56516, OPSD_RANTE, T; DR P51489, OPSD_RAT , T; Q9YGZ3, OPSD_SALPV, T; P79898, OPSD_SARDI, T; DR P79901, OPSD_SARMI, T; Q9YGZ0, OPSD_SARPI, T; P79902, OPSD_SARPU, T; DR Q9YH03, OPSD_SARSL, T; P79903, OPSD_SARSP, T; P79911, OPSD_SARTI, T; DR P79914, OPSD_SARXA, T; O93459, OPSD_SCYCA, T; O16005, OPSD_SEPOF, T; DR P02700, OPSD_SHEEP, T; Q9YGZ5, OPSD_SOLSO, T; Q9YH02, OPSD_SPAAU, T; DR P35362, OPSD_SPHSP, T; O42466, OPSD_TAUBU, T; Q9DGG4, OPSD_TETNG, T; DR P31356, OPSD_TODPA, T; O62796, OPSD_TRIMA, T; O62798, OPSD_TURTR, T; DR P29403, OPSD_XENLA, T; O42604, OPSD_ZEUFA, T; Q9YGY9, OPSD_ZOSOP, T; DR Q90215, OPSF_ANGAN, T; P22330, OPSG_ASTFA, T; P32311, OPSG_CARAU, T; DR Q9R024, OPSG_CAVPO, T; P28683, OPSG_CHICK, T; P35358, OPSG_GECGE, T; DR P04001, OPSG_HUMAN, T; O35599, OPSG_MOUSE, T; O18911, OPSG_ODOVI, T; DR P87366, OPSG_ORYLA, T; O18910, OPSG_RABIT, T; O35476, OPSG_RAT , T; DR O35478, OPSG_SCICA, T; P22331, OPSH_ASTFA, T; P32312, OPSH_CARAU, T; DR P51474, OPSI_ASTFA, T; P34989, OPSL_CALJA, T; O13018, OPSO_SALSA, T; DR P51475, OPSP_CHICK, T; P51476, OPSP_COLLI, T; O42266, OPSP_ICTPU, T; DR O42490, OPSP_PETMA, T; P41592, OPSR_ANOCA, T; P22332, OPSR_ASTFA, T; DR O18914, OPSR_CANFA, T; Q95170, OPSR_CAPHI, T; P32313, OPSR_CARAU, T; DR P22329, OPSR_CHICK, T; O18913, OPSR_FELCA, T; O18912, OPSR_HORSE, T; DR P04000, OPSR_HUMAN, T; P87367, OPSR_ORYLA, T; O12948, OPSR_XENLA, T; DR P35359, OPSU_BRARE, T; Q90309, OPSU_CARAU, T; O61303, OPSV_APIME, T; DR P28684, OPSV_CHICK, T; P87368, OPSV_ORYLA, T; P51473, OPSV_XENLA, T; DR O14718, OPSX_HUMAN, T; O35214, OPSX_MOUSE, T; O43613, OX1R_HUMAN, T; DR P58307, OX1R_MOUSE, T; O97661, OX1R_PIG , T; P56718, 
OX1R_RAT , T; DR Q9TUP7, OX2R_CANFA, T; O43614, OX2R_HUMAN, T; P58308, OX2R_MOUSE, T; DR O62809, OX2R_PIG , T; P56719, OX2R_RAT , T; Q9Y5P1, OXB2_HUMAN, T; DR Q9H255, OXE2_HUMAN, T; O88628, OXE2_RAT , T; Q9H343, OXI1_HUMAN, T; DR Q9H344, OXI2_HUMAN, T; P56449, OXYR_BOVIN, T; P30559, OXYR_HUMAN, T; DR P56494, OXYR_MACMU, T; P97926, OXYR_MOUSE, T; P32306, OXYR_PIG , T; DR P70536, OXYR_RAT , T; Q28756, OXYR_SHEEP, T; Q9UKL2, OYA1_HUMAN, T; DR Q9H346, OYD1_HUMAN, T; P41231, P2UR_HUMAN, T; P35383, P2UR_MOUSE, T; DR P41232, P2UR_RAT , T; P51582, P2Y4_HUMAN, T; P32250, P2Y5_CHICK, T; DR P43657, P2Y5_HUMAN, T; Q15722, P2Y7_HUMAN, T; P79928, P2Y8_XENLA, T; DR Q99677, P2Y9_HUMAN, T; P48042, P2YR_BOVIN, T; P34996, P2YR_CHICK, T; DR P47900, P2YR_HUMAN, T; P49652, P2YR_MELGA, T; P49650, P2YR_MOUSE, T; DR P49651, P2YR_RAT , T; P21556, PAFR_CAVPO, T; P25105, PAFR_HUMAN, T; DR P35366, PAFR_MACMU, T; Q62035, PAFR_MOUSE, T; P46002, PAFR_RAT , T; DR O00254, PAR3_HUMAN, T; P34995, PE21_HUMAN, T; P35375, PE21_MOUSE, T; DR P70597, PE21_RAT , T; Q9XT82, PE22_CANFA, T; P43116, PE22_HUMAN, T; DR Q62053, PE22_MOUSE, T; Q62928, PE22_RAT , T; P35408, PE24_HUMAN, T; DR P32240, PE24_MOUSE, T; Q28691, PE24_RABIT, T; P43114, PE24_RAT , T; DR P37289, PF2R_BOVIN, T; P43088, PF2R_HUMAN, T; P43117, PF2R_MOUSE, T; DR P43118, PF2R_RAT , T; Q28905, PF2R_SHEEP, T; P79393, PI2R_BOVIN, T; DR P43119, PI2R_HUMAN, T; P43252, PI2R_MOUSE, T; P43253, PI2R_RAT , T; DR P11613, RDC1_CANFA, T; P25106, RDC1_HUMAN, T; P56485, RDC1_MOUSE, T; DR O89039, RDC1_RAT , T; P23749, RTA_RAT , T; P30872, SSR1_HUMAN, T; DR P30873, SSR1_MOUSE, T; P28646, SSR1_RAT , T; P34993, SSR2_BOVIN, T; DR P30874, SSR2_HUMAN, T; P30875, SSR2_MOUSE, T; P34994, SSR2_PIG , T; DR P30680, SSR2_RAT , T; P32745, SSR3_HUMAN, T; P30935, SSR3_MOUSE, T; DR P30936, SSR3_RAT , T; P31391, SSR4_HUMAN, T; P49660, SSR4_MOUSE, T; DR P30937, SSR4_RAT , T; P35346, SSR5_HUMAN, T; O08858, SSR5_MOUSE, T; DR P30938, SSR5_RAT , T; O42179, SSRL_FUGRU, T; Q95125, 
TA2R_BOVIN, T; DR P56486, TA2R_CERAE, T; P21731, TA2R_HUMAN, T; P30987, TA2R_MOUSE, T; DR P34978, TA2R_RAT , T; Q61038, TDA8_MOUSE, T; Q00991, THRR_CRILO, T; DR P25116, THRR_HUMAN, T; P30558, THRR_MOUSE, T; P56488, THRR_PAPHA, T; DR P26824, THRR_RAT , T; P47749, THRR_XENLA, T; P30974, TLR1_DROME, T; DR P30975, TLR2_DROME, T; O46639, TRFR_BOVIN, T; O93603, TRFR_CHICK, T; DR P34981, TRFR_HUMAN, T; P21761, TRFR_MOUSE, T; Q01717, TRFR_RAT , T; DR Q28596, TRFR_SHEEP, T; Q27987, TSHR_BOVIN, T; P14763, TSHR_CANFA, T; DR P16473, TSHR_HUMAN, T; P47750, TSHR_MOUSE, T; P21463, TSHR_RAT , T; DR P56495, TSHR_SHEEP, T; P16849, UL33_HCMVA, T; Q83207, UL33_MCMVS, T; DR O12000, UL33_RCMVM, T; Q9UKP6, UR2R_HUMAN, T; P49684, UR2R_RAT , T; DR P09703, US27_HCMVA, T; P09704, US28_HCMVA, T; P37288, V1AR_HUMAN, T; DR Q62463, V1AR_MOUSE, T; P30560, V1AR_RAT , T; P48043, V1AR_SHEEP, T; DR P47901, V1BR_HUMAN, T; Q9WU02, V1BR_MOUSE, T; P48974, V1BR_RAT , T; DR Q9J529, V206_FOWPV, T; P48044, V2R_BOVIN , T; P30518, V2R_HUMAN , T; DR P32307, V2R_PIG , T; Q00788, V2R_RAT , T; P32229, VC03_SPVKA, T; DR Q08520, VK02_SPVKA, T; Q19084, YDBM_CAEEL, T; P34311, YKR5_CAEEL, T; DR Q03566, YLD1_CAEEL, T; Q09502, YQH2_CAEEL, T; O02213, YQNJ_CAEEL, T; DR Q09638, YR13_CAEEL, T; Q09561, YR42_CAEEL, T; Q11082, YT66_CAEEL, T; DR Q18007, YTJ5_CAEEL, T; Q10904, YWO1_CAEEL, T; Q18179, YXX5_CAEEL, T; DR Q18904, YYO1_CAEEL, T; DR P79250, 5H1B_CANFA, P; P97267, 5H2B_CAVPO, P; Q29006, 5H4_PIG , P; DR Q60612, AA1R_MOUSE, P; Q61618, AA3R_MOUSE, P; P41985, ACM2_BOVIN, P; DR P41986, ACM4_BOVIN, P; O70430, B1AR_MERUN, P; O70432, B3AR_MERUN, P; DR Q95252, B3AR_PIG , P; Q95136, DADR_BOVIN, P; Q61616, DADR_MOUSE, P; DR Q95137, DBDR_BOVIN, P; Q95195, DBDR_MACMU, P; P52592, EDGL_MOUSE, P; DR P51046, ML11_BRARE, P; P51047, ML12_BRARE, P; P51049, ML13_BRARE, P; DR Q90456, ML14_BRARE, P; P49218, ML1A_RAT , P; P51048, ML1A_XENLA, P; DR P49287, ML1B_RAT , P; P51051, ML1B_XENLA, P; P51052, ML1C_BRARE, P; DR Q62953, ML1X_RAT , P; 
Q28602, NY1R_SHEEP, P; P14803, OAR_PHOPY , P; DR Q60883, OL10_MOUSE, P; Q60890, OL11_MOUSE, P; Q60894, OL12_MOUSE, P; DR P34985, OL7A_MOUSE, P; Q60882, OL7C_MOUSE, P; Q60884, OL7D_MOUSE, P; DR Q60886, OL7E_MOUSE, P; Q60887, OL7F_MOUSE, P; Q60888, OL7G_MOUSE, P; DR Q60893, OL7H_MOUSE, P; Q60895, OL7I_MOUSE, P; Q60891, OLF1_MOUSE, P; DR Q60879, OLF3_MOUSE, P; Q60889, OLF5_MOUSE, P; Q98913, OLF8_CHICK, P; DR Q60892, OLF8_MOUSE, P; Q98914, OLF9_CHICK, P; Q60885, OLF9_MOUSE, P; DR P17645, OPS3_DROVI, P; Q90305, OPSD_CORAU, P; P49220, UR2R_BOVIN, P; DR P79399, 5H1B_PIG , N; Q16950, 5HT1_APLCA, N; Q16951, 5HT2_APLCA, N; DR O77700, A2AB_BOVIN, N; P30544, ACM4_XENLA, N; P46663, BRB1_HUMAN, N; DR Q61125, BRB1_MOUSE, N; P48748, BRB1_RABIT, N; P97583, BRB1_RAT , N; DR O70129, C5AR_CAVPO, N; O00590, CKD6_HUMAN, N; O08707, CKD6_MOUSE, N; DR O09027, CKD6_RAT , N; O73810, D2DR_MELGA, N; O60883, EBP2_HUMAN, N; DR P32940, ET3R_XENLA, N; O15354, GP37_HUMAN, N; O14842, GP40_HUMAN, N; DR Q9I919, GP85_BRARE, N; Q9NPD1, GP85_HUMAN, N; P46023, GPCR_LYMST, N; DR P46091, GPR1_HUMAN, N; O97664, GPR1_MACMU, N; Q15760, GPRJ_HUMAN, N; DR Q61121, GPRJ_MOUSE, N; P70585, GPRJ_RAT , N; Q99680, GPRM_HUMAN, N; DR Q99705, GPRO_HUMAN, N; P97639, GPRO_RAT , N; Q9NS67, GPRS_HUMAN, N; DR O54897, GPRS_MOUSE, N; Q9JJH3, GPRS_RAT , N; O88416, GPRX_MOUSE, N; DR P35896, GU33_RAT , N; P47752, H218_RAT , N; Q15391, KI01_HUMAN, N; DR O35881, KI01_RAT , N; O62729, NY5R_CANFA, N; Q15761, NY5R_HUMAN, N; DR O70342, NY5R_MOUSE, N; O97969, NY5R_PIG , N; Q63634, NY5R_RAT , N; DR Q9P1Q5, O1A1_HUMAN, N; Q9Y585, O1A2_HUMAN, N; P58170, O1D5_HUMAN, N; DR O43749, O1F1_HUMAN, N; P47890, O1G1_HUMAN, N; O95047, O2A4_HUMAN, N; DR Q9H210, O2D2_HUMAN, N; Q9H205, O2G1_HUMAN, N; P47881, O3A1_HUMAN, N; DR P47893, O3A2_HUMAN, N; P47888, O3A3_HUMAN, N; P58180, O4D2_HUMAN, N; DR O95221, O5F1_HUMAN, N; O95222, O6A1_HUMAN, N; Q9GZM6, O8D2_HUMAN, N; DR P58181, OAA3_HUMAN, N; Q9Y4A9, OAH1_HUMAN, N; O60403, OAH2_HUMAN, N; DR O60404, 
OAH3_HUMAN, N; O01670, OAR2_LYMST, N; P58182, OCD2_HUMAN, N; DR P47886, OL1F_HUMAN, N; P47889, OL1I_HUMAN, N; P23268, OLF2_RAT , N; DR P37072, OLF6_CHICK, N; O15974, OPS2_PATYE, N; P17646, OPS4_DROVI, N; DR P90680, OPSB_APIME, N; P41590, OPSD_ASTFA, N; O42431, OPSD_LIMPA, N; DR Q9Y5P0, OXB4_HUMAN, N; Q98907, P2Y3_CHICK, N; O93361, P2Y3_MELGA, N; DR Q15077, P2Y6_HUMAN, N; Q63371, P2Y6_RAT , N; P55085, PAR2_HUMAN, N; DR P55086, PAR2_MOUSE, N; Q63645, PAR2_RAT , N; O08675, PAR3_MOUSE, N; DR Q13258, PD2R_HUMAN, N; P70263, PD2R_MOUSE, N; P34979, PE23_BOVIN, N; DR P43115, PE23_HUMAN, N; P30557, PE23_MOUSE, N; P50131, PE23_PIG , N; DR P46069, PE23_RABIT, N; P34980, PE23_RAT , N; Q28550, PE23_SHEEP, N; DR P23820, REIS_TODPA, N; P47803, RGR_BOVIN , N; P47804, RGR_HUMAN , N; DR Q9Z2B3, RGR_MOUSE , N; Q9I918, SRB3_BRARE, N; Q9NS66, SRB3_HUMAN, N; DR Q9JJH2, SRB3_RAT , N; P52380, UL33_HSV6U, N; P52381, UL33_HSV7J, N; DR Q9J5I0, V021_FOWPV, N; Q9J5H4, V027_FOWPV, N; Q01035, VG74_HSVSA, N; DR Q98146, VG74_KSHV , N; Q86917, VQ3L_CAPVK, N; P52382, VU51_HSV6U, N; DR P52542, VU51_HSV6Z, N; P52383, VU51_HSV7J, N; P34488, YMJC_CAEEL, N; DR Q03613, YN84_CAEEL, N; Q09965, YS96_CAEEL, N; Q09966, YS97_CAEEL, N; DR Q18775, YYI3_CAEEL, N; DR P14198, AAC4_DICDI, F; P13688, CEA1_HUMAN, F; P23892, DCLY_ECOLI, F; DR P51656, DHB1_MOUSE, F; P51657, DHB1_RAT , F; P77916, DPOL_PYRAB, F; DR P77933, DPOL_PYRKO, F; P77932, DPOL_PYRSE, F; Q51758, EST1_PSEFL, F; DR Q53547, EST2_PSEFL, F; O68824, G6PI_XANCI, F; P15828, GUX1_HUMGR, F; DR Q92839, HAS1_HUMAN, F; Q61647, HAS1_MOUSE, F; P46724, HEM1_MYCLE, F; DR P45873, HEMK_BACSU, F; P00498, HIS1_YEAST, F; P75548, HPRK_MYCPN, F; DR P22817, IDE_DROME , F; P25147, INLB_LISMO, F; P22023, KRE5_YEAST, F; DR Q55593, KTHY_SYNY3, F; Q25410, MIPR_LYMST, F; Q03834, MSH6_YEAST, F; DR P24151, NODC_RHILP, F; Q9VNB3, O83A_DROME, F; P23515, OMGP_HUMAN, F; DR Q63912, OMGP_MOUSE, F; P34724, PHOA_ASPNG, F; P37274, PHOA_PENCH, F; DR P42790, PICP_PSESR, F; Q10775, PLS1_MYCTU, 
F; Q42608, PME_BRACM , F; DR P41510, PME_BRANA , F; O84877, PMPE_CHLTR, F; Q9HJA4, PURL_THEAC, F; DR P21524, RIR1_YEAST, F; O15910, RIR2_TRYBB, F; P46457, SFSA_HAEIN, F; DR Q9W0K0, SP51_DROME, F; P08032, SPCA_MOUSE, F; P07751, SPCN_CHICK, F; DR Q13813, SPCN_HUMAN, F; P16086, SPCN_RAT , F; P26258, TETN_CARSP, F; DR O60779, THT1_HUMAN, F; P06882, THYG_RAT , F; Q42675, TKTA_CRAPL, F; DR Q00126, VG02_HSVI1, F; P07886, VP7_BTV10 , F; P18609, VP7_BTV17 , F; DR P26560, VP7_BTV1A , F; P18259, VP7_BTV1S , F; P26561, VP7_BTV2A , F; DR Q00274, VP7_EHDV1 , F; P47551, Y309_MYCGE, F; P55687, Y4WI_RHISN, F; DR O67284, YC39_AQUAE, F; P21503, YCAD_ECOLI, F; P76097, YDCJ_ECOLI, F; DR P45198, YE24_HAEIN, F; O59098, YE28_PYRHO, F; P34529, YM68_CAEEL, F; DR P54466, YQFA_BACSU, F; 3D 1BOJ; 1BOK; 1F88; DO PDOC00210; // ID G_PROTEIN_RECEP_F1_2; MATRIX. AC PS50262; DT DEC-2001 (CREATED); DEC-2001 (DATA UPDATE); DEC-2001 (INFO UPDATE). DE G-protein coupled receptors family 1 profile. MA /GENERAL_SPEC: ALPHABET='ABCDEFGHIKLMNPQRSTVWYZ'; LENGTH=259; MA /DISJOINT: DEFINITION=PROTECT; N1=6; N2=254; MA /NORMALIZATION: MODE=1; FUNCTION=LINEAR; R1=1.9359; R2=0.02006056; TEXT='-LogE'; MA /CUT_OFF: LEVEL=0; SCORE=327; N_SCORE=8.5; MODE=1; TEXT='!'; MA /CUT_OFF: LEVEL=-1; SCORE=227; N_SCORE=6.5; MODE=1; TEXT='?'; MA /DEFAULT: D=-20; I=-20; B1=-100; E1=-100; MI=-105; MD=-105; IM=-105; DM=-105; MM=1; M0=-10; MA /I: B1=0; BI=-105; BD=-105; MA /M: SY='G'; M=1,-11,-24,-13,-15,-19,30,-19,-20,-18,-15,-11,-5,-19,-16,-18,-2,-11,-15,-22,-20,-16; MA /M: SY='N'; M=-9,33,-19,15,-2,-18,-2,8,-18,-2,-26,-18,51,-20,-1,-2,9,1,-26,-38,-17,-2; MA /M: SY='I'; M=-1,-21,-16,-26,-21,0,-16,-22,10,-22,8,4,-17,-22,-19,-20,-8,-3,10,-21,-7,-20; MA /M: SY='L'; M=-6,-24,-17,-28,-21,8,-25,-20,14,-24,23,12,-21,-25,-20,-19,-17,-5,11,-17,0,-20; MA /M: SY='V'; M=-1,-19,-16,-23,-21,-2,-24,-14,15,-18,6,7,-16,-24,-18,-17,-7,-1,21,-26,-5,-20; MA /M: SY='I'; M=-8,-28,-16,-33,-25,6,-31,-24,26,-26,24,15,-24,-26,-21,-23,-20,-8,20,-20,-1,-24; 
MA /M: SY='I'; M=-6,-23,-19,-27,-21,5,-24,-16,8,-19,8,5,-20,-23,-17,-17,-15,-6,5,-2,7,-19; MA /M: SY='V'; M=4,-22,-14,-26,-21,-5,-23,-23,16,-18,7,6,-19,-22,-18,-19,-7,-1,20,-24,-8,-21; MA /M: SY='I'; M=-8,-25,-19,-30,-24,14,-29,-20,19,-24,19,11,-21,-25,-21,-20,-17,-4,16,-16,5,-23; MA /M: SY='F'; M=-3,-18,-12,-23,-18,4,-20,-16,2,-17,3,1,-14,-22,-16,-14,-7,0,3,-16,0,-17; MA /M: SY='R'; M=-9,-10,-22,-12,-6,-10,-18,-7,-15,7,-11,-5,-5,-17,-1,14,-6,-4,-11,-18,-5,-5; MA /M: SY='K'; M=-8,1,-21,-1,0,-14,-16,-1,-18,4,-17,-10,3,-12,-1,3,-1,-1,-15,-23,-5,-1; MA /M: SY='R'; M=-8,-7,-25,-7,0,-19,-16,-6,-19,11,-18,-7,-3,-5,2,13,-3,-4,-15,-23,-10,0; MA /M: SY='R'; M=-8,-7,-21,-9,-3,-16,-16,-4,-14,7,-12,-1,-3,-14,3,11,-4,-4,-11,-23,-8,-1; MA /I: I=-4; MD=-23; MA /M: SY='L'; M=-7,-17,-20,-19,-11,-2,-20,-11,1,-8,11,8,-14,-19,-8,-1,-14,-7,0,-18,-3,-10; D=-4; MA /I: I=-4; MI=0; MD=-23; IM=0; DM=-23; MA /M: SY='R'; M=-11,-5,-24,-7,-1,-18,-17,7,-20,8,-16,-6,1,-15,6,17,-5,-6,-17,-22,-6,0; MA /M: SY='T'; M=-2,3,-16,-3,-3,-17,-11,-7,-17,-1,-20,-13,9,-13,-1,0,15,16,-11,-31,-13,-2; MA /M: SY='P'; M=-1,-13,-21,-13,-8,-17,-15,-16,-8,-8,-13,-7,-10,8,-10,-10,1,2,-4,-29,-17,-11; MA /M: SY='T'; M=-3,-14,-14,-20,-15,-2,-20,-15,2,-14,-1,3,-11,-14,-13,-13,-1,7,4,-23,-5,-15; MA /M: SY='N'; M=-9,11,-22,4,-4,-10,-9,5,-15,-5,-16,-11,19,-20,-4,-4,1,-2,-18,-23,-1,-5; MA /M: SY='I'; M=-9,-24,-20,-29,-23,12,-29,-16,17,-21,14,10,-20,-24,-19,-18,-16,-6,12,-11,11,-22; MA /M: SY='F'; M=-15,-26,-22,-31,-23,34,-30,-11,9,-24,18,7,-22,-27,-23,-18,-21,-9,3,1,26,-23; MA /M: SY='L'; M=-9,-27,-20,-31,-23,4,-30,-20,25,-24,26,21,-23,-24,-17,-20,-21,-8,17,-21,-1,-21; MA /M: SY='V'; M=1,-19,-8,-24,-19,-2,-18,-20,5,-19,7,4,-17,-22,-17,-18,-7,-1,8,-23,-8,-18; MA /M: SY='N'; M=1,8,-16,-1,-6,-15,-2,0,-15,-9,-19,-13,19,-18,-5,-8,10,2,-15,-30,-14,-6; MA /M: SY='L'; M=-9,-27,-19,-29,-19,7,-29,-18,18,-25,36,18,-25,-27,-17,-17,-24,-8,10,-20,-1,-19; MA /M: SY='A'; 
M=29,-9,0,-16,-11,-17,-5,-18,-12,-12,-14,-12,-6,-14,-11,-18,13,5,-2,-26,-18,-11; MA /M: SY='I'; M=-4,-26,-13,-31,-25,10,-28,-24,20,-24,17,10,-22,-26,-23,-21,-15,-5,20,-20,-1,-25; MA /M: SY='A'; M=17,-11,-3,-17,-12,-10,-7,-18,-10,-15,-11,-10,-6,-16,-12,-17,10,5,-2,-25,-14,-12; MA /M: SY='D'; M=-17,42,-28,57,17,-37,-9,2,-36,-1,-28,-26,20,-11,2,-9,1,-9,-29,-38,-18,9; MA /M: SY='L'; M=-8,-27,-19,-31,-22,13,-28,-21,19,-26,27,14,-23,-26,-21,-21,-20,-7,13,-17,1,-22; MA /M: SY='L'; M=-7,-25,-10,-30,-23,9,-24,-21,11,-25,20,10,-22,-27,-22,-20,-18,-8,9,-17,-2,-22; MA /M: SY='F'; M=-5,-21,-13,-25,-20,9,-22,-14,7,-19,8,7,-17,-24,-18,-16,-11,-3,9,-17,4,-19; MA /M: SY='A'; M=2,-16,-7,-20,-17,-8,-10,-18,-1,-19,2,1,-12,-21,-15,-18,-3,-1,2,-26,-12,-16; MA /M: SY='L'; M=-4,-23,-10,-27,-21,4,-25,-22,13,-24,16,7,-19,-24,-20,-20,-11,0,13,-23,-5,-21; MA /M: SY='T'; M=-1,-15,-13,-20,-16,2,-17,-18,1,-18,2,-1,-11,-20,-16,-16,-2,5,4,-21,-4,-16; MA /M: SY='L'; M=-4,-14,-11,-18,-15,-4,-17,-14,1,-17,2,1,-11,-20,-13,-15,-6,-1,2,-23,-5,-15; MA /I: I=-4; MD=-20; MA /M: SY='I'; M=-5,-21,-19,-24,-18,2,-21,-18,12,-19,10,9,-18,-8,-16,-18,-13,-5,8,-18,-4,-18; D=-4; MA /I: I=-4; MI=0; MD=-20; IM=0; DM=-20; MA /M: SY='P'; M=-6,-16,-24,-14,-8,-8,-19,-14,-4,-13,-7,-5,-15,22,-11,-15,-8,-4,-8,-19,-11,-11; D=-4; MA /I: I=-4; DM=-20; MA /M: SY='F'; M=-8,-17,-21,-20,-14,9,-21,-13,1,-11,3,2,-13,-18,-13,-9,-12,-6,0,-6,4,-13; D=-4; MA /I: I=-4; DM=-20; MA /M: SY='M'; M=-3,-15,-18,-20,-15,1,-19,-13,2,-14,2,4,-12,-18,-12,-13,-6,-1,3,-19,-2,-14; MA /M: SY='I'; M=-1,-20,-19,-24,-18,2,-23,-18,9,-19,9,5,-17,-19,-17,-17,-12,-4,8,-14,-1,-18; MA /M: SY='V'; M=-3,-13,-20,-17,-12,-1,-20,-10,1,-13,-1,1,-11,-18,-11,-12,-5,-1,2,-16,2,-12; MA /M: SY='Y'; M=-7,-6,-22,-9,-8,-5,-16,-4,-7,-9,-9,-6,-2,-17,-7,-9,-3,-2,-8,-16,3,-9; MA /M: SY='F'; M=-4,-13,-18,-18,-14,5,-20,-12,1,-16,2,1,-10,-20,-13,-15,-7,-2,0,-15,2,-13; MA /M: SY='L'; M=-5,-17,-19,-21,-15,1,-20,-13,3,-16,5,3,-13,-21,-13,-13,-10,-4,2,-14,0,-15; MA /M: 
M=-5,-6,-21,-9,-6,-8,-13,-4,-11,-8,-7,-4,-4,-15,-6,-7,-3,-2,-9,-22,-4,-7; MA /M: SY='N'; M=-7,-1,-24,-2,-3,-16,1,-5,-20,-6,-17,-11,2,-13,-4,-7,-2,-8,-18,-23,-11,-5; MA /M: SY='N'; M=-8,0,-22,-1,0,-15,-13,0,-17,-3,-16,-10,2,-12,0,-1,0,-2,-15,-24,-5,-1; MA /M: SY='W'; M=-14,-26,-36,-27,-20,7,-18,-17,-13,-14,-12,-11,-24,-25,-14,-13,-23,-17,-18,65,20,-15; MA /I: I=-3; MD=-17; MA /M: M=-4,-11,-20,-12,-9,-10,-17,-11,-4,-9,-8,-4,-9,0,-8,-10,-3,0,-4,-22,-7,-10; D=-3; MA /I: I=-3; MD=-17; MA /M: SY='F'; M=-11,-20,-18,-24,-18,23,-22,-13,5,-19,9,4,-15,-22,-19,-14,-15,-7,2,-5,11,-18; D=-3; MA /I: I=-3; MI=0; MD=-17; IM=0; DM=-17; MA /M: SY='G'; M=-2,-5,-20,-6,-9,-18,16,-9,-21,-11,-20,-13,1,-9,-9,-11,4,-4,-17,-20,-15,-9; D=-3; MA /I: I=-3; DM=-17; MA /M: M=-6,-4,-21,-4,-2,-9,-14,-6,-12,-4,-11,-8,-3,-11,-3,-3,-3,-4,-11,-18,-3,-4; D=-3; MA /I: I=-3; DM=-17; MA /M: SY='Y'; M=-3,-14,-21,-17,-13,1,-18,-11,-1,-14,1,0,-11,-19,-12,-12,-7,-3,-1,-14,2,-13; MA /M: SY='L'; M=-5,-18,-20,-21,-16,1,-12,-14,2,-18,6,5,-14,-22,-15,-16,-10,-5,1,-15,-1,-16; MA /M: SY='C'; M=-8,-15,74,-22,-22,-17,-25,-24,-23,-22,-16,-15,-14,-32,-22,-22,-7,-7,-9,-40,-23,-22; MA /M: SY='K'; M=-8,-8,-25,-9,-4,-10,-20,-6,-11,4,-9,-3,-6,-15,-2,4,-9,-7,-9,-18,-3,-4; MA /M: SY='I'; M=-4,-24,-19,-28,-21,9,-25,-19,12,-21,12,7,-20,-21,-20,-18,-13,-4,11,-13,2,-21; MA /M: SY='Y'; M=-6,-18,-20,-22,-14,3,-22,-11,2,-15,3,3,-15,-21,-10,-13,-10,-5,2,-8,4,-12; MA /M: M=-3,-12,-22,-15,-11,-7,-12,-9,-4,-13,-3,-1,-8,-16,-9,-11,-5,-4,-4,-20,-5,-11; MA /M: SY='F'; M=-3,-18,-17,-22,-18,13,-16,-13,-3,-18,-2,-3,-13,-18,-18,-17,-6,-3,-1,-10,8,-18; MA /M: SY='L'; M=-8,-26,-16,-31,-23,17,-27,-21,15,-25,19,10,-21,-25,-23,-20,-17,-6,12,-16,2,-23; MA /M: SY='Y'; M=-8,-7,-20,-8,-8,-3,-15,-7,-9,-10,-7,-4,-6,-17,-7,-10,-6,-5,-8,-18,1,-8; MA /M: M=-7,-9,-17,-11,-11,-7,-17,-7,-4,-11,-4,-2,-7,-20,-9,-10,-6,-3,-2,-20,-1,-11; MA /M: SY='F'; M=-6,-22,-18,-27,-22,11,-23,-17,11,-21,10,7,-18,-23,-19,-19,-11,-2,10,-13,4,-21; MA /M: SY='S'; 
M=1,-9,-7,-15,-14,1,-8,-15,-11,-17,-8,-9,-2,-20,-16,-15,4,3,-6,-24,-9,-14; MA /M: SY='M'; M=-6,-15,-13,-18,-14,-5,-13,-11,-5,-11,-3,2,-11,-21,-9,-8,-7,-6,-3,-21,-6,-12; MA /M: SY='Y'; M=-8,-12,-15,-17,-13,3,-18,-8,-6,-12,-3,-2,-9,-20,-11,-9,-5,2,-5,-14,6,-13; MA /M: SY='A'; M=10,-15,-1,-21,-17,-7,-14,-19,-3,-17,-3,-3,-13,-20,-15,-18,1,2,4,-25,-12,-16; MA /M: SY='S'; M=6,-1,-14,-2,0,-20,-1,-11,-18,-10,-23,-16,5,-11,-1,-11,24,11,-11,-34,-18,-1; MA /M: SY='I'; M=-2,-19,-13,-24,-19,-6,-22,-21,13,-20,4,5,-14,-17,-16,-20,-5,2,12,-26,-8,-19; MA /M: SY='F'; M=-12,-25,-18,-29,-23,20,-23,-16,-1,-22,5,0,-22,-25,-22,-18,-19,-11,-4,17,15,-21; MA /M: SY='L'; M=-6,-15,-16,-20,-16,5,-21,-13,4,-20,9,4,-9,-21,-14,-15,-5,3,2,-23,-3,-15; MA /M: SY='L'; M=-7,-24,-19,-28,-20,4,-25,-17,16,-24,26,15,-21,-25,-17,-19,-19,-7,10,-19,-1,-19; MA /M: SY='V'; M=2,-19,1,-25,-20,-2,-22,-23,5,-20,5,2,-17,-22,-20,-19,-5,5,11,-26,-9,-20; MA /M: SY='A'; M=9,-19,-5,-25,-19,-3,-16,-20,4,-19,5,3,-17,-22,-18,-20,-6,-4,8,-22,-8,-18; MA /M: SY='I'; M=-7,-26,-23,-33,-24,2,-30,-20,31,-23,22,25,-21,-22,-16,-22,-18,-7,20,-21,-1,-22; MA /M: SY='A'; M=22,-6,-9,-12,-8,-16,-3,-16,-13,-11,-17,-13,-2,-12,-8,-15,19,12,-4,-28,-17,-8; MA /M: SY='I'; M=-6,-26,-19,-31,-25,12,-29,-21,21,-24,18,11,-23,-26,-22,-21,-17,-6,19,-14,5,-24; MA /M: SY='D'; M=-15,26,-27,36,17,-28,-15,0,-26,-2,-19,-19,10,-12,2,-9,-3,-9,-22,-33,-14,9; MA /M: SY='R'; M=-19,-11,-26,-11,-1,-19,-20,-1,-27,25,-18,-9,-2,-20,8,59,-10,-9,-18,-20,-10,-1; MA /M: SY='Y'; M=-15,-21,-19,-24,-21,25,-28,4,-1,-16,3,0,-20,-28,-16,-14,-17,-8,-6,15,47,-20; MA /M: SY='L'; M=-9,-24,-17,-28,-21,4,-29,-18,15,-20,16,10,-20,-26,-17,-15,-18,-7,12,-15,0,-20; MA /M: SY='A'; M=20,-13,-12,-20,-14,-13,-8,-18,-3,-14,-6,-5,-10,-16,-12,-17,4,1,4,-22,-13,-14; MA /M: SY='I'; M=-6,-28,-20,-34,-28,2,-34,-27,35,-25,17,15,-23,-24,-23,-24,-16,-5,31,-23,-4,-27; MA /M: SY='C'; M=-5,-16,15,-21,-18,-6,-23,-18,-9,-12,-8,-6,-13,-25,-16,-9,-5,-1,1,-27,-10,-17; MA /I: I=-4; MI=0; MD=-21; IM=0; 
DM=-21; MA /M: SY='H'; M=-12,-3,-24,-7,-5,-6,-18,17,-17,-1,-13,-6,3,-19,-1,5,-6,-7,-16,-17,7,-4; MA /M: SY='P'; M=-4,-15,-34,-10,-2,-26,-16,-17,-19,-8,-26,-17,-14,59,-8,-15,-5,-6,-24,-28,-24,-8; MA /M: SY='L'; M=-8,-23,-20,-27,-19,8,-26,-16,14,-21,23,15,-20,-24,-17,-16,-18,-5,9,-19,1,-18; MA /M: SY='R'; M=-10,-5,-24,-7,-1,-15,-15,2,-19,5,-15,-8,-1,-17,3,11,-5,-6,-16,-17,-4,0; MA /M: SY='Y'; M=-7,-13,-22,-15,-13,5,-19,4,-7,-9,-7,-4,-10,-20,-8,-8,-6,-4,-8,-3,25,-13; MA /I: I=-3; MD=-13; MA /M: SY='R'; M=-7,-5,-23,-7,-1,-16,-15,-3,-16,6,-14,-6,-1,-11,3,10,-3,-4,-13,-22,-8,0; D=-3; MA /I: I=-3; MD=-13; MA /M: SY='R'; M=-4,-6,-18,-8,-5,-11,-15,-7,-9,0,-10,-5,-2,-13,-2,4,-1,1,-6,-19,-6,-5; D=-3; MA /I: I=-3; MD=-13; MA /M: SY='I'; M=-7,-14,-19,-16,-10,-2,-19,-9,3,-8,2,2,-11,-15,-7,-6,-10,-6,1,-6,1,-10; D=-3; MA /I: I=-3; MI=0; MD=-13; IM=0; DM=-13; MA /M: SY='M'; M=-9,-15,-16,-19,-13,-4,-19,-10,-3,-7,-1,6,-11,-19,-7,0,-10,-6,-2,-18,-4,-11; MA /M: SY='T'; M=-2,1,-16,-4,-4,-16,-10,-9,-17,-3,-19,-13,7,-11,-2,-2,14,15,-12,-30,-13,-4; MA /M: SY='P'; M=-7,-11,-24,-11,-5,-16,-17,-10,-12,-1,-13,-7,-7,1,-3,1,-5,-4,-11,-21,-10,-6; MA /M: SY='R'; M=-8,-5,-23,-8,-2,-17,-14,-6,-20,11,-17,-8,0,-15,2,17,-3,-3,-15,-20,-9,-1; MA /M: SY='R'; M=-8,-11,-20,-14,-10,-7,-18,-3,-9,-3,-6,-2,-6,-19,-5,4,-7,-4,-6,-19,-1,-9; MA /M: SY='A'; M=11,-13,0,-19,-15,-13,-10,-19,-8,-14,-10,-7,-10,-17,-13,-15,3,2,0,-26,-15,-15; MA /M: SY='V'; M=-5,-18,-19,-22,-16,-1,-20,-14,0,-9,0,0,-14,-21,-13,-7,-11,-5,1,-11,0,-15; MA /M: SY='L'; M=-5,-20,-18,-24,-17,-2,-21,-16,7,-15,8,7,-16,-22,-13,-11,-12,-6,6,-17,-3,-16; MA /M: SY='I'; M=-3,-22,-16,-26,-20,1,-24,-19,14,-19,14,11,-19,-22,-16,-18,-12,-3,13,-20,-3,-19; MA /M: SY='I'; M=-3,-23,-7,-29,-24,-1,-27,-24,19,-23,11,8,-19,-24,-20,-22,-11,-4,18,-25,-7,-23; MA /M: SY='V'; M=5,-20,-15,-24,-19,-3,-13,-21,5,-20,4,2,-16,-19,-18,-19,-6,-3,8,-21,-8,-19; MA /M: SY='V'; M=1,-20,-15,-24,-20,-1,-9,-21,3,-21,4,1,-16,-20,-19,-20,-7,-4,5,-21,-9,-20; MA /M: SY='V'; 
M=1,-20,-12,-24,-20,-6,-22,-22,13,-20,3,4,-16,-18,-17,-20,-4,1,15,-26,-9,-20; MA /M: SY='W'; M=-15,-33,-36,-35,-27,13,-22,-24,-10,-20,-10,-12,-32,-28,-20,-20,-29,-20,-17,91,24,-21; MA /M: SY='I'; M=0,-23,-17,-28,-22,2,-23,-23,17,-22,14,8,-20,-22,-20,-21,-11,-3,16,-21,-5,-22; MA /M: SY='L'; M=-5,-24,-17,-28,-22,9,-23,-20,13,-23,15,9,-20,-24,-20,-19,-14,-5,11,-15,1,-21; MA /M: SY='S'; M=13,-7,-9,-11,-9,-18,1,-16,-15,-13,-19,-13,-2,-11,-10,-15,15,6,-7,-30,-19,-10; MA /M: SY='L'; M=-3,-22,-15,-27,-21,11,-22,-20,10,-23,12,6,-17,-23,-21,-19,-10,-2,10,-19,-1,-21; MA /M: SY='L'; M=-2,-23,-17,-26,-20,1,-20,-21,11,-22,13,7,-20,-19,-19,-20,-12,-4,10,-21,-5,-20; MA /M: SY='I'; M=-6,-20,-14,-25,-19,4,-23,-15,8,-20,8,5,-16,-24,-17,-18,-12,-6,7,-14,1,-19; MA /M: SY='S'; M=9,-7,-9,-12,-10,-14,-5,-14,-12,-12,-15,-10,-2,-14,-9,-13,12,7,-5,-27,-14,-10; MA /M: SY='L'; M=-3,-21,-17,-26,-20,4,-23,-20,13,-22,14,8,-18,-22,-18,-19,-10,-1,12,-21,-4,-19; MA /M: SY='P'; M=-7,-21,-28,-19,-11,-10,-22,-18,-1,-16,-5,-3,-19,24,-14,-19,-11,-7,-7,-22,-12,-15; MA /M: M=-8,-11,-23,-12,-7,-10,-19,-4,-7,-9,-8,-3,-8,-2,-5,-10,-7,-5,-9,-21,-5,-7; MA /M: SY='F'; M=-5,-21,-19,-26,-20,11,-22,-17,7,-21,10,5,-18,-20,-19,-18,-12,-3,6,-12,3,-19; MA /M: SY='F'; M=-8,-23,-20,-27,-21,15,-24,-17,10,-21,12,7,-19,-23,-20,-17,-14,-5,9,-13,6,-20; MA /I: I=-2; MD=-13; MA /M: SY='F'; M=-6,-13,-17,-15,-12,4,-7,-10,-4,-12,0,0,-9,-17,-12,-10,-8,-6,-4,-9,2,-12; D=-2; MA /I: I=-2; MI=0; MD=-13; IM=0; DM=-13; MA /M: SY='L'; M=-4,-7,-13,-8,-4,-5,-11,-6,-3,-6,1,0,-5,-11,-4,-4,-5,-3,-3,-12,-3,-4; D=-2; MA /I: I=-2; DM=-13; MA /M: M=-5,-7,-16,-8,-4,-6,-12,-8,-8,-6,-8,-6,-6,-8,-5,-6,-3,-3,-7,-7,-2,-5; D=-2; MA /I: I=-2; DM=-13; MA /M: SY='F'; M=-6,-9,-18,-11,-9,2,-12,-7,-6,-9,-6,-4,-6,-15,-9,-7,-4,-2,-5,-12,2,-9; D=-2; MA /I: I=-2; DM=-13; MA /M: M=-6,-4,-2,-6,-4,-13,-14,-9,-14,-5,-13,-9,-4,-13,-4,-5,-2,-2,-10,-22,-10,-5; D=-2; MA /I: I=-2; DM=-13; MA /M: M=-5,-7,-20,-9,-6,-8,-11,-9,-10,-8,-10,-7,-5,-13,-7,-8,-3,-3,-8,-15,-3,-7; 
D=-2; MA /I: I=-2; DM=-13; MA /M: M=-5,-4,-23,-5,-3,-12,-14,-7,-10,-7,-12,-7,-2,-10,-4,-7,-1,-2,-9,-23,-7,-5; MA /M: SY='N'; M=-8,-1,-21,-3,-2,-14,-14,-6,-14,-4,-14,-9,1,-9,-4,-4,-3,-5,-13,-24,-9,-4; MA /M: M=-8,-4,-25,-4,0,-13,-14,-7,-11,-5,-11,-7,-3,-14,-3,-6,-4,-4,-9,-19,-6,-3; MA /M: M=-6,-7,-21,-9,-6,-11,-14,-10,-7,-9,-9,-5,-4,-13,-7,-10,-3,-3,-6,-22,-7,-8; MA /M: SY='N'; M=-6,2,-22,0,-1,-18,-8,-6,-17,-4,-17,-12,4,-8,-4,-6,0,-3,-15,-26,-12,-3; MA /M: SY='H'; M=-7,-8,-24,-10,-6,-11,-14,1,-10,-8,-9,-4,-4,-12,-5,-6,-5,-5,-9,-21,-4,-7; MA /M: SY='T'; M=-7,-9,-19,-12,-8,-2,-19,-9,-8,-10,-7,-6,-7,-12,-9,-9,-2,2,-6,-19,0,-9; MA /M: M=-6,-10,-21,-13,-8,-2,-18,-11,-6,-9,-7,-4,-8,-16,-7,-9,-4,-2,-4,-15,-2,-8; MA /M: SY='C'; M=-10,-19,110,-29,-29,-19,-29,-29,-28,-28,-19,-19,-19,-38,-28,-28,-9,-9,-9,-48,-28,-29; MA /M: M=-6,-3,-22,-3,-4,-10,-12,-8,-11,-10,-8,-7,-4,-15,-7,-10,-3,-2,-9,-20,-6,-6; MA /I: I=-2; MD=-9; MA /M: SY='I'; M=-7,-13,-21,-14,-9,-1,-19,-12,2,-14,2,1,-12,-9,-11,-13,-9,-5,0,-18,-4,-11; D=-2; MA /I: I=-2; MD=-9; MA /M: M=-6,-1,-19,-2,-2,-10,-13,-3,-9,-5,-9,-5,0,-8,-4,-6,-2,-2,-8,-21,-6,-4; D=-2; MA /I: I=-2; MD=-9; MA /M: SY='Y'; M=-7,-13,-20,-14,-10,2,-13,-9,-5,-10,-5,-3,-11,-5,-9,-10,-9,-6,-7,2,4,-10; D=-2; MA /I: I=-2; MD=-9; MA /M: M=-3,-9,-16,-9,-6,-8,-10,-9,-4,-8,-3,-2,-8,0,-7,-8,-4,-2,-4,-17,-7,-7; D=-2; MA /I: I=-2; MD=-9; MA /M: M=-4,-7,-14,-8,-4,-3,-11,-7,-3,-8,-1,-1,-6,-8,-5,-7,-3,0,-4,-14,-3,-5; D=-2; MA /I: I=-2; MD=-9; MA /M: SY='E'; M=-5,0,-15,1,2,-10,-8,-3,-11,2,-11,-7,0,-3,0,1,-2,-4,-10,-12,-5,0; D=-2; MA /I: I=-2; MI=0; MD=-9; IM=0; DM=-9; MA /M: SY='D'; M=-3,4,-14,6,3,-12,-6,-4,-13,-2,-11,-9,2,-8,-1,-4,1,-2,-10,-16,-8,1; D=-2; MA /I: I=-2; DM=-9; MA /M: SY='T'; M=-4,-7,-15,-9,-6,-3,-12,-7,-5,-6,-5,-3,-5,-9,-6,-6,-1,3,-4,-9,-1,-6; D=-2; MA /I: I=-2; DM=-9; MA /M: SY='Y'; M=-4,-7,-17,-9,-8,-4,-10,-4,-6,-9,-6,-3,-5,-14,-6,-8,-4,-3,-5,-8,1,-7; D=-2; MA /I: I=-2; DM=-9; MA /M: SY='N'; 
M=-5,0,-19,-2,-1,-13,-9,-3,-12,-3,-11,-7,3,-12,0,-2,0,-2,-11,-19,-8,-1; D=-2; MA /I: I=-2; DM=-9; MA /M: SY='K'; M=-4,-2,-20,-5,-1,-15,-13,-6,-12,1,-11,-6,1,-10,0,1,-1,-2,-11,-22,-10,-1; D=-2; MA /I: I=-2; DM=-9; MA /M: M=-1,-9,-22,-12,-6,-10,-13,-11,-8,-9,-9,-6,-7,-12,-7,-9,-2,-2,-7,-19,-7,-7; MA /M: SY='Y'; M=-15,-22,-26,-25,-21,27,-27,-1,3,-17,6,3,-20,-27,-17,-14,-19,-10,-3,15,39,-20; MA /M: SY='T'; M=-1,-9,-18,-13,-10,-9,-15,-11,-3,-9,-6,-2,-5,-17,-8,-10,-1,1,0,-24,-8,-10; MA /M: SY='I'; M=-6,-23,-20,-27,-21,3,-28,-20,19,-20,16,12,-19,-23,-18,-18,-14,-3,16,-19,-1,-20; MA /M: SY='F'; M=-7,-21,-18,-26,-20,17,-23,-13,4,-19,6,2,-17,-23,-19,-16,-12,-5,2,-4,12,-19; MA /M: SY='V'; M=-4,-15,-18,-18,-15,-1,-18,-13,2,-16,2,1,-11,-20,-13,-14,-6,-2,3,-19,-2,-14; MA /I: I=-4; MD=-23; MA /M: SY='T'; M=-1,-12,-16,-17,-13,2,-17,-14,-1,-14,-1,-1,-8,-18,-13,-12,-1,4,1,-20,-3,-13; D=-4; MA /I: I=-4; MI=0; MD=-23; IM=0; DM=-23; MA /M: SY='A'; M=1,-5,-7,-8,-7,-4,-3,-7,-5,-7,-4,-3,-2,-9,-6,-7,0,0,-3,-12,-6,-6; D=-4; MA /I: I=-4; DM=-23; MA /M: SY='I'; M=-3,-22,-16,-28,-22,4,-21,-22,14,-22,12,9,-18,-23,-19,-20,-11,-4,13,-21,-4,-21; MA /M: SY='V'; M=-4,-22,-15,-27,-22,7,-21,-21,10,-21,10,6,-19,-23,-21,-19,-11,-3,12,-18,-2,-21; MA /M: M=0,-11,-14,-15,-13,-7,-7,-12,-8,-15,-7,-4,-7,-20,-11,-14,0,-1,-4,-21,-8,-12; MA /M: SY='F'; M=-12,-20,-19,-26,-21,29,-23,-14,1,-20,5,1,-14,-25,-23,-16,-14,-6,0,-5,15,-21; MA /M: SY='L'; M=-6,-25,-18,-30,-23,12,-25,-20,15,-24,16,9,-22,-26,-22,-20,-16,-6,13,-12,5,-23; MA /M: SY='I'; M=-6,-26,-19,-31,-24,5,-26,-24,22,-25,19,12,-22,-24,-21,-22,-16,-6,18,-20,-3,-24; MA /M: SY='P'; M=-6,-16,-33,-10,-2,-25,-18,-18,-17,-11,-25,-17,-15,62,-10,-18,-5,-5,-23,-30,-25,-10; MA /M: SY='L'; M=-7,-24,-8,-29,-22,14,-25,-20,9,-24,18,9,-21,-26,-22,-19,-16,-5,9,-19,0,-22; MA /M: SY='I'; M=0,-21,-15,-26,-20,3,-20,-21,11,-22,11,6,-17,-22,-19,-20,-9,-1,11,-21,-5,-20; MA /M: SY='I'; M=-4,-25,-16,-30,-23,4,-27,-24,21,-24,17,11,-21,-23,-21,-22,-14,-3,19,-22,-4,-23; MA /I: I=-5; 
MD=-26; MA /M: SY='I'; M=-7,-24,-20,-31,-23,4,-29,-19,25,-22,20,21,-20,-22,-16,-20,-16,-5,17,-20,-1,-21; D=-5; MA /I: I=-5; MI=0; MD=-26; IM=0; DM=-26; MA /M: SY='V'; M=-1,-21,-13,-26,-20,0,-23,-22,11,-22,10,5,-18,-18,-19,-20,-9,0,12,-23,-7,-20; MA /M: SY='I'; M=-6,-25,-19,-30,-24,13,-26,-20,15,-23,11,6,-20,-24,-22,-20,-14,-5,14,-11,7,-24; MA /M: SY='C'; M=-2,-17,14,-22,-19,-2,-21,-20,-4,-21,-2,-4,-13,-25,-18,-19,-1,3,3,-29,-10,-18; MA /M: SY='Y'; M=-15,-12,-25,-15,-16,16,-24,11,-3,-10,-4,-3,-9,-26,-9,-9,-11,-4,-10,8,47,-15; MA /M: SY='V'; M=-1,-19,-14,-24,-20,-2,-15,-18,4,-19,2,1,-15,-22,-17,-18,-6,-1,6,-16,-3,-19; MA /M: SY='R'; M=-8,-14,-19,-16,-9,-5,-19,-6,-8,-2,-3,0,-9,-20,-5,4,-9,-7,-6,-19,-2,-8; MA /M: SY='I'; M=-8,-27,-23,-34,-26,1,-33,-25,34,-25,20,18,-21,-23,-19,-24,-17,-6,24,-22,-2,-25; MA /M: SY='I'; M=-6,-24,-18,-28,-22,8,-24,-18,13,-21,10,6,-20,-24,-19,-19,-14,-6,10,-9,6,-22; MA /M: SY='R'; M=-8,-12,-18,-15,-9,-10,-19,-7,-10,0,-8,-3,-7,-20,-3,6,-7,-6,-6,-18,-5,-7; MA /M: SY='A'; M=2,-8,-18,-11,-4,-14,-16,-11,-10,-1,-10,-5,-6,-15,-4,-1,1,2,-4,-24,-10,-5; MA /M: SY='L'; M=-3,-26,-19,-29,-21,3,-28,-21,21,-23,24,14,-23,-24,-19,-19,-18,-6,18,-21,-3,-21; MA /M: SY='R'; M=-11,-12,-23,-14,-7,-10,-20,-6,-14,4,-8,-4,-7,-20,-1,15,-10,-8,-11,-11,-3,-6; MA /M: SY='R'; M=-6,-2,-23,-4,4,-22,-14,-4,-22,15,-20,-9,2,-13,8,17,0,-4,-16,-25,-12,5; MA /M: M=-3,-8,-21,-12,-7,-15,-15,-1,-7,-5,-10,-2,-2,-15,-1,-2,-1,-3,-6,-24,-7,-6; MA /M: SY='R'; M=-3,-5,-22,-6,0,-18,-14,-7,-16,2,-14,-8,-3,-10,1,4,0,-2,-12,-24,-11,0; MA /M: SY='R'; M=-4,-3,-22,-4,0,-19,-13,-4,-17,5,-16,-8,1,-14,3,8,3,-1,-13,-25,-11,1; MA /M: SY='Q'; M=-2,-5,-22,-7,-2,-18,-12,-7,-14,0,-14,-7,-1,-12,1,1,1,-1,-11,-24,-12,-1; MA /M: SY='R'; M=-6,-6,-23,-7,0,-16,-14,-6,-14,2,-11,-5,-4,-15,2,6,-3,-4,-11,-23,-10,0; MA /I: I=-1; MD=-3; MA /M: SY='R'; M=-2,-3,-17,-4,-1,-16,-2,-6,-16,2,-14,-8,-1,-9,0,3,0,-4,-12,-17,-11,-1; D=-1; MA /I: I=-1; MD=-3; MA /M: 
M=-2,-2,-12,-4,-1,-10,-7,-3,-9,0,-9,-4,0,-7,0,0,0,-1,-7,-15,-7,-1; D=-1; MA /I: I=-1; MD=-3; MA /M: M=-3,-4,-12,-5,-2,-8,-7,-3,-5,-2,-3,-2,-3,-8,0,-1,-2,-2,-5,-12,-4,-1; D=-1; MA /I: I=-1; MI=0; MD=-3; IM=0; DM=-3; MA /M: SY='S'; M=-2,-1,-13,-2,1,-12,-7,-4,-10,0,-10,-6,1,-6,1,0,3,0,-8,-17,-8,0; D=-1; MA /I: I=-1; DM=-3; MA /M: SY='S'; M=-2,-4,-13,-5,-2,-9,-9,-5,-8,-3,-7,-4,-1,-8,-1,-2,2,1,-6,-16,-7,-2; D=-1; MA /I: I=-1; DM=-3; MA /M: SY='S'; M=-1,-2,-15,-3,0,-13,-8,-6,-12,2,-12,-7,1,-6,0,2,3,1,-8,-18,-10,0; D=-1; MA /I: I=-1; DM=-3; MA /M: SY='R'; M=-2,-3,-16,-4,0,-12,-8,-5,-11,1,-10,-6,0,-8,0,2,1,-1,-8,-17,-8,0; D=-1; MA /I: I=-1; DM=-3; MA /M: SY='R'; M=-4,-3,-15,-4,-2,-11,-11,-4,-9,0,-9,-4,0,-11,0,2,0,0,-7,-17,-6,-2; D=-1; MA /I: I=-1; DM=-3; MA /M: M=-2,-5,-15,-7,-3,-10,-11,-6,-6,-2,-7,-3,-2,-9,-1,-1,0,-1,-5,-17,-7,-3; D=-1; MA /I: I=-1; DM=-3; MA /M: SY='K'; M=-1,-2,-17,-3,2,-16,-9,-6,-14,5,-13,-7,0,-8,3,5,2,-2,-11,-20,-10,2; D=-1; MA /I: I=-1; DM=-3; MA /M: SY='R'; M=-2,-1,-19,-2,2,-17,-11,-5,-16,5,-15,-9,2,-8,2,7,1,-2,-12,-22,-11,1; D=-1; MA /I: I=-1; DM=-3; MA /M: SY='R'; M=-9,0,-25,2,12,-21,-16,-4,-22,11,-18,-11,1,-12,6,14,-1,-5,-17,-25,-12,8; MA /M: SY='R'; M=-10,-6,-24,-7,-3,-16,-17,-3,-15,6,-12,-4,-3,-17,3,12,-5,-5,-11,-21,-7,-2; MA /M: SY='R'; M=-11,-1,-27,-1,7,-24,-17,-3,-26,26,-23,-11,3,-13,9,29,-4,-6,-19,-23,-10,6; MA /M: SY='A'; M=10,-14,-15,-20,-15,-8,-15,-18,3,-14,0,0,-12,-16,-14,-16,0,6,9,-24,-10,-15; MA /M: SY='T'; M=1,-16,-16,-22,-17,6,-20,-18,3,-17,4,1,-12,-19,-17,-16,-3,7,6,-20,-3,-17; MA /M: SY='R'; M=-7,-9,-24,-11,-4,-15,-16,-9,-15,12,-15,-6,-5,-17,0,15,-5,-5,-9,-19,-7,-4; MA /M: SY='T'; M=-4,-13,-17,-19,-14,-5,-22,-15,4,-12,5,9,-11,-18,-9,-10,-2,11,6,-24,-6,-12; MA /M: SY='L'; M=-3,-24,-2,-29,-23,0,-27,-23,16,-24,18,11,-22,-26,-20,-21,-15,-3,16,-26,-7,-22; MA /M: SY='L'; M=0,-20,-16,-25,-19,4,-16,-20,5,-20,7,3,-16,-22,-18,-18,-8,-2,7,-19,-4,-19; MA /M: SY='V'; M=3,-19,-13,-24,-19,-2,-19,-20,8,-19,5,3,-15,-20,-17,-18,-4,2,11,-23,-6,-18; 
MA /M: SY='I'; M=-7,-21,-20,-25,-20,-2,-28,-10,18,-20,11,13,-16,-23,-14,-17,-12,-4,17,-24,-2,-19; MA /M: SY='V'; M=-2,-23,-14,-27,-22,1,-25,-22,17,-20,14,12,-21,-24,-20,-18,-10,1,22,-24,-6,-21; MA /M: SY='V'; M=0,-19,-13,-24,-20,-2,-13,-22,6,-21,4,2,-15,-21,-19,-20,-5,0,9,-23,-9,-20; MA /M: SY='V'; M=2,-20,-12,-25,-20,0,-23,-22,11,-19,7,5,-18,-21,-19,-18,-6,3,16,-23,-7,-20; MA /M: SY='F'; M=-15,-25,-19,-31,-25,47,-28,-14,3,-24,6,1,-19,-26,-29,-18,-16,-8,4,2,23,-25; MA /M: SY='L'; M=0,-23,-14,-28,-21,9,-22,-22,10,-23,13,5,-19,-23,-21,-20,-11,-3,11,-19,-3,-21; MA /M: SY='I'; M=-5,-27,-14,-32,-25,8,-29,-24,23,-26,21,12,-24,-26,-23,-22,-18,-7,21,-21,-2,-25; MA /M: SY='C'; M=-4,-17,43,-25,-22,-4,-20,-22,-16,-23,-10,-10,-15,-28,-22,-22,-4,-3,-5,-31,-13,-22; MA /M: SY='W'; M=-16,-22,-35,-24,-16,9,-20,-13,-14,-15,-13,-12,-20,-25,-12,-14,-22,-17,-21,65,22,-12; MA /M: SY='L'; M=-2,-20,-15,-24,-19,4,-15,-20,5,-22,10,4,-16,-22,-19,-19,-8,1,6,-20,-5,-19; MA /M: SY='P'; M=-7,-19,-34,-12,-4,-23,-20,-20,-15,-12,-22,-15,-18,64,-11,-19,-7,-4,-22,-28,-24,-11; MA /M: SY='Y'; M=-9,-19,-21,-22,-17,17,-22,-6,1,-17,5,2,-15,-23,-15,-14,-12,-6,-2,-5,20,-17; MA /M: SY='Y'; M=-6,-11,-20,-16,-14,2,-12,0,-7,-15,-5,-3,-4,-21,-11,-12,-6,-5,-8,-14,3,-13; MA /M: SY='I'; M=-4,-25,-16,-30,-24,6,-27,-24,20,-24,13,9,-21,-24,-22,-22,-13,-4,19,-20,-2,-24; MA /M: SY='I'; M=-5,-21,-17,-26,-20,7,-24,-17,12,-20,10,8,-17,-23,-18,-16,-10,-2,12,-19,0,-19; MA /M: SY='Y'; M=-6,-7,-19,-12,-11,-3,-15,-2,-7,-9,-8,-3,-1,-19,-6,-7,-4,-3,-9,-16,4,-9; MA /M: SY='L'; M=-6,-24,-17,-29,-22,7,-25,-20,15,-23,18,11,-21,-24,-20,-20,-15,-5,12,-18,-1,-21; MA /M: SY='L'; M=-6,-23,-19,-26,-19,5,-25,-17,10,-19,12,7,-20,-23,-15,-15,-14,-6,8,-10,2,-18; MA /M: M=-3,-4,-23,-4,-1,-16,-11,-7,-14,-5,-14,-9,-3,-8,-3,-5,0,-2,-11,-24,-10,-3; MA /M: SY='A'; M=1,-11,-19,-13,-10,-8,-14,-13,-6,-12,-6,-4,-9,-15,-9,-12,-2,0,-3,-16,-5,-10; MA /M: SY='F'; M=-7,-21,-20,-25,-19,15,-23,-14,7,-20,9,4,-16,-23,-19,-16,-12,-5,5,-10,8,-19; MA /I: I=-3; 
MD=-14; MA /M: SY='F'; M=-7,-8,-9,-11,-10,1,-16,-8,-4,-11,-3,-3,-6,-17,-10,-10,-7,-4,-4,-12,0,-10; D=-3; MA /I: I=-3; MI=0; MD=-14; IM=0; DM=-14; MA /M: M=-4,-2,-13,-3,-2,-6,-7,-3,-7,-4,-6,-4,-1,-4,-2,-4,-2,-3,-7,-10,-3,-2; D=-3; MA /I: I=-3; DM=-14; MA /M: SY='S'; M=-2,0,-13,0,0,-10,-7,-4,-8,-3,-9,-6,1,-4,-2,-4,2,1,-7,-16,-7,-1; D=-3; MA /I: I=-3; DM=-14; MA /M: M=-5,-6,0,-8,-7,-11,-10,-10,-10,-9,-10,-7,-4,-11,-7,-10,-2,-3,-8,-22,-10,-8; D=-3; MA /I: I=-3; DM=-14; MA /M: M=-5,-2,-18,-2,0,-13,-9,-6,-13,-4,-12,-7,-2,-7,-2,-5,-1,-4,-11,-20,-9,-2; D=-3; MA /I: I=-3; DM=-14; MA /M: M=-4,-6,-19,-7,-6,-10,-10,-8,-8,-9,-7,-5,-4,-9,-6,-10,-2,-2,-7,-19,-7,-7; D=-3; MA /I: I=-3; DM=-14; MA /M: SY='S'; M=-6,-3,-16,-5,-2,-13,-13,-5,-14,-5,-12,-8,-1,-12,-2,-4,1,0,-12,-23,-7,-3; D=-3; MA /I: I=-3; DM=-14; MA /M: M=-7,-4,-18,-5,-3,-15,-16,-5,-14,-2,-13,-8,-1,-12,-2,-2,-2,-3,-12,-23,-8,-3; MA /M: SY='I'; M=-6,-13,-19,-17,-13,-2,-20,-13,2,-14,0,0,-10,-18,-12,-13,-6,-3,2,-20,-3,-13; MA /M: M=-6,-15,-21,-17,-12,-2,-19,-11,0,-15,0,0,-12,-11,-12,-14,-7,-3,-1,-19,-2,-13; MA /M: M=-7,-2,-22,-2,0,-14,-15,-6,-13,-6,-13,-8,-1,-7,-2,-7,-2,-3,-12,-25,-9,-2; MA /M: SY='Y'; M=-7,-15,-22,-19,-13,1,-22,-9,1,-12,2,2,-12,-19,-11,-10,-10,-4,0,-11,5,-13; MA /M: SY='L'; M=-3,-19,-16,-23,-18,4,-21,-17,6,-19,8,5,-17,-21,-17,-17,-11,-4,6,-16,-1,-18; MA /M: SY='Y'; M=-7,-11,-22,-14,-10,-1,-15,-7,-7,-9,-6,-3,-7,-19,-8,-7,-5,-3,-6,-14,4,-10; MA /M: SY='Y'; M=-6,-8,-21,-10,-8,-5,-20,-4,-4,-10,-4,-2,-6,-16,-6,-11,-5,-2,-5,-18,1,-8; MA /M: SY='I'; M=-4,-25,-17,-30,-24,9,-27,-21,18,-23,14,9,-21,-24,-21,-21,-15,-5,16,-14,2,-23; MA /M: SY='T'; M=2,-14,-8,-19,-15,-1,-16,-18,-4,-17,-5,-5,-10,-14,-15,-17,2,5,1,-24,-8,-15; MA /M: M=-5,-9,-20,-13,-10,-7,-16,-6,-3,-12,-3,-2,-4,-17,-9,-10,-3,0,-2,-24,-4,-11; MA /M: SY='F'; M=-8,-25,-17,-29,-22,8,-25,-20,6,-21,7,4,-21,-22,-19,-17,-17,-8,4,3,3,-20; MA /M: SY='L'; M=-10,-26,-17,-30,-22,15,-28,-18,15,-26,28,14,-24,-27,-21,-19,-22,-7,9,-15,5,-22; MA /M: SY='A'; 
M=11,-13,-15,-18,-14,-11,-4,-17,-6,-15,-7,-6,-10,-14,-13,-17,2,0,-2,-21,-11,-14; MA /M: SY='Y'; M=-7,-15,-20,-18,-15,4,-22,-6,2,-11,-1,3,-11,-19,-11,-11,-6,0,1,-13,11,-14; MA /M: SY='V'; M=0,-21,-12,-26,-20,4,-22,-21,11,-22,10,5,-18,-22,-19,-20,-8,-1,12,-20,-5,-20; MA /M: SY='N'; M=-5,14,-18,3,-5,-16,-6,7,-16,-6,-21,-12,26,-16,-2,-5,8,2,-18,-32,-11,-4; MA /M: SY='S'; M=6,-7,2,-10,-8,-17,-10,-14,-16,-13,-20,-14,-3,-6,-8,-14,16,10,-9,-34,-18,-8; MA /M: SY='C'; M=3,-17,19,-24,-20,-9,-19,-21,-4,-20,-3,-2,-15,-23,-18,-21,-3,0,4,-30,-14,-19; MA /M: SY='L'; M=-4,-23,-14,-27,-21,5,-26,-18,14,-23,16,9,-20,-24,-18,-19,-13,-3,11,-18,1,-20; MA /M: SY='N'; M=-11,30,-21,20,2,-21,-5,9,-22,-1,-26,-18,39,-17,0,-1,7,-1,-25,-37,-16,1; MA /M: SY='P'; M=-9,-21,-32,-16,-7,-13,-21,-20,-14,-14,-20,-14,-20,57,-14,-20,-11,-8,-20,-22,-19,-14; MA /M: SY='I'; M=-10,-29,-22,-34,-27,18,-31,-24,23,-26,18,11,-24,-25,-25,-22,-20,-9,17,-7,7,-26; MA /M: SY='I'; M=-7,-28,-20,-34,-27,4,-33,-26,32,-27,22,15,-23,-24,-22,-24,-19,-7,25,-21,-2,-26; MA /M: SY='Y'; M=-17,-20,-21,-22,-21,27,-29,10,-1,-13,1,0,-18,-29,-14,-12,-18,-8,-7,18,59,-20; MA /I: E1=0; NR /RELEASE=40.7,103373; NR /TOTAL=1221(1220); /POSITIVE=1214(1213); /UNKNOWN=6(6); /FALSE_POS=1(1); NR /FALSE_NEG=0; /PARTIAL=0; CC /MATRIX_TYPE=protein_domain; CC /SCALING_DB=reversed; CC /AUTHOR=K_Hofmann; CC /TAXO-RANGE=??E?V; /MAX-REPEAT=2; DR O42385, 5H1A_FUGRU, T; P08908, 5H1A_HUMAN, T; Q64264, 5H1A_MOUSE, T; DR P19327, 5H1A_RAT , T; P79250, 5H1B_CANFA, T; O08892, 5H1B_CAVPO, T; DR P46636, 5H1B_CRIGR, T; P35404, 5H1B_DIDMA, T; O42384, 5H1B_FUGRU, T; DR P28222, 5H1B_HUMAN, T; P28334, 5H1B_MOUSE, T; P79399, 5H1B_PIG , T; DR P49144, 5H1B_RABIT, T; P28564, 5H1B_RAT , T; P56496, 5H1B_SPAEH, T; DR P11614, 5H1D_CANFA, T; Q60484, 5H1D_CAVPO, T; P79748, 5H1D_FUGRU, T; DR P28221, 5H1D_HUMAN, T; Q61224, 5H1D_MOUSE, T; P79400, 5H1D_PIG , T; DR P49145, 5H1D_RABIT, T; P28565, 5H1D_RAT , T; P28566, 5H1E_HUMAN, T; DR Q29003, 5H1E_PIG , T; O08890, 5H1F_CAVPO, T; 
P30939, 5H1F_HUMAN, T; DR Q02284, 5H1F_MOUSE, T; P30940, 5H1F_RAT , T; O46635, 5H2A_CANFA, T; DR P35382, 5H2A_CAVPO, T; P18599, 5H2A_CRIGR, T; P28223, 5H2A_HUMAN, T; DR P50128, 5H2A_MACMU, T; P35363, 5H2A_MOUSE, T; P50129, 5H2A_PIG , T; DR P14842, 5H2A_RAT , T; P41595, 5H2B_HUMAN, T; Q02152, 5H2B_MOUSE, T; DR Q29005, 5H2B_PIG , T; P30994, 5H2B_RAT , T; P28335, 5H2C_HUMAN, T; DR P34968, 5H2C_MOUSE, T; P08909, 5H2C_RAT , T; O70528, 5H4_CAVPO , T; DR Q13639, 5H4_HUMAN , T; P97288, 5H4_MOUSE , T; Q29006, 5H4_PIG , T; DR Q62758, 5H4_RAT , T; P47898, 5H5A_HUMAN, T; P30966, 5H5A_MOUSE, T; DR P35364, 5H5A_RAT , T; P31387, 5H5B_MOUSE, T; P35365, 5H5B_RAT , T; DR P50406, 5H6_HUMAN , T; Q9R1C8, 5H6_MOUSE , T; P31388, 5H6_RAT , T; DR P50407, 5H7_CAVPO , T; P34969, 5H7_HUMAN , T; P32304, 5H7_MOUSE , T; DR P32305, 5H7_RAT , T; Q91559, 5H7_XENLA , T; Q16950, 5HT1_APLCA, T; DR P20905, 5HT1_DROME, T; Q16951, 5HT2_APLCA, T; Q17239, 5HT_BOMMO , T; DR Q25190, 5HT_HELVI , T; Q25414, 5HT_LYMST , T; P28285, 5HTA_DROME, T; DR P28286, 5HTB_DROME, T; P18130, A1AA_BOVIN, T; O77621, A1AA_CANFA, T; DR Q9WU25, A1AA_CAVPO, T; P35348, A1AA_HUMAN, T; Q91175, A1AA_ORYLA, T; DR O02824, A1AA_RABIT, T; P43140, A1AA_RAT , T; P11615, A1AB_CANFA, T; DR P35368, A1AB_HUMAN, T; P18841, A1AB_MESAU, T; P97717, A1AB_MOUSE, T; DR P15823, A1AB_RAT , T; P25100, A1AD_HUMAN, T; P97714, A1AD_MOUSE, T; DR O02666, A1AD_RABIT, T; P23944, A1AD_RAT , T; Q28838, A2AA_BOVIN, T; DR Q60474, A2AA_CAVPO, T; P08913, A2AA_HUMAN, T; Q01338, A2AA_MOUSE, T; DR P18871, A2AA_PIG , T; P22909, A2AA_RAT , T; O18935, A2AB_AMBHO, T; DR O77700, A2AB_BOVIN, T; Q60475, A2AB_CAVPO, T; O77715, A2AB_DIDMA, T; DR O77713, A2AB_DUGDU, T; O77723, A2AB_ECHTE, T; O19014, A2AB_ELEMA, T; DR O19012, A2AB_ERIEU, T; O77721, A2AB_HORSE, T; P18089, A2AB_HUMAN, T; DR O19025, A2AB_MACPR, T; P30545, A2AB_MOUSE, T; O19032, A2AB_ORYAF, T; DR O19054, A2AB_PROHA, T; O77830, A2AB_RABIT, T; P19328, A2AB_RAT , T; DR O19091, A2AB_TALEU, T; Q60476, A2AC_CAVPO, T; 
P35405, A2AC_DIDMA, T; DR P18825, A2AC_HUMAN, T; Q01337, A2AC_MOUSE, T; P22086, A2AC_RAT , T; DR P35369, A2AD_HUMAN, T; P32251, A2AR_CARAU, T; Q91081, A2AR_LABOS, T; DR P28190, AA1R_BOVIN, T; P11616, AA1R_CANFA, T; P47745, AA1R_CAVPO, T; DR P49892, AA1R_CHICK, T; P30542, AA1R_HUMAN, T; Q60612, AA1R_MOUSE, T; DR P34970, AA1R_RABIT, T; P25099, AA1R_RAT , T; P11617, AA2A_CANFA, T; DR P46616, AA2A_CAVPO, T; P29274, AA2A_HUMAN, T; Q60613, AA2A_MOUSE, T; DR P30543, AA2A_RAT , T; O13076, AA2B_CHICK, T; P29275, AA2B_HUMAN, T; DR Q60614, AA2B_MOUSE, T; P29276, AA2B_RAT , T; Q28309, AA3R_CANFA, T; DR P33765, AA3R_HUMAN, T; Q61618, AA3R_MOUSE, T; O02667, AA3R_RABIT, T; DR P28647, AA3R_RAT , T; P35342, AA3R_SHEEP, T; P16395, ACM1_DROME, T; DR P11229, ACM1_HUMAN, T; P56489, ACM1_MACMU, T; P12657, ACM1_MOUSE, T; DR P04761, ACM1_PIG , T; P08482, ACM1_RAT , T; P30372, ACM2_CHICK, T; DR P08172, ACM2_HUMAN, T; Q9ERZ4, ACM2_MOUSE, T; P06199, ACM2_PIG , T; DR P10980, ACM2_RAT , T; P41984, ACM3_BOVIN, T; P49578, ACM3_CHICK, T; DR Q9N2A3, ACM3_GORGO, T; P20309, ACM3_HUMAN, T; Q9ERZ3, ACM3_MOUSE, T; DR Q9N2A4, ACM3_PANTR, T; P11483, ACM3_PIG , T; Q9N2A2, ACM3_PONPY, T; DR P08483, ACM3_RAT , T; P17200, ACM4_CHICK, T; P08173, ACM4_HUMAN, T; DR P32211, ACM4_MOUSE, T; P08485, ACM4_RAT , T; P30544, ACM4_XENLA, T; DR P08912, ACM5_HUMAN, T; P56490, ACM5_MACMU, T; P08911, ACM5_RAT , T; DR P34974, ACTR_BOVIN, T; Q9Z1S9, ACTR_CAVPO, T; Q01718, ACTR_HUMAN, T; DR P70115, ACTR_MESAU, T; Q64326, ACTR_MOUSE, T; Q28928, ACTR_PAPHA, T; DR Q9TU77, ACTR_SHEEP, T; O15218, ADMR_HUMAN, T; P43142, ADMR_MOUSE, T; DR P31392, ADMR_RAT , T; P50052, AG22_HUMAN, T; Q9Z0Z6, AG22_MERUN, T; DR P35374, AG22_MOUSE, T; P35351, AG22_RAT , T; Q28929, AG22_SHEEP, T; DR P25104, AG2R_BOVIN, T; P43240, AG2R_CANFA, T; Q9WV26, AG2R_CAVPO, T; DR P79785, AG2R_CHICK, T; P30556, AG2R_HUMAN, T; P33396, AG2R_MELGA, T; DR O35210, AG2R_MERUN, T; P29754, AG2R_MOUSE, T; P30555, AG2R_PIG , T; DR P34976, AG2R_RABIT, T; P25095, AG2R_RAT , T; 
O77590, AG2R_SHEEP, T; DR P32303, AG2R_XENLA, T; Q13725, AG2S_HUMAN, T; P29755, AG2S_MOUSE, T; DR P29089, AG2S_RAT , T; P35373, AG2S_XENLA, T; P34977, AG2T_RAT , T; DR P35414, APJ_HUMAN , T; O97666, APJ_MACMU , T; Q9WV08, APJ_MOUSE , T; DR Q90352, AVT_CATCO , T; Q9TT96, B1AR_BOVIN, T; P79148, B1AR_CANFA, T; DR Q9TST6, B1AR_FELCA, T; P08588, B1AR_HUMAN, T; P47899, B1AR_MACMU, T; DR P07700, B1AR_MELGA, T; O70430, B1AR_MERUN, T; P34971, B1AR_MOUSE, T; DR Q28998, B1AR_PIG , T; P18090, B1AR_RAT , T; Q28927, B1AR_SHEEP, T; DR O42574, B1AR_XENLA, T; Q28044, B2AR_BOVIN, T; P54833, B2AR_CANFA, T; DR Q9TST5, B2AR_FELCA, T; P07550, B2AR_HUMAN, T; Q28509, B2AR_MACMU, T; DR O70431, B2AR_MERUN, T; P04274, B2AR_MESAU, T; P18762, B2AR_MOUSE, T; DR Q28997, B2AR_PIG , T; P10608, B2AR_RAT , T; P46626, B3AR_BOVIN, T; DR O02662, B3AR_CANFA, T; Q9XT57, B3AR_CAPHI, T; Q60483, B3AR_CAVPO, T; DR Q9TST4, B3AR_FELCA, T; P13945, B3AR_HUMAN, T; Q28524, B3AR_MACMU, T; DR O70432, B3AR_MERUN, T; P25962, B3AR_MOUSE, T; Q95252, B3AR_PIG , T; DR P26255, B3AR_RAT , T; Q9XT58, B3AR_SHEEP, T; P43141, B4AR_MELGA, T; DR P46663, BRB1_HUMAN, T; Q61125, BRB1_MOUSE, T; P48748, BRB1_RABIT, T; DR P97583, BRB1_RAT , T; O70526, BRB2_CAVPO, T; P30411, BRB2_HUMAN, T; DR P32299, BRB2_MOUSE, T; Q9GLX8, BRB2_PIG , T; Q28642, BRB2_RABIT, T; DR P25023, BRB2_RAT , T; P35371, BRS3_CAVPO, T; P32247, BRS3_HUMAN, T; DR O54798, BRS3_MOUSE, T; O97967, BRS3_SHEEP, T; P47751, BRS4_BOMOR, T; DR O88680, C3AR_CAVPO, T; Q16581, C3AR_HUMAN, T; O09047, C3AR_MOUSE, T; DR O55197, C3AR_RAT , T; P49238, C3X1_HUMAN, T; Q9Z0D9, C3X1_MOUSE, T; DR P35411, C3X1_RAT , T; P30992, C5AR_CANFA, T; O70129, C5AR_CAVPO, T; DR P79175, C5AR_GORGO, T; P21730, C5AR_HUMAN, T; P79188, C5AR_MACMU, T; DR P30993, C5AR_MOUSE, T; P79240, C5AR_PANTR, T; P79234, C5AR_PONPY, T; DR Q9TUE1, C5AR_RABIT, T; P97520, C5AR_RAT , T; Q98894, CB1A_FUGRU, T; DR Q98895, CB1B_FUGRU, T; O02777, CB1R_FELCA, T; P21554, CB1R_HUMAN, T; DR P47746, CB1R_MOUSE, T; P56971, CB1R_POEGU, 
T; P20272, CB1R_RAT , T; DR Q9PUI7, CB1R_TARGR, T; P34972, CB2R_HUMAN, T; P47936, CB2R_MOUSE, T; DR Q9QZN9, CB2R_RAT , T; Q63931, CCKR_CAVPO, T; P32238, CCKR_HUMAN, T; DR O08786, CCKR_MOUSE, T; O97772, CCKR_RABIT, T; P30551, CCKR_RAT , T; DR P70031, CCKR_XENLA, T; P49682, CCR3_HUMAN, T; O88410, CCR3_MOUSE, T; DR P25930, CCR4_BOVIN, T; O62747, CCR4_CERTO, T; P56498, CCR4_FELCA, T; DR P30991, CCR4_HUMAN, T; Q28474, CCR4_MACFA, T; P79394, CCR4_MACMU, T; DR P70658, CCR4_MOUSE, T; P56491, CCR4_PAPAN, T; O08565, CCR4_RAT , T; DR Q28553, CCR4_SHEEP, T; P32302, CCR5_HUMAN, T; Q04683, CCR5_MOUSE, T; DR P34997, CCR5_RAT , T; O18983, CCR6_CERAE, T; O00574, CCR6_HUMAN, T; DR Q9XT45, CCR6_MACMU, T; O19024, CCR6_MACNE, T; O00590, CKD6_HUMAN, T; DR O08707, CKD6_MOUSE, T; O09027, CKD6_RAT , T; P32246, CKR1_HUMAN, T; DR P56482, CKR1_MACMU, T; P51675, CKR1_MOUSE, T; P41597, CKR2_HUMAN, T; DR O18793, CKR2_MACMU, T; P51683, CKR2_MOUSE, T; O55193, CKR2_RAT , T; DR Q9Z2I3, CKR3_CAVPO, T; P56492, CKR3_CERAE, T; P51677, CKR3_HUMAN, T; DR P56483, CKR3_MACMU, T; P51678, CKR3_MOUSE, T; O54814, CKR3_RAT , T; DR P51679, CKR4_HUMAN, T; P51680, CKR4_MOUSE, T; P56493, CKR5_CERAE, T; DR O62743, CKR5_CERTO, T; P56439, CKR5_GORGO, T; P51681, CKR5_HUMAN, T; DR O97883, CKR5_HYLLE, T; P79436, CKR5_MACMU, T; P51682, CKR5_MOUSE, T; DR P56440, CKR5_PANTR, T; P56441, CKR5_PAPHA, T; O97881, CKR5_PONPY, T; DR O97880, CKR5_PYGBI, T; O97882, CKR5_PYGNE, T; O08556, CKR5_RAT , T; DR O97878, CKR5_TRAFR, T; O97879, CKR5_TRAPH, T; P51684, CKR6_HUMAN, T; DR O54689, CKR6_MOUSE, T; P32248, CKR7_HUMAN, T; P47774, CKR7_MOUSE, T; DR P51685, CKR8_HUMAN, T; O97665, CKR8_MACMU, T; P56484, CKR8_MOUSE, T; DR P51686, CKR9_HUMAN, T; Q9WUT7, CKR9_MOUSE, T; P46092, CKRA_HUMAN, T; DR Q9JL21, CKRA_MOUSE, T; P35350, CKRB_BOVIN, T; Q9NPB9, CKRB_HUMAN, T; DR P51676, CKRV_MOUSE, T; Q99788, CML1_HUMAN, T; P97468, CML1_MOUSE, T; DR O35786, CML1_RAT , T; Q99527, CML2_HUMAN, T; O08878, CML2_RAT , T; DR P46094, CXC1_HUMAN, T; Q9R0M1, 
CXC1_MOUSE, T; P35406, D1DR_CARAU, T; DR P53452, D1DR_FUGRU, T; P47800, D1DR_OREMO, T; P24628, D2D1_XENLA, T; DR P34973, D2D2_XENLA, T; P20288, D2DR_BOVIN, T; P52702, D2DR_CERAE, T; DR P53453, D2DR_FUGRU, T; P14416, D2DR_HUMAN, T; O73810, D2DR_MELGA, T; DR P13953, D2DR_MOUSE, T; P52703, D3DR_CERAE, T; P35462, D3DR_HUMAN, T; DR P30728, D3DR_MOUSE, T; P19020, D3DR_RAT , T; P21917, D4DR_HUMAN, T; DR P51436, D4DR_MOUSE, T; P30729, D4DR_RAT , T; P53454, D5DR_FUGRU, T; DR Q95136, DADR_BOVIN, T; P42288, DADR_DIDMA, T; P21728, DADR_HUMAN, T; DR O77680, DADR_MACMU, T; Q61616, DADR_MOUSE, T; P50130, DADR_PIG , T; DR O02664, DADR_RABIT, T; P18901, DADR_RAT , T; P42289, DADR_XENLA, T; DR Q95137, DBDR_BOVIN, T; P21918, DBDR_HUMAN, T; Q95195, DBDR_MACMU, T; DR P25115, DBDR_RAT , T; P42290, DBDR_XENLA, T; P42291, DCDR_XENLA, T; DR P41596, DOP1_DROME, T; Q24563, DOP2_DROME, T; P32249, EBI2_HUMAN, T; DR O60883, EBP2_HUMAN, T; P21453, EDG1_HUMAN, T; O08530, EDG1_MOUSE, T; DR P48303, EDG1_RAT , T; Q28031, EDG2_BOVIN, T; Q92633, EDG2_HUMAN, T; DR Q61130, EDG2_MOUSE, T; P46628, EDG2_SHEEP, T; Q99500, EDG3_HUMAN, T; DR P52592, EDGL_MOUSE, T; P21450, ET1R_BOVIN, T; P25101, ET1R_HUMAN, T; DR Q61614, ET1R_MOUSE, T; Q29010, ET1R_PIG , T; P26684, ET1R_RAT , T; DR P32940, ET3R_XENLA, T; P28088, ETBR_BOVIN, T; P56497, ETBR_CANFA, T; DR Q90328, ETBR_COTJA, T; O62709, ETBR_HORSE, T; P24530, ETBR_HUMAN, T; DR Q28468, ETBR_MACFA, T; P48302, ETBR_MOUSE, T; P35463, ETBR_PIG , T; DR P21451, ETBR_RAT , T; P79177, FML1_GORGO, T; P25090, FML1_HUMAN, T; DR P79190, FML1_MACMU, T; O08790, FML1_MOUSE, T; P79242, FML1_PANTR, T; DR P79236, FML1_PONPY, T; P79178, FML2_GORGO, T; P25089, FML2_HUMAN, T; DR P79191, FML2_MACMU, T; P79243, FML2_PANTR, T; P79237, FML2_PONPY, T; DR P79176, FMLR_GORGO, T; P21462, FMLR_HUMAN, T; P79189, FMLR_MACMU, T; DR P33766, FMLR_MOUSE, T; P79241, FMLR_PANTR, T; P79235, FMLR_PONPY, T; DR Q05394, FMLR_RABIT, T; P35376, FSHR_BOVIN, T; P79763, FSHR_CHICK, T; DR Q95179, FSHR_EQUAS, T; 
P47799, FSHR_HORSE, T; P23945, FSHR_HUMAN, T; DR P32212, FSHR_MACFA, T; P35378, FSHR_MOUSE, T; P49059, FSHR_PIG , T; DR P20395, FSHR_RAT , T; P35379, FSHR_SHEEP, T; P47211, GALR_HUMAN, T; DR P56479, GALR_MOUSE, T; Q62805, GALR_RAT , T; O43603, GALS_HUMAN, T; DR O88854, GALS_MOUSE, T; O08726, GALS_RAT , T; O60755, GALT_HUMAN, T; DR O88853, GALT_MOUSE, T; O88626, GALT_RAT , T; P79266, GASR_BOVIN, T; DR P30552, GASR_CANFA, T; P32239, GASR_HUMAN, T; P56481, GASR_MOUSE, T; DR P30796, GASR_PRANA, T; P46627, GASR_RABIT, T; P30553, GASR_RAT , T; DR Q92847, GHSR_HUMAN, T; Q95254, GHSR_PIG , T; O08725, GHSR_RAT , T; DR P35409, GLHR_ANTEL, T; O15354, GP37_HUMAN, T; O43193, GP38_HUMAN, T; DR O43194, GP39_HUMAN, T; O14842, GP40_HUMAN, T; O14843, GP41_HUMAN, T; DR O15529, GP42_HUMAN, T; O15552, GP43_HUMAN, T; Q9Y5Y4, GP44_HUMAN, T; DR Q9Z2J6, GP44_MOUSE, T; Q9Y2T5, GP52_HUMAN, T; Q15743, GP68_HUMAN, T; DR Q9TTQ9, GP72_CANFA, T; Q9NYM4, GP72_HUMAN, T; P30731, GP72_MOUSE, T; DR Q9I919, GP85_BRARE, T; Q9NPD1, GP85_HUMAN, T; P46023, GPCR_LYMST, T; DR P46091, GPR1_HUMAN, T; O97664, GPR1_MACMU, T; P46090, GPR1_RAT , T; DR P46089, GPR3_HUMAN, T; P35413, GPR3_MOUSE, T; P46093, GPR4_HUMAN, T; DR P50132, GPR4_PIG , T; P46095, GPR6_HUMAN, T; P51651, GPR6_RAT , T; DR P48145, GPR7_HUMAN, T; P49681, GPR7_MOUSE, T; P48146, GPR8_HUMAN, T; DR P49683, GPRA_HUMAN, T; Q64121, GPRA_RAT , T; P47775, GPRC_HUMAN, T; DR P35412, GPRC_MOUSE, T; P30951, GPRC_RAT , T; O18982, GPRF_CERAE, T; DR P49685, GPRF_HUMAN, T; O97663, GPRF_MACMU, T; P56412, GPRF_MACNE, T; DR Q13304, GPRH_HUMAN, T; Q14330, GPRI_HUMAN, T; Q15760, GPRJ_HUMAN, T; DR Q61121, GPRJ_MOUSE, T; P70585, GPRJ_RAT , T; Q99678, GPRK_HUMAN, T; DR Q99679, GPRL_HUMAN, T; Q99680, GPRM_HUMAN, T; Q99705, GPRO_HUMAN, T; DR P97639, GPRO_RAT , T; O00155, GPRP_HUMAN, T; Q9NS67, GPRS_HUMAN, T; DR O54897, GPRS_MOUSE, T; Q9JJH3, GPRS_RAT , T; O00270, GPRV_HUMAN, T; DR O75388, GPRW_HUMAN, T; O88416, GPRX_MOUSE, T; Q91178, GPRX_ORYLA, T; DR Q9UPC5, GPRY_HUMAN, T; 
Q9R1K6, GPRY_MOUSE, T; Q9HC97, GPRZ_HUMAN, T; DR Q9ES90, GPRZ_MOUSE, T; Q93126, GRE1_BALAM, T; Q93127, GRE2_BALAM, T; DR P32236, GRHR_BOVIN, T; O42329, GRHR_CLAGA, T; O18821, GRHR_HORSE, T; DR P30968, GRHR_HUMAN, T; Q01776, GRHR_MOUSE, T; P49922, GRHR_PIG , T; DR P30969, GRHR_RAT , T; P32237, GRHR_SHEEP, T; P30550, GRPR_HUMAN, T; DR P21729, GRPR_MOUSE, T; P52500, GRPR_RAT , T; P35894, GU01_RAT , T; DR P35895, GU03_RAT , T; P34987, GU27_RAT , T; P35896, GU33_RAT , T; DR P35897, GU38_RAT , T; P35898, GU45_RAT , T; P35899, GU58_RAT , T; DR P47752, H218_RAT , T; O14626, H963_HUMAN, T; P30546, HH1R_BOVIN, T; DR P31389, HH1R_CAVPO, T; P35367, HH1R_HUMAN, T; P70174, HH1R_MOUSE, T; DR P31390, HH1R_RAT , T; P17124, HH2R_CANFA, T; P47747, HH2R_CAVPO, T; DR P25021, HH2R_HUMAN, T; P97292, HH2R_MOUSE, T; P25102, HH2R_RAT , T; DR Q9JI35, HH3R_CAVPO, T; Q9Y5N1, HH3R_HUMAN, T; P58406, HH3R_MOUSE, T; DR Q9QYN8, HH3R_RAT , T; Q9H3N8, HH4R_HUMAN, T; P49019, HM74_HUMAN, T; DR P55919, IL8A_GORGO, T; P25024, IL8A_HUMAN, T; P55920, IL8A_PANTR, T; DR P21109, IL8A_RABIT, T; P70612, IL8A_RAT , T; Q28003, IL8B_BOVIN, T; DR O97571, IL8B_CANFA, T; Q28422, IL8B_GORGO, T; P25025, IL8B_HUMAN, T; DR Q28519, IL8B_MACMU, T; P35343, IL8B_MOUSE, T; Q28807, IL8B_PANTR, T; DR P35344, IL8B_RABIT, T; P35407, IL8B_RAT , T; Q90334, ITR_CATCO , T; DR Q15391, KI01_HUMAN, T; O35881, KI01_RAT , T; Q28005, LSHR_BOVIN, T; DR O02721, LSHR_CALJA, T; Q90674, LSHR_CHICK, T; P22888, LSHR_HUMAN, T; DR P30730, LSHR_MOUSE, T; P16582, LSHR_PIG , T; P16235, LSHR_RAT , T; DR Q28585, LSHR_SHEEP, T; P04201, MAS_HUMAN , T; P30554, MAS_MOUSE , T; DR P12526, MAS_RAT , T; P41968, MC3R_HUMAN, T; P33033, MC3R_MOUSE, T; DR P32244, MC3R_RAT , T; Q9GLJ8, MC4R_BOVIN, T; P32245, MC4R_HUMAN, T; DR P56450, MC4R_MOUSE, T; O97504, MC4R_PIG , T; P70596, MC4R_RAT , T; DR P56451, MC5R_BOVIN, T; P33032, MC5R_HUMAN, T; P41149, MC5R_MOUSE, T; DR Q9TT23, MC5R_PANTR, T; Q9MZV8, MC5R_PIG , T; P35345, MC5R_RAT , T; DR P41983, MC5R_SHEEP, T; P51046, 
ML11_BRARE, T; P51047, ML12_BRARE, T; DR P51049, ML13_BRARE, T; Q90456, ML14_BRARE, T; O02769, ML1A_BOVIN, T; DR P49285, ML1A_CHICK, T; P48039, ML1A_HUMAN, T; Q61184, ML1A_MOUSE, T; DR P49217, ML1A_PHOSU, T; O02781, ML1A_PIG , T; P49218, ML1A_RAT , T; DR P48040, ML1A_SHEEP, T; P51048, ML1A_XENLA, T; P51050, ML1B_CHICK, T; DR P49286, ML1B_HUMAN, T; P49287, ML1B_RAT , T; P51051, ML1B_XENLA, T; DR P51052, ML1C_BRARE, T; P49288, ML1C_CHICK, T; P49219, ML1C_XENLA, T; DR Q13585, ML1X_HUMAN, T; O88495, ML1X_MOUSE, T; Q62953, ML1X_RAT , T; DR Q28558, ML1X_SHEEP, T; P35410, MRG_HUMAN , T; P56442, MSHR_ALCAA, T; DR P47798, MSHR_BOVIN, T; O77616, MSHR_CANFA, T; P56443, MSHR_CAPCA, T; DR P56444, MSHR_CAPHI, T; P56445, MSHR_CEREL, T; P55167, MSHR_CHICK, T; DR P56446, MSHR_DAMDA, T; P79166, MSHR_HORSE, T; Q01726, MSHR_HUMAN, T; DR Q01727, MSHR_MOUSE, T; P56447, MSHR_OVIMO, T; Q9TUK4, MSHR_PANTR, T; DR Q9TU05, MSHR_PIG , T; P56448, MSHR_RANTA, T; O19037, MSHR_SHEEP, T; DR Q29154, MSHR_VULVU, T; Q90252, MTR_BUFMA , T; Q9GZQ6, NFF1_HUMAN, T; DR Q9EP86, NFF1_RAT , T; Q9Y5X5, NFF2_HUMAN, T; Q9EQD2, NFF2_RAT , T; DR P30547, NK1R_CAVPO, T; P25103, NK1R_HUMAN, T; P30548, NK1R_MOUSE, T; DR Q98982, NK1R_RANCA, T; P14600, NK1R_RAT , T; P05363, NK2R_BOVIN, T; DR Q64077, NK2R_CAVPO, T; P21452, NK2R_HUMAN, T; P51144, NK2R_MESAU, T; DR P30549, NK2R_MOUSE, T; P79218, NK2R_RABIT, T; P16610, NK2R_RAT , T; DR P29371, NK3R_HUMAN, T; P47937, NK3R_MOUSE, T; O97512, NK3R_RABIT, T; DR P16177, NK3R_RAT , T; P30098, NK4R_HUMAN, T; P28336, NMBR_HUMAN, T; DR O54799, NMBR_MOUSE, T; P24053, NMBR_RAT , T; P30989, NTR1_HUMAN, T; DR O88319, NTR1_MOUSE, T; P20789, NTR1_RAT , T; O95665, NTR2_HUMAN, T; DR P70310, NTR2_MOUSE, T; Q63384, NTR2_RAT , T; O02813, NY1R_CANFA, T; DR Q9WVD0, NY1R_CAVPO, T; P25929, NY1R_HUMAN, T; Q04573, NY1R_MOUSE, T; DR O02835, NY1R_PIG , T; P21555, NY1R_RAT , T; Q28602, NY1R_SHEEP, T; DR P34992, NY1R_XENLA, T; P79113, NY2R_BOVIN, T; Q9Z2D5, NY2R_CAVPO, T; DR Q9DDN6, NY2R_CHICK, T; 
P49146, NY2R_HUMAN, T; Q9GK74, NY2R_MACMU, T; DR P97295, NY2R_MOUSE, T; O02836, NY2R_PIG , T; P79211, NY2R_SHEEP, T; DR P50391, NY4R_HUMAN, T; Q61041, NY4R_MOUSE, T; Q63447, NY4R_RAT , T; DR O62729, NY5R_CANFA, T; Q15761, NY5R_HUMAN, T; O70342, NY5R_MOUSE, T; DR O97969, NY5R_PIG , T; Q63634, NY5R_RAT , T; Q61212, NY6R_MOUSE, T; DR P79217, NY6R_RABIT, T; P25931, NYR_DROME , T; Q9P1Q5, O1A1_HUMAN, T; DR Q9Y585, O1A2_HUMAN, T; Q15619, O1C1_HUMAN, T; P34982, O1D2_HUMAN, T; DR P47884, O1D4_HUMAN, T; P58170, O1D5_HUMAN, T; P30953, O1E1_HUMAN, T; DR P47887, O1E2_HUMAN, T; Q9UM60, O1E5_HUMAN, T; O43749, O1F1_HUMAN, T; DR P47890, O1G1_HUMAN, T; O60431, O1I1_HUMAN, T; Q15612, O1Q1_HUMAN, T; DR O95047, O2A4_HUMAN, T; Q9GZK3, O2B2_HUMAN, T; O76000, O2B3_HUMAN, T; DR P58173, O2B6_HUMAN, T; O95371, O2C1_HUMAN, T; Q9H210, O2D2_HUMAN, T; DR Q13607, O2F1_HUMAN, T; O95006, O2F2_HUMAN, T; Q9H205, O2G1_HUMAN, T; DR Q9GZK4, O2H1_HUMAN, T; O95918, O2H2_HUMAN, T; Q15062, O2H3_HUMAN, T; DR O76002, O2J2_HUMAN, T; O76001, O2J3_HUMAN, T; Q9NQN1, O2S2_HUMAN, T; DR O43869, O2T1_HUMAN, T; Q9Y3N9, O2W1_HUMAN, T; P47881, O3A1_HUMAN, T; DR P47893, O3A2_HUMAN, T; P47888, O3A3_HUMAN, T; P47883, O3A4_HUMAN, T; DR Q15615, O4D1_HUMAN, T; P58180, O4D2_HUMAN, T; O95013, O4F3_HUMAN, T; DR Q9UP62, O5D4_HUMAN, T; O95221, O5F1_HUMAN, T; Q13606, O5I1_HUMAN, T; DR Q9UGF5, O5U1_HUMAN, T; Q9UGF6, O5V1_HUMAN, T; O95222, O6A1_HUMAN, T; DR O95007, O6B1_HUMAN, T; Q15622, O7A5_HUMAN, T; O76100, O7AA_HUMAN, T; DR O14581, O7AH_HUMAN, T; O76099, O7C1_HUMAN, T; O60412, O7C2_HUMAN, T; DR Q15620, O8B8_HUMAN, T; Q9GZM6, O8D2_HUMAN, T; P58181, OAA3_HUMAN, T; DR Q9H209, OAA4_HUMAN, T; Q9H207, OAA5_HUMAN, T; Q9Y4A9, OAH1_HUMAN, T; DR O60403, OAH2_HUMAN, T; O60404, OAH3_HUMAN, T; P30954, OAJ1_HUMAN, T; DR Q25321, OAR1_LOCMI, T; O77408, OAR1_LYMST, T; Q25322, OAR2_LOCMI, T; DR O01670, OAR2_LYMST, T; Q17232, OAR_BOMMO , T; P22270, OAR_DROME , T; DR Q25188, OAR_HELVI , T; Q9GZK7, OBA1_HUMAN, T; P58182, OCD2_HUMAN, T; DR Q9UGF7, 
OCD3_HUMAN, T; Q60883, OL10_MOUSE, T; Q60890, OL11_MOUSE, T; DR P34984, OL13_MOUSE, T; P23275, OL15_MOUSE, T; P47886, OL1F_HUMAN, T; DR P47889, OL1I_HUMAN, T; P47892, OL1L_HUMAN, T; P34985, OL7A_MOUSE, T; DR P34983, OL7B_MOUSE, T; Q60882, OL7C_MOUSE, T; Q60884, OL7D_MOUSE, T; DR Q60886, OL7E_MOUSE, T; Q60887, OL7F_MOUSE, T; Q60888, OL7G_MOUSE, T; DR Q60893, OL7H_MOUSE, T; Q60895, OL7I_MOUSE, T; P23269, OLF0_RAT , T; DR Q95154, OLF1_CANFA, T; P37067, OLF1_CHICK, T; Q60891, OLF1_MOUSE, T; DR P23274, OLF1_RAT , T; Q95155, OLF2_CANFA, T; P37068, OLF2_CHICK, T; DR P23268, OLF2_RAT , T; Q95156, OLF3_CANFA, T; P37069, OLF3_CHICK, T; DR Q60879, OLF3_MOUSE, T; P23265, OLF3_RAT , T; Q95157, OLF4_CANFA, T; DR P37070, OLF4_CHICK, T; P23273, OLF4_RAT , T; P37071, OLF5_CHICK, T; DR Q60889, OLF5_MOUSE, T; P23266, OLF5_RAT , T; P37072, OLF6_CHICK, T; DR P34986, OLF6_MOUSE, T; P23267, OLF6_RAT , T; P23270, OLF7_RAT , T; DR Q98913, OLF8_CHICK, T; Q60892, OLF8_MOUSE, T; P23271, OLF8_RAT , T; DR Q98914, OLF9_CHICK, T; Q60885, OLF9_MOUSE, T; P23272, OLF9_RAT , T; DR P30955, OLFD_CANFA, T; Q9H1Y3, OPN3_HUMAN, T; Q9WUK7, OPN3_MOUSE, T; DR Q9UHM6, OPN4_HUMAN, T; Q9QXZ9, OPN4_MOUSE, T; P41143, OPRD_HUMAN, T; DR P32300, OPRD_MOUSE, T; P79291, OPRD_PIG , T; P33533, OPRD_RAT , T; DR P41144, OPRK_CAVPO, T; P41145, OPRK_HUMAN, T; P33534, OPRK_MOUSE, T; DR P34975, OPRK_RAT , T; P79350, OPRM_BOVIN, T; P97266, OPRM_CAVPO, T; DR P35372, OPRM_HUMAN, T; P42866, OPRM_MOUSE, T; Q95247, OPRM_PIG , T; DR P33535, OPRM_RAT , T; P47748, OPRX_CAVPO, T; P41146, OPRX_HUMAN, T; DR P35377, OPRX_MOUSE, T; P79292, OPRX_PIG , T; P35370, OPRX_RAT , T; DR P22269, OPS1_CALVI, T; P06002, OPS1_DROME, T; P28678, OPS1_DROPS, T; DR Q25157, OPS1_HEMSA, T; P35360, OPS1_LIMPO, T; O15973, OPS1_PATYE, T; DR Q94741, OPS1_SCHGR, T; P08099, OPS2_DROME, T; P28679, OPS2_DROPS, T; DR Q25158, OPS2_HEMSA, T; P35361, OPS2_LIMPO, T; O15974, OPS2_PATYE, T; DR Q26495, OPS2_SCHGR, T; P04950, OPS3_DROME, T; P28680, OPS3_DROPS, T; DR P08255, 
OPS4_DROME, T; P29404, OPS4_DROPS, T; P17646, OPS4_DROVI, T; DR P91657, OPS5_DROME, T; O01668, OPS6_DROME, T; P51471, OPSB_ANOCA, T; DR P90680, OPSB_APIME, T; P51472, OPSB_ASTFA, T; P51490, OPSB_BOVIN, T; DR P32310, OPSB_CARAU, T; P28682, OPSB_CHICK, T; O13227, OPSB_CONCO, T; DR P35357, OPSB_GECGE, T; P03999, OPSB_HUMAN, T; P51491, OPSB_MOUSE, T; DR P87365, OPSB_ORYLA, T; Q63652, OPSB_RAT , T; O13092, OPSB_SAIBB, T; DR O42294, OPSD_ABYKO, T; P52202, OPSD_ALLMI, T; Q90245, OPSD_AMBTI, T; DR Q90214, OPSD_ANGAN, T; P41591, OPSD_ANOCA, T; Q17053, OPSD_APIME, T; DR P41590, OPSD_ASTFA, T; Q9YGZ1, OPSD_ATHBO, T; O42300, OPSD_BATMU, T; DR O42301, OPSD_BATNI, T; P02699, OPSD_BOVIN, T; P56514, OPSD_BUFBU, T; DR P56515, OPSD_BUFMA, T; Q17292, OPSD_CAMAB, T; O18312, OPSD_CAMHU, T; DR O16017, OPSD_CAMLU, T; O18315, OPSD_CAMMA, T; O16018, OPSD_CAMSC, T; DR P32308, OPSD_CANFA, T; P32309, OPSD_CARAU, T; Q17296, OPSD_CATBO, T; DR Q9YGZ8, OPSD_CHELB, T; P22328, OPSD_CHICK, T; O42327, OPSD_COMDY, T; DR Q90305, OPSD_CORAU, T; O42307, OPSD_COTBO, T; O42328, OPSD_COTGR, T; DR O42330, OPSD_COTIN, T; Q90373, OPSD_COTKE, T; P28681, OPSD_CRIGR, T; DR P51488, OPSD_CYPCA, T; O62791, OPSD_DELDE, T; Q9YGZ4, OPSD_DICLA, T; DR Q9YH05, OPSD_DIPAN, T; Q9YH04, OPSD_DIPVU, T; O93441, OPSD_GALML, T; DR P79756, OPSD_GAMAF, T; O62792, OPSD_GLOME, T; Q9YGZ2, OPSD_GOBNI, T; DR P08100, OPSD_HUMAN, T; O42268, OPSD_ICTPU, T; P22671, OPSD_LAMJA, T; DR O42427, OPSD_LIMBE, T; O42431, OPSD_LIMPA, T; Q9YH00, OPSD_LITMO, T; DR Q9YGZ6, OPSD_LIZAU, T; Q9YGZ7, OPSD_LIZSA, T; P24603, OPSD_LOLFO, T; DR Q17094, OPSD_LOLSU, T; Q28886, OPSD_MACFA, T; O62793, OPSD_MESBI, T; DR P15409, OPSD_MOUSE, T; Q9YGZ9, OPSD_MUGCE, T; Q9YH01, OPSD_MULSU, T; DR P79798, OPSD_MYRBE, T; P79807, OPSD_MYRVI, T; P79808, OPSD_NEOAR, T; DR P79809, OPSD_NEOAU, T; P79812, OPSD_NEOSA, T; P09241, OPSD_OCTDO, T; DR O18481, OPSD_ORCAU, T; O16019, OPSD_ORCVI, T; P87369, OPSD_ORYLA, T; DR O42452, OPSD_PARKN, T; Q98980, OPSD_PETMA, T; O62795, 
OPSD_PHOGR, T; DR O62794, OPSD_PHOVI, T; O18766, OPSD_PIG , T; P79848, OPSD_POERE, T; DR P35403, OPSD_POMMI, T; P35356, OPSD_PROCL, T; O42451, OPSD_PROJE, T; DR O16020, OPSD_PROML, T; O18485, OPSD_PROOR, T; O18486, OPSD_PROSE, T; DR P49912, OPSD_RABIT, T; P79863, OPSD_RAJER, T; P51470, OPSD_RANCA, T; DR P31355, OPSD_RANPI, T; P56516, OPSD_RANTE, T; P51489, OPSD_RAT , T; DR Q9YGZ3, OPSD_SALPV, T; P79898, OPSD_SARDI, T; P79901, OPSD_SARMI, T; DR Q9YGZ0, OPSD_SARPI, T; P79902, OPSD_SARPU, T; Q9YH03, OPSD_SARSL, T; DR P79903, OPSD_SARSP, T; P79911, OPSD_SARTI, T; P79914, OPSD_SARXA, T; DR O93459, OPSD_SCYCA, T; O16005, OPSD_SEPOF, T; P02700, OPSD_SHEEP, T; DR Q9YGZ5, OPSD_SOLSO, T; Q9YH02, OPSD_SPAAU, T; P35362, OPSD_SPHSP, T; DR O42466, OPSD_TAUBU, T; Q9DGG4, OPSD_TETNG, T; P31356, OPSD_TODPA, T; DR O62796, OPSD_TRIMA, T; O62798, OPSD_TURTR, T; P29403, OPSD_XENLA, T; DR O42604, OPSD_ZEUFA, T; Q9YGY9, OPSD_ZOSOP, T; Q90215, OPSF_ANGAN, T; DR P22330, OPSG_ASTFA, T; P32311, OPSG_CARAU, T; Q9R024, OPSG_CAVPO, T; DR P28683, OPSG_CHICK, T; P35358, OPSG_GECGE, T; P04001, OPSG_HUMAN, T; DR O35599, OPSG_MOUSE, T; O18911, OPSG_ODOVI, T; P87366, OPSG_ORYLA, T; DR O18910, OPSG_RABIT, T; O35476, OPSG_RAT , T; O35478, OPSG_SCICA, T; DR P22331, OPSH_ASTFA, T; P32312, OPSH_CARAU, T; P51474, OPSI_ASTFA, T; DR P34989, OPSL_CALJA, T; O13018, OPSO_SALSA, T; P51475, OPSP_CHICK, T; DR P51476, OPSP_COLLI, T; O42266, OPSP_ICTPU, T; O42490, OPSP_PETMA, T; DR P41592, OPSR_ANOCA, T; P22332, OPSR_ASTFA, T; O18914, OPSR_CANFA, T; DR Q95170, OPSR_CAPHI, T; P32313, OPSR_CARAU, T; P22329, OPSR_CHICK, T; DR O18913, OPSR_FELCA, T; O18912, OPSR_HORSE, T; P04000, OPSR_HUMAN, T; DR P87367, OPSR_ORYLA, T; O12948, OPSR_XENLA, T; P35359, OPSU_BRARE, T; DR Q90309, OPSU_CARAU, T; O61303, OPSV_APIME, T; P28684, OPSV_CHICK, T; DR P87368, OPSV_ORYLA, T; P51473, OPSV_XENLA, T; O14718, OPSX_HUMAN, T; DR O35214, OPSX_MOUSE, T; O43613, OX1R_HUMAN, T; P58307, OX1R_MOUSE, T; DR O97661, OX1R_PIG , T; P56718, OX1R_RAT , 
T; Q9TUP7, OX2R_CANFA, T; DR O43614, OX2R_HUMAN, T; P58308, OX2R_MOUSE, T; O62809, OX2R_PIG , T; DR P56719, OX2R_RAT , T; Q9Y5P1, OXB2_HUMAN, T; Q9Y5P0, OXB4_HUMAN, T; DR Q9H255, OXE2_HUMAN, T; O88628, OXE2_RAT , T; Q9H343, OXI1_HUMAN, T; DR Q9H344, OXI2_HUMAN, T; P56449, OXYR_BOVIN, T; P30559, OXYR_HUMAN, T; DR P56494, OXYR_MACMU, T; P97926, OXYR_MOUSE, T; P32306, OXYR_PIG , T; DR P70536, OXYR_RAT , T; Q28756, OXYR_SHEEP, T; Q9UKL2, OYA1_HUMAN, T; DR Q9H346, OYD1_HUMAN, T; P41231, P2UR_HUMAN, T; P35383, P2UR_MOUSE, T; DR P41232, P2UR_RAT , T; Q98907, P2Y3_CHICK, T; O93361, P2Y3_MELGA, T; DR P51582, P2Y4_HUMAN, T; P32250, P2Y5_CHICK, T; P43657, P2Y5_HUMAN, T; DR Q15077, P2Y6_HUMAN, T; Q63371, P2Y6_RAT , T; Q15722, P2Y7_HUMAN, T; DR P79928, P2Y8_XENLA, T; Q99677, P2Y9_HUMAN, T; P48042, P2YR_BOVIN, T; DR P34996, P2YR_CHICK, T; P47900, P2YR_HUMAN, T; P49652, P2YR_MELGA, T; DR P49650, P2YR_MOUSE, T; P49651, P2YR_RAT , T; P21556, PAFR_CAVPO, T; DR P25105, PAFR_HUMAN, T; P35366, PAFR_MACMU, T; Q62035, PAFR_MOUSE, T; DR P46002, PAFR_RAT , T; P55085, PAR2_HUMAN, T; P55086, PAR2_MOUSE, T; DR Q63645, PAR2_RAT , T; O00254, PAR3_HUMAN, T; O08675, PAR3_MOUSE, T; DR Q13258, PD2R_HUMAN, T; P70263, PD2R_MOUSE, T; P34995, PE21_HUMAN, T; DR P35375, PE21_MOUSE, T; P70597, PE21_RAT , T; Q9XT82, PE22_CANFA, T; DR P43116, PE22_HUMAN, T; Q62053, PE22_MOUSE, T; Q62928, PE22_RAT , T; DR P34979, PE23_BOVIN, T; P43115, PE23_HUMAN, T; P30557, PE23_MOUSE, T; DR P50131, PE23_PIG , T; P46069, PE23_RABIT, T; P34980, PE23_RAT , T; DR P35408, PE24_HUMAN, T; P32240, PE24_MOUSE, T; Q28691, PE24_RABIT, T; DR P43114, PE24_RAT , T; P37289, PF2R_BOVIN, T; P43088, PF2R_HUMAN, T; DR P43117, PF2R_MOUSE, T; P43118, PF2R_RAT , T; Q28905, PF2R_SHEEP, T; DR P79393, PI2R_BOVIN, T; P43119, PI2R_HUMAN, T; P43252, PI2R_MOUSE, T; DR P43253, PI2R_RAT , T; P11613, RDC1_CANFA, T; P25106, RDC1_HUMAN, T; DR P56485, RDC1_MOUSE, T; O89039, RDC1_RAT , T; P23820, REIS_TODPA, T; DR P47803, RGR_BOVIN , T; P47804, RGR_HUMAN , 
T; Q9Z2B3, RGR_MOUSE , T; DR P23749, RTA_RAT , T; Q9I918, SRB3_BRARE, T; Q9NS66, SRB3_HUMAN, T; DR Q9JJH2, SRB3_RAT , T; P30872, SSR1_HUMAN, T; P30873, SSR1_MOUSE, T; DR P28646, SSR1_RAT , T; P34993, SSR2_BOVIN, T; P30874, SSR2_HUMAN, T; DR P30875, SSR2_MOUSE, T; P34994, SSR2_PIG , T; P30680, SSR2_RAT , T; DR P32745, SSR3_HUMAN, T; P30935, SSR3_MOUSE, T; P30936, SSR3_RAT , T; DR P31391, SSR4_HUMAN, T; P49660, SSR4_MOUSE, T; P30937, SSR4_RAT , T; DR P35346, SSR5_HUMAN, T; O08858, SSR5_MOUSE, T; P30938, SSR5_RAT , T; DR O42179, SSRL_FUGRU, T; Q95125, TA2R_BOVIN, T; P56486, TA2R_CERAE, T; DR P21731, TA2R_HUMAN, T; P30987, TA2R_MOUSE, T; P34978, TA2R_RAT , T; DR Q61038, TDA8_MOUSE, T; Q00991, THRR_CRILO, T; P25116, THRR_HUMAN, T; DR P30558, THRR_MOUSE, T; P56488, THRR_PAPHA, T; P26824, THRR_RAT , T; DR P47749, THRR_XENLA, T; P30974, TLR1_DROME, T; P30975, TLR2_DROME, T; DR O46639, TRFR_BOVIN, T; O93603, TRFR_CHICK, T; P34981, TRFR_HUMAN, T; DR P21761, TRFR_MOUSE, T; Q01717, TRFR_RAT , T; Q28596, TRFR_SHEEP, T; DR Q27987, TSHR_BOVIN, T; P14763, TSHR_CANFA, T; P16473, TSHR_HUMAN, T; DR P47750, TSHR_MOUSE, T; P21463, TSHR_RAT , T; P56495, TSHR_SHEEP, T; DR P16849, UL33_HCMVA, T; P52380, UL33_HSV6U, T; P52381, UL33_HSV7J, T; DR Q83207, UL33_MCMVS, T; O12000, UL33_RCMVM, T; P49220, UR2R_BOVIN, T; DR Q9UKP6, UR2R_HUMAN, T; P49684, UR2R_RAT , T; P09703, US27_HCMVA, T; DR P09704, US28_HCMVA, T; Q9J5I0, V021_FOWPV, T; Q9J5H4, V027_FOWPV, T; DR P37288, V1AR_HUMAN, T; Q62463, V1AR_MOUSE, T; P30560, V1AR_RAT , T; DR P48043, V1AR_SHEEP, T; P47901, V1BR_HUMAN, T; Q9WU02, V1BR_MOUSE, T; DR P48974, V1BR_RAT , T; Q9J529, V206_FOWPV, T; P48044, V2R_BOVIN , T; DR P30518, V2R_HUMAN , T; P32307, V2R_PIG , T; Q00788, V2R_RAT , T; DR P32229, VC03_SPVKA, T; Q01035, VG74_HSVSA, T; Q98146, VG74_KSHV , T; DR Q08520, VK02_SPVKA, T; Q86917, VQ3L_CAPVK, T; P52382, VU51_HSV6U, T; DR P52542, VU51_HSV6Z, T; Q19084, YDBM_CAEEL, T; P34311, YKR5_CAEEL, T; DR Q03566, YLD1_CAEEL, T; P34488, YMJC_CAEEL, T; 
Q03613, YN84_CAEEL, T; DR Q09502, YQH2_CAEEL, T; O02213, YQNJ_CAEEL, T; Q09638, YR13_CAEEL, T; DR Q09388, YR41_CAEEL, T; Q09561, YR42_CAEEL, T; Q09965, YS96_CAEEL, T; DR Q11082, YT66_CAEEL, T; Q18007, YTJ5_CAEEL, T; Q10904, YWO1_CAEEL, T; DR Q18179, YXX5_CAEEL, T; Q18775, YYI3_CAEEL, T; Q18904, YYO1_CAEEL, T; DR Q09966, YS97_CAEEL, T; DR P46564, SG12_CAEEL, ?; Q19992, SRD1_CAEEL, ?; P16751, UL78_HCMVA, ?; DR Q09554, YQV5_CAEEL, ?; Q09344, YRP2_CAEEL, ?; Q11095, YWZ5_CAEEL, ?; DR Q9VRN2, MTH2_DROME, F; 3D 1MMH; 1BOJ; 1BOK; 1F88; DO PDOC00210; // ID G_PROTEIN_RECEP_F2_1; PATTERN. AC PS00649; DT JUN-1992 (CREATED); JUL-1998 (DATA UPDATE); JUL-1998 (INFO UPDATE). DE G-protein coupled receptors family 2 signature 1. PA C-x(3)-[FYWLIV]-D-x(3,4)-C-[FW]-x(2)-[STAGV]-x(8,9)-C-[PF]. NR /RELEASE=40.7,103373; NR /TOTAL=57(57); /POSITIVE=57(57); /UNKNOWN=0(0); /FALSE_POS=0(0); NR /FALSE_NEG=16; /PARTIAL=2; CC /TAXO-RANGE=??E??; /MAX-REPEAT=1; DR O08893, CALR_CAVPO, T; P30988, CALR_HUMAN, T; Q60755, CALR_MOUSE, T; DR P25117, CALR_PIG , T; P79222, CALR_RABIT, T; P32214, CALR_RAT , T; DR Q16602, CGRR_HUMAN, T; Q63118, CGRR_RAT , T; Q90812, CRF1_CHICK, T; DR P34998, CRF1_HUMAN, T; P35347, CRF1_MOUSE, T; P35353, CRF1_RAT , T; DR O42602, CRF1_XENLA, T; Q13324, CRF2_HUMAN, T; Q60748, CRF2_MOUSE, T; DR P47866, CRF2_RAT , T; O42603, CRF2_XENLA, T; Q16983, DIHR_ACHDO, T; DR P35464, DIHR_MANSE, T; P48546, GIPR_HUMAN, T; P43218, GIPR_MESAU, T; DR P43219, GIPR_RAT , T; P43220, GLP1_HUMAN, T; O35659, GLP1_MOUSE, T; DR P32301, GLP1_RAT , T; O95838, GLP2_HUMAN, T; Q9Z0W0, GLP2_RAT , T; DR P47871, GLR_HUMAN , T; Q61606, GLR_MOUSE , T; P30082, GLR_RAT , T; DR Q02643, GRFR_HUMAN, T; P32082, GRFR_MOUSE, T; P34999, GRFR_PIG , T; DR Q02644, GRFR_RAT , T; Q29627, PACR_BOVIN, T; P41586, PACR_HUMAN, T; DR P70205, PACR_MOUSE, T; P32215, PACR_RAT , T; P70555, PTH2_RAT , T; DR P49190, PTR2_HUMAN, T; P25107, PTRR_DIDMA, T; Q03431, PTRR_HUMAN, T; DR P41593, PTRR_MOUSE, T; P50133, PTRR_PIG , T; P25961, 
PTRR_RAT , T; DR P47872, SCRC_HUMAN, T; O46502, SCRC_RABIT, T; P23811, SCRC_RAT , T; DR Q90308, VIPR_CARAU, T; P32241, VIPR_HUMAN, T; Q28992, VIPR_PIG , T; DR P30083, VIPR_RAT , T; P41587, VIPS_HUMAN, T; P41588, VIPS_MOUSE, T; DR P35000, VIPS_RAT , T; P30650, YOW3_CAEEL, T; Q09460, YQ44_CAEEL, T; DR Q91085, VIPR_MELGA, P; P97751, VIPR_MOUSE, P; DR O14514, BAI1_HUMAN, N; O60241, BAI2_HUMAN, N; O60242, BAI3_HUMAN, N; DR O62772, CRF1_SHEEP, N; Q9VXD9, MTH1_DROME, N; Q9VRN2, MTH2_DROME, N; DR Q9V818, MTH3_DROME, N; Q9V817, MTH4_DROME, N; Q9VGG8, MTH5_DROME, N; DR Q9VSE7, MTH7_DROME, N; O97148, MTH_DROME , N; P83120, MTH_DROSI , N; DR Q9GT50, MTH_DROYA , N; Q9W0R5, MTHA_DROME, N; P83118, MTHB_DROME, N; DR P83119, MTHC_DROME, N; DO PDOC00559; // ID G_PROTEIN_RECEP_F2_2; PATTERN. AC PS00650; DT JUN-1992 (CREATED); NOV-1997 (DATA UPDATE); JUL-1998 (INFO UPDATE). DE G-protein coupled receptors family 2 signature 2. PA Q-G-[LMFCA]-[LIVMFT]-[LIV]-x-[LIVFST]-[LIF]-[VFYH]-C-[LFY]-x-N-x(2)-V. NR /RELEASE=40.7,103373; NR /TOTAL=60(60); /POSITIVE=60(60); /UNKNOWN=0(0); /FALSE_POS=0(0); NR /FALSE_NEG=18; /PARTIAL=0; CC /TAXO-RANGE=??E??; /MAX-REPEAT=1; DR O08893, CALR_CAVPO, T; P30988, CALR_HUMAN, T; Q60755, CALR_MOUSE, T; DR P25117, CALR_PIG , T; P79222, CALR_RABIT, T; P32214, CALR_RAT , T; DR P48960, CD97_HUMAN, T; Q16602, CGRR_HUMAN, T; Q63118, CGRR_RAT , T; DR Q90812, CRF1_CHICK, T; P34998, CRF1_HUMAN, T; P35347, CRF1_MOUSE, T; DR P35353, CRF1_RAT , T; O62772, CRF1_SHEEP, T; O42602, CRF1_XENLA, T; DR Q13324, CRF2_HUMAN, T; Q60748, CRF2_MOUSE, T; P47866, CRF2_RAT , T; DR O42603, CRF2_XENLA, T; Q16983, DIHR_ACHDO, T; P35464, DIHR_MANSE, T; DR Q14246, EMR1_HUMAN, T; Q61549, EMR1_MOUSE, T; P48546, GIPR_HUMAN, T; DR P43218, GIPR_MESAU, T; P43219, GIPR_RAT , T; P43220, GLP1_HUMAN, T; DR O35659, GLP1_MOUSE, T; P32301, GLP1_RAT , T; P47871, GLR_HUMAN , T; DR Q61606, GLR_MOUSE , T; P30082, GLR_RAT , T; Q02643, GRFR_HUMAN, T; DR P32082, GRFR_MOUSE, T; P34999, GRFR_PIG , T; Q02644, 
GRFR_RAT , T; DR Q29627, PACR_BOVIN, T; P41586, PACR_HUMAN, T; P70205, PACR_MOUSE, T; DR P32215, PACR_RAT , T; P49190, PTR2_HUMAN, T; P25107, PTRR_DIDMA, T; DR Q03431, PTRR_HUMAN, T; P41593, PTRR_MOUSE, T; P50133, PTRR_PIG , T; DR P25961, PTRR_RAT , T; P47872, SCRC_HUMAN, T; O46502, SCRC_RABIT, T; DR P23811, SCRC_RAT , T; Q90308, VIPR_CARAU, T; P32241, VIPR_HUMAN, T; DR Q91085, VIPR_MELGA, T; P97751, VIPR_MOUSE, T; Q28992, VIPR_PIG , T; DR P30083, VIPR_RAT , T; P41587, VIPS_HUMAN, T; P41588, VIPS_MOUSE, T; DR P35000, VIPS_RAT , T; P30650, YOW3_CAEEL, T; Q09460, YQ44_CAEEL, T; DR O14514, BAI1_HUMAN, N; O60241, BAI2_HUMAN, N; O60242, BAI3_HUMAN, N; DR O95838, GLP2_HUMAN, N; Q9Z0W0, GLP2_RAT , N; Q9VXD9, MTH1_DROME, N; DR Q9VRN2, MTH2_DROME, N; Q9V818, MTH3_DROME, N; Q9V817, MTH4_DROME, N; DR Q9VGG8, MTH5_DROME, N; Q9VSE7, MTH7_DROME, N; O97148, MTH_DROME , N; DR P83120, MTH_DROSI , N; Q9GT50, MTH_DROYA , N; Q9W0R5, MTHA_DROME, N; DR P83118, MTHB_DROME, N; P83119, MTHC_DROME, N; P70555, PTH2_RAT , N; DO PDOC00559; // ID G_PROTEIN_RECEP_F2_3; MATRIX. AC PS50227; DT DEC-2001 (CREATED); DEC-2001 (DATA UPDATE); DEC-2001 (INFO UPDATE). DE G-protein coupled receptors family 2 profile 1. 
MA /GENERAL_SPEC: ALPHABET='ABCDEFGHIKLMNPQRSTVWYZ'; LENGTH=81; MA /DISJOINT: DEFINITION=PROTECT; N1=6; N2=76; MA /NORMALIZATION: MODE=1; FUNCTION=LINEAR; R1=1.0635; R2=0.01305269; TEXT='-LogE'; MA /CUT_OFF: LEVEL=0; SCORE=569; N_SCORE=8.5; MODE=1; TEXT='!'; MA /CUT_OFF: LEVEL=-1; SCORE=416; N_SCORE=6.5; MODE=1; TEXT='?'; MA /DEFAULT: D=-20; I=-20; B1=-50; E1=-50; MI=-105; MD=-105; IM=-105; DM=-105; MA /I: B1=0; BI=-105; BD=-105; MA /M: SY='Q'; M=-8,-2,-21,-2,11,-23,-19,2,-19,6,-12,-4,-3,-14,16,8,-5,-8,-19,-23,-9,13; MA /M: SY='C'; M=-10,-19,114,-28,-26,-20,-30,-29,-30,-28,-20,-20,-19,-38,-28,-29,-10,-10,-11,-49,-30,-27; MA /M: SY='E'; M=-7,-5,-25,-3,10,-14,-20,2,-13,-3,-2,-4,-8,-16,5,-2,-8,-8,-14,-20,-1,7; MA /M: SY='E'; M=-7,5,-25,6,15,-22,-16,1,-20,5,-16,-10,5,-13,11,4,-1,-7,-19,-27,-13,12; MA /M: SY='R'; M=-5,-5,-22,-8,2,-12,-17,-6,-14,1,-9,-6,-1,-12,0,6,-2,1,-12,-24,-9,0; MA /I: MD=-10; MA /M: SY='L'; M=0,-21,-19,-24,-17,3,-23,-18,14,-22,24,14,-19,-21,-15,-19,-16,-6,8,-19,-3,-17; D=-2; MA /I: MD=-10; MA /M: SY='Q'; M=0,-2,-19,-3,5,-19,-15,-4,-10,-3,-11,-5,-3,-9,6,-6,3,1,-8,-24,-11,5; D=-2; MA /I: MD=-10; MA /M: SY='E'; M=-1,-4,-13,-4,7,-19,-10,-3,-16,5,-12,-7,-3,-10,7,4,-3,-7,-14,-18,-11,7; D=-2; MA /I: MD=-10; MA /M: SY='D'; M=-4,1,-13,3,1,-8,-11,0,-5,-5,-2,-3,-2,-11,-3,-6,-4,-5,-3,-15,-3,-1; D=-2; MA /I: MD=-10; MA /M: SY='P'; M=-1,-1,-13,-1,0,-11,-8,-6,-7,-3,-7,-6,-1,7,-2,-6,-2,-1,-9,-15,-9,-2; D=-2; MA /I: I=-3; MI=0; MD=-10; IM=0; DM=-10; MA /M: M=-1,-3,-24,-2,-3,-23,-2,-8,-18,-6,-16,-10,-3,-7,-2,-8,0,-4,-14,-26,-16,-4; MA /M: M=-5,-11,-14,-12,-5,-8,-17,-7,-12,-10,-8,-6,-10,-6,-5,-11,-3,-3,-12,-22,-5,-6; MA /M: SY='E'; M=-9,1,-29,0,10,-24,-14,-5,-19,3,-21,-12,5,8,7,3,-2,-6,-22,-28,-18,6; MA /M: SY='N'; M=-6,4,-26,4,3,-23,-5,-6,-18,-4,-19,-14,5,-1,-4,-8,-3,-8,-18,-29,-18,-1; MA /M: SY='S'; M=-2,2,-21,2,9,-22,-11,-10,-19,-1,-21,-15,2,0,0,-5,10,8,-14,-31,-18,4; MA /M: SY='G'; M=-5,1,-26,5,1,-22,7,-6,-26,-8,-23,-17,0,-6,-5,-10,3,-4,-21,-25,-13,-3; MA /M: 
SY='L'; M=-7,-14,-23,-13,-10,-11,-13,-15,-4,-12,1,-1,-10,-8,-10,-7,-6,-3,-5,-26,-13,-12; MA /M: SY='Y'; M=-4,-14,-26,-17,-15,1,0,-9,-15,-11,-13,-9,-10,-14,-12,-11,-7,-10,-13,-3,8,-14; MA /M: SY='C'; M=-10,-20,120,-30,-30,-20,-30,-30,-30,-30,-20,-20,-20,-40,-30,-30,-10,-10,-10,-50,-30,-30; MA /M: SY='N'; M=-6,8,-25,3,3,-19,-10,-6,-19,-2,-23,-16,16,9,-3,-6,4,-2,-23,-32,-19,-1; MA /M: SY='G'; M=1,-11,-27,-11,-7,-24,6,-15,-24,1,-22,-13,-5,1,-6,5,-2,-8,-18,-23,-20,-9; MA /M: SY='T'; M=0,-5,-16,-9,0,-12,-17,-11,-8,-6,-7,-1,-5,-12,-3,-7,8,17,-3,-26,-9,-2; MA /M: SY='W'; M=-17,-29,-37,-32,-23,18,-23,-23,-8,-17,-8,-10,-26,-27,-20,-17,-28,-20,-15,74,21,-19; MA /M: SY='D'; M=-18,40,-29,57,20,-37,-12,-2,-36,3,-27,-26,15,-10,1,-5,-1,-10,-26,-37,-19,10; MA /I: I=-6; MI=0; MD=-33; IM=0; DM=-33; MA /M: SY='N'; M=-7,3,-26,-2,-9,-19,10,-6,-17,-7,-18,-8,11,-18,-7,-6,-2,-9,-17,-25,-14,-9; MA /M: SY='W'; M=-10,-24,-29,-26,-22,0,-9,-16,3,-20,3,2,-19,-25,-17,-17,-17,-13,-2,9,8,-21; MA /M: SY='V'; M=0,-13,-16,-20,-17,-5,-21,-19,10,-17,6,4,-9,-19,-14,-16,-1,10,12,-27,-9,-16; MA /M: SY='C'; M=-10,-16,93,-25,-25,-20,-27,-25,-28,-25,-20,-18,-15,-35,-21,-25,-8,-8,-12,-40,-25,-23; MA /M: SY='W'; M=-20,-39,-48,-39,-30,14,-21,-27,-18,-20,-18,-18,-38,-30,-20,-20,-38,-28,-28,139,32,-20; MA /M: SY='P'; M=-12,0,-35,9,8,-31,-17,-11,-25,-1,-29,-20,-5,48,-2,-9,-6,-10,-29,-31,-25,0; MA /M: SY='D'; M=-4,2,-26,5,3,-22,-14,-6,-21,-1,-20,-15,0,4,2,-1,2,-2,-19,-26,-11,0; MA /M: SY='T'; M=13,-4,-12,-11,-10,-16,-2,-19,-14,-11,-15,-12,-2,-12,-10,-14,18,24,-4,-28,-16,-10; MA /M: SY='P'; M=3,-8,-26,-8,-1,-25,-11,-11,-19,-1,-21,-13,-7,18,-1,-5,-1,-3,-17,-25,-19,-3; MA /M: SY='P'; M=10,-16,-23,-17,-9,-16,-14,-19,-9,-8,-15,-10,-14,14,-9,-12,-1,-4,-6,-19,-17,-10; MA /M: SY='G'; M=-1,-2,-21,-6,-17,-28,51,-15,-35,-17,-29,-20,9,-21,-17,-17,2,-16,-28,-25,-28,-17; MA /I: I=-6; MD=-33; MA /M: SY='E'; M=2,-4,-19,-6,7,-18,-16,-9,-12,0,-10,-3,-5,-11,6,-2,4,5,-8,-25,-13,6; D=-6; MA /I: I=-6; MI=-33; IM=-33; DM=-33; MA /M: 
SY='L'; M=-6,-19,-19,-22,-16,1,-24,-16,6,-18,11,7,-18,-18,-15,-16,-9,5,6,-15,-2,-15; MA /M: SY='V'; M=12,-20,-11,-22,-20,-9,-17,-19,10,-16,-2,0,-18,-22,-19,-18,2,2,26,-29,-13,-20; MA /M: SY='T'; M=0,-7,-18,-10,0,-9,-18,-11,-7,-5,-7,-5,-5,-16,-6,-4,0,2,-1,-24,-9,-3; MA /M: SY='M'; M=-1,-16,-21,-19,-9,-14,-22,-13,3,-5,1,5,-15,-20,3,0,-9,-6,5,-22,-9,-4; MA /M: SY='P'; M=-5,-11,-29,-6,2,-24,-15,-10,-19,-6,-24,-15,-9,37,0,-12,2,-3,-22,-28,-18,-1; MA /M: SY='C'; M=-10,-20,120,-30,-30,-20,-30,-30,-30,-30,-20,-20,-20,-40,-30,-30,-10,-10,-10,-50,-30,-30; MA /M: SY='P'; M=-11,-21,-39,-12,-2,-23,-21,-20,-19,-11,-27,-19,-20,82,-12,-20,-11,-10,-28,-27,-26,-11; MA /M: SY='D'; M=-11,7,-30,11,9,-22,-16,-6,-25,5,-20,-16,0,-12,0,-2,-8,-11,-22,-4,-8,5; MA /M: SY='Y'; M=-14,-19,-27,-20,-16,16,-15,-1,-3,-15,-2,-1,-17,-20,-15,-14,-15,-11,-7,4,32,-18; MA /M: SY='F'; M=-12,-23,-21,-28,-18,29,-27,-18,6,-22,14,5,-18,-24,-21,-16,-15,-5,3,-10,9,-18; MA /M: SY='P'; M=-10,-7,-27,-7,-5,-10,-14,-2,-17,-2,-19,-12,-1,3,-2,1,-1,-4,-19,-17,2,-6; MA /I: I=-3; MD=-14; MA /M: SY='W'; M=-10,-1,-25,1,-8,-13,3,-2,-17,-11,-13,-8,-2,-16,-8,-11,-10,-13,-17,4,-4,-8; D=-3; MA /I: I=-3; MD=-14; MA /M: SY='F'; M=-10,-20,-15,-25,-20,30,-19,-9,3,-20,6,2,-14,-21,-22,-15,-13,-8,3,-3,14,-20; D=-3; MA /I: I=-3; MI=0; MD=-14; IM=0; DM=-14; MA /M: SY='Y'; M=-6,-6,-15,-6,-5,-2,-11,4,-6,-5,-6,-4,-5,-9,-3,-5,-2,0,-7,-7,11,-6; D=-3; MA /I: I=-3; DM=-14; MA /M: SY='B'; M=-6,14,-16,14,-1,-17,-9,-3,-11,-6,-16,-12,13,-14,-5,-8,6,2,-7,-30,-13,-3; D=-3; MA /I: I=-3; DM=-14; MA /M: SY='H'; M=-5,-2,-22,-1,0,-18,-16,5,-14,-2,-13,-7,-3,0,1,-3,-3,-3,-12,-21,-5,-1; D=-3; MA /I: I=-3; DM=-14; MA /M: SY='S'; M=0,2,-19,-1,2,-19,-7,-4,-17,1,-16,-9,3,-5,1,-2,4,4,-14,-24,-13,1; D=-3; MA /I: I=-3; DM=-14; MA /M: SY='G'; M=-6,-6,-29,-5,-5,-27,33,-11,-33,-5,-25,-16,2,-17,-4,0,-2,-15,-27,-21,-21,-6; MA /M: SY='N'; M=-7,0,-20,-7,-9,-5,-17,-1,-8,-4,-9,-6,8,-20,-8,-4,-3,-1,-8,-24,-3,-9; MA /M: SY='V'; 
M=13,-21,-14,-25,-20,-9,-16,-23,12,-17,4,3,-19,-21,-17,-19,-4,-2,21,-25,-12,-19; MA /M: SY='T'; M=-6,-13,-18,-16,-13,8,-18,-10,-6,-12,-6,-5,-8,-20,-11,-10,3,9,-3,-14,9,-12; MA /M: SY='R'; M=-17,-9,-30,-10,0,-18,-20,-5,-28,31,-22,-10,-2,-18,7,52,-12,-11,-20,-12,-7,1; MA /M: SY='H'; M=-10,1,-25,-2,1,-12,-18,7,-14,0,-13,-6,5,-18,5,4,-4,-6,-15,-20,1,2; MA /M: SY='C'; M=-10,-20,120,-30,-30,-20,-30,-30,-30,-30,-20,-20,-20,-40,-30,-30,-10,-10,-10,-50,-30,-30; MA /M: SY='T'; M=-8,5,-20,4,-6,-14,-9,-12,-15,-10,-7,-10,2,-17,-9,-8,1,11,-11,-27,-10,-8; MA /I: I=-6; MI=0; MD=-32; IM=0; DM=-32; MA /M: SY='E'; M=2,0,-23,3,19,-25,-13,-7,-19,0,-19,-14,-1,0,11,-4,8,-1,-18,-28,-18,14; D=-6; MA /I: I=-6; DM=-32; MA /M: SY='D'; M=-12,28,-23,29,12,-25,-11,2,-25,-2,-22,-19,22,-12,1,-6,5,3,-23,-36,-16,6; MA /M: SY='G'; M=2,-10,-29,-10,-19,-29,64,-19,-38,-18,-29,-19,0,-20,-18,-17,0,-19,-28,-20,-29,-19; MA /M: SY='W'; M=-10,-15,-30,-17,-10,-11,-16,-15,-15,-6,-15,-10,-13,-20,-1,-5,-10,-6,-15,27,1,-4; MA /M: SY='W'; M=-10,-28,-38,-29,-22,1,-15,-25,-19,-17,-21,-19,-26,-24,-15,-17,-18,-14,-23,93,15,-15; MA /M: SY='E'; M=-3,-3,-23,-2,9,-12,-12,-5,-16,-7,-9,-10,-3,-12,-2,-8,-3,-6,-14,-24,-10,3; MA /M: SY='P'; M=-6,-8,-24,-9,-6,-14,-15,-12,-13,-6,-11,-8,-5,6,-7,-4,-2,3,-13,-26,-14,-8; MA /I: I=-4; MD=-22; MA /M: SY='F'; M=-9,-6,-17,-7,0,4,-13,2,-10,-3,-7,-3,-5,-12,-6,-2,-7,-8,-8,-11,2,-2; D=-4; MA /I: I=-4; MD=-22; MA /M: SY='P'; M=-6,-8,-22,-3,0,-17,-12,-7,-11,-4,-16,-10,-9,38,-5,-9,-6,-6,-14,-18,-14,-5; D=-4; MA /I: I=-4; MI=0; MD=-22; IM=0; DM=-22; MA /M: SY='W'; M=-12,-19,-27,-20,-15,4,-7,-8,-13,-9,-11,-8,-17,-18,-10,-6,-18,-15,-16,51,15,-12; D=-4; MA /I: I=-4; DM=-22; MA /M: SY='P'; M=-4,-9,-24,-8,-3,-20,-17,-10,-15,-3,-18,-12,-7,12,-5,1,1,2,-11,-28,-16,-7; MA /M: SY='N'; M=-15,26,-26,23,1,-14,-11,5,-20,-4,-22,-18,27,-14,-4,-7,0,-6,-24,-26,-3,-3; MA /M: SY='Y'; M=-8,-15,-25,-16,-14,9,-23,7,-4,-14,-2,-2,-13,-14,-11,-13,-11,-6,-8,-3,27,-15; MA /M: SY='T'; 
M=1,-4,-15,-6,-8,-10,-15,-17,-8,-13,-11,-10,-3,-15,-10,-13,13,18,0,-25,-11,-9; MA /M: SY='N'; M=5,4,-22,-1,3,-21,-5,1,-16,-5,-14,-8,6,-15,5,-8,1,-7,-16,-25,-12,3; MA /M: SY='C'; M=-10,-20,120,-30,-30,-20,-30,-30,-30,-30,-20,-20,-20,-40,-30,-30,-10,-10,-10,-50,-30,-30; MA /M: SY='E'; M=-7,-2,-24,-5,1,-17,1,-10,-19,-7,-15,-10,1,-9,-2,-7,1,0,-17,-25,-15,0; MA /M: M=-4,-7,-21,-8,0,-9,-17,-4,-10,-3,-10,-5,-5,-12,-4,-4,-1,-3,-7,-23,-4,-3; MA /M: SY='N'; M=-12,0,-26,-3,-6,-7,-18,-10,-8,-9,-13,-10,6,-1,-9,-8,-4,-6,-13,-26,-10,-9; MA /M: M=-11,-6,-27,-4,-4,-11,-17,-16,-8,-12,-2,-6,-12,-16,-10,-13,-11,-4,-8,-6,-6,-7; MA /I: E1=0; NR /RELEASE=40.7,103373; NR /TOTAL=61(61); /POSITIVE=61(61); /UNKNOWN=0(0); /FALSE_POS=0(0); NR /FALSE_NEG=0; /PARTIAL=0; CC /MATRIX_TYPE=protein_domain; CC /SCALING_DB=reversed; CC /AUTHOR=K_Hofmann; CC /TAXO-RANGE=??E??; /MAX-REPEAT=1; DR O14514, BAI1_HUMAN, T; O60241, BAI2_HUMAN, T; O60242, BAI3_HUMAN, T; DR O08893, CALR_CAVPO, T; P30988, CALR_HUMAN, T; Q60755, CALR_MOUSE, T; DR P25117, CALR_PIG , T; P79222, CALR_RABIT, T; P32214, CALR_RAT , T; DR Q16602, CGRR_HUMAN, T; Q63118, CGRR_RAT , T; Q90812, CRF1_CHICK, T; DR P34998, CRF1_HUMAN, T; P35347, CRF1_MOUSE, T; P35353, CRF1_RAT , T; DR O62772, CRF1_SHEEP, T; O42602, CRF1_XENLA, T; Q13324, CRF2_HUMAN, T; DR Q60748, CRF2_MOUSE, T; P47866, CRF2_RAT , T; O42603, CRF2_XENLA, T; DR Q16983, DIHR_ACHDO, T; P35464, DIHR_MANSE, T; P48546, GIPR_HUMAN, T; DR P43218, GIPR_MESAU, T; P43219, GIPR_RAT , T; P43220, GLP1_HUMAN, T; DR O35659, GLP1_MOUSE, T; P32301, GLP1_RAT , T; O95838, GLP2_HUMAN, T; DR Q9Z0W0, GLP2_RAT , T; P47871, GLR_HUMAN , T; Q61606, GLR_MOUSE , T; DR P30082, GLR_RAT , T; Q02643, GRFR_HUMAN, T; P32082, GRFR_MOUSE, T; DR P34999, GRFR_PIG , T; Q02644, GRFR_RAT , T; Q29627, PACR_BOVIN, T; DR P41586, PACR_HUMAN, T; P70205, PACR_MOUSE, T; P32215, PACR_RAT , T; DR P70555, PTH2_RAT , T; P49190, PTR2_HUMAN, T; P25107, PTRR_DIDMA, T; DR Q03431, PTRR_HUMAN, T; P41593, PTRR_MOUSE, T; P50133, PTRR_PIG , 
T; DR P25961, PTRR_RAT , T; P47872, SCRC_HUMAN, T; O46502, SCRC_RABIT, T; DR P23811, SCRC_RAT , T; Q90308, VIPR_CARAU, T; P32241, VIPR_HUMAN, T; DR Q28992, VIPR_PIG , T; P30083, VIPR_RAT , T; P41587, VIPS_HUMAN, T; DR P41588, VIPS_MOUSE, T; P35000, VIPS_RAT , T; P30650, YOW3_CAEEL, T; DR Q09460, YQ44_CAEEL, T; DO PDOC00559; // ID G_PROTEIN_RECEP_F2_4; MATRIX. AC PS50261; DT DEC-2001 (CREATED); DEC-2001 (DATA UPDATE); DEC-2001 (INFO UPDATE). DE G-protein coupled receptors family 2 profile 2. MA /GENERAL_SPEC: ALPHABET='ABCDEFGHIKLMNPQRSTVWYZ'; LENGTH=257; MA /DISJOINT: DEFINITION=PROTECT; N1=6; N2=252; MA /NORMALIZATION: MODE=1; FUNCTION=LINEAR; R1=2.2048; R2=0.01286811; TEXT='-LogE'; MA /CUT_OFF: LEVEL=0; SCORE=645; N_SCORE=10.5; MODE=1; TEXT='!'; MA /CUT_OFF: LEVEL=-1; SCORE=333; N_SCORE=6.5; MODE=1; TEXT='?'; MA /DEFAULT: D=-20; I=-20; B1=-100; E1=-100; MI=-105; MD=-105; IM=-105; DM=-105; MM=1; M0=-10; MA /I: B1=0; BI=-105; BD=-105; MA /M: SY='L'; M=-1,-17,-20,-20,-13,-5,-22,-12,6,-14,10,5,-14,-19,-12,-10,-11,-4,6,-24,-7,-14; MA /M: M=-8,-7,-24,-8,-6,-10,-15,-10,-11,0,-11,-5,-5,-16,-4,0,-3,0,-8,-19,-3,-6; MA /M: SY='I'; M=-2,-22,-20,-27,-21,4,-26,-18,14,-19,10,6,-19,-22,-18,-18,-12,-2,13,-11,3,-20; MA /M: SY='I'; M=-7,-26,-24,-31,-23,2,-27,-24,20,-23,13,10,-22,-23,-17,-21,-16,-7,14,-4,0,-21; MA /M: SY='Y'; M=-5,-10,-21,-13,-12,1,-12,-4,-9,-10,-8,-5,-5,-20,-9,-7,0,3,-8,-11,12,-12; MA /M: SY='T'; M=-3,-16,-21,-23,-18,1,-22,-15,4,-16,1,-1,-13,-20,-15,-15,-6,6,3,-4,6,-18; MA /M: SY='V'; M=1,-27,-18,-29,-25,-1,-24,-27,19,-21,10,6,-25,-25,-23,-21,-12,-4,25,-12,-6,-24; MA /M: SY='G'; M=-1,-13,-22,-15,-19,-16,31,-19,-25,-19,-17,-12,-6,-21,-18,-18,-3,-11,-19,-10,-18,-18; MA /M: SY='Y'; M=-2,-16,-3,-19,-15,-3,-21,4,-5,-18,0,-1,-12,-24,-12,-16,-7,-6,-4,-19,7,-15; MA /M: SY='S'; M=7,-13,-12,-17,-15,-9,1,-19,-10,-18,-14,-10,-6,-18,-14,-18,8,-1,-5,-20,-13,-15; MA /M: SY='I'; M=0,-25,-12,-29,-22,-2,-25,-24,15,-23,14,7,-22,-25,-19,-21,-13,-5,15,-15,-6,-21; MA /M: SY='S'; 
M=5,-1,0,-1,-4,-19,-8,-13,-18,-12,-22,-17,3,-14,-6,-13,25,16,-7,-38,-19,-5; MA /M: SY='L'; M=-9,-28,-18,-32,-24,19,-30,-23,20,-28,26,12,-24,-27,-24,-22,-19,-7,15,-17,3,-24; MA /M: SY='V'; M=6,-19,-9,-25,-20,2,-17,-22,6,-20,0,0,-15,-21,-20,-20,-1,0,13,-23,-8,-20; MA /M: SY='C'; M=7,-9,14,-15,-13,-9,-11,-18,-15,-16,-15,-13,-4,-19,-14,-17,12,11,-5,-32,-16,-13; MA /M: SY='L'; M=-9,-26,-13,-29,-21,11,-29,-21,16,-27,32,14,-24,-27,-21,-20,-20,-4,10,-20,0,-21; MA /M: SY='L'; M=1,-23,-15,-27,-21,8,-24,-22,13,-23,16,6,-20,-23,-21,-20,-10,0,14,-20,-3,-21; MA /M: SY='V'; M=-3,-20,-12,-25,-21,4,-19,-22,7,-21,9,5,-18,-21,-21,-19,-8,3,11,-22,-6,-21; MA /M: SY='A'; M=17,-12,3,-19,-14,-7,-12,-20,-9,-15,-9,-9,-9,-17,-15,-18,8,8,0,-25,-13,-14; MA /M: SY='I'; M=-4,-24,-18,-28,-22,6,-27,-22,20,-23,18,12,-20,-23,-19,-20,-11,-2,18,-22,-3,-21; MA /M: SY='L'; M=0,-21,-14,-26,-20,8,-20,-22,6,-22,11,3,-18,-23,-21,-19,-8,2,8,-17,-3,-20; MA /M: SY='I'; M=-5,-16,-16,-21,-19,0,-28,-23,15,-20,6,4,-14,-20,-18,-20,-6,7,15,-24,-4,-20; MA /M: SY='F'; M=-14,-27,-21,-32,-23,38,-29,-15,8,-25,23,9,-23,-29,-25,-16,-22,-9,4,-4,19,-23; MA /M: SY='V'; M=-5,-20,-5,-24,-20,-4,-23,-22,7,-19,8,3,-17,-24,-18,-17,-8,2,10,-25,-9,-19; MA /M: SY='L'; M=-4,-15,-23,-18,-15,1,-14,-13,-2,-14,3,0,-12,-21,-14,-9,-10,-5,-2,-14,3,-16; MA /M: SY='F'; M=-16,-22,-18,-26,-22,42,-27,-19,1,-25,13,2,-19,-28,-29,-19,-19,-10,1,-2,14,-22; MA /M: SY='R'; M=-14,-12,-30,-12,-3,-15,-19,-9,-21,14,-16,-10,-7,-17,1,31,-10,-9,-16,-2,-6,-3; MA /M: SY='R'; M=-12,-2,-27,-1,1,-17,-16,-2,-20,10,-20,-11,2,-11,1,19,-3,-6,-16,-24,-8,-2; MA /M: SY='L'; M=-10,-19,-24,-20,-14,0,-24,-15,9,-18,20,9,-17,-16,-14,-12,-19,-9,1,-21,-3,-15; MA /M: SY='H'; M=-13,-3,-24,-4,6,-15,-18,16,-22,4,-16,-8,1,-16,7,12,-4,-8,-20,-24,-4,5; MA /I: I=-3; MI=0; MD=-14; IM=0; DM=-14; MA /M: SY='C'; M=-7,-11,24,-15,-13,-17,-16,-17,-20,-8,-15,-11,-8,-25,-10,-5,-5,-6,-10,-32,-18,-12; MA /M: SY='Y'; M=-12,-12,-11,-14,-11,2,-24,-4,-8,-13,-3,-5,-12,-14,-10,-12,-9,0,-9,-12,14,-12; MA 
/M: SY='R'; M=-15,-13,-31,-11,-3,-19,-20,-5,-21,11,-20,-10,-7,10,2,30,-9,-9,-19,-21,-10,-5; MA /M: SY='N'; M=-5,10,-20,2,2,-10,-12,-2,-11,-6,-15,-11,19,-17,-4,-7,4,1,-13,-30,-12,-1; MA /M: SY='Y'; M=-11,-14,-23,-17,-13,1,-22,1,-6,-3,-6,-1,-10,-22,-7,6,-8,-2,-4,-11,14,-12; MA /M: SY='I'; M=-6,-28,-26,-31,-23,-4,-32,-26,29,-25,15,11,-23,-5,-19,-25,-18,-8,18,-23,-7,-24; MA /M: SY='H'; M=-11,-10,-25,-13,-10,-12,-23,36,-5,-15,-5,3,-2,-19,-3,-9,-6,-7,-6,-28,6,-10; MA /M: SY='L'; M=-8,-19,-23,-23,-13,1,-25,-18,5,-8,7,7,-16,-20,-12,-6,-14,-5,4,-14,-2,-13; MA /M: SY='H'; M=-12,1,-18,-9,-10,2,-18,14,-9,-11,-11,-3,11,-23,-5,-8,-6,-8,-14,-19,8,-9; MA /M: SY='L'; M=-11,-29,-22,-31,-22,12,-31,-19,22,-28,40,19,-28,-29,-20,-20,-27,-10,12,-16,6,-22; MA /M: SY='F'; M=0,-16,3,-24,-19,15,-19,-18,-6,-20,-4,-4,-10,-21,-21,-19,-3,-1,-2,-19,-3,-19; MA /M: SY='V'; M=7,-20,-14,-27,-20,2,-20,-21,10,-20,10,7,-17,-21,-18,-20,-7,0,12,-21,-6,-19; MA /M: SY='C'; M=4,-10,28,-16,-15,-12,-10,-20,-20,-18,-19,-16,-5,-22,-16,-19,11,7,-8,-35,-19,-15; MA /M: SY='F'; M=-15,-23,-10,-27,-22,35,-28,-9,0,-22,8,2,-19,-29,-22,-17,-17,-9,-3,0,27,-22; MA /M: SY='L'; M=-5,-23,-19,-29,-22,6,-23,-20,18,-24,20,14,-19,-24,-19,-20,-17,-7,12,-20,-3,-21; MA /M: SY='L'; M=-6,-25,-12,-28,-21,6,-27,-21,14,-24,24,13,-23,-23,-19,-20,-17,-4,11,-22,-3,-20; MA /M: SY='R'; M=-1,-10,-18,-13,-9,-13,-12,-8,-10,-1,-12,-7,-5,-19,-6,3,-2,-4,-4,-21,-5,-9; MA /M: SY='A'; M=11,-1,-13,-5,-2,-15,-6,-12,-15,-9,-14,-12,2,-14,-5,-12,10,2,-9,-29,-16,-3; MA /M: SY='V'; M=3,-19,-10,-25,-19,-3,-20,-22,8,-18,6,3,-16,-21,-18,-17,-6,1,11,-24,-9,-19; MA /M: SY='A'; M=5,-15,-18,-18,-15,-10,-2,-20,-5,-13,-7,-4,-11,-16,-15,-14,0,0,2,-24,-14,-15; MA /M: SY='F'; M=-12,-21,-20,-24,-22,20,-27,-15,4,-19,4,0,-19,-27,-22,-16,-17,-9,5,6,18,-22; MA /M: SY='L'; M=-10,-28,-12,-34,-25,19,-30,-21,18,-27,25,16,-24,-28,-24,-21,-22,-9,13,-16,3,-24; MA /M: SY='I'; M=0,-22,-20,-28,-23,-3,-27,-24,25,-22,13,10,-17,-20,-19,-22,-12,-5,22,-25,-7,-23; MA /M: SY='R'; 
M=-8,-3,-28,-2,-2,-25,6,-6,-27,6,-22,-13,1,-16,0,8,-2,-9,-21,-21,-15,-2; MA /M: SY='L'; M=-6,-2,-22,-2,-8,-8,-21,-12,-2,-13,1,-2,-7,-18,-9,-13,-7,-2,-2,-23,-4,-9; MA /M: SY='V'; M=-2,-11,-21,-15,-13,-5,-18,-16,1,-14,-3,-2,-9,-14,-14,-13,-6,-1,2,-18,-7,-14; MA /M: SY='A'; M=4,-12,-19,-14,-9,-13,-16,-16,1,-10,0,-1,-11,-17,-8,-11,-4,-4,4,-25,-12,-9; MA /M: SY='T'; M=-2,-10,-21,-11,-5,-14,-4,-15,-11,-10,-4,-4,-9,-17,-6,-9,-3,1,-7,-23,-13,-6; MA /M: SY='H'; M=-8,-2,-22,-1,-2,-11,-13,1,-14,-8,-9,-8,-3,-17,-4,-7,-5,-7,-12,-23,-4,-4; MA /I: I=-2; MD=-9; MA /M: SY='E'; M=-6,-7,-18,-6,1,-11,-10,-4,-14,-5,-11,-9,-5,-11,-3,-3,-3,-5,-12,-14,-4,-2; D=-2; MA /I: I=-2; MD=-9; MA /M: SY='E'; M=-6,1,-19,0,5,-13,-11,0,-15,1,-15,-9,4,-12,1,1,2,-2,-12,-22,-7,2; D=-2; MA /I: I=-2; MD=-9; MA /M: M=-2,-5,-19,-4,-6,-14,-9,-10,-6,-6,-8,-4,-7,-15,-7,-7,-4,-5,-1,-18,-8,-7; D=-2; MA /I: I=-2; MD=-9; MA /M: SY='A'; M=4,-2,-9,-3,-1,-15,-6,-10,-14,-8,-13,-10,-3,-8,-5,-12,2,-3,-10,-23,-14,-3; D=-2; MA /I: I=-2; MD=-9; MA /M: SY='C'; M=-4,-7,5,-9,-9,-14,-8,-13,-11,-11,-10,-7,-5,-15,-7,-10,0,2,-4,-25,-13,-8; D=-2; MA /I: I=-2; MD=-9; MA /M: SY='D'; M=-5,8,-10,9,1,-18,-7,-7,-15,-5,-13,-11,5,-13,-3,-7,2,-1,-10,-26,-14,-1; D=-2; MA /I: I=-2; MD=-9; MA /M: SY='H'; M=-4,-4,-15,-5,-2,-13,-6,3,-14,-2,-13,-7,-1,-8,3,2,-1,-4,-12,-16,-4,-1; D=-2; MA /I: I=-2; MD=-9; MA /M: M=-4,-4,-3,-5,-3,-11,-4,-6,-13,-6,-9,-6,-3,-13,-4,-4,-3,-5,-10,-19,-10,-4; D=-2; MA /I: I=-2; MD=-9; MA /M: SY='D'; M=-3,2,-14,3,3,-15,-4,-1,-15,-3,-13,-9,1,-6,3,-3,3,-1,-13,-17,-8,2; D=-2; MA /I: I=-2; MI=0; MD=-9; IM=0; DM=-9; MA /M: SY='T'; M=-2,-1,-13,-4,-3,-6,-5,-8,-10,-7,-8,-7,2,-11,-3,-6,5,7,-8,-18,-8,-3; D=-2; MA /I: I=-2; DM=-9; MA /M: SY='N'; M=-2,7,-20,6,6,-21,3,-1,-21,1,-20,-12,9,-11,2,-3,5,-5,-19,-24,-13,4; D=-2; MA /I: I=-2; DM=-9; MA /M: SY='N'; M=-5,0,-21,-3,-2,-17,-7,-7,-17,-1,-20,-13,5,4,-4,-3,3,1,-16,-23,-13,-4; D=-2; MA /I: I=-2; DM=-9; MA /M: SY='V'; 
M=-1,-18,-16,-21,-15,5,-22,-18,5,-15,5,1,-16,-20,-17,-15,-6,2,10,-17,-3,-15; D=-2; MA /I: I=-2; DM=-9; MA /M: SY='G'; M=1,-15,-23,-16,-13,-13,7,-16,-12,-16,-7,-6,-10,-14,-11,-15,-2,-7,-10,-17,-12,-12; D=-2; MA /I: I=-2; DM=-9; MA /M: SY='C'; M=-10,-20,120,-30,-30,-20,-30,-30,-30,-30,-20,-20,-20,-40,-30,-30,-10,-10,-10,-50,-30,-30; MA /M: SY='T'; M=-6,-8,-22,-11,-6,-11,-17,-10,-13,4,-12,-6,-5,-14,-4,5,0,8,-8,-20,-2,-7; MA /M: SY='V'; M=3,-25,-16,-29,-23,-2,-24,-25,22,-23,16,10,-22,-24,-21,-22,-12,-4,23,-24,-8,-23; MA /M: SY='V'; M=-1,-23,-18,-26,-21,0,-25,-18,18,-20,12,9,-20,-24,-17,-19,-9,-3,20,-23,-3,-20; MA /M: SY='F'; M=6,-20,-17,-28,-19,21,-17,-16,-1,-19,2,0,-15,-21,-20,-18,-7,-5,3,-10,3,-18; MA /M: SY='V'; M=-2,-24,-19,-28,-23,3,-21,-21,16,-21,13,12,-21,-24,-20,-19,-13,-4,17,-17,-2,-22; MA /M: SY='L'; M=-5,-27,-16,-31,-23,17,-27,-21,16,-24,22,12,-24,-26,-23,-20,-18,-6,15,-15,4,-23; MA /I: I=-6; MI=0; MD=-33; IM=0; DM=-33; MA /M: SY='L'; M=-9,-20,-19,-25,-19,7,-26,-9,12,-22,19,13,-17,-24,-16,-16,-14,-2,9,-21,2,-18; MA /M: SY='H'; M=-16,-8,-28,-9,-6,-3,-24,35,-11,-7,-9,0,-3,-21,6,-3,-10,-9,-17,-7,30,-4; MA /M: SY='Y'; M=-20,-24,-26,-29,-24,52,-30,2,0,-19,4,0,-20,-30,-23,-14,-20,-10,-6,21,58,-24; MA /M: SY='F'; M=-11,-25,-1,-33,-26,40,-20,-20,-5,-27,4,-1,-18,-29,-31,-21,-15,-9,-2,-8,9,-26; MA /M: SY='F'; M=-6,-15,-11,-20,-18,3,-8,-15,-6,-17,-4,-3,-9,-22,-17,-14,-3,-1,-3,-20,-3,-17; MA /M: SY='M'; M=-5,-22,-18,-27,-20,2,-21,-13,15,-17,21,30,-21,-23,-12,-15,-16,-3,12,-22,-3,-16; MA /M: SY='A'; M=30,-8,-3,-16,-11,-18,-5,-20,-12,-12,-13,-11,-7,-13,-11,-18,14,10,-1,-27,-19,-11; MA /M: SY='N'; M=6,8,-7,-1,-6,-19,2,-8,-19,-10,-24,-17,18,-16,-6,-11,18,8,-15,-35,-20,-6; MA /M: SY='F'; M=-9,-21,-16,-26,-21,32,-22,-12,-5,-20,-3,-4,-15,-25,-22,-16,-7,-4,-3,4,19,-20; MA /M: SY='F'; M=-5,-22,-6,-28,-22,12,-26,-20,8,-23,7,3,-17,-24,-21,-20,-9,-1,8,-18,2,-22; MA /M: SY='W'; M=-20,-40,-50,-40,-30,10,-20,-30,-20,-20,-20,-20,-40,-30,-20,-20,-40,-30,-30,150,30,-20; MA /M: SY='W'; 
M=-11,-26,-28,-31,-23,8,-23,-18,3,-18,7,12,-25,-24,-15,-16,-21,-7,-3,32,9,-17; MA /M: SY='L'; M=-7,-27,-14,-28,-22,9,-26,-23,16,-25,27,11,-26,-28,-23,-19,-18,-3,18,-22,-3,-22; MA /M: SY='V'; M=-2,-24,-5,-30,-25,-1,-27,-25,19,-23,12,11,-22,-25,-21,-22,-12,-2,21,-26,-8,-24; MA /M: SY='E'; M=-10,-9,-26,-4,22,-10,-25,-10,-6,-9,10,-2,-13,-14,1,-10,-13,-10,-11,-25,-10,11; MA /M: SY='G'; M=15,-6,-13,-11,-12,-22,20,-18,-23,-14,-21,-16,0,-15,-12,-17,13,4,-13,-27,-22,-12; MA /M: SY='L'; M=-10,-29,-17,-32,-25,22,-30,-20,16,-26,23,10,-25,-29,-25,-21,-22,-9,13,-8,11,-25; MA /M: SY='Y'; M=-4,-9,-20,-13,-10,1,-21,8,-9,-10,-9,-5,-7,-18,-5,-10,1,10,-8,-8,22,-9; MA /M: SY='W'; M=-13,-32,-30,-33,-24,8,-27,-24,9,-26,21,7,-31,-28,-19,-20,-30,-16,-1,35,9,-20; MA /M: SY='Y'; M=-19,-14,-26,-19,-17,27,-25,27,-9,-16,-5,-2,-7,-26,-13,-10,-15,-12,-13,4,38,-16; MA /M: SY='L'; M=-9,-13,-13,-18,-13,-3,-21,-13,-2,-12,10,5,-9,-23,-10,-3,-10,1,-3,-21,-6,-12; MA /M: SY='L'; M=12,-17,-16,-22,-13,-7,-15,-16,2,-15,13,7,-16,-19,-10,-13,-6,-1,2,-22,-10,-12; MA /M: SY='L'; M=9,-23,-18,-29,-21,0,-20,-23,15,-22,17,8,-21,-22,-19,-22,-12,-6,15,-20,-7,-21; MA /M: SY='V'; M=4,-15,-17,-19,-20,-11,0,-19,-1,-15,-7,-2,-11,-21,-19,-15,-2,-1,9,-25,-14,-19; MA /M: SY='M'; M=-5,-11,-22,-13,-3,-11,-14,-11,-7,0,-3,3,-9,-16,-3,0,-7,-6,-5,-23,-10,-4; MA /M: SY='K'; M=-1,-9,-20,-11,-6,-15,-17,-5,-13,8,-13,-4,-6,-16,-4,5,-2,0,-3,-24,-8,-6; MA /M: SY='W'; M=-13,-24,-30,-26,-16,15,-19,-19,-11,-12,-8,-9,-21,-24,-17,-10,-18,-13,-11,36,12,-14; MA /M: SY='G'; M=-7,-10,-25,-12,-10,-4,2,-13,-18,-14,-16,-11,-5,-4,-13,-13,-2,-4,-15,-20,-11,-11; MA /M: SY='S'; M=-2,3,-19,2,1,-21,-9,2,-19,-7,-22,-14,7,-4,-3,-8,10,4,-15,-33,-15,-2; MA /I: I=-5; MD=-27; MA /M: SY='E'; M=-8,6,-26,12,30,-23,-16,0,-22,3,-19,-16,2,-1,9,-2,1,-6,-21,-27,-13,19; D=-5; MA /I: I=-5; MI=0; MD=-27; IM=0; DM=-27; MA /M: SY='E'; M=-7,3,-26,6,19,-24,-14,-4,-26,11,-19,-14,2,-11,8,14,-1,-4,-21,-25,-14,12; MA /M: SY='K'; 
M=1,-5,-25,-7,-3,-22,-6,-12,-18,4,-16,-9,-3,-9,-1,3,-3,-6,-14,-22,-15,-3; MA /M: SY='Y'; M=-11,-11,-21,-14,-9,0,-22,1,-12,0,-9,-5,-6,-15,-7,2,-11,-7,-12,-13,10,-10; MA /M: SY='F'; M=-5,-13,-20,-15,-6,2,-14,-13,-7,-12,-2,-2,-9,-18,-10,-9,-1,0,-6,-17,-4,-8; MA /M: SY='R'; M=-7,-14,-25,-15,-6,-10,-18,-9,-16,1,-13,-7,-10,-15,2,5,-6,-7,-15,1,-3,-2; MA /M: SY='Y'; M=-14,-22,-29,-24,-19,13,-19,-1,-4,-14,0,1,-20,-24,-13,-12,-19,-12,-10,22,30,-18; MA /M: SY='F'; M=-19,-26,-24,-31,-25,51,-30,-4,3,-21,9,2,-21,-30,-26,-16,-20,-10,-2,15,47,-25; MA /M: SY='H'; M=-15,-14,-18,-16,-13,-2,-26,34,-6,-16,-1,4,-8,-24,-7,-10,-14,-12,-7,-20,16,-13; MA /M: SY='L'; M=-3,-25,-11,-29,-21,6,-25,-20,14,-25,26,13,-24,-26,-20,-20,-18,-7,11,-20,-3,-21; MA /M: SY='I'; M=5,-23,-16,-29,-22,3,-23,-21,16,-21,12,8,-20,-22,-20,-22,-11,-6,16,-18,-2,-22; MA /M: SY='G'; M=12,-12,-3,-16,-19,-24,31,-22,-27,-18,-21,-16,-7,-20,-19,-21,1,-11,-15,-25,-26,-19; MA /M: SY='W'; M=-20,-37,-47,-37,-29,13,-21,-23,-17,-19,-17,-17,-37,-30,-19,-19,-37,-27,-27,133,37,-20; MA /M: SY='G'; M=4,-12,-18,-14,-19,-24,43,-20,-28,-20,-20,-15,-5,-21,-19,-20,0,-14,-19,-23,-25,-19; MA /M: SY='V'; M=-3,-25,-18,-29,-23,5,-28,-25,21,-23,17,9,-22,-22,-22,-21,-13,0,22,-22,-4,-24; MA /M: SY='P'; M=-8,-18,-38,-9,0,-29,-18,-19,-20,-10,-30,-20,-18,82,-9,-19,-6,-8,-28,-31,-29,-9; MA /M: SY='A'; M=16,-16,-15,-21,-15,-3,-8,-19,-2,-17,2,-1,-13,-18,-15,-18,0,0,3,-21,-10,-15; MA /M: SY='V'; M=-5,-29,-18,-31,-26,4,-31,-27,28,-24,19,13,-26,-24,-25,-22,-16,-4,31,-24,-5,-27; MA /M: SY='V'; M=-8,-20,-17,-24,-16,6,-26,-17,5,-11,3,4,-17,-19,-15,-11,-12,-7,7,-19,-1,-15; MA /M: SY='T'; M=1,-11,-12,-17,-16,-8,-22,-22,4,-14,-5,-3,-8,-16,-15,-15,11,27,14,-30,-10,-16; MA /M: SY='V'; M=-1,-23,-18,-28,-25,-6,-16,-25,19,-21,6,9,-18,-22,-20,-22,-9,-3,21,-25,-10,-24; MA /M: SY='I'; M=3,-20,-19,-26,-20,-6,-25,-24,17,-19,7,6,-17,-13,-17,-20,-5,6,16,-25,-9,-20; MA /M: SY='W'; M=-5,-25,-28,-28,-22,-1,-21,-18,2,-16,-4,-1,-22,-23,-15,-14,-16,-10,0,30,6,-18; MA /M: SY='L'; 
M=6,-21,-16,-25,-18,-2,-20,-20,11,-21,18,9,-20,-22,-17,-19,-10,-2,12,-22,-6,-18; MA /M: SY='A'; M=11,-18,-18,-24,-19,-4,-11,-22,6,-19,1,0,-13,-18,-17,-21,-2,-1,8,-21,-10,-18; MA /M: SY='V'; M=-2,-21,-12,-26,-21,2,-18,-20,8,-18,8,8,-18,-23,-19,-17,-11,-3,11,-21,-5,-20; MA /M: SY='R'; M=-8,7,-24,3,0,-22,-6,1,-25,11,-24,-14,13,-16,3,16,3,-4,-20,-27,-13,0; MA /M: M=-2,-9,-19,-11,-4,-15,-17,-8,-9,-5,-9,-6,-6,-9,-1,-7,-2,-3,-10,-20,-7,-4; MA /M: SY='V'; M=-5,-18,-20,-19,-15,-3,-26,-8,10,-15,5,5,-17,-23,-13,-13,-11,-5,13,-21,2,-15; MA /M: SY='D'; M=-11,7,-26,11,1,-10,-7,-6,-21,-6,-15,-14,2,-17,-5,-9,-3,-8,-17,-21,-4,-2; MA /I: I=-4; MI=0; MD=-20; IM=0; DM=-20; MA /M: SY='D'; M=-10,12,-17,18,4,-9,-8,1,-15,-3,-12,-11,4,-10,-2,-6,-2,-5,-13,-12,3,0; D=-4; MA /I: I=-4; DM=-20; MA /M: M=-5,-5,-19,-3,-2,-12,-2,-5,-12,-9,-12,-9,-3,-1,-6,-10,0,-5,-10,-21,-11,-5; D=-4; MA /I: I=-4; DM=-20; MA /M: SY='L'; M=-7,-10,-22,-12,-4,-5,-24,-15,3,-12,5,1,-10,-16,-10,-12,-9,-1,2,-23,-6,-8; MA /M: SY='B'; M=-1,7,-21,6,-3,-22,-2,-9,-18,-9,-19,-14,7,-6,-7,-12,7,3,-13,-32,-18,-6; MA /M: SY='G'; M=-3,2,-25,3,-4,-23,17,-13,-27,-8,-23,-17,4,-11,-10,-11,2,-6,-20,-26,-20,-7; MA /M: SY='Y'; M=-9,-20,-27,-24,-17,1,-17,-12,2,-15,-1,1,-15,-23,-13,-12,-15,-12,-1,3,4,-16; MA /M: SY='C'; M=-10,-20,120,-30,-30,-20,-30,-30,-30,-30,-20,-20,-20,-40,-30,-30,-10,-10,-10,-50,-30,-30; MA /M: SY='W'; M=-18,-32,-39,-34,-26,20,-22,-19,-12,-19,-11,-13,-30,-27,-20,-18,-28,-20,-20,92,33,-20; MA /M: SY='V'; M=-6,-17,-20,-17,-18,-3,-25,-22,13,-19,10,4,-19,-23,-20,-19,-12,-3,18,-21,-6,-19; MA /M: SY='G'; M=-2,-5,-24,-6,-9,-23,22,-13,-23,-12,-19,-11,1,-15,-7,-11,4,-6,-19,-26,-20,-9; MA /M: SY='N'; M=-7,5,-20,-3,-8,-8,-15,-4,-5,-9,-8,-7,12,-20,-7,-9,0,3,-8,-26,-3,-9; MA /M: SY='E'; M=-6,3,-24,3,7,-18,-12,-4,-15,-4,-12,-9,2,-14,5,-6,1,-3,-16,-24,-8,6; MA /M: SY='N'; M=-8,16,-22,11,-2,-17,-7,0,-18,-6,-18,-14,20,-15,-3,-7,4,1,-19,-30,-11,-3; MA /I: MD=-27; MA /M: 
M=-3,-8,-19,-9,-8,-12,-7,-10,-7,-9,-6,-3,-6,-12,-8,-10,-1,0,-3,-24,-10,-9; D=-5; MA /I: I=-6; MI=0; MD=-27; IM=0; DM=-27; MA /M: SY='D'; M=0,6,-13,8,1,-14,0,-1,-13,-3,-11,-8,3,-7,-1,-4,2,-3,-9,-16,-9,-1; D=-5; MA /I: DM=-27; MA /M: SY='A'; M=5,-10,-17,-13,-10,-4,-11,-2,-6,-10,-7,-4,-7,-8,-8,-12,-2,-4,-5,-9,3,-10; D=-5; MA /I: DM=-27; MA /M: SY='L'; M=-13,-21,-23,-23,-14,10,-27,-6,4,-16,17,7,-17,-24,-13,-6,-18,-8,0,-15,7,-14; D=-5; MA /I: DM=-27; MA /M: SY='W'; M=-12,-19,-28,-21,-15,5,-23,-17,-4,-10,-1,-3,-16,-23,-14,-2,-17,-10,-6,10,5,-15; MA /M: SY='W'; M=-12,-27,-37,-27,-24,1,0,-18,-16,-18,-13,-12,-23,-26,-18,-17,-23,-20,-21,62,17,-20; MA /M: SY='F'; M=-8,-24,-21,-32,-24,25,-28,-22,15,-25,11,6,-16,-22,-24,-21,-11,-2,10,-12,7,-23; MA /M: SY='F'; M=-11,-29,-20,-34,-28,26,-32,-20,21,-26,15,9,-23,-27,-28,-21,-18,-7,21,-12,10,-28; MA /M: SY='L'; M=-7,-18,-23,-19,-13,-1,-25,-7,3,-9,10,5,-16,-23,-8,-5,-16,-9,1,-15,6,-12; MA /M: SY='G'; M=12,-12,-21,-17,-17,-17,25,-21,-18,-17,-14,-11,-7,-18,-18,-19,2,-5,-10,-21,-20,-17; MA /M: SY='P'; M=-10,-20,-39,-10,-1,-28,-20,-19,-19,-10,-29,-19,-20,85,-10,-20,-10,-9,-29,-29,-27,-10; MA /M: SY='I'; M=-6,-27,-21,-31,-23,7,-29,-22,23,-23,22,14,-23,-25,-20,-18,-18,-7,19,-20,-1,-23; MA /M: SY='L'; M=-6,-24,1,-28,-23,5,-20,-23,3,-24,8,3,-20,-27,-22,-21,-13,-6,6,-15,-5,-22; MA /M: SY='V'; M=1,-22,-16,-27,-22,6,-16,-23,9,-22,9,4,-18,-23,-22,-20,-9,-2,12,-19,-5,-22; MA /M: SY='V'; M=6,-16,-13,-20,-16,-4,-18,-13,4,-15,-4,-3,-14,-20,-14,-18,-1,-1,7,-16,5,-17; MA /M: SY='I'; M=-10,-30,-24,-34,-25,9,-32,-25,27,-28,27,15,-26,-26,-21,-23,-23,-10,17,-8,3,-25; MA /M: SY='L'; M=-1,-24,-16,-28,-23,7,-17,-22,11,-24,15,7,-21,-25,-22,-21,-13,-7,12,-19,-4,-22; MA /M: SY='I'; M=-5,-27,-18,-32,-26,1,-32,-24,31,-23,17,15,-23,-25,-21,-23,-16,-5,28,-21,-1,-26; MA /M: SY='N'; M=-4,19,-23,7,-7,-24,25,-2,-27,-8,-30,-20,34,-19,-7,-8,8,-6,-28,-33,-24,-7; MA /M: SY='F'; M=1,-17,-16,-23,-19,11,-12,-20,-1,-20,4,-1,-13,-21,-20,-17,-3,5,3,-17,-3,-18; MA /M: SY='I'; 
M=-7,-24,-15,-28,-23,12,-25,-20,15,-23,11,7,-19,-25,-22,-20,-10,-3,15,-19,2,-23; MA /M: SY='F'; M=-14,-28,-20,-35,-26,40,-30,-21,13,-28,23,10,-22,-28,-29,-20,-21,-7,8,-7,13,-25; MA /M: SY='L'; M=-12,-29,-21,-32,-24,27,-29,-19,16,-28,29,12,-25,-29,-25,-20,-24,-10,10,-10,12,-24; MA /M: SY='L'; M=-7,-26,-18,-30,-25,8,-22,-23,18,-25,19,12,-22,-26,-22,-21,-17,-6,15,-20,-2,-24; MA /M: SY='A'; M=2,-7,-9,-14,-11,-14,-12,-12,-9,-8,-6,-5,-2,-20,-6,-4,-3,-4,-7,-24,-13,-9; MA /M: SY='G'; M=-1,-17,-23,-22,-22,-13,6,-24,2,-21,-6,-2,-10,-19,-19,-21,-3,-2,5,-23,-15,-22; MA /M: SY='I'; M=-7,-27,-20,-33,-25,19,-29,-23,21,-25,16,12,-21,-25,-24,-21,-15,-6,18,-16,3,-25; MA /M: SY='R'; M=-10,-18,-16,-20,-14,-11,-22,-13,-8,2,-10,-3,-14,-24,-8,13,-13,-9,0,-7,-5,-13; MA /M: SY='V'; M=0,-15,-18,-19,-15,-7,-20,-16,10,-12,-3,4,-10,-18,-10,-14,1,2,11,-24,-4,-14; MA /M: SY='L'; M=-8,-26,-19,-28,-19,6,-27,-18,17,-25,37,20,-25,-27,-17,-18,-22,-4,11,-22,-2,-18; MA /M: SY='F'; M=-11,-23,-9,-29,-23,18,-28,-16,6,-18,6,3,-18,-27,-22,-12,-15,-7,7,-9,10,-23; MA /M: SY='R'; M=-5,-2,-22,-5,1,-23,-13,0,-22,9,-21,-10,4,-15,11,19,6,3,-16,-26,-12,5; MA /M: SY='K'; M=-5,-14,-26,-17,-8,-15,-25,-16,3,8,-7,2,-10,-16,-5,1,-11,-8,5,-22,-7,-8; MA /M: SY='R'; M=-13,-16,-25,-17,-8,-8,-25,-11,-4,2,5,4,-12,-21,-3,18,-15,-8,-4,-21,-6,-8; MA /M: SY='R'; M=-8,-4,-23,-6,-1,-20,-11,0,-22,8,-22,-11,4,-15,5,20,6,1,-16,-25,-11,0; MA /M: SY='S'; M=-2,-9,-16,-12,-4,-12,-17,-10,-4,-9,-10,-4,-6,-11,-6,-11,1,-1,-1,-27,-10,-6; MA /M: SY='M'; M=0,-10,-22,-14,-11,-11,-13,-12,-1,-12,-6,1,-7,-9,-9,-13,-2,-1,-2,-24,-9,-11; MA /M: SY='K'; M=-11,4,-26,3,2,-20,-18,0,-17,9,-19,-9,6,-10,6,6,-4,-6,-15,-25,-8,3; MA /M: SY='T'; M=-4,-5,-23,-7,-2,-18,-10,-1,-13,-6,-11,-4,-3,-11,0,-7,0,1,-12,-26,-10,-2; MA /M: SY='D'; M=-6,3,-24,7,2,-25,-3,-7,-22,-1,-16,-10,0,-13,1,-1,-3,-8,-17,-27,-16,1; MA /M: SY='G'; M=-5,-4,-21,-5,0,-21,7,-4,-25,-3,-19,-12,0,-17,-4,-5,-2,-10,-20,-22,-14,-3; MA /M: SY='S'; 
M=-2,3,-18,0,0,-22,-6,-7,-23,1,-22,-14,6,-11,-1,1,10,10,-16,-29,-15,-1; MA /M: SY='D'; M=-11,11,-27,14,10,-25,-17,-2,-23,9,-21,-14,7,-9,3,7,-2,-4,-18,-29,-13,5; MA /M: SY='T'; M=-6,-9,-20,-12,-7,-13,-18,-12,-5,-3,-6,-1,-8,-17,0,-2,-1,6,-1,-23,-6,-4; MA /I: I=-3; MI=0; MD=-11; IM=0; DM=-11; MA /M: SY='L'; M=-2,-6,-12,-7,-2,-5,-9,-7,-3,-4,1,0,-5,-10,-3,-4,-3,-2,-3,-13,-5,-2; D=-2; MA /I: DM=-11; MA /M: SY='M'; M=-5,-4,-15,-5,0,-10,-10,-3,-3,-1,-3,6,-4,-5,3,-2,-5,-4,-5,-14,-6,1; D=-2; MA /I: DM=-11; MA /M: SY='Y'; M=-7,-17,-22,-19,-13,4,-24,-1,3,-10,5,4,-14,-20,-10,-10,-12,-6,0,-8,17,-14; D=-2; MA /I: DM=-11; MA /M: SY='R'; M=-10,-13,-23,-15,-7,-10,-18,-10,-13,9,-7,-1,-8,-18,-2,20,-10,-8,-10,-12,-6,-6; D=-2; MA /I: DM=-11; MA /M: SY='R'; M=-6,-9,-19,-13,-8,-16,-19,-13,-6,4,-10,-2,-5,-18,-3,8,-4,-2,-2,-25,-11,-7; MA /M: SY='L'; M=1,-17,-24,-18,-15,-8,-1,-16,-8,-16,2,-2,-15,-19,-14,-15,-11,-9,-8,-8,-5,-15; MA /M: SY='V'; M=7,-17,-16,-22,-16,-7,-20,-19,9,-16,7,3,-14,-21,-12,-15,-5,0,13,-24,-10,-15; MA /M: SY='R'; M=-14,-13,-23,-17,-8,6,-20,-12,-17,9,-12,-7,-7,-20,-8,13,-12,-9,-13,-9,1,-7; MA /M: SY='S'; M=10,-7,-16,-9,-5,-19,0,-16,-18,-6,-22,-15,-2,-11,-6,-10,14,5,-10,-20,-16,-6; MA /M: SY='T'; M=4,-13,-16,-19,-16,-7,-12,-21,1,-18,3,-1,-11,-18,-15,-17,2,13,5,-25,-10,-16; MA /M: SY='L'; M=-9,-26,-19,-30,-21,11,-29,-17,18,-25,29,18,-24,-23,-18,-20,-22,-9,9,-16,4,-21; MA /M: SY='Y'; M=-2,-14,-19,-20,-18,4,-20,-10,6,-16,6,4,-11,-23,-15,-15,-9,0,5,-13,10,-17; MA /M: SY='L'; M=-9,-23,-18,-24,-16,2,-27,-16,9,-20,24,9,-22,-20,-14,-13,-18,-3,5,-20,0,-16; MA /M: SY='I'; M=-7,-28,-21,-32,-25,5,-32,-24,29,-26,26,16,-25,-26,-22,-22,-20,-6,24,-20,0,-25; MA /M: SY='P'; M=-10,-21,-29,-18,-9,-8,-20,-20,-8,-16,-13,-10,-18,37,-16,-20,-11,-8,-14,-23,-14,-15; MA /M: SY='L'; M=-3,-26,-18,-28,-20,4,-27,-21,19,-25,32,14,-25,-26,-19,-20,-19,-5,15,-21,-2,-20; MA /M: SY='F'; M=-9,-27,-19,-31,-22,25,-29,-21,14,-26,25,10,-24,-27,-25,-20,-19,-5,11,-14,6,-22; MA /M: SY='G'; 
M=0,-14,-19,-16,-20,-20,32,-21,-20,-21,-14,-10,-8,-20,-19,-20,-3,-12,-13,-23,-22,-20; MA /M: SY='V'; M=0,-23,-12,-28,-23,-3,-27,-24,21,-21,8,7,-19,-23,-19,-21,-8,-2,24,-26,-8,-22; MA /M: SY='T'; M=-7,-9,-7,-14,-11,-13,-21,3,-9,-14,-9,-4,-5,-16,-6,-13,-2,5,-8,-20,-4,-9; MA /M: SY='W'; M=-4,-19,-27,-23,-17,7,-19,-11,-10,-14,-10,-10,-18,-24,-12,-15,-16,-13,-14,42,22,-14; MA /M: SY='V'; M=-5,-25,-5,-31,-25,3,-27,-25,18,-22,10,10,-22,-26,-23,-21,-14,-5,21,-24,-6,-25; MA /M: SY='F'; M=-9,-25,-21,-28,-23,17,-19,-14,9,-23,14,10,-21,-24,-22,-18,-16,-9,8,-13,6,-22; MA /M: SY='F'; M=-1,-20,-17,-27,-21,21,-10,-19,-6,-21,-1,-2,-15,-20,-23,-18,-8,-5,-2,-7,2,-20; MA /M: SY='I'; M=-2,-21,-19,-26,-20,7,-25,-16,11,-19,8,5,-17,-18,-18,-17,-12,-4,8,-13,7,-20; MA /M: SY='F'; M=-12,-18,-18,-21,-11,17,-26,-14,1,-17,5,-1,-15,-22,-16,-15,-14,-8,-1,-6,8,-13; MA /M: M=-4,-8,-23,-13,-7,-7,-15,-10,-9,-6,-10,-6,-2,-16,-2,0,-2,-2,-9,-17,-6,-5; MA /M: SY='P'; M=-5,-18,-26,-19,-11,-9,-22,-16,-3,-10,-9,-5,-15,11,-11,-9,-9,-5,-4,-19,-10,-13; MA /M: SY='H'; M=-10,4,-26,3,1,-13,-11,6,-16,-10,-15,-10,6,-16,-4,-10,-2,-8,-16,-22,-5,-2; MA /M: SY='D'; M=-8,10,-25,11,5,-24,-8,-2,-22,-1,-20,-13,8,-7,0,-1,2,-4,-18,-31,-16,1; MA /I: I=-2; MD=-10; MA /M: SY='D'; M=-9,3,-20,4,0,-11,-12,-4,-18,-4,-17,-12,3,-10,-6,-3,-1,-5,-14,-25,-10,-3; D=-2; MA /I: I=-2; MD=-10; MA /M: SY='S'; M=-2,-7,-21,-10,-7,-13,-5,-7,-11,-4,-12,-4,-2,-16,-5,-2,1,-3,-8,-22,-9,-7; D=-2; MA /I: I=-2; MD=-10; MA /M: SY='W'; M=-4,-13,-21,-13,-6,-6,-12,-13,-8,-10,-2,-5,-13,-16,-7,-8,-8,-5,-9,8,-3,-5; D=-2; MA /I: I=-2; MD=-10; MA /M: SY='G'; M=0,-2,-18,-3,1,-14,2,-7,-14,-5,-13,-9,2,-5,-2,-7,1,-5,-14,-18,-12,-1; D=-2; MA /I: I=-2; MI=0; MD=-10; IM=0; DM=-10; MA /M: SY='F'; M=-6,-11,-16,-11,-4,2,-12,-8,-6,-5,-6,-5,-8,1,-6,-3,-6,-5,-8,-1,-1,-5; D=-2; MA /I: I=-2; DM=-10; MA /M: SY='M'; M=-4,-11,-16,-14,-7,-2,-17,-7,2,-6,2,9,-10,-11,-4,-6,-7,-2,1,-15,-2,-5; D=-2; MA /I: I=-2; DM=-10; MA /M: SY='V'; 
M=1,-23,-14,-26,-21,-2,-24,-19,15,-19,9,6,-21,-23,-18,-19,-11,-4,17,-13,-2,-20; D=-2; MA /I: I=-2; DM=-10; MA /M: SY='Y'; M=-14,-15,-26,-17,-7,6,-24,-3,-10,3,-4,1,-12,-17,-5,4,-15,-11,-10,-6,10,-7; D=-2; MA /I: I=-2; DM=-10; MA /M: SY='M'; M=-6,-9,-22,-13,-11,-4,-20,-2,2,-12,2,7,-7,-19,-5,-10,-8,-4,-2,-17,5,-10; D=-2; MA /I: I=-2; DM=-10; MA /M: SY='Y'; M=-14,-23,-21,-26,-22,23,-27,-4,6,-19,6,3,-20,-25,-20,-16,-18,-9,3,4,31,-22; D=-2; MA /I: I=-2; DM=-10; MA /M: SY='L'; M=-9,-21,-17,-25,-20,12,-23,-16,10,-21,18,15,-20,-21,-18,-17,-16,-4,7,-17,1,-18; D=-2; MA /I: I=-2; DM=-10; MA /M: SY='E'; M=-9,-3,-17,-4,3,2,-19,-1,-17,-7,-11,-8,-2,-16,-5,-6,-3,-3,-14,-20,-2,0; D=-2; MA /I: I=-2; DM=-10; MA /M: SY='L'; M=4,-14,-16,-19,-13,-4,-18,-12,2,-17,10,4,-12,-19,-11,-15,-4,5,2,-23,-6,-12; D=-2; MA /I: I=-2; DM=-10; MA /M: SY='I'; M=-4,-19,-18,-25,-21,7,-19,-18,11,-20,4,3,-14,-22,-18,-19,-8,-3,10,-16,2,-20; D=-2; MA /I: I=-2; DM=-10; MA /M: SY='L'; M=-7,-23,-14,-26,-20,14,-27,-20,12,-23,20,9,-21,-26,-21,-18,-15,-4,12,-18,0,-20; D=-2; MA /I: I=-2; DM=-10; MA /M: SY='N'; M=-1,4,-22,-1,-6,-21,12,-7,-21,-9,-21,-13,13,-17,-3,-9,8,-3,-19,-28,-17,-5; MA /M: SY='S'; M=2,-9,-4,-12,-8,-16,-13,-9,-7,-13,-15,-7,-3,-17,-7,-14,11,4,-2,-33,-14,-9; MA /M: SY='F'; M=-10,-21,-19,-27,-20,27,-25,-19,4,-23,13,2,-17,-24,-23,-17,-10,3,2,-8,9,-20; MA /M: SY='Q'; M=-6,-2,-26,-2,13,-29,-17,10,-18,3,-18,-2,0,-12,35,5,3,-5,-22,-24,-9,24; MA /M: SY='G'; M=-1,-8,-29,-9,-17,-29,60,-18,-38,-17,-29,-19,2,-19,-16,-16,1,-17,-29,-21,-28,-17; MA /M: SY='F'; M=-10,-27,-20,-33,-25,38,-25,-20,8,-26,13,6,-21,-24,-29,-20,-17,-8,7,-7,11,-25; MA /M: SY='F'; M=-15,-29,-24,-35,-27,39,-28,-20,4,-24,6,2,-24,-28,-28,-19,-20,-8,4,22,21,-25; MA /M: SY='V'; M=-1,-24,-19,-29,-25,-3,-29,-25,26,-20,8,9,-20,-21,-21,-20,-10,-4,29,-25,-6,-25; MA /M: SY='A'; M=9,-16,-12,-21,-13,0,-13,-16,-13,-16,-12,-11,-13,-17,-14,-17,0,-3,-8,2,-3,-13; MA /M: SY='V'; M=-4,-22,-18,-25,-20,2,-26,-21,17,-20,11,8,-18,-23,-18,-18,-7,1,19,-24,-3,-20; MA 
/M: SY='L'; M=-6,-21,-20,-26,-21,10,-17,-21,9,-24,13,6,-17,-23,-21,-20,-12,-4,7,-18,-2,-21; MA /M: SY='Y'; M=-17,-14,-28,-15,-13,14,-25,19,-10,-5,-7,-2,-12,-25,-8,-5,-15,-11,-13,8,40,-13; MA /M: SY='C'; M=-7,-18,64,-26,-25,-15,-25,-27,-15,-23,-13,-12,-16,-31,-24,-24,-5,0,0,-41,-22,-25; MA /M: SY='F'; M=-12,-25,-20,-30,-23,35,-28,-16,8,-23,13,4,-21,-26,-26,-18,-18,-8,8,-5,18,-23; MA /M: SY='L'; M=-5,-11,-11,-17,-10,0,-20,-9,-1,-17,9,3,-7,-22,-11,-14,-9,-3,-4,-24,-6,-10; MA /M: SY='N'; M=-4,24,-19,12,2,-21,-4,5,-21,0,-27,-18,38,-16,2,-1,13,3,-23,-36,-18,2; MA /I: E1=0; NR /RELEASE=40.7,103373; NR /TOTAL=128(128); /POSITIVE=128(128); /UNKNOWN=0(0); /FALSE_POS=0(0); NR /FALSE_NEG=0; /PARTIAL=0; CC /MATRIX_TYPE=protein_domain; CC /SCALING_DB=reversed; CC /AUTHOR=K_Hofmann; CC /TAXO-RANGE=??E??; /MAX-REPEAT=1; DR O14514, BAI1_HUMAN, T; O60241, BAI2_HUMAN, T; O60242, BAI3_HUMAN, T; DR O08893, CALR_CAVPO, T; P30988, CALR_HUMAN, T; Q60755, CALR_MOUSE, T; DR P25117, CALR_PIG , T; P79222, CALR_RABIT, T; P32214, CALR_RAT , T; DR P13773, CAR1_DICDI, T; P34907, CAR2_DICDI, T; P35352, CAR3_DICDI, T; DR Q9TX43, CAR4_DICDI, T; P48960, CD97_HUMAN, T; Q16602, CGRR_HUMAN, T; DR Q63118, CGRR_RAT , T; Q90812, CRF1_CHICK, T; P34998, CRF1_HUMAN, T; DR P35347, CRF1_MOUSE, T; P35353, CRF1_RAT , T; O62772, CRF1_SHEEP, T; DR O42602, CRF1_XENLA, T; Q13324, CRF2_HUMAN, T; Q60748, CRF2_MOUSE, T; DR P47866, CRF2_RAT , T; O42603, CRF2_XENLA, T; Q16983, DIHR_ACHDO, T; DR P35464, DIHR_MANSE, T; Q14246, EMR1_HUMAN, T; Q61549, EMR1_MOUSE, T; DR P18537, FRIZ_DROME, T; Q9DEB5, FZ0A_XENLA, T; Q9W742, FZ0B_XENLA, T; DR Q9PWH2, FZ10_CHICK, T; Q9ULW2, FZ10_HUMAN, T; O57328, FZD1_CHICK, T; DR Q9UP38, FZD1_HUMAN, T; O70421, FZD1_MOUSE, T; Q08463, FZD1_RAT , T; DR Q9I9M5, FZD1_XENLA, T; Q9IA06, FZD2_CHICK, T; Q14332, FZD2_HUMAN, T; DR Q9JIP6, FZD2_MOUSE, T; Q08464, FZD2_RAT , T; Q9PUU6, FZD2_XENLA, T; DR Q9PTW3, FZD3_CHICK, T; Q9NPG1, FZD3_HUMAN, T; Q61086, FZD3_MOUSE, T; DR O42579, FZD3_XENLA, T; Q9IA05, 
FZD4_CHICK, T; Q9ULV1, FZD4_HUMAN, T; DR Q61088, FZD4_MOUSE, T; Q9QZH0, FZD4_RAT , T; Q9PT62, FZD4_XENLA, T; DR Q13467, FZD5_HUMAN, T; Q9EQD0, FZD5_MOUSE, T; P58421, FZD5_XENLA, T; DR Q9PTW1, FZD6_CHICK, T; O60353, FZD6_HUMAN, T; Q61089, FZD6_MOUSE, T; DR O57329, FZD7_CHICK, T; O75084, FZD7_HUMAN, T; Q61090, FZD7_MOUSE, T; DR Q9PUK8, FZD7_XENLA, T; Q9IA03, FZD8_CHICK, T; Q9H461, FZD8_HUMAN, T; DR Q61091, FZD8_MOUSE, T; O93274, FZD8_XENLA, T; Q9IA02, FZD9_CHICK, T; DR O00144, FZD9_HUMAN, T; Q9R216, FZD9_MOUSE, T; P48546, GIPR_HUMAN, T; DR P43218, GIPR_MESAU, T; P43219, GIPR_RAT , T; P43220, GLP1_HUMAN, T; DR O35659, GLP1_MOUSE, T; P32301, GLP1_RAT , T; O95838, GLP2_HUMAN, T; DR Q9Z0W0, GLP2_RAT , T; P47871, GLR_HUMAN , T; Q61606, GLR_MOUSE , T; DR P30082, GLR_RAT , T; Q02643, GRFR_HUMAN, T; P32082, GRFR_MOUSE, T; DR P34999, GRFR_PIG , T; Q02644, GRFR_RAT , T; Q9VXD9, MTH1_DROME, T; DR Q9VRN2, MTH2_DROME, T; Q9V818, MTH3_DROME, T; Q9V817, MTH4_DROME, T; DR Q9VGG8, MTH5_DROME, T; Q9VSE7, MTH7_DROME, T; O97148, MTH_DROME , T; DR P83120, MTH_DROSI , T; Q9GT50, MTH_DROYA , T; Q9W0R5, MTHA_DROME, T; DR P83118, MTHB_DROME, T; P83119, MTHC_DROME, T; Q29627, PACR_BOVIN, T; DR P41586, PACR_HUMAN, T; P70205, PACR_MOUSE, T; P32215, PACR_RAT , T; DR P70555, PTH2_RAT , T; P49190, PTR2_HUMAN, T; P25107, PTRR_DIDMA, T; DR Q03431, PTRR_HUMAN, T; P41593, PTRR_MOUSE, T; P50133, PTRR_PIG , T; DR P25961, PTRR_RAT , T; P47872, SCRC_HUMAN, T; O46502, SCRC_RABIT, T; DR P23811, SCRC_RAT , T; O42224, SMO_CHICK , T; P91682, SMO_DROME , T; DR Q99835, SMO_HUMAN , T; P56726, SMO_MOUSE , T; P97698, SMO_RAT , T; DR Q90308, VIPR_CARAU, T; P32241, VIPR_HUMAN, T; Q91085, VIPR_MELGA, T; DR P97751, VIPR_MOUSE, T; Q28992, VIPR_PIG , T; P30083, VIPR_RAT , T; DR P41587, VIPS_HUMAN, T; P41588, VIPS_MOUSE, T; P35000, VIPS_RAT , T; DR P30650, YOW3_CAEEL, T; Q09460, YQ44_CAEEL, T; DO PDOC00559; // ID G_PROTEIN_RECEP_F3_1; PATTERN. 
AC PS00979; DT JUN-1994 (CREATED); NOV-1997 (DATA UPDATE); JUL-1998 (INFO UPDATE). DE G-protein coupled receptors family 3 signature 1. PA [LV]-x-N-[LIVM](2)-x-L-F-x-I-[PA]-Q-[LIVM]-[STA]-x-[STA](3)-[STAN]. NR /RELEASE=40.7,103373; NR /TOTAL=23(23); /POSITIVE=23(23); /UNKNOWN=0(0); /FALSE_POS=0(0); NR /FALSE_NEG=5; /PARTIAL=0; CC /TAXO-RANGE=??E??; /MAX-REPEAT=1; DR P35384, CASR_BOVIN, T; P41180, CASR_HUMAN, T; Q9QY96, CASR_MOUSE, T; DR P48442, CASR_RAT , T; Q09630, MGR1_CAEEL, T; Q13255, MGR1_HUMAN, T; DR P23385, MGR1_RAT , T; Q14416, MGR2_HUMAN, T; P31421, MGR2_RAT , T; DR Q14832, MGR3_HUMAN, T; P31422, MGR3_RAT , T; Q14833, MGR4_HUMAN, T; DR P31423, MGR4_RAT , T; P41594, MGR5_HUMAN, T; P31424, MGR5_RAT , T; DR O15303, MGR6_HUMAN, T; P35349, MGR6_RAT , T; Q14831, MGR7_HUMAN, T; DR P35400, MGR7_RAT , T; O00222, MGR8_HUMAN, T; P47743, MGR8_MOUSE, T; DR P70579, MGR8_RAT , T; P91685, MGR_DROME , T; DR Q9UBS5, GBR1_HUMAN, N; Q9WV18, GBR1_MOUSE, N; Q9Z0U4, GBR1_RAT , N; DR O75899, GBR2_HUMAN, N; O88871, GBR2_RAT , N; DO PDOC00754; // ID G_PROTEIN_RECEP_F3_2; PATTERN. AC PS00980; DT JUN-1994 (CREATED); NOV-1997 (DATA UPDATE); JUL-1998 (INFO UPDATE). DE G-protein coupled receptors family 3 signature 2. PA C-C-[FYW]-x-C-x(2)-C-x(4)-[FYW]-x(2,4)-[DN]-x(2)-[STAH]-C-x(2)-C. 
NR /RELEASE=40.7,103373; NR /TOTAL=23(23); /POSITIVE=23(23); /UNKNOWN=0(0); /FALSE_POS=0(0); NR /FALSE_NEG=5; /PARTIAL=0; CC /TAXO-RANGE=??E??; /MAX-REPEAT=1; DR P35384, CASR_BOVIN, T; P41180, CASR_HUMAN, T; Q9QY96, CASR_MOUSE, T; DR P48442, CASR_RAT , T; Q09630, MGR1_CAEEL, T; Q13255, MGR1_HUMAN, T; DR P23385, MGR1_RAT , T; Q14416, MGR2_HUMAN, T; P31421, MGR2_RAT , T; DR Q14832, MGR3_HUMAN, T; P31422, MGR3_RAT , T; Q14833, MGR4_HUMAN, T; DR P31423, MGR4_RAT , T; P41594, MGR5_HUMAN, T; P31424, MGR5_RAT , T; DR O15303, MGR6_HUMAN, T; P35349, MGR6_RAT , T; Q14831, MGR7_HUMAN, T; DR P35400, MGR7_RAT , T; O00222, MGR8_HUMAN, T; P47743, MGR8_MOUSE, T; DR P70579, MGR8_RAT , T; P91685, MGR_DROME , T; DR Q9UBS5, GBR1_HUMAN, N; Q9WV18, GBR1_MOUSE, N; Q9Z0U4, GBR1_RAT , N; DR O75899, GBR2_HUMAN, N; O88871, GBR2_RAT , N; DO PDOC00754; // ID G_PROTEIN_RECEP_F3_3; PATTERN. AC PS00981; DT JUN-1994 (CREATED); JUL-1998 (DATA UPDATE); JUL-1998 (INFO UPDATE). DE G-protein coupled receptors family 3 signature 3. PA F-N-E-[STA]-K-x-I-[STAG]-F-[ST]-M. NR /RELEASE=40.7,103373; NR /TOTAL=23(23); /POSITIVE=23(23); /UNKNOWN=0(0); /FALSE_POS=0(0); NR /FALSE_NEG=5; /PARTIAL=0; CC /TAXO-RANGE=??E??; /MAX-REPEAT=1; DR P35384, CASR_BOVIN, T; P41180, CASR_HUMAN, T; Q9QY96, CASR_MOUSE, T; DR P48442, CASR_RAT , T; Q09630, MGR1_CAEEL, T; Q13255, MGR1_HUMAN, T; DR P23385, MGR1_RAT , T; Q14416, MGR2_HUMAN, T; P31421, MGR2_RAT , T; DR Q14832, MGR3_HUMAN, T; P31422, MGR3_RAT , T; Q14833, MGR4_HUMAN, T; DR P31423, MGR4_RAT , T; P41594, MGR5_HUMAN, T; P31424, MGR5_RAT , T; DR O15303, MGR6_HUMAN, T; P35349, MGR6_RAT , T; Q14831, MGR7_HUMAN, T; DR P35400, MGR7_RAT , T; O00222, MGR8_HUMAN, T; P47743, MGR8_MOUSE, T; DR P70579, MGR8_RAT , T; P91685, MGR_DROME , T; DR Q9UBS5, GBR1_HUMAN, N; Q9WV18, GBR1_MOUSE, N; Q9Z0U4, GBR1_RAT , N; DR O75899, GBR2_HUMAN, N; O88871, GBR2_RAT , N; DO PDOC00754; // ID G_PROTEIN_RECEP_F3_4; MATRIX. 
AC PS50259; DT DEC-2001 (CREATED); DEC-2001 (DATA UPDATE); DEC-2001 (INFO UPDATE). DE G-protein coupled receptors family 3 profile. MA /GENERAL_SPEC: ALPHABET='ABCDEFGHIKLMNPQRSTVWYZ'; LENGTH=265; MA /DISJOINT: DEFINITION=PROTECT; N1=6; N2=260; MA /NORMALIZATION: MODE=1; FUNCTION=LINEAR; R1=0.7772; R2=0.01251611; TEXT='-LogE'; MA /CUT_OFF: LEVEL=0; SCORE=617; N_SCORE=8.5; MODE=1; TEXT='!'; MA /CUT_OFF: LEVEL=-1; SCORE=457; N_SCORE=6.5; MODE=1; TEXT='?'; MA /DEFAULT: D=-20; I=-20; B1=-50; E1=-50; MI=-105; MD=-105; IM=-105; DM=-105; MA /I: B1=0; BI=-105; BD=-105; MA /M: SY='W'; M=-13,-27,-30,-26,-16,9,-20,-19,-2,-22,11,-1,-26,-15,-18,-18,-25,-15,-9,19,7,-16; MA /M: SY='G'; M=10,-7,-22,-9,-10,-24,20,2,-26,-14,-23,-14,0,-9,-8,-15,7,-8,-19,-26,-17,-10; MA /M: SY='M'; M=-4,-14,-19,-22,-17,-5,-22,-14,15,-15,7,16,-9,-20,-11,-15,-7,-2,11,-26,-7,-15; MA /M: SY='L'; M=6,-20,-20,-24,-19,-4,-10,-18,6,-20,9,3,-16,-21,-16,-20,-9,-5,5,-16,-2,-19; MA /M: SY='P'; M=0,-22,-25,-20,-14,-12,-12,-21,-2,-19,5,-2,-21,11,-17,-20,-13,-9,-4,-24,-16,-17; MA /M: SY='V'; M=1,-19,-13,-25,-19,-4,-21,-22,12,-21,12,7,-17,-21,-17,-19,-7,4,13,-25,-8,-19; MA /M: SY='L'; M=-1,-23,-12,-28,-21,7,-25,-23,14,-23,16,7,-20,-24,-21,-20,-11,0,15,-21,-4,-21; MA /M: SY='L'; M=-5,-25,-21,-31,-22,11,-27,-18,19,-24,26,20,-22,-24,-18,-20,-19,-7,11,-17,1,-20; MA /M: SY='A'; M=37,-10,-11,-18,-9,-17,-3,-18,-9,-12,-7,-9,-9,-12,-9,-18,11,2,-1,-23,-18,-9; MA /M: SY='V'; M=-7,-28,2,-32,-27,2,-31,-26,20,-26,18,12,-26,-30,-25,-23,-18,-6,25,-27,-7,-27; MA /M: SY='C'; M=-6,-21,9,-25,-22,3,-20,-23,-1,-24,8,-1,-18,-27,-23,-20,-9,2,6,-26,-9,-22; MA /M: SY='G'; M=-6,-19,-8,-22,-24,1,19,-22,-18,-24,-9,-9,-12,-26,-26,-21,-9,-14,-10,-18,-13,-24; MA /M: SY='I'; M=-2,-22,-21,-27,-20,3,-25,-22,19,-24,12,7,-15,-20,-16,-22,-7,-3,12,-22,-3,-20; MA /M: SY='V'; M=10,-19,-16,-25,-18,-3,-18,-20,11,-18,7,7,-16,-19,-15,-19,-3,-2,13,-23,-8,-17; MA /M: SY='L'; M=4,-23,-9,-28,-21,8,-23,-23,10,-22,15,5,-22,-24,-22,-20,-11,-2,14,-20,-4,-21; MA /M: 
SY='T'; M=0,1,-11,-8,-10,-11,-18,-17,-8,-10,-11,-10,4,-13,-10,-10,17,38,1,-32,-12,-10; MA /M: SY='A'; M=12,-18,-11,-25,-17,-8,-15,-21,7,-19,7,5,-15,-18,-14,-20,-5,-1,7,-23,-11,-17; MA /M: SY='F'; M=-7,-25,-20,-32,-25,31,-28,-18,12,-23,11,7,-20,-25,-25,-20,-15,-5,11,-5,16,-25; MA /M: SY='V'; M=3,-19,-13,-23,-24,-5,-24,-24,20,-17,3,4,-16,-25,-23,-18,-5,2,32,-29,-11,-24; MA /M: SY='L'; M=-5,-24,-18,-28,-21,1,-27,-21,22,-23,23,15,-21,-24,-17,-19,-13,-2,18,-25,-5,-20; MA /M: SY='A'; M=2,-17,-17,-20,-18,-11,-1,-10,-5,-19,-2,-2,-13,-22,-16,-18,-7,-7,1,-19,-10,-18; MA /M: SY='V'; M=-2,-26,-13,-28,-27,-1,-30,-28,27,-20,10,9,-24,-26,-25,-20,-8,6,38,-28,-8,-27; MA /M: SY='F'; M=-19,-27,-22,-35,-25,60,-29,-16,-1,-23,10,1,-19,-29,-31,-10,-20,-10,-2,5,25,-25; MA /M: SY='V'; M=-3,-28,-17,-30,-26,4,-30,-22,27,-21,15,13,-26,-27,-23,-20,-15,-4,31,-20,3,-26; MA /M: SY='R'; M=-12,-13,-28,-15,-5,-15,-26,-13,-6,17,-5,1,-9,-17,-1,19,-15,-10,-5,-20,-6,-5; MA /M: SY='H'; M=-16,-6,-27,-8,-7,-4,-22,46,-14,-9,-12,-1,1,-23,3,-4,-10,-13,-16,-14,25,-5; MA /M: SY='N'; M=-14,15,-26,6,3,-23,-12,17,-25,15,-26,-12,27,-18,8,21,-1,-7,-26,-30,-10,3; MA /M: SY='D'; M=-9,24,-23,31,12,-27,-10,-3,-26,-3,-20,-20,15,-13,0,-7,5,-3,-21,-36,-18,6; MA /M: SY='T'; M=-3,-2,-14,-10,-8,-9,-21,-13,-10,-8,-10,-8,-2,-12,-3,-8,14,38,-4,-23,-1,-6; MA /M: SY='P'; M=-10,-21,-38,-12,-2,-26,-21,-20,-16,-12,-22,-16,-21,78,-11,-20,-12,-10,-26,-29,-27,-11; MA /M: SY='I'; M=-8,-18,-22,-26,-22,-4,-29,-21,24,-17,9,9,-11,-22,-16,-12,-12,-3,19,-25,-6,-22; MA /M: SY='V'; M=-1,-30,-11,-30,-29,1,-30,-29,29,-21,14,11,-30,-30,-29,-20,-12,-1,46,-29,-9,-29; MA /M: SY='K'; M=-10,0,-27,-2,4,-25,-19,-9,-23,35,-25,-9,3,-14,5,27,-8,-8,-14,-23,-11,4; MA /M: SY='A'; M=33,-2,-13,-11,-6,-21,-2,-15,-14,-3,-17,-12,1,-11,-6,-12,12,1,-6,-25,-19,-6; MA /M: SY='N'; M=-1,9,-16,1,-5,-17,1,-7,-17,-10,-19,-15,20,-16,-5,-9,17,10,-15,-35,-18,-5; MA /M: SY='N'; M=-4,16,-21,5,-8,-21,17,-5,-24,-8,-27,-18,29,-18,-8,-8,10,4,-24,-32,-21,-8; MA /M: SY='R'; 
M=-10,-9,-23,-11,0,-22,-18,-2,-25,19,-17,-8,-3,-18,13,45,-7,-9,-18,-21,-11,4; MA /M: SY='E'; M=1,-1,-21,1,21,-20,-17,-11,-13,-3,-14,-12,-4,-8,3,-9,7,3,-8,-30,-17,12; MA /M: SY='L'; M=-9,-28,-21,-28,-20,6,-19,-20,14,-29,41,16,-27,-29,-20,-20,-27,-11,6,-20,-3,-20; MA /M: SY='S'; M=4,-6,20,-9,-10,-21,-1,-16,-24,-16,-27,-20,1,-18,-10,-16,23,10,-12,-40,-23,-10; MA /M: SY='Y'; M=-16,-19,-28,-20,-19,22,-28,12,3,-12,-1,0,-17,-27,-10,-12,-14,-7,-6,18,61,-19; MA /M: SY='I'; M=-5,-23,-20,-29,-22,-1,-30,-25,26,-24,15,11,-18,-21,-18,-22,-9,2,21,-25,-5,-22; MA /M: SY='L'; M=-4,-28,-20,-30,-20,6,-28,-21,20,-28,40,17,-27,-27,-19,-21,-25,-9,11,-20,-2,-20; MA /M: SY='L'; M=-10,-30,-20,-30,-20,10,-30,-20,21,-30,49,20,-30,-30,-20,-20,-30,-10,11,-20,0,-20; MA /M: SY='I'; M=3,-20,-21,-29,-21,4,-20,-22,15,-21,6,7,-14,-18,-17,-22,-7,-2,11,-18,-3,-20; MA /M: SY='G'; M=9,-6,-19,-9,-12,-23,29,-18,-26,-15,-24,-17,1,-15,-12,-16,14,3,-16,-26,-23,-12; MA /M: SY='I'; M=-8,-29,-21,-33,-25,4,-33,-24,31,-27,31,20,-26,-26,-21,-23,-23,-8,24,-22,-2,-25; MA /M: SY='F'; M=-12,-25,-22,-32,-25,26,-26,-21,14,-22,13,11,-19,-25,-24,-18,-17,-7,11,-11,7,-24; MA /M: SY='L'; M=-7,-26,-14,-29,-21,12,-27,-21,10,-24,16,8,-22,-14,-21,-21,-17,-8,6,-18,-2,-21; MA /M: SY='C'; M=-8,-20,67,-27,-25,-14,-22,-26,-16,-28,-5,-10,-19,-34,-25,-26,-10,-8,-5,-40,-22,-25; MA /M: SY='Y'; M=-18,-23,-26,-26,-24,42,-19,1,-4,-19,1,-2,-18,-29,-22,-15,-18,-11,-8,17,49,-24; MA /M: SY='L'; M=2,-21,8,-24,-18,-5,-22,-21,2,-24,13,2,-19,-26,-18,-21,-8,-3,5,-29,-12,-18; MA /M: SY='C'; M=-7,-7,30,-12,-17,-15,-22,-18,-10,-18,-11,-4,-7,-24,-15,-19,-2,2,-2,-37,-17,-16; MA /M: SY='S'; M=7,-5,-17,-8,-6,-20,-2,-17,-18,-11,-22,-16,-1,5,-8,-14,19,18,-11,-31,-20,-8; MA /M: SY='F'; M=-17,-29,-21,-35,-26,53,-30,-16,6,-28,21,6,-23,-30,-31,-19,-23,-10,2,3,26,-26; MA /M: SY='F'; M=-6,-19,-19,-25,-19,20,-24,-6,3,-22,13,4,-14,-25,-20,-17,-14,-5,2,-15,6,-19; MA /M: SY='F'; M=-16,-29,-21,-37,-27,47,-30,-19,12,-28,21,14,-22,-28,-29,-20,-22,-10,7,-3,17,-25; MA /M: 
SY='I'; M=-7,-28,-25,-34,-28,-2,-25,-28,33,-27,16,14,-21,-23,-22,-26,-17,-9,25,-22,-5,-28; MA /M: SY='G'; M=15,-11,-21,-16,-14,-14,21,-17,-23,-11,-18,-13,-5,-17,-14,-10,3,-8,-14,-18,-18,-14; MA /M: SY='E'; M=-10,3,-26,6,12,-26,-18,4,-24,12,-20,-11,1,-8,10,7,-1,-4,-20,-27,-10,10; MA /M: SY='P'; M=-10,-18,-39,-9,2,-31,-20,-17,-20,-8,-29,-18,-18,79,-3,-17,-9,-10,-30,-29,-28,-5; MA /M: SY='N'; M=-5,10,-11,5,-3,-11,-11,-7,-18,-9,-21,-16,15,-16,-4,-9,14,12,-15,-32,-12,-3; MA /M: SY='T'; M=-2,-1,-21,-2,-4,-16,-18,-15,-12,-7,-13,-11,-4,2,-7,-10,3,11,-8,-28,-14,-7; MA /M: SY='A'; M=8,-15,-17,-21,-18,-3,-7,-22,-4,-17,-8,-7,-11,-18,-18,-18,3,6,3,-13,-9,-17; MA /M: SY='T'; M=4,-14,-12,-19,-17,-8,-21,-23,8,-16,-4,-2,-11,-18,-16,-16,9,18,18,-30,-11,-17; MA /M: SY='C'; M=-10,-20,120,-30,-30,-20,-30,-30,-30,-30,-20,-20,-20,-40,-30,-30,-10,-10,-10,-50,-30,-30; MA /M: SY='Y'; M=-4,-17,-24,-22,-14,-2,-24,-11,6,-10,-2,2,-12,-20,-6,-5,-7,-4,3,-13,7,-12; MA /M: SY='L'; M=-2,-24,-19,-28,-19,7,-26,-20,15,-25,29,13,-22,-24,-18,-20,-18,-3,9,-19,-2,-19; MA /M: SY='R'; M=-14,-4,-29,-5,9,-29,-20,1,-25,23,-21,-6,0,-14,28,37,-4,-7,-23,-21,-10,16; MA /M: SY='R'; M=-12,-9,-25,-10,1,-21,-22,-3,-14,9,-10,-2,-6,-17,18,25,-5,-3,-12,-22,-9,8; MA /M: SY='W'; M=-10,-21,-26,-27,-21,2,-26,-23,8,-21,4,0,-18,-18,-18,-19,-15,-1,3,10,2,-20; MA /M: SY='L'; M=-1,-16,-19,-22,-18,-1,-9,-21,0,-21,9,1,-13,-20,-18,-18,-6,7,1,-21,-7,-18; MA /M: SY='F'; M=-14,-27,-22,-30,-22,27,-29,-11,10,-27,21,9,-22,-21,-24,-19,-22,-10,5,-12,10,-22; MA /M: SY='G'; M=2,-12,-14,-13,-19,-28,49,-21,-35,-20,-27,-19,-4,-15,-20,-21,-1,-17,-26,-24,-29,-20; MA /M: SY='I'; M=-8,-28,-20,-32,-24,7,-31,-24,27,-27,27,15,-24,-26,-22,-22,-19,-5,22,-21,-1,-24; MA /M: SY='G'; M=10,-13,-20,-15,-17,-18,23,-20,-16,-18,-12,-10,-8,-19,-17,-19,3,-7,-7,-24,-21,-17; MA /M: SY='F'; M=-17,-26,-20,-34,-26,62,-27,-15,0,-25,7,3,-18,-28,-31,-18,-15,-6,-1,6,28,-25; MA /M: SY='T'; M=10,-4,-6,-11,-9,-14,-13,-18,-11,-11,-14,-12,-1,-12,-9,-13,21,31,0,-32,-15,-9; MA /M: 
SY='I'; M=-7,-29,-12,-32,-26,2,-31,-24,28,-25,23,19,-26,-27,-22,-22,-19,-7,27,-25,-5,-25; MA /M: SY='C'; M=11,-13,21,-18,-16,-18,-6,-21,-15,-19,-15,-12,-9,-21,-16,-20,7,0,-4,-34,-21,-16; MA /M: SY='Y'; M=-14,-25,-24,-28,-23,30,-30,-3,9,-19,11,5,-22,-29,-20,-16,-20,-9,3,8,42,-23; MA /M: SY='S'; M=12,-4,-16,-5,-7,-23,20,-14,-25,-13,-28,-19,5,-13,-7,-14,25,6,-15,-32,-23,-7; MA /M: SY='A'; M=24,-8,-4,-17,-12,-15,-10,-20,-8,-12,-11,-10,-7,-13,-12,-17,13,16,3,-27,-17,-12; MA /M: SY='M'; M=-7,-27,-18,-30,-23,3,-27,-17,24,-20,27,31,-26,-26,-16,-17,-20,-7,23,-23,-3,-20; MA /M: SY='L'; M=-12,-30,-20,-33,-24,26,-31,-21,18,-29,35,15,-27,-29,-25,-21,-26,-9,11,-13,7,-24; MA /M: SY='T'; M=20,-7,-10,-15,-11,-14,-11,-20,-7,-11,-10,-9,-6,-12,-11,-15,15,23,4,-27,-15,-11; MA /M: SY='K'; M=-10,0,-30,0,10,-30,-20,-10,-30,50,-30,-10,0,-10,10,30,-10,-10,-20,-20,-10,10; MA /M: SY='T'; M=7,-5,-12,-15,-12,-11,-19,-21,-4,-12,-7,-7,-4,-11,-11,-14,14,36,3,-27,-11,-12; MA /M: SY='N'; M=-11,1,-27,-11,-15,-8,-16,-12,-1,-13,-13,-9,12,-21,-11,-13,-6,-2,-10,4,-4,-13; MA /M: SY='R'; M=-13,-10,-21,-14,-6,-16,-21,-9,-20,9,-16,-9,-5,-18,4,29,-5,3,-15,-7,-7,-3; MA /M: SY='V'; M=-5,-30,-18,-34,-29,1,-34,-29,37,-25,17,15,-26,-26,-25,-24,-16,-5,39,-25,-5,-29; MA /M: SY='H'; M=-1,-16,-20,-19,-15,-2,-21,8,-1,-12,-1,1,-12,-22,-11,-7,-8,-7,3,-18,6,-15; MA /M: SY='R'; M=-11,-10,-22,-14,-8,-10,-19,-8,-8,0,-1,1,-2,-20,-3,18,-6,0,-7,-25,-9,-8; MA /M: SY='I'; M=14,-22,-20,-31,-22,-8,-23,-26,24,-21,7,7,-18,-18,-18,-25,-7,-5,22,-21,-9,-22; MA /M: SY='F'; M=-18,-26,-21,-34,-21,63,-29,-18,-1,-26,12,0,-19,-27,-32,-18,-19,-10,-2,3,22,-23; MA /M: SY='K'; M=0,2,-21,0,8,-23,-13,-7,-22,9,-20,-12,1,-10,3,3,6,9,-15,-27,-13,5; MA /M: M=-1,-9,-21,-13,-11,-16,-3,-15,-6,-8,-7,-3,-4,-19,-6,-10,-3,-4,-3,-25,-14,-9; MA /M: SY='T'; M=2,-8,-20,-12,-10,-18,-1,-18,-12,-4,-16,-9,-3,-14,-9,-6,6,7,-5,-26,-15,-10; MA /M: SY='K'; M=-6,-5,-23,-7,1,-8,-18,-11,-16,9,-16,-9,-2,-15,-3,9,-2,-3,-10,-21,-7,-2; MA /M: SY='P'; 
M=-6,-7,-29,-6,5,-24,-10,-11,-20,6,-21,-8,-6,16,0,1,-5,-6,-20,-25,-18,0; MA /M: SY='G'; M=-2,2,-23,2,-3,-26,10,-9,-28,0,-25,-16,4,-14,-2,-1,8,1,-19,-27,-18,-3; MA /M: SY='R'; M=0,-13,-23,-16,-7,-17,-18,-12,-10,10,-12,-5,-7,-18,-2,24,-5,-5,-3,-22,-11,-7; MA /M: SY='K'; M=-10,-10,-18,-13,0,-14,-23,-11,-8,3,-8,2,-9,-11,3,1,-8,-5,-9,-23,-9,0; MA /M: SY='R'; M=-2,-14,-23,-18,-11,-11,-18,0,-10,-5,-7,0,-11,-15,-6,2,-7,-2,-7,-8,-3,-10; MA /M: SY='R'; M=-15,-17,-31,-17,-7,-13,-25,-12,-9,8,-11,-4,-10,-1,-3,23,-13,-10,-10,-20,-9,-9; MA /M: SY='K'; M=-11,-10,-32,-11,-4,-16,-16,-12,-21,17,-23,-12,-7,-13,-1,13,-11,-11,-19,9,0,-3; MA /M: SY='F'; M=-13,-16,-24,-17,-15,20,-24,-16,-4,-20,4,-2,-16,-12,-20,-17,-13,-4,-6,-5,7,-17; MA /M: SY='L'; M=-10,-25,-27,-27,-18,-2,-25,-18,10,-17,11,10,-20,-18,-12,-10,-18,-10,2,1,0,-16; MA /M: SY='S'; M=-5,-9,-20,-13,-7,-5,-15,-15,-1,-8,-9,-4,-3,-17,-10,-10,1,-2,1,-25,-9,-9; MA /M: SY='P'; M=-5,-16,-24,-14,-8,-10,-18,-13,-6,-15,-5,-4,-14,14,-9,-16,-2,0,-10,-24,-8,-11; MA /I: I=-5; MD=-28; MA /M: SY='R'; M=-2,-5,-19,-8,-6,-13,0,-9,-17,-2,-16,-11,-1,-13,-2,5,2,1,-13,-3,-8,-5; D=-5; MA /I: I=-5; MI=0; MD=-28; IM=0; DM=-28; MA /M: SY='A'; M=13,-4,-11,-7,-4,-15,-1,-11,-12,-3,-13,-9,-1,-9,-4,-7,12,6,-6,-21,-13,-4; D=-5; MA /I: I=-5; DM=-28; MA /M: SY='Q'; M=-7,-8,-25,-6,6,-23,-17,-3,-11,-2,-10,-3,-8,13,21,-3,-5,-7,-19,-19,-12,12; D=-5; MA /I: I=-5; DM=-28; MA /M: SY='F'; M=-11,-14,-22,-19,-18,10,-22,-12,2,-12,-1,-1,-8,-26,-16,-11,-13,-7,3,-2,7,-16; MA /M: SY='F'; M=1,-25,-19,-30,-23,15,-26,-18,15,-21,13,6,-22,-24,-22,-20,-14,-6,14,-10,10,-23; MA /M: SY='I'; M=-9,-27,-26,-35,-26,0,-35,-26,38,-27,21,19,-20,-21,-18,-25,-17,-4,24,-21,-1,-26; MA /M: SY='I'; M=-3,-21,-8,-27,-22,-3,-29,-25,17,-23,12,7,-18,-22,-19,-21,-8,7,17,-26,-7,-22; MA /M: SY='P'; M=-6,-19,-28,-19,-14,0,-2,-20,-16,-18,-14,-12,-14,10,-19,-19,-7,-8,-16,-9,-10,-17; MA /M: SY='I'; M=-6,-24,-24,-30,-23,4,-22,-22,21,-25,15,12,-17,-22,-18,-23,-13,-7,12,-19,-2,-22; MA /M: SY='C'; 
M=-10,-27,26,-31,-24,-1,-31,-24,6,-30,24,7,-26,-32,-23,-24,-22,-10,5,-30,-10,-24; MA /M: SY='V'; M=1,-18,-16,-24,-20,-5,-25,-24,16,-19,7,4,-15,-19,-18,-19,-1,12,20,-26,-8,-20; MA /M: SY='L'; M=1,-17,-19,-20,-17,2,2,-17,-8,-21,3,-2,-12,-22,-18,-18,-4,-6,-6,-19,-8,-16; MA /M: SY='I'; M=-6,-28,-20,-32,-26,0,-28,-23,30,-23,20,23,-24,-25,-20,-21,-18,-7,28,-23,-4,-25; MA /M: SY='Q'; M=-12,10,-30,14,20,-40,-18,8,-24,8,-22,-6,4,-10,48,6,0,-10,-30,-24,-12,34; MA /M: SY='I'; M=-8,-29,-6,-33,-27,1,-33,-27,27,-28,24,13,-25,-28,-23,-25,-20,-8,23,-25,-5,-27; MA /M: SY='L'; M=-2,-25,-3,-29,-23,1,-28,-25,16,-24,18,8,-23,-26,-22,-22,-14,-2,18,-25,-7,-23; MA /M: SY='L'; M=-7,-24,-21,-27,-23,1,-16,-23,15,-24,18,10,-20,-24,-20,-21,-15,-4,12,-21,-6,-22; MA /M: SY='C'; M=-3,-20,23,-23,-21,-8,-24,-24,1,-23,1,-3,-18,-28,-21,-21,-3,1,11,-35,-15,-21; MA /M: SY='A'; M=9,-18,-15,-23,-21,-11,3,-22,-1,-19,-4,-1,-14,-21,-19,-21,-4,-7,6,-23,-15,-20; MA /M: SY='I'; M=-1,-21,-21,-29,-23,-5,-29,-26,28,-22,7,8,-15,-18,-17,-23,-5,4,22,-25,-6,-23; MA /M: SY='W'; M=-18,-38,-47,-38,-29,8,-15,-29,-19,-21,-17,-18,-37,-29,-20,-20,-37,-28,-28,132,25,-20; MA /M: SY='L'; M=-5,-24,-21,-28,-18,11,-26,-16,12,-22,26,15,-22,-24,-13,-17,-20,-9,5,-15,2,-15; MA /M: SY='I'; M=0,-18,-20,-24,-21,0,-8,-20,7,-21,4,4,-12,-22,-19,-20,-8,-6,7,-21,-8,-20; MA /M: SY='V'; M=0,-17,-15,-20,-14,-7,-25,-21,11,-12,3,4,-17,-20,-15,-11,-4,7,21,-27,-10,-15; MA /M: SY='D'; M=-1,9,-21,15,13,-27,-9,-8,-23,-4,-25,-20,5,0,3,-9,15,3,-17,-35,-20,7; MA /M: SY='P'; M=-7,-21,-37,-15,-5,-17,-19,-21,-17,-12,-24,-17,-20,65,-13,-20,-11,-10,-25,-16,-21,-12; MA /M: SY='P'; M=-10,-21,-33,-16,-7,-12,-21,-18,-11,-14,-14,-7,-20,53,-13,-19,-12,-9,-19,-24,-18,-13; MA /M: SY='H'; M=-8,-6,-22,-8,-10,2,-1,8,-19,-15,-16,-8,1,-19,-10,-12,3,-5,-15,-20,1,-10; MA /M: SY='T'; M=-3,-16,-21,-20,-16,-5,-22,-17,3,-12,-6,-3,-12,-14,-12,-9,-1,7,6,-13,0,-16; MA /I: I=-5; MI=0; MD=-28; IM=0; DM=-28; MA /M: SY='D'; 
M=-12,9,-23,13,5,-18,-18,-7,-15,1,-13,-11,1,-13,-2,1,-2,1,-9,-25,-8,0; D=-5; MA /I: I=-5; DM=-28; MA /M: SY='Y'; M=-10,-18,-23,-21,-12,7,-25,-5,5,-11,7,7,-14,-20,-7,-6,-15,-9,0,-11,9,-11; D=-5; MA /I: I=-5; DM=-28; MA /M: SY='D'; M=-4,14,-25,19,2,-24,-13,-4,-19,-7,-19,-16,6,-2,-5,-13,1,-3,-16,-30,-12,-3; MA /M: SY='Y'; M=-2,-10,-25,-12,-6,-7,-9,9,-15,-3,-12,-6,-7,-18,-5,-6,-7,-9,-14,-11,11,-8; MA /M: SY='H'; M=-10,-7,-29,-4,7,-20,-22,12,-11,-7,-14,-6,-7,4,5,-9,-7,-11,-14,-24,-4,3; MA /M: SY='E'; M=0,-3,-23,-1,8,-20,-16,-13,-9,-9,-13,-11,-4,0,-2,-13,5,1,-9,-30,-16,2; MA /M: SY='E'; M=-12,2,-30,9,26,-26,-21,-1,-22,6,-16,-12,-3,0,13,5,-6,-10,-23,-27,-15,18; MA /M: SY='H'; M=-16,7,-29,11,5,-26,-18,21,-30,12,-24,-12,5,-9,5,15,-5,-7,-23,-28,-5,3; MA /M: SY='E'; M=-7,7,-26,6,13,-26,10,-5,-28,-2,-24,-17,12,-12,3,-2,3,-5,-26,-28,-21,8; MA /M: SY='H'; M=-10,-11,-23,-11,-13,-12,-25,10,3,-11,-3,5,-9,-21,-10,-10,-10,-7,8,-28,-1,-13; MA /I: I=-5; MD=-25; MA /M: SY='I'; M=-8,-15,-21,-19,-16,-7,-27,-18,23,-17,8,9,-12,-18,-10,-17,-12,-6,16,-21,-5,-15; D=-5; MA /I: I=-5; MI=0; MD=-25; IM=0; DM=-25; MA /M: SY='I'; M=-9,-27,-22,-31,-25,5,-32,-23,26,-20,15,11,-22,-25,-20,-15,-16,-5,24,-19,2,-25; MA /M: SY='L'; M=-10,-21,-25,-23,-7,-2,-30,-18,20,-20,23,16,-20,-20,-10,-18,-20,-10,9,-22,-4,-11; MA /M: SY='M'; M=-8,-11,-23,-14,-5,-11,-23,-4,-5,4,-3,5,-9,-17,-4,2,-10,-4,-2,-22,-4,-5; MA /M: SY='C'; M=-10,-20,120,-30,-30,-20,-30,-30,-30,-30,-20,-20,-20,-40,-30,-30,-10,-10,-10,-50,-30,-30; MA /M: SY='N'; M=-9,31,-21,22,3,-23,-5,7,-23,0,-28,-19,39,-16,0,-2,10,3,-25,-38,-17,1; MA /M: SY='M'; M=-4,-8,-21,-12,-5,-15,-19,-9,-4,3,-10,4,-6,-14,-2,-2,1,3,-1,-27,-9,-4; MA /M: SY='G'; M=-1,-5,-25,-4,-3,-27,23,-13,-30,-3,-28,-17,1,-9,-4,-5,8,-5,-23,-26,-22,-4; MA /M: SY='D'; M=-6,15,-22,19,7,-27,-10,4,-28,4,-28,-18,13,-12,2,-1,12,3,-20,-35,-13,4; MA /M: SY='V'; M=3,-17,-16,-21,-15,-7,-19,-12,7,-13,7,10,-15,-20,-12,-13,-5,-1,11,-26,-8,-14; MA /M: SY='S'; 
M=-3,-4,-20,-7,-5,-17,-2,-6,-10,-11,-13,-4,2,-16,-4,-11,6,0,-7,-30,-14,-5; MA /M: SY='F'; M=9,-20,-15,-27,-20,12,-16,-20,4,-19,5,3,-17,-21,-21,-19,-6,0,9,-15,-3,-20; MA /M: SY='F'; M=-13,-31,-23,-36,-27,35,-30,-23,13,-28,19,7,-26,-29,-29,-21,-23,-11,10,7,13,-26; MA /M: SY='Y'; M=-9,-17,-16,-19,-16,-3,-15,5,-4,-16,-2,0,-13,-24,-8,-14,-12,-11,-5,-13,11,-14; MA /M: SY='C'; M=4,-12,12,-15,-13,-15,-6,-19,-19,-17,-19,-15,-7,-9,-14,-19,9,2,-10,-33,-21,-14; MA /M: SY='I'; M=-7,-20,-18,-24,-20,3,-27,-20,18,-21,17,9,-16,-26,-18,-17,-14,-3,18,-24,-5,-20; MA /I: I=-6; MD=-32; MA /M: SY='L'; M=-6,-16,-11,-18,-12,12,-17,-11,10,-16,21,9,-15,-16,-13,-11,-15,-5,5,-8,3,-12; D=-6; MA /I: I=-6; MI=0; MD=-32; IM=0; DM=-32; MA /M: SY='G'; M=8,-13,-23,-17,-19,-9,32,-20,-24,-19,-16,-13,-6,-19,-20,-20,-1,-11,-16,-16,-17,-19; MA /M: SY='Y'; M=-18,-22,-29,-22,-22,26,-29,12,2,-12,0,0,-22,-30,-13,-12,-20,-10,-5,31,68,-21; MA /M: SY='B'; M=-8,1,-21,1,-4,-14,-18,-11,-8,-2,-2,-5,0,-19,-7,-5,-5,-2,-6,-29,-11,-6; MA /M: SY='G'; M=0,-18,-5,-22,-22,-11,13,-19,-11,-20,-8,1,-13,-24,-19,-20,-8,-12,-5,-23,-17,-21; MA /M: SY='L'; M=-7,-26,-11,-31,-23,15,-28,-23,16,-26,19,8,-22,-27,-24,-21,-15,-6,14,-19,0,-23; MA /M: SY='L'; M=-10,-30,-20,-30,-20,9,-30,-20,21,-29,48,21,-29,-29,-19,-20,-29,-10,11,-20,0,-20; MA /M: SY='I'; M=11,-21,-19,-30,-20,-6,-21,-19,20,-19,12,18,-18,-18,-13,-21,-10,-6,15,-20,-7,-18; MA /M: SY='L'; M=-4,-25,-18,-30,-23,13,-28,-22,18,-24,20,12,-22,-24,-22,-20,-15,-2,16,-18,0,-22; MA /M: SY='L'; M=-3,-20,-20,-23,-19,-5,-6,-19,5,-22,14,9,-16,-22,-16,-18,-11,-3,4,-23,-9,-18; MA /M: SY='C'; M=3,-11,41,-16,-18,-22,1,-21,-27,-20,-24,-19,-6,-25,-18,-21,8,-2,-13,-38,-26,-18; MA /M: SY='F'; M=-10,-16,-4,-24,-20,15,-27,-16,0,-20,3,-2,-13,-23,-20,-17,-5,13,1,-14,10,-20; MA /M: SY='F'; M=-10,-21,-17,-28,-23,29,-28,-20,6,-22,9,2,-17,-24,-25,-17,-9,8,9,-11,11,-23; MA /M: SY='Y'; M=-14,-25,-25,-28,-21,19,-30,-3,14,-20,23,17,-23,-28,-14,-16,-23,-10,4,0,31,-20; MA /M: SY='A'; 
M=46,-9,-10,-18,-9,-20,0,-19,-11,-10,-12,-11,-8,-10,-9,-19,13,2,-1,-22,-20,-9; MA /M: SY='F'; M=-17,-29,-25,-37,-28,45,-30,-17,8,-25,8,7,-23,-28,-28,-20,-22,-12,4,17,26,-26; MA /M: SY='K'; M=-10,-8,-26,-7,8,-13,-23,-10,-11,11,0,0,-9,-16,0,4,-14,-10,-11,-22,-8,4; MA /M: SY='T'; M=9,2,-11,-7,-7,-15,-11,-15,-13,-9,-16,-13,6,-11,-7,-11,22,31,-5,-32,-15,-7; MA /M: SY='R'; M=-18,-8,-30,-8,2,-22,-20,-2,-30,33,-22,-10,0,-18,10,64,-10,-10,-20,-20,-10,2; MA /M: SY='N'; M=-8,18,-23,9,0,-23,1,10,-25,6,-29,-16,31,-17,1,3,6,-4,-26,-33,-15,0; MA /M: SY='V'; M=-6,-29,-7,-31,-26,2,-31,-26,24,-26,25,13,-28,-29,-25,-22,-19,-6,27,-26,-6,-26; MA /M: SY='P'; M=-8,-10,-34,-5,1,-28,-16,-15,-21,-4,-30,-19,-7,60,-6,-13,-3,-6,-27,-31,-26,-6; MA /I: I=-6; MI=0; MD=-33; IM=0; DM=-33; MA /M: SY='E'; M=-10,13,-29,24,32,-30,-17,-4,-29,5,-22,-20,2,3,8,0,-2,-9,-25,-31,-20,19; MA /M: SY='N'; M=1,16,-16,3,-4,-18,-9,-6,-16,0,-21,-14,25,-14,-4,-3,11,16,-15,-32,-16,-4; MA /M: SY='F'; M=-18,-29,-22,-37,-28,61,-31,-17,7,-28,14,4,-21,-29,-33,-20,-21,-10,3,6,29,-28; MA /M: SY='N'; M=-10,36,-21,18,1,-21,-2,8,-21,4,-30,-19,55,-19,1,3,8,-1,-29,-38,-19,1; MA /M: SY='E'; M=-12,18,-30,30,52,-32,-18,0,-32,8,-22,-22,4,-2,16,-2,0,-10,-30,-32,-20,34; MA /M: SY='A'; M=27,-5,-12,-12,-7,-19,-5,-6,-14,-10,-16,-11,-2,-11,-6,-14,16,10,-5,-27,-15,-7; MA /M: SY='K'; M=-11,-1,-30,-1,9,-29,-20,-9,-30,48,-29,-10,0,-11,10,34,-10,-10,-20,-20,-10,9; MA /M: SY='F'; M=-9,-25,-25,-30,-22,38,-23,-14,-6,-21,-2,-6,-20,-14,-25,-18,-15,-10,-8,16,23,-22; MA /M: SY='I'; M=-9,-30,-25,-36,-27,3,-35,-26,38,-28,28,20,-24,-24,-20,-25,-22,-9,25,-21,-1,-26; MA /M: SY='G'; M=6,-9,-21,-12,-15,-21,30,-19,-24,-17,-17,-13,-3,-17,-15,-17,6,0,-16,-24,-21,-15; MA /M: SY='F'; M=-16,-25,-20,-34,-26,55,-28,-14,2,-24,10,6,-18,-27,-29,-17,-16,-4,1,3,26,-25; MA /M: SY='S'; M=7,-2,-12,-6,-4,-17,-9,-13,-17,-6,-21,-15,4,-11,-3,-3,25,25,-7,-33,-16,-4; MA /M: SY='M'; M=-8,-18,-20,-28,-20,-2,-23,-8,19,-13,14,40,-16,-18,-5,-13,-12,2,11,-22,-2,-13; MA /I: I=-3; MI=0; 
MD=-18; IM=0; DM=-18; MA /M: SY='Y'; M=-13,-25,-22,-26,-22,21,-30,-6,11,-19,18,8,-24,-29,-18,-15,-21,-8,8,0,31,-21; MA /M: SY='V'; M=3,-18,-5,-23,-20,-5,-24,-24,10,-17,4,3,-17,-21,-19,-18,-1,14,20,-28,-10,-20; MA /M: SY='F'; M=-10,-18,-3,-27,-22,30,-26,-22,-4,-22,0,-5,-13,-23,-27,-17,-3,13,4,-14,6,-22; MA /M: SY='C'; M=-12,-24,69,-32,-29,6,-30,-27,-15,-29,-5,-10,-22,-36,-31,-26,-14,-9,-3,-33,-13,-29; MA /M: SY='V'; M=-5,-21,7,-28,-23,-1,-27,-24,11,-24,-2,0,-14,-24,-20,-23,-3,-2,12,-29,-9,-23; MA /M: SY='V'; M=-3,-27,-16,-30,-27,0,-31,-28,29,-22,15,11,-25,-26,-25,-21,-11,2,36,-27,-7,-27; MA /M: SY='W'; M=-19,-39,-48,-40,-30,9,-22,-30,-13,-21,-16,-16,-38,-29,-20,-21,-38,-28,-24,132,27,-21; MA /M: SY='I'; M=-6,-27,-19,-31,-24,2,-32,-25,28,-25,25,14,-24,-25,-22,-22,-17,-1,25,-24,-4,-24; MA /M: SY='A'; M=30,-6,-10,-15,-9,-17,-6,-19,-11,-10,-12,-11,-5,-10,-9,-16,16,18,-1,-25,-17,-9; MA /M: SY='F'; M=-19,-30,-27,-36,-26,55,-27,-22,-4,-26,3,-4,-23,-17,-33,-20,-22,-13,-7,26,22,-26; MA /M: SY='L'; M=-5,-29,-20,-32,-24,4,-31,-25,29,-27,31,17,-26,-26,-22,-23,-21,-7,23,-22,-3,-24; MA /M: SY='P'; M=-8,-17,-31,-13,-6,-18,-21,-21,-14,-12,-21,-15,-16,55,-13,-18,-4,3,-17,-28,-21,-12; MA /M: SY='I'; M=-4,-28,-19,-33,-26,5,-30,-24,30,-24,20,18,-24,-25,-22,-22,-17,-6,28,-22,-3,-25; MA /M: SY='Y'; M=-19,-22,-27,-26,-22,36,-30,10,3,-17,3,2,-18,-28,-17,-14,-19,-11,-4,16,55,-22; MA /M: SY='H'; M=-14,-18,-23,-22,-16,19,-25,23,-5,-20,7,7,-12,-25,-13,-12,-17,-13,-8,-11,20,-15; MA /M: SY='G'; M=4,-9,-17,-9,-13,-20,20,-17,-18,-16,-23,-15,-1,-17,-13,-16,16,2,-7,-31,-22,-13; MA /M: SY='T'; M=1,-3,-11,-10,-9,-10,-18,-18,-9,-12,-7,-9,-1,-12,-9,-11,18,39,-1,-31,-11,-9; MA /M: SY='K'; M=-3,4,-23,3,2,-24,-5,-7,-24,9,-23,-13,5,-13,5,4,4,0,-18,-24,-11,3; MA /M: SY='G'; M=-6,-3,-27,-4,-4,-25,17,11,-28,-9,-24,-11,5,-17,7,-7,3,-11,-26,-22,-9,0; MA /M: SY='K'; M=-10,12,-27,18,15,-30,-14,-6,-31,23,-28,-17,6,-10,7,14,2,-5,-22,-29,-15,11; MA /M: SY='F'; 
M=7,-19,-17,-25,-20,13,-20,-13,5,-17,0,-1,-15,-22,-20,-18,-6,-4,10,-11,8,-20; MA /M: SY='M'; M=-5,-11,-24,-13,-1,-14,-20,-7,-4,0,-1,10,-10,-16,3,7,-10,-8,-4,-23,-9,0; MA /M: SY='V'; M=-6,-14,-17,-15,-17,-1,-25,-21,8,-10,-2,0,-15,-22,-20,-12,-6,0,20,-25,-7,-18; MA /M: SY='A'; M=17,-10,-16,-17,-7,-17,-14,-14,-4,-9,-3,1,-10,-13,2,-12,3,5,-1,-22,-12,-3; MA /M: SY='V'; M=-6,-19,-7,-24,-22,8,-25,-16,7,-17,2,5,-18,-24,-20,-16,-4,7,15,-19,5,-21; MA /M: SY='E'; M=-2,-8,-23,-11,7,-5,-22,-15,-2,-10,-5,-6,-8,-11,-5,-14,-1,1,-4,-22,-8,0; MA /M: SY='V'; M=3,-14,-15,-17,-12,-12,-19,-17,6,-13,-6,1,-10,-17,-6,-13,7,9,12,-29,-12,-10; MA /M: SY='F'; M=-11,-25,-18,-30,-24,26,-28,-18,13,-23,17,11,-21,-26,-24,-18,-16,-1,12,-11,11,-23; MA /M: SY='C'; M=9,-5,18,-9,-3,-20,-11,-16,-20,-12,-21,-17,-3,-16,-8,-15,17,10,-9,-37,-21,-6; MA /I: I=-5; MD=-26; MA /M: SY='I'; M=-7,-21,-21,-29,-22,-1,-30,-24,30,-23,15,12,-16,-17,-16,-22,-12,1,20,-19,-2,-22; D=-5; MA /I: I=-5; MI=0; MD=-26; IM=0; DM=-26; MA /M: SY='L'; M=-1,-15,-13,-16,-13,-3,-17,-16,7,-17,10,3,-12,-20,-13,-14,-1,2,10,-26,-8,-13; D=-5; MA /I: I=-5; DM=-26; MA /M: SY='L'; M=14,-20,-16,-26,-17,4,-17,-19,4,-20,17,7,-19,-21,-17,-19,-10,-4,4,-17,-5,-16; MA /M: SY='S'; M=10,-3,4,-4,-4,-20,-3,-13,-20,-12,-28,-19,6,-13,-4,-13,33,16,-9,-40,-21,-4; MA /M: SY='A'; M=25,-7,-12,-11,-8,-20,4,-16,-15,-11,-20,-14,-1,-12,-8,-15,21,7,-4,-30,-20,-8; MA /M: SY='S'; M=8,-6,-15,-13,-10,-2,-12,-11,-9,-12,-13,-10,-1,-15,-10,-13,13,13,-5,-20,-1,-10; MA /M: SY='V'; M=0,-22,-19,-25,-25,-9,-6,-26,13,-21,-1,2,-17,-23,-23,-21,-6,-5,21,-26,-14,-25; MA /M: SY='L'; M=10,-18,-15,-23,-16,-5,-18,-18,7,-18,12,8,-16,-19,-13,-17,-4,3,9,-24,-9,-15; MA /M: SY='L'; M=-10,-30,-20,-30,-20,10,-30,-20,20,-30,50,20,-30,-30,-20,-20,-30,-10,10,-20,0,-20; MA /M: SY='G'; M=8,-14,-22,-17,-20,-17,31,-21,-18,-18,-16,-9,-8,-20,-19,-19,0,-10,-8,-21,-21,-19; MA /M: SY='C'; M=-10,-24,49,-31,-27,0,-29,-24,-4,-26,3,2,-23,-33,-26,-24,-15,-9,4,-32,-12,-26; MA /M: SY='L'; 
M=-10,-27,-23,-32,-23,5,-32,-21,29,-27,34,21,-23,-26,-18,-22,-24,-9,17,-21,-1,-22; MA /M: SY='F'; M=-20,-28,-22,-37,-28,71,-30,-13,0,-27,8,0,-20,-30,-35,-18,-20,-10,-2,13,39,-28; MA /M: SY='A'; M=11,-17,-14,-22,-19,-1,-7,-20,0,-16,-4,0,-14,-20,-18,-18,1,1,10,-22,-10,-18; MA /M: SY='P'; M=-10,-17,-39,-8,0,-29,-19,-18,-20,-9,-30,-20,-15,84,-9,-19,-9,-9,-30,-31,-29,-9; MA /M: SY='K'; M=-10,0,-30,0,10,-30,-20,-10,-30,50,-30,-10,0,-10,10,30,-10,-10,-20,-20,-10,10; MA /M: SY='V'; M=-7,-25,11,-31,-26,-3,-30,-23,15,-23,13,15,-23,-28,-21,-22,-15,-5,18,-29,-9,-24; MA /M: SY='Y'; M=-20,-19,-31,-19,-17,21,-27,20,-7,-7,-5,-2,-16,-28,-8,-1,-19,-12,-14,25,60,-16; MA /M: SY='I'; M=-10,-27,-27,-34,-26,-2,-36,-26,36,-22,15,15,-18,-21,-17,-17,-17,-7,24,-21,-2,-26; MA /M: SY='I'; M=-10,-30,-26,-37,-27,3,-37,-27,41,-30,28,20,-23,-23,-20,-27,-22,-10,25,-20,0,-27; MA /M: SY='L'; M=-10,-30,-22,-32,-23,7,-32,-23,27,-30,42,20,-28,-28,-20,-22,-27,-10,16,-20,0,-23; MA /M: SY='F'; M=-10,-23,-18,-32,-23,39,-26,-17,5,-23,13,10,-18,-24,-25,-18,-14,-2,4,-6,12,-22; MA /M: SY='R'; M=-17,-3,-30,-3,7,-24,-20,26,-29,20,-22,-7,3,-16,15,34,-8,-13,-24,-23,-3,8; MA /M: SY='P'; M=-9,-17,-38,-9,-2,-30,-9,-19,-23,-7,-30,-19,-16,70,-10,-17,-9,-11,-29,-28,-29,-10; MA /M: SY='E'; M=-11,13,-29,22,43,-31,-17,7,-30,6,-22,-18,4,-4,19,-1,2,-9,-29,-31,-16,31; MA /M: SY='R'; M=-14,-10,-31,-10,1,-20,-20,-8,-25,25,-21,-9,-6,-17,9,31,-12,-11,-20,1,-5,4; MA /M: SY='N'; M=-10,28,-22,12,1,-21,-7,6,-15,1,-25,-14,44,-18,6,0,6,-2,-25,-35,-17,3; MA /M: SY='V'; M=1,-19,-13,-21,-19,-3,-21,-22,13,-18,-1,1,-14,-21,-18,-17,5,7,22,-30,-10,-19; MA /M: SY='R'; M=-11,-14,-20,-14,-2,-11,-24,-11,-6,-2,-4,-3,-11,-14,0,8,-9,-6,-5,-24,-9,-3; MA /M: SY='K'; M=-1,-2,-22,-4,6,-24,-16,-11,-19,13,-21,-10,-1,-5,8,4,6,6,-14,-26,-13,6; MA /M: SY='Q'; M=-8,-3,-25,-3,5,-20,-17,8,-18,6,-12,-4,0,-16,13,11,-4,-8,-18,-23,-5,7; MA /M: SY='R'; M=-13,-3,-26,-4,-3,-6,-23,-5,-8,0,-10,-5,-3,-18,-4,1,-8,-6,-7,-19,1,-5; MA /M: SY='R'; 
M=-8,-7,-23,-10,-3,-17,-14,-9,-19,8,-12,-5,-3,-16,2,23,-1,6,-12,-23,-11,-2; MA /M: SY='E'; M=-7,2,-13,0,5,-20,-12,1,-19,-4,-17,-7,4,-9,0,-7,4,2,-16,-32,-14,2; MA /M: SY='T'; M=-3,-7,-19,-9,-4,-12,-17,-14,-12,5,-12,-7,-3,-14,-4,0,6,8,-7,-26,-9,-4; MA /M: SY='T'; M=-7,2,-20,-1,-1,-19,-16,-5,-18,9,-17,-7,3,-13,0,8,5,10,-12,-28,-10,-1; MA /I: E1=0; NR /RELEASE=40.7,103373; NR /TOTAL=28(28); /POSITIVE=28(28); /UNKNOWN=0(0); /FALSE_POS=0(0); NR /FALSE_NEG=0; /PARTIAL=0; CC /MATRIX_TYPE=protein_domain; CC /SCALING_DB=reversed; CC /AUTHOR=K_Hofmann; CC /TAXO-RANGE=??E??; /MAX-REPEAT=1; DR P35384, CASR_BOVIN, T; P41180, CASR_HUMAN, T; Q9QY96, CASR_MOUSE, T; DR P48442, CASR_RAT , T; Q9UBS5, GBR1_HUMAN, T; Q9WV18, GBR1_MOUSE, T; DR Q9Z0U4, GBR1_RAT , T; O75899, GBR2_HUMAN, T; O88871, GBR2_RAT , T; DR Q09630, MGR1_CAEEL, T; Q13255, MGR1_HUMAN, T; P23385, MGR1_RAT , T; DR Q14416, MGR2_HUMAN, T; P31421, MGR2_RAT , T; Q14832, MGR3_HUMAN, T; DR P31422, MGR3_RAT , T; Q14833, MGR4_HUMAN, T; P31423, MGR4_RAT , T; DR P41594, MGR5_HUMAN, T; P31424, MGR5_RAT , T; O15303, MGR6_HUMAN, T; DR P35349, MGR6_RAT , T; Q14831, MGR7_HUMAN, T; P35400, MGR7_RAT , T; DR O00222, MGR8_HUMAN, T; P47743, MGR8_MOUSE, T; P70579, MGR8_RAT , T; DR P91685, MGR_DROME , T; DO PDOC00754; // ID OPSIN; PATTERN. AC PS00238; DT APR-1990 (CREATED); DEC-2001 (DATA UPDATE); DEC-2001 (INFO UPDATE). DE Visual pigments (opsins) retinal binding site. PA [LIVMFWAC]-[PSGAC]-x(3)-[SAC]-K-[STALIMR]-[GSACPNV]-[STACP]-x(2)-[DENF]- PA [AP]-x(2)-[IY]. 
NR /RELEASE=40.7,103373; NR /TOTAL=184(184); /POSITIVE=182(182); /UNKNOWN=0(0); /FALSE_POS=2(2); NR /FALSE_NEG=1; /PARTIAL=4; CC /TAXO-RANGE=??E??; /MAX-REPEAT=1; CC /SITE=5,retinal; DR Q9H1Y3, OPN3_HUMAN, T; Q9WUK7, OPN3_MOUSE, T; Q9UHM6, OPN4_HUMAN, T; DR Q9QXZ9, OPN4_MOUSE, T; P22269, OPS1_CALVI, T; P06002, OPS1_DROME, T; DR P28678, OPS1_DROPS, T; Q25157, OPS1_HEMSA, T; P35360, OPS1_LIMPO, T; DR O15973, OPS1_PATYE, T; Q94741, OPS1_SCHGR, T; P08099, OPS2_DROME, T; DR P28679, OPS2_DROPS, T; Q25158, OPS2_HEMSA, T; P35361, OPS2_LIMPO, T; DR O15974, OPS2_PATYE, T; Q26495, OPS2_SCHGR, T; P04950, OPS3_DROME, T; DR P28680, OPS3_DROPS, T; P08255, OPS4_DROME, T; P29404, OPS4_DROPS, T; DR P17646, OPS4_DROVI, T; P91657, OPS5_DROME, T; O01668, OPS6_DROME, T; DR P51471, OPSB_ANOCA, T; P90680, OPSB_APIME, T; P51472, OPSB_ASTFA, T; DR P51490, OPSB_BOVIN, T; P32310, OPSB_CARAU, T; P28682, OPSB_CHICK, T; DR O13227, OPSB_CONCO, T; P35357, OPSB_GECGE, T; P03999, OPSB_HUMAN, T; DR P51491, OPSB_MOUSE, T; P87365, OPSB_ORYLA, T; Q63652, OPSB_RAT , T; DR O13092, OPSB_SAIBB, T; O42294, OPSD_ABYKO, T; P52202, OPSD_ALLMI, T; DR Q90245, OPSD_AMBTI, T; Q90214, OPSD_ANGAN, T; P41591, OPSD_ANOCA, T; DR Q17053, OPSD_APIME, T; P41590, OPSD_ASTFA, T; Q9YGZ1, OPSD_ATHBO, T; DR O42300, OPSD_BATMU, T; O42301, OPSD_BATNI, T; P02699, OPSD_BOVIN, T; DR P56514, OPSD_BUFBU, T; P56515, OPSD_BUFMA, T; Q17292, OPSD_CAMAB, T; DR O18312, OPSD_CAMHU, T; O16017, OPSD_CAMLU, T; O18315, OPSD_CAMMA, T; DR O16018, OPSD_CAMSC, T; P32308, OPSD_CANFA, T; P32309, OPSD_CARAU, T; DR Q17296, OPSD_CATBO, T; Q9YGZ8, OPSD_CHELB, T; P22328, OPSD_CHICK, T; DR O42327, OPSD_COMDY, T; Q90305, OPSD_CORAU, T; O42307, OPSD_COTBO, T; DR O42328, OPSD_COTGR, T; O42330, OPSD_COTIN, T; Q90373, OPSD_COTKE, T; DR P28681, OPSD_CRIGR, T; P51488, OPSD_CYPCA, T; O62791, OPSD_DELDE, T; DR Q9YGZ4, OPSD_DICLA, T; Q9YH05, OPSD_DIPAN, T; Q9YH04, OPSD_DIPVU, T; DR O93441, OPSD_GALML, T; P79756, OPSD_GAMAF, T; O62792, OPSD_GLOME, T; DR Q9YGZ2, 
OPSD_GOBNI, T; P08100, OPSD_HUMAN, T; O42268, OPSD_ICTPU, T; DR P22671, OPSD_LAMJA, T; O42427, OPSD_LIMBE, T; O42431, OPSD_LIMPA, T; DR Q9YH00, OPSD_LITMO, T; Q9YGZ6, OPSD_LIZAU, T; Q9YGZ7, OPSD_LIZSA, T; DR P24603, OPSD_LOLFO, T; Q17094, OPSD_LOLSU, T; Q28886, OPSD_MACFA, T; DR O62793, OPSD_MESBI, T; P15409, OPSD_MOUSE, T; Q9YGZ9, OPSD_MUGCE, T; DR Q9YH01, OPSD_MULSU, T; P79798, OPSD_MYRBE, T; P79807, OPSD_MYRVI, T; DR P79808, OPSD_NEOAR, T; P79809, OPSD_NEOAU, T; P79812, OPSD_NEOSA, T; DR P09241, OPSD_OCTDO, T; O18481, OPSD_ORCAU, T; O16019, OPSD_ORCVI, T; DR P87369, OPSD_ORYLA, T; O42452, OPSD_PARKN, T; Q98980, OPSD_PETMA, T; DR O62795, OPSD_PHOGR, T; O62794, OPSD_PHOVI, T; O18766, OPSD_PIG , T; DR P79848, OPSD_POERE, T; P35403, OPSD_POMMI, T; P35356, OPSD_PROCL, T; DR O42451, OPSD_PROJE, T; O16020, OPSD_PROML, T; O18485, OPSD_PROOR, T; DR O18486, OPSD_PROSE, T; P49912, OPSD_RABIT, T; P79863, OPSD_RAJER, T; DR P51470, OPSD_RANCA, T; P31355, OPSD_RANPI, T; P56516, OPSD_RANTE, T; DR P51489, OPSD_RAT , T; Q9YGZ3, OPSD_SALPV, T; P79898, OPSD_SARDI, T; DR P79901, OPSD_SARMI, T; Q9YGZ0, OPSD_SARPI, T; P79902, OPSD_SARPU, T; DR Q9YH03, OPSD_SARSL, T; P79903, OPSD_SARSP, T; P79911, OPSD_SARTI, T; DR P79914, OPSD_SARXA, T; O93459, OPSD_SCYCA, T; O16005, OPSD_SEPOF, T; DR P02700, OPSD_SHEEP, T; Q9YGZ5, OPSD_SOLSO, T; Q9YH02, OPSD_SPAAU, T; DR P35362, OPSD_SPHSP, T; O42466, OPSD_TAUBU, T; Q9DGG4, OPSD_TETNG, T; DR P31356, OPSD_TODPA, T; O62796, OPSD_TRIMA, T; O62798, OPSD_TURTR, T; DR P29403, OPSD_XENLA, T; O42604, OPSD_ZEUFA, T; Q9YGY9, OPSD_ZOSOP, T; DR Q90215, OPSF_ANGAN, T; P22330, OPSG_ASTFA, T; P32311, OPSG_CARAU, T; DR Q9R024, OPSG_CAVPO, T; P28683, OPSG_CHICK, T; P35358, OPSG_GECGE, T; DR P04001, OPSG_HUMAN, T; O35599, OPSG_MOUSE, T; P87366, OPSG_ORYLA, T; DR O18910, OPSG_RABIT, T; O35476, OPSG_RAT , T; O35478, OPSG_SCICA, T; DR P22331, OPSH_ASTFA, T; P32312, OPSH_CARAU, T; P51474, OPSI_ASTFA, T; DR P34989, OPSL_CALJA, T; O13018, OPSO_SALSA, T; P51475, OPSP_CHICK, 
T; DR P51476, OPSP_COLLI, T; O42266, OPSP_ICTPU, T; O42490, OPSP_PETMA, T; DR P41592, OPSR_ANOCA, T; P22332, OPSR_ASTFA, T; Q95170, OPSR_CAPHI, T; DR P32313, OPSR_CARAU, T; P22329, OPSR_CHICK, T; O18913, OPSR_FELCA, T; DR P04000, OPSR_HUMAN, T; P87367, OPSR_ORYLA, T; O12948, OPSR_XENLA, T; DR P35359, OPSU_BRARE, T; Q90309, OPSU_CARAU, T; O61303, OPSV_APIME, T; DR P28684, OPSV_CHICK, T; P87368, OPSV_ORYLA, T; P51473, OPSV_XENLA, T; DR O14718, OPSX_HUMAN, T; O35214, OPSX_MOUSE, T; P23820, REIS_TODPA, T; DR P47803, RGR_BOVIN , T; P47804, RGR_HUMAN , T; DR P17645, OPS3_DROVI, P; O18911, OPSG_ODOVI, P; O18914, OPSR_CANFA, P; DR O18912, OPSR_HORSE, P; DR Q9Z2B3, RGR_MOUSE , N; DR Q9CL24, OADB_PASMU, F; Q99NF8, RP17_MOUSE, F; 3D 1BOJ; 1BOK; 1F88; DO PDOC00211; // bio-1.4.3.0001/test/data/fastq/0000755000004100000410000000000012200110570015671 5ustar www-datawww-databio-1.4.3.0001/test/data/fastq/error_double_qual.fastq0000644000004100000410000000123512200110570022437 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ bio-1.4.3.0001/test/data/fastq/illumina_full_range_as_sanger.fastq0000644000004100000410000000065212200110570024766 0ustar www-datawww-data@FAKE0005 Original version has PHRED scores from 0 to 62 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG + 
!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ @FAKE0006 Original version has PHRED scores from 62 to 0 inclusive (in that order) GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA + _^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,+*)('&%$#"! bio-1.4.3.0001/test/data/fastq/longreads_as_solexa.fastq0000644000004100000410000002030412200110570022744 0ustar www-datawww-data@FSRRS4401BE7HA [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95] tcagTTAAGATGGGATAATATCCTCAGATTGCGTGATGAACTTTGTTCTGGTGGAGGAGAAGGAAGTGCATTCGACGTATGCCCGTTTGTCGATATTTGtatttaaagtaatccgtcacaaatcagtgacataaatattatttagatttcgggagcaactttatttattccacaagcaggtttaaattttaaatttaaattattgcagaagactttaaattaacctcgttgtcggagtcatttgttcggttattggtcgaaagtaaccncgggaagtgccgaaaactaacaaacaaaagaagatagtgaaattttaattaaaanaaatagccaaacgtaactaactaaaacggacccgtcgaggaactgccaacggacgacacagggagtagnnn + eeeccccccc`UUU^UWWeegffhhhhhhhhhhhhhhhhhhggghhhhhhhhhfgfeeeee\\\\ceeeeeeeeeeeeeec^^^YRPOSNVU\YTMMMSMRKKKRUUNNNNS[`aa```\bbeccccccccYUUUbceeee\[`a`\ZYRRRPPP[\\\XXZaWWXeeeeeeccacaccc\WWSSQRPMMKKKLKKKKKKKKPPRRMMLLLPVPPPKKKKKQQTTTPRPPQPMLLMKRRRPPKMKKRLLKKMKKLLKRTPPPQRMMLL;KKKKLLKLLLLXKKKKW\KKLKKKLKKKKLLLQUYXYTLMMPKKKKPPNNKKKK;KKPXPVLLKKKKLRMKLLKKPVKKKKLLLJPPPPRMOROOOOKKKOSSSOOORUZXUUUQMNNZV][Z;;; @FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] tcagCCAGCAATTCCGACTTAATTGTTCTTCTTCCATCATTCATCTCGACTAACAGTTCTACGATTAATGAGTTTGGCtttaatttgttgttcattattgtcacaattacactactgagactgccaaggcacncagggataggnn + eeeeeeeeecccceeeefecccca`````\[SSSS__a\TTTYaaaaa__^WYW[^[WXWXW[WSSSQZ\\RKKKTPSKKKPPKKKMKKQPVVVTTTTPRKMMLLPPPTVTWMNNRSSWW][[ZZZZXXSSN;NSKKKTVWTT;; @FSRRS4401B64ST [length=382] [gc=40.58] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=346] 
tcagTTTTCTTAAATTACTTGAATCTGTTGAAGTGGATGTCCACTTTTGTATGCCAAATATGCCCAGCGTATACGATCTTGGCCACATCTCCACATAATCATCAGTCGGATGCAAAAAGCGATTAAACTAAAAATGAATGCGTTTTTAGATGAGTAAATAGGTAATACTTTGTTTAAATAATAAATGTCACAAACAGAACGCGGATTACAGTACCTGAAAATAGTTGTACTGTATCTGTGCCGGCACTTCCTCGGCCCTGAGAAGTTGTCCCGTTGTTTCCATTCGCACCATCCAATGGCCAAAGTTTGCGAAGAATCTGTTCCGTTCCATTACCAATTGTTTTTCCATGctgagactgccaaggcacacaggggataggnn + hhhhbbbbh^^UUUhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhUUUUh`hhhhh^^^hhhhbbbhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhUURRRdhbdYYRRW\NLLLLKW\]]^^YQLNNNNV]bddhdhggghhhhhhhhhdZZXXPPPXXa^^^habghhhhhhggghhggghhhhhhhhhhhhhhhhhhaabbhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhfffhhhhhhhhhc^\\\chhhggghhhhhhhhhggghhhhhhhhhhggghggghhhhhhhhhhhhhhhhhhhhhh^]ZXXWW\\TLLLLM__`dfhhhhhhhhhgg^^^^dhhheeXXXZdhhaa;; @FSRRS4401EJ0YH [length=381] [gc=48.29] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=343] tcagTTTTTGGAGAATTCCGTCAGGGACGGCATGGCATATTTGTGGGTTCGGCACGGCGTCCTGGCCAAGAAGAAGAAGACGAATTAGCCCGTTAATTTAATGACACCTTCCCCAATTTTGCAGCAATGATTGGTTCATTCTTGGCGGTGCGTTTTTGTGCTTCGTCGAATTGTTGGCCATTTTGGTCCACCGGCCATCATCTTTACGCTATCCGACTGATTGGAAATCACCGCCTAGCATTTTGCCGAAGATTGTTGCGTTGTACGGCCATGTGCTGATTGTTTACATTGGCATTCTTGGCAATTTGTCCTTGGTCGGCTTTGACGGCAAATTTGCGGTGTTAAGTctgagactgccaaggcacacagggggatagggnn + hhhh^^^^^hhhhhhhhhhhhhhggghhhhhhhhhhhhhggghhggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggghhhhhggghhhhhhhhhhh````hh]]]]hhhhhhhhhhhhhhhhhhhhhhhhhhddhddZRRRRRcVhhhhhhhhhhhhhhhhhhhhhbb__gghhhhhhhhhhhhhhhhggghhhhhhhhhhhhhhhhhhhggghhhhhhhhhhhhhaaaahgbcbghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggghhhggbbchhhhhhggghhbbbg\bbhhhhhhhhhfffhhhhhhgggggghhhhhhhhhhhhhhhggghhggd^^]]^dedd^NNNNNZYWOLL;; @FSRRS4401BK0IB [length=507] [gc=49.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=208] 
tcagTTGACCGGCGTTGTGTAACAATAATTCATTATTCTGAGACGATGCCAATGTAATCGACGGTTTATGCCCAATTATTCCCATCTATGCTTAACTGATCAAATACTATTTGCATTACGTCACGAAATTGCGCGAACACCGCCGGCCGACAATAATTTATACCGGACATACCGGAGTTGATGGTAATCGGTAAAGAGTTTTATTTAATTATntattatcnctattaattattgttancaacaatgtgcacgctntgccgcccgccgccgccgtgtcggtaggaccccggacggacccggacccggttcgggtacccgttttcgggttcccggaaccgtttttcgggtacccggttttttcggggggccccccggtaaaaaaccggggaaccccctaaaacgggtaaacgtaccgtaagggaccccctaaacgggggccccgaaaaaccgggacccaaaccggggggaaacggttaaaggggggggaagtaggngnnnnnnnnnnnn + eee`__eeeeeeeeeeggaYYY_aeeeeffghghhhhhhhhhhhhhhhhhhhhhhheeeeeeeee^\a`_PPPWWOPP[[WWZ^``accb^^^cc````c`UUUc^ccc\\\\\``]^]][[[\[PPPWW[[^^^``^XTTT\`aaa__^\]]^__PPPSQYYcc`^^^ceeeeeeeeeeeeea````[[OOOOMQQ\NNNNWKLLPPPPPP;QRLLNQS;RVYUUUU[ZWQQNMMS;SUTQPPVVTTRMLMQR;QRPPQPPPQKKLKKQPP\\TLLLLLLKPQKKKKKKLKKKLPKKKKLKKPTTLLKKKKPRPPPMKKKKKKKKJJPPPMMPPMMPKKKKKKKKKJRKKKKKLLQQLLLLLNNLLLLTTNNHHNLLQQLLHHKKKKHHKKKKKKMPMKHKKKKHHHKKKKKKKKKKKKKKKKKKKKKKKGKKLKKKKKKGKKKKKHHNNMGKKKNNNKKKKKKKKKKKMGGRRLLLKKKKKKKKKKFOKKK;M;;;;;;;;;;;; @FSRRS4401ARCCB [length=258] [gc=46.90] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=193] tcagTTATTGCAGTCGTTCCGCGCCATCGCCGGTAACCGTCCGCGTGTTATTCTGTGTATCGGCCAACCTTCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACGATCTATACCGGCGAAACTCAGCCGAAAGGTCTCGCGGTAGAGCCTATGAGCTGCCCGACCGATGCATTTAAATTTCCGGGGATCGtcgctgatctgagactgccaaaggcacactagggggataggnnnnnnnnnnnnnnnnnnnn + eee[WYY_ceeeeeeeffecb`````a__OOOOSU[ZUURQQRUUVUQQSRRSW[[\^^SSSTYY]`a```_[[\\a\YTTTYaac^^\acccceeebbbbbbbeebccceeeeeca``\\WWWWOOOS][[[XXUWWZWWX[WWX^aaaa`\^^^ccaaa__^^WWWWXLLLQRVVVPKKKKKKKKLLPPTQ[[OOPTW`_][[[[[SRQVVVPPKKKLLRV\\\VTKLLLLRSUUU;;;;;;;;;;;;;;;;;;;; @FSRRS4401CM938 [length=453] [gc=44.15] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=418] 
tcagGTTTTAAATCGCTTTCCAAGGAATTTGAGTCTAAATCCGGTGGATCCCATCAGTACAAATGCGGCGACAAGGCCGTGAAAACACTGCTTAATTCTTTGCACTTTTTGGCCACCTTTTTGGAAATGTTGTTTTGTGTTCTCAAAATTTTCCATCTCAGAACAAACATTCCATCGGGCTGATGTTGTGGCTTTTGGCGCGCGAAGTGCTGCTACTGCGCGGCAAAATCAGTCGCCAGACCGGTTTTGTTGTGGACGACAAAGTGATCATGCCTGACTTGTACTTCTACCGCGATCCGCAAGCGCGAATTGGTCACATAGTTATAGAATTTTTGAGCCTTTTTCTTGACATAAAAAGTGTGGTTTTAAAAATTTCCTGGCAGGACCCACGCCAACGTTCAGGAATAATATCTTTTAAAAAGctgagactgccaaggcacacaggggataggn + hhhhhbb]]UUUhhhhbbbhhhhhhhhggghhhhhfUUUhhhhhhhhhhggghhhhhhhhbbbhhhhhhhhhhhhhhhhhh____hhhhhhhhhhhhhggghhhh^^^\ZhhddhYYNNNNNVTSSY^haaVQQSSdWddbdab\_______gghhhhhhhhhhaaahhhhhhhhhggghhhhhhhhhhhhhbbbbhhhhhhhhhhhhhhhhhhhhhhhhhhhhUUUUcdhhgda^^c_VVVVVQQQQcWXddhhhhhhhggghhhhhhhhggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggghhhhhhhhhhhhhhh\\^\\hhhhh^^^\ZhURcccWQLLKKKRW\\YYLLLLKKLLLJJJRROUUZ_URWOOOWNYWWX[Yafhhhhhhhhhed[^eTTOOLLLLLTYZZZY]^_b[[VXXXdddddd____ddddd; @FSRRS4401EQLIK [length=411] [gc=34.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=374] tcagTTTAATTTGGTGCTTCCTTTCAATTCCTTAGTTTAAACTTGGCACTGAAGTCTCGCATTTATAACTAGAGCCCGGATTTTAGAGGCTAAAAAGTTTTCCAGATTTCAAAATTTATTTCGAAACTATTTTTCTGATTGTGATGTGACGGATTTCTAAATTAAATCGAAATGATGTGTATTGAACTTAACAAGTGATTTTTATCAGATTTTGTCAATGAATAAATTTTAATTTAAATCTCTTTCTAACACTTTCATGATTAAAATCTAACAAAGCGCGACCAGTATGTGAGAAGAGCAAAAACAACAAAAAGTGCTAGCACTAAAGAAGGTTCGAACCCAACACATAACGTAAGAGTTACCGGGAAGAAAACCACTctgagactgccaaggcacacagggggataggnn + hhh^UUU^^ggghhhhhhhhhfffhhhhhhhhhhhfffggghhhhhhhhhhhhhhhhhhhhfffhhhhhhhhhhggghhh____hhhhdhdPPPPOOLLLLQQ^\WLLLYLLLLLLLKKKKRRLLLTYRKLLLLYPaadddghhhhhhhhhhha^^`PQQOOOMMMY^\OQSfhhhhhhhhhhhhhhhhhhdbbgga\NNLKKQP^^[TLOOQ\Ueaa^YX[\PPNSSSSNNLNNVV^^fdhddgh`bbhhhggghhhhhhhbbb`hhhgggggghhhhhhhhhhhhhhhhhhhhhhddPNNLLWQQZLLLLMVVV_dhhhhhh^^^hhhhhhhhhhhggghhhhhhhhhhhhhhhhhhhhXXSQQVVVTTTT`dZhdddddhhhhh^^XVTT]_\\YRKKKKKRRRRU;; @FSRRS4401AOV6A [length=309] [gc=22.98] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=273] 
tcagTTTTCAAATTTTCCGAAATTTGCTGTTTGGTAGAAGGCAAATTATTTGATTGAATTTTGTATTTATTTAAAACAATTTATTTTAAAATAATAATTTTCCATTGACTTTTTACATTTAATTGATTTTATTATGCATTTTATATTTGTTTTCTAAATATTCGTTTGCAAACTCACGTTGAAATTGTATTAAACTCGAAATTAGAGTTTTTGAAATTAATTTTTATGTAGCATAATATTTTAAACATATTGGAATTTTATAAAACATTATATTTTTctgagactgccaaggcacacagggggataggn + hhhhbbbbhZZZbbbbhhh^^^ggghhhhggghhhhhhhhhhggghhhggghhhhhhh____hehbbbhb``ZZZZdc^a__cUUSSTTTT[[[fhh]]``hhhhhhhhZZZYYhhh^^^bbbhhhZZZZheehhhhhbbbahahddcbSSSS^Saaad^dhhhbgghhZZZghhhhhhggZZZgghhhhhZZZhhhhggghhhhhh]]^^]hddaffYYPPPPNSUeaeaa^\Z\`^XVVVPPPXYd```ccacVVVV\NPPPPQQc`__aUWZZZhWgghhhhhZZZZ^]hdbbbaNNNNNZVST\; @FSRRS4401EG0ZW [length=424] [gc=23.82] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=389] tcagTTTTGATCTTTTAATAATGAATTTTAATGTGTTAAAATGATTGCATTGATGGCATAACCGCATTTAAATTAATTACATGAAGTGTAAGTATGAAATTTTCCTTTCCAAATTGCAAAAACTAAAATTTAAAATTTATCGTAAAAATTAACATATATTTTAAACGATTTTAAGAAACATTTGTAAATTATATTTTTGTGAAGCGTTCAAACAAAAATAAACAATAAAATATTTTTCTATTTAATAGCAAAACATTTGACGATGAAAAGGAAAATGCGGGTTTGAAAATGGGCTTTGCCATGCTATTTTCATAATAACATATTTTTATTATGAATAATAAATTTACATACAATATATACAGTCTTAAATTTATTCATAATATTTTTGAGAATctgagactgccaaggcacacaggggataggn + hh`XSSSTddhh\\\]hhhhhhhhhbbbbhhghhhbbZZZZhhhhhhhhhhhhhhhhhhhhhhhhheZZUUUcchhhhhhhhhhhhhhhhhhhddXSSSQQSS__UUUbb[[acc`\LLLLLQ[KKKKUTXNNOO\\\WbhhhZ]]\\ggZZhhhhhhbb__^^^hhh____hb^UUUghccbh^a^^bb[ddPPPPPaSaccbaZ\_aVVV]NNNNL\RQR^SQRKKKN\PKKKKLYSdZ^^dhhhhhbbbbh]ZZZhhhhhhh[[__^\NNNNV\`XXXWW[[SSTThdddhhhhhhhhhhhhh[XXXghhhhhhhhhhh^^^^^hhhhhhhhhhhb`bZTTTRXdhhhhhhhhhhhhhhhhggXXXgggh`\`ddee_\MMMMM`c___ccddddehhhZZZXVVeebbb_QSSSX^ecc; bio-1.4.3.0001/test/data/fastq/sanger_full_range_as_sanger.fastq0000644000004100000410000000104612200110570024431 0ustar www-datawww-data@FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ @FAKE0002 
Original version has PHRED scores from 93 to 0 inclusive (in that order) CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA + ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,+*)('&%$#"! bio-1.4.3.0001/test/data/fastq/solexa_full_range_as_sanger.fastq0000644000004100000410000000070212200110570024443 0ustar www-datawww-data@FAKE0003 Original version has Solexa scores from -5 to 62 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT + ""##$$%%&&'()*++,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ @FAKE0004 Original version has Solexa scores from 62 to -5 inclusive (in that order) TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA + _^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,++*)('&&%%$$##"" bio-1.4.3.0001/test/data/fastq/illumina_full_range_original_illumina.fastq0000644000004100000410000000065212200110570026522 0ustar www-datawww-data@FAKE0005 Original version has PHRED scores from 0 to 62 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG + @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ @FAKE0006 Original version has PHRED scores from 62 to 0 inclusive (in that order) GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA + ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@ bio-1.4.3.0001/test/data/fastq/solexa_full_range_as_illumina.fastq0000644000004100000410000000070212200110570024776 0ustar www-datawww-data@FAKE0003 Original version has Solexa scores from -5 to 62 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT + AABBCCDDEEFGHIJJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ @FAKE0004 Original version has Solexa scores from 62 to -5 inclusive (in that order) TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA + 
~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJJIHGFEEDDCCBBAA bio-1.4.3.0001/test/data/fastq/sanger_full_range_as_illumina.fastq0000644000004100000410000000104612200110570024764 0ustar www-datawww-data@FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC + @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order) CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@ bio-1.4.3.0001/test/data/fastq/error_short_qual.fastq0000644000004100000410000000113712200110570022325 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYS @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ bio-1.4.3.0001/test/data/fastq/error_qual_space.fastq0000644000004100000410000000114112200110570022254 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY 
@SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYY WWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ bio-1.4.3.0001/test/data/fastq/illumina_full_range_as_solexa.fastq0000644000004100000410000000065212200110570025002 0ustar www-datawww-data@FAKE0005 Original version has PHRED scores from 0 to 62 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG + ;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ @FAKE0006 Original version has PHRED scores from 62 to 0 inclusive (in that order) GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA + ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJHGFECB@>;; bio-1.4.3.0001/test/data/fastq/wrapping_as_solexa.fastq0000644000004100000410000000170312200110570022617 0ustar www-datawww-data@SRR014849.50939 EIXKN4201BA2EC length=135 GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG + Zb^Ld`N\[d`NaZ[aZc]UOKGB;[\YT[_W[aZ\aZ[Zd`SE_WeaUH[Y\[[\\\[\Z\aY`X[[aZ\aZ\d`OY[aY[[\[[e`WPJ@^UZ[`X\[R]T_V_W[`[Fa\H`\G[[Q^TVa\Ha\Hc^LY\S @SRR014849.110027 EIXKN4201APUB0 length=131 CTTCAAATGATTCCGGGACTGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTTCGGTTCCAACTCGCCGTCCGAATAATCCGTTCAAAATCTTGGCCTGTCAAAACGACTTTACGACCAGAACGATCCG + \aYY_[EY\T`X^Vd`OY\[[^U_V[R^T[_ZBc^La\GYYO\S[c^Ld`Nc_Q;ZaZaYaY`XZZ\[aZZ[aZ[aZ[aZY`Z[`ZWeaVJ\[aZaY`X[PY\eaUF[\[[d`OXTUZ[Q\\`W\\\Y_W\ @SRR014849.203935 EIXKN4201B4HU6 length=144 AACCCGTCCCATCAAAGATTTTGGTTGGAACCCGAAAGGGTTTTGAATTCAAACCCCTTTCGGTTCCAACTATTCAATTGTTTAACTTTTTTTAAATTGATGGTCTGTTGGACCATTTGTAATAATCCCCATCGGAATTTCTTT + `Z_ZBVT^Y>[[Xd`PZ\d`RBaZaZ`ZaZ_ZBXd`Pd`Pd`RB[aZ`ZWd`Oc_R@d`P\aZ`ZaZaZY\YaZYaY`XYd`O`X[e`WPJC;c^LaZS[YYN[Z\Y`XWLT^U\b]JW[[RZ\SYc`RB[Z\WLXM`\GYa\H 
bio-1.4.3.0001/test/data/fastq/longreads_as_sanger.fastq0000644000004100000410000002030412200110570022730 0ustar www-datawww-data@FSRRS4401BE7HA [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95] tcagTTAAGATGGGATAATATCCTCAGATTGCGTGATGAACTTTGTTCTGGTGGAGGAGAAGGAAGTGCATTCGACGTATGCCCGTTTGTCGATATTTGtatttaaagtaatccgtcacaaatcagtgacataaatattatttagatttcgggagcaactttatttattccacaagcaggtttaaattttaaatttaaattattgcagaagactttaaattaacctcgttgtcggagtcatttgttcggttattggtcgaaagtaaccncgggaagtgccgaaaactaacaaacaaaagaagatagtgaaattttaattaaaanaaatagccaaacgtaactaactaaaacggacccgtcgaggaactgccaacggacgacacagggagtagnnn + FFFDDDDDDDA666?688FFHGGIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIGHGFFFFF====DFFFFFFFFFFFFFFD???:3104/76=:5...4.3,,,366////4<;!!! @FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] tcagCCAGCAATTCCGACTTAATTGTTCTTCTTCCATCATTCATCTCGACTAACAGTTCTACGATTAATGAGTTTGGCtttaatttgttgttcattattgtcacaattacactactgagactgccaaggcacncagggataggnn + FFFFFFFFFDDDDFFFFGFDDDDBAAAAA=<4444@@B=555:BBBBB@@?8:8<<;;;;9944/!/4,,,57855!! @FSRRS4401B64ST [length=382] [gc=40.58] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=346] tcagTTTTCTTAAATTACTTGAATCTGTTGAAGTGGATGTCCACTTTTGTATGCCAAATATGCCCAGCGTATACGATCTTGGCCACATCTCCACATAATCATCAGTCGGATGCAAAAAGCGATTAAACTAAAAATGAATGCGTTTTTAGATGAGTAAATAGGTAATACTTTGTTTAAATAATAAATGTCACAAACAGAACGCGGATTACAGTACCTGAAAATAGTTGTACTGTATCTGTGCCGGCACTTCCTCGGCCCTGAGAAGTTGTCCCGTTGTTTCCATTCGCACCATCCAATGGCCAAAGTTTGCGAAGAATCTGTTCCGTTCCATTACCAATTGTTTTTCCATGctgagactgccaaggcacacaggggataggnn + IIIICCCCI??666IIIIIIIIIIIIIIIIIIIIIIIIIIIIII6666IAIIIII???IIIICCCIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII66333EICE::338=/----,8=>>??:2-////7>CEEIEIHHHIIIIIIIIIE;;9911199B???IBCHIIIIIIHHHIIHHHIIIIIIIIIIIIIIIIIIBBCCIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGGGIIIIIIIIID?===DIIIHHHIIIIIIIIIHHHIIIIIIIIIIHHHIHHHIIIIIIIIIIIIIIIIIIIIII?>;9988==5----.@@AEGIIIIIIIIIHH????EIIIFF999;EIIBB!! 
@FSRRS4401EJ0YH [length=381] [gc=48.29] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=343] tcagTTTTTGGAGAATTCCGTCAGGGACGGCATGGCATATTTGTGGGTTCGGCACGGCGTCCTGGCCAAGAAGAAGAAGACGAATTAGCCCGTTAATTTAATGACACCTTCCCCAATTTTGCAGCAATGATTGGTTCATTCTTGGCGGTGCGTTTTTGTGCTTCGTCGAATTGTTGGCCATTTTGGTCCACCGGCCATCATCTTTACGCTATCCGACTGATTGGAAATCACCGCCTAGCATTTTGCCGAAGATTGTTGCGTTGTACGGCCATGTGCTGATTGTTTACATTGGCATTCTTGGCAATTTGTCCTTGGTCGGCTTTGACGGCAAATTTGCGGTGTTAAGTctgagactgccaaggcacacagggggatagggnn + IIII?????IIIIIIIIIIIIIIHHHIIIIIIIIIIIIIHHHIIHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHHIIIIIHHHIIIIIIIIIIIAAAAII>>>>IIIIIIIIIIIIIIIIIIIIIIIIIIEEIEE;33333D7IIIIIIIIIIIIIIIIIIIIICC@@HHIIIIIIIIIIIIIIIIHHHIIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIIIIIBBBBIHCDCHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHHIIIHHCCDIIIIIIHHHIICCCH=CCIIIIIIIIIGGGIIIIIIHHHHHHIIIIIIIIIIIIIIIHHHIIHHE??>>?EFEE?/////;:80--!! @FSRRS4401BK0IB [length=507] [gc=49.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=208] tcagTTGACCGGCGTTGTGTAACAATAATTCATTATTCTGAGACGATGCCAATGTAATCGACGGTTTATGCCCAATTATTCCCATCTATGCTTAACTGATCAAATACTATTTGCATTACGTCACGAAATTGCGCGAACACCGCCGGCCGACAATAATTTATACCGGACATACCGGAGTTGATGGTAATCGGTAAAGAGTTTTATTTAATTATntattatcnctattaattattgttancaacaatgtgcacgctntgccgcccgccgccgccgtgtcggtaggaccccggacggacccggacccggttcgggtacccgttttcgggttcccggaaccgtttttcgggtacccggttttttcggggggccccccggtaaaaaaccggggaaccccctaaaacgggtaaacgtaccgtaagggaccccctaaacgggggccccgaaaaaccgggacccaaaccggggggaaacggttaaaggggggggaagtaggngnnnnnnnnnnnn + FFFA@@FFFFFFFFFFHHB:::@BFFFFGGHIHIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFF?=BA@11188011<<88;?AABDDC???DDAAAADA666D?DDD=====AA>?>><<<=<11188<>?@@11142::DDA???DFFFFFFFFFFFFFBAAAA<<0000.22=////8,--111111!23--/24!37:6666<;822/..4!46521177553.-.23!231121112,,-,,211==5------,12,,,,,,-,,,-1,,,,-,,155--,,,,13111.,,,,,,,,++111..11..1,,,,,,,,,+3,,,,,--22-----//----55//**/--22--**,,,,**,,,,,,.1.,*,,,,***,,,,,,,,,,,,,,,,,,,,,,,),,-,,,,,,),,,,,**//.),,,///,,,,,,,,,,,.))33---,,,,,,,,,,(0,,,!.!!!!!!!!!!!! 
@FSRRS4401ARCCB [length=258] [gc=46.90] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=193] tcagTTATTGCAGTCGTTCCGCGCCATCGCCGGTAACCGTCCGCGTGTTATTCTGTGTATCGGCCAACCTTCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACGATCTATACCGGCGAAACTCAGCCGAAAGGTCTCGCGGTAGAGCCTATGAGCTGCCCGACCGATGCATTTAAATTTCCGGGGATCGtcgctgatctgagactgccaaaggcacactagggggataggnnnnnnnnnnnnnnnnnnnn + FFF<8::@DFFFFFFFGGFDCAAAAAB@@000046<;66322366762243348<<=??4445::>ABAAA@<<==B=:555:BBD??=BDDDDFFFCCCCCCCFFCDDDFFFFFDBAA==88880004><<<99688;889<889?BBBBA=???DDBBB@@??88889---237771,,,,,,,,--1152<<00158A@><<<<<43277711,,,--37===75,----34666!!!!!!!!!!!!!!!!!!!! @FSRRS4401CM938 [length=453] [gc=44.15] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=418] tcagGTTTTAAATCGCTTTCCAAGGAATTTGAGTCTAAATCCGGTGGATCCCATCAGTACAAATGCGGCGACAAGGCCGTGAAAACACTGCTTAATTCTTTGCACTTTTTGGCCACCTTTTTGGAAATGTTGTTTTGTGTTCTCAAAATTTTCCATCTCAGAACAAACATTCCATCGGGCTGATGTTGTGGCTTTTGGCGCGCGAAGTGCTGCTACTGCGCGGCAAAATCAGTCGCCAGACCGGTTTTGTTGTGGACGACAAAGTGATCATGCCTGACTTGTACTTCTACCGCGATCCGCAAGCGCGAATTGGTCACATAGTTATAGAATTTTTGAGCCTTTTTCTTGACATAAAAAGTGTGGTTTTAAAAATTTCCTGGCAGGACCCACGCCAACGTTCAGGAATAATATCTTTTAAAAAGctgagactgccaaggcacacaggggataggn + IIIIICC>>666IIIICCCIIIIIIIIHHHIIIIIG666IIIIIIIIIIHHHIIIIIIIICCCIIIIIIIIIIIIIIIIII@@@@IIIIIIIIIIIIIHHHIIII???=;IIEEI:://///7544:?IBB72244E8EECEBC=@@@@@@@HHIIIIIIIIIIBBBIIIIIIIIIHHHIIIIIIIIIIIIICCCCIIIIIIIIIIIIIIIIIIIIIIIIIIII6666DEIIHEB??D@777772222D89EEIIIIIIIHHHIIIIIIIIHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIIIIIII==?==IIIII???=;I63DDD82--,,,38==::----,,---+++33066;@6380008/:889<:BGIIIIIIIIIFE?@C<<7999EEEEEE@@@@EEEEE! 
@FSRRS4401EQLIK [length=411] [gc=34.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=374] tcagTTTAATTTGGTGCTTCCTTTCAATTCCTTAGTTTAAACTTGGCACTGAAGTCTCGCATTTATAACTAGAGCCCGGATTTTAGAGGCTAAAAAGTTTTCCAGATTTCAAAATTTATTTCGAAACTATTTTTCTGATTGTGATGTGACGGATTTCTAAATTAAATCGAAATGATGTGTATTGAACTTAACAAGTGATTTTTATCAGATTTTGTCAATGAATAAATTTTAATTTAAATCTCTTTCTAACACTTTCATGATTAAAATCTAACAAAGCGCGACCAGTATGTGAGAAGAGCAAAAACAACAAAAAGTGCTAGCACTAAAGAAGGTTCGAACCCAACACATAACGTAAGAGTTACCGGGAAGAAAACCACTctgagactgccaaggcacacagggggataggnn + III?666??HHHIIIIIIIIIGGGIIIIIIIIIIIGGGHHHIIIIIIIIIIIIIIIIIIIIGGGIIIIIIIIIIHHHIII@@@@IIIIEIE111100----22?=8---:-------,,,,33---5:3,----:1BBEEEHIIIIIIIIIIIB??A122000...:?=024GIIIIIIIIIIIIIIIIIIECCHHB=//-,,21??<5-002=6FBB?:9<=11/4444//-//77??GEIEEHIACCIIIHHHIIIIIIICCCAIIIHHHHHHIIIIIIIIIIIIIIIIIIIIIIEE1//--822;----.777@EIIIIII???IIIIIIIIIIIHHHIIIIIIIIIIIIIIIIIIII994227775555AE;IEEEEEIIIII??9755>@==:3,,,,,33336!! @FSRRS4401AOV6A [length=309] [gc=22.98] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=273] tcagTTTTCAAATTTTCCGAAATTTGCTGTTTGGTAGAAGGCAAATTATTTGATTGAATTTTGTATTTATTTAAAACAATTTATTTTAAAATAATAATTTTCCATTGACTTTTTACATTTAATTGATTTTATTATGCATTTTATATTTGTTTTCTAAATATTCGTTTGCAAACTCACGTTGAAATTGTATTAAACTCGAAATTAGAGTTTTTGAAATTAATTTTTATGTAGCATAATATTTTAAACATATTGGAATTTTATAAAACATTATATTTTTctgagactgccaaggcacacagggggataggn + IIIICCCCI;;;CCCCIII???HHHIIIIHHHIIIIIIIIIIHHHIIIHHHIIIIIII@@@@IFICCCICAA;;;;ED?B@@D66445555<<>AAIIIIIIII;;;::III???CCCIII;;;;IFFIIIIICCCBIBIEEDC4444?4BBBE?EIIICHHII;;;HIIIIIIHH;;;HHIIIII;;;IIIIHHHIIIIII>>??>IEEBGG::1111/46FBFBB?=;=A?97771119:EAAADDBD7777=/111122DA@@B68;;;I8HHIIIII;;;;?>IECCCB/////;745=! 
@FSRRS4401EG0ZW [length=424] [gc=23.82] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=389] tcagTTTTGATCTTTTAATAATGAATTTTAATGTGTTAAAATGATTGCATTGATGGCATAACCGCATTTAAATTAATTACATGAAGTGTAAGTATGAAATTTTCCTTTCCAAATTGCAAAAACTAAAATTTAAAATTTATCGTAAAAATTAACATATATTTTAAACGATTTTAAGAAACATTTGTAAATTATATTTTTGTGAAGCGTTCAAACAAAAATAAACAATAAAATATTTTTCTATTTAATAGCAAAACATTTGACGATGAAAAGGAAAATGCGGGTTTGAAAATGGGCTTTGCCATGCTATTTTCATAATAACATATTTTTATTATGAATAATAAATTTACATACAATATATACAGTCTTAAATTTATTCATAATATTTTTGAGAATctgagactgccaaggcacacaggggataggn + IIA94445EEII===>IIIIIIIIICCCCIIHIIICC;;;;IIIIIIIIIIIIIIIIIIIIIIIIIF;;666DDIIIIIIIIIIIIIIIIIIIEE94442244@@666CC<>==HH;;IIIIIICC@@???III@@@@IC?666HIDDCI?B??CC////-=323?423,,,/=1,,,,-:4E;??EIIIIICCCCI>;;;IIIIIII<<@@?=////7=A99988<<4455IEEEIIIIIIIIIIIII<999HIIIIIIIIIII?????IIIIIIIIIIICAC;55539EIIIIIIIIIIIIIIIIHH999HHHIA=AEEFF@=.....AD@@@DDEEEEFIII;;;977FFCCC@24449?FDD! bio-1.4.3.0001/test/data/fastq/error_no_qual.fastq0000644000004100000410000000074212200110570021603 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGC +SLXA-B3_649_FC8437_R1_1_1_850_123 @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 bio-1.4.3.0001/test/data/fastq/wrapping_as_sanger.fastq0000644000004100000410000000170312200110570022603 0ustar www-datawww-data@SRR014849.50939 EIXKN4201BA2EC length=135 GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG + ;C?-EA/=60,)%"<=:5<@85@7@8+8<<3;=4:DA3%<;=8-9.A=):B=* bio-1.4.3.0001/test/data/fastq/misc_dna_as_illumina.fastq0000644000004100000410000000137712200110570023073 0ustar www-datawww-data@FAKE0007 
Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTA + @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh @FAKE0008 Original version has mixed case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) gTcatAGcgTcatAGcgTcatAGcgTcatAGcgTcatAGcg + @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh @FAKE0009 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) tcagtcagtcagtcagtcagtcagtcagtcagtcagtcagt + @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh @FAKE0010 Original version has mixed case ambiguous DNA and PHRED scores of 40, 30, 20, 10 (cycled) gatcrywsmkhbvdnGATCRYWSMKHBVDN + h^TJh^TJh^TJh^TJh^TJh^TJh^TJh^ bio-1.4.3.0001/test/data/fastq/error_long_qual.fastq0000644000004100000410000000114112200110570022120 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWYY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ bio-1.4.3.0001/test/data/fastq/error_qual_tab.fastq0000644000004100000410000000114112200110570021727 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY 
@SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYY YYYYWYWWUWWWQQ bio-1.4.3.0001/test/data/fastq/error_trunc_at_plus.fastq0000644000004100000410000000104412200110570023023 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA bio-1.4.3.0001/test/data/fastq/error_qual_null.fastq0000644000004100000410000000114212200110570022134 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ bio-1.4.3.0001/test/data/fastq/wrapping_original_sanger.fastq0000644000004100000410000000171712200110570024011 0ustar www-datawww-data@SRR014849.50939 EIXKN4201BA2EC length=135 GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG + ;C?-EA/=60,)%"<=:5< @85@7@8+8< 
<3;=4:DA3%<;=8-9.A=):B=* bio-1.4.3.0001/test/data/fastq/misc_dna_as_solexa.fastq0000644000004100000410000000137712200110570022554 0ustar www-datawww-data@FAKE0007 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTA + ;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh @FAKE0008 Original version has mixed case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) gTcatAGcgTcatAGcgTcatAGcgTcatAGcgTcatAGcg + ;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh @FAKE0009 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) tcagtcagtcagtcagtcagtcagtcagtcagtcagtcagt + ;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh @FAKE0010 Original version has mixed case ambiguous DNA and PHRED scores of 40, 30, 20, 10 (cycled) gatcrywsmkhbvdnGATCRYWSMKHBVDN + h^TJh^TJh^TJh^TJh^TJh^TJh^TJh^ bio-1.4.3.0001/test/data/fastq/error_trunc_in_title.fastq0000644000004100000410000000100112200110570023154 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_bio-1.4.3.0001/test/data/fastq/error_tabs.fastq0000644000004100000410000000116512200110570021076 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAA TACCTTTGTA GAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYY YYYYYYYYYW YWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGA AAGAGAAATG AGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYY WYYYYWWYYY WYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTT GATCATGATG ATGGCG 
+SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYY YYYYWYYWYY SYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAA GTTTTTCTCA ACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYY YYYYYYYYYW WWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTT AATGGCATAC ACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYY YWYYYYWYWW UWWWQQ bio-1.4.3.0001/test/data/fastq/error_trunc_in_qual.fastq0000644000004100000410000000113712200110570023007 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQ bio-1.4.3.0001/test/data/fastq/error_qual_del.fastq0000644000004100000410000000114012200110570021724 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ bio-1.4.3.0001/test/data/fastq/error_spaces.fastq0000644000004100000410000000116412200110570021422 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAA TACCTTTGTA GAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 
YYYYYYYYY YYYYYYYYYW YWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGA AAGAGAAATG AGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYY WYYYYWWYYY WYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTT GATCATGATG ATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYY YYYYWYYWYY SYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAA GTTTTTCTCA ACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYY YYYYYYYYYW WWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTT AATGGCATAC ACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYY YWYYYYWYWW UWWWQQ bio-1.4.3.0001/test/data/fastq/solexa_full_range_as_solexa.fastq0000644000004100000410000000070212200110570024457 0ustar www-datawww-data@FAKE0003 Original version has Solexa scores from -5 to 62 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT + ;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ @FAKE0004 Original version has Solexa scores from 62 to -5 inclusive (in that order) TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA + ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<; bio-1.4.3.0001/test/data/fastq/misc_rna_original_sanger.fastq0000644000004100000410000000140612200110570023750 0ustar www-datawww-data@FAKE0011 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) ACGUACGUACGUACGUACGUACGUACGUACGUACGUACGUA + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0012 Original version has mixed case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) gUcauAGcgUcauAGcgUcauAGcgUcauAGcgUcauAGcg + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0013 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) ucagucagucagucagucagucagucagucagucagucagu + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0014 Original version has mixed case ambiguous RNA with PHRED scores from 35 to 40 inclusive (cycled) gaucrywsmkhbvdnGAUCRYWSMKHBVDN + 
DEFGHIDEFGHIDEFGHIDEFGHIDEFGHI bio-1.4.3.0001/test/data/fastq/error_qual_vtab.fastq0000644000004100000410000000114012200110570022114 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYY YYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ bio-1.4.3.0001/test/data/fastq/error_qual_unit_sep.fastq0000644000004100000410000000114012200110570023006 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ bio-1.4.3.0001/test/data/fastq/error_trunc_in_plus.fastq0000644000004100000410000000106212200110570023025 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY 
@SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FCbio-1.4.3.0001/test/data/fastq/error_trunc_in_seq.fastq0000644000004100000410000000102712200110570022633 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGbio-1.4.3.0001/test/data/fastq/error_trunc_at_qual.fastq0000644000004100000410000000110612200110570023001 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 bio-1.4.3.0001/test/data/fastq/error_double_seq.fastq0000644000004100000410000000123512200110570022265 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 
GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ bio-1.4.3.0001/test/data/fastq/error_diff_ids.fastq0000644000004100000410000000114012200110570021705 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_124 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ bio-1.4.3.0001/test/data/fastq/illumina_full_range_as_illumina.fastq0000644000004100000410000000065212200110570025321 0ustar www-datawww-data@FAKE0005 Original version has PHRED scores from 0 to 62 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG + @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ @FAKE0006 Original version has PHRED scores from 62 to 0 inclusive (in that order) GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA + ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@ bio-1.4.3.0001/test/data/fastq/solexa_full_range_original_solexa.fastq0000644000004100000410000000070212200110570025660 0ustar www-datawww-data@FAKE0003 Original version has Solexa scores from -5 to 62 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT + 
;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ @FAKE0004 Original version has Solexa scores from 62 to -5 inclusive (in that order) TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA + ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<; bio-1.4.3.0001/test/data/fastq/error_qual_escape.fastq0000644000004100000410000000114012200110570022420 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYWYYYYWYWWUWWWQQ bio-1.4.3.0001/test/data/fastq/misc_rna_as_illumina.fastq0000644000004100000410000000140612200110570023102 0ustar www-datawww-data@FAKE0011 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) ACGUACGUACGUACGUACGUACGUACGUACGUACGUACGUA + @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh @FAKE0012 Original version has mixed case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) gUcauAGcgUcauAGcgUcauAGcgUcauAGcgUcauAGcg + @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh @FAKE0013 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) ucagucagucagucagucagucagucagucagucagucagu + @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh @FAKE0014 Original version has mixed case ambiguous RNA with PHRED scores from 35 to 40 inclusive (cycled) gaucrywsmkhbvdnGAUCRYWSMKHBVDN + cdefghcdefghcdefghcdefghcdefgh 
bio-1.4.3.0001/test/data/fastq/wrapping_as_illumina.fastq0000644000004100000410000000170312200110570023136 0ustar www-datawww-data@SRR014849.50939 EIXKN4201BA2EC length=135 GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG + Zb^Ld`N\[d`NaZ[aZc]UOKHDA[\YT[_W[aZ\aZ[Zd`SF_WeaUI[Y\[[\\\[\Z\aY`X[[aZ\aZ\d`OY[aY[[\[[e`WPJC^UZ[`X\[R]T_V_W[`[Ga\I`\H[[Q^TVa\Ia\Ic^LY\S @SRR014849.110027 EIXKN4201APUB0 length=131 CTTCAAATGATTCCGGGACTGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTTCGGTTCCAACTCGCCGTCCGAATAATCCGTTCAAAATCTTGGCCTGTCAAAACGACTTTACGACCAGAACGATCCG + \aYY_[FY\T`X^Vd`OY\[[^U_V[R^T[_ZDc^La\HYYO\S[c^Ld`Nc_QAZaZaYaY`XZZ\[aZZ[aZ[aZ[aZY`Z[`ZWeaVJ\[aZaY`X[PY\eaUG[\[[d`OXTUZ[Q\\`W\\\Y_W\ @SRR014849.203935 EIXKN4201B4HU6 length=144 AACCCGTCCCATCAAAGATTTTGGTTGGAACCCGAAAGGGTTTTGAATTCAAACCCCTTTCGGTTCCAACTATTCAATTGTTTAACTTTTTTTAAATTGATGGTCTGTTGGACCATTTGTAATAATCCCCATCGGAATTTCTTT + `Z_ZDVT^YB[[Xd`PZ\d`RDaZaZ`ZaZ_ZDXd`Pd`Pd`RD[aZ`ZWd`Oc_RCd`P\aZ`ZaZaZY\YaZYaY`XYd`O`X[e`WPJEAc^LaZS[YYN[Z\Y`XWLT^U\b]JW[[RZ\SYc`RD[Z\WLXM`\HYa\I bio-1.4.3.0001/test/data/fastq/misc_dna_original_sanger.fastq0000644000004100000410000000137712200110570023741 0ustar www-datawww-data@FAKE0007 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTA + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0008 Original version has mixed case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) gTcatAGcgTcatAGcgTcatAGcgTcatAGcgTcatAGcg + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0009 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) tcagtcagtcagtcagtcagtcagtcagtcagtcagtcagt + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0010 Original version has mixed case ambiguous DNA and PHRED scores of 40, 30, 20, 10 (cycled) gatcrywsmkhbvdnGATCRYWSMKHBVDN + I?5+I?5+I?5+I?5+I?5+I?5+I?5+I? 
bio-1.4.3.0001/test/data/fastq/sanger_full_range_original_sanger.fastq0000644000004100000410000000104612200110570025632 0ustar www-datawww-data@FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ @FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order) CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA + ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,+*)('&%$#"! bio-1.4.3.0001/test/data/fastq/sanger_full_range_as_solexa.fastq0000644000004100000410000000104612200110570024445 0ustar www-datawww-data@FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC + ;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order) CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJHGFECB@>;; bio-1.4.3.0001/test/data/fastq/misc_dna_as_sanger.fastq0000644000004100000410000000137712200110570022540 0ustar www-datawww-data@FAKE0007 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTA + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0008 Original version has mixed case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) gTcatAGcgTcatAGcgTcatAGcgTcatAGcgTcatAGcg + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0009 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 
inclusive (in that order) tcagtcagtcagtcagtcagtcagtcagtcagtcagtcagt + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0010 Original version has mixed case ambiguous DNA and PHRED scores of 40, 30, 20, 10 (cycled) gatcrywsmkhbvdnGATCRYWSMKHBVDN + I?5+I?5+I?5+I?5+I?5+I?5+I?5+I? bio-1.4.3.0001/test/data/fastq/longreads_as_illumina.fastq0000644000004100000410000002030412200110570023263 0ustar www-datawww-data@FSRRS4401BE7HA [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95] tcagTTAAGATGGGATAATATCCTCAGATTGCGTGATGAACTTTGTTCTGGTGGAGGAGAAGGAAGTGCATTCGACGTATGCCCGTTTGTCGATATTTGtatttaaagtaatccgtcacaaatcagtgacataaatattatttagatttcgggagcaactttatttattccacaagcaggtttaaattttaaatttaaattattgcagaagactttaaattaacctcgttgtcggagtcatttgttcggttattggtcgaaagtaaccncgggaagtgccgaaaactaacaaacaaaagaagatagtgaaattttaattaaaanaaatagccaaacgtaactaactaaaacggacccgtcgaggaactgccaacggacgacacagggagtagnnn + eeeccccccc`UUU^UWWeegffhhhhhhhhhhhhhhhhhhggghhhhhhhhhfgfeeeee\\\\ceeeeeeeeeeeeeec^^^YRPOSNVU\YTMMMSMRKKKRUUNNNNS[`aa```\bbeccccccccYUUUbceeee\[`a`\ZYRRRPPP[\\\XXZaWWXeeeeeeccacaccc\WWSSQRPMMKKKLKKKKKKKKPPRRMMLLLPVPPPKKKKKQQTTTPRPPQPMLLMKRRRPPKMKKRLLKKMKKLLKRTPPPQRMMLL@KKKKLLKLLLLXKKKKW\KKLKKKLKKKKLLLQUYXYTLMMPKKKKPPNNKKKK@KKPXPVLLKKKKLRMKLLKKPVKKKKLLLJPPPPRMOROOOOKKKOSSSOOORUZXUUUQMNNZV][Z@@@ @FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] tcagCCAGCAATTCCGACTTAATTGTTCTTCTTCCATCATTCATCTCGACTAACAGTTCTACGATTAATGAGTTTGGCtttaatttgttgttcattattgtcacaattacactactgagactgccaaggcacncagggataggnn + eeeeeeeeecccceeeefecccca`````\[SSSS__a\TTTYaaaaa__^WYW[^[WXWXW[WSSSQZ\\RKKKTPSKKKPPKKKMKKQPVVVTTTTPRKMMLLPPPTVTWMNNRSSWW][[ZZZZXXSSN@NSKKKTVWTT@@ @FSRRS4401B64ST [length=382] [gc=40.58] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=346] 
tcagTTTTCTTAAATTACTTGAATCTGTTGAAGTGGATGTCCACTTTTGTATGCCAAATATGCCCAGCGTATACGATCTTGGCCACATCTCCACATAATCATCAGTCGGATGCAAAAAGCGATTAAACTAAAAATGAATGCGTTTTTAGATGAGTAAATAGGTAATACTTTGTTTAAATAATAAATGTCACAAACAGAACGCGGATTACAGTACCTGAAAATAGTTGTACTGTATCTGTGCCGGCACTTCCTCGGCCCTGAGAAGTTGTCCCGTTGTTTCCATTCGCACCATCCAATGGCCAAAGTTTGCGAAGAATCTGTTCCGTTCCATTACCAATTGTTTTTCCATGctgagactgccaaggcacacaggggataggnn + hhhhbbbbh^^UUUhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhUUUUh`hhhhh^^^hhhhbbbhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhUURRRdhbdYYRRW\NLLLLKW\]]^^YQLNNNNV]bddhdhggghhhhhhhhhdZZXXPPPXXa^^^habghhhhhhggghhggghhhhhhhhhhhhhhhhhhaabbhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhfffhhhhhhhhhc^\\\chhhggghhhhhhhhhggghhhhhhhhhhggghggghhhhhhhhhhhhhhhhhhhhhh^]ZXXWW\\TLLLLM__`dfhhhhhhhhhgg^^^^dhhheeXXXZdhhaa@@ @FSRRS4401EJ0YH [length=381] [gc=48.29] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=343] tcagTTTTTGGAGAATTCCGTCAGGGACGGCATGGCATATTTGTGGGTTCGGCACGGCGTCCTGGCCAAGAAGAAGAAGACGAATTAGCCCGTTAATTTAATGACACCTTCCCCAATTTTGCAGCAATGATTGGTTCATTCTTGGCGGTGCGTTTTTGTGCTTCGTCGAATTGTTGGCCATTTTGGTCCACCGGCCATCATCTTTACGCTATCCGACTGATTGGAAATCACCGCCTAGCATTTTGCCGAAGATTGTTGCGTTGTACGGCCATGTGCTGATTGTTTACATTGGCATTCTTGGCAATTTGTCCTTGGTCGGCTTTGACGGCAAATTTGCGGTGTTAAGTctgagactgccaaggcacacagggggatagggnn + hhhh^^^^^hhhhhhhhhhhhhhggghhhhhhhhhhhhhggghhggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggghhhhhggghhhhhhhhhhh````hh]]]]hhhhhhhhhhhhhhhhhhhhhhhhhhddhddZRRRRRcVhhhhhhhhhhhhhhhhhhhhhbb__gghhhhhhhhhhhhhhhhggghhhhhhhhhhhhhhhhhhhggghhhhhhhhhhhhhaaaahgbcbghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggghhhggbbchhhhhhggghhbbbg\bbhhhhhhhhhfffhhhhhhgggggghhhhhhhhhhhhhhhggghhggd^^]]^dedd^NNNNNZYWOLL@@ @FSRRS4401BK0IB [length=507] [gc=49.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=208] 
tcagTTGACCGGCGTTGTGTAACAATAATTCATTATTCTGAGACGATGCCAATGTAATCGACGGTTTATGCCCAATTATTCCCATCTATGCTTAACTGATCAAATACTATTTGCATTACGTCACGAAATTGCGCGAACACCGCCGGCCGACAATAATTTATACCGGACATACCGGAGTTGATGGTAATCGGTAAAGAGTTTTATTTAATTATntattatcnctattaattattgttancaacaatgtgcacgctntgccgcccgccgccgccgtgtcggtaggaccccggacggacccggacccggttcgggtacccgttttcgggttcccggaaccgtttttcgggtacccggttttttcggggggccccccggtaaaaaaccggggaaccccctaaaacgggtaaacgtaccgtaagggaccccctaaacgggggccccgaaaaaccgggacccaaaccggggggaaacggttaaaggggggggaagtaggngnnnnnnnnnnnn + eee`__eeeeeeeeeeggaYYY_aeeeeffghghhhhhhhhhhhhhhhhhhhhhhheeeeeeeee^\a`_PPPWWOPP[[WWZ^``accb^^^cc````c`UUUc^ccc\\\\\``]^]][[[\[PPPWW[[^^^``^XTTT\`aaa__^\]]^__PPPSQYYcc`^^^ceeeeeeeeeeeeea````[[OOOOMQQ\NNNNWKLLPPPPPP@QRLLNQS@RVYUUUU[ZWQQNMMS@SUTQPPVVTTRMLMQR@QRPPQPPPQKKLKKQPP\\TLLLLLLKPQKKKKKKLKKKLPKKKKLKKPTTLLKKKKPRPPPMKKKKKKKKJJPPPMMPPMMPKKKKKKKKKJRKKKKKLLQQLLLLLNNLLLLTTNNIINLLQQLLIIKKKKIIKKKKKKMPMKIKKKKIIIKKKKKKKKKKKKKKKKKKKKKKKHKKLKKKKKKHKKKKKIINNMHKKKNNNKKKKKKKKKKKMHHRRLLLKKKKKKKKKKGOKKK@M@@@@@@@@@@@@ @FSRRS4401ARCCB [length=258] [gc=46.90] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=193] tcagTTATTGCAGTCGTTCCGCGCCATCGCCGGTAACCGTCCGCGTGTTATTCTGTGTATCGGCCAACCTTCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACGATCTATACCGGCGAAACTCAGCCGAAAGGTCTCGCGGTAGAGCCTATGAGCTGCCCGACCGATGCATTTAAATTTCCGGGGATCGtcgctgatctgagactgccaaaggcacactagggggataggnnnnnnnnnnnnnnnnnnnn + eee[WYY_ceeeeeeeffecb`````a__OOOOSU[ZUURQQRUUVUQQSRRSW[[\^^SSSTYY]`a```_[[\\a\YTTTYaac^^\acccceeebbbbbbbeebccceeeeeca``\\WWWWOOOS][[[XXUWWZWWX[WWX^aaaa`\^^^ccaaa__^^WWWWXLLLQRVVVPKKKKKKKKLLPPTQ[[OOPTW`_][[[[[SRQVVVPPKKKLLRV\\\VTKLLLLRSUUU@@@@@@@@@@@@@@@@@@@@ @FSRRS4401CM938 [length=453] [gc=44.15] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=418] 
tcagGTTTTAAATCGCTTTCCAAGGAATTTGAGTCTAAATCCGGTGGATCCCATCAGTACAAATGCGGCGACAAGGCCGTGAAAACACTGCTTAATTCTTTGCACTTTTTGGCCACCTTTTTGGAAATGTTGTTTTGTGTTCTCAAAATTTTCCATCTCAGAACAAACATTCCATCGGGCTGATGTTGTGGCTTTTGGCGCGCGAAGTGCTGCTACTGCGCGGCAAAATCAGTCGCCAGACCGGTTTTGTTGTGGACGACAAAGTGATCATGCCTGACTTGTACTTCTACCGCGATCCGCAAGCGCGAATTGGTCACATAGTTATAGAATTTTTGAGCCTTTTTCTTGACATAAAAAGTGTGGTTTTAAAAATTTCCTGGCAGGACCCACGCCAACGTTCAGGAATAATATCTTTTAAAAAGctgagactgccaaggcacacaggggataggn + hhhhhbb]]UUUhhhhbbbhhhhhhhhggghhhhhfUUUhhhhhhhhhhggghhhhhhhhbbbhhhhhhhhhhhhhhhhhh____hhhhhhhhhhhhhggghhhh^^^\ZhhddhYYNNNNNVTSSY^haaVQQSSdWddbdab\_______gghhhhhhhhhhaaahhhhhhhhhggghhhhhhhhhhhhhbbbbhhhhhhhhhhhhhhhhhhhhhhhhhhhhUUUUcdhhgda^^c_VVVVVQQQQcWXddhhhhhhhggghhhhhhhhggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggghhhhhhhhhhhhhhh\\^\\hhhhh^^^\ZhURcccWQLLKKKRW\\YYLLLLKKLLLJJJRROUUZ_URWOOOWNYWWX[Yafhhhhhhhhhed[^eTTOOLLLLLTYZZZY]^_b[[VXXXdddddd____ddddd@ @FSRRS4401EQLIK [length=411] [gc=34.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=374] tcagTTTAATTTGGTGCTTCCTTTCAATTCCTTAGTTTAAACTTGGCACTGAAGTCTCGCATTTATAACTAGAGCCCGGATTTTAGAGGCTAAAAAGTTTTCCAGATTTCAAAATTTATTTCGAAACTATTTTTCTGATTGTGATGTGACGGATTTCTAAATTAAATCGAAATGATGTGTATTGAACTTAACAAGTGATTTTTATCAGATTTTGTCAATGAATAAATTTTAATTTAAATCTCTTTCTAACACTTTCATGATTAAAATCTAACAAAGCGCGACCAGTATGTGAGAAGAGCAAAAACAACAAAAAGTGCTAGCACTAAAGAAGGTTCGAACCCAACACATAACGTAAGAGTTACCGGGAAGAAAACCACTctgagactgccaaggcacacagggggataggnn + hhh^UUU^^ggghhhhhhhhhfffhhhhhhhhhhhfffggghhhhhhhhhhhhhhhhhhhhfffhhhhhhhhhhggghhh____hhhhdhdPPPPOOLLLLQQ^\WLLLYLLLLLLLKKKKRRLLLTYRKLLLLYPaadddghhhhhhhhhhha^^`PQQOOOMMMY^\OQSfhhhhhhhhhhhhhhhhhhdbbgga\NNLKKQP^^[TLOOQ\Ueaa^YX[\PPNSSSSNNLNNVV^^fdhddgh`bbhhhggghhhhhhhbbb`hhhgggggghhhhhhhhhhhhhhhhhhhhhhddPNNLLWQQZLLLLMVVV_dhhhhhh^^^hhhhhhhhhhhggghhhhhhhhhhhhhhhhhhhhXXSQQVVVTTTT`dZhdddddhhhhh^^XVTT]_\\YRKKKKKRRRRU@@ @FSRRS4401AOV6A [length=309] [gc=22.98] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=273] 
tcagTTTTCAAATTTTCCGAAATTTGCTGTTTGGTAGAAGGCAAATTATTTGATTGAATTTTGTATTTATTTAAAACAATTTATTTTAAAATAATAATTTTCCATTGACTTTTTACATTTAATTGATTTTATTATGCATTTTATATTTGTTTTCTAAATATTCGTTTGCAAACTCACGTTGAAATTGTATTAAACTCGAAATTAGAGTTTTTGAAATTAATTTTTATGTAGCATAATATTTTAAACATATTGGAATTTTATAAAACATTATATTTTTctgagactgccaaggcacacagggggataggn + hhhhbbbbhZZZbbbbhhh^^^ggghhhhggghhhhhhhhhhggghhhggghhhhhhh____hehbbbhb``ZZZZdc^a__cUUSSTTTT[[[fhh]]``hhhhhhhhZZZYYhhh^^^bbbhhhZZZZheehhhhhbbbahahddcbSSSS^Saaad^dhhhbgghhZZZghhhhhhggZZZgghhhhhZZZhhhhggghhhhhh]]^^]hddaffYYPPPPNSUeaeaa^\Z\`^XVVVPPPXYd```ccacVVVV\NPPPPQQc`__aUWZZZhWgghhhhhZZZZ^]hdbbbaNNNNNZVST\@ @FSRRS4401EG0ZW [length=424] [gc=23.82] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=389] tcagTTTTGATCTTTTAATAATGAATTTTAATGTGTTAAAATGATTGCATTGATGGCATAACCGCATTTAAATTAATTACATGAAGTGTAAGTATGAAATTTTCCTTTCCAAATTGCAAAAACTAAAATTTAAAATTTATCGTAAAAATTAACATATATTTTAAACGATTTTAAGAAACATTTGTAAATTATATTTTTGTGAAGCGTTCAAACAAAAATAAACAATAAAATATTTTTCTATTTAATAGCAAAACATTTGACGATGAAAAGGAAAATGCGGGTTTGAAAATGGGCTTTGCCATGCTATTTTCATAATAACATATTTTTATTATGAATAATAAATTTACATACAATATATACAGTCTTAAATTTATTCATAATATTTTTGAGAATctgagactgccaaggcacacaggggataggn + hh`XSSSTddhh\\\]hhhhhhhhhbbbbhhghhhbbZZZZhhhhhhhhhhhhhhhhhhhhhhhhheZZUUUcchhhhhhhhhhhhhhhhhhhddXSSSQQSS__UUUbb[[acc`\LLLLLQ[KKKKUTXNNOO\\\WbhhhZ]]\\ggZZhhhhhhbb__^^^hhh____hb^UUUghccbh^a^^bb[ddPPPPPaSaccbaZ\_aVVV]NNNNL\RQR^SQRKKKN\PKKKKLYSdZ^^dhhhhhbbbbh]ZZZhhhhhhh[[__^\NNNNV\`XXXWW[[SSTThdddhhhhhhhhhhhhh[XXXghhhhhhhhhhh^^^^^hhhhhhhhhhhb`bZTTTRXdhhhhhhhhhhhhhhhhggXXXgggh`\`ddee_\MMMMM`c___ccddddehhhZZZXVVeebbb_QSSSX^ecc@ bio-1.4.3.0001/test/data/fastq/README.txt0000644000004100000410000001152712200110570017375 0ustar www-datawww-dataThis README file describes the FASTQ example files provided as supplementary information to the open-access publication: P.J.A. Cock, C.J. Fields, N. Goto, M.L. Heuer and P.M. Rice (2009). The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. 
These files are provided freely and we encourage anyone writing a FASTQ parser to use them as part of your test suite. Permission is granted to freely distribute and modify the files. We request (but do not insist) that this README file is included, or at least a reference to the above paper. Please cite the above paper if appropriate. We also request (but do not insist) that the example files are not modified, in order that they may serve as a common reference. Invalid FASTQ files =================== The archive contains the following sample FASTQ files with names of the form error_NAME.fastq, which all contain errors and should be rejected (if parsed as any of the three FASTQ variants): error_diff_ids.fastq error_double_qual.fastq error_double_seq.fastq error_long_qual.fastq error_no_qual.fastq error_qual_del.fastq error_qual_escape.fastq error_qual_null.fastq error_qual_space.fastq error_qual_tab.fastq error_qual_unit_sep.fastq error_qual_vtab.fastq error_short_qual.fastq error_spaces.fastq error_tabs.fastq error_trunc_at_seq.fastq error_trunc_at_plus.fastq error_trunc_at_qual.fastq error_trunc_in_title.fastq error_trunc_in_seq.fastq error_trunc_in_plus.fastq error_trunc_in_qual.fastq Of these, those with names error_qual_XXX.fastq would be valid except for the inclusion of spaces or non-printing ASCII characters outside the range allowed in the quality string. The files named error_trunc_XXX.fastq would be valid but for being truncated (e.g. simulating a partial copy over the network). The special cases of FASTQ files which would be valid as one variant, but not another, are covered below. 
Valid FASTQ =========== The archive contains the following valid sample FASTQ input files for testing: longreads_original_sanger.fastq wrapping_original_sanger.fastq illumina_full_range_original_illumina.fastq sanger_full_range_original_sanger.fastq solexa_full_range_original_solexa.fastq misc_dna_original_sanger.fastq misc_rna_original_sanger.fastq These all have the form NAME_original_FORMAT.fastq, where NAME is a prefix for that example, and FORMAT is one of sanger, solexa or illumina indicating which FASTQ variant that example is using. There are three matching files called NAME_as_FORMAT.fastq showing how the original file should be converted into each of the three FASTQ variants. These converted files are standardised not to use line wrapping (so each record has exactly four lines), and omit the optional repetition of the read titles on the plus line. The file longreads_original_sanger.fastq is based on real Roche 454 reads from the Sanger Institute for the the potato cyst nematodes Globodera pallida. Ten of the reads have been presented as FASTQ records, wrapping the sequence and the quality lines at 80 characters. This means some of the quality lines start with "@" or "+" characters, which may cause problems with naive parsers. Also note that the sequence is mixed case (with upper case denoting the trimmed region), and furthermore the free format title lines are over 100 characters and encode assorted read information (and are repeated on the "+" lines). The wrapping_original_sanger.fastq is based on three real reads from the NCBI Short Read Archive, but has been carefully edited to use line wrapping for the quality lines (but not the sequence lines) such that the due to the occurrence of "@" and "+" on alternating lines, the file may be misinterpreted by a simplistic parser. While this is therefore a very artificial example, it remains a valid FASTQ file, and is useful for testing purposes. 
The sanger_full_range_original_sanger.fastq file uses PHRED scores from 0 to 93 inclusive, covering ASCII characters from 33 (!) to 126 (~). This means it cannot be treated as a Solexa or Illumina 1.3+ FASTQ file, and attempting to parse it as such should raise an error. The solexa_full_range_original_solexa.fastq file uses Solexa scores from -5 to 62 inclusive, covering ASCII characters from 59 (;) to 126 (~). This means it cannot be treated as a Illumina 1.3+ FASTQ file, and attempting to parse it as such should raise an error. On the basis of the quality characters, the file would also qualify as a valid Sanger FASTQ file. The illumina_full_range_original_illumina.fastq file uses PHRED scores from 0 to 62 inclusive, covering ASCII characters from 64 (@) to 126 (~). On the basis of the quality characters, the file would also qualify as a valid Sanger or Solexa FASTQ file. The misc_dna_original_sanger.fastq and misc_rna_original_sanger.fastq files are artificial reads using the full range of IUPAC DNA or RNA letters, including ambiguous character codes, and both cases. 
bio-1.4.3.0001/test/data/fastq/error_trunc_at_seq.fastq0000644000004100000410000000101212200110570022623 0ustar www-datawww-data@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 bio-1.4.3.0001/test/data/fastq/misc_rna_as_solexa.fastq0000644000004100000410000000140612200110570022563 0ustar www-datawww-data@FAKE0011 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) ACGUACGUACGUACGUACGUACGUACGUACGUACGUACGUA + ;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh @FAKE0012 Original version has mixed case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) gUcauAGcgUcauAGcgUcauAGcgUcauAGcgUcauAGcg + ;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh @FAKE0013 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) ucagucagucagucagucagucagucagucagucagucagu + ;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh @FAKE0014 Original version has mixed case ambiguous RNA with PHRED scores from 35 to 40 inclusive (cycled) gaucrywsmkhbvdnGAUCRYWSMKHBVDN + cdefghcdefghcdefghcdefghcdefgh bio-1.4.3.0001/test/data/fastq/misc_rna_as_sanger.fastq0000644000004100000410000000140612200110570022547 0ustar www-datawww-data@FAKE0011 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) ACGUACGUACGUACGUACGUACGUACGUACGUACGUACGUA + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0012 Original version has mixed case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) 
gUcauAGcgUcauAGcgUcauAGcgUcauAGcgUcauAGcg + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0013 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) ucagucagucagucagucagucagucagucagucagucagu + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0014 Original version has mixed case ambiguous RNA with PHRED scores from 35 to 40 inclusive (cycled) gaucrywsmkhbvdnGAUCRYWSMKHBVDN + DEFGHIDEFGHIDEFGHIDEFGHIDEFGHI bio-1.4.3.0001/test/data/fastq/longreads_original_sanger.fastq0000644000004100000410000002237212200110570024140 0ustar www-datawww-data@FSRRS4401BE7HA [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95] tcagTTAAGATGGGATAATATCCTCAGATTGCGTGATGAACTTTGTTCTGGTGGAGGAGAAGGAAGTGCATTCGACGTAT GCCCGTTTGTCGATATTTGtatttaaagtaatccgtcacaaatcagtgacataaatattatttagatttcgggagcaact ttatttattccacaagcaggtttaaattttaaatttaaattattgcagaagactttaaattaacctcgttgtcggagtca tttgttcggttattggtcgaaagtaaccncgggaagtgccgaaaactaacaaacaaaagaagatagtgaaattttaatta aaanaaatagccaaacgtaactaactaaaacggacccgtcgaggaactgccaacggacgacacagggagtagnnn +FSRRS4401BE7HA [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95] FFFDDDDDDDA666?688FFHGGIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIGHGFFFFF====DFFFFFFFFFFFFFF D???:3104/76=:5...4.3,,,366////4<;!!! @FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] tcagCCAGCAATTCCGACTTAATTGTTCTTCTTCCATCATTCATCTCGACTAACAGTTCTACGATTAATGAGTTTGGCtt taatttgttgttcattattgtcacaattacactactgagactgccaaggcacncagggataggnn +FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] FFFFFFFFFDDDDFFFFGFDDDDBAAAAA=<4444@@B=555:BBBBB@@?8:8<<;;;;9944/!/4,,,57855!! 
@FSRRS4401B64ST [length=382] [gc=40.58] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=346] tcagTTTTCTTAAATTACTTGAATCTGTTGAAGTGGATGTCCACTTTTGTATGCCAAATATGCCCAGCGTATACGATCTT GGCCACATCTCCACATAATCATCAGTCGGATGCAAAAAGCGATTAAACTAAAAATGAATGCGTTTTTAGATGAGTAAATA GGTAATACTTTGTTTAAATAATAAATGTCACAAACAGAACGCGGATTACAGTACCTGAAAATAGTTGTACTGTATCTGTG CCGGCACTTCCTCGGCCCTGAGAAGTTGTCCCGTTGTTTCCATTCGCACCATCCAATGGCCAAAGTTTGCGAAGAATCTG TTCCGTTCCATTACCAATTGTTTTTCCATGctgagactgccaaggcacacaggggataggnn +FSRRS4401B64ST [length=382] [gc=40.58] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=346] IIIICCCCI??666IIIIIIIIIIIIIIIIIIIIIIIIIIIIII6666IAIIIII???IIIICCCIIIIIIIIIIIIIII IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII66333EICE::338=/----,8=>>??:2-////7>CEEIEIHHHII IIIIIIIE;;9911199B???IBCHIIIIIIHHHIIHHHIIIIIIIIIIIIIIIIIIBBCCIIIIIIIIIIIIIIIIIII IIIIIIIIIIIIIIIGGGIIIIIIIIID?===DIIIHHHIIIIIIIIIHHHIIIIIIIIIIHHHIHHHIIIIIIIIIIII IIIIIIIIII?>;9988==5----.@@AEGIIIIIIIIIHH????EIIIFF999;EIIBB!! @FSRRS4401EJ0YH [length=381] [gc=48.29] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=343] tcagTTTTTGGAGAATTCCGTCAGGGACGGCATGGCATATTTGTGGGTTCGGCACGGCGTCCTGGCCAAGAAGAAGAAGA CGAATTAGCCCGTTAATTTAATGACACCTTCCCCAATTTTGCAGCAATGATTGGTTCATTCTTGGCGGTGCGTTTTTGTG CTTCGTCGAATTGTTGGCCATTTTGGTCCACCGGCCATCATCTTTACGCTATCCGACTGATTGGAAATCACCGCCTAGCA TTTTGCCGAAGATTGTTGCGTTGTACGGCCATGTGCTGATTGTTTACATTGGCATTCTTGGCAATTTGTCCTTGGTCGGC TTTGACGGCAAATTTGCGGTGTTAAGTctgagactgccaaggcacacagggggatagggnn +FSRRS4401EJ0YH [length=381] [gc=48.29] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=343] IIII?????IIIIIIIIIIIIIIHHHIIIIIIIIIIIIIHHHIIHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII IIIIIIIIHHHIIIIIHHHIIIIIIIIIIIAAAAII>>>>IIIIIIIIIIIIIIIIIIIIIIIIIIEEIEE;33333D7I IIIIIIIIIIIIIIIIIIIICC@@HHIIIIIIIIIIIIIIIIHHHIIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIIIII BBBBIHCDCHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHHIIIHHCCDIIIIIIHHHIICCCH=CCIIIIIIIII GGGIIIIIIHHHHHHIIIIIIIIIIIIIIIHHHIIHHE??>>?EFEE?/////;:80--!! 
@FSRRS4401BK0IB [length=507] [gc=49.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=208] tcagTTGACCGGCGTTGTGTAACAATAATTCATTATTCTGAGACGATGCCAATGTAATCGACGGTTTATGCCCAATTATT CCCATCTATGCTTAACTGATCAAATACTATTTGCATTACGTCACGAAATTGCGCGAACACCGCCGGCCGACAATAATTTA TACCGGACATACCGGAGTTGATGGTAATCGGTAAAGAGTTTTATTTAATTATntattatcnctattaattattgttanca acaatgtgcacgctntgccgcccgccgccgccgtgtcggtaggaccccggacggacccggacccggttcgggtacccgtt ttcgggttcccggaaccgtttttcgggtacccggttttttcggggggccccccggtaaaaaaccggggaaccccctaaaa cgggtaaacgtaccgtaagggaccccctaaacgggggccccgaaaaaccgggacccaaaccggggggaaacggttaaagg ggggggaagtaggngnnnnnnnnnnnn +FSRRS4401BK0IB [length=507] [gc=49.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=208] FFFA@@FFFFFFFFFFHHB:::@BFFFFGGHIHIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFF?=BA@11188011<< 88;?AABDDC???DDAAAADA666D?DDD=====AA>?>><<<=<11188<>?@@1114 2::DDA???DFFFFFFFFFFFFFBAAAA<<0000.22=////8,--111111!23--/24!37:6666<;822/..4!46 521177553.-.23!231121112,,-,,211==5------,12,,,,,,-,,,-1,,,,-,,155--,,,,13111.,, ,,,,,,++111..11..1,,,,,,,,,+3,,,,,--22-----//----55//**/--22--**,,,,**,,,,,,.1., *,,,,***,,,,,,,,,,,,,,,,,,,,,,,),,-,,,,,,),,,,,**//.),,,///,,,,,,,,,,,.))33---,, ,,,,,,,,(0,,,!.!!!!!!!!!!!! @FSRRS4401ARCCB [length=258] [gc=46.90] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=193] tcagTTATTGCAGTCGTTCCGCGCCATCGCCGGTAACCGTCCGCGTGTTATTCTGTGTATCGGCCAACCTTCGTATAACT TCGTATAATGTATGCTATACGAAGTTATTACGATCTATACCGGCGAAACTCAGCCGAAAGGTCTCGCGGTAGAGCCTATG AGCTGCCCGACCGATGCATTTAAATTTCCGGGGATCGtcgctgatctgagactgccaaaggcacactagggggataggnn nnnnnnnnnnnnnnnnnn +FSRRS4401ARCCB [length=258] [gc=46.90] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=193] FFF<8::@DFFFFFFFGGFDCAAAAAB@@000046<;66322366762243348<<=??4445::>ABAAA@<<==B=:5 55:BBD??=BDDDDFFFCCCCCCCFFCDDDFFFFFDBAA==88880004><<<99688;889<889?BBBBA=???DDBB B@@??88889---237771,,,,,,,,--1152<<00158A@><<<<<43277711,,,--37===75,----34666!! !!!!!!!!!!!!!!!!!! 
@FSRRS4401CM938 [length=453] [gc=44.15] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=418] tcagGTTTTAAATCGCTTTCCAAGGAATTTGAGTCTAAATCCGGTGGATCCCATCAGTACAAATGCGGCGACAAGGCCGT GAAAACACTGCTTAATTCTTTGCACTTTTTGGCCACCTTTTTGGAAATGTTGTTTTGTGTTCTCAAAATTTTCCATCTCA GAACAAACATTCCATCGGGCTGATGTTGTGGCTTTTGGCGCGCGAAGTGCTGCTACTGCGCGGCAAAATCAGTCGCCAGA CCGGTTTTGTTGTGGACGACAAAGTGATCATGCCTGACTTGTACTTCTACCGCGATCCGCAAGCGCGAATTGGTCACATA GTTATAGAATTTTTGAGCCTTTTTCTTGACATAAAAAGTGTGGTTTTAAAAATTTCCTGGCAGGACCCACGCCAACGTTC AGGAATAATATCTTTTAAAAAGctgagactgccaaggcacacaggggataggn +FSRRS4401CM938 [length=453] [gc=44.15] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=418] IIIIICC>>666IIIICCCIIIIIIIIHHHIIIIIG666IIIIIIIIIIHHHIIIIIIIICCCIIIIIIIIIIIIIIIII I@@@@IIIIIIIIIIIIIHHHIIII???=;IIEEI:://///7544:?IBB72244E8EECEBC=@@@@@@@HHIIIIII IIIIBBBIIIIIIIIIHHHIIIIIIIIIIIIICCCCIIIIIIIIIIIIIIIIIIIIIIIIIIII6666DEIIHEB??D@7 77772222D89EEIIIIIIIHHHIIIIIIIIHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHHIIIIII IIIIIIIII==?==IIIII???=;I63DDD82--,,,38==::----,,---+++33066;@6380008/:889<:BGII IIIIIIIFE?@C<<7999EEEEEE@@@@EEEEE! 
@FSRRS4401EQLIK [length=411] [gc=34.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=374] tcagTTTAATTTGGTGCTTCCTTTCAATTCCTTAGTTTAAACTTGGCACTGAAGTCTCGCATTTATAACTAGAGCCCGGA TTTTAGAGGCTAAAAAGTTTTCCAGATTTCAAAATTTATTTCGAAACTATTTTTCTGATTGTGATGTGACGGATTTCTAA ATTAAATCGAAATGATGTGTATTGAACTTAACAAGTGATTTTTATCAGATTTTGTCAATGAATAAATTTTAATTTAAATC TCTTTCTAACACTTTCATGATTAAAATCTAACAAAGCGCGACCAGTATGTGAGAAGAGCAAAAACAACAAAAAGTGCTAG CACTAAAGAAGGTTCGAACCCAACACATAACGTAAGAGTTACCGGGAAGAAAACCACTctgagactgccaaggcacacag ggggataggnn +FSRRS4401EQLIK [length=411] [gc=34.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=374] III?666??HHHIIIIIIIIIGGGIIIIIIIIIIIGGGHHHIIIIIIIIIIIIIIIIIIIIGGGIIIIIIIIIIHHHIII @@@@IIIIEIE111100----22?=8---:-------,,,,33---5:3,----:1BBEEEHIIIIIIIIIIIB??A122 000...:?=024GIIIIIIIIIIIIIIIIIIECCHHB=//-,,21??<5-002=6FBB?:9<=11/4444//-//77??G EIEEHIACCIIIHHHIIIIIIICCCAIIIHHHHHHIIIIIIIIIIIIIIIIIIIIIIEE1//--822;----.777@EII IIII???IIIIIIIIIIIHHHIIIIIIIIIIIIIIIIIIII994227775555AE;IEEEEEIIIII??9755>@==:3, ,,,,33336!! @FSRRS4401AOV6A [length=309] [gc=22.98] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=273] tcagTTTTCAAATTTTCCGAAATTTGCTGTTTGGTAGAAGGCAAATTATTTGATTGAATTTTGTATTTATTTAAAACAAT TTATTTTAAAATAATAATTTTCCATTGACTTTTTACATTTAATTGATTTTATTATGCATTTTATATTTGTTTTCTAAATA TTCGTTTGCAAACTCACGTTGAAATTGTATTAAACTCGAAATTAGAGTTTTTGAAATTAATTTTTATGTAGCATAATATT TTAAACATATTGGAATTTTATAAAACATTATATTTTTctgagactgccaaggcacacagggggataggn +FSRRS4401AOV6A [length=309] [gc=22.98] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=273] IIIICCCCI;;;CCCCIII???HHHIIIIHHHIIIIIIIIIIHHHIIIHHHIIIIIII@@@@IFICCCICAA;;;;ED?B @@D66445555<<>AAIIIIIIII;;;::III???CCCIII;;;;IFFIIIIICCCBIBIEEDC4444?4BBBE? EIIICHHII;;;HIIIIIIHH;;;HHIIIII;;;IIIIHHHIIIIII>>??>IEEBGG::1111/46FBFBB?=;=A?97 771119:EAAADDBD7777=/111122DA@@B68;;;I8HHIIIII;;;;?>IECCCB/////;745=! 
@FSRRS4401EG0ZW [length=424] [gc=23.82] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=389] tcagTTTTGATCTTTTAATAATGAATTTTAATGTGTTAAAATGATTGCATTGATGGCATAACCGCATTTAAATTAATTAC ATGAAGTGTAAGTATGAAATTTTCCTTTCCAAATTGCAAAAACTAAAATTTAAAATTTATCGTAAAAATTAACATATATT TTAAACGATTTTAAGAAACATTTGTAAATTATATTTTTGTGAAGCGTTCAAACAAAAATAAACAATAAAATATTTTTCTA TTTAATAGCAAAACATTTGACGATGAAAAGGAAAATGCGGGTTTGAAAATGGGCTTTGCCATGCTATTTTCATAATAACA TATTTTTATTATGAATAATAAATTTACATACAATATATACAGTCTTAAATTTATTCATAATATTTTTGAGAATctgagac tgccaaggcacacaggggataggn +FSRRS4401EG0ZW [length=424] [gc=23.82] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=389] IIA94445EEII===>IIIIIIIIICCCCIIHIIICC;;;;IIIIIIIIIIIIIIIIIIIIIIIIIF;;666DDIIIIII IIIIIIIIIIIIIEE94442244@@666CC<>==HH;;IIIIIICC @@???III@@@@IC?666HIDDCI?B??CC////-=323?423,,,/=1,,,,-:4E ;??EIIIIICCCCI>;;;IIIIIII<<@@?=////7=A99988<<4455IEEEIIIIIIIIIIIII<999HIIIIIIIII II?????IIIIIIIIIIICAC;55539EIIIIIIIIIIIIIIIIHH999HHHIA=AEEFF@=.....AD@@@DDEEEEFI II;;;977FFCCC@24449?FDD! bio-1.4.3.0001/test/data/gcg/0000755000004100000410000000000012200110570015313 5ustar www-datawww-databio-1.4.3.0001/test/data/gcg/pileup-aa.msf0000644000004100000410000000662512200110570017710 0ustar www-datawww-data!!AA_MULTIPLE_ALIGNMENT 1.0 PileUp of: @/home/ngoto/.seqlab-localhost/pileup_24.list Symbol comparison table: GenRunData:blosum62.cmp CompCheck: 1102 GapWeight: 8 GapLengthWeight: 2 pileup_24.msf MSF: 282 Type: P April 22, 2009 22:31 Check: 5701 .. 
Name: xx_3183087 Len: 282 Check: 6631 Weight: 1.00 Name: xx_3183086 Len: 282 Check: 3789 Weight: 1.00 Name: xx_192453532 Len: 282 Check: 2081 Weight: 1.00 Name: xx_72157730 Len: 282 Check: 5946 Weight: 1.00 Name: xx_210090185 Len: 282 Check: 7419 Weight: 1.00 Name: xx_45552463 Len: 282 Check: 9835 Weight: 1.00 // 1 50 xx_3183087 ~MAFLGLFSL LVLQSMATGA .TGEDENILF QKEIRHSMGY DSLKNGE.EF xx_3183086 ~MYFLGLLSL LVLPSKAFKA .AREDENILF LKEIRHSLGF DSLKNGE.EF xx_192453532 MLLLVVLPPL LLLRGCFCQA ISSGEENIIF LQEIRQAVGY SHFREDE.EF xx_72157730 ~~~~~~~~MA FSKQQDISGQ DERRGTNLFF ATQIADVFRF NQVDQDQLHG xx_210090185 ~~~~~~~~~~ ~~~MRSSTSQ EKDHPENIFF AQQMSRVLRF HKMDASDLHM xx_45552463 ~~~~~~~~~~ ~~~MADAAGQ KP..GENIVF ATQLDQGLGL ASSDPEQ... 51 100 xx_3183087 SNYINKWVEN NTRTFSF.TK DDEVQIPMMY QQGEFYYGEF SDGSNEAGGI xx_3183086 TTHINKWVEN NTRTFSF.TK DDEVQIPMMY QQGEFYYGEF SDGSNEAGGI xx_192453532 SERINSWVLN NTRTFSF.TR DDGVQTLMMY QQGDFYYGEF SDGTTEAGGV xx_72157730 TKSINDWVSK NTTQETFKVL DERVPVSLMI QKGKYALAV. .DNTNDC... xx_210090185 RQTINSWVEE RTRLGTFHI. SRDVEVPMMH QQGRFKLAY. .DEDLNC... xx_45552463 .ATINNWVEQ LTRPDTFH.L DGEVQVPMMS LKERFRYAD. .LPALDA... 101 150 xx_3183087 YQVLEIPYEG DEISMMLVLS RQEVPLATLE PLVKAQLVEE WANSVKKQKV xx_3183086 YQVLEIPYEG DEISMMIVLS RQEVPLVTLE PLVKASLINE WANSVKKQKV xx_192453532 YQVLEMLYEG EDMSMMIVLP RQEVPLASLE PIIKAPLLEE WANNVKRQKV xx_72157730 .LVLEMPYQG RNLSLLIALP VKDDGLGQLE TKLSADILQS WDAGLKSRQV xx_210090185 .QILEMPYRG KHLSMVVVLP DKMDDLSAIE TSLTPDLLRH WRKSMSEEST xx_45552463 .MALELPYKD SDLSMLIVLP NTKTGLPALE EKLRLTTLSQ ITQSLYETKV 151 200 xx_3183087 EVYLPRFTVE QEIDLKDVLK ALGITEIFIK D.ANLTG... .LSDNKEIFL xx_3183086 EVYLPRFTVE QEIDLKDVLK GLGITEVFSR S.ADLTA... .MSDNKELYL xx_192453532 EVYLPRFKVE QKIDLRESLQ QLGIRSIFSK D.ADLSAMTA QMTDGQDLFI xx_72157730 NVLLPKFKLE AQFQLKEFLQ RMGMSDAFDE DRANFEGISG ...DRE.LHI xx_210090185 MVQIPKFKVE QDFLLKEKLA EMGMTDLFSM ADADLSGITG ...SRD.LHV xx_45552463 ALKLPRFKAE FQVELSEVFQ KLGMSRMFS. 
DQAEFGKMLQ ...SPEPLKV 201 250 xx_3183087 SKAIHKSFLE VNEEGSEAAA VSGMIAISRM AVLYP..... QVIVDHPFFF xx_3183086 AKAFHKAFLE VNEEGSEAAA ASGMIAISRM AVLYP..... QVIVDHPFFF xx_192453532 GKAVQKAYLE VTEEGAEGAA GSGMIALTRT LVLYP..... QVMADHPFFF xx_72157730 SAVIHKAFVD VNEEGSEAAA ATAVVMMRRC APPREPEKPI LFRADHPFIF xx_210090185 SHVVHKAFVE VNEEGSEAAA ATAVNMMKRS L...DGE... MFFADHPFLF xx_45552463 SAIIHKAFIE VNEEGTEAAA ATGMVMCYAS MLTFEPQ.PV QFHVQHPFNY 251 282 xx_3183087 LIRNRRTGTI LFMGRVMHPE TMNTSGHDFE EL xx_3183086 LVRNRRTGTV LFMGRVMHPE AMNTSGHDFE EL xx_192453532 IIRNRKTGSI LFMGRVMNPE LIDPFDNNFD M~ xx_72157730 MIRHRPTKSV LFMGRMMDPS ~~~~~~~~~~ ~~ xx_210090185 LIRDNDSNSV LFLGRLVRPE GHTTKDEL~~ ~~ xx_45552463 YIINKDS.TI LFAGRINKF~ ~~~~~~~~~~ ~~ bio-1.4.3.0001/test/data/refseq/0000755000004100000410000000000012200110570016040 5ustar www-datawww-databio-1.4.3.0001/test/data/refseq/nm_126355.entret0000644000004100000410000000662712200110570020535 0ustar www-datawww-dataLOCUS NM_126355 615 bp mRNA linear PLN 25-JAN-2005 DEFINITION Arabidopsis thaliana Toll-Interleukin-Resistance (TIR) domain-containing protein (At2g03030) mRNA, complete cds. ACCESSION NM_126355 VERSION NM_126355.1 GI:18395472 KEYWORDS . SOURCE Arabidopsis thaliana (thale cress) ORGANISM Arabidopsis thaliana Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; core eudicotyledons; rosids; eurosids II; Brassicales; Brassicaceae; Arabidopsis. COMMENT PROVISIONAL REFSEQ: This record has not yet been subject to final NCBI review. This record is derived from an annotated genomic sequence (NC_003071). The reference sequence was derived from mrna.At2g03030.1. 
FEATURES Location/Qualifiers source 1..615 /organism="Arabidopsis thaliana" /mol_type="mRNA" /db_xref="taxon:3702" /chromosome="2" /map="unknown" /clone="CHR2v01212004" /ecotype="Columbia" gene 1..615 /locus_tag="At2g03030" /note="synonym: T17M13.20; Toll-Interleukin-Resistance (TIR) domain-containing protein" /db_xref="GeneID:814832" CDS 1..615 /locus_tag="At2g03030" /note="domain signature TIR exists, suggestive of a disease resistance protein; go_function: defense/immunity protein activity [goid 0003793]; go_process: defense response signaling pathway, resistance-gene dependent [goid 0009870]" /codon_start=1 /product="Toll-Interleukin-Resistance (TIR) domain-containing protein" /protein_id="NP_178403.1" /db_xref="GI:15227520" /db_xref="GeneID:814832" /translation="MTFFSPTQVFLNYRGEQLRRSFVSHLIDAFERNEINFFVDKYEQ RGKDLKNLFLRIQESKIALAIFSTRYTESSWCLDELVKIKKLADKKKLHVIPIFYKVK VEDVRKQTGEFGDNFWTLAKVSSGDQIKKWKEALECIPNKMGLSLGDKSSEADFIKEV VKAVQCVVATIGLEEEEENHFGKKKRKDCKCELPDLKKSRTKKL" misc_feature 22..423 /locus_tag="At2g03030" /note="TIR; Region: Toll - interleukin 1 - resistance" /db_xref="CDD:22729" ORIGIN 1 atgacattct tctctcccac tcaggtgttt ttgaactaca ggggagaaca actgcgtcgc 61 agcttcgtga gccacctcat tgatgccttt gaaaggaatg agatcaactt cttcgtagac 121 aaatacgaac agagaggcaa agacctcaaa aatctctttc ttaggatcca agagtcgaag 181 atcgcgcttg ccatcttctc aaccagatac acggagtcaa gctggtgttt ggatgagttg 241 gtgaagataa agaaacttgc tgataaaaaa aaactccatg tcattccaat tttctacaag 301 gtgaaggtag aagacgttcg aaaacagaca ggtgagtttg gtgacaactt ctggacgctg 361 gcaaaggttt caagtggtga tcagatcaag aaatggaaag aagccttgga atgtatcccc 421 aacaagatgg gtttgtcgtt gggagacaag agttctgaag cagatttcat caaggaagtt 481 gttaaggcgg ttcagtgtgt tgtagcaacg attggacttg aggaagaaga agagaatcat 541 tttgggaaaa agaagagaaa ggattgcaaa tgtgagcttc ctgatttgaa gaaaagcaga 601 accaaaaagt tgtga // bio-1.4.3.0001/test/data/bl2seq/0000755000004100000410000000000012200110570015743 5ustar 
www-datawww-databio-1.4.3.0001/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq0000644000004100000410000000304512200110570022143 0ustar www-datawww-dataQuery= CD8A_HUMAN P01732 T-cell surface glycoprotein CD8 alpha chain precursor (T-lymphocyte differentiation antigen T8/Leu-2). (235 letters) >CD8B_HUMAN P10966 T-cell surface glycoprotein CD8 beta chain precursor (Antigen CD8B). Length = 210 Score = 29.6 bits (65), Expect = 5e-05 Identities = 21/90 (23%), Positives = 37/90 (41%), Gaps = 3/90 (3%) Query: 39 VELKCQVLLSNPTSGCSWLFQ---PRGAAASPTFLLYLSQNKPKAAEGLDTQRFSGKRLG 95 V L C+ +S WL Q P + L+ S E ++ ++ + R Sbjct: 37 VMLSCEAKISLSNMRIYWLRQRQAPSSDSHHEFLALWDSAKGTIHGEEVEQEKIAVFRDA 96 Query: 96 DTFVLTLSDFRRENEGYYFCSALSNSIMYF 125 F+L L+ + E+ G YFC + + + F Sbjct: 97 SRFILNLTSVKPEDSGIYFCMIVGSPELTF 126 Lambda K H 0.323 0.137 0.436 Gapped Lambda K H 0.267 0.0410 0.140 Matrix: BLOSUM62 Gap Penalties: Existence: 11, Extension: 1 Number of Hits to DB: 102 Number of Sequences: 0 Number of extensions: 5 Number of successful extensions: 3 Number of sequences better than 10.0: 1 Number of HSP's better than 10.0 without gapping: 1 Number of HSP's successfully gapped in prelim test: 0 Number of HSP's that attempted gapping in prelim test: 0 Number of HSP's gapped (non-prelim): 1 length of query: 235 length of database: 210 effective HSP length: 22 effective length of query: 213 effective length of database: 188 effective search space: 40044 effective search space used: 40044 T: 11 A: 40 X1: 16 ( 7.5 bits) X2: 38 (14.6 bits) X3: 64 (24.7 bits) S1: 20 (12.2 bits) S2: 20 (12.3 bits) bio-1.4.3.0001/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq0000644000004100000410000000173012200110570022240 0ustar www-datawww-dataQuery= CD8A_HUMAN P01732 T-cell surface glycoprotein CD8 alpha chain precursor (T-lymphocyte differentiation antigen T8/Leu-2). 
(235 letters) Lambda K H 0.323 0.137 0.436 Gapped Lambda K H 0.267 0.0410 0.140 Matrix: BLOSUM62 Gap Penalties: Existence: 11, Extension: 1 Number of Hits to DB: 175 Number of Sequences: 0 Number of extensions: 8 Number of successful extensions: 0 Number of sequences better than 1.0e-05: 0 Number of HSP's better than 0.0 without gapping: 0 Number of HSP's successfully gapped in prelim test: 0 Number of HSP's that attempted gapping in prelim test: 0 Number of HSP's gapped (non-prelim): 0 length of query: 235 length of database: 393 effective HSP length: 27 effective length of query: 208 effective length of database: 366 effective search space: 76128 effective search space used: 76128 T: 11 A: 40 X1: 16 ( 7.5 bits) X2: 38 (14.6 bits) X3: 64 (24.7 bits) S1: 41 (22.0 bits) S2: 74 (33.1 bits) bio-1.4.3.0001/test/data/soft/0000755000004100000410000000000012200110570015526 5ustar www-datawww-databio-1.4.3.0001/test/data/soft/GSE3457_family_partial.soft0000755000004100000410000013275512200110570022461 0ustar www-datawww-data^DATABASE = GeoMiame !Database_name = Gene Expression Omnibus (GEO) !Database_institute = NCBI NLM NIH !Database_web_link = http://www.ncbi.nlm.nih.gov/projects/geo !Database_email = geo@ncbi.nlm.nih.gov !Database_ref = Nucleic Acids Res. 2005 Jan 1;33 Database Issue:D562-6 ^SERIES = GSE3457 !Series_title = Poplar flowering and maturation GeneXp arrays !Series_geo_accession = GSE3457 !Series_status = Public on Oct 11 2006 !Series_submission_date = Oct 14 2005 !Series_last_update_date = Oct 17 2005 !Series_summary = To identify candidate genes involved in maturation and flowering, we conducted microarray expression studies using two poplar genotypes (Populus trichocarpa x P. deltoides hybrids) represented in continuous age gradients of one to six years. We designed 70-mers for 228 poplar genes and microarray studies were carried out using the microplate-based 96-well BioGridArray platform (GeneXP Biosciences). 
Floral buds, vegetative buds and shoot tips were collected at different seasonal time points from juvenile and adult trees and from both basal and upper branches of mature trees. !Series_overall_design = The experiment was carried out using the microplate-based 96-well BioGridArray platform (GeneXP Biosciences). Each of 228 oligonucletides were duplicated in each well. Two human genes, beta-actin and gapdh , were also printed on all arrays as negative controls. plus ten arabidopsis oligonucleotides selected by GeneXP and used to measure quality of hybridization. For each of 16 samples, two seperate RNA isolations were performed and were considered as biological replicates. Each biological replicate was labeled with Cy5, and hybridized to duplicated wells. !Series_type = Maturation and Flowering !Series_contributor = Jingyi,,Li !Series_contributor = Olga,,Shevchenko !Series_contributor = Ove,,Nilsson !Series_contributor = Steve,H,Strauss !Series_contributor = Amy,M,Brunner !Series_sample_id = GSM77557 !Series_sample_id = GSM77558 !Series_sample_id = GSM77559 !Series_sample_id = GSM77560 !Series_sample_id = GSM77561 !Series_sample_id = GSM77562 !Series_sample_id = GSM77563 !Series_sample_id = GSM77564 !Series_sample_id = GSM77565 !Series_sample_id = GSM77566 !Series_sample_id = GSM77567 !Series_sample_id = GSM77568 !Series_sample_id = GSM77569 !Series_sample_id = GSM77570 !Series_sample_id = GSM77571 !Series_sample_id = GSM77572 !Series_sample_id = GSM77573 !Series_sample_id = GSM77574 !Series_sample_id = GSM77575 !Series_sample_id = GSM77576 !Series_sample_id = GSM77577 !Series_sample_id = GSM77578 !Series_sample_id = GSM77579 !Series_sample_id = GSM77580 !Series_sample_id = GSM77581 !Series_sample_id = GSM77582 !Series_sample_id = GSM77583 !Series_sample_id = GSM77584 !Series_sample_id = GSM77585 !Series_sample_id = GSM77586 !Series_sample_id = GSM77587 !Series_sample_id = GSM77588 !Series_sample_id = GSM77589 !Series_sample_id = GSM77590 !Series_sample_id = 
GSM77591 !Series_sample_id = GSM77592 !Series_sample_id = GSM77593 !Series_sample_id = GSM77594 !Series_sample_id = GSM77595 !Series_sample_id = GSM77596 !Series_sample_id = GSM77597 !Series_sample_id = GSM77598 !Series_sample_id = GSM77599 !Series_sample_id = GSM77600 !Series_sample_id = GSM77601 !Series_sample_id = GSM77602 !Series_sample_id = GSM77603 !Series_sample_id = GSM77604 !Series_sample_id = GSM77605 !Series_sample_id = GSM77606 !Series_sample_id = GSM77607 !Series_sample_id = GSM77608 !Series_sample_id = GSM77609 !Series_sample_id = GSM77610 !Series_sample_id = GSM77611 !Series_sample_id = GSM77612 !Series_sample_id = GSM77613 !Series_sample_id = GSM77614 !Series_sample_id = GSM77615 !Series_sample_id = GSM77616 !Series_sample_id = GSM77617 !Series_sample_id = GSM77618 !Series_sample_id = GSM77619 !Series_sample_id = GSM77620 !Series_sample_id = GSM77621 !Series_sample_id = GSM77622 !Series_sample_id = GSM77623 !Series_sample_id = GSM77624 !Series_sample_id = GSM77625 !Series_sample_id = GSM77626 !Series_sample_id = GSM77627 !Series_sample_id = GSM77628 !Series_sample_id = GSM77629 !Series_sample_id = GSM77630 !Series_contact_name = Jingyi,,Li !Series_contact_email = JINGYI.LI@OREGONSTATE.EDU !Series_contact_phone = 541-737-8496 !Series_contact_institute = Oregon State University !Series_contact_address = !Series_contact_city = Corvallis !Series_contact_zip/postal_code = 97331 !Series_contact_country = USA !Series_platform_id = GPL2092 ^PLATFORM = GPL2092 !Platform_title = GeneXp BioGrid_poplar maturation !Platform_geo_accession = GPL2092 !Platform_status = Public on Dec 31 2005 !Platform_submission_date = May 17 2005 !Platform_last_update_date = Jun 16 2005 !Platform_technology = spotted oligonucleotide !Platform_distribution = non-commercial !Platform_organism = Populus !Platform_description = Single channel oligonucletide array for comparing gene expression among 16 tissue types to explore maturation and flowering in poplar !Platform_description = 
Keywords = poplar maturation !Platform_contributor = Jingyi,,Li !Platform_contributor = Olga,,Shevchenko !Platform_contributor = Steve,H,Strauss !Platform_contributor = Amy,M,Brunner !Platform_contact_name = Jingyi,,Li !Platform_contact_email = JINGYI.LI@OREGONSTATE.EDU !Platform_contact_phone = 541-737-8496 !Platform_contact_institute = Oregon State University !Platform_contact_address = !Platform_contact_city = Corvallis !Platform_contact_zip/postal_code = 97331 !Platform_contact_country = USA !Platform_data_row_count = 240 #ID = #GB_ACC = Gene bank accession number #SPOT_ID = #Function/Family = gene functions and family #ORGANISM = sequence sources #SEQUENCE = oligo sequence used !platform_table_begin ID GB_ACC SPOT_ID Function/Family ORGANISM SEQUENCE A001P54Um AI161490 TF, MADS-box P. tremula x P. tremuloides CATGGGTTATGTTTAGATTGTAAGCAATGATTGGTTACATAATCATGTGTGTTGCATAAGTAACAAACTT A003P34U AI161604 TF, Homeodomain(Knox class) P. tremula x P. tremuloides CATTAATCAAAGGAAGCGACATTGGAAACCATCTGAAGATATGCAATTCATGGTGATGGATGGCCTTCAT A009P50U AI161918 TF, NAC(no apical meristem) P. tremula x P. tremuloides AGAGCCCCAATGGGCAGAAAACTGACTGGATCATGCATGAATACCGGCTTGAATCCGATGAGATGGTCCT A010P29U AI161962 TF, MADS-box P. tremula x P. tremuloides ACCAAGACGCCACAAATATTACAAGACCGGCAAATTATCGAGGTGGAGACAGAATTGTTCATAGGTCCAC A031P05U AI163047 Transcriptional regulation, SNF2 subfamily P. tremula x P. tremuloides GTCATATATGATCCTGATCCGAATCCTAAGAATGAGGAACAGGCAGTTGCTAGAGCCCACCGGATTGGCA A039P68U AI163321 TF, flowering protein CONSTANS P. tremula x P. tremuloides AGAAAATTCGATATACTGTCCGTAAAGAGGTAGCACTTAGAATGCAACGGAATAAAGGGCAGTTCACCTC A047P36U AI163743 Polycomb transcriptional repressor P. tremula x P. tremuloides TTATTTATGATCAAACTGTGGAATCACGGACTACTTGATGCACGGACGATGAACATGTGTAATATGATTC A069P31U AI164824 TF, MADS-box P. tremula x P. tremuloides AGGAGTTGAACAATACCATTGTGTGATGAGTTAAAATATCTGTTGGAAGATTACTGTACCATATTGTAAT AF240445 AF240445 Ubiquitin P. tremula x P. 
tremuloides AGGCTTAGGGGAGGAACTATGATCAAGGTGAAGACTCTCACTGGAAAAGAAATTGAAATTGACATTGAAC B014P39U(UB21CPA06) BU821251 curlyleaf-like 1 homeotic protein; polycomb; set domain P. tremula TGCTGGACGCGAATGTGACCCAGATGTTTGTAGGAATTGCTGGGTTAGTTGCGGAGATGGCTCATTAGGA B016P21Um TF, MADS-box TF, MADS-box P. tremula CCAGTAGAATCTTGCGAGGAACAGATCTGCTATCGCTGGAAGATCGACTTATCCCTTGAGATACTGGTAT B020P33U Post-transc. regulation/ RNA-binding, C3H zincfinger Post-transc. regulation/ RNA-binding, C3H zincfinger P. tremula TATCAAACTATTGACTCCAGATTGTCTATGCTGGGAGTGGGACCGACCTTCTACCCTCAACGACCTGGAC C025P36U BI068662 Putative signal transduction, DENN domain, WD-40 repeat P. tremula x P. tremuloides CAGATCACAAGCTTTAGGGACACAAATGGATTCGGCAATAGAATGGTGGAGACCACTGATCTGGAGTGAT C025P42U BI068668 Transcriptional regulation, putative helicase P. tremula x P. tremuloides AAACCAGCTATTCCTGACCTGCCACCCACTGTATCAGCAATAGCTCAATCAGTTCGTGTGCTATATGGAG C036P25U BI069375 TF, salt-tolerance protein, CO P. tremula x P. tremuloides AAGACAGAGCCCTCTTTTGCCAGGATTGTGATGAACCAATCCATTCAGCTGGTAACCTTTCTGCGAATCA C054P45U BI071264 transcriptional. regulation, peroxiredoxin P. tremula x P. tremuloides AATCAGCTTCATTACATTGTGAGCGCGAGGAACAGATAGGAAGATGGGGTCCACAGACTGTCGGACCTAC C061P33U BI071641 Post-translational regulation, F-box protein P. tremula x P. tremuloides TATTGATGTTTGGGGGTTTGCTAAGAGAGGCCCCTTCGTTCCGCTCCAGCGATGTGTTACCATGGATCTA C067P63U BI072011 TF, GIA/RGA-like gibberellin response modulator P. tremula x P. tremuloides TCAGTGGAGAACTCGGTCGATTCGCTGGTTTGACCGGTTCATCTAGGGCGAATGCCTTAAACAAGCTACA C092P78U BI073104 reduced vernalization response 1 P. tremula x P. tremuloides ATGGGAAACAGTGGCCTGTTCGATGCCTTTACAGGGGAGGCAGAGCTAAATTTAGTCAGGGATGGTATGA F003P03Y BI119647 TF, MYB P. trichocarpa ATGAAGAGGACCTCATTATCAAGCTACATGCCCTCCTTGGAAATAGGTGGTCATTGATAGCAGGAAGATT F011P01Y BI120134 Transcriptional regulation, Bromodomain, WD-40 repeats P. trichocarpa AAGAAGCAGGCCCTTGGAAAATAATGAAGGGAAACATTAGGGCGGTCGAGTTCTGTAAGGTTGAAGCCCT F012P39Y BI120241 Transc. 
Reg.--Nucleosome/ chromatin assembly factor group B, CAF1 sbunit p150 P. trichocarpa TGATGGAACGCTTTCTAAAAAGAAGCAAAAGTAGTTCCCCATGTCAGAATGACCAGAGTTTAACTAAAGC F027P14Y BI121043 TF, Homeodomain-Leucine Zipper (HD-ZIP) P. trichocarpa ACCACCACCAACCCATCCACCACACACACACACACACATAGCACCACCAACTCTAAACCATTGTCACTAC F028P06Y BI121069 Transcriptional regulator, SET-domain P. trichocarpa TACCATACCTGTCACGTCTATCGTTATGCCTCCTGTTCATCTAAAACCTCCCCAATGTACTCACATATAA F044P04Y BI121667 RNA processing/post-transcriptional regulation, DEAD/DEAH box RNA helicase P. trichocarpa TGGTTTTGGGCAAACCGGCTCCTCTTGTTAGTACTTTCAGACTGAGCTACTATTCGATATTAAATCTAAT F052P94Y BI135499 Transcriptional regulation, zinc-finger helicase P. trichocarpa TCCTGAGGGAATGATGACACGGGCTTTGCATGGAAGCAGACCGTCAAAGTTTCAATCGCATCTGACAGAC F066P33Y BI136301 TF, MADS-box P. trichocarpa CGAGCAAGCACAATGGGAGCAGCAAAACCTTGGCCAGAACTCATCCTCTTTTATGCTACCACAAGCACAA F068P73Y BI136466 CYCLIN D P. trichocarpa GCTCGTCAAGAAGCTGTGGAGTGGATGCTTAAGGTCATTGCTCATCATGGATTTTCAGCTCTCACATCAA F070P70Y BI136615 TF, MYB P. trichocarpa GGTTCTCTCCCTCGGGTCTGTTTGGATGAACCCCGGTTCAAACTTTGTCATCTTCAACTAGCACCACAGT F093P10Y BI137760 transcriptional corepressor, regulates AGAMOUS P. trichocarpa AGCAGCAACAGCTTCAGCAGCACGTGCTTTCAAATCAGCAATCACAGAGTTCAAGTCACAATCTTCACCC F098P59Y BI138094 TF, homeodomain P. trichocarpa AGGAGATCCAAAGGCTGTCAGACCGTATTGATGGGGTTCCACTAATAGTCCTAGTTCATCCTTGTCGATG F099P36Y BI138136 Transcriptional regulation, p300/CBP acetyltransferase-related P. trichocarpa CATTCTAGGAATGCGCAGAATCCCCCTCATCCAGACCGACAGCATGACATGAGCTCATCATTGACCACAA F099P51Y BI138143 Signal transduction, ovule/fiber cell elongation P. trichocarpa GATTCATTGGGACACTCGCCCTGCTAATCTGTCCCATATTTACAAGTGGCTCAACTCTTGCAACCATTCT F100P70Y BI138195 RNA processing, post-transcriptional regulation, DEAD/DEAH box helicase carpel factory (CAF P. trichocarpa GAGATGCCTTCGTTATCATGGTTCGAGCGATAAATAGATCCATCGACATCGATAAGACCTCTGCATCCAG F101P21Y BI138212 SWI/SNF-related matrix-associated actin-dependent regulator P. 
trichocarpa AAATTGCAGAACATGTTGGAACCAAGTCAAAAGCACAATGCATCCTTCACTTTCTGCGTCTGCCTGTAGA F101P86Y BI138245 TF, C2H2 zinc-finger P. trichocarpa CTCTAAGCGTCCACGTCTCGACCACCAGCCTACTGAAGAAGAATATCTCGCTCTCTGTCTTGTCATGCTA F103P52Y BI138326 TF, MYB P. trichocarpa CACATGGTACAATGGGAGAGTGCTAGAGTTGAGGCTGAGGCCCGTTTGTCAAAGCAGTCATTGCTTATTA F103P87Y BI138344 Transcriptional regulation, histone deacetylase, plant specific HD2 type P. trichocarpa GGTTTTTCAGGCTTGGCAGCTTTATCCTTAGCGGTAGCAGCTTTCATAATATCAGCTTTCGCAAATTCAA F106P90Y BI138460 TF, auxin response factor P. trichocarpa TGTTGCGGGTGATGCGTTTATATTTCTAAGAGGTGAGAATGGAGAACTTCGAGTTGGTGTTAGACGTGCA F110P74Y BI138617 AMP-binding protein P. trichocarpa AGAACTTGAGTAGTGTAGAGGTTGAGTCAGTGCTATACACGTTTCCAGATATTAACGAGGCGGCGGTTGT F115P67Y BI138781 TF, homeodomain P. trichocarpa TCCACTTGTTCCTACTCGTGAAAGTTACTATGTCAGGTACTGTAAACAACATGCTGATGGGACTTGGGCT F117P40Y BI138846 Signal transduction, HPt phosphotransmitter P. trichocarpa GTGTGGATCGGTTACGAGCCGATATGAGCCGTTTGCTGGCAATACTATTCCACCAGGTTTGCTGTGTTAG F117P48Y BI138849 Signal transduction, receptor protein kinase P. trichocarpa CAAGCTCCACCTGCCCCACTCCCATCTGCGAAAGTCCCATGCTATATCGATGAGTATGCGAATATCAAGT F118P50Y BI138891 TF, squamosa-promoter binding protein (SPB) P. trichocarpa ACAAATGCTAATGTCACATTGTGTGACAACCATGCATGGTGTCAGGATTGCTAGCTGTGGCACTAGGCAT F119P13Y BI138915 Signal transduction, cryptochrome P. trichocarpa TTTTCCGTGGAATGACAATCAGGTCCATTTTAAGGCTTGGAGACAGGGGCGGACTGGTTACCCATTAGTT F119P74Y BI138950 nucleosome/ chromatin assembly, histone P. trichocarpa TTGCTAATGGCAAGCTTGTTAAGGTGAAGAACTCCTTCAAGCTCCCTCCTAAATCTCCCGCTGCTGGTGC F121P83Y BI139042 Transcriptional regualtion, chromomethylase P. trichocarpa TGTCGGGAAGCTATAAAGAACTTTGTCATGCATGGCTACCTGTCAAATATTCTACCATTACCCGGTGATG F128P16Y BI139280 SET domain P. trichocarpa TTTCGTGCAGCATACCTGTATCCACTTCTCAGCAACAAGTGGTAGGTACACTGTCTTGGCTAATCCCTCT F129P24Y BI139316 Transcriptional regualtion, chromomethylase P. 
trichocarpa GGCAAAAGCTATGCAAACGGTATGCGGTCAATGATGTAGGAAGGACACATAAGTCACGGTCAATGGCATC F131P79Y BI139444 TF, C2H2 zinc-finger P. trichocarpa ACAATGTTTGATACTTTCGGAGGACAAGGCATCCGTGTACCTCCTCCTTTCCCCTCAGATATAGCTCCTC G070P72Y BI128093 squamosa-promoter binding(SPB) P. tremula x P. tremuloides ATGGTTGCAACTCTGACCTCAGTACTTGCAGGGACTACCATAGGCGCCATAAGGTCTGTGAGCTTCATTC G074P09Y BI128315 Signal transduction, gigantea P. tremula x P. tremuloides GGAGACAGGTTGTTGACGCACTATGCAATGTTGTATCAGCATCACCAACAATAGCAGCCACAGCAGTTGT G095P48Y BI129777 TF, homeodomain P. tremula x P. tremuloides AGCATGCATGGAGGCCACAAAGGGGTCTGCCTGAAAGCTCTGTTTCAATTCTTCGTGCTTGGCTGTTTGA G105P48Y BI130427 TF, B3 DNA binding, PEST domains P. tremula x P. tremuloides AAGATACTATGCGGTTGACAAAGGGTTCTAACATTTCACAAGTTGCTAATGAATCAGCACGGAATGTAGG G126P33Y BI131829 TF, homeodomain P. tremula x P. tremuloides GTGCTGGGACCTCAGGGATAACAAAAGGTGAGACACCGAGGCTTAAGTTATTAGAGCAAAGTCTAAGACA G134P12Y BI132257 TF, MADS-box P. tremula x P. tremuloides AAAACTGAGCGAGCTTATCACTTTAACGGGGACGGAAGCTGCGTATCTCGTCTTTTCACCTGGGGGTAAG GA20-OX CAC00709 GA biosynthesis, gibberellin 20-oxidase P. tremula x P. tremuloides CGAGGAATACTTTCACAACAGAATGGGGGAAGATTTCGCTGAGTTCGGGACGGTGTATCAGGACTACTGT HB1 CAA09366 TF, Homeodomain P. tremula x P. tremuloides GAACTTACATTCACAAGGCTCATTAGGGCTTAGGACTATCGTTGTGGCATGCATTTTACTGATTGCCTAA HB2 CAA09367 TF, homeodomain P. tremula x P. tremuloides GGATCTTTTGTATGTGTTCATAGAGTATACAAATTTGCGGAGAAAAACCGATGGTATGTTAAGGGTGTGG I012P09P BI122720 TF, homeodomain P. tremula x P. tremuloides TGAGCTGTCCTATTTAGTGAGTATTGTGTTAGTTTATAAATTTTATCATGGCTGTGTCGGGTCTGTGAAT I017P76P BI123112 TF, MADS-box P. tremula x P. tremuloides CAACTGGAGCGACAGTTAAAGACAGGGGTTGAACGTATCCGCTGTAAGAAGAAACGAGCCATCTCAGAGC M101C08 BU867525 post-transcriptional regulation, E3 ubiquitin ligase, Skp1 P. trichocarpa AAGGAAAGGGGTTTAATCTATTTCTATAATTGCCTTTTTCTGTTGGACGTGGATTGATGTTAAGGTTTTA M101C11 BU867528 histone H1C P. 
trichocarpa CTACTGCTTAGATGTATAAATTTTGTACTAGCAGAACGGCAGTAGTTAGTTTCTGATGCTCTACGTTGGG M102E03 BU867630 putative LRP(lateral root primordia) 1 P. trichocarpa TTGGCTTAAATGCAGAAGGCGTGAACCCCACTTTTGTCCTAAATTCTACTACCATAAATGGCATGCCATT M102E09 BU867635 AP2 domain P. trichocarpa AGCTGCAAGGGCGTATGATGCTGAGGCGCGTAGAATTCGTGGCAAGAAAGCTAAGGTGAACTTTCCTGAT M103H01 BU867748 nucleosome/ chromatin assembly, histone H1 P. trichocarpa TTTAGGGCGTGTAGATGGGGGGTAGTGATTGTAAATTTGAAAGAAAAGGGAGGGTGCTAATGGTAGTTAG M104E08 BU867803 TF, MADS-box, YABBY P. trichocarpa GAATGACTTTAGCGTTACAGTTCGAGGAATGGCTGATCATGAACTTCCTAGGCCACCCGTTATCCACAGA M105A07 BU867841 Post-trasnlational regulation, HSP90; heat shock superfamily P. trichocarpa AACCACCAAGTCGGATGGTAAGTTGACATCACTTGATCAGTACATATCAAGAATGAAATCTGGGCAAAAG M111B12 BU868098 Post-translational regulation, E3 ubiquitin ligase subunit, SKP1 P. trichocarpa GCAAGAAACACGTTGAGACTCCAAAACCCGAGGATCGAGGAACTAACAGCGGCGATGATGAGCTTAAGAA M112C09 BU868189 TF, MADS-box P. trichocarpa AATGATGATCCCGAAAACCATGGTAGCTGGACTTTGGAATATGCGAAGCTCAAAGCTCGGGTAGATGTTC M116D03 BU868480 TF, MYB P. trichocarpa ATCATCGTCTGCTCCCCTTGAACCTGGATATGTCTTGAGGCCTGATTCGTCTTCAATACCTATGAACCCC M116E04 BU868491 TF, NAC P. trichocarpa AAACTTTAATGGGTTATTACAACTATAAGAAGTGTGCATGAGTTTTTAGACTTTGAGCTAGAATTGCGCA M118F07 BU868664 TF, MYB P. trichocarpa AGAATTCTGTACCAACACTAGATATTCTCAAAAAGGCTAGCCCTTTCAGGGAGTTCGTATCTTCACCGAA M120B06 BU868703 Ubiquitin P. trichocarpa AGAGCTAATCTACTAGGGCATGCGGTGGTAGTAGGGTCATAGGATTGTTGGCAGTGAACATATCTATTCG M122B12 BU868794 translation initiation, eIF-5A P. trichocarpa CTGGGTTTTTTTTATTGTTAGTGTTTTCTAGTTCTCTTGATCCATCGCCGCCATGTCTGATGAGGAGCAT M124B09 BU868951 TF, 1B3 DNA binding, PEST domains P. trichocarpa GCTAATGTTTCACAAGTGGCCAATGAATCGGCACGAAATGTATTTGCTCAATACAACGAGCATAAGAATT M125D02 BU869042 TF, homeodomain P. trichocarpa AAGTCTAAGCAACTCGAAAGAGACTACAGCATGCTACGAGCTAATTACAACAGCTTGGCTTCCCGGTTTG M127F08 BU869233 translation elongation factor eEF1Balpha P. 
trichocarpa ATGGGATGATGAGACAGACATGGTGGCGTTGGAGAAGGCAGTTAGGAGTATTGAGATGCCCGGTCTCTTT M129E09 BU869391 TF, MADS box P. trichocarpa TCCAGGATGGATGCTTTGAGTTTCGTGCTCTTCATTGCTCATAAAGGAACACCTACTATGTAACTTTCTC PCEN-L Signal transduction, PBP Signal transduction, PBP P. trichocarpa GTCATGACAGACCCTGATGTTCCTGGTCCTAGTGATCCATACCTCAGGGAGCACCTACACTGGATAGTAA PHYA AF024544 Signal transduction (red light receptor), Phytochrome P. trichocarpa AGAACTTGAAGCAGTGACTAGTGAGATGGTCCGTTTAATTGAAACAGCTACAGTGCCAATTTTGGCAGTT PHYB1 AF024545 Signal transduction, Phytochrome P. trichocarpa GCATATGGAGTTCAAGTATGCTCTCCTCAACTCTTTTGCATGTCTTCCTCCTGAATTAGTTCAAGACATG PHYB2 AF024546 Signal transduction, Phytochrome P. trichocarpa ACAGATATTGAAGATCATACGAGACATTGATCTTGAGAGCATTGAAAATGGTTCACTGGAGCTTGAGAAG PMFT Signal transduction, PBP, Phosphatidylethanolamine-binding Signal transduction, PBP, Phosphatidylethanolamine-binding P. trichocarpa TCATTTCAACACTCGACTCTATGCTGCTCATTTGGACCTGGGCCTGCCTGTTGCCACCGTCTACTTCAAT PTAG1 AAC06237 TF, MADS-box, AGAMOUS homolog P. trichocarpa GCTACATGTATGCTAAAAAACCTGAAGTAGCGTAAATCATATTTGTCTGGGTGGGAGGGCCTAGTACTCT PTAG2 AAC06238 TF, MADS-box, AGAMOUS homolog P. trichocarpa CAACCATTTGACTCTCGGAACTATTCTCAAGTTAATGGATTGCCGCCTGCCAATCATTACCCTCATGAAG PTAP1-1B AY615964 TF, MADS-box P. trichocarpa TGCTAAAAATGTAGTTTTTGGTTCGTTATGAAATTAGATTATATATATATACGCACCTTGTTATAACTAA PTAP1-2A AY615966 TF, MADS-box P. trichocarpa TGTTGTTGTTAAAAGTTGTATCTGAATCAATTAAGAAGACTCCTACAGCTATATAATTTTCATAATATAT PTB2 AAG45501 TF, teosinte-branched-like P. trichocarpa AATTTCATGACCACCCCGCATGCCCTAGAACCAAACTTAAGTACAATGATCATGACAACATTAAACGCCC PTBF1 AF288616 TF, leucine zipper P. trichocarpa x P. deltoides TCTTCCCACTAAAGACGCTGGGATGTTGGGACAGATAAATGCGAGTGAAATAGCATTTTCTAGTCTTTTT PTCO11 TF, CO(C2C2 zinc-finger, B-box) TF, CO(C2C2 zinc-finger, B-box) P. trichocarpa AAGGTTCGCTAAACGTACAGATACTGAAGTCGAGGTTGATCGAAGCAGTCTTTATGGATTCGGTGTCGTT PTCO12 TF, CO TF, CO P. 
trichocarpa AATGTTGTTGTATATAATTAAGGTGGAAGCCTTGAATTTGTACACGTTTTTATGCAGTAATGAGTTTTGT PTCRY1D Signal transduction (blue light receptor), cryptochrome Signal transduction (blue light receptor), cryptochrome P. trichocarpa GAGGGTTACAAATTTGACCTAAATGGAGAATATGTACGCCGGTGGCTTCCTGAACTTGCTAGGCTACCAA PTD AAC13695 TF, MADS-box P. trichocarpa GAGGTCGACCTTCCAGCTCTTCAGACATCTTATCTAAATGCGTGTGCTAACTAGAGATGCTATCTAATAT PTID1L1A TF, C2H2 zinc-finger TF, C2H2 zinc-finger P. trichocarpa TATGTTTGTCCAGAACCTTCATGTGTCCATCACAATCCGGCTCGAGCATTAGGCGATCTTACAGGGATTA PTID1L3 TF, C2H2 zinc-finger TF, C2H2 zinc-finger P. trichocarpa ATCTATGCCGTTCAATCAGATTGGAAGGCTCACTCTAAAACCTGTGGAACAAGAGAGTATAGATGTGACT PTID1L4 TF, C2H2 zinc-finger TF, C2H2 zinc-finger P. trichocarpa GTTCAATCAGACTGGAAAGCTCACTCTAAAATTTGTGGCACAAGAGAGTATAGATGTGATTGCGGTACTC PTIR1 AF139835 Post-translational regulation, F-box P. tremula x P. tremuloides CCTTGAAAACGTCTTAGAAAACGTTCTCTGGTTCCTAACCTCACGTAAGGACCGAAACGCTGCGTCATTG PTLF O04064 TF, unique P. trichocarpa AGGCAATTGCTCTCTGGTGATAATAATACAAATACTCTTGATGCTCTCTCCCAAGAAGGTTTCTCTGAGG PTPGM Starch metabolism, phospho-glucomutase Starch metabolism, phospho-glucomutase P. tremula x P. tremuloides TTTTCTTTTTGTCGCCTTACCAAAAAAGGCAGGAGTACGATTTACTAAAACACGCCGATCCAAAGTTTTG PttGA2-OX P. tremula x P. alba P. tremula x P. alba CAGCGAGAGACTAGTAGCTTGGGGGGGGGGGGTAATGGTAGCACTGTAGTTTAGCCGTTAGTGTAAAATT PttIAA1 AF373100 Signal transduction, AUX/IAA gene family P. tremula x P. tremuloides TATTGTCTTGCTTAATAATGACCATCATTATTGTTTCTAGCTGGCTAGGTTGTCGAGAGCGTGTTCTGTC PttIAA2 CAC84706 Signal transduction, AUX/IAA P. tremula x P. tremuloides TTGGGAGTGCAACTGCAATTCTATGCGTCGAGAGTCTGGATCGTTTCCTGTTTCGCACAGGACTTCAGTT PttIAA3 CAC84707 Signal transduction,aux/IAA P. tremula x P. tremuloides TGTTGAGGATGATAGAACAAAAGTTGGTCTTTTGTTGTAAATTAAGGATTTTCAGAGTTCATCAAAAAAA PttIAA4 CAC84708 Signal transduction,AUX/IAA P. tremula x P. 
tremuloides TTGACACATATATAATATGGCGAGTCGAAAATAAATAGGTAGTGGATAGCTAGGTAAACTCTATATCAAG PttIAA5 CAC84709 Signal transduction,AUX/IAA P. tremula x P. tremuloides AAAGTAATTAACGGTCCTCAGCGGCTTTGTGTTGGATTTTGGTTGCTGTATGGTCTTATTATACTATTTG PttIAA6 CAC84710 Signal transduction, AUX/IAA P. tremula x P. tremuloides GCCTTGCATCCATGTATGAAGACTAGCAGGCAAGGCACTTGCGCAACATGGATGGATCAGTAGTGGTGTT PTTPIN1 AF190881 Signal transduction, polar auxin efflux carrier, PIN1 P. tremula x P. tremuloides AGAAAAACACCTATGATATTGGAATCCATGGAGACCCTATTTAGGCTACTAGGTAGCGCATGGAAAAGTT PTTPIN2 AF515435 Signal tranduction, auxin efflux carrier, PIN1 P. tremula x P. tremuloides GAGCTCCGACGAGCATGGCATCGCAGGAGCTGATGAGCAACAATTAGTTATTACAGTAGTAGTACCTAAC PTTPIN3 AF515434 Signal transduction, auxin efflux carrier, PIN1 P. tremula x P. tremuloides TGCTGGTCGTGTGTCACATCTGGCAGTAGCATTGATTACTGCGCCAAAATATATGTTCTTATAACAAGAG R01B05 CA821929 Transcriptional regluation, DNA (cytosine-5-)-methyltransferase P. trichocarpa x P. deltoides TTCAGACCAGATGATATTTCACCTGAAAAGGCATATTGTTCTGATATTCCGTGAGATATATTATAGTGAA R02H01 CA822027 ELONGATION FACTOR 1-ALPHA P. trichocarpa x P. deltoides GTTTGGAACGGTCCCAATTGGTCATGTGGAACTGGTGCATCAAGCCTGGACCAATTGGAAATTTCAGGCC R07C07 CA822366 TF, chitin-inducible gibberellin-responsive protein; GRAS protein/scarecrow-like P. trichocarpa x P. deltoides AGAGTCATCAGGCGCTAATATTTATCGTACCCTTAAGTGCAGGGAGCCTGAAGGCAAGGACTTGCTGTCT R08A10 CA822419 TF, AP2 P. trichocarpa x P. deltoides AGAGAAGTGAATTTGAAACGGAAATGGGGAAAGACAATGAGTTCTTGGATAATATTGTAGACGAAGAGTT R15C07 CA822886 TF, GRAS/SCARECROW P. trichocarpa x P. deltoides CAATGAGTGGAAACTCGATGGCTCAAGCACAACTTATATTGAACATGTTCCCTCCTGCTCATGGCTATAA R22G04 CA823228 Argonaute (AGO1)-like protein P. trichocarpa x P. deltoides TTTTCAGCAGATGATCTGCAGGAACCTGTGCATTCCCTCTCATATGTATACCAAAGAAGCACGACTGCCA R22G10 CA823232 FPF1 P. trichocarpa x P. deltoides AGAACCGTAATATGTTTGAAGTAAGGGATATGTAGTACGTTGATTACGTACGTCCTCATATATAGTCCTT R24E07 CA823338 TF, GRAS/Scarecrow P. trichocarpa x P. 
deltoides TGATCTATGATGCTACCTAGCTCGAGAGGAACCTTATGGAGTTTTAGACTTGCAGTTGGTAACTACCATC R26F09 CA823471 TF, AP2 P. trichocarpa x P. deltoides GGAAAAATCTATACAGAGGTATAAGGGCAACGACCTTGGGGTAAATGGGCAGCTGAGATTCGTGACCCAA R29F11 CA823676 FPF1 P. trichocarpa x P. deltoides AGTGAGGGATATGTAGTGCGTTAAGTACTCATATATAGCCCTTTATTCATGTTTGTCTGCATTTTAGTCC R33C03 CA823904 polyubiquitin P. trichocarpa x P. deltoides CTCCGTCTTCGTGGAGGCATGCAGATATTTGTCAAGACCTTAACAGGGAGGACCATTACTCTAGAGGTGG R34B09 CA823967 TF, homeodomain P. trichocarpa x P. deltoides ACAATCTCTACAAAGTTTGACTGGCGTATCTCCTGGGGAAGGCACCGGTGCAACTATGTCTGATGACGAA R34G04 CA824001 Actin P. trichocarpa x P. deltoides TTATGGCCGGGGACAGCGAATGAAGAAAGAAACACAAAGAACAATTACAGTTATTCTGAACGGTGAGAGG R35H10 CA824066 TF, GRAS/Scarecrow P. trichocarpa x P. deltoides TGGAGGTAGGATATACTATATAGTTGTATTAATTACATATACTTTTCATAGGTGGGGATGGTGCATTACC R43C04 CA824489 Transcriptional regualtion, WD-40 repeat P. trichocarpa x P. deltoides GATCATGTGTATGCCTCTTGTTCTGTGGATGGGCATATTGCAATATGGGATGCGCGTCTGGGGAAGTCAC R43G01 CA824511 TF, AP2 P. trichocarpa x P. deltoides AAGGCGATGGGGCGGAAAGTTTACAGGGAGTTTCGAGTGGGGTATGTTATCAAACATCTGGTGTGTGGAC R46E01 CA824677 TF, GRAS P. trichocarpa x P. deltoides ATTCAATATGCTGATTCGAGATATGATGCGTGATCTAGTGAAGCAGGTGGGCAGTGGACTGTCTTCCAAG R48C03 CA824775 Argonaute (AGO1)-like protein P. trichocarpa x P. deltoides AAGGCGATACGAGAGGCATGTGGCACCCTGGAAGAAGGATATAAGTGCCTCGGGTTACCTTTGTGGTATT R49E04 CA824857 Post-transcriptional regulation/ RNA-binding, CCHC zinc fingern P. trichocarpa x P. deltoides TGATCAATTTGTACATAAGGTGGAGCAAGTTGCCGCTACGAAACGTGCGAAGATGTGTGTACGAGAATTG R50C10 CA824915 Transcriptional regulation. nucleosome/ chromatin assembly factor group C, WD-40 repeat P. trichocarpa x P. deltoides TTTCTCTATGACTTCGTTATCTCTCACCCTCTCGAGTGGCCATCTCTAACCGTCCAATGGGTACCTCTCG R51DF126P TF, Bzip TF, Bzip P. trichocarpa x P. deltoides ATCCTGATGATATTGTCTCCAAGAAACGGAGAAGGCAGTTGAGAAATAAGGACGCGGCAGTGAGATCAAG R54A05 CA825137 TF, C2H2 zinc finger P. trichocarpa x P. 
deltoides ACCAAACTACTTCCTGTACAACTACTTCTGCCACGACAACACCCGTGTCTAACGGAAGTGGCAGGGTTCA R56B07 CA825263 Translation initiation, eIF3p40 P. trichocarpa x P. deltoides TTGTTTTCAAACAGTGGAATTGATCGAGGCATTTATGAATTACCAGGAAAATATTAGTACGGTGTGTTTG R59E12 CA825474 cyclophilin P. trichocarpa x P. deltoides GAATTTCATAAAGAAACATACTGGGCCTGGAATTCTATCTATGGCAAATTCTGGGCCTGGCACTAATGGG R59F10 CA825478 TF, CONSTANS P. trichocarpa x P. deltoides ACAACCTATATAACTCTCCGAGCTCTGTCCCTTTCATGGCCGATACCCAATACGGTGTCGTTCCCTCGTT R63D08 CA825614 Translation initiation, eIF-4b P. trichocarpa x P. deltoides TTCTCCGTTATAATTTTGCGAGTTTTGCCAGGACTCTTGCTATAGTAGAACAGTGTTGAGACAATTTTGG R65B04 CA825708 TF, NAC-domain P. trichocarpa x P. deltoides TACAACATCCGGGTACTGGAAGGCCACAGGCTCTCCTGGTTATGTTTACTCGTCGGACAATCGTGTGATT R67C05 CA825769 TF, AP2 P. trichocarpa x P. deltoides ATTGACGGAGAGTACTGGGTCAGGTGGATCTTCGCCTTTGTCGGATCTGACGTTTCCGGATTTTGAGGAA R69F12 CA825916 TF, AP2 P. trichocarpa x P. deltoides AATAGATACGGGACATGCCATTTTTTGATATGGCAATGAGTACGAGTACGAGTAATCACCCCACAGAGTA R70F11 CA825986 Post-transcriptional regulation/ RNA-binding, zinc finger P. trichocarpa x P. deltoides TGCATCAAGCTTTGTAGTGTTCTAGGATCAACAAAGCCTAAGACTTTTAAACTTTCTCATCAAAACATGG R71A05 CA826000 FPF1 P. trichocarpa x P. deltoides TAATTTCCCAGTGTGTAGGAATAATAATTGGAACAAGGTTCAGGTCGTTCCTTCGTTGGGTATTTACTAA R73H10 CA826180 transcriptional regulation, histone acetyltransferas P. trichocarpa x P. deltoides TGTTGGGTGTTTACTGCGGTGTATGCGTTTTTATCATTACCCCGACAGCACACGTATTGCGGCTCAGTCA R74C06 CA826202 TF, homeodomain P. trichocarpa x P. deltoides AAGAAGAGAGGCATTGTCTCTTTGTACTCTATTTTCTGACTTGGAACCAAAGCTCCTTATCACTCTAGAA R74E10 CA826218 TF, AP2 P. trichocarpa x P. deltoides GATGTGGGATGGGCCGTTTTTGGATATGTTAATGAATACGGGTACGTGTAATCACTCCACGGAATATGGG R-AP2 TF, AP2 TF, AP2 P. trichocarpa x P. deltoides TGTAAATAATAAGGCTACATGTTAGGGAGTTTCATCTCTACCAAGATATTGGTTTACCTCTATTATGAAG UA13CPF12 BU817168 Starch metabolism, pyruvate phosphate dikinase, PEP/pyruvate binding domain P. 
tremula AACTTAACAGAAGGCAGTCCTTCACCTATAAAGTTGGTCAGAAAGGAGTTCAGTGGCAGATATGCCATAT UA35CPA11 BU818819 TF, GRAS P. tremula ATTCTTCAGATAACTTCTATGAGACCTGCCCTTATATGAAATTCGCTCACTTCACGGCCAATCAAGCAAT UA35CPG02 BU818875 Polycomb P. tremula TGCGCGTCATCAATGCCAGCAATGAAGAGATACACAAGAGTGGTGTAGGGCATGGGGACTCGATAAATGA UA52DPC04 BU820187 TF, C2H2 zinc finger P. tremula CGTGTCTATGTGTGTCCAGAGTCATCTTGTGTACATCACAACCCAGTTAGAGCTCTCGGGGACCTTACTG UA52DPD03 BU820198 TF, C2H2 zinc finger P. tremula GAAGGTCTATATATGCCCAGAAAAGACCTGCGTGCACCACGATTCATCTAGAGCTCTCGGAGACTTAACT UB10CPB06 BU820432 Signal transduction, LRR protein kinase P. tremula GATTCAGACCTTGGGCAGGATTAGGCACCGACACATTGTTAGATTGTTGGGGTTCTGCTCTAACCACGAG UB10CPB07 BU820433 Transcriptional co-regulator, LIM-domain binding P. tremula ATGGTATGGCTAATGTAAATAACTCACTCAACACTGCATCTGCAACTACCTATGCCAGCGCCCGAGAAAC UB10CPF10 BU820482 Transcriptional regualtion, ploycomb group; VEF P. tremula TTCATCGTTGTCTCATATCATTCGGTTGAGGGCAGGAAATGTGATTTTCAACTATAGGTATTACAATAAT UB11CPH11 BU820600 TF, AP2 P. tremula GCTGCCCTCAAGTACTGGGGACCTTCAACCCATATAAATTTTCCGTTAGAAAATTACCCGGAAGAACTTG UB18CPG04 BU821074 TF, MADS-box P. tremula CAAACTCAAGCATGGGGCAAGTGATTGAAAGGCGCAATCTGCATCCAAAGAACATCGACACGCTCGATCA UB27CP09(F131P66Y) BI139437 unknown P. trichocarpa AATGAAACACCGTGGGGAAACAAACTTTTACTTGTGTGAAATCAATCGCAATATGGTAATTGATGCTACT UB28CPA08 BU821799 Signal transduction, pseudo-response regulator P. tremula ATCTTGTTTTAACTGAGGTGGCCATGCCTTGTTTATCAGGCATTGGCCTTTTAAGCAAGATTATGAGCCA UB28CPB01 BU821803 TF, auxin response factor(ARF) P. tremula AAGTTAGTTGCGGGTGATGCATTCATCTTCCTAAGGGGAGAAAATGGCGAGCTTCGTGTGGGAGTAAGGA UB36DPH07 BU822399 Transcriptional reglation, p300/CBP acetyltransferase P. tremula CTTCATATCTACCGGTGAATGTAAGGCAAAGGTCACAGCGGCTAGGCTACCCTACTTTGATGGGGATTAT UB53CPD11 BU823521 Signal transduction, Ser/Thr protein kinase P. 
tremula TTTGCGTGAGAGAGACCGCTATGAACTGGAAAAGGGACGGCTTATACGTGAATTGAAAAGGATACGAGAT UB53CPE02 BU823524 Transcriptional regulation, nucleosome/ chromatin assembly factor group C, WD-40 repeat P. tremula TAAGATAGTGGCCACTCATACTGACAGCCCTGATGTTCTTATATGGGATGTCGAAGCACAGCCTAACCGC UB60BPD08 BU824105 TF, squamosa-promoter binding (SPB) P. tremula AGCAGATAATTGCACCTCTGATCTGACTGATGCCAAGCGATACCATAGACGCCATAAGGTTTGTGAGTTC UM37TC05 BU879772 Transcriptional regulation, N-acetyatranferase, silencing group B P. trichocarpa GAATCTACCGAATGTCACGGTCATATAACTTCTCTAGCTGTCCTCCGTACTCATCGTAAGCTTGGCCTCG UM42TF11 BU880195 TF, AP2 P. trichocarpa ACGTGCCTACGATACTGCCGCACGTGAGTTCCGTGGCTCTAAGGCAAAGACTAACTTTCCATATCCATCA UM43TE12 BU880261 TF, CONSTANS P. trichocarpa GCTGCCTTGTTATGCAAAGCAGATGCGGCATCTCTGTGTACTGCCTGTGATGCAGATATTCACTCTGCAA UM44TA01 BU880290 TF, homeodomain leucine zipper P. trichocarpa GATGGGGTACATTCCTCACTCTTAGAGGCAGGTGATACTTCACATGTCTTCGAGCCCGACCAATCGGATT UM53TE12 BU880682 Translation initiation, eIF-6a P. trichocarpa AAATGGGCTTCTTGTGCCTCACACCACCACTGATCAAGAGCTCCAGCATTTGAGAAACAGCCTACCTGAT UM54TA09 BU880731 Translation inititation, elongation factor 2 (EF-2). P. trichocarpa TCATCAAGTCTGACCCTGTTGTGTCGTTCCGTGAGACTGTCATAGATAAGTCCTGCCGTGTGGTGATGAG UM54TC12 BU880748 TF, NAC P. trichocarpa AAGCACTTGTATTTTACGCCGGCAAAGCTCCCAGAGGAATCAAAACCAATTGGATCATGCACGAATACCG UM54TF05 BU880770 ATP synthase beta chain, mitochondrial precursor P. trichocarpa GTTCTTAACACTGGATCTCCAATTACTGTGCCCGTGGGCAGGGCCACTCTTGGTCGCATAATCAATGTCA UM57TC10 BU880985 Signal transduction, guanine nucleotide-binding protein beta subunit 2. P. trichocarpa GATTTGGGCTTTTGGAGGGCACAGGCAGGTAATCTGATTCGAGCAGAAGGATCGAATCACATGTCTCATT UM61TC08 BU881322 Transcriptioanl regulation, WD-40 repeat, MSI type nucleosome/chromatin assembly factor C P. trichocarpa TGAATAGGGCACGGTATATACCACAAAACCCATTTATGATAGCTACCAAGACTGTTAGTGCTGAGGTATA UM62TB07 BU881396 Transcriptional regulation, histone deacetylase P. 
trichocarpa TGCTATTTCTACGATCCAGAAGTCGGCAATTACTATTATGGCCAAGGTCACCCCATGAAGCCCCACCGCA UM65TE06 BU881637 TF, AP2 P. trichocarpa TAAGGGGTCAGTCAGTTTTAGATTCTAAGTACTATTAATTATGCTGTCGATATGTATAGTAAGTCGCTCT UM66TB05 BU881681 Transcriptioanl regulation, PHD-type zinc finger P. trichocarpa CTTTAACGTTTTAAAGTTGATCAGAGCCAGTTGCCTACTTTAGTCAATGAATTCGAATGACTGCAGATTG UM67TB05 BU881759 translation initiation, eIF-1A P. trichocarpa CGGCGGGGCCTGATTGATTTTCTTTTTCTATCTATCTATCTATCAGCTTCCTCGCTCCTCTCTCACCTTA UM67TE05 BU881786 TF, homeobox-leucine zipper P. trichocarpa GCTATTTGGTTTCAAAACAGGAGGGCTAGATGGAAGACTAAGCAGTTAGAGAAAGACTACGAGGTCTTAA UM69TD03 BU881923 TF, MYB P. trichocarpa AAGTGTCAGGAATTGAATCTTGAGCTGAGGATATGCCCTCCCTATCAGCACCCAACTCAGTCAATGGAGA UM69TF07 BU881946 TF, NAC (no apical meristem) P. trichocarpa CTCCACGAGGGAGCAAAACGGATTGGGTCATGAATGAGTACAGATTGCCTAGCAACTGCTATTTATCGAA UM77TC04 BU882460 Post-transcriptioal regulation/ RNA-binding, C3H zinc finger P. trichocarpa TGATGAGCCTGATGTATCTTGGGTTAATACATTGGTTAAAGATGTCCCCCCTGCTGGAAGCACATTTTTT UM82TH11 BU882839 TF, MYB P. trichocarpa AAGCTGATTCTTTCGGCTCAGGAAGTGGCCATTCTAAGGGTGCTGCCCATATAAGCCACATGGCTCAATG UM83TC03 BU882863 TF, AP2 P. trichocarpa CTCTAGATATCTTCTAAATGTACAGACACTCGGATGGCACAATGCATGGACTGTGGACTTCATGACTCCC UM83TF04 BU882889 TF, MADS-box P. trichocarpa AATGCTACAGAATTCTAACAGGCACTTAATGGGCGATGCTGTAAGTAATTTATCTGTGAAAGAGCTTAAG UM89TG10 BU883381 TF, MADS-box P. trichocarpa ATGTATTTTCTAGATATTCTATGTTTATTTCGCTAGCAGAGACGTTTATTATCCTCTTATGTCTAACATT UM91TD06 BU883487 TF, AP2 P. trichocarpa TGGAGAAGTATCCTTCAGTTGAGATTGACTGGTCATCTATCTAAGTTCTAAATTATGATTATGTAATCTT UM93TD04 BU883572 TF, MYB P. trichocarpa CAATTAAATAGTACTGTACAACAGCTACAGTTTATACGATTTTCTTAGACATGAGGCAGGAATTCAGTTC V001E09 BU875027 cyclophilin; peptidyl-prolyl cis-trans isomerase P. trichocarpa TGTTGCTGACTGTGGTCAACTCTCTTAGAGTCCCTTGGTTGACATGTCGTTCGGGTGGTGTCTTTGTTTC V001G07 BU875049 nucleosome/ chromatin assembly, histone 3 P. 
trichocarpa TAGATATTTCTGGGTAGATGGACAACTCTTATCTCTGTTTGTATCTTCTAGGTATCATAAGTCTATTTCC V002G11 BU875143 S-adenosylmethionine synthetase P. trichocarpa GACAAGAAGAGGAACATCATTTTCTTTCAAGCAGAGCAGCAACTCTACTCTTTCTCCCTAATCTCTCAAG V003A12 BU875165 TF, MADS-box P. trichocarpa AGTATCCACAGGCAAGTTGGGGATTCAAACTTCGAGGAGCTGACCTTAAGCGACCTAGAACAAACGGAGA V006H03 BU875423 Polycomb transcriptional repressor, enhancer of polycomb-like protein P. trichocarpa CAGCTGTAACCGTGAAGCAAGAGTTTAGACGGCGGCATACACCACTTGGATGGCTCAATAAAATGGACCC V006H09 BU875429 nucleosome/ chromatin assembly, histone 2A P. trichocarpa CTCACATTTCGGTTCGACTATTTGTTCGAAAAGGGGCAATCAATGGCTGGCAGAGGCAAAACCCTAGGAT V007C11 BU875452 TF, MADS-box P. trichocarpa CTTACGGCCGGGGACTCCTTTAAACTTCTAACTTTCTCTTTCTTCAAACTTTTGTTGTTCTCTCTATTAT V007E07 BU875471 TF, PHD zinc finger P. trichocarpa GCTCCCCACAATATTCGAGGCCGTGTCTGGAAATGTTAAGCAACCTAAGGACCAAACTGCCACTCACAAC V008A10 BU875511 Transcriptional regulation, nucleosome assembly factor group A, NAP1 P. trichocarpa GCTACAGAGTTTGACTGGACAGCACTCTGAATTGCTCGAGAACCTATCTCCGACTGTTAGGAAGCGTGTT V008B11 BU875524 Post-translational regulation,ubiquitin activating enzyme, ThiF P. trichocarpa ACCAAATACGATCGCCAACTCAGTTGGCGGAAGACTCGATGATAAAGCTCGATAAAATCTGCAGGGAGGC V009A10 BU875600 tubulin beta-6 chain P. trichocarpa CTATGGACAGTATCAGGACCGGTCCATATGGGCAGATTTTCCGGCCTGATAACTTCGTTTTTGGGCAATC V009B10 BU875611 Transcriptional regulation, histone deacetylase HD2 P. trichocarpa ATGATGAATCTGACGATGACGATTCTGATGACGATTCTGATGAGGATGATTCAGGCGATGGATCCGAGGG V009G10 BU875666 TF, MADS-box P. trichocarpa GCAACACAAACAAGCAGACAAGCGAACTGAACATGGAGCAACTGAAGGGCGAAGCAGCTAGCATGATAAA V012F11 BU875887 TF, MYB P. trichocarpa GCTCATGCCAAGTATGGTAATAAATGGGCTACCATAGCAAGACTCCTCGATGGACGTACGGACAACGCAA V012H04 BU875916 Transcriptional regulation, arginine methyltransferase P. trichocarpa AAACCAGCGCCGATTATTACTTCGATTCTTACTCTCACTTTGGTATTCATGAACAAATGCTGAACGATGT V012H06 BU875918 TF, homeodomain P. 
trichocarpa CAAGACAAGTCGCTGTTTGGTTCCAAAACCGCCGTGCCCGGTGGAAAACCAAGCAATTGGAGAGAGATTA V015E04 BU876053 Transcriptional regulation, arginine methyltransferase P. trichocarpa TGGTGGGCTGACATCGATGCTCAAACTGCTACCTGTTAGCAGGATGTATTTGGAAACCTGCATCTATTTC V016C11 BU876112 TF, MYB P. trichocarpa GCAAAAGTTGCAGGCTTAGATGGACTAACTACCTCAGACCAGGAATTAAGCGAGGCAACTTCACACCCCA V016F11 BU876142 TF, SEP3-related MADS-box P. trichocarpa AGAAAGGTTGATGGAAGGTTACGAAGTAAATTCACTCCAGTTGAATCTAAGTGCAGAAGATGTGGGTTTT V017G04 BU876211 TF, AP2 P. trichocarpa CAGTGATACAACCGTTGGATCTTAATCTCGGTCCTGCTGTATCCACCGTGAGATTGCCGTTTCAGCCGAT V020F08 BU876433 Transcriptional regulation, histone deacetylase P. trichocarpa TGTTGTTGAGAGAATTGACTAGTCTAATGTGCTTGACCTTGTGAAGTCTGGGGCATGACTCGACATGAAT V021F11 BU876515 TF, MADS-box ; AGL20 P. trichocarpa AGCTCTATGAATTTGCAAGCACAAGCATGCAGGAGACCATTGAACGTTATCGAAGGCACGTGAAAGAAAA V025A02 BU876760 Transcriptional regulation, SNF5 P. trichocarpa CAATCCATACAGACACAATTGGCCGAGTTTCGATCCTATGAAGGTCAAGATATGCTGTACACTGGAGACA V025E01 BU876802 Transcriptional regulation, DEMETER; DNA Glycosylase Domain P. trichocarpa AGAGCACCCCGACCTCTCAAGGCCAGATTACACTTTCCAGTAAGTAGGTTGGTAAAGACAAAGAACGAGA V031H01 BU877253 Signal transduction, response regulator P. trichocarpa CATTGAAATTAACATGATCATTACAGATTACTGTATGCCAGGAATGACAGGCTACGATCTCCTAAAAAAG V032H12 BU877347 Alpha-3 tubulin P. trichocarpa TGGTTTGGGGTCTTTGCTGTTAGAACGCTTGTCTGTTGATTACGGAAAGAAGTTAAAGCTTGGATTCACC V037B10 BU877632 Transcriptional regualtionm PHD zinc finger P. trichocarpa AGGAAAAGGCTGTTCAGCCTGATCAATGATCTGCCTACTGTCTTTGAAGTTGTAACAGAAAGGAAGCCTG V037D09 BU877646 TF, C2H2 zinc finger P. trichocarpa GCAAGCCACCCGCCGCCTGGCTTTAGCTTCCTATGTTACCATACCAAATTCCATGTTCTCGACGTTTATT V039F10 BU877824 Transcriptional regualtion, polycomb, ESC P. trichocarpa CTTTAGTTGACTGGTTTAAGGCTAATGTAACTTATTTGGAATTCTAACAGCTATTTGACCCGTGATTAGT V041C10 BU877951 Transcriptional regualtion, helicase, SNF2-N P. 
trichocarpa AAACTCGAACCAGCATTTACTGCCACACCCCAAGTTGATTCATCGCAGATGCAATATCCACATGCAAATA V047A09 BU878423 Signal transduction, G-proein beta family P. trichocarpa TTGTTAAATTCTCGCCTAATGGAAAGTTCATTCTCGTTGCTACTCTTGATAGCACTCTTAAACTTTGGAA V049F05 BU878632 Transcriptional regulation, PHD zinc finger protein P. trichocarpa GAACTTACAGGATGCGCATTATGCAGAGGTTATGATTTTATGAGATCTGGATTTGGTCCACGCACAATTA V049F07 BU878634 Signal transduction, pseudo-response regulator P. trichocarpa AATTCCTTCGGAAACCACTCTCTGAAGATAAACTGAGGAATATATGGCAGCACGTCGTTCATAAGGCATT V053C07 BU878904 TF, MYB P. trichocarpa TCTACACTTGAACCCGAGTAGCCCATCCGGATCCGATTTGAGCGATTCAAGTATACCCGGCGTGAATTCA V056C05 BU879121 SET domain P. trichocarpa TTGCAAGTGTTCCACTTATTACCATGCAATGCGTGCTTCGAGGGCAGGATATCGCATGGAGTTGCACTGC V057G12 BU879250 TF, NAC P. trichocarpa TGATTGGTATTTCTTTAGTCACAAAGACAAGAAATATCCCACAGGGACTAGAACAAATCGAGCTACGGCT V059D02 BU879372 TF, MADS-box P. trichocarpa CTAGTTCTGGAAGGATGCATGAGTACTGCAGCCCTTCCACTACGGTGGTCGATCTGTTGGACAAGTATCA V060H07 BU879493 Transcriptional regulation, methyl-CpG binding P. trichocarpa AGATCGGATTAGGACCTCTGGCGTGACAGCTGGCACCGTAGATAAGTATTACATTGATCCTGCCTCAGGT V061D11 BU879538 nucleosome/ chromatin assembly, histone 4 P. trichocarpa TGAGGGATCGGGTTTAGATTTGCTAATCAGGTTTATTTTAGGTACTGTTTTGTAAGTATAAGTGCGTGAT V063A06 BU879671 SET domain P. trichocarpa AAAATTTGGCTGTTACACTTGACAAGGTGAGATATGTAATGAGGTGTATATTTGGTGACCCAAAGCTGGC V063C08 BU879695 Actin P. trichocarpa CCTCTGTCTCCGACTTCAAAAGAATTTGTAGAAAATGGCCGATGCCGAGGATATTCAACCCCTTGTCTGT V063H06 BU879746 Transcriptional regulation, methyl-binding P. trichocarpa GATGCTAGACCTCTTGAACCTGGTGAAGCAAATCCCCTTAAATGGGTTGGTCCAGGTGATTGTACAGAAC SR001 X56062 Photosystem I chlorophyll a/b-binding protein A. Thaliana from stratagen SR002 X14212 RUBISCO activase A. Thaliana from stratagen SR003 U91966 RUBISCO large subunit A. Thaliana from stratagen SR004 AF159801 lipid transfer protein 4 A. Thaliana from stratagen SR005 AF159803 lipid transfer protein 6 A. 
Thaliana from stratagen SR006 AF191028 papain-type cysteine endopenptidase A. Thaliana from stratagen SR007 AF168390 root cap 1 A. Thaliana from stratagen SR008 AF198054 NAC1 A. Thaliana from stratagen SR009 AF247559 triosphosphate isomerase A. Thaliana from stratagen SR010 X58149 PRKase gene for ribulose-5-phosphate kinase A. Thaliana from stratagen B.Actin X63432 ?-actin Human from stratagen GapDh NM_002046 Glyseraldehyde-3-phosphate dehydrogenase Human from stratagen !platform_table_end ^SAMPLE = GSM77557 !Sample_title = fb6a1 !Sample_geo_accession = GSM77557 !Sample_status = Public on Oct 11 2006 !Sample_submission_date = Oct 11 2005 !Sample_last_update_date = Oct 17 2005 !Sample_type = RNA !Sample_channel_count = 1 !Sample_source_name_ch1 = floral bud from six year old poplar !Sample_organism_ch1 = Populus trichocarpa !Sample_organism_ch1 = Populus deltoides !Sample_characteristics_ch1 = floral bud at age six !Sample_molecule_ch1 = total RNA !Sample_label_ch1 = cy5 !Sample_description = wild type poplar floral bud at age six !Sample_data_processing = GeneXp !Sample_platform_id = GPL2092 !Sample_contact_name = Jingyi,,Li !Sample_contact_email = JINGYI.LI@OREGONSTATE.EDU !Sample_contact_phone = 541-737-8496 !Sample_contact_institute = Oregon State University !Sample_contact_address = !Sample_contact_city = Corvallis !Sample_contact_zip/postal_code = 97331 !Sample_contact_country = USA !Sample_series_id = GSE3457 !Sample_data_row_count = 217 #ID_REF = #VALUE = normalized signal intensities !sample_table_begin ID_REF VALUE A001P54Um 5.31 A003P34U 8.62 A009P50U 9.09 A010P29U 4.84 A031P05U 4.45 A039P68U 5.36 A047P36U 9.35 A069P31U 5.81 AF240445 10.54 B014P39U(UB21CPA06) 8.9 B016P21Um 7.96 B020P33U 8.31 C025P36U 7.48 C025P42U 8.74 C036P25U 10.31 C061P33U 8.04 C067P63U 5.54 C092P78U 8.9 F003P03Y 7.97 F011P01Y 6.7 F012P39Y 5.66 F027P14Y 10.57 F028P06Y 6.23 F044P04Y 7.83 F052P94Y 7.67 F066P33Y 9.01 F068P73Y 10.3 F070P70Y 8.62 F093P10Y 8.45 F098P59Y 9.44 F099P36Y 6.74 
F100P70Y 4.77 F101P21Y 9.51 F101P86Y 9.86 F103P52Y 4.04 F103P87Y 5.41 F106P90Y 9.47 F110P74Y 9.95 F115P67Y 7.55 F117P40Y 5.1 F117P48Y 10.87 F118P50Y F119P13Y 8.66 F119P74Y 10.45 F121P83Y 7.11 F128P16Y 5.21 F129P24Y 5.36 F131P79Y 10.04 G070P72Y 6.6 G074P09Y 8.87 G095P48Y 8.83 G105P48Y 8.06 G126P33Y 7.37 G134P12Y 5.1 GA20-OX 4.84 HB1 7.83 HB2 9.34 I012P09P 9.5 I017P76P 8.27 M101C08 12.67 M101C11 13.32 M102E03 7.18 M102E09 11.57 M103H01 11.84 M104E08 9.41 M111B12 11.14 M112C09 10.76 M116D03 11.9 M116E04 8.34 M118F07 7.38 M120B06 11.76 M122B12 11.32 M124B09 7.47 M125D02 9.84 M127F08 12.91 M129E09 12.37 PCEN-L 6.74 PHYA 6.26 PHYB1 8.12 PHYB2 6.99 PMFT 7.75 PTAG1 10.85 PTAG2 10.97 PTAP1-1B 11.38 PTAP1-2A 13.85 PTB2 6.7 PTBF1 5.36 PTCO11 13.09 PTCO12 12.72 PTCRY1D 8.35 PTD 8 PTID1L1A 9.2 PTID1L3 7.07 PTID1L4 9.21 PTIR1 4.45 PTLF 10.62 PTPGM 11.01 PttGA2-OX 7.71 PttIAA1 9.14 PttIAA2 11.8 PttIAA4 6.86 PttIAA5 9.98 PttIAA6 6.54 PTTPIN1 7.69 PTTPIN2 8.72 PTTPIN3 5.54 R01B05 5.94 R02H01 7.87 R07C07 8.25 R15C07 7.58 R22G04 10.36 R22G10 4.36 R24E07 9.95 R26F09 10.48 R29F11 5.58 R33C03 12.59 R34B09 9.04 R35H10 8.98 R43C04 9.36 R43G01 7.72 R46E01 4.91 R48C03 6.04 R49E04 6.12 R50C10 7.96 R51DF126P 10.29 R54A05 8.69 R56B07 9.62 R59E12 12 R59F10 10.93 R63D08 11.03 R65B04 7.02 R67C05 9.34 R70F11 10.31 R71A05 5.58 R73H10 8.64 R74E10 4.91 R-AP2 12.43 UA13CPF12 9.34 UA35CPA11 8.42 UA35CPG02 5.54 UA52DPC04 8.55 UA52DPD03 8.02 UB10CPB06 9.02 UB10CPB07 9.04 UB10CPF10 7.99 UB11CPH11 8.74 UB18CPG04 8.51 UB27CP09(F131P66Y) 8.84 UB28CPA08 7.31 UB28CPB01 8.42 UB36DPH07 5.45 UB53CPE02 8.99 UB60BPD08 7.94 UM37TC05 9.6 UM42TF11 8.54 UM43TE12 9.33 UM44TA01 8.96 UM53TE12 9.54 UM54TA09 12.36 UM54TC12 10.19 UM54TF05 8.19 UM57TC10 8.17 UM61TC08 9.51 UM62TB07 9.12 UM65TE06 11.15 UM66TB05 10.42 UM67TB05 10.24 UM67TE05 7.85 UM69TD03 7.68 UM69TF07 UM77TC04 9.76 UM82TH11 5.91 UM83TC03 8.23 UM83TF04 4.04 UM89TG10 10.73 UM91TD06 8.54 UM93TD04 8.88 V001E09 13.39 V001G07 10.17 V002G11 11.51 V003A12 6.36 V006H03 
7.64 V006H09 9.09 V007E07 11.16 V008A10 7.58 V009A10 7.74 V009B10 12.04 V009G10 7.79 V012F11 6.43 V012H04 9.41 V012H06 8.46 V015E04 8.06 V016C11 6.94 V016F11 9.46 V017G04 8.58 V020F08 4.7 V021F11 7.73 V025A02 9.54 V025E01 8.81 V031H01 5.88 V032H12 11.6 V037B10 10.1 V037D09 7.72 V039F10 9.18 V041C10 8.43 V047A09 8.85 V049F05 9.08 V049F07 9.71 V053C07 9.33 V056C05 6.58 V057G12 3.04 V059D02 11.35 V060H07 6.72 V061D11 12.81 V063A06 8.83 V063C08 10.53 V063H06 7.73 !sample_table_end ^SAMPLE = GSM77558 !Sample_title = fb6a2 !Sample_geo_accession = GSM77558 !Sample_status = Public on Oct 11 2006 !Sample_submission_date = Oct 11 2005 !Sample_last_update_date = Oct 17 2005 !Sample_type = RNA !Sample_channel_count = 1 !Sample_source_name_ch1 = floral bud from six year old poplar !Sample_organism_ch1 = Populus trichocarpa !Sample_organism_ch1 = Populus deltoides !Sample_characteristics_ch1 = floral bud at age six !Sample_molecule_ch1 = total RNA !Sample_label_ch1 = cy5 !Sample_description = wild type poplar floral bud at age six !Sample_data_processing = GeneXp !Sample_platform_id = GPL2092 !Sample_contact_name = Jingyi,,Li !Sample_contact_email = JINGYI.LI@OREGONSTATE.EDU !Sample_contact_phone = 541-737-8496 !Sample_contact_institute = Oregon State University !Sample_contact_address = !Sample_contact_city = Corvallis !Sample_contact_zip/postal_code = 97331 !Sample_contact_country = USA !Sample_series_id = GSE3457 !Sample_data_row_count = 217 #ID_REF = latform] #VALUE = normalized signal intensities !sample_table_begin ID_REF VALUE A001P54Um 4.89 A003P34U 8.01 A009P50U 9.16 A010P29U 5.02 A031P05U 5.7 A039P68U 5.99 A047P36U 9.18 A069P31U 5.49 AF240445 10.88 B014P39U(UB21CPA06) 8.56 B016P21Um 7.96 B020P33U 8.4 C025P36U 7.8 C025P42U 9.14 C036P25U 10.27 C061P33U 7.78 C067P63U 6.54 C092P78U 10.17 F003P03Y 8.44 F011P01Y 6.43 F012P39Y 5.62 F027P14Y 10.32 F028P06Y 6.52 F044P04Y 7.73 F052P94Y 7.51 F066P33Y 9.24 F068P73Y 9.14 F070P70Y 8.5 F093P10Y 8.53 F098P59Y 8.96 F099P36Y 6.68 
F100P70Y 4.66 F101P21Y 8.64 F101P86Y 10.14 F103P52Y 4.49 F103P87Y 5.14 F106P90Y 9.48 F110P74Y 9.64 F115P67Y 8.18 F117P40Y 4.3 F117P48Y 10.64 F118P50Y 7.44 F119P13Y 8.63 F119P74Y 9.94 F121P83Y 7.61 F128P16Y 5.92 F129P24Y 6.54 F131P79Y 9.96 G070P72Y 6.56 G074P09Y 9.08 G095P48Y 8.84 G105P48Y 8.33 G126P33Y 7.17 G134P12Y 5.62 GA20-OX 5.25 HB1 7.92 HB2 9.27 I012P09P 9.47 I017P76P 8.29 M101C08 12.44 M101C11 13.51 M102E03 7.4 M102E09 11.38 M103H01 11.97 M104E08 9.51 M111B12 10.88 M112C09 11.16 M116D03 11.95 M116E04 8.83 M118F07 7.18 M120B06 11.85 M122B12 11.08 M124B09 6.66 M125D02 9.71 M127F08 12.09 M129E09 12.45 PCEN-L 6.33 PHYA 5.85 PHYB1 8.48 PHYB2 7.53 PMFT 7.91 PTAG1 11.19 PTAG2 10.9 PTAP1-1B 11.37 PTAP1-2A 14.15 PTB2 7.02 PTBF1 5.58 PTCO11 13.22 PTCO12 12.42 PTCRY1D 8.3 PTD 10.43 PTID1L1A 9.38 PTID1L3 6.99 PTID1L4 9.06 PTIR1 3.49 PTLF 10.63 PTPGM 11 PttGA2-OX 7.82 PttIAA1 9.2 PttIAA2 11.91 PttIAA4 6.45 PttIAA5 9.91 PttIAA6 7.39 PTTPIN1 7.9 PTTPIN2 8.61 PTTPIN3 5.25 R01B05 6.14 R02H01 7.68 R07C07 7.99 R15C07 7.89 R22G04 10.53 R22G10 5.4 R24E07 9.86 R26F09 10.67 R29F11 6.02 R33C03 12.75 R34B09 9.15 R35H10 8.78 R43C04 9.23 R43G01 7.68 R46E01 3.08 R48C03 6.33 R49E04 6.66 R50C10 7.95 R51DF126P 10.39 R54A05 8.61 R56B07 9.86 R59E12 12.14 R59F10 10.71 R63D08 11.46 R65B04 7.11 R67C05 9.54 R70F11 10.02 R71A05 4.95 R73H10 8.64 R74E10 5.14 R-AP2 12.44 UA13CPF12 9.45 UA35CPA11 8.51 UA35CPG02 5.08 UA52DPC04 9.02 UA52DPD03 7.83 UB10CPB06 9.26 UB10CPB07 9.03 UB10CPF10 7.86 UB11CPH11 8.93 UB18CPG04 8.57 UB27CP09(F131P66Y) 8.82 UB28CPA08 7.67 UB28CPB01 8.92 UB36DPH07 4.82 UB53CPE02 9.03 UB60BPD08 7.87 UM37TC05 9.35 UM42TF11 8.79 UM43TE12 9.23 UM44TA01 8.87 UM53TE12 9.93 UM54TA09 12.56 UM54TC12 10.51 UM54TF05 8.24 UM57TC10 8.32 UM61TC08 9.61 UM62TB07 9.58 UM65TE06 11.05 UM66TB05 10.17 UM67TB05 UM67TE05 7.85 UM69TD03 7.55 UM69TF07 5.89 UM77TC04 10 UM82TH11 5.82 UM83TC03 8.35 UM83TF04 4.74 UM89TG10 10.73 UM91TD06 8.99 UM93TD04 9.03 V001E09 13.43 V001G07 10.04 V002G11 11.22 V003A12 5.7 
V006H03 7.34 V006H09 9.08 V007E07 11 V008A10 7.57 V009A10 8.22 V009B10 12.01 V009G10 7.99 V012F11 5.74 V012H04 9.23 V012H06 8.17 V015E04 7.97 V016C11 5.62 V016F11 9.51 V017G04 8.33 V020F08 5.49 V021F11 8.06 V025A02 9.77 V025E01 8.65 V031H01 6.49 V032H12 11.74 V037B10 10.47 V037D09 7.91 V039F10 9.48 V041C10 8.43 V047A09 9.08 V049F05 9.32 V049F07 9.93 V053C07 9.59 V056C05 6.14 V057G12 3.82 V059D02 11.53 V060H07 7.02 V061D11 11.67 V063A06 8.76 V063C08 10.07 V063H06 7.3 !sample_table_end bio-1.4.3.0001/test/data/soft/GDS100_partial.soft0000755000004100000410000001064312200110570021004 0ustar www-datawww-data^DATABASE = Geo !Database_name = Gene Expression Omnibus (GEO) !Database_institute = NCBI NLM NIH !Database_web_link = http://www.ncbi.nlm.nih.gov/projects/geo !Database_email = geo@ncbi.nlm.nih.gov !Database_ref = Nucleic Acids Res. 2005 Jan 1;33 Database Issue:D562-6 ^DATASET = GDS100 !dataset_title = UV exposure time course (ecoli_8.0) !dataset_description = Time course of UV-responsive genes and their role in cellular recovery. lexA SOS-deficient strains analyzed. 
!dataset_type = gene expression array-based !dataset_pubmed_id = 11333217 !dataset_platform = GPL18 !dataset_platform_organism = Escherichia coli !dataset_platform_technology_type = spotted DNA/cDNA !dataset_feature_count = 5764 !dataset_sample_organism = Escherichia coli !dataset_sample_type = RNA !dataset_channel_count = 2 !dataset_sample_count = 8 !dataset_value_type = log ratio !dataset_reference_series = GSE9 !dataset_order = none !dataset_update_date = Apr 06 2003 ^SUBSET = GDS100_1 !subset_dataset_id = GDS100 !subset_description = irradiated !subset_sample_id = GSM544,GSM545,GSM546,GSM547,GSM548 !subset_type = protocol ^SUBSET = GDS100_2 !subset_dataset_id = GDS100 !subset_description = not irradiated !subset_sample_id = GSM542,GSM543,GSM549 !subset_type = protocol ^SUBSET = GDS100_3 !subset_dataset_id = GDS100 !subset_description = 5 minute !subset_sample_id = GSM547 !subset_type = time ^SUBSET = GDS100_4 !subset_dataset_id = GDS100 !subset_description = 10 minute !subset_sample_id = GSM544 !subset_type = time ^SUBSET = GDS100_5 !subset_dataset_id = GDS100 !subset_description = 20 minute !subset_sample_id = GSM545,GSM542 !subset_type = time ^SUBSET = GDS100_6 !subset_dataset_id = GDS100 !subset_description = 40 minute !subset_sample_id = GSM546 !subset_type = time ^SUBSET = GDS100_7 !subset_dataset_id = GDS100 !subset_description = 60 minute !subset_sample_id = GSM548,GSM543 !subset_type = time ^SUBSET = GDS100_8 !subset_dataset_id = GDS100 !subset_description = 0 minute !subset_sample_id = GSM549 !subset_type = time ^DATASET = GDS100 #ID_REF = Platform reference identifier #IDENTIFIER = identifier #GSM549 = Value for GSM549: lexA vs. wt, before UV treatment, MG1655; src: 0' wt, before UV treatment, 25 ug total RNA, 2 ug pdN6; src: 0' lexA, before UV 25 ug total RNA, 2 ug pdN6 #GSM542 = Value for GSM542: lexA 20' after NOuv vs. 
0', MG1655; src: 0', before UV treatment, 25 ug total RNA, 2 ug pdN6; src: lexA 20 min after NOuv, 25 ug total RNA, 2 ug pdN6 #GSM543 = Value for GSM543: lexA 60' after NOuv vs. 0', MG1655; src: 0', before UV treatment, 25 ug total RNA, 2 ug pdN6; src: lexA 60 min after NOuv, 25 ug total RNA, 2 ug pdN6 #GSM547 = Value for GSM547: lexA 5' after UV vs. 0', MG1655; src: 0', before UV treatment, 25 ug total RNA, 2 ug pdN6; src: lexA 5 min after UV treatment, 25 ug total RNA, 2 ug pdN6 #GSM544 = Value for GSM544: lexA 10' after UV vs. 0', MG1655; src: 0', before UV treatment, 25 ug total RNA, 2 ug pdN6; src: lexA 10 min after UV treatment, 25 ug total RNA, 2 ug pdN6 #GSM545 = Value for GSM545: lexA 20' after UV vs. 0', MG1655; src: 0', before UV treatment, 25 ug total RNA, 2 ug pdN6; src: lexA 20 min after UV treatment, 25 ug total RNA, 2 ug pdN6 #GSM546 = Value for GSM546: lexA 40' after UV vs. 0', MG1655; src: 0', before UV treatment, 25 ug total RNA, 2 ug pdN6; src: lexA 40 min after UV treatment, 25 ug total RNA, 2 ug pdN6 #GSM548 = Value for GSM548: lexA 60' after UV vs. 
0', MG1655; src: 0', before UV treatment, 25 ug total RNA, 2 ug pdN6; src: lexA 60 min after UV treatment, 25 ug total RNA, 2 ug pdN6 !dataset_table_begin ID_REF IDENTIFIER GSM549 GSM542 GSM543 GSM547 GSM544 GSM545 GSM546 GSM548 1 EMPTY 0.211 0.240 0.306 0.098 0.101 0.208 0.167 0.190 2 EMPTY 0.045 0.097 0.142 0.107 0.074 0.202 0.019 0.266 3 EMPTY 0.191 0.243 0.312 0.023 0.158 0.261 0.255 0.128 4 EMPTY -0.013 -0.041 0.112 -0.028 0.175 0.111 0.139 0.137 5 EMPTY 0.046 0.101 0.183 0.052 0.115 0.225 0.092 0.051 6 EMPTY 0.097 0.217 0.242 0.067 0.104 0.162 0.104 0.154 7 EMPTY 0.699 1.789 null null null null null null 8 EMPTY 1.026 1.509 -0.585 0.813 0.105 -0.280 1.242 0.336 9 EMPTY 0.816 null -1.322 null 1.327 null 1.242 null 10 EMPTY -0.756 null 2.585 -0.234 2.564 -3.087 0.444 null 11 EMPTY 0.157 -1.036 2.000 1.877 -0.813 3.115 null 0.599 12 EMPTY -1.084 2.608 null 1.614 -1.033 1.138 1.564 1.599 13 EMPTY null null 0.166 null null 1.322 -0.758 null 14 EMPTY -0.406 null 0.628 null -3.802 null 0.757 1.216 15 EMPTY null -1.714 1.794 null null 1.858 2.049 0.932 !dataset_table_end bio-1.4.3.0001/test/data/embl/0000755000004100000410000000000012200110570015472 5ustar www-datawww-databio-1.4.3.0001/test/data/embl/AB090716.embl0000644000004100000410000000473712200110570017317 0ustar www-datawww-dataID AB090716 standard; genomic DNA; VRT; 166 BP. XX AC AB090716; XX SV AB090716.1 XX DT 25-OCT-2002 (Rel. 73, Created) DT 29-NOV-2002 (Rel. 73, Last updated, Version 2) XX DE Haplochromis sp. 'muzu, rukwa' LWS gene for long wavelength-sensitive DE opsin, partial cds, specimen_voucher:specimen No. HT-9361. XX KW . XX OS Haplochromis sp. 'muzu, rukwa' OC Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; OC Actinopterygii; Neopterygii; Teleostei; Euteleostei; Neoteleostei; OC Acanthomorpha; Acanthopterygii; Percomorpha; Perciformes; Labroidei; OC Cichlidae; Haplochromis. 
XX RN [1] RP 1-166 RA Terai Y., Mayer W.E., Klein J., Tichy H., Okada N.; RT ; RL Submitted (26-AUG-2002) to the EMBL/GenBank/DDBJ databases. RL Yohey Terai, Tokyo Institute of Technology, Graduate School of Bioscience RL and Biotechnology; 4259 Nagatsuta-cho, Midori-ku, Yokohama, Kanagawa RL 226-8501, Japan (E-mail:yterai@bio.titech.ac.jp, Tel:81-45-924-5744, RL Fax:81-45-924-5835) XX RN [2] RX DOI; 10.1073/pnas.232561099. RX MEDLINE; 22342723. RX PUBMED; 12438648. RA Terai Y., Mayer W.E., Klein J., Tichy H., Okada N.; RT "The effect of selection on a long wavelength-sensitive (LWS) opsin gene of RT Lake Victoria cichlid fishes"; RL Proc. Natl. Acad. Sci. U.S.A. 99(24):15501-15506(2002). XX FH Key Location/Qualifiers FH FT source 1..166 FT /db_xref="taxon:205497" FT /mol_type="genomic DNA" FT /organism="Haplochromis sp. 'muzu, rukwa'" FT /specimen_voucher="specimen No. HT-9361" FT /tissue_type="piece of fin" FT CDS <1..>166 FT /codon_start=2 FT /db_xref="UniProt/TrEMBL:Q8AUS6" FT /gene="LWS" FT /product="long wavelength-sensitive opsin" FT /protein_id="BAC22028.1" FT /translation="FWPHGLKTSCGPDVFSGSEDPGVQSYMIVLMITCCFIPLAIIILC FT YLAVWMAIRA" FT exon 1..166 FT /number=4 FT /gene="LWS" FT /product="long wavelength-sensitive opsin" XX SQ Sequence 166 BP; 29 A; 42 C; 41 G; 54 T; 0 other; gttctggcct catggactga agacttcctg tggacctgat gtgttcagtg gaagtgaaga 60 ccctggagta cagtcctaca tgattgttct catgattact tgctgtttca tccccctggc 120 tatcatcatc ctgtgctacc ttgctgtgtg gatggccatc cgtgct 166 // bio-1.4.3.0001/test/data/embl/AB090716.embl.rel890000644000004100000410000000477112200110570020257 0ustar www-datawww-dataID AB090716; SV 1; linear; genomic DNA; STD; VRT; 166 BP. XX AC AB090716; XX DT 25-OCT-2002 (Rel. 73, Created) DT 14-NOV-2006 (Rel. 89, Last updated, Version 3) XX DE Haplochromis sp. 'muzu, rukwa' LWS gene for long wavelength-sensitive DE opsin, partial cds, specimen_voucher:specimen No. HT-9361. XX KW . XX OS Haplochromis sp. 
'muzu, rukwa' OC Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; OC Actinopterygii; Neopterygii; Teleostei; Euteleostei; Neoteleostei; OC Acanthomorpha; Acanthopterygii; Percomorpha; Perciformes; Labroidei; OC Cichlidae; African cichlids; Pseudocrenilabrinae; Haplochromini; OC Haplochromis. XX RN [1] RP 1-166 RA Terai Y., Mayer W.E., Klein J., Tichy H., Okada N.; RT ; RL Submitted (26-AUG-2002) to the EMBL/GenBank/DDBJ databases. RL Yohey Terai, Tokyo Institute of Technology, Graduate School of Bioscience RL and Biotechnology; 4259 Nagatsuta-cho, Midori-ku, Yokohama, Kanagawa RL 226-8501, Japan (E-mail:yterai@bio.titech.ac.jp, Tel:81-45-924-5744, RL Fax:81-45-924-5835) XX RN [2] RX DOI; 10.1073/pnas.232561099. RX PUBMED; 12438648. RA Terai Y., Mayer W.E., Klein J., Tichy H., Okada N.; RT "The effect of selection on a long wavelength-sensitive (LWS) opsin gene of RT Lake Victoria cichlid fishes"; RL Proc. Natl. Acad. Sci. U.S.A. 99(24):15501-15506(2002). XX FH Key Location/Qualifiers FH FT source 1..166 FT /organism="Haplochromis sp. 'muzu, rukwa'" FT /mol_type="genomic DNA" FT /specimen_voucher="specimen No. 
HT-9361" FT /tissue_type="piece of fin" FT /db_xref="taxon:205497" FT CDS <1..>166 FT /codon_start=2 FT /gene="LWS" FT /product="long wavelength-sensitive opsin" FT /db_xref="UniProtKB/TrEMBL:Q8AUS6" FT /protein_id="BAC22028.1" FT /translation="FWPHGLKTSCGPDVFSGSEDPGVQSYMIVLMITCCFIPLAIIILC FT YLAVWMAIRA" FT exon 1..166 FT /gene="LWS" FT /product="long wavelength-sensitive opsin" FT /number=4 XX SQ Sequence 166 BP; 29 A; 42 C; 41 G; 54 T; 0 other; gttctggcct catggactga agacttcctg tggacctgat gtgttcagtg gaagtgaaga 60 ccctggagta cagtcctaca tgattgttct catgattact tgctgtttca tccccctggc 120 tatcatcatc ctgtgctacc ttgctgtgtg gatggccatc cgtgct 166 // bio-1.4.3.0001/test/data/clustalw/0000755000004100000410000000000012200110570016411 5ustar www-datawww-databio-1.4.3.0001/test/data/clustalw/example1.aln0000644000004100000410000000671312200110570020630 0ustar www-datawww-dataCLUSTAL 2.0.9 multiple sequence alignment query -MKNTLLKLGVCVSLLGITPFVSTISSVQAERTVEHKVIKNETGTISISQ gi|115023|sp|P10425| MKKNTLLKVGLCVSLLGTTQFVSTISSVQASQKVEQIVIKNETGTISISQ gi|115030|sp|P25910| -MKTVFILISMLFPVAVMAQKSVKIS-----------------DDISITQ gi|2984094 MGGFLFFFLLVLFSFSSEYPKHVKET------------------LRKITD gi|282554|pir||S25844 -------------------------------------------MTVEVRE .: : query LNKNVWVHTELGYFSG-EAVPSNGLVLNTSKGLVLVDSSWDDKLTKELIE gi|115023|sp|P10425| LNKNVWVHTELGYFNG-EAVPSNGLVLNTSKGLVLVDSSWDNKLTKELIE gi|115030|sp|P25910| LSDKVYTYVSLAEIEGWGMVPSNGMIVINNHQAALLDTPINDAQTEMLVN gi|2984094 RIYGVFGVYEQVSYEN-RGFISNAYFYVADDGVLVVDALSTYKLGKELIE gi|282554|pir||S25844 VAEGVYAYEQAP---G-GWCVSNAGIVVGGDGALVVDTLSTIPRARRLAE *: . . **. . .. ::*: . * : query MVEKKFKKRVTDVIITHAHADRIGGMKTLKERGIKAHSTALTAELAKKNG gi|115023|sp|P10425| MVEKKFQKRVTDVIITHAHADRIGGITALKERGIKAHSTALTAELAKKSG gi|115030|sp|P25910| WVTDSLHAKVTTFIPNHWHGDCIGGLGYLQRKGVQSYANQMTIDLAKEKG gi|2984094 SIRSVTNKPIRFLVVTHYHTDHFYGAKAFREVGAEVIAHEWAFDYISQPS gi|282554|pir||S25844 WVDKLAAGPGRTVVNTHFHGDHAFGNQVFAP-GTRIIAHEDMRSAMVTTG : . .: .* * * * : * . : . . 
query --------------------YEEPLGDLQSVTNLKFGN----MKVETFYP gi|115023|sp|P10425| --------------------YEEPLGDLQTVTNLKFGN----TKVETFYP gi|115030|sp|P25910| --------------------LPVPEHGFTDSLTVSLDG----MPLQCYYL gi|2984094 SYNFFLARKKILKEHLEGTELTPPTITLTKNLNVYLQVGKEYKRFEVLHL gi|282554|pir||S25844 ----LALTGLWPRVDWGEIELRPPNVTFRDRLTLHVGE----RQVELICV * : .: . .: query GKGHTEDNIVVWLPQYQILAGGCLVKSASSKDLGNVADAYVNEWSTSIEN gi|115023|sp|P10425| GKGHTEDNIVVWLPQYQILAGGCLVKSAEAKNLGNVADAYVNEWSTSIEN gi|115030|sp|P25910| GGGHATDNIVVWLPTENILFGGCMLKDNQATSIGNISDADVTAWPKTLDK gi|2984094 CRAHTNGDIVVWIPDEKVLFSGDIVFDGRLPFLG---SGNSRTWLVCLDE gi|282554|pir||S25844 GPAHTDHDVVVWLPEERVLFAGDVVMSGVTPFAL---FGSVAGTLAALDR .*: ::***:* .:* .* :: . . ::. query VLKRYGNINLVVPGHGEVGDR-----GLLLHTLDLLK------------- gi|115023|sp|P10425| MLKRYRNINLVVPGHGKVGDK-----GLLLHTLDLLK------------- gi|115030|sp|P25910| VKAKFPSARYVVPGHGDYGGT-----ELIEHTKQIVNQYIESTSKP---- gi|2984094 ILKMKPRILLPGHGEALIGEKKIK--EAVSWTRKYIKDLRETIRKLYEEG gi|282554|pir||S25844 LAELEPEVVVGGHGPVAGPEVIDANRDYLRWVQRLAADAVDRRLTPLQAA : * : . query -------------------------------------------------- gi|115023|sp|P10425| -------------------------------------------------- gi|115030|sp|P25910| -------------------------------------------------- gi|2984094 --CDVECVRERINEELIKIDPSYAQVPVFF-NVNPVNAYYVYFEIENEIL gi|282554|pir||S25844 RRADLGAFAGLLDAERLVANLHRAHEELLGGHVRDAMEIFAELVAYNGGQ query ------ gi|115023|sp|P10425| ------ gi|115030|sp|P25910| ------ gi|2984094 MGE--- gi|282554|pir||S25844 LPTCLA bio-1.4.3.0001/test/data/genbank/0000755000004100000410000000000012200110570016160 5ustar www-datawww-databio-1.4.3.0001/test/data/genbank/CAA35997.gp0000644000004100000410000000401712200110570017517 0ustar www-datawww-dataLOCUS CAA35997 100 aa linear MAM 12-SEP-1993 DEFINITION unnamed protein product [Bos taurus]. ACCESSION CAA35997 VERSION CAA35997.1 GI:8 DBSOURCE embl accession X51700.1 KEYWORDS . 
SOURCE Bos taurus (cattle) ORGANISM Bos taurus Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Bovinae; Bos. REFERENCE 1 (residues 1 to 100) AUTHORS Kiefer,M.C., Saphire,A.C.S., Bauer,D.M. and Barr,P.J. JOURNAL Unpublished REFERENCE 2 (residues 1 to 100) AUTHORS Kiefer,M.C. TITLE Direct Submission JOURNAL Submitted (30-JAN-1990) Kiefer M.C., Chiron Corporation, 4560 Hortom St, Emeryville CA 94608-2916, U S A COMMENT See for Human sequence. Data kindly reviewed (08-MAY-1990) by Kiefer M.C. FEATURES Location/Qualifiers source 1..100 /organism="Bos taurus" /db_xref="taxon:9913" /clone="bBGP-3" /tissue_type="bone matrix" /clone_lib="Zap-bb" Protein 1..100 /name="unnamed protein product" Region 33..97 /region_name="Gla" /note="Vitamin K-dependent carboxylation/gamma-carboxyglutamic (GLA) domain; cl02449" /db_xref="CDD:141428" CDS 1..100 /coded_by="X51700.1:28..330" /note="bone Gla precursor (100 AA)" /db_xref="GOA:P02820" /db_xref="InterPro:IPR000294" /db_xref="InterPro:IPR002384" /db_xref="PDB:1Q3M" /db_xref="UniProtKB/Swiss-Prot:P02820" ORIGIN 1 mrtpmllall alatlclagr adakpgdaes gkgaafvskq egsevvkrlr ryldhwlgap 61 apypdplepk revcelnpdc deladhigfq eayrrfygpv // bio-1.4.3.0001/test/data/genbank/SCU49845.gb0000644000004100000410000002435612200110570017554 0ustar www-datawww-dataLOCUS SCU49845 5028 bp DNA linear PLN 23-MAR-2010 DEFINITION Saccharomyces cerevisiae TCP1-beta gene, partial cds; and Axl2p (AXL2) and Rev7p (REV7) genes, complete cds. ACCESSION U49845 VERSION U49845.1 GI:1293613 KEYWORDS . SOURCE Saccharomyces cerevisiae (baker's yeast) ORGANISM Saccharomyces cerevisiae Eukaryota; Fungi; Dikarya; Ascomycota; Saccharomyceta; Saccharomycotina; Saccharomycetes; Saccharomycetales; Saccharomycetaceae; Saccharomyces. REFERENCE 1 (bases 1 to 5028) AUTHORS Roemer,T., Madden,K., Chang,J. and Snyder,M. 
TITLE Selection of axial growth sites in yeast requires Axl2p, a novel plasma membrane glycoprotein JOURNAL Genes Dev. 10 (7), 777-793 (1996) PUBMED 8846915 REFERENCE 2 (bases 1 to 5028) AUTHORS Roemer,T. TITLE Direct Submission JOURNAL Submitted (22-FEB-1996) Biology, Yale University, New Haven, CT 06520, USA FEATURES Location/Qualifiers source 1..5028 /organism="Saccharomyces cerevisiae" /mol_type="genomic DNA" /db_xref="taxon:4932" /chromosome="IX" mRNA <1..>206 /product="TCP1-beta" CDS <1..206 /codon_start=3 /product="TCP1-beta" /protein_id="AAA98665.1" /db_xref="GI:1293614" /translation="SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEA AEVLLRVDNIIRARPRTANRQHM" gene <687..>3158 /gene="AXL2" mRNA <687..>3158 /gene="AXL2" /product="Axl2p" CDS 687..3158 /gene="AXL2" /note="plasma membrane glycoprotein" /codon_start=1 /product="Axl2p" /protein_id="AAA98666.1" /db_xref="GI:1293615" /translation="MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESF TFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFN VILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNE VFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPE TSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYV YLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYG DVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQ DHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSA NATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIA CGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLN NPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQ SQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDS YGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTK HRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRL VDFSNKSNVNVGQVKDIHGRIPEML" gene complement(<3300..>4037) /gene="REV7" mRNA complement(<3300..>4037) /gene="REV7" /product="Rev7p" CDS complement(3300..4037) /gene="REV7" /codon_start=1 /product="Rev7p" /protein_id="AAA98667.1" /db_xref="GI:1293616" 
/translation="MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQ FVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVD KDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNR RVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEK LISGDDKILNGVYSQYEEGESIFGSLF" ORIGIN 1 gatcctccat atacaacggt atctccacct caggtttaga tctcaacaac ggaaccattg 61 ccgacatgag acagttaggt atcgtcgaga gttacaagct aaaacgagca gtagtcagct 121 ctgcatctga agccgctgaa gttctactaa gggtggataa catcatccgt gcaagaccaa 181 gaaccgccaa tagacaacat atgtaacata tttaggatat acctcgaaaa taataaaccg 241 ccacactgtc attattataa ttagaaacag aacgcaaaaa ttatccacta tataattcaa 301 agacgcgaaa aaaaaagaac aacgcgtcat agaacttttg gcaattcgcg tcacaaataa 361 attttggcaa cttatgtttc ctcttcgagc agtactcgag ccctgtctca agaatgtaat 421 aatacccatc gtaggtatgg ttaaagatag catctccaca acctcaaagc tccttgccga 481 gagtcgccct cctttgtcga gtaattttca cttttcatat gagaacttat tttcttattc 541 tttactctca catcctgtag tgattgacac tgcaacagcc accatcacta gaagaacaga 601 acaattactt aatagaaaaa ttatatcttc ctcgaaacga tttcctgctt ccaacatcta 661 cgtatatcaa gaagcattca cttaccatga cacagcttca gatttcatta ttgctgacag 721 ctactatatc actactccat ctagtagtgg ccacgcccta tgaggcatat cctatcggaa 781 aacaataccc cccagtggca agagtcaatg aatcgtttac atttcaaatt tccaatgata 841 cctataaatc gtctgtagac aagacagctc aaataacata caattgcttc gacttaccga 901 gctggctttc gtttgactct agttctagaa cgttctcagg tgaaccttct tctgacttac 961 tatctgatgc gaacaccacg ttgtatttca atgtaatact cgagggtacg gactctgccg 1021 acagcacgtc tttgaacaat acataccaat ttgttgttac aaaccgtcca tccatctcgc 1081 tatcgtcaga tttcaatcta ttggcgttgt taaaaaacta tggttatact aacggcaaaa 1141 acgctctgaa actagatcct aatgaagtct tcaacgtgac ttttgaccgt tcaatgttca 1201 ctaacgaaga atccattgtg tcgtattacg gacgttctca gttgtataat gcgccgttac 1261 ccaattggct gttcttcgat tctggcgagt tgaagtttac tgggacggca ccggtgataa 1321 actcggcgat tgctccagaa acaagctaca gttttgtcat catcgctaca gacattgaag 1381 gattttctgc cgttgaggta gaattcgaat tagtcatcgg ggctcaccag ttaactacct 1441 ctattcaaaa tagtttgata atcaacgtta 
ctgacacagg taacgtttca tatgacttac 1501 ctctaaacta tgtttatctc gatgacgatc ctatttcttc tgataaattg ggttctataa 1561 acttattgga tgctccagac tgggtggcat tagataatgc taccatttcc gggtctgtcc 1621 cagatgaatt actcggtaag aactccaatc ctgccaattt ttctgtgtcc atttatgata 1681 cttatggtga tgtgatttat ttcaacttcg aagttgtctc cacaacggat ttgtttgcca 1741 ttagttctct tcccaatatt aacgctacaa ggggtgaatg gttctcctac tattttttgc 1801 cttctcagtt tacagactac gtgaatacaa acgtttcatt agagtttact aattcaagcc 1861 aagaccatga ctgggtgaaa ttccaatcat ctaatttaac attagctgga gaagtgccca 1921 agaatttcga caagctttca ttaggtttga aagcgaacca aggttcacaa tctcaagagc 1981 tatattttaa catcattggc atggattcaa agataactca ctcaaaccac agtgcgaatg 2041 caacgtccac aagaagttct caccactcca cctcaacaag ttcttacaca tcttctactt 2101 acactgcaaa aatttcttct acctccgctg ctgctacttc ttctgctcca gcagcgctgc 2161 cagcagccaa taaaacttca tctcacaata aaaaagcagt agcaattgcg tgcggtgttg 2221 ctatcccatt aggcgttatc ctagtagctc tcatttgctt cctaatattc tggagacgca 2281 gaagggaaaa tccagacgat gaaaacttac cgcatgctat tagtggacct gatttgaata 2341 atcctgcaaa taaaccaaat caagaaaacg ctacaccttt gaacaacccc tttgatgatg 2401 atgcttcctc gtacgatgat acttcaatag caagaagatt ggctgctttg aacactttga 2461 aattggataa ccactctgcc actgaatctg atatttccag cgtggatgaa aagagagatt 2521 ctctatcagg tatgaataca tacaatgatc agttccaatc ccaaagtaaa gaagaattat 2581 tagcaaaacc cccagtacag cctccagaga gcccgttctt tgacccacag aataggtctt 2641 cttctgtgta tatggatagt gaaccagcag taaataaatc ctggcgatat actggcaacc 2701 tgtcaccagt ctctgatatt gtcagagaca gttacggatc acaaaaaact gttgatacag 2761 aaaaactttt cgatttagaa gcaccagaga aggaaaaacg tacgtcaagg gatgtcacta 2821 tgtcttcact ggacccttgg aacagcaata ttagcccttc tcccgtaaga aaatcagtaa 2881 caccatcacc atataacgta acgaagcatc gtaaccgcca cttacaaaat attcaagact 2941 ctcaaagcgg taaaaacgga atcactccca caacaatgtc aacttcatct tctgacgatt 3001 ttgttccggt taaagatggt gaaaattttt gctgggtcca tagcatggaa ccagacagaa 3061 gaccaagtaa gaaaaggtta gtagattttt caaataagag taatgtcaat gttggtcaag 3121 ttaaggacat tcacggacgc atcccagaaa tgctgtgatt 
atacgcaacg atattttgct 3181 taattttatt ttcctgtttt attttttatt agtggtttac agatacccta tattttattt 3241 agtttttata cttagagaca tttaatttta attccattct tcaaatttca tttttgcact 3301 taaaacaaag atccaaaaat gctctcgccc tcttcatatt gagaatacac tccattcaaa 3361 attttgtcgt caccgctgat taatttttca ctaaactgat gaataatcaa aggccccacg 3421 tcagaaccga ctaaagaagt gagttttatt ttaggaggtt gaaaaccatt attgtctggt 3481 aaattttcat cttcttgaca tttaacccag tttgaatccc tttcaatttc tgctttttcc 3541 tccaaactat cgaccctcct gtttctgtcc aacttatgtc ctagttccaa ttcgatcgca 3601 ttaataactg cttcaaatgt tattgtgtca tcgttgactt taggtaattt ctccaaatgc 3661 ataatcaaac tatttaagga agatcggaat tcgtcgaaca cttcagtttc cgtaatgatc 3721 tgatcgtctt tatccacatg ttgtaattca ctaaaatcta aaacgtattt ttcaatgcat 3781 aaatcgttct ttttattaat aatgcagatg gaaaatctgt aaacgtgcgt taatttagaa 3841 agaacatcca gtataagttc ttctatatag tcaattaaag caggatgcct attaatggga 3901 acgaactgcg gcaagttgaa tgactggtaa gtagtgtagt cgaatgactg aggtgggtat 3961 acatttctat aaaataaaat caaattaatg tagcatttta agtataccct cagccacttc 4021 tctacccatc tattcataaa gctgacgcaa cgattactat tttttttttc ttcttggatc 4081 tcagtcgtcg caaaaacgta taccttcttt ttccgacctt ttttttagct ttctggaaaa 4141 gtttatatta gttaaacagg gtctagtctt agtgtgaaag ctagtggttt cgattgactg 4201 atattaagaa agtggaaatt aaattagtag tgtagacgta tatgcatatg tatttctcgc 4261 ctgtttatgt ttctacgtac ttttgattta tagcaagggg aaaagaaata catactattt 4321 tttggtaaag gtgaaagcat aatgtaaaag ctagaataaa atggacgaaa taaagagagg 4381 cttagttcat cttttttcca aaaagcaccc aatgataata actaaaatga aaaggatttg 4441 ccatctgtca gcaacatcag ttgtgtgagc aataataaaa tcatcacctc cgttgccttt 4501 agcgcgtttg tcgtttgtat cttccgtaat tttagtctta tcaatgggaa tcataaattt 4561 tccaatgaat tagcaatttc gtccaattct ttttgagctt cttcatattt gctttggaat 4621 tcttcgcact tcttttccca ttcatctctt tcttcttcca aagcaacgat ccttctaccc 4681 atttgctcag agttcaaatc ggcctctttc agtttatcca ttgcttcctt cagtttggct 4741 tcactgtctt ctagctgttg ttctagatcc tggtttttct tggtgtagtt ctcattatta 4801 gatctcaagt tattggagtc ttcagccaat tgctttgtat cagacaattg 
actctctaac 4861 ttctccactt cactgtcgag ttgctcgttt ttagcggaca aagatttaat ctcgttttct 4921 ttttcagtgt tagattgctc taattctttg agctgttctc tcagctcctc atatttttct 4981 tgccatgact cagattctaa ttttaagcta ttcaatttct ctttgatc // bio-1.4.3.0001/test/data/command/0000755000004100000410000000000012200110570016171 5ustar www-datawww-databio-1.4.3.0001/test/data/command/echoarg2.bat0000755000004100000410000000001012200110570020345 0ustar www-datawww-data@echo %2bio-1.4.3.0001/test/data/command/echoarg2.sh0000755000004100000410000000003312200110570020216 0ustar www-datawww-data#!/bin/sh /bin/echo "$2" bio-1.4.3.0001/test/data/litdb/0000755000004100000410000000000012200110570015651 5ustar www-datawww-databio-1.4.3.0001/test/data/litdb/1717226.litdb0000644000004100000410000000106012200110570017517 0ustar www-datawww-dataCODE 1717226 TITLE Characterization of an Escherichia coli gene encoding betaine aldehyde dehydrogenase (BADH). Structural similarity to mammalian ALDHs and a plant BADH. FIELD q (sequence analysis) JOURNAL Gene VOLUME Vol.103, No.1, 45-52 (1991) KEYWORD *Betaine Aldehyde Dehydrogenase;;*betB Gene;E.coli;; Seq Determination;1854bp;491AAs;;Hydropathy Plot;*EC1.2.1.8;; Seq Comparison AUTHOR Boyd,L.A.;Adam,L.;Pelcher,L.E.;McHughen,A.;Hirji,R.; Selvaraj,G. 
END bio-1.4.3.0001/test/data/iprscan/0000755000004100000410000000000012200110570016212 5ustar www-datawww-databio-1.4.3.0001/test/data/iprscan/merged.raw0000644000004100000410000001674612200110570020206 0ustar www-datawww-dataQ9RHD9 D44DAE8C544CB7C1 267 HMMPfam PF00575 S1 1 55 3.3E-6 T 11-Nov-2005 IPR003029 RNA binding S1 Molecular Function:RNA binding (GO:0003723) Q9RHD9 D44DAE8C544CB7C1 267 HMMPfam PF00575 S1 68 142 4.1E-19 T 11-Nov-2005 IPR003029 RNA binding S1 Molecular Function:RNA binding (GO:0003723) Q9RHD9 D44DAE8C544CB7C1 267 HMMPfam PF00575 S1 155 228 1.8E-19 T 11-Nov-2005 IPR003029 RNA binding S1 Molecular Function:RNA binding (GO:0003723) Q9RHD9 D44DAE8C544CB7C1 267 HMMSmart SM00316 S1 3 55 7.1E-7 T 11-Nov-2005 IPR003029 RNA binding S1 Molecular Function:RNA binding (GO:0003723) Q9RHD9 D44DAE8C544CB7C1 267 HMMSmart SM00316 S1 70 142 8.1E-20 T 11-Nov-2005 IPR003029 RNA binding S1 Molecular Function:RNA binding (GO:0003723) Q9RHD9 D44DAE8C544CB7C1 267 HMMSmart SM00316 S1 157 228 1.5E-21 T 11-Nov-2005 IPR003029 RNA binding S1 Molecular Function:RNA binding (GO:0003723) Q9RHD9 D44DAE8C544CB7C1 267 ProfileScan PS50126 S1 1 55 14.869 T 11-Nov-2005 IPR003029 RNA binding S1 Molecular Function:RNA binding (GO:0003723) Q9RHD9 D44DAE8C544CB7C1 267 ProfileScan PS50126 S1 72 142 20.809 T 11-Nov-2005 IPR003029 RNA binding S1 Molecular Function:RNA binding (GO:0003723) Q9RHD9 D44DAE8C544CB7C1 267 ProfileScan PS50126 S1 159 228 22.541 T 11-Nov-2005 IPR003029 RNA binding S1 Molecular Function:RNA binding (GO:0003723) Q9RHD9 D44DAE8C544CB7C1 267 FPrintScan PR00681 RIBOSOMALS1 6 27 1.5E-17 T 11-Nov-2005 IPR000110 Ribosomal protein S1 Molecular Function:RNA binding (GO:0003723), Molecular Function:structural constituent of ribosome (GO:0003735), Cellular Component:ribosome (GO:0005840), Biological Process:protein biosynthesis (GO:0006412) Q9RHD9 D44DAE8C544CB7C1 267 FPrintScan PR00681 RIBOSOMALS1 85 104 1.5E-17 T 11-Nov-2005 IPR000110 Ribosomal protein S1 Molecular 
Function:RNA binding (GO:0003723), Molecular Function:structural constituent of ribosome (GO:0003735), Cellular Component:ribosome (GO:0005840), Biological Process:protein biosynthesis (GO:0006412) Q9RHD9 D44DAE8C544CB7C1 267 FPrintScan PR00681 RIBOSOMALS1 125 143 1.5E-17 T 11-Nov-2005 IPR000110 Ribosomal protein S1 Molecular Function:RNA binding (GO:0003723), Molecular Function:structural constituent of ribosome (GO:0003735), Cellular Component:ribosome (GO:0005840), Biological Process:protein biosynthesis (GO:0006412) Q9RHD9 D44DAE8C544CB7C1 267 superfamily SSF50249 Nucleic_acid_OB 3 60 1.4E-7 T 11-Nov-2005 IPR008994 Nucleic acid-binding OB-fold Molecular Function:nucleic acid binding (GO:0003676) Q9RHD9 D44DAE8C544CB7C1 267 superfamily SSF50249 Nucleic_acid_OB 61 205 6.3999999999999995E-24 T 11-Nov-2005 IPR008994 Nucleic acid-binding OB-fold Molecular Function:nucleic acid binding (GO:0003676) RS16_ECOLI F94D07049A6D489D 82 HMMTigr TIGR00002 S16 2 81 117.16 T 11-Nov-2005 IPR000307 Ribosomal protein S16 Molecular Function:structural constituent of ribosome (GO:0003735), Cellular Component:intracellular (GO:0005622), Cellular Component:ribosome (GO:0005840), Biological Process:protein biosynthesis (GO:0006412) RS16_ECOLI F94D07049A6D489D 82 superfamily SSF54565 Ribosomal_S16 1 79 1.81E-8 T 11-Nov-2005 IPR000307 Ribosomal protein S16 Molecular Function:structural constituent of ribosome (GO:0003735), Cellular Component:intracellular (GO:0005622), Cellular Component:ribosome (GO:0005840), Biological Process:protein biosynthesis (GO:0006412) RS16_ECOLI F94D07049A6D489D 82 HMMPfam PF00886 Ribosomal_S16 8 68 2.7000000000000004E-33 T 11-Nov-2005 IPR000307 Ribosomal protein S16 Molecular Function:structural constituent of ribosome (GO:0003735), Cellular Component:intracellular (GO:0005622), Cellular Component:ribosome (GO:0005840), Biological Process:protein biosynthesis (GO:0006412) RS16_ECOLI F94D07049A6D489D 82 BlastProDom PD003791 Ribosomal_S16 10 77 4.0E-33 T 
11-Nov-2005 IPR000307 Ribosomal protein S16 Molecular Function:structural constituent of ribosome (GO:0003735), Cellular Component:intracellular (GO:0005622), Cellular Component:ribosome (GO:0005840), Biological Process:protein biosynthesis (GO:0006412) RS16_ECOLI F94D07049A6D489D 82 ProfileScan PS00732 RIBOSOMAL_S16 2 11 8.0E-5 T 11-Nov-2005 IPR000307 Ribosomal protein S16 Molecular Function:structural constituent of ribosome (GO:0003735), Cellular Component:intracellular (GO:0005622), Cellular Component:ribosome (GO:0005840), Biological Process:protein biosynthesis (GO:0006412) Y902_MYCTU CD84A335CCFFE6D7 446 superfamily SSF47384 His_kin_homodim 220 292 5.89E-7 T 11-Nov-2005 IPR009082 Histidine kinase, homodimeric Y902_MYCTU CD84A335CCFFE6D7 446 HMMSmart SM00304 HAMP 170 222 1.8E-6 T 11-Nov-2005 IPR003660 Histidine kinase, HAMP region Molecular Function:signal transducer activity (GO:0004871), Biological Process:signal transduction (GO:0007165), Cellular Component:membrane (GO:0016020) Y902_MYCTU CD84A335CCFFE6D7 446 ProfileScan PS50885 HAMP 170 222 7.777 T 11-Nov-2005 IPR003660 Histidine kinase, HAMP region Molecular Function:signal transducer activity (GO:0004871), Biological Process:signal transduction (GO:0007165), Cellular Component:membrane (GO:0016020) Y902_MYCTU CD84A335CCFFE6D7 446 HMMPfam PF00672 HAMP 151 219 1.1E-8 T 11-Nov-2005 IPR003660 Histidine kinase, HAMP region Molecular Function:signal transducer activity (GO:0004871), Biological Process:signal transduction (GO:0007165), Cellular Component:membrane (GO:0016020) Y902_MYCTU CD84A335CCFFE6D7 446 ProfileScan PS50109 HIS_KIN 237 446 34.449 T 11-Nov-2005 IPR005467 Histidine kinase Biological Process:protein amino acid phosphorylation (GO:0006468), Molecular Function:kinase activity (GO:0016301) Y902_MYCTU CD84A335CCFFE6D7 446 HMMSmart SM00388 HisKA 230 296 1.4E-12 T 11-Nov-2005 IPR003661 Histidine kinase A, N-terminal Molecular Function:two-component sensor molecule activity (GO:0000155), Biological 
Process:signal transduction (GO:0007165), Cellular Component:membrane (GO:0016020) Y902_MYCTU CD84A335CCFFE6D7 446 HMMPfam PF00512 HisKA 230 296 2.4E-11 T 11-Nov-2005 IPR003661 Histidine kinase A, N-terminal Molecular Function:two-component sensor molecule activity (GO:0000155), Biological Process:signal transduction (GO:0007165), Cellular Component:membrane (GO:0016020) Y902_MYCTU CD84A335CCFFE6D7 446 HMMSmart SM00387 HATPase_c 338 446 2.9E-24 T 11-Nov-2005 IPR003594 ATP-binding region, ATPase-like Molecular Function:ATP binding (GO:0005524) Y902_MYCTU CD84A335CCFFE6D7 446 HMMPfam PF02518 HATPase_c 338 445 2.5E-26 T 11-Nov-2005 IPR003594 ATP-binding region, ATPase-like Molecular Function:ATP binding (GO:0005524) Y902_MYCTU CD84A335CCFFE6D7 446 FPrintScan PR00344 BCTRLSENSOR 374 388 2.0E-12 T 11-Nov-2005 IPR004358 Histidine kinase related protein, C-terminal Biological Process:phosphorylation (GO:0016310), Molecular Function:transferase activity, transferring phosphorus-containing groups (GO:0016772) Y902_MYCTU CD84A335CCFFE6D7 446 FPrintScan PR00344 BCTRLSENSOR 392 402 2.0E-12 T 11-Nov-2005 IPR004358 Histidine kinase related protein, C-terminal Biological Process:phosphorylation (GO:0016310), Molecular Function:transferase activity, transferring phosphorus-containing groups (GO:0016772) Y902_MYCTU CD84A335CCFFE6D7 446 FPrintScan PR00344 BCTRLSENSOR 406 424 2.0E-12 T 11-Nov-2005 IPR004358 Histidine kinase related protein, C-terminal Biological Process:phosphorylation (GO:0016310), Molecular Function:transferase activity, transferring phosphorus-containing groups (GO:0016772) Y902_MYCTU CD84A335CCFFE6D7 446 FPrintScan PR00344 BCTRLSENSOR 430 443 2.0E-12 T 11-Nov-2005 IPR004358 Histidine kinase related protein, C-terminal Biological Process:phosphorylation (GO:0016310), Molecular Function:transferase activity, transferring phosphorus-containing groups (GO:0016772) bio-1.4.3.0001/test/data/iprscan/merged.txt0000644000004100000410000001225212200110570020220 0ustar 
www-datawww-dataSequence "Q9RHD9" crc64 checksum: D44DAE8C544CB7C1 length: 267 aa. InterPro IPR000110 Ribosomal protein S1 Molecular Function: RNA binding (GO:0003723), Molecular Function: structural constituent of ribosome (GO:0003735), Cellular Component: ribosome (GO:0005840), Biological Process: protein biosynthesis (GO:0006412) method AccNumber shortName location FPrintScan PR00681 RIBOSOMALS1 T[6-27] 1.5e-17 T[85-104] 1.5e-17 T[125-143] 1.5e-17 InterPro IPR003029 RNA binding S1 Molecular Function: RNA binding (GO:0003723) method AccNumber shortName location HMMPfam PF00575 S1 T[1-55] 1.3e-08 T[68-142] 1.6e-21 T[155-228] 6.8e-22 HMMSmart SM00316 S1 T[3-55] 1.2e-06 T[70-142] 1.4e-19 T[157-228] 2.6e-21 ProfileScan PS50126 S1 T[1-55] 14.869 T[72-142] 20.809 T[159-228] 22.541 InterPro IPR008994 Nucleic acid-binding OB-fold method AccNumber shortName location superfamily SSF50249 Nucleic acid-binding proteins T[147-257] 2.5e-24 T[59-145] 2.4e-22 T[3-58] 9.2e-14 InterPro NULL NULL method AccNumber shortName location Coil coil coiled-coil ?[225-246] Seg seg seg ?[29-40] ?[84-98] ?[222-237] Sequence "RS16_ECOLI" crc64 checksum: F94D07049A6D489D length: 82 aa. InterPro IPR000307 Ribosomal protein S16 Molecular Function: structural constituent of ribosome (GO:0003735), Cellular Component: intracellular (GO:0005622), Cellular Component: ribosome (GO:0005840), Biological Process: protein biosynthesis (GO:0006412) method AccNumber shortName location BlastProDom PD003791 sp_RS16_ECOLI_P02372; T[1-77] 3e-38 HMMPfam PF00886 Ribosomal_S16 T[8-68] 1.1e-35 HMMTigr TIGR00002 S16 T[2-81] 8.8e-33 ScanRegExp PS00732 RIBOSOMAL_S16 T[2-11] 8e-5 superfamily SSF54565 Ribosomal protein S16 T[1-79] 9.4e-28 Sequence "Y902_MYCTU" crc64 checksum: CD84A335CCFFE6D7 length: 446 aa. 
InterPro IPR003594 ATP-binding region, ATPase-like Molecular Function: ATP binding (GO:0005524) method AccNumber shortName location HMMPfam PF02518 HATPase_c T[338-445] 9.6e-29 HMMSmart SM00387 HATPase_c T[338-446] 5e-24 InterPro IPR003660 Histidine kinase, HAMP region Molecular Function: signal transducer activity (GO:0004871), Biological Process: signal transduction (GO:0007165), Cellular Component: membrane (GO:0016020) method AccNumber shortName location HMMPfam PF00672 HAMP T[151-219] 4.3e-11 HMMSmart SM00304 HAMP T[170-222] 3.1e-06 ProfileScan PS50885 HAMP T[170-222] 7.777 InterPro IPR003661 Histidine kinase A, N-terminal Molecular Function: two-component sensor molecule activity (GO:0000155), Biological Process: signal transduction (GO:0007165), Cellular Component: membrane (GO:0016020) method AccNumber shortName location HMMPfam PF00512 HisKA T[230-296] 9.2e-14 HMMSmart SM00388 HisKA T[230-296] 2.4e-12 InterPro IPR004358 Bacterial sensor protein, C-terminal Molecular Function: two-component sensor molecule activity (GO:0000155), Biological Process: signal transduction (GO:0007165) method AccNumber shortName location FPrintScan PR00344 BCTRLSENSOR T[374-388] 2e-12 T[392-402] 2e-12 T[406-424] 2e-12 T[430-443] 2e-12 InterPro IPR005467 Histidine kinase Biological Process: signal transduction (GO:0007165), Molecular Function: kinase activity (GO:0016301) method AccNumber shortName location ProfileScan PS50109 HIS_KIN T[237-446] 34.449 InterPro IPR009082 Histidine kinase, homodimeric method AccNumber shortName location superfamily SSF47384 Homodimeric domain of signal transducing histidine kinase T[220-292] 2.7e-11 InterPro NULL NULL method AccNumber shortName location Seg seg seg ?[44-55] ?[108-120] ?[160-173] ?[308-319] ?[400-424] superfamily SSF55874 ATPase domain of HSP90 chaperone/DNA topoisomerase II/histidine kinase T[295-444] 4.6e-30 bio-1.4.3.0001/test/data/genscan/0000755000004100000410000000000012200110570016171 5ustar 
www-datawww-databio-1.4.3.0001/test/data/genscan/sample.report0000644000004100000410000000531512200110570020713 0ustar www-datawww-dataGENSCAN 1.0 Date run: 30-May-103 Time: 14:06:28 Sequence HUMRASH : 12942 bp : 68.17% C+G : Isochore 4 (57 - 100 C+G%) Parameter matrix: HumanIso.smat Predicted genes/exons: Gn.Ex Type S .Begin ...End .Len Fr Ph I/Ac Do/T CodRg P.... Tscr.. ----- ---- - ------ ------ ---- -- -- ---- ---- ----- ----- ------ 1.01 Init + 1664 1774 111 1 0 94 83 212 0.997 21.33 1.02 Intr + 2042 2220 179 1 2 104 66 408 0.997 40.12 1.03 Intr + 2374 2533 160 1 1 89 94 302 0.999 32.08 1.04 Term + 3231 3350 120 2 0 115 48 202 0.980 18.31 1.05 PlyA + 3722 3727 6 -5.80 2.00 Prom + 6469 6508 40 -7.92 2.01 Init + 8153 8263 111 1 0 94 83 212 0.998 21.33 2.02 Intr + 8531 8709 179 1 2 104 66 408 0.997 40.12 2.03 Intr + 8863 9022 160 1 1 89 94 302 0.999 32.08 2.04 Term + 9720 9839 120 2 0 115 48 202 0.961 18.31 Predicted peptide sequence(s): Predicted coding sequence(s): >HUMRASH|GENSCAN_predicted_peptide_1|189_aa MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAG QEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDL AARTVESRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQHKLRKLNPPDESGPG CMSCKCVLS >HUMRASH|GENSCAN_predicted_CDS_1|570_bp atgacggaatataagctggtggtggtgggcgccggcggtgtgggcaagagtgcgctgacc atccagctgatccagaaccattttgtggacgaatacgaccccactatagaggattcctac cggaagcaggtggtcattgatggggagacgtgcctgttggacatcctggataccgccggc caggaggagtacagcgccatgcgggaccagtacatgcgcaccggggagggcttcctgtgt gtgtttgccatcaacaacaccaagtcttttgaggacatccaccagtacagggagcagatc aaacgggtgaaggactcggatgacgtgcccatggtgctggtggggaacaagtgtgacctg gctgcacgcactgtggaatctcggcaggctcaggacctcgcccgaagctacggcatcccc tacatcgagacctcggccaagacccggcagggagtggaggatgccttctacacgttggtg cgtgagatccggcagcacaagctgcggaagctgaaccctcctgatgagagtggccccggc tgcatgagctgcaagtgtgtgctctcctga >HUMRASH|GENSCAN_predicted_peptide_2|189_aa MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAG 
QEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDL AARTVESRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQHKLRKLNPPDESGPG CMSCKCVLS >HUMRASH|GENSCAN_predicted_CDS_2|570_bp atgacggaatataagctggtggtggtgggcgccggcggtgtgggcaagagtgcgctgacc atccagctgatccagaaccattttgtggacgaatacgaccccactatagaggattcctac cggaagcaggtggtcattgatggggagacgtgcctgttggacatcctggataccgccggc caggaggagtacagcgccatgcgggaccagtacatgcgcaccggggagggcttcctgtgt gtgtttgccatcaacaacaccaagtcttttgaggacatccaccagtacagggagcagatc aaacgggtgaaggactcggatgacgtgcccatggtgctggtggggaacaagtgtgacctg gctgcacgcactgtggaatctcggcaggctcaggacctcgcccgaagctacggcatcccc tacatcgagacctcggccaagacccggcagggagtggaggatgccttctacacgttggtg cgtgagatccggcagcacaagctgcggaagctgaaccctcctgatgagagtggccccggc tgcatgagctgcaagtgtgtgctctcctga bio-1.4.3.0001/test/data/TMHMM/0000755000004100000410000000000012200110570015435 5ustar www-datawww-databio-1.4.3.0001/test/data/TMHMM/sample.report0000644000004100000410000000161512200110570020156 0ustar www-datawww-data# O42385 Length: 423 # O42385 Number of predicted TMHs: 7 # O42385 Exp number of AAs in TMHs: 157.40784 # O42385 Exp number, first 60 AAs: 13.85627 # O42385 Total prob of N-in: 0.00993 # O42385 POSSIBLE N-term signal sequence O42385 TMHMM2.0 outside 1 46 O42385 TMHMM2.0 TMhelix 47 69 O42385 TMHMM2.0 inside 70 81 O42385 TMHMM2.0 TMhelix 82 104 O42385 TMHMM2.0 outside 105 118 O42385 TMHMM2.0 TMhelix 119 141 O42385 TMHMM2.0 inside 142 161 O42385 TMHMM2.0 TMhelix 162 184 O42385 TMHMM2.0 outside 185 205 O42385 TMHMM2.0 TMhelix 206 228 O42385 TMHMM2.0 inside 229 348 O42385 TMHMM2.0 TMhelix 349 371 O42385 TMHMM2.0 outside 372 380 O42385 TMHMM2.0 TMhelix 381 403 O42385 TMHMM2.0 inside 404 423bio-1.4.3.0001/test/data/uniprot/0000755000004100000410000000000012200110570016253 5ustar www-datawww-databio-1.4.3.0001/test/data/uniprot/p53_human.uniprot0000644000004100000410000021605312200110570021503 0ustar www-datawww-dataID P53_HUMAN STANDARD; PRT; 393 AA. 
AC P04637; Q15086; Q15087; Q15088; Q16535; Q16807; Q16808; Q16809; AC Q16810; Q16811; Q16848; Q86UG1; Q8J016; Q99659; Q9BTM4; Q9HAQ8; AC Q9NP68; Q9NPJ2; Q9NZD0; Q9UBI2; Q9UQ61; DT 13-AUG-1987 (Rel. 05, Created) DT 01-MAR-1989 (Rel. 10, Last sequence update) DT 13-SEP-2005 (Rel. 48, Last annotation update) DE Cellular tumor antigen p53 (Tumor suppressor p53) (Phosphoprotein p53) DE (Antigen NY-CO-13). GN Name=TP53; Synonyms=P53; OS Homo sapiens (Human). OC Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; OC Mammalia; Eutheria; Euarchontoglires; Primates; Catarrhini; Hominidae; OC Homo. OX NCBI_TaxID=9606; RN [1] RP NUCLEOTIDE SEQUENCE. RX MEDLINE=85230577; PubMed=4006916; RA Zakut-Houri R., Bienz-Tadmor B., Givol D., Oren M.; RT "Human p53 cellular tumor antigen: cDNA sequence and expression in COS RT cells."; RL EMBO J. 4:1251-1255(1985). RN [2] RP NUCLEOTIDE SEQUENCE. RX MEDLINE=87064416; PubMed=2946935; RA Lamb P., Crawford L.; RT "Characterization of the human p53 gene."; RL Mol. Cell. Biol. 6:1379-1385(1986). RN [3] RP NUCLEOTIDE SEQUENCE. RX MEDLINE=85267676; PubMed=3894933; RA Harlow E., Williamson N.M., Ralston R., Helfman D.M., Adams T.E.; RT "Molecular cloning and in vitro expression of a cDNA clone for human RT cellular tumor antigen p53."; RL Mol. Cell. Biol. 5:1601-1610(1985). RN [4] RP NUCLEOTIDE SEQUENCE. RX MEDLINE=87089826; PubMed=3025664; RA Harris N., Brill E., Shohat O., Prokocimer M., Wolf D., Arai N., RA Rotter V.; RT "Molecular basis for heterogeneity of the human p53 protein."; RL Mol. Cell. Biol. 6:4650-4656(1986). RN [5] RP NUCLEOTIDE SEQUENCE. RX MEDLINE=89108008; PubMed=2905688; DOI=10.1016/0378-1119(88)90196-5; RA Buchman V.L., Chumakov P.M., Ninkina N.N., Samarina O.P., RA Georgiev G.P.; RT "A variation in the structure of the protein-coding region of the RT human p53 gene."; RL Gene 70:245-252(1988). RN [6] RP NUCLEOTIDE SEQUENCE OF 101-393. 
RX MEDLINE=85126934; PubMed=6396087; RA Matlashewski G., Lamb P., Pim D., Peacock J., Crawford L., RA Benchimol S.; RT "Isolation and characterization of a human p53 cDNA clone: expression RT of the human p53 gene."; RL EMBO J. 3:3257-3262(1984). RN [7] RP NUCLEOTIDE SEQUENCE, VARIANTS BURKITT'S LYMPHOMA, AND VARIANT ARG-72. RX MEDLINE=92007731; PubMed=1915267; RA Farrell P.J., Allan G., Shanahan F., Vousden K.H., Crook T.; RT "p53 is frequently mutated in Burkitt's lymphoma cell lines."; RL EMBO J. 10:2879-2887(1991). RN [8] RP NUCLEOTIDE SEQUENCE, AND VARIANTS ARG-72 AND LYS-286. RX MEDLINE=93303270; PubMed=8316628; RA Allalunis-Turner M.J., Barron G.M., Day R.S. III, Dobler K.D., RA Mirzayans R.; RT "Isolation of two cell lines from a human malignant glioma specimen RT differing in sensitivity to radiation and chemotherapeutic drugs."; RL Radiat. Res. 134:349-354(1993). RN [9] RP NUCLEOTIDE SEQUENCE, AND VARIANT ARG-72. RX MEDLINE=21264809; PubMed=11058590; DOI=10.1074/jbc.M007140200; RA Chang N.-S., Pratt N., Heath J., Schultz L., Sleve D., Carey G.B., RA Zevotek N.; RT "Hyaluronidase induction of a WW domain-containing oxidoreductase that RT enhances tumor necrosis factor cytotoxicity."; RL J. Biol. Chem. 276:3361-3370(2001). RN [10] RP NUCLEOTIDE SEQUENCE. RA Chumakov P.M., Almazov V.P., Jenkins J.R.; RL Submitted (JUN-1991) to the EMBL/GenBank/DDBJ databases. RN [11] RP NUCLEOTIDE SEQUENCE. RA Rozemuller E.H., Tilanus M.G.J.; RT "P53 genomic sequence. Corrections and polymorphism."; RL Submitted (MAR-1997) to the EMBL/GenBank/DDBJ databases. RN [12] RP NUCLEOTIDE SEQUENCE, AND VARIANTS SER-47; ARG-72; LYS-339 AND ALA-366. 
RA Livingston R.J., Rieder M.J., Chung M.-W., Ritchie T.K., Olson A.N., RA Nguyen C.P., Gildersleeve H., Cassidy C.M., Johnson E.J., RA Swanson J.E., McFarland I., Yool B., Park C., Nickerson D.A.; RT "NIEHS-SNPs, environmental genome project, NIEHS ES15478, Department RT of Genome Sciences, Seattle, WA (URL: http://egp.gs.washington.edu)."; RL Submitted (NOV-2004) to the EMBL/GenBank/DDBJ databases. RN [13] RP NUCLEOTIDE SEQUENCE, AND VARIANTS ARG-72 AND LYS-286. RX PubMed=11023613; RA Anderson C.W., Allalunis-Turner M.J.; RT "Human TP53 from the malignant glioma-derived cell lines M059J and RT M059K has a cancer-associated mutation in exon 8."; RL Radiat. Res. 154:473-476(2000). RN [14] RP NUCLEOTIDE SEQUENCE, AND VARIANTS ARG-72; HIS-273 AND SER-309. RA Azuma K., Shichijo S., Itoh K.; RT "Identification of a tumor-rejection antigen recognized by HLA-B46 RT restricted CTL."; RL Submitted (MAR-2002) to the EMBL/GenBank/DDBJ databases. RN [15] RP NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA], AND VARIANTS ARG-72 AND RP ALA-278. 
RC TISSUE=Kidney; RX MEDLINE=22388257; PubMed=12477932; DOI=10.1073/pnas.242603899; RA Strausberg R.L., Feingold E.A., Grouse L.H., Derge J.G., RA Klausner R.D., Collins F.S., Wagner L., Shenmen C.M., Schuler G.D., RA Altschul S.F., Zeeberg B., Buetow K.H., Schaefer C.F., Bhat N.K., RA Hopkins R.F., Jordan H., Moore T., Max S.I., Wang J., Hsieh F., RA Diatchenko L., Marusina K., Farmer A.A., Rubin G.M., Hong L., RA Stapleton M., Soares M.B., Bonaldo M.F., Casavant T.L., Scheetz T.E., RA Brownstein M.J., Usdin T.B., Toshiyuki S., Carninci P., Prange C., RA Raha S.S., Loquellano N.A., Peters G.J., Abramson R.D., Mullahy S.J., RA Bosak S.A., McEwan P.J., McKernan K.J., Malek J.A., Gunaratne P.H., RA Richards S., Worley K.C., Hale S., Garcia A.M., Gay L.J., Hulyk S.W., RA Villalon D.K., Muzny D.M., Sodergren E.J., Lu X., Gibbs R.A., RA Fahey J., Helton E., Ketteman M., Madan A., Rodrigues S., Sanchez A., RA Whiting M., Madan A., Young A.C., Shevchenko Y., Bouffard G.G., RA Blakesley R.W., Touchman J.W., Green E.D., Dickson M.C., RA Rodriguez A.C., Grimwood J., Schmutz J., Myers R.M., RA Butterfield Y.S.N., Krzywinski M.I., Skalska U., Smailus D.E., RA Schnerch A., Schein J.E., Jones S.J.M., Marra M.A.; RT "Generation and initial analysis of more than 15,000 full-length human RT and mouse cDNA sequences."; RL Proc. Natl. Acad. Sci. U.S.A. 99:16899-16903(2002). RN [16] RP NUCLEOTIDE SEQUENCE OF 1-379, AND VARIANTS ARG-72 AND ASN-139. RC TISSUE=Lung carcinoma; RX PubMed=14660794; DOI=10.1073/pnas.2536558100; RA Kanashiro C.A., Schally A.V., Groot K., Armatis P., Bernardino A.L., RA Varga J.L.; RT "Inhibition of mutant p53 expression and growth of DMS-153 small cell RT lung carcinoma by antagonists of growth hormone-releasing hormone and RT bombesin."; RL Proc. Natl. Acad. Sci. U.S.A. 100:15836-15841(2003). RN [17] RP NUCLEOTIDE SEQUENCE OF 126-185. 
RA Pan X.L., Zhang A.H.; RT "Study on the effect of tumor suppressor gene p53 in arsenism RT patients."; RL Submitted (SEP-2003) to the EMBL/GenBank/DDBJ databases. RN [18] RP NUCLEOTIDE SEQUENCE OF 261-298. RC TISSUE=Blood; RA Nimri L.F., Owais W., Momani E.; RT "Detection of P53 gene mutations and serum p53 antibodies associated RT with cigarette smoking."; RL Submitted (AUG-2003) to the EMBL/GenBank/DDBJ databases. RN [19] RP NUCLEOTIDE SEQUENCE OF 262-306. RC TISSUE=Ovarian adenocarcinoma; RA Filippini G., Soldati G.; RL Submitted (JUL-1996) to the EMBL/GenBank/DDBJ databases. RN [20] RP NUCLEOTIDE SEQUENCE OF 225-260. RC TISSUE=Glial cell, and Glial tumor; RA Thompson-Hehir J., Davies M.P.A., Green J.A., Halliwell N., RA Joyce K.A., Salisbury J., Sibson D.R., Vergote I., Walker C.; RT "Mutation detection utilizing a novel PCR approach for amplification RT of the p53 gene from microdissected tissue: application to archival RT tumor samples."; RL Submitted (DEC-1999) to the EMBL/GenBank/DDBJ databases. RN [21] RP NUCLEOTIDE SEQUENCE OF 225-260. RA Yavuz A.S., Farner N.L., Yavuz S., Grammer A.C., Girschick H.J., RA Lipsky P.E.; RT "Bcl6 and P53 gene mutations in tonsillar B cells."; RL Submitted (MAR-2000) to the EMBL/GenBank/DDBJ databases. RN [22] RP NUCLEOTIDE SEQUENCE OF 332-366. RA Pinto E.M., Mendonca B.B., Latronico A.C.; RT "Allelic variant in intron 9 of TP53 gene."; RL Submitted (APR-2003) to the EMBL/GenBank/DDBJ databases. RN [23] RP RNA-BINDING. RX MEDLINE=91141509; PubMed=1705009; RA Samad A., Carroll R.B.; RT "The tumor suppressor p53 is bound to RNA by a stable covalent RT linkage."; RL Mol. Cell. Biol. 11:1598-1606(1991). RN [24] RP ALTERNATIVE SPLICING. RX MEDLINE=96197761; PubMed=8632903; RA Flaman J.-M., Waridel F., Estreicher A., Vannier A., Limacher J.-M., RA Gilbert D., Iggo R., Frebourg T.; RT "The human tumour suppressor gene p53 is alternatively spliced in RT normal cells."; RL Oncogene 12:813-818(1996). 
RN [25] RP NUCLEAR LOCALIZATION SIGNAL. RX MEDLINE=90191730; PubMed=2156209; RA Addison C., Jenkins J.R., Sturzbecher H.-W.; RT "The p53 nuclear localisation signal is structurally linked to a RT p34cdc2 kinase motif."; RL Oncogene 5:423-426(1990). RN [26] RP MINIMAL REPRESSION DOMAIN. RX MEDLINE=21125692; PubMed=11007800; DOI=10.1074/jbc.M008231200; RA Hong T.M., Chen J.J., Peck K., Yang P.C., Wu C.W.; RT "p53 amino acids 339-346 represent the minimal p53 repression RT domain."; RL J. Biol. Chem. 276:1510-1515(2001). RN [27] RP INTERACTION WITH ING4. RX MEDLINE=22635239; PubMed=12750254; RA Shiseki M., Nagashima M., Pedeux R.M., Kitahama-Shiseki M., Miura K., RA Okamura S., Onogi H., Higashimoto Y., Appella E., Yokota J., RA Harris C.C.; RT "p29ING4 and p28ING5 bind to p53 and p300, and enhance p53 activity."; RL Cancer Res. 63:2373-2378(2003). RN [28] RP PHOSPHORYLATION BY P60/CDC2 AND CYCLIN B/CDC2. RX MEDLINE=90280456; PubMed=2141171; RA Bischoff J.R., Friedman P.N., Marshak D.R., Prives C., Beach D.; RT "Human p53 is phosphorylated by p60-cdc2 and cyclin B-cdc2."; RL Proc. Natl. Acad. Sci. U.S.A. 87:4766-4770(1990). RN [29] RP DEPHOSPHORYLATION BY PP2A. RX MEDLINE=91172186; PubMed=1848668; RA Scheidtmann K.H., Mumby M.C., Rundell K., Walter G.; RT "Dephosphorylation of simian virus 40 large-T antigen and p53 protein RT by protein phosphatase 2A: inhibition by small-t antigen."; RL Mol. Cell. Biol. 11:1996-2003(1991). RN [30] RP O-GLYCOSYLATION. RX MEDLINE=96197773; PubMed=8632915; RA Shaw P., Freeman J., Bovey R., Iggo R.; RT "Regulation of specific DNA binding by p53: evidence for a role for O- RT glycosylation and charged residues at the carboxy-terminus."; RL Oncogene 12:921-930(1996). RN [31] RP PHOSPHORYLATION BY PRPK. 
RX MEDLINE=21570176; PubMed=11546806; DOI=10.1074/jbc.M105669200; RA Abe Y., Matsumoto S., Wei S., Nezu K., Miyoshi A., Kito K., Ueda N., RA Shigemoto K., Hitsumoto Y., Nikawa J.-I., Enomoto Y.; RT "Cloning and characterization of a p53-related protein kinase RT expressed in interleukin-2-activated cytotoxic T-cells, epithelial RT tumor cell lines, and the testes."; RL J. Biol. Chem. 276:44003-44011(2001). RN [32] RP PHOSPHORYLATION SITE THR-18. RX MEDLINE=20406546; PubMed=10951572; DOI=10.1038/sj.onc.1203709; RA Lopez-Borges S., Lazo P.A.; RT "The human vaccinia-related kinase 1 (VRK1) phosphorylates threonine- RT 18 within the mdm-2 binding site of the p53 tumour suppressor RT protein."; RL Oncogene 19:3656-3664(2000). RN [33] RP IDENTIFICATION IN A COMPLEX WITH CABLES1 AND TP73. RX MEDLINE=21659718; PubMed=11706030; DOI=10.1074/jbc.M108535200; RA Tsuji K., Mizumoto K., Yamochi T., Nishimoto I., Matsuoka M.; RT "Differential effect of ik3-1/cables on p53- and p73-induced cell RT death."; RL J. Biol. Chem. 277:2951-2957(2002). RN [34] RP PHOSPHORYLATION SITE THR-55, MUTAGENESIS OF THR-55, AND INTERACTION RP WITH TAF1. RX PubMed=15053879; DOI=10.1016/S1097-2765(04)00123-6; RA Li H.-H., Li A.G., Sheppard H.M., Liu X.; RT "Phosphorylation on Thr-55 by TAF1 mediates degradation of p53: a role RT for TAF1 in cell G1 progression."; RL Mol. Cell 13:867-878(2004). RN [35] RP ACETYLATION SITE LYS-305. RX MEDLINE=22726738; PubMed=12724314; DOI=10.1074/jbc.M212574200; RA Wang Y.H., Tsay Y.G., Tan B.C., Lo W.Y., Lee S.C.; RT "Identification and characterization of a novel p300-mediated p53 RT acetylation site, lysine 305."; RL J. Biol. Chem. 278:25568-25576(2003). RN [36] RP ACETYLATION SITES LYS-373 AND LYS-382. RX MEDLINE=20123976; PubMed=10656795; DOI=10.1006/jmbi.1999.3415; RA Abraham J., Kelly J., Thibault P., Benchimol S.; RT "Post-translational modification of p53 protein in response to RT ionizing radiation analyzed by mass spectrometry."; RL J. Mol. Biol. 
295:853-864(2000). RN [37] RP DEACETYLATION OF LYS-382 BY SIRT1. RX MEDLINE=21526627; PubMed=11672523; DOI=10.1016/S0092-8674(01)00527-X; RA Vaziri H., Dessain S.K., Ng Eaton E., Imai S.-I., Frye R.A., RA Pandita T.K., Guarente L., Weinberg R.A.; RT "hSIR2(SIRT1) functions as an NAD-dependent p53 deacetylase."; RL Cell 107:149-159(2001). RN [38] RP INTERACTION WITH HIPK2. RX MEDLINE=22191252; PubMed=11925430; DOI=10.1074/jbc.M200153200; RA Kim E.-J., Park J.-S., Um S.-J.; RT "Identification and characterization of HIPK2 interacting with p73 and RT modulating functions of the p53 family in vivo."; RL J. Biol. Chem. 277:32020-32028(2002). RN [39] RP INTERACTION WITH HIPK2, PHOSPHORYLATION SITE SER-46, AND MUTAGENESIS RP OF SER-46 AND LYS-382. RX MEDLINE=21638685; PubMed=11740489; DOI=10.1038/ncb715; RA Hofmann T.G., Moeller A., Sirma H., Zentgraf H., Taya Y., Droege W., RA Will H., Schmitz M.L.; RT "Regulation of p53 activity by its interaction with homeodomain- RT interacting protein kinase-2."; RL Nat. Cell Biol. 4:1-10(2002). RN [40] RP INTERACTION WITH HIPK2, AND PHOSPHORYLATION SITE SER-46. RX MEDLINE=21638694; PubMed=11780126; DOI=10.1038/ncb714; RA D'Orazi G., Cecchinelli B., Bruno T., Manni I., Higashimoto Y., RA Saito S., Gostissa M., Coen S., Marchetti A., Del Sal G., Piaggio G., RA Fanciulli M., Appella E., Soddu S.; RT "Homeodomain-interacting protein kinase-2 phosphorylates p53 at Ser 46 RT and mediates apoptosis."; RL Nat. Cell Biol. 4:11-19(2002). RN [41] RP INTERACTION WITH P53DINP1. RX MEDLINE=22863074; PubMed=12851404; DOI=10.1074/jbc.M301979200; RA Tomasini R., Samir A.A., Carrier A., Isnardon D., Cecchinelli B., RA Soddu S., Malissen B., Dagorn J.-C., Iovanna J.L., Dusetti N.J.; RT "TP53INP1s and homeodomain-interacting protein kinase-2 (HIPK2) are RT partners in regulating p53 activity."; RL J. Biol. Chem. 278:37722-37729(2003). RN [42] RP INTERACTION WITH HIPK1. 
RX MEDLINE=22608637; PubMed=12702766; DOI=10.1073/pnas.0530308100; RA Kondo S., Lu Y., Debbas M., Lin A.W., Sarosi I., Itie A., Wakeham A., RA Tuan J., Saris C., Elliott G., Ma W., Benchimol S., Lowe S.W., RA Mak T.W., Thukral S.K.; RT "Characterization of cells and gene-targeted mice deficient for the RT p53-binding kinase homeodomain-interacting protein kinase 1 (HIPK1)."; RL Proc. Natl. Acad. Sci. U.S.A. 100:5431-5436(2003). RN [43] RP INTERACTIONS WITH HRMT1L2; EP300 AND CARM1, AND FUNCTION. RX PubMed=15186775; DOI=10.1016/j.cell.2004.05.009; RA An W., Kim J., Roeder R.G.; RT "Ordered cooperative functions of PRMT1, p300, and CARM1 in RT transcriptional activation by p53."; RL Cell 117:735-748(2004). RN [44] RP NUCLEOCYTOPLASMIC SHUTTLING, AND NUCLEAR EXPORT SIGNAL. RX MEDLINE=22825602; PubMed=12944468; RX DOI=10.1128/MCB.23.18.6396-6405.2003; RA O'Keefe K., Li H., Zhang Y.; RT "Nucleocytoplasmic shuttling of p53 is essential for MDM2-mediated RT cytoplasmic degradation but not ubiquitination."; RL Mol. Cell. Biol. 23:6396-6405(2003). RN [45] RP REVIEW ON ZINC-BINDING PROPERTIES. RX MEDLINE=21438235; PubMed=11554448; DOI=10.1089/15230860152542961; RA Hainaut P., Mann K.; RT "Zinc binding and redox control of p53 structure and function."; RL Antioxid. Redox Signal. 3:611-623(2001). RN [46] RP STRUCTURE BY NMR OF 319-360. RX MEDLINE=94294808; PubMed=8023159; RA Clore G.M., Omichinski J.G., Sakaguchi K., Zambrano N., Sakamoto H., RA Appella E., Gronenborn A.M.; RT "High-resolution structure of the oligomerization domain of p53 by RT multidimensional NMR."; RL Science 265:386-391(1994). RN [47] RP STRUCTURE BY NMR OF 325-355. RX MEDLINE=95292092; PubMed=7773777; RA Lee W., Harvey T.S., Yin Y., Yau P., Litchfield D., Arrowsmith C.H.; RT "Solution structure of the tetrameric minimum transforming domain of RT p53."; RL Nat. Struct. Biol. 1:877-890(1994). RN [48] RP STRUCTURE BY NMR OF 326-354. 
RX MEDLINE=98026899; PubMed=9321402; DOI=10.1093/emboj/16.20.6230; RA McCoy M., Stavridi E.S., Waterman J.L., Wieczorek A.M., Opella S.J., RA Halazonetis T.D.; RT "Hydrophobic side-chain size is a determinant of the three-dimensional RT structure of the p53 oligomerization domain."; RL EMBO J. 16:6230-6236(1997). RN [49] RP X-RAY CRYSTALLOGRAPHY (2.2 ANGSTROMS) OF 94-289. RX MEDLINE=94294806; PubMed=8023157; RA Cho Y., Gorina S., Jeffrey P.D., Pavletich N.P.; RT "Crystal structure of a p53 tumor suppressor-DNA complex: RT understanding tumorigenic mutations."; RL Science 265:346-355(1994). RN [50] RP X-RAY CRYSTALLOGRAPHY (1.7 ANGSTROMS) OF 325-356. RX MEDLINE=95184011; PubMed=7878469; RA Jeffrey P.D., Gorina S., Pavletich N.P.; RT "Crystal structure of the tetramerization domain of the p53 tumor RT suppressor at 1.7 angstroms."; RL Science 267:1498-1502(1995). RN [51] RP X-RAY CRYSTALLOGRAPHY (2.3 ANGSTROMS) OF 13-29 IN COMPLEX WITH MDM2. RX MEDLINE=97081050; PubMed=8875929; DOI=10.1126/science.274.5289.948; RA Kussie P.H., Gorina S., Marechal V., Elenbaas B., Moreau J., RA Levine A.J., Pavletich N.P.; RT "Structure of the MDM2 oncoprotein bound to the p53 tumor suppressor RT transactivation domain."; RL Science 274:948-953(1996). RN [52] RP X-RAY CRYSTALLOGRAPHY (2.2 ANGSTROMS) OF 97-287 IN COMPLEX WITH 53BP2. RX MEDLINE=97035414; PubMed=8875926; DOI=10.1126/science.274.5289.1001; RA Gorina S., Pavletich N.P.; RT "Structure of the p53 tumor suppressor bound to the ankyrin and SH3 RT domains of 53BP2."; RL Science 274:1001-1005(1996). RN [53] RP REVIEW. RX MEDLINE=94090335; PubMed=8266092; RA Harris C.C.; RT "p53: at the crossroads of molecular carcinogenesis and risk RT assessment."; RL Science 262:1980-1981(1993). RN [54] RP REVIEW ON VARIANTS. RX MEDLINE=91289156; PubMed=1905840; RA Hoolstein M., Sidransky D., Vogelstein B., Harris C.C.; RT "p53 mutations in human cancers."; RL Science 253:49-53(1991). RN [55] RP REVIEW ON VARIANTS. 
RX MEDLINE=96271983; PubMed=8829653; RX DOI=10.1002/(SICI)1098-1004(1996)7:3<202::AID-HUMU4>3.3.CO;2-5; RA de Vries E.M.G., Ricke D.O., de Vries T.N., Hartmann A., Blaszyk H., RA Liao D., Soussi T., Kovach J.S., Sommer S.S.; RT "Database of mutations in the p53 and APC tumor suppressor genes RT designed to facilitate molecular epidemiological analyses."; RL Hum. Mutat. 7:202-213(1996). RN [56] RP VARIANT ARG-72. RX MEDLINE=91153807; PubMed=1999338; DOI=10.1007/BF00201836; RA Olschwang S., Laurent-Puig P., Vassal A., Salmon R.-J., Thomas G.; RT "Characterization of a frequent polymorphism in the coding sequence of RT the Tp53 gene in colonic cancer patients and a control population."; RL Hum. Genet. 86:369-370(1991). RN [57] RP VARIANT LFS THR-133. RX MEDLINE=92034774; PubMed=1933902; RA Law J.C., Strong L.C., Chidambaram A., Ferrell R.E.; RT "A germ line mutation in exon 5 of the p53 gene in an extended cancer RT family."; RL Cancer Res. 51:6385-6387(1991). RN [58] RP VARIANTS LFS CYS-245; TRP-248; PRO-252 AND LYS-258. RX MEDLINE=91057657; PubMed=1978757; RA Malkin D., Li F.P., Strong L.C., Fraumeni J.F. Jr., Nelson C.E., RA Kim D.H., Kassel J., Gryka M.A., Bischoff F.Z., Tainsky M.A., RA Friend S.H.; RT "Germ line p53 mutations in a familial syndrome of breast cancer, RT sarcomas, and other neoplasms."; RL Science 250:1233-1238(1990). RN [59] RP VARIANT LFS ASP-245. RX MEDLINE=91080929; PubMed=2259385; DOI=10.1038/348747a0; RA Srivastava S., Zou Z., Pirollo K., Blattner W., Chang E.H.; RT "Germ-line transmission of a mutated p53 gene in a cancer-prone family RT with Li-Fraumeni syndrome."; RL Nature 348:747-749(1990). RN [60] RP VARIANT LFS LEU-272. RX MEDLINE=92147883; PubMed=1737852; RA Felix C.A., Nau M.M., Takahashi T., Mitsudomi T., Chiba I., RA Poplack D.G., Reaman G.H., Cole D.E., Letterio J.J., Whang-Peng J., RA Knutsen T., Minna J.D.; RT "Hereditary and acquired p53 gene mutations in childhood acute RT lymphoblastic leukemia."; RL J. Clin. Invest. 
89:640-647(1992). RN [61] RP VARIANTS LFS HIS-273 AND VAL-325. RX MEDLINE=92228023; PubMed=1565144; RA Malkin D., Jolly K.W., Barbier N., Look A.T., Friend S.H., RA Gebhardt M.C., Andersen T.I., Boerresen A.-L., Li F.P., Garber J., RA Strong L.C.; RT "Germline mutations of the p53 tumor-suppressor gene in children and RT young adults with second malignant neoplasms."; RL N. Engl. J. Med. 326:1309-1315(1992). RN [62] RP VARIANTS BREAST TUMORS GLN-132; SER-249; LYS-280 AND LYS-285. RX MEDLINE=90295284; PubMed=1694291; RA Bartek J., Iggo R., Gannon J., Lane D.P.; RT "Genetic and immunochemical analysis of mutant p53 in human breast RT cancer cell lines."; RL Oncogene 5:893-899(1990). RN [63] RP VARIANTS COLON TUMORS PHE-241 AND HIS-273. RX MEDLINE=91017544; PubMed=1699228; RA Rodrigues N.R., Rowan A., Smith M.E.F., Kerr I.B., Bodmer W.F., RA Gannon J.V., Lane D.P.; RT "p53 mutations in colorectal cancer."; RL Proc. Natl. Acad. Sci. U.S.A. 87:7555-7559(1990). RN [64] RP VARIANTS ESOPHAGUS TUMOR VAL-154; VAL-245; GLN-248; LEU-278 AND RP SER-278. RX MEDLINE=91088630; PubMed=2263646; RA Hollstein M.C., Metcalf R.A., Welsh J.A., Montesano R., Harris C.C.; RT "Frequent mutation of the p53 gene in human esophageal cancer."; RL Proc. Natl. Acad. Sci. U.S.A. 87:9958-9961(1990). RN [65] RP VARIANTS COLORECTAL CANCER MUTATIONS. RX MEDLINE=91282784; PubMed=1647768; RA Ishioka C., Sato T., Gamoh M., Suzuki T., Shibata H., Kanamaru R., RA Wakui A., Yamazaki T.; RT "Mutations of the P53 gene, including an intronic point mutation, in RT colorectal tumors."; RL Biochem. Biophys. Res. Commun. 177:901-906(1991). RN [66] RP VARIANTS ESOPHAGUS TUMORS LEU-152; ALA-155; HIS-175; PHE-176 AND RP HIS-273. RX MEDLINE=91330175; PubMed=1868473; RA Casson A.G., Mukhopadhyay T., Cleary K.R., Ro J.Y., Levin B., RA Roth J.A.; RT "p53 gene mutations in Barrett's epithelium and esophageal cancer."; RL Cancer Res. 51:4495-4499(1991). RN [67] RP VARIANTS HEPATOCELLULAR CARCINOMAS MUTATIONS IN CHINA. 
RX MEDLINE=91187113; PubMed=1849234; DOI=10.1038/350427a0; RA Hsu I.C., Metcalf R.A., Sun T., Welsh J.A., Wang N.J., Harris C.C.; RT "Mutational hotspot in the p53 gene in human hepatocellular RT carcinomas."; RL Nature 350:427-428(1991). RN [68] RP VARIANTS HEPATOCELLULAR CARCINOMAS MUTATIONS IN SOUTH AFRICA. RX MEDLINE=91187114; PubMed=1672732; DOI=10.1038/350429a0; RA Bressac B., Kew M., Wands J., Ozturk M.; RT "Selective G to T mutations of p53 gene in hepatocellular carcinoma RT from southern Africa."; RL Nature 350:429-431(1991). RN [69] RP VARIANTS HNSCC PHE-176; PHE-242; CYS-245; LEU-248 AND HIS-273. RX MEDLINE=93007999; PubMed=1394225; RA Somers K.D., Merrick M.A., Lopez M.E., Incognito L.S., Schechter G.L., RA Casey G.; RT "Frequent p53 mutations in head and neck cancer."; RL Cancer Res. 52:5997-6000(1992). RN [70] RP VARIANTS ANOGENITAL CARCINOMAS. RX MEDLINE=93010989; PubMed=1327751; RA Crook T., Vousden K.H.; RT "Properties of p53 mutations detected in primary and secondary RT cervical cancers suggest mechanisms of metastasis and involvement of RT environmental carcinogens."; RL EMBO J. 11:3935-3940(1992). RN [71] RP VARIANTS ORAL SQUAMOUS CELL CARCINOMA CYS-205; GLU-281 AND LYS-285. RX MEDLINE=93093790; PubMed=1459726; RA Sakai E., Rikimaru K., Ueda M., Matsumoto Y., Ishii N., Enomoto S., RA Yamamoto H., Tsuchida N.; RT "The p53 tumor-suppressor gene and ras oncogene mutations in oral RT squamous-cell carcinoma."; RL Int. J. Cancer 52:867-872(1992). RN [72] RP VARIANT PRO-HIS-PRO-178 INS. RX MEDLINE=93265016; PubMed=1303181; RA Bhatia K., Guiterrez M.I., Magrath I.T.; RT "A novel mutation in the p53 gene in a Burkitt's lymphoma cell line."; RL Hum. Mol. Genet. 1:207-208(1992). RN [73] RP VARIANTS BURKITT'S LYMPHOMAS. 
RX MEDLINE=93064692; PubMed=1437144; RA Duthu A., Debuire B., Romano J.W., Ehrhart J.C., Fiscella M., May E., RA Appella E., May P.; RT "p53 mutations in Raji cells: characterization and localization RT relative to other Burkitt's lymphomas."; RL Oncogene 7:2161-2167(1992). RN [74] RP VARIANT NASOPHARYNGEAL CARCINOMA THR-280. RX MEDLINE=92335329; PubMed=1631151; RA Sun Y., Hegamyer G., Heng Y.-J., Hildesheim A., Chen J.-Y., Cao Y., RA Yao K.-T., Colburn N.H.; RT "An infrequent point mutation of the p53 gene in human nasopharyngeal RT carcinoma."; RL Proc. Natl. Acad. Sci. U.S.A. 89:6516-6520(1992). RN [75] RP VARIANTS HNSCC. RX MEDLINE=93235942; PubMed=7682763; RA Caamano J., Zhang S.Y., Rosvold E.A., Bauer B., Klein-Szanto A.J.P.; RT "p53 alterations in human squamous cell carcinomas and carcinoma cell RT lines."; RL Am. J. Pathol. 142:1131-1139(1993). RN [76] RP VARIANTS HNSCC. RX MEDLINE=94006220; PubMed=8402617; RA Boyle J.O., Hakim J., Koch W., van der Riet P., Hruban R.H., Roa R.A., RA Correo R., Eby Y.J., Ruppert J.M., Sidransky D.; RT "The incidence of p53 mutations increases with progression of head and RT neck cancer."; RL Cancer Res. 53:4477-4480(1993). RN [77] RP VARIANTS COLON TUMORS. RX MEDLINE=93330562; PubMed=8336944; RA Hamelin R., Jego N., Laurent-Puig P., Vidaud M., Thomas G.; RT "Efficient screening of p53 mutations by denaturing gradient gel RT electrophoresis in colorectal tumors."; RL Oncogene 8:2213-2220(1993). RN [78] RP CHARACTERIZATION OF VARIANT ALA-143. RX MEDLINE=94283378; PubMed=8013454; RA Zhang W., Guo X.-Y., Hu G.-Y., Liu W.-B., Shay J.W., Deisseroth A.B.; RT "A temperature-sensitive mutant of human p53."; RL EMBO J. 13:2535-2544(1994). RN [79] RP VARIANTS LFS HIS-175; ARG-193; GLN-248; CYS-273 AND TYR-275. RX MEDLINE=95193787; PubMed=7887414; RA Frebourg T., Barbier N., Yan Y.-X., Garber J.E., Dreyfus M., RA Fraumeni J.F. Jr., Li F.P., Friend S.H.; RT "Germ-line p53 mutations in 15 families with Li-Fraumeni syndrome."; RL Am. J. 
Hum. Genet. 56:608-615(1995). RN [80] RP VARIANT LFS HIS-175. RX MEDLINE=96423319; PubMed=8825920; RA Varley J.M., McGrown G., Thorncroft M., Tricker K.J., Teare M.D., RA Santibanez-Koref M.F., Houlston R.S., Martin J., Birch J.M., RA Evans D.G.R.; RT "An extended Li-Fraumeni kindred with gastric carcinoma and a codon RT 175 mutation in TP53."; RL J. Med. Genet. 32:942-945(1995). RN [81] RP VARIANTS ESOPHAGEAL ADENOCARCINOMA PHE-176; SER-245; TRP-248; TRP-282 RP AND GLN-286. RX MEDLINE=96233927; PubMed=8829627; RX DOI=10.1002/(SICI)1098-1004(1996)7:2<109::AID-HUMU4>3.3.CO;2-0; RA Audrezet M.-P., Robaszkiewicz M., Mercier B., Nousbaum J.-B., RA Hardy E., Bail J.-P., Volant A., Lozac'H P., Gouerou H., Ferec C.; RT "Molecular analysis of the TP53 gene in Barrett's adenocarcinoma."; RL Hum. Mutat. 7:109-113(1996). RN [82] RP VARIANTS COLORECTAL TUMORS. RX MEDLINE=97255965; PubMed=9101296; RX DOI=10.1002/(SICI)1098-1004(1997)9:4<348::AID-HUMU8>3.3.CO;2-7; RA Guldberg P., Nedergaard T., Nielsen H.J., Olsen A.C., Ahrenkiel V., RA Zeuthen J.; RT "Single-step DGGE-based mutation scanning of the p53 gene: application RT to genetic diagnosis of colorectal cancer."; RL Hum. Mutat. 9:348-355(1997). RN [83] RP VARIANT COLORECTAL CARCINOMA ILE-157. RX MEDLINE=98080146; PubMed=9419979; DOI=10.1038/sj.onc.1201668; RA Miyaki M., Nishio J., Konishi M., Kikuchi-Yanoshita R., Tanaka K., RA Muraoka M., Nagato M., Chong J.-M., Koike M., Terada T., Kawahara Y., RA Fukutome A., Tomiyama J., Chuganji Y., Momoi M., Utsunomiya J.; RT "Drastic genetic instability of tumors and normal tissues in Turcot RT syndrome."; RL Oncogene 15:2877-2881(1997). RN [84] RP VARIANTS SER-152; ILE-169; PHE-176; THR-195; CYS-220; ILE-230; CYS-273 RP AND SER-278. 
RX MEDLINE=98111377; PubMed=9450901; RX DOI=10.1002/(SICI)1098-1004(1998)11:1<39::AID-HUMU6>3.0.CO;2-G; RA van Rensburg E.J., Engelbrecht S., van Heerden W.F.P., Kotze M.J., RA Raubenheimer E.J.; RT "Detection of p53 gene mutations in oral squamous cell carcinomas of a RT black African population sample."; RL Hum. Mutat. 11:39-44(1998). RN [85] RP VARIANT NONCLASSICAL LFS CYS-337. RX MEDLINE=98112421; PubMed=9452042; RA Luca J.W., Strong L.C., Hansen M.F.; RT "A germline missense mutation R337C in exon 10 of the human p53 RT gene."; RL Hum. Mutat. Suppl. 1:S58-S61(1998). RN [86] RP VARIANT LFS ILE-292. RX MEDLINE=99414637; PubMed=10484981; DOI=10.1016/S0165-4608(98)00276-3; RA Gueran S., Tunca Y., Imirzalioglu N.; RT "Hereditary TP53 codon 292 and somatic P16INK4A codon 94 mutations in RT a Li-Fraumeni syndrome family."; RL Cancer Genet. Cytogenet. 113:145-151(1999). RN [87] RP INVOLVEMENT IN CHOROID PLEXUS PAPILLOMA. RX MEDLINE=22079076; PubMed=12085209; DOI=10.1038/sj/bjc/6600269; RA Rutherford J., Chu C.E., Duddy P.M., Charlton R.S., Chumas P., RA Taylor G.R., Lu X., Barnes D.M., Camplejohn R.S.; RT "Investigations on a clinically and functionally unusual and novel RT germline p53 mutation."; RL Br. J. Cancer 86:1592-1596(2002). CC -!- FUNCTION: Acts as a tumor suppressor in many tumor types; induces CC growth arrest or apoptosis depending on the physiological CC circumstances and cell type. Involved in cell cycle regulation as CC a trans-activator that acts to negatively regulate cell division CC by controlling a set of genes required for this process. One of CC the activated genes is an inhibitor of cyclin-dependent kinases. CC Apoptosis induction seems to be mediated either by stimulation of CC BAX and FAS antigen expression, or by repression of Bcl-2 CC expression. CC -!- COFACTOR: Binds 1 zinc ion per subunit. CC -!- SUBUNIT: Interacts with AXIN1. Probably part of a complex CC consisiting of TP53, HIPK2 and AXIN1 (By similarity). 
Binds DNA as CC a homotetramer. Interacts with histone acetyltransferases EP300 CC and methyltransferases HRMT1L2 and CARM1, and recruits them to CC promoters. In vitro, the interaction of TP53 with cancer- CC associated/HPV (E6) viral proteins leads to ubiquitination and CC degradation of TP53 giving a possible model for cell growth CC regulation. This complex formation requires an additional factor, CC E6-AP, which stably associates with TP53 in the presence of E6. C- CC terminus interacts with TAF1, when TAF1 is part of the TFIID CC complex. Interacts with ING4 and this interaction may be indirect. CC Found in a complex with CABLES1 and TP73. Interacts with HIPK1, CC HIPK2, and P53DINP1. CC -!- INTERACTION: CC Q8TDN4:CABLES1; NbExp=1; IntAct=EBI-366083, EBI-604615; CC Q9ESJ1:Cables1 (xeno); NbExp=1; IntAct=EBI-366083, EBI-604411; CC Q92793:CREBBP; NbExp=3; IntAct=EBI-366083, EBI-81215; CC P42858:HD; NbExp=2; IntAct=EBI-366083, EBI-466029; CC P09429:HMGB1; NbExp=1; IntAct=EBI-366083, EBI-389432; CC P56273:MDM2 (xeno); NbExp=1; IntAct=EBI-366083, EBI-541233; CC Q00987:MDM2; NbExp=1; IntAct=EBI-366083, EBI-389668; CC P06748:NPM1; NbExp=3; IntAct=EBI-366083, EBI-78579; CC P06748-1:NPM1; NbExp=1; IntAct=EBI-366083, EBI-354150; CC Q06609:RAD51; NbExp=1; IntAct=EBI-366083, EBI-297202; CC Q96ST3:SIN3A; NbExp=2; IntAct=EBI-366083, EBI-347218; CC P20226:TBP; NbExp=1; IntAct=EBI-366083, EBI-355371; CC Q7Z6Z7:UREB1; NbExp=2; IntAct=EBI-366083, EBI-625934; CC -!- SUBCELLULAR LOCATION: Cytoplasmic and nuclear. CC -!- ALTERNATIVE PRODUCTS: CC Event=Alternative splicing; Named isoforms=2; CC Name=1; CC IsoId=P04637-1; Sequence=Displayed; CC Name=2; Synonyms=I9RET; CC IsoId=P04637-2; Sequence=VSP_006535, VSP_006536; CC Note=Seems to be non-functional. Expressed in quiescent CC lymphocytes; CC -!- DOMAIN: The nuclear export signal acts as a transcriptional CC repression domain. CC -!- PTM: Acetylated. Acetylation of Lys-382 by CREBBP enhances CC transcriptional activity. 
Deacetylation of Lys-382 by SIRT1 CC impairs its ability to induce proapoptotic program and modulate CC cell senescence. CC -!- PTM: Phosphorylated. Phosphorylation on Ser residues mediates CC transcriptional activation. Phosphorylated by HIPK1 (By CC similarity). Phosphorylated on Thr-18 by VRK1, which may prevent CC the interaction with MDM2. Phosphorylated on Thr-55 by TAF1, which CC promotes MDM2-mediated degradation. Phosphorylated on Ser-46 by CC HIPK2 upon UV irradiation. Phosphorylation on Ser-46 is required CC for acetylation by CREBBP. CC -!- PTM: Dephosphorylated by PP2A. SV40 small T antigen inhibits the CC dephosphorylation by the AC form of PP2A. CC -!- PTM: May be O-glycosylated in the C-terminal basic region. Studied CC in EB-1 cell line. CC -!- DISEASE: TP53 is found in increased amounts in a wide variety of CC transformed cells. TP53 is frequently mutated or inactivated in CC about 60% of cancers. CC -!- DISEASE: Defects in TP53 are involved in esophageal squamous cell CC carcinoma (ESCC) [MIM:133239]. ESCC is a tumor of the esophagus. CC -!- DISEASE: Defects in TP53 are a cause of Li-Fraumeni syndrome (LFS) CC [MIM:151623]. LFS is an autosomal dominant familial cancer CC syndrome that in its classic form is defined by the existence of CC both a proband with a sarcoma and two other first-degree relatives CC with a cancer by age 45 years. In these families the affected CC relatives develop a diverse set of malignancies at unusually early CC ages. The spectrum of cancers in LFS includes breast carcinomas, CC soft-tissue sarcomas, brain tumors, osteosarcoma, leukemia and CC adreno-cortical carcinoma. Other possible component tumors of LFS CC are melanoma, gonadal cell tumors and carcinomas of the lung, CC pancreas and prostate. CC -!- DISEASE: Defects in TP53 are found in Barrett metaplasia; also CC known as Barrett esophagus. 
It is a condition in which the CC normally stratified squamous epithelium of the lower esophagus is CC replaced by a metaplastic columnar epithelium. The condition CC develops as a complication in approximately 10% of patients with CC chronic gastroesophageal reflux disease and predisposes to the CC development of esophageal adenocarcinoma. CC -!- DISEASE: Defects in TP53 are involved in head and neck squamous CC cell carcinomas (HNSCC) [MIM:275355]. CC -!- DISEASE: Defects in TP53 are involved in oral squamous cell CC carcinoma (OSCC). Cigarette smoke is a prime mutagenic agent in CC cancer of the aerodigestive tract. CC -!- DISEASE: Defects in TP53 are a cause of lung cancer [MIM:211980]. CC -!- DISEASE: Defects in TP53 are a cause of choroid plexus papilloma CC [MIM:260500]. Choroid plexus papilloma is a slow-growing benign CC tumor of the choroid plexus that often invades the leptomeninges. CC In children it is usually in a lateral ventricle but in adults it CC is more often in the fourth ventricle. Hydrocephalus is common, CC either from obstruction or from tumor secretion of cerebrospinal CC fluid. If it undergoes malignant transformation it is called a CC choroid plexus carcinoma. Primary choroid plexus tumors are rare CC and usually occur in early childhood. CC -!- SIMILARITY: Belongs to the p53 family. CC -!- DATABASE: NAME=IARC TP53 mutation database; CC NOTE=IARC db of somatic p53 mutations; CC WWW="http://www.iarc.fr/p53/". CC -!- DATABASE: NAME=Tokyo p53; CC NOTE=University of Tokyo db of p53 mutations; CC WWW="http://p53.genome.ad.jp/". CC -!- DATABASE: NAME=p53 web site at the Institut Curie; CC WWW="http://p53.curie.fr/". CC -!- DATABASE: NAME=Atlas Genet. Cytogenet. Oncol. Haematol.; CC WWW="http://www.infobiogen.fr/services/chromcancer/Genes/P53ID88.html". CC -------------------------------------------------------------------------- CC This Swiss-Prot entry is copyright. 
It is produced through a collaboration CC between the Swiss Institute of Bioinformatics and the EMBL outstation - CC the European Bioinformatics Institute. There are no restrictions on its CC use as long as its content is in no way modified and this statement is not CC removed. CC -------------------------------------------------------------------------- DR EMBL; X02469; CAA26306.1; -; mRNA. DR EMBL; M13121; AAA59987.1; -; Genomic_DNA. DR EMBL; M13112; AAA59987.1; JOINED; Genomic_DNA. DR EMBL; M13113; AAA59987.1; JOINED; Genomic_DNA. DR EMBL; M13114; AAA59987.1; JOINED; Genomic_DNA. DR EMBL; M13115; AAA59987.1; JOINED; Genomic_DNA. DR EMBL; M13116; AAA59987.1; JOINED; Genomic_DNA. DR EMBL; M13117; AAA59987.1; JOINED; Genomic_DNA. DR EMBL; M13118; AAA59987.1; JOINED; Genomic_DNA. DR EMBL; M13119; AAA59987.1; JOINED; Genomic_DNA. DR EMBL; M13120; AAA59987.1; JOINED; Genomic_DNA. DR EMBL; K03199; AAA59989.1; -; mRNA. DR EMBL; M14694; AAA61211.1; -; mRNA. DR EMBL; M14695; AAA61212.1; -; mRNA. DR EMBL; M22898; AAA59988.1; -; Genomic_DNA. DR EMBL; M22882; AAA59988.1; JOINED; Genomic_DNA. DR EMBL; M22883; AAA59988.1; JOINED; Genomic_DNA. DR EMBL; M22884; AAA59988.1; JOINED; Genomic_DNA. DR EMBL; M22887; AAA59988.1; JOINED; Genomic_DNA. DR EMBL; M22888; AAA59988.1; JOINED; Genomic_DNA. DR EMBL; M22894; AAA59988.1; JOINED; Genomic_DNA. DR EMBL; M22895; AAA59988.1; JOINED; Genomic_DNA. DR EMBL; M22896; AAA59988.1; JOINED; Genomic_DNA. DR EMBL; M22897; AAA59988.1; JOINED; Genomic_DNA. DR EMBL; X01405; CAA25652.1; -; mRNA. DR EMBL; X60011; CAA42626.1; -; mRNA. DR EMBL; X60012; CAA42627.1; ALT_TERM; mRNA. DR EMBL; X60013; CAA42628.1; -; mRNA. DR EMBL; X60014; CAA42629.1; -; mRNA. DR EMBL; X60015; CAA42630.1; -; mRNA. DR EMBL; X60016; CAA42631.1; -; mRNA. DR EMBL; X60017; CAA42632.1; -; mRNA. DR EMBL; X60018; CAA42633.1; -; mRNA. DR EMBL; X60019; CAA42634.1; -; mRNA. DR EMBL; X60020; CAA42635.1; -; mRNA. DR EMBL; AF135121; AAD28535.1; -; Genomic_DNA. 
DR EMBL; AF135120; AAD28535.1; JOINED; Genomic_DNA. DR EMBL; AF307851; AAG28785.1; -; mRNA. DR EMBL; X54156; CAA38095.1; -; Genomic_DNA. DR EMBL; U94788; AAC12971.1; -; Genomic_DNA. DR EMBL; AF136271; AAD28628.1; -; Genomic_DNA. DR EMBL; AF136270; AAD28628.1; JOINED; Genomic_DNA. DR EMBL; AB082923; BAC16799.1; -; mRNA. DR EMBL; AY838896; AAV80424.1; -; Genomic_DNA. DR EMBL; BC003596; AAH03596.1; -; mRNA. DR EMBL; AY429684; AAR10356.1; -; mRNA. DR EMBL; AY390341; AAQ90158.1; -; Genomic_DNA. DR EMBL; AY359814; AAR13239.1; -; Genomic_DNA. DR EMBL; U63714; AAB39322.1; -; Genomic_DNA. DR EMBL; AF209136; AAF36362.1; -; Genomic_DNA. DR EMBL; AF209128; AAF36354.1; -; Genomic_DNA. DR EMBL; AF209129; AAF36355.1; -; Genomic_DNA. DR EMBL; AF209130; AAF36356.1; -; Genomic_DNA. DR EMBL; AF209131; AAF36357.1; -; Genomic_DNA. DR EMBL; AF209132; AAF36358.1; -; Genomic_DNA. DR EMBL; AF209133; AAF36359.1; -; Genomic_DNA. DR EMBL; AF209134; AAF36360.1; -; Genomic_DNA. DR EMBL; AF209135; AAF36361.1; -; Genomic_DNA. DR EMBL; AF209148; AAF36374.1; -; Genomic_DNA. DR EMBL; AF209149; AAF36375.1; -; Genomic_DNA. DR EMBL; AF209150; AAF36376.1; -; Genomic_DNA. DR EMBL; AF209151; AAF36377.1; -; Genomic_DNA. DR EMBL; AF209152; AAF36378.1; -; Genomic_DNA. DR EMBL; AF209153; AAF36379.1; -; Genomic_DNA. DR EMBL; AF209154; AAF36380.1; -; Genomic_DNA. DR EMBL; AF209155; AAF36381.1; -; Genomic_DNA. DR EMBL; AF209156; AAF36382.1; -; Genomic_DNA. DR EMBL; AF210309; AAF63442.1; -; Genomic_DNA. DR EMBL; AF210308; AAF63442.1; JOINED; Genomic_DNA. DR EMBL; AF210310; AAF63443.1; -; Genomic_DNA. DR EMBL; AF240684; AAK76358.1; -; Genomic_DNA. DR EMBL; AF240685; AAK76359.1; -; Genomic_DNA. DR EMBL; AY270155; AAP30003.1; -; Genomic_DNA. DR PIR; A25224; DNHU53. DR PDB; 1A1U; NMR; A/C=324-358. DR PDB; 1AIE; X-ray; @=326-356. DR PDB; 1C26; X-ray; A=325-356. DR PDB; 1DT7; NMR; X/Y=367-388. DR PDB; 1GZH; X-ray; A/C=-. DR PDB; 1H26; X-ray; E=376-386. DR PDB; 1HS5; NMR; A/B=324-357. DR PDB; 1KZY; X-ray; A/B=95-289. 
DR PDB; 1OLG; NMR; A/B/C/D=319-360. DR PDB; 1OLH; NMR; A/B/C/D=319-360. DR PDB; 1PES; NMR; A/B/C/D=325-355. DR PDB; 1PET; NMR; A/B/C/D=325-355. DR PDB; 1SAE; NMR; A/B/C/D=319-360. DR PDB; 1SAF; NMR; A/B/C/D=319-360. DR PDB; 1SAG; NMR; A/B/C/D=319-360. DR PDB; 1SAH; NMR; A/B/C/D=319-360. DR PDB; 1SAI; NMR; A/B/C/D=319-360. DR PDB; 1SAJ; NMR; A/B/C/D=319-360. DR PDB; 1SAK; NMR; A/B/C/D=319-360. DR PDB; 1SAL; NMR; A/B/C/D=319-360. DR PDB; 1TSR; X-ray; A/B/C=94-312. DR PDB; 1TUP; X-ray; A/B/C=94-312. DR PDB; 1UOL; X-ray; A/B=94-312. DR PDB; 1YCQ; X-ray; B=13-29. DR PDB; 1YCR; X-ray; B=15-29. DR PDB; 1YCS; X-ray; A=94-292. DR PDB; 2BIM; X-ray; A/B=94-312. DR PDB; 2BIN; X-ray; A=94-312. DR PDB; 2BIO; X-ray; A=94-312. DR PDB; 2BIP; X-ray; A=94-312. DR PDB; 2BIQ; X-ray; A=94-312. DR PDB; 3SAK; NMR; A/B/C/D=319-360. DR IntAct; P04637; -. DR TRANSFAC; T00671; -. DR SWISS-2DPAGE; P04637; HUMAN. DR Ensembl; ENSG00000141510; Homo sapiens. DR HGNC; HGNC:11998; TP53. DR H-InvDB; HIX0013510; -. DR Reactome; P04637; -. DR MIM; 191170; -. DR MIM; 133239; -. DR MIM; 151623; -. DR MIM; 275355; -. DR MIM; 211980; -. DR MIM; 260500; -. DR GO; GO:0005739; C:mitochondrion; IDA. DR GO; GO:0005730; C:nucleolus; IDA. DR GO; GO:0005524; F:ATP binding; IDA. DR GO; GO:0005507; F:copper ion binding; IDA. DR GO; GO:0000739; F:DNA strand annealing activity; IDA. DR GO; GO:0004518; F:nuclease activity; TAS. DR GO; GO:0005515; F:protein binding; IPI. DR GO; GO:0046982; F:protein heterodimerization activity; IPI. DR GO; GO:0003700; F:transcription factor activity; IDA. DR GO; GO:0008270; F:zinc ion binding; TAS. DR GO; GO:0006915; P:apoptosis; IDA. DR GO; GO:0006284; P:base-excision repair; TAS. DR GO; GO:0008635; P:caspase activation via cytochrome c; IDA. DR GO; GO:0007569; P:cell aging; IMP. DR GO; GO:0007050; P:cell cycle arrest; TAS. DR GO; GO:0000075; P:cell cycle checkpoint; TAS. DR GO; GO:0030154; P:cell differentiation; TAS. DR GO; GO:0008283; P:cell proliferation; TAS. 
DR GO; GO:0008630; P:DNA damage response, signal transduction re...; TAS. DR GO; GO:0006310; P:DNA recombination; TAS. DR GO; GO:0008628; P:induction of apoptosis by hormones; TAS. DR GO; GO:0030308; P:negative regulation of cell growth; IMP. DR GO; GO:0051097; P:negative regulation of helicase activity; TAS. DR GO; GO:0006289; P:nucleotide-excision repair; IMP. DR GO; GO:0051262; P:protein tetramerization; TAS. DR GO; GO:0046902; P:regulation of mitochondrial membrane permea...; TAS. DR GO; GO:0006355; P:regulation of transcription, DNA-dependent; IDA. DR InterPro; IPR002117; P53. DR InterPro; IPR011615; P53_DNA_bd. DR InterPro; IPR012346; P53_RUNT_DNA_bd. DR InterPro; IPR010991; p53_tetrameristn. DR Pfam; PF00870; P53; 1. DR Pfam; PF07710; P53_tetramer; 1. DR PRINTS; PR00386; P53SUPPRESSR. DR ProDom; PD002681; P53; 1. DR PROSITE; PS00348; P53; 1. KW 3D-structure; Acetylation; Activator; Alternative splicing; KW Anti-oncogene; Apoptosis; Cell cycle; Disease mutation; DNA-binding; KW Glycoprotein; Li-Fraumeni syndrome; Metal-binding; Nuclear protein; KW Phosphorylation; Polymorphism; Transcription; KW Transcription regulation; Zinc. FT DNA_BIND 102 292 FT REGION 1 83 Interaction with HRMT1L2. FT REGION 1 44 Transcription activation (acidic). FT REGION 100 370 Interaction with HIPK1 (By similarity). FT REGION 116 292 Interaction with AXIN1 (By similarity). FT REGION 241 248 Interacts with the 53BP2 SH3 domain. FT REGION 300 393 Interaction with CARM1. FT REGION 319 360 Interaction with HIPK2. FT REGION 325 356 Oligomerization. FT REGION 368 387 Basic (repression of DNA-binding). FT MOTIF 305 321 Bipartite nuclear localization signal. FT MOTIF 339 350 Nuclear export signal. FT METAL 176 176 Zinc. FT METAL 179 179 Zinc. FT METAL 238 238 Zinc. FT METAL 242 242 Zinc. FT BINDING 392 392 5'-phospho-RNA (covalent). FT MOD_RES 15 15 Phosphoserine (by PRPK). FT MOD_RES 18 18 Phosphothreonine (by VRK1). FT MOD_RES 46 46 Phosphoserine (by HIPK2). 
FT MOD_RES 55 55 Phosphothreonine (by TAF1). FT MOD_RES 305 305 N6-acetyllysine. FT MOD_RES 315 315 Phosphoserine (by CDC2). FT MOD_RES 373 373 N6-acetyllysine. FT MOD_RES 382 382 N6-acetyllysine. FT VARSPLIC 332 341 IRGRERFEMF -> DGTSFQKENC (in isoform 2). FT /FTId=VSP_006535. FT VARSPLIC 342 393 Missing (in isoform 2). FT /FTId=VSP_006536. FT VARIANT 7 7 D -> H (in a skin tumor). FT /FTId=VAR_005851. FT VARIANT 35 35 L -> F (in a liver tumor). FT /FTId=VAR_005852. FT VARIANT 43 43 L -> S (in a renal tumor). FT /FTId=VAR_005853. FT VARIANT 47 47 P -> S (in dbSNP:1800371). FT /FTId=VAR_014632. FT VARIANT 53 53 W -> C (in a leukemia and a lymphoma). FT /FTId=VAR_005854. FT VARIANT 60 60 P -> S (in a leukemia and a lymphoma). FT /FTId=VAR_005855. FT VARIANT 72 72 P -> R (in dbSNP:1042522). FT /FTId=VAR_005856. FT VARIANT 79 79 A -> T (in clone P53-H-1). FT /FTId=VAR_005857. FT VARIANT 87 87 P -> Q (in a brain tumor). FT /FTId=VAR_005858. FT VARIANT 94 94 S -> T (in a colon tumor). FT /FTId=VAR_005859. FT VARIANT 110 110 R -> C (in a liver and an uterus tumor). FT /FTId=VAR_005860. FT VARIANT 110 110 R -> L (in a liver tumor). FT /FTId=VAR_005861. FT VARIANT 110 110 R -> P (in a breast tumor). FT /FTId=VAR_005862. FT VARIANT 113 113 F -> C (in a lung tumor). FT /FTId=VAR_005863. FT VARIANT 125 125 T -> M (in a lung tumor). FT /FTId=VAR_005864. FT VARIANT 126 126 Y -> D (in a colorectal tumor). FT /FTId=VAR_005865. FT VARIANT 126 126 Y -> N (in a leukemia and a lymphoma). FT /FTId=VAR_005866. FT VARIANT 127 127 S -> F (in a lung tumor). FT /FTId=VAR_005867. FT VARIANT 128 128 P -> S (in a breast tumor). FT /FTId=VAR_005868. FT VARIANT 129 129 A -> D (in a sarcoma). FT /FTId=VAR_005869. FT VARIANT 130 130 L -> R (in a liver tumor). FT /FTId=VAR_005870. FT VARIANT 131 131 N -> K (in a colon tumor). FT /FTId=VAR_005872. FT VARIANT 131 131 N -> S (in a liver tumor). FT /FTId=VAR_005871. FT VARIANT 132 132 K -> M (in a sarcoma). FT /FTId=VAR_005873. 
FT VARIANT 132 132 K -> Q (in a breast tumor). FT /FTId=VAR_005874. FT VARIANT 133 133 M -> T (in LFS). FT /FTId=VAR_005875. FT VARIANT 135 135 C -> F (in a colon tumor). FT /FTId=VAR_005877. FT VARIANT 135 135 C -> S (in a colon tumor). FT /FTId=VAR_005876. FT VARIANT 136 136 Q -> E (in a breast tumor). FT /FTId=VAR_005878. FT VARIANT 136 136 Q -> K (in a colon tumor). FT /FTId=VAR_005879. FT VARIANT 137 137 L -> Q (in a liver tumor). FT /FTId=VAR_005880. FT VARIANT 138 138 A -> P (in a lung tumor). FT /FTId=VAR_005881. FT VARIANT 139 139 K -> N (in a breast, an ovary tumor, a FT leukemia and a lymphoma). FT /FTId=VAR_005882. FT VARIANT 140 140 T -> Y (in a leukemia and a lymphoma). FT /FTId=VAR_005883. FT VARIANT 141 141 C -> F (in a breast tumor). FT /FTId=VAR_005885. FT VARIANT 141 141 C -> G (in an ovary tumor). FT /FTId=VAR_005884. FT VARIANT 141 141 C -> Y (in many types of tumors). FT /FTId=VAR_005886. FT VARIANT 143 143 V -> A (in a colon tumor; strong DNA FT binding ability at 32.5 degrees Celsius; FT strong reduction of transcriptional FT activity at 37.5 degrees Celsius). FT /FTId=VAR_005887. FT VARIANT 144 144 Q -> P (in a leukemia and a lymphoma). FT /FTId=VAR_005888. FT VARIANT 145 145 L -> P (in a brain tumor). FT /FTId=VAR_005889. FT VARIANT 145 145 L -> Q (in an esophagus tumor). FT /FTId=VAR_005890. FT VARIANT 147 147 V -> D (in an ovary tumor). FT /FTId=VAR_005891. FT VARIANT 147 147 V -> G (in a prostate tumor). FT /FTId=VAR_005892. FT VARIANT 149 149 S -> P (in a breast tumor). FT /FTId=VAR_005893. FT VARIANT 151 151 P -> A (in a brain and a colon tumor). FT /FTId=VAR_005894. FT VARIANT 151 151 P -> S (in many types of tumors). FT /FTId=VAR_005895. FT VARIANT 151 151 P -> T (in a breast tumor). FT /FTId=VAR_005896. FT VARIANT 152 152 P -> L (in an esophagus tumor). FT /FTId=VAR_005897. FT VARIANT 152 152 P -> S (in oral squamous cell carcinoma). FT /FTId=VAR_005898. FT VARIANT 153 153 P -> T (in a colon tumor). FT /FTId=VAR_005899. 
FT VARIANT 154 154 G -> V (in esophagus tumor). FT /FTId=VAR_005900. FT VARIANT 155 155 T -> A (in an esophagus tumor). FT /FTId=VAR_005901. FT VARIANT 156 156 R -> P (in an osteosarcoma cell line). FT /FTId=VAR_005902. FT VARIANT 157 157 V -> D (in a liver tumor). FT /FTId=VAR_005903. FT VARIANT 157 157 V -> I (in colorectal carcinoma; from a FT patient with Turcot syndrome). FT /FTId=VAR_012977. FT VARIANT 157 157 V -> S (in a S. African hepatocellular FT carcinoma). FT /FTId=VAR_005904. FT VARIANT 158 158 R -> C (in a noninvasive head and neck FT tumor). FT /FTId=VAR_005905. FT VARIANT 158 158 R -> G (in a brain and a lung tumor). FT /FTId=VAR_005906. FT VARIANT 158 158 R -> H (in many types of tumors). FT /FTId=VAR_005907. FT VARIANT 160 160 M -> I (in a lung and a skin tumor). FT /FTId=VAR_005908. FT VARIANT 161 161 A -> S (in a brain tumor). FT /FTId=VAR_005909. FT VARIANT 162 162 I -> S (in a brain tumor). FT /FTId=VAR_005910. FT VARIANT 162 162 I -> V (in an ovary tumor). FT /FTId=VAR_005911. FT VARIANT 163 163 Y -> H (in HNSCC). FT /FTId=VAR_005912. FT VARIANT 164 164 K -> N (in a lung tumor). FT /FTId=VAR_005913. FT VARIANT 164 164 K -> Q (in a breast tumor). FT /FTId=VAR_005914. FT VARIANT 165 165 Q -> L (in a breast tumor). FT /FTId=VAR_005915. FT VARIANT 165 165 Q -> R (in an ovary tumor). FT /FTId=VAR_005916. FT VARIANT 166 166 S -> L (in a lung tumor). FT /FTId=VAR_005917. FT VARIANT 168 168 H -> R (in a brain tumor). FT /FTId=VAR_005918. FT VARIANT 169 169 M -> I (in oral squamous cell carcinoma). FT /FTId=VAR_005919. FT VARIANT 169 169 M -> T (in a noninvasive head and neck FT tumor). FT /FTId=VAR_005920. FT VARIANT 170 170 T -> M (in a colon tumor). FT /FTId=VAR_005921. FT VARIANT 170 170 T -> S (in a colon tumor). FT /FTId=VAR_005922. FT VARIANT 172 172 V -> A (in a prostate tumor). FT /FTId=VAR_005923. FT VARIANT 173 173 V -> E (in a colon tumor). FT /FTId=VAR_005924. FT VARIANT 173 173 V -> L (in a cervical carcinoma). FT /FTId=VAR_005925. 
FT VARIANT 173 173 V -> M (in a colon tumor). FT /FTId=VAR_005926. FT VARIANT 174 174 R -> H (in the cell line Detroit 562 of FT squamous cell carcinoma). FT /FTId=VAR_005927. FT VARIANT 175 175 R -> C (in a colon and an uterus tumor). FT /FTId=VAR_005928. FT VARIANT 175 175 R -> G (in a brain tumor). FT /FTId=VAR_005929. FT VARIANT 175 175 R -> H (in LFS and colon/esophagus/ FT gastric tumors). FT /FTId=VAR_005932. FT VARIANT 175 175 R -> L (in a breast and a colon tumor). FT /FTId=VAR_005930. FT VARIANT 175 175 R -> P (in a cervical carcinoma). FT /FTId=VAR_005931. FT VARIANT 176 176 C -> F (in esophagus tumors and many FT types of tumors). FT /FTId=VAR_005933. FT VARIANT 176 176 C -> W (in a lung tumor). FT /FTId=VAR_005934. FT VARIANT 177 177 P -> L (in a skin tumor). FT /FTId=VAR_005935. FT VARIANT 178 178 H -> HPHP (in a Burkitt's lymphoma). FT /FTId=VAR_005936. FT VARIANT 181 181 R -> L (in a cervical carcinoma). FT /FTId=VAR_005937. FT VARIANT 182 182 C -> S (in a stomach tumor). FT /FTId=VAR_005938. FT VARIANT 184 184 D -> Y (in a leukemia and a lymphoma). FT /FTId=VAR_005939. FT VARIANT 186 186 D -> Y (in a breast tumor). FT /FTId=VAR_005940. FT VARIANT 187 187 G -> C (in a breast tumor). FT /FTId=VAR_005941. FT VARIANT 187 187 G -> S (in a leukemia and a lymphoma). FT /FTId=VAR_005942. FT VARIANT 189 189 A -> P (in an ovary tumor). FT /FTId=VAR_005943. FT VARIANT 190 190 P -> L (in a colorectal tumor). FT /FTId=VAR_005944. FT VARIANT 191 191 P -> T (in a colon tumor). FT /FTId=VAR_005945. FT VARIANT 192 192 Q -> R (in a colon tumor). FT /FTId=VAR_005946. FT VARIANT 193 193 H -> D (in an uterus tumor). FT /FTId=VAR_005947. FT VARIANT 193 193 H -> R (in LFS). FT /FTId=VAR_005948. FT VARIANT 194 194 L -> P (in a colon tumor). FT /FTId=VAR_005949. FT VARIANT 194 194 L -> R (in the cell line HU 281 of FT squamous cell carcinoma). FT /FTId=VAR_005950. FT VARIANT 195 195 I -> T (in oral squamous cell carcinoma). FT /FTId=VAR_005951. 
FT VARIANT 198 198 E -> K (in HNSCC). FT /FTId=VAR_005952. FT VARIANT 205 205 Y -> C (in oral squamous cell carcinoma). FT /FTId=VAR_005953. FT VARIANT 205 205 Y -> D (in HNSCC). FT /FTId=VAR_005954. FT VARIANT 213 213 R -> Q (in a Burkitt's lymphoma and a FT colorectal tumor). FT /FTId=VAR_005955. FT VARIANT 216 216 V -> M (in HNSCC). FT /FTId=VAR_005956. FT VARIANT 220 220 Y -> C (in oral squamous cell carcinoma). FT /FTId=VAR_005957. FT VARIANT 220 220 Y -> H (in a colon tumor). FT /FTId=VAR_005958. FT VARIANT 220 220 Y -> S (in HNSCC). FT /FTId=VAR_005959. FT VARIANT 228 228 D -> E (in HNSCC). FT /FTId=VAR_005960. FT VARIANT 230 230 T -> I (in oral squamous cell carcinoma). FT /FTId=VAR_005961. FT VARIANT 232 232 I -> T (in an anal tumor). FT /FTId=VAR_005962. FT VARIANT 234 234 Y -> C (in HNSCC). FT /FTId=VAR_005963. FT VARIANT 234 234 Y -> H (in a Burkitt's lymphoma). FT /FTId=VAR_005964. FT VARIANT 237 237 M -> I (in a colon tumor). FT /FTId=VAR_005965. FT VARIANT 238 238 C -> F (in an anal tumor). FT /FTId=VAR_005966. FT VARIANT 238 238 C -> Y (in a colorectal tumor). FT /FTId=VAR_005967. FT VARIANT 240 240 S -> I (in an anal tumor). FT /FTId=VAR_005968. FT VARIANT 241 241 S -> F (in a colon tumor). FT /FTId=VAR_005969. FT VARIANT 242 242 C -> F (in a skin tumor). FT /FTId=VAR_005970. FT VARIANT 245 245 G -> A (in a renal tumor). FT /FTId=VAR_005971. FT VARIANT 245 245 G -> C (in LFS, colon and larynx tumors). FT /FTId=VAR_005972. FT VARIANT 245 245 G -> D (in LFS and a colon tumor). FT /FTId=VAR_005973. FT VARIANT 245 245 G -> S (in esophageal adenocarcinoma and FT many types of tumors). FT /FTId=VAR_005974. FT VARIANT 245 245 G -> V (in HNSCC). FT /FTId=VAR_005975. FT VARIANT 246 246 M -> R (in a liver tumor). FT /FTId=VAR_005976. FT VARIANT 246 246 M -> T (in a leukemia and a lymphoma). FT /FTId=VAR_005977. FT VARIANT 246 246 M -> V (in many types of tumors). FT /FTId=VAR_005978. FT VARIANT 247 247 N -> I (in a lung tumor). FT /FTId=VAR_005980. 
FT VARIANT 247 247 N -> W (in a skin tumor). FT /FTId=VAR_005979. FT VARIANT 248 248 R -> G (in an endocrine tumor). FT /FTId=VAR_005981. FT VARIANT 248 248 R -> L (in tumors of hypopharynx, larynx FT and tonsil). FT /FTId=VAR_005982. FT VARIANT 248 248 R -> Q (in LFS and many types of tumors). FT /FTId=VAR_005983. FT VARIANT 248 248 R -> W (in LFS, esophageal adenocarcinoma FT and many types of tumors). FT /FTId=VAR_005984. FT VARIANT 249 249 R -> G (in a breast tumor). FT /FTId=VAR_005985. FT VARIANT 249 249 R -> S (in many types of tumors). FT /FTId=VAR_005986. FT VARIANT 251 251 I -> N (in HNSCC). FT /FTId=VAR_005987. FT VARIANT 252 252 L -> P (in LFS and many types of tumors). FT /FTId=VAR_005988. FT VARIANT 254 254 I -> N (in a breast tumor). FT /FTId=VAR_017908. FT VARIANT 254 254 I -> T (in a colon tumor). FT /FTId=VAR_017909. FT VARIANT 257 257 L -> P (in HNSCC). FT /FTId=VAR_005989. FT VARIANT 258 258 E -> D (in a colorectal tumor). FT /FTId=VAR_005990. FT VARIANT 258 258 E -> K (in LFS). FT /FTId=VAR_005991. FT VARIANT 272 272 V -> L (in LFS). FT /FTId=VAR_005992. FT VARIANT 273 273 R -> C (in LFS, colorectal tumor and oral FT squamous cell carcinoma). FT /FTId=VAR_005993. FT VARIANT 273 273 R -> G (in HNSCC). FT /FTId=VAR_005994. FT VARIANT 273 273 R -> H (in LFS, colon and esophagus FT tumors). FT /FTId=VAR_005995. FT VARIANT 274 274 V -> F (in a colorectal tumor). FT /FTId=VAR_005997. FT VARIANT 275 275 C -> W (in a breast and a stomach tumor). FT /FTId=VAR_005999. FT VARIANT 275 275 C -> Y (in LFS and tumors of brain, lung, FT kidney, stomach). FT /FTId=VAR_005998. FT VARIANT 277 277 C -> G (in a lung tumor). FT /FTId=VAR_006000. FT VARIANT 278 278 P -> A (in a breast tumor). FT /FTId=VAR_006001. FT VARIANT 278 278 P -> H (in a leukemia and a lymphoma). FT /FTId=VAR_006002. FT VARIANT 278 278 P -> L (in an esophagus and a lung FT tumor). FT /FTId=VAR_006003. FT VARIANT 278 278 P -> S (in oral squamous cell carcinoma). FT /FTId=VAR_006004. 
FT VARIANT 278 278 P -> T (in HNSCC; same patient as FT mutation His-281). FT /FTId=VAR_006005. FT VARIANT 279 279 G -> E (in a colorectal tumor). FT /FTId=VAR_006006. FT VARIANT 280 280 R -> I (in a colorectal tumor). FT /FTId=VAR_006008. FT VARIANT 280 280 R -> K (in a breast tumor). FT /FTId=VAR_006007. FT VARIANT 280 280 R -> T (in nasopharyngeal carcinoma). FT /FTId=VAR_006009. FT VARIANT 281 281 D -> A (in a leukemia and a lymphoma). FT /FTId=VAR_006010. FT VARIANT 281 281 D -> E (in many types of tumors). FT /FTId=VAR_006011. FT VARIANT 281 281 D -> G (in many types of tumors). FT /FTId=VAR_006012. FT VARIANT 281 281 D -> H (in HNSCC; same patient as FT mutation Thr-278). FT /FTId=VAR_006013. FT VARIANT 281 281 D -> V (in a colorectal tumor). FT /FTId=VAR_006014. FT VARIANT 282 282 R -> L (in a breast tumor). FT /FTId=VAR_006015. FT VARIANT 282 282 R -> W (in esophageal adenocarcinoma and FT many types of tumors). FT /FTId=VAR_006016. FT VARIANT 283 283 R -> C (in a colon tumor). FT /FTId=VAR_006017. FT VARIANT 283 283 R -> G (in a lung tumor). FT /FTId=VAR_006018. FT VARIANT 283 283 R -> H (in a colon tumor). FT /FTId=VAR_006019. FT VARIANT 283 283 R -> P (in a breast and a lung tumor). FT /FTId=VAR_006020. FT VARIANT 284 284 T -> A (in a colorectal tumor). FT /FTId=VAR_006021. FT VARIANT 284 284 T -> P (in a lung tumor). FT /FTId=VAR_006022. FT VARIANT 285 285 E -> K (in many types of tumors). FT /FTId=VAR_006023. FT VARIANT 285 285 E -> Q (in an uterus tumor). FT /FTId=VAR_006024. FT VARIANT 285 285 E -> V (in a liver tumor). FT /FTId=VAR_006025. FT VARIANT 286 286 E -> A (in LFS). FT /FTId=VAR_006026. FT VARIANT 286 286 E -> D (in a liver tumor). FT /FTId=VAR_006027. FT VARIANT 286 286 E -> G (in tumors of colon, lung, head FT and neck). FT /FTId=VAR_006028. FT VARIANT 286 286 E -> K (in many types of tumors). FT /FTId=VAR_006029. FT VARIANT 286 286 E -> Q (in esophageal adenocarcinoma). FT /FTId=VAR_006030. FT VARIANT 292 292 K -> I (in LFS). 
FT /FTId=VAR_015819. FT VARIANT 296 296 H -> P (in HNSCC). FT /FTId=VAR_006031. FT VARIANT 300 300 P -> R (in a skin tumor). FT /FTId=VAR_006032. FT VARIANT 301 301 P -> L (in a colon tumor). FT /FTId=VAR_006033. FT VARIANT 302 302 G -> E (in a colon tumor). FT /FTId=VAR_006034. FT VARIANT 302 302 G -> V (in a colon tumor). FT /FTId=VAR_006035. FT VARIANT 306 306 R -> Q (in a sarcoma). FT /FTId=VAR_006036. FT VARIANT 307 307 A -> T (in a breast tumor). FT /FTId=VAR_006037. FT VARIANT 309 309 P -> S (in a colon tumor). FT /FTId=VAR_006038. FT VARIANT 325 325 G -> V (in LFS). FT /FTId=VAR_006039. FT VARIANT 334 334 G -> V (in a lung tumor). FT /FTId=VAR_006040. FT VARIANT 337 337 R -> C (in LFS; nonclassical form; also FT found in a liver tumor). FT /FTId=VAR_006041. FT VARIANT 339 339 E -> K. FT /FTId=VAR_022316. FT VARIANT 366 366 S -> A. FT /FTId=VAR_022317. FT MUTAGEN 46 46 S->A: Abolishes phosphorylation by HIPK2 FT and acetylation of K-382 by CREBBP. FT MUTAGEN 55 55 T->A: Blocks phosphorylation by TAF1. FT MUTAGEN 135 135 C->Y: Decreased E6-mediated binding to FT E6-AP. FT MUTAGEN 382 382 K->A: Abolishes acetylation by CREBBP. FT CONFLICT 76 76 A -> G (in Ref. 2; AAA59987). FT CONFLICT 155 155 T -> P (in Ref. 16). FT CONFLICT 254 254 I -> D (in Ref. 7; CAA42635). FT CONFLICT 262 262 G -> V (in Ref. 19). FT CONFLICT 282 282 R -> Q (in Ref. 18). 
FT STRAND 103 103 FT HELIX 105 107 FT TURN 108 108 FT STRAND 110 112 FT TURN 120 121 FT STRAND 124 127 FT TURN 128 131 FT STRAND 132 135 FT TURN 137 138 FT STRAND 141 146 FT TURN 153 154 FT STRAND 156 163 FT TURN 166 170 FT HELIX 177 181 FT TURN 191 192 FT STRAND 195 197 FT TURN 201 202 FT STRAND 204 207 FT TURN 209 211 FT STRAND 214 219 FT TURN 225 226 FT STRAND 230 236 FT TURN 240 241 FT TURN 243 248 FT STRAND 251 258 FT TURN 260 261 FT STRAND 264 274 FT HELIX 278 286 FT HELIX 335 354 FT HELIX 376 378 FT HELIX 379 384 FT TURN 385 386 SQ SEQUENCE 393 AA; 43653 MW; AD5C149FD8106131 CRC64; MEEPQSDPSV EPPLSQETFS DLWKLLPENN VLSPLPSQAM DDLMLSPDDI EQWFTEDPGP DEAPRMPEAA PPVAPAPAAP TPAAPAPAPS WPLSSSVPSQ KTYQGSYGFR LGFLHSGTAK SVTCTYSPAL NKMFCQLAKT CPVQLWVDST PPPGTRVRAM AIYKQSQHMT EVVRRCPHHE RCSDSDGLAP PQHLIRVEGN LRVEYLDDRN TFRHSVVVPY EPPEVGSDCT TIHYNYMCNS SCMGGMNRRP ILTIITLEDS SGNLLGRNSF EVRVCACPGR DRRTEEENLR KKGEPHHELP PGSTKRALPN NTSSSPQPKK KPLDGEYFTL QIRGRERFEM FRELNEALEL KDAQAGKEPG GSRAHSSHLK SKKGQSTSRH KKLMFKTEGP DSD // bio-1.4.3.0001/test/data/fasta/0000755000004100000410000000000012200110570015651 5ustar www-datawww-databio-1.4.3.0001/test/data/fasta/EFTU_BACSU.fasta0000644000004100000410000000067712200110570020363 0ustar www-datawww-data>sp|P33166|EFTU_BACSU Elongation factor Tu; MAKEKFDRSKSHANIGTIGHVDHGKTTLTAAITTVLHKKSGKGTAMAYDQIDGAPEERER GITISTAHVEYETETRHYAHVDCPGHADYVKNMITGAAQMDGAILVVSAADGPMPQTREH ILLSKNVGVPYIVVFLNKCDMVDDEELLELVEMEVRDLLSEYDFPGDDVPVVKGSALKAL EGDAEWEAKIFELMDAVDEYIPTPERDTEKPFMMPVEDVFSITGRGTVATGRVERGQVKV GDEVEIIGLQEENKKTTVTGVEMFRKLLDYAEAGDNIGALLRGVSREEIQRGQVLAKPGT ITPHSKFKAEVYVLSKEEGGRHTPFFSNYRPQFYFRTTDVTGIIHLPEGVEMVMPGDNTE MNVELISTIAIEEGTRFSIREGGRTVGSGVVSTITE bio-1.4.3.0001/test/data/fasta/example1.txt0000644000004100000410000001124112200110570020125 0ustar www-datawww-data>At1g02580 mRNA (2291 bp) UTR's and CDS aggcgagtggttaatggagaaggaaaaccatgaggacgatggtgagggtttgccacccgaactaaatcagataaaa 
gagcaaatcgaaaaggagagatttctgcatatcaagagaaaattcgagctgagatacattccaagtgtggctactc atgcttcacaccatcaatcgtttgacttaaaccagcccgctgcagaggatgataatggaggagacaacaaatcact tttgtcgagaatgcaaaacccacttcgtcatttcagtgcctcatctgattataattcttacgaagatcaaggttat gttcttgatgaggatcaagattatgctcttgaagaagatgtaccattatttcttgatgaagatgtaccattattac caagtgtcaagcttccaattgttgagaagctaccacgatccattacatgggtcttcaccaaaagtagccagctgat ggctgaaagtgattctgtgattggtaagagacaaatctattatttgaatggtgaggcactagaattgagcagtgaa gaagatgaggaagatgaagaagaagatgaggaagaaatcaagaaagaaaaatgcgaattttctgaagatgtagacc gatttatatggacggttgggcaggactatggtttggatgatctggtcgtgcggcgtgctctcgccaagtacctcga agtggatgtttcggacatattggaaagatacaatgaactcaagcttaagaatgatggaactgctggtgaggcttct gatttgacatccaagacaataactactgctttccaggattttgctgatagacgtcattgccgtcgttgcatgatat tcgattgtcatatgcatgagaagtatgagcccgagtctagatccagcgaagacaaatctagtttgtttgaggatga agatagacaaccatgcagtgagcattgttacctcaaggtgaggagtgtgacagaagctgatcatgtgatggataat gataactctatatcaaacaagattgtggtctcagatccaaacaacactatgtggacgcctgtagagaaggatcttt acttgaaaggaattgagatatttgggagaaacagttgtgatgttgcattaaacatacttcgggggcttaagacgtg cctagagatttacaattacatgcgcgaacaagatcaatgtactatgtcattagaccttaacaaaactacacaaaga cacaatcaggttaccaaaaaagtatctcgaaaaagtagtaggtcggtccgcaaaaaatcgagactccgaaaatatg ctcgttatccgcctgctttaaagaaaacaactagtggagaagctaagttttataagcactacacaccatgcacttg caagtcaaaatgtggacagcaatgcccttgtttaactcacgaaaattgctgcgagaaatattgcgggtgctcaaag gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg ctaatcgtgaatgcgatcca gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc agtgcaaatccaatgcaagaacatgcaattcctccttcaaaccaataaaaagattctcattggaaagtctgatgtt catggatggggtgcatttacatgggactctct taaaaagaatgagtatctcggagaatatactggagaactgatca ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca gctcgaaatcgatgctcgccgtaaaggaaacgagttcaaatttctcaatcactcagcaagacctaactgctacgcc aagttgatgattgtgagaggagatcagaggattggtctatttgcggagagagcaatcgaagaaggtgaggagcttt 
tcttcgactactgctatggaccagaacatgcggattggtcgcgtggtcgagaacctagaaagactggtgcttctaa aaggtctaaggaagcccgtccagctcgttagtttttgatctgaggagaagcagcaattcaagcagtccttttttta tgttatggtatatcaattaataatgtaatgctattttgtgttactaaaccaaaacttaagtttctgttttatttgt tttagggtgttttgtttgtatcatatgtgtcttaactttcaaagttttctttttgtatttcaatttaaaaacaatg tttatgttgtt >At1g65300: mRNA 837bp atgaagagaaagatgaagttatcgttaatagaaaacagtgtatcgaggaaaacaacattcaccaaaaggaagaaag ggatgacgaagaaactaaccgagctagtcactctatgtggtgttgaagcatgtgcggtcgtctatagtccgttcaa ctcgatcccggaggcttggccgtcaagggaaggcgttgaagacgtggtgtcgaaatttatggagttgtcggtgttg gaccggaccaagaagatggtggatcaagagacttttataagtcaaaggatcgccaaagaaaaagagcagctgcaga agctacgtgatgagaaccataattctcagattcgggagttaatgtttggttgtctcaaaggggagacgaatgtgta taatcttgatggaagggatcttcaagatttgagtttatatattgataagtatcttaatggtcttactcgcaggatt ga gatcctTAttgagaacggtgagtcttcttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta tgatttttatgatcagattccaaagaaaattcatggttt taatatgaatatgaataaggattcgaatcaaagtatg gttttggatttgaatcaaaatcttaatgatggagaggacgagggcattccttgcatggacaacaacaactaccacc ccgaaatcgattgtctcgctaccgtcaccactgcccccactgatgtttgtgctcctaacatcaccaatgatctcta g >At1g65300: mRNA 837bp (shortened at end) atgaagagaaagatgaagttatcgttaatagaaaacagtgtatcgaggaaaacaacattcaccaaaaggaagaaag ggatgacgaagaaactaaccgagctagtcactctatgtggtgttgaagcatgtgcggtcgtctatagtccgttcaa ctcgatcccggaggcttggccgtcaagggaaggcgttgaagacgtggtgtcgaaatttatggagttgtcggtgttg gaccggaccaagaagatggtggatcaagagacttttataagtcaaaggatcgccaaagaaaaagagcagctgcaga agctacgtgatgagaaccataattctcagattcgggagttaatgtttggttgtctcaaaggggagacgaatgtgta taatcttgatggaagggatcttcaagatttgagtttatatattgataagtatcttaatggtcttactcgcaggatt gagatcctTAttgagaacggtgagtcttcttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta tgatttttatgatcag >At1g65300: mRNA 837bp (shortened from start) ttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg 
gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta tgatttttatgatcagattccaaagaaaattcatggttttaatatgaatatgaataaggattcgaatcaaagtatg gttttggatttgaatcaaaatcttaatgatggagaggacgagggcattccttgcatggacaacaacaactaccacc ccgaaatcgattgtctcgctaccgtcaccactgcccccactgatgtttgtgctcctaacatcaccaatgatctcta g >At1g02580 - shortened for test - inserted cutpoint gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg ctaatcgtgaatgcgatcca gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc agtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggt tttaattggggtgcatttacatgggactctct taaaaagaatgagtatctcggagaatatactggagaactgatca ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca bio-1.4.3.0001/test/data/fasta/example2.txt0000644000004100000410000000254712200110570020137 0ustar www-datawww-data>At1g11545.1 68414.m01326 xyloglucan:xyloglucosyl transferase, putative / xyloglucan endotransglycosylase, putative / endo-xyloglucan transferase, putative similar to endo-xyloglucan transferase GI:2244732 from [Gossypium hirsutum] actcacggaacaagtgtagattgcattacctctctctctctctctcttcgaaatattcga agtagagacaaccaATGGAGACGGAAAGGAGGATCATAACGAGCTGTTCTGCCATGACGG CTCTGTTCTTGTTCATGACGGCTCTAATGGCGTCGTCCTCTATCGCAGCAACACCGACAC AATCGTTTGAAGATAATTTCAACATTATGTGGTCTGAAAATCACTTCACGACTTCCGATG ATGGAGAGATCTGGAATCTTTCCTTAGATAACGACACCGGATGTGGATTTCAGACAAAGC ACATGTATAGATTCGGATGGTTTAGTATGAAGCTAAAGCTCGTCGGAGGCGACTCCGCCG GCGTCGTCACCGCTTACTACATGTGTTCGGAGAATGGGGCAGGACCGGAGAGAGACGAGA TAGATTTCGAATTTCTAGGGAACCGAACCGGACAGCCTTACATTATTCAGACCAATGTGT ATAAGAACGGAACCGGGAATCGGGAGATGCGACATTCCCTCTGGTTCGACCCGACCAAGG ATTATCACACCTACTCAATTCTTTGGAATAACCACCAGCTTGTGTTCTTCGTGGATAGGG TACCAATTCGAGTATACAAGAACAGTGATAAGGTACCAAACAACGACTTCTTCCCGAACC AGAAGCCGATGTACTTGTTCTCCAGCATTTGGAACGCTGACGATTGGGCTACACGTGGTG GTCTGGAGAAGACTGACTGGAAAAAAGCTCCATTCGTCTCTTCTTACAAGGACTTCGCCG TCGAAGGCTGCCGTTGGAAGGATCCATTCCCTGCATGCGTCTCTACCACAACAGAGAATT 
GGTGGGATCAGTACGACGCGTGGCATTTGTCCAAGACACAGAAGATGGATTATGCGTGGG TGCAGCGTAATCTCGTCGTATACGATTATTGCAAAGACAGTGAGAGGTTCCCTACTCTTC CTTGGGAGTGTTCCATTAGCCCTTGGGCTTAAaatcaattttgttttgagtgtattaaag tggaaatggtttatgtaataattttactctcttttttttggcatttcttattttgttatg gactatatcctctgtttatttatttaattaattatttatttagtcggctat bio-1.4.3.0001/test/data/blast/0000755000004100000410000000000012200110570015660 5ustar www-datawww-databio-1.4.3.0001/test/data/blast/b0002.faa.m00000644000004100000410000001222612200110570017372 0ustar www-datawww-dataBLASTP 2.2.10 [Oct-19-2004] Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402. Query= eco:b0002 thrA, Hs, thrD, thrA2, thrA1; bifunctional: aspartokinase I (N-terminal); homoserine dehydrogenase I (C-terminal) [EC:2.7.2.4 1.1.1.3]; K00003 homoserine dehydrogenase; K00928 aspartate kinase (A) (820 letters) Database: b0002.faa 1 sequences; 820 total letters Searching.done Score E Sequences producing significant alignments: (bits) Value eco:b0002 thrA, Hs, thrD, thrA2, thrA1; bifunctional: aspartokin... 
1567 0.0 >eco:b0002 thrA, Hs, thrD, thrA2, thrA1; bifunctional: aspartokinase I (N-terminal); homoserine dehydrogenase I (C-terminal) [EC:2.7.2.4 1.1.1.3]; K00003 homoserine dehydrogenase; K00928 aspartate kinase (A) Length = 820 Score = 1567 bits (4058), Expect = 0.0 Identities = 806/820 (98%), Positives = 806/820 (98%) Query: 1 MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA 60 MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA Sbjct: 1 MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA 60 Query: 61 LPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINA 120 LPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINA Sbjct: 61 LPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINA 120 Query: 121 ALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIP 180 ALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIP Sbjct: 121 ALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIP 180 Query: 181 ADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQV 240 ADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQV Sbjct: 181 ADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQV 240 Query: 241 PDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRD 300 PDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRD Sbjct: 241 PDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRD 300 Query: 301 EDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISF 360 EDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISF Sbjct: 301 EDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISF 360 Query: 361 CVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAAL 420 CVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAAL Sbjct: 361 CVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAAL 420 Query: 421 ARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQXXXXXXXXXXXXXXAL 480 ARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQ AL Sbjct: 421 
ARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGAL 480 Query: 481 LEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRL 540 LEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRL Sbjct: 481 LEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRL 540 Query: 541 VKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSR 600 VKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSR Sbjct: 541 VKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSR 600 Query: 601 RKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLA 660 RKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLA Sbjct: 601 RKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLA 660 Query: 661 REMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMA 720 REMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMA Sbjct: 661 REMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMA 720 Query: 721 NLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAF 780 NLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAF Sbjct: 721 NLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAF 780 Query: 781 YSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV 820 YSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV Sbjct: 781 YSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV 820 Database: b0002.faa Posted date: Aug 7, 2005 7:29 AM Number of letters in database: 820 Number of sequences in database: 1 Lambda K H 0.319 0.134 0.383 Gapped Lambda K H 0.267 0.0410 0.140 Matrix: BLOSUM62 Gap Penalties: Existence: 11, Extension: 1 Number of Hits to DB: 1986 Number of Sequences: 1 Number of extensions: 52 Number of successful extensions: 8 Number of sequences better than 10.0: 1 Number of HSP's better than 10.0 without gapping: 1 Number of HSP's successfully gapped in prelim test: 0 Number of HSP's that attempted gapping in prelim test: 0 Number of HSP's gapped (non-prelim): 1 length of query: 820 length of database: 820 effective HSP length: 42 effective length of query: 778 effective 
length of database: 778 effective search space: 605284 effective search space used: 605284 T: 11 A: 40 X1: 16 ( 7.4 bits) X2: 38 (14.6 bits) X3: 64 (24.7 bits) S1: 30 (16.7 bits) S2: 30 (16.2 bits) bio-1.4.3.0001/test/data/blast/blastp-multi.m70000644000004100000410000002601012200110570020541 0ustar www-datawww-data blastp blastp 2.2.18 [Mar-02-2008] ~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~"Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs", Nucleic Acids Res. 25:3389-3402. BA000007.faa lcl|1_0 gi|1790845|gb|AAC77338.1| predicted DNA-binding transcriptional regulator [Escherichia coli str. K-12 substr. MG1655] 443 BLOSUM62 0.001 11 1 F 2 lcl|2_0 gi|1790846|gb|AAC77339.1| lipoate-protein ligase A [Escherichia coli str. K-12 346 1 gi|13364823|dbj|BAB38768.1| putative lipoate-protein ligase A [Escherichia coli O157:H7 str. Sakai] BAB38768 562 1 697.197 1798 0 9 346 225 562 1 1 331 335 338 MSTLRLLISDSYDPWFNLAVEECIFRQMPATQRVLFLWRNADTVVIGRAQNPWKECNTRRMEEDNVRLARRSSGGGAVFHDLGNTCFTFMAGKPEYDKTISTSIVLNALNALGVSAEASGRNDLVVKTVEGDRKVSGSAYRETKDRGFHHGTLLLNADLSRLANYLNPDKKKLAAKGITSVRSRVTNLTELLPGITHEQVCEAITEAFFAHYGERVEAEIISPNKTPDLPNFAETFARQSSWEWNFGQAPAFSHLLDERFTWGGVELHFDVEKGHITRAQVFTDSLNPAPLEALAGRLQGCLYRADMLQQECEALLVDFPEQEKELRELSAWMAGAVR MSTLRLLISDSYDPWFNLAVEECIFRQMPATQRVLFLWRNADTVVIGRAQNPWKECNTRRMEEDNVRLARRSSGGGAVFHDLGNTCFTFMAGKPEYDKTISTSIVLNALNALGVSAEASGRNDLVVKTAEGDRKVSGSAYRETKDRGFHHGTLLLNADLSRLANYLNPDKKKLAAKGITSVRSRVTNLTELLPGIPHEQVCEAITEAFFAHYGERVEAEIISPDKTPDLPNFAETFARQSSWEWNFGQAPAFSHLLDERFSWGGVELHFDVEKGHITRAQVFTDSLNPAPLEALAGRLQGCLYRADMLQQECEALLVDFPDQEKELRELSTWIAGAVR MSTLRLLISDSYDPWFNLAVEECIFRQMPATQRVLFLWRNADTVVIGRAQNPWKECNTRRMEEDNVRLARRSSGGGAVFHDLGNTCFTFMAGKPEYDKTISTSIVLNALNALGVSAEASGRNDLVVKT EGDRKVSGSAYRETKDRGFHHGTLLLNADLSRLANYLNPDKKKLAAKGITSVRSRVTNLTELLPGI 
HEQVCEAITEAFFAHYGERVEAEIISP+KTPDLPNFAETFARQSSWEWNFGQAPAFSHLLDERF+WGGVELHFDVEKGHITRAQVFTDSLNPAPLEALAGRLQGCLYRADMLQQECEALLVDFP+QEKELRELS W+AGAVR 5361 1609188 0 0 0.041 0.267 0.14 3 lcl|3_0 gi|1790847|gb|AAC77340.1| conserved protein [Escherichia coli str. K-12 substr. MG1655] 214 1 gi|13364823|dbj|BAB38768.1| putative lipoate-protein ligase A [Escherichia coli O157:H7 str. Sakai] BAB38768 562 1 432.95 1112 7.66702e-123 1 214 1 214 1 1 214 214 214 MARTKLKFRLHRAVIVLFCLALLVALMQGASWFSQNHQRQRNPQLEELARTLARQVTLNVAPLMRTDSPDEKRIQAILDQLTDESRILDAGVYDEQGDLIARSGESVEVRDRLALDGKKAGGYFNQQIVEPIAGKNGPLGYLRLTLDTHTLATEAQQVDNTTNILRLMLLLSLAIGVVLTRTLLQGKRTRWQQSPFLLTASKPVPEEEESEKKE MARTKLKFRLHRAVIVLFCLALLVALMQGASWFSQNHQRQRNPQLEELARTLARQVTLNVAPLMRTDSPDEKRIQAILDQLTDESRILDAGVYDEQGDLIARSGESVEVRDRLALDGKKAGGYFNQQIVEPIAGKNGPLGYLRLTLDTHTLATEAQQVDNTTNILRLMLLLSLAIGVVLTRTLLQGKRTRWQQSPFLLTASKPVPEEEESEKKE MARTKLKFRLHRAVIVLFCLALLVALMQGASWFSQNHQRQRNPQLEELARTLARQVTLNVAPLMRTDSPDEKRIQAILDQLTDESRILDAGVYDEQGDLIARSGESVEVRDRLALDGKKAGGYFNQQIVEPIAGKNGPLGYLRLTLDTHTLATEAQQVDNTTNILRLMLLLSLAIGVVLTRTLLQGKRTRWQQSPFLLTASKPVPEEEESEKKE 5361 1609188 0 0 0.041 0.267 0.14 5 lcl|5_0 gi|1790849|gb|AAC77341.1| 3-phosphoserine phosphatase [Escherichia coli str. K-12 substr. MG1655] 322 1 gi|13364824|dbj|BAB38769.1| 3-phosphoserine phosphatase [Escherichia coli O157:H7 str. 
Sakai] BAB38769 322 1 657.907 1696 0 1 322 1 322 1 1 322 322 322 MPNITWCDLPEDVSLWPGLPLSLSGDEVMPLDYHAGRSGWLLYGRGLDKQRLTQYQSKLGAAMVIVAAWCVEDYQVIRLAGSLTARATRLAHEAQLDVAPLGKIPHLRTPGLLVMDMDSTAIQIECIDEIAKLAGTGEMVAEVTERAMRGELDFTASLRSRVATLKGADANILQQVRENLPLMPGLTQLVLKLETLGWKVAIASGGFTFFAEYLRDKLRLTAVVANELEIMDGKFTGNVIGDIVDAQYKAKTLTRLAQEYEIPLAQTVAIGDGANDLPMIKAAGLGIAYHAKPKVNEKAEVTIRHADLMGVFCILSGSLNQK MPNITWCDLPEDVSLWPGLPLSLSGDEVMPLDYHAGRSGWLLYGRGLDKQRLTQYQSKLGAAMVIVAAWCVEDYQVIRLAGSLTARATRLAHEAQLDVAPLGKIPHLRTPGLLVMDMDSTAIQIECIDEIAKLAGTGEMVAEVTERAMRGELDFTASLRSRVATLKGADANILQQVRENLPLMPGLTQLVLKLETLGWKVAIASGGFTFFAEYLRDKLRLTAVVANELEIMDGKFTGNVIGDIVDAQYKAKTLTRLAQEYEIPLAQTVAIGDGANDLPMIKAAGLGIAYHAKPKVNEKAEVTIRHADLMGVFCILSGSLNQK MPNITWCDLPEDVSLWPGLPLSLSGDEVMPLDYHAGRSGWLLYGRGLDKQRLTQYQSKLGAAMVIVAAWCVEDYQVIRLAGSLTARATRLAHEAQLDVAPLGKIPHLRTPGLLVMDMDSTAIQIECIDEIAKLAGTGEMVAEVTERAMRGELDFTASLRSRVATLKGADANILQQVRENLPLMPGLTQLVLKLETLGWKVAIASGGFTFFAEYLRDKLRLTAVVANELEIMDGKFTGNVIGDIVDAQYKAKTLTRLAQEYEIPLAQTVAIGDGANDLPMIKAAGLGIAYHAKPKVNEKAEVTIRHADLMGVFCILSGSLNQK 2 gi|13363792|dbj|BAB37741.1| zinc-transporting ATPase [Escherichia coli O157:H7 str. Sakai] BAB37741 732 1 38.1206 87 0.000899657 190 311 569 668 1 1 39 56 24 123 VLKLETLGWKVAIASGGFTFFAEYLRDKLRLTAVVANELEIMDGKFTGNVIGDIVDAQYKAKTLTRLAQEYEIPLAQTVAIGDGANDLPMIKAAGLGIAYHAKPKVN-EKAEVTIRHADLMGV ISELNALGVKGVILTG----------DNPRAAAAIAGELGL---EFKAGLL-----PEDKVKAVTELNQHA--PLAM---VGDGINDAPAMKAAAIGIAMGSGTDVALETADAALTHNHLRGL + +L LG K I +G D R A +A EL + +F ++ + K K +T L Q PLA +GDG ND P +KAA +GIA + V E A+ + H L G+ 5361 1609188 0 0 0.041 0.267 0.14 bio-1.4.3.0001/test/data/blast/2.2.15.blastp.m70000644000004100000410000020625412200110570020147 0ustar www-datawww-data blastp blastp 2.2.15 [Oct-15-2006] ~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~"Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs", Nucleic Acids Res. 25:3389-3402. 
p53_barbu.fasta lcl|1_0 P53_HUMAN P04637 Cellular tumor antigen p53 (Tumor suppressor p53) (Phosphoprotein p53) (Antigen NY-CO-13). 393 BLOSUM62 10 11 1 F 1 lcl|1_0 P53_HUMAN P04637 Cellular tumor antigen p53 (Tumor suppressor p53) (Phosphoprotein p53) (Antigen NY-CO-13). 393 1 gnl|BL_ORD_ID|13 P53_HUMAN P04637 Cellular tumor antigen p53 (Tumor suppressor p53) (Phosphoprotein p53) (Antigen NY-CO-13). 13 393 1 755.362 1949 0 1 393 1 393 1 1 366 366 393 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDE SWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD 2 gnl|BL_ORD_ID|6 P53_CERAE P13481 Cellular tumor antigen p53 (Tumor suppressor p53). 
6 393 1 730.324 1884 0 1 393 1 393 1 1 353 357 393 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MEEPQSDPSIEPPLSQETFSDLWKLLPENNVLSPLPSQAVDDLMLSPDDLAQWLTEDPGPDEAPRMSEAAPHMAPTPAAPTPAAPAPAPSWPLSSSVPSQKTYHGSYGFRLGFLHSGTAKSVTCTYSPDLNKMFCQLAKTCPVQLWVDSTPPPGSRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYSDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENFRKKGEPCHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPAGSRAHSSHLKSKKGQSTSRHKKFMFKTEGPDSD MEEPQSDPS+EPPLSQETFSDLWKLLPENNVLSPLPSQA+DDLMLSPDD+ QW TEDPGPDE SWPLSSSVPSQKTY GSYGFRLGFLHSGTAKSVTCTYSP LNKMFCQLAKTCPVQLWVDSTPPPG+RVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEY DDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEEN RKKGEP HELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEP GSRAHSSHLKSKKGQSTSRHKK MFKTEGPDSD 3 gnl|BL_ORD_ID|17 P53_MACMU P56424 Cellular tumor antigen p53 (Tumor suppressor p53). 
17 393 1 729.169 1881 0 1 393 1 393 1 1 352 357 393 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MEEPQSDPSIEPPLSQETFSDLWKLLPENNVLSPLPSQAVDDLMLSPDDLAQWLTEDPGPDEAPRMSEAAPPMAPTPAAPTPAAPAPAPSWPLSSSVPSQKTYHGSYGFRLGFLHSGTAKSVTCTYSPDLNKMFCQLAKTCPVQLWVDSTPPPGSRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYSDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENFRKKGEPCHQLPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPAGSRAHSSHLKSKKGQSTSRHKKFMFKTEGPDSD MEEPQSDPS+EPPLSQETFSDLWKLLPENNVLSPLPSQA+DDLMLSPDD+ QW TEDPGPDE SWPLSSSVPSQKTY GSYGFRLGFLHSGTAKSVTCTYSP LNKMFCQLAKTCPVQLWVDSTPPPG+RVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEY DDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEEN RKKGEP H+LPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEP GSRAHSSHLKSKKGQSTSRHKK MFKTEGPDSD 4 gnl|BL_ORD_ID|16 P53_MACFU P61260 Cellular tumor antigen p53 (Tumor suppressor p53). 
16 393 1 729.169 1881 0 1 393 1 393 1 1 352 357 393 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MEEPQSDPSIEPPLSQETFSDLWKLLPENNVLSPLPSQAVDDLMLSPDDLAQWLTEDPGPDEAPRMSEAAPPMAPTPAAPTPAAPAPAPSWPLSSSVPSQKTYHGSYGFRLGFLHSGTAKSVTCTYSPDLNKMFCQLAKTCPVQLWVDSTPPPGSRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYSDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENFRKKGEPCHQLPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPAGSRAHSSHLKSKKGQSTSRHKKFMFKTEGPDSD MEEPQSDPS+EPPLSQETFSDLWKLLPENNVLSPLPSQA+DDLMLSPDD+ QW TEDPGPDE SWPLSSSVPSQKTY GSYGFRLGFLHSGTAKSVTCTYSP LNKMFCQLAKTCPVQLWVDSTPPPG+RVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEY DDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEEN RKKGEP H+LPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEP GSRAHSSHLKSKKGQSTSRHKK MFKTEGPDSD 5 gnl|BL_ORD_ID|15 P53_MACFA P56423 Cellular tumor antigen p53 (Tumor suppressor p53). 
15 393 1 729.169 1881 0 1 393 1 393 1 1 352 357 393 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MEEPQSDPSIEPPLSQETFSDLWKLLPENNVLSPLPSQAVDDLMLSPDDLAQWLTEDPGPDEAPRMSEAAPPMAPTPAAPTPAAPAPAPSWPLSSSVPSQKTYHGSYGFRLGFLHSGTAKSVTCTYSPDLNKMFCQLAKTCPVQLWVDSTPPPGSRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYSDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENFRKKGEPCHQLPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPAGSRAHSSHLKSKKGQSTSRHKKFMFKTEGPDSD MEEPQSDPS+EPPLSQETFSDLWKLLPENNVLSPLPSQA+DDLMLSPDD+ QW TEDPGPDE SWPLSSSVPSQKTY GSYGFRLGFLHSGTAKSVTCTYSP LNKMFCQLAKTCPVQLWVDSTPPPG+RVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEY DDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEEN RKKGEP H+LPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEP GSRAHSSHLKSKKGQSTSRHKK MFKTEGPDSD 6 gnl|BL_ORD_ID|18 P53_MARMO O36006 Cellular tumor antigen p53 (Tumor suppressor p53). 
18 391 1 659.448 1700 0 1 393 1 391 1 1 323 338 2 393 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MEEAQSDLSIEPPLSQETFSDLWNLLPENNVLSPVLSPPMDDLLLSSEDVENWF--DKGPDEALQMSAAPAPKAPTPAASTLAAPSPATSWPLSSSVPSQNTYPGVYGFRLGFLHSGTAKSVTCTYSPSLNKLFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKKSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRAEYLDDRNTFRHSVVVPYEPPEVGSECTTIHYNYMCNSSCMGGMNRRPILTIITLEGSSGNLLGRNSFEVRVCACPGRDRRTEEENFRKRGEPCPEPPPRSTKRALPNGTSSSPQPKKKPLDGEYFTLKIRGRARFEMFQELNEALELKDAQAEKEPGESRPHPSYLKSKKGQSTSRHKKIIFKREGPDSD MEE QSD S+EPPLSQETFSDLW LLPENNVLSP+ S MDDL+LS +D+E WF D GPDE SWPLSSSVPSQ TY G YGFRLGFLHSGTAKSVTCTYSP+LNK+FCQLAKTCPVQLWVDSTPPPGTRVRAMAIYK+SQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLR EYLDDRNTFRHSVVVPYEPPEVGS+CTTIHYNYMCNSSCMGGMNRRPILTIITLE SSGNLLGRNSFEVRVCACPGRDRRTEEEN RK+GEP E PP STKRALPN TSSSPQPKKKPLDGEYFTL+IRGR RFEMF+ELNEALELKDAQA KEPG SR H S+LKSKKGQSTSRHKK++FK EGPDSD 7 gnl|BL_ORD_ID|25 P53_RABIT Q95330 Cellular tumor antigen p53 (Tumor suppressor p53). 
25 391 1 648.277 1671 0 1 393 1 391 1 1 322 338 4 394 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTS-SSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MEESQSDLSLEPPLSQETFSDLWKLLPENNLLTTSLNPPVDDL-LSAEDVANWLNEDP--EEGLRVPAAPAPEAPAPAAPALAAPAPATSWPLSSSVPSQKTYHGNYGFRLGFLHSGTAKSVTCTYSPCLNKLFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKKSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRAEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENFRKKGEPCPELPPGSSKRALPTTTTDSSPQTKKKPLDGEYFILKIRGRERFEMFRELNEALELKDAQAEKEPGGSRAHSSYLKAKKGQSTSRHKKPMFKREGPDSD MEE QSD S+EPPLSQETFSDLWKLLPENN+L+ + +DDL LS +D+ W EDP +E SWPLSSSVPSQKTY G+YGFRLGFLHSGTAKSVTCTYSP LNK+FCQLAKTCPVQLWVDSTPPPGTRVRAMAIYK+SQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLR EYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEEN RKKGEP ELPPGS+KRALP T+ SSPQ KKKPLDGEYF L+IRGRERFEMFRELNEALELKDAQA KEPGGSRAHSS+LK+KKGQSTSRHKK MFK EGPDSD 8 gnl|BL_ORD_ID|9 P53_DELLE Q8SPZ3 Cellular tumor antigen p53 (Tumor suppressor p53). 
9 387 1 635.95 1639 0 1 393 1 387 1 1 318 332 8 394 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSD-SDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MEESQAELGVEPPLSQETFSDLWKLLPENNLLSSELSPAVDDLLLSPEDVANWL--DERPDEAPQMPEPPAPAAPTPAAPAPAT-----SWPLSSFVPSQKTYPGSYGFHLGFLHSGTAKSVTCTYSPALNKLFCQLAKTCPVQLWVSSPPPPGTRVRAMAIYKKSEYMTEVVRRCPHHERCSDYSDGLAPPQHLIRVEGNLRAEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNFMCNSSCMGGMNRRPILTIITLEDSNGNLLGRNSFEVRVCACPGRDRRTEEENFHKKGQSCPELPTGSAKRALPTGTSSSPPQKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGESRAHSSHLKSKKGQSPSRHKKLMFKREGPDSD MEE Q++ VEPPLSQETFSDLWKLLPENN+LS S A+DDL+LSP+D+ W D PDE SWPLSS VPSQKTY GSYGF LGFLHSGTAKSVTCTYSPALNK+FCQLAKTCPVQLWV S PPPGTRVRAMAIYK+S++MTEVVRRCPHHERCSD SDGLAPPQHLIRVEGNLR EYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYN+MCNSSCMGGMNRRPILTIITLEDS+GNLLGRNSFEVRVCACPGRDRRTEEEN KKG+ ELP GS KRALP TSSSP KKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPG SRAHSSHLKSKKGQS SRHKKLMFK EGPDSD 9 gnl|BL_ORD_ID|23 P53_PIG Q9TUB2 Cellular tumor antigen p53 (Tumor suppressor p53). 
23 386 1 607.446 1565 4.50259e-177 1 393 1 386 1 1 308 326 11 395 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSP-LPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSD-SDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MEESQSELGVEPPLSQETFSDLWKLLPENNLLSSELSLAAVNDLLLSP--VTNWLDENPDDASRVPAPPAATAPAPAAPAPAT-------SWPLSSFVPSQKTYPGSYDFRLGFLHSGTAKSVTCTYSPALNKLFCQLAKTCPVQLWVSSPPPPGTRVRAMAIYKKSEYMTEVVRRCPHHERSSDYSDGLAPPQHLIRVEGNLRAEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNFMCNSSCMGGMNRRPILTIITLEDASGNLLGRNSFEVRVCACPGRDRRTEEENFLKKGQSCPEPPPGSTKRALPTSTSSSPVQKKKPLDGEYFTLQIRGRERFEMFRELNDALELKDAQTARESGENRAHSSHLKSKKGQSPSRHKKPMFKREGPDSD MEE QS+ VEPPLSQETFSDLWKLLPENN+LS L A++DL+LSP + W E+P SWPLSS VPSQKTY GSY FRLGFLHSGTAKSVTCTYSPALNK+FCQLAKTCPVQLWV S PPPGTRVRAMAIYK+S++MTEVVRRCPHHER SD SDGLAPPQHLIRVEGNLR EYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYN+MCNSSCMGGMNRRPILTIITLED+SGNLLGRNSFEVRVCACPGRDRRTEEEN KKG+ E PPGSTKRALP +TSSSP KKKPLDGEYFTLQIRGRERFEMFRELN+ALELKDAQ +E G +RAHSSHLKSKKGQS SRHKK MFK EGPDSD 10 gnl|BL_ORD_ID|4 P53_CANFA Q29537 Cellular tumor antigen p53 (Tumor suppressor p53). 
4 381 1 604.749 1558 2.91849e-176 1 393 1 381 1 1 304 325 14 394 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDS-DGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MEESQSELNIDPPLSQETFSELWNLLPENNVLSSELCPAVDELLL-PESVVNWLDEDSDD------------APRMPATSAPTAPGPAPSWPLSSSVPSPKTYPGTYGFRLGFLHSGTAKSVTWTYSPLLNKLFCQLAKTCPVQLWVSSPPPPNTCVRAMAIYKKSEFVTEVVRRCPHHERCSDSSDGLAPPQHLIRVEGNLRAKYLDDRNTFRHSVVVPYEPPEVGSDYTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNVLGRNSFEVRVCACPGRDRRTEEENFHKKGEPCPEPPPGSTKRALPPSTSSSPPQKKKPLDGEYFTLQIRGRERYEMFRNLNEALELKDAQSGKEPGGSRAHSSHLKAKKGQSTSRHKKLMFKREGLDSD MEE QS+ +++PPLSQETFS+LW LLPENNVLS A+D+L+L P+ + W ED SWPLSSSVPS KTY G+YGFRLGFLHSGTAKSVT TYSP LNK+FCQLAKTCPVQLWV S PPP T VRAMAIYK+S+ +TEVVRRCPHHERCSDS DGLAPPQHLIRVEGNLR +YLDDRNTFRHSVVVPYEPPEVGSD TTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGN+LGRNSFEVRVCACPGRDRRTEEEN KKGEP E PPGSTKRALP +TSSSP KKKPLDGEYFTLQIRGRER+EMFR LNEALELKDAQ+GKEPGGSRAHSSHLK+KKGQSTSRHKKLMFK EG DSD 11 gnl|BL_ORD_ID|11 P53_FELCA P41685 Cellular tumor antigen p53 (Tumor suppressor p53). 
11 386 1 602.823 1553 1.10903e-175 1 393 1 386 1 1 303 325 9 394 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDS-DGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MQEPPLELTIEPPLSQETFSELWNLLPENNVLSSELSSAMNELPLS-EDVANWL--DEAPDDASGMSAVPAPAAPAPATPAPAI-----SWPLSSFVPSQKTYPGAYGFHLGFLQSGTAKSVTCTYSPPLNKLFCQLAKTCPVQLWVRSPPPPGTCVRAMAIYKKSEFMTEVVRRCPHHERCPDSSDGLAPPQHLIRVEGNLHAKYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNFMCNSSCMGGMNRRPIITIITLEDSNGKLLGRNSFEVRVCACPGRDRRTEEENFRKKGEPCPEPPPGSTKRALPPSTSSTPPQKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQSGKEPGGSRAHSSHLKAKKGQSTSRHKKPMLKREGLDSD M+EP + ++EPPLSQETFS+LW LLPENNVLS S AM++L LS +D+ W D PD+ SWPLSS VPSQKTY G+YGF LGFL SGTAKSVTCTYSP LNK+FCQLAKTCPVQLWV S PPPGT VRAMAIYK+S+ MTEVVRRCPHHERC DS DGLAPPQHLIRVEGNL +YLDDRNTFRHSVVVPYEPPEVGSDCTTIHYN+MCNSSCMGGMNRRPI+TIITLEDS+G LLGRNSFEVRVCACPGRDRRTEEEN RKKGEP E PPGSTKRALP +TSS+P KKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQ+GKEPGGSRAHSSHLK+KKGQSTSRHKK M K EG DSD 12 gnl|BL_ORD_ID|5 P53_CAVPO Q9WUR6 Cellular tumor antigen p53 (Tumor suppressor p53). 
5 391 1 600.512 1547 5.50404e-175 1 393 1 391 1 1 297 319 2 393 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MEEPHSDLSIEPPLSQETFSDLWKLLPENNVLSDSLSPPMDHLLLSPEEVASWLGENPDGD--GHVSAAPVSEAPTSAGPALVAPAPATSWPLSSSVPSHKPYRGSYGFEVHFLKSGTAKSVTCTYSPGLNKLFCQLAKTCPVQVWVESPPPPGTRVRALAIYKKSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLHAEYVDDRTTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGKLLGRDSFEVRVCACPGRDRRTEEENFRKKGGLCPEPTPGNIKRALPTSTSSSPQPKKKPLDAEYFTLKIRGRKNFEILREINEALEFKDAQTEKEPGESRPHSSYPKSKKGQSTSCHKKLMFKREGLDSD MEEP SD S+EPPLSQETFSDLWKLLPENNVLS S MD L+LSP+++ W E+P D SWPLSSSVPS K Y+GSYGF + FL SGTAKSVTCTYSP LNK+FCQLAKTCPVQ+WV+S PPPGTRVRA+AIYK+SQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNL EY+DDR TFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSG LLGR+SFEVRVCACPGRDRRTEEEN RKKG E PG+ KRALP +TSSSPQPKKKPLD EYFTL+IRGR+ FE+ RE+NEALE KDAQ KEPG SR HSS+ KSKKGQSTS HKKLMFK EG DSD 13 gnl|BL_ORD_ID|27 P53_SHEEP P51664 Cellular tumor antigen p53 (Tumor suppressor p53). 
27 382 1 595.89 1535 1.35569e-173 1 393 1 382 1 1 299 320 13 394 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSD-SDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MEESQAELGVEPPLSQETFSDLWNLLPENNLLSSELSAPVDDLLPYSEDVVTWLDECPNE------------APQMPEPPAQAALAPATSWPLSSFVPSQKTYPGNYGFRLGFLHSGTAKSVTCTYSPSLNKLFCQLAKTCPVQLWVDSPPPPGTRVRAMAIYKKLEHMTEVVRRSPHHERSSDYSDGLAPPQHLIRVEGNLRAEYFDDRNTFRHSVVVPYESPEIESECTTIHYNFMCNSSCMGGMNRRPILTIITLEDSRGNLLGRSSFEVRVCACPGRDRRTEEENFRKKGQSCPEPPPGSTKRALPSSTSSSPQQKKKPLDGEYFTLQIRGRKRFEMFRELNEALELMDAQAGREPGESRAHSSHLKSKKGPSPSCHKKPMLKREGPDSD MEE Q++ VEPPLSQETFSDLW LLPENN+LS S +DDL+ +D+ W E P SWPLSS VPSQKTY G+YGFRLGFLHSGTAKSVTCTYSP+LNK+FCQLAKTCPVQLWVDS PPPGTRVRAMAIYK+ +HMTEVVRR PHHER SD SDGLAPPQHLIRVEGNLR EY DDRNTFRHSVVVPYE PE+ S+CTTIHYN+MCNSSCMGGMNRRPILTIITLEDS GNLLGR+SFEVRVCACPGRDRRTEEEN RKKG+ E PPGSTKRALP++TSSSPQ KKKPLDGEYFTLQIRGR+RFEMFRELNEALEL DAQAG+EPG SRAHSSHLKSKKG S S HKK M K EGPDSD 14 gnl|BL_ORD_ID|19 P53_MESAU Q00366 Cellular tumor antigen p53 (Tumor suppressor p53). 
19 396 1 594.349 1531 3.94446e-173 1 393 1 396 1 1 300 324 7 398 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQ-AMDDLMLSPDDIEQWFTEDPGP----DEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MEEPQSDLSIELPLSQETFSDLWKLLPPNNVLSTLPSSDSIEELFLS-ENVAGWL-EDPGEALQGSAAAAAPAAPAAEDPVAETPAPVASAPATPWPLSSSVPSYKTYQGDYGFRLGFLHSGTAKSVTCTYSPSLNKLFCQLAKTCPVQLWVSSTPPPGTRVRAMAIYKKLQYMTEVVRRCPHHERSSEGDGLAPPQHLIRVEGNMHAEYLDDKQTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDPSGNLLGRNSFEVRICACPGRDRRTEEKNFQKKGEPCPELPPKSAKRALPTNTSSSPQPKRKTLDGEYFTLKIRGQERFKMFQELNEALELKDAQALKASEDSGAHSSYLKSKKGQSASRLKKLMIKREGPDSD MEEPQSD S+E PLSQETFSDLWKLLP NNVLS LPS ++++L LS +++ W EDPG WPLSSSVPS KTYQG YGFRLGFLHSGTAKSVTCTYSP+LNK+FCQLAKTCPVQLWV STPPPGTRVRAMAIYK+ Q+MTEVVRRCPHHER S+ DGLAPPQHLIRVEGN+ EYLDD+ TFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLED SGNLLGRNSFEVR+CACPGRDRRTEE+N +KKGEP ELPP S KRALP NTSSSPQPK+K LDGEYFTL+IRG+ERF+MF+ELNEALELKDAQA K S AHSS+LKSKKGQS SR KKLM K EGPDSD 15 gnl|BL_ORD_ID|8 P53_CRIGR O09185 Cellular tumor antigen p53 (Tumor suppressor p53). 
8 393 1 594.349 1531 3.94446e-173 1 393 1 393 1 1 296 321 2 394 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQ-AMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MEEPQSDLSIELPLSQETFSDLWKLLPPNNVLSTLPSSDSIEELFLS-ENVTGWLEDSGGALQGVAAAAASTAEDPVTETPAPVASAPATPWPLSSSVPSYKTYQGDYGFRLGFLHSGTAKSVTCTYSPSLNKLFCQLAKTCPVQLWVNSTPPPGTRVRAMAIYKKLQYMTEVVRRCPHHERSSEGDSLAPPQHLIRVEGNLHAEYLDDKQTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDPSGNLLGRNSFEVRICACPGRDRRTEEKNFQKKGEPCPELPPKSAKRALPTNTSSSPPPKKKTLDGEYFTLKIRGHERFKMFQELNEALELKDAQASKGSEDNGAHSSYLKSKKGQSASRLKKLMIKREGPDSD MEEPQSD S+E PLSQETFSDLWKLLP NNVLS LPS ++++L LS +++ W + G + WPLSSSVPS KTYQG YGFRLGFLHSGTAKSVTCTYSP+LNK+FCQLAKTCPVQLWV+STPPPGTRVRAMAIYK+ Q+MTEVVRRCPHHER S+ D LAPPQHLIRVEGNL EYLDD+ TFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLED SGNLLGRNSFEVR+CACPGRDRRTEE+N +KKGEP ELPP S KRALP NTSSSP PKKK LDGEYFTL+IRG ERF+MF+ELNEALELKDAQA K + AHSS+LKSKKGQS SR KKLM K EGPDSD 16 gnl|BL_ORD_ID|2 P53_BOVIN P67939 Cellular tumor antigen p53 (Tumor suppressor p53). 
2 386 1 588.956 1517 1.65722e-171 1 393 1 386 1 1 299 318 9 394 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSD-SDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MEESQAELNVEPPLSQETFSDLWNLLPENNLLSSELSAPVDDL-LPYTDVATWLDECPNE-------APQMPEPSAPAAPPPATPAPATSWPLSSFVPSQKTYPGNYGFRLGFLQSGTAKSVTCTYSPSLNKLFCQLAKTCPVQLWVDSPPPPGTRVRAMAIYKKLEHMTEVVRRCPHHERSSDYSDGLAPPQHLIRVEGNLRAEYLDDRNTFRHSVVVPYESPEIDSECTTIHYNFMCNSSCMGGMNRRPILTIITLEDSCGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGQSCPEPPPRSTKRALPTNTSSSPQPKKKPLDGEYFTLQIRGFKRYEMFRELNDALELKDALDGREPGESRAHSSHLKSKKRPSPSCHKKPMLKREGPDSD MEE Q++ +VEPPLSQETFSDLW LLPENN+LS S +DDL L D+ W E P SWPLSS VPSQKTY G+YGFRLGFL SGTAKSVTCTYSP+LNK+FCQLAKTCPVQLWVDS PPPGTRVRAMAIYK+ +HMTEVVRRCPHHER SD SDGLAPPQHLIRVEGNLR EYLDDRNTFRHSVVVPYE PE+ S+CTTIHYN+MCNSSCMGGMNRRPILTIITLEDS GNLLGRNSFEVRVCACPGRDRRTEEENLRKKG+ E PP STKRALP NTSSSPQPKKKPLDGEYFTLQIRG +R+EMFRELN+ALELKDA G+EPG SRAHSSHLKSKK S S HKK M K EGPDSD 17 gnl|BL_ORD_ID|1 P53_BOSIN P67938 Cellular tumor antigen p53 (Tumor suppressor p53). 
1 386 1 588.956 1517 1.65722e-171 1 393 1 386 1 1 299 318 9 394 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSD-SDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MEESQAELNVEPPLSQETFSDLWNLLPENNLLSSELSAPVDDL-LPYTDVATWLDECPNE-------APQMPEPSAPAAPPPATPAPATSWPLSSFVPSQKTYPGNYGFRLGFLQSGTAKSVTCTYSPSLNKLFCQLAKTCPVQLWVDSPPPPGTRVRAMAIYKKLEHMTEVVRRCPHHERSSDYSDGLAPPQHLIRVEGNLRAEYLDDRNTFRHSVVVPYESPEIDSECTTIHYNFMCNSSCMGGMNRRPILTIITLEDSCGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGQSCPEPPPRSTKRALPTNTSSSPQPKKKPLDGEYFTLQIRGFKRYEMFRELNDALELKDALDGREPGESRAHSSHLKSKKRPSPSCHKKPMLKREGPDSD MEE Q++ +VEPPLSQETFSDLW LLPENN+LS S +DDL L D+ W E P SWPLSS VPSQKTY G+YGFRLGFL SGTAKSVTCTYSP+LNK+FCQLAKTCPVQLWVDS PPPGTRVRAMAIYK+ +HMTEVVRRCPHHER SD SDGLAPPQHLIRVEGNLR EYLDDRNTFRHSVVVPYE PE+ S+CTTIHYN+MCNSSCMGGMNRRPILTIITLEDS GNLLGRNSFEVRVCACPGRDRRTEEENLRKKG+ E PP STKRALP NTSSSPQPKKKPLDGEYFTLQIRG +R+EMFRELN+ALELKDA G+EPG SRAHSSHLKSKK S S HKK M K EGPDSD 18 gnl|BL_ORD_ID|26 P53_RAT P10361 Cellular tumor antigen p53 (Tumor suppressor p53). 
26 391 1 575.859 1483 1.4518e-167 1 393 1 391 1 1 295 317 8 396 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPS---QAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MEDSQSDMSIELPLSQETFSCLWKLLPPDDILPTTATGSPNSMEDLFL-PQDVAELLE---GPEEALQVSAPAAQEPGTEAPAPVAPASATP-WPLSSSVPSQKTYQGNYGFHLGFLQSGTAKSVMCTYSISLNKLFCQLAKTCPVQLWVTSTPPPGTRVRAMAIYKKSQHMTEVVRRCPHHERCSDGDGLAPPQHLIRVEGNPYAEYLDDRQTFRHSVVVPYEPPEVGSDYTTIHYKYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRDSFEVRVCACPGRDRRTEEENFRKKEEHCPELPPGSAKRALPTSTSSSPQQKKKPLDGEYFTLKIRGRERFEMFRELNEALELKDARAAEESGDSRAHSSYPKTKKGQSTSRHKKPMIKKVGPDSD ME+ QSD S+E PLSQETFS LWKLLP +++L + +M+DL L P D+ + GP+E WPLSSSVPSQKTYQG+YGF LGFL SGTAKSV CTYS +LNK+FCQLAKTCPVQLWV STPPPGTRVRAMAIYK+SQHMTEVVRRCPHHERCSD DGLAPPQHLIRVEGN EYLDDR TFRHSVVVPYEPPEVGSD TTIHY YMCNSSCMGGMNRRPILTIITLEDSSGNLLGR+SFEVRVCACPGRDRRTEEEN RKK E ELPPGS KRALP +TSSSPQ KKKPLDGEYFTL+IRGRERFEMFRELNEALELKDA+A +E G SRAHSS+ K+KKGQSTSRHKK M K GPDSD 19 gnl|BL_ORD_ID|20 P53_MOUSE P02340 Cellular tumor antigen p53 (Tumor suppressor p53). 
20 390 1 574.318 1479 4.22408e-167 1 393 4 390 1 1 293 314 6 393 MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD MEESQSDISLELPLSQETFSGLWKLLPPEDIL-PSP-HCMDDLLL-PQDVEEFFE---GPSEALRVSGAPAAQDPVTETPGPVAPAPATPWPLSSFVPSQKTYQGNYGFHLGFLQSGTAKSVMCTYSPPLNKLFCQLAKTCPVQLWVSATPPAGSRVRAMAIYKKSQHMTEVVRRCPHHERCSDGDGLAPPQHLIRVEGNLYPEYLEDRQTFRHSVVVPYEPPEAGSEYTTIHYKYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRDSFEVRVCACPGRDRRTEEENFRKKEVLCPELPPGSAKRALPTCTSASPPQKKKPLDGEYFTLKIRGRKRFEMFRELNEALELKDAHATEESGDSRAHSSYLKTKKGQSTSRHKKTMVKKVGPDSD MEE QSD S+E PLSQETFS LWKLLP ++L P P MDDL+L P D+E++F GP E WPLSS VPSQKTYQG+YGF LGFL SGTAKSV CTYSP LNK+FCQLAKTCPVQLWV +TPP G+RVRAMAIYK+SQHMTEVVRRCPHHERCSD DGLAPPQHLIRVEGNL EYL+DR TFRHSVVVPYEPPE GS+ TTIHY YMCNSSCMGGMNRRPILTIITLEDSSGNLLGR+SFEVRVCACPGRDRRTEEEN RKK ELPPGS KRALP TS+SP KKKPLDGEYFTL+IRGR+RFEMFRELNEALELKDA A +E G SRAHSS+LK+KKGQSTSRHKK M K GPDSD 20 gnl|BL_ORD_ID|28 P53_SPEBE Q64662 Cellular tumor antigen p53 (Tumor suppressor p53) (Fragment). 
28 314 1 530.02 1364 9.13575e-154 21 335 1 313 1 1 256 268 2 315 DLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGR DLWNLLPENNVLSPVLSPPMDDLLLSSEDVENWF--DKGPDEALQMSAAPAPKAPTPAASTLAAPTPAISWPLSSSVPSQNTYPGVYGFRLGFIHSGTAKSVTCTYSPSLNKLFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKKSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRAEYLDDRNTFRHSVVVPYEPPEVGSESTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENFRKRGEPCPEPPPGSTKRALPTGTNSSPQPKKKPLDGEYFTLKIRGR DLW LLPENNVLSP+ S MDDL+LS +D+E WF D GPDE SWPLSSSVPSQ TY G YGFRLGF+HSGTAKSVTCTYSP+LNK+FCQLAKTCPVQLWVDSTPPPGTRVRAMAIYK+SQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLR EYLDDRNTFRHSVVVPYEPPEVGS+ TTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEEN RK+GEP E PPGSTKRALP T+SSPQPKKKPLDGEYFTL+IRGR 21 gnl|BL_ORD_ID|12 P53_HORSE P79892 Cellular tumor antigen p53 (Tumor suppressor p53) (Fragment). 
12 280 1 448.743 1153 2.67666e-129 39 329 2 280 1 1 227 238 14 292 AMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDS-DGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFT AVNNLLLSPD-VVNWL--DEGPDEAPRMPAAPAPLAPAPAT----------SWPLSSFVPSQKTYPGCYGFRLGFLNSGTAKSVTCTYSPTLNKLFCQLAKTCPVQLLVSSPPPPGTRVRAMAIYKKSEFMTEVVRRCPHHERCSDSSDGLAPPQHLIRVEGNLRAEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNFMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENFRKKEEPCPEPPPRSTKRVLSSNTSSSPPQKKKPLDGEYFT A+++L+LSPD + W D GPDE SWPLSS VPSQKTY G YGFRLGFL+SGTAKSVTCTYSP LNK+FCQLAKTCPVQL V S PPPGTRVRAMAIYK+S+ MTEVVRRCPHHERCSDS DGLAPPQHLIRVEGNLR EYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYN+MCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEEN RKK EP E PP STKR L +NTSSSP KKKPLDGEYFT 22 gnl|BL_ORD_ID|10 P53_EQUAS Q29480 Cellular tumor antigen p53 (Tumor suppressor p53) (Fragment). 10 207 1 374.015 959 8.37873e-107 126 330 1 206 1 1 181 187 1 206 YSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDS-DGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTL YSPALNKMFCQLAKTCPVYLRISSPPPPGTRVRAMAIYKKSEFMTEVVRRCPHHERCSDSSDGLAPPQHLIRVEGNLRAEYLDDRNTLRHSVVVPYEPPEVGSDCTTIHYNFMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENFRKKEEPCPEPPPRSTKRVLSSNTSSSPPQKEDPLDGEYFTL YSPALNKMFCQLAKTCPV L + S PPPGTRVRAMAIYK+S+ MTEVVRRCPHHERCSDS DGLAPPQHLIRVEGNLR EYLDDRNT RHSVVVPYEPPEVGSDCTTIHYN+MCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEEN RKK EP E PP STKR L +NTSSSP K+ PLDGEYFTL 23 gnl|BL_ORD_ID|7 P53_CHICK P10360 Cellular tumor antigen p53 (Tumor suppressor p53). 
7 367 1 355.91 912 2.36124e-101 5 386 4 364 1 1 197 240 27 385 QSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSD-SDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPL--DGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFK EMEPLLEPT---EVFMDLWSMLPYSMQQLPLPEDHSNWQELSPLE-----PSDPPPPPPPPPLPLAAAAPPPLNPPTPPRAAP------SPVVPSTEDYGGDFDFRVGFVEAGTAKSVTCTYSPVLNKVYCRLAKPCPVQVRVGVAPPPGSSLRAVAVYKKSEHVAEVVRRCPHHERCGGGTDGLAPAQHLIRVEGNPQARYHDDETTKRHSVVVPYEPPEVGSDCTTVLYNFMCNSSCMGGMNRRPILTILTLEGPGGQLLGRRCFEVRVCACPGRDRKIEEENFRKRGG-----AGGVAKRAMSPPTEAPEPPKKRVLNPDNEIFYLQVRGRRRYEMLKEINEALQL--AEGGSAPRPSKGRRVKV---EGPQPSCGKKLLQK + +P +EP E F DLW +LP + PLP + LSP + DP P S VPS + Y G + FR+GF+ +GTAKSVTCTYSP LNK++C+LAK CPVQ+ V PPPG+ +RA+A+YK+S+H+ EVVRRCPHHERC +DGLAP QHLIRVEGN + Y DD T RHSVVVPYEPPEVGSDCTT+ YN+MCNSSCMGGMNRRPILTI+TLE G LLGR FEVRVCACPGRDR+ EEEN RK+G G KRA+ T + PKK+ L D E F LQ+RGR R+EM +E+NEAL+L A+ G P S+ + +G S KKL+ K 24 gnl|BL_ORD_ID|21 P53_ONCMY P25035 Cellular tumor antigen p53 (Tumor suppressor p53). 
21 396 1 343.969 881 9.28528e-98 9 393 7 396 1 1 194 245 33 404 SVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTED----PGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDS-DGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHEL---PPGSTKRALPNNTSSSPQP------KKKPL--DGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKK---GQSTSRHKKLMFKTEGPDSD NVSLPLSQESFEDLWKM-------------NLNLVAVQPPETESWVGYDNFMMEAPLQVEFDPSLFEVSATEPAPQPSISTLDTGS-PPTSTVPTTSDYPGALGFQLRFLQSSTAKSVTCTYSPDLNKLFCQLAKTCPVQIVVDHPPPPGAVVRALAIYKKLSDVADVVRRCPHHQSTSENNEGPAPRGHLVRVEGNQRSEYMEDGNTLRHSVLVPYEPPQVGSECTTVLYNFMCNSSCMGGMNRRPILTIITLETQEGQLLGRRSFEVRVCACPGRDRKTEEINLKKQQETTLETKTKPAQGIKRAMKEASLPAPQPGASKKTKSSPAVSDDEIYTLQIRGKEKYEMLKKFNDSLELSELVPVADADKYRQKCLTKRVAKRDFGVGPKKRKKLLVKEEKSDSD +V PLSQE+F DLWK+ ++ + + P + E W D P + S P +S+VP+ Y G+ GF+L FL S TAKSVTCTYSP LNK+FCQLAKTCPVQ+ VD PPPG VRA+AIYK+ + +VVRRCPHH+ S++ +G AP HL+RVEGN R EY++D NT RHSV+VPYEPP+VGS+CTT+ YN+MCNSSCMGGMNRRPILTIITLE G LLGR SFEVRVCACPGRDR+TEE NL+K+ E E P KRA+ + +PQP K P D E +TLQIRG+E++EM ++ N++LEL + + R + K G + KKL+ K E DSD 25 gnl|BL_ORD_ID|0 P53_BARBU Q9W678 Cellular tumor antigen p53 (Tumor suppressor p53). 
0 369 1 341.658 875 4.60823e-97 92 393 56 369 1 1 181 219 12 314 PLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPH--HELPPGSTKRALPNNTSSSPQP---KKKPLDG----EYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAH-SSHLKSKKGQS--TSRHKKLMFKTEGPDSD PPTASVPVATDYPGEHGFKLGFPQSGTAKSVTCTYSSDLNKLFCQLAKTCPVQMVVNVAPPQGSVIRATAIYKKSEHVAEVVRRCPHHERTPDGDGLAPAAHLIRVEGNSRALYREDDVNSRHSVVVPYEVPQLGSEFTTVLYNFMCNSSCMGGMNRRPILTIISLETHDGQLLGRRSFEVRVCACPGRDRKTEESNFRKDQETKTLDKIPSANKRSLTKDSTSSVPRPEGSKKAKLSGSSDEEIYTLQVRGKERYEMLKKINDSLELSDVVPPSEMDRYRQKLLTKGKKKDGQTPEPKRGKKLMVKDEKSDSD P ++SVP Y G +GF+LGF SGTAKSVTCTYS LNK+FCQLAKTCPVQ+ V+ PP G+ +RA AIYK+S+H+ EVVRRCPHHER D DGLAP HLIRVEGN R Y +D RHSVVVPYE P++GS+ TT+ YN+MCNSSCMGGMNRRPILTII+LE G LLGR SFEVRVCACPGRDR+TEE N RK E ++P + + ++TSS P+P KK L G E +TLQ+RG+ER+EM +++N++LEL D E R + K K GQ+ R KKLM K E DSD 26 gnl|BL_ORD_ID|3 P53_BRARE P79734 Cellular tumor antigen p53 (Tumor suppressor p53). 
3 373 1 338.576 867 3.9011e-96 92 393 60 373 1 1 181 215 12 314 PLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGS-TKRALPNNTSSS---PQPKKK----PLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTS---RHKKLMFKTEG-PDSD PPTSTVPETSDYPGDHGFRLRFPQSGTAKSVTCTYSPDLNKLFCQLAKTCPVQMVVDVAPPQGSVVRATAIYKKSEHVAEVVRRCPHHERTPDGDNLAPAGHLIRVEGNQRANYREDNITLRHSVFVPYEAPQLGAEWTTVLLNYMCNSSCMGGMNRRPILTIITLETQEGQLLGRRSFEVRVCACPGRDRKTEESNFKKDQETKTMAKTTTGTKRSLVKESSSATLRPEGSKKAKGSSSDEEIFTLQVRGRERYEILKKLNDSLELSDVVPASDAEKYRQKFMTKNKKENRESSEPKQGKKLMVKDEGRSDSD P +S+VP Y G +GFRL F SGTAKSVTCTYSP LNK+FCQLAKTCPVQ+ VD PP G+ VRA AIYK+S+H+ EVVRRCPHHER D D LAP HLIRVEGN R Y +D T RHSV VPYE P++G++ TT+ NYMCNSSCMGGMNRRPILTIITLE G LLGR SFEVRVCACPGRDR+TEE N +K E + TKR+L +SS+ P+ KK D E FTLQ+RGRER+E+ ++LN++LEL D + R K+ + +S + KKLM K EG DSD 27 gnl|BL_ORD_ID|14 P53_ICTPU O93379 Cellular tumor antigen p53 (Tumor suppressor p53). 
14 376 1 333.183 853 1.63901e-94 10 393 12 376 1 1 192 240 41 395 VEPPLSQETFSDLW--KLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDS-DGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRAL---PNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDA--QAGKEPGGSRAHSSHLKSKKGQST---SRHKKLMFKTEGPDSD VEPPDSQE-FAELWLRNLIVRDNSLWGKEEEIPDDLQEVPCDVLLSDMLQPQSS----------------------------SSPPTSTVPVTSDYPGLLNFTLHFQESSGTKSVTCTYSPDLNKLFCQLAKTCPVLMAVSSSPPPGSVLRATAVYKRSEHVAEVVRRCPHHERSNDSSDGPAPPGHLLRVEGNSRAVYQEDGNTQAHSVVVPYEPPQVGSQSTTVLYNYMCNSSCMGGMNRRPILTIITLETQDGHLLGRRTFEVRVCACPGRDRKTEESNFKKQQEPKTS-GKTLTKRSMKDPPSHPEASKKSKNSSSDDEIYTLQVRGKERYEFLKKINDGLELSDVVPPADQEKYRQKLLSKTCRKERDGAAGEPKRGKKRLVKEEKCDSD VEPP SQE F++LW L+ +N L + DDL P D+ P S P +S+VP Y G F L F S KSVTCTYSP LNK+FCQLAKTCPV + V S+PPPG+ +RA A+YK+S+H+ EVVRRCPHHER +DS DG APP HL+RVEGN R Y +D NT HSVVVPYEPP+VGS TT+ YNYMCNSSCMGGMNRRPILTIITLE G+LLGR +FEVRVCACPGRDR+TEE N +K+ EP TKR++ P++ +S + K D E +TLQ+RG+ER+E +++N+ LEL D A +E + S + ++ + R KK + K E DSD 28 gnl|BL_ORD_ID|29 P53_TETMU Q9W679 Cellular tumor antigen p53 (Tumor suppressor p53). 
29 367 1 267.7 683 8.45682e-75 7 393 3 367 1 1 159 220 32 392 DPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEXXXXXXXXXXXXXXXXXXXXXXXXXXXSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKK-----KPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD EENISLPLSQDTFQDLW-----DNVSAP----PISTIQTAALENEAWPAERQMNMMCNFMDSTFNEALFNLLPEPPSRDGANSSSP---TVPVTTDYPGEYGFKLRFQKSGTAKSVTSTYSEILNKLYCQLAKTSLVEVLLGKDPPMGAVLRATAIYKKTEHVAEVVRRCPHHQ---NEDSAEHRSHLIRMEGSERAQYFEHPHTKRQSVTVPYEPPQLGSEFTTILLSFMCNSSCMGGMNRRPILTILTLETQEGIVLGRRCFEVRVCACPGRDRKTEETNSTKM---QNDAKDAKKRKSVPTPDSTTIKKSKTASSAEEDNNEVYTLQIRGRKRYEMLKKINDGLDLLE----NKPKSKATH-----RPDGPIPPSGKRLLHRGEKSDSD + ++ PLSQ+TF DLW +NV +P + + + + E W E S P +VP Y G YGF+L F SGTAKSVT TYS LNK++CQLAKT V++ + PP G +RA AIYK+++H+ EVVRRCPHH+ + D HLIR+EG+ R +Y + +T R SV VPYEPP++GS+ TTI ++MCNSSCMGGMNRRPILTI+TLE G +LGR FEVRVCACPGRDR+TEE N K ++ ++++P S++ + K + + E +TLQIRGR+R+EM +++N+ L+L + +P H G K+L+ + E DSD 29 gnl|BL_ORD_ID|22 P53_ORYLA P79820 Cellular tumor antigen p53 (Tumor suppressor p53). 
22 352 1 262.307 669 3.55304e-73 92 371 77 351 1 1 142 190 21 288 PLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPK-----KKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQA---GKEPGGSRAHSSHLKS PPPTTVPVTTDYPGSYELELRFQKSGTAKSVTSTYSETLNKLYCQLAKTSPIEVRVSKEPPKGAILRATAVYKKTEHVADVVRRCPHHQ---NEDSVEHRSHLIRVEGSQLAQYFEDPYTKRQSVTVPYEPPQPGSEMTTILLSYMCNSSCMGGMNRRPILTILTLE-TEGLVLGRRCFEVRICACPGRDRKTEEES-RQKTQP--------KKRKVTPNTSSSKRKKSHSSGEEEDNREVFHFEVYGRERYEFLKKINDGLELLEKESKSKNKDSGMVPSSGKKLKS P ++VP Y GSY L F SGTAKSVT TYS LNK++CQLAKT P+++ V PP G +RA A+YK+++H+ +VVRRCPHH+ + D + HLIRVEG+ +Y +D T R SV VPYEPP+ GS+ TTI +YMCNSSCMGGMNRRPILTI+TLE + G +LGR FEVR+CACPGRDR+TEEE+ R+K +P KR + NTSSS + K ++ + E F ++ GRER+E +++N+ LEL + ++ K+ G + LKS 30 gnl|BL_ORD_ID|24 P53_PLAFE O12946 Cellular tumor antigen p53 (Tumor suppressor p53). 24 366 1 259.225 661 3.00783e-72 92 393 70 366 1 1 145 197 23 311 PLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEE------NLRKKGEPHHELPPGS---TKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD PPSSTVPVVTDYPGEYGFQLRFQKSGTAKSVTSTFSELLKKLYCQLAKTSPVEVLLSKEPPQGAVLRATAVYKKTEHVADVVRRCPHHQT---EDTAEHRSHLIRLEGSQRALYFEDPHTKRQSVTVPYEPPQLGSETTAILLSFMCNSSCMGGMNRRQILTILTLETPDGLVLGRRCFEVRVCACPGRDRKTDEESSTKTPNGPKQTKKRKQAPSNSAPHTTTVMKSKSSSSAEEE----DKEVFTVLVKGRERYEIIKKINEAFE---GAAEKE----KAKNKVAVKQELPVPSSGKRLVQRGERSDSD P SS+VP Y G YGF+L F SGTAKSVT T+S L K++CQLAKT PV++ + PP G +RA A+YK+++H+ +VVRRCPHH+ D HLIR+EG+ R Y +D +T R SV VPYEPP++GS+ T I ++MCNSSCMGGMNRR ILTI+TLE G +LGR FEVRVCACPGRDR+T+EE N K+ + + P S T + + +SSS + + D E FT+ ++GRER+E+ +++NEA E A KE +A + ++ S K+L+ + E DSD 30 11169 53 3.25686e+06 0.041 0.267 0.14 
bio-1.4.3.0001/test/data/blast/b0002.faa0000644000004100000410000000202212200110570017050 0ustar www-datawww-data>eco:b0002 thrA, Hs, thrD, thrA2, thrA1; bifunctional: aspartokinase I (N-terminal); homoserine dehydrogenase I (C-terminal) [EC:2.7.2.4 1.1.1.3]; K00003 homoserine dehydrogenase; K00928 aspartate kinase (A) MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA LPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINA ALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIP ADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQV PDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRD EDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISF CVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAAL ARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGAL LEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRL VKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSR RKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLA REMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMA NLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAF YSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV bio-1.4.3.0001/test/data/blast/b0002.faa.m80000644000004100000410000000007012200110570017374 0ustar www-datawww-dataeco:b0002 eco:b0002 100.00 820 0 0 1 820 1 820 0.0 1567 bio-1.4.3.0001/test/data/blast/b0002.faa.m70000644000004100000410000001302512200110570017377 0ustar www-datawww-data blastp blastp 2.2.10 [Oct-19-2004] ~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~"Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs", Nucleic Acids Res. 25:3389-3402. 
b0002.faa lcl|QUERY eco:b0002 thrA, Hs, thrD, thrA2, thrA1; bifunctional: aspartokinase I (N-terminal); homoserine dehydrogenase I (C-terminal) [EC:2.7.2.4 1.1.1.3]; K00003 homoserine dehydrogenase; K00928 aspartate kinase (A) 820 BLOSUM62 10 11 1 S 1 1 gnl|BL_ORD_ID|0 eco:b0002 thrA, Hs, thrD, thrA2, thrA1; bifunctional: aspartokinase I (N-terminal); homoserine dehydrogenase I (C-terminal) [EC:2.7.2.4 1.1.1.3]; K00003 homoserine dehydrogenase; K00928 aspartate kinase (A) 0 820 1 1567.75 4058 0 1 820 1 820 1 1 820 820 820 MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV 
MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV 1 820 42 605284 0.041 0.267 0.14 bio-1.4.3.0001/test/data/HMMER/0000755000004100000410000000000012200110570015423 5ustar www-datawww-databio-1.4.3.0001/test/data/HMMER/hmmpfam.out0000644000004100000410000000620712200110570017606 0ustar www-datawww-datahmmpfam - search one or more sequences against HMM database HMMER 2.3.2 (Oct 2003) Copyright (C) 
1992-2003 HHMI/Washington University School of Medicine Freely distributed under the GNU General Public License (GPL) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - HMM file: /Users/nakao/Sites/iprscan/data/Pfam Sequence file: /Users/nakao/Sites/iprscan/tmp/20050517/iprscan-20050517-16244071/chunk_1/iprscan-20050517-16244071.nocrc - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Query sequence: 104K_THEPA Accession: [none] Description: [none] Scores for sequence family classification (score includes all domains): Model Description Score E-value N -------- ----------- ----- ------- --- PF04385.4 Domain of unknown function, DUF529 259.3 6.6e-75 4 Parsed for domains: Model Domain seq-f seq-t hmm-f hmm-t score E-value -------- ------- ----- ----- ----- ----- ----- ------- PF04385.4 1/4 36 111 .. 1 80 [] 65.0 2e-16 PF04385.4 2/4 149 224 .. 1 80 [] 64.7 2.5e-16 PF04385.4 3/4 265 343 .. 1 80 [] 64.6 2.7e-16 PF04385.4 4/4 379 456 .. 1 80 [] 65.0 2e-16 Alignments of top-scoring domains: PF04385.4: domain 1 of 4, from 36 to 111: score 65.0, E = 2e-16 *->tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvW t+D+n++++ f +v+++g+++ + ++ ++v+++++++Gn+v+W 104K_THEPA 36 TFDINSNQTG-PAFLTAVEMAGVKYLQVQHGSNVNIHRLVEGNVVIW 81 eseddpefglivtlsfyldsnkfLvlllintak<-* e++ + +l++ ++++++++++++++++ +++ 104K_THEPA 82 ENA---STPLYTGAIVTNNDGPYMAYVEVLGDP 111 PF04385.4: domain 2 of 4, from 149 to 224: score 64.7, E = 2.5e-16 *->tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvW +L++ ++ +++k+ + ++a+ng ++vt++p++G+ +++++++n++++ 104K_THEPA 149 SLNMAFQLENNKYEVETHAKNGANMVTFIPRNGHICKMVYHKNVRIY 195 eseddpefglivtlsfyldsnkfLvlllintak<-* ++ ++++vt++++++ +++L+l+++ + 104K_THEPA 196 KA----TGNDTVTSVVGFFRGLRLLLINVFSID 224 PF04385.4: domain 3 of 4, from 265 to 343: score 64.6, E = 2.7e-16 *->tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvW +Dl+ +++ +++f+ + a+++ ++++++p++G+++tk++dG++v++ 104K_THEPA 265 PVDLDIKDIDYTMFHLADATYHEPCFKIIPNTGFCITKLFDGDQVLY 311 eseddpefglivtlsfyldsnkfLvlllintak<-* 
es+ + + ++i +++y+++n ++++l++n+++ 104K_THEPA 312 ESFNP-LIHCINEVHIYDRNNGSIICLHLNYSP 343 PF04385.4: domain 4 of 4, from 379 to 456: score 65.0, E = 2e-16 *->tLDlndtgstlkqfdykvalngdivvty.tpkpGvkftkitdGnevv +LD+n ++++k+ +++ +n d +t+ tp+p+ + +++dG+ev+ 104K_THEPA 379 ELDVN--FISDKDLYVAALTNADLNYTMvTPRPHRDVIRVSDGSEVL 423 WeseddpefglivtlsfyldsnkfLvlllintak<-* W++e+ ++ l++++++++d++ +Lv+l+i++ 104K_THEPA 424 WYYEGL-DNFLVCAWIYVSDGVASLVHLRIKDRI 456 // bio-1.4.3.0001/test/data/HMMER/hmmsearch.out0000644000004100000410000000726012200110570020130 0ustar www-datawww-datahmmsearch - search a sequence database with a profile HMM HMMER 2.2g (August 2001) Copyright (C) 1992-2001 HHMI/Washington University School of Medicine Freely distributed under the GNU General Public License (GPL) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - HMM file: /sw/share/hmmer/demo/7tm_1 [7tm_1] Sequence database: /sw/share/hmmer/demo/P08908.fasta per-sequence score cutoff: [none] per-domain score cutoff: [none] per-sequence Eval cutoff: <= 10 per-domain Eval cutoff: [none] - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Query HMM: 7tm_1 Accession: PF00001 Description: 7 transmembrane receptor (rhodopsin family) [HMM has been calibrated; E-values are empirical estimates] Scores for complete sequences (score includes all domains): Sequence Description Score E-value N -------- ----------- ----- ------- --- sp|P08908|5H1A_HUMAN 5-hydroxytryptamine 1A receptor 377.1 5.5e-130 1 Parsed for domains: Sequence Domain seq-f seq-t hmm-f hmm-t score E-value -------- ------- ----- ----- ----- ----- ----- ------- sp|P08908|5H1A_HUMAN 1/1 53 400 .. 
1 275 [] 377.1 5.5e-130 Alignments of top-scoring domains: sp|P08908|5H1A_HUMAN: domain 1 of 1, from 53 to 400: score 377.1, E = 5.5e-130 *->GNlLVilvilrtkklrtptnifilNLAvADLLflltlppwalyylvg GN+ V+++i+++++l++++n++i++LAv+DL+++++++p+a++y v sp|P08908| 53 GNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVL 99 gsedWpfGsalCklvtaldvvnmyaSillLtaISiDRYlAIvhPlryrrr + W++G++ C+l++aldv+++++Sil+L+aI++DRY+AI++P+ y ++ sp|P08908| 100 N--KWTLGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNK 147 rtsprrAkvvillvWvlalllslPpllfswvktveegngtlnvnvtvCli rt prrA+++i+l+W++++l+s+Pp +++w++++ + +C+i sp|P08908| 148 RT-PRRAAALISLTWLIGFLISIPP-MLGWRTPEDRSD------PDACTI 189 dfpeestasvstwlvsyvllstlvgFllPllvilvcYtrIlrtlrkrark + +++ y+++st+++F++Pll++lv+Y+rI+r++r r rk sp|P08908| 190 SKDHG-----------YTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK 228 gas............................................... + + ++++ +++++ ++ ++++++ ++++++++ + + +++ ++ + sp|P08908| 229 TVKkvektgadtrhgaspapqpkksvngesgsrnwrlgveskaggalcan 278 .................................................. 
+ ++++++ + + ++ ++++++ + +++ ++++ + + +++++++ sp|P08908| 279 gavrqgddgaalevievhrvgnskehlplpseagptpcapasferknern 328 .....kkrsskerkaaktllvvvvvFvlCWlPyfivllldtlc.lsiims + ++k+ +erk++ktl++++++F+lCWlP+fiv+l+ ++c++s++m sp|P08908| 329 aeakrKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPFCeSSCHM- 377 stCelervlptallvtlwLayvNsclNPiIY<-* + + +++wL+y+Ns lNP+IY sp|P08908| 378 --------PTLLGAIINWLGYSNSLLNPVIY 400 Histogram of all scores: score obs exp (one = represents 1 sequences) ----- --- --- 377 1 0|= % Statistical details of theoretical EVD fit: mu = -10.6639 lambda = 0.7676 chi-sq statistic = 0.0000 P(chi-square) = 0 Total sequences searched: 1 Whole sequence top hits: tophits_s report: Total hits: 1 Satisfying E cutoff: 1 Total memory: 16K Domain top hits: tophits_s report: Total hits: 1 Satisfying E cutoff: 1 Total memory: 17K bio-1.4.3.0001/test/data/SOSUI/0000755000004100000410000000000012200110570015455 5ustar www-datawww-databio-1.4.3.0001/test/data/SOSUI/sample.report0000644000004100000410000000063612200110570020200 0ustar www-datawww-data>Q9HC19 MEMBRANE PROTEIN NUMBER OF TM HELIX = 7 TM 1 31- 53 SECONDARY HIRMTFLRKVYSILSLQVLLTTV TM 2 69- 90 PRIMARY HESPALILLFALGSLGLIFALT TM 3 99- 121 PRIMARY NLYLLFGFTLLEALTVAVVVTFY TM 4 124- 146 PRIMARY YIILQAFILTTTVFFGLTVYTLQ TM 5 153- 175 PRIMARY KFGAGLFALLWILCLSGILEVFF TM 6 181- 203 PRIMARY ELVLAAAGALLFCGFIIYDTHSL TM 7 212- 234 SECONDARY YVLAAISLYLDIINLFLHLLRFL bio-1.4.3.0001/test/data/go/0000755000004100000410000000000012200110570015160 5ustar www-datawww-databio-1.4.3.0001/test/data/go/selected_gene_association.sgd0000644000004100000410000000345412200110570023047 0ustar www-datawww-data!CVS Version: Revision: 1.1498 $ !GOC Validation Date: 06/05/2010 $ !Submission Date: 6/5/2010 ! ! The above "Submission Date" is when the annotation project provided ! this file to the Gene Ontology Consortium (GOC). The "GOC Validation ! Date" indicates when this file was last changed as a result of a GOC ! validation and filtering process. 
The "CVS Version" above is the ! GOC version of this file. ! ! Note: The contents of this file may differ from that submitted to the ! GOC. The identifiers and syntax of the file have been checked, rows of ! data not meeting the standards set by the GOC have been removed. This ! file may also have annotations removed because the annotations for the ! listed Taxonomy identifier are only allowed in a file provided by ! another annotation project. The original submitted file is available from: ! http://www.geneontology.org/gene-associations/submission/ ! ! For information on which taxon are allowed in which files please see: ! http://www.geneontology.org/GO.annotation.shtml#script ! !Project_name: Saccharomyces Genome Database (SGD) !URL: http://www.yeastgenome.org/ !Contact Email: yeast-curator@yeastgenome.org !Funding: NHGRI of US National Institutes of Health, 5-P41-HG001315-12 !gaf-version: 2.0 !Date: 06/05/2010 $ ! SGD S000007287 15S_RRNA GO:0005763 SGD_REF:S000073642|PMID:6261980 ISS C Ribosomal RNA of the small mitochondrial ribosomal subunit 15S_rRNA|15S_RRNA_2 gene taxon:4932 20040202 SGD SGD S000007287 15S_RRNA GO:0006412 SGD_REF:S000073643|PMID:6280192 IGI P Ribosomal RNA of the small mitochondrial ribosomal subunit 15S_rRNA|15S_RRNA_2 gene taxon:4932 20060630 SGD SGD S000007287 15S_RRNA GO:0042255 SGD_REF:S000051605|PMID:2167435 IGI P Ribosomal RNA of the small mitochondrial ribosomal subunit 15S_rRNA|15S_RRNA_2 gene taxon:4932 20030723 SGD bio-1.4.3.0001/test/data/go/selected_wikipedia2go0000644000004100000410000000143612200110570021335 0ustar www-datawww-data! version: $Revision: 1.17 $ ! date: $Date: 2010/06/11 01:01:37 $ ! ! Generated from file ontology/editors/gene_ontology_write.obo, ! CVS revision: 1.1296; date: 10:06:2010 16:16 ! ! Mapping of Gene Ontology terms to Wikipedia entries. ! Wikipedia: http://en.wikipedia.org ! Last update at Thu Jun 10 17:21:44 2010 by the script /users/cjm/cvs/go-moose/bin/daily_from_obo.pl ! 
Wikipedia:11beta-hydroxysteroid_dehydrogenase > GO:11-beta-hydroxysteroid dehydrogenase activity ; GO:0003845 Wikipedia:2-(hydroxymethyl)-3-(acetamidomethylene)succinate_amidohydrolase_(deaminating\,_decarboxylating) > GO:2-(hydroxymethyl)-3-(acetamidomethylene)succinate hydrolase activity ; GO:0047414 Wikipedia:2-hydroxymethylglutarate_dehydrogenase > GO:2-hydroxymethylglutarate dehydrogenase activity ; GO:0043718 bio-1.4.3.0001/test/data/go/selected_component.ontology0000644000004100000410000000105112200110570022623 0ustar www-datawww-data!date: Sun Jul 26 21:12:00 PDT 2009 !version: $Revision: 1.21 $ !type: % OBO_REL:is_a is_a !type: ^ transitive_over transitive_over !type: - negatively_regulates negatively_regulates !type: < part_of part_of !type: + positively_regulates positively_regulates !type: & regulates regulates $Gene_Ontology ; GO:0003673 P1;CRAB_ANAPL ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN). MDITIHNPLI RRPLFSWLAP SRIFDQIFGE HLQESELLPA SPSLSPFLMR SPIFRMPSWL ETGLSEMRLE KDKFSVNLDV KHFSPEELKV KVLGDMVEIH GKHEERQDEH GFIAREFNRK YRIPADVDPL TITSSLSLDG VLTVSAPRKQ SDVPERSIPI TREEKPAIAG AQRK* bio-1.4.3.0001/test/data/phyloxml/0000755000004100000410000000000012200110570016427 5ustar www-datawww-databio-1.4.3.0001/test/data/phyloxml/bcl_2.xml0000644000004100000410000032151312200110570020137 0ustar www-datawww-data Bcl-2 1 0.21409500000000004 33.0 1 0.22234 58.0 1 0.68133 99.0 1 0.0946 72.0 1 0.24716 35.0 1 0.18828 33.0 1 0.10487 37.0 1 51_CHICK 0.28045 CHICK 145_XENLA 0.59876 XENLA 0.38418 100.0 1 0.0267 39.0 1 88_CANFA 0.08127 CANFA 115_MOUSE 0.14168 MOUSE 1.0E-5 55.0 1 74_BOVIN 0.09416 BOVIN 9_HUMAN 0.06828 HUMAN 0.35342 100.0 1 159_BRARE 0.01275 BRARE 166_BRARE 0.02259 BRARE 0.39739 99.0 1 52_CHICK 0.27747 CHICK 144_XENLA 0.34644 XENLA 0.51997 100.0 1 0.0983 56.0 1 0.5126 100.0 1 1.0E-5 46.0 1 90_CANFA 0.03744 CANFA 12_HUMAN 0.07368 HUMAN 64_BOVIN 0.05675 BOVIN 142_XENLA 0.26292 XENLA 0.34592 99.0 1 155_BRARE 1.0E-5 BRARE 154_BRARE 1.0E-5 BRARE 1.25362 
100.0 1 125_MOUSE 0.04425 MOUSE 0.14928 40.0 1 95_CANFA 0.09977 CANFA 13_HUMAN 0.07433 HUMAN 0.57089 66.0 1 0.53798 100.0 1 16_SPHGR 1.0E-5 SPHGR 17_SPHGR 1.0E-5 SPHGR 0.69471 89.0 1 0.15935 83.0 1 0.24123 100.0 1 158_BRARE 1.0E-5 BRARE 169_BRARE 1.0E-5 BRARE 130_TETNG 0.37777 TETNG 0.13835 44.0 1 0.1765 73.0 1 122_MOUSE 0.10759 MOUSE 0.09256 44.0 1 0.01016 40.0 1 0.0243 32.0 1 11_HUMAN 0.02336 HUMAN 68_BOVIN 0.24624 BOVIN 92_CANFA 1.0E-5 CANFA 46_CHICK 1.06707 CHICK 147_XENLA 0.43513 XENLA 0.21984499999999996 33.0 1 0.13885 14.0 1 0.07066 2.0 1 0.0137 3.0 1 0.10632 4.0 1 0.09782 15.0 1 0.08093 32.0 1 0.17228 52.0 1 0.22305 76.0 1 0.00625 55.0 1 140_TETNG 0.92309 TETNG 0.07662 43.0 1 152_XENLA 0.19286 XENLA 0.0429 35.0 1 0.06125 68.0 1 1.0E-5 93.0 1 1.0E-5 75.0 1 0_HUMAN 1.0E-5 HUMAN 72_BOVIN 0.0105 BOVIN 123_MOUSE 0.01053 MOUSE 97_CANFA 1.0E-5 CANFA 41_CHICK 0.03818 CHICK 0.07344 38.0 1 0.18878 96.0 1 0.02241 81.0 1 132_TETNG 1.0E-5 TETNG 135_TETNG 1.0E-5 TETNG 102_FUGRU 0.00904 FUGRU 167_BRARE 0.26949 BRARE 0.0375 23.0 1 0.23552 99.0 1 151_XENLA 0.12315 XENLA 0.22499 100.0 1 126_MOUSE 1.0E-5 MOUSE 0.01047 100.0 1 96_CANFA 1.0E-5 CANFA 1.0E-5 100.0 1 8_HUMAN 1.0E-5 HUMAN 79_BOVIN 1.0E-5 BOVIN 0.04974 25.0 1 0.20281 86.0 1 1.0E-5 92.0 1 164_BRARE 0.02901 BRARE 0.06877 100.0 1 136_TETNG 0.01045 TETNG 100_FUGRU 1.0E-5 FUGRU 0.17649 84.0 1 0.05352 97.0 1 129_TETNG 6.9E-4 TETNG 104_FUGRU 0.03078 FUGRU 0.23291 95.0 1 128_TETNG 0.36032 TETNG 0.24906 100.0 1 103_FUGRU 0.24206 FUGRU 0.09945 65.0 1 106_FUGRU 0.04182 FUGRU 1.0E-5 77.0 1 0.0141 71.0 1 112_FUGRU 0.03123 FUGRU 113_FUGRU 0.09693 FUGRU 0.195 100.0 1 108_FUGRU 0.01027 FUGRU 111_FUGRU 0.01081 FUGRU 0.03668 58.0 1 149_XENLA 0.32167 XENLA 0.07846 90.0 1 49_CHICK 0.05537 CHICK 0.03275 83.0 1 0.192 99.0 1 63_BOVIN 0.1324 BOVIN 80_BOVIN 0.13424 BOVIN 1.0E-5 51.0 1 57_BOVIN 0.01041 BOVIN 1.0E-5 20.0 1 117_MOUSE 1.0E-5 MOUSE 0.01043 66.0 1 87_CANFA 1.0E-5 CANFA 1_HUMAN 1.0E-5 HUMAN 25_BRAFL 0.4534 BRAFL 15_SPHGR 0.68488 
SPHGR 21_SPHGR 1.05022 SPHGR 0.08808 22.0 1 33_NEMVE 0.482 NEMVE 30_NEMVE 0.47217 NEMVE 0.28685 9.0 1 0.17977 14.0 1 0.4495 92.0 1 0.33029 89.0 1 0.13116 61.0 1 165_BRARE 0.30255 BRARE 162_BRARE 0.62996 BRARE 0.22822 100.0 1 137_TETNG 0.24501 TETNG 107_FUGRU 0.04204 FUGRU 0.25726 94.0 1 0.15529 59.0 1 42_CHICK 0.29479 CHICK 143_XENLA 0.60398 XENLA 0.13595 100.0 1 58_BOVIN 0.05604 BOVIN 1.0E-5 60.0 1 0.02217 78.0 1 1.0E-5 42.0 1 85_CANFA 0.01109 CANFA 2_HUMAN 0.01103 HUMAN 124_MOUSE 0.04504 MOUSE 73_BOVIN 1.0E-5 BOVIN 0.53052 100.0 1 45_CHICK 0.17601 CHICK 0.40974 98.0 1 0.05702 52.0 1 91_CANFA 0.10905 CANFA 0.06948 63.0 1 0.18193 100.0 1 118_MOUSE 0.18633 MOUSE 127_MOUSE 1.0E-5 MOUSE 3_HUMAN 0.04341 HUMAN 0.09486 100.0 1 76_BOVIN 1.0E-5 BOVIN 71_BOVIN 1.0E-5 BOVIN 0.13006 23.0 1 0.40777 73.0 1 0.33571 100.0 1 157_BRARE 0.2958 BRARE 109_FUGRU 0.19527 FUGRU 0.1832 68.0 1 0.78705 100.0 1 55_BOVIN 0.43001 BOVIN 0.08251 65.0 1 119_MOUSE 0.64738 MOUSE 10_HUMAN 0.18995 HUMAN 0.09353 48.0 1 171_CHICK 0.56823 CHICK 141_XENLA 0.57176 XENLA 40_CAEEL 1.20795 CAEEL 0.09583 8.0 1 0.03647 6.0 1 0.15942 15.0 1 0.37935 100.0 1 1.0E-5 65.0 1 23_BRAFL 1.0E-5 BRAFL 24_BRAFL 0.08958 BRAFL 26_BRAFL 1.0E-5 BRAFL 0.07297 24.0 1 0.74877 100.0 1 0.22523 100.0 1 168_BRARE 0.13915 BRARE 131_TETNG 0.07883 TETNG 146_XENLA 0.27947 XENLA 14_SPHGR 0.61862 SPHGR 0.145 29.0 1 0.14662 25.0 1 35_NEMVE 0.49481 NEMVE 38_NEMVE 0.35293 NEMVE 34_NEMVE 0.32221 NEMVE 0.40409 89.0 1 0.10452 28.0 1 0.1139 41.0 1 0.18551 35.0 1 0.13606 92.0 1 0.05378 57.0 1 0.11874 90.0 1 110_FUGRU 0.56162 FUGRU 134_TETNG 0.03296 TETNG 0.091 66.0 1 0.04421 62.0 1 0.13167 94.0 1 7_HUMAN 1.0E-5 HUMAN 0.01865 61.0 1 0.51198 100.0 1 89_CANFA 0.01988 CANFA 62_BOVIN 1.0E-5 BOVIN 114_MOUSE 0.01315 MOUSE 48_CHICK 1.0E-5 CHICK 150_XENLA 0.0853 XENLA 1.0E-5 38.0 1 0.19569 100.0 1 101_FUGRU 0.02185 FUGRU 133_TETNG 0.00907 TETNG 0.1765 100.0 1 160_BRARE 1.0E-5 BRARE 161_BRARE 1.0E-5 BRARE 0.43964 100.0 1 98_DROME 0.38195 DROME 99_DROME 
0.30802 DROME 0.18404 45.0 1 22_BRAFL 0.39195 BRAFL 18_SPHGR 0.4734 SPHGR 20_SPHGR 0.64378 SPHGR 0.06108 45.0 1 39_NEMVE 0.56478 NEMVE 0.12162 57.0 1 0.46294 100.0 1 37_NEMVE 1.0E-5 NEMVE 29_NEMVE 1.0E-5 NEMVE 31_NEMVE 0.56073 NEMVE 0.11059 29.0 1 0.24695 52.0 1 172_XENLA 0.53649 XENLA 0.09295 50.0 1 0.14603 87.0 1 0.09214 65.0 1 19_SPHGR 0.37571 SPHGR 0.09991 100.0 1 28_BRAFL 1.0E-5 BRAFL 27_BRAFL 1.0E-5 BRAFL 36_NEMVE 0.36151 NEMVE 0.34289 100.0 1 50_CHICK 0.05827 CHICK 0.22124 92.0 1 170_MOUSE 0.17237 MOUSE 1.0E-5 59.0 1 67_BOVIN 0.05006 BOVIN 0.00337 81.0 1 84_CANFA 0.01072 CANFA 0.02097 98.0 1 6_HUMAN 0.02091 HUMAN 5_HUMAN 0.01043 HUMAN 0.20775 73.0 1 32_NEMVE 0.30744 NEMVE 0.27579 64.0 1 53_CIOIN 0.40206 CIOIN 0.28261 92.0 1 0.05338 67.0 1 156_BRAREb 0.64914 BRARE 0.0829 70.0 1 156_BRAREa 0.18704 BRARE 0.03919 100.0 1 138_TETNG 0.03397 TETNG 173_FUGRU 1.0E-5 FUGRU 0.15101 78.0 1 153_XENLA 0.15191 XENLA 0.24267 100.0 1 116_MOUSE 1.0E-5 MOUSE 0.04527 49.0 1 4_HUMAN 1.0E-5 HUMAN 0.011 37.0 1 94_CANFA 1.0E-5 CANFA 82_BOVIN 1.0E-5 BOVIN bio-1.4.3.0001/test/data/phyloxml/phyloxml_examples.xml0000644000004100000410000003517412200110570022735 0ustar www-datawww-data example from Prof. Joe Felsenstein's book "Inferring Phylogenies" phyloXML allows to use either a "branch_length" attribute or element to indicate branch lengths. A B C example from Prof. Joe Felsenstein's book "Inferring Phylogenies" phyloXML allows to use either a "branch_length" attribute or element to indicate branch lengths. 0.06 A 0.102 B 0.23 C 0.4 same example, with support of type "bootstrap" AB 89 A B C same example, with species and sequence AB A E. coli J. G. Cooper, 1863 alcohol dehydrogenase 0.99 B B. subtilis alcohol dehydrogenase 0.91 C C. 
elegans alcohol dehydrogenase 0.67 same example, with gene duplication information and sequence relationships 1 1 Bacillus subtilis adhB AAB80874 alcohol dehydrogenase Bacillus subtilis gbsB CAB15083 alcohol dehydrogenase Caenorhabditis elegans ADHX Q17335 alcohol dehydrogenase similar example, with more detailed sequence data 6645 OCTVU Octopus vulgaris ADHX P81431 Alcohol dehydrogenase class-3 TDATGKPIKCMAAIAWEAKKPLSIEEVEVAPPKSGEVRIKILHSGVCHTD 44689 DICDI Dictyostelium discoideum RT4I1 Q54II4 Reticulon-4-interacting protein 1 homolog, mitochondrial precursor MKGILLNGYGESLDLLEYKTDLPVPKPIKSQVLIKIHSTSINPLDNVMRK 1488 CLOAB Clostridium acetobutylicum ADHB Q04945 NADH-dependent butanol dehydrogenase B MVDFEYSIPTRIFFGKDKINVLGRELKKYGSKVLIVYGGGSIKRNGIYDK network, node B is connected to TWO nodes: AB and C AB A B C same example, using property elements to indicate a "depth" value for marine organisms AB A 1200 B 2300 C 200 same example, using property elements to indicate a "depth" value for marine organisms by using id refs in order to have property elements outside of the tree topology AB A B C 1200 2300 200 monitor lizards a pylogeny of some monitor lizards 8556 Varanus genus http://www.embl-heidelberg.de/~uetz/families/Varanidae.html 62046 Varanus niloticus Nile monitor species Africa Odatria subgenus 169855 Varanus storri Storr's monitor species Australia 62053 Varanus timorensis Timor monitor species Asia A tree with phylogeographic information A Hirschweg, Winterthur, Switzerland 47.481277 8.769303 472 B Nagoya, Aichi, Japan 35.155904 136.915863 10 C ETH Zürich 47.376334 8.548108 452 D San Diego 32.880933 -117.217543 104 A tree with date information A Silurian 425 B Devonian 320 C Ediacaran 600 570 630 Using another XML language to store an alignment A B C acgtcgcggcccgtggaagtcctctcct aggtcgcggcctgtggaagtcctctcct taaatcgc--cccgtgg-agtccc-cct bio-1.4.3.0001/test/data/phyloxml/made_up.xml0000644000004100000410000001357412200110570020575 0ustar www-datawww-data testing 
confidence 89 0.71 b 0.2 c 0.9 0.71 testing polygon A Polygon from random points 47.481277 8.769303 472 35.155904 136.915863 10 47.376334 8.548108 452 40.481277 8.769303 42 25.155904 136.915863 10 47.376334 7.548108 452 B testing references A Phyutility: a phyloinformatics tool for trees, alignments and molecular data A Example of domain. Taken from apaf.mxl 22_MOUSE 0.05998 MOUSE murine vermin alcohol dehydrogenase 0.99 1200 CARD NB-ARC WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 dollo_on_domains__cofilin_e1/o_tol_332 [Method: Dollo parsimony on domain presence/absence] [Date: 2008/08/20 18:23:46] [Cost: 4] [Gains: 2] [Losses: 2] [Unchanged: 1314] [Parameters: E-value: 0.1, Cutoff-scores-file: not-set, Max-overlap: 0, Engulfing-overlaps: not-allowed, Ignore-dufs: false] 4.0 cellular_organisms Neomura Eukaryota Cofilin_ADF Gelsolin Cofilin_ADF Gelsolin bio-1.4.3.0001/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml0000644000004100000410000000355512200110570025150 0ustar www-datawww-data Mollusca Gastropoda Orthogastropoda Vetigastropoda Haliotoidea Haliotidae Sulculus Sulculus diversicolor Eogastropoda Docoglossa Bivalvia Heteroconchia bio-1.4.3.0001/test/data/phyloxml/apaf.xml0000644000004100000410000011333312200110570020064 0ustar www-datawww-data 0.18105 89.0 0.07466 32.0 0.26168 100.0 0.22058 89.0 0.28901 100.0 0.06584 100.0 0.02309 43.0 0.0746 100.0 0.02365 88.0 22_MOUSE 0.05998 MOUSE CARD NB-ARC WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 Apaf-1_HUMAN 0.01825 HUMAN CARD NB-ARC WD40 WD40 WD40 WD40 WD40 WD40 WD40 12_CANFA 0.04683 CANFA CARD NB-ARC WD40 WD40 WD40 WD40 WD40 WD40 WD40 11_CHICK 0.15226 CHICK CARD NB-ARC WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 16_XENLA 0.4409 XENLA CARD NB-ARC NACHT WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 0.17031 100.0 0.10929 100.0 14_FUGRU 0.02255 FUGRU CARD NB-ARC WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 15_TETNG 0.09478 TETNG CARD NB-ARC WD40 WD40 WD40 17_BRARE 0.1811 BRARE CARD NB-ARC WD40 WD40 WD40 WD40 WD40 WD40 WD40 
0.01594 53.0 0.10709 68.0 1_BRAFL 0.26131 BRAFL CARD NB-ARC WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 18_NEMVE 0.38014 NEMVE CARD CARD NB-ARC WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 23_STRPU 0.48179 STRPU NB-ARC WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 0.34475 100.0 26_STRPU 0.36374 STRPU Death NB-ARC WD40 25_STRPU 0.33137 STRPU Death NB-ARC WD40 WD40 Adeno_VII 1.31498 100.0 CED4_CAEEL 0.13241 CAEEL CARD NB-ARC 31_CAEBR 0.04777 CAEBR CARD NB-ARC 0.13172 45.0 0.24915 95.0 0.76898 100.0 28_DROPS 0.1732 DROPS NB-ARC Dark_DROME 0.18863 DROME NB-ARC 29_AEDAE 0.86398 AEDAE NB-ARC 30_TRICA 0.97698 TRICA CARD NB-ARC WD40 0.18105 89.0 0.15891 64.0 0.54836 100.0 0.09305 46.0 0.21648 61.0 0.93134 100.0 34_BRAFL 0.093 BRAFL NB-ARC 35_BRAFL 0.08226 BRAFL DED NB-ARC 8_BRAFL 0.58563 BRAFL NB-ARC 0.28437 84.0 20_NEMVE 0.71946 NEMVE DED PAAD_DAPIN NB-ARC TPR_1 TPR_2 TPR_2 21_NEMVE 0.9571 NEMVE CARD Death NB-ARC TPR_1 RVT_1 9_BRAFL 1.09612 BRAFL NB-ARC BTAD TPR_1 TPR_1 TPR_2 TPR_2 TPR_3 TPR_4 Death LTXXQ Death 0.34914 98.0 0.22189 95.0 3_BRAFL 0.48766 BRAFL NB-ARC WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 Collagen 2_BRAFL 0.65293 BRAFL NB-ARC WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 19_NEMVE 0.57144 NEMVE DED NB-ARC WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 WD40 SGL WD40 WD40 WD40 0.43438 92.0 0.92214 100.0 37_BRAFL 0.21133 BRAFL CARD CARD NB-ARC NACHT 36_BRAFL 0.16225 BRAFL CARD NB-ARC 33_BRAFL 0.8363 BRAFL Death NB-ARC bio-1.4.3.0001/test/data/meme/0000755000004100000410000000000012200110570015476 5ustar www-datawww-databio-1.4.3.0001/test/data/meme/meme.out0000644000004100000410000000027612200110570017157 0ustar www-datawww-data******************************************************************************** MEME - Motif discovery tool ******************************************************************************** bio-1.4.3.0001/test/data/meme/mast.out0000644000004100000410000000104412200110570017172 0ustar www-datawww-data# All 
non-overlapping hits in all sequences. # sequence_name motif hit_start hit_end hit_p-value gi|33357914|pdb|1P85|M 6 54 74 3.70e-05 gi|14488684|pdb|1IHO|A 2 195 215 3.12e-05 gi|2624640|pdb|1HXQ|B 1 173 220 4.27e-05 gi|9954928|pdb|1EYZ|A 6 7 27 2.72e-05 gi|9954928|pdb|1EYZ|A 10 311 325 9.75e-05 gi|28948818|pdb|1NEK|A 2 29 49 8.96e-06 gi|85544130|pdb|2AVU|E 8 112 140 4.25e-05 gi|83754486|pdb|2C27|A 6 42 62 2.72e-05 gi|18655936|pdb|1KP9|A 8 92 120 3.96e-05 gi|88192540|pdb|2C4N|A 8 178 206 5.42e-05 gi|157831485|pdb|1IOV|A 3 130 150 6.63e-07 bio-1.4.3.0001/test/data/meme/mast0000644000004100000410000000000012200110570016353 0ustar www-datawww-databio-1.4.3.0001/test/data/meme/db0000644000004100000410000000000012200110570015774 0ustar www-datawww-databio-1.4.3.0001/test/data/paml/0000755000004100000410000000000012200110570015504 5ustar www-datawww-databio-1.4.3.0001/test/data/paml/codeml/0000755000004100000410000000000012200110570016747 5ustar www-datawww-databio-1.4.3.0001/test/data/paml/codeml/models/0000755000004100000410000000000012200110570020232 5ustar www-datawww-databio-1.4.3.0001/test/data/paml/codeml/models/aa.aln0000644000004100000410000000254712200110570021317 0ustar www-datawww-dataCLUSTAL 2.0.9 multiple sequence alignment PITG_23265T0 MKSQAFAQEEPVLWTDIHHGARFGDGCNRRVIERQLTYALADKRMPTKTKGFQKGRCSCG PITG_23253T0 ----------------MIHGARVGDGCNRRDIERQLTYALADKRMPTKTKGFKKERCSCG PITG_23257T0 --------------------------------MRLLTSG-----CQRRPKDLKKRRCSCG PITG_23264T0 --------------------------------------------MPTKTKGFQKGRCSCG PITG_23267T0 --------------------------------------------MPTKTKGFQKGRCSCG PITG_23293T0 --------------------------------------------MPTKTKGFQKGRCSCG :.*.::* ***** PITG_23265T0 NDQGKCFLYQSVDASPSSDAFLCLEDATKGFFHEVHQCKCDEGNLSLDVSKQERSKAAT- PITG_23253T0 NDQGKCFLYQPADGSPSEDAFLCLEDATKGFFHEVHQCKCVEGNLSLDVFEQERLRLLVP PITG_23257T0 NDQGKCFLYQPADGPPSGDAFLCLEDATKGFFHEVHQCKCVEGNLSLDVFEQERLKAAGP PITG_23264T0 
NDQGKCFLYQSADGSPFGDAFLCLEDATKGFFHEVHQCKCVEGNLSLDVFKQERSKAAGP PITG_23267T0 NDQGKCFLYQSVDASPSSDAFLCLEDATKGFFHEVHQCKCDEGNLSLDVSKQERSKAAT- PITG_23293T0 NDQGKCFLYQSVDASPSSDAFLCLEDATKGFFHEVHQCKCDEGNLSLSDS---------- **********..*..* ********************** ******. PITG_23265T0 ---YSFCIARVR-- PITG_23253T0 -TTYSSYIARA--- PITG_23257T0 ---IHLILHEPSDG PITG_23264T0 DDLFILCCTSPSDG PITG_23267T0 ---YSFCIARVK-- PITG_23293T0 -------------- bio-1.4.3.0001/test/data/paml/codeml/models/alignment.phy0000644000004100000410000000476012200110570022741 0ustar www-datawww-data 6 402 PITG_23265T0 ATGAAATCGCAAGCTTTCGCCCAGGAGGAACCTGTGCTATGGACGGATATACATCATGGG GCACGCTTTGGCGATGGATGCAATCGACGCGTTATTGAGCGGCAATTGACGTATGCGCTT GCTGACAAGCGAATGCCAACGAAGACCAAAGGATTTCAGAAAGGAAGGTGCTCTTGCGGT AATGATCAAGGGAAGTGCTTTCTGTATCAATCTGTAGACGCGTCTCCGTCTAGTGATGCA TTTCTGTGTCTGGAAGATGCGACGAAGGGCTTCTTTCACGAGGTTCATCAGTGTAAGTGT GACGAAGGGAATCTATCACTGGACGTTTCCAAGCAAGAGCGGTCGAAGGCTGCTACC--- ---------TATTCATTTTGTATTGCACGAGTCAGG------ PITG_23253T0 ------------------------------------------------ATGATTCATGGG GCACGGGTTGGCGATGGATGCAATCGACGCGATATTGAGCGGCAATTGACGTATGCGCTA GCTGACAAGCGGATGCCAACGAAGACCAAAGGATTTAAGAAAGAAAGGTGCTCTTGCGGT AATGATCAAGGGAAGTGTTTTCTGTATCAGCCTGCAGACGGGTCTCCGTCCGAAGATGCA TTTCTGTGTCTGGAAGATGCGACGAAGGGCTTCTTTCACGAGGTTCATCAGTGTAAATGC GTCGAAGGGAATCTATCACTGGACGTTTTCGAGCAAGAGCGGTTAAGGCTGCTGGTCCCG ---ACGACCTATTCATCTTATATTGCACGAGCC--------- PITG_23257T0 ------------------------------------------------------------ ------------------------------------ATGCGCTTGCTGACAAGCGGA--- ------------TGCCAACGAAGACCCAAGGATTTAAAGAAAAGAAGGTGCTCTTGCGGT AATGATCAAGGGAAGTGTTTTCTGTATCAGCCTGCAGACGGGCCTCCATCCGGAGATGCA TTTCTGTGTCTGGAAGATGCGACGAAGGGCTTCTTTCACGAGGTTCATCAGTGTAAATGC GTCGAAGGGAATCTATCACTGGACGTTTTCGAGCAAGAGCGGTTGAAGGCTGCTGGACCT ---------ATTCATCTTATATTGCACGAGCCTAGTGACGGT PITG_23264T0 ------------------------------------------------------------ ------------------------------------------------------------ 
------------ATGCCAACGAAGACCAAAGGATTTCAGAAAGGAAGGTGCTCTTGCGGT AATGATCAAGGAAAGTGCTTTCTGTATCAATCTGCAGACGGGTCTCCGTTCGGAGATGCA TTTCTGTGTCTGGAAGATGCGACGAAGGGCTTCTTTCACGAGGTTCATCAGTGTAAATGC GTCGAAGGGAATCTATCACTGGACGTTTTCAAGCAAGAGCGGTCGAAGGCTGCTGGTCCC GACGACCTATTCATCTTGTGTTGCACGAGCCCAAGTGACGGC PITG_23267T0 ------------------------------------------------------------ ------------------------------------------------------------ ------------ATGCCAACGAAGACCAAAGGATTTCAGAAAGGAAGGTGCTCTTGCGGT AATGATCAAGGGAAGTGCTTTCTGTATCAATCTGTAGACGCGTCTCCGTCTAGTGATGCA TTTCTGTGTCTGGAAGATGCGACGAAGGGCTTCTTTCACGAGGTTCATCAGTGTAAGTGT GACGAAGGGAATCTATCACTGGACGTTTCCAAGCAAGAGCGGTCGAAGGCTGCTACC--- ---------TATTCATTTTGTATTGCACGAGTCAAG------ PITG_23293T0 ------------------------------------------------------------ ------------------------------------------------------------ ------------ATGCCAACGAAGACCAAAGGATTTCAGAAAGGAAGGTGCTCTTGCGGT AATGATCAAGGGAAGTGCTTTCTGTATCAATCTGTAGACGCGTCTCCGTCTAGTGATGCA TTTCTGTGTCTGGAAGATGCGACGAAGGGCTTCTTTCACGAGGTTCATCAGTGTAAGTGC GACGAAGGGAATCTATCACTGTCAGACTCC------------------------------ ------------------------------------------ bio-1.4.3.0001/test/data/paml/codeml/models/results0-3.txt0000644000004100000410000004760012200110570022723 0ustar www-datawww-data seed used = 727791417 6 402 PITG_23265T0 ATG AAA TCG CAA GCT TTC GCC CAG GAG GAA CCT GTG CTA TGG ACG GAT ATA CAT CAT GGG GCA CGC TTT GGC GAT GGA TGC AAT CGA CGC GTT ATT GAG CGG CAA TTG ACG TAT GCG CTT GCT GAC AAG CGA ATG CCA ACG AAG ACC AAA GGA TTT CAG AAA GGA AGG TGC TCT TGC GGT AAT GAT CAA GGG AAG TGC TTT CTG TAT CAA TCT GTA GAC GCG TCT CCG TCT AGT GAT GCA TTT CTG TGT CTG GAA GAT GCG ACG AAG GGC TTC TTT CAC GAG GTT CAT CAG TGT AAG TGT GAC GAA GGG AAT CTA TCA CTG GAC GTT TCC AAG CAA GAG CGG TCG AAG GCT GCT ACC --- --- --- --- TAT TCA TTT TGT ATT GCA CGA GTC AGG --- --- PITG_23253T0 --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ATG ATT CAT GGG GCA CGG GTT GGC GAT GGA TGC AAT CGA CGC GAT ATT GAG CGG CAA TTG 
ACG TAT GCG CTA GCT GAC AAG CGG ATG CCA ACG AAG ACC AAA GGA TTT AAG AAA GAA AGG TGC TCT TGC GGT AAT GAT CAA GGG AAG TGT TTT CTG TAT CAG CCT GCA GAC GGG TCT CCG TCC GAA GAT GCA TTT CTG TGT CTG GAA GAT GCG ACG AAG GGC TTC TTT CAC GAG GTT CAT CAG TGT AAA TGC GTC GAA GGG AAT CTA TCA CTG GAC GTT TTC GAG CAA GAG CGG TTA AGG CTG CTG GTC CCG --- ACG ACC TAT TCA TCT TAT ATT GCA CGA GCC --- --- --- PITG_23257T0 --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ATG CGC TTG CTG ACA AGC GGA --- --- --- --- --- TGC CAA CGA AGA CCC AAG GAT TTA AAG AAA AGA AGG TGC TCT TGC GGT AAT GAT CAA GGG AAG TGT TTT CTG TAT CAG CCT GCA GAC GGG CCT CCA TCC GGA GAT GCA TTT CTG TGT CTG GAA GAT GCG ACG AAG GGC TTC TTT CAC GAG GTT CAT CAG TGT AAA TGC GTC GAA GGG AAT CTA TCA CTG GAC GTT TTC GAG CAA GAG CGG TTG AAG GCT GCT GGA CCT --- --- --- ATT CAT CTT ATA TTG CAC GAG CCT AGT GAC GGT PITG_23264T0 --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ATG CCA ACG AAG ACC AAA GGA TTT CAG AAA GGA AGG TGC TCT TGC GGT AAT GAT CAA GGA AAG TGC TTT CTG TAT CAA TCT GCA GAC GGG TCT CCG TTC GGA GAT GCA TTT CTG TGT CTG GAA GAT GCG ACG AAG GGC TTC TTT CAC GAG GTT CAT CAG TGT AAA TGC GTC GAA GGG AAT CTA TCA CTG GAC GTT TTC AAG CAA GAG CGG TCG AAG GCT GCT GGT CCC GAC GAC CTA TTC ATC TTG TGT TGC ACG AGC CCA AGT GAC GGC PITG_23267T0 --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ATG CCA ACG AAG ACC AAA GGA TTT CAG AAA GGA AGG TGC TCT TGC GGT AAT GAT CAA GGG AAG TGC TTT CTG TAT CAA TCT GTA GAC GCG TCT CCG TCT AGT GAT GCA TTT CTG TGT CTG GAA GAT GCG ACG AAG GGC TTC TTT CAC GAG GTT CAT CAG TGT AAG TGT GAC GAA GGG AAT CTA TCA CTG GAC GTT TCC AAG CAA GAG CGG TCG AAG GCT GCT ACC --- --- --- --- TAT 
TCA TTT TGT ATT GCA CGA GTC AAG --- --- PITG_23293T0 --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ATG CCA ACG AAG ACC AAA GGA TTT CAG AAA GGA AGG TGC TCT TGC GGT AAT GAT CAA GGG AAG TGC TTT CTG TAT CAA TCT GTA GAC GCG TCT CCG TCT AGT GAT GCA TTT CTG TGT CTG GAA GAT GCG ACG AAG GGC TTC TTT CAC GAG GTT CAT CAG TGT AAG TGC GAC GAA GGG AAT CTA TCA CTG TCA GAC TCC --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- Printing out site pattern counts 6 363 P PITG_23265T0 --- --- --- --- --- --- AAA AAA AAA AAG AAG AAG AAG AAG AAG AAT AAT ACC ACC ACG ACG ACG ACG AGG AGG AGT ATA ATG ATG ATT ATT CAA CAA CAA CAA CAA CAC CAG CAG CAG CAT CAT CAT CCA CCG CCT CGA CGA CGA CGC CGC CGG CGG CTA CTA CTG CTT GAA GAA GAC GAC GAC GAC GAG GAG GAG GAG GAT GAT GAT GCA GCA GCA GCC GCG GCG GCG GCT GCT GCT GGA GGA GGA GGC GGC GGG GGG GGG GGT GTA GTC GTG GTT GTT GTT TAT TAT TAT TCA TCA TCC TCG TCG TCT TCT TCT TCT TGC TGC TGC TGG TGT TGT TGT TTC TTC TTG TTT TTT TTT TTT PITG_23253T0 --- --- --- ACC ACG CCG --- AAA AAA AAA AAG AAG AAG AGG GAG AAT AAT ACC GTC --- ACG ACG ACG --- AGG GAA ATG --- ATG ATT ATT --- CAA CAA CAA CAG CAC --- AAG CAG ATT CAT CAT CCA CCG --- CGA CGA CGG CGC CGG CGG CGG --- CTA CTG CTA --- GAA GAC GAC GAC GTC --- GAG GAG GAG --- GAT GAT GCA GCA GCA --- GCG GCG GGG --- CTG GCT GAA GGA GGA GGC GGC GGG GGG GGG GGT GCA GCC --- GAT GTT GTT TAT TAT TAT TCA TCA TTC --- TTA CCT TCC TCT TCT TGC TGC TGT --- TAT TGC TGT --- TTC TTG GTT TCT TTT TTT PITG_23257T0 --- GAC GGT --- --- CCT --- AAA AAG AAA --- AAG AGA AAG GAG --- AAT CCC GGA --- ACA ACG CGA AGT AGG GGA --- --- TGC --- TTG --- CAA CAA TTG CAG CAC --- AAG CAG --- --- CAT CAA CCA --- --- GAG --- --- --- CGC CGG --- CTA CTG --- --- GAA --- GAC GAC GTC --- ATG GAG GAG --- --- GAT --- CAC GCA --- GCG GGA GGG --- GCT --- AGA --- GAT --- GGC --- GGG GGG GGT GCA 
CCT --- --- GTT GTT AGC ATT TAT CAT TCA TTC --- TTG CCT TCC CCT TCT --- TGC TGT --- ATA TGC TGT --- TTC CTG --- CTT TTA TTT PITG_23264T0 GAC GAC GGC CTA GAC CCC --- AAA AAA AAA --- AAG AAG AAG AAG --- AAT ACC GGT --- --- ACG ACG AGT AGG GGA --- --- ATG --- TGC --- CAA CAA --- CAA CAC --- CAG CAG --- --- CAT CCA CCG --- --- AGC --- --- --- --- CGG --- CTA CTG --- --- GAA --- GAC GAC GTC --- --- GAG GAG --- --- GAT --- ACG GCA --- GCG --- GGG --- GCT --- GGA --- GGA --- GGC --- GGA GGG GGT GCA CCA --- --- GTT GTT --- TTC TAT ATC TCA TTC --- TCG TCT TTC TCT TCT --- TGC TGC --- TGT TGC TGT --- TTC --- --- TTG TTT TTT PITG_23267T0 --- --- --- --- --- --- --- AAA AAA AAG --- AAG AAG AAG AAG --- AAT ACC ACC --- --- ACG ACG AAG AGG AGT --- --- ATG --- ATT --- CAA CAA --- CAA CAC --- CAG CAG --- --- CAT CCA CCG --- --- CGA --- --- --- --- CGG --- CTA CTG --- --- GAA --- GAC GAC GAC --- --- GAG GAG --- --- GAT --- GCA GCA --- GCG --- GCG --- GCT --- GGA --- GGA --- GGC --- GGG GGG GGT GTA GTC --- --- GTT GTT --- TAT TAT TCA TCA TCC --- TCG TCT TCT TCT TCT --- TGC TGC --- TGT TGT TGT --- TTC --- --- TTT TTT TTT PITG_23293T0 --- --- --- --- --- --- --- AAA AAA AAG --- AAG AAG --- --- --- AAT ACC --- --- --- ACG ACG --- AGG AGT --- --- ATG --- --- --- --- CAA --- CAA CAC --- CAG CAG --- --- CAT CCA CCG --- --- --- --- --- --- --- --- --- CTA CTG --- --- GAA --- GAC TCA GAC --- --- --- GAG --- --- GAT --- --- GCA --- GCG --- GCG --- --- --- GGA --- GGA --- GGC --- GGG GGG GGT GTA --- --- --- GAC GTT --- --- TAT --- TCA TCC --- --- TCT TCT TCT TCT --- TGC TGC --- --- TGC TGT --- TTC --- --- --- TTT TTT 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 2 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 3 CODONML (in paml version 4.3, August 2009) alignment.phy Model: One dN/dS ratio for branches Codon frequency model: F3x4 Site-class models: ns 
= 6 ls = 134 Codon usage in sequences -------------------------------------------------------------------------------------------------------------- Phe TTT 6 4 3 4 5 4 | Ser TCT 4 3 1 3 4 4 | Tyr TAT 3 4 1 1 2 1 | Cys TGT 4 3 3 3 4 2 TTC 2 2 2 4 1 1 | TCC 1 1 1 0 1 1 | TAC 0 0 0 0 0 0 | TGC 4 4 4 5 3 4 Leu TTA 0 1 1 0 0 0 | TCA 2 2 1 1 2 2 | *** TAA 0 0 0 0 0 0 | *** TGA 0 0 0 0 0 0 TTG 1 1 3 1 0 0 | TCG 2 0 0 1 1 0 | TAG 0 0 0 0 0 0 | Trp TGG 1 0 0 0 0 0 -------------------------------------------------------------------------------------------------------------- Leu CTT 1 0 1 0 0 0 | Pro CCT 1 1 4 0 0 0 | His CAT 3 2 2 1 1 1 | Arg CGT 0 0 0 0 0 0 CTC 0 0 0 0 0 0 | CCC 0 0 1 1 0 0 | CAC 1 1 2 1 1 1 | CGC 2 1 1 0 0 0 CTA 2 2 1 2 1 1 | CCA 1 1 1 2 1 1 | Gln CAA 5 3 3 3 3 2 | CGA 3 2 1 0 1 0 CTG 4 6 5 4 4 4 | CCG 1 2 0 1 1 1 | CAG 3 2 2 2 2 2 | CGG 2 4 1 1 1 0 -------------------------------------------------------------------------------------------------------------- Ile ATT 2 3 1 0 1 0 | Thr ACT 0 0 0 0 0 0 | Asn AAT 3 3 2 2 2 2 | Ser AGT 1 0 1 1 1 1 ATC 0 0 0 1 0 0 | ACC 2 2 0 1 2 1 | AAC 0 0 0 0 0 0 | AGC 0 0 1 1 0 0 ATA 1 0 1 0 0 0 | ACA 0 0 1 0 0 0 | Lys AAA 3 3 2 3 2 2 | Arg AGA 0 0 2 0 0 0 Met ATG 2 2 1 1 1 1 | ACG 4 4 1 3 2 2 | AAG 7 5 5 5 7 4 | AGG 2 2 1 1 1 1 -------------------------------------------------------------------------------------------------------------- Val GTT 3 3 2 2 2 1 | Ala GCT 4 1 2 2 2 0 | Asp GAT 5 5 4 3 3 3 | Gly GGT 1 1 2 2 1 1 GTC 1 2 1 1 1 0 | GCC 1 1 0 0 0 0 | GAC 4 3 3 5 3 3 | GGC 2 2 1 2 1 1 GTA 1 0 0 0 1 1 | GCA 3 4 2 2 2 1 | Glu GAA 3 4 2 2 2 2 | GGA 3 2 3 4 2 2 GTG 1 0 0 0 0 0 | GCG 3 2 1 1 2 2 | GAG 4 4 4 2 2 1 | GGG 3 4 3 2 2 2 -------------------------------------------------------------------------------------------------------------- Codon position x base (3x4) table for each sequence. 
#1: PITG_23265T0 position 1: T:0.23438 C:0.22656 A:0.21094 G:0.32812 position 2: T:0.21094 C:0.22656 A:0.34375 G:0.21875 position 3: T:0.32031 C:0.15625 A:0.21094 G:0.31250 Average T:0.25521 C:0.20312 A:0.25521 G:0.28646 #2: PITG_23253T0 position 1: T:0.21930 C:0.23684 A:0.21053 G:0.33333 position 2: T:0.22807 C:0.21053 A:0.34211 G:0.21930 position 3: T:0.28947 C:0.16667 A:0.21053 G:0.33333 Average T:0.24561 C:0.20468 A:0.25439 G:0.29532 #3: PITG_23257T0 position 1: T:0.21277 C:0.26596 A:0.20213 G:0.31915 position 2: T:0.23404 C:0.17021 A:0.34043 G:0.25532 position 3: T:0.30851 C:0.18085 A:0.22340 G:0.28723 Average T:0.25177 C:0.20567 A:0.25532 G:0.28723 #4: PITG_23264T0 position 1: T:0.25556 C:0.20000 A:0.21111 G:0.33333 position 2: T:0.22222 C:0.20000 A:0.33333 G:0.24444 position 3: T:0.26667 C:0.24444 A:0.21111 G:0.27778 Average T:0.24815 C:0.21481 A:0.25185 G:0.28519 #5: PITG_23267T0 position 1: T:0.27381 C:0.19048 A:0.22619 G:0.30952 position 2: T:0.20238 C:0.23810 A:0.35714 G:0.20238 position 3: T:0.33333 C:0.15476 A:0.20238 G:0.30952 Average T:0.26984 C:0.19444 A:0.26190 G:0.27381 #6: PITG_23293T0 position 1: T:0.28788 C:0.19697 A:0.21212 G:0.30303 position 2: T:0.19697 C:0.22727 A:0.36364 G:0.21212 position 3: T:0.30303 C:0.18182 A:0.21212 G:0.30303 Average T:0.26263 C:0.20202 A:0.26263 G:0.27273 Sums of codon usage counts ------------------------------------------------------------------------------ Phe F TTT 26 | Ser S TCT 19 | Tyr Y TAT 12 | Cys C TGT 19 TTC 12 | TCC 5 | TAC 0 | TGC 24 Leu L TTA 2 | TCA 10 | *** * TAA 0 | *** * TGA 0 TTG 6 | TCG 4 | TAG 0 | Trp W TGG 1 ------------------------------------------------------------------------------ Leu L CTT 2 | Pro P CCT 6 | His H CAT 10 | Arg R CGT 0 CTC 0 | CCC 2 | CAC 7 | CGC 4 CTA 9 | CCA 7 | Gln Q CAA 19 | CGA 7 CTG 27 | CCG 6 | CAG 13 | CGG 9 ------------------------------------------------------------------------------ Ile I ATT 7 | Thr T ACT 0 | Asn N AAT 14 | Ser S AGT 5 ATC 1 | ACC 8 | AAC 0 | 
AGC 2 ATA 2 | ACA 1 | Lys K AAA 15 | Arg R AGA 2 Met M ATG 8 | ACG 16 | AAG 33 | AGG 8 ------------------------------------------------------------------------------ Val V GTT 13 | Ala A GCT 11 | Asp D GAT 23 | Gly G GGT 8 GTC 6 | GCC 2 | GAC 21 | GGC 9 GTA 3 | GCA 14 | Glu E GAA 15 | GGA 16 GTG 1 | GCG 11 | GAG 17 | GGG 16 ------------------------------------------------------------------------------ (Ambiguity data are not used in the counts.) Codon position x base (3x4) table, overall position 1: T:0.24306 C:0.22222 A:0.21181 G:0.32292 position 2: T:0.21701 C:0.21181 A:0.34549 G:0.22569 position 3: T:0.30382 C:0.17882 A:0.21181 G:0.30556 Average T:0.25463 C:0.20428 A:0.25637 G:0.28472 Codon frequencies under model, for use in evolver (TTT TTC TTA TTG ... GGG): 0.01695921 0.00998171 0.01182300 0.01705612 0.01655219 0.00974215 0.01153924 0.01664678 0.02699907 0.01589088 0.00000000 0.00000000 0.01763758 0.01038098 0.00000000 0.01773837 0.01550557 0.00912613 0.01080960 0.01559417 0.01513343 0.00890711 0.01055017 0.01521991 0.02468486 0.01452881 0.01720888 0.02482592 0.01612579 0.00949118 0.01124198 0.01621794 0.01477874 0.00869835 0.01030290 0.01486319 0.01442405 0.00848959 0.01005563 0.01450648 0.02352776 0.01384777 0.01640221 0.02366221 0.01536989 0.00904628 0.01071501 0.01545772 0.02253153 0.01326141 0.01570769 0.02266028 0.02199077 0.01294314 0.01533071 0.02211643 0.03587019 0.02111217 0.02500665 0.03607517 0.02343279 0.01379187 0.01633600 0.02356669 Nei & Gojobori 1986. dN/dS (dN, dS) (Pairwise deletion) (Note: This matrix is not used in later m.l. analysis. Use runmode = -2 for ML pairwise comparison.) 
PITG_23265T0 PITG_23253T0 0.7388 (0.1143 0.1547) PITG_23257T0 0.5661 (0.2657 0.4694) 0.5585 (0.2359 0.4223) PITG_23264T0 0.4754 (0.1086 0.2285) 0.4895 (0.1580 0.3229) 0.5040 (0.1709 0.3391) PITG_23267T0 -1.0000 (0.0052 0.0000) 0.7996 (0.1242 0.1553) 0.7913 (0.2544 0.3214) 0.4998 (0.1145 0.2291) PITG_23293T0 0.3307 (0.0214 0.0648) 0.6077 (0.0901 0.1483) 0.5382 (0.1653 0.3071) 0.5146 (0.0663 0.1288) 0.3307 (0.0214 0.0648) Model 0: one-ratio TREE # 1: ((((1, 2), 3), 4), 5, 6); MP score: -1 check convergence.. lnL(ntime: 9 np: 11): -1125.800375 +0.000000 7..8 8..9 9..10 10..1 10..2 9..3 8..4 7..5 7..6 0.000004 0.000004 0.000004 0.000004 0.400074 0.952614 0.445507 0.011814 0.092242 2.143108 0.585889 Note: Branch length is defined as number of nucleotide substitutions per codon (not per neucleotide site). tree length = 1.90227 ((((1: 0.000004, 2: 0.400074): 0.000004, 3: 0.952614): 0.000004, 4: 0.445507): 0.000004, 5: 0.011814, 6: 0.092242); ((((PITG_23265T0: 0.000004, PITG_23253T0: 0.400074): 0.000004, PITG_23257T0: 0.952614): 0.000004, PITG_23264T0: 0.445507): 0.000004, PITG_23267T0: 0.011814, PITG_23293T0: 0.092242); Detailed output identifying parameters kappa (ts/tv) = 2.14311 omega (dN/dS) = 0.58589 dN & dS for each branch branch t N S dN/dS dN dS N*dN S*dS 7..8 0.000 300.3 101.7 0.5859 0.0000 0.0000 0.0 0.0 8..9 0.000 300.3 101.7 0.5859 0.0000 0.0000 0.0 0.0 9..10 0.000 300.3 101.7 0.5859 0.0000 0.0000 0.0 0.0 10..1 0.000 300.3 101.7 0.5859 0.0000 0.0000 0.0 0.0 10..2 0.400 300.3 101.7 0.5859 0.1131 0.1931 34.0 19.6 9..3 0.953 300.3 101.7 0.5859 0.2694 0.4598 80.9 46.7 8..4 0.446 300.3 101.7 0.5859 0.1260 0.2150 37.8 21.9 7..5 0.012 300.3 101.7 0.5859 0.0033 0.0057 1.0 0.6 7..6 0.092 300.3 101.7 0.5859 0.0261 0.0445 7.8 4.5 tree length for dN: 0.5379 tree length for dS: 0.9182 Time used: 0:08 Model 3: discrete (3 categories) TREE # 1: ((((1, 2), 3), 4), 5, 6); MP score: -1 lnL(ntime: 9 np: 15): -1070.964046 +0.000000 7..8 8..9 9..10 10..1 10..2 9..3 8..4 7..5 7..6 
0.014562 0.000004 0.000004 0.000004 0.762597 2.721710 0.924326 0.000004 0.237433 2.658917 0.564128 0.356131 0.009283 1.982520 23.441603 Note: Branch length is defined as number of nucleotide substitutions per codon (not per neucleotide site). tree length = 4.66064 ((((1: 0.000004, 2: 0.762597): 0.000004, 3: 2.721710): 0.000004, 4: 0.924326): 0.014562, 5: 0.000004, 6: 0.237433); ((((PITG_23265T0: 0.000004, PITG_23253T0: 0.762597): 0.000004, PITG_23257T0: 2.721710): 0.000004, PITG_23264T0: 0.924326): 0.014562, PITG_23267T0: 0.000004, PITG_23293T0: 0.237433); Detailed output identifying parameters kappa (ts/tv) = 2.65892 dN/dS (w) for site classes (K=3) p: 0.56413 0.35613 0.07974 w: 0.00928 1.98252 23.44160 dN & dS for each branch branch t N S dN/dS dN dS N*dN S*dS 7..8 0.015 296.6 105.4 2.5805 0.0058 0.0022 1.7 0.2 8..9 0.000 296.6 105.4 2.5805 0.0000 0.0000 0.0 0.0 9..10 0.000 296.6 105.4 2.5805 0.0000 0.0000 0.0 0.0 10..1 0.000 296.6 105.4 2.5805 0.0000 0.0000 0.0 0.0 10..2 0.763 296.6 105.4 2.5805 0.3029 0.1174 89.8 12.4 9..3 2.722 296.6 105.4 2.5805 1.0809 0.4189 320.5 44.2 8..4 0.924 296.6 105.4 2.5805 0.3671 0.1422 108.9 15.0 7..5 0.000 296.6 105.4 2.5805 0.0000 0.0000 0.0 0.0 7..6 0.237 296.6 105.4 2.5805 0.0943 0.0365 28.0 3.9 Naive Empirical Bayes (NEB) analysis Positively selected sites (*: P>95%; **: P>99%) (amino acids refer to 1st sequence: PITG_23265T0) Pr(w>1) post mean +- SE for w 17 I 0.988* 3.293 18 H 1.000** 17.975 23 F 0.991** 6.283 31 V 0.990** 6.051 33 E 1.000** 2.550 35 Q 0.988* 5.010 38 Y 1.000** 3.432 39 A 0.970* 2.517 45 M 1.000** 2.007 46 P 0.961* 1.914 47 T 0.989* 1.998 48 K 0.866 1.722 49 T 0.970* 1.932 51 G 0.967* 1.936 52 F 0.901 1.788 53 Q 1.000** 2.023 55 G 1.000** 2.003 71 S 1.000** 1.995 72 V 1.000** 2.230 74 A 1.000** 2.529 75 S 0.906 1.798 77 S 0.937 1.904 78 S 1.000** 19.276 101 D 1.000** 2.825 108 D 1.000** 5.270 109 V 0.890 1.816 110 S 1.000** 2.274 111 K 0.999** 2.034 115 S 1.000** 2.138 116 K 0.853 1.717 117 A 1.000** 4.916 
118 A 1.000** 4.916 119 T 1.000** 23.358 122 * 1.000** 16.228 123 * 1.000** 20.300 124 Y 1.000** 3.572 125 S 1.000** 22.544 126 F 1.000** 2.265 127 C 1.000** 3.943 128 I 1.000** 13.386 129 A 1.000** 7.238 130 R 1.000** 13.002 131 V 1.000** 22.797 132 R 1.000** 10.800 Note: more than one w>1. Check rst for details Time used: 0:49 Time used: 0:49 bio-1.4.3.0001/test/data/paml/codeml/models/results7-8.txt0000644000004100000410000005256412200110570022744 0ustar www-datawww-data seed used = 727954689 6 402 PITG_23265T0 ATG AAA TCG CAA GCT TTC GCC CAG GAG GAA CCT GTG CTA TGG ACG GAT ATA CAT CAT GGG GCA CGC TTT GGC GAT GGA TGC AAT CGA CGC GTT ATT GAG CGG CAA TTG ACG TAT GCG CTT GCT GAC AAG CGA ATG CCA ACG AAG ACC AAA GGA TTT CAG AAA GGA AGG TGC TCT TGC GGT AAT GAT CAA GGG AAG TGC TTT CTG TAT CAA TCT GTA GAC GCG TCT CCG TCT AGT GAT GCA TTT CTG TGT CTG GAA GAT GCG ACG AAG GGC TTC TTT CAC GAG GTT CAT CAG TGT AAG TGT GAC GAA GGG AAT CTA TCA CTG GAC GTT TCC AAG CAA GAG CGG TCG AAG GCT GCT ACC --- --- --- --- TAT TCA TTT TGT ATT GCA CGA GTC AGG --- --- PITG_23253T0 --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ATG ATT CAT GGG GCA CGG GTT GGC GAT GGA TGC AAT CGA CGC GAT ATT GAG CGG CAA TTG ACG TAT GCG CTA GCT GAC AAG CGG ATG CCA ACG AAG ACC AAA GGA TTT AAG AAA GAA AGG TGC TCT TGC GGT AAT GAT CAA GGG AAG TGT TTT CTG TAT CAG CCT GCA GAC GGG TCT CCG TCC GAA GAT GCA TTT CTG TGT CTG GAA GAT GCG ACG AAG GGC TTC TTT CAC GAG GTT CAT CAG TGT AAA TGC GTC GAA GGG AAT CTA TCA CTG GAC GTT TTC GAG CAA GAG CGG TTA AGG CTG CTG GTC CCG --- ACG ACC TAT TCA TCT TAT ATT GCA CGA GCC --- --- --- PITG_23257T0 --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ATG CGC TTG CTG ACA AGC GGA --- --- --- --- --- TGC CAA CGA AGA CCC AAG GAT TTA AAG AAA AGA AGG TGC TCT TGC GGT AAT GAT CAA GGG AAG TGT TTT CTG TAT CAG CCT GCA GAC GGG CCT CCA TCC GGA GAT GCA TTT CTG TGT CTG GAA GAT GCG ACG AAG GGC TTC TTT CAC GAG GTT 
CAT CAG TGT AAA TGC GTC GAA GGG AAT CTA TCA CTG GAC GTT TTC GAG CAA GAG CGG TTG AAG GCT GCT GGA CCT --- --- --- ATT CAT CTT ATA TTG CAC GAG CCT AGT GAC GGT PITG_23264T0 --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ATG CCA ACG AAG ACC AAA GGA TTT CAG AAA GGA AGG TGC TCT TGC GGT AAT GAT CAA GGA AAG TGC TTT CTG TAT CAA TCT GCA GAC GGG TCT CCG TTC GGA GAT GCA TTT CTG TGT CTG GAA GAT GCG ACG AAG GGC TTC TTT CAC GAG GTT CAT CAG TGT AAA TGC GTC GAA GGG AAT CTA TCA CTG GAC GTT TTC AAG CAA GAG CGG TCG AAG GCT GCT GGT CCC GAC GAC CTA TTC ATC TTG TGT TGC ACG AGC CCA AGT GAC GGC PITG_23267T0 --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ATG CCA ACG AAG ACC AAA GGA TTT CAG AAA GGA AGG TGC TCT TGC GGT AAT GAT CAA GGG AAG TGC TTT CTG TAT CAA TCT GTA GAC GCG TCT CCG TCT AGT GAT GCA TTT CTG TGT CTG GAA GAT GCG ACG AAG GGC TTC TTT CAC GAG GTT CAT CAG TGT AAG TGT GAC GAA GGG AAT CTA TCA CTG GAC GTT TCC AAG CAA GAG CGG TCG AAG GCT GCT ACC --- --- --- --- TAT TCA TTT TGT ATT GCA CGA GTC AAG --- --- PITG_23293T0 --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ATG CCA ACG AAG ACC AAA GGA TTT CAG AAA GGA AGG TGC TCT TGC GGT AAT GAT CAA GGG AAG TGC TTT CTG TAT CAA TCT GTA GAC GCG TCT CCG TCT AGT GAT GCA TTT CTG TGT CTG GAA GAT GCG ACG AAG GGC TTC TTT CAC GAG GTT CAT CAG TGT AAG TGC GAC GAA GGG AAT CTA TCA CTG TCA GAC TCC --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- Printing out site pattern counts 6 363 P PITG_23265T0 --- --- --- --- --- --- AAA AAA AAA AAG AAG AAG AAG AAG AAG AAT AAT ACC ACC ACG ACG ACG ACG AGG AGG AGT ATA ATG ATG ATT ATT CAA CAA CAA CAA 
CAA CAC CAG CAG CAG CAT CAT CAT CCA CCG CCT CGA CGA CGA CGC CGC CGG CGG CTA CTA CTG CTT GAA GAA GAC GAC GAC GAC GAG GAG GAG GAG GAT GAT GAT GCA GCA GCA GCC GCG GCG GCG GCT GCT GCT GGA GGA GGA GGC GGC GGG GGG GGG GGT GTA GTC GTG GTT GTT GTT TAT TAT TAT TCA TCA TCC TCG TCG TCT TCT TCT TCT TGC TGC TGC TGG TGT TGT TGT TTC TTC TTG TTT TTT TTT TTT PITG_23253T0 --- --- --- ACC ACG CCG --- AAA AAA AAA AAG AAG AAG AGG GAG AAT AAT ACC GTC --- ACG ACG ACG --- AGG GAA ATG --- ATG ATT ATT --- CAA CAA CAA CAG CAC --- AAG CAG ATT CAT CAT CCA CCG --- CGA CGA CGG CGC CGG CGG CGG --- CTA CTG CTA --- GAA GAC GAC GAC GTC --- GAG GAG GAG --- GAT GAT GCA GCA GCA --- GCG GCG GGG --- CTG GCT GAA GGA GGA GGC GGC GGG GGG GGG GGT GCA GCC --- GAT GTT GTT TAT TAT TAT TCA TCA TTC --- TTA CCT TCC TCT TCT TGC TGC TGT --- TAT TGC TGT --- TTC TTG GTT TCT TTT TTT PITG_23257T0 --- GAC GGT --- --- CCT --- AAA AAG AAA --- AAG AGA AAG GAG --- AAT CCC GGA --- ACA ACG CGA AGT AGG GGA --- --- TGC --- TTG --- CAA CAA TTG CAG CAC --- AAG CAG --- --- CAT CAA CCA --- --- GAG --- --- --- CGC CGG --- CTA CTG --- --- GAA --- GAC GAC GTC --- ATG GAG GAG --- --- GAT --- CAC GCA --- GCG GGA GGG --- GCT --- AGA --- GAT --- GGC --- GGG GGG GGT GCA CCT --- --- GTT GTT AGC ATT TAT CAT TCA TTC --- TTG CCT TCC CCT TCT --- TGC TGT --- ATA TGC TGT --- TTC CTG --- CTT TTA TTT PITG_23264T0 GAC GAC GGC CTA GAC CCC --- AAA AAA AAA --- AAG AAG AAG AAG --- AAT ACC GGT --- --- ACG ACG AGT AGG GGA --- --- ATG --- TGC --- CAA CAA --- CAA CAC --- CAG CAG --- --- CAT CCA CCG --- --- AGC --- --- --- --- CGG --- CTA CTG --- --- GAA --- GAC GAC GTC --- --- GAG GAG --- --- GAT --- ACG GCA --- GCG --- GGG --- GCT --- GGA --- GGA --- GGC --- GGA GGG GGT GCA CCA --- --- GTT GTT --- TTC TAT ATC TCA TTC --- TCG TCT TTC TCT TCT --- TGC TGC --- TGT TGC TGT --- TTC --- --- TTG TTT TTT PITG_23267T0 --- --- --- --- --- --- --- AAA AAA AAG --- AAG AAG AAG AAG --- AAT ACC ACC --- --- ACG ACG AAG AGG AGT --- --- ATG --- ATT --- CAA CAA --- CAA CAC --- 
CAG CAG --- --- CAT CCA CCG --- --- CGA --- --- --- --- CGG --- CTA CTG --- --- GAA --- GAC GAC GAC --- --- GAG GAG --- --- GAT --- GCA GCA --- GCG --- GCG --- GCT --- GGA --- GGA --- GGC --- GGG GGG GGT GTA GTC --- --- GTT GTT --- TAT TAT TCA TCA TCC --- TCG TCT TCT TCT TCT --- TGC TGC --- TGT TGT TGT --- TTC --- --- TTT TTT TTT PITG_23293T0 --- --- --- --- --- --- --- AAA AAA AAG --- AAG AAG --- --- --- AAT ACC --- --- --- ACG ACG --- AGG AGT --- --- ATG --- --- --- --- CAA --- CAA CAC --- CAG CAG --- --- CAT CCA CCG --- --- --- --- --- --- --- --- --- CTA CTG --- --- GAA --- GAC TCA GAC --- --- --- GAG --- --- GAT --- --- GCA --- GCG --- GCG --- --- --- GGA --- GGA --- GGC --- GGG GGG GGT GTA --- --- --- GAC GTT --- --- TAT --- TCA TCC --- --- TCT TCT TCT TCT --- TGC TGC --- --- TGC TGT --- TTC --- --- --- TTT TTT 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 2 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 3 CODONML (in paml version 4.3, August 2009) alignment.phy Model: One dN/dS ratio for branches Codon frequency model: F3x4 Site-class models: ns = 6 ls = 134 Codon usage in sequences -------------------------------------------------------------------------------------------------------------- Phe TTT 6 4 3 4 5 4 | Ser TCT 4 3 1 3 4 4 | Tyr TAT 3 4 1 1 2 1 | Cys TGT 4 3 3 3 4 2 TTC 2 2 2 4 1 1 | TCC 1 1 1 0 1 1 | TAC 0 0 0 0 0 0 | TGC 4 4 4 5 3 4 Leu TTA 0 1 1 0 0 0 | TCA 2 2 1 1 2 2 | *** TAA 0 0 0 0 0 0 | *** TGA 0 0 0 0 0 0 TTG 1 1 3 1 0 0 | TCG 2 0 0 1 1 0 | TAG 0 0 0 0 0 0 | Trp TGG 1 0 0 0 0 0 -------------------------------------------------------------------------------------------------------------- Leu CTT 1 0 1 0 0 0 | Pro CCT 1 1 4 0 0 0 | His CAT 3 2 2 1 1 1 | Arg CGT 0 0 0 0 0 0 CTC 0 0 0 0 0 0 | CCC 0 0 1 1 0 0 | CAC 1 1 2 1 1 1 | CGC 2 1 1 0 0 0 CTA 2 2 1 2 1 1 | CCA 1 1 1 2 1 1 | Gln CAA 5 3 3 3 3 2 | 
CGA 3 2 1 0 1 0 CTG 4 6 5 4 4 4 | CCG 1 2 0 1 1 1 | CAG 3 2 2 2 2 2 | CGG 2 4 1 1 1 0 -------------------------------------------------------------------------------------------------------------- Ile ATT 2 3 1 0 1 0 | Thr ACT 0 0 0 0 0 0 | Asn AAT 3 3 2 2 2 2 | Ser AGT 1 0 1 1 1 1 ATC 0 0 0 1 0 0 | ACC 2 2 0 1 2 1 | AAC 0 0 0 0 0 0 | AGC 0 0 1 1 0 0 ATA 1 0 1 0 0 0 | ACA 0 0 1 0 0 0 | Lys AAA 3 3 2 3 2 2 | Arg AGA 0 0 2 0 0 0 Met ATG 2 2 1 1 1 1 | ACG 4 4 1 3 2 2 | AAG 7 5 5 5 7 4 | AGG 2 2 1 1 1 1 -------------------------------------------------------------------------------------------------------------- Val GTT 3 3 2 2 2 1 | Ala GCT 4 1 2 2 2 0 | Asp GAT 5 5 4 3 3 3 | Gly GGT 1 1 2 2 1 1 GTC 1 2 1 1 1 0 | GCC 1 1 0 0 0 0 | GAC 4 3 3 5 3 3 | GGC 2 2 1 2 1 1 GTA 1 0 0 0 1 1 | GCA 3 4 2 2 2 1 | Glu GAA 3 4 2 2 2 2 | GGA 3 2 3 4 2 2 GTG 1 0 0 0 0 0 | GCG 3 2 1 1 2 2 | GAG 4 4 4 2 2 1 | GGG 3 4 3 2 2 2 -------------------------------------------------------------------------------------------------------------- Codon position x base (3x4) table for each sequence. 
#1: PITG_23265T0 position 1: T:0.23438 C:0.22656 A:0.21094 G:0.32812 position 2: T:0.21094 C:0.22656 A:0.34375 G:0.21875 position 3: T:0.32031 C:0.15625 A:0.21094 G:0.31250 Average T:0.25521 C:0.20312 A:0.25521 G:0.28646 #2: PITG_23253T0 position 1: T:0.21930 C:0.23684 A:0.21053 G:0.33333 position 2: T:0.22807 C:0.21053 A:0.34211 G:0.21930 position 3: T:0.28947 C:0.16667 A:0.21053 G:0.33333 Average T:0.24561 C:0.20468 A:0.25439 G:0.29532 #3: PITG_23257T0 position 1: T:0.21277 C:0.26596 A:0.20213 G:0.31915 position 2: T:0.23404 C:0.17021 A:0.34043 G:0.25532 position 3: T:0.30851 C:0.18085 A:0.22340 G:0.28723 Average T:0.25177 C:0.20567 A:0.25532 G:0.28723 #4: PITG_23264T0 position 1: T:0.25556 C:0.20000 A:0.21111 G:0.33333 position 2: T:0.22222 C:0.20000 A:0.33333 G:0.24444 position 3: T:0.26667 C:0.24444 A:0.21111 G:0.27778 Average T:0.24815 C:0.21481 A:0.25185 G:0.28519 #5: PITG_23267T0 position 1: T:0.27381 C:0.19048 A:0.22619 G:0.30952 position 2: T:0.20238 C:0.23810 A:0.35714 G:0.20238 position 3: T:0.33333 C:0.15476 A:0.20238 G:0.30952 Average T:0.26984 C:0.19444 A:0.26190 G:0.27381 #6: PITG_23293T0 position 1: T:0.28788 C:0.19697 A:0.21212 G:0.30303 position 2: T:0.19697 C:0.22727 A:0.36364 G:0.21212 position 3: T:0.30303 C:0.18182 A:0.21212 G:0.30303 Average T:0.26263 C:0.20202 A:0.26263 G:0.27273 Sums of codon usage counts ------------------------------------------------------------------------------ Phe F TTT 26 | Ser S TCT 19 | Tyr Y TAT 12 | Cys C TGT 19 TTC 12 | TCC 5 | TAC 0 | TGC 24 Leu L TTA 2 | TCA 10 | *** * TAA 0 | *** * TGA 0 TTG 6 | TCG 4 | TAG 0 | Trp W TGG 1 ------------------------------------------------------------------------------ Leu L CTT 2 | Pro P CCT 6 | His H CAT 10 | Arg R CGT 0 CTC 0 | CCC 2 | CAC 7 | CGC 4 CTA 9 | CCA 7 | Gln Q CAA 19 | CGA 7 CTG 27 | CCG 6 | CAG 13 | CGG 9 ------------------------------------------------------------------------------ Ile I ATT 7 | Thr T ACT 0 | Asn N AAT 14 | Ser S AGT 5 ATC 1 | ACC 8 | AAC 0 | 
AGC 2 ATA 2 | ACA 1 | Lys K AAA 15 | Arg R AGA 2 Met M ATG 8 | ACG 16 | AAG 33 | AGG 8 ------------------------------------------------------------------------------ Val V GTT 13 | Ala A GCT 11 | Asp D GAT 23 | Gly G GGT 8 GTC 6 | GCC 2 | GAC 21 | GGC 9 GTA 3 | GCA 14 | Glu E GAA 15 | GGA 16 GTG 1 | GCG 11 | GAG 17 | GGG 16 ------------------------------------------------------------------------------ (Ambiguity data are not used in the counts.) Codon position x base (3x4) table, overall position 1: T:0.24306 C:0.22222 A:0.21181 G:0.32292 position 2: T:0.21701 C:0.21181 A:0.34549 G:0.22569 position 3: T:0.30382 C:0.17882 A:0.21181 G:0.30556 Average T:0.25463 C:0.20428 A:0.25637 G:0.28472 Codon frequencies under model, for use in evolver (TTT TTC TTA TTG ... GGG): 0.01695921 0.00998171 0.01182300 0.01705612 0.01655219 0.00974215 0.01153924 0.01664678 0.02699907 0.01589088 0.00000000 0.00000000 0.01763758 0.01038098 0.00000000 0.01773837 0.01550557 0.00912613 0.01080960 0.01559417 0.01513343 0.00890711 0.01055017 0.01521991 0.02468486 0.01452881 0.01720888 0.02482592 0.01612579 0.00949118 0.01124198 0.01621794 0.01477874 0.00869835 0.01030290 0.01486319 0.01442405 0.00848959 0.01005563 0.01450648 0.02352776 0.01384777 0.01640221 0.02366221 0.01536989 0.00904628 0.01071501 0.01545772 0.02253153 0.01326141 0.01570769 0.02266028 0.02199077 0.01294314 0.01533071 0.02211643 0.03587019 0.02111217 0.02500665 0.03607517 0.02343279 0.01379187 0.01633600 0.02356669 Nei & Gojobori 1986. dN/dS (dN, dS) (Pairwise deletion) (Note: This matrix is not used in later m.l. analysis. Use runmode = -2 for ML pairwise comparison.) 
PITG_23265T0 PITG_23253T0 0.7388 (0.1143 0.1547) PITG_23257T0 0.5661 (0.2657 0.4694) 0.5585 (0.2359 0.4223) PITG_23264T0 0.4754 (0.1086 0.2285) 0.4895 (0.1580 0.3229) 0.5040 (0.1709 0.3391) PITG_23267T0 -1.0000 (0.0052 0.0000) 0.7996 (0.1242 0.1553) 0.7913 (0.2544 0.3214) 0.4998 (0.1145 0.2291) PITG_23293T0 0.3307 (0.0214 0.0648) 0.6077 (0.0901 0.1483) 0.5382 (0.1653 0.3071) 0.5146 (0.0663 0.1288) 0.3307 (0.0214 0.0648) Model 7: beta (10 categories) TREE # 1: ((((1, 2), 3), 4), 5, 6); MP score: -1 check convergence.. lnL(ntime: 9 np: 12): -1085.258650 +0.000000 7..8 8..9 9..10 10..1 10..2 9..3 8..4 7..5 7..6 0.013157 0.000004 0.000004 0.000004 0.450945 1.280990 0.536965 0.000004 0.122969 1.933218 0.005041 0.005042 Note: Branch length is defined as number of nucleotide substitutions per codon (not per neucleotide site). tree length = 2.40504 ((((1: 0.000004, 2: 0.450945): 0.000004, 3: 1.280990): 0.000004, 4: 0.536965): 0.013157, 5: 0.000004, 6: 0.122969); ((((PITG_23265T0: 0.000004, PITG_23253T0: 0.450945): 0.000004, PITG_23257T0: 1.280990): 0.000004, PITG_23264T0: 0.536965): 0.013157, PITG_23267T0: 0.000004, PITG_23293T0: 0.122969); Detailed output identifying parameters kappa (ts/tv) = 1.93322 Parameters in M7 (beta): p= 0.00504 q= 0.00504 dN/dS (w) for site classes (K=10) p: 0.10000 0.10000 0.10000 0.10000 0.10000 0.10000 0.10000 0.10000 0.10000 0.10000 w: 0.00000 0.00000 0.00000 0.00000 0.00000 1.00000 1.00000 1.00000 1.00000 1.00000 dN & dS for each branch branch t N S dN/dS dN dS N*dN S*dS 7..8 0.013 302.2 99.8 0.5000 0.0035 0.0070 1.1 0.7 8..9 0.000 302.2 99.8 0.5000 0.0000 0.0000 0.0 0.0 9..10 0.000 302.2 99.8 0.5000 0.0000 0.0000 0.0 0.0 10..1 0.000 302.2 99.8 0.5000 0.0000 0.0000 0.0 0.0 10..2 0.451 302.2 99.8 0.5000 0.1204 0.2408 36.4 24.0 9..3 1.281 302.2 99.8 0.5000 0.3420 0.6841 103.4 68.3 8..4 0.537 302.2 99.8 0.5000 0.1434 0.2868 43.3 28.6 7..5 0.000 302.2 99.8 0.5000 0.0000 0.0000 0.0 0.0 7..6 0.123 302.2 99.8 0.5000 0.0328 0.0657 9.9 6.6 Naive 
Empirical Bayes (NEB) analysis Time used: 2:02 Model 8: beta&w>1 (11 categories) TREE # 1: ((((1, 2), 3), 4), 5, 6); MP score: -1 check convergence.. lnL(ntime: 9 np: 14): -1073.575282 +0.000000 7..8 8..9 9..10 10..1 10..2 9..3 8..4 7..5 7..6 0.013828 0.000004 0.000004 0.000004 0.603503 2.093442 0.753012 0.000004 0.185207 2.435233 0.886216 0.005000 0.007538 9.541689 Note: Branch length is defined as number of nucleotide substitutions per codon (not per neucleotide site). tree length = 3.64901 ((((1: 0.000004, 2: 0.603503): 0.000004, 3: 2.093442): 0.000004, 4: 0.753012): 0.013828, 5: 0.000004, 6: 0.185207); ((((PITG_23265T0: 0.000004, PITG_23253T0: 0.603503): 0.000004, PITG_23257T0: 2.093442): 0.000004, PITG_23264T0: 0.753012): 0.013828, PITG_23267T0: 0.000004, PITG_23293T0: 0.185207); Detailed output identifying parameters kappa (ts/tv) = 2.43523 Parameters in M8 (beta&w>1): p0= 0.88622 p= 0.00500 q= 0.00754 (p1= 0.11378) w= 9.54169 dN/dS (w) for site classes (K=11) p: 0.08862 0.08862 0.08862 0.08862 0.08862 0.08862 0.08862 0.08862 0.08862 0.08862 0.11378 w: 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 1.00000 1.00000 1.00000 1.00000 9.54169 dN & dS for each branch branch t N S dN/dS dN dS N*dN S*dS 7..8 0.014 298.1 103.9 1.4402 0.0050 0.0035 1.5 0.4 8..9 0.000 298.1 103.9 1.4402 0.0000 0.0000 0.0 0.0 9..10 0.000 298.1 103.9 1.4402 0.0000 0.0000 0.0 0.0 10..1 0.000 298.1 103.9 1.4402 0.0000 0.0000 0.0 0.0 10..2 0.604 298.1 103.9 1.4402 0.2184 0.1517 65.1 15.8 9..3 2.093 298.1 103.9 1.4402 0.7577 0.5261 225.9 54.7 8..4 0.753 298.1 103.9 1.4402 0.2725 0.1892 81.2 19.7 7..5 0.000 298.1 103.9 1.4402 0.0000 0.0000 0.0 0.0 7..6 0.185 298.1 103.9 1.4402 0.0670 0.0465 20.0 4.8 Naive Empirical Bayes (NEB) analysis Positively selected sites (*: P>95%; **: P>99%) (amino acids refer to 1st sequence: PITG_23265T0) Pr(w>1) post mean +- SE for w 18 H 0.869 8.422 78 S 0.974* 9.316 119 T 0.999** 9.536 122 * 0.842 8.195 123 * 0.885 8.558 125 S 0.988* 9.439 128 I 0.783 7.684 129 A 
0.611 6.221 130 R 0.695 6.935 131 V 0.990** 9.456 Bayes Empirical Bayes (BEB) analysis (Yang, Wong & Nielsen 2005. Mol. Biol. Evol. 22:1107-1118) Positively selected sites (*: P>95%; **: P>99%) (amino acids refer to 1st sequence: PITG_23265T0) Pr(w>1) post mean +- SE for w 17 I 0.672 2.847 +- 1.967 18 H 0.964* 4.162 +- 1.684 23 F 0.734 3.145 +- 2.009 31 V 0.720 3.084 +- 2.016 33 E 0.598 2.528 +- 1.799 35 Q 0.717 3.057 +- 2.015 38 Y 0.736 3.101 +- 1.835 45 M 0.667 2.730 +- 1.627 55 G 0.503 2.123 +- 1.635 72 V 0.845 3.535 +- 1.694 74 A 0.853 3.585 +- 1.711 78 S 0.996** 4.292 +- 1.597 101 D 0.888 3.756 +- 1.701 108 D 0.686 2.924 +- 1.897 110 S 0.870 3.649 +- 1.678 115 S 0.597 2.494 +- 1.729 117 A 0.626 2.689 +- 1.911 118 A 0.626 2.689 +- 1.911 119 T 1.000** 4.306 +- 1.589 122 * 0.970* 4.178 +- 1.660 123 * 0.963* 4.161 +- 1.690 124 Y 0.864 3.641 +- 1.717 125 S 0.997** 4.295 +- 1.597 126 F 0.754 3.133 +- 1.721 127 C 0.931 3.955 +- 1.659 128 I 0.972* 4.179 +- 1.642 129 A 0.957* 4.096 +- 1.650 130 R 0.941 4.049 +- 1.710 131 V 0.998** 4.298 +- 1.595 132 R 0.876 3.756 +- 1.803 The grid p0: 0.050 0.150 0.250 0.350 0.450 0.550 0.650 0.750 0.850 0.950 p : 0.100 0.300 0.500 0.700 0.900 1.100 1.300 1.500 1.700 1.900 q : 0.100 0.300 0.500 0.700 0.900 1.100 1.300 1.500 1.700 1.900 ws: 1.500 2.500 3.500 4.500 5.500 6.500 7.500 8.500 9.500 10.500 Posterior on the grid p0: 0.000 0.000 0.000 0.000 0.000 0.017 0.280 0.515 0.185 0.002 p : 0.640 0.273 0.070 0.014 0.002 0.000 0.000 0.000 0.000 0.000 q : 0.010 0.146 0.108 0.102 0.100 0.101 0.103 0.107 0.110 0.114 ws: 0.000 0.149 0.398 0.223 0.104 0.054 0.031 0.019 0.013 0.009 Time used: 5:58 Time used: 5:58 bio-1.4.3.0001/test/data/paml/codeml/models/aa.ph0000644000004100000410000000025312200110570021144 0ustar www-datawww-data( ( ( ( PITG_23265T0:0.50488, PITG_23253T0:0.44824) :0.06966, PITG_23257T0:0.34831) :0.21826, PITG_23264T0:0.10596) :0.03467, PITG_23267T0:0.05518, PITG_23293T0:0.08545); 
bio-1.4.3.0001/test/data/paml/codeml/models/aa.dnd0000644000004100000410000000025512200110570021304 0ustar www-datawww-data( ( ( PITG_23265T0:0.01382, PITG_23267T0:-0.00191) :0.06405, PITG_23293T0:-0.02454) :0.04698, ( PITG_23253T0:0.11780, PITG_23257T0:0.18007) :0.04032, PITG_23264T0:0.09963); bio-1.4.3.0001/test/data/paml/codeml/control_file.txt0000644000004100000410000000063112200110570022167 0ustar www-datawww-dataseqfile = abglobin.aa treefile = abglobin.trees outfile = output.txt clock = 0 ncatG = 8 noisy = 0 fix_omega = 0 aaDist = 0 fix_kappa = 1 getSE = 0 runmode = 0 omega = 0.4 aaRatefile = wag.dat method = 0 seqtype = 2 NSsites = 0 fix_alpha = 0 RateAncestor = 1 verbose = 1 icode = 0 model = 1 alpha = 0.5 Small_Diff = 5.0e-06 CodonFreq = 2 cleandata = 1 ndata = 1 Mgene = 0 kappa = 2 Malpha = 0 fix_blength = 0 bio-1.4.3.0001/test/data/paml/codeml/rates0000644000004100000410000001664012200110570020017 0ustar www-datawww-data Estimated rates for sites from AAML. (((FYDL177C: 0.089767, PYDL177C: 0.044687): 0.021205, MYDL177C: 0.096521): 0.108719, BYDL177C: 0.058006); Frequencies and rates for categories (K=8) rate: 0.03346 0.14763 0.30690 0.51693 0.79689 1.18983 1.81043 3.19794 freq: 0.12500 0.12500 0.12500 0.12500 0.12500 0.12500 0.12500 0.12500 Site Freq Data Rate (posterior mean & category) 1 1 ***M 1.000 5 2 2 ***H 1.000 1 3 2 ***H 1.000 1 4 4 ***L 1.000 7 5 2 ***S 1.000 1 6 1 ***P 1.000 7 7 1 ***I 1.000 6 8 3 ***K 1.000 1 9 3 ***K 1.000 1 10 2 ***T 1.000 8 11 4 ***L 1.000 7 12 1 ***Y 1.000 8 13 3 ***K 1.000 1 14 1 ***A 1.000 7 15 4 ***L 1.000 7 16 1 ***D 1.000 2 17 1 ***R 1.000 8 18 1 ***N 1.000 1 19 1 ***Q 1.000 7 20 2 ***C 1.000 3 21 4 ***L 1.000 7 22 2 ***T 1.000 8 23 2 ***S 1.000 1 24 1 ***V 1.000 8 25 1 ***F 1.000 8 26 2 ***C 1.000 3 27 2 ***E 1.000 8 28 2 ***E 1.000 8 29 3 MMMM 0.638 1 30 1 SSSN 1.473 7 31 15 KKKK 0.645 1 32 6 NNNN 0.624 1 33 7 VVVV 0.709 1 34 8 GGGG 0.828 1 35 1 KRKK 1.510 7 36 13 LLLL 0.779 1 37 7 VVVV 0.709 1 38 1 KKKR 1.534 8 
39 3 VIII 1.539 8 40 3 WWWW 0.861 1 41 6 NNNN 0.624 1 42 7 EEEE 0.654 1 43 7 SSSS 0.629 1 44 7 EEEE 0.654 1 45 7 VVVV 0.709 1 46 13 LLLL 0.779 1 47 2 VIVI 2.233 8 48 3 DDDD 0.662 1 49 8 RRRR 0.651 1 50 15 KKKK 0.645 1 51 7 SSSS 0.629 1 52 15 KKKK 0.645 1 53 3 FFFF 0.813 1 54 9 QQQQ 0.598 1 55 9 AAAA 0.681 1 56 8 RRRR 0.651 1 57 4 CCCC 0.845 1 58 4 CCCC 0.845 1 59 1 TTPT 1.553 8 60 13 LLLL 0.779 1 61 1 RQQQ 1.470 7 62 1 NDNN 1.472 7 63 9 QQQQ 0.598 1 64 15 KKKK 0.645 1 65 3 DDDD 0.662 1 66 4 IIII 0.671 1 67 1 SPPP 1.716 8 68 1 SFSF 2.387 8 69 3 VIII 1.539 8 70 13 LLLL 0.779 1 71 1 EQQQ 1.426 7 72 1 EDEE 1.509 7 73 13 LLLL 0.779 1 74 1 VVTT 1.653 8 75 9 QQQQ 0.598 1 76 2 SNNN 1.463 7 77 6 NNNN 0.624 1 78 15 KKKK 0.645 1 79 1 SASS 1.481 7 80 7 VVVV 0.709 1 81 7 SSSS 0.629 1 82 15 KKKK 0.645 1 83 9 AAAA 0.681 1 84 7 SSSS 0.629 1 85 4 HHHH 0.664 1 86 3 MMMM 0.638 1 87 4 HHHH 0.664 1 88 3 MMMM 0.638 1 89 2 YYYY 0.769 1 90 9 AAAA 0.681 1 91 3 WWWW 0.861 1 92 8 RRRR 0.651 1 93 5 TTTT 0.647 1 94 9 AAAA 0.681 1 95 7 EEEE 0.654 1 96 2 VIVI 2.233 8 97 1 PASS 2.257 8 98 2 SNNN 1.463 7 99 1 DEND 2.198 8 100 1 SLLL 1.779 8 101 1 NNHN 1.462 7 102 1 SFLF 2.403 8 103 9 QQQQ 0.598 1 104 1 H*** 1.000 1 105 1 G*** 1.000 1 106 2 N*** 1.000 1 107 9 QQQQ 0.598 1 108 1 DDEE 1.646 8 109 9 QQQQ 0.598 1 110 15 KKKK 0.645 1 111 15 KKKK 0.645 1 112 15 KKKK 0.645 1 113 1 NSGS 2.221 8 114 1 GSNS 2.262 8 115 15 KKKK 0.645 1 116 1 NNAT 2.357 8 117 1 HNNN 1.475 7 118 15 KKKK 0.645 1 119 1 NKSS 2.237 8 120 2 NSNN 1.464 7 121 2 NSNN 1.464 7 122 1 NNSR 2.289 8 123 2 N*** 1.000 1 124 4 HHHH 0.664 1 125 1 GAVA 2.305 8 126 1 NNND 1.485 7 127 15 KKKK 0.645 1 128 1 TSSS 1.476 7 129 1 TKRK 2.270 8 130 1 KMNK 2.247 8 131 1 IVIV 2.243 8 132 1 TITT 1.519 7 133 1 VVVM 1.621 8 134 9 QQQQ 0.598 1 135 2 PPPP 0.802 1 136 15 KKKK 0.645 1 137 6 NNNN 0.624 1 138 3 VIII 1.539 8 139 7 EEEE 0.654 1 140 9 QQQQ 0.598 1 141 8 GGGG 0.828 1 142 1 CSCC 1.771 8 143 9 AAAA 0.681 1 144 3 DDDD 0.662 1 145 4 CCCC 0.845 1 146 8 GGGG 
0.828 1 147 7 EEEE 0.654 1 148 9 AAAA 0.681 1 149 1 AGGG 1.762 8 150 9 AAAA 0.681 1 151 8 GGGG 0.828 1 152 9 QQQQ 0.598 1 153 8 RRRR 0.651 1 154 13 LLLL 0.779 1 155 13 LLLL 0.779 1 156 5 TTTT 0.647 1 157 13 LLLL 0.779 1 158 13 LLLL 0.779 1 159 7 EEEE 0.654 1 160 8 RRRR 0.651 1 161 9 AAAA 0.681 1 162 6 NNNN 0.624 1 163 4 IIII 0.671 1 164 3 FFFF 0.813 1 165 6 NNNN 0.624 1 166 1 IVVV 1.581 8 167 13 LLLL 0.779 1 168 7 VVVV 0.709 1 169 4 IIII 0.671 1 170 7 VVVV 0.709 1 171 5 TTTT 0.647 1 172 8 RRRR 0.651 1 173 3 WWWW 0.861 1 174 2 YYYY 0.769 1 175 8 GGGG 0.828 1 176 8 GGGG 0.828 1 177 5 TTTT 0.647 1 178 2 PPPP 0.802 1 179 13 LLLL 0.779 1 180 8 GGGG 0.828 1 181 7 SSSS 0.629 1 182 7 SSSS 0.629 1 183 8 RRRR 0.651 1 184 3 FFFF 0.813 1 185 8 RRRR 0.651 1 186 4 HHHH 0.664 1 187 4 IIII 0.671 1 188 7 SSSS 0.629 1 189 5 TTTT 0.647 1 190 4 CCCC 0.845 1 191 9 AAAA 0.681 1 192 7 VVVV 0.709 1 193 7 EEEE 0.654 1 194 1 STTT 1.494 7 195 13 LLLL 0.779 1 196 15 KKKK 0.645 1 197 15 KKKK 0.645 1 198 8 GGGG 0.828 1 199 1 GGGE 1.784 8 200 1 FYFF 1.749 8 201 13 LLLL 0.779 1 202 1 PHPP 1.752 8 lnL = -907.674000 mean(r^)= 1.0031 var(r^)= 0.2349 Accuracy of rate prediction: corr(r^,r) = 0.4236 bio-1.4.3.0001/test/data/paml/codeml/output.txt0000644000004100000410000001273712200110570021062 0ustar www-datawww-data seed used = 552599837 Data set 1 5 285 human VLSPADKTNV KAAWGKVGAH AGEYGAEALE RMFLSFPTTK TYFPHFDLSH GSAQVKGHGK KVADALTNAV AHVDDMPNAL SALSDLHAHK LRVDPVNFKL LSHCLLVTLA AHLPAEFTPA VHASLDKFLA SVSTVLTSKY RLTPEEKSAV TALWGKVNVD EVGGEALGRL LVVYPWTQRF FESFGDLSTP DAVMGNPKVK AHGKKVLGAF SDGLAHLDNL KGTFATLSEL HCDKLHVDPE NFRLLGNVLV CVLAHHFGKE FTPPVQAAYQ KVVAGVANAL AHKYH goat-cow VLSAADKSNV KAAWGKVGGN AGAYGAEALE RMFLSFPTTK TYFPHFDLSH GSAQVKGHGE KVAAALTKAV GHLDDLPGTL SDLSDLHAHK LRVDPVNFKL LSHSLLVTLA CHLPNDFTPA VHASLDKFLA NVSTVLTSKY RLTAEEKAAV TAFWGKVKVD EVGGEALGRL LVVYPWTQRF FESFGDLSTA DAVMNNPKVK AHGKKVLDSF SNGMKHLDDL KGTFAALSEL HCDKLHVDPE NFKLLGNVLV VVLARNFGKE FTPVLQADFQ KVVAGVANAL AHRYH rabbit 
VLSPADKTNI KTAWEKIGSH GGEYGAEAVE RMFLGFPTTK TYFPHFDFTH GSEQIKAHGK KVSEALTKAV GHLDDLPGAL STLSDLHAHK LRVDPVNFKL LSHCLLVTLA NHHPSEFTPA VHASLDKFLA NVSTVLTSKY RLSSEEKSAV TALWGKVNVE EVGGEALGRL LVVYPWTQRF FESFGDLSSA NAVMNNPKVK AHGKKVLAAF SEGLSHLDNL KGTFAKLSEL HCDKLHVDPE NFRLLGNVLV IVLSHHFGKE FTPQVQAAYQ KVVAGVANAL AHKYH rat VLSADDKTNI KNCWGKIGGH GGEYGEEALQ RMFAAFPTTK TYFSHIDVSP GSAQVKAHGK KVADALAKAA DHVEDLPGAL STLSDLHAHK LRVDPVNFKF LSHCLLVTLA CHHPGDFTPA MHASLDKFLA SVSTVLTSKY RLTDAEKAAV NALWGKVNPD DVGGEALGRL LVVYPWTQRY FDSFGDLSSA SAIMGNPKVK AHGKKVINAF NDGLKHLDNL KGTFAHLSEL HCDKLHVDPE NFRLLGNMIV IVLGHHLGKE FTPCAQAAFQ KVVAGVASAL AHKYH marsupial VLSDADKTHV KAIWGKVGGH AGAYAAEALA RTFLSFPTTK TYFPHFDLSP GSAQIQGHGK KVADALSQAV AHLDDLPGTM SKLSDLHAHK LRVDPVNFKL LSHCLIVTLA AHLSKDLTPE VHASMDKFFA SVATVLTSKY RLTSEEKNCI TTIWSKVQVD QTGGEALGRM LVVYPWTTRF FGSFGDLSSP GAVMSNSKVQ AHGAKVLTSF GEAVKHLDNL KGTYAKLSEL HCDKLHVDPE NFKMLGNIIV ICLAEHFGKD FTPECQVAWQ KLVAGVAHAL AHKYH Printing out site pattern counts 5 126 P human VLSPADKTNV AAWGGAHAEY GAEALERMFL SPTPHFLSHA QVKGKADTNV AVDMNALANL CLALPAEFAV LSSTPESAVT ALGNVDEVQF ETPDVMGKLG ASDLANFTCR VLCVAHFEPV AAYVNK goat-cow VLSAADKSNV AAWGGGNAAY GAEALERMFL SPTPHFLSHA QVKGEAATKV GLDLGTLDNL SLCLPNDFAV LNSTAEAAVT AFGKVDEVQF ETADVMNKLD SSNMKDFACK VLVVARFEVL ADFVNR rabbit VLSPADKTNI TAWEGSHGEY GAEAVERMFL GPTPHFFTHE QIKAKSETKV GLDLGALTNL CLNHPSEFAV LNSSSESAVT ALGNVEEVQF ESANVMNKLA ASELSNFKCR VLIVSHFEQV AAYVNK rat VLSADDKTNI NCWGGGHGEY GEEALQRMFA APTSHIVSPA QVKAKADAKA DVELGALTNF CLCHPGDFAM LSSTDAAAVN ALGNPDDVQY DSASIMGKIN ANDLKNFHCR MIIVGHLECA AAFVSK marsupial VLSDADKTHV AIWGGGHAAY AAEALARTFL SPTPHFLSPA QIQGKADSQV ALDLGTMKNL CIALSKDLEV FSATSENCIT TISQVDQTTF GSPGVMSALT SGEVKNYKCK IIICAEFDEC VAWLHK 16 21 10 1 1 10 17 1 1 2 1 1 3 1 13 1 2 1 1 5 2 1 5 15 1 1 5 1 10 1 1 7 10 1 14 1 1 1 1 1 3 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 4 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 AAML (in 
paml version 4, June 2007) ./test/material/codeml/abglobin.aa Model: EqualInput dGamma (ncatG=8) ns = 5 ls = 285 Frequencies.. A R N D C Q E G H I L K M F P S T W Y V human 0.1263 0.0211 0.0351 0.0526 0.0105 0.0140 0.0421 0.0702 0.0632 0.0000 0.1263 0.0772 0.0105 0.0526 0.0491 0.0561 0.0561 0.0105 0.0211 0.1053 goat-cow 0.1193 0.0246 0.0421 0.0632 0.0070 0.0140 0.0386 0.0737 0.0526 0.0000 0.1298 0.0842 0.0105 0.0596 0.0351 0.0596 0.0526 0.0105 0.0175 0.1053 rabbit 0.0982 0.0211 0.0421 0.0386 0.0070 0.0175 0.0596 0.0702 0.0667 0.0140 0.1228 0.0842 0.0070 0.0561 0.0386 0.0737 0.0561 0.0105 0.0211 0.0947 rat 0.1193 0.0211 0.0386 0.0667 0.0175 0.0175 0.0316 0.0807 0.0667 0.0246 0.1123 0.0842 0.0140 0.0491 0.0386 0.0596 0.0456 0.0105 0.0211 0.0807 marsupial 0.1088 0.0175 0.0211 0.0561 0.0175 0.0281 0.0351 0.0702 0.0632 0.0281 0.1088 0.0842 0.0175 0.0491 0.0351 0.0737 0.0667 0.0140 0.0211 0.0842 Homogeneity statistic: X2 = 0.13569 G = 0.15654 Average 0.114386 0.021053 0.035789 0.055439 0.011930 0.018246 0.041404 0.072982 0.062456 0.013333 0.120000 0.082807 0.011930 0.053333 0.039298 0.064561 0.055439 0.011228 0.020351 0.094035 # constant sites: 170 (59.65%) ln Lmax (unconstrained) = -1189.106658 TREE # 1: (((3, 4), 1), 2, 5); MP score: 177 lnL(ntime: 7 np: 8): -1817.465211 +0.000000 6..7 7..8 8..3 8..4 7..1 6..2 6..5 0.033639 0.038008 0.082889 0.187866 0.055050 0.096992 0.284574 0.588710 tree length = 0.77902 (((3: 0.082889, 4: 0.187866): 0.038008, 1: 0.055050): 0.033639, 2: 0.096992, 5: 0.284574); (((rabbit: 0.082889, rat: 0.187866): 0.038008, human: 0.055050): 0.033639, goat-cow: 0.096992, marsupial: 0.284574); Detailed output identifying parameters alpha (gamma, K = 8) = 0.58871 rate: 0.01357 0.09006 0.22445 0.42476 0.71414 1.14510 1.85916 3.52875 freq: 0.12500 0.12500 0.12500 0.12500 0.12500 0.12500 0.12500 0.12500 Time used: 0:01 Data set 2 bio-1.4.3.0001/test/data/KEGG/0000755000004100000410000000000012200110570015270 5ustar 
www-datawww-databio-1.4.3.0001/test/data/KEGG/R00006.reaction0000644000004100000410000000133212200110570017604 0ustar www-datawww-dataENTRY R00006 Reaction NAME pyruvate:pyruvate acetaldehydetransferase (decarboxylating); 2-acetolactate pyruvate-lyase (carboxylating) DEFINITION 2-Acetolactate + CO2 <=> 2 Pyruvate EQUATION C00900 + C00011 <=> 2 C00022 COMMENT TPP-dependent enzymatic reaction (R00014+R03050) RPAIR RP: RP00440 C00022_C00900 main RP: RP05698 C00011_C00022 leave RP: RP12733 C00022_C00900 trans PATHWAY PATH: rn00770 Pantothenate and CoA biosynthesis ENZYME 2.2.1.6 ORTHOLOGY KO: K01652 acetolactate synthase I/II/III large subunit [EC:2.2.1.6] KO: K01653 acetolactate synthase I/III small subunit [EC:2.2.1.6] bio-1.4.3.0001/test/data/KEGG/map00052.pathway0000644000004100000410000000140612200110570020034 0ustar www-datawww-dataENTRY map00052 Pathway NAME Galactose metabolism CLASS Metabolism; Carbohydrate Metabolism PATHWAY_MAP map00052 Galactose metabolism MODULE M00097 UDP-glucose and UDP-galactose biosynthesis, Glc-1P/Gal-1P => UDP-Glc/UDP-Gal M00614 PTS system, N-acetylgalactosamine-specific II component M00616 PTS system, galactitol-specific II component M00618 PTS system, lactose-specific II component M00624 PTS system, galactosamine-specific II component REL_PATHWAY map00010 Glycolysis / Gluconeogenesis map00040 Pentose and glucuronate interconversions map00051 Fructose and mannose metabolism map00520 Amino sugar and nucleotide sugar metabolism bio-1.4.3.0001/test/data/KEGG/D00063.drug0000644000004100000410000000765212200110570016741 0ustar www-datawww-dataENTRY D00063 Drug NAME Tobramycin (JP15/USP); TOB; Tobracin (TN); Tobrex (TN) FORMULA C18H37N5O9 MASS 467.2591 SOURCE Streptomyces tenebrarius [TAX:1933] TARGET 16S rRNA of 30S ribosomal subunit, protein synthesis inhibitor [BR:ko03011(16S)] ACTIVITY Antibacterial REMARK Same as: C00397 Therapeutic category: 1317 6123 ATC code: J01GB01 S01AA12 COMMENT natural product PATHWAY PATH: map07021 
Aminoglycosides PRODUCTS TOBI (Novartis Pharma) 94F9E516-6BF6-4E30-8DDE-8833C25C2560 TOBRAMYCIN (Bristol-Myers Squibb) 7305F9BB-622B-43C0-981A-56E2F226CFD7 TOBRAMYCIN (Hospira) C5A005B0-7B6F-4E30-DF92-9A20B1CA66A1 Tobramycin (Akorn-Strides) 49151A62-191A-4BA8-8B8C-BD8535F2FDB3 Tobramycin (Bausch and Lomb) A5693EC9-D2F7-4D45-90B0-A113C54840D7 Tobramycin (Falcon Pharma) 27E2C16E-19B0-4745-93EB-5CF99F94BB92 Tobramycin (Hospira) 4E115874-3637-4AED-B6AF-77D53A850208 Tobramycin (Hospira) EB02166C-18F6-4BE0-F493-AC89D65DA759 Tobramycin (X-Gen Pharma) A384641C-04E3-4AB5-B152-7408CD07B64D Tobramycin in Sodium Chloride (Hospira) EE907146-E4A8-4578-A9B0-C8E9790E3D55 Tobrex (Alcon Lab) 4B8716C4-0FFD-49AA-9006-A3BF5B6D19A6 Tobrex (Alcon Lab) CDD423C5-A231-47D4-BF51-00B5C29E6A60 DBLINKS CAS: 32986-56-4 PubChem: 7847131 ChEBI: 28864 DrugBank: DB00684 PDB-CCD: TOY LigandBox: D00063 NIKKAJI: J4.533K ATOM 32 1 C1y C 20.6560 -20.0968 2 C1y C 20.6560 -21.4973 3 C1y C 21.8689 -22.1975 4 C1y C 23.0818 -21.4973 5 C1y C 23.0818 -20.0968 6 O2x O 21.8689 -19.3965 7 C1b C 19.4432 -19.3965 8 O1a O 18.2473 -20.0872 9 O1a O 19.4432 -22.1975 10 N1a N 21.8689 -23.5978 11 O1a O 24.3134 -22.2085 12 O2a O 24.3134 -19.3855 13 C1y C 25.4878 -18.6963 14 C1y C 26.7056 -19.3879 15 C1x C 27.9134 -18.6791 16 C1y C 27.9035 -17.2786 17 C1y C 26.6857 -16.5869 18 C1y C 25.4779 -17.2958 19 N1a N 26.7157 -20.7965 20 N1a N 29.0779 -16.5893 21 O1a O 24.2675 -16.6084 22 O2a O 26.6757 -15.1950 23 C1y C 27.8854 -14.4851 24 O2x O 29.0946 -15.1718 25 C1y C 30.3025 -14.4631 26 C1y C 30.2926 -13.0626 27 C1x C 29.0835 -12.3758 28 C1y C 27.8755 -13.0846 29 C1b C 31.5468 -15.1693 30 N1a N 31.5569 -16.5953 31 O1a O 31.5060 -12.3503 32 N1a N 26.6567 -12.3923 BOND 34 1 1 2 1 2 2 3 1 3 3 4 1 4 4 5 1 5 5 6 1 6 1 6 1 7 1 7 1 #Up 8 7 8 1 9 2 9 1 #Down 10 3 10 1 #Up 11 4 11 1 #Down 12 5 12 1 #Down 13 13 12 1 #Down 14 13 14 1 15 14 15 1 16 15 16 1 17 16 17 1 18 17 18 1 19 13 18 1 20 14 19 1 #Up 21 16 20 1 #Up 22 18 21 1 #Up 23 17 22 
1 #Down 24 23 22 1 #Down 25 23 24 1 26 24 25 1 27 25 26 1 28 26 27 1 29 27 28 1 30 23 28 1 31 25 29 1 #Up 32 29 30 1 33 26 31 1 #Down 34 28 32 1 #Down /// bio-1.4.3.0001/test/data/KEGG/G01366.glycan0000644000004100000410000000104112200110570017251 0ustar www-datawww-dataENTRY G01366 Glycan COMPOSITION (4dlyxHex)1 (GlcNAc)1 (Man)2 MASS 691.6 CLASS Glycoprotein; N-Glycan COMMENT synthetic (CCSD:2549) DBLINKS CCSD: 2549 2550 16559 25204 GlycomeDB: 5567 JCGGDB: JCGG-STR026574 NODE 4 1 GlcNAc 11.4 0 2 4dlyxHex -0.6 0 3 Man -10.6 5 4 Man -10.6 -5 EDGE 3 1 2:b1 1:4 2 3:a1 2:6 3 4:a1 2:3 /// bio-1.4.3.0001/test/data/KEGG/T00070.genome0000644000004100000410000000224712200110570017263 0ustar www-datawww-dataENTRY T00070 Complete Genome NAME atu, A.tumefaciens, AGRT5, 176299 DEFINITION Agrobacterium tumefaciens C58 ANNOTATION manual TAXONOMY TAX:176299 LINEAGE Bacteria; Proteobacteria; Alphaproteobacteria; Rhizobiales; Rhizobiaceae; Rhizobium/Agrobacterium group; Agrobacterium DATA_SOURCE RefSeq ORIGINAL_DB UWash DISEASE Crown gall disease in plants COMMENT Originally called Agrobacterium tumefaciens C58 (U.Washington/Dupont) to distinguish from Agrobacterium tumefaciens C58 (Cereon) [GN:atc] CHROMOSOME Circular SEQUENCE RS:NC_003062 LENGTH 2841580 CHROMOSOME L (linear chromosome) SEQUENCE RS:NC_003063 LENGTH 2075577 PLASMID Ti; Circular SEQUENCE RS:NC_003065 LENGTH 214233 PLASMID AT; Circular SEQUENCE RS:NC_003064 LENGTH 542868 STATISTICS Number of nucleotides: 5674258 Number of protein genes: 5355 Number of RNA genes: 74 REFERENCE PMID:11743193 AUTHORS Wood DW, et al. TITLE The genome of the natural genetic engineer Agrobacterium tumefaciens C58. 
JOURNAL Science 294:2317-23 (2001) /// bio-1.4.3.0001/test/data/KEGG/hsa00790.pathway0000644000004100000410000000607012200110570020045 0ustar www-datawww-dataENTRY hsa00790 Pathway NAME Folate biosynthesis - Homo sapiens (human) CLASS Metabolism; Metabolism of Cofactors and Vitamins PATHWAY_MAP hsa00790 Folate biosynthesis MODULE M00251 Folate biosynthesis, GTP => THF [PATH:hsa00790] M00304 Methanogenesis [PATH:hsa00790] DISEASE H00167 Phenylketonuria (PKU) H00213 Hypophosphatasia DRUG D00142 Methotrexate (JP15/USP/INN) D02115 Methotrexate sodium D06238 Trimetrexate (USAN/INN) D06239 Trimetrexate glucuronate (USAN) DBLINKS GO: 0046656 ORGANISM Homo sapiens (human) [GN:hsa] GENE 2643 GCH1, DYT14, DYT5, DYT5a, GCH, GTP-CH-1, GTPCH1, HPABH4B [KO:K01495] [EC:3.5.4.16] 248 ALPI, IAP [KO:K01077] [EC:3.1.3.1] 249 ALPL, AP-TNAP, APTNAP, FLJ40094, FLJ93059, HOPS, MGC161443, MGC167935, TNAP, TNSALP [KO:K01077] [EC:3.1.3.1] 250 ALPP, ALP, FLJ61142, PALP, PLAP [KO:K01077] [EC:3.1.3.1] 251 ALPPL2, ALPG, ALPPL, GCAP [KO:K01077] [EC:3.1.3.1] 1719 DHFR, DHFRP1, DYR [KO:K00287] [EC:1.5.1.3] 2356 FPGS [KO:K01930] [EC:6.3.2.17] 8836 GGH, GH [KO:K01307] [EC:3.4.19.9] 5805 PTS, FLJ97081, PTPS [KO:K01737] [EC:4.2.3.12] 6697 SPR, SDR38C1 [KO:K00072] [EC:1.1.1.153] 5860 QDPR, DHPR, FLJ42391, PKU2, SDR33C1 [KO:K00357] [EC:1.5.1.34] COMPOUND C00044 GTP C00101 Tetrahydrofolate C00251 Chorismate C00266 Glycolaldehyde C00268 Dihydrobiopterin C00272 Tetrahydrobiopterin C00415 Dihydrofolate C00504 Folate C00568 4-Aminobenzoate C00921 Dihydropteroate C01217 5,6,7,8-Tetrahydromethanopterin C01300 2-Amino-4-hydroxy-6-hydroxymethyl-7,8-dihydropteridine C03541 Tetrahydrofolyl-[Glu](n) C03684 6-Pyruvoyltetrahydropterin C04244 6-Lactoyl-5,6,7,8-tetrahydropterin C04807 2-Amino-7,8-dihydro-4-hydroxy-6-(diphosphooxymethyl)pteridine C04874 2-Amino-4-hydroxy-6-(D-erythro-1,2,3-trihydroxypropyl)-7,8-dihydropteridine C04895 2-Amino-4-hydroxy-6-(erythro-1,2,3-trihydroxypropyl)dihydropteridine triphosphate 
C05922 Formamidopyrimidine nucleoside triphosphate C05923 2,5-Diaminopyrimidine nucleoside triphosphate C05924 Molybdopterin C05925 Dihydroneopterin phosphate C05926 Neopterin C05927 7,8-Dihydromethanopterin C06148 2,5-Diamino-6-(5'-triphosphoryl-3',4'-trihydroxy-2'-oxopentyl)-amino-4-oxopyrimidine C06149 6-(3'-Triphosphoryl-1'-methylglyceryl)-7-methyl-7,8-dihydrobiopterin C09332 Tetrahydrofolyl-[Glu](2) C11355 4-Amino-4-deoxychorismate REL_PATHWAY hsa00230 Purine metabolism hsa00400 Phenylalanine, tyrosine and tryptophan biosynthesis hsa00670 One carbon pool by folate hsa00680 Methane metabolism KO_PATHWAY ko00790 /// bio-1.4.3.0001/test/data/KEGG/ec00072.pathway0000644000004100000410000000154612200110570017655 0ustar www-datawww-dataENTRY ec00072 Pathway NAME Synthesis and degradation of ketone bodies CLASS Metabolism; Lipid Metabolism PATHWAY_MAP ec00072 Synthesis and degradation of ketone bodies MODULE M00177 Ketone body biosynthesis, acetyl-CoA => acetoacetate/3-hydroxybutyrate/acetone [PATH:ec00072] ENZYME 1.1.1.30 2.3.1.9 2.3.3.10 2.8.3.5 4.1.1.4 4.1.3.4 COMPOUND C00024 Acetyl-CoA C00164 Acetoacetate C00207 Acetone C00332 Acetoacetyl-CoA C00356 (S)-3-Hydroxy-3-methylglutaryl-CoA C01089 (R)-3-Hydroxybutanoate REL_PATHWAY ec00010 Glycolysis / Gluconeogenesis ec00071 Fatty acid metabolism ec00620 Pyruvate metabolism ec00650 Butanoate metabolism KO_PATHWAY ko00072 /// bio-1.4.3.0001/test/data/KEGG/G00024.glycan0000644000004100000410000000450712200110570017251 0ustar www-datawww-dataENTRY G00024 Glycan NAME T antigen COMPOSITION (Gal)1 (GalNAc)1 (Ser/Thr)1 MASS 365.3 (Ser/Thr) CLASS Glycoprotein; O-Glycan Neoglycoconjugate REMARK Same as: C04750 C04776 REFERENCE 1 [PMID:12950230] Backstrom M, Link T, Olson FJ, Karlsson H, Graham R, Picco G, Burchell J, Taylor-Papadimitriou J, Noll T, Hansson GC. Recombinant MUC1 mucin with a breast cancer-like O-glycosylation produced in large amounts in Chinese-hamster ovary cells. Biochem. J. 376 (2003) 677-86. 
2 [PMID:14631106] Wu AM. Carbohydrate structural units in glycoproteins and polysaccharides as important ligands for Gal and GalNAc reactive lectins. J. Biomed. Sci. 10 (2003) 676-88. REACTION R05908 R05912 R05913 R06140 PATHWAY PATH: ko00512 O-Glycan biosynthesis PATH: ko01100 Metabolic pathways ENZYME 2.4.1.102 2.4.1.122 2.4.99.4 3.2.1.97 ORTHOLOGY KO: K00727 beta-1,3-galactosyl-O-glycosyl-glycoprotein beta-1,6-N-acetylglucosaminyltransferase [EC:2.4.1.102] KO: K00731 glycoprotein-N-acetylgalactosamine 3-beta-galactosyltransferase [EC:2.4.1.122] KO: K00780 beta-galactoside alpha-2,3-sialyltransferase (sialyltransferase 4A) [EC:2.4.99.4] KO: K03368 beta-galactoside alpha-2,3-sialyltransferase (sialyltransferase 4B) [EC:2.4.99.4] DBLINKS CCSD: 98 99 100 2225 2236 2237 2238 2239 2240 2241 2242 2243 3406 5035 5038 5887 14321 18613 25363 27572 28182 29046 29092 29175 29393 29521 29554 30734 30735 30848 30849 30850 30917 32646 33022 33851 33878 33952 34823 34829 34986 34995 35029 35050 35107 35108 35805 35833 35991 36236 36826 36863 37982 38587 38640 38672 42797 43915 44029 44775 45346 46438 46466 47186 48015 48891 49283 49293 50466 50469 50477 GlycomeDB: 475 JCGGDB: JCGG-STR025711 NODE 3 1 Ser/Thr 8 0 2 GalNAc -1 0 3 Gal -9 0 EDGE 2 1 2:a1 1 2 3:b1 2:3 /// bio-1.4.3.0001/test/data/KEGG/map00030.pathway0000644000004100000410000000546312200110570020037 0ustar www-datawww-dataENTRY map00030 Pathway NAME Pentose phosphate pathway DESCRIPTION The pentose phosphate pathway is a process of glucose turnover that produces NADPH as reducing equivalents and pentoses as essential parts of nucleotides. There are two different phases in the pathway. One is irreversible oxidative phase in which glucose-6P is converted to ribulose-5P by oxidative decarboxylation, and NADPH is generated [MD:M00006]. The other is reversible non-oxidative phase in which phosphorylated sugars are interconverted to generate xylulose-5P, ribulose-5P, and ribose-5P [MD:M00007]. 
Phosphoribosyl pyrophosphate (PRPP) formed from ribose-5P [MD:M00005] is an activated compound used in the biosynthesis of histidine and purine/pyrimidine nucleotides. This pathway map also shows the Entner-Doudoroff pathway where 6-P-gluconate is dehydrated and then cleaved into pyruvate and glyceraldehyde-3P [MD:M00008]. CLASS Metabolism; Carbohydrate Metabolism PATHWAY_MAP map00030 Pentose phosphate pathway MODULE M00004 Pentose phosphate pathway (Pentose phosphate cycle) [PATH:map00030] M00005 PRPP biosynthesis, ribose 5P -> PRPP [PATH:map00030] M00006 Pentose phosphate pathway, oxidative phase, glucose 6P => ribulose 5P [PATH:map00030] M00007 Pentose phosphate pathway, non-oxidative phase, fructose 6P => ribose 5P [PATH:map00030] M00008 Entner-Doudoroff pathway, glucose-6P => glyceraldehyde-3P + pyruvate [PATH:map00030] M00680 Semi-phosphorylative Entner-Doudoroff pathway, gluconate => glyceraldehyde-3P + pyruvate [PATH:map00030] M00681 Non-phosphorylative Entner-Doudoroff pathway, gluconate => glyceraldehyde + pyruvate [PATH:map00030] DISEASE H00196 Phosphoribosylpyrophosphate synthetase I superactivity DBLINKS GO: 0006098 REFERENCE (map 3) AUTHORS Nishizuka Y (ed). TITLE [Metabolic Maps] (In Japanese) JOURNAL Tokyo Kagaku Dojin (1980) REFERENCE (map 4) AUTHORS Nishizuka Y, Seyama Y, Ikai A, Ishimura Y, Kawaguchi A (eds). TITLE [Cellular Functions and Metabolic Maps] (In Japanese) JOURNAL Tokyo Kagaku Dojin (1997) REFERENCE AUTHORS Michal G. TITLE Biochemical Pathways JOURNAL Wiley (1999) REFERENCE PMID:12700258 AUTHORS Hove-Jensen B, Rosenkrantz TJ, Haldimann A, Wanner BL. TITLE Escherichia coli phnN, encoding ribose 1,5-bisphosphokinase activity (phosphoribosyl diphosphate forming): dual role in phosphonate degradation and NAD biosynthesis pathways. 
JOURNAL J Bacteriol 185:2793-801 (2003) REL_PATHWAY map00010 Glycolysis / Gluconeogenesis map00040 Pentose and glucuronate interconversions map00230 Purine metabolism map00240 Pyrimidine metabolism map00340 Histidine metabolism KO_PATHWAY ko00030 /// bio-1.4.3.0001/test/data/KEGG/K02338.orthology0000644000004100000410000007726212200110570020050 0ustar www-datawww-dataENTRY K02338 KO NAME DPO3B, dnaN DEFINITION DNA polymerase III subunit beta [EC:2.7.7.7] PATHWAY ko00230 Purine metabolism ko00240 Pyrimidine metabolism ko03030 DNA replication ko03430 Mismatch repair ko03440 Homologous recombination MODULE M00597 DNA polymerase III complex CLASS Metabolism; Nucleotide Metabolism; Purine metabolism [PATH:ko00230] Metabolism; Nucleotide Metabolism; Pyrimidine metabolism [PATH:ko00240] Genetic Information Processing; Replication and Repair; DNA replication [PATH:ko03030] Genetic Information Processing; Replication and Repair; DNA replication proteins [BR:ko03032] Genetic Information Processing; Replication and Repair; Mismatch repair [PATH:ko03430] Genetic Information Processing; Replication and Repair; Homologous recombination [PATH:ko03440] Genetic Information Processing; Replication and Repair; DNA repair and recombination proteins [BR:ko03400] DBLINKS RN: R00375 R00376 R00377 R00378 COG: COG0592 GO: 0003887 GENES ECO: b3701(dnaN) ECJ: JW3678(dnaN) ECD: ECDH10B_3887(dnaN) EBW: BWG_3391(dnaN) ECE: Z5192(dnaN) ECS: ECs4636 ECF: ECH74115_5129(dnaN) ETW: ECSP_4747(dnaN) ECG: E2348C_4012(dnaN) ECC: c4623(dnaN) ECI: UTI89_C4251(dnaN) ECP: ECP_3902 ECV: APECO1_2756(dnaN) ECW: EcE24377A_4211(dnaN) ECX: EcHS_A3914(dnaN) ECM: EcSMS35_4066(dnaN) ECY: ECSE_3987 ECL: EcolC_0002 ECK: EC55989_4170(dnaN) ECQ: ECED1_4392(dnaN) ECR: ECIAI1_3879(dnaN) ECT: ECIAI39_4305(dnaN) ECZ: ECS88_4124(dnaN) EUM: ECUMN_4232(dnaN) ELF: LF82_0508(dnaN) EBL: B21_03528(dnaN) EBD: ECBD_0002 EBR: ECB_03584(dnaN) EOH: ECO103_4457(dnaN) EOI: ECO111_4528(dnaN) EOJ: ECO26_4881(dnaN) EOK: G2583_4490(dnaN) 
EFE: EFER_3996(dnaN) STY: STY3941(dnaN) STT: t3682(dnaN) STM: STM3837(dnaN) SPT: SPA3681(dnaN) SEK: SSPA3436 SPQ: SPAB_04776 SEI: SPC_3924(dnaN) SEC: SC3755(dnaN) SEH: SeHA_C4171(dnaN) SEE: SNSL254_A4122(dnaN) SEW: SeSA_A4049(dnaN) SEA: SeAg_B4065(dnaN) SED: SeD_A4229(dnaN) SEG: SG3594(dnaN) SET: SEN3654(dnaN) SES: SARI_03808 YPE: YPO4096(dnaN) YPK: y4112(dnaN) YPM: YP_4004(dnaN) YPA: YPA_4142 YPN: YPN_3954 YPG: YpAngola_A4176(dnaN) YPP: YPDSF_0002 YPZ: YPZ3_3515(dnaN) YPS: YPTB3942(dnaN) YPI: YpsIP31758_4152(dnaN) YPY: YPK_0002 YPB: YPTS_0002 YEN: YE4172(dnaN) SFL: SF3763(dnaN) SFX: S4008(dnaN) SFV: SFV_3811(dnaN) SSN: SSON_3651(dnaN) SBO: SBO_3676(dnaN) SBC: SbBS512_E4224(dnaN) SDY: SDY_4183(dnaN) ECA: ECA4440(dnaN) PCT: PC1_0002 PWA: Pecwa_0002 ETA: ETA_34520(dnaN) EPY: EpC_36690(dnaN) EAM: EAMY_3677(dnaN) EAY: EAM_3452(dnaN) PLU: plu0002(dnaN) PAY: PAU_00002(dnaN) BUC: BU011(dnaN) BAS: BUsg011(dnaN) BAB: bbp011(dnaN) BCC: BCc_003(dnaN) BAP: BUAP5A_011(dnaN) BAU: BUAPTUC7_011(dnaN) WBR: WGLp015(dnaN) SGL: SG0002 ENT: Ent638_0002 ESA: ESA_03975 CTU: Ctu_00390(dnaN) KPN: KPN_04104(dnaN) KPE: KPK_0002(dnaN) KPU: KP1_5482(dnaN) KVA: Kvar_0006 CKO: CKO_00047 CRO: ROD_40261(dnaN) SPE: Spro_0033 PMR: PMI3133(dnaN) EIC: NT01EI_0002 ETR: ETAE_0002(dnaN) BFL: Bfl016(dnaN) BPN: BPEN_015(dnaN) HDE: HDEF_1740(dnaN) DDA: Dd703_0002 DDC: Dd586_0002 DZE: Dd1591_0002 XBO: XBJ1_0002(dnaN) PAM: PANA_0089(dnaN) HIN: HI0992(dnaN) HIT: NTHI1166(dnaN) HIP: CGSHiEE_07005 HIQ: CGSHiGG_08555 HDU: HD0850(dnaN) HAP: HAPS_1502(dnaN) HSO: HS_0137(dnaN) HSM: HSM_0002 PMU: PM1160(dnaN) MSU: MS0486(dnaN) APL: APL_0002(dnaN) APJ: APJL_0002(dnaN) APA: APP7_0002 ASU: Asuc_0002 AAP: NT05HA_0052 AAT: D11S_1973 XFA: XF0002 XFT: PD0002(dnaN) XFM: Xfasm12_0002 XFN: XfasM23_0002 XCC: XCC0002(dnaN) XCB: XC_0002 XCA: xccb100_0002(dnaN) XCV: XCV0002(dnaN) XAC: XAC0002(dnaN) XOO: XOO0002(dnaN) XOM: XOO_0002 XOP: PXO_03482(dnaN) XAL: XALc_0002(dnaN) SML: Smlt0002(dnaN) SMT: Smal_0002 VCH: VC0013 VCO: 
VC0395_A2506(dnaN) VCM: VCM66_0013(dnaN) VCJ: VCD_001481 VVU: VV1_0998 VVY: VV0012 VPA: VP0012 VHA: VIBHAR_00443 VSP: VS_0011 VEX: VEA_002008 VFI: VF_0010(dnaN) VFM: VFMJ11_0010(dnaN) VSA: VSAL_I0010(dnaN) PPR: PBPRA0009 PAE: PA0002(dnaN) PAU: PA14_00020(dnaN) PAP: PSPA7_0002(dnaN) PAG: PLES_00011(dnaN) PPU: PP_0011(dnaN) PPF: Pput_0002 PPG: PputGB1_0004 PPW: PputW619_0002 PST: PSPTO_0002(dnaN) PSB: Psyr_0002 PSP: PSPPH_0002(dnaN) PFL: PFL_0002(dnaN) PFO: Pfl01_0002 PFS: PFLU0002(dnaN) PEN: PSEEN0002(dnaN) PMY: Pmen_0002 PSA: PST_0002(dnaN) CJA: CJA_0002(dnaN) AVN: Avin_00020 PAR: Psyc_0002(dnaN) PCR: Pcryo_0004 PRW: PsycPRwf_0003 ACI: ACIAD0002(dnaN) ACB: A1S_0002 ABM: ABSDF0002(dnaN) ABY: ABAYE0002(dnaN) ABC: ACICU_00004 ABN: AB57_0019(dnaN) ABB: ABBFA_000002(dnaN) SON: SO_0009(dnaN) SDN: Sden_0002 SFR: Sfri_0002 SAZ: Sama_0014 SBL: Sbal_0002 SBM: Shew185_0002 SBN: Sbal195_0002 SBP: Sbal223_0002 SLO: Shew_0002 SPC: Sputcn32_0002 SSE: Ssed_0007 SPL: Spea_0002 SHE: Shewmr4_0002 SHM: Shewmr7_0002 SHN: Shewana3_0010 SHW: Sputw3181_0002 SHL: Shal_0002 SWD: Swoo_0002 SWP: swp_0016 SVO: SVI_0032(dnaN) ILO: IL0002(dnaN) CPS: CPS_0002(dnaN) PHA: PSHAa0002(dnaN) PAT: Patl_0002 SDE: Sde_0002 MAQ: Maqu_0002 AMC: MADE_00003 PIN: Ping_3715 TTU: TERTU_0003(dnaN) CBU: CBU_0002(dnaN) CBS: COXBURSA331_A0002(dnaN) CBD: CBUD_0002(dnaN) CBG: CbuG_0003 CBC: CbuK_0002 LPN: lpg0002 LPF: lpl0002(dnaN) LPP: lpp0002(dnaN) LPC: LPC_0002(dnaN) LLO: LLO_0002(dnaN) MCA: MCA3032(dnaN) FTU: FTT_0002(dnaN) FTF: FTF0002(dnaN) FTW: FTW_0002(dnaN) FTL: FTL_0002 FTH: FTH_0002(dnaN) FTA: FTA_0003(dnaN) FTM: FTM_0002(dnaN) FTN: FTN_0002(dnaN) FPH: Fphi_0843 TCX: Tcr_0002 NOC: Noc_0002 NHL: Nhal_0002 ALV: Alvin_0002 AEH: Mlg_0002 HHA: Hhal_1226 TGR: Tgr7_0002 TKM: TK90_0002 HNA: Hneap_0002 HCH: HCH_00003(dnaN) CSA: Csal_0002 ABO: ABO_0002(dnaN) MMW: Mmwyl1_0002 AHA: AHA_0002(dnaN) ASA: ASA_0002(dnaN) TAU: Tola_0002 DNO: DNO_0002(dnaN) AFE: Lferr_0002 AFR: AFE_0001(dnaN) BCI: BCI_0132(dnaN) RMA: 
Rmag_0002 VOK: COSY_0002(dnaN) KKO: Kkor_0002 NMA: NMA0553(dnaN) NME: NMB1902(dnaN) NMC: NMC0321(dnaN) NMN: NMCC_0318(dnaN) NMI: NMO_0268(dnaN) NGO: NGO0002 NGK: NGK_0002 CVI: CV_0002(dnaN) LHK: LHK_03241(dnaN) RSO: RSc3441(dnaN) RPI: Rpic_0002 RPF: Rpic12D_0002 REU: Reut_A0002 REH: H16_A0002(dnaN) RME: Rmet_0002 CTI: RALTA_A0002(dnaN) BMA: BMA0002(dnaN) BMV: BMASAVP1_A2849(dnaN) BML: BMA10229_A2236(dnaN) BMN: BMA10247_0002(dnaN) BPS: BPSL0074(dnaN) BPM: BURPS1710b_0300(dnaN) BPL: BURPS1106A_0102(dnaN) BPD: BURPS668_0088(dnaN) BPR: GBP346_A4277(dnaN) BTE: BTH_I3240(dnaN) BVI: Bcep1808_0002 BUR: Bcep18194_A3183 BCN: Bcen_2554 BCH: Bcen2424_0002 BCM: Bcenmc03_0002 BCJ: BCAL0422(dnaN) BAM: Bamb_0002 BAC: BamMC406_0002 BMU: Bmul_0002 BMJ: BMULJ_00069(dnaN) BXE: Bxe_A4461 BPH: Bphy_0002 BPY: Bphyt_0002 BGL: bglu_1g00020 PNU: Pnuc_0002 PNE: Pnec_0002 BPE: BP0490(dnaN) BPA: BPP4400(dnaN) BBR: BB4988(dnaN) BPT: Bpet0002(dnaN) BAV: BAV3411(dnaN) RFR: Rfer_0002 Rfer_4311 POL: Bpro_0002 PNA: Pnap_4118 AAV: Aave_0002 AJS: Ajs_4145 DIA: Dtpsy_0002 VEI: Veis_0002 DAC: Daci_0002 VAP: Vapar_0002 CTT: CtCNB1_0002 MPT: Mpe_A0002 HAR: HEAR0003(dnaN) MMS: mma_0002(dnaN) LCH: Lcho_0002 NEU: NE0002(dnaN) NET: Neut_0002 NMU: Nmul_A0002 EBA: ebA2847(dnaN) AZO: azo0002(dnaN) DAR: Daro_0002 TMZ: Tmz1t_0002 TBD: Tbd_0002 MFA: Mfla_0002 MMB: Mmol_0002 MEI: Msip34_0002 APP: CAP2UW1_0002 SLT: Slit_0002 HPY: HP0500 HPJ: jhp0452(dnaN) HPA: HPAG1_0476 HPS: HPSH_04415 HPG: HPG27_460(dnaN) HPP: HPP12_0508(dnaN) HPB: HELPY_0852(dnaN) HHE: HH1126(dnaN) HAC: Hac_0821(dnaN) HMS: HMU00020(dnaN) WSU: WS0001(dnaN) TDN: Suden_0002 CJE: Cj0002(dnaN) CJR: CJE0002(dnaN) CJJ: CJJ81176_0028(dnaN) CJU: C8J_0002(dnaN) CJD: JJD26997_0002(dnaN) CFF: CFF8240_0002(dnaN) CCV: CCV52592_2055(dnaN) CHA: CHAB381_0003(dnaN) CCO: CCC13826_1851(dnaN) CLA: Cla_0002(dnaN) ABU: Abu_0002(dnaN) SDL: Sdel_0004 NIS: NIS_0002(dnaN) SUN: SUN_0002(dnaN) NAM: NAMH_0002(dnaN) GSU: GSU0001(dnaN) GME: Gmet_0002 GUR: Gura_0002 GLO: 
Glov_0002 GBM: Gbem_0002 GEO: Geob_0002 GEM: GM21_0002 PCA: Pcar_0002(dnaN) PPD: Ppro_0002 DVU: DVU0002(dnaN) DVL: Dvul_0006 DVM: DvMF_2082 DDE: Dde_0002 DDS: Ddes_0004 DMA: DMR_00020(dnaN) DSA: Desal_3784 LIP: LI0786(dnaN) DBA: Dbac_3096 DRT: Dret_2471 BBA: Bd0002(dnaN) DPS: DP0648 DOL: Dole_0094 DAL: Dalk_0275 Dalk_4110 DAT: HRM2_00010(dnaN) ADE: Adeh_0002 ACP: A2cp1_0002 AFW: Anae109_0002 ANK: AnaeK_0002 MXA: MXAN_0236(dnaN) SCL: sce8164(dnaN) HOH: Hoch_0002 SAT: SYN_02050 SFU: Sfum_2688 RPR: RP419 RTY: RT0405(dnaN) RCM: A1E_03260 RCO: RC0583(dnaN) RFE: RF_0649(dnaN) RAK: A1C_03150 RRI: A1G_03290 RRJ: RrIowa_0695 RMS: RMA_0600(dnaN) RPK: RPR_05615 RAF: RAF_ORF0544(dnaN) RBE: RBE_0656(dnaN) RBO: A1I_04260 OTS: OTBS_0002(dnaN) OTT: OTT_1572(dnaN) WOL: WD1067(dnaN) WBM: Wbm0429 WRI: WRi_011020(dnaN) WPI: WPa_0934(dnaN) AMA: AM1014(dnaN) AMF: AMF_773(dnaN) ACN: ACIS_00336(dnaN) APH: APH_1097(dnaN) ERU: Erum7880(dnaN) ERW: ERWE_CDS_08330(dnaN) ERG: ERGA_CDS_08230(dnaN) ECN: Ecaj_0815 ECH: ECH_1009(dnaN) ECH_1010 NSE: NSE_0170(dnaN) NRI: NRI_0166(dnaN) PUB: SAR11_0411(dnaN) MLO: mll5580 MES: Meso_0002 PLA: Plav_0004 SME: SMc00415(dnaN) SMD: Smed_3543 ATU: Atu0301(dnaN) ATC: AGR_C_520 ARA: Arad_0542(dnaN) AVI: Avi_0351(dnaN) RET: RHE_CH00318(dnaN) REC: RHECIAT_CH0000356(dnaN) RLE: RL0334(dnaN) RLT: Rleg2_4329 RLG: Rleg_4592 RHI: NGR_c36750(dnaN) LAS: CLIBASIA_01695(dnaA) BME: BMEI1942 BMF: BAB1_0002(dnaN) BMB: BruAb1_0002(dnaN) BMC: BAbS19_I00020 BMS: BR0002(dnaN) BMT: BSUIS_A0002(dnaN) BOV: BOV_0002(dnaN) BCS: BCAN_A0002(dnaN) BMR: BMI_I2(dnaN) OAN: Oant_0004 BJA: bll0829(dnaN) BRA: BRADO0002(dnaN) BBT: BBta_0002(dnaN) RPA: RPA0002(dnaN) RPB: RPB_0002 RPC: RPC_0002 RPD: RPD_0002 RPE: RPE_0002 RPT: Rpal_0002 NWI: Nwi_0002 NHA: Nham_0002 OCA: OCAR_4536(dnaN) BHE: BH01190(dnaN) BQU: BQ01120(dnaN) BBK: BARBAKC583_1278(dnaN) BTR: Btr_0130(dnaN) BGR: Bgr_01170(dnaN) XAU: Xaut_0002 AZC: AZC_1013 MEX: Mext_0002 MEA: Mex_1p0002(dnaN) MDI: METDI0002(dnaN) MRD: Mrad2831_2239 
MET: M446_0002 MPO: Mpop_0003 MCH: Mchl_0003 MNO: Mnod_0002 BID: Bind_3047 MSL: Msil_3705 CCR: CC_0156 CCS: CCNA_00155 CAK: Caul_0162 PZU: PHZ_c3468(dnaN) SIL: SPO0150(dnaN) SIT: TM1040_0002 RSP: RSP_1343 RSH: Rsph17029_0012 RSQ: Rsph17025_0003 RSK: RSKD131_2743 RCP: RCAP_rcc00002(dnaN) JAN: Jann_0002 RDE: RD1_0209(dnaN) PDE: Pden_0342 Pden_0970 Pden_2834 DSH: Dshi_3374(dnaN) MMR: Mmar10_0002 HNE: HNE_0563(dnaN) HBA: Hbal_3037 ZMO: ZMO0980 ZMN: Za10_0322 NAR: Saro_0639 Saro_1429 SAL: Sala_0708 SWI: Swit_2795 SJP: SJA_C1-09100(dnaN) ELI: ELI_07330 GOX: GOX0002 GBE: GbCGDNIH1_0319 ACR: Acry_1437 GDI: GDI_1772(dnaN) GDJ: Gdia_0002 APT: APA01_12310 RRU: Rru_A0002 RCE: RC1_3063(dnaN) MAG: amb0637 AZL: AZL_027260(dnaN) APB: SAR116_1557 MGM: Mmc1_0002 BSU: BSU00020(dnaN) BHA: BH0002(dnaN) BAN: BA_0002(dnaN-1) BA_2684(dnaN-2) BAR: GBAA0002(dnaN-1) GBAA2684(dnaN-2) BAA: BA_0597 BA_3202 BAT: BAS0002 BAS2499 BAH: BAMEG_0002(dnaN1) BAMEG_1910(dnaN2) BAI: BAA_0002(dnaN1) BAA_2748(dnaN2) BCE: BC0002 BC2693 BCA: BCE_0002(dnaN) BCE_2720(dnaN) BCE_A0091(dnaN) BCZ: BCZK0002(dnaN) BCZK2430(dnaN) BCR: BCAH187_A0002(dnaN1) BCAH187_A2750(dnaN2) BCAH187_C0074(dnaN) BCB: BCB4264_A0002(dnaN1) BCB4264_A2706(dnaN2) BCU: BCAH820_0002(dnaN1) BCAH820_2699(dnaN2) BCAH820_B0051(dnaN) BCG: BCG9842_B2604(dnaN2) BCG9842_B5318(dnaN1) BCQ: BCQ_0002(dnaN) BCQ_2544(dnaN-2) BCQ_PI001 BCX: BCA_0002(dnaN1) BCA_2773(dnaN2) BCY: Bcer98_0002 BTK: BT9727_0002(dnaN) BT9727_2460(dnaN) BTL: BALH_0002(dnaN) BALH_2417 BWE: BcerKBAB4_0002 BcerKBAB4_2456 BLI: BL00077(dnaN) BLD: BLi00002(dnaN) BCL: ABC0002(dnaN) BAY: RBAM_000020(dnaN) BPU: BPUM_0002(dnaN) BPF: BpOF4_07960 BMQ: BMQ_0002(dnaN) OIH: OB0002(dnaN) GKA: GK0002(dnaN) GTN: GTNG_0002 GWC: GWCH70_0002 GYM: GYMC10_0002 GYC: GYMC61_0002 AFL: Aflv_0002(dnaN) SAU: SA0002(dnaN) SAV: SAV0002(dnaN) SAW: SAHV_0002(dnaN) SAH: SaurJH1_0002 SAJ: SaurJH9_0002 SAM: MW0002(dnaN) SAS: SAS0002 SAR: SAR0002(dnaN) SAC: SACOL0002(dnaN) SAX: USA300HOU_0002(dnaN) SAA: 
SAUSA300_0002(dnaN) SAO: SAOUHSC_00002 SAE: NWMN_0002(dnaN) SAD: SAAV_0002(dnaN) SAB: SAB0002(dnaN) SEP: SE0002 SER: SERP2552(dnaN) SHA: SH0002(dnaN) SSP: SSP0002 SCA: Sca_2473(dnaN) SLG: SLGD_02547 LMO: lmo0002(dnaN) LMF: LMOf2365_0002(dnaN) LMH: LMHCC_2662(dnaN) LMC: Lm4b_00002(dnaN) LMN: LM5578_3021(dnaN) LMY: LM5923_2970(dnaN) LIN: lin0002(dnaN) LWE: lwe0002(dnaN) LSG: lse_0002(dnaN) LSP: Bsph_0005(dnaN) Bsph_p018 ESI: Exig_0002 EAT: EAT1b_1729 MCL: MCCL_0002(dnaN) BBE: BBR47_00020(dnaN) PJD: Pjdr2_0002 AAC: Aaci_0002 Aaci_0484 LLA: L0275(dnaN) LLK: LLKF_0002(dnaN) LLC: LACR_0002 LLM: llmg_0002(dnaN) SPY: SPy_0003(dnaN) SPZ: M5005_Spy_0002(dnaN) SPM: spyM18_0002(dnaN) SPG: SpyM3_0002(dnaN) SPS: SPs0002 SPH: MGAS10270_Spy0002(dnaN) SPI: MGAS10750_Spy0002(dnaN) SPJ: MGAS2096_Spy0002(dnaN) SPK: MGAS9429_Spy0002(dnaN) SPF: SpyM50002(dnaN) SPA: M6_Spy0002 SPB: M28_Spy0002(dnaN) SOZ: Spy49_0002(dnaN) SPN: SP_0002 SPD: SPD_0002(dnaN) SPR: spr0002(dnaN) SPW: SPCG_0002(dnaN) SPX: SPG_0002(dnaN) SNE: SPN23F_00020(dnaN) SPV: SPH_0002(dnaN) SNM: SP70585_0002(dnaN) SJJ: SPJ_0002(dnaN) SPP: SPP_0002(dnaN) SNT: SPT_0002(dnaN) SAG: SAG0002(dnaN) SAN: gbs0002(dnaN) SAK: SAK_0002(dnaN) SMU: SMU.02(dnaN) SMC: SmuNN2025_0002(dnaN) STC: str0002(dnaN) STL: stu0002(dnaN) STE: STER_0002 SSA: SSA_0002(dnaN) SSU: SSU05_0002 SSV: SSU98_0002 SSU98_0003 SSB: SSUBM407_0002(dnaN) SSI: SSU0002(dnaN) SSS: SSUSC84_0002(dnaN) SGO: SGO_0002(dnaN) SEQ: SZO_00020 SEZ: Sez_0002(dnaN) SEU: SEQ_0002 SUB: SUB0002(dnaN) SDS: SDEG_0002(dnaN) SGA: GALLO_0002(dnaN) SMB: smi_0002(dnaN) LPL: lp_0002(dnaN) LPJ: JDM1_0002(dnaN) LJO: LJ0002 LJF: FI9785_64(dnaN) LAC: LBA0002(dnaN) LSA: LSA0002(dnaN) LSL: LSL_0002(dnaN) LDB: Ldb0002(dnaN) LBU: LBUL_0002 LBR: LVIS_0002 LCA: LSEI_0002 LCB: LCABL_00020(dnaN) LGA: LGAS_0002 LRE: Lreu_0002 LRF: LAR_0002 LHE: lhv_0002 LFE: LAF_0002 LRH: LGG_00002(dnaN) LRL: LC705_00002(dnaN) PPE: PEPE_0002 EFA: EF0002(dnaN) OOE: OEOE_0002 LME: LEUM_0002 LCI: LCK_00002(dnaN) CAC: 
CAC0002(dnaN) CPE: CPE0002(dnaN) CPF: CPF_0002(dnaN) CPR: CPR_0002(dnaN) CTC: CTC00093 CNO: NT01CX_0866(dnaN) CTH: Cthe_2372 CDF: CD0002(dnaN) CDC: CD196_0002(dnaN) CBO: CBO0002(dnaG) CBA: CLB_0002(dnaN) CBH: CLC_0002(dnaN) CBY: CLM_0002(dnaN) CBL: CLK_3134(dnaN) CLK_A0115(dnaN) CBK: CLL_A0002(dnaN) CBB: CLD_0824(dnaN) CLD_A0160(dnaN) CBI: CLJ_0039(dnaN_2) CLJ_B0002(dnaN_1) CBT: CLH_0002(dnaN) CBF: CLI_0002(dnaN) CBE: Cbei_0002 CKL: CKL_0002(dnaN) CKR: CKR_0002 CPY: Cphy_0002 CCE: Ccel_0002 AMT: Amet_0002 AOE: Clos_0002 STH: STH2 SWO: Swol_0002 VPR: Vpar_0002 AFN: Acfer_0004 ATE: Athe_0003 DSY: DSY0002(dnaN) DHD: Dhaf_0002 DRM: Dred_0002 DAE: Dtox_0002 PTH: PTH_0002(dnaN) DAU: Daud_0002 HMO: HM1_0904(dnaN) FMA: FMG_0002 APR: Apre_0002 EEL: EUBELI_00002 ERE: EUBREC_0002 CLO: HMPREF0868_0092(dnaN) TTE: TTE0002(dnaN) TEX: Teth514_0002 TPD: Teth39_0002 TIT: Thit_0002 CHY: CHY_2709(dnaN) MTA: Moth_0003 ADG: Adeg_0002 Adeg_0689 CSC: Csac_0002 CPO: COPRO5265_0066(dnaN) NTH: Nther_0002 HOR: Hore_00020 MGE: MG_001(dnaN) MPN: MPN001(dnaN) MPU: MYPU_0020(dnaN) MPE: MYPE20(dnaN) MGA: MGA_0618(dnaN) MMY: MSC_0002(dnaN) MMO: MMOB0020(dnaN) MHY: mhp002(dnaN) MHJ: MHJ_0002(dnaN) MHP: MHP7448_0002(dnaN) MSY: MS53_0002(dnaN) MCP: MCAP_0002(dnaN) MAA: MAG_0020(dnaN) MAL: MAGa0020(dnaN) MAT: MARTH_orf003(dnaN) MCO: MCJ_000020(dnaN) MHO: MHO_0050(dnaN) MCD: MCRO_0002(dnaN) UUR: UU079(dnaN) UPA: UPA3_0078(dnaN) UUE: UUR10_0085(dnaN) POY: PAM_002(dnaN) AYW: AYWB_002(dnaN) PML: ATP_00194(dnaN) PAL: PAa_0002(dnaN) MFL: Mfl002 MTU: Rv0002(dnaN) MTC: MT0002(dnaN) MRA: MRA_0002(dnaN) MTF: TBFG_10002 MTB: TBMG_00002(TBMG_00002.1) MBO: Mb0002(dnaN) MBB: BCG_0002(dnaN_1) BCG_0032(dnaN_2) MBT: JTY_0002(dnaN) MLE: ML0002(dnaN) MLB: MLBr_00002(dnaN) MPA: MAP0002(dnaN) MAV: MAV_0002(dnaN) MSM: MSMEG_0001(dnaN) MUL: MUL_0002(dnaN) MVA: Mvan_0002 MGI: Mflv_0826 MAB: MAB_0002(dnaN) MMC: Mmcs_0002 MKM: Mkms_0010 MJL: Mjls_0002 MMI: MMAR_0002(dnaN) CGL: NCgl0002(cgl0003) CGB: cg0004(dnaN) CGT: cgR_0003 
CEF: CE0003 CDI: DIP0002(dnaN) CJK: jk0002(dnaN) CUR: cur_0002 CAR: cauri_0002(dnaN) CKP: ckrop_0001 NFA: nfa20(dnaN) nfa39660(dnaN2) RHA: RHA1_ro03667(dnaN) RER: RER_00020(dnaN) ROP: ROP_34840(dnaN) GBR: Gbro_0002 SCO: SCO3878(dnaN) SCP1.119(dnaN) SMA: SAV_3362(dnaN2) SAV_4317(dnaN1) SGR: SGR_3397(dnaN2) SGR_3701(dnaN) SCB: SCAB_45811(dnaN) TWH: TWT002(dnaN) TWS: TW002(dnaN) LXX: Lxx00020(dnaN) CMI: CMM_0002(dnaN) CMS: CMS_0002(dnaN) ART: Arth_0002 AAU: AAur_0002(dnaN) ACH: Achl_0002 RSA: RSal33209_0002 KRH: KRH_00020(dnaN) MLU: Mlut_00020 RMU: RMDY18_00020 BCV: Bcav_0002 BFA: Bfae_00020 JDE: Jden_0002 KSE: Ksed_00020 XCE: Xcel_0002 PAC: PPA0002 NCA: Noca_0002 KFL: Kfla_0002 TFU: Tfu_0002 Tfu_0086 TCU: Tcur_0002 SRO: Sros_0002 FRA: Francci3_0003 Francci3_0738 Francci3_1710 FRE: Franean1_0003 Franean1_2567 Franean1_5871 FAL: FRAAL0004(dnaN) FRAAL1257 ACE: Acel_0002 NML: Namu_0002 GOB: Gobs_0002 KRA: Krad_0002 Krad_1769 SEN: SACE_0003(dnaN) SACE_0327(dnaN) SVI: Svir_00020 AMI: Amir_0002 Amir_0251 STP: Strop_0003 SAQ: Sare_0002 CAI: Caci_0002 SNA: Snas_0002 BLO: BL0638(dnaN) BLJ: BLD_1435(dnaN) BLN: Blon_0003 BAD: BAD_0002(dnaN) BLA: BLA_0002(dnaN) BLC: Balac_0002 BLT: Balat_0002 BDE: BDP_0002 GVA: HMPREF0424_0044(dnaN) RXY: Rxyl_0002 CWO: Cwoe_0002 AFO: Afer_0002 CCU: Ccur_00020 SHI: Shel_00020 APV: Apar_0003 ELE: Elen_0003 CTR: CT075(dnaN) CTA: CTA_0080(dnaN) CTB: CTL0331(dnaN) CTL: CTLon_0326(dnaN) CTJ: JALI_0741(dnaN) CMU: TC0347(dnaN) CPN: CPn0338(dnaN) CPA: CP0419 CPJ: CPj0338(dnaN) CPT: CpB0347 CCA: CCA00445(dnaN) CAB: CAB431(dnaN) CFE: CF0562(dnaN) PCU: pc1705(dnaN) BBU: BB0438 BBZ: BbuZS7_0444(dnaN) BGA: BG0445(dnaN) BAF: BAPKO_0460(dnaN) BTU: BT0438 BHR: BH0438 BDU: BDU_435(dnaN) BRE: BRE_438(dnaN) TPA: TP0002 TPP: TPASS_0002(dnaN) TDE: TDE0231(dnaN) LIL: LA0002(dnaN) LIC: LIC10002(dnaN) LBJ: LBJ_0005(dnaN) LBL: LBL_0005(dnaN) LBI: LEPBI_I0002(dnaN) LBF: LBF_0005(dnaN) BHY: BHWA1_01258(dnaN) ABA: Acid345_0002 ACA: ACP_2196(dnaN) SUS: Acid_0002 BTH: BT_1364 
BFR: BF2981 BFS: BF2857 BVU: BVU_2894 PGI: PG1853(dnaN) PGN: PGN_1786 PDI: BDI_0694 APS: CFPG_644 PRU: PRU_0839(dnaN) SRU: SRU_0002(dnaN) SRM: SRM_00003(dnaN) RMR: Rmar_0002 CHU: CHU_1549(dnaN) DFE: Dfer_3720 SLI: Slin_4880 CPI: Cpin_0616 PHE: Phep_3235 GFO: GFO_1914(dnaN) FJO: Fjoh_2720 FPS: FP1091(dnaN) FBA: FIC_00920 COC: Coch_1752 RBI: RB2501_13564 SMG: SMGWSS_005(dnaN) SMS: SMDSEM_005(dnaN) SMH: DMIN_00040 BBL: BLBBGE_048(dnaN) BPI: BPLAN_585(dnaN) AAS: Aasi_0152 FSU: Fisuc_1334 FNU: FN0536 FN0617 LBA: Lebu_1267 STR: Sterm_2002 SMF: Smon_0578 OTE: Oter_0953 CAA: Caka_2082 MIN: Minf_0002(dnaN) AMU: Amuc_0816 GAU: GAU_0002(dnaN) RBA: RB10108(dnaN) PSL: Psta_1042 EMI: Emin_0003 RSD: TGRD_004 TAI: Taci_0002 ACO: Amico_0002 SYN: slr0965(dnaN) SYW: SYNW0001(dnaN) SYC: syc1496_c(dnaN) SYF: Synpcc7942_0001 SYD: Syncc9605_0001 SYE: Syncc9902_0001 SYG: sync_0001(dnaN) SYR: SynRCC307_0001(dnaN) SYX: SynWH7803_0001(dnaN) SYP: SYNPCC7002_A1263(dnaN) CYA: CYA_1647(dnaN) CYB: CYB_1539(dnaN) TEL: tll2349(dnaN) MAR: MAE_38670(dnaN) CYT: cce_1864(dnaN1) cce_4866(dnaN2) CYP: PCC8801_1016 CYC: PCC7424_0340 CYN: Cyan7425_0199 CYH: Cyan8802_1045 CYU: UCYN_01290 GVI: gvip446(dnaN) ANA: alr2010(dnaN) alr7569 NPU: Npun_R0034 AVA: Ava_0002 Ava_C0075 PMA: Pro0001(dnaN) PMM: PMM0001(dnaN) PMT: PMT0001(dnaN) PMN: PMN2A_1328 PMI: PMT9312_0001 PMB: A9601_00001(dnaN) PMC: P9515_00001(dnaN) PMF: P9303_00001(dnaN) PMG: P9301_00001(dnaN) PMH: P9215_00001(dnaN) PMJ: P9211_00001 PME: NATL1_00001(dnaN) TER: Tery_0008 AMR: AM1_3096(dnaN) CTE: CT0001(dnaN) CPC: Cpar_0002 CCH: Cag_0002 CPH: Cpha266_0002 CPB: Cphamn1_0002 CLI: Clim_0002 PVI: Cvib_0002 PLT: Plut_0001 PPH: Ppha_0002 PAA: Paes_0002 CTS: Ctha_0842 DET: DET1227(dnaN) DEH: cbdb_A1145(dnaN) DEB: DehaBAV1_1036 DEV: DhcVS_1009(dnaN) DEG: DehalGT_0967 RRS: RoseRS_4406 RCA: Rcas_0749 CAU: Caur_2639 CAG: Cagg_0019 CHL: Chy400_2852 HAU: Haur_1513 TRO: trd_0714(dnaN) STI: Sthe_1312 DRA: DR_0001 DGE: Dgeo_0003 Dgeo_3063 DDR: Deide_00020(dnaN) TTH: 
TTC1609 TTJ: TTHA0001 MRB: Mrub_0002 AAE: aq_1882(dnaN) HYA: HY04AAS1_1088 HTH: HTH_0569(dnaN) TAL: Thal_1223 SUL: SYO3AOP1_0963 SAF: SULAZ_0725(dnaN) PMX: PERMA_1539(dnaN) TMA: TM0262 TPT: Tpet_0662 TLE: Tlet_0486 TRQ: TRQ2_0686 TNA: CTN_0424 TNP: Tnap_0892 TME: Tmel_0535 TAF: THA_1368(dnaN) FNO: Fnod_1203 PMO: Pmob_1171 KOL: Kole_1868 DTH: DICTH_1442(dnaN) DTU: Dtur_1551 TYE: THEYE_A0016(dnaN) TTR: Tter_0993 DDF: DEFDS_0001(dnaN) DAP: Dacet_2869 REFERENCE PMID:8087839 AUTHORS Stillman B. TITLE Smart machines at the DNA replication fork. JOURNAL Cell 78:725-8 (1994) /// bio-1.4.3.0001/test/data/KEGG/M00118.module0000644000004100000410000000425012200110570017266 0ustar www-datawww-dataENTRY M00118 Pathway Module NAME Pentose interconversion, arabinose/ribulose/xylulose/xylose DEFINITION K00011 K01804 K00853 (K01786,K03080) K03331 K05351 K00854 K00011 K01805 K01783 (K00853,K00875) K00039 CLASS Metabolism; Central metabolism; Other carbohydrate metabolism PATHWAY ko00040(K00011+K01804+K00853+K01786+K03080+K03331+K05351+K00854+K00011+K01805+K01783+K00853+K00875+K00039) Pentose and glucuronate interconversions ORTHOLOGY K00011 aldehyde reductase [EC:1.1.1.21] [RN:R01758 R01759] K01804 L-arabinose isomerase [EC:5.3.1.4] [RN:R01761] K00853 L-ribulokinase [EC:2.7.1.16] [RN:R02439] K01786,K03080 L-ribulose-5-phosphate 4-epimerase [EC:5.1.3.4] [RN:R05850] K03331 L-xylulose reductase [EC:1.1.1.10] [RN:R01904] K05351 D-xylulose reductase [EC:1.1.1.9] [RN:R01896] K00854 xylulokinase [EC:2.7.1.17] [RN:R01639] K00011 aldehyde reductase [EC:1.1.1.21] [RN:R01431] K01805 xylose isomerase [EC:5.3.1.5] [RN:R01432] K01783 ribulose-phosphate 3-epimerase [EC:5.1.3.1] [RN:R01529] K00853,K00875 ribulokinase [EC:2.7.1.16 2.7.1.47] [RN:R01526] K00039 ribitol 2-dehydrogenase [EC:1.1.1.56] [RN:R01895] REACTION R01903 C00312 -> C00532 R01758,R01759 C00532 -> C00259 R01761 C00259 -> C00508 R02439 C00508 -> C01101 R05850 C01101 -> C00231 R01904 C00312 -> C00379 R01896 C00379 -> C00310 R01639 
C00310 -> C00231 R01431 C00379 -> C00181 R01432 C00181 -> C00310 R01529 C00199 -> C00231 R01526 C00231 -> C00309 R01895 C00309 -> C00474 COMPOUND C00312 L-Xylulose C00532 L-Arabitol C00259 L-Arabinose C00508 L-Ribulose C01101 L-Ribulose 5-phosphate C00231 D-Xylulose 5-phosphate C00379 Xylitol C00310 D-Xylulose C00181 D-Xylose C00199 D-Ribulose 5-phosphate C00309 D-Ribulose C00474 Ribitol bio-1.4.3.0001/test/data/KEGG/b0529.gene0000644000004100000410000000502212200110570016670 0ustar www-datawww-dataENTRY b0529 CDS E.coli NAME folD, ads, ECK0522, JW0518 DEFINITION bifunctional 5,10-methylene-tetrahydrofolate dehydrogenase/5,10-methylene-tetrahydrofolate cyclohydrolase (EC:1.5.1.5 3.5.4.9) ORTHOLOGY K01491 methylenetetrahydrofolate dehydrogenase (NADP+) / methenyltetrahydrofolate cyclohydrolase [EC:1.5.1.5 3.5.4.9] PATHWAY eco00630 Glyoxylate and dicarboxylate metabolism eco00670 One carbon pool by folate eco01100 Metabolic pathways CLASS Metabolism; Carbohydrate Metabolism; Glyoxylate and dicarboxylate metabolism [PATH:eco00630] Metabolism; Metabolism of Cofactors and Vitamins; One carbon pool by folate [PATH:eco00670] POSITION complement(556098..556964) MOTIF Pfam: THF_DHG_CYH_C THF_DHG_CYH Amidohydro_1 PROSITE: THF_DHG_CYH_1 THF_DHG_CYH_2 DBLINKS NCBI-GI: 16128513 NCBI-GeneID: 945221 RegulonDB: B0529 EcoGene: EG10328 ECOCYC: EG10328 UniProt: P24186 STRUCTURE PDB: 1B0A AASEQ 288 MAAKIIDGKTIAQQVRSEVAQKVQARIAAGLRAPGLAVVLVGSNPASQIYVASKRKACEE VGFVSRSYDLPETTSEAELLELIDTLNADNTIDGILVQLPLPAGIDNVKVLERIHPDKDV DGFHPYNVGRLCQRAPRLRPCTPRGIVTLLERYNIDTFGLNAVVIGASNIVGRPMSMELL LAGCTTTVTHRFTKNLRHHVENADLLIVAVGKPGFIPGDWIKEGAIVIDVGINRLENGKV VGDVVFEDAAKRASYITPVPGGVGPMTVATLIENTLQACVEYHDPQDE NTSEQ 867 atggcagcaaagattattgacggtaaaacgattgcgcagcaggtgcgctctgaagttgct caaaaagttcaggcgcgtattgcagccggactgcgggcaccaggactggccgttgtgctg gtgggtagtaaccctgcatcgcaaatttatgtcgcaagcaaacgcaaggcttgtgaagaa gtcgggttcgtctcccgctcttatgacctcccggaaaccaccagcgaagcggagctgctg 
gagcttatcgatacgctgaatgccgacaacaccatcgatggcattctggttcaactgccg ttaccggcgggtattgataacgtcaaagtgctggaacgtattcatccggacaaagacgtg gacggtttccatccttacaacgtcggtcgtctgtgccagcgcgcgccgcgtctgcgtccc tgcaccccgcgcggtatcgtcacgctgcttgagcgttacaacattgataccttcggcctc aacgccgtggtgattggcgcatcgaatatcgttggccgcccgatgagcatggaactgctg ctggcaggttgcaccactacagtgactcaccgcttcactaaaaatctgcgtcatcacgta gaaaatgccgatctattgatcgttgccgttggcaagccaggctttattcccggtgactgg atcaaagaaggcgcaattgtgattgatgtcggcatcaaccgtctggaaaatggcaaagtt gtgggcgacgtcgtgtttgaagacgcggctaaacgcgcctcatacattacgcctgttccc ggcggcgttggcccgatgacggttgccacgctgattgaaaacacgctacaggcgtgcgtt gaatatcatgatccacaggatgagtaa /// bio-1.4.3.0001/test/data/KEGG/test.kgml0000644000004100000410000000331512200110570017125 0ustar www-datawww-data bio-1.4.3.0001/test/data/KEGG/rn00250.pathway0000644000004100000410000001417012200110570017700 0ustar www-datawww-dataENTRY rn00250 Pathway NAME Alanine, aspartate and glutamate metabolism CLASS Metabolism; Amino Acid Metabolism PATHWAY_MAP rn00250 Alanine, aspartate and glutamate metabolism MODULE M00017 Glutamate biosynthesis, oxoglutarate => glutamate (glutamate dehydrogenase) [PATH:rn00250] M00018 Glutamine biosynthesis, glutamate => glutamine [PATH:rn00250] M00019 Glutamate biosynthesis, oxoglutarete => glutamate (glutamate synthase) [PATH:rn00250] M00021 Aspartate biosynthesis, oxaloacetate => aspartate [PATH:rn00250] M00022 Asparagine biosynthesis, aspartate => asparagine [PATH:rn00250] M00026 Alanine biosynthesis, pyruvate => alanine [PATH:rn00250] M00038 Glutamine degradation, glutamine => glutamate + NH3 [PATH:rn00250] M00040 GABA (gamma-Aminobutyrate) shunt [PATH:rn00250] M00044 Aspartate degradation, aspartate => fumarate [PATH:rn00250] M00045 Aspartate degradation, aspartate => oxaloacetate [PATH:rn00250] M00046 Asparagine degradation, asparagine => aspartate +NH3 [PATH:rn00250] DISEASE H00074 Canavan disease (CD) H00185 Citrullinemia (CTLN) H00197 Adenylosuccinate lyase deficiency DBLINKS GO: 0006522 0006531 
0006536 REACTION R00093 L-glutamate:NAD+ oxidoreductase (transaminating) R00114 L-Glutamate:NADP+ oxidoreductase (transaminating) R00149 Carbon-dioxide:ammonia ligase (ADP-forming,carbamate-phosphorylating) R00243 L-Glutamate:NAD+ oxidoreductase (deaminating) R00248 L-Glutamate:NADP+ oxidoreductase (deaminating) R00253 L-Glutamate:ammonia ligase (ADP-forming) R00256 L-Glutamine amidohydrolase R00258 L-Alanine:2-oxoglutarate aminotransferase R00261 L-glutamate 1-carboxy-lyase (4-aminobutanoate-forming) R00269 2-Oxoglutaramate amidohydrolase R00348 2-Oxosuccinamate amidohydrolase R00355 L-Aspartate:2-oxoglutarate aminotransferase R00357 L-Aspartic acid:oxygen oxidoreductase (deaminating) R00359 D-Aspartate:oxygen oxidoreductase (deaminating) R00369 L-Alanine:glyoxylate aminotransferase R00396 L-Alanine:NAD+ oxidoreductase (deaminating) R00397 L-aspartate 4-carboxy-lyase (L-alanine-forming) R00400 L-alanine:oxaloacetate aminotransferase R00483 L-aspartate:ammonia ligase (AMP-forming) R00484 N-Carbamoyl-L-aspartate amidohydrolase R00485 L-Asparagine amidohydrolase R00487 Acetyl-CoA:L-aspartate N-acetyltransferase R00488 N-Acetyl-L-aspartate amidohydrolase R00490 L-Aspartate ammonia-lyase R00491 aspartate racemase R00575 hydrogen-carbonate:L-glutamine amido-ligase (ADP-forming, carbamate-phosphorylating) R00576 L-Glutamine:pyruvate aminotransferase R00578 L-aspartate:L-glutamine amido-ligase (AMP-forming) R00707 (S)-1-pyrroline-5-carboxylate:NAD+ oxidoreductase R00708 (S)-1-pyrroline-5-carboxylate:NADP+ oxidoreductase R00713 Succinate-semialdehyde:NAD+ oxidoreductase R00714 Succinate-semialdehyde:NADP+ oxidoreductase R00768 L-glutamine:D-fructose-6-phosphate isomerase (deaminating) R01072 5-phosphoribosylamine:diphosphate phospho-alpha-D-ribosyltransferase (glutamate-amidating) R01083 N6-(1,2-dicarboxyethyl)AMP AMP-lyase (fumarate-forming) R01086 2-(Nomega-L-arginino)succinate arginine-lyase (fumarate-forming) R01135 IMP:L-aspartate ligase (GDP-forming) R01346 
L-Asparagine:2-oxo-acid aminotransferase R01397 carbamoyl-phosphate:L-aspartate carbamoyltransferase R01648 4-Aminobutanoate:2-oxoglutarate aminotransferase R01954 L-Citrulline:L-aspartate ligase (AMP-forming) COMPOUND C00014 NH3 C00022 Pyruvate C00025 L-Glutamate C00026 2-Oxoglutarate C00036 Oxaloacetate C00041 L-Alanine C00042 Succinate C00049 L-Aspartate C00064 L-Glutamine C00122 Fumarate C00152 L-Asparagine C00169 Carbamoyl phosphate C00232 Succinate semialdehyde C00334 4-Aminobutanoate C00352 D-Glucosamine 6-phosphate C00402 D-Aspartate C00438 N-Carbamoyl-L-aspartate C00940 2-Oxoglutaramate C01042 N-Acetyl-L-aspartate C02362 2-Oxosuccinamate C03090 5-Phosphoribosylamine C03406 N-(L-Arginino)succinate C03794 N6-(1,2-Dicarboxyethyl)-AMP C03912 (S)-1-Pyrroline-5-carboxylate REFERENCE AUTHORS Nishizuka Y, Seyama Y, Ikai A, Ishimura Y, Kawaguchi A (eds). TITLE [Cellular Functions and Metabolic Maps] (In Japanese) JOURNAL Tokyo Kagaku Dojin (1997) REFERENCE PMID:9687539 AUTHORS Wu G TITLE Intestinal mucosal amino acid catabolism. 
JOURNAL J Nutr 128:1249-52 (1998) REL_PATHWAY rn00010 Glycolysis / Gluconeogenesis rn00020 Citrate cycle (TCA cycle) rn00230 Purine metabolism rn00240 Pyrimidine metabolism rn00253 Tetracycline biosynthesis rn00260 Glycine, serine and threonine metabolism rn00300 Lysine biosynthesis rn00330 Arginine and proline metabolism rn00340 Histidine metabolism rn00410 beta-Alanine metabolism rn00460 Cyanoamino acid metabolism rn00471 D-Glutamine and D-glutamate metabolism rn00473 D-Alanine metabolism rn00480 Glutathione metabolism rn00650 Butanoate metabolism rn00660 C5-Branched dibasic acid metabolism rn00760 Nicotinate and nicotinamide metabolism rn00770 Pantothenate and CoA biosynthesis rn00860 Porphyrin and chlorophyll metabolism rn00910 Nitrogen metabolism KO_PATHWAY ko00250 /// bio-1.4.3.0001/test/data/KEGG/ko00312.pathway0000644000004100000410000000130612200110570017666 0ustar www-datawww-dataENTRY ko00312 Pathway NAME beta-Lactam resistance CLASS Metabolism; Biosynthesis of Other Secondary Metabolites PATHWAY_MAP ko00312 beta-Lactam resistance DRUG D00660 Tazobactam (JAN/USAN/INN) ORTHOLOGY K02172 bla regulator protein blaR1 K02171 penicillinase repressor K01467 beta-lactamase [EC:3.5.2.6] K02352 drp35 K02547 methicillin resistance protein K02546 methicillin resistance regulatory protein K02545 penicillin-binding protein 2 prime COMPOUND C00039 DNA C03438 beta-Lactam antibiotics REL_PATHWAY ko00311 Penicillin and cephalosporin biosynthesis /// bio-1.4.3.0001/test/data/KEGG/C00025.compound0000644000004100000410000001232112200110570017606 0ustar www-datawww-dataENTRY C00025 Compound NAME L-Glutamate; L-Glutamic acid; L-Glutaminic acid; Glutamate FORMULA C5H9NO4 MASS 147.0532 REMARK Same as: D00007 COMMENT The name "glutamate" also means DL-Glutamate (see [CPD:C00302]) REACTION R00021 R00093 R00114 R00239 R00241 R00243 R00245 R00248 R00250 R00251 R00253 R00254 R00256 R00257 R00258 R00259 R00260 R00261 R00262 R00263 R00355 R00372 R00411 R00457 R00494 R00525 R00573 R00575 
R00578 R00609 R00667 R00668 R00684 R00694 R00707 R00708 R00734 R00768 R00894 R00895 R00908 R00942 R00986 R01072 R01090 R01155 R01161 R01214 R01231 R01339 R01585 R01586 R01648 R01654 R01684 R01716 R01939 R01956 R02040 R02077 R02199 R02237 R02274 R02282 R02283 R02285 R02287 R02313 R02315 R02433 R02619 R02700 R02772 R02773 R02929 R02930 R03053 R03120 R03189 R03207 R03243 R03248 R03266 R03651 R03905 R03916 R03952 R03970 R03971 R04028 R04029 R04051 R04171 R04173 R04188 R04212 R04217 R04234 R04241 R04269 R04338 R04438 R04463 R04467 R04475 R04529 R04558 R04776 R05052 R05085 R05197 R05207 R05224 R05225 R05507 R05578 R05815 R06423 R06426 R06844 R06977 R07275 R07276 R07277 R07396 R07414 R07419 R07456 R07613 R07643 R07659 R08244 PATHWAY PATH: ko00250 Alanine, aspartate and glutamate metabolism PATH: ko00330 Arginine and proline metabolism PATH: ko00340 Histidine metabolism PATH: ko00471 D-Glutamine and D-glutamate metabolism PATH: ko00480 Glutathione metabolism PATH: ko00650 Butanoate metabolism PATH: ko00660 C5-Branched dibasic acid metabolism PATH: ko00860 Porphyrin and chlorophyll metabolism PATH: ko00910 Nitrogen metabolism PATH: ko00970 Aminoacyl-tRNA biosynthesis PATH: map01060 Biosynthesis of plant secondary metabolites PATH: ko01064 Biosynthesis of alkaloids derived from ornithine, lysine and nicotinic acid PATH: ko01100 Metabolic pathways PATH: ko02010 ABC transporters PATH: ko04080 Neuroactive ligand-receptor interaction PATH: ko04540 Gap junction PATH: ko04720 Long-term potentiation PATH: ko04730 Long-term depression PATH: ko04742 Taste transduction PATH: ko05014 Amyotrophic lateral sclerosis (ALS) PATH: ko05016 Huntington's disease ENZYME 1.4.1.2 1.4.1.3 1.4.1.4 1.4.1.13 1.4.1.14 1.4.3.11 1.4.7.1 1.5.1.9 1.5.1.10 1.5.1.12 1.5.99.5 2.1.1.21 2.1.2.5 2.3.1.1 2.3.1.14 2.3.1.35 2.3.2.2 2.3.2.- 2.4.2.14 2.4.2.- 2.6.1.1 2.6.1.2 2.6.1.3 2.6.1.4 2.6.1.5 2.6.1.6 2.6.1.7 2.6.1.8 2.6.1.9 2.6.1.11 2.6.1.13 2.6.1.16 2.6.1.17 2.6.1.19 2.6.1.22 2.6.1.23 2.6.1.24 2.6.1.26 2.6.1.27 
2.6.1.29 2.6.1.33 2.6.1.34 2.6.1.36 2.6.1.38 2.6.1.39 2.6.1.40 2.6.1.42 2.6.1.48 2.6.1.49 2.6.1.52 2.6.1.55 2.6.1.57 2.6.1.59 2.6.1.65 2.6.1.67 2.6.1.68 2.6.1.72 2.6.1.75 2.6.1.76 2.6.1.79 2.6.1.80 2.6.1.81 2.6.1.82 2.6.1.83 2.6.1.85 2.6.1.- 2.7.2.11 2.7.2.13 3.5.1.2 3.5.1.38 3.5.1.55 3.5.1.65 3.5.1.68 3.5.1.87 3.5.1.94 3.5.1.96 3.5.2.9 3.5.3.8 4.1.1.15 4.1.3.27 4.1.3.- 5.1.1.3 5.4.99.1 6.1.1.17 6.1.1.24 6.3.1.2 6.3.1.6 6.3.1.11 6.3.1.- 6.3.2.2 6.3.2.12 6.3.2.17 6.3.2.18 6.3.4.2 6.3.4.12 6.3.5.1 6.3.5.2 6.3.5.3 6.3.5.4 6.3.5.5 6.3.5.6 6.3.5.7 6.3.5.9 6.3.5.10 DBLINKS CAS: 56-86-0 PubChem: 3327 ChEBI: 16015 KNApSAcK: C00001358 PDB-CCD: GLU 3DMET: B00007 NIKKAJI: J9.171E ATOM 10 1 C1c C 23.8372 -17.4608 2 C1b C 25.0252 -16.7233 3 C6a C 22.6023 -16.7994 4 N1a N 23.8781 -18.8595 5 C1b C 26.2601 -17.3788 6 O6a O 21.4434 -17.5954 7 O6a O 22.6198 -15.4007 8 C6a C 27.4482 -16.6414 9 O6a O 28.6830 -17.3028 10 O6a O 27.4714 -15.2426 BOND 9 1 1 2 1 2 1 3 1 3 1 4 1 #Down 4 2 5 1 5 3 6 1 6 3 7 2 7 5 8 1 8 8 9 1 9 8 10 2 bio-1.4.3.0001/test/data/KEGG/1.1.1.1.enzyme0000644000004100000410000012400712200110570017322 0ustar www-datawww-dataENTRY EC 1.1.1.1 Enzyme NAME alcohol dehydrogenase; aldehyde reductase; ADH; alcohol dehydrogenase (NAD); aliphatic alcohol dehydrogenase; ethanol dehydrogenase; NAD-dependent alcohol dehydrogenase; NAD-specific aromatic alcohol dehydrogenase; NADH-alcohol dehydrogenase; NADH-aldehyde dehydrogenase; primary alcohol dehydrogenase; yeast alcohol dehydrogenase CLASS Oxidoreductases; Acting on the CH-OH group of donors; With NAD+ or NADP+ as acceptor SYSNAME alcohol:NAD+ oxidoreductase REACTION an alcohol + NAD+ = an aldehyde or ketone + NADH + H+ [RN:R07326 R07327] ALL_REAC R07326 > R00623 R00754 R02124 R04805 R04880 R05233 R05234 R06917 R06927 R08281 R08306 R08557 R08558; R07327 > R00624 R08310; (other) R07105 SUBSTRATE alcohol [CPD:C00069]; NAD+ [CPD:C00003] PRODUCT aldehyde [CPD:C00071]; ketone [CPD:C01450]; NADH [CPD:C00004]; H+ [CPD:C00080] 
COFACTOR Zinc [CPD:C00038] COMMENT A zinc protein. Acts on primary or secondary alcohols or hemi-acetals; the animal, but not the yeast, enzyme acts also on cyclic secondary alcohols. REFERENCE 1 AUTHORS Branden, G.-I., Jornvall, H., Eklund, H. and Furugren, B. TITLE Alcohol dehydrogenase. JOURNAL In: Boyer, P.D. (Ed.), The Enzymes, 3rd ed., vol. 11, Academic Press, New York, 1975, p. 103-190. REFERENCE 2 [PMID:320001] AUTHORS Jornvall H. TITLE Differences between alcohol dehydrogenases. Structural properties and evolutionary aspects. JOURNAL Eur. J. Biochem. 72 (1977) 443-52. ORGANISM Homo sapiens [GN:hsa], Rattus norvegicus [GN:rno], Equus caballus [GN:ecb], Drosophila melanogaster [GN:dme], Bacillus stearotherrnophilus REFERENCE 3 AUTHORS Negelein, E. and Wulff, H.-J. TITLE Diphosphopyridinproteid ackohol, acetaldehyd. JOURNAL Biochem. Z. 293 (1937) 351-389. REFERENCE 4 AUTHORS Sund, H. and Theorell, H. TITLE Alcohol dehydrogenase. JOURNAL In: Boyer, P.D., Lardy, H. and Myrback, K. (Eds.), The Enzymes, 2nd ed., vol. 7, Academic Press, New York, 1963, p. 25-83. REFERENCE 5 [PMID:13605979] AUTHORS THEORELL H. TITLE Kinetics and equilibria in the liver alcohol dehydrogenase system. JOURNAL Adv. Enzymol. Relat. Subj. Biochem. 20 (1958) 31-49. 
PATHWAY PATH: ec00010 Glycolysis / Gluconeogenesis PATH: ec00071 Fatty acid metabolism PATH: ec00260 Glycine, serine and threonine metabolism PATH: ec00350 Tyrosine metabolism PATH: ec00624 1- and 2-Methylnaphthalene degradation PATH: ec00641 3-Chloroacrylic acid degradation PATH: ec00830 Retinol metabolism PATH: ec00980 Metabolism of xenobiotics by cytochrome P450 PATH: ec00982 Drug metabolism - cytochrome P450 PATH: ec01100 Metabolic pathways ORTHOLOGY KO: K00001 alcohol dehydrogenase KO: K11440 choline dehydrogenase GENES HSA: 124(ADH1A) 125(ADH1B) 126(ADH1C) 127(ADH4) 128(ADH5) 130(ADH6) 131(ADH7) PTR: 461394(ADH4) 461395(ADH6) 461396(ADH1B) 471257(ADH7) 743928(ADH5) 744064(ADH1A) 744176(ADH1C) MCC: 697703(ADH5) 707258(ADH5) 707367 707682(ADH1A) 708520 711061(ADH1C) MMU: 11522(Adh1) 11529(Adh7) 11532(Adh5) 26876(Adh4) RNO: 100145871(Adh5) 171178(Adh7) 24172(Adh1) 29646(Adh4) 310903(Adh6) CFA: 474946 478487(ADH4) 478489(ADH1C) 609781 BTA: 505515(ADH5) 509744(ADH1C) 510551(ADH6) 520508(ADH4) ECB: 100034175 100034242 MDO: 100015779 100015820 100015858 GGA: 395979(ADH1B) 422705(ADH5) TGU: 100190137 100222320 XLA: 398993 444547 445841(adh3) 446738(adh6) XTR: 496861(adh1) 496906(adh6) 496916(adh1b) 497007 DRE: 116517(adh5) CIN: 445671(Adh3) SPU: 579220 DME: Dmel_CG18814 Dmel_CG3481(Adh) Dmel_CG3763(Fbp2) Dmel_CG4842 Dmel_CG4899(Pdh) Dmel_CG6598(Fdh) DPO: Dpse_GA14399 Dpse_GA17670 Dpse_GA19711 DAN: Dana_GF16175 DER: Dere_GG17256 DPE: Dper_GL12061 DSE: Dsec_GM26140 DSI: Dsim_GD20694 DYA: Dyak_GE24658 DGR: Dgri_GH18738 DMO: Dmoj_GI24655 AGA: AgaP_AGAP010499 AAG: AaeL_AAEL006458 CQU: CpipJ_CPIJ008783 AME: 409773 NVI: 100118368 TCA: 654905(Fdh) API: 100164837 CEL: D2063.1(dehydrogenase) H24K24.3 K12G11.3(sodh-1) K12G11.4(sodh-2) BMY: Bm1_52445 SMM: Smp_044440 TAD: TRIADDRAFT_55892 ATH: AT1G22430 AT1G22440 AT1G32780 AT1G64710 AT1G77120(ADH1) AT5G42250 AT5G43940(ADH2) POP: POPTR_645542(GSH-FDH1) POPTR_815974 RCU: RCOM_0996200 RCOM_1600190 VVI: 100245520(GSVIVT00015186001) 
100258412 100264420 OSA: 4331130(Os02g0815500) 4335572(Os04g0358000) 4344469(Os08g0109200) 4350053(Os11g0210300) 4350054(Os11g0210500) SBI: SORBI_01g044580(SORBIDRAFT_01g044580) SORBI_04g037050(SORBIDRAFT_04g037050) ZMA: 100193894 542459(fdh1) PPP: PHYPADRAFT_137950 PHYPADRAFT_183406 CRE: CHLREDRAFT_129874(FDH1) OLU: OSTLU_27414(ADHX) CME: CMS125C SCE: YBR145W(ADH5) YDL168W(SFA1) YGL256W(ADH4) YMR083W(ADH3) YMR303C(ADH2) YOL086C(ADH1) AGO: AGOS_AAR084W AGOS_ABL033C AGOS_ACL148C AGOS_AER032W KLA: KLLA0B00451g KLLA0C19382g KLLA0D12342g KLLA0F13530g KLLA0F18260g DHA: DEHA0A00154g DEHA0G06457g DEHA0G22330g PIC: PICST_27980(ADH2) PICST_29252(FDH1) PICST_44171(IFR1) PICST_67803(SAD2) PICST_68558(ADH1) PPA: PAS_chr3_1028 VPO: Kpol_530p31 CGR: CAGL0I07843g CAGL0J01441g CAGL0L01111g YLI: YALI0A15147g YALI0A16379g YALI0D25630g YALI0E17787g YALI0F09603g SPO: SPAC5H10.06c(adh4) SPBC1773.06c SPCC13B11.01(adh) NCR: NCU01754 NCU02476 NCU06303 NCU06652 PAN: PODANSg8007 MGR: MGG_03880 MGG_06011 MGG_07773 FGR: FG10200.1 ANI: AN2286.2 AN5913.2 AN7632.2 AN7636.2 AN8979.2 AFM: AFUA_2G00970 AFUA_2G01040 AFUA_2G10960 AFUA_2G13270 AFUA_4G08240 AFUA_5G06240 AFUA_6G00510 AFUA_7G01010 AFUA_7G04380 AOR: AO090003001407 AO090009000634 AO090011000117 AO090023000745 AO090026000555 AO090103000187 AO090701000350 AO090701000373 ANG: An02g02060 An02g02870 An04g02690 An08g09750 An10g00510 An10g00570 An13g00950 An13g03330 An14g02160 An17g01530(adhA) AFV: AFLA_058310 PCS: Pc22g25070 NFI: NFIA_033340 SSL: SS1G_10135 CNE: CNE00710 CNG00600 CNM01690 CNB: CNBM1570 LBC: LACBIDRAFT_292173 UMA: UM01885.1 UM02790.1 UM06244.1 MGL: MGL_0044 MBR: MONBRDRAFT_20739 MONBRDRAFT_32407 DDI: DDB_0238196 DDB_0238276 EHI: EHI_042260 EHI_107210 EHI_150490 CPV: cgd8_1720 CHO: Chro.80199 TGO: TGME49_111780 TET: TTHERM_00295700 TCR: 506357.50 511277.60 LMA: LmjF30.2090 ECO: b0356(frmA) b1241(adhE) b1478(adhP) ECJ: JW0347(frmA) JW1228(adhE) JW1474(adhP) JW5648(yiaY) ECD: ECDH10B_0311(frmA) ECDH10B_1302(adhE) EBW: BWG_0245(frmA) 
BWG_1066(adhE) ECE: Z0456(adhC) Z2016(adhE) Z2232(adhP) Z5010(yiaY) ECS: ECs0411 ECs1741 ECs2082(adhA) ECs4466 ECF: ECH74115_0431(adhC) ECH74115_1725(adhE) ETW: ECSP_0420(frmA) ECSP_1631(adhE) ECG: E2348C_0303(frmA) E2348C_1367(adhE) ECC: c0465(adhC) c1705(adhE) c1911(adhP) c4410(yiaY) ECI: UTI89_C0359(yahK) UTI89_C0376(adhC) UTI89_C0680 UTI89_C1438(adhE) UTI89_C1696(adhP) UTI89_C4131(yiaY) UTI89_C4465 ECP: ECP_0421 ECP_0696 ECP_1287 ECP_1480 ECP_3691 ECV: APECO1_1388 APECO1_1645(adhC) APECO1_1661(yahK) APECO1_2282 APECO1_2864(yiaY) APECO1_355(adhE) APECO1_614(adhP) ECW: EcE24377A_0381 EcE24377A_1389(adhE) ECX: EcHS_A0421(adhC) EcHS_A1350(adhE) ECM: EcSMS35_0387(adhC) EcSMS35_1901(adhE) ECY: ECSE_0381 ECSE_1289 ECL: EcolC_2387 EcolC_3269 ECK: EC55989_0365(frmA) EC55989_1337(adhE) ECQ: ECED1_0384(frmA) ECED1_1392(adhE) ECR: ECIAI1_0357(frmA) ECIAI1_1259(adhE) ECT: ECIAI39_0322(frmA) ECIAI39_1575(adhE) ECZ: ECS88_0356(frmA) ECS88_1308(adhE) EUM: ECUMN_0399(frmA) ECUMN_1538(adhE) ELF: LF82_0035(adhE) LF82_0737(frmA) EBL: B21_00314(frmA) B21_01225(adhE) EBD: ECBD_2383 ECBD_3301 EBR: ECB_00310(frmA) ECB_01215(adhE) EFE: EFER_1582(frmA) EFER_1714(adhE) STY: STY1302(adh) STY1493(adhP) STY3830 STT: t1482(adhP) t1660(adh) t3575 SPT: SPA1129(adh) SPA1301(adhP) SPA3887 SEK: SSPA1048 SSPA1209 SSPA3615 SPQ: SPAB_01493 SPAB_01652 SPAB_01731 SPAB_02687 SEI: SPC_1980(adh) SPC_2108 SPC_2171(adhP) SPC_4148 SEC: SC1568(adhP) SC1622(adh3) SC1744(adhE) SC3934(adh2) SEH: SeHA_C1744 SeHA_C1805 SeHA_C1939(adhE) SEE: SNSL254_A1681 SNSL254_A1741 SNSL254_A1876(adhE) SEW: SeSA_A1683 SeSA_A1746 SeSA_A1882(adhE) SEA: SeAg_B1396(adhE) SeAg_B1530 SeAg_B1598 SED: SeD_A1579(adhE) SeD_A1715 SeD_A1776 SEG: SG1367(adhE) SG1492 SET: SEN1287(adhE) SEN1425 SEN3835 SES: SARI_01204 SARI_01356 SARI_01402 STM: STM1567(adhP) STM1627 STM1749(adhE) STM4044 YPE: YPO1502 YPO2180(adhE) YPK: y2023(adhE) y2667(adhC) YPM: YP_1392(adhC) YP_1976(adhE) YPA: YPA_0797 YPA_1537 YPN: YPN_1646 YPN_2477 YPP: YPDSF_0956 
YPDSF_1475 YPDSF_3645 YPG: YpAngola_A2165(adhE) YpAngola_A3011 YPS: YPTB1517 YPTB2103(adhE) YPI: YpsIP31758_1963(adhE) YpsIP31758_2473 YPY: YPK_2072 YPK_2571 YPB: YPTS_1627 YPTS_2171 YEN: YE2238(adhE) YE2821 SFL: SF1240(adhE) SF1747(adhP) SF3627(yiaY) SFX: S1326(adhE) S1880(adhP) S4141(yiaY) SFV: SFV_1253(adhE) SFV_3944(yiaY) SSN: SSON_0335(adhC) SSON_1939(adhE) SBO: SBO_1828(adhE) SBO_3589(yiaY) SBC: SbBS512_E0271(adhC) SbBS512_E1406(adhE) SbBS512_E4004(adhB) SDY: SDY_1295(adhE) SDY_1608(adhP) ECA: ECA2326(adhE) ECA2714(adhC) ECA3950 PCT: PC1_1667 PC1_1983 ETA: ETA_12850(adhC) ETA_15810(adhE) ETA_24370(adhP) PLU: plu1563 plu2496(adhE) plu4332(adhC) PAY: PAU_02030(adhE) PAU_03905(adhC) SGL: SG1372 ENT: Ent638_0142 Ent638_1960 Ent638_2304 Ent638_4074 ESA: ESA_01539 ESA_01709 CTU: Ctu_22450(frmA) Ctu_23840(adhE) KPN: KPN_01853(adhP) KPN_01952 KPN_02199(adhE) KPN_04206 KPE: KPK_2109(adhE) KPK_2397(adhC) KPK_2503 KPK_5475 KPU: KP1_0065 KP1_2916(adhP) KP1_3020 KP1_3311(adhE) CKO: CKO_01318 CKO_01455 CKO_03104 CKO_05045 SPE: Spro_1557 Spro_2403 Spro_2704 Spro_3658 PMR: PMI0450(adhC) PMI1486(adhE) EIC: NT01EI_1665 DDA: Dd703_1955 Dd703_2311 DZE: Dd1591_2197 Dd1591_2411 HIN: HI0185(adhC) HIP: CGSHiEE_02335 HIQ: CGSHiGG_03525 PMU: PM1453(adh2) MSU: MS1386(adhC) MS1802(eutG) MS2190(eutG) APL: APL_1011(adh2) APL_1208(adhC) APL_1959(adhI) APJ: APJL_1029(adh2) APJL_1221(adh1) APA: APP7_1068(adhE) APP7_1259 ASU: Asuc_0067 Asuc_0591 AAP: NT05HA_1312 AAT: D11S_0481 D11S_1811 XFA: XF1746 XF2389 XFT: PD1406(yahK) XCC: XCC2730 XCC3389 XCC3475(adhC) XCC3703(ybdR) XCB: XC_0686 XC_0775 XC_1384 XC_3774 XCA: xccb100_0720(adh) xccb100_0810 XCV: XCV0185 XCV0713 XCV0788 XCV2170 XCV2333 XCV3047 XCV3866 XAC: XAC0201(adh) XAC0652(adhC) XAC0734 XAC2826 XAC2896 XAC3747(ybdR) XOO: XOO1535 XOO3867 XOM: XOO_1325 XOO_1423 XOO_3647 XOO_4231 XOP: PXO_04416 SML: Smlt3978(adhC) SMT: Smal_3387 VCH: VC2033 VCO: VC0395_A1619(adhE) VCM: VCM66_1957(adhE) VCJ: VCD_002334 VVU: VV1_0344 VV1_3111 VV2_0019 VVY: 
VV0841 VV1175 VV2175 VVA0527 VPA: VP2121 VPA0071 VPA0566 VHA: VIBHAR_03018 VIBHAR_06925 VSP: VS_0958 VS_II1327 VEX: VEA_000929 VEA_002931 VFI: VF_0023(yhdH) VF_0645(frmA) VF_0918(adhE) VF_1188(yiaY) VFM: VFMJ11_0659 VFMJ11_0663 VFMJ11_0957 VSA: VSAL_I0746(adhC) VSAL_I1855(adhE) PPR: PBPRA0784 PBPRA1103 PBPRA1480 PBPRA2519 PAE: PA2119 PA2158 PA2188 PA3629(adhC) PA5427(adhA) PAU: PA14_17400(adhC) PA14_71630(adhA) PAP: PSPA7_1510 PAG: PLES_14061(adhC) PPU: PP_1616 PP_3839(adhA) PPF: Pput_1932(adhA) Pput_4161 PPG: PputGB1_1170 PPW: PputW619_4059 PST: PSPTO_1558(adhC) PSPTO_4285(adhB) PSB: Psyr_1367 Psyr_2956 Psyr_3987 PSP: PSPPH_3816(adhC) PSPPH_3994(adhB) PFL: PFL_1200 PFL_1399(adhB) PFL_4068(adh) PFL_4742 PFO: Pfl01_1125 Pfl01_1311 Pfl01_3346 PFS: PFLU1295(adhC) PEN: PSEEN0727 PSEEN2613(adhC-1) PSEEN2959(adhB) PSEEN4196(adhC-2) PMY: Pmen_0120 Pmen_0381 Pmen_2529 Pmen_3028 PSA: PST_0312 PST_1563(adhC) PST_2239 PST_3803 PST_4153(adhA) CJA: CJA_3340(adhC) AVN: Avin_38750 PAR: Psyc_0011 Psyc_1042 Psyc_1671(adhC) Psyc_2008(adh) PCR: Pcryo_0018 Pcryo_1428 Pcryo_1939 PRW: PsycPRwf_1032 PsycPRwf_1071 ACI: ACIAD1879(adhC) ACIAD1950 ACIAD2015 ACIAD2929 ACIAD3339(adhA) ACB: A1S_2053 A1S_2098 A1S_2702 ABM: ABSDF0756 ABSDF1641 ABSDF2205 ABY: ABAYE0763 ABAYE1463 ABAYE1522 ABAYE1861(frmA) p2ABAYE0004 p3ABAYE0020 p3ABAYE0024(adhC) ABC: ACICU_01808 ABN: AB57_2002 ABB: ABBFA_001721 SON: SO_1490(adhB) SO_2054(adhC) SO_2136(adhE) SO_A0161 SO_A0164 SDN: Sden_2000 Sden_2578 Sden_3767 SFR: Sfri_1898 Sfri_2798 Sfri_4059 SAZ: Sama_0004 Sama_1693 Sama_2455 SBL: Sbal_0633 Sbal_1321 Sbal_1922 Sbal_2419 SBM: Shew185_1312 Shew185_1948 Shew185_2408 SBN: Sbal195_1348 Sbal195_1955 Sbal195_2526 SBP: Sbal223_1940 Sbal223_2371 SLO: Shew_1166 Shew_1540 Shew_1910 SPC: Sputcn32_1244 Sputcn32_1811 Sputcn32_2174 SSE: Ssed_1258 Ssed_2349 Ssed_2530 Ssed_4227 SPL: Spea_0269 Spea_2044 SHE: Shewmr4_1683 Shewmr4_1786 Shewmr4_2762 SHM: Shewmr7_1758 Shewmr7_2191 Shewmr7_2840 SHN: Shewana3_1788 Shewana3_1837 
Shewana3_2938 SHW: Sputw3181_1835 Sputw3181_2214 SHL: Shal_2250 Shal_4033 SWD: Swoo_0375 Swoo_2241 SWP: swp_2093 swp_2623 ILO: IL0796 IL1773(adhC) CPS: CPS_0817 CPS_1431 CPS_3853 PHA: PSHAa1386(adhC) PSHAa1488(yhdH) PSHAa2213(adhC) PSHAb0511(adhP) PAT: Patl_0425 Patl_1438 Patl_1481 MAQ: Maqu_0321 Maqu_1048 Maqu_1713 Maqu_2251 AMC: MADE_03331 PIN: Ping_1955 Ping_3530 TTU: TERTU_0454 LPN: lpg2492 LPF: lpl2034 lpl2413 LPP: lpp2558 LPC: LPC_1984(adhT) MCA: MCA0775 FTM: FTM_0472 FPH: Fphi_0427 TCX: Tcr_0211 NOC: Noc_0592 AEH: Mlg_2137 HHA: Hhal_0140 HCH: HCH_01446(adhC) HCH_05054 CSA: Csal_1091 Csal_1426 Csal_2912 ABO: ABO_0061 ABO_0117(adhC) ABO_0817 ABO_1231(adhA) ABO_1352(adhB) ABO_2419 ABO_2483 MMW: Mmwyl1_3680 AHA: AHA_1331 AHA_2616 ASA: ASA_1695(adhE) ASA_2860 ASA_2902 ASA_4180(adhC) TAU: Tola_2176 Tola_2293 Tola_2876 AFE: Lferr_0845 Lferr_1744 AFR: AFE_0697 AFE_2088 KKO: Kkor_2048 NME: NMB0546(adhP) NMB1304 NMB1395 NMA: NMA0725(adhA) NMA1518 NMC: NMC0486(adhA) NMC1241(adhC) NMN: NMCC_1217(adhC) NMI: NMO_1145(adhC) NGO: NGO0711 NGO1442(adhA) NGK: NGK_1317 CVI: CV_0740(adhC) CV_0808 CV_1137(adhE) CV_2051 CV_2728 LHK: LHK_02379(fdhG) RSO: RS00889(RSp0430) RSc0605(adhC2) RSc1505 RSc3130(adhA) RSp0069(adhC1) RPI: Rpic_0535 RPF: Rpic12D_0522 REU: Reut_B3677 Reut_B4000 Reut_B4710 Reut_B4833 Reut_B5201 Reut_C5952 Reut_C6082 Reut_C6089 Reut_C6321 REH: H16_A0757(adh) H16_A0861 H16_A3330 H16_B0517 H16_B1195(adhC) H16_B1433(adhP) H16_B1699 H16_B1745 H16_B2470 RME: Rmet_0545 Rmet_1093 Rmet_4784 Rmet_4943 Rmet_5102 Rmet_5545 Rmet_5645 CTI: RALTA_B1113(adhC) RALTA_B2189 BMA: BMA0324 BMA2006 BMAA0132 BMAA0163 BMAA0709 BMV: BMASAVP1_0107 BMASAVP1_1300 BMASAVP1_1333 BMASAVP1_A0622 BMASAVP1_A0904 BML: BMA10229_0317 BMA10229_0753 BMA10229_A2456 BMA10229_A2737 BMN: BMA10247_0070 BMA10247_1871 BMA10247_A0157 BMA10247_A0189 BMA10247_A1276 BMA10247_A1718 BPS: BPSL0820(flhA) BPSL2701 BPSS0840 BPSS1918 BPSS1944(adhA) BPSS2024 BPM: BURPS1710b_2237(eutG) BURPS1710b_3181 BURPS1710b_A1018 
BURPS1710b_A1046(adhA) BURPS1710b_A1142 BURPS1710b_A2432 BPL: BURPS1106A_0866 BURPS1106A_2097 BURPS1106A_3164 BURPS1106A_A1160 BURPS1106A_A1693 BURPS1106A_A2605 BURPS1106A_A2642 BPD: BURPS668_0862 BURPS668_2042 BURPS668_3127 BURPS668_A1234 BURPS668_A1779 BURPS668_A2748 BURPS668_A2784 BPR: GBP346_A0790 BTE: BTH_I0686 BTH_I1436 BTH_II0428 BTH_II0459 BTH_II1565 BVI: Bcep1808_1701 Bcep1808_4026 Bcep1808_4278 Bcep1808_4603 Bcep1808_4991 Bcep1808_5687 Bcep1808_6339 Bcep1808_6458 Bcep1808_6708 BUR: Bcep18194_A5935 Bcep18194_B2077 Bcep18194_B2551 Bcep18194_C6773 BCN: Bcen_1392 Bcen_3928 Bcen_4360 Bcen_4742 BCH: Bcen2424_4006 Bcen2424_4438 BCM: Bcenmc03_2628 Bcenmc03_3517 BCJ: BCAL2817(flhA) BCAM1570(adhA) BAM: Bamb_3412 Bamb_3811 Bamb_3870 BAC: BamMC406_6016 BMU: Bmul_0693 Bmul_5633 BMJ: BMULJ_02567(adhC) BMULJ_05877(adhC) BXE: Bxe_A0713(flhA) Bxe_A1730 Bxe_A1872 Bxe_B1066 Bxe_B2290 Bxe_B2357 Bxe_B2518 Bxe_C0527 Bxe_C1266 Bxe_C1356 BPH: Bphy_0483 BPY: Bphyt_0108 Bphyt_3253 Bphyt_5114 BGL: bglu_1g29580 BPE: BP0777 BP2601 BP3751(adhI) BPA: BPP0339 BPP2723 BPP4251(adhI) BBR: BB0342 BB2775 BB4838(adhI) BPT: Bpet0155 BAV: BAV3289(adhC) RFR: Rfer_0121 Rfer_2865 POL: Bpro_1201 Bpro_3129 Bpro_5563 PNA: Pnap_1230 Pnap_1291 Pnap_2134 Pnap_2299 Pnap_3852 AAV: Aave_2195 Aave_3231 AJS: Ajs_0645 Ajs_1049 Ajs_1375 Ajs_1889 Ajs_2098 VEI: Veis_3295 DAC: Daci_2414 DIA: Dtpsy_2290 VAP: Vapar_1703 MPT: Mpe_A0936 Mpe_A2064 HAR: HEAR0089 HEAR2039 HEAR2367(adh) MMS: mma_0221(adhC) LCH: Lcho_3255 NEU: NE0620 NE0820 NE0907(adhC1) NET: Neut_1385 NMU: Nmul_A1937 EBA: ebA3118 ebA4623(adhB) ebA5713(fdhG) p2A334 AZO: azo0111(adhA) azo1971 azo2700 azo3181 azo3317(yhdH) azo3623(adhC) DAR: Daro_1348 TMZ: Tmz1t_0608 TBD: Tbd_1767 MFA: Mfla_0203 APP: CAP2UW1_1902 GSU: GSU0573 GME: Gmet_1046 GUR: Gura_0018 Gura_2420 Gura_3537 Gura_3574 Gura_4352 GLO: Glov_0177 PCA: Pcar_0251 Pcar_0255 Pcar_1246 Pcar_1594 Pcar_2506 Pcar_2847 Pcar_2848 Pcar_2851 PPD: Ppro_0756 Ppro_1924 DVU: DVU2396 DVU2405 DVU2885 DVU2905 
DVL: Dvul_0460 Dvul_0480 Dvul_0825 Dvul_0834 DVM: DvMF_1315 DvMF_2322 DDE: Dde_1164 Dde_3062 Dde_3126 Dde_3523 Dde_3534 BBA: Bd0898(adhC) Bd2813(adh) DPS: DP0950 DP0951 DP0955 DP1034 MXA: MXAN_6802(adh) MXAN_7094 SCL: sce2769(adh2) sce4900(adh5) HOH: Hoch_5737 SAT: SYN_01159 PUB: SAR11_0272(adhP) SAR11_0529 SAR11_1244(yhdH) SAR11_1287 MLO: mlr0872 mlr1136 mlr1178 MES: Meso_1317 Meso_1868 Meso_2263 Meso_2671 PLA: Plav_2451 Plav_3009 Plav_3345 SME: SM_b20170(fdh) SMa1156 SMa1296(adhA1) SMa2113(adhC2) SMc00105 SMc01270(adhC1) SMc03929 SMc04270 SMD: Smed_1024 Smed_3946 ATU: Atu0626(adhP) Atu1595 Atu1670(adhC) Atu2151(adh) Atu4289(adhC) Atu5240(adhP) ATC: AGR_C_1112 AGR_C_2931(adh4) AGR_C_3072 AGR_C_3897 AGR_L_1150(adhE) AGR_pAT_339 ARA: Arad_2510 AVI: Avi_2506(adhC) RET: RHE_CH00460(ypch00170) RHE_CH01189(ypch00392) RHE_CH02227(adhCch) RHE_CH02474(ypch00832) RHE_CH02884(ypch00995) RHE_CH03285(ypch01140) RHE_PB00029(ypb00013) RHE_PB00045(ypb00024) RHE_PC00105(adhA1) RHE_PC00106(adhA2) RHE_PD00107(adhE) RHE_PF00203(ypf00099) RHE_PF00365(ypf00190) RHE_PF00402(adhCf) REC: RHECIAT_CH0002337(adhCch) RHECIAT_PC0000584(adhCc) RLE: RL0310 RL1325 RL1876(adh) RL2813 RL3344 RL3711 pRL100135 pRL120042 pRL120182 pRL120524(adhI) pRL120602 pRL90027(adhA) pRL90098 RLT: Rleg2_1875 Rleg2_4957 RLG: Rleg_4830 RHI: NGR_b14640(adhC2) NGR_b16470(adhC3) NGR_b23560(adhC4) NGR_c12280(adhC6) NGR_c20470(adhC5) NGR_c25690 BME: BMEI0925 BMEI1114 BMEI1746 BMEI1819 BMEII0553 BMEII0867 BMF: BAB1_0128 BAB1_0204 BAB1_0871 BAB2_0506 BAB2_0821 BMB: BruAb1_0127 BruAb1_0198 BruAb1_0864 BruAb2_0498 BruAb2_0800 BMC: BAbS19_I01220 BMS: BR0203 BR0852 BR1061 BRA0401 BRA0734 BOV: BOV_0195 BOV_0844 BOV_1026 BCS: BCAN_A0133 BMR: BMI_I133 OAN: Oant_0144 BJA: bll4101 bll5655 bll7898 blr2780 blr3675 blr4874 blr6070 blr6215(adhC) BRA: BRADO0250 BRADO0921(adh) BRADO2457 BRADO2816 BRADO2866(adhA) BRADO3321 BRADO4151 BRADO4353 BRADO5171 BRADO5308 BRADO5486(adhC) BRADO6611 BBT: BBta_2803 BBta_3627 BBta_3825 BBta_4528 
BBta_5308(adhA) BBta_5367 BBta_5638 BBta_5755 BBta_5970(adhC) BBta_7131(adh) RPA: RPA0374 RPA0656(badC) RPA1205 RPA1955 RPA2018 RPA3067 RPA3655 RPB: RPB_0946 RPB_1148 RPB_1871 RPB_2474 RPB_3357 RPB_3418 RPC: RPC_0105 RPC_1030 RPC_1164 RPC_1965 RPC_2107 RPC_2307 RPC_2974 RPC_3411 RPC_4481 RPD: RPD_1250 RPD_1539 RPD_2034 RPD_2086 RPD_2972 RPD_3495 RPE: RPE_0587 RPE_0609 RPE_1228 RPE_2019 RPE_3298 RPE_3649 RPE_3692 RPE_4678 RPT: Rpal_2166 Rpal_4444 NWI: Nwi_1739 NHA: Nham_1916 Nham_2346 Nham_2536 OCA: OCAR_5733 OCAR_6856 BHE: BH07430(adh) BQU: BQ05280(adh) BBK: BARBAKC583_0618 BTR: Btr_1329(adh) AZC: AZC_0105 MEX: Mext_3514 BID: Bind_0909 Bind_2495 CCR: CC_2516 CC_3029 CCS: CCNA_02601 CCNA_03124 CAK: Caul_0743 Caul_3517 PZU: PHZ_c0845(adhI) PHZ_c2025 SIL: SPO1889 SPO3399 SPO3850 SPOA0272 SIT: TM1040_0040 TM1040_0536 RSP: RSP_0960 RSP_1824(adh) RSP_2576(adhI) RSP_2799 RSP_3537 RSH: Rsph17029_1234 Rsph17029_1501 RSQ: Rsph17025_1947 Rsph17025_3061 RSK: RSKD131_0888 JAN: Jann_3964 RDE: RD1_3806(flhA) PDE: Pden_0016 Pden_2367 Pden_4206 DSH: Dshi_0103 Dshi_0931 Dshi_1078 MMR: Mmar10_2836 HNE: HNE_0560 HBA: Hbal_2865 ZMO: ZMO1236(adhA) ZMO1596(adhB) ZMO1722(adhC) NAR: Saro_0874 SAL: Sala_2455 SWI: Swit_1179 Swit_1980 Swit_3018 GOX: GOX0313 GOX0314 GOX2018 GBE: GbCGDNIH1_0098 ACR: Acry_0843 Acry_2641 Acry_2794 GDI: GDI_0902(adh) GDI_1852(frmA) GDI_2473(adh) GDJ: Gdia_0079 Gdia_1118 APT: APA01_12700 RRU: Rru_A0904 Rru_A0930 Rru_A3405 RCE: RC1_0269(adhC) BSU: BSU18430(yogA) BSU26970(adhB) BSU31050(gbsB) BHA: BH1829(adhB) BAN: BA2222 BA2267(adhA) BA2588 BA2647 BA3131(adhB) BA3566 BA4599 BAR: GBAA2222 GBAA2267(adhP) GBAA2588 GBAA2647 GBAA3131(adhB) GBAA3566 GBAA4599 BAA: BA_2770 BA_3097 BA_3165 BA_3633 BA_4058 BA_5039 BAT: BAS2066 BAS2111 BAS2412 BAS2466 BAS2912 BAS3306 BAS4267 BAH: BAMEG_1479 BAMEG_4636 BAI: BAA_3180 BAA_4619 BCE: BC0802 BC2220(adhA) BC2529 BC2660 BC3092 BC4365 BCA: BCE_0198 BCE_0742 BCE_0877 BCE_1099 BCE_2252 BCE_2296(adhA) BCE_2604 BCE_2677 BCE_3145(adhB) 
BCE_3521 BCE_4453 BCZ: BCZK0168 BCZK0681 BCZK2005(gbsB) BCZK2048(adhB) BCZK2331(adhB) BCZK2390 BCZK2838(adhB) BCZK3221 BCZK4115 BCR: BCAH187_A3155 BCAH187_A4504 BCAH187_C0241 BCB: BCB4264_A3114 BCB4264_A4490 BCU: BCAH820_3133 BCAH820_4451 BCAH820_B0169 BCG: BCG9842_B0746 BCG9842_B2124 BCQ: BCQ_2941(adhB) BCQ_4155 BCQ_PI181 BCX: BCA_3189 BCA_4483 BCY: Bcer98_1477 BTK: BT9727_0165 BT9727_2006(gbsB) BT9727_2050(adhB) BT9727_2367(adhB) BT9727_2429 BT9727_2881(adhB) BT9727_3273 BT9727_4104 BTL: BALH_0175 BALH_0616 BALH_0714 BALH_1985 BALH_2027(adhA) BALH_2382 BALH_2794 BALH_3158 BALH_3956 BWE: BcerKBAB4_4218 BLI: BL00199 BL02473(adhB) BL02563(gbsB) BL05087(yogA) BLD: BLi00992(yogA) BLi03269(gbsB) BLi03502 BLi03831(adhB) BLi04290 BCL: ABC0046(adhA) ABC0979(gbsB) ABC3428(adhB) BAY: RBAM_003560 RBAM_005890(adhB) RBAM_028150(gbsB) BPU: BPUM_0237(adhB) BPUM_2734(gbsB) OIH: OB1376 OB2738 OB2801 GKA: GK0731 GK0938 GK2774 GTN: GTNG_1754 AFL: Aflv_1185(gutB) Aflv_2814 SAU: SA0143(adhE) SA0562(adh1) SAV: SAV0148(adhE) SAV0605(adh1) SAW: SAHV_0147(adhE) SAM: MW0123(adhE) MW0568(adh1) SAR: SAR0150(adhE) SAR0613(adhA) SAS: SAS0123 SAS0573(adhA) SAC: SACOL0135 SACOL0237 SACOL0241 SACOL0660(adhP) SACOL2177 SACOL2178 SACOL2367 SAB: SAB0089 SAB0557(adhA) SAB1296c SAA: SAUSA300_0055 SAUSA300_0151(adhE) SAUSA300_0250 SAUSA300_0594(adh) SAUSA300_2147 SAX: USA300HOU_0162 SAO: SAOUHSC_00113 SAOUHSC_00608(adhA) SAJ: SaurJH9_0134 SaurJH9_0628(adhA) SAH: SaurJH1_0139 SaurJH1_0643(adhA) SAE: NWMN_0094(adhE) NWMN_0577(adh1) SEP: SE0375(adhA) SE0506 SE2098 SER: SERP0197 SERP0257(adhA) SERP0389 SERP1785 SERP1786 SERP1962 SERP2112 SERP2469 SHA: SH0373(adhC) SH0522 SSP: SSP0237 SSP1623 LMO: lmo1634 LMF: LMOf2365_1656(adhE) LMH: LMHCC_0928 LMC: Lm4b_01645 LIN: lin1675 LWE: lwe0565 lwe1650 ESI: Exig_1232 Exig_1536 Exig_2206 EAT: EAT1b_0155 MCL: MCCL_1471(adhC) BBE: BBR47_08010(adhB) BBR47_46510 LLA: L13145(adhE) L190278(ypjA) L55758(adhA) LLC: LACR_1641 LACR_1990(adhA) LACR_2457 LLM: llmg_0955 
llmg_1991(adhA) llmg_2432(adhE) SPY: SPy_0044(adhA) SPy_1111 SPZ: M5005_Spy_0039(adh2) M5005_Spy_0040(adhA) M5005_Spy_0834 SPM: spyM18_0043(adhE) spyM18_0045(adhP) spyM18_1073 SPG: SpyM3_0036(adh2) SpyM3_0037(adh1) SpyM3_0772 SPS: SPs0037 SPs0038(adhA) SPs0972 SPH: MGAS10270_Spy0042 MGAS10270_Spy0043(adhA) MGAS10270_Spy0950 SPI: MGAS10750_Spy0041 MGAS10750_Spy0042(adhA) MGAS10750_Spy0985 SPJ: MGAS2096_Spy0041 MGAS2096_Spy0042(adhA) MGAS2096_Spy0043 MGAS2096_Spy0044 MGAS2096_Spy0909 SPK: MGAS9429_Spy0040(adh2) MGAS9429_Spy0041(adhA) MGAS9429_Spy0953 SPF: SpyM50039(adhE) SpyM50040(adhP) SpyM50954 SPA: M6_Spy0088 M6_Spy0090(adhA) M6_Spy0832 M6_Spy0833 SPB: M28_Spy0039 M28_Spy0040(adhA) M28_Spy0811 SOZ: Spy49_0038(adhE) Spy49_0039(adhP) SPN: SP_0285(adhA) SP_1855 SP_2026 SP_2055 SP_2157 SPD: SPD_0265(adhA) SPD_1636 SPD_1834 SPD_1865 SPD_1985 SPR: spr0262(adhP) spr1670(adhB) spr1837(adhE) spr1866(adh) spr1963(adh2) SPW: SPCG_0297(adhP) SPCG_1829(adhB) SPCG_1991(adhE) SPCG_2022(adh) SPX: SPG_1938 SNE: SPN23F_20460(adhE) SPV: SPH_0401 SPH_2179 SPH_2210 SNM: SP70585_2111 SJJ: SPJ_2031 SPP: SPP_2062 SNT: SPT_2021 SAG: SAG0053(adhE) SAG0054(adhP) SAG0428 SAG1637(adh) SAN: gbs0053 gbs0054(adhA) gbs1684 SAK: SAK_0086 SAK_0087(adhA) SAK_1651 SMU: SMU.119(adh) SMU.148(adhE) SMU.1867c STC: str0882 str0936(adhB) str1879 str1881 str1882 str1884 STL: stu0882 stu0936(adhB) stu1879 stu1880 stu1881 stu1882 stu1883 stu1884 STE: STER_0949 SSA: SSA_0068(adhE) SSA_0921(adhB) SSA_1917(adh) SSU: SSU05_0279(adhA) SSU05_0280 SSU05_1388 SSV: SSU98_0274(adhA) SSU98_0275 SSU98_1402 SSB: SSUBM407_0252(adhE) SSI: SSU0261(adhE) SSS: SSUSC84_0250(adhE) SGO: SGO_0113(acdH) SGO_0565(adhA) SGO_1774 SEQ: SZO_00440 SEZ: Sez_0043(adh2) Sez_0045(adh) Sez_0046(adhA) SEU: SEQ_0044 SUB: SUB0063(adhE) SDS: SDEG_0061(adh2) LPL: lp_1665(adh1) lp_2873(adh2) lp_3662(adhE) LPJ: JDM1_2930(adhE) LJO: LJ1120 LJ1766 LAC: LBA0461(adhE) LBA1114 LSA: LSA0258 LSA0379(adhE) LSA0925(adh) LSA1364 LSA1702(adhA) LSL: LSL_1901 
LBR: LVIS_0119 LVIS_0254(adhA) LVIS_0869 LCA: LSEI_0775 LSEI_2787 LCB: LCABL_08380(adhE) LGA: LGAS_1561 LRE: Lreu_0321 Lreu_1496 Lreu_1535 Lreu_1589 LRF: LAR_0310 LFE: LAF_0277 LRH: LGG_00757(adhE) LRL: LC705_00751(adhE) EFA: EF0900(adhE) EF1826(adhA) OOE: OEOE_0527 OEOE_1248 LME: LEUM_0146 LEUM_1919 LCI: LCK_00119(adhE) CAC: CAC3375 CA_P0035(adhE) CA_P0162(adhe1) CPE: CPE0449(adh) CPE2531(adhE) CPF: CPF_0451 CPF_2855(adhE) CPR: CPR_0442 CPR_2540 CTC: CTC01366 CTC01453 CTH: Cthe_0423 CDF: CD0334(adhE) CD2843 CD2966(adhE) CD3006 CDC: CD196_0353(adhE) CD196_2753(adhE) CDL: CDR20291_0339(adhE) CDR20291_2800(adhE) CBO: CBO0345(aad) CBA: CLB_0388(adhE) CBH: CLC_0403(adhE) CBY: CLM_0413(adhE) CBL: CLK_3533(adhE) CBB: CLD_0407(adhE) CBI: CLJ_B0401(adhE) CBT: CLH_1998 CBF: CLI_0416(adhE) CBE: Cbei_0305 CKL: CKL_1067 CKL_1614(aad) CKL_2967 CKL_2978 CKL_3000 CKL_3425 CKR: CKR_1499 CPY: Cphy_3925 CCE: Ccel_1083 Ccel_3198 AMT: Amet_1657 STH: STH2049 ATE: Athe_2244 DSY: DSY0565 DSY0623 DSY1101 DSY2755 DSY3044 DHD: Dhaf_3921 Dhaf_4212 DRM: Dred_1495 Dred_2277 DAE: Dtox_1709 PTH: PTH_0606(eutG) PTH_1584 DAU: Daud_2043 HMO: HM1_2632(adhE) FMA: FMG_0340 APR: Apre_1060 EEL: EUBELI_00428 EUBELI_01490 EUBELI_02054 TTE: TTE0696(eutG) TEX: Teth514_0627 TPD: Teth39_0206 CHY: CHY_0925 MTA: Moth_1911 CSC: Csac_0407 Csac_0711 HOR: Hore_02110 MPE: MYPE4620(gbsB) MAA: MAG_2740 MAG_4280(adhT) MAG_4340(adhT) ACL: ACL_0177 MTU: Rv0162c(adhE1) Rv0761c(adhB) Rv1530(adh) Rv1862(adhA) MTC: MT0171 MT0786 MT1581 MT1911 MRA: MRA_0170(adhE1) MRA_0771(adhB) MRA_1542(adh) MRA_1873(adhA) MTF: TBFG_10776 TBFG_13103 MTB: TBMG_00776(TBMG_00776.1) TBMG_00881(TBMG_00881.1) MBO: Mb0167c(adhE1) Mb0784c(adhB) Mb1557(adh) Mb1893(adhA) MBB: BCG_0198c(adhE1) BCG_0813c(adhB) BCG_1582(adh) BCG_1898(adhA) MBT: JTY_0168(adhE1) JTY_0783(adhB) JTY_3106(adhD) MLE: ML2053 MPA: MAP0595c(adhB) MAP1571(adhA_1) MAP1613c(adhA_2) MAP3596c(adhE) MAV: MAV_0705 MAV_1614 MAV_1943 MAV_2646 MAV_2812 MAV_2857 MAV_5022 MAV_5214 MSM: 
MSMEG_0127 MSMEG_0217 MSMEG_0303 MSMEG_0590(rhaD) MSMEG_1138 MSMEG_1977 MSMEG_2048 MSMEG_3388 MSMEG_3464 MSMEG_3915 MSMEG_4167 MSMEG_5866 MSMEG_6242 MSMEG_6833 MSMEG_6834 MUL: MUL_0470(adhB) MUL_0495(adhX) MUL_2965(adhC) MUL_3015(adhA) MUL_3976 MVA: Mvan_0427 Mvan_0669 MGI: Mflv_0236 Mflv_3343 Mflv_4626 Mflv_4951 MAB: MAB_0983c MAB_2041c MAB_4371 MAB_4560 MMC: Mmcs_0111 Mmcs_0137 Mmcs_1713 MKM: Mkms_0120 Mkms_1113 MJL: Mjls_0101 Mjls_0499 Mjls_3236 MMI: MMAR_0127(adhD_1) MMAR_3150 MMAR_4110 MMAR_4935(adhB) CGL: NCgl0219(cgl0222) NCgl2449(cgl2537) NCgl2709(cgl2807) CGB: cg0273 cg3107(adhA) CGT: cgR_2695 CEF: CE0053 CDI: DIP2114(adhA) CUR: cur_1829(adhA) NFA: nfa12770 nfa16040 nfa20970 nfa28910 nfa35620 nfa38820 RHA: RHA1_ro00325(adh1) RHA1_ro00447 RHA1_ro00525 RHA1_ro01205 RHA1_ro02150 RHA1_ro02497 RHA1_ro04547 RHA1_ro05321 RHA1_ro08443 RHA1_ro08627 RHA1_ro08635 RHA1_ro08965(adh2) RHA1_ro11170(adhB) RER: RER_18210 RER_36780 ROP: ROP_01120 ROP_09280 ROP_22230 ROP_25560 ROP_30230 ROP_71160 SCO: SCO0199(SCJ12.11c) SCO0259(SCF1.01) SCO5262(2SC7G11.24) SMA: SAV_1141(adhA1) SAV_1357(adhA2) SAV_1393(adhA3) SAV_2980 SAV_414(pteB) SAV_5335 SAV_7169 SGR: SGR_3049 SGR_5208 TWH: TWT326(adh) TWS: TW445(adh) LXX: Lxx22610(adh) ART: Arth_0343 Arth_1320 Arth_3087 AAU: AAur_0342(adhT) AAur_1148 AAur_1866 AAur_3063(adhA) AAur_3716(rhaD) AAur_pTC10058 AAur_pTC20218 KRH: KRH_02680 KRH_07610(adh) KRH_12360(adh) JDE: Jden_1798 KSE: Ksed_25570 PAC: PPA0539 NCA: Noca_3322 TFU: Tfu_1270 Tfu_1276 Tfu_1755 FRA: Francci3_0264 Francci3_2240 Francci3_2514 Francci3_2945 Francci3_3750 FRE: Franean1_6841 FAL: FRAAL0199 FRAAL0607 FRAAL1637 FRAAL3122 FRAAL3376(adhC) FRAAL5274(adhC1) FRAAL5983 ACE: Acel_0584 NML: Namu_3211 KRA: Krad_2224 SEN: SACE_1535 SACE_2374 SACE_2733 SACE_2944 SACE_2945 SACE_3519(adh) SACE_4085(adh) SACE_4163(adh1) SACE_4519 SACE_4852(adh1) SACE_6396 STP: Strop_1166 Strop_2723 Strop_3694 SAQ: Sare_2799 CAI: Caci_5895 Caci_8750 BLO: BL1090 BL1131(badC) BL1575(adh2) BLJ: 
BLD_1704(putA2) BLN: Blon_2241 BAD: BAD_0319 BAD_0514(badC) BLA: BLA_0358(adh) BLC: Balac_0378 BLT: Balat_0378 RXY: Rxyl_1958 Rxyl_3153 APV: Apar_0133 Apar_0629 LIL: LA0296 LA0325 LA1616(adeH) LA2361 LA2629 LA3158 LIC: LIC10253 LIC10282 LIC10958 LIC11357 LIC11586 LIC12166 BHY: BHWA1_01768(adh) BHWA1_01827 ABA: Acid345_2645 Acid345_3044 Acid345_4584 SUS: Acid_1558 BFR: BF1290 BVU: BVU_0760 PGN: PGN_0723 SRU: SRU_0710 SRU_2306 CHU: CHU_1246(adhP) GFO: GFO_0760 GFO_1574 GFO_2086 LBA: Lebu_2076 AMU: Amuc_1911 GAU: GAU_2688(adh) RBA: RB10805 RB13260(yhdH) RB4131(yjjN) RB5320 RB5856(adh) RB5948(adh) EMI: Emin_0755 Emin_0772 RSD: TGRD_595 SYN: sll0990 SYC: syc1059_d SYF: Synpcc7942_0459 SYG: sync_2669 SYP: SYNPCC7002_A0868 CYA: CYA_0473(adhE) CYA_0992 CYB: CYB_0241 CYB_0338 TEL: tlr0227 MAR: MAE_21280 MAE_49340 CYT: cce_1588 cce_4888 CYP: PCC8801_3587 CYC: PCC7424_3674 PCC7424_3972 CYN: Cyan7425_4034 Cyan7425_4050 CYH: Cyan8802_2527 GVI: gll2836 gll4111 glr4425 ANA: all0879 all2810 all5334 NPU: Npun_R1946 AVA: Ava_1089 Ava_4482 PMB: A9601_14521(adhC) TER: Tery_2840 AMR: AM1_0442 AM1_2463 CTE: CT1275 CPC: Cpar_1277 CCH: Cag_0888 Cag_1128 Cag_1710 CPB: Cphamn1_1084 PVI: Cvib_0926 Cvib_0955 RRS: RoseRS_3921 RCA: Rcas_3224 CAU: Caur_1113 Caur_2281 TRO: trd_0767 trd_A0843 DRA: DR_2279 TTH: TTC0097 TTC1572 TTJ: TTHA0466 AAE: aq_1240(adh2) aq_1362(adh1) TMA: TM0111 TM0920 TPT: Tpet_0007 Tpet_0813 DTH: DICTH_0069 DICTH_0188 DTU: Dtur_0334 Dtur_0630 TYE: THEYE_A0694 MMP: MMP0802 MAC: MA2630(adh) MBA: Mbar_A0784 Mbar_A2344 MMA: MM_2769 HAL: VNG1821G(adh4) VNG2617G(adh2) HMA: pNG7022(adh1) pNG7032(adh10) pNG7101(adh8) pNG7103(adh7) pNG7278(adh12) pNG7289(adh6) pNG7314(adhC) pNG7351(adh13) rrnAC0012(adh11) rrnAC0191(adh3) rrnAC1300(adh9) rrnAC1402(adh4) rrnAC1975(adh5) rrnAC2172(adh14) rrnAC3506(adh2) HWA: HQ1124A(adh) HQ1729A(adh) HQ2366A(adh) HQ3648A(adh) NPH: NP1260A(adh) TAC: Ta0832m Ta0841 TVO: TVN0396 TVN0944 TVN1284 PTO: PTO0846 PTO1151 PTO1249 PHO: PH0743 PFU: PF0075 PF0608 
TKO: TK1008 TK1569 TON: TON_0936 TGA: TGAM_1572(adh) APE: APE_1245.1 APE_1557.1 APE_1963.1 APE_2239.1(adh) SMR: Smar_0045 Smar_0696 Smar_1072 SSO: SSO0472(adh-1) SSO0764(adh-2) SSO1646(adh-5) SSO2494(adh-8) SSO2536(adh-10) SSO2878(adh-13) STO: ST0038 ST0053 ST0075 ST0480 ST2056 ST2577 SAI: Saci_0911(adh) Saci_1115(adh) Saci_2057 Saci_2126(adh) Saci_2145(adh) Saci_2205 Saci_2224(adh) SIS: LS215_1759 SIA: M1425_1632 SIM: M1627_1747 SID: M164_1679 SIY: YG5714_1723 SIN: YN1551_1180 MSE: Msed_0398 Msed_0464 Msed_1424 Msed_1426 PAI: PAE1921 PAE2051 PAE2332 PAE2931 PIS: Pisl_0251 Pisl_0338 Pisl_0639 Pisl_1601 Pisl_1770 Pisl_1949 PCL: Pcal_0882 Pcal_1311 Pcal_1391 Pcal_1581 PAS: Pars_0396 Pars_0534 Pars_0547 Pars_1545 Pars_2114 TNE: Tneu_0419 TPE: Tpen_1006 Tpen_1516 STRUCTURES PDB: 1A4U 1A71 1A72 1ADB 1ADC 1ADF 1ADG 1AGN 1AXE 1AXG 1B14 1B15 1B16 1B2L 1BTO 1CDO 1D1S 1D1T 1DEH 1E3E 1E3I 1E3L 1EE2 1H2B 1HDX 1HDY 1HDZ 1HET 1HEU 1HF3 1HLD 1HSO 1HSZ 1HT0 1HTB 1JU9 1JVB 1LDE 1LDY 1LLU 1M6H 1M6W 1MA0 1MC5 1MG0 1MG5 1MGO 1MP0 1N8K 1N92 1NTO 1NVG 1O2D 1P1R 1QLH 1QLJ 1QV6 1QV7 1R37 1RJW 1SBY 1TEH 1U3T 1U3U 1U3V 1U3W 1VJ0 1YE3 2EER 2FZE 2FZW 2HCY 2JHF 2JHG 2OHX 2OXI 3BTO 3COS 3HUD 3I4C 5ADH 6ADH 7ADH DBLINKS ExplorEnz - The Enzyme Database: 1.1.1.1 IUBMB Enzyme Nomenclature: 1.1.1.1 ExPASy - ENZYME nomenclature database: 1.1.1.1 UM-BBD (Biocatalysis/Biodegradation Database): 1.1.1.1 BRENDA, the Enzyme Database: 1.1.1.1 CAS: 9031-72-5 /// bio-1.4.3.0001/test/data/KEGG/T00005.genome0000644000004100000410000001145712200110570017264 0ustar www-datawww-dataENTRY T00005 Complete Genome NAME sce, S.cerevisiae, YEAST, 4932 DEFINITION Saccharomyces cerevisiae S288C ANNOTATION manual TAXONOMY TAX:4932 LINEAGE Eukaryota; Fungi; Dikarya; Ascomycota; Saccharomycotina; Saccharomycetes; Saccharomycetales; Saccharomycetaceae; Saccharomyces DATA_SOURCE RefSeq ORIGINAL_DB SGD MIPS CHROMOSOME I SEQUENCE RS:NC_001133 LENGTH 230208 CHROMOSOME II SEQUENCE RS:NC_001134 LENGTH 813178 CHROMOSOME III SEQUENCE 
RS:NC_001135 LENGTH 316617 CHROMOSOME IV SEQUENCE RS:NC_001136 LENGTH 1531919 CHROMOSOME V SEQUENCE RS:NC_001137 LENGTH 576869 CHROMOSOME VI SEQUENCE RS:NC_001138 LENGTH 270148 CHROMOSOME VII SEQUENCE RS:NC_001139 LENGTH 1090947 CHROMOSOME VIII SEQUENCE RS:NC_001140 LENGTH 562643 CHROMOSOME IX SEQUENCE RS:NC_001141 LENGTH 439885 CHROMOSOME X SEQUENCE RS:NC_001142 LENGTH 745741 CHROMOSOME XI SEQUENCE RS:NC_001143 LENGTH 666454 CHROMOSOME XII SEQUENCE RS:NC_001144 LENGTH 1078175 CHROMOSOME XIII SEQUENCE RS:NC_001145 LENGTH 924429 CHROMOSOME XIV SEQUENCE RS:NC_001146 LENGTH 784333 CHROMOSOME XV SEQUENCE RS:NC_001147 LENGTH 1091289 CHROMOSOME XVI SEQUENCE RS:NC_001148 LENGTH 948062 CHROMOSOME MT (mitochondrion); Circular SEQUENCE RS:NC_001224 LENGTH 85779 STATISTICS Number of nucleotides: 12156676 Number of protein genes: 5881 Number of RNA genes: 414 REFERENCE PMID:8849441 AUTHORS Goffeau A, et al. TITLE Life with 6000 genes. JOURNAL Science 274:546-67 (1996) REFERENCE PMID:7731988 (chromosome I) AUTHORS Bussey H, et al. TITLE The nucleotide sequence of chromosome I from Saccharomyces cerevisiae. JOURNAL Proc Natl Acad Sci U S A 92:3809-13 (1995) REFERENCE PMID:7813418 (chromosome II) AUTHORS Feldmann,H., et al. TITLE Complete DNA sequence of yeast chromosome II. JOURNAL EMBO J 13:5795-809 (1994) REFERENCE PMID:1574125 (chromosome III) AUTHORS Oliver,S.G., et al. TITLE The complete DNA sequence of yeast chromosome III. JOURNAL Nature 357:38-46 (1992) REFERENCE PMID:9169867 (chromosome IV) AUTHORS Jacq C, et al. TITLE The nucleotide sequence of Saccharomyces cerevisiae chromosome IV. JOURNAL Nature 387(6632 Suppl):75-8 (1997) REFERENCE PMID:9169868 (chromosome V) AUTHORS Dietrich FS, et al. TITLE The nucleotide sequence of Saccharomyces cerevisiae chromosome V. JOURNAL Nature 387(6632 Suppl):78-81 (1997) REFERENCE PMID:7670463 (chromosome VI) AUTHORS Murakami,Y., et al. TITLE Analysis of the nucleotide sequence of chromosome VI from Saccharomyces cerevisiae. 
JOURNAL Nat Genet 10:261-8 (1995) REFERENCE PMID:9169869 (chromosome VII) AUTHORS Tettelin H, et al. TITLE The nucleotide sequence of Saccharomyces cerevisiae chromosome VII. JOURNAL Nature 387(6632 Suppl):81-4 (1997) REFERENCE PMID:8091229 (chromosome VIII) AUTHORS Johnston,M., et al. TITLE Complete nucleotide sequence of Saccharomyces cerevisiae chromosome VIII. JOURNAL Science 265:2077-82 (1994) REFERENCE PMID:9169870 (chromosome IX) AUTHORS Churcher C, et al. TITLE The nucleotide sequence of Saccharomyces cerevisiae chromosome IX. JOURNAL Nature 387(6632 Suppl):84-7 (1997) REFERENCE PMID:8641269 (chromosome X) AUTHORS Galibert,F., et al. TITLE Complete nucleotide sequence of Saccharomyces cerevisiae chromosome X. JOURNAL EMBO J 15:2031-49 (1996) REFERENCE PMID:8196765 (chromosome XI) AUTHORS Dujon,B., et al. TITLE Complete DNA sequence of yeast chromosome XI. JOURNAL Nature 369:371-8 (1994) REFERENCE PMID:9169871 (chromosome XII) AUTHORS Johnston M, et al. TITLE The nucleotide sequence of Saccharomyces cerevisiae chromosome XII. JOURNAL Nature 387(6632 Suppl):87-90 (1997) REFERENCE PMID:9169872 (chromosome XIII) AUTHORS Bowman S, et al. TITLE The nucleotide sequence of Saccharomyces cerevisiae chromosome XIII. JOURNAL Nature 387(6632 Suppl):90-3 (1997) REFERENCE PMID:9169873 (chromosome XIV) AUTHORS Philippsen P, et al. TITLE The nucleotide sequence of Saccharomyces cerevisiae chromosome XIV and its evolutionary implications. JOURNAL Nature 387(6632 Suppl):93-8 (1997) REFERENCE PMID:9169874 (chromosome XV) AUTHORS Dujon B, et al. TITLE The nucleotide sequence of Saccharomyces cerevisiae chromosome XV. JOURNAL Nature 387(6632 Suppl):98-102 (1997) REFERENCE PMID:9169875 (chromosome XVI) AUTHORS Bussey H, et al. TITLE The nucleotide sequence of Saccharomyces cerevisiae chromosome XVI. 
JOURNAL Nature 387(6632 Suppl):103-5 (1997) /// bio-1.4.3.0001/test/data/rpsblast/0000755000004100000410000000000012200110570016405 5ustar www-datawww-databio-1.4.3.0001/test/data/rpsblast/misc.rpsblast0000644000004100000410000001641112200110570021117 0ustar www-datawww-dataRPS-BLAST 2.2.18 [Mar-02-2008] Database: Pfam.v.22.0 9318 sequences; 1,769,994 total letters Searching..................................................done Query= TestSequence mixture of globin and rhodopsin (computationally randomly concatenated) (495 letters) Score E Sequences producing significant alignments: (bits) Value gnl|CDD|84466 pfam00042, Globin, Globin.. 110 2e-25 gnl|CDD|84429 pfam00001, 7tm_1, 7 transmembrane receptor (rhodop... 91 2e-19 gnl|CDD|87195 pfam06976, DUF1300, Protein of unknown function (D... 37 0.003 >gnl|CDD|84466 pfam00042, Globin, Globin.. Length = 110 Score = 110 bits (277), Expect = 2e-25 Identities = 50/110 (45%), Positives = 69/110 (62%), Gaps = 5/110 (4%) Query: 148 EKQLITGLWGKV--NVAECGAEALARLLIVYPWTQRFFASFGNLSSPTAILGNPMVRAHG 205 +K L+ WGKV N E GAE LARL YP T+ +F FG+LS+ A+ +P +AHG Sbjct: 1 QKALVKASWGKVKGNAPEIGAEILARLFTAYPDTKAYFPKFGDLSTAEALKSSPKFKAHG 60 Query: 206 KKVLTSFGDAVKNLDN---IKNTFSQLSELHCDKLHVDPENFRLLGDILI 252 KKVL + G+AVK+LD+ +K +L H + HVDP NF+L G+ L+ Sbjct: 61 KKVLAALGEAVKHLDDDGNLKAALKKLGARHAKRGHVDPANFKLFGEALL 110 >gnl|CDD|84429 pfam00001, 7tm_1, 7 transmembrane receptor (rhodopsin family). This family contains, amongst other G-protein-coupled receptors (GCPRs), members of the opsin family, which have been considered to be typical members of the rhodopsin superfamily. They share several motifs, mainly the seven transmembrane helices, GCPRs of the rhodopsin superfamily. All opsins bind a chromophore, such as 11-cis-retinal. The function of most opsins other than the photoisomerases is split into two steps: light absorption and G-protein activation. 
Photoisomerases, on the other hand, are not coupled to G-proteins - they are thought to generate and supply the chromophore that is used by visual opsins.. Length = 258 Score = 90.8 bits (225), Expect = 2e-19 Identities = 37/162 (22%), Positives = 76/162 (46%), Gaps = 10/162 (6%) Query: 299 HAIMGVAFTWVMALACAAPPLAGWSRY-IPEGLQCSCGIDYYTLKPEVNNESFVIYMFVV 357 A + + WV+AL + PPL + EG +C ID+ S+ + ++ Sbjct: 100 RAKVLILLVWVLALLLSLPPLLFSWLRTVEEGNVTTCLIDFPEESLLR---SYTLLSTLL 156 Query: 358 HFTIPMIIIFFCYGQLVFTV----KEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWV 413 F +P+++I CY +++ T+ + A+ + +E++ +M++++V+ F++CW+ Sbjct: 157 GFVLPLLVILVCYTRILRTLRRRARSGASIARSLKRRSSSERKAAKMLLVVVVVFVLCWL 216 Query: 414 PYASVAFY--IFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIY 453 PY V + P + I + A + NP+IY Sbjct: 217 PYHIVLLLDSLCLLSIIRVLPTALLITLWLAYVNSCLNPIIY 258 Score = 73.4 bits (180), Expect = 3e-14 Identities = 32/86 (37%), Positives = 47/86 (54%) Query: 55 NFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEG 114 N L + V ++ K+LRTP N LLNLAVADL +L LY + G + FG C L G Sbjct: 2 NLLVILVILRTKRLRTPTNIFLLNLAVADLLFLLTLPPWALYYLVGGDWPFGDALCKLVG 61 Query: 115 FFATLGGEIALWSLVVLAIERYVVVC 140 + G ++ L ++I+RY+ + Sbjct: 62 ALFVVNGYASILLLTAISIDRYLAIV 87 >gnl|CDD|87195 pfam06976, DUF1300, Protein of unknown function (DUF1300). This family represents a conserved region approximately 80 residues long within a number of proteins of unknown function that seem to be specific to C. elegans. Some family members contain more than one copy of this region.. 
Length = 336 Score = 37.1 bits (86), Expect = 0.003 Identities = 32/145 (22%), Positives = 58/145 (40%), Gaps = 7/145 (4%) Query: 336 IDYYTLKPEVNNESFVIYMFV--VHFT-IPMIIIFFCYGQLVFTVKEAAAQQQESATTQK 392 I+Y E+ S+ I + + + F IP II+ L+F +K+ S+T+ Sbjct: 192 IEYIIETTELFGSSYEILLLIEGILFKLIPSIILPIATILLIFQLKKNKKVSSRSSTSSS 251 Query: 393 AEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVI 452 + T++V + I+FLI VP + F + + A + N I Sbjct: 252 SNDRSTKLVTFVTISFLIATVPLGILYLIKFFVFEYEGLVMIIDKLAIIFTFLSTINGTI 311 Query: 453 YIM----MNKQFRNCMLTTICCGKN 473 + + M+ Q+RN + K Sbjct: 312 HFLICYFMSSQYRNTVREMFGRKKK 336 Query= randomseq3 (1087 letters) ***** No hits found ****** Query= gi|6013469|gb|AAD49229.2|AF159462_1 EHEC factor for adherence [Escherichia coli] (3223 letters) Score E Sequences producing significant alignments: (bits) Value gnl|CDD|86672 pfam04488, Gly_transf_sug, Glycosyltransferase sug... 84 1e-16 gnl|CDD|84583 pfam00175, NAD_binding_1, Oxidoreductase NAD-bindi... 37 0.019 >gnl|CDD|86672 pfam04488, Gly_transf_sug, Glycosyltransferase sugar-binding region containing DXD motif. The DXD motif is a short conserved motif found in many families of glycosyltransferases, which add a range of different sugars to other sugars, phosphates and proteins. DXD-containing glycosyltransferases all use nucleoside diphosphate sugars as donors and require divalent cations, usually manganese. The DXD motif is expected to play a carbohydrate binding role in sugar-nucleoside diphosphate and manganese dependent glycosyltransferases.. 
Length = 86 Score = 84.2 bits (208), Expect = 1e-16 Identities = 33/85 (38%), Positives = 40/85 (47%), Gaps = 2/85 (2%) Query: 505 RISIKDVNSLTSLSKSENNHNYQTEMLLRWNYPAA-SDLLRMYILKEHGGIYTDTDMMPA 563 I L SL N + + EM LRW Y AA SD LR IL ++GGIY DTD++P Sbjct: 1 YDVILVTPDLESLFIDTNAYPWFQEMFLRWPYNAAASDFLRYAILYKYGGIYLDTDVIPL 60 Query: 564 YSKQVIFKIMMQTN-GDNRFLEDLK 587 S V+ I R E L Sbjct: 61 KSLDVLINIEGSNFLDGERSFERLN 85 >gnl|CDD|84583 pfam00175, NAD_binding_1, Oxidoreductase NAD-binding domain. Xanthine dehydrogenases, that also bind FAD/NAD, have essentially no similarity.. Length = 110 Score = 37.2 bits (86), Expect = 0.019 Identities = 16/82 (19%), Positives = 36/82 (43%), Gaps = 3/82 (3%) Query: 959 IKGFLASNPHTKINILYSNKTEHNIFIKDLFSFAVMENELRDIINNMSKDKTPENWEGRV 1018 +K L T++ ++Y N+TE ++ +++ + R + + T + W GR Sbjct: 16 LKALLEDEDGTEVYLVYGNRTEDDLLLREELEELAKKYPGRLKVVAVVSR-TDDGWYGRK 74 Query: 1019 MLQRYLELKMKDHLSLQSSQEA 1040 + +++HLSL + Sbjct: 75 G--YVTDALLEEHLSLIDLDDT 94 Database: Pfam.v.22.0 Posted date: Nov 8, 2007 6:06 PM Number of letters in database: 1,769,994 Number of sequences in database: 9318 Lambda K H 0.327 0.139 0.439 Gapped Lambda K H 0.267 0.0632 0.140 Matrix: BLOSUM62 Gap Penalties: Existence: 11, Extension: 1 Number of Sequences: 9318 Number of Hits to DB: 28,279,060 Number of extensions: 2147710 Number of successful extensions: 3028 Number of sequences better than 2.0e-02: 3 Number of HSP's gapped: 3016 Number of HSP's successfully gapped: 20 Length of database: 1,769,994 Neighboring words threshold: 11 Window for multiple hits: 40 X1: 15 ( 7.1 bits) X2: 38 (14.6 bits) X3: 64 (24.7 bits) S1: 40 (21.7 bits) S2: 77 (33.6 bits) bio-1.4.3.0001/test/data/sanger_chromatogram/0000755000004100000410000000000012200110570020575 5ustar www-datawww-databio-1.4.3.0001/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf0000644000004100000410000013461712200110570026272 0ustar www-datawww-data.scft 2.00989898:8 
;80<9M=:]>:`>;]=</\<=I`;>ji;?v;A;A<B<B/<AY;?:>z9=q8=i8=?a9>oY;?Q<?H=>@=>.9==Z4<=1<>/=?,??*A>)C<+E:3F7AG5TI4mL5Q6 Y7b8k8u8<7q6666lT6P747 7&9V>;U=k@Cr"DS=F3KFLGDH7H%II_H8GFEjE3DDqCzlTCl%6CbC^D`]Fh"GtGFDp}@To=@b;3W;-N=/G@07CC=F?ECZ<EBp9C96@+2=/<,=*? )B.)DQ)E^r)D9*B)@)>)>*?*@+C}+Fn+Jc*NZ)SS(ZN&aI%jD$vA$>%=&>t'?S(A/)B(B'=B%bAq$A8#B#D#E#=G_#hG3"F !E C B A!B"Bp"Bc!AZ?U>R=P<O=M@K DI IHPGZGgG yHJKLLlL:ELrKKKfK6J >HZGqEDDrEbEWDOBI@F>D=B=A?@@?B ?B ?A??@=@<?<>==?=@@AEAOA]@p?!>$t<&;:'9'17&t6%F6#5"g3!1".#9o+$f\)%O&&G$(C!)uA+N?->.<0;0h:05:0 : .:"-:#+;");!'= $B !/I!>V$Bh':)&*)(5&uo#?"    ;X mi Wp!Go!<k"6k#2s#1#1#0"0!01! 2W"#7m$%?p'&M\*'`1,'|-'-'F,&+&h*%0*#2+",! y- kW-@,2* +( )& (&)&l('')$+l!-+/00/,)7(&G7#JP!=q! DE~jI2 ""#%')L+,,,$ +4!*N!+t ,d. 2I5J9<>l>E@=z!:6s#081+G%qh! w[5C=Hf"&$(7**;w)&$"Y !4 $ -;!_"n#$(% &Z (; Z.' ; Q t   ; M# & sI( )*|+X+P+e)?'#?0$t9[ZX Qi   \  Z g  Eaz7 !%())&#          /LYS=      :QY Q ; ! ! ! "<I@/zi;7f]p7 LE 3[!c#T "#0 [U@"8W s( 8 i  37?z;8Z!(      My0  .zj6[G +cq.SB_Q7|C  o%  a J+  r`   ~ \ {VY| ! ""  xn`[ p{T|f    q# %0B%x!.   #    *   N    M  $ 0={ ^ o 7:nUDR@$ 7=g U2Om c $x    S5@ s >C  +$fBBl: I ~l   8    $   M  | w &L!j qS*i$_.0& |4D   w  * d R oO D`Oh4 8 ukX$"u&'#=LTJ f 0goF"a PV!? 8 k 1 , O  $  4 p F   "V!'?I,-m-d *|%Z 8WF 2UG*4;  x o   ]  L9  U   ME S   \ ( b  2 1 9 #!V {E9aZ+ \jK Ba"8ib { C%!5):/ 3'43 / V+' +% x& (( b, 0 5 8 ?; <<S=> ?<<BD;FGLGF C @<8}3/+(&& % %% %% |$ !'  O   - T [ 2 R>  ? U:H!Y % ' \'&%#   C H " >!b#!$@)".i-(" P{(KJ 6VS  y]3| 8Y y82   $`)*&W { <    ? + =KO3G 0A S {+%)'(3%!,ucW mn  y1>U `  !  H 9$'w   N / ~ $ V g3 tH WB}  > 2  "$%#8nV-U 5 F06VL   e x Z PU2 W  N SdG 9 7 E n   P& _     9 B{*0 K  [C   !  | 5Z ' @f0 ?    {  *?} U 9  ?   ~$#NM d [      ! h  h $6 ot1       /,;&   % G S # ( .) ' $"!>U  ( 7 Q  b0 c $ ' '"9 +RI s cXc""| A ,O   , ]  `t@5 V PX1 e5T ` u"" |Q& v F     6 ]68   I    ^'Y<6 "[S  5''  U  .  D 8 >  3 i >"z$#+$!j   R ZBm 5^z  Q /s   \ K    g   "# ##,##N##"!- Ja#a 'R ,{ . ,!%n,~ Q T> M  ,F  Z V=!*% (!  
* } j b W  )    ? < X] txmbD!#H#"   N)e_ y'  Y$P)[ ' g   )j  "=(+&%22= CC > 4(' v [ +    K   3t"X   x'OFK  3 :   #Bb   ?  _  Q C    ; }M 2    \ Oy Eh ] [YT% _    P " 45P  o )   1 z !]s   Q   " c P %{ S  W  %{ $#Vi     m b /  N Z  ` N {T%LS    }' 5#  Oj`Z&C%?t  RoX '   =tY  o n#]7     Z +  k 4 ). 2&  K    = I f /:3  A  " s   c q ING wC myY s    S     !&.'$ Q y     j<  ] [ O  o      , P  j< =  |      " t   >  *m E  @E2    I p B >?wz r       @  k & X  1  ! 6 Z *r  FTge  f S&X 9l$ s s J^ )  /)G~  ^ d   y 7 G  ( 6 -  P B B  _   'x$H    - L :; ~#y  :  { e  d y   X  ' -  |e +:Rf    i )4N 5  @ 7-ND    f X$j E)jxd&   B H> W 0   X <T5F 7A  I Jj ! lx.( s 9U0    4YGv?\^4 X. , 2  n $ v%  [  5     !H {7!!;"c!2 H u,VR  Z t   q B   ^ ]\   d 1 9 " > /PN~ t   <0hT =#SN &6H 6W     3h'2@/LihRc    > } 5 ]r X /'| V7  e "I        L!W  =gf:<  f  T     O  q ( C  X      v) 0]  8     u : 3! Rg.n    RG =?     a {-+uL  = k&:   E  4 cj  4  8 >  "8L )  6  ' 5  3  v   ~7  y$  ?[3k  Jj# 7  3 vr.{5  D3 7 mVoh =   9 + fs'J 9joF    S P)!##  DW E  (I  ) (Vl; D    r  H 7  ZGB R  . =   j(       "L's]     M K _   L   I d  W  C    7c!L      NM  # KL" ~ . ,    \J:3 n   Lj # 2w  -t } = > N m    6  T  %~R z *p.(u \ ^x ,  ; 2 u C H  A  / epM a & Et R;x5 @$[&7 t  @ S >   q 4AZ    T u    G   { j  4   5   h   {&j) ]p w"f*&eD_ L e-!E) M F?&j 1&b"K/Z \ 4 O   F |    X* 4m    - Y&B   ) (  @ 0 s  & l  w :  A  :   ~     2X6k   G  = T f M q3 T   ; e c 32#m  FQ 3a *9 Z    p  KI1 ` e > U  4 ~E  i> D  f1#Qo/  c   X q Z_P 9 P <q5[l  { (l*  1 l   0 ?% Z $   Z  !     \x"3  8 4 I 3 k    &  X&'g # z   v   F *y& V Z 5\ '5|  6    @ v+ "b  < G )  [' 'a  6 P @  q3 (f   ]{G P 3x.xG <  8xFG I7q   (  O E o} >H   k> /k D ^ N }<,T  g; 3w/|QC&@ ? ya o1 ,T   Z1  % Q  y=*c   R$'_ ; ] V &: 4 o   \/ &N}[%\2F 3 p6N   : jqMd*An  |M% /^Z(L2$_'&^*:G -p>G(\'1X    xN)4i     O"3bZ+#N  k7 +\   S% @~19  V#%W  * "y? ,Z    K"@z"  S$ 6t   <  K 1  `)  ; H YI2W~   nH'8j  K"" Iza06p  b+J<2m.?Z'!R5)j0"Q#T$,`   r6<|#$s;<v  { : - k   ! 
) \+ ?u   t@ /d  &    V    6s   h78g  R( L'&  w 3X#7%]"=  # zA7m     T %  /[ k =F}     j72b zG*a ' > / h20`i4?v      \.Cy s?7n     D  ?x  N& 0`e/ 'WN%=o   ]2"J|  Z.5`  W.<be9K   xI# 5q 2D/ k22Uzj? P  W+ 9 X r v^A& Gy  c6 *T_2-Y\19d  {N(ExP)\ ^2?n    ^2 !Ck]5 !>_|iH)-Ki  oR4=hxK% 8fc6?k R* /b  Y'#O    zG Aq   `55dP' 8czN( .V  rE! 1W    tJ& 7h  xFCsU& B s    yO, 9V p | gK/!Q@B S%)P   ~T. 6TpnR5 $B`z zaC' "DmiA  'Mx   mC!  #Fq{K$,W     e= &Gk}Z74R n}gI,9h[- 6X u  iL/  ;Yu    r V8 3 ]   mB( Ioa<#2YuF$ ;_  s R2@j   [5 " I{  |J" +MuxL (   6 `   e= 7c  n>& I t  f@! (E c | z`B%K  -4 c/ =b   vR1 Et  e91Wc8 9 b   rK)<da: ,@Q[[Q@+  7Vs mL+  3[!Z 46XzeA%&Dm  nF$3QmiK,!?h^9 /C S ]^ UE1 8[~   tQ04V x  }\;  8 X y    pO0#B f lG' !@f   sM/"(?ac@" *A Wg l g W A*.Kh}cE* (Dg%*+' }Z8 !8 Wt ~ e G +* Fe     d F) /F [hj `M6+G au|ve M5  )Fd    {_@$0G [ksq e Q:% $7L\ ee\L 8$ .CV b f _P;& 2J^klbO8# 3I]ildS?* ,> N X Y Q C1->LTSJ:'-DZioj\H1 !- 6 9 5 ,  AT%T1A@A@CPG\TgAqAv A| AGG TTTGGT T GG#T2T=CL GU!Cc!Tm%Ax'T%A)A,A;A%A%C%T%C2T2T2A5T5T5T)5T9#GG#GR#AZ%Td%Ap%Ay/T/T/T/G/T/T2T9A9GDCDTDGDTDT+DG6DC?6AJ9AV9Ta;Al;Tx;A;A9A9TDTDG2A2C9C6C9A9T9TDTDA*DA5DTBDTNDTYDAdDToDA{DADADTDTDGDGDADTDTDCDTDC DGDT&DT2DG>DCHDASDA^DTiDAuDADADTDTDTDCDCDADGDA;T;C;C;T;G*;A5DA@DALDAWDAcDGnDCzDTDCDTDGDGDCDTDTDADADCDCDADA DA,DT8DTEDGQDC]DCjDTwDTDGDGDCDTDADTDCDADADTDGDCDTDT+DC8DTBDAMDCXDAdDCpDC{DADADGDADADGDGDCDTDTDT DA DA DA $DG 0DA <DG IDA SDT _DA kDG xDG DA DC DT DA DA DC DT DG DA DA DA DC DG !>A ,>C 7>A B>C O>T \>T iDT uDT DT DC DC DC DG DT DT DG DC DT DT DG DA DT )DG 4DT ?DA KDT WDT cDT oDC zDA DA DC DA DG DC DA DT DG DT DC DT DT DA !DT .DG ;DG FDT SDT _DT jDC vDT DG DG DC DT DT DC DC DT DG DA ;A ;T ;G ;G -;A 8;G EDA PDA \DG gDT sDT DG DG DT DT DG DT DA DA DA DA DGDCDADA&DT2DA>DCI;AU;Cb;To9G|9T;C;A;ADADA;A;A9A9A9C;C ;T;C#;C.;A:;TG;TS;T_;Gk;Cx;T;G;A;A;A;C;T;T;ADA;A;C;A;G;G,;A8;GD9GP9T\9Cg;As;A;TDA;A;CDADG;T;A;T;G;A;A;TDC(;A3;C?;AK;TX;Ce9Cq9G~9A;A;G;T;A;T;C;C;G;T;G;G;C;T';A3;A?;AK;TW;Cd;Tp;T|;C;C;G;A;T;C;T;T/A;G+C+C 
/A;A$;G19G=9TI0AT;Ca9Tn9G{9A/A/G'C+C+C+C+A+T+A+T(T (G;A%/A1/C>GKGWAcNoNSIGN=A=1744,C=1747,G=1607,T=2271 SPAC= 15.78 PRIM=0 MACH=Tom-19135-018 DYEP=KB_3730_POP7_BDTv3.mob TPSW=KB 1.2 BCSW=phred 0.071220.b NAME=3276528 LANE=50 GELN= PROC= RTRK= CONV=phred version=0.071220.b COMM=RR0 700 0791-GATC-neuA-Rev-130425 SRCE=ABI 3730bio-1.4.3.0001/test/data/sanger_chromatogram/test_chromatogram_abif.ab10000644000004100000410000067646012200110570025710 0ustar www-datawww-dataABIFetdir{o0 Tom-19135-01837XXUnitID=3;CPUBoard=ECPU550;ArraySize=96;SerialNumber=19135-018;3730xl0710297 May 03, 2008 GATC_Owner Ooa(%7BLkm},H\nw +&EHyz+yz}efQSjHD+0 zy{}z|xN}' %28E_wx}w9(3]^fjYsu|^>b\KUU`n<  A { 9L?dT1Q]2[ c8Vw A F^hy:#Oe;p+yrq+ n +7 x8T L N Q## ? IX{dz N +[Uq~VK}8 ' Z0MsU f+qR %6%.+>)>KGKDjgu~tf`X -* **Ke:bfu?. ' %##&+ %%"&96B]VU\?D0r{nZb`{D9 Q )#j:  ! 2 ** WTr&#$4#  -9>FI_nGWXK(~~mdu!?wU t0 S""+ 1{ g'am2y 6\} '-o }\89{?&A RR$'('y$t O7)\j=+2$9G\svd PW -yPClhf[<M@+-2KZMeL];@653  &,37RKQQK<"  *:QXSF`jt\J6C4 0?Snjs`|p_]mzonNK3-346<SjPLZKvpui}~ 1XN$ q9xbN R%q}sXG*'/"*@<T_k\iuylouL[B88)! !(()+,(.+0"=/129/+$*GQon}pioWoTZYNBp{viupTHE*A4)$+(&(6+  !00<I>#&$+O<@8Q]nh{}r}GWBT^t|"2l0) 2< "" yQe|X  v  S*e(xzjyaELHG0:986MMJGonapMG,7PepS3. (B;fDs{ i}= 0Rcta>=U\OIQg[F^- a/zV Z~q^VND+   ?87H"4 + 5/#("" ,LEQ5*  "R_)5 - J[ SKK6>=<GBDJPM/=99+-"2\a. ?~ /1ye}mghfnS(%$4=B(SMjZgdldc_ho~Ef=: ($?Lb>g Hz'd 4 \;!!. '4Of:P+#!"6LK[JkkheQ S9* s"~"Y$ ?C+ ~ B3' )CGbK@<+:BKQG13Vl:7cB =lg= G;@$+7"7Gpxuq`SD& 1,3<,/7(HZl ; N w(d _+Q} z x U H]Z.S[*  pww*JNVH'CXHP % I X*rvR>AI6.   } (2kc   ' E U .EPuH!! 
'*)<QTg]=n+Z ' k M P Cj   (cdk" RSJG0 1 4 Zk3+)SEYdTd3N F V < 7]D$    85h`9  kB & p 3 # vx{rzxyqyokjnoglruyqztpbilodigsqyir{{z{{gx`dTRd^d 0n)c S% Zji(|{|gpco}{F9  _ / 7{wbTz~qoohp{{{ebYFTM>8KV\{qnq}peUY>C>9;@JY16>;982B@YZPZfk\]YabVWWNZan]N]Y^KMGAFSRIT`iokmaYl]S`Wevky|zrja_[fVmw{|vquh|p #* >sj  e A C |3w    :v}u{|yxqz $as    u* *VRKV8 " + @<U}oy][ylgbvz{fd]KN@JIV[Ye_[W\V\FHW[@Pf\]VU_gYRTYQTTON[DNd^c]WYbqnoffjbwqjjab_`^i_w}y|wda^[V@EHW`w|.x_2#iednk}eh_al{$ ! 1 y nHzytcVYU>Tg}ugVKUKKFLkkoxywqrpl^eoatz}qonpgft 8E5 T|Yx[fb zv}zx}mnhcbf]tlqqg^V[^Xgeu}{sinZY_kuy?) w  ! b0ssc[cacljk[KRLH@8=A8HE@PBFGFM?16*55*1EBM=M?8B2'.*6.:IJULcX]YKKPA9=9AQNx*t ^ 1 - YA8-taZQowNq   v b `tF Nb TSilZ"/4w{~ypZUUPOJOrlV0:Ge{zbacYlp\z"K; 7 YHE3mJ?CJ#Uw~ y|sb^gkix%owJ z EUDj^VN=A;>FFOF>SUJjca[NQ5C6A;C:ICALURTCE?GA8HEJMa]j__QF>=/1I1>M]Kmsx{oj'/8Pnm<OYRaK?QE;77B?E3<CJB=(&!3.'2"&/ !50#('%*3/'! .,&.-:C>8H<7;/!.+'*-7( ++55+(" 04QTGWGTZ6-.*<EPis{ ^{ Q  5$LOk_QNN:150&'273(,2762<*#+    !   4"*#&   )#   $4=(/%$#-"!-*'*$       '6/3:3+& !'$0-0)39+1. %!*$!*-(#%+908.4@BMKE:97,1% !+$'#,9ISRNON8?:?>JTYn m ;:  {`f\c`UWHNA<==BKB]c(`O,6?al_XQONI?;46,'&/)7J86%3./  (49 #5/28.$ .#*"!++*)451E3*)6&%(.?::HEGTP:5:-74,*%2230"&#4+:LFAZ[KM1.>I/>M_s"Mi@-hq8A@:hX|19XSj8{|iSE<*=338040/-,2/(()-()"-""+#$  ")#, $%$  !  #) *).$*26D80**,$3?I?PbgXaT\at]>9cE+%)%")!/.8 +429124,&(-;MOb}wfejy>n POmp}^K@>?*#'&*%      #         &    "+63<1#%  %=3@CSTJILc|U1Rmw6Qy\JD3=8K<9A;60$<?7/FFSi]jpiqt3 #t\HWPR3*4A510F914;WE5BIB@IN772?-@8/C2?,27=7<81-.060-98225@6,,*)'%17I\caerhWdltVU4g;! Io+>:)}m #m_RA1-00$+&#%      #         %7AO:BF@AMSs mW>~OH^uZ7***'%*-.#+.%6-&"(   @  ,2/'&1!&$#(07,-==<'57EKXs9ZE5{W>   !$$133(9<6561,2-I>HRgfuzsowb`G^ w3)( 5oIK/..0''(&6,+-,4%&$%"/(+)  8(:R`gi\]g8.9LI$kN3'.((0-'!448OIGWKPNps!O~(]kQ:)#))$358+      %2;3JUZQTLU1.  ',)$'4&1%59G?990=7&.+ "   0<LNSQdE]]jf&n]x(5yN3.# "+!! 
!*&/<LDjVYjYjmBCYJ%f7!5b S?)vlpp}klYSC99'#      507@RB/*    & \3  !#6)@\hh !Z^&v I g > !0' #bhK5% &y`XL&.|`cM:6?/% '324>/6MONE??Yc^]\labrUw"G?` /\wX hj.p~Qv!$}'*a,.".-,*(&"0P Ve#U@~Y~E,P)  m6 |DXpqW70DZTuZJgb,/&*=H MP>{VOf^<=>J,197M~YCLC%#U,&7..@2 9-H7CDHF+,/)5;7^j|v~jg{ bM>U   [a V nwxTj} <H.WCqBA!X~)} O}qin2&harqXa]M>bK^ROJ^?@C6( )u` `W@s VH#Bd%`gD4;]F\H  ,@ ( 'B]WNYEqSgPrf_cadx, 0 )N;/V ~ Q)r P O|t4sJ&} =37`J4,D:-orrcvH'ao j ioQ=KS<Xavr~yjameo{kdMD 0CGK%"|(1Hfoq PJsj4?` z?Zg3I kz:DG ?Ve7PV S t b r ec # 0:9 c 0V,o=.4<5DC 427g&0 klmCD.@on ld ppEBkjLjid 0Mq7 "trH/mIh=VnUKLa d+WT  i %O)  My&aOK1":PWQuRcl ; `j e /?lsA  - \  X ejZ<. y 5 Ljq@VW die-kMV`lA:aB#36OVZo0Risti: cF AZ]$ Y^fE)6ZnXZP!>?d}ijgS`|toYYrgUdt[ZLs}015U' ja z e SCgd Dj5olb ` ST *M { Q ^V+( ) f Q c!LdQBB'+U^X3Gj@*=Z>pD a's ~rtHb=rm$R p"#" U"j2Vo~ r^\mOKv|K% osJ-YG u ! D t g5V u L QY ] L B  i-NpI =HxR.'~yW}_m|o6"ljxWc]|tmf6/4:muOG-3] HUTIHIM|W rg- h  aKz W (Gh;$$#@=A)3:Guo| :  ` P] 1 *  2  <9l l ~n3./7Nw<)!+Jkl}`_P,4 .:G_9=(97):3B\ULMQF>)   *Jj]=E)(5%=RD\[S_jrwo[Ypu j~^ ..B~R=(O}r #z}   b fq&S r u )xpQ]O?fqKfbBTD=,%3M]ID3H@<'#?7B>" 0X]``XU2!=;V}q\V?AF^jxKh  q&l- C-7$ @ : | m * ]S?(Abz 8 M _ o# K / [k &H 1*C8WbX b    ^dH  C'9IiicTDO.  %       ".)4Mtt[uru: ( 2 vcX2 ' DAbYg Q  c*6  bpl { x Z(=;J M   @}O}kWo \1  wE![xoV<?) '(4D?[q~Nm1 \!SA0_ {,yuz\kZ[@ ~ @ O (!7 3 i M  @RW_d5N4#5(k"(PDb7+Vwyj'#  g ( qzFj' X $ ^ I"S!   . ^&$HE6L;Aj{q~ph "  q>Vimjol^h_Q.!C 9 JNL  zM[RVsZJ7I@Fg_uzt]II6*>Ee{`S`XZ}rPDXStqt~ys)]o0=| R t ;<5f*( l.ZX,aq, vk{luv2* $0+ vt#fgHLUT*KOLMR?3LQ(29XD-N;9#-3wKNJ\Xz~rVdSiq~wlr#F` _ 0cAggVZGX``u{sfhXJRVky~|ePKjpwb;_TDZ}z|y{l_@bbll|{{q}%%'#W  /?rCVd9DQ"}[ %qvM6{{ 7'! ZB<77>OI=,59U!r  =s{ w gf^=jm4ykhrWyk`J_=;8Pso{xourq^ovxqrw{mSPSH`nx{xtIROop|*K[kz+HSlsF6aEg^ `/1-akM'.+ {kM"hwf_5|!6r32$ N\We$f&$) K-~jh~p]ILe }]b $|wD |QJn|8pZA7uC=w2Z-WPFb1AZMd7RwFL>i='MJ qsdcV]uqrxw&gN 4,I ~}gj[eTVTVCZrqxyoecGNJVoa}}fl^V?RYnbln|r}p|vynfgIPr}3+):@( 8P.( &Be{Q*-])"d)mi(o[}n|!aCb='hd}KvWl@[7P1r.'/?)#@-3i 8%p+! 
"bE'FdQ"~#+8  h==/H9--f@}/nQi ]j4 B ~=Vg!@t LV*`{(&uv8ZqlwR  L,jb3n!}4>B2$3(<<^<<l8^A b I#{fwz d(je}(<{{8ZyHwX$w_Q\n]ZSk|xz{zguuvywf#x]nt.b  ai]C,I~}sn)=>hBDa|rQRL>/DKHYddytT^V^uy{y]\fcSdtsceg|_kkSWa[_PXQLgbjRX?7SJkdZq|p~|kldg`Y]c[jxepfg``MHQJFJ@M[KXXenerumos}Q$CAY.o?>68bS? VuRy.yhfodW=INRT>CUdoqkd_EPKHEJ`IU^`dhYF8<J'""2!<%EQ\IULN6?>2$3/"17GGB9@DA9ONXLNfeiymuxf^YcYngWp^T"7M Ffy^q]UUaaUg^yiulevdTJQB;HKPUnjzd{m\WTcj\hlz{4C>J~nlRei^eSrz|6'?   V W%Bq45d?wp{4}u{_[UT`^Jbups|xdym`xgtdzf'0!>  oRq{T|boxewxFkQ]RB `,g@'/V}!n&=L8_P go P Vufm+={<_s>%x?b\seg+y ~QDm#Z"pws_Oln}r]XL\GH?JG==RHHWVQ]bHbkbZi+)l~`C!kMJ<,/0$(%)  (*&$,9+(/*:*,                !7122?83'           %   - 06@hq'6X>oX^;jy/ 4yfB`I2{}zdQS"txgbkijvgrs=Wo )s*0 q E!%')*+++~*)(' %n#! waVP<;KSL  X F*=-;-!C1Z,9A"Dq&  aD#dskMXUkdAB! mK/$!$cj13 V 'N^1qrz8<m  ^!]RI0hq"^ Z f=MiW@v*mS pP;q6{c*\+ L[$ z 0  _ V w0Aul j v("aax^tn.>l>6g:6wk$ c";1YVf  xwve 7Gk rzg_c'  ID y$oF<| &^'Ugg9 w Q~whcUJQ9.;7&JtkID0_Zh N 'F7r# ^) E3&i a \HNa 2 u @a tQ_i] F;'V( 7  {u  HP?"%60EGb~l QhIF-. <cP.Tozi  wf@^*0 `yG#!  o@K l H `^m  a6ZFYr/auyXLMq];P Za} x Te{z^fh "{j 2u\<<`,"\I `l0v g B6p sEZ o`CIc4K,*Sri.  62ruW 6a\o?*k~v~d+  ,mCW{ V G + at(>X a V Vsx( 7VY$2&J} "r&HP P\%{t = K < X !#Y"\ X d  4=v Z 0"D { Xj FGc~C kO:8<*EOk3 J MP_ ) , L <t@F/ V )jC&,:YNbfNEA"_bO & GU^4HaH 0  mDIWO wUdFn LS  n Q F+D7=|M919  :W~tHI 4 <c'_ y OIo 9bHA-Ialp"@_y  Oji 0d`B@4)7CFGY^991qA  * QR d = -F >\ S A UAS Q 4Ers@FW5[l{{X[KWJ6Yo !?KA]j} V [~~ ?+FXK ^Nk( 3 #EC ,|a_,abf<VPFL8Ide|sO8:502C^/XRNS:LIZnrypzVG*,SC.8>D@RL`nT`K$Q:<l a HFs u`;T{t  Y | 7-mX1)skbT0AyZ^ l \F \   )WC we) ox[Qc+$5JU285+a/<?CJ #)2#RjodBVKxsDTvi x.w9   &.d_! PM- {B 8Z~PynP1)gz] dNl $.>a W =.<O+J!Cy  M nRYp XY_z_c|\B u t;LGt~vT 3L W YM-DeN.yh`q8fs6^ 0 EQ N ({ aE   4YLk 2 >M( E + ) s>?Y{P: <? 
) 5  52)  (.5#-#F("TTtx~faR/1$?{:Ih bJ c TKZ :+ \  i '[  vf*itymY?9 :OT|lxSX4   '+'0(<   %#%)529.JD@#H"*Yniwl#0 + t  2LNnAC=.#     1O*mbQYn$;\Oi nO 8 X *nmlQMCQ>B"615K16@,%N7FAo3Tsz` ?w \ g _b||nVlF3!2QF8FO*#0  (7 Q>%=k( ^8+Ho !;(=DS9F5u0 l [= LfwO ;uU W } A  : 9  e7u 5 B yS w w ;u % w%$6xl{s$PPm^JI;?Ov_<Q G)rMb   >{t[_V 2<9@\`:l C &  ZgxZ  7 CoJ2N+:6,$0-$ 6/+'I#;j&B # A: cPNkihASGJJOq!MyRH!}bLYv  d N  P NwyCf  ^ r 1 ,t, D m  %f; K\Y#0DSrzmjdPJZPiw s01 F Zk"HF6(.:&*( IHiTeL<m,*#0.2).F R%zq ~ap = J lla_GdM4Wd V&3"A6" #y#6 " Y#J#|D5b 0 ~}e! &W~,XG5||M##K}~nyz =;F#oF f Z 4 .+qSU  { yq.W &+Y\b wH\(7Qjq }nZ{#uwa`ex~~Urupa{~ouwuss ZmaPjUV 3m*  %PZ=?']YSef0I#B?jzONrjxy\vzgq`dcOvzxygW`NVXLNaat{s}f][QYNZFbfalyydhda:$) ,-IhZgXW\Mbn~zxiebhxkw`8"&RHwyjZ`:1+ 4LQqow{hielf:T9AS\BM;p@< n=ElP6/WROUUm{pntnify{pboVn`fmkqgbZNcsoRRmd_hdxbcnhUk|Wwwz{ytzon^zl~wqlphog}|zgej~r}ipktZun}_jm|lySUC0@+$IanLA/C+=jLU_wgVGpv}t:6,G1#FfEED(UP'u5/; I++0x}XDe/o^ey  a Od?H||bmldF=RSr#,0!^JY*cb:Z@J;5vnni|eisshic]JQDLP9G.M<<XSMHGWWGNH 0CTB;JUNFKM@JRM,AG>F?QDOZ<GSZY^]jLFHSe?SKgd`i_ep{milmmcMN[MQ>NXaYZg@Xx]eiguqw]jSE[UE_^Zedl4tCNXN5=?FKIL]6>XRjoV^kuoqgsmPrsvO5_Ao@R~$|xgyxe[~jkn}gV^a\B@AO=Bddozxsep_rur|vsm~rbWyxC|}M-"LU JT6}y~b^Xyte|qx6e$?rW rVYor\uonl^k`xk}[eiWVLF?3'F9A?elqutpa_jfru &D*F@,F~FdXc\raovgqSZK_JQNfrj[kupnkzkq{jp~ryrgamgYtrfdopeQdZegv}}w|q|wqp[ceXUuq jLd T*GLO2a8 cckYfguuwXrC9I;3-,+HAGMWZ\Y\NaMew tPQ)3e|szjjskXRbX8dkfmpqusWqzfZ[Vihb{qk.WG60~d{wziz~rmfrxut,ih&S.3Ywao,pgRr=0Ljt^rT[rfz{yy{whszz|#W?sW" .2v{y }j8C?<6)6|N_#xnac]ZjlWgbjspifnnvcXSM?WRKHSGKSOOB;@@5'/..4?.7;L6FO9:K;TL<HKJ7A;?SZe9jBJN;/40,5''.?)66F@7NJ0?PZUXX_gmkn{~rrAKmzUSor~hu{yeafOF0R8eZ@9>39= :C:}kir\tWY^jV]\\htjaWuMq]l[_`l]^pbZ:HZ98;HUMOU;LM:E?7M2;*)&*&'#:1?'5?/<':FD6<38>CG:bcgjkR791#- 92VPXNoftr`|`B+IeK``=E0WO?hq ^FJdYWD-O1;)853A=1A6>9.<6;>LHC<BW?MJXUk`YbWjRX;CC4$0(#;(" #%&#*.3LH1ED)3/%*#.   &155:?9><'# 2!4#+(<1D--1"1 *+-&*($2 -$->/1$"",34D2:C65.(3%+(! )""%#%,(.+"/'*/%&*(+   6"/00-'- '        '7+1#+,'* !$  2!'  %(92=0.7&3+!       
#%   # #                 -)0=DBWagy %.:mu&C\i|"P6=9C,12m^qaNAE91?>5U46C wxrm}j~nyu|sw~jttvuy 1=al .K=| o / qmGu M7lcK;M?8KB74(%7mn`}J.R7:M,5Qh$"*E>_no^VK*~vV8:61;RULpi   ^_lih$t$ R(0>  9 ] + hE. f?+=')4<$.!}NU@Se34Su~SHF0"%1>;2+ #&NXddQ Vrf  c z"n0@u +Dc" ##|!  >WP+>.<Ol~o}qkNONL?]NbZ<A7 "[YadlQG6*Z?fcb~nPmqpYtZK3LB\huiAIF*" $-716(  %#'  %q|{uykq}g}z{aL+)')<:A6B4/Q3Ur#%T?E8*  3;8UzkP a | C]\ Ly/s eKDO5v^oGhk]}3/]T?xF G 6j?X Q z6fU9<5.;F=#:@4TRDA<3/\f!"Nogj^, x?Ffv{}X_TUNGtyUl>340)@!<W_Xm Kxu(aW V ?dc0-a I#M/G  }#usu_{ifcykp^K^6GSJv}tc[_<D#(96,60,E(8&N\rpuxdhPG9".CYN`gipk_YAK:"  8$442@R++%1`X\ssy~{{WX`feQTP]WL5 &*7-7BRist`/A+6=@46Tg4QvxxT |n}u( /-9} (,n x7 o =\=,$  :FyU lC~s_ </Z#pztw~qlv -+$&uiiZONHVX\p*-<KQ@QGXML[HVK,?Ho\||uA$e>bf yik?tF s ) 73.Z ol+ -$<K/ ;*;KIo6Cv EcR a S X#nU%y ?Zv2xgF?3,5MagfRY^RS2"3) 0.SLBA2%#B]qlqeHL@H_K;*);-2E`r-?q~ `]  M$^EI7 q2K  1( pkl}Q_|h{zvki?* {Sk lPpbC-5?w rM-c[ F *zA#N #uu]3=Tf|{TQ@7   Rh~ytPUe1-00nC o`Be h( = ,_U< S.TZ]I+@>GKNhn^jKA=+'#9Fbm X $ azrb|ykfB<B8GA@1%$)*Hk{ J| A8  ) 9r{viG=  ,S[^ca?|ymahgz e-5 JxgTrrwpqbo+36gk , T&  "o0091`,P 9#1J O $w}F(A$)=JH` 2VQ &X ( f @$CiRN>%01'& -=:RERtKp) a b WoX[ibaM4>=,2*  1 !#0   .<G)R{r $b H '`zomH*4-?(B@E^alYF51;HVv|}jI  5B { m ,xbY\[KY-bF6; &07H9R]  T a qc tqI32!'BQpix[ugY dw M0qY#  imn D vH|(^z]`cPaW2P<D4@KF@]RdH@94A&)-9  4SG=cRCAVZH9@0CK62LCTS\;ZQ]kcYq^idNa # s r ZA  ^ Y] q4  + F4 , {tA12-*  V u N / L S ft2;O~r}||iqergs{{xxere[rywep !CN- y [  -XttY?6:6;B`q = XU $TuV: = u!@Z+ !JF1E K   ;unC ~yzyqocldz\Uqe][try{}xux  '+Kx , 4 ( x =\n*7L5cB >7*JF-"8B8RG<MGyy ~ B3@ H 9K:koA{{thrZ[[i]cZkl{`sbthdq^L[`beZVLZB]ISSOLJDSDTXYelb~fny{othtovtqnkjolp_\dqSRbxej_buYR`UVfMKXTKVY`n`Y^d]i`b[]jjc~ouheZfQNL[cK\^I_Vb\MKHF@COJb`\_kkd_f`WSaYPPFJB?6CQKBDUWne~xadhkcJRXId]Sit]{{w]fi`^_JVBCLAVQGVXEL=3:D<?61CALW_PhYbZMg\V\^kT_]]]fSXCJV]yywgoj]VKHPTPLSS_]Msghwmrxzk{ywmecg_fdvxsk]_ZQ_f]mfetv~wyffo~arxxm~ilz^a[Ngmxo)SUgd+3y#vsokgxj~jnq[Ubhieu?)l V I,Z0 $>xM E 
!]3ie)YN<+ZZ|~z~|~vwmyb|bl]aha_Yip^fe_kAHA?P_GNiRZPY[\QBHDYOc_ipp_|qiV^BK]O^R_]kvvnowgS`jen!Er{]k"qo  r_=R= n {,T\K0BSopvraS6> ; &O CP$55f~W52<uippvlspqz~~{y71wY:Gw}`]w~b^_YI^]VONIN9LNF1=HQOH7OUSajfs~xr]iea& @<P5 R,zy~nlL^cQjOg]^SIQU`YZYkpuy}ulvvtiimuA8 G*_?CpR ~xjj__ih]YWddvw|v|sns[lm_NJaUg`\tkoXdVdO\CK?CJK11HFPFH[QFTDBMOKSSKQi^tsl_hX^^borOea_T[moyw$a\V8?W4*Dh|ylgd_vc]rbYRacwrk\_YbJCN\jW\ddl}qfi]iW`TTJXNM\\TZt^b]kpAf"}N4/~z}i][b^TWd^XQYZHSIEH>MR5OPQUQMMM\EQYPV]aah_fWXV>WYQIYe]qaqkhxo|stsebMTbbTQS]YJWQVFT_V_[d\bU\UfTZhnnvmvv{:`7PL6G) >w}4MA~CT*5J },DV]C/Z=AZj{yssvjurwnvuwlwdrxb\agaddegiqkyWVXU_eZLKMMO\fWY^TVVjegj_bseeczhlhjndnjpiu*dgCUN :r3T :{ Jx{nxZutt}mtvwumxpwsv{z~3`m}wtGz}/1Hx-& srX 4 .KTH`z~Y c-(Se ~~x E#wrv7A0f}xxmzh`cUVxfel^ZgfQkbPeZlW]_lSnZQ\aO[^Y_f{x H7y+<l+4~XF2w#pvbcf_bf~crs`^k[jZ`h^\[W_TDNYGPLVXQ^VQIC]RUVd_Xdcmoiyiy{ya?C>je3qOc$ruszws_e_efdX^Y[WTN^^[Q^SWVSIemvZq`fgp}wn{|~ ;m4@qBW_g \[:!~]`XpgGFhW'|{trbfqc`_qrbk[NQ_SPZVZaKSPKci[SC_ag[m`qWfb^Y]fRAPNL<;B9=HPKDOT4QWCFA>;[AKGNPccZcrgng{qogdbo_]QW[IVGOINVafbs}~ DHZR|nh^WCNL4BD2.83::3=/+/50@@3,*,9&*3.-&*(&%  !$ "       #          #25-'(  $      "#$       d,,,IIIg^^{{{&&&999DDDXXXaaavvv      
***>>>GGGRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR>VVUNNNmmm   
)))333<<<EEENNNWWV__^ggfnnnvvv}}}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<+6258050-03 6258002-04 6258003-03 6258005-00 Run Started Run StoppedCollection StartedCollection Stopped) POP7 Z-BigDyeV3Dye1Dye2Dye3Dye4Comment:!RR0 700 0791-GATC-neuA-Rev-130425 24e1efb8d86d11dc93a800101813fad5 Seq50_V3_IT15CS_V3_KB_PCR KB.bcpKB_3730_POP7_BDTv3.mobtruefalsefalse10.010KB_3730_POP7_BDTv3.mobQUJJRgBldGRpcgAAAAED/wAcAAAACAAAAdwAAACXAAAAAP////////////////////////////// //////////////////////////////////////////////////////////////////////////// //////////////////8WS0JfMzczMF9QT1A3X0JEVHYzLmJjY0NNTlQAAAABABIAAQAAAAEAAAAB 
AAAAAAAAAABDTkFNAAAAAQASAAEAAAAXAAAAFwAAAIAAAAAARFlFXwAAAAEABAACAAAAAQAAAAIA AwAAAAAAAEZXT18AAAABAAIAAQAAAAQAAAAER0FUQwAAAABTSEZDAAAAAQAEAAIAAAABAAAAAgAA AAAAAAAAU0hGRAAAAAEABAACAAAAAQAAAAIAAAAAAAAAAFNIRk4AAAABAAQAAgAAAAEAAAACAAAA AAAAAABTSEZTAAAAAQAEAAIAAAABAAAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 00.000truefalse20false510false800false25.0falsefalsefalsefalse20true5502042020420falseKB.bcp# 5Prime Basecaller Paramters Seq50_V3_IT15 Seq50_V3_IT153LongSeq50_POP71DC_Cap96_Fill17800DC_Cap96_EPS_ARC_SELF10DC_Prep_Time600DC_Scale_Divisor2.0DC_Down_Sample1DC_Laser_Power_Setting25.0DC_RS_Binning_PatternDC_RS_NumCapsDC_RS_CSDataOven_Temperature60PreRun_Voltage15.0DC_PreRun_Time180Injection_Voltage1.5DC_Injection_Time15First_ReadOut_Time300Second_ReadOut_Time300Run_Voltage8.5Voltage_Number_Of_Steps40Voltage_Step_Interval15Voltage_Tolerance0.6Current_Stability20.0DC_Ramp_Delay600DC_Data_Delay405DC_Run_Time5640Seq50_V3_IT15Run_Tom_2008-02-11_07-48_0522GATC_Results_GroupGATC74832K-384-3334GATC_Owner384-Well3276528KB_3730_POP7_BDTv3.mob CS_V3_KB_PCR888889::;<=>?AABBA?>===>??>>==>??><:7545678887666667779;=@CDFFGHHIIHGFEEDDCCCCDFGGFD@=;;=@CEEC@=<=?BDEDB@>>?@CFJNSZajv=b=h   l:f6 rbWOIFDBA@????@@?>==@EO]p1tg9fuNh5 />B:&u? mWG<621100027?M`|h2 kll+7GJ=!EjI2" !! #1Ghw57wY4    !!!       0x.# *NM$0{o7      !jqS*i0    "&'#Jf0goFV!  ?md|Z8WF2G4  MS\1         iP{(K6            #NMd[! hh$6o1     $''"  ""O,]`t@    QvF6]8          X] txmb      Xx'OFK     h][YT_   !     
Vi mb/NZ`N{%       Qy j         EIpB>?wzr@k&        H-L:;~           U0 4Y   [5 !H{7;   6W3h'Rc>}5    !W=gf:<fTOq(   48>"8   73vr.{5       LN   3nLj#     (u\^x,    EtR    F|X*    :~ 2X      4~Ei> Df1  Z!\x"384 I (f]{GP    Eo}>Hk> ,Tg;  ,TZ1  '_;]V&: 4o\/ N:jqMd*1XxN)   ;HYI  7m T%  2bzG  <be9KxI#   2Uzj? PW+ !>_|iH)?kR* 9Vp|gK/  6TpnR5  &Gk}Z7 9h[- 6XuiL/ (Ec|z`B% ,@Q[[Q@+  3QmiK,!?h^9 #BflG'*AWglgWA*   .CVbf_P;& ->LTSJ:'-DZioj\H1  0M]`]\`ivzqiaYQH@941/,*)+3ATm<qTV"=KLD7%j3l%]"pT@3-/7FZp .QrtS/q8_3 pcZURPOMKIHGGGHJKLLLLKKKKJHGEDDEEDB@>==?@BBA?=<<=?@AAA@?><:9766531.+)&$! "#"! !$')*)(&#"  !!"####"!!"$'*,--,+**+,--,*(&&&')+-/00/,)&#! "#%')+,,,+*+,.259<>>=:60+%!"&(**)&$"! !"#$%&(.;Qt;I?0t[Xi ZEz /LYS=:QYQ;"<I@z7]7 !#"   +c.S_QC%a+  !""   #%%!  =^:nUR$= 2mxS5@s >C$B      8Xu=LT            #V {9aZ+\jKa" C5'V+xb  <L}&%|'O-T[R  !).-("JVSy3$)*& +=KOG0+',ucWm   HB}>28n-   d97En& B0K[C!|5Z'@f     t,&        Y6"[S5''U.D  >+jRZBm5^zQ/s\      %(!jW)?<DHN)e $)'  %2=CC>4(   3:#Bb ?_QC;}M2\OyE           %tRoX'=tYon#  .&K=If/:3A"scqIN            TefS )~ ^dy7G(6-P         -DfX )xd&B   7IJj !lx.(s9 Gv?\^4X.,2n$v%     N~t<0hT=#SN&6H 2@/Lih]rX'IL    CXv)0   .nRG=?a{-   L) 6'53v~7 y$ ?[   D37mVoh  J9joFS          ;2uCH   7t @S>q4     *eD_Le-!E)MF? K/Z\4O  B)(@     9Zp K             3x.xG< 8xFG I     /kD^N}<   3w/|QC&@?yao1    /^Z(L2$_'&^*:G -p>G(\'  3bZ+#N k7 @~19V#  ,ZK"      "Iza06p  b+J<2m.?Z'!R5)j0"Q#T$,` r6<|s;    8gR(   *a'>/h20`i4?v\.   0`e/ 'WN%=o]2  5`W. 5q2D/k2   *T_2-Y\1  ExP)\^2 !Ck]5 =hxK%   /b  Y'#OzG 8czN( 1WtJ&   !Q@B S%)P~T.  "DmiA    (Ioa<#2YuF$ ;_sR2  K-4c/ <da:   ! 6XzeA%&DmnF$   8[~tQ0    !@fsM/"(?ac@"    %*+'      /F[hj`M6  )Fd{_@$      2J^klbO8# 3I]ildS?*   !-695,  /Ij/Y?o.Z lP4rS3_8zlb^`ht0=CB9+^9}ncZSNIDA>=>?ABBBAABDEGGFECBABBBA?>=<=@DIPZgyEr>Zqt;Fo\OGCA?><;:::::;;=BIVh5o;Xipokks1Wmp\1F0yW@2+)()('$!(7Pq D~Ld Jl@! 
$;_(ZMs|XPe?$9ZQ\ga7   E[c0[U@8s8?z8(My0.zj6[GqB7|oJr`~\{VY|xn`[p{T|fqB   D@7gUOc$ +fBl:I~l8$ M|w&L$_.&|4Dw*dRoOD`Oh4uk$ ,$4pF"VIU*;xo]L9UE B8ib {%:  (?S<;   2>?U:HY\% CH">b@      %)(%! ny1>U`!H9$'wN/~$Vg3tW "$%#VU5F06VLexZPU2WNSG   P_9{*    0? {*?}U9?~$   /; %GS.>U(7Q0 RIscX    5VPX1e5T`u|&   8>3iz#   Kg,N"-JaaR{ !n,~ QT  !    _y'YP   "(&   + K3t"      Po)1z] %{SW{#    #Oj`Z&C?           GwCmyYsS .       <=|"t>*mE@2    *rFg&X9l      BB_'x$    dyX'-|e+:N5@7N    !"! VRZt qB^]\d 19">/P       /|V7e"   3!Rg     : E4cj          !##  WE(I )(Vl;Dr H7ZGBR.=j(  "L        M#KL"~.,\J:    N m6T %~Rz*p. A/epMa&;x5@$[& AZTuG{j45h{&j)  0s&lw:A6k G  T;ec32#mFQ3a*  I1`e>U  #Qo/cXqZ_P9P<q5  1l0?%Z$  'g#zvF *y&VZ5\'    'a6P@ q3 7q( O   %Qy=*cR$    &N}[%\2F3p6An|M%   +\S%   6t <K1`)2W~nH'    ?ut@    L'&w3X#7%]"=#zA   F}j7   7n D?xN&   9Xrv^A&9d{N( -KioR48fc6 Aq`5  7hxF $B`zzaC' 'MxmC! 4Rn}gI, 3]mB  @j[5   +MuxL(6`e=       =bvR1   1Wc89brK) /CS]^UE1   8XypO0     +Gau|veM5   0G[ksqeQ:%$7L\ee\L8$999:;<=>>=<;;;;<<<;:9889;<===<<=?ACEFGILQYbku &>UkqT6}obWNGC?<962/,*))))*)))**+++*)(&%$$%&'()('%$#####"! !""! !$&''&%#"!"#$%&()+-.0000.-+)'$!  #%&''''&&%#"!  $4NtIEzs8q [C=Hf$;-nZ;' #&()*+++)'#!%())&#   /i;fpL3T# "W(i 37;Z!                 "aP?8k1O !',--*%      (b29!E !)/343/+'%&(,058;<<=>?BDFGGFC@<83/+(&%%%%$!    !%''&# !#$"  ]|8Yy82`W{<?3AS {3                  #()'$"! bc9+ c|A, "" 6I^'<     "$$!    "#######! #',.,%>M,FZV=* *}b  !##" [g)j=+2'v[        %P"45s Q"cP   %$   TLS}'5   ]7Z +k4)2       !&'$   <][ O o,Pj      X1!6Z   $ssJ^)/G       #y:{e     Rf i)4  $jEj H>W0X<T5FA    !c2Hu,      ]8u:    +uL=k&     3kJj#   =9+fs' P)D       's] MK _LIdWC7c!  2w-t}=>    ]pw"f&  &j1&b"  4m -Y&   =TfMq3   [l{(l* 3k&X&  5|6@v+ "b<G)[' 4iO"  %W*"y? @z"S$   8jK"  #$<v{: -k!)\+   /d&V 6sh7  /[k= Cys?   "J|Z.   Gyc6 ?n^2 5dP' .VrE!   CsU& BsyO,  #Fq{K$,We=   ;YurV8     "I{|J"   7cn>&Itf@!  
Ete9 7VsmL+ 3[Z4 4Vx}\;  .Kh}cE*(Dg}Z8 !8Wt~eG+*FedF) ,>NXYQC1G@O@*0A[@xlKB.bcpKB_3730_POP7_BDTv3.mobtermNNNNNNNNNNNTTGGTTGGTTCGCTATAAAAACTCTTATTTTGGATAATTTGTTTAGCTGTTGCAATATAAATTGACCCATTTAATTTATAAATTGGATTCTCGTTGCAATAAATTTCCAGATCCTGAAAAAGCTCTGGCTTAACCAAATTGCCTTGGCTATCAATGCTTCTACACCAAGAAGGCTTTAAAGAGATAGGACTAACTGAAACGACACTTTTTCCCGTTGCTTGATGTATTTCAACAGCATGTCTTATGGTTTCTGGCTTCCTGAATGGAGAAGTTGGTTGTAAAAGCAATACACTGTCAAAAAAAACCTCCATTTGCTGAAACTTAAACAGGAGGTCAATAACAGTATGAATCACATCCGAAGTATCCGTGGCTAAATCTTCCGATCTTAGCCAAGGTACTGAAGCCCCATATTGAACNNNNNNNNNNNNTTGGTTGGTTCGCTATAAAAACTCTTATTTTGGATAATTTGTTTAGCTGTTGCAATATAAATTGACCCATTTAATTTATAAATTGGATTCTCGTTGCAATAAATTTCCAGATCCTGAAAAAGCTCTGGCTTAACCAAATTGCCTTGGCTATCAATGCTTCTACACCAAGAAGGCTTTAAAGAGATAGGACTAACTGAAACGACACTTTTTCCCGTTGCTTGATGTATTTCAACAGCATGTCTTATGGTTTCTGGCTTCCTGAATGGAGAAGTTGGTTGTAAAAGCAATACACTGTCAAAAAAAACCTCCATTTGCTGAAACTTAAACAGGAGGTCAATAACAGTATGAATCACATCCGAAGTATCCGTGGCTAAATCTTCCGATCTTAGCCAAGGTACTGAAGCCCCATATTGAACN&=B[is %1>LUcmx)9GRZeoz,6@JValx +6BOYdo{&3>HS_ju+6ALWcnz ,9EQ^jw+8CNXdq{   $ 1 = I S _ k x   ! , 7 C P \ i u    ) 5 @ K W c o z  ! . ; G S _ j w   - 9 E Q \ h t '2>JUco| #.:GS`kx -8EP\gs(4?KXer~(4@KWdp| %1=ITan{ %1>&=B[is %1>LUcmx)9GRZeoz,6@JValx +6BOYdo{&3>HS_ju+6ALWcnz ,9EQ^jw+8CNXdq{   $ 1 = I S _ k x   ! , 7 C P \ i u    ) 5 @ K W c o z  ! . 
; G S _ j w   - 9 E Q \ h t '2>JUco| #.:GS`kx -8EP\gs(4?KXer~(4@KWdp| %1=ITan{ %1>  !$"924!*'+//952'===4&==/=&&=7/==&1==11==/==;;;;=============;==================;=========;==========;===============================;==================================;;==========================;=777;;;;======================;======================================;;===========;;==========;;=;;;;;;;;;===;7===============7=========================7=========77=7====7======7=77==7*2727==2=772=5!+"+#%2+++-=   !$"924!*'+//952'===4&==/=&&=7/==&1==11==/==;;;;=============;==================;=========;==========;===============================;==================================;;==========================;=777;;;;======================;======================================;;===========;;==========;;=;;;;;;;;;===;7===============7=========================7=========77=7====7======7=77==7*2727==2=772=5!+"+#%2+++-= KB 1.22008-02-11 12:04:00 +01:00AEPt $AEPt $APFN APXV2APrN wAPrV1APrX@@wARTNASPFASPtASPtB1PtB1PtBCTSMFBufTk CMNT""wPCTIDCTNM CTOw CTTL wGCpEPDATAIHDATAIHDATAIH&DATAIHDATAk K9DATAk VDATAk `DATAk kDATA t(DATA t(DATA t(DATA t(DCHTDSamDySN w(Dye#DyeNw3DyeNw8DyeNw=DyeNwBDyeWDyeW8DyeWSDyeWgEPVt!4EVNT vEVNT vEVNTvEVNTvFWO_GATCGTyp!!wHCFGCEHCFGHCFGHCFG??InScInVtLANE2LIMS!!wrLNTD2LsrPaMCHNMODFwMODL3730NAVGNLNE`NOIS?PBASAPBAS?PCONKPCONIPDMFPDMF?PLOCXFPLOCXC7PSZEPTYP PXLBRGNmRGOwRMXV1RMdNRMdV1RMdXRPrNRPrV1RUND  RUND  RUND  RUND  RUNT  -RUNT  (RUNT  !RUNT  !Rate vRunNS/N%?SCANIHSMED SMLtSMPLSPACA|wpSPAC?SPACA|wpSVER3.0SVERM?SVER,,vScal@ScanIHTUBEP15Tmpr<User phARphCH?phDYbigphQLcphTRphTRThis is the confidence range *ABI_Limits..FVocAEPt $AEPt $APFN APXV2APrN wAPrV1APrX@@wARTNASPFASPtASPtB1PtB1PtBCTSMFBufTk CMNT""wPCTIDCTNM CTOw CTTL wGCpEPDATAIHDATAIHDATAIH&DATAIHDATAk K9DATAk VDATAk `DATAk kDATA t(DATA t(DATA t(DATA t(DCHTDSamDySN w(Dye#DyeNw3DyeNw8DyeNw=DyeNwBDyeWDyeW8DyeWSDyeWgEPVt!4EVNT vEVNT 
vEVNTvEVNTvFTab[FVoc[FWO_GATCFeat//[aGTyp!!wHCFGCEHCFGHCFGHCFG??InScInVtLANE2LIMS!!wrLNTD2LsrPaMCHNMODFwMODL3730NAVGNLNE`NOIS?PBASAPBAS?PCONKPCONIPDMFPDMF?PLOCXFPLOCXC7PSZEPTYP PXLBRGNmRGOwRMXV1RMdNRMdV1RMdXRPrNRPrV1RUND  RUND  RUND  RUND  RUNT  -RUNT  (RUNT  !RUNT  !Rate vRunNS/N%?SCANIHSMED SMLtSMPLSPACA|wpSPAC?SPACA|wpSVER3.0SVERM?SVER,,vScal@ScanIHTUBEP15Tmpr<User phARphCH?phDYbigphQLcphTRphTR@~A67OQ1{zK90plkm$48V^p$&:?wMB>-j%;U*E^h9i_Qw|k8 <.(P-S"EnKl/:5t9z"DEoh9ZzZW-W 7W}$wFTN]%jEX@u]YMAmB] ]ddc}PMfOl>o UekB}qHqHStXK#XTE-=[c}Sn2M5FiK&=RUR6? sdW W>|#)|C]MMk9UHK&]a(b'`]U3\U5|8e7s+]i@C,TAq;vn;l|t"eAK^,0t;L,p2i#G&RcP3eZHBa+b8ENIRJ*Wi4GG\}d M5_@{&]t[X&rd+'n#wCdNmbcnPkIp?Hdsgbk1^DR!_Tr%,yPyN+8ds16;evr./b?Ay0]:jjMSDDl[BLwecfo*X&~"Z1HxITP1W+|{$v65MT3 #pJE?RVuU,\)lOYW|_nw|]jL@A[]"dMI[h?&0E(_y"X_tXL ViY2]2rRU)>`VZI;P>W+YYo%+04NRY0GbR31}ssb[l"G>}O#pJl&v`VfXr9lKd,}Sn2M5FiKKB*oCaRZ^.EAx8aPI^QJH1}1"/>0-AMj!e~8l{+9 nVI! 4J'BNpfcGHO5QfOl>o ZbCd]'GE#!#hmmfc4.=+I}S$oc/b_bRohj4TIx*&`OE!0z^n7aC<)\iyKG>?hECel7B]%j'k0$9eKoQD5pYHT+(aQQxnJilXSXW .C=vz^`%C`Cp39VaLg\M9LzqmIsIcb@+(,]_03BSmv^N6T.iP2i#G2rfJX|7"Wa%#;-T)RJ0vrs|%3*z$t48i598Bbo,N;[6_b.O_l;{IF}o-,(B~bHjAt3_iP-2ZE<}DU$~~8WYhY8|k~_D}<|hG6`VsIefaqQleA./q}.}:>JWT~Q;maHjlUOE6OLhcDW4KD@|v^&Gd\{diO}L6>U{mwYBT3?^NHEDX+9m#k@l'Z9d[TCs]jL'#k=sDa)x+7E2]^8:7&15,DceEmL`5ad[  365LjdA IgdB GefG GluT<>7 %75" >]^A)(#(,)@]`EIcaE'.* 22')23 ),(@RJ-&& -0+"04 )& &$ ''#8KJ3$ 9=5RjdB-,% (@D4%AJ>$" 298 +;8/1'07' '"!& 96-140  &' 5G=: &#(&&AH:5EB* $) 0,#+0  ,0$    #"! -4(  .7-  ###)$'+" "      &$ %$$" &#"$ $$   "& !                  "                                9         "#%%"'#&'  !#-4-8ZeR-;Z`N##1/% >@8/>=+ AA;1A>,&+++-*  073AcgQ! '3, 7=2NlmQ+3/%<8#34/?>0 8=< ,, &+)  55+ (Ukc> 32.  ((#?X]G)54$#$$ $+& ++#3/"/1 62( -3.  $85&38  0-$ <X<D )' !! ""'"#&  %" !"%% !$#  +'!$%' *$%( #'!8FB**@B0 "   !       
-3+      #%     #                                             8           #)%,64-  *45& "22)    1<F=!)?E8*'02# *4/'9A;$2A9, #KOB}w5Ro.N" AGB$,DC:  ;@:'?=4  :<10;:JNH#w=PO47,-97 X[GjlO{NZG 11%GG@;HF EMJ'x7TqN"  HVM)wqBUR %%$####N<X5 .2/ "! 8<! $" ..)(..  &$"&'% "$# &&  $$% !"    (&#& !6,  4"3  # '2       *"    !                              t                -62$ $! (;A;' !7B<' +?A6 #$$%  ""$?PM;>UUC! *& .AB76KL9&+&*4:2*72)#0EI>!'& (/.-.( (66("33-  ,8>/=gxoH,>:0 :>6+UopY.LjnW,14-4956e~xUwuQwc103/ 61' -+"09=()XqoO ,:6$-:<+)_~[$ 3E>0 *+% %0.',-'Ma\B &60& 062$54#AE@! NzzDu8iwM4;4  ?>16=:  +/. .0"B@84?;%12!!25##<<,/]qgA 03/  9=6Kyep41BF  23-=nAf2!8:0  *62%5; &7/#32 42& .0$2/!)]5f7 -/,*)), ., FHR61P;D%(%#" )( $) .)#(*   -(!H+P0 *L.K%7GB: .N6D %& ".,-  %<14 #9"?$ -45,   2#'   ) )+,,  &24%      !# !"         %# &#   !                                   %1@@P\gqv| #2=LUcmx)9GRZdpy+6?JValx*5BNYdo{ &2>HS^iu*5@LWcnz ,8EQ]jw+8BMXdp{  $ 0 < I S _ k x   ! , 7 B O \ i u    ) 4 ? K W c o z  ! . ; F S _ j v   - 8 E P \ g s &2>IUbo| #.:GS_kx,8DP\gs(3?KXeq~'3?KWdp| $1=ITan{ %1>KWco %%),;%2#%%969;;;929DDDDDDDDDDDDDD;DDDDDDDDDDDDDDDDDDDDDDDDDDDDDD>>DDDDDDDD;;DDDDDDDDD;;;DD;;99;;;;;D;;;;;D;D;;;;;9;;;;;;//;09/++;/!%%DD296DDDDD;;DDDDDDDDDDDDDDDDDD>>DDDDDDDDDDDDDD;;9;;;;;;9;D;;9;;;;;;;++;'+++/ ##/9DDDDDDDD;DDDDDDDDDDDDDDDDDDDDDDDDDDD;;;DDDDD9;;;;;9D;9;;;;;;;99/(  !'%225555%/////2DDD9;9D99DDDDDDDDDDDDDDDD;;DDDDDDDDDDDDDDDDDDDD>>DDDDDDDDDDDDDDDDDDDDDDDD;DDDDDD;9;;;;;;;9;;;;;;;;;;;;;;;99++(ATTAACGTAAAAGGTTTGGTTGGTTCGCTATAAAAACTCTTATTTTGGATAATTTGTTTAGCTGTTGCAATATAAATTGACCCATTTAATTTATAAATTGGATTCTCGTTGCAATAAATTTCCAGATCCTGAAAAAGCTCTGGCTTAACCAAATTGCCTTGGCTATCAATGCTTCTACACCAAGAAGGCTTTAAAGAGATAGGACTAACTGAAACGACACTTTTTCCCGTTGCTTGATGTATTTCAACAGCATGTCTTATGGTTTCTGGCTTCCTGAATGGAGAAGTTGGTTGTAAAAGCAATACACTGTCAAAAAAAACCTCCATTTGCTGAAACTTAAACAGGAGGTCAATAACAGTATGAATCACATCCGAAGTATCCGTGGCTAAATCTTCCGATCTTAGCCAAGGTACTGAAGCCCCATATTGAACGGANNSIGN=A=1744,C=1747,G=1607,T=2271 SPAC= 15.78 PRIM=0 MACH=Tom-19135-018 
DYEP=KB_3730_POP7_BDTv3.mob TPSW=KB 1.2 BCSW=phred 0.071220.b NAME=3276528 LANE=50 GELN= PROC= RTRK= CONV=phred version=0.071220.b COMM=RR0 700 0791-GATC-neuA-Rev-130425 SRCE=ABI 3730bio-1.4.3.0001/test/unit/0000755000004100000410000000000012200110570014621 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/0000755000004100000410000000000012200110570015372 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/test_location.rb0000644000004100000410000005425112200110570020575 0ustar www-datawww-data# # test/unit/bio/test_location.rb - Unit test for Bio::Location and Bio::Locations # # Copyright:: Copyright (C) 2004 Moses Hohman # 2008 Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 2, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/location' module Bio class TestLocations < Test::Unit::TestCase def test_should_not_modify_argument assert_nothing_raised { Locations.new(' 123..456 '.freeze) } end def test_normal loc = Locations.new('123..456') assert_equal([123, 456], loc.span) assert_equal(123..456, loc.range) assert_equal(1, loc[0].strand) end def test_hat loc = Locations.new('754^755') assert_equal([754, 755], loc.span, "span wrong") assert_equal(754..755, loc.range, "range wrong") assert_equal(1, loc[0].strand, "strand wrong") assert_equal(true, loc[0].carat, "carat wrong") end def test_complement loc = Locations.new('complement(53^54)') assert_equal([53, 54], loc.span, "span wrong") assert_equal(53..54, loc.range, "range wrong") assert_equal(-1, loc[0].strand, "strand wrong") assert_equal(true, loc[0].carat, "carat wrong") end def test_replace_single_base loc = Locations.new('replace(4792^4793,"a")') assert_equal("a", loc[0].sequence) end end class TestLocationsRoundTrip < Test::Unit::TestCase class TestLoc def initialize(*arg) @xref_id = nil @lt = nil @from = nil @gt = nil @to 
= nil @carat = nil @sequence = nil @strand = 1 arg.each do |x| case x when :complement @strand = -1 when '<' @lt = true when '>' @gt = true when '..' # do nothing when '^' @carat = true when Integer @from ||= x @to = x when Hash @sequence ||= x[:sequence] else @xref_id ||= x end end end def to_location loc = Bio::Location.new loc.from = @from loc.to = @to loc.gt = @gt loc.lt = @lt loc.strand = @strand loc.xref_id = @xref_id loc.sequence = @sequence loc.carat = @carat loc end end #class TestLoc TestData = [ # (C) n^m # # * [AB015179] 754^755 [ 'AB015179', '754^755', nil, TestLoc.new(754, '^', 755) ], # * [AF179299] complement(53^54) # (see below) # * [CELXOL1ES] replace(4480^4481,"") # (see below) # * [ECOUW87] replace(4792^4793,"a") # (see below) # * [APLPCII] replace(1905^1906,"acaaagacaccgccctacgcc") # (see below) # (n.m) and one-of() are not supported. # (D) (n.m) # # * [HACSODA] 157..(800.806) # * [HALSODB] (67.68)..(699.703) # * [AP001918] (45934.45974)..46135 # * [BACSPOJ] <180..(731.761) # * [BBU17998] (88.89)..>1122 # * [ECHTGA] complement((1700.1708)..(1715.1721)) # * [ECPAP17] complement(<22..(255.275)) # * [LPATOVGNS] complement((64.74)..1525) # * [PIP404CG] join((8298.8300)..10206,1..855) # * [BOVMHDQBY4] join(M30006.1:(392.467)..575,M30005.1:415..681,M30004.1:129..410,M30004.1:907..1017,521..534) # * [HUMMIC2A] replace((651.655)..(651.655),"") # * [HUMSOD102] order(L44135.1:(454.445)..>538,<1..181) # # (n.m) and one-of() are not supported. 
# (E) one-of # # * [ECU17136] one-of(898,900)..983 # * [CELCYT1A] one-of(5971..6308,5971..6309) # * [DMU17742] 8050..one-of(10731,10758,10905,11242) # * [PFU27807] one-of(623,627,632)..one-of(628,633,637) # * [BTBAINH1] one-of(845,953,963,1078,1104)..1354 # * [ATU39449] join(one-of(969..1094,970..1094,995..1094,1018..1094),1518..1587,1726..2119,2220..2833,2945..3215) # # (F) join, order, group # # * [AB037374S2] join(AB037374.1:1..177,1..807) [ 'AB037374S2', 'join(AB037374.1:1..177,1..807)', nil, TestLoc.new('AB037374.1', 1, 177), TestLoc.new(1, 807) ], # * [AP000001] join(complement(1..61),complement(AP000007.1:252907..253505)) # (see below) # * [ASNOS11] join(AF130124.1:<2563..2964,AF130125.1:21..157,AF130126.1:12..174,AF130127.1:21..112,AF130128.1:21..162,AF130128.1:281..595,AF130128.1:661..842,AF130128.1:916..1030,AF130129.1:21..115,AF130130.1:21..165,AF130131.1:21..125,AF130132.1:21..428,AF130132.1:492..746,AF130133.1:21..168,AF130133.1:232..401,AF130133.1:475..906,AF130133.1:970..1107,AF130133.1:1176..1367,21..>128) [ 'ASNOS11', 'join(AF130124.1:<2563..2964,AF130125.1:21..157,AF130126.1:12..174,AF130127.1:21..112,AF130128.1:21..162,AF130128.1:281..595,AF130128.1:661..842,AF130128.1:916..1030,AF130129.1:21..115,AF130130.1:21..165,AF130131.1:21..125,AF130132.1:21..428,AF130132.1:492..746,AF130133.1:21..168,AF130133.1:232..401,AF130133.1:475..906,AF130133.1:970..1107,AF130133.1:1176..1367,21..>128)', nil, TestLoc.new('AF130124.1', '<', 2563, 2964), TestLoc.new('AF130125.1', 21, 157), TestLoc.new('AF130126.1', 12, 174), TestLoc.new('AF130127.1', 21, 112), TestLoc.new('AF130128.1', 21, 162), TestLoc.new('AF130128.1', 281, 595), TestLoc.new('AF130128.1', 661, 842), TestLoc.new('AF130128.1', 916, 1030), TestLoc.new('AF130129.1', 21, 115), TestLoc.new('AF130130.1', 21, 165), TestLoc.new('AF130131.1', 21, 125), TestLoc.new('AF130132.1', 21, 428), TestLoc.new('AF130132.1', 492, 746), TestLoc.new('AF130133.1', 21, 168), TestLoc.new('AF130133.1', 232, 401), 
TestLoc.new('AF130133.1', 475, 906), TestLoc.new('AF130133.1', 970, 1107), TestLoc.new('AF130133.1', 1176, 1367), TestLoc.new(21, '>', 128) ], # * [AARPOB2] order(AF194507.1:<1..510,1..>871) # (see below) # * [AF006691] order(912..1918,20410..21416) [ 'AF006691', 'order(912..1918,20410..21416)', :order, TestLoc.new(912,1918), TestLoc.new(20410,21416) ], # * [AF024666] complement(order(13965..14892,18919..19224)) # (Note that in older version of GenBank, the order of # "order" and "complement" was different.) # * [AF024666] order(complement(18919..19224),complement(13965..14892)) [ 'AF024666', 'complement(order(13965..14892,18919..19224))', :complement_order, TestLoc.new(13965, 14892), TestLoc.new(18919, 19224) ], # * [AF264948] order(27066..27076,27089..27099,27283..27314,27330..27352) [ 'AF264948', 'order(27066..27076,27089..27099,27283..27314,27330..27352)', :order, TestLoc.new(27066, 27076), TestLoc.new(27089, 27099), TestLoc.new(27283, 27314), TestLoc.new(27330, 27352) ], # * [D63363] order(3..26,complement(964..987)) # (see below) # * [ECOCURLI2] order(complement(1009..>1260),complement(AF081827.1:<1..177)) [ 'ECOCURLI2', 'order(complement(1009..>1260),complement(AF081827.1:<1..177))', :order, TestLoc.new(:complement, 1009, '>', 1260), TestLoc.new(:complement, 'AF081827.1', '<', 1, 177) ], # * [S72388S2] order(join(S72388.1:757..911,S72388.1:609..1542),1..>139) # (not supported) # * [HEYRRE07] order(complement(1..38),complement(M82666.1:1..140),complement(M82665.1:1..176),complement(M82664.1:1..215),complement(M82663.1:1..185),complement(M82662.1:1..49),complement(M82661.1:1..133)) [ 'HEYRRE07', 'order(complement(1..38),complement(M82666.1:1..140),complement(M82665.1:1..176),complement(M82664.1:1..215),complement(M82663.1:1..185),complement(M82662.1:1..49),complement(M82661.1:1..133))', :order, TestLoc.new(:complement, 1, 38), TestLoc.new(:complement, 'M82666.1', 1, 140), TestLoc.new(:complement, 'M82665.1', 1, 176), TestLoc.new(:complement, 'M82664.1', 1, 
215), TestLoc.new(:complement, 'M82663.1', 1, 185), TestLoc.new(:complement, 'M82662.1', 1, 49), TestLoc.new(:complement, 'M82661.1', 1, 133) ], # * [COL11A1G34] order(AF101079.1:558..1307,AF101080.1:1..749,AF101081.1:1..898,AF101082.1:1..486,AF101083.1:1..942,AF101084.1:1..1734,AF101085.1:1..2385,AF101086.1:1..1813,AF101087.1:1..2287,AF101088.1:1..1073,AF101089.1:1..989,AF101090.1:1..5017,AF101091.1:1..3401,AF101092.1:1..1225,AF101093.1:1..1072,AF101094.1:1..989,AF101095.1:1..1669,AF101096.1:1..918,AF101097.1:1..1114,AF101098.1:1..1074,AF101099.1:1..1709,AF101100.1:1..986,AF101101.1:1..1934,AF101102.1:1..1699,AF101103.1:1..940,AF101104.1:1..2330,AF101105.1:1..4467,AF101106.1:1..1876,AF101107.1:1..2465,AF101108.1:1..1150,AF101109.1:1..1170,AF101110.1:1..1158,AF101111.1:1..1193,1..611) [ 'COL11A1G34', 'order(AF101079.1:558..1307,AF101080.1:1..749,AF101081.1:1..898,AF101082.1:1..486,AF101083.1:1..942,AF101084.1:1..1734,AF101085.1:1..2385,AF101086.1:1..1813,AF101087.1:1..2287,AF101088.1:1..1073,AF101089.1:1..989,AF101090.1:1..5017,AF101091.1:1..3401,AF101092.1:1..1225,AF101093.1:1..1072,AF101094.1:1..989,AF101095.1:1..1669,AF101096.1:1..918,AF101097.1:1..1114,AF101098.1:1..1074,AF101099.1:1..1709,AF101100.1:1..986,AF101101.1:1..1934,AF101102.1:1..1699,AF101103.1:1..940,AF101104.1:1..2330,AF101105.1:1..4467,AF101106.1:1..1876,AF101107.1:1..2465,AF101108.1:1..1150,AF101109.1:1..1170,AF101110.1:1..1158,AF101111.1:1..1193,1..611)', :order, TestLoc.new('AF101079.1', 558, 1307), TestLoc.new('AF101080.1', 1, 749), TestLoc.new('AF101081.1', 1, 898), TestLoc.new('AF101082.1', 1, 486), TestLoc.new('AF101083.1', 1, 942), TestLoc.new('AF101084.1', 1, 1734), TestLoc.new('AF101085.1', 1, 2385), TestLoc.new('AF101086.1', 1, 1813), TestLoc.new('AF101087.1', 1, 2287), TestLoc.new('AF101088.1', 1, 1073), TestLoc.new('AF101089.1', 1, 989), TestLoc.new('AF101090.1', 1, 5017), TestLoc.new('AF101091.1', 1, 3401), TestLoc.new('AF101092.1', 1, 1225), TestLoc.new('AF101093.1', 1, 1072), 
TestLoc.new('AF101094.1', 1, 989), TestLoc.new('AF101095.1', 1, 1669), TestLoc.new('AF101096.1', 1, 918), TestLoc.new('AF101097.1', 1, 1114), TestLoc.new('AF101098.1', 1, 1074), TestLoc.new('AF101099.1', 1, 1709), TestLoc.new('AF101100.1', 1, 986), TestLoc.new('AF101101.1', 1, 1934), TestLoc.new('AF101102.1', 1, 1699), TestLoc.new('AF101103.1', 1, 940), TestLoc.new('AF101104.1', 1, 2330), TestLoc.new('AF101105.1', 1, 4467), TestLoc.new('AF101106.1', 1, 1876), TestLoc.new('AF101107.1', 1, 2465), TestLoc.new('AF101108.1', 1, 1150), TestLoc.new('AF101109.1', 1, 1170), TestLoc.new('AF101110.1', 1, 1158), TestLoc.new('AF101111.1', 1, 1193), TestLoc.new(1, 611) ], # group() are found in the COMMENT field only (in GenBank 122.0) # # gbpat2.seq: FT repeat_region group(598..606,611..619) # gbpat2.seq: FT repeat_region group(8..16,1457..1464). # gbpat2.seq: FT variation group(t1,t2) # gbpat2.seq: FT variation group(t1,t3) # gbpat2.seq: FT variation group(t1,t2,t3) # gbpat2.seq: FT repeat_region group(11..202,203..394) # gbpri9.seq:COMMENT Residues reported = 'group(1..2145);'. 
# # (G) ID:location # * [AARPOB2] order(AF194507.1:<1..510,1..>871) [ 'AARPOB2', 'order(AF194507.1:<1..510,1..>871)', :order, TestLoc.new('AF194507.1', '<', 1, 510), TestLoc.new(1, '>', 871) ], # * [AF178221S4] join(AF178221.1:<1..60,AF178222.1:1..63,AF178223.1:1..42,1..>90) [ 'AF178221S4', 'join(AF178221.1:<1..60,AF178222.1:1..63,AF178223.1:1..42,1..>90)', nil, TestLoc.new('AF178221.1', '<', 1, 60), TestLoc.new('AF178222.1', 1, 63), TestLoc.new('AF178223.1', 1, 42), TestLoc.new( 1, '>', 90) ], # * [BOVMHDQBY4] join(M30006.1:(392.467)..575,M30005.1:415..681,M30004.1:129..410,M30004.1:907..1017,521..534) # not supported # * [HUMSOD102] order(L44135.1:(454.445)..>538,<1..181) # not supported # * [SL16SRRN1] order(<1..>267,X67092.1:<1..>249,X67093.1:<1..>233) [ 'SL16SRRN1', 'order(<1..>267,X67092.1:<1..>249,X67093.1:<1..>233)', :order, TestLoc.new( '<', 1, '>', 267), TestLoc.new('X67092.1', '<', 1, '>', 249), TestLoc.new('X67093.1', '<', 1, '>', 233) ], # (I) <, > # * [A5U48871] <1..>318 [ 'A5U48871', '<1..>318', nil, TestLoc.new('<', 1, '>', 318) ], # * [AA23SRRNP] <1..388 [ 'AA23SRRNP', '<1..388', nil, TestLoc.new('<', 1, 388) ], # * [AA23SRRNP] 503..>1010 [ 'AA23SRRNP', '503..>1010', nil, TestLoc.new(503, '>', 1010) ], # * [AAM5961] complement(<1..229) [ 'AAM5961', 'complement(<1..229)', nil, TestLoc.new(:complement, '<', 1, 229) ], # * [AAM5961] complement(5231..>5598) [ 'AAM5961', 'complement(5231..>5598)', nil, TestLoc.new(:complement, 5231, '>', 5598) ], # * [AF043934] join(<1,60..99,161..241,302..370,436..594,676..887,993..1141,1209..1329,1387..1559,1626..1646,1708..>1843) [ 'AF043934', 'join(<1,60..99,161..241,302..370,436..594,676..887,993..1141,1209..1329,1387..1559,1626..1646,1708..>1843)', nil, TestLoc.new('<', 1), TestLoc.new(60, 99), TestLoc.new(161,241), TestLoc.new(302,370), TestLoc.new(436,594), TestLoc.new(676,887), TestLoc.new(993,1141), TestLoc.new(1209,1329), TestLoc.new(1387,1559), TestLoc.new(1626,1646), TestLoc.new(1708, '>', 1843) ], # * 
[BACSPOJ] <180..(731.761) # not supported # * [BBU17998] (88.89)..>1122 # not supported # * [AARPOB2] order(AF194507.1:<1..510,1..>871) # (see above) # * [SL16SRRN1] order(<1..>267,X67092.1:<1..>249,X67093.1:<1..>233) # (see above) # (J) complement # * [AF179299] complement(53^54) [ 'AF179299', 'complement(53^54)', nil, TestLoc.new(:complement, 53, '^', 54) ], # * [AP000001] join(complement(1..61),complement(AP000007.1:252907..253505)) [ 'AP000001', 'join(complement(1..61),complement(AP000007.1:252907..253505))', nil, TestLoc.new(:complement, 1, 61), TestLoc.new(:complement, 'AP000007.1', 252907, 253505) ], # * [AF209868S2] order(complement(1..>308),complement(AF209868.1:75..336)) [ 'AF209868S2', 'order(complement(1..>308),complement(AF209868.1:75..336))', :order, TestLoc.new(:complement, 1, '>', 308), TestLoc.new(:complement, 'AF209868.1', 75, 336) ], # * [CPPLCG] complement(<1..(1093.1098)) # not supported # * [D63363] order(3..26,complement(964..987)) [ 'D63363]', 'order(3..26,complement(964..987))', :order, TestLoc.new(3,26), TestLoc.new(:complement, 964, 987) ], # * [ECHTGA] complement((1700.1708)..(1715.1721)) # not supported # * [ECOUXW] complement(order(1636..1641,1658..1663)) # (Note that in older version of GenBank, the order of # "order" and "complement" was different.) 
# * [ECOUXW] order(complement(1658..1663),complement(1636..1641)) # [ 'ECOUXW', 'complement(order(1636..1641,1658..1663))', :complement_order, TestLoc.new(:complement, 1636, 1641), TestLoc.new(:complement, 1658, 1663) ], # * [LPATOVGNS] complement((64.74)..1525) # not supported # * [AF129075] complement(join(71606..71829,75327..75446,76039..76203,76282..76353,76914..77029,77114..77201,77276..77342,78138..78316,79755..79892,81501..81562,81676..81856,82341..82490,84208..84287,85032..85122,88316..88403)) [ 'AF129075', 'complement(join(71606..71829,75327..75446,76039..76203,76282..76353,76914..77029,77114..77201,77276..77342,78138..78316,79755..79892,81501..81562,81676..81856,82341..82490,84208..84287,85032..85122,88316..88403))', :complement_join, TestLoc.new(71606,71829), TestLoc.new(75327,75446), TestLoc.new(76039,76203), TestLoc.new(76282,76353), TestLoc.new(76914,77029), TestLoc.new(77114,77201), TestLoc.new(77276,77342), TestLoc.new(78138,78316), TestLoc.new(79755,79892), TestLoc.new(81501,81562), TestLoc.new(81676,81856), TestLoc.new(82341,82490), TestLoc.new(84208,84287), TestLoc.new(85032,85122), TestLoc.new(88316,88403) ], # * [ZFDYST2] join(AF137145.1:<1..18,complement(<1..99)) [ 'ZFDYST2', 'join(AF137145.1:<1..18,complement(<1..99))', nil, TestLoc.new('AF137145.1', '<', 1, 18), TestLoc.new(:complement, '<', 1, 99) ], # (K) replace # * [CSU27710] replace(64,"A") [ 'CSU27710', 'replace(64,"a")', nil, TestLoc.new(64, :sequence => "a") ], # * [CELXOL1ES] replace(5256,"t") [ 'CELXOL1ES', 'replace(5256,"t")', nil, TestLoc.new(5256,:sequence => "t") ], # * [ANICPC] replace(1..468,"") [ 'ANICPC', 'replace(1..468,"")', nil, TestLoc.new(1, 468, :sequence => "") ], # * [CSU27710] replace(67..68,"GC") [ 'CSU27710', 'replace(67..68,"gc")', nil, TestLoc.new(67, 68, :sequence => "gc") ], # * [CELXOL1ES] replace(4480^4481,"") <= ? 
only one case in GenBank 122.0 [ 'CELXOL1ES', 'replace(4480^4481,"")', nil, TestLoc.new(4480, '^', 4481, :sequence => "") ], # * [ECOUW87] replace(4792^4793,"a") [ 'ECOUW87', 'replace(4792^4793,"a")', nil, TestLoc.new(4792, '^', 4793, :sequence => "a") ], # * [CEU34893] replace(1..22,"ggttttaacccagttactcaag") [ 'CEU34893', 'replace(1..22,"ggttttaacccagttactcaag")', nil, TestLoc.new(1, 22, :sequence => "ggttttaacccagttactcaag") ], # * [APLPCII] replace(1905^1906,"acaaagacaccgccctacgcc") [ 'APLPCII', 'replace(1905^1906,"acaaagacaccgccctacgcc")', nil, TestLoc.new(1905, '^', 1906, :sequence => "acaaagacaccgccctacgcc") ], # * [MBDR3S1] replace(1400..>9281,"") [ 'MBDR3S1', 'replace(1400..>9281,"")', nil, TestLoc.new(1400, '>', 9281, :sequence => "") ], # * [HUMMHDPB1F] replace(complement(36..37),"ttc") [ 'HUMMHDPB1F', 'replace(complement(36..37),"ttc")', nil, TestLoc.new(:complement, 36, 37, :sequence => "ttc") ], # * [HUMMIC2A] replace((651.655)..(651.655),"") # not supported # * [LEIMDRPGP] replace(1..1554,"L01572") # not supported # * [TRBND3] replace(376..395,"atttgtgtgtggtaatta") [ 'TRBND3', 'replace(376..395,"atttgtgtgtggtaatta")', nil, TestLoc.new(376, 395, :sequence => "atttgtgtgtggtaatta") ], # * [TRBND3] replace(376..395,"atttgtgtgggtaatttta") # * [TRBND3] replace(376..395,"attttgttgttgttttgttttgaatta") # * [TRBND3] replace(376..395,"atgtgtggtgaatta") # * [TRBND3] replace(376..395,"atgtgtgtggtaatta") # * [TRBND3] replace(376..395,"gatttgttgtggtaatttta") # (see above) # * [MSU09460] replace(193,"t") [ 'MSU09460', 'replace(193,"t")', nil, TestLoc.new(193, :sequence => "t") ], # * [HUMMAGE12X] replace(3002..3003, "GC") [ 'HUMMAGE12X', 'replace(3002..3003,"gc")', nil, TestLoc.new(3002, 3003, :sequence => "gc") ], # * [ADR40FIB] replace(510..520, "taatcctaccg") [ 'ADR40FIB', 'replace(510..520,"taatcctaccg")', nil, TestLoc.new(510, 520, :sequence => "taatcctaccg") ], # * [RATDYIIAAB] 
replace(1306..1443,"aagaacatccacggagtcagaactgggctcttcacgccggatttggcgttcgaggccattgtgaaaaagcaggcaatgcaccagcaagctcagttcctacccctgcgtggacctggttatccaggagctaatcagtacagttaggtggtcaagctgaaagagccctgtctgaaa") [ 'RATDYIIAAB', 'replace(1306..1443,"aagaacatccacggagtcagaactgggctcttcacgccggatttggcgttcgaggccattgtgaaaaagcaggcaatgcaccagcaagctcagttcctacccctgcgtggacctggttatccaggagctaatcagtacagttaggtggtcaagctgaaagagccctgtctgaaa")', nil, TestLoc.new(1306, 1443, :sequence => "aagaacatccacggagtcagaactgggctcttcacgccggatttggcgttcgaggccattgtgaaaaagcaggcaatgcaccagcaagctcagttcctacccctgcgtggacctggttatccaggagctaatcagtacagttaggtggtcaagctgaaagagccctgtctgaaa") ] ] #TestData= def test_locations_to_s TestData.each do |a| label = a[0] str = a[1] op = a[2] locs = a[3..-1] locs.collect! { |x| x.to_location } case op when :complement_join, :complement_order locs.reverse! locs.each { |loc| loc.strand = -1 } op = op.to_s.sub(/complement_/, '').intern end locations = Bio::Locations.new(locs) locations.operator = op if op assert_equal(str, locations.to_s, "to_s: wrong for #{label}") end end def test_locations_roundtrip TestData.each do |a| label = a[0] str = a[1] locations = Bio::Locations.new(str) assert_equal(str, locations.to_s, "round trip: wrong for #{label}") end end end end bio-1.4.3.0001/test/unit/bio/util/0000755000004100000410000000000012200110570016347 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/util/test_restriction_enzyme.rb0000644000004100000410000000223512200110570023671 0ustar www-datawww-data# # test/unit/bio/util/restriction_enzyme.rb - Unit test for Bio::RestrictionEnzyme # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 
# Checks Bio::RestrictionEnzyme.enzyme_name? (reached through @t, which
# setup points at Bio::RestrictionEnzyme) for known and unknown names.
#
# assert_equal takes the expected value first; with the arguments the other
# way round a failure would report expectation and observation swapped.
def test_enzyme_name
  assert_equal(true,  @t.enzyme_name?('AarI'))
  assert_equal(false, @t.enzyme_name?('atgc'))
  # the lower-cased spelling of AarI is expected to be accepted as well
  assert_equal(true,  @t.enzyme_name?('aari'))
  assert_equal(true,  @t.enzyme_name?('EcoRI'))
  assert_equal(false, @t.enzyme_name?('EcoooRI'))
end
# Looks up a handful of keys in Bio::ColorScheme::Buried and compares the
# returned colour strings. The keys are a residue letter in either case, a
# symbol, nil, a gap character, an integer and an unknown string.
def test_buried
  scheme = Bio::ColorScheme::Buried
  # [expected colour, lookup key]
  [
    ['00DC22', 'A'],
    ['00BF3F', :c],
    [nil,      nil],
    ['FFFFFF', '-'],
    ['FFFFFF', 7],
    ['FFFFFF', 'junk'],
    ['00CC32', 't']
  ].each do |color, key|
    assert_equal(color, scheme[key])
  end
end
# Bio::SiRNA.new must accept the sequence alone or followed by up to three
# further positional arguments, and raise ArgumentError for a fourth one.
def test_new
  naseq = Bio::Sequence::NA.new(RANDOM_SEQ)
  accepted = [[], [21], [21, 60.0], [21, 60.0, 40.0]]
  accepted.each do |extra|
    assert(Bio::SiRNA.new(naseq, *extra))
  end
  assert_raise(ArgumentError) do
    Bio::SiRNA.new(naseq, 21, 60.0, 40.0, 10.0)
  end
end
# Bio::SiRNA::Pair.new takes exactly seven positional arguments: one fewer
# or one more must raise ArgumentError.
def test_new
  # target, sense, antisense, start, stop, rule, gc_percent
  full_args = ["", "", "", 0, 1, 'rule', 60.0]
  too_few   = full_args[0..-2]
  too_many  = full_args + [""]

  assert_raise(ArgumentError) { Bio::SiRNA::Pair.new(*too_few) }
  assert(Bio::SiRNA::Pair.new(*full_args))
  assert_raise(ArgumentError) { Bio::SiRNA::Pair.new(*too_many) }
end
# Checks the fragment collections that setup obtained from @t.cut.
def test_cut
  two_cuts = @obj_1
  assert_equal(["ag", "cag"], two_cuts.primary)
  assert_equal(["gtc", "tc"], two_cuts.complement)
  assert_equal(2, two_cuts.size)
  # the result is a Fragments collection whose members are Fragment objects
  assert_equal(Bio::RestrictionEnzyme::Fragments, two_cuts.class)
  assert_equal(Bio::RestrictionEnzyme::Fragment, two_cuts[0].class)

  # [fixture, expected primary-strand fragments]
  primary_cases = [
    [@obj_2,   ["ag"]],
    [@obj_3,   ["ag", "agt", "cag"]],
    # @obj_4: 'atgcatgcatgc' cut with the enzyme ('atgcatgc', [3,3])
    [@obj_4,   ["atg", "atgcatg", "catg", "catgc"]],
    [@obj_5,   ["atg", "atgcatg", "catgc", "catgcatgc"]],
    [@obj_6,   ["a", "ag", "g", "ga"]],
    [@obj_7,   ["ccaggaaaaaga", "ccaggaaag", "cctggaaaagttaac", "ga"]],
    [@obj_8,   ["aac", "ccaggaaaaaga", "ccaggaaag", "cctggaaaagtt", "ga"]],
    # @obj_4bd: same enzyme plus 'cc^c' -- mix of sometimes cut and always cut
    [@obj_4bd, ["atg", "atgcatg", "c", "catg", "catgcc"]]
  ]
  primary_cases.each do |fragments, expected|
    assert_equal(expected, fragments.primary)
  end

  assert_equal(["gg", "gtac", "gtacg", "tac", "tacgtac"], @obj_4bd.complement)
end
# Checks the per-fragment strings and index ranges of the two fixtures that
# setup creates by passing {:view_ranges => true} to @t.cut (@obj_vr1 and
# @obj_vr2).
#
# For every fragment the test reads #primary, #complement and the four
# bounds #p_left, #p_right, #c_left, #c_right.
# NOTE(review): the bounds look like 0-based inclusive positions on the
# primary / complement strand -- confirm against the Fragment documentation.
#
# NOTE(review): several expected strings below contain padding blanks
# ('ga ', ' tttttctggtcc', ...). In this copy each run of blanks appears as a
# single space; judging by the bounds they were presumably padded to a common
# width before whitespace was collapsed -- verify against the upstream file
# before relying on these literals.
def test_view_ranges
  # whole-collection view of the EcoRII digest
  assert_equal(["ccaggaaaaaga", "ccaggaaag", "cctggaaaagttaac", "ga"], @obj_vr1.primary)
  assert_equal(["ctggtcc", "tttcggacc", "ttttcaattg", "tttttctggtcc"], @obj_vr1.complement)

  # fragment 0
  a0 = @obj_vr1[0]
  assert_equal('ga ', a0.primary)
  assert_equal('ctggtcc', a0.complement)
  assert_equal(0, a0.p_left)
  assert_equal(1, a0.p_right)
  assert_equal(0, a0.c_left)
  assert_equal(6, a0.c_right)

  # fragment 1
  a1 = @obj_vr1[1]
  assert_equal('ccaggaaaaaga ', a1.primary)
  assert_equal(' tttttctggtcc', a1.complement)
  assert_equal(2, a1.p_left)
  assert_equal(13, a1.p_right)
  assert_equal(7, a1.c_left)
  assert_equal(18, a1.c_right)

  # fragment 2
  a2 = @obj_vr1[2]
  assert_equal('ccaggaaag ', a2.primary)
  assert_equal(' tttcggacc', a2.complement)
  assert_equal(14, a2.p_left)
  assert_equal(22, a2.p_right)
  assert_equal(19, a2.c_left)
  assert_equal(27, a2.c_right)

  # fragment 3 (last one)
  a3 = @obj_vr1[3]
  assert_equal('cctggaaaagttaac', a3.primary)
  assert_equal(' ttttcaattg', a3.complement)
  assert_equal(23, a3.p_left)
  assert_equal(37, a3.p_right)
  assert_equal(28, a3.c_left)
  assert_equal(37, a3.c_right)

  # indexing past the last fragment yields nil
  a4 = @obj_vr1[4]
  assert_equal(nil, a4)

  # second fixture: 'cagagag' cut with 'ag^ag'; the options hash was passed
  # before the enzyme in setup
  assert_equal(["ag", "ag", "cag"], @obj_vr2.primary)
  assert_equal(["gtc", "tc", "tc"], @obj_vr2.complement)

  a0 = @obj_vr2[0]
  assert_equal('cag', a0.primary)
  assert_equal('gtc', a0.complement)
  assert_equal(0, a0.p_left)
  assert_equal(2, a0.p_right)
  assert_equal(0, a0.c_left)
  assert_equal(2, a0.c_right)

  a1 = @obj_vr2[1]
  assert_equal('ag', a1.primary)
  assert_equal('tc', a1.complement)
  assert_equal(3, a1.p_left)
  assert_equal(4, a1.p_right)
  assert_equal(3, a1.c_left)
  assert_equal(4, a1.c_right)

  a2 = @obj_vr2[2]
  assert_equal('ag', a2.primary)
  assert_equal('tc', a2.complement)
  assert_equal(5, a2.p_left)
  assert_equal(6, a2.p_right)
  assert_equal(5, a2.c_left)
  assert_equal(6, a2.c_right)

  # indexing past the last fragment yields nil
  a3 = @obj_vr2[3]
  assert_equal(nil, a3)
end
# The non-public internal_data_hash of the fixture must map every stored
# number to true.
def test_internal_data_hash
  actual = @obj.__send__(:internal_data_hash)
  expected = {}
  [ -1, 3, 14, 159, 265, 358, 979, 3238, 4626 ].each do |number|
    expected[number] = true
  end
  assert_equal(expected, actual)
end
# #each must yield every stored number exactly once, in ascending order.
#
# The yielded values are collected first and compared as a whole, so the
# test also fails when #each yields nothing or stops early. Comparing
# element by element inside the block would pass in both of those cases.
def test_each
  expected_values = [ -1, 3, 14, 159, 265, 358, 979, 3238, 4626 ]
  yielded = []
  @obj.each { |i| yielded << i }
  assert_equal(expected_values, yielded)
end
# Appending a new number with << must return the receiver and store the
# number at its sorted position.
def test_ltlt
  expected = [ -1, 3, 14, 15, 159, 265, 358, 979, 3238, 4626 ]
  # checks if the method returns self: identity is required here, because a
  # merely equal copy would satisfy assert_equal
  assert_same(@obj, @obj << 15)
  # checks the value
  assert_equal(expected, @obj.to_a)
end
# #max must report the largest cut location of each fixture.
def test_max
  # [expected maximum, fixture]
  [
    [3, @obj_1],
    [3, @obj_2],
    [7, @obj_3]
  ].each do |largest, locations|
    assert_equal(largest, locations.max)
  end
end
# Constructing cut locations from a list that contains 0, nil or a repeated
# position must raise ArgumentError, whether the positions are passed as one
# array or as separate arguments.
def test_argument_error
  rejected = [
    [[0,1,2]],   [0,1,2,0],
    [[nil,1,2]], [nil,1,2,nil],
    [[1,1,2]],   [1,1,2,2]
  ]
  rejected.each do |arguments|
    assert_raise(ArgumentError) { @t.new(*arguments) }
  end
end
# Checks both strands of the five alignments that setup obtained from
# @t.align.
def test_align
  # [fixture, expected primary, expected complement]
  [
    [@obj_1, 'gattaca',           'ctaatgt'],
    [@obj_2, 'gattacannn',        'tttttttnnn'],
    [@obj_3, 'nnngattaca',        'nnnaaaaaaa'],
    [@obj_4, 'nnntttttttnnn',     'nnnaaaaaaannn'],
    [@obj_5, 'nnnnngattacannnnn', 'nnnnnctaatgtnnnnn']
  ].each do |aligned, primary, complement|
    assert_equal(primary, aligned.primary)
    assert_equal(complement, aligned.complement)
  end
end
# Each CutLocationPair fixture must compare equal to the plain
# [primary, complement] array it was built from; a missing side is nil.
def test_contents
  expected = [[3,5], [3,5], [3,5], [0,5], [3,nil], [nil,3], [3,nil]]
  pairs = [@obj_1, @obj_2, @obj_3, @obj_4, @obj_5, @obj_6, @obj_7]
  expected.zip(pairs).each do |contents, pair|
    assert_equal(contents, pair)
  end
end
# #complement must return the second member of each pair, or nil when the
# pair was built without one.
def test_complement
  # [expected complement position, fixture]
  cases = [
    [5,   @obj_1],
    [5,   @obj_2],
    [5,   @obj_3],
    [5,   @obj_4],
    [nil, @obj_5],
    [3,   @obj_6],
    [nil, @obj_7]
  ]
  cases.each do |position, pair|
    assert_equal(position, pair.complement)
  end
end
@obj_3 = @t.new((3..5)) @obj_4 = @t.new(-3..5) @obj_5 = @t.new(3) @obj_6 = @t.new(nil,3) @obj_7 = @t.new(3,nil) end def test_contents assert_equal([3,5], @obj_1) assert_equal([3,5], @obj_2) assert_equal([3,5], @obj_3) assert_equal([-3,5], @obj_4) assert_equal([3,nil], @obj_5) assert_equal([nil,3], @obj_6) assert_equal([3,nil], @obj_7) end def test_primary assert_equal(3, @obj_1.primary) assert_equal(3, @obj_2.primary) assert_equal(3, @obj_3.primary) assert_equal(-3, @obj_4.primary) assert_equal(3, @obj_5.primary) assert_equal(nil, @obj_6.primary) assert_equal(3, @obj_7.primary) end def test_complement assert_equal(5, @obj_1.complement) assert_equal(5, @obj_2.complement) assert_equal(5, @obj_3.complement) assert_equal(5, @obj_4.complement) assert_equal(nil, @obj_5.complement) assert_equal(3, @obj_6.complement) assert_equal(nil, @obj_7.complement) end def test_argument_error assert_raise(ArgumentError) { @t.new([3,5,6]) } assert_raise(ArgumentError) { @t.new(0,1) } assert_raise(ArgumentError) { @t.new(0,0) } assert_raise(ArgumentError) { @t.new('3',5) } end end end; end ././@LongLink0000000000000000000000000000015600000000000011567 Lustar rootrootbio-1.4.3.0001/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rbbio-1.4.3.0001/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_no0000644000004100000410000000617712200110570034055 0ustar www-datawww-data# # test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb - Unit test for Bio::RestrictionEnzyme::DoubleStranded::CutLocationsInEnzymeNotation # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests 
require 'test/unit' require 'bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation' module Bio; module TestRestrictionEnzyme #:nodoc: class TestDoubleStrandedCutLocationsInEnzymeNotation < Test::Unit::TestCase #:nodoc: def setup @t = Bio::RestrictionEnzyme::DoubleStranded::CutLocationPairInEnzymeNotation @tt = Bio::RestrictionEnzyme::DoubleStranded::CutLocationsInEnzymeNotation @obj_1 = @t.new([3,5]) @obj_2 = @t.new(3, 5) @obj_3 = @t.new((3..5)) @obj_4 = @t.new(-3..5) @obj_5 = @t.new(3) @obj_6 = @t.new(nil,3) @obj_7 = @t.new(3,nil) @obj_8 = @t.new(-8, -7) @locations = @tt.new(@obj_1, @obj_2, @obj_3, @obj_4, @obj_5, @obj_6, @obj_7, @obj_8) @loc_2 = @tt.new(@t.new(-2,-2), @t.new(1,1)) @loc_3 = @tt.new(@t.new(1,2)) end def test_contents assert_equal([3,5], @locations[0]) assert_equal([3,nil], @locations[-2]) end def test_primary assert_equal([3, 3, 3, -3, 3, nil, 3, -8], @locations.primary) end def test_complement assert_equal([5, 5, 5, 5, nil, 3, nil, -7], @locations.complement) end def test_primary_to_array_index assert_equal([10, 10, 10, 5, 10, nil, 10, 0], @locations.primary_to_array_index) assert_equal([0,2], @loc_2.primary_to_array_index) assert_equal([0], @loc_3.primary_to_array_index) end def test_primary_to_array_index_class assert_equal(Array, @locations.primary_to_array_index.class) assert_equal(Array, @loc_2.primary_to_array_index.class) end def test_complement_to_array_index assert_equal([12, 12, 12, 12, nil, 10, nil, 1], @locations.complement_to_array_index) assert_equal([0,2], @loc_2.complement_to_array_index) assert_equal([1], @loc_3.complement_to_array_index) end def test_complement_to_array_index_class assert_equal(Array, @locations.complement_to_array_index.class) assert_equal(Array, @loc_2.complement_to_array_index.class) end def test_to_array_index assert_equal( [ [10, 12], [10, 12], [10, 12], [5, 12], [10, nil], [nil, 10], [10, nil], [0, 1] ], @locations.to_array_index) assert_equal( [ [0, 0], [2, 2], ], 
@loc_2.to_array_index) assert_equal([[0,1]], @loc_3.to_array_index) end def test_to_array_index_class assert_equal(Bio::RestrictionEnzyme::DoubleStranded::CutLocations, @locations.to_array_index.class) assert_equal(Bio::RestrictionEnzyme::DoubleStranded::CutLocations, @loc_2.to_array_index.class) end end end; end bio-1.4.3.0001/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb0000644000004100000410000000274612200110570031704 0ustar www-datawww-data# # test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb - Unit test for Bio::RestrictionEnzyme::DoubleStranded::CutLocations # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/util/restriction_enzyme/double_stranded/cut_locations' module Bio; module TestRestrictionEnzyme #:nodoc: class TestDoubleStrandedCutLocations < Test::Unit::TestCase #:nodoc: def setup @t = Bio::RestrictionEnzyme::DoubleStranded::CutLocationPair @tt = Bio::RestrictionEnzyme::DoubleStranded::CutLocations @obj_1 = @t.new([3,5]) @obj_2 = @t.new(3, 5) @obj_3 = @t.new((3..5)) @obj_4 = @t.new(3..5) @obj_5 = @t.new(3) @obj_6 = @t.new(nil,3) @obj_7 = @t.new(3,nil) @locations = @tt.new(@obj_1, @obj_2, @obj_3, @obj_4, @obj_5, @obj_6, @obj_7) end def test_contents assert_equal([3,5], @locations[0]) assert_equal([3,nil], @locations[-1]) end def test_primary assert_equal([3, 3, 3, 3, 3, nil, 3], @locations.primary) end def test_complement assert_equal([5, 5, 5, 5, nil, 3, nil], @locations.complement) end end end; end bio-1.4.3.0001/test/unit/bio/util/restriction_enzyme/test_dense_int_array.rb0000644000004100000410000001201512200110570027034 0ustar www-datawww-data# # 
test/unit/bio/util/restriction_enzyme/test_dense_int_array.rb - Unit test for Bio::RestrictionEnzyme::DenseIntArray # # Copyright:: Copyright (C) 2011 # Naohisa Goto # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/util/restriction_enzyme' require 'bio/util/restriction_enzyme/dense_int_array' module Bio module TestRestrictionEnzyme class TestDenseIntArray < Test::Unit::TestCase def setup @klass = Bio::RestrictionEnzyme::DenseIntArray @obj = @klass[ -1, 11, 12, 13, 14, 15, 50, 60 ] end def test_self_bracket assert_equal([ -1, 11, 12, 13, 14, 15, 50, 60 ], @obj.to_a) end def test_self_new a = @klass.new assert_instance_of(Bio::RestrictionEnzyme::DenseIntArray, a) end def test_dup assert_equal(@obj.to_a, @obj.dup.to_a) d_obj = @obj.instance_eval { internal_data } d_dup = @obj.dup.instance_eval { internal_data } assert(d_obj == d_dup) assert_not_equal(d_obj.__id__, d_dup.__id__) end def test_internal_data d = @obj.instance_eval { internal_data } r = @klass::MutableRange expected = [ r.new(-1, -1), r.new(11, 15), r.new(50, 50), r.new(60, 60) ] assert_equal(expected, d) end def test_internal_data_eq r = @klass::MutableRange d = [ r.new(-2, -2), r.new(50, 50), r.new(65, 70) ] @obj.instance_eval { self.internal_data = d } assert_equal(70, @obj.last) assert_equal([-2, 50, 65, 66, 67, 68, 69, 70], @obj.to_a) end def test_bracket assert_equal(-1, @obj[0]) assert_equal(13, @obj[3]) assert_equal(60, @obj[-1]) assert_equal([-1, 11, 12], @obj[0..2]) assert_equal([14, 15, 50], @obj[4,3]) end def test_bracket_eq assert_raise(NotImplementedError) { @obj[3] = 999 } end def test_each expected_values = [ -1, 11, 12, 13, 14, 15, 50, 60 ] @obj.each do |i| assert_equal(expected_values.shift, i) end end def test_reverse_each expected_values = [ -1, 11, 12, 13, 14, 15, 50, 60 
] @obj.reverse_each do |i| assert_equal(expected_values.pop, i) end end def test_plus obj2 = @klass[ 9, 10, 11, 12, 30 ] assert_equal([ -1, 9, 10, 11, 12, 13, 14, 15, 30, 50, 60 ], (@obj + obj2).to_a) end def test_plus_error assert_raise(TypeError) { @obj + 2 } end def test_eqeq obj2 = @klass[ -1, 11, 12, 13, 14, 15, 50, 60 ] assert_equal(true, @obj == obj2) end def test_eqeq_self assert_equal(true, @obj == @obj) end def test_eqeq_false obj2 = @klass[ 2, 3, 14, 15 ] assert_equal(false, @obj == obj2) end def test_eqeq_other obj2 = 'test' assert_equal(false, @obj == obj2) end def test_concat ary = [ 61, 62, -2, 14, 15 ] expected = [ -1, 11, 12, 13, 14, 15, 50, 60, 61, 62, -2, 14, 15 ] # checks if the method returns self assert_equal(@obj, @obj.concat(ary)) # checks the value assert_equal(expected, @obj.to_a) end def test_push expected = [ -1, 11, 12, 13, 14, 15, 50, 60, 61, 62, -2, 14, 15 ] # checks if the method returns self assert_equal(@obj, @obj.push(61, 62, -2, 14, 15)) # checks the value assert_equal(expected, @obj.to_a) end def test_unshift assert_raise(NotImplementedError) { @obj.unshift(-5, -2) } end def test_ltlt expected = [ -1, 11, 12, 13, 14, 15, 50, 60, 61 ] # checks if the method returns self assert_equal(@obj, @obj << 61) # checks the value assert_equal(expected, @obj.to_a) end def test_ltlt_larger expected = [ -1, 11, 12, 13, 14, 15, 50, 60, 70 ] # checks if the method returns self assert_equal(@obj, @obj << 70) # checks the value assert_equal(expected, @obj.to_a) end def test_ltlt_middle expected = [ -1, 11, 12, 13, 14, 15, 50, 60, 30 ] # checks if the method returns self assert_equal(@obj, @obj << 30) # checks the value assert_equal(expected, @obj.to_a) end def test_include? assert_equal(true, @obj.include?(13)) assert_equal(false, @obj.include?(999)) end def test_size assert_equal(8, @obj.size) end def test_length assert_equal(8, @obj.length) end def test_delete assert_raise(NotImplementedError) { @obj.delete(11) } end def test_sort! 
assert_equal(@obj, @obj.sort!) end def test_uniq! assert_equal(@obj, @obj.uniq!) end def test_to_a expected = [ -1, 11, 12, 13, 14, 15, 50, 60 ] assert_equal(expected, @obj.to_a) end end #class TestDenseIntArray end #module TestRestrictionEnzyme end #module Bio bio-1.4.3.0001/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb0000644000004100000410000000751712200110570027037 0ustar www-datawww-data# # test/unit/bio/util/restriction_enzyme/test_double_stranded.rb - Unit test for Bio::RestrictionEnzyme::DoubleStranded # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/util/restriction_enzyme/double_stranded' require 'bio/sequence' module Bio; module TestRestrictionEnzyme #:nodoc: class TestDoubleStranded < Test::Unit::TestCase #:nodoc: def setup @t = Bio::RestrictionEnzyme::DoubleStranded @cl = Bio::RestrictionEnzyme::DoubleStranded::CutLocationPairInEnzymeNotation @s = String @obj_1 = @t.new(@s.new('gata'), [1,2]) @obj_2 = @t.new('gata', [1,2]) @obj_3 = @t.new('garraxt', [1,2]) @obj_4 = @t.new('nnnnnnngarraxtnn', [1,2]) @obj_5 = @t.new('garraxt', @cl.new(3,2), @cl.new(-2,-1), @cl.new(9,11)) @obj_6 = @t.new('garraxt', @cl.new(3,2)) @obj_7 = @t.new('garraxt', @cl.new(3,2), @cl.new(9,11)) # @obj_8 = @t.new('garraxt', 3..2, 9..11) @obj_9 = @t.new('garraxt', [3,2], [9,11]) @obj_10 = @t.new('garraxt', [3,2], [9,11]) @obj_11 = @t.new('n^ngar^raxtnn^n') @obj_12 = @t.new('nnnn^ngar^raxtnn^nnnn') @obj_13 = @t.new(Bio::RestrictionEnzyme.rebase['EcoRII']) @obj_14 = @t.new('EcoRII') @obj_15 = @t.new('ecorii') end def test_primary assert_equal('nngarraxtnnn', @obj_5.primary) end def test_primary_with_cut_symbols 
assert_equal('n^ngar^raxtnn^n', @obj_5.primary.with_cut_symbols) assert_equal('gar^raxt', @obj_6.primary.with_cut_symbols) assert_equal('gar^raxtnn^n', @obj_7.primary.with_cut_symbols) # assert_equal('gar^raxtnn^n', @obj_8.primary.with_cut_symbols) assert_equal('gar^raxtnn^n', @obj_9.primary.with_cut_symbols) assert_equal('gar^raxtnn^n', @obj_10.primary.with_cut_symbols) assert_equal('n^ngar^raxtnn^n', @obj_11.primary.with_cut_symbols) assert_equal('n^ngar^raxtnn^n', @obj_12.primary.with_cut_symbols) assert_equal('n^ccwgg', @obj_13.primary.with_cut_symbols) assert_equal('n^ccwgg', @obj_14.primary.with_cut_symbols) assert_equal('n^ccwgg', @obj_15.primary.with_cut_symbols) end def test_complement_with_cut_symbols assert_equal('n^ct^yytxannnn^n', @obj_5.complement.with_cut_symbols) assert_equal('ct^yytxa', @obj_6.complement.with_cut_symbols) assert_equal('ct^yytxannnn^n', @obj_7.complement.with_cut_symbols) # assert_equal('ct^yytxannnn^n', @obj_8.complement.with_cut_symbols) assert_equal('ct^yytxannnn^n', @obj_9.complement.with_cut_symbols) assert_equal('ct^yytxannnn^n', @obj_10.complement.with_cut_symbols) assert_equal('n^nnctyy^txan^n', @obj_11.complement.with_cut_symbols) assert_equal('n^nnctyy^txan^n', @obj_12.complement.with_cut_symbols) assert_equal('ggwcc^n', @obj_13.complement.with_cut_symbols) assert_equal('ggwcc^n', @obj_14.complement.with_cut_symbols) assert_equal('ggwcc^n', @obj_15.complement.with_cut_symbols) end def test_complement assert_equal('nctyytxannnnn', @obj_5.complement) end def test_cut_locations assert_equal([[4, 3], [0, 1], [10, 12]], @obj_5.cut_locations) end def test_cut_locations_in_enzyme_notation assert_equal([[3, 2], [-2, -1], [9, 11]], @obj_5.cut_locations_in_enzyme_notation) end def test_argument_error assert_raise(ArgumentError) { @t.new('garraxt', [3,2,9,11]) } assert_raise(ArgumentError) { @t.new(Bio::RestrictionEnzyme.rebase['ecorii'] )} assert_raise(ArgumentError) { @t.new(Bio::RestrictionEnzyme.rebase['EzzRII']) } end # NOTE def 
test_to_re end end end; end bio-1.4.3.0001/test/unit/bio/util/restriction_enzyme/analysis/0000755000004100000410000000000012200110570024126 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb0000644000004100000410000002433012200110570030653 0ustar www-datawww-data# # test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb - Unit test for Bio::RestrictionEnzyme::Analysis::CalculatedCuts # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/util/restriction_enzyme/range/sequence_range/calculated_cuts' require 'bio/util/restriction_enzyme/range/cut_range' require 'bio/util/restriction_enzyme/range/cut_ranges' require 'bio/util/restriction_enzyme/range/horizontal_cut_range' require 'bio/util/restriction_enzyme/range/vertical_cut_range' module Bio; module TestRestrictionEnzyme #:nodoc: class TestAnalysisCalculatedCuts < Test::Unit::TestCase #:nodoc: def setup @t = Bio::RestrictionEnzyme::Range::SequenceRange::CalculatedCuts @vcr = Bio::RestrictionEnzyme::Range::VerticalCutRange @crs = Bio::RestrictionEnzyme::Range::CutRanges @hcr = Bio::RestrictionEnzyme::Range::HorizontalCutRange #a.add_cut_range(p_cut_left, p_cut_right, c_cut_left, c_cut_right ) @obj_1 = @t.new(6) @obj_1.add_cuts_from_cut_ranges( @crs.new( [@vcr.new(0,nil,nil,3), @vcr.new(nil,2,nil,nil)] )) @obj_1b = @obj_1.dup @obj_1b.remove_incomplete_cuts @obj_2 = @t.new(6) @obj_2.add_cuts_from_cut_ranges( @crs.new( [@vcr.new(0,2,nil,nil), @vcr.new(3,nil,4,nil)] )) @obj_2b = @obj_2.dup @obj_2b.remove_incomplete_cuts @obj_3 = @t.new(6) @obj_3.add_cuts_from_cut_ranges( @crs.new( [@vcr.new(0,2,nil,nil), 
@vcr.new(3,nil,4,nil)] )) @obj_3.add_cuts_from_cut_ranges( @crs.new( [@hcr.new(0), @hcr.new(5)] )) @obj_3b = @obj_3.dup @obj_3b.remove_incomplete_cuts @obj_4 = @t.new(6) @obj_4.add_cuts_from_cut_ranges( @crs.new( [@vcr.new(0,2,1,3)] )) @obj_4b = @obj_4.dup @obj_4b.remove_incomplete_cuts # Same thing, declared a different way @obj_4_c1 = @t.new(6) @obj_4_c1.add_cuts_from_cut_ranges( @crs.new( [@vcr.new(nil,nil,1,3), @vcr.new(0,2,nil,nil)] )) @obj_4b_c1 = @obj_4_c1.dup @obj_4b_c1.remove_incomplete_cuts # Same thing, declared a different way @obj_4_c2 = @t.new(6) @obj_4_c2.add_cuts_from_cut_ranges( @crs.new( [@vcr.new(0,nil,nil,3), @vcr.new(nil,2,1,nil)] )) @obj_4b_c2 = @obj_4_c2.dup @obj_4b_c2.remove_incomplete_cuts @obj_5 = @t.new(6) @obj_5.add_cuts_from_cut_ranges( @crs.new( [@vcr.new(0,nil,nil,nil), @vcr.new(nil,4,3,nil), @hcr.new(1,2)] )) @obj_5b = @obj_5.dup @obj_5b.remove_incomplete_cuts @obj_6 = @t.new(6) @obj_6.add_cuts_from_cut_ranges( @crs.new( [@vcr.new(nil,nil,0,nil), @hcr.new(1,2), @vcr.new(nil,4,3,nil)] )) @obj_6b = @obj_6.dup @obj_6b.remove_incomplete_cuts @obj_7 = @t.new(6) @obj_7.add_cuts_from_cut_ranges( @crs.new( [@vcr.new(nil,2,nil,nil), @hcr.new(0,2)] )) @obj_7b = @obj_7.dup @obj_7b.remove_incomplete_cuts @obj_8 = @t.new(12) @obj_8.add_cuts_from_cut_ranges( @crs.new( [@hcr.new(0,1), @vcr.new(nil,nil,nil,5), @hcr.new(7,8), @hcr.new(10), @vcr.new(nil,10,nil,nil)] )) @obj_8b = @obj_8.dup @obj_8b.remove_incomplete_cuts @obj_9 = @t.new(6) @obj_9.add_cuts_from_cut_ranges( @crs.new( [@vcr.new(nil,3,nil,3)] )) @obj_9b = @obj_9.dup @obj_9b.remove_incomplete_cuts @obj_10 = @t.new(6) @obj_10.add_cuts_from_cut_ranges( @crs.new( [@vcr.new(0,nil,nil,3), @vcr.new(nil,2,nil,2)] )) @obj_10b = @obj_10.dup @obj_10b.remove_incomplete_cuts end def test_cuts x = @obj_1 assert_equal([0,2], x.vc_primary) assert_equal([3], x.vc_complement) assert_equal([1,2,3], x.hc_between_strands) x = @obj_2 assert_equal([0,2,3], x.vc_primary) assert_equal([4], x.vc_complement) 
assert_equal([1,2,4], x.hc_between_strands) x = @obj_3 assert_equal([0,2,3], x.vc_primary) assert_equal([4], x.vc_complement) assert_equal([0,1,2,4,5], x.hc_between_strands) x = @obj_4 assert_equal([0,2], x.vc_primary) assert_equal([1,3], x.vc_complement) assert_equal([1,2,3], x.hc_between_strands) x = @obj_4_c1 assert_equal([0,2], x.vc_primary) assert_equal([1,3], x.vc_complement) assert_equal([1,2,3], x.hc_between_strands) x = @obj_4_c2 assert_equal([0,2], x.vc_primary) assert_equal([1,3], x.vc_complement) assert_equal([1,2,3], x.hc_between_strands) x = @obj_5 assert_equal([0,4], x.vc_primary) assert_equal([3], x.vc_complement) assert_equal([1,2,4], x.hc_between_strands) x = @obj_6 assert_equal([4], x.vc_primary) assert_equal([0,3], x.vc_complement) assert_equal([1,2,4], x.hc_between_strands) x = @obj_7 assert_equal([2], x.vc_primary) assert_equal([], x.vc_complement) assert_equal([0,1,2], x.hc_between_strands) x = @obj_8 assert_equal([10], x.vc_primary) assert_equal([5], x.vc_complement) assert_equal([0,1,7,8,10], x.hc_between_strands) x = @obj_9 assert_equal([3], x.vc_primary) assert_equal([3], x.vc_complement) assert_equal([], x.hc_between_strands) x = @obj_10 assert_equal([0,2], x.vc_primary) assert_equal([2,3], x.vc_complement) assert_equal([1,2,3], x.hc_between_strands) end def test_cuts_after_remove_incomplete_cuts x = @obj_1b assert_equal([0,2], x.vc_primary) assert_equal([3], x.vc_complement) assert_equal([1,2,3], x.hc_between_strands) end def test_strands_for_display_current #check object_id end def test_strands_for_display x = @obj_1 assert_equal('0|1 2|3 4 5', x.strands_for_display[0]) assert_equal(' +---+-+ ', x.strands_for_display[1]) assert_equal('0 1 2 3|4 5', x.strands_for_display[2]) x = @obj_1b assert_equal('0|1 2|3 4 5', x.strands_for_display[0]) assert_equal(' +---+-+ ', x.strands_for_display[1]) assert_equal('0 1 2 3|4 5', x.strands_for_display[2]) x = @obj_2 assert_equal('0|1 2|3|4 5', x.strands_for_display[0]) assert_equal(' +---+ +-+ ', 
x.strands_for_display[1]) assert_equal('0 1 2 3 4|5', x.strands_for_display[2]) x = @obj_2b assert_equal('0|1 2|3|4 5', x.strands_for_display[0]) assert_equal(' +---+ +-+ ', x.strands_for_display[1]) assert_equal('0 1 2 3 4|5', x.strands_for_display[2]) x = @obj_3 assert_equal('0|1 2|3|4 5', x.strands_for_display[0]) assert_equal('-+---+ +-+-', x.strands_for_display[1]) assert_equal('0 1 2 3 4|5', x.strands_for_display[2]) x = @obj_3b assert_equal('0|1 2|3|4 5', x.strands_for_display[0]) assert_equal('-+---+ +-+-', x.strands_for_display[1]) assert_equal('0 1 2 3 4|5', x.strands_for_display[2]) x = @obj_4 assert_equal('0|1 2|3 4 5', x.strands_for_display[0]) assert_equal(' +-+-+-+ ', x.strands_for_display[1]) assert_equal('0 1|2 3|4 5', x.strands_for_display[2]) x = @obj_4b assert_equal('0|1 2|3 4 5', x.strands_for_display[0]) assert_equal(' +-+-+-+ ', x.strands_for_display[1]) assert_equal('0 1|2 3|4 5', x.strands_for_display[2]) x = @obj_4_c1 assert_equal('0|1 2|3 4 5', x.strands_for_display[0]) assert_equal(' +-+-+-+ ', x.strands_for_display[1]) assert_equal('0 1|2 3|4 5', x.strands_for_display[2]) x = @obj_4b_c1 assert_equal('0|1 2|3 4 5', x.strands_for_display[0]) assert_equal(' +-+-+-+ ', x.strands_for_display[1]) assert_equal('0 1|2 3|4 5', x.strands_for_display[2]) x = @obj_4_c2 assert_equal('0|1 2|3 4 5', x.strands_for_display[0]) assert_equal(' +-+-+-+ ', x.strands_for_display[1]) assert_equal('0 1|2 3|4 5', x.strands_for_display[2]) x = @obj_4b_c2 assert_equal('0|1 2|3 4 5', x.strands_for_display[0]) assert_equal(' +-+-+-+ ', x.strands_for_display[1]) assert_equal('0 1|2 3|4 5', x.strands_for_display[2]) x = @obj_5 assert_equal('0|1 2 3 4|5', x.strands_for_display[0]) assert_equal(' +--- +-+ ', x.strands_for_display[1]) assert_equal('0 1 2 3|4 5', x.strands_for_display[2]) x = @obj_5b assert_equal('0 1 2 3 4|5', x.strands_for_display[0]) assert_equal(' +-+ ', x.strands_for_display[1]) assert_equal('0 1 2 3|4 5', x.strands_for_display[2]) x = @obj_6 
assert_equal('0 1 2 3 4|5', x.strands_for_display[0]) assert_equal(' +--- +-+ ', x.strands_for_display[1]) assert_equal('0|1 2 3|4 5', x.strands_for_display[2]) x = @obj_6b assert_equal('0 1 2 3 4|5', x.strands_for_display[0]) assert_equal(' +-+ ', x.strands_for_display[1]) assert_equal('0 1 2 3|4 5', x.strands_for_display[2]) x = @obj_7 assert_equal('0 1 2|3 4 5', x.strands_for_display[0]) assert_equal('-----+ ', x.strands_for_display[1]) assert_equal('0 1 2 3 4 5', x.strands_for_display[2]) x = @obj_7b assert_equal('0 1 2|3 4 5', x.strands_for_display[0]) assert_equal('-----+ ', x.strands_for_display[1]) assert_equal('0 1 2 3 4 5', x.strands_for_display[2]) x = @obj_8 assert_equal('0 1 2 3 4 5 6 7 8 9 0|1', x.strands_for_display[0]) assert_equal('--- + --- -+ ', x.strands_for_display[1]) assert_equal('0 1 2 3 4 5|6 7 8 9 0 1', x.strands_for_display[2]) x = @obj_8b assert_equal('0 1 2 3 4 5 6 7 8 9 0 1', x.strands_for_display[0]) assert_equal(' ', x.strands_for_display[1]) assert_equal('0 1 2 3 4 5 6 7 8 9 0 1', x.strands_for_display[2]) x = @obj_9 assert_equal('0 1 2 3|4 5', x.strands_for_display[0]) assert_equal(' + ', x.strands_for_display[1]) assert_equal('0 1 2 3|4 5', x.strands_for_display[2]) x = @obj_9b assert_equal('0 1 2 3|4 5', x.strands_for_display[0]) assert_equal(' + ', x.strands_for_display[1]) assert_equal('0 1 2 3|4 5', x.strands_for_display[2]) x = @obj_10 assert_equal('0|1 2|3 4 5', x.strands_for_display[0]) assert_equal(' +---+-+ ', x.strands_for_display[1]) assert_equal('0 1 2|3|4 5', x.strands_for_display[2]) x = @obj_10b assert_equal('0|1 2|3 4 5', x.strands_for_display[0]) assert_equal(' +---+-+ ', x.strands_for_display[1]) assert_equal('0 1 2|3|4 5', x.strands_for_display[2]) end end end; end bio-1.4.3.0001/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb0000644000004100000410000000671312200110570027653 0ustar www-datawww-data# # test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb - Unit test for 
Bio::RestrictionEnzyme::Analysis::SequenceRange # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/util/restriction_enzyme/range/sequence_range' require 'bio/util/restriction_enzyme/range/sequence_range/fragments' require 'bio/util/restriction_enzyme/range/cut_range' require 'bio/util/restriction_enzyme/range/horizontal_cut_range' require 'bio/util/restriction_enzyme/range/vertical_cut_range' require 'bio/util/restriction_enzyme/range/cut_ranges' module Bio; module TestRestrictionEnzyme #:nodoc: class TestCutRanges < Test::Unit::TestCase #:nodoc: def setup @t = Bio::RestrictionEnzyme::Range::SequenceRange @fs = Bio::RestrictionEnzyme::Range::SequenceRange::Fragments #a.add_cut_range(p_cut_left, p_cut_right, c_cut_left, c_cut_right ) @vcr = Bio::RestrictionEnzyme::Range::VerticalCutRange @crs = Bio::RestrictionEnzyme::Range::CutRanges @hcr = Bio::RestrictionEnzyme::Range::HorizontalCutRange @obj_2 = @crs.new( [@vcr.new(0,2,nil,nil), @vcr.new(3,nil,4,nil)] ) @obj_3 = @crs.new( [@vcr.new(0,2,nil,nil), @vcr.new(3,nil,4,nil), @hcr.new(0), @hcr.new(5)] ) @obj_7 = @crs.new( [@vcr.new(nil,2,nil,nil), @hcr.new(0,2)] ) @obj_z = @crs.new( [@vcr.new(nil,2,nil,5), @hcr.new(1,6)] ) end def test_obj_z assert_equal(6, @obj_z.max) assert_equal(1, @obj_z.min) assert_equal(2, @obj_z.min_vertical) assert_equal(5, @obj_z.max_vertical) assert_equal(true, @obj_z.include?(6)) assert_equal(true, @obj_z.include?(4)) assert_equal(true, @obj_z.include?(2)) assert_equal(false, @obj_z.include?(-1)) assert_equal(false, @obj_z.include?(0)) assert_equal(false, @obj_z.include?(7)) end def test_obj_7 assert_equal(2, @obj_7.max) assert_equal(0, @obj_7.min) 
assert_equal(2, @obj_7.min_vertical) assert_equal(2, @obj_7.max_vertical) assert_equal(true, @obj_7.include?(0)) assert_equal(true, @obj_7.include?(1)) assert_equal(true, @obj_7.include?(2)) assert_equal(false, @obj_7.include?(-1)) assert_equal(false, @obj_7.include?(3)) end def test_obj_2 assert_equal(4, @obj_2.max) assert_equal(0, @obj_2.min) assert_equal(0, @obj_2.min_vertical) assert_equal(4, @obj_2.max_vertical) assert_equal(true, @obj_2.include?(0)) assert_equal(true, @obj_2.include?(1)) assert_equal(true, @obj_2.include?(3)) assert_equal(true, @obj_2.include?(4)) assert_equal(false, @obj_2.include?(-1)) assert_equal(false, @obj_2.include?(5)) end def test_obj_3 assert_equal(5, @obj_3.max) assert_equal(0, @obj_3.min) assert_equal(0, @obj_3.min_vertical) assert_equal(4, @obj_3.max_vertical) assert_equal(true, @obj_3.include?(0)) assert_equal(true, @obj_3.include?(1)) assert_equal(true, @obj_3.include?(3)) assert_equal(true, @obj_3.include?(4)) assert_equal(true, @obj_3.include?(5)) assert_equal(false, @obj_3.include?(-1)) assert_equal(false, @obj_3.include?(6)) end end end; end bio-1.4.3.0001/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb0000644000004100000410000001732112200110570030502 0ustar www-datawww-data# # test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb - Unit test for Bio::RestrictionEnzyme::Analysis::SequenceRange # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/util/restriction_enzyme/range/sequence_range' require 'bio/util/restriction_enzyme/range/sequence_range/fragments' require 'bio/util/restriction_enzyme/range/cut_range' require 
'bio/util/restriction_enzyme/range/horizontal_cut_range' require 'bio/util/restriction_enzyme/range/vertical_cut_range' require 'bio/util/restriction_enzyme/range/cut_ranges' module Bio; module TestRestrictionEnzyme #:nodoc: class TestAnalysisSequenceRange < Test::Unit::TestCase #:nodoc: def setup @t = Bio::RestrictionEnzyme::Range::SequenceRange @fs = Bio::RestrictionEnzyme::Range::SequenceRange::Fragments #a.add_cut_range(p_cut_left, p_cut_right, c_cut_left, c_cut_right ) @vcr = Bio::RestrictionEnzyme::Range::VerticalCutRange @crs = Bio::RestrictionEnzyme::Range::CutRanges @hcr = Bio::RestrictionEnzyme::Range::HorizontalCutRange @obj_1 = @t.new(0,5) @obj_1.add_cut_range(0,nil,nil,3) @obj_1.add_cut_range(nil,2,nil,nil) @obj_2 = @t.new(0,5) @obj_2.add_cut_ranges( @crs.new( [@vcr.new(0,2,nil,nil), @vcr.new(3,nil,4,nil)] )) @obj_3 = @t.new(0,5) @obj_3.add_cut_ranges( @crs.new( [@vcr.new(0,2,nil,nil), @vcr.new(3,nil,4,nil)] )) @obj_3.add_cut_ranges( @crs.new( [@hcr.new(0), @hcr.new(5)] )) @obj_4 = @t.new(0,5) @obj_4.add_cut_ranges( @crs.new( [@vcr.new(0,2,1,3)] )) @obj_5 = @t.new(0,5) @obj_5.add_cut_ranges( @crs.new( [@vcr.new(0,nil,nil,nil), @vcr.new(nil,4,3,nil), @hcr.new(1,2)] )) @obj_6 = @t.new(0,5) @obj_6.add_cut_ranges( @crs.new( [@vcr.new(nil,nil,0,nil), @hcr.new(1,2), @vcr.new(nil,4,3,nil)] )) @obj_7 = @t.new(0,5) @obj_7.add_cut_ranges( @crs.new( [@vcr.new(nil,2,nil,nil), @hcr.new(0,2)] )) @obj_8 = @t.new(0,11) @obj_8.add_cut_ranges( @crs.new( [@hcr.new(0,1), @vcr.new(nil,nil,nil,5), @hcr.new(7,8), @hcr.new(10), @vcr.new(nil,10,nil,nil)] )) @obj_9 = @t.new(0,5) @obj_9.add_cut_ranges( @crs.new( [@vcr.new(nil,3,nil,3)] )) @obj_10 = @t.new(0,5) @obj_10.add_cut_ranges( @crs.new( [@vcr.new(0,nil,nil,3), @vcr.new(nil,2,nil,2)] )) end def test_fragments assert_equal(@fs, @obj_1.fragments.class) end # '0|1 2|3 4 5' # ' +---+-+ ' # '0 1 2 3|4 5' def test_fragments_for_display_1 x = @obj_1 assert_equal(3, x.fragments.for_display.size) assert_equal('0 ', 
x.fragments.for_display[0].primary) assert_equal('0123', x.fragments.for_display[0].complement) assert_equal('12', x.fragments.for_display[1].primary) assert_equal(' ', x.fragments.for_display[1].complement) assert_equal('345', x.fragments.for_display[2].primary) assert_equal(' 45', x.fragments.for_display[2].complement) end # '0|1 2|3|4 5' # ' +---+ +-+ ' # '0 1 2 3 4|5' def test_fragments_for_display_2 x = @obj_2 assert_equal(3, x.fragments.for_display.size) assert_equal('0 3 ', x.fragments.for_display[0].primary) assert_equal('01234', x.fragments.for_display[0].complement) assert_equal('12', x.fragments.for_display[1].primary) assert_equal(' ', x.fragments.for_display[1].complement) assert_equal('45', x.fragments.for_display[2].primary) assert_equal(' 5', x.fragments.for_display[2].complement) end # '0|1 2|3|4 5' # '-+---+ +-+-' # '0 1 2 3 4|5' def test_fragments_for_display_3 x = @obj_3 assert_equal(5, x.fragments.for_display.size) assert_equal('0', x.fragments.for_display[0].primary) assert_equal(' ', x.fragments.for_display[0].complement) assert_equal(' 3 ', x.fragments.for_display[1].primary) assert_equal('01234', x.fragments.for_display[1].complement) assert_equal('12', x.fragments.for_display[2].primary) assert_equal(' ', x.fragments.for_display[2].complement) assert_equal('45', x.fragments.for_display[3].primary) assert_equal(' ', x.fragments.for_display[3].complement) assert_equal(' ', x.fragments.for_display[4].primary) assert_equal('5', x.fragments.for_display[4].complement) end # '0|1 2|3 4 5' # ' +-+-+-+ ' # '0 1|2 3|4 5' def test_fragments_for_display_4 x = @obj_4 assert_equal(4, x.fragments.for_display.size) assert_equal('0 ', x.fragments.for_display[0].primary) assert_equal('01', x.fragments.for_display[0].complement) assert_equal('12', x.fragments.for_display[1].primary) assert_equal(' ', x.fragments.for_display[1].complement) assert_equal(' ', x.fragments.for_display[2].primary) assert_equal('23', x.fragments.for_display[2].complement) 
assert_equal('345', x.fragments.for_display[3].primary) assert_equal(' 45', x.fragments.for_display[3].complement) end # '0 1 2 3 4|5' # ' +-+ ' # '0 1 2 3|4 5' def test_fragments_for_display_5 x = @obj_5 assert_equal(2, x.fragments.for_display.size) assert_equal('01234', x.fragments.for_display[0].primary) assert_equal('0123 ', x.fragments.for_display[0].complement) assert_equal(' 5', x.fragments.for_display[1].primary) assert_equal('45', x.fragments.for_display[1].complement) end # '0 1 2 3 4|5' # ' +-+ ' # '0 1 2 3|4 5' def test_fragments_for_display_6 x = @obj_6 assert_equal(2, x.fragments.for_display.size) assert_equal('01234', x.fragments.for_display[0].primary) assert_equal('0123 ', x.fragments.for_display[0].complement) assert_equal(' 5', x.fragments.for_display[1].primary) assert_equal('45', x.fragments.for_display[1].complement) end # '0 1 2|3 4 5' # '-----+ ' # '0 1 2 3 4 5' def test_fragments_for_display_7 x = @obj_7 assert_equal(2, x.fragments.for_display.size) assert_equal('012', x.fragments.for_display[0].primary) assert_equal(' ', x.fragments.for_display[0].complement) assert_equal(' 345', x.fragments.for_display[1].primary) assert_equal('012345', x.fragments.for_display[1].complement) end # '0 1 2 3 4 5 6 7 8 9 0 1' # ' ' # '0 1 2 3 4 5 6 7 8 9 0 1' def test_fragments_for_display_8 x = @obj_8 assert_equal(1, x.fragments.for_display.size) assert_equal('012345678901', x.fragments.for_display[0].primary) assert_equal('012345678901', x.fragments.for_display[0].complement) end # '0 1 2 3|4 5' # ' + ' # '0 1 2 3|4 5' def test_fragments_for_display_9 x = @obj_9 assert_equal(2, x.fragments.for_display.size) assert_equal('0123', x.fragments.for_display[0].primary) assert_equal('0123', x.fragments.for_display[0].complement) assert_equal('45', x.fragments.for_display[1].primary) assert_equal('45', x.fragments.for_display[1].complement) end # '0|1 2|3 4 5' # ' +---+-+ ' # '0 1 2|3|4 5' def test_fragments_for_display_10 x = @obj_10 assert_equal(4, 
x.fragments.for_display.size) assert_equal('0 ', x.fragments.for_display[0].primary) assert_equal('012', x.fragments.for_display[0].complement) assert_equal('12', x.fragments.for_display[1].primary) assert_equal(' ', x.fragments.for_display[1].complement) assert_equal('345', x.fragments.for_display[2].primary) assert_equal(' 45', x.fragments.for_display[2].complement) assert_equal(' ', x.fragments.for_display[3].primary) assert_equal('3', x.fragments.for_display[3].complement) end end end; end bio-1.4.3.0001/test/unit/bio/util/restriction_enzyme/test_single_strand.rb0000644000004100000410000001202412200110570026522 0ustar www-datawww-data# # test/unit/bio/util/restriction_enzyme/test_single_strand.rb - Unit test for Bio::RestrictionEnzyme::SingleStrand # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/util/restriction_enzyme/single_strand' module Bio; module TestRestrictionEnzyme #:nodoc: class TestSingleStrand < Test::Unit::TestCase #:nodoc: def setup @t = Bio::RestrictionEnzyme::SingleStrand @cl = Bio::RestrictionEnzyme::SingleStrand::CutLocationsInEnzymeNotation @s = Bio::Sequence::NA @obj_1 = @t.new(@s.new('gata'), @cl.new(-2,1,3)) @obj_2 = @t.new('gata', -2, 1, 3) @obj_3 = @t.new('garraxt', [-2, 1, 7]) @obj_4 = @t.new('nnnnnnngarraxtnn', [-2, 1, 7]) @obj_5 = @t.new('ga^rr^axt') @obj_6 = @t.new('^ga^rr^axt') @obj_7 = @t.new('n^ngar^raxtnn^n') end def test_pattern_palindromic? assert_equal(true, @t.new('atgcat', 1).palindromic?) assert_equal(false, @t.new('atgcgta', 1).palindromic?) assert_equal(false, @obj_1.palindromic?) assert_equal(false, @obj_2.palindromic?) assert_equal(false, @obj_3.palindromic?) 
assert_equal(false, @obj_4.palindromic?) end def test_stripped assert_equal('gata', @obj_1.stripped) assert_equal('gata', @obj_2.stripped) assert_equal('garraxt', @obj_3.stripped) assert_equal('garraxt', @obj_4.stripped) end def test_pattern assert_equal('nngata', @obj_1.pattern) assert_equal('nngata', @obj_2.pattern) assert_equal('nngarraxtn', @obj_3.pattern) assert_equal('nngarraxtn', @obj_4.pattern) assert_equal('nngata', @obj_1) assert_equal('nngata', @obj_2) assert_equal('nngarraxtn', @obj_3) assert_equal('nngarraxtn', @obj_4) end def test_with_cut_symbols assert_equal('n^ng^at^a', @obj_1.with_cut_symbols) assert_equal('n^ng^at^a', @obj_2.with_cut_symbols) assert_equal('n^ng^arraxt^n', @obj_3.with_cut_symbols) assert_equal('n^ng^arraxt^n', @obj_4.with_cut_symbols) end def test_with_spaces assert_equal('n^n g^a t^a', @obj_1.with_spaces) assert_equal('n^n g^a t^a', @obj_2.with_spaces) assert_equal('n^n g^a r r a x t^n', @obj_3.with_spaces) assert_equal('n^n g^a r r a x t^n', @obj_4.with_spaces) end def test_cut_locations_in_enzyme_notation assert_equal([-2,1,3], @obj_1.cut_locations_in_enzyme_notation) assert_equal([-2,1,3], @obj_2.cut_locations_in_enzyme_notation) assert_equal([-2,1,7], @obj_3.cut_locations_in_enzyme_notation) assert_equal([-2,1,7], @obj_4.cut_locations_in_enzyme_notation) assert_equal([2,4], @obj_5.cut_locations_in_enzyme_notation) assert_equal([-1,2,4], @obj_6.cut_locations_in_enzyme_notation) assert_equal([-2,3,9], @obj_7.cut_locations_in_enzyme_notation) end def test_cut_locations assert_equal([0,2,4], @obj_1.cut_locations) assert_equal([0,2,4], @obj_2.cut_locations) assert_equal([0,2,8], @obj_3.cut_locations) assert_equal([0,2,8], @obj_4.cut_locations) assert_equal([1,3], @obj_5.cut_locations) assert_equal([0,2,4], @obj_6.cut_locations) assert_equal([0,4,10], @obj_7.cut_locations) end def test_orientation assert_equal([5,3], @obj_1.orientation) assert_equal([5,3], @obj_2.orientation) assert_equal([5,3], @obj_3.orientation) 
assert_equal([5,3], @obj_4.orientation) end def test_creation_with_no_cuts @obj_8 = @t.new('garraxt') assert_equal([5,3], @obj_8.orientation) assert_equal([], @obj_8.cut_locations) assert_equal([], @obj_8.cut_locations_in_enzyme_notation) assert_equal('garraxt', @obj_8.pattern) end # NOTE def test_to_re end def test_argument_error assert_raise(ArgumentError) { @t.new('a', [0,1,2]) } assert_raise(ArgumentError) { @t.new('a', 0,1,2,0) } assert_raise(ArgumentError) { @t.new('a', [nil,1,2]) } assert_raise(ArgumentError) { @t.new('a', nil,1,2,nil) } assert_raise(ArgumentError) { @t.new('a', [1,1,2]) } assert_raise(ArgumentError) { @t.new('a', 1,1,2,2) } # NOTE t| 2009-09-19 commented out for library efficiency # re: validate_args(sequence, c) in util/restriction_enzyme/single_strand/single_strand.rb # assert_raise(ArgumentError) { @t.new(1, [1,2,3]) } # assert_raise(ArgumentError) { @t.new('gaat^aca', [1,2,3]) } # assert_raise(ArgumentError) { @t.new('gaat^^aca') } # assert_raise(ArgumentError) { @t.new('z', [1,2,3]) } # # assert_raise(ArgumentError) { @t.new('g', [0,1,2]) } # assert_raise(ArgumentError) { @t.new('g', 0,1,2,0) } # assert_raise(ArgumentError) { @t.new('g', [0,1,1,2]) } # assert_raise(ArgumentError) { @t.new('g', 0,1,1,2,2) } # assert_raise(ArgumentError) { @t.new(1,2,3) } # assert_raise(ArgumentError) { @t.new(1,2,'g') } end end end; end bio-1.4.3.0001/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb0000644000004100000410000000337312200110570027435 0ustar www-datawww-data# # test/unit/bio/util/restriction_enzyme/test_string_formatting.rb - Unit test for Bio::RestrictionEnzyme::StringFormatting # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed 
for the tests require 'test/unit' require 'bio/util/restriction_enzyme/string_formatting' module Bio; module TestRestrictionEnzyme #:nodoc: class TestStringFormatting < Test::Unit::TestCase #:nodoc: include Bio::RestrictionEnzyme::StringFormatting def setup @t = String @obj_1 = @t.new('gata') @obj_2 = @t.new('garraxt') @obj_3 = @t.new('gArraXT') @obj_4 = @t.new('nnnnnnngarraxtnn') end def test_strip_padding assert_equal('gata', strip_padding(@obj_1)) assert_equal('garraxt', strip_padding(@obj_2)) assert_equal('gArraXT', strip_padding(@obj_3)) assert_equal('garraxt', strip_padding(@obj_4)) end def test_left_padding assert_equal('', left_padding(@obj_1)) assert_equal('', left_padding(@obj_2)) assert_equal('', left_padding(@obj_3)) assert_equal('nnnnnnn', left_padding(@obj_4)) end def test_right_padding assert_equal('', right_padding(@obj_1)) assert_equal('', right_padding(@obj_2)) assert_equal('', right_padding(@obj_3)) assert_equal('nn', right_padding(@obj_4)) end def test_add_spacing assert_equal('n^n g^a t^a', add_spacing('n^ng^at^a') ) assert_equal('n^n g^a r r a x t^n', add_spacing('n^ng^arraxt^n') ) end end end; end bio-1.4.3.0001/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb0000644000004100000410000000247712200110570026061 0ustar www-datawww-data# # test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb - Unit test for Bio::RestrictionEnzyme::CutSymbol # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/util/restriction_enzyme/cut_symbol' module Bio; module TestRestrictionEnzyme #:nodoc: class TestCutSymbol < Test::Unit::TestCase #:nodoc: include Bio::RestrictionEnzyme::CutSymbol def setup end def 
test_methods assert_equal('^', cut_symbol) assert_equal('|', set_cut_symbol('|')) assert_equal('|', cut_symbol) assert_equal('\\|', escaped_cut_symbol) assert_equal(/\|/, re_cut_symbol) assert_equal('^', set_cut_symbol('^')) assert_equal(3, "abc^de" =~ re_cut_symbol) assert_equal(nil, "abc^de" =~ re_cut_symbol_adjacent) assert_equal(3, "abc^^de" =~ re_cut_symbol_adjacent) assert_equal(4, "a^bc^^de" =~ re_cut_symbol_adjacent) assert_equal(nil, "a^bc^de" =~ re_cut_symbol_adjacent) end end end; end bio-1.4.3.0001/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb0000644000004100000410000001211012200110570030741 0ustar www-datawww-data# # test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb - Unit test for Bio::RestrictionEnzyme::SingleStrandComplement # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/util/restriction_enzyme/single_strand_complement' module Bio; module TestRestrictionEnzyme #:nodoc: class TestSingleStrandComplement < Test::Unit::TestCase #:nodoc: def setup @t = Bio::RestrictionEnzyme::SingleStrandComplement @cl = Bio::RestrictionEnzyme::SingleStrand::CutLocationsInEnzymeNotation @s = Bio::Sequence::NA @obj_1 = @t.new(@s.new('gata'), @cl.new(-2,1,3)) @obj_2 = @t.new('gata', -2, 1, 3) @obj_3 = @t.new('garraxt', [-2, 1, 7]) @obj_4 = @t.new('nnnnnnngarraxtnn', [-2, 1, 7]) @obj_5 = @t.new('ga^rr^axt') @obj_6 = @t.new('^ga^rr^axt') @obj_7 = @t.new('n^ngar^raxtnn^n') end def test_pattern_palindromic? assert_equal(true, @t.new('atgcat', 1).palindromic?) assert_equal(false, @t.new('atgcgta', 1).palindromic?) assert_equal(false, @obj_1.palindromic?) 
assert_equal(false, @obj_2.palindromic?) assert_equal(false, @obj_3.palindromic?) assert_equal(false, @obj_4.palindromic?) end def test_stripped assert_equal('gata', @obj_1.stripped) assert_equal('gata', @obj_2.stripped) assert_equal('garraxt', @obj_3.stripped) assert_equal('garraxt', @obj_4.stripped) end def test_pattern assert_equal('nngata', @obj_1.pattern) assert_equal('nngata', @obj_2.pattern) assert_equal('nngarraxtn', @obj_3.pattern) assert_equal('nngarraxtn', @obj_4.pattern) assert_equal('nngata', @obj_1) assert_equal('nngata', @obj_2) assert_equal('nngarraxtn', @obj_3) assert_equal('nngarraxtn', @obj_4) end def test_with_cut_symbols assert_equal('n^ng^at^a', @obj_1.with_cut_symbols) assert_equal('n^ng^at^a', @obj_2.with_cut_symbols) assert_equal('n^ng^arraxt^n', @obj_3.with_cut_symbols) assert_equal('n^ng^arraxt^n', @obj_4.with_cut_symbols) end def test_with_spaces assert_equal('n^n g^a t^a', @obj_1.with_spaces) assert_equal('n^n g^a t^a', @obj_2.with_spaces) assert_equal('n^n g^a r r a x t^n', @obj_3.with_spaces) assert_equal('n^n g^a r r a x t^n', @obj_4.with_spaces) end def test_cut_locations_in_enzyme_notation assert_equal([-2,1,3], @obj_1.cut_locations_in_enzyme_notation) assert_equal([-2,1,3], @obj_2.cut_locations_in_enzyme_notation) assert_equal([-2,1,7], @obj_3.cut_locations_in_enzyme_notation) assert_equal([-2,1,7], @obj_4.cut_locations_in_enzyme_notation) assert_equal([2,4], @obj_5.cut_locations_in_enzyme_notation) assert_equal([-1,2,4], @obj_6.cut_locations_in_enzyme_notation) assert_equal([-2,3,9], @obj_7.cut_locations_in_enzyme_notation) end def test_cut_locations assert_equal([0,2,4], @obj_1.cut_locations) assert_equal([0,2,4], @obj_2.cut_locations) assert_equal([0,2,8], @obj_3.cut_locations) assert_equal([0,2,8], @obj_4.cut_locations) assert_equal([1,3], @obj_5.cut_locations) assert_equal([0,2,4], @obj_6.cut_locations) assert_equal([0,4,10], @obj_7.cut_locations) end def test_orientation assert_equal([3,5], @obj_1.orientation) 
assert_equal([3,5], @obj_2.orientation) assert_equal([3,5], @obj_3.orientation) assert_equal([3,5], @obj_4.orientation) end def test_creation_with_no_cuts @obj_8 = @t.new('garraxt') assert_equal([3,5], @obj_8.orientation) assert_equal([], @obj_8.cut_locations) assert_equal([], @obj_8.cut_locations_in_enzyme_notation) assert_equal('garraxt', @obj_8.pattern) end # NOTE def test_to_re end def test_argument_error assert_raise(ArgumentError) { @t.new('a', [0,1,2]) } assert_raise(ArgumentError) { @t.new('a', 0,1,2,0) } assert_raise(ArgumentError) { @t.new('a', [nil,1,2]) } assert_raise(ArgumentError) { @t.new('a', nil,1,2,nil) } assert_raise(ArgumentError) { @t.new('a', [1,1,2]) } assert_raise(ArgumentError) { @t.new('a', 1,1,2,2) } # NOTE t| 2009-09-19 commented out for library efficiency # re: validate_args(sequence, c) in util/restriction_enzyme/single_strand/single_strand.rb # assert_raise(ArgumentError) { @t.new(1, [1,2,3]) } # assert_raise(ArgumentError) { @t.new('gaat^aca', [1,2,3]) } # assert_raise(ArgumentError) { @t.new('gaat^^aca') } # assert_raise(ArgumentError) { @t.new('z', [1,2,3]) } # # assert_raise(ArgumentError) { @t.new('g', [0,1,2]) } # assert_raise(ArgumentError) { @t.new('g', 0,1,2,0) } # assert_raise(ArgumentError) { @t.new('g', [0,1,1,2]) } # assert_raise(ArgumentError) { @t.new('g', 0,1,1,2,2) } # assert_raise(ArgumentError) { @t.new(1,2,3) } # assert_raise(ArgumentError) { @t.new(1,2,'g') } end end end; end bio-1.4.3.0001/test/unit/bio/sequence/0000755000004100000410000000000012200110570017202 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/sequence/test_dblink.rb0000644000004100000410000000322312200110570022031 0ustar www-datawww-data# # test/unit/bio/sequence/test_dblink.rb - Unit test for Bio::Sequencce::DBLink # # Copyright:: Copyright (C) 2008 Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 
'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/sequence' require 'bio/sequence/dblink' module Bio class TestSequenceDBLink < Test::Unit::TestCase def setup @xref = Bio::Sequence::DBLink.new('EMBL', 'Z14088', 'CAA78466.1', '-', 'mRNA') end def test_database assert_equal('EMBL', @xref.database) end def test_id assert_equal('Z14088', @xref.id) end def test_secondary_ids assert_equal([ 'CAA78466.1', '-', 'mRNA' ], @xref.secondary_ids) end end #class class TestSequenceDBLinkClassMethods < Test::Unit::TestCase def test_parse_embl_DR_line str = 'DR EPD; EP07077; HS_HBG1.' xref = Bio::Sequence::DBLink.parse_embl_DR_line(str) assert_equal('EPD', xref.database) assert_equal('EP07077', xref.id) assert_equal([ 'HS_HBG1' ], xref.secondary_ids) end def test_parse_uniprot_DR_line str = 'DR EMBL; Z14088; CAA78466.1; -; mRNA.' xref = Bio::Sequence::DBLink.parse_uniprot_DR_line(str) assert_equal('EMBL', xref.database) assert_equal('Z14088', xref.id) assert_equal([ 'CAA78466.1', '-', 'mRNA' ], xref.secondary_ids) end end #class end #module Bio bio-1.4.3.0001/test/unit/bio/sequence/test_common.rb0000644000004100000410000002647612200110570022075 0ustar www-datawww-data# # test/unit/bio/sequence/test_common.rb - Unit test for Bio::Sequencce::Common # # Copyright:: Copyright (C) 2006-2008 # Mitsuteru C. 
Nakao , # Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/sequence' require 'bio/sequence/common' module Bio; module TestSequenceCommon class TSequence < String include Bio::Sequence::Common end class TestSequenceCommon < Test::Unit::TestCase def setup @obj = TSequence.new('atgcatgcatgcatgcaaaa') end def test_to_s assert_equal('atgcatgcatgcatgcaaaa', @obj.to_s) end def test_to_str assert_equal('atgcatgcatgcatgcaaaa', @obj.to_str) end def test_seq str = "atgcatgcatgcatgcaaaa" assert_equal(str, @obj.seq) end # <<(*arg) def test_push str = "atgcatgcatgcatgcaaaaA" assert_equal(str, @obj << "A") end # concat(*arg) def test_concat str = "atgcatgcatgcatgcaaaaA" assert_equal(str, @obj.concat("A")) end # +(*arg) def test_sum str = "atgcatgcatgcatgcaaaaatgcatgcatgcatgcaaaa" assert_equal(str, @obj + @obj) end # window_search(window_size, step_size = 1) def test_window_search @obj.window_search(4) do |subseq| assert_equal(20, @obj.size) end end #total(hash) def test_total hash = {'a' => 1, 'c' => 2, 'g' => 4, 't' => 3} assert_equal(44.0, @obj.total(hash)) end def test_composition composition = {"a"=>8, "c"=>4, "g"=>4, "t"=>4} assert_equal(composition, @obj.composition) end def test_splicing #(position) assert_equal("atgcatgc", @obj.splicing("join(1..4, 13..16)")) end end class TestSequenceCommonNormalize < Test::Unit::TestCase def test_no_normalize str = "atgcatgcatgcatgcaaaA" obj = TSequence.new(str) assert_equal("atgcatgcatgcatgcaaaA", obj) end def test_normalize_A str = "atgcatgcatgcatgcaaaA" seq = TSequence.new(str) assert_equal("atgcatgcatgcatgcaaaA", seq) obj = seq.normalize! 
assert_equal("atgcatgcatgcatgcaaaA", obj) end def test_normalize_a str = "atgcatgcatgcatgcaaa" seq = TSequence.new(str) assert_equal("atgcatgcatgcatgcaaa", seq) obj = seq.normalize! assert_equal("atgcatgcatgcatgcaaa", obj) end end class TestSequenceCommonRandomize < Test::Unit::TestCase def setup @str = "attcacgcctgctattcccgtcagcctgagcttgccgcgaagctgatgaaagatgttatc" @seq = TSequence.new(@str) @orig = TSequence.new(@str) end # test for Bio::Sequence::Common#randomize(hash = nil) def test_randomize rseqs = (0..2).collect { |i| @seq.randomize } # not breaking given seq? assert_equal(@orig, @seq) # same length? rseqs.each do |rseq| assert_equal(@orig.length, rseq.length) end # same composition? [ 'a', 'c', 'g', 't', 'n' ].each do |chr| count = @orig.count(chr) rseqs.each do |rseq| assert_equal(count, rseq.count(chr)) end end # randomized? (very simple check) assert(rseqs[0] != rseqs[1]) assert(rseqs[0] != rseqs[2]) assert(rseqs[1] != rseqs[2]) end # testing Bio::Sequence::Common#randomize() { |x| ... } def test_randomize_with_block composition = Hash.new(0) [ 'a', 'c', 'g', 't' ].each do |chr| composition[chr] = @seq.count(chr) end rseqs = (0..2).collect do |i| newcomposition = Hash.new(0) newseq = '' ret = @seq.randomize do |c| assert_kind_of(TSequence, c) newcomposition[c] += 1 newseq.concat c end # same length? assert_equal(@orig.length, newseq.length) # same composition? assert_equal(composition, newcomposition) # returned value is empty sequence? assert_equal(TSequence.new(''), ret) # not breaking given seq? assert_equal(@orig, @seq) newseq end # randomized? (very simple check) assert(rseqs[0] != rseqs[1]) assert(rseqs[0] != rseqs[2]) assert(rseqs[1] != rseqs[2]) end # testing Bio::Sequence::Common#randomize(hash) def test_randomize_with_hash hash = { 'a' => 20, 'c' => 19, 'g' => 18, 't' => 17 } hash.default = 0 len = 0 hash.each_value { |v| len += v } rseqs = (0..2).collect do |i| rseq = @seq.randomize(hash) # same length? 
assert_equal(len, rseq.length) # same composition? [ 'a', 'c', 'g', 't', 'n' ].each do |chr| assert_equal(hash[chr], rseq.count(chr)) end # returned value is instance of TSequence? assert_instance_of(TSequence, rseq) # not breaking given seq? assert_equal(@orig, @seq) rseq end # randomized? (very simple check) assert(rseqs[0] != rseqs[1]) assert(rseqs[0] != rseqs[2]) assert(rseqs[1] != rseqs[2]) end # testing Bio::Sequence::Common#randomize(hash) { |x| ... } def test_randomize_with_hash_block hash = { 'a' => 20, 'c' => 19, 'g' => 18, 't' => 17 } hash.default = 0 len = 0 hash.each_value { |v| len += v } rseqs = (0..2).collect do |i| newcomposition = Hash.new(0) newseq = '' ret = @seq.randomize(hash) do |c| #assert_kind_of(TSequence, c) assert_kind_of(String, c) newcomposition[c] += 1 newseq.concat c end # same length? assert_equal(len, newseq.length) # same composition? assert_equal(hash, newcomposition) # returned value is empty TSequence? assert_equal(TSequence.new(''), ret) # not breaking given seq? assert_equal(@orig, @seq) newseq end # randomized? (very simple check) assert(rseqs[0] != rseqs[1]) assert(rseqs[0] != rseqs[2]) assert(rseqs[1] != rseqs[2]) end end #class TestSequenceCommonRandomize class TestSequenceCommonRandomizeChi2 < Test::Unit::TestCase def chi2(hist, f, k) chi2 = 0 (0...k).each do |i| chi2 += ((hist[i] - f) ** 2).quo(f) end chi2 end private :chi2 # chi-square test for distribution of chi2 values from # distribution of index('a') def randomize_equiprobability_chi2 # Reference: http://www.geocities.jp/m_hiroi/light/pystat04.html seq = TSequence.new('ccccgggtta') # length must be 10 k = 10 hist = Array.new(k, 0) iter = 200 # F for index('a') f = iter.quo(seq.length).to_f # chi2 distribution, degree of freedom 9 # Reference: http://www.geocities.jp/m_hiroi/light/pystat04.html # Reference: http://keisan.casio.jp/has10/SpecExec.cgi # P = 0.9, 0.8, 0.7, ... 
0.1, 0 chi2_table = [ 14.684, 12.242, 10.656, 9.414, 8.343, 7.357, 6.393, 5.380, 4.168, 0.000 ] chi2_hist = Array.new(k, 0) chi2_iter = 200 chi2_iter.times do hist.fill(0) iter.times { hist[yield(seq).index('a')] += 1 } chi2 = chi2(hist, f, k) idx = (0...(chi2_table.size)).find { |i| chi2 >= chi2_table[i] } chi2_hist[idx] += 1 end chi2_f = chi2_iter.quo(k).to_f chi2_chi2 = chi2(chi2_hist, chi2_f, k) #$stderr.puts chi2_chi2 chi2_chi2 end private :randomize_equiprobability_chi2 def randomize_equiprobability(&block) ## chi-square test, freedom 9, significance level 5% #critical_value = 16.919 #significance_level_message = "5%" # # chi-square test, freedom 9, significance level 1% critical_value = 21.666 significance_level_message = "1%" # max trial times till the test sucess max_trial = 10 values =[] max_trial.times do |i| chi2_chi2 = randomize_equiprobability_chi2(&block) values.push chi2_chi2 # immediately breaks if the test succeeds break if chi2_chi2 < critical_value $stderr.print "Bio::Sequence::Common#randomize test of chi2 (=#{chi2_chi2}) < #{critical_value} failed (expected #{significance_level_message} by chance)" if values.size < max_trial then $stderr.puts ", retrying." else $stderr.puts " #{values.size} consecutive times!" end end assert_operator(values[-1], :<, critical_value, "test of chi2 < #{critical_value} failed #{values.size} times consecutively (#{values.inspect})") end private :randomize_equiprobability def test_randomize_equiprobability randomize_equiprobability { |seq| seq.randomize } end def test_randomize_with_hash_equiprobability hash = { 'c' => 4, 'g' => 3, 't' => 2, 'a' => 1 } randomize_equiprobability { |seq| seq.randomize(hash) } end ## disabled because it takes too long time. #def test_randomize_with_block_equiprobability # randomize_equiprobability do |seq| # newseq = '' # seq.randomize do |c| # newseq.concat c # end # newseq # end #end ## disabled because it takes too long time. 
#def test_randomize_with_hash_block_equiprobability # hash = { 'c' => 4, 'g' => 3, 't' => 2, 'a' => 1 } # randomize_equiprobability do |seq| # newseq = '' # seq.randomize(hash) do |c| # newseq.concat c # end # newseq # end #end end #class TestSequenceCommonRandomizeChi2 class TestSequenceCommonSubseq < Test::Unit::TestCase #def subseq(s = 1, e = self.length) def test_to_s_returns_self_as_string s = "abcefghijklmnop" sequence = TSequence.new(s) assert_equal(s, sequence.to_s, "wrong value") assert_instance_of(String, sequence.to_s, "not a String") end def test_subseq_returns_RuntimeError_blank_sequence_default_end sequence = TSequence.new("") assert_raise(RuntimeError) { sequence.subseq(5) } end def test_subseq_returns_RuntimeError_start_less_than_one sequence = TSequence.new("blahblah") assert_raise(RuntimeError) { sequence.subseq(0) } end def test_subseq_returns_subsequence sequence = TSequence.new("hahasubhehe") assert_equal("sub", sequence.subseq(5,7)) end end # Test Sequence#window_wearch class TestSequenceCommonWindowSearch < Test::Unit::TestCase def test_window_search_with_width_3_default_step_no_residual sequence = TSequence.new("agtca") windows = [] returned_value = sequence.window_search(3) { |window| windows << window } assert_equal(["agt", "gtc", "tca"], windows, "windows wrong") assert_equal("", returned_value, "returned value wrong") end # added def test_window_search_with_width_3_step_two_with_residual sequence = TSequence.new("agtcat") windows = [] returned_value = sequence.window_search(3, 2) { |window| windows << window } assert_equal(["agt", "tca"], windows, "windows wrong") assert_equal("t", returned_value, "returned value wrong") end end end; end #module Bio; module TestSequenceCommon bio-1.4.3.0001/test/unit/bio/sequence/test_compat.rb0000644000004100000410000000327012200110570022053 0ustar www-datawww-data# # test/unit/bio/sequence/test_compat.rb - Unit test for Bio::Sequencce::Compat # # Copyright:: Copyright (C) 2006 Mitsuteru C. 
Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/sequence' require 'bio/sequence/compat' module Bio; module TestSequenceCompat class TSequence < String include Bio::Sequence::Common end class TestSequenceCompat < Test::Unit::TestCase def setup @obj = TSequence.new('atgcatgcatgcatgcaaaa') end def test_to_s str = 'atgcatgcatgcatgcaaaa' assert_equal(str, @obj.to_s) end end class TestSequenceCommonCompat < Test::Unit::TestCase # Test Sequence#to_fasta def test_to_fasta sequence = TSequence.new("agtc" * 10) header = "the header" str = ">the header\n" + ("agtc" * 5) + "\n" + ("agtc" * 5) + "\n" assert_equal(str, sequence.to_fasta(header, 20)) end end require 'bio/sequence/na' class TestSequenceNACompat < Test::Unit::TestCase def test_na_self_randomize composition = Bio::Sequence::NA.new("acgtacgt").composition assert(Bio::Sequence::NA.randomize(composition)) end end require 'bio/sequence/aa' class TestSequenceNACompat < Test::Unit::TestCase def test_aa_self_randomize composition = Bio::Sequence::AA.new("WWDTGAK").composition assert(Bio::Sequence::AA.randomize(composition)) end end end; end #module Bio; module TestSequenceCompat bio-1.4.3.0001/test/unit/bio/sequence/test_sequence_masker.rb0000644000004100000410000001134212200110570023741 0ustar www-datawww-data# # = test/unit/bio/sequence/test_sequence_masker.rb - Unit test for Bio::Sequence::SequenceMasker # # Copyright:: Copyright (C) 2010 # Naohisa Goto # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/sequence' require 'bio/sequence/sequence_masker' module Bio class 
TestSequenceMasker < Test::Unit::TestCase def setup s = "aaacgcattagcaccaccattaccaccacc" @raw = s.dup.freeze @seq = Bio::Sequence.new(s) @seq.quality_scores = (0...30).collect { |i| i * 3 } @seq.error_probabilities = (0...30).collect { |i| 10 ** -(i * 3 / 10.0) } end # Very simple enumerator for testing. class SimpleEnum include Enumerable def initialize(ary) @ary = ary end def each(&block) @ary.each(&block) end end #class SimpleEnum def test_mask_with_enumerator enum = SimpleEnum.new((0..29).to_a) newseq = @seq.mask_with_enumerator(enum, 'n') do |item| i = item % 10 i == 9 || i == 0 end expected = "naacgcattnncaccaccannaccaccacn" assert_equal(expected, newseq.seq) # not breaking original sequence assert_equal(@raw, @seq.seq) end def test_mask_with_enumerator_longer_mask_char enum = SimpleEnum.new((0..29).to_a) newseq = @seq.mask_with_enumerator(enum, '-*-') do |item| i = item % 10 i == 9 || i == 0 end expected = "-*-aacgcatt-*--*-caccacca-*--*-accaccac-*-" assert_equal(expected, newseq.seq) # not breaking original sequence assert_equal(@raw, @seq.seq) end def test_mask_with_enumerator_empty_mask_char enum = SimpleEnum.new((0..29).to_a) newseq = @seq.mask_with_enumerator(enum, '') do |item| i = item % 10 i == 9 || i == 0 end expected = "aacgcattcaccaccaaccaccac" assert_equal(expected, newseq.seq) # not breaking original sequence assert_equal(@raw, @seq.seq) end def test_mask_with_enumerator_shorter enum = SimpleEnum.new((0..10).to_a.freeze) enum.freeze # normal mask char newseq = @seq.mask_with_enumerator(enum, 'n') do |item| item > 5 end expected = "aaacgcnnnnncaccaccattaccaccacc" assert_equal(expected, newseq.seq) # not breaking original sequence assert_equal(@raw, @seq.seq) # empty mask char newseq = @seq.mask_with_enumerator(enum, '') do |item| item > 5 end expected = "aaacgccaccaccattaccaccacc" assert_equal(expected, newseq.seq) # not breaking original sequence assert_equal(@raw, @seq.seq) # longer mask char newseq = @seq.mask_with_enumerator(enum, '-*-') do 
|item| item > 5 end expected = "aaacgc-*--*--*--*--*-caccaccattaccaccacc" assert_equal(expected, newseq.seq) # not breaking original sequence assert_equal(@raw, @seq.seq) end def test_mask_with_enumerator_excess enum = SimpleEnum.new((0..200).to_a.freeze) enum.freeze # normal mask char newseq = @seq.mask_with_enumerator(enum, 'n') do |item| i = item % 10 i == 9 || i == 0 end expected = "naacgcattnncaccaccannaccaccacn" assert_equal(expected, newseq.seq) # not breaking original sequence assert_equal(@raw, @seq.seq) # empty mask char newseq = @seq.mask_with_enumerator(enum, '') do |item| i = item % 10 i == 9 || i == 0 end expected = "aacgcattcaccaccaaccaccac" assert_equal(expected, newseq.seq) # not breaking original sequence assert_equal(@raw, @seq.seq) # longer mask char newseq = @seq.mask_with_enumerator(enum, '-*-') do |item| i = item % 10 i == 9 || i == 0 end expected = "-*-aacgcatt-*--*-caccacca-*--*-accaccac-*-" assert_equal(expected, newseq.seq) # not breaking original sequence assert_equal(@raw, @seq.seq) end def test_mask_with_quality_score newseq = @seq.mask_with_quality_score(30, 'n') expected = "nnnnnnnnnngcaccaccattaccaccacc" assert_equal(expected, newseq.seq) # not breaking original sequence assert_equal(@raw, @seq.seq) end def test_mask newseq = @seq.mask_with_quality_score(30, 'n') expected = "nnnnnnnnnngcaccaccattaccaccacc" assert_equal(expected, newseq.seq) # not breaking original sequence assert_equal(@raw, @seq.seq) end def test_mask_with_error_probability newseq = @seq.mask_with_error_probability(0.001, 'n') expected = "nnnnnnnnnngcaccaccattaccaccacc" assert_equal(expected, newseq.seq) # not breaking original sequence assert_equal(@raw, @seq.seq) end end #class TestSequenceMasker end #module Bio bio-1.4.3.0001/test/unit/bio/sequence/test_quality_score.rb0000644000004100000410000003036112200110570023454 0ustar www-datawww-data# # test/unit/bio/sequence/test_quality_score.rb - Unit test for Bio::Sequence::QualityScore # # Copyright:: Copyright (C) 
2009 # Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/sequence/quality_score' module Bio module TestSequenceQualityScore # A module providing methods to compare float arrays module FloatArrayComparison private def float_array_equivalent?(expected, actual, *arg) assert_equal(expected.size, actual.size, *arg) dt = Float::EPSILON * 1024 (0...(expected.size)).each do |i| e = expected[i] a = actual[i] #assert_equal(e, a) assert_in_delta(e, a, e.abs * dt) end end end #module FloatArrayComparison module TestConverterMethods Query = (-20..100).to_a.freeze Result_phred2solexa_1to100 = ([ -6, -2, 0, 2, 3, 5, 6, 7, 8, 10 ] + (11..100).to_a).freeze Result_solexa2phred = ([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 10 ] + (11..100).to_a).freeze def test_convert_scores_from_phred_to_solexa result = @obj.convert_scores_from_phred_to_solexa(Query) assert_equal(Result_phred2solexa_1to100, result[21..-1]) (0..20).each do |i| assert_operator(-6, :>, result[i]) end end def test_convert_scores_from_solexa_to_phred result = @obj.convert_scores_from_solexa_to_phred(Query) assert_equal(Result_solexa2phred, result) end def test_convert_nothing result = @obj.convert_nothing(Query) assert_equal(Query, result) end private def do_test_from_phred_to_solexa(obj, meth) result = obj.__send__(meth, Query) assert_equal(Result_phred2solexa_1to100, result[21..-1]) (0..20).each do |i| assert_operator(-6, :>, result[i]) end end def do_test_from_solexa_to_phred(obj, meth) result = obj.__send__(meth, Query) assert_equal(Result_solexa2phred, result) end def do_test_convert_nothing(obj, meth) result = obj.__send__(meth, Query) assert_equal(Query, result) end end #module TestConverterMethods class TestConverter < 
Test::Unit::TestCase include TestConverterMethods class Dummy include Bio::Sequence::QualityScore::Converter end #class Dummy def setup @obj = Dummy.new end end #class TestConverter class TestPhred < Test::Unit::TestCase include FloatArrayComparison include TestConverterMethods class Dummy include Bio::Sequence::QualityScore::Phred end #class Dummy Qscores = (-20..100).to_a.freeze Q2P = [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.794328234724281, 0.630957344480193, 0.501187233627272, 0.398107170553497, 0.316227766016838, 0.251188643150958, 0.199526231496888, 0.158489319246111, 0.125892541179417, 0.1, 0.0794328234724281, 0.0630957344480193, 0.0501187233627272, 0.0398107170553497, 0.0316227766016838, 0.0251188643150958, 0.0199526231496888, 0.0158489319246111, 0.0125892541179417, 0.01, 0.00794328234724281, 0.00630957344480193, 0.00501187233627272, 0.00398107170553497, 0.00316227766016838, 0.00251188643150958, 0.00199526231496888, 0.00158489319246111, 0.00125892541179417, 0.001, 0.000794328234724281, 0.000630957344480193, 0.000501187233627273, 0.000398107170553497, 0.000316227766016838, 0.000251188643150958, 0.000199526231496888, 0.000158489319246111, 0.000125892541179417, 0.0001, 7.94328234724282e-05, 6.30957344480193e-05, 5.01187233627273e-05, 3.98107170553497e-05, 3.16227766016838e-05, 2.51188643150958e-05, 1.99526231496888e-05, 1.58489319246111e-05, 1.25892541179417e-05, 1.0e-05, 7.94328234724282e-06, 6.30957344480193e-06, 5.01187233627272e-06, 3.98107170553497e-06, 3.16227766016838e-06, 2.51188643150958e-06, 1.99526231496888e-06, 1.58489319246111e-06, 1.25892541179417e-06, 1.0e-06, 7.94328234724282e-07, 6.30957344480193e-07, 5.01187233627272e-07, 3.98107170553497e-07, 3.16227766016838e-07, 2.51188643150958e-07, 1.99526231496888e-07, 1.58489319246111e-07, 1.25892541179417e-07, 1.0e-07, 7.94328234724282e-08, 6.30957344480193e-08, 5.01187233627272e-08, 3.98107170553497e-08, 3.16227766016838e-08, 
2.51188643150958e-08, 1.99526231496888e-08, 1.58489319246111e-08, 1.25892541179417e-08, 1.0e-08, 7.94328234724282e-09, 6.30957344480194e-09, 5.01187233627271e-09, 3.98107170553497e-09, 3.16227766016838e-09, 2.51188643150958e-09, 1.99526231496888e-09, 1.58489319246111e-09, 1.25892541179417e-09, 1.0e-09, 7.94328234724282e-10, 6.30957344480194e-10, 5.01187233627271e-10, 3.98107170553497e-10, 3.16227766016838e-10, 2.51188643150958e-10, 1.99526231496888e-10, 1.58489319246111e-10, 1.25892541179417e-10, 1.0e-10 ].freeze P2Q = ( [ 0 ] * 20 + (0..100).to_a ).freeze def setup @obj = Dummy.new end def test_quality_score_type assert_equal(:phred, @obj.quality_score_type) end def test_phred_q2p result = @obj.phred_q2p(Qscores) float_array_equivalent?(Q2P, result) end def test_q2p result = @obj.q2p(Qscores) float_array_equivalent?(Q2P, result) end def test_self_q2p result = Bio::Sequence::QualityScore::Phred.q2p(Qscores) float_array_equivalent?(Q2P, result) end def test_phred_p2q result = @obj.phred_p2q(Q2P) assert_equal(P2Q, result) end def test_p2q result = @obj.p2q(Q2P) assert_equal(P2Q, result) end def test_self_p2q result = Bio::Sequence::QualityScore::Phred.p2q(Q2P) assert_equal(P2Q, result) end def test_convert_scores_from_phred do_test_convert_nothing(@obj, :convert_scores_from_phred) end def test_convert_scores_to_phred do_test_convert_nothing(@obj, :convert_scores_to_phred) end def test_convert_scores_from_solexa do_test_from_solexa_to_phred(@obj, :convert_scores_from_solexa) end def test_convert_scores_to_solexa do_test_from_phred_to_solexa(@obj, :convert_scores_to_solexa) end def test_self_convert_scores_to_solexa do_test_from_phred_to_solexa(Bio::Sequence::QualityScore::Phred, :convert_scores_to_solexa) end end #class TestPhred class TestSolexa < Test::Unit::TestCase include FloatArrayComparison include TestConverterMethods class Dummy include Bio::Sequence::QualityScore::Solexa end #class Dummy Qscores = [ -200, -175, -150, -125, -100, -75, -50, -25, 
*(-20..100).to_a ].freeze Q2P = [ 1.0, 1.0, 0.999999999999999, 0.999999999999684, 0.9999999999, 0.999999968377224, 0.999990000099999, 0.99684769081674, 0.99009900990099, 0.987567264745558, 0.98439833775817, 0.98043769612742, 0.975496632449664, 0.969346569968284, 0.961713496117745, 0.952273278965796, 0.940649056897232, 0.926412443882426, 0.909090909090909, 0.888184230221883, 0.86319311139679, 0.833662469183438, 0.799239991086898, 0.759746926647958, 0.715252751049199, 0.666139424583122, 0.613136820153143, 0.557311633762293, 0.5, 0.442688366237707, 0.386863179846857, 0.333860575416878, 0.284747248950801, 0.240253073352042, 0.200760008913102, 0.166337530816562, 0.13680688860321, 0.111815769778117, 0.0909090909090909, 0.0735875561175735, 0.0593509431027676, 0.0477267210342039, 0.0382865038822547, 0.0306534300317155, 0.024503367550336, 0.0195623038725795, 0.0156016622418296, 0.0124327352544424, 0.0099009900990099, 0.00788068385033028, 0.00627001234143384, 0.00498687873668797, 0.0039652856191522, 0.00315230918326021, 0.00250559266728573, 0.00199128917072832, 0.00158238528080172, 0.00125734251135529, 0.000999000999000999, 0.000793697778169244, 0.000630559488339893, 0.000500936170813599, 0.000397948744304877, 0.000316127797629618, 0.000251125563261462, 0.00019948642872153, 0.000158464204362237, 0.000125876694242503, 9.99900009999e-05, 7.94265144001309e-05, 6.30917536274865e-05, 5.0116211602182e-05, 3.98091322252505e-05, 3.16217766333056e-05, 2.51182333735999e-05, 1.99522250504614e-05, 1.5848680739949e-05, 1.25890956306177e-05, 9.99990000099999e-06, 7.94321925200956e-06, 6.30953363433606e-06, 5.0118472175343e-06, 3.98105585666614e-06, 3.1622676602e-06, 2.51188012195199e-06, 1.99525833390512e-06, 1.58489068057866e-06, 1.25892382690297e-06, 9.99999000001e-07, 7.94327603767439e-07, 6.30956946373274e-07, 5.01186982438755e-07, 3.98107012064241e-07, 3.1622766601687e-07, 2.5118858005524e-07, 1.99526191686179e-07, 1.58489294127251e-07, 1.25892525330487e-07, 9.9999990000001e-08, 
7.94328171628553e-08, 6.30957304669478e-08, 5.01187208508409e-08, 3.98107154704566e-08, 3.16227756016838e-08, 2.51188636841385e-08, 1.99526227515816e-08, 1.58489316734225e-08, 1.25892539594523e-08, 9.9999999e-09, 7.94328228414709e-09, 6.30957340499123e-09, 5.01187231115385e-09, 3.98107168968604e-09, 3.16227765016838e-09, 2.51188642520001e-09, 1.99526231098781e-09, 1.58489318994922e-09, 1.25892541020927e-09, 9.99999999e-10, 7.94328234093325e-10, 6.30957344082087e-10, 5.01187233376083e-10, 3.98107170395008e-10, 3.16227765916838e-10, 2.51188643087862e-10, 1.99526231457078e-10, 1.58489319220992e-10, 1.25892541163568e-10, 9.999999999e-11 ].freeze P2Q_valid = [ -150, -125, -100, -75, -50, -25, *((-20..100).to_a) ].freeze def setup @obj = Dummy.new end def test_quality_score_type assert_equal(:solexa, @obj.quality_score_type) end def test_solexa_q2p result = @obj.solexa_q2p(Qscores) float_array_equivalent?(Q2P, result) end def test_q2p result = @obj.q2p(Qscores) float_array_equivalent?(Q2P, result) end def test_self_q2p result = Bio::Sequence::QualityScore::Solexa.q2p(Qscores) float_array_equivalent?(Q2P, result) end def test_solexa_p2q result = @obj.solexa_p2q(Q2P) assert_equal(P2Q_valid, result[2..-1]) assert_operator(-150, :>, result[0]) assert_operator(-150, :>, result[1]) end def test_p2q result = @obj.p2q(Q2P) assert_equal(P2Q_valid, result[2..-1]) assert_operator(-150, :>, result[0]) assert_operator(-150, :>, result[1]) end def test_self_p2q result = Bio::Sequence::QualityScore::Solexa.p2q(Q2P) assert_equal(P2Q_valid, result[2..-1]) assert_operator(-150, :>, result[0]) assert_operator(-150, :>, result[1]) end def test_convert_scores_from_phred do_test_from_phred_to_solexa(@obj, :convert_scores_from_phred) end def test_convert_scores_to_phred do_test_from_solexa_to_phred(@obj, :convert_scores_to_phred) end def test_self_convert_scores_to_phred do_test_from_solexa_to_phred(Bio::Sequence::QualityScore::Solexa, :convert_scores_to_phred) end def 
test_convert_scores_from_solexa do_test_convert_nothing(@obj, :convert_scores_from_solexa) end def test_convert_scores_to_solexa do_test_convert_nothing(@obj, :convert_scores_to_solexa) end end #class TestSolexa end #module TestSequenceQualityScore end #module Bio bio-1.4.3.0001/test/unit/bio/sequence/test_aa.rb0000644000004100000410000000504212200110570021150 0ustar www-datawww-data# # = test/unit/bio/sequence/test_aa.rb - Unit test for Bio::Sequencce::AA # # Copyright:: Copyright (C) 2006 # Mitsuteru C. Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/sequence' require 'bio/sequence/aa' module Bio class TestSequenceAANew < Test::Unit::TestCase def test_new str = "RRLEHTFVFL RNFSLMLLRY" assert(Bio::Sequence::AA.new(str)) end def test_new_t str = "RRLEHTFVFLRNFSLMLLRY" str_t = "RRLEHTFVFL\tRNFSLMLLRY" assert_equal(str, Bio::Sequence::AA.new(str_t)) end def test_new_n str = "RRLEHTFVFLRNFSLMLLRY" str_n = "RRLEHTFVFL\nRNFSLMLLRY" assert_equal(str, Bio::Sequence::AA.new(str_n)) end def test_new_r str = "RRLEHTFVFLRNFSLMLLRY" str_r = "RRLEHTFVFL\n\rRNFSLMLLRY" assert_equal(str, Bio::Sequence::AA.new(str_r)) end end class TestSequenceAA < Test::Unit::TestCase def setup str = "RRLEHTFVFLRNFSLMLLRY" @obj = Bio::Sequence::AA.new(str) end def test_to_s str = "RRLEHTFVFLRNFSLMLLRY" assert_equal(str, @obj.to_s) end def test_molecular_weight assert_in_delta(2612.105, @obj.molecular_weight, 1e-4) end def test_to_re re = /RRLEHTFVFLRNFSLMLLRY/ assert_equal(re, @obj.to_re) @obj[1, 1] = 'B' re = /R[DNB]LEHTFVFLRNFSLMLLRY/ assert_equal(re, @obj.to_re) end def test_codes ary = ["Arg", "Arg", "Leu", "Glu", "His", "Thr", "Phe", "Val", "Phe", "Leu", "Arg", "Asn", "Phe", "Ser", "Leu", "Met", "Leu", "Leu", "Arg", "Tyr"] assert_equal(ary, @obj.codes) end def 
test_names ary = ["arginine", "arginine", "leucine", "glutamic acid", "histidine", "threonine", "phenylalanine", "valine", "phenylalanine", "leucine", "arginine", "asparagine", "phenylalanine", "serine", "leucine", "methionine", "leucine", "leucine", "arginine", "tyrosine"] assert_equal(ary, @obj.names) end end require 'bio/sequence/aa' class TestSequenceAACompat < Test::Unit::TestCase def test_aa_self_randomize composition = Bio::Sequence::AA.new("WWDTGAK").composition assert(Bio::Sequence::AA.randomize(composition)) end end end bio-1.4.3.0001/test/unit/bio/sequence/test_na.rb0000644000004100000410000002001112200110570021156 0ustar www-datawww-data# # = test/unit/bio/sequence/test_na.rb - Unit test for Bio::Sequencce::NA # # Copyright:: Copyright (C) 2006 # Mitsuteru C. Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/sequence' require 'bio/sequence/na' module Bio class TestSequenceNANew < Test::Unit::TestCase def test_new str = 'atgcatgcatgcatgcaaaa' assert(Bio::Sequence::NA.new(str)) end def test_new_t str = "atgcatgcatgcatgcaaaa" str_t = "atgcatgcat\tgca\ttgcaaaa" assert_equal(str, Bio::Sequence::NA.new(str_t)) end def test_new_n str = "atgcatgcatgcatgcaaaa" str_n = "atgcatgcat\ngca\ntgcaaaa" assert_equal(str, Bio::Sequence::NA.new(str_n)) end def test_new_r str = "atgcatgcatgcatgcaaaa" str_r = "atgcatgcat\n\rgca\n\rtgcaaaa" assert_equal(str, Bio::Sequence::NA.new(str_r)) end end class TestSequenceNA < Test::Unit::TestCase def setup @obj = Bio::Sequence::NA.new('atgcatgcatgcatgcaaaa') end def test_splicing # 'atgcatgcatgcatgcaaaa' # 12345678901234567890 str = 'atgca catgcatg'.gsub(' ','') assert_equal(str, @obj.splicing("join(1..5,8..15)")) end def test_forward_complement str = 'atgcatgcatgcatgcaaaa' str_fcomp = 
'tacgtacgtacgtacgtttt' fcomp = @obj.forward_complement assert_equal(str_fcomp, @obj.forward_complement) assert_equal(str, @obj) assert_equal(str_fcomp, @obj.forward_complement!) assert_equal(str_fcomp, @obj) end def test_reverse_complement str = 'atgcatgcatgcatgcaaaa' str_rcomp = 'tacgtacgtacgtacgtttt'.reverse rcomp = @obj.forward_complement assert_equal(str_rcomp, @obj.reverse_complement) assert_equal(str, @obj) assert_equal(str_rcomp, @obj.reverse_complement!) assert_equal(str_rcomp, @obj) end def test_complement assert(@obj.complement) assert(@obj.complement!) end def test_to_s str = 'atgcatgcatgcatgcaaaa' assert_equal(str, @obj.to_s) end def test_codon_usage usage = {"cat"=>1, "caa"=>1, "tgc"=>1, "gca"=>1, "atg"=>2} assert_equal(usage, @obj.codon_usage) end def test_gc_percent assert_equal(40, @obj.gc_percent) @obj[0, 1] = 'g' assert_equal(45, @obj.gc_percent) end def test_gc_content assert_in_delta(0.4, @obj.gc_content, Float::EPSILON) @obj[0, 1] = 'g' assert_in_delta(0.45, @obj.gc_content, Float::EPSILON) end def test_at_content assert_in_delta(0.6, @obj.at_content, Float::EPSILON) @obj[0, 1] = 'g' assert_in_delta(0.55, @obj.at_content, Float::EPSILON) end def test_gc_skew assert_in_delta(0.0, @obj.gc_skew, Float::EPSILON) @obj[0, 1] = 'g' assert_in_delta(1.0/9.0, @obj.gc_skew, Float::EPSILON) @obj.gsub!(/a/, 'c') assert_in_delta(-3.0/8.0, @obj.gc_skew, Float::EPSILON) end def test_at_skew assert_in_delta(1.0/3.0, @obj.at_skew, Float::EPSILON) @obj[0, 1] = 'g' assert_in_delta(3.0/11.0, @obj.at_skew, Float::EPSILON) end def test_iliegal_bases @obj[0, 1] = 'n' @obj[1, 1] = 'y' assert_equal(['n', 'y'], @obj.illegal_bases) end def test_molecular_weight assert_in_delta(6174.3974, @obj.molecular_weight, 1e-4) end def test_to_re assert_equal(/atgcatgcatgcatgcaaaa/, @obj.to_re) @obj[1,1] = 'n' @obj[2,1] = 'r' @obj[3,1] = 's' @obj[4,1] = 'y' @obj[5,1] = 'w' assert_equal(/a[atgcyrwskmbdhvn][agr][gcs][tcy][atw]gcatgcatgcaaaa/, @obj.to_re) end def test_names ary = 
["Adenine", "Thymine", "Guanine"] assert_equal(ary , @obj.splice("1..3").names) end def test_dna @obj[0,1] = 'u' assert_equal('utgcatgcatgcatgcaaaa', @obj) assert_equal('ttgcatgcatgcatgcaaaa', @obj.dna) end def test_dna! @obj[0,1] = 'u' assert_equal('utgcatgcatgcatgcaaaa', @obj) @obj.dna! assert_equal('ttgcatgcatgcatgcaaaa', @obj) end def test_rna assert_equal('atgcatgcatgcatgcaaaa', @obj) assert_equal('augcaugcaugcaugcaaaa', @obj.rna) end def test_rna! assert_equal('atgcatgcatgcatgcaaaa', @obj) @obj.rna! assert_equal('augcaugcaugcaugcaaaa', @obj) end end class TestSequenceNACommon < Test::Unit::TestCase def setup @obj = Bio::Sequence::NA.new('atgcatgcatgcatgcaaaa') end def test_to_s assert_equal('atgcatgcatgcatgcaaaa', @obj.to_s) end def test_to_str assert_equal('atgcatgcatgcatgcaaaa', @obj.to_str) end def test_seq str = "atgcatgcatgcatgcaaaa" assert_equal(str, @obj.seq) end # <<(*arg) def test_push str = "atgcatgcatgcatgcaaaaa" assert_equal(str, @obj << "A") end # concat(*arg) def test_concat str = "atgcatgcatgcatgcaaaaa" assert_equal(str, @obj.concat("A")) end # +(*arg) def test_sum str = "atgcatgcatgcatgcaaaaatgcatgcatgcatgcaaaa" assert_equal(str, @obj + @obj) end # window_search(window_size, step_size = 1) def test_window_search @obj.window_search(4) do |subseq| assert_equal(20, @obj.size) end end #total(hash) def test_total hash = {'a' => 1, 'c' => 2, 'g' => 4, 't' => 3} assert_equal(44.0, @obj.total(hash)) end def test_composition composition = {"a"=>8, "c"=>4, "g"=>4, "t"=>4} assert_equal(composition, @obj.composition) end def test_splicing #(position) assert_equal("atgcatgc", @obj.splicing("join(1..4, 13..16)")) end end class TestSequenceNATranslation < Test::Unit::TestCase def setup str = "aaacccgggttttaa" # K>>P>>G>>F>>*>> # N>>P>>G>>F>> # T>>R>>V>>L>> # P>>G>>F>>*>> # "tttgggcccaaaatt" # < # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 
2, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/reference' require 'bio/compat/references' module Bio class TestReference < Test::Unit::TestCase def setup hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ], 'title' => "Title of the study.", 'journal' => "Theor. J. Hoge", 'volume' => 12, 'issue' => 3, 'pages' => "123-145", 'year' => 2001, 'pubmed' => 12345678, 'medline' => 98765432, 'abstract' => "Hoge fuga. hoge fuga.", 'url' => "http://example.com", 'mesh' => ['Hoge'], 'affiliations' => ['Tokyo']} @obj = Bio::Reference.new(hash) end def test_authors ary = [ "Hoge, J.P.", "Fuga, F.B." ] assert_equal(ary, @obj.authors) end def test_journal str = 'Theor. J. Hoge' assert_equal(str, @obj.journal) end def test_volume str = 12 assert_equal(str, @obj.volume) end def test_issue str = 3 assert_equal(str, @obj.issue) end def test_pages str = '123-145' assert_equal(str, @obj.pages) end def test_year str = 2001 assert_equal(str, @obj.year) end def test_pubmed str = 12345678 assert_equal(str, @obj.pubmed) end def test_abstract str = 'Hoge fuga. hoge fuga.' assert_equal(str, @obj.abstract) end def test_url str = 'http://example.com' assert_equal(str, @obj.url) end def test_mesh str = ['Hoge'] assert_equal(str, @obj.mesh) end def test_affiliations str = ['Tokyo'] assert_equal(str, @obj.affiliations) end def test_pubmed_url assert_equal("http://www.ncbi.nlm.nih.gov/pubmed/12345678", @obj.pubmed_url) end def test_format_general str = 'Hoge, J.P., Fuga, F.B. (2001). "Title of the study." Theor. J. Hoge 12:123-145.' assert_equal(str, @obj.format) assert_equal(str, @obj.format('general')) assert_equal(str, @obj.general) end def test_format_endnote str = "%0 Journal Article\n%A Hoge, J.P.\n%A Fuga, F.B.\n%D 2001\n%T Title of the study.\n%J Theor. J. Hoge\n%V 12\n%N 3\n%P 123-145\n%M 12345678\n%U http://example.com\n%X Hoge fuga. 
hoge fuga.\n%K Hoge\n%+ Tokyo" assert_equal(str, @obj.format('endnote')) assert_equal(str, @obj.endnote) end def test_format_bibitem str = "\\bibitem{PMID:12345678}\nHoge, J.P., Fuga, F.B.\nTitle of the study.,\n{\\em Theor. J. Hoge}, 12(3):123--145, 2001." assert_equal(str, @obj.format('bibitem')) assert_equal(str, @obj.bibitem) end def test_format_bibtex str =<<__END__ @article{PMID:12345678, author = {Hoge, J.P. and Fuga, F.B.}, title = {Title of the study.}, journal = {Theor. J. Hoge}, year = {2001}, volume = {12}, number = {3}, pages = {123--145}, url = {http://example.com}, } __END__ assert_equal(str, @obj.format('bibtex')) assert_equal(str, @obj.bibtex) end def test_format_bibtex_with_arguments str =<<__END__ @inproceedings{YourArticle, author = {Hoge, J.P. and Fuga, F.B.}, title = {Title of the study.}, year = {2001}, volume = {12}, number = {3}, pages = {123--145}, booktitle = {Theor. J. Hoge}, month = {December}, } __END__ assert_equal(str, @obj.format('bibtex', 'inproceedings', 'YourArticle', { 'journal' => false, 'url' => false, 'booktitle' => @obj.journal, 'month' => 'December'})) assert_equal(str, @obj.bibtex('inproceedings', 'YourArticle', { 'journal' => false, 'url' => false, 'booktitle' => @obj.journal, 'month' => 'December'})) end def test_format_rd str = "== Title of the study.\n\n* Hoge, J.P. and Fuga, F.B.\n\n* Theor. J. Hoge 2001 12:123-145 [PMID:12345678]\n\nHoge fuga. hoge fuga." assert_equal(str, @obj.format('rd')) assert_equal(str, @obj.rd) end def test_format_nature str = 'Hoge, J.P. & Fuga, F.B. Title of the study. Theor. J. Hoge 12, 123-145 (2001).' assert_equal(str, @obj.format('Nature')) assert_equal(str, @obj.format('nature')) assert_equal(str, @obj.nature) end def test_format_science str = 'J.P. Hoge, F.B. Fuga, Theor. J. Hoge 12 123 (2001).' assert_equal(str, @obj.format('Science')) assert_equal(str, @obj.format('science')) assert_equal(str, @obj.science) end def test_format_genome_biol str = 'Hoge JP, Fuga FB: Title of the study. 
Theor J Hoge 2001, 12:123-145.' assert_equal(str, @obj.format('genome biol')) assert_equal(str, @obj.genome_biol) end def test_format_genome_res str = "Hoge, J.P. and Fuga, F.B. 2001.\n Title of the study. Theor. J. Hoge 12: 123-145." assert_equal(str, @obj.format('genome res')) assert_equal(str, @obj.genome_res) end def test_format_nar str = 'Hoge, J.P. and Fuga, F.B. (2001) Title of the study. Theor. J. Hoge, 12, 123-145.' assert_equal(str, @obj.format('nar')) assert_equal(str, @obj.nar) end def test_format_current str = 'Hoge JP, Fuga FB: Title of the study. Theor J Hoge 2001, 12:123-145.' assert_equal(str, @obj.format('current biology')) end def test_format_trends str = 'Hoge, J.P. and Fuga, F.B. (2001) Title of the study. Theor. J. Hoge 12, 123-145' assert_equal(str, @obj.trends) end def test_format_cell str = 'Hoge, J.P. and Fuga, F.B. (2001). Title of the study. Theor. J. Hoge 12, 123-145.' assert_equal(str, @obj.format('cell')) end end class TestReference_noURL < Test::Unit::TestCase def setup hash = { 'authors' => [ "Hoge, J.P.", "Fuga, F.B." ], 'title' => "Title of the study.", 'journal' => "Theor. J. Hoge", 'volume' => 12, 'issue' => 3, 'pages' => "123-145", 'year' => 2001, 'pubmed' => 12345678, 'medline' => 98765432, 'abstract' => "Hoge fuga. hoge fuga.", 'mesh' => ['Hoge'], 'affiliations' => ['Tokyo'] } @obj = Bio::Reference.new(hash) end def test_url assert_equal(nil, @obj.url) end def test_format_endnote str = "%0 Journal Article\n%A Hoge, J.P.\n%A Fuga, F.B.\n%D 2001\n%T Title of the study.\n%J Theor. J. Hoge\n%V 12\n%N 3\n%P 123-145\n%M 12345678\n%U http://www.ncbi.nlm.nih.gov/pubmed/12345678\n%X Hoge fuga. 
hoge fuga.\n%K Hoge\n%+ Tokyo" assert_equal(str, @obj.format('endnote')) assert_equal(str, @obj.endnote) end end #class TestReference_noURL class TestReferences < Test::Unit::TestCase class NullStderr def initialize @log = [] end def write(*arg) #p arg @log.push([ :write, *arg ]) nil end def method_missing(*arg) #p arg @log.push arg nil end end #class NullStderr def setup # To suppress warning messages, $stderr is replaced by dummy object. @stderr_orig = $stderr $stderr = NullStderr.new hash = {} ary = [Bio::Reference.new(hash), Bio::Reference.new(hash)] @obj = Bio::References.new(ary) end def teardown # bring back $stderr $stderr = @stderr_orig end def test_append hash = {} ref = Bio::Reference.new(hash) assert(@obj.append(ref)) end def test_each @obj.each do |ref| assert(ref) end end end end bio-1.4.3.0001/test/unit/bio/test_shell.rb0000644000004100000410000000071512200110570020070 0ustar www-datawww-data# # test/unit/bio/test_shell.rb - Unit test for Bio::Shell # # Copyright:: Copyright (C) 2005 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 2, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/shell' module Bio end bio-1.4.3.0001/test/unit/bio/io/0000755000004100000410000000000012200110570016001 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/io/test_ensembl.rb0000644000004100000410000000465112200110570021020 0ustar www-datawww-data# # = test/unit/bio/io/test_ensembl.rb - Unit test for Bio::Ensembl. # # Copyright:: Copyright (C) 2006, 2007 # Mitsuteru C. 
Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/io/ensembl' # tests for ensembl.rb,v 1.4 class TestEnsembl_v14 < Test::Unit::TestCase def test_ensembl_url assert_equal('http://www.ensembl.org', Bio::Ensembl::ENSEMBL_URL) end def test_server obj = Bio::Ensembl.new('Homo_sapiens') assert_equal('http://www.ensembl.org', obj.server) end def test_organism organism = 'Homo_sapiens' obj = Bio::Ensembl.new(organism) assert_equal(organism, obj.organism) end def test_self_human organism = 'Homo_sapiens' obj = Bio::Ensembl.human assert_equal(organism, obj.organism) end def test_self_mouse organism = 'Mus_musculus' obj = Bio::Ensembl.mouse assert_equal(organism, obj.organism) end def test_new_with_2_args organism = 'Oryza_sativa' server_url = 'http://www.gramene.org' obj = Bio::Ensembl.new(organism, server_url) assert_equal(organism, obj.organism) assert_equal(server_url, obj.server) end end class TestEnsembl < Test::Unit::TestCase def test_server_name assert_equal('http://www.ensembl.org', Bio::Ensembl::EBIServerURI) end def test_server_uri assert_equal('http://www.ensembl.org', Bio::Ensembl.server_uri) end def test_set_server_uri host = 'http://localhost' Bio::Ensembl.server_uri(host) assert_equal(host, Bio::Ensembl.server_uri) end end class TestEnsemblBase < Test::Unit::TestCase def test_exportview end end class TestEnsemblBaseClient < Test::Unit::TestCase def test_class end end class TestEnsemblHuman < Test::Unit::TestCase def test_organism assert_equal("Homo_sapiens", Bio::Ensembl::Human::Organism) end end class TestEnsemblMouse < Test::Unit::TestCase def test_organism assert_equal("Mus_musculus", Bio::Ensembl::Mouse::Organism) end end class TestEnsemblOldStyleClient < Test::Unit::TestCase class Rice < Bio::Ensembl::Base Organism = 
'Oryza_sativa' end def test_organism assert_equal('Oryza_sativa', Rice::Organism) end end bio-1.4.3.0001/test/unit/bio/io/test_soapwsdl.rb0000644000004100000410000000136412200110570021225 0ustar www-datawww-data# # test/unit/bio/io/test_soapwsdl.rb - Unit test for SOAP/WSDL # # Copytight:: Copyright (C) 2005 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/io/soapwsdl' module Bio class TestSOAPWSDL < Test::Unit::TestCase def setup @obj = Bio::SOAPWSDL end def test_methods methods = ['list_methods','wsdl', 'wsdl=', 'log', 'log='] assert_equal(methods.sort, (@obj.instance_methods - Object.methods).sort.collect { |x| x.to_s }) end end end bio-1.4.3.0001/test/unit/bio/io/test_fastacmd.rb0000644000004100000410000000161612200110570021153 0ustar www-datawww-data# # test/unit/bio/io/test_fastacmd.rb - Unit test for Bio::Blast::Fastacmd. 
# # Copyright:: Copyright (C) 2006 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/io/fastacmd' module Bio class TestFastacmd < Test::Unit::TestCase def setup @obj = Bio::Blast::Fastacmd.new('/dev/null') end def test_database assert_equal('/dev/null', @obj.database) end def test_fastacmd assert_equal("fastacmd", @obj.fastacmd) end def test_methods method_list = [ :get_by_id, :fetch, :each_entry, :each ] method_list.each do |method| assert(@obj.respond_to?(method)) end end end end bio-1.4.3.0001/test/unit/bio/io/test_togows.rb0000644000004100000410000001037412200110570020714 0ustar www-datawww-data# # test/unit/bio/io/test_togows.rb - Unit test for Bio::TogoWS # # Copyright:: Copyright (C) 2009 # Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'uri' require 'net/http' require 'bio/version' require 'bio/io/togows' require 'test/unit' module Bio # unit test for Bio::TogoWS::REST class TestTogoWSREST < Test::Unit::TestCase def setup @togows = Bio::TogoWS::REST.new end def test_debug_default assert_equal(false, @togows.debug) end def test_debug assert_equal(true, @togows.debug = true) assert_equal(true, @togows.debug) assert_equal(false, @togows.debug = false) assert_equal(false, @togows.debug) assert_equal(true, @togows.debug = true) assert_equal(true, @togows.debug) end def test_internal_http assert_kind_of(Net::HTTP, @togows.internal_http) end end #class TestTogoWSREST # unit test for Bio::TogoWS::REST private methods class TestTogoWSRESTprivate < Test::Unit::TestCase def setup @togows = Bio::TogoWS::REST.new end 
def test_make_path a_and_q = { '/ab/cde/fghi' => [ 'ab', 'cde', 'fghi' ], '/a+b/a%2Bb/a%2Fb/a%26b/a%3Bb/a%2Cb/a%3Bb' => [ 'a b', 'a+b', 'a/b', 'a&b', 'a;b', 'a,b', 'a;b' ] } count = 0 a_and_q.each do |k,v| assert_equal(k, @togows.instance_eval { make_path(v) }) count += 1 end assert_equal(a_and_q.size, count) end def test_prepare_return_value dummyclass = Struct.new(:code, :body) dummy200 = dummyclass.new("200", "this is test") assert_equal("this is test", @togows.instance_eval { prepare_return_value(dummy200) }) dummy404 = dummyclass.new("404", "not found") assert_equal(nil, @togows.instance_eval { prepare_return_value(dummy404) }) end end #class TestTogoWSRESTprivate # unit test for Bio::TogoWS::REST class methods class TestTogoWSRESTclassMethod < Test::Unit::TestCase def test_new assert_instance_of(Bio::TogoWS::REST, Bio::TogoWS::REST.new) end def test_new_with_uri_string t = Bio::TogoWS::REST.new('http://localhost:1234/test') assert_instance_of(Bio::TogoWS::REST, t) http = t.internal_http assert_equal('localhost', http.address) assert_equal(1234, http.port) assert_equal('/test/', t.instance_eval { @pathbase }) end def test_new_with_uri_object u = URI.parse('http://localhost:1234/test') t = Bio::TogoWS::REST.new(u) assert_instance_of(Bio::TogoWS::REST, t) http = t.internal_http assert_equal('localhost', http.address) assert_equal(1234, http.port) assert_equal('/test/', t.instance_eval { @pathbase }) end def test_entry assert_respond_to(Bio::TogoWS::REST, :entry) end def test_search assert_respond_to(Bio::TogoWS::REST, :search) end def test_convert assert_respond_to(Bio::TogoWS::REST, :convert) end def test_retrieve assert_respond_to(Bio::TogoWS::REST, :retrieve) end def test_entry_database_list assert_respond_to(Bio::TogoWS::REST, :entry_database_list) end def test_search_database_list assert_respond_to(Bio::TogoWS::REST, :search_database_list) end end #class TestTogoWSRESTclassMethod # dummy class for testing Bio::TogoWS::AccessWait class DummyAccessWait 
include Bio::TogoWS::AccessWait end # unit test for Bio::TogoWS::AccessWait (all methods are private) class TestTogoWSAccessWait < Test::Unit::TestCase def setup @obj = DummyAccessWait.new end def test_togows_access_wait assert_kind_of(Numeric, @obj.instance_eval { togows_access_wait }) waits = 0 2.times { waits += @obj.instance_eval { togows_access_wait } } assert(waits > 0) end def test_reset_togows_access_wait assert_nothing_raised { @obj.instance_eval { reset_togows_access_wait } } end end #class TestTogoWSAccessWait end #module Bio bio-1.4.3.0001/test/unit/bio/io/flatfile/0000755000004100000410000000000012200110570017567 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/io/flatfile/test_buffer.rb0000644000004100000410000002652212200110570022433 0ustar www-datawww-data# # = test/unit/bio/io/flatfile/test_buffer.rb - unit test for Bio::FlatFile::BufferedInputStream # # Copyright (C) 2006 Naohisa Goto # # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'stringio' require 'bio/io/flatfile/buffer' module Bio::TestFlatFileBufferedInputStream TestDataPath = BioRubyTestDataPath TestDataFastaFormat01 = File.join(TestDataPath, 'fasta', 'example1.txt') class TestBufferedInputStreamParseFileOpenArg < Test::Unit::TestCase K = Bio::FlatFile::BufferedInputStream def _parse_file_open_mode(mode) K.module_eval { _parse_file_open_mode(mode) } end private :_parse_file_open_mode def _parse_file_open_arg(*arg) K.module_eval { _parse_file_open_arg(*arg) } end private :_parse_file_open_arg def test_parse_file_open_mode_nil assert_equal(nil, _parse_file_open_mode(nil)) end def test_parse_file_open_mode_integer assert_equal({ :fmode_integer => 127 }, _parse_file_open_mode(127)) end def test_parse_file_open_mode_str assert_equal({ :fmode_string => "r+b" }, 
_parse_file_open_mode("r+b")) end def test_parse_file_open_mode_str_with_ext_enc assert_equal({ :fmode_string => "r+t", :external_encoding => "UTF-8" }, _parse_file_open_mode("r+t:UTF-8")) end def test_parse_file_open_mode_str_with_enc assert_equal({ :fmode_string => "rb", :external_encoding => "EUC-JP", :internal_encoding => "UTF-8" }, _parse_file_open_mode("rb:EUC-JP:UTF-8")) end def test_parse_file_open_arg_nil assert_equal({}, _parse_file_open_arg(nil)) end def test_parse_file_open_arg_integer assert_equal({ :fmode_integer => 127 }, _parse_file_open_arg(127)) end def test_parse_file_open_arg_str assert_equal({ :fmode_string => "r+b" }, _parse_file_open_arg("r+b")) end def test_parse_file_open_arg_str_with_ext_enc assert_equal({ :fmode_string => "r+t", :external_encoding => "UTF-8" }, _parse_file_open_arg("r+t:UTF-8")) end def test_parse_file_open_arg_str_with_enc assert_equal({ :fmode_string => "rb", :external_encoding => "EUC-JP", :internal_encoding => "UTF-8" }, _parse_file_open_arg("rb:EUC-JP:UTF-8")) end def test_parse_file_open_arg_str_perm assert_equal({ :fmode_string => "r+b", :perm => 0644 }, _parse_file_open_arg("r+b", 0644)) end def test_parse_file_open_arg_int_perm assert_equal({ :fmode_integer => 255, :perm => 0755 }, _parse_file_open_arg(255, 0755)) end def test_parse_file_open_arg_int_perm_opt assert_equal({ :fmode_integer => 191, :perm => 0600, :textmode => true, :internal_encoding => "EUC-JP" }, _parse_file_open_arg(191, 0600, :textmode => true, :internal_encoding => "EUC-JP")) end def test_parse_file_open_arg_int_opt assert_equal({ :fmode_integer => 191, :textmode => true, :internal_encoding => "EUC-JP" }, _parse_file_open_arg(191, :textmode => true, :internal_encoding => "EUC-JP")) end def test_parse_file_open_arg_str_perm_opt assert_equal({ :fmode_string => "a", :perm => 0644, :binmode => true, :external_encoding => "UTF-8" }, _parse_file_open_arg("a", 0644, :binmode => true, :external_encoding => "UTF-8")) end def 
test_parse_file_open_arg_str_opt assert_equal({ :fmode_string => "a", :binmode => true, :external_encoding => "UTF-8" }, _parse_file_open_arg("a", :binmode => true, :external_encoding => "UTF-8")) end def test_parse_file_open_arg_opt assert_equal({ :fmode_string => "r", :binmode => true, :external_encoding => "UTF-8" }, _parse_file_open_arg(:mode => "r", :binmode => true, :external_encoding => "UTF-8")) end def test_parse_file_open_arg_opt_with_integer_mode assert_equal({ :fmode_integer => 123, :perm => 0600, :textmode => true, :external_encoding => "EUC-JP" }, _parse_file_open_arg(:mode => 123, :perm => 0600, :textmode => true, :external_encoding => "EUC-JP")) end end #class TestBufferedInputStreamParseFileOpenArg class TestBufferedInputStreamClassMethod < Test::Unit::TestCase def test_self_for_io io = File.open(TestDataFastaFormat01) obj = Bio::FlatFile::BufferedInputStream.for_io(io) assert_instance_of(Bio::FlatFile::BufferedInputStream, obj) assert_equal(TestDataFastaFormat01, obj.path) end def test_self_open_file obj = Bio::FlatFile::BufferedInputStream.open_file(TestDataFastaFormat01) assert_instance_of(Bio::FlatFile::BufferedInputStream, obj) assert_equal(TestDataFastaFormat01, obj.path) end def test_self_open_file_with_block obj2 = nil Bio::FlatFile::BufferedInputStream.open_file(TestDataFastaFormat01) do |obj| assert_instance_of(Bio::FlatFile::BufferedInputStream, obj) assert_equal(TestDataFastaFormat01, obj.path) obj2 = obj end assert_raise(IOError) { obj2.close } end end #class TestBufferedInputStreamClassMethod class TestBufferedInputStream < Test::Unit::TestCase def setup io = File.open(TestDataFastaFormat01) io.binmode path = TestDataFastaFormat01 @obj = Bio::FlatFile::BufferedInputStream.new(io, path) end def test_to_io assert_kind_of(IO, @obj.to_io) end def test_close assert_nil(@obj.close) end def test_rewind @obj.prefetch_gets @obj.rewind assert_equal('', @obj.prefetch_buffer) end def test_pos @obj.gets @obj.gets @obj.prefetch_gets 
assert_equal(117, @obj.pos) #the number depends on original data end def test_pos=() str = @obj.gets assert_equal(0, @obj.pos = 0) assert_equal(str, @obj.gets) end def test_eof_false_first assert_equal(false, @obj.eof?) end def test_eof_false_after_prefetch while @obj.prefetch_gets; nil; end assert_equal(false, @obj.eof?) end def test_eof_true while @obj.gets; nil; end assert_equal(true, @obj.eof?) end def test_gets @obj.gets @obj.gets assert_equal("gagcaaatcgaaaaggagagatttctgcatatcaagagaaaattcgagctgagatacattccaagtgtggctactc", @obj.gets.chomp) end def test_gets_equal_prefetch_gets @obj.prefetch_gets str = @obj.prefetch_gets @obj.prefetch_gets @obj.gets assert_equal(@obj.gets, str) end def test_gets_rs rs = 'tggtg' str = <<__END_OF_STR__ aggcactagaattgagcagtgaa gaagatgaggaagatgaagaagaagatgaggaagaaatcaagaaagaaaaatgcgaattttctgaagatgtagacc gatttatatggacggttgggcaggactatggtttggatgatctggtcgtgcggcgtgctctcgccaagtacctcga agtggatgtttcggacatattggaaagatacaatgaactcaagcttaagaatgatggaactgctggtg __END_OF_STR__ @obj.gets(rs) @obj.gets(rs) assert_equal(str.chomp, @obj.gets(rs)) end def test_gets_rs_equal_prefetch_gets rs = 'tggtg' @obj.prefetch_gets(rs) str = @obj.prefetch_gets(rs) @obj.prefetch_gets(rs) @obj.gets(rs) assert_equal(@obj.gets(rs), str) end def test_gets_rs_within_buffer rs = 'tggtg' a = [] 20.times {a.push @obj.gets } @obj.ungets(a.join('')) assert_equal(">At1g02580 mRNA (2291 bp) UTR's and CDS\naggcgagtggttaatggagaaggaaaaccatgaggacgatggtg", @obj.gets(rs)) assert_equal('ggctgaaagtgattctgtgattggtaagagacaaatctattatttgaatggtg', @obj.gets(rs).split(/\n/)[-1]) assert_equal('aggcactagaattgagcagtgaa', @obj.gets(rs).split(/\n/)[0]) assert_equal('aggcttct', @obj.gets(rs).split(/\n/)[0]) assert_equal('agacacc', @obj.gets(rs).split(/\n/)[0]) end def test_gets_paragraph_mode @obj.gets('') @obj.gets('') assert_equal('>At1g65300: mRNA 837bp (shortened at end)', @obj.gets('').split(/\n/)[0]) end def test_gets_paragraph_mode_equal_prefetch_gets rs = '' @obj.prefetch_gets(rs) str = 
@obj.prefetch_gets(rs) @obj.prefetch_gets(rs) @obj.gets(rs) assert_equal(@obj.gets(rs), str) end def test_gets_paragraph_mode_within_buffer @obj.gets('') a = [] 20.times {a.push @obj.gets } @obj.ungets(a.join('')) assert_equal('>At1g65300: mRNA 837bp', @obj.gets('').split(/\n/)[0]) assert_equal('>At1g65300: mRNA 837bp (shortened at end)', @obj.gets('').split(/\n/)[0]) assert_equal('>At1g65300: mRNA 837bp (shortened from start)', @obj.gets('').split(/\n/)[0]) end def test_ungets @obj.gets @obj.gets str1 = @obj.gets str2 = @obj.gets assert_nil(@obj.ungets(str2)) assert_nil(@obj.ungets(str1)) assert_equal(str1, @obj.gets) assert_equal(str2, @obj.gets) end def test_getc assert_equal(?>, @obj.getc) end def test_getc_after_prefetch @obj.prefetch_gets assert_equal(?>, @obj.getc) end def test_ungetc c = @obj.getc assert_nil(@obj.ungetc(c)) assert_equal(c, @obj.getc) end def test_ungetc_after_prefetch str = @obj.prefetch_gets c = @obj.getc assert_nil(@obj.ungetc(c)) assert_equal(str, @obj.gets) end def test_prefetch_buffer str = @obj.prefetch_gets str += @obj.prefetch_gets assert_equal(str, @obj.prefetch_buffer) end def test_prefetch_gets @obj.prefetch_gets @obj.prefetch_gets @obj.gets str = @obj.prefetch_gets @obj.gets assert_equal(str, @obj.gets) end def test_prefetch_gets_with_arg # test @obj.gets str = @obj.prefetch_gets("\n>") assert_equal(str, @obj.gets("\n>")) # test using IO object io = @obj.to_io io.rewind assert_equal(str, io.gets("\n>")) end def test_skip_spaces @obj.gets('CDS') assert_nil(@obj.skip_spaces) assert_equal(?a, @obj.getc) end end #class TestBufferedInputStream end #module Bio::TestFlatFile bio-1.4.3.0001/test/unit/bio/io/flatfile/test_autodetection.rb0000644000004100000410000002451612200110570024032 0ustar www-datawww-data# # = test/unit/bio/io/flatfile/test_autodetection.rb - unit test for Bio::FlatFile::AutoDetect # # Copyright (C) 2006 Naohisa Goto # # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 
'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio' module Bio::TestFlatFile # testing default AutoDetect's behavior class TestDefaultAutoDetect < Test::Unit::TestCase TestDataPath = BioRubyTestDataPath def setup @ad = Bio::FlatFile::AutoDetect.default end def test_genbank # modified from GenBank AB009803.1 # (E-mail and telephone/FAX numbers are removed from original entry) text = <<__END_OF_TEXT__ LOCUS AB009803 81 bp DNA linear PRI 14-APR-2000 DEFINITION Homo sapiens gene for osteonidogen, intron 4. ACCESSION AB009803 VERSION AB009803.1 GI:2749808 KEYWORDS osteonidogen. SOURCE Homo sapiens (human) ORGANISM Homo sapiens Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Hominidae; Homo. REFERENCE 1 AUTHORS Ohno,I., Okubo,K. and Matsubara,K. TITLE Human osteonidogen gene: intron-exon junctions and chromosomal localization JOURNAL Published Only in Database (1998) REFERENCE 2 (bases 1 to 81) AUTHORS Ohno,I., Okubo,K. and Matsubara,K. 
TITLE Direct Submission JOURNAL Submitted (13-DEC-1997) Ikko Ohno, Institute for Molecular and Cellular Biology, Osaka University, Molecular Genetics; 1-3 Yamada-oka, Suita, Osaka 565, Japan FEATURES Location/Qualifiers source 1..81 /organism="Homo sapiens" /mol_type="genomic DNA" /db_xref="taxon:9606" /chromosome="14" /map="14q21-22" /clone_lib="Lambda FIX II STRATAGENE" intron 1..81 /number=4 ORIGIN 1 gtaggatctc ccctccagat tctgatctgt cctccccctt gcatccaaca cctacttatt 61 ggccattcta tcctgaaaca g // __END_OF_TEXT__ assert_equal(Bio::GenBank, @ad.autodetect(text)) end def test_genpept # modified from: NCBI: P04637.2 GI:129369 # (to shorten data, many elements are omitted) text = <<__END_OF_TEXT__ LOCUS P04637 393 aa linear PRI 01-JUL-2008 DEFINITION Cellular tumor antigen p53 (Tumor suppressor p53) (Phosphoprotein p53) (Antigen NY-CO-13). ACCESSION P04637 VERSION P04637.2 GI:129369 KEYWORDS 3D-structure; Acetylation; Activator; Alternative splicing; Anti-oncogene; Apoptosis; Cell cycle; Covalent protein-RNA linkage; Cytoplasm; Disease mutation; DNA-binding; Endoplasmic reticulum; Glycoprotein; Host-virus interaction; Li-Fraumeni syndrome; Metal-binding; Methylation; Nucleus; Phosphoprotein; Polymorphism; Transcription; Transcription regulation; Ubl conjugation; Zinc. SOURCE Homo sapiens (human) ORGANISM Homo sapiens Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Hominidae; Homo. REFERENCE 1 (residues 1 to 393) AUTHORS Zakut-Houri,R., Bienz-Tadmor,B., Givol,D. and Oren,M. TITLE Human p53 cellular tumor antigen: cDNA sequence and expression in COS cells JOURNAL EMBO J. 4 (5), 1251-1255 (1985) PUBMED 4006916 REMARK NUCLEOTIDE SEQUENCE [MRNA]. 
FEATURES Location/Qualifiers source 1..393 /organism="Homo sapiens" /db_xref="taxon:9606" gene 1..393 /gene="TP53" /note="synonym: P53" Protein 1..393 /gene="TP53" /product="Cellular tumor antigen p53" ORIGIN 1 meepqsdpsv epplsqetfs dlwkllpenn vlsplpsqam ddlmlspddi eqwftedpgp 61 deaprmpeaa ppvapapaap tpaapapaps wplsssvpsq ktyqgsygfr lgflhsgtak 121 svtctyspal nkmfcqlakt cpvqlwvdst pppgtrvram aiykqsqhmt evvrrcphhe 181 rcsdsdglap pqhlirvegn lrveylddrn tfrhsvvvpy eppevgsdct tihynymcns 241 scmggmnrrp iltiitleds sgnllgrnsf evrvcacpgr drrteeenlr kkgephhelp 301 pgstkralpn ntssspqpkk kpldgeyftl qirgrerfem frelnealel kdaqagkepg 361 gsrahsshlk skkgqstsrh kklmfktegp dsd // __END_OF_TEXT__ assert_equal(Bio::GenPept, @ad.autodetect(text)) end def test_medline # PMID: 13054692 text = <<__END_OF_TEXT__ PMID- 13054692 OWN - NLM STAT- MEDLINE DA - 19531201 DCOM- 20030501 LR - 20061115 PUBM- Print IS - 0028-0836 (Print) VI - 171 IP - 4356 DP - 1953 Apr 25 TI - Molecular structure of nucleic acids; a structure for deoxyribose nucleic acid. PG - 737-8 FAU - WATSON, J D AU - WATSON JD FAU - CRICK, F H AU - CRICK FH LA - eng PT - Journal Article PL - Not Available TA - Nature JT - Nature JID - 0410462 RN - 0 (Nucleic Acids) SB - OM MH - *Nucleic Acids OID - CLML: 5324:25254:447 OTO - NLM OT - *NUCLEIC ACIDS EDAT- 1953/04/25 MHDA- 1953/04/25 00:01 PST - ppublish SO - Nature. 1953 Apr 25;171(4356):737-8. 
__END_OF_TEXT__ assert_equal(Bio::MEDLINE, @ad.autodetect(text)) end def test_embl_oldrelease fn = File.join(TestDataPath, 'embl', 'AB090716.embl') text = File.read(fn) assert_equal(Bio::EMBL, @ad.autodetect(text)) end def test_embl fn = File.join(TestDataPath, 'embl', 'AB090716.embl.rel89') text = File.read(fn) assert_equal(Bio::EMBL, @ad.autodetect(text)) end def test_sptr fn = File.join(TestDataPath, 'uniprot', 'p53_human.uniprot') text = File.read(fn) assert_equal(Bio::SPTR, @ad.autodetect(text)) end def test_prosite fn = File.join(TestDataPath, 'prosite', 'prosite.dat') text = File.read(fn) assert_equal(Bio::PROSITE, @ad.autodetect(text)) end def test_transfac # Dummy data; Generated from random data text = <<__END_OF_TEXT__ AC M99999 XX ID V$XXXX_99 XX DT 13.01.98 (created); ewi. DT 31.12.99 (updated); ewi. XX NA XXXX XX DE example gene protein XX BF T99998; XXXX; Species: human, Homo sapiens. BF T99999; XXXX; Species: mouse, Mus musculus. XX P0 A C G T 01 1 2 2 2 N 02 0 2 2 3 N 03 1 1 5 0 G 04 3 1 1 2 N 05 7 0 0 0 A 06 2 0 1 4 W 07 0 1 6 0 G 08 0 3 0 4 Y 09 6 1 0 0 A 10 1 1 0 5 T XX BA 7 functional elements in 3 genes XX CC compiled sequences XX RN [1] RA Anonymou S., Whoam I. RT Example article title for XXXX RL J. Example. 99:990-999 (1999). 
__END_OF_TEXT__ assert_equal(Bio::TRANSFAC, @ad.autodetect(text)) end def test_aaindex1 fn = File.join(TestDataPath, 'aaindex', 'PRAM900102') text = File.read(fn) assert_equal(Bio::AAindex1, @ad.autodetect(text)) end def test_aaindex2 fn = File.join(TestDataPath, 'aaindex', 'DAYM780301') text = File.read(fn) assert_equal(Bio::AAindex2, @ad.autodetect(text)) end # def test_litdb # end # def test_brite # end # def test_orthology # end # def test_drug # end # def test_glycan # end # def test_enzyme # end # def test_compound # end # def test_reaction # end # def test_genes # end # def test_genome # end def test_maxml_cluster # dummy empty data text = <<__END_OF_TEXT__ __END_OF_TEXT__ assert_equal(Bio::FANTOM::MaXML::Cluster, @ad.autodetect(text)) end def test_maxml_sequence # dummy empty data text = <<__END_OF_TEXT__ __END_OF_TEXT__ assert_equal(Bio::FANTOM::MaXML::Sequence, @ad.autodetect(text)) end # def test_pdb # end # def test_chemicalcomponent # end # def test_clustal # end # def test_gcg_msf # end # def test_gcg_seq # end def test_blastxml fn = File.join(TestDataPath, 'blast', '2.2.15.blastp.m7') text = File.read(fn) assert_equal(Bio::Blast::Report, @ad.autodetect(text)) end # def test_wublast # end # def test_wutblast # end def test_blast fn = File.join(TestDataPath, 'blast', 'b0002.faa.m0') text = File.read(fn) assert_equal(Bio::Blast::Default::Report, @ad.autodetect(text)) end # def test_tblast # end # def test_blat # end # def test_spidey # end def test_hmmer fn = File.join(TestDataPath, 'HMMER', 'hmmpfam.out') text = File.read(fn) assert_equal(Bio::HMMER::Report, @ad.autodetect(text)) fn = File.join(TestDataPath, 'HMMER', 'hmmsearch.out') text = File.read(fn) assert_equal(Bio::HMMER::Report, @ad.autodetect(text)) end # def test_sim4 # end def test_fastq fn = File.join(TestDataPath, 'fastq', 'longreads_original_sanger.fastq') text = File.read(fn, length=300) assert_equal(Bio::Fastq, @ad.autodetect(text)) end def test_fastaformat fn = File.join(TestDataPath, 
'fasta', 'example1.txt') text = File.read(fn) assert_equal(Bio::FastaFormat, @ad.autodetect(text)) fn = File.join(TestDataPath, 'fasta', 'example2.txt') text = File.read(fn) assert_equal(Bio::FastaFormat, @ad.autodetect(text)) end def test_fastanumericformat text = <<__END_OF_TEXT__ >sample 30 21 16 11 8 6 3 34 28 34 28 28 35 28 28 37 33 15 27 28 28 27 37 33 17 27 27 28 28 33 26 33 26 28 27 37 33 15 27 26 27 28 37 33 16 34 26 27 33 26 28 33 25 28 28 38 34 23 13 2 __END_OF_TEXT__ assert_equal(Bio::FastaNumericFormat, @ad.autodetect(text)) end end #class TestDefaultAutoDetect end #module Bio::TestFlatFile bio-1.4.3.0001/test/unit/bio/io/flatfile/test_splitter.rb0000644000004100000410000002247612200110570023034 0ustar www-datawww-data# # = test/unit/bio/io/flatfile/test_splitter.rb - unit test for Bio::FlatFile::Splitter # # Copyright (C) 2008 Naohisa Goto # # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'stringio' require 'bio/io/flatfile/splitter' require 'bio/io/flatfile/buffer' module Bio::TestFlatFileSplitter class TestDataClass # Fake fasta format DELIMITER = RS = "\n>" DELIMITER_OVERRUN = 1 # '>'.length FLATFILE_HEADER = '>' def initialize(str) @str = str end attr_reader :str protected :str def ==(other) self.str == other.str end end #class TestDataClass TestData01 = <<__END_OF_TESTDATA__ # This is test >test1 aaagggtttcccaaagggtttccc >testC cccccccccccccccccccccccc >testG gggggggggggggggggggggggg >test2 tttttttttttttttttttttttt tttttttttttttttttttttttt >test3 atatatatatatatatatatatat __END_OF_TESTDATA__ TestData01.chomp! 
# workaround for Windows TestData01.gsub!(/\r\n/, "\n") class TestTemplate < Test::Unit::TestCase def setup @stream = Bio::FlatFile::BufferedInputStream.new(StringIO.new(TestData01), 'TestData01') @obj = Bio::FlatFile::Splitter::Template.new(TestDataClass, @stream) end def test_skip_leader assert_raise(NotImplementedError) { @obj.skip_leader } end def test_get_entry assert_raise(NotImplementedError) { @obj.get_entry } end def test_entry assert_nothing_raised { @obj.instance_eval { self.entry = 'test' } } assert_equal('test', @obj.entry) end def test_entry_pos_flag # default is nil or false assert(!@obj.entry_pos_flag) # set a value assert_equal(true, @obj.entry_pos_flag = true) assert_equal(true, @obj.entry_pos_flag) end def test_entry_start_pos assert_nothing_raised { @obj.instance_eval { self.entry_start_pos = 123 } } assert_equal(123, @obj.entry_start_pos) end def test_entry_ended_pos assert_nothing_raised { @obj.instance_eval { self.entry_ended_pos = 456 } } assert_equal(456, @obj.entry_ended_pos) end def test_stream assert_equal(@stream, @obj.instance_eval { stream }) end def test_dbclass assert_equal(TestDataClass, @obj.instance_eval { dbclass }) end def test_stream_pos assert_nil(@obj.instance_eval { stream_pos }) @obj.entry_pos_flag = true assert_equal(0, @obj.instance_eval { stream_pos }) @stream.gets assert_not_equal(0, @obj.instance_eval { stream.pos }) end def test_rewind @obj.entry_pos_flag = true @stream.gets assert_not_equal(0, @stream.pos) @obj.rewind assert_equal(0, @stream.pos) end end #class TestTemplate class TestDefault < TestTemplate # < Test::Unit::TestCase def setup @stream = Bio::FlatFile::BufferedInputStream.new(StringIO.new(TestData01), 'TestData01') @obj = Bio::FlatFile::Splitter::Default.new(TestDataClass, @stream) end def test_delimiter assert_equal("\n>", @obj.delimiter) end def test_header assert_equal('>', @obj.header) end def test_delimiter_overrun assert_equal(1, @obj.delimiter_overrun) end def test_skip_leader 
assert_nothing_raised { @obj.skip_leader } assert(@stream.pos > 0) assert_equal('>test1', @stream.gets.chomp) end def test_skip_leader_without_header @obj.header = nil assert_nothing_raised { @obj.skip_leader } assert(@stream.pos > 0) assert_equal('# This is test', @stream.gets.chomp) end def test_get_entry str0 = "\n # This is test\n\n" str1 = ">test1\naaagggtttcccaaagggtttccc\n" str2 = ">testC\ncccccccccccccccccccccccc\n" str3 = ">testG\ngggggggggggggggggggggggg\n" str4 = ">test2\ntttttttttttttttttttttttt\ntttttttttttttttttttttttt\n\n" str5 = ">test3\natatatatatatatatatatatat" assert_equal(str0, @obj.get_entry) assert_equal(str1, @obj.get_entry) assert_equal(str2, @obj.get_entry) assert_equal(str3, @obj.get_entry) assert_equal(str4, @obj.get_entry) assert_equal(str5, @obj.get_entry) assert(@stream.eof?) end def test_get_parsed_entry str1 = ">test1\naaagggtttcccaaagggtttccc\n" str2 = ">testC\ncccccccccccccccccccccccc\n" str3 = ">testG\ngggggggggggggggggggggggg\n" str4 = ">test2\ntttttttttttttttttttttttt\ntttttttttttttttttttttttt\n\n" str5 = ">test3\natatatatatatatatatatatat" @obj.skip_leader assert_equal(TestDataClass.new(str1), @obj.get_parsed_entry) assert_equal(TestDataClass.new(str2), @obj.get_parsed_entry) assert_equal(TestDataClass.new(str3), @obj.get_parsed_entry) assert_equal(TestDataClass.new(str4), @obj.get_parsed_entry) assert_equal(TestDataClass.new(str5), @obj.get_parsed_entry) assert(@stream.eof?) 
end def test_entry str1 = ">test1\naaagggtttcccaaagggtttccc\n" @obj.skip_leader @obj.get_entry assert_equal(str1, @obj.entry) end def test_entry_start_pos_default_nil @obj.skip_leader @obj.get_entry assert_nil(@obj.entry_start_pos) end def test_entry_ended_pos_default_nil @obj.skip_leader @obj.get_entry assert_nil(@obj.entry_ended_pos) end def test_entry_start_pos @obj.entry_pos_flag = true @obj.skip_leader @obj.get_entry assert_equal(25, @obj.entry_start_pos) end def test_entry_ended_pos @obj.entry_pos_flag = true @obj.skip_leader @obj.get_entry assert_equal(57, @obj.entry_ended_pos) end end #class TestDefault class TestLineOriented < TestTemplate # < Test::Unit::TestCase testdata02 = <<__END_OF_DATA__ #this is header line 1 #this is header line 2 test01 1 2 3 test02 4 5 6 test02 7 8 9 test02 10 11 12 test03 13 14 15 test03 16 17 18 __END_OF_DATA__ TestData02 = testdata02.gsub(/\r\n/, "\n") class TestData02Class FLATFILE_SPLITTER = Bio::FlatFile::Splitter::LineOriented LineData = Struct.new(:name, :data) def initialize(str = '') @headers = [] @lines = [] flag_header = true str.each_line do |line| if flag_header then flag_header = add_header_line(line) end unless flag_header then r = add_line(line) end end end attr_reader :headers attr_reader :lines def ==(other) self.headers == other.headers and self.lines == other.lines ? true : false end def add_header_line(line) #puts "add_header_line: #{@headers.inspect} #{line.inspect}" case line when /\A\#/ @headers.push line return self else return false end end def add_line(line) #puts "add_line: #{@lines.inspect} #{line.inspect}" if /\A\s*\z/ =~ line then return @lines.empty? ? self : false end parsed = parse_line(line) if @lines.empty? 
or @lines.first.name == parsed.name then @lines.push parsed return self else return false end end def parse_line(line) LineData.new(*(line.chomp.split(/\s+/, 2))) end private :parse_line end #class TestData02Class def setup @stream = Bio::FlatFile::BufferedInputStream.new(StringIO.new(TestData02), 'TestData02') @obj = Bio::FlatFile::Splitter::LineOriented.new(TestData02Class, @stream) @raw_entries = [ "#this is header line 1\n#this is header line 2\ntest01 1 2 3\n", "test02 4 5 6\ntest02 7 8 9\ntest02 10 11 12\n", "test03 13 14 15\n", "\ntest03 16 17 18\n", ] @entries = @raw_entries.collect do |str| TestData02Class.new(str) end end def test_get_parsed_entry @entries.each do |ent| assert_equal(ent, @obj.get_parsed_entry) end assert_nil(@obj.get_parsed_entry) end def test_get_entry @raw_entries.each do |raw| assert_equal(raw, @obj.get_entry) end assert_nil(@obj.get_entry) end def test_rewind while @obj.get_parsed_entry; end assert_equal(0, @obj.rewind) end def test_flag_to_fetch_header assert(@obj.instance_eval { flag_to_fetch_header }) @obj.get_parsed_entry assert(!@obj.instance_eval { flag_to_fetch_header }) @obj.rewind assert(@obj.instance_eval { flag_to_fetch_header }) end def test_skip_leader assert_nil(@obj.skip_leader) end def test_dbclass assert_equal(TestData02Class, @obj.instance_eval { dbclass }) end def test_entry_start_pos @obj.entry_pos_flag = true @obj.skip_leader @obj.get_entry assert_equal(0, @obj.entry_start_pos) @obj.get_entry assert_equal(59, @obj.entry_start_pos) end def test_entry_ended_pos @obj.entry_pos_flag = true @obj.skip_leader @obj.get_entry assert_equal(59, @obj.entry_ended_pos) @obj.get_entry assert_equal(101, @obj.entry_ended_pos) end end #class TestLineOriented end bio-1.4.3.0001/test/unit/bio/io/test_ddbjxml.rb0000644000004100000410000000446112200110570021016 0ustar www-datawww-data# # test/unit/bio/io/test_ddbjxml.rb - Unit test for DDBJ XML. 
# # Copyright:: Copyright (C) 2005 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/io/ddbjxml' module Bio class TestDDBJXMLConstants < Test::Unit::TestCase def test_constants constants = ["DDBJ", "TxSearch", "ClustalW", "PML", "Gib", "Fasta", "BASE_URI", "SRS", "SERVER_URI", "Gtop", "GetEntry", "Blast", "RequestManager"].sort assert_equal(constants, Bio::DDBJ::XML.constants.sort.collect{|x| x.to_s}) end def test_base_url assert_equal("http://xml.nig.ac.jp/wsdl/", Bio::DDBJ::XML::BASE_URI) end def test_blast_server_rul assert_equal("http://xml.nig.ac.jp/wsdl/Blast.wsdl", Bio::DDBJ::XML::Blast::SERVER_URI) end def test_clustalw_server_url assert_equal("http://xml.nig.ac.jp/wsdl/ClustalW.wsdl", Bio::DDBJ::XML::ClustalW::SERVER_URI) end def test_ddbj_server_url assert_equal("http://xml.nig.ac.jp/wsdl/DDBJ.wsdl", Bio::DDBJ::XML::DDBJ::SERVER_URI) end def test_fasta_server_url assert_equal("http://xml.nig.ac.jp/wsdl/Fasta.wsdl", Bio::DDBJ::XML::Fasta::SERVER_URI) end def test_getentry_server_url assert_equal("http://xml.nig.ac.jp/wsdl/GetEntry.wsdl", Bio::DDBJ::XML::GetEntry::SERVER_URI) end def test_gib_server_url assert_equal("http://xml.nig.ac.jp/wsdl/Gib.wsdl", Bio::DDBJ::XML::Gib::SERVER_URI) end def test_gtop_server_url assert_equal("http://xml.nig.ac.jp/wsdl/Gtop.wsdl", Bio::DDBJ::XML::Gtop::SERVER_URI) end def test_pml_server_url assert_equal("http://xml.nig.ac.jp/wsdl/PML.wsdl", Bio::DDBJ::XML::PML::SERVER_URI) end def test_srs_server_url assert_equal("http://xml.nig.ac.jp/wsdl/SRS.wsdl", Bio::DDBJ::XML::SRS::SERVER_URI) end def test_txsearch_server_url assert_equal("http://xml.nig.ac.jp/wsdl/TxSearch.wsdl", Bio::DDBJ::XML::TxSearch::SERVER_URI) end def test_requestmanager_server_url 
assert_equal("http://xml.nig.ac.jp/wsdl/RequestManager.wsdl", Bio::DDBJ::XML::RequestManager::SERVER_URI) end end end bio-1.4.3.0001/test/unit/bio/io/test_flatfile.rb0000644000004100000410000003166612200110570021167 0ustar www-datawww-data# # = test/unit/bio/io/test_flatfile.rb - unit test for Bio::FlatFile # # Copyright (C) 2006 Naohisa Goto # # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio' require 'stringio' module Bio module TestFlatFile TestDataPath = BioRubyTestDataPath TestData01 = File.join(TestDataPath, 'fasta', 'example1.txt') TestData01Ent1def = "At1g02580 mRNA (2291 bp) UTR's and CDS" TestData01Ent4def = 'At1g65300: mRNA 837bp (shortened from start)' TestData01Ent4naseq = Bio::Sequence::NA.new <<__END_OF_SEQ__ ttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta tgatttttatgatcagattccaaagaaaattcatggttttaatatgaatatgaataaggattcgaatcaaagtatg gttttggatttgaatcaaaatcttaatgatggagaggacgagggcattccttgcatggacaacaacaactaccacc ccgaaatcgattgtctcgctaccgtcaccactgcccccactgatgtttgtgctcctaacatcaccaatgatctcta g __END_OF_SEQ__ # test Bio::FlatFile class method class TestFlatFileClassMethod < Test::Unit::TestCase def setup @filename = TestData01 @klass = Bio::FastaFormat end # test template for Bio::FlatFile.open def open_TestData01(*arg) assert_instance_of(Bio::FlatFile, ff = Bio::FlatFile.open(*arg)) assert_equal(@klass, ff.dbclass) assert_nil(ff.close) end private :open_TestData01 # test template for Bio::FlatFile.open with block def open_with_block_TestData01(*arg) ret = Bio::FlatFile.open(*arg) do |ff| assert_instance_of(Bio::FlatFile, ff) assert_equal(@klass, ff.dbclass) ff.each do |e| assert_instance_of(@klass, e) assert_instance_of(String, ff.entry_raw) end 'test return 
value' end assert_equal('test return value', ret) end private :open_with_block_TestData01 def test_open_0arg assert_raise(ArgumentError) { Bio::FlatFile.open } end def test_open_1arg_nil assert_raise(ArgumentError) { Bio::FlatFile.open(nil) } end def test_open_1arg_class assert_raise(ArgumentError) { Bio::FlatFile.open(Bio::GenBank) } end def test_open_1arg_filename open_TestData01(@filename) end def test_open_1arg_io io = File.open(@filename) open_TestData01(io) assert(io.closed?) end def test_open_1arg_with_block open_with_block_TestData01(@filename) end def test_open_1arg_io_with_block io = File.open(@filename) open_with_block_TestData01(io) # When IO object is given, the IO is NOT automatically closed. assert_equal(false, io.closed?) assert_nothing_raised { io.close } end def test_open_2arg_autodetect open_TestData01(nil, @filename) end def test_open_2arg_autodetect_with_block open_with_block_TestData01(nil, @filename) end def test_open_2arg_autodetect_io io = File.open(@filename) open_TestData01(nil, io) assert(io.closed?) end def test_open_2arg_autodetect_io_with_block io = File.open(@filename) open_with_block_TestData01(nil, io) # When IO object is given, the IO is NOT automatically closed. assert_equal(false, io.closed?) assert_nothing_raised { io.close } end def test_open_2arg_class open_TestData01(@klass, @filename) end def test_open_2arg_class_with_block open_with_block_TestData01(@klass, @filename) end def test_open_2arg_class_io io = File.open(@filename) open_TestData01(@klass, io) assert(io.closed?) end def test_open_2arg_class_io_with_block io = File.open(@filename) open_with_block_TestData01(@klass, io) # When IO object is given, the IO is NOT automatically closed. assert_equal(false, io.closed?) 
assert_nothing_raised { io.close } end def test_open_2arg_filename_mode open_TestData01(@filename, 'r') end def test_open_2arg_filename_mode_with_block open_with_block_TestData01(@filename, 'r') end def test_open_3arg open_TestData01(nil, @filename, 'r') open_TestData01(@klass, @filename, 'r') open_TestData01(@filename, File::RDONLY, 0) end def test_open_3arg_with_block open_with_block_TestData01(nil, @filename, 'r') open_with_block_TestData01(@klass, @filename, 'r') open_with_block_TestData01(@filename, File::RDONLY, 0) end def test_open_4arg open_TestData01(nil, @filename, File::RDONLY, 0) open_TestData01(Bio::FastaFormat, @filename, File::RDONLY, 0) open_with_block_TestData01(nil, @filename, File::RDONLY, 0) open_with_block_TestData01(Bio::FastaFormat, @filename, File::RDONLY, 0) end # test template for Bio::FlatFile.auto def auto_TestData01(*arg) assert_instance_of(Bio::FlatFile, ff = Bio::FlatFile.auto(*arg)) assert_equal(@klass, ff.dbclass) assert_nil(ff.close) end private :auto_TestData01 # test template for Bio::FlatFile.auto with block def auto_with_block_TestData01(*arg) ret = Bio::FlatFile.auto(*arg) do |ff| assert_instance_of(Bio::FlatFile, ff) assert_equal(@klass, ff.dbclass) ff.each do |e| assert_instance_of(@klass, e) assert_instance_of(String, ff.entry_raw) end 'test return value' end assert_equal('test return value', ret) end private :auto_with_block_TestData01 def test_auto_0arg assert_raise(ArgumentError) { Bio::FlatFile.auto } end def test_auto_1arg_filename auto_TestData01(@filename) end def test_auto_1arg_io io = File.open(@filename) auto_TestData01(io) assert(io.closed?) end def test_auto_1arg_with_block auto_with_block_TestData01(@filename) end def test_auto_1arg_io_with_block io = File.open(@filename) auto_with_block_TestData01(io) # When IO object is given, the IO is NOT automatically closed. assert_equal(false, io.closed?) 
assert_nothing_raised { io.close } end def test_auto_2arg_filename_mode auto_TestData01(@filename, 'r') end def test_auto_2arg_filename_mode_with_block auto_with_block_TestData01(@filename, 'r') end def test_auto_3arg auto_TestData01(@filename, File::RDONLY, 0) end def test_auto_3arg_with_block auto_with_block_TestData01(@filename, File::RDONLY, 0) end def test_to_a assert_instance_of(Array, a = Bio::FlatFile.to_a(@filename)) assert_equal(5, a.size) assert_instance_of(Bio::FastaFormat, a[3]) assert_equal(TestData01Ent4def, a[3].definition) assert_equal(TestData01Ent4naseq, a[3].naseq) end def test_foreach Bio::FlatFile.foreach(@filename) do |ent| assert_instance_of(Bio::FastaFormat, ent) end end def test_new_2arg_nil io = File.open(@filename) assert_instance_of(Bio::FlatFile, ff = Bio::FlatFile.new(nil, io)) assert_equal(@klass, ff.dbclass) assert_nil(ff.close) end def test_new_2arg_class io = File.open(@filename) assert_instance_of(Bio::FlatFile, ff = Bio::FlatFile.new(@klass, io)) assert_equal(@klass, ff.dbclass) assert_nil(ff.close) end end #class TestFlatFileClassMethod # test Bio::FlatFile instance methods class TestFlatFileFastaFormat < Test::Unit::TestCase def setup @klass = Bio::FastaFormat @filename = TestData01 @ff = Bio::FlatFile.open(@klass, @filename) end def test_to_io assert_instance_of(File, @ff.to_io) end def test_path assert_equal(@filename, @ff.path) end def test_next_entry assert_instance_of(@klass, ent = @ff.next_entry) assert_equal(TestData01Ent1def, ent.definition) assert_instance_of(@klass, ent = @ff.next_entry) assert_instance_of(@klass, ent = @ff.next_entry) assert_instance_of(@klass, ent = @ff.next_entry) assert_equal(TestData01Ent4def, ent.definition) assert_equal(TestData01Ent4naseq, ent.naseq) end def test_entry_raw 4.times { @ff.next_entry } assert_instance_of(String, str = @ff.entry_raw) assert_equal(TestData01Ent4def, @klass.new(str).definition) assert_equal(TestData01Ent4naseq, @klass.new(str).naseq) end def test_entry_pos_flag # 
default is nil assert_equal(nil, @ff.entry_pos_flag) # set as true assert_equal(true, @ff.entry_pos_flag = true) assert_equal(true, @ff.entry_pos_flag) end def test_start_pos_ended_pos_not_recorded # default is nil assert_equal(nil, @ff.entry_start_pos) # @ff.entry_pos_flag = false @ff.next_entry # nil if not recorded assert_equal(nil, @ff.entry_start_pos) assert_equal(nil, @ff.entry_ended_pos) @ff.next_entry # nil if not recorded assert_equal(nil, @ff.entry_start_pos) assert_equal(nil, @ff.entry_ended_pos) end def test_start_pos @ff.entry_pos_flag = true @ff.next_entry assert_equal(0, @ff.entry_start_pos) @ff.next_entry # On Windows, the values might be different. assert_equal(2367, @ff.entry_start_pos) end def test_ended_pos @ff.entry_pos_flag = true @ff.next_entry # On Windows, the values might be different. assert_equal(2367, @ff.entry_ended_pos) @ff.next_entry # On Windows, the values might be different. assert_equal(3244, @ff.entry_ended_pos) end def test_each_entry i = 0 @ff.each_entry do |ent| assert_instance_of(@klass, ent) i += 1 if i == 4 then assert_equal(TestData01Ent4def, ent.definition) assert_equal(TestData01Ent4naseq, ent.naseq) end end end # each is an alias of each_entry def test_each assert_nothing_raised { @ff.each {} } end def test_rewind @ff.next_entry assert_not_equal(0, @ff.pos) assert_equal(0, @ff.rewind) assert_equal(0, @ff.pos) end def test_close assert_nil(@ff.close) end def test_pos assert_equal(0, @ff.pos) @ff.next_entry assert_not_equal(0, @ff.pos) end def test_eof? 5.times { @ff.next_entry } assert_equal(true, @ff.eof?) 
end def test_raw # default false assert_equal(false, @ff.raw) # changes to true assert_equal(true, @ff.raw = true) @ff.each do |ent| assert_instance_of(String, ent) end end def test_dbclass assert_equal(@klass, @ff.dbclass) end def test_dbclass_eq klass = Bio::FastaNumericFormat assert_equal(klass, @ff.dbclass = klass) assert_equal(klass, @ff.dbclass) end def test_dbclass_nil assert_equal(nil, @ff.dbclass = nil) assert_equal(nil, @ff.dbclass) assert_raise(Bio::FlatFile::UnknownDataFormatError) { @ff.next_entry } end def test_autodetect @ff.dbclass = nil assert_equal(@klass, @ff.autodetect) assert_equal(@klass, @ff.dbclass) end end #class TestFlatFileFastaFormat class TestFlatFileWithCustomClass < Test::Unit::TestCase # very simple parser for tab-separated data class SimpleFormat # delimiter needed for flatfile DELIMITER = RS = nil # nil means no delimiter and reading entire file def initialize(str) @data = str.split(/\n/).collect { |x| x.to_s.split(/\t/) } end attr_reader :data end def test_simpleformat testdata = "AAA\tBBB\tCCCCC\tDDDD\n123\t456\n" testio = StringIO.new(testdata) Bio::FlatFile.open(SimpleFormat, testio) do |ff| ff.each do |entry| assert_equal([ [ 'AAA', 'BBB', 'CCCCC', 'DDDD' ], [ '123', '456' ] ], entry.data) end end end # very simple parser for "//"-separated entries class SimpleFormat2 # delimiter needed for flatfile DELIMITER = RS = "//\n" # the end of each entry is "//\n" def initialize(str) # very simple parser only to store a text data @data = str end attr_reader :data end def test_simpleformat2 testdata = <<__END_OF_TESTDATA__ test01 This is a test. // test02 This is an example. 
// __END_OF_TESTDATA__ a = testdata.split(/(\/\/\n)/) results = [ a[0]+a[1], a[2]+a[3] ] testio = StringIO.new(testdata) Bio::FlatFile.open(SimpleFormat2, testio) do |ff| ff.each do |entry| assert_equal(results.shift, entry.data) end end end end #class TestFlatFileWithCustomClass end #module TestFlatFile end #module Bio bio-1.4.3.0001/test/unit/bio/test_tree.rb0000644000004100000410000006744312200110570017733 0ustar www-datawww-data# # = test/bio/test_tree.rb - unit test for Bio::Tree # # Copyright:: Copyright (C) 2006, 2010 # Naohisa Goto # Copyright (C) 2010 Kazuhiro Hayashi # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 2, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/tree' module Bio class TestTreeEdge < Test::Unit::TestCase def setup @obj = Bio::Tree::Edge.new(123.45) end def test_initialize assert_nothing_raised { Bio::Tree::Edge.new } assert_equal(1.23, Bio::Tree::Edge.new(1.23).distance) assert_equal(12.3, Bio::Tree::Edge.new('12.3').distance) end def test_distance assert_equal(123.45, @obj.distance) end def test_distance_string assert_equal("123.45", @obj.distance_string) end def test_distance=() @obj.distance = 678.9 assert_equal(678.9, @obj.distance) assert_equal("678.9", @obj.distance_string) @obj.distance = nil assert_equal(nil, @obj.distance) assert_equal(nil, @obj.distance_string) end def test_distance_string=() @obj.distance_string = "678.9" assert_equal(678.9, @obj.distance) assert_equal("678.9", @obj.distance_string) @obj.distance_string = nil assert_equal(nil, @obj.distance) assert_equal(nil, @obj.distance_string) end def test_inspect assert_equal("", @obj.inspect) end def test_to_s assert_equal("123.45", @obj.to_s) end end #class TestTreeEdge class TestTreeNode < Test::Unit::TestCase def setup @obj = Bio::Tree::Node.new end def test_initialize assert_nothing_raised { 
Bio::Tree::Node.new } a = nil assert_nothing_raised { a = Bio::Tree::Node.new('mouse') } assert_equal('mouse', a.name) end def test_name assert_equal(nil, @obj.name) @obj.name = 'human' assert_equal('human', @obj.name) end def test_bootstrap assert_equal(nil, @obj.bootstrap) end def test_bootstrap_string assert_equal(nil, @obj.bootstrap_string) end def test_bootstrap=() @obj.bootstrap = 98 assert_equal(98, @obj.bootstrap) assert_equal('98', @obj.bootstrap_string) @obj.bootstrap = nil assert_equal(nil, @obj.bootstrap) assert_equal(nil, @obj.bootstrap_string) end def test_bootstrap_string=() @obj.bootstrap_string = '98' assert_equal(98, @obj.bootstrap) assert_equal('98', @obj.bootstrap_string) @obj.bootstrap_string = '99.98' assert_equal(99.98, @obj.bootstrap) assert_equal('99.98', @obj.bootstrap_string) @obj.bootstrap = nil assert_equal(nil, @obj.bootstrap) assert_equal(nil, @obj.bootstrap_string) end def test_inspect @obj.name = 'human' assert_equal('(Node:"human")', @obj.inspect) @obj.bootstrap = 99.98 assert_equal('(Node:"human" bootstrap=99.98)', @obj.inspect) end def test_to_s @obj.name = 'human' assert_equal('human', @obj.to_s) end end #class TestTreeNode class TestTree < Test::Unit::TestCase def setup @tree = Bio::Tree.new end def test_get_edge_distance edge = Bio::Tree::Edge.new assert_equal(nil, @tree.get_edge_distance(edge)) edge = Bio::Tree::Edge.new(12.34) assert_equal(12.34, @tree.get_edge_distance(edge)) assert_equal(12.34, @tree.get_edge_distance(12.34)) end def test_get_edge_distance_string edge = Bio::Tree::Edge.new assert_equal(nil, @tree.get_edge_distance_string(edge)) edge = Bio::Tree::Edge.new(12.34) assert_equal("12.34", @tree.get_edge_distance_string(edge)) assert_equal("12.34", @tree.get_edge_distance_string(12.34)) end def test_get_node_name node = Bio::Tree::Node.new assert_equal(nil, @tree.get_node_name(node)) node.name = 'human' assert_equal('human', @tree.get_node_name(node)) end def test_initialize assert_nothing_raised { Bio::Tree.new 
} assert_nothing_raised { Bio::Tree.new(@tree) } end def test_root assert_equal(nil, @tree.root) end def test_root=() assert_equal(nil, @tree.root) node = Bio::Tree::Node.new @tree.root = node assert_equal(node, @tree.root) end def test_options assert_equal({}, @tree.options) @tree.options[:bootstrap_style] = :traditional assert_equal(:traditional, @tree.options[:bootstrap_style]) end end #class TestTree class TestTree2 < Test::Unit::TestCase def setup # Note that below data is NOT real. The distances are random. @tree = Bio::Tree.new @mouse = Bio::Tree::Node.new('mouse') @rat = Bio::Tree::Node.new('rat') @rodents = Bio::Tree::Node.new('rodents') @human = Bio::Tree::Node.new('human') @chimpanzee = Bio::Tree::Node.new('chimpanzee') @primates = Bio::Tree::Node.new('primates') @mammals = Bio::Tree::Node.new('mammals') @nodes = [ @mouse, @rat, @rodents, @human, @chimpanzee, @primates, @mammals ] @edge_rodents_mouse = Bio::Tree::Edge.new(0.0968) @edge_rodents_rat = Bio::Tree::Edge.new(0.1125) @edge_mammals_rodents = Bio::Tree::Edge.new(0.2560) @edge_primates_human = Bio::Tree::Edge.new(0.0386) @edge_primates_chimpanzee = Bio::Tree::Edge.new(0.0503) @edge_mammals_primates = Bio::Tree::Edge.new(0.2235) @edges = [ [ @rodents, @mouse, @edge_rodents_mouse ], [ @rodents, @rat, @edge_rodents_rat ], [ @mammals, @rodents, @edge_mammals_rodents ], [ @primates, @human, @edge_primates_human ], [ @primates, @chimpanzee, @edge_primates_chimpanzee ], [ @mammals, @primates, @edge_mammals_primates ] ] @edges.each do |a| @tree.add_edge(*a) end @by_id = Proc.new { |a, b| a.__id__ <=> b.__id__ } end def test_clear assert_nothing_raised { @tree.clear } assert_equal(0, @tree.number_of_nodes) assert_equal(0, @tree.number_of_edges) end def test_nodes nodes = @nodes.sort(&@by_id) assert_equal(nodes, @tree.nodes.sort(&@by_id)) end def test_number_of_nodes assert_equal(7, @tree.number_of_nodes) end def test_each_node @tree.each_node do |x| assert_not_nil(@nodes.delete(x)) end assert_equal(true, 
@nodes.empty?) end def test_each_edge @tree.each_edge do |source, target, edge| assert_not_nil(@edges.delete([ source, target, edge ])) end assert_equal(true, @edges.empty?) end def test_edges edges = @edges.sort { |a, b| a[-1].distance <=> b[-1].distance } assert_equal(edges, @tree.edges.sort { |a, b| a[-1].distance <=> b[-1].distance }) end def test_number_of_edges assert_equal(@edges.size, @tree.number_of_edges) end def test_adjacent_nodes assert_equal([ @rodents ], @tree.adjacent_nodes(@mouse)) assert_equal([ @rodents ], @tree.adjacent_nodes(@rat)) assert_equal([ @primates ], @tree.adjacent_nodes(@human)) assert_equal([ @primates ], @tree.adjacent_nodes(@chimpanzee)) assert_equal([ @mouse, @rat, @mammals ].sort(&@by_id), @tree.adjacent_nodes(@rodents).sort(&@by_id)) assert_equal([ @human, @chimpanzee, @mammals ].sort(&@by_id), @tree.adjacent_nodes(@primates).sort(&@by_id)) assert_equal([ @rodents, @primates ].sort(&@by_id), @tree.adjacent_nodes(@mammals).sort(&@by_id)) end def test_adjacent_nodes_nonexistent # test for not existed nodes assert_equal([], @tree.adjacent_nodes(Bio::Tree::Node.new)) end def test_out_edges assert_equal([[ @mouse, @rodents, @edge_rodents_mouse ]], @tree.out_edges(@mouse)) assert_equal([[ @rat, @rodents, @edge_rodents_rat ]], @tree.out_edges(@rat)) assert_equal([[ @human, @primates, @edge_primates_human ]], @tree.out_edges(@human)) assert_equal([[ @chimpanzee, @primates, @edge_primates_chimpanzee ]], @tree.out_edges(@chimpanzee)) end def test_out_edges_rodents adjacents = [ @mouse, @rat, @mammals ] edges = [ @edge_rodents_mouse, @edge_rodents_rat, @edge_mammals_rodents ] @tree.out_edges(@rodents).each do |a| assert_equal(@rodents, a[0]) assert_not_nil(i = adjacents.index(a[1])) assert_equal(edges[i], a[2]) adjacents.delete_at(i) edges.delete_at(i) end assert_equal(true, adjacents.empty?) assert_equal(true, edges.empty?) 
end def test_out_edges_primates adjacents = [ @human, @chimpanzee, @mammals ] edges = [ @edge_primates_human, @edge_primates_chimpanzee, @edge_mammals_primates ] @tree.out_edges(@primates).each do |a| assert_equal(@primates, a[0]) assert_not_nil(i = adjacents.index(a[1])) assert_equal(edges[i], a[2]) adjacents.delete_at(i) edges.delete_at(i) end assert_equal(true, adjacents.empty?) assert_equal(true, edges.empty?) end def test_out_edges_mammals adjacents = [ @rodents, @primates ] edges = [ @edge_mammals_rodents, @edge_mammals_primates ] @tree.out_edges(@mammals).each do |a| assert_equal(@mammals, a[0]) assert_not_nil(i = adjacents.index(a[1])) assert_equal(edges[i], a[2]) adjacents.delete_at(i) edges.delete_at(i) end assert_equal(true, adjacents.empty?) assert_equal(true, edges.empty?) end def test_out_edges_nonexistent # test for not existed nodes assert_equal([], @tree.out_edges(Bio::Tree::Node.new)) end def test_each_out_edge flag = nil r = @tree.each_out_edge(@mouse) do |src, tgt, edge| assert_equal(@mouse, src) assert_equal(@rodents, tgt) assert_equal(@edge_rodents_mouse, edge) flag = true end assert_equal(@tree, r) assert_equal(true, flag) end def test_each_out_edge_rat flag = nil r = @tree.each_out_edge(@rat) do |src, tgt, edge| assert_equal(@rat, src) assert_equal(@rodents, tgt) assert_equal(@edge_rodents_rat, edge) flag = true end assert_equal(@tree, r) assert_equal(true, flag) end def test_each_out_edge_human flag = nil r = @tree.each_out_edge(@human) do |src, tgt, edge| assert_equal(@human, src) assert_equal(@primates, tgt) assert_equal(@edge_primates_human, edge) flag = true end assert_equal(@tree, r) assert_equal(true, flag) end def test_each_out_edge_chimpanzee flag = nil r = @tree.each_out_edge(@chimpanzee) do |src, tgt, edge| assert_equal(@chimpanzee, src) assert_equal(@primates, tgt) assert_equal(@edge_primates_chimpanzee, edge) flag = true end assert_equal(@tree, r) assert_equal(true, flag) end def test_each_out_edge_rodents adjacents = [ @mouse, 
@rat, @mammals ] edges = [ @edge_rodents_mouse, @edge_rodents_rat, @edge_mammals_rodents ] @tree.each_out_edge(@rodents) do |src, tgt, edge| assert_equal(@rodents, src) assert_not_nil(i = adjacents.index(tgt)) assert_equal(edges[i], edge) adjacents.delete_at(i) edges.delete_at(i) end assert_equal(true, adjacents.empty?) assert_equal(true, edges.empty?) end def test_each_out_edge_primates adjacents = [ @human, @chimpanzee, @mammals ] edges = [ @edge_primates_human, @edge_primates_chimpanzee, @edge_mammals_primates ] @tree.each_out_edge(@primates) do |src, tgt, edge| assert_equal(@primates, src) assert_not_nil(i = adjacents.index(tgt)) assert_equal(edges[i], edge) adjacents.delete_at(i) edges.delete_at(i) end assert_equal(true, adjacents.empty?) assert_equal(true, edges.empty?) end def test_each_out_edge_mammals adjacents = [ @rodents, @primates ] edges = [ @edge_mammals_rodents, @edge_mammals_primates ] @tree.each_out_edge(@mammals) do |src, tgt, edge| assert_equal(@mammals, src) assert_not_nil(i = adjacents.index(tgt)) assert_equal(edges[i], edge) adjacents.delete_at(i) edges.delete_at(i) end assert_equal(true, adjacents.empty?) assert_equal(true, edges.empty?) 
end def test_each_out_edge_nonexistent # test for not existed nodes flag = nil node = Bio::Tree::Node.new r = @tree.each_out_edge(node) do |src, tgt, edge| flag = true end assert_equal(@tree, r) assert_equal(nil, flag) end def test_out_degree assert_equal(1, @tree.out_degree(@mouse)) assert_equal(1, @tree.out_degree(@rat)) assert_equal(3, @tree.out_degree(@rodents)) assert_equal(1, @tree.out_degree(@human)) assert_equal(1, @tree.out_degree(@chimpanzee)) assert_equal(3, @tree.out_degree(@primates)) assert_equal(2, @tree.out_degree(@mammals)) end def test_out_degree_nonexistent assert_equal(0, @tree.out_degree(Bio::Tree::Node.new)) end def test_get_edge assert_not_nil(@tree.get_edge(@rodents, @mouse)) assert_not_nil(@tree.get_edge(@mouse, @rodents)) assert_equal(@edge_rodents_mouse, @tree.get_edge(@rodents, @mouse)) assert_equal(@edge_rodents_mouse, @tree.get_edge(@mouse, @rodents)) assert_not_nil(@tree.get_edge(@rodents, @rat)) assert_not_nil(@tree.get_edge(@rat, @rodents)) assert_equal(@edge_rodents_rat, @tree.get_edge(@rodents, @rat)) assert_equal(@edge_rodents_rat, @tree.get_edge(@rat, @rodents)) assert_not_nil(@tree.get_edge(@mammals, @rodents)) assert_not_nil(@tree.get_edge(@rodents, @mammals)) assert_equal(@edge_mammals_rodents, @tree.get_edge(@mammals, @rodents)) assert_equal(@edge_mammals_rodents, @tree.get_edge(@rodents, @mammals)) assert_not_nil(@tree.get_edge(@primates, @human)) assert_not_nil(@tree.get_edge(@human, @primates)) assert_equal(@edge_primates_human, @tree.get_edge(@primates, @human)) assert_equal(@edge_primates_human, @tree.get_edge(@human, @primates)) assert_not_nil(@tree.get_edge(@primates, @chimpanzee)) assert_not_nil(@tree.get_edge(@chimpanzee, @primates)) assert_equal(@edge_primates_chimpanzee, @tree.get_edge(@primates, @chimpanzee)) assert_equal(@edge_primates_chimpanzee, @tree.get_edge(@chimpanzee, @primates)) assert_not_nil(@tree.get_edge(@mammals, @primates)) assert_not_nil(@tree.get_edge(@primates, @mammals)) 
assert_equal(@edge_mammals_primates, @tree.get_edge(@mammals, @primates)) assert_equal(@edge_mammals_primates, @tree.get_edge(@primates, @mammals)) end def test_get_edge_indirect assert_nil(@tree.get_edge(@mouse, @rat)) assert_nil(@tree.get_edge(@human, @chimpanzee)) end def test_get_edge_nonexistent assert_nil(@tree.get_edge(@mouse, Bio::Tree::Node.new)) end def test_get_node_by_name assert_not_nil(@tree.get_node_by_name('mouse')) assert_not_nil(@tree.get_node_by_name('rat')) assert_not_nil(@tree.get_node_by_name('human')) assert_not_nil(@tree.get_node_by_name('chimpanzee')) assert_equal(@mouse, @tree.get_node_by_name('mouse')) assert_equal(@rat, @tree.get_node_by_name('rat')) assert_equal(@human, @tree.get_node_by_name('human')) assert_equal(@chimpanzee, @tree.get_node_by_name('chimpanzee')) end def test_get_node_by_name_noexistent assert_nil(@tree.get_node_by_name('frog')) end def test_add_edge amphibian = Bio::Tree::Node.new('amphibian') edge = Bio::Tree::Edge.new(0.3123) assert_equal(edge, @tree.add_edge(@mammals, amphibian, edge)) frog = Bio::Tree::Node.new('frog') newt = Bio::Tree::Node.new('newt') assert_instance_of(Bio::Tree::Edge, @tree.add_edge(frog, newt)) end def test_add_node frog = Bio::Tree::Node.new('frog') # the node does not exist assert_nil(@tree.get_node_by_name('frog')) assert_equal(false, @tree.include?(frog)) # add node assert_equal(@tree, @tree.add_node(frog)) # the node exists assert_equal(frog, @tree.get_node_by_name('frog')) assert_equal(true, @tree.include?(frog)) end def test_include? 
assert_equal(true, @tree.include?(@mouse)) assert_equal(true, @tree.include?(@rat)) assert_equal(true, @tree.include?(@rodents)) assert_equal(true, @tree.include?(@human)) assert_equal(true, @tree.include?(@chimpanzee)) assert_equal(true, @tree.include?(@primates)) assert_equal(true, @tree.include?(@mammals)) end def test_include_nonexistent assert_equal(false, @tree.include?(Bio::Tree::Node.new)) end def test_clear_node assert_equal(2, @tree.out_degree(@mammals)) # clear node assert_equal(@tree, @tree.clear_node(@mammals)) # checks assert_equal(true, @tree.include?(@mammals)) assert_equal(0, @tree.out_degree(@mammals)) assert_equal(2, @tree.out_degree(@rodents)) assert_equal(2, @tree.out_degree(@primates)) end def test_clear_node_nonexistent assert_raise(IndexError) { @tree.clear_node(Bio::Tree::Node.new) } end def test_remove_node assert_equal(2, @tree.out_degree(@mammals)) # remove node assert_equal(@tree, @tree.remove_node(@mammals)) # checks assert_equal(false, @tree.include?(@mammals)) assert_equal(0, @tree.out_degree(@mammals)) assert_equal(2, @tree.out_degree(@rodents)) assert_equal(2, @tree.out_degree(@primates)) end def test_remove_node_nonexistent assert_raise(IndexError) { @tree.remove_node(Bio::Tree::Node.new) } end def test_remove_node_if assert_equal(@tree, @tree.remove_node_if { |node| node == @mouse }) assert_equal(false, @tree.include?(@mouse)) end def test_remove_node_if_false ary = [] assert_equal(@tree, @tree.remove_node_if { |node| ary << node; false }) nodes = @nodes.sort(&@by_id) assert_equal(nodes, ary.sort(&@by_id)) assert_equal(nodes, @tree.nodes.sort(&@by_id)) end def test_remove_edge assert_not_nil(@tree.get_edge(@mouse, @rodents)) assert_equal(@tree, @tree.remove_edge(@mouse, @rodents)) assert_nil(@tree.get_edge(@mouse, @rodents)) end def test_remove_edge_nonexistent assert_raise(IndexError) { @tree.remove_edge(@mouse, @rat) } end def test_remove_edge_if ret = nil assert_nothing_raised { ret = @tree.remove_edge_if do |source, target, 
edge| [ source.name, target.name ].sort == [ 'mouse', 'rodents' ] end } assert_equal(@tree, ret) assert_nil(@tree.get_edge(@mouse, @rodents)) end def test_remove_edge_if_nothing_removed ret = nil by_id_2 = Proc.new { |x,y| x[2].__id__ <=> y[2].__id__ } orig_edges = @tree.edges.sort(&by_id_2) assert_nothing_raised { ret = @tree.remove_node_if { |edge| false } } assert_equal(@tree, ret) assert_equal(orig_edges, @tree.edges.sort(&by_id_2)) end def test_collect_node! ret = nil newmouse = Bio::Tree::Node.new('MOUSE') newhuman = Bio::Tree::Node.new('HUMAN') assert_nothing_raised { ret = @tree.collect_node! do |node| case node.name when 'mouse' newmouse when 'human' newhuman else node end end } assert_equal(@tree, ret) assert(@tree.include?(newmouse)) assert(!@tree.include?(@mouse)) assert(@tree.include?(newhuman)) assert(!@tree.include?(@human)) nodes = [ newmouse, @rat, @rodents, newhuman, @chimpanzee, @primates, @mammals ].sort(&@by_id) assert_equal(nodes, @tree.nodes.sort(&@by_id)) end def test_collect_edge! ret = nil newedge_rodents_mouse = Bio::Tree::Edge.new(100.0) newedge_primates_human = Bio::Tree::Edge.new(200.0) assert_nothing_raised { ret = @tree.collect_edge! 
do |source, target, edge| case [ source.name, target.name ].sort when [ 'mouse', 'rodents' ] newedge_rodents_mouse when [ 'human', 'primates' ] newedge_primates_human else edge end end } assert_equal(@tree, ret) assert_equal(newedge_rodents_mouse, @tree.get_edge(@mouse, @rodents)) assert_equal(newedge_primates_human, @tree.get_edge(@human, @primates)) expected = [ newedge_rodents_mouse, @edge_rodents_rat, @edge_mammals_rodents, newedge_primates_human, @edge_primates_chimpanzee, @edge_mammals_primates ].sort(&@by_id) assert_equal(expected, @tree.edges.collect { |x| x[-1] }.sort(&@by_id)) end def test_get_edge_merged edge1 = Bio::Tree::Edge.new(12.34) edge2 = Bio::Tree::Edge.new(56.78) merged_edge = @tree.get_edge_merged(edge1, edge2) # struggle to avoid possible float problem expected = 12.34 + 56.78 assert_equal(expected, merged_edge.distance) end def test_get_node_bootstrap node = Bio::Tree::Node.new("test") node.bootstrap = 1 assert_equal(1, @tree.get_node_bootstrap(node)) end def test_get_node_bootstrap_string= node = Bio::Tree::Node.new("test") node.bootstrap_string = "0.75" assert_equal(0.75, @tree.get_node_bootstrap(node)) end def test_subtree newtree = nil assert_nothing_raised { newtree = @tree.subtree([ @mouse, @rodents, @human ]) } assert_equal(3, newtree.number_of_nodes) assert_equal(1, newtree.number_of_edges) nodes = [ @mouse, @rodents, @human ].sort(&@by_id) assert_equal(nodes, newtree.nodes.sort(&@by_id)) edges = [ @edge_rodents_mouse ].sort(&@by_id) assert_equal(edges, newtree.edges.collect { |x| x[-1] }.sort(&@by_id)) end def test_subtree_with_all_paths newtree = nil assert_nothing_raised { newtree = @tree.subtree_with_all_paths([ @mouse, @rodents, @human ]) } assert_equal(5, newtree.number_of_nodes) assert_equal(4, newtree.number_of_edges) nodes = [ @mouse, @rodents, @mammals, @primates, @human ].sort(&@by_id) assert_equal(nodes, newtree.nodes.sort(&@by_id)) edges = [ @edge_rodents_mouse, @edge_mammals_rodents, @edge_mammals_primates, 
@edge_primates_human ].sort(&@by_id) assert_equal(edges, newtree.edges.collect { |x| x[-1] }.sort(&@by_id)) end def test_concat tree2 = Bio::Tree.new node1 = Bio::Tree::Node.new('node1') node2 = Bio::Tree::Node.new('node2') edge0 = Bio::Tree::Edge.new(0.1) tree2.add_edge(node1, node2, edge0) ret = nil assert_nothing_raised { ret = @tree.concat(tree2) } assert_equal(@tree, ret) assert_equal(9, @tree.number_of_nodes) assert_equal(7, @tree.number_of_edges) nodes = (@nodes + [ node1, node2 ]).sort(&@by_id) assert_equal(nodes, @tree.nodes.sort(&@by_id)) edges = (@edges.collect { |x| x[-1] } + [ edge0 ]).sort(&@by_id) assert_equal(edges, @tree.edges.collect { |x| x[-1] }.sort(&@by_id)) end def test_path expected = [ @mouse, @rodents, @mammals, @primates, @human ] assert_equal(expected, @tree.path(@mouse, @human)) end #Passed cache_* methods because of internal methods def test_parent assert_equal(@rodents, @tree.parent(@mouse, @mammals)) end def test_children expected = [ @primates, @rodents ].sort(&@by_id) assert_equal(expected, @tree.children(@mammals, @mammals).sort(&@by_id)) end def test_descendents expected = [ @primates, @rodents, @human, @chimpanzee, @mouse, @rat ].sort(&@by_id) assert_equal(expected, @tree.descendents(@mammals, @mammals).sort(&@by_id)) end def test_leaves_noargs expected = [ @chimpanzee, @human, @mouse, @rat ].sort(&@by_id) @tree.root = @mammals assert_equal(expected, @tree.leaves.sort(&@by_id)) end def test_leaves expected = [ @chimpanzee, @human, @mouse, @rat ].sort(&@by_id) assert_equal(expected, @tree.leaves(@mammals, @mammals).sort(&@by_id)) end def test_ancestors expected = [ @rodents, @mammals ].sort(&@by_id) assert_equal(expected, @tree.ancestors(@mouse, @mammals).sort(&@by_id)) end def test_lowest_common_ancestor assert_equal(@rodents, @tree.lowest_common_ancestor(@mouse, @rat, @mammals)) end def test_total_distance assert_equal("0.7777", sprintf("%.4f", @tree.total_distance)) end def test_distance_matrix mat = @tree.distance_matrix([ 
@mouse, @rat, @human, @chimpanzee ]) assert_instance_of(Matrix, mat) assert_equal([ "0.0000", "0.2093", "0.6149", "0.6266" ], mat.row(0).map { |x| sprintf("%.4f", x) }.to_a) assert_equal([ "0.2093", "0.0000", "0.6306", "0.6423" ], mat.row(1).map { |x| sprintf("%.4f", x) }.to_a) assert_equal([ "0.6149", "0.6306", "0.0000", "0.0889" ], mat.row(2).map { |x| sprintf("%.4f", x) }.to_a) assert_equal([ "0.6266", "0.6423", "0.0889", "0.0000" ], mat.row(3).map { |x| sprintf("%.4f", x) }.to_a) end def test_adjacency_matrix a = [ [ 0, nil, @edge_rodents_mouse, nil, nil, nil, nil ], [ nil, 0, @edge_rodents_rat, nil, nil, nil, nil ], [ @edge_rodents_mouse, @edge_rodents_rat, 0, nil, nil, nil, @edge_mammals_rodents ], [ nil, nil, nil, 0, nil, @edge_primates_human, nil ], [ nil, nil, nil, nil, 0, @edge_primates_chimpanzee, nil ], [ nil, nil, nil, @edge_primates_human, @edge_primates_chimpanzee, 0, @edge_mammals_primates ], [ nil, nil, @edge_mammals_rodents, nil, nil, @edge_mammals_primates, 0 ] ] expected = Matrix.rows(a, false) assert_equal(expected, @tree.adjacency_matrix(@nodes, nil, 0)) end def test_adjacency_matrix_with_block a = [ [ 0, nil, "0.0968", nil, nil, nil, nil ], [ nil, 0, "0.1125", nil, nil, nil, nil ], [ "0.0968", "0.1125", 0, nil, nil, nil, "0.256" ], [ nil, nil, nil, 0, nil, "0.0386", nil ], [ nil, nil, nil, nil, 0, "0.0503", nil ], [ nil, nil, nil, "0.0386", "0.0503", 0, "0.2235" ], [ nil, nil, "0.256", nil, nil, "0.2235", 0 ] ] expected = Matrix.rows(a, false) assert_equal(expected, @tree.adjacency_matrix(@nodes, nil, 0) { |src, tgt, edge| sprintf("%.15g", edge.distance) }) end def test_remove_nonsense_nodes assert_equal([ @mammals ], @tree.remove_nonsense_nodes) end def test_insert_node node1 = Bio::Tree::Node.new('node1') ret = nil assert_nothing_raised { ret = @tree.insert_node(@mouse, @rodents, node1, 0.0123) } assert_equal(@tree, ret) distance_mouse_node1 = @tree.get_edge(@mouse, node1).distance assert_equal("0.0123", sprintf("%.4f", 
distance_mouse_node1)) distance_node1_rodents = @tree.get_edge(node1, @rodents).distance assert_equal("0.0845", sprintf("%.4f", distance_node1_rodents)) end end #class TestTree2 end #module Bio bio-1.4.3.0001/test/unit/bio/test_command.rb0000644000004100000410000002634612200110570020407 0ustar www-datawww-data# # test/unit/bio/test_command.rb - Unit test for Bio::Command # # Copyright:: Copyright (C) 2005-2008 # Mitsuteru Nakao , # Naohisa Goto , # Toshiaki Katayama # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 2, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/command' module Bio class TestCommand < Test::Unit::TestCase def windows_platform? Bio::Command.module_eval { windows_platform? } end private :windows_platform? def test_command_constants assert(Bio::Command::UNSAFE_CHARS_UNIX) assert(Bio::Command::QUOTE_CHARS_WINDOWS) assert(Bio::Command::UNESCAPABLE_CHARS) end def test_escape_shell_windows str = "bio_ruby.123@456:789" assert_equal("bio_ruby.123@456:789", Bio::Command.escape_shell_windows(str)) str = "bio\'\"r u\"b\\y123@456:789" assert_equal("\"bio'\"\"r u\"\"b\\y123@456:789\"", Bio::Command.escape_shell_windows(str)) end def test_escape_shell_unix str = "bio_ruby.123@456:789" assert_equal("bio_ruby.123@456:789", Bio::Command.escape_shell_unix(str)) str = "bio\'\"r u\"b\\y123@456:789" assert_equal("bio\\'\\\"r\\ u\\\"b\\\\y123@456:789", Bio::Command.escape_shell_unix(str)) end def test_escape_shell str = "bio_ruby.123@456:789" assert_equal("bio_ruby.123@456:789", Bio::Command.escape_shell(str)) str = "bio\'\"r u\"b\\y123@456:789" if windows_platform? # mswin32, bccwin32, mingw32, etc. 
assert_equal("\"bio'\"\"r u\"\"b\\y123@456:789\"", Bio::Command.escape_shell(str)) else assert_equal("bio\\'\\\"r\\ u\\\"b\\\\y123@456:789", Bio::Command.escape_shell(str)) end end def test_make_command_line ary = [ "ruby", "test.rb", "atgcatgc", "bio\'\"r u\"b\\y123@456:789" ] if windows_platform? # mswin32, bccwin32, mingw32, etc. assert_equal("ruby" + " test.rb atgcatgc" + " \"bio'\"\"r u\"\"b\\y123@456:789\"", Bio::Command.make_command_line(ary)) else assert_equal("ruby" + " test.rb atgcatgc" + " bio\\'\\\"r\\ u\\\"b\\\\y123@456:789", Bio::Command.make_command_line(ary)) end end def test_make_command_line_windows ary = [ "C:\\Program Files\\Ruby\\bin\\ruby.exe", "test.rb", "atgcatgc", "bio\'\"r u\"b\\y123@456:789" ] assert_equal("\"C:\\Program Files\\Ruby\\bin\\ruby.exe\"" + " test.rb atgcatgc" + " \"bio'\"\"r u\"\"b\\y123@456:789\"", Bio::Command.make_command_line_windows(ary)) end def test_make_command_line_unix ary = [ "/usr/local/bin/ruby", "test.rb", "atgcatgc", "bio\'\"r u\"b\\y123@456:789" ] assert_equal("/usr/local/bin/ruby" + " test.rb atgcatgc" + " bio\\'\\\"r\\ u\\\"b\\\\y123@456:789", Bio::Command.make_command_line_unix(ary)) end def test_safe_command_line_array ary1 = [ 'test' ] assert_equal([ [ 'test', 'test' ] ], Bio::Command.safe_command_line_array(ary1)) ary1a = [ [ 'test/test1a', 'test' ] ] assert_equal(ary1a, Bio::Command.safe_command_line_array(ary1a)) end def test_safe_command_line_array_passthrough ary0 = [] assert_equal(ary0, Bio::Command.safe_command_line_array(ary0)) ary2 = [ 'cmd', 'arg0' ] assert_equal(ary2, Bio::Command.safe_command_line_array(ary2)) ary2a = [ [ 'cmd', 'display name' ], 'arg0' ] assert_equal(ary2a, Bio::Command.safe_command_line_array(ary2a)) ary3 = [ 'cmd', 'arg0', 'arg1' ] assert_equal(ary3, Bio::Command.safe_command_line_array(ary3)) end def test_make_cgi_params_by_hash_in_symbol ary = [ "type1=bp", "type2=bp", "downstream=", "upstream=", "format=fasta", "options=similarity", "options=gene", "action=export", 
"_format=Text", "output=txt", "submit=Continue+%3E%3E", "ab%3Dcd%26ef%3Dgh%23ij=pq%3D12%26rs%3D34%23tu", ] hash = { :type1 => 'bp', :type2 => 'bp', :downstream => '', :upstream => '', :format => 'fasta', :options => ['similarity', 'gene'], :action => 'export', :_format => 'Text', :output => 'txt', :submit => 'Continue >>', :"ab=cd&ef=gh#ij" => 'pq=12&rs=34#tu', } result = Bio::Command.make_cgi_params(hash) ary.each do |str| assert_match(Regexp.new(Regexp.escape(str)), result) end # round-trip test result_hash = {} CGI.parse(result).each do |k, v| v = case v.size when 0 '' when 1 v[0] else v end result_hash[k.intern] = v end assert_equal(hash, result_hash) end def test_make_cgi_params_by_hash_in_string ary = [ "type1=bp", "type2=bp", "downstream=", "upstream=", "format=fasta", "options=similarity", "options=gene", "action=export", "_format=Text", "output=txt", "submit=Continue+%3E%3E", "ab%3Dcd%26ef%3Dgh%23ij=pq%3D12%26rs%3D34%23tu", ] hash = { "type1" => 'bp', "type2" => 'bp', "downstream" => '', "upstream" => '', "format" => 'fasta', "options" => ['similarity', 'gene'], "action" => 'export', "_format" => 'Text', "output" => 'txt', "submit" => 'Continue >>', 'ab=cd&ef=gh#ij' => 'pq=12&rs=34#tu', } result = Bio::Command.make_cgi_params(hash) ary.each do |str| assert_match(Regexp.new(Regexp.escape(str)), result) end # round-trip test result_hash = {} CGI.parse(result).each do |k, v| v = case v.size when 0 '' when 1 v[0] else v end result_hash[k] = v end assert_equal(hash, result_hash) end def test_make_cgi_params_by_array_of_array ary = [ "type1=bp", "type2=bp", "downstream=", "upstream=", "format=fasta", "options=similarity", "options=gene", "action=export", "_format=Text", "output=txt", "submit=Continue+%3E%3E", "ab%3Dcd%26ef%3Dgh%23ij=pq%3D12%26rs%3D34%23tu", ] array_of_array = [ ["type1", 'bp'], ["type2", 'bp'], ["downstream", ''], ["upstream", ''], ["format", 'fasta'], ["options", ['similarity', 'gene']], ["action", 'export'], ["_format", 'Text'], ["output", 
'txt'], ["submit", 'Continue >>'], [ 'ab=cd&ef=gh#ij', 'pq=12&rs=34#tu' ], ] result = Bio::Command.make_cgi_params(array_of_array) # When array of array, order is guaranteed. assert_equal(ary.join('&'), result) # round-trip test result_array = [] CGI.parse(result).each do |k, v| v = case v.size when 0 '' when 1 v[0] else v end result_array.push([ k, v ]) end assert_equal(array_of_array.sort, result_array.sort) end def test_make_cgi_params_by_array_of_hash ary = [ "type1=bp", "type2=bp", "downstream=", "upstream=", "format=fasta", "options=similarity", "options=gene", "action=export", "_format=Text", "output=txt", "submit=Continue+%3E%3E", "ab%3Dcd%26ef%3Dgh%23ij=pq%3D12%26rs%3D34%23tu", ] array_of_hash = [ {"type1" => 'bp'}, {"type2" => 'bp'}, {"downstream" => ''}, {"upstream" => ''}, {"format" => 'fasta'}, {"options" => ['similarity', 'gene']}, {"action" => 'export'}, {"_format" => 'Text'}, {"output" => 'txt'}, {"submit" => 'Continue >>'}, {'ab=cd&ef=gh#ij' => 'pq=12&rs=34#tu'}, ] result = Bio::Command.make_cgi_params(array_of_hash) # When array of hash, order is guaranteed. assert_equal(ary.join('&'), result) # round-trip test result_array = [] CGI.parse(result).each do |k, v| v = case v.size when 0 '' when 1 v[0] else v end result_array.push({ k => v }) end assert_equal(array_of_hash.sort { |x,y| x.keys[0] <=> y.keys[0] }, result_array.sort { |x,y| x.keys[0] <=> y.keys[0] }) end def test_make_cgi_params_by_array_of_string str = "type1=bp&type2=bp&downstream=&upstream=&format=fasta&options=similarity&options=gene&action=export&_format=Text&output=txt&submit=Continue+%3E%3E&ab=cd%26ef%3Dgh%23ij%3Dpq%3D12%26rs%3D34%23tu" array_of_string = [ "type1=bp", "type2=bp", "downstream=", "upstream=", "format=fasta", "options=similarity", "options=gene", "action=export", "_format=Text", "output=txt", "submit=Continue >>", # In the following case, 'ab' is regarded as # the form key, and rest of the string is # regarded as the value. 
'ab=cd&ef=gh#ij=pq=12&rs=34#tu', ] result = Bio::Command.make_cgi_params(array_of_string) assert_equal(str, result) end def test_make_cgi_params_by_string string = "type1=bp&type2=bp&downstream=&upstream=&format=fasta&options=similarity&options=gene&action=export&_format=Text&output=txt&submit=Continue%20%3E%3E" # In this case, only URI escaping is performed. query = " type1=bp&type2=bp&downstream=&upstream=&format=fasta&options=similarity&options=gene&action=export&_format=Text&output=txt&submit=Continue >> " result = Bio::Command.make_cgi_params(query) assert_equal(string, result) end end end bio-1.4.3.0001/test/unit/bio/db/0000755000004100000410000000000012200110570015757 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/db/pdb/0000755000004100000410000000000012200110570016524 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/db/pdb/test_pdb.rb0000644000004100000410000031025512200110570020663 0ustar www-datawww-data# # = test/unit/bio/db/pdb/test_pdb.rb - Unit test for Bio::PDB classes # # Copyright:: Copyright (C) 2010 Kazuhiro Hayashi # Copyright:: Copyright (C) 2006 Naohisa Goto # # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' require 'matrix' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio' module Bio #This class tests Bio::PDB class. #The sample record isn't sufficient because it cannot pass through all of the case statement... 
class TestPDB < Test::Unit::TestCase def setup str =<",@pdb.inspect) end def test_jrnl assert_instance_of(Hash, @pdb.jrnl) end def test_keywords assert_equal(["OXIDOREDUCTASE", "ALDH", "E487K", "ROSSMANN FOLD", "ALDA-1"],@pdb.keywords) end def test_remark str =< {:remarkNum=>1, :sub_record=>"AUTH", :authorList=>["C.H.CHEN", "G.R.BUDAS", "E.N.CHURCHILL", "M.H.DISATNIK"]}, 2=>[], 3=>[]} obj = Bio::PDB.new(str) actual = { 1 => {:remarkNum=>obj.remark[1][0].remarkNum, :sub_record=>obj.remark[1][0].sub_record, :authorList=>obj.remark[1][0].authorList}, 2=>obj.remark[2], 3=>obj.remark[3]} assert_equal(actual,expected) end def test_record assert_instance_of(Hash, @pdb.record) end def test_seqres assert_equal({"A"=>"SAAATQAVPAPNQ"},@pdb.seqres) assert_equal(nil,@pdb.seqres(7)) #I'm not sure why this returns nil str =<"ucccccgugccca"},obj.seqres) end # too redundant? def test_sheet seq =<obj.strand, :sheetID=>obj.sheetID, :numStrands=>obj.numStrands, :initResName=>obj.initResName, :initChainID=>obj.initChainID, :initSeqNum=>obj.initSeqNum, :initICode=>obj.initICode, :endResName=>obj.endResName, :endChainID=>obj.endChainID, :endSeqNum=>obj.endSeqNum, :endICode=>obj.endICode, :sense=>obj.sense, :curAtom=>obj.curAtom, :curResName=>obj.curResName, :curChainId=>obj.curChainId, :curResSeq=>obj.curResSeq, :curICode=>obj.curICode, :prevAtom=>obj.prevAtom, :prevResName=>obj.prevResName, :prevChainId=>obj.prevChainId, :prevResSeq=>obj.prevResSeq, :prevICode=>obj.prevICode} end end expected = [ {:strand=>2, :sheetID=>"BS8", :numStrands=>3, :initResName=>"LYS", :initChainID=>" ", :initSeqNum=>639, :initICode=>"", :endResName=>"LYS", :endChainID=>" ", :endSeqNum=>648, :endICode=>"", :sense=>-1, :curAtom=>" N", :curResName=>"PHE", :curChainId=>" ", :curResSeq=>643, :curICode=>"", :prevAtom=>" O", :prevResName=>"HIS", :prevChainId=>" ", :prevResSeq=>662, :prevICode=>""}, {:strand=>3, :sheetID=>"BS8", :numStrands=>3, :initResName=>"ASN", :initChainID=>" ", :initSeqNum=>596, :initICode=>"", 
:endResName=>"VAL", :endChainID=>" ", :endSeqNum=>600, :endICode=>"", :sense=>-1, :curAtom=>" N", :curResName=>"TYR", :curChainId=>" ", :curResSeq=>598, :curICode=>"", :prevAtom=>" O", :prevResName=>"ILE", :prevChainId=>" ", :prevResSeq=>646, :prevICode=>""}] actual2 = [] s.sheet("BS8").each do |obj2| obj2.each do |obj| actual2 << {:strand=>obj.strand, :sheetID=>obj.sheetID, :numStrands=>obj.numStrands, :initResName=>obj.initResName, :initChainID=>obj.initChainID, :initSeqNum=>obj.initSeqNum, :initICode=>obj.initICode, :endResName=>obj.endResName, :endChainID=>obj.endChainID, :endSeqNum=>obj.endSeqNum, :endICode=>obj.endICode, :sense=>obj.sense, :curAtom=>obj.curAtom, :curResName=>obj.curResName, :curChainId=>obj.curChainId, :curResSeq=>obj.curResSeq, :curICode=>obj.curICode, :prevAtom=>obj.prevAtom, :prevResName=>obj.prevResName, :prevChainId=>obj.prevChainId, :prevResSeq=>obj.prevResSeq, :prevICode=>obj.prevICode} end end assert_equal(expected,actual) assert_equal(expected,actual2) end def test_ssbond assert_instance_of(Bio::PDB::Record::SSBOND,@pdb.ssbond.first) end #is this method correct? 
# Compares @pdb.to_s with the complete expected text: a "MODEL 1" section and
# a "MODEL 2" section, each closed by ENDMDL, followed by a final END line.
# NOTE(review): every field in the expected literal is separated by a single
# space; confirm this spacing against the fixture built in setup.
def test_to_s
  assert_equal("MODEL 1\nATOM 1 N ALA A 7 23.484 -35.866 44.510 1.00 28.52 N \nATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C \nATOM 3 C ALA A 7 23.102 -34.082 46.159 1.00 26.68 C \nATOM 4 O ALA A 7 23.097 -32.903 46.524 1.00 30.02 O \nATOM 5 CB ALA A 7 23.581 -33.526 43.770 1.00 31.41 C \nTER\nENDMDL\nMODEL 2\nATOM 1 N ALA A 7 23.484 -35.866 44.510 1.00 28.52 N \nTER\nHETATM30582 C1 EDO A 701 -0.205 -27.262 49.961 1.00 34.45 C \nHETATM30583 O1 EDO A 701 -1.516 -26.859 49.587 1.00 35.20 O \nHETATM30584 C2 EDO A 701 -0.275 -28.124 51.219 1.00 34.49 C \nHETATM30585 O2 EDO A 701 -1.442 -28.941 51.167 1.00 33.95 O \nHETATM30586 C1 EDO A 702 2.792 7.449 67.655 1.00 17.09 C \nHETATM30587 O1 EDO A 702 1.451 7.273 67.213 1.00 15.74 O \nHETATM30588 C2 EDO A 702 3.678 7.589 66.425 1.00 15.31 C \nHETATM30589 O2 EDO A 702 3.391 6.512 65.550 1.00 17.67 O \nHETATM30857 O HOH A 502 13.654 -16.451 49.711 1.00 12.79 O \nENDMDL\nEND\n",@pdb.to_s)
end

# turn without an argument is expected to return an empty Array for this
# fixture, and turn(1) is expected to return nil.
def test_turn
  assert_equal([],@pdb.turn)
  assert_equal(nil,@pdb.turn(1))
end

# version is expected to be 1 for this fixture.
def test_version
  assert_equal(1,@pdb.version)
end

# Indexing @pdb with 1 is expected to give an object whose serial is 1.
def test_bracket #test for []
  assert_equal(1,@pdb[1].serial)
end
end

# TestPDBRecord::Test* are unit tests for the PDB field classes.
# Each test class uses one line or several lines of a PDB record.
# They test all the methods described or generated in Bio::PDB::Record.
module TestPDBRecord

  # test of Bio::PDB::Record::ATOM
  #
  # setup parses a single ATOM record string; every test below checks one
  # accessor (or one derived value) of the parsed record.
  class TestATOM < Test::Unit::TestCase
    def setup
      # the data is taken from
      # http://www.rcsb.org/pdb/file_formats/pdb/pdbguide2.2/part_62.html
      # NOTE(review): the fields of this record are separated by single
      # spaces here; confirm the spacing against the referenced guide.
      @str = 'ATOM 154 CG2BVAL A 25 29.909 16.996 55.922 0.72 13.25 A1 C '
      @atom = Bio::PDB::Record::ATOM.new.initialize_from_string(@str)
    end

    def test_record_name
      assert_equal('ATOM', @atom.record_name)
    end

    def test_serial
      assert_equal(154, @atom.serial)
    end

    def test_name
      assert_equal('CG2', @atom.name)
    end

    def test_altLoc
      assert_equal('B', @atom.altLoc)
    end

    def test_resName
      assert_equal('VAL', @atom.resName)
    end

    def test_chainID
      assert_equal('A', @atom.chainID)
    end

    def test_resSeq
      assert_equal(25, @atom.resSeq)
    end

    def test_iCode
      assert_equal('', @atom.iCode)
    end

    # Coordinates are floats, so they are compared with a tolerance.
    def test_x
      assert_in_delta(29.909, @atom.x, 0.0001)
    end

    def test_y
      assert_in_delta(16.996, @atom.y, 0.0001)
    end

    def test_z
      assert_in_delta(55.922, @atom.z, 0.0001)
    end

    def test_occupancy
      assert_in_delta(0.72, @atom.occupancy, 0.001)
    end

    def test_tempFactor
      assert_in_delta(13.25, @atom.tempFactor, 0.001)
    end

    def test_segID
      assert_equal('A1', @atom.segID)
    end

    def test_element
      assert_equal('C', @atom.element)
    end

    def test_charge
      assert_equal('', @atom.charge)
    end

    # xyz is expected to equal a Bio::PDB::Coordinate built from x, y and z.
    def test_xyz
      assert_equal(Bio::PDB::Coordinate[ "29.909".to_f, "16.996".to_f, "55.922".to_f ], @atom.xyz)
    end

    # to_a is expected to be the plain [x, y, z] Array.
    def test_to_a
      assert_equal([ "29.909".to_f, "16.996".to_f, "55.922".to_f ], @atom.to_a)
    end

    # <=> is checked against records whose serial is larger (-1), equal (0)
    # and smaller (1) than the serial of @atom (154).
    def test_comparable
      a = Bio::PDB::Record::ATOM.new
      a.serial = 999
      assert_equal(-1, @atom <=> a)
      a.serial = 154
      assert_equal( 0, @atom <=> a)
      a.serial = 111
      assert_equal( 1, @atom <=> a)
    end

    # to_s is expected to give back the source string plus a newline.
    def test_to_s
      assert_equal(@str + "\n", @atom.to_s)
    end

    def test_original_data
      assert_equal([ @str ], @atom.original_data)
    end

    # do_parse is expected to return the record itself.
    def test_do_parse
      assert_equal(@atom, @atom.do_parse)
    end

    # A record created directly from a string is expected to have no
    # residue, sigatm, anisou or ter attached (all nil).
    def test_residue
      assert_equal(nil, @atom.residue)
    end

    def test_sigatm
      assert_equal(nil, @atom.sigatm)
    end

    def test_anisou
      assert_equal(nil, @atom.anisou)
    end

    def test_ter
      assert_equal(nil, @atom.ter)
    end
  end #class TestATOM

  # test of Bio::PDB::Record::HETATM
  #
  # setup parses a single HETATM record string; every test below checks one
  # accessor (or one derived value) of the parsed record.
  class TestHETATM < Test::Unit::TestCase
    def setup
      # the data is taken from
      # http://www.rcsb.org/pdb/file_formats/pdb/pdbguide2.2/part_62.html
      # NOTE(review): the fields of this record are separated by single
      # spaces here; confirm the spacing against the real record.
      @str = 'HETATM30581 NA NA A 601 5.037 -39.853 62.809 1.00 17.37 NA '
      @hetatm = Bio::PDB::Record::HETATM.new.initialize_from_string(@str)
    end

    def test_record_name
      assert_equal('HETATM', @hetatm.record_name)
    end

    def test_serial
      assert_equal(30581, @hetatm.serial)
    end

    def test_name
      assert_equal('NA', @hetatm.name)
    end

    def test_altLoc
      assert_equal(' ', @hetatm.altLoc)
    end

    def test_resName
      assert_equal('NA', @hetatm.resName)
    end

    def test_chainID
      assert_equal('A', @hetatm.chainID)
    end

    def test_resSeq
      assert_equal(601, @hetatm.resSeq)
    end

    def test_iCode
      assert_equal('', @hetatm.iCode)
    end

    # Coordinates are floats, so they are compared with a tolerance.
    def test_x
      assert_in_delta(5.037, @hetatm.x, 0.0001)
    end

    def test_y
      assert_in_delta(-39.853, @hetatm.y, 0.0001)
    end

    def test_z
      assert_in_delta(62.809, @hetatm.z, 0.0001)
    end

    def test_occupancy
      assert_in_delta(1.00, @hetatm.occupancy, 0.001)
    end

    def test_tempFactor
      assert_in_delta(17.37, @hetatm.tempFactor, 0.001)
    end

    def test_segID
      assert_equal('', @hetatm.segID)
    end

    def test_element
      assert_equal('NA', @hetatm.element)
    end

    def test_charge
      assert_equal('', @hetatm.charge)
    end

    # xyz is expected to equal a Bio::PDB::Coordinate built from x, y and z.
    def test_xyz
      assert_equal(Bio::PDB::Coordinate[ "5.037".to_f, "-39.853".to_f, "62.809".to_f ], @hetatm.xyz)
    end

    # to_a is expected to be the plain [x, y, z] Array.
    def test_to_a
      assert_equal([ "5.037".to_f, "-39.853".to_f, "62.809".to_f ], @hetatm.to_a)
    end

    # <=> is checked against records whose serial is larger (-1), equal (0)
    # and smaller (1) than the serial of @hetatm (30581).
    def test_comparable
      a = Bio::PDB::Record::HETATM.new
      a.serial = 40000
      assert_equal(-1, @hetatm <=> a)
      a.serial = 30581
      assert_equal( 0, @hetatm <=> a)
      a.serial = 30000
      assert_equal( 1, @hetatm <=> a)
    end

    # to_s is expected to give back the source string plus a newline.
    def test_to_s
      assert_equal(@str + "\n", @hetatm.to_s)
    end

    def test_original_data
      assert_equal([ @str ], @hetatm.original_data)
    end

    # do_parse is expected to return the record itself.
    def test_do_parse
      assert_equal(@hetatm, @hetatm.do_parse)
    end

    # A record created directly from a string is expected to have no
    # residue or sigatm attached (both nil).
    def test_residue
      assert_equal(nil, @hetatm.residue)
    end

    def test_sigatm
      assert_equal(nil, @hetatm.sigatm)
    end

    # The name and body of the next test method follow on the next source line.
    def
test_anisou assert_equal(nil, @hetatm.anisou) end def test_ter assert_equal(nil, @hetatm.ter) end end #class TestATOM class TestHEADER < Test::Unit::TestCase def setup @str = 'HEADER OXIDOREDUCTASE 12-AUG-09 3INJ ' @header = Bio::PDB::Record::HEADER.new.initialize_from_string(@str) end def test_classification assert_equal('OXIDOREDUCTASE', @header.classification) end def test_depDate assert_equal('12-AUG-09', @header.depDate) end def test_idCode assert_equal('3INJ', @header.idCode) end end class TestOBSLTE < Test::Unit::TestCase def setup @str = 'OBSLTE 31-JAN-94 1MBP 2MBP ' @obslte = Bio::PDB::Record::OBSLTE.new.initialize_from_string(@str) end def test_repDate assert_equal('31-JAN-94', @obslte.repDate) end def test_idCode assert_equal('1MBP', @obslte.idCode) end def test_rIdCode assert_equal(["2MBP"], @obslte.rIdCode) end end #Is this unit test correct? class TestTITLE < Test::Unit::TestCase def setup @str = "TITLE HUMAN MITOCHONDRIAL ALDEHYDE DEHYDROGENASE COMPLEXED WITH \n TITLE 2 AGONIST ALDA-1 " @title = Bio::PDB::Record::TITLE.new.initialize_from_string(@str) end def test_title assert_equal('HUMAN MITOCHONDRIAL ALDEHYDE DEHYDROGENASE COMPLEXED WITH', @title.title) end end class TestCAVEAT < Test::Unit::TestCase def setup @str = 'CAVEAT 1ABC INCORRECT' @caveat = Bio::PDB::Record::CAVEAT.new.initialize_from_string(@str) end def test_idcode assert_equal('1ABC', @caveat.idcode) end def test_comment assert_equal('INCORRECT', @caveat.comment) end end class TestCOMPND < Test::Unit::TestCase def setup @str =<27.89, :iCode=>"", :serial=>2, :charge=>"", :z=>44.904, :chainID=>"A", :segID=>"", :x=>23.849, :altLoc=>" ", :occupancy=>1.0, :resSeq=>7, :element=>"C", :name=>"CA", :y=>-34.509, :resName=>"ALA"} actual = {} @res["CA"].each_pair do |m, v| actual[m] = v end assert_equal(expected, actual) end def test_each_atom expected = [{:serial=>1, :name=>"N", :altLoc=>" ", :resName=>"ALA", :chainID=>"A", :resSeq=>7, :iCode=>"", :x=>23.484, :y=>-35.866, :z=>44.51, 
:occupancy=>1.0, :tempFactor=>28.52, :segID=>"", :element=>"N", :charge=>""}, {:serial=>2, :name=>"CA", :altLoc=>" ", :resName=>"ALA", :chainID=>"A", :resSeq=>7, :iCode=>"", :x=>23.849, :y=>-34.509, :z=>44.904, :occupancy=>1.0, :tempFactor=>27.89, :segID=>"", :element=>"C", :charge=>""}, {:serial=>3, :name=>"C", :altLoc=>" ", :resName=>"ALA", :chainID=>"A", :resSeq=>7, :iCode=>"", :x=>23.102, :y=>-34.082, :z=>46.159, :occupancy=>1.0, :tempFactor=>26.68, :segID=>"", :element=>"C", :charge=>""},{:serial=>4, :name=>"O", :altLoc=>" ", :resName=>"ALA", :chainID=>"A", :resSeq=>7, :iCode=>"", :x=>23.097, :y=>-32.903, :z=>46.524, :occupancy=>1.0, :tempFactor=>30.02, :segID=>"", :element=>"O", :charge=>""}, {:serial=>5, :name=>"CB", :altLoc=>" ", :resName=>"ALA", :chainID=>"A", :resSeq=>7, :iCode=>"", :x=>23.581, :y=>-33.526, :z=>43.77, :occupancy=>1.0, :tempFactor=>31.41, :segID=>"", :element=>"C", :charge=>""}] actual = [] @res.each_atom do |atom| actual << {:serial=>atom.serial, :name=>atom.name, :altLoc=>atom.altLoc, :resName=>atom.resName, :chainID=>atom.chainID, :resSeq=>atom.resSeq, :iCode=>atom.iCode, :x=>atom.x, :y=>atom.y, :z=>atom.z, :occupancy=>atom.occupancy, :tempFactor=>atom.tempFactor, :segID=>atom.segID, :element=>atom.element, :charge=>atom.charge} end assert_equal(expected, actual) end def test_each expected = [{:serial=>1, :name=>"N", :altLoc=>" ", :resName=>"ALA", :chainID=>"A", :resSeq=>7, :iCode=>"", :x=>23.484, :y=>-35.866, :z=>44.51, :occupancy=>1.0, :tempFactor=>28.52, :segID=>"", :element=>"N", :charge=>""}, {:serial=>2, :name=>"CA", :altLoc=>" ", :resName=>"ALA", :chainID=>"A", :resSeq=>7, :iCode=>"", :x=>23.849, :y=>-34.509, :z=>44.904, :occupancy=>1.0, :tempFactor=>27.89, :segID=>"", :element=>"C", :charge=>""}, {:serial=>3, :name=>"C", :altLoc=>" ", :resName=>"ALA", :chainID=>"A", :resSeq=>7, :iCode=>"", :x=>23.102, :y=>-34.082, :z=>46.159, :occupancy=>1.0, :tempFactor=>26.68, :segID=>"", :element=>"C", :charge=>""},{:serial=>4, :name=>"O", 
:altLoc=>" ", :resName=>"ALA", :chainID=>"A", :resSeq=>7, :iCode=>"", :x=>23.097, :y=>-32.903, :z=>46.524, :occupancy=>1.0, :tempFactor=>30.02, :segID=>"", :element=>"O", :charge=>""}, {:serial=>5, :name=>"CB", :altLoc=>" ", :resName=>"ALA", :chainID=>"A", :resSeq=>7, :iCode=>"", :x=>23.581, :y=>-33.526, :z=>43.77, :occupancy=>1.0, :tempFactor=>31.41, :segID=>"", :element=>"C", :charge=>""}] actual = [] @res.each do |atom| actual << {:serial=>atom.serial, :name=>atom.name, :altLoc=>atom.altLoc, :resName=>atom.resName, :chainID=>atom.chainID, :resSeq=>atom.resSeq, :iCode=>atom.iCode, :x=>atom.x, :y=>atom.y, :z=>atom.z, :occupancy=>atom.occupancy, :tempFactor=>atom.tempFactor, :segID=>atom.segID, :element=>atom.element, :charge=>atom.charge} end assert_equal(expected, actual) end def test_het_atom assert_equal(false, @res.hetatm) end def test_iCode assert_equal( 1, @res.iCode=1) end def test_resSeq assert_equal( 1, @res.resSeq=1) end def test_to_s expected ="ATOM 1 N ALA A 7 23.484 -35.866 44.510 1.00 28.52 N \nATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C \nATOM 3 C ALA A 7 23.102 -34.082 46.159 1.00 26.68 C \nATOM 4 O ALA A 7 23.097 -32.903 46.524 1.00 30.02 O \nATOM 5 CB ALA A 7 23.581 -33.526 43.770 1.00 31.41 C \n" assert_equal(expected, @res.to_s) end def test_inspect expected = "#" assert_equal(expected,@res.inspect) end def test_sort #<=> expected = [Bio::PDB::Residue.new(resName="ALA",resSeq = 6, iCode = 2, chain = nil), Bio::PDB::Residue.new(resName="ALA",resSeq = 7, iCode = 1, chain = nil), Bio::PDB::Residue.new(resName="ALA",resSeq = 7, iCode = 3, chain = nil)] ress = [Bio::PDB::Residue.new(resName="ALA",resSeq = 7, iCode = 1, chain = nil)] ress << Bio::PDB::Residue.new(resName="ALA",resSeq = 6, iCode = 2, chain = nil) ress << Bio::PDB::Residue.new(resName="ALA",resSeq = 7, iCode = 3, chain = nil) actual = ress.sort do |a, b| a <=> b end assert_equal(expected,actual) end def test_update_resudue_id res = Bio::PDB::Residue.new(resName="ALA", resSeq = 
nil, iCode = nil, chain = nil) assert_equal(nil, res.residue_id) end end class TestHeterogen < Test::Unit::TestCase def setup @res = Bio::PDB::Heterogen.new(resName="EDO",resSeq = 701, iCode = "", chain = nil) @res.addAtom(Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30583 O1 EDO A 701 -1.516 -26.859 49.587 1.00 35.20 O")) @res.addAtom(Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30584 C2 EDO A 701 -0.275 -28.124 51.219 1.00 34.49 C")) @res.addAtom(Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30585 O2 EDO A 701 -1.442 -28.941 51.167 1.00 33.95 O")) @res.addAtom(Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30586 C1 EDO A 702 2.792 7.449 67.655 1.00 17.09 C")) @res.addAtom(Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30587 O1 EDO A 702 1.451 7.273 67.213 1.00 15.74 O")) end def test_get_residue_id_from_atom id = Bio::PDB::Residue.get_residue_id_from_atom(Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30582 C1 EDO A 701 -0.205 -27.262 49.961 1.00 34.45 C")) assert_equal("701",id) end def test_addAtom assert_nothing_raised { @res.addAtom(Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30583 O1 EDO A 701 -1.516 -26.859 49.587 1.00 35.20 O")) @res.addAtom(Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30584 C2 EDO A 701 -0.275 -28.124 51.219 1.00 34.49 C")) @res.addAtom(Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30585 O2 EDO A 701 -1.442 -28.941 51.167 1.00 33.95 O")) @res.addAtom(Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30586 C1 EDO A 702 2.792 7.449 67.655 1.00 17.09 C")) @res.addAtom(Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30587 O1 EDO A 702 1.451 7.273 67.213 1.00 15.74 O")) } end def test_square_bracket expected = { :serial=>30586, :name=>"C1", :altLoc=>" ", :resName=>"EDO", :chainID=>"A", :resSeq=>702, :iCode=>"", :x=>2.792, :y=>7.449, :z=>67.655, :occupancy=>1.0, :tempFactor=>17.09, :segID=>"", :element=>"C", 
:charge=>"" } actual = {} @res["C1"].each_pair do |m, v| actual[m] = v end assert_equal(expected, actual) end def test_each_hetatm expected = [{:z=>49.587, :resName=>"EDO", :altLoc=>" ", :resSeq=>701, :occupancy=>1.0, :iCode=>"", :tempFactor=>35.2, :chainID=>"A", :y=>-26.859, :segID=>"", :x=>-1.516, :name=>"O1", :charge=>"", :element=>"O", :serial=>30583}, {:z=>51.219, :resName=>"EDO", :altLoc=>" ", :resSeq=>701, :occupancy=>1.0, :iCode=>"", :tempFactor=>34.49, :chainID=>"A", :y=>-28.124, :segID=>"", :x=>-0.275, :name=>"C2", :charge=>"", :element=>"C", :serial=>30584}, {:z=>51.167, :resName=>"EDO", :altLoc=>" ", :resSeq=>701, :occupancy=>1.0, :iCode=>"", :tempFactor=>33.95, :chainID=>"A", :y=>-28.941, :segID=>"", :x=>-1.442, :name=>"O2", :charge=>"", :element=>"O", :serial=>30585}, {:z=>67.655, :resName=>"EDO", :altLoc=>" ", :resSeq=>702, :occupancy=>1.0, :iCode=>"", :tempFactor=>17.09, :chainID=>"A", :y=>7.449, :segID=>"", :x=>2.792, :name=>"C1", :charge=>"", :element=>"C", :serial=>30586}, {:z=>67.213, :resName=>"EDO", :altLoc=>" ", :resSeq=>702, :occupancy=>1.0, :iCode=>"", :tempFactor=>15.74, :chainID=>"A", :y=>7.273, :segID=>"", :x=>1.451, :name=>"O1", :charge=>"", :element=>"O", :serial=>30587}] actual = [] @res.each_hetatm do |hetatm| actual << {:serial=>hetatm.serial, :name=>hetatm.name, :altLoc=>hetatm.altLoc, :resName=>hetatm.resName, :chainID=>hetatm.chainID, :resSeq=>hetatm.resSeq, :iCode=>hetatm.iCode, :x=>hetatm.x, :y=>hetatm.y, :z=>hetatm.z, :occupancy=>hetatm.occupancy, :tempFactor=>hetatm.tempFactor, :segID=>hetatm.segID, :element=>hetatm.element, :charge=>hetatm.charge} end assert_equal(expected, actual) end def test_each expected = [{:z=>49.587, :resName=>"EDO", :altLoc=>" ", :resSeq=>701, :occupancy=>1.0, :iCode=>"", :tempFactor=>35.2, :chainID=>"A", :y=>-26.859, :segID=>"", :x=>-1.516, :name=>"O1", :charge=>"", :element=>"O", :serial=>30583}, {:z=>51.219, :resName=>"EDO", :altLoc=>" ", :resSeq=>701, :occupancy=>1.0, :iCode=>"", 
:tempFactor=>34.49, :chainID=>"A", :y=>-28.124, :segID=>"", :x=>-0.275, :name=>"C2", :charge=>"", :element=>"C", :serial=>30584}, {:z=>51.167, :resName=>"EDO", :altLoc=>" ", :resSeq=>701, :occupancy=>1.0, :iCode=>"", :tempFactor=>33.95, :chainID=>"A", :y=>-28.941, :segID=>"", :x=>-1.442, :name=>"O2", :charge=>"", :element=>"O", :serial=>30585}, {:z=>67.655, :resName=>"EDO", :altLoc=>" ", :resSeq=>702, :occupancy=>1.0, :iCode=>"", :tempFactor=>17.09, :chainID=>"A", :y=>7.449, :segID=>"", :x=>2.792, :name=>"C1", :charge=>"", :element=>"C", :serial=>30586}, {:z=>67.213, :resName=>"EDO", :altLoc=>" ", :resSeq=>702, :occupancy=>1.0, :iCode=>"", :tempFactor=>15.74, :chainID=>"A", :y=>7.273, :segID=>"", :x=>1.451, :name=>"O1", :charge=>"", :element=>"O", :serial=>30587}] actual = [] @res.each do |hetatm| actual << {:serial=>hetatm.serial, :name=>hetatm.name, :altLoc=>hetatm.altLoc, :resName=>hetatm.resName, :chainID=>hetatm.chainID, :resSeq=>hetatm.resSeq, :iCode=>hetatm.iCode, :x=>hetatm.x, :y=>hetatm.y, :z=>hetatm.z, :occupancy=>hetatm.occupancy, :tempFactor=>hetatm.tempFactor, :segID=>hetatm.segID, :element=>hetatm.element, :charge=>hetatm.charge} end assert_equal(expected, actual) end def test_het_atom assert_equal(true, @res.hetatm) end def test_iCode assert_equal( 1, @res.iCode=1) end def test_resSeq assert_equal( 1, @res.resSeq=1) end def test_to_s expected = "HETATM30583 O1 EDO A 701 -1.516 -26.859 49.587 1.00 35.20 O \nHETATM30584 C2 EDO A 701 -0.275 -28.124 51.219 1.00 34.49 C \nHETATM30585 O2 EDO A 701 -1.442 -28.941 51.167 1.00 33.95 O \nHETATM30586 C1 EDO A 702 2.792 7.449 67.655 1.00 17.09 C \nHETATM30587 O1 EDO A 702 1.451 7.273 67.213 1.00 15.74 O \n" assert_equal(expected, @res.to_s) end def test_inspect expected = "#" assert_equal(expected,@res.inspect) end def test_sort #<=> expected = [Bio::PDB::Heterogen.new(resName="EDD",resSeq = 1, iCode = 2, chain = nil), Bio::PDB::Heterogen.new(resName="EDD",resSeq = 1, iCode = 3, chain = nil), 
Bio::PDB::Heterogen.new(resName="EDD",resSeq = 2, iCode = 1, chain = nil)] ress = [Bio::PDB::Heterogen.new(resName="EDD",resSeq = 1, iCode = 2, chain = nil)] ress << Bio::PDB::Heterogen.new(resName="EDD",resSeq = 1, iCode = 3, chain = nil) ress << Bio::PDB::Heterogen.new(resName="EDD",resSeq = 2, iCode = 1, chain = nil) actual = ress.sort do |a, b| a <=> b end assert_equal(expected,actual) end def test_update_resudue_id res = Bio::PDB::Heterogen.new(resName="EDD", resSeq = nil, iCode = nil, chain = nil) assert_equal(nil, res.residue_id) end end class TestChain < Test::Unit::TestCase def setup @chain = Bio::PDB::Chain.new('A',nil) @chain.addResidue(Bio::PDB::Residue.new(resName="ALA",resSeq = 7, iCode = 1, chain = @chain)) @chain.addResidue(Bio::PDB::Residue.new(resName="ALA",resSeq = 6, iCode = 2, chain = @chain)) @chain.addResidue(Bio::PDB::Residue.new(resName="ALA",resSeq = 7, iCode = 3, chain = @chain)) @chain.addLigand(Bio::PDB::Heterogen.new(resName="EDD",resSeq = 1, iCode = 2, chain = @chain)) end def test_square_brace #[] expected = {:iCode=>1, :chain_id=>'A', :atoms_size=>0, :resSeq=>7, :id=>"71", :resName=>"ALA"} residue = @chain["71"] actual = {:resName => residue.resName, :id => residue.id, :chain_id => residue.chain.id, :resSeq => residue.resSeq, :iCode => residue.iCode, :atoms_size => residue.atoms.size} assert_equal(expected, actual) end def test_comp #<=> expected = [{:iCode=>2, :chain_id=>'A', :atoms_size=>0, :resSeq=>6, :id=>"62", :resName=>"ALA"}, {:iCode=>1, :chain_id=>'A', :atoms_size=>0, :resSeq=>7, :id=>"71", :resName=>"ALA"}, {:iCode=>3, :chain_id=>'A', :atoms_size=>0, :resSeq=>7, :id=>"73", :resName=>"ALA"}] sorted = @chain.sort do |a, b| a<=>b end actual = [] sorted.each do |residue| actual << {:resName => residue.resName, :id => residue.id, :chain_id => residue.chain.id, :resSeq => residue.resSeq, :iCode => residue.iCode, :atoms_size => residue.atoms.size} end assert_equal(expected, actual) end def test_addResidue assert_nothing_raised{ 
@chain.addResidue(Bio::PDB::Residue.new(resName="ALA",resSeq = 9, iCode = 1, chain = @chain))} end def test_aaseq assert_equal("AAA", @chain.aaseq) end def test_addLigand assert_nothing_raised{ @chain.addLigand(Bio::PDB::Heterogen.new(resName="EDD",resSeq = 10, iCode = 2, chain = @chain)) } end def test_atom_seq assert_equal("AAA", @chain.atom_seq) end def test_each expected = [{:atoms_size=>0, :resSeq=>7, :chain_id=>'A', :iCode=>1, :id=>"71", :resName=>"ALA"}, {:atoms_size=>0, :resSeq=>6, :chain_id=>'A', :iCode=>2, :id=>"62", :resName=>"ALA"}, {:atoms_size=>0, :resSeq=>7, :chain_id=>'A', :iCode=>3, :id=>"73", :resName=>"ALA"}] actual = [] @chain.each do |residue| actual << {:resName => residue.resName, :id => residue.id, :chain_id => residue.chain.id, :resSeq => residue.resSeq, :iCode => residue.iCode, :atoms_size => residue.atoms.size} end assert_equal(expected, actual) end def test_each_residue expected = [{:atoms_size=>0, :resSeq=>7, :chain_id=>'A', :iCode=>1, :id=>"71", :resName=>"ALA"}, {:atoms_size=>0, :resSeq=>6, :chain_id=>'A', :iCode=>2, :id=>"62", :resName=>"ALA"}, {:atoms_size=>0, :resSeq=>7, :chain_id=>'A', :iCode=>3, :id=>"73", :resName=>"ALA"}] actual = [] @chain.each do |residue| actual << {:resName => residue.resName, :id => residue.id, :chain_id => residue.chain.id, :resSeq => residue.resSeq, :iCode => residue.iCode, :atoms_size => residue.atoms.size} end assert_equal(expected, actual) end def test_each_heterogen expected = [{:iCode=>2, :chain_id=>'A', :resSeq=>1, :id=>"12", :atoms_size=>0, :resName=>"EDD"}] actual = [] @chain.each_heterogen do |heterogen| actual << {:resName => heterogen.resName, :id => heterogen.id, :chain_id => heterogen.chain.id, :resSeq => heterogen.resSeq, :iCode => heterogen.iCode, :atoms_size => heterogen.atoms.size} end assert_equal(expected, actual) end def test_get_heterogen_by_id heterogen = @chain.get_heterogen_by_id("12") expected = {:iCode=>2, :chain_id=>'A', :resSeq=>1, :id=>"12", :atoms_size=>0, :resName=>"EDD"} 
actual = {:resName => heterogen.resName, :id => heterogen.id, :chain_id => heterogen.chain.id, :resSeq => heterogen.resSeq, :iCode => heterogen.iCode, :atoms_size => heterogen.atoms.size} assert_equal(expected, actual) end def test_get_residue_by_id residue = @chain.get_residue_by_id("71") expected = {:atoms_size=>0, :resSeq=>7, :chain_id=>'A', :iCode=>1, :id=>"71", :resName=>"ALA"} actual = {:resName => residue.resName, :id => residue.id, :chain_id => residue.chain.id, :resSeq => residue.resSeq, :iCode => residue.iCode, :atoms_size => residue.atoms.size} assert_equal(expected, actual) end def test_inspect expected = "#" assert_equal(expected, @chain.inspect) end def test_rehash assert_nothing_raised{@chain.rehash} end def test_rehash_heterogens assert_nothing_raised{@chain.rehash_heterogens} #assert_raise{@chain.rehash_heterogens} end def test_rehash_residues assert_nothing_raised{@chain.rehash_residues} end def test_to_s assert_equal("TER\n",@chain.to_s) end end class TestModel < Test::Unit::TestCase def setup @model = Bio::PDB::Model.new(1,nil) @model.addChain(Bio::PDB::Chain.new(1, @model)) @model.addChain(Bio::PDB::Chain.new(2, @model)) @model.addChain(Bio::PDB::Chain.new(3, @model)) end def test_square_brace #[] expected = {:id=>1, :model_serial=>1, :residues_size=>0, :heterogens_size=>0, :aaseq=>""} residue = @model[1] actual = {:id=>residue.id, :model_serial=>residue.model.serial, :residues_size=>residue.residues.size, :heterogens_size=>residue.heterogens.size, :aaseq=>residue.aaseq} assert_equal(expected, actual) end def test_comp #<=> models = [Bio::PDB::Model.new(2,nil), Bio::PDB::Model.new(1,nil), Bio::PDB::Model.new(3,nil)] expected = [{:serial=>1, :chains_size=>0}, {:serial=>2, :chains_size=>0}, {:serial=>3, :chains_size=>0}] sorted = models.sort do |a, b| a<=>b end actual = [] sorted.each do |model| actual << {:serial => model.serial, :chains_size => model.chains.size } end assert_equal(expected, actual) end def test_addChain assert_nothing_raised{ 
@model.addChain(Bio::PDB::Chain.new("D", @model))}
end

# Model#each is expected to yield the chains in the order given by the
# expected ids (1, 2, 3); each yielded chain is empty and reports model
# serial 1.
def test_each
  expected = [{:model_serial=>1, :aaseq=>"", :residues_size=>0, :heterogens_size=>0, :id=>1},
              {:model_serial=>1, :aaseq=>"", :residues_size=>0, :heterogens_size=>0, :id=>2},
              {:model_serial=>1, :aaseq=>"", :residues_size=>0, :heterogens_size=>0, :id=>3}]
  actual = []
  @model.each do |m|
    actual << {:id => m.id, :model_serial => m.model.serial, :residues_size => m.residues.size, :heterogens_size => m.heterogens.size, :aaseq => m.aaseq }
  end
  assert_equal(expected, actual)
end

# Model#each_chain is expected to yield exactly what Model#each yields.
def test_each_chain
  expected = [{:model_serial=>1, :aaseq=>"", :residues_size=>0, :heterogens_size=>0, :id=>1},
              {:model_serial=>1, :aaseq=>"", :residues_size=>0, :heterogens_size=>0, :id=>2},
              {:model_serial=>1, :aaseq=>"", :residues_size=>0, :heterogens_size=>0, :id=>3}]
  actual = []
  @model.each_chain do |m|
    actual << {:id => m.id, :model_serial => m.model.serial, :residues_size => m.residues.size, :heterogens_size => m.heterogens.size, :aaseq => m.aaseq }
  end
  assert_equal(expected, actual)
end

# NOTE(review): the expected literal below is just "#", which looks
# truncated; presumably it should be the full inspect string of the model.
# Confirm against the upstream file.
def test_inspect
  expected = "#"
  assert_equal(expected, @model.inspect)
end

# rehash must not raise.
def test_rehash
  assert_nothing_raised{@model.rehash}
end

# to_s is expected to give a MODEL line, one TER line per chain (three here)
# and a closing ENDMDL line.
def test_to_s
  assert_equal("MODEL 1\nTER\nTER\nTER\nENDMDL\n",@model.to_s)
end
end

# This class tests Bio::PDB::Utils through a Bio::PDB::Residue object which
# is created directly (not obtained by parsing a PDB file).
class TestUtils < Test::Unit::TestCase
  # Builds an ALA residue and adds its atoms one ATOM record at a time;
  # further statements of this method follow on the next source line.
  def setup
    @res = Bio::PDB::Residue.new(resName="ALA",resSeq = 7, iCode = "", chain = nil)
    @res.addAtom(Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 1 N ALA A 7 23.484 -35.866 44.510 1.00 28.52 N"))
    @res.addAtom(Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C"))
    @res.addAtom(Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 3 C ALA A 7 23.102 -34.082 46.159 1.00 26.68 C"))
    @res.addAtom(Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 4 O ALA A 7 23.097 -32.903 46.524 1.00 30.02 O"))
@res.addAtom(Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 5 CB ALA A 7 23.581 -33.526 43.770 1.00 31.41 C")) end def test_geometricCentre assert_instance_of(Bio::PDB::Coordinate,@res.geometricCentre()) # assert_equal(Vector[23.4226, -34.1772, 45.1734], @res.geometricCentre()) expected = [ 23.4226, -34.1772, 45.1734 ] @res.geometricCentre().to_a.each do |num| assert_in_delta(expected.shift, num, 0.001) end assert(expected.empty?) end def test_centreOfGravity assert_instance_of(Bio::PDB::Coordinate,@res.centreOfGravity()) expected = [ 23.4047272727273, -34.1511515151515, 45.2351515151515 ] @res.centreOfGravity().to_a.each do |num| assert_in_delta(expected.shift, num, 0.001) end assert(expected.empty?) end def test_distance actual1 = Bio::PDB::Utils.distance( Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 7 CA VAL A 8 21.887 -34.822 48.124 1.00 23.78 C") ) actual2 = Bio::PDB::Utils.distance([23.849, -34.509, 44.904], [21.887, -34.822, 48.124]) assert_in_delta(3.78362432067456, actual1, 0.001) assert_in_delta(3.78362432067456, actual2, 0.001) end def test_dihedral_angle actual1 = Bio::PDB::Utils.dihedral_angle( Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 7 CA VAL A 8 21.887 -34.822 48.124 1.00 23.78 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 14 CA PRO A 9 24.180 -35.345 51.107 1.00 22.35 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 21 CA ALA A 10 23.833 -38.844 52.579 1.00 23.41 C") ) actual2 = Bio::PDB::Utils.dihedral_angle( Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 34.509 44.904 1.00 27.89 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 7 CA VAL A 8 21.887 34.822 48.124 1.00 23.78 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 14 CA 
PRO A 9 24.180 35.345 51.107 1.00 22.35 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 21 CA ALA A 10 23.833 38.844 52.579 1.00 23.41 C") ) assert_in_delta(-1.94387328933899, actual1, 0.001) assert_in_delta( 1.94387328933899, actual2, 0.001) end def test_rad2deg deg = Bio::PDB::Utils::rad2deg(3.14159265358979) assert_in_delta(180.0, deg, 0.0000000001) end end #class Test_Utils #The following classes is unit tests for Test_*Finder #The sample data are arrays generated from corresponding Bio::PDB::* classes, witch has Bio::PDB::Utils::*Finder class TestModelFinder < Test::Unit::TestCase def setup @models = [Bio::PDB::Model.new(1), Bio::PDB::Model.new(2), Bio::PDB::Model.new(3)] def @models.each_model self.each do |model| yield model end end @models.extend(Bio::PDB::ModelFinder) end def test_find_model expected = [Bio::PDB::Model.new(1), Bio::PDB::Model.new(2), Bio::PDB::Model.new(3)] actual = @models.find_model{|m| true} assert_equal(expected,actual) end end class TestChainFinder < Test::Unit::TestCase def setup @model = [Bio::PDB::Chain.new(1), Bio::PDB::Chain.new(2), Bio::PDB::Chain.new(3)] end def test_find_chain def @model.each_chain self.each do |chain| yield chain end end @model.extend(Bio::PDB::ChainFinder) expected = [Bio::PDB::Chain.new(1), Bio::PDB::Chain.new(2), Bio::PDB::Chain.new(3)] actual = @model.find_chain{|m| true} assert_equal(expected,actual) end def test_each_chain expected = [Bio::PDB::Chain.new(1), Bio::PDB::Chain.new(2), Bio::PDB::Chain.new(3), Bio::PDB::Chain.new(1), Bio::PDB::Chain.new(2), Bio::PDB::Chain.new(3)] models = [@model,@model] def models.each_model self.each do |model| yield model end end models.extend(Bio::PDB::ChainFinder) actual = [] models.each_chain{|chain| actual << chain} assert_equal(expected, actual) end def test_chains expected = [Bio::PDB::Chain.new(1), Bio::PDB::Chain.new(2), Bio::PDB::Chain.new(3), Bio::PDB::Chain.new(1), Bio::PDB::Chain.new(2), Bio::PDB::Chain.new(3)] @model.instance_eval{ def chains 
return self end } models = [@model,@model] def models.each_model self.each do |model| yield model end end models.extend(Bio::PDB::ChainFinder) models.extend(Bio::PDB::ModelFinder) actual = models.chains assert_equal(expected,actual) end end #TestChainFinder class TestResidueFinder < Test::Unit::TestCase def setup @residues = [Bio::PDB::Residue.new("",1), Bio::PDB::Residue.new("",2), Bio::PDB::Residue.new("",3)] end def test_find_residue def @residues.each_residue self.each do |residue| yield residue end end @residues.extend(Bio::PDB::ResidueFinder) # expected = [Bio::PDB::Residue.new("",1), Bio::PDB::Residue.new("",2), Bio::PDB::Residue.new("",3)] expected = [ {:resName=>"", :id=>"1", :chain=>nil, :resSeq=>1, :iCode=>nil, :atoms_size=>0}, {:resName=>"", :id=>"2", :chain=>nil, :resSeq=>2, :iCode=>nil, :atoms_size=>0}, {:resName=>"", :id=>"3", :chain=>nil, :resSeq=>3, :iCode=>nil, :atoms_size=>0}, ] finded = @residues.find_residue{|m| true} actual = [] finded.each do |res| actual << {:resName=> res.resName, :id=> res.id, :chain=> res.chain, :resSeq=> res.resSeq, :iCode=> res.iCode, :atoms_size=> res.atoms.size} end assert_equal(expected,actual) end def test_each_residue # expected = [Bio::PDB::Residue.new("", 1), Bio::PDB::Residue.new("",2), Bio::PDB::Residue.new("",3), Bio::PDB::Residue.new("",1), Bio::PDB::Residue.new("",2), Bio::PDB::Residue.new("",3)] expected = [ {:resName=>"", :id=>"1", :chain=>nil, :resSeq=>1, :iCode=>nil, :atoms_size=>0}, {:resName=>"", :id=>"2", :chain=>nil, :resSeq=>2, :iCode=>nil, :atoms_size=>0}, {:resName=>"", :id=>"3", :chain=>nil, :resSeq=>3, :iCode=>nil, :atoms_size=>0}, {:resName=>"", :id=>"1", :chain=>nil, :resSeq=>1, :iCode=>nil, :atoms_size=>0}, {:resName=>"", :id=>"2", :chain=>nil, :resSeq=>2, :iCode=>nil, :atoms_size=>0}, {:resName=>"", :id=>"3", :chain=>nil, :resSeq=>3, :iCode=>nil, :atoms_size=>0} ] chains = [@residues,@residues] def chains.each_chain self.each do |chain| yield chain end end 
chains.extend(Bio::PDB::ResidueFinder) actual = [] chains.each_residue do |res| actual << {:resName=> res.resName, :id=> res.id, :chain=> res.chain, :resSeq=> res.resSeq, :iCode=> res.iCode, :atoms_size=> res.atoms.size} end assert_equal(expected, actual) end def test_residues # expected = [Bio::PDB::Residue.new("", 1), Bio::PDB::Residue.new("",2), Bio::PDB::Residue.new("",3), Bio::PDB::Residue.new("",1), Bio::PDB::Residue.new("",2), Bio::PDB::Residue.new("",3)] expected = [ {:resName=>"", :id=>"1", :chain=>nil, :resSeq=>1, :iCode=>nil, :atoms_size=>0}, {:resName=>"", :id=>"2", :chain=>nil, :resSeq=>2, :iCode=>nil, :atoms_size=>0}, {:resName=>"", :id=>"3", :chain=>nil, :resSeq=>3, :iCode=>nil, :atoms_size=>0}, {:resName=>"", :id=>"1", :chain=>nil, :resSeq=>1, :iCode=>nil, :atoms_size=>0}, {:resName=>"", :id=>"2", :chain=>nil, :resSeq=>2, :iCode=>nil, :atoms_size=>0}, {:resName=>"", :id=>"3", :chain=>nil, :resSeq=>3, :iCode=>nil, :atoms_size=>0}] @residues.instance_eval{ def residues return self end } chains = [@residues,@residues] def chains.each_chain self.each do |chain| yield chain end end chains.extend(Bio::PDB::ResidueFinder) chains.extend(Bio::PDB::ChainFinder) actual = [] chains.residues.each do |res| actual << {:resName=> res.resName, :id=> res.id, :chain=> res.chain, :resSeq=> res.resSeq, :iCode=> res.iCode, :atoms_size=> res.atoms.size} end assert_equal(expected,actual) end end #TestResidueFinder class TestAtomFinder < Test::Unit::TestCase def setup @atoms = [Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C")] end def test_find_atom expected = [Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C"), 
Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C")] def @atoms.each_atom self.each do |atom| yield atom end end @atoms.extend(Bio::PDB::AtomFinder) actual = @atoms.find_atom{|a| true} assert_equal(expected,actual) end def test_each_atom expected = [ Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C") ] residues = [@atoms,@atoms] def residues.each_residue self.each do |residue| yield residue end end residues.extend(Bio::PDB::AtomFinder) actual = [] residues.each_atom{|atom| actual << atom} assert_equal(expected, actual) end def test_atoms expected = [ Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C"), Bio::PDB::Record::ATOM.new.initialize_from_string("ATOM 2 CA ALA A 7 23.849 -34.509 44.904 1.00 27.89 C") ] @atoms.instance_eval{ def atoms return self end 
} residues = [@atoms,@atoms] def residues.each_residue self.each do |atom| yield atom end end residues.extend(Bio::PDB::AtomFinder) residues.extend(Bio::PDB::ResidueFinder) actual = residues.atoms assert_equal(expected,actual) end end #AtomFinder class TestHetatmFinder < Test::Unit::TestCase def setup @hetatms = [Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30583 O1 EDO A 701 -1.516 -26.859 49.587 1.00 35.20 O"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30584 C2 EDO A 701 -0.275 -28.124 51.219 1.00 34.49 C"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30585 O2 EDO A 701 -1.442 -28.941 51.167 1.00 33.95 O"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30586 C1 EDO A 702 2.792 7.449 67.655 1.00 17.09 C"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30587 O1 EDO A 702 1.451 7.273 67.213 1.00 15.74 O") ] end def test_find_hetatm expected = [Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30583 O1 EDO A 701 -1.516 -26.859 49.587 1.00 35.20 O"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30584 C2 EDO A 701 -0.275 -28.124 51.219 1.00 34.49 C"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30585 O2 EDO A 701 -1.442 -28.941 51.167 1.00 33.95 O"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30586 C1 EDO A 702 2.792 7.449 67.655 1.00 17.09 C"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30587 O1 EDO A 702 1.451 7.273 67.213 1.00 15.74 O") ] def @hetatms.each_hetatm self.each do |hetatm| yield hetatm end end @hetatms.extend(Bio::PDB::HetatmFinder) actual = @hetatms.find_hetatm{|a| true} assert_equal(expected,actual) end def test_each_hetatm expected = [ Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30583 O1 EDO A 701 -1.516 -26.859 49.587 1.00 35.20 O"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30584 C2 EDO A 701 -0.275 -28.124 51.219 1.00 34.49 C"), 
Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30585 O2 EDO A 701 -1.442 -28.941 51.167 1.00 33.95 O"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30586 C1 EDO A 702 2.792 7.449 67.655 1.00 17.09 C"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30587 O1 EDO A 702 1.451 7.273 67.213 1.00 15.74 O"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30583 O1 EDO A 701 -1.516 -26.859 49.587 1.00 35.20 O"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30584 C2 EDO A 701 -0.275 -28.124 51.219 1.00 34.49 C"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30585 O2 EDO A 701 -1.442 -28.941 51.167 1.00 33.95 O"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30586 C1 EDO A 702 2.792 7.449 67.655 1.00 17.09 C"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30587 O1 EDO A 702 1.451 7.273 67.213 1.00 15.74 O") ] heterogens = [@hetatms,@hetatms] def heterogens.each_heterogen self.each do |heterogen| yield heterogen end end heterogens.extend(Bio::PDB::HetatmFinder) actual = [] heterogens.each_hetatm{|hetatm| actual << hetatm} assert_equal(expected, actual) end def test_hetatms expected = [ Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30583 O1 EDO A 701 -1.516 -26.859 49.587 1.00 35.20 O"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30584 C2 EDO A 701 -0.275 -28.124 51.219 1.00 34.49 C"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30585 O2 EDO A 701 -1.442 -28.941 51.167 1.00 33.95 O"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30586 C1 EDO A 702 2.792 7.449 67.655 1.00 17.09 C"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30587 O1 EDO A 702 1.451 7.273 67.213 1.00 15.74 O"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30583 O1 EDO A 701 -1.516 -26.859 49.587 1.00 35.20 O"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30584 C2 EDO A 701 -0.275 -28.124 51.219 1.00 34.49 
C"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30585 O2 EDO A 701 -1.442 -28.941 51.167 1.00 33.95 O"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30586 C1 EDO A 702 2.792 7.449 67.655 1.00 17.09 C"), Bio::PDB::Record::HETATM.new.initialize_from_string("HETATM30587 O1 EDO A 702 1.451 7.273 67.213 1.00 15.74 O") ] @hetatms.instance_eval{ def hetatms return self end } heterogens = [@hetatms,@hetatms] def heterogens.each_heterogen self.each do |heterogen| yield heterogen end end heterogens.extend(Bio::PDB::HetatmFinder) heterogens.extend(Bio::PDB::HeterogenFinder) actual = heterogens.hetatms assert_equal(expected,actual) end end #HetatmFinder class TestHeterogenFinder < Test::Unit::TestCase def setup @heterogens = [Bio::PDB::Heterogen.new(), Bio::PDB::Heterogen.new(), Bio::PDB::Heterogen.new(), Bio::PDB::Heterogen.new() ] end def test_find_heterogen def @heterogens.each_heterogen self.each do |heterogen| yield heterogen end end @heterogens.extend(Bio::PDB::HeterogenFinder) expected = [ {:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0}, {:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0}, {:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0}, {:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0}, ] hets = @heterogens.find_heterogen{|a| true} actual = [] hets.each do |het| actual << {:resName=> het.resName, :id=> het.id, :chain=> het.chain, :resSeq=> het.resSeq, :iCode=> het.iCode, :atoms_size=> het.atoms.size} end assert_equal(expected,actual) end def test_each_heterogen # expected = [ # Bio::PDB::Heterogen.new(), # Bio::PDB::Heterogen.new(), # Bio::PDB::Heterogen.new(), # Bio::PDB::Heterogen.new(), # Bio::PDB::Heterogen.new(), # Bio::PDB::Heterogen.new(), # Bio::PDB::Heterogen.new(), # Bio::PDB::Heterogen.new() # ] expected = [ {:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0}, 
{:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0}, {:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0}, {:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0}, {:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0}, {:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0}, {:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0}, {:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0} ] def @heterogens.each_heterogen self.each do |heterogen| yield heterogen end end chains = [@heterogens,@heterogens] def chains.each_chain self.each do |chain| yield chain end end chains.extend(Bio::PDB::HeterogenFinder) actual = [] chains.each_heterogen do |het| actual << {:resName=> het.resName, :id=> het.id, :chain=> het.chain, :resSeq=> het.resSeq, :iCode=> het.iCode, :atoms_size=> het.atoms.size} end assert_equal(expected, actual) end def test_heterogens # expected = [ # Bio::PDB::Heterogen.new(), # Bio::PDB::Heterogen.new(), # Bio::PDB::Heterogen.new(), # Bio::PDB::Heterogen.new(), # Bio::PDB::Heterogen.new(), # Bio::PDB::Heterogen.new(), # Bio::PDB::Heterogen.new(), # Bio::PDB::Heterogen.new() # ] expected = [ {:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0}, {:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0}, {:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0}, {:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0}, {:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0}, {:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0}, {:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0}, {:resName=>nil, :id=>nil, :chain=>nil, :resSeq=>nil, :iCode=>nil, :atoms_size=>0} ] @heterogens.instance_eval{ 
def heterogens return self end } chains = [@heterogens,@heterogens] def chains.each_chain self.each do |chain| yield chain end end chains.extend(Bio::PDB::HeterogenFinder) chains.extend(Bio::PDB::ChainFinder) hets = chains.heterogens actual = [] hets.each do |het| actual << {:resName=> het.resName, :id=> het.id, :chain=> het.chain, :resSeq=> het.resSeq, :iCode=> het.iCode, :atoms_size=> het.atoms.size} end assert_equal(expected,actual) end end #HetatmFinder end #module Bio bio-1.4.3.0001/test/unit/bio/db/test_rebase.rb0000644000004100000410000000477412200110570020620 0ustar www-datawww-data# # test/unit/bio/db/test_rebase.rb - Unit test for Bio::REBASE # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/rebase' module Bio #:nodoc: class TestREBASE < Test::Unit::TestCase #:nodoc: def setup enzyme_data = < # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/nbrf' #some condition is not covered with it. This unit test need a nucleotide acid sequence. #I can't find a nucleic acid sequence in PIR format module Bio class TestBioNBRF < Test::Unit::TestCase def setup filename = File.join(BioRubyTestDataPath, 'pir', 'CRAB_ANAPL.pir') @obj = Bio::NBRF.new(File.read(filename)) end def test_entry expected = <P1;CRAB_ANAPL ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN). 
MDITIHNPLI RRPLFSWLAP SRIFDQIFGE HLQESELLPA SPSLSPFLMR SPIFRMPSWL ETGLSEMRLE KDKFSVNLDV KHFSPEELKV KVLGDMVEIH GKHEERQDEH GFIAREFNRK YRIPADVDPL TITSSLSLDG VLTVSAPRKQ SDVPERSIPI TREEKPAIAG AQRK* END_OF_EXPECTED_ENTRY assert_equal(expected, @obj.entry) end def test_seq_class assert_equal(Bio::Sequence::AA, @obj.seq_class) end def test_seq expected = "MDITIHNPLIRRPLFSWLAPSRIFDQIFGEHLQESELLPASPSLSPFLMRSPIFRMPSWLETGLSEMRLEKDKFSVNLDVKHFSPEELKVKVLGDMVEIHGKHEERQDEHGFIAREFNRKYRIPADVDPLTITSSLSLDGVLTVSAPRKQSDVPERSIPITREEKPAIAGAQRK" assert_equal(expected, @obj.seq) end def test_length assert_equal(174, @obj.length) end def test_naseq assert_raise(RuntimeError){ @obj.naseq} #@obj is a protein sequence. the method must output error. end def test_nalen assert_raise(RuntimeError){ @obj.nalen} #@obj is a protein sequence. the method must output error. end def test_aaseq expected = "MDITIHNPLIRRPLFSWLAPSRIFDQIFGEHLQESELLPASPSLSPFLMRSPIFRMPSWLETGLSEMRLEKDKFSVNLDVKHFSPEELKVKVLGDMVEIHGKHEERQDEHGFIAREFNRKYRIPADVDPLTITSSLSLDGVLTVSAPRKQSDVPERSIPITREEKPAIAGAQRK" assert_equal(expected, @obj.aaseq) end def test_aalen assert_equal(174, @obj.aalen) end def test_to_nbrf expected =<aaa;ABCD this is a fake entry. 
atgc* EOS nbrf = {:seq_type=>"aaa", :seq=>"atgc", :width=>7, :entry_id=>"ABCD", :definition=>"this is a fake entry."} assert_equal(expected, Bio::NBRF.to_nbrf(nbrf)) end end #class TestBioNBRF end #module Bio bio-1.4.3.0001/test/unit/bio/db/test_prosite.rb0000644000004100000410000004376612200110570021050 0ustar www-datawww-data# # test/unit/bio/db/test_prosite.rb - Unit test for Bio::PROSITE # # Copyright:: Copyright (C) 2005 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/prosite' module Bio class TestPROSITEConst < Test::Unit::TestCase def test_delimiter assert_equal("\n//\n", Bio::PROSITE::DELIMITER) assert_equal("\n//\n", Bio::PROSITE::RS) end def test_tagsize assert_equal(5, Bio::PROSITE::TAGSIZE) end end # class TestPROSITEConst class TestPROSITE < Test::Unit::TestCase def setup data = File.open(File.join(BioRubyTestDataPath, 'prosite', 'prosite.dat')).read data = data.split(Bio::PROSITE::RS)[0] @obj = Bio::PROSITE.new(data) end def test_name assert_equal("G_PROTEIN_RECEP_F1_1", @obj.name) end def test_division data = "PATTERN" assert_equal(data, @obj.division) end def test_ac data = "PS00237" assert_equal(data, @obj.ac) end def test_dt assert_equal("APR-1990 (CREATED); NOV-1997 (DATA UPDATE); JUL-1998 (INFO UPDATE).", @obj.dt) end def test_de data = "G-protein coupled receptors family 1 signature." assert_equal(data, @obj.de) end def test_pa pattern = "[GSTALIVMFYWC]-[GSTANCPDE]-{EDPKRH}-x(2)-[LIVMNQGA]-x(2)-[LIVMFT]-[GSTANC]-[LIVMFYWSTAC]-[DENH]-R-[FYWCSH]-x(2)-[LIVM]." 
assert_equal(pattern, @obj.pa) end def test_ma assert_equal('', @obj.ma) end def test_ru assert_equal('', @obj.ru) end def test_nr data = {"FALSE_NEG"=>112, "POSITIVE"=>[1057, 1057], "PARTIAL"=>48, "FALSE_POS"=>[64, 64], "RELEASE"=>["40.7", 103373], "TOTAL"=>[1121, 1121], "UNKNOWN"=>[0, 0]} assert_equal(data, @obj.nr) end def test_release assert_equal(['40.7', 103373], @obj.release) end def test_swissprot_release_number assert_equal('40.7', @obj.swissprot_release_number) end def test_swissprot_release_sequences assert_equal(103373, @obj.swissprot_release_sequences) end def test_total assert_equal([1121, 1121], @obj.total) end def test_total_hits assert_equal(1121, @obj.total_hits) end def test_total_sequences assert_equal(1121, @obj.total_sequences) end def test_positive assert_equal([1057, 1057], @obj.positive) end def test_positive_hits assert_equal(1057, @obj.positive_hits) end def test_positive_sequences assert_equal(1057, @obj.positive_sequences) end def test_unknown assert_equal([0, 0], @obj.unknown) end def test_unknown_hits assert_equal(0, @obj.unknown_hits) end def test_unknown_sequences assert_equal(0, @obj.unknown_sequences) end def test_false_pos assert_equal([64, 64], @obj.false_pos) end def test_false_positive_sequences assert_equal(64, @obj.false_positive_sequences) end def test_false_neg assert_equal(112, @obj.false_neg) end def test_partial assert_equal(48, @obj.partial) end def test_cc assert_equal({"TAXO-RANGE"=>"??E?V", "MAX-REPEAT"=>"1"}, @obj.cc) end def test_taxon_range assert_equal('??E?V', @obj.taxon_range) end def test_max_repeat assert_equal(1, @obj.max_repeat) end def test_site assert_equal([0, nil], @obj.site) end def test_skip_flag assert_equal(nil, @obj.skip_flag) end def test_dr assert_equal(Hash, @obj.dr.class) data = ['OPSD_LIMBE', 'T'] assert_equal(data, @obj.dr["O42427"]) end def test_list_xref flag = '' assert_equal([], @obj.list_xref(flag)) end def test_list_truepositive data = ["O42427", "P11617", "P46090", "P30939", "P28336", 
"Q9Z2J6", "Q64326", "P46092", "P07550", "Q9UKL2", "P30940", "P46093", "Q61224", "Q63384", "P46094", "Q28309", "P22328", "P46095", "O77590", "O02813", "Q9R1C8", "P22329", "O93441", "O42300", "Q10904", "O43613", "Q9Z0D9", "P18130", "O42301", "O43614", "P22330", "P22331", "Q9GLJ8", "O15552", "O43193", "P22332", "O43194", "Q9WV26", "Q9TST4", "Q62053", "P58307", "O42307", "Q9TST5", "P58308", "Q9TST6", "P41983", "P30951", "P41984", "O02824", "O88626", "P91657", "P30953", "P18825", "O62709", "O42574", "P30954", "Q28585", "O88628", "P30955", "P28221", "Q9WU02", "P28222", "P32299", "P70310", "P28223", "O76099", "P04201", "P35894", "Q15722", "P35895", "O93459", "P14416", "P35897", "P35898", "P35899", "P49578", "O42451", "P47745", "Q9Y5N1", "O42452", "P50052", "P47746", "O97878", "O02835", "Q09502", "Q28596", "O00254", "P31355", "P47747", "O97879", "O02836", "P31356", "P30966", "P79436", "P47748", "P87365", "P08099", "Q9JL21", "P47749", "P87366", "O18481", "P16582", "P87367", "P30968", "O97880", "P47750", "P87368", "Q62758", "P30969", "Q28468", "O97881", "Q09638", "P09703", "P87369", "O95918", "Q9TUE1", "O97882", "P22909", "P09704", "O18485", "O42327", "P47751", "P47883", "P18841", "O55193", "O97883", "O18486", "O42328", "P47884", "Q9EP86", "O42329", "O14626", "P48145", "P47887", "O08725", "P48146", "P30974", "O08858", "O18910", "O42330", "O55197", "O08726", "O70526", "P30975", "O18911", "Q28474", "O18912", "O70528", "O18913", "P34311", "O18914", "O42466", "O95371", "Q9WU25", "P47892", "P70596", "P33396", "P70597", "Q61130", "Q15743", "Q15612", "P51144", "P32745", "O00270", "Q00991", "Q9YGY9", "P29754", "Q9GZK3", "O02721", "Q9GZK4", "Q15615", "Q9Y5P1", "Q60613", "Q15062", "P29755", "Q9UKP6", "Q28905", "Q9YGZ0", "P47898", "Q60614", "P47899", "Q9GZK7", "Q9YGZ1", "Q9YGZ2", "O97504", "Q15619", "P30987", "Q9YGZ3", "Q9YGZ4", "Q93126", "Q15620", "Q9YGZ5", "P30989", "Q13725", "Q93127", "P16473", "P23749", "Q9YGZ6", "O54814", "O62743", "Q9YGZ7", "P52202", "Q15622", "Q9YGZ8", "P56514", 
"P30728", "O97772", "Q9YGZ9", "P30991", "P56515", "P30729", "Q9J529", "P56516", "P47774", "O62747", "P30992", "P35403", "P15823", "P47775", "Q9WUK7", "P30993", "O97512", "P35404", "P06002", "O08878", "P30730", "P30994", "O46635", "P35405", "P30731", "P21728", "P35406", "P42288", "P21729", "P35407", "P49217", "P31387", "P35408", "P42289", "O00155", "P31388", "O46639", "P48039", "Q64121", "P35409", "P49219", "P31389", "O18935", "Q9TU05", "P42290", "P21730", "P21731", "P18871", "P31390", "P42291", "P48040", "P31391", "P35410", "P20789", "Q13606", "P48042", "O42490", "P35411", "O88410", "P31392", "P29089", "Q13607", "P48043", "P35412", "P48044", "O89039", "P35413", "P30872", "O01668", "P35414", "P30873", "Q9YH00", "P79211", "P30874", "Q9YH01", "Q28927", "P30875", "Q9YH02", "P49912", "O08890", "P25100", "Q28928", "O95006", "Q9YH03", "P25101", "Q28929", "O95007", "O15218", "O88680", "Q9YH04", "O08892", "Q05394", "P79901", "Q9YH05", "P25102", "P33032", "Q64264", "Q9ERZ3", "P79902", "P25103", "P79217", "Q9UGF5", "Q9ERZ4", "Q9N2A2", "P79903", "P13945", "P25104", "P79218", "P33033", "Q9UGF6", "Q9N2A3", "P25105", "O97661", "O70431", "P79350", "Q9UGF7", "Q9N2A4", "P48974", "P25106", "O97663", "P03999", "P16235", "O95013", "P22269", "O97665", "P26684", "P97468", "P47798", "O97666", "P22270", "O18821", "P47799", "P49922", "Q61038", "P51050", "P25929", "P49238", "Q28807", "P28285", "P79911", "P28286", "P13953", "Q19084", "O61303", "P04000", "P25930", "P04001", "Q9NPB9", "P25931", "Q9R1K6", "P42866", "P56412", "P79914", "Q61041", "Q9GK74", "P25115", "P04950", "P25116", "P19020", "P18762", "O12948", "Q26495", "Q09561", "Q25157", "P70115", "O77830", "Q83207", "P02699", "O12000", "Q01717", "Q25158", "P29403", "P50406", "Q01718", "P29404", "O42384", "P50407", "P79234", "O42385", "O02769", "Q17232", "P79236", "P21761", "P79237", "P04274", "Q28003", "O75388", "P24603", "Q9TUK4", "P53452", "P08588", "Q28005", "P43240", "Q61184", "O08786", "P79240", "P53453", "Q13639", "Q9Y3N9", "Q9UP62", 
"P18089", "P53454", "P79928", "P79242", "Q01726", "P24053", "P79243", "Q17239", "Q01727", "Q14330", "P18090", "Q9R024", "O62791", "O02777", "P43114", "O62792", "P79113", "O77713", "O08790", "O62793", "P54833", "P43116", "O62794", "Q61614", "O77715", "Q00788", "P43117", "O42266", "O62795", "O88319", "Q03566", "P43118", "O62796", "O02781", "O08530", "P43119", "O42268", "P16395", "O62798", "Q9DDN6", "P05363", "P30518", "O13227", "Q9GZQ6", "P43252", "O18982", "P43253", "P79807", "O18983", "P79808", "O77721", "O35786", "P30098", "P56439", "P79809", "P20309", "O77723", "P56440", "Q28838", "P55919", "Q9UHM6", "Q9TT23", "P56441", "P97926", "Q63652", "P55920", "P56442", "P79812", "P25962", "P56443", "P35462", "P56444", "P35463", "P56445", "O97571", "P79393", "P56446", "O02662", "P49144", "P56447", "P79394", "Q25188", "P49145", "P28190", "P56448", "P24628", "P56449", "O02664", "P49146", "P33765", "Q9EQD2", "Q24563", "P33766", "Q90674", "O02666", "P56450", "P06199", "Q25190", "O02667", "Q99500", "P25021", "P56451", "P08482", "P30796", "Q9H1Y3", "Q28031", "P79266", "O14581", "P08483", "P25023", "P49019", "P25024", "P08485", "P12657", "P12526", "Q9QXZ9", "P08908", "P07700", "P25025", "P32300", "Q9Z1S9", "P35342", "P17200", "O09047", "P08909", "O76100", "Q91175", "P43140", "P49285", "P32302", "P35343", "P43141", "P49286", "P32303", "P35344", "Q91178", "P32304", "Q9Y5X5", "P35345", "P08911", "P43142", "O08556", "P49288", "P35346", "P52702", "P32305", "P08912", "P11483", "P32306", "P52703", "Q9R0M1", "P08913", "P32307", "Q62463", "P35348", "O42294", "P30542", "P32308", "P30411", "P30543", "P32309", "Q9TU77", "Q02152", "Q02284", "Q18904", "P35350", "P30545", "Q28044", "P08100", "O77616", "P35351", "P70031", "P30546", "P79148", "P30547", "P32310", "P16849", "P32311", "Q9JI35", "P30548", "P32312", "P30549", "Q9Y5Y4", "P30680", "P32313", "O08565", "Q28997", "P97266", "P11229", "Q28998", "Q9NYM4", "O93603", "P35356", "P30550", "O15973", "P35357", "P30551", "O77621", "P35358", "P58173", 
"P34968", "P30552", "P35359", "P34969", "P22888", "P30553", "O19012", "P30554", "P30555", "O19014", "P35360", "Q9MZV8", "P30556", "Q99788", "P35361", "Q04683", "P34970", "P35362", "P70174", "P34971", "Q9WUT7", "P30558", "Q9TUP7", "P79291", "P28088", "P35363", "P34972", "O42179", "P30559", "P79292", "Q99527", "Q01776", "P35364", "P34973", "Q64077", "P41591", "P21554", "P34974", "Q9XT45", "O88495", "P46002", "Q90309", "P41592", "P79848", "P56479", "P21555", "Q9GLX8", "P35365", "P34975", "Q29003", "P30560", "P35366", "P21556", "P47211", "P16177", "P34976", "P10980", "O42604", "P35367", "Q95247", "P34977", "Q29005", "P33533", "P34978", "P28646", "Q17292", "P41595", "P35368", "P33534", "P56481", "P79166", "P41596", "P20905", "P35369", "P28647", "P33535", "P56482", "P41597", "P24530", "P56483", "P47900", "O19024", "P35370", "P56484", "P47901", "O18766", "Q17296", "O19025", "P35371", "P51651", "P34981", "P56485", "Q9H207", "P35372", "P56486", "Q13304", "P35373", "P34982", "Q29010", "Q9H209", "P35374", "P34983", "O76000", "P08255", "P04761", "P22086", "P56488", "P35375", "Q98980", "Q28886", "Q95254", "P14842", "P34984", "O76001", "O76002", "P56489", "P35376", "P34986", "Q9XT57", "Q98982", "Q95125", "Q28756", "Q9H343", "O13018", "P35377", "P34987", "Q9XT58", "P97288", "P56490", "Q9H344", "P35378", "P70612", "P56491", "P79175", "P34989", "P35379", "P16610", "P56492", "O19032", "P79176", "Q9H346", "P49059", "P56493", "P79177", "P56494", "Q9NQN1", "P56495", "P79863", "P97292", "Q91081", "P79178", "P97714", "Q99677", "P35382", "P35383", "O54798", "P56496", "Q99678", "O19037", "Q04573", "P34992", "P34993", "P15409", "O54799", "P56497", "Q99679", "Q94741", "P97717", "P34994", "Q29154", "P97295", "Q90328", "P56498", "P32211", "Q92847", "P34995", "P32212", "P34996", "Q60474", "P34997", "Q63447", "Q60475", "P10608", "Q60476", "Q62928", "Q13585", "O95665", "P79188", "P37288", "Q9DGG4", "Q90334", "P51436", "P26255", "P37289", "Q17053", "O16005", "Q28509", "P21450", "P55167", "Q9Z2D5", 
"P79190", "P21451", "Q9QZN9", "P79191", "P21452", "P51675", "Q28642", "P21453", "P51676", "Q60483", "P49892", "P14600", "P51677", "Q60484", "Q9ES90", "Q9HC97", "P51678", "P17124", "P79748", "P51679", "O18793", "Q62805", "P41231", "P28678", "P41232", "Q9Y2T5", "P51680", "P18599", "Q9TT96", "Q9WVD0", "P28679", "O19054", "Q18007", "P32229", "Q9XT82", "O35599", "P51681", "P51682", "P47800", "P28680", "Q25414", "P28681", "P51683", "Q90214", "Q28519", "Q90215", "P51684", "P28682", "O60755", "P28683", "P29371", "P51685", "Q9H3N8", "O16017", "P21462", "P25089", "O00574", "P21463", "P28684", "P79756", "O54689", "P51686", "P47936", "O16018", "P51582", "O16019", "P22671", "Q9UM60", "P47937", "O77408", "Q9TTQ9", "Q95154", "P26824", "P25090", "Q95155", "O16020", "P52500", "Q95156", "Q28524", "O35210", "P32236", "Q95157", "P48302", "Q90352", "P32237", "O35476", "P02700", "P48303", "P32238", "O02213", "P25095", "P32239", "O35214", "O35478", "P32240", "P20395", "P79763", "P18901", "Q16581", "P25099", "P79898", "O77680", "P32244", "Q9PUI7", "P28564", "P49650", "P32245", "P28565", "P49651", "P32246", "P28566", "P49652", "Q98894", "P32247", "Q9H255", "Q98895", "P32248", "P32249", "P20272", "P43088", "Q95170", "P50128", "P32250", "P50391", "P70658", "P08172", "P50129", "P08173", "P32251", "P46616", "Q9QYN8", "P50130", "P51470", "P14763", "P51471", "P50132", "Q27987", "P49660", "P51472", "P51473", "O60412", "P51474", "P51475", "Q90373", "Q95179", "P51476", "P58406", "O88853", "P70536", "Q17094", "O88854", "Q11082", "P37067", "Q90245", "P79785", "P37068", "O13076", "Q92633", "P46626", "Q91559", "P37069", "P46627", "P21917", "P23944", "O97967", "P46628", "P56971", "P43657", "O62809", "Q28553", "P21918", "P23945", "P20288", "O19091", "O15529", "P37070", "Q28422", "P29274", "P37071", "P29275", "Q9Z0Z6", "P29276", "Q25321", "Q18179", "P30372", "Q90252", "O14843", "Q9Z2I3", "Q25322", "Q08520", "Q28558", "P51488", "P41143", "P23265", "Q28691", "P51489", "P09241", "O18312", "P41144", "Q61212", 
"P23266", "Q9WV08", "P41145", "P46636", "P23267", "P56718", "P41146", "P21109", "P51490", "P79400", "Q9UPC5", "Q62035", "P56719", "P23269", "O14718", "P79798", "P51491", "O18315", "P41149", "P23270", "O60431", "P49681", "P23271", "P19327", "P30935", "P11613", "P49682", "P23272", "P41968", "O43603", "P19328", "Q63931", "P30936", "P49683", "P11614", "P23273", "P46089", "P28334", "P49684", "P30937", "P11615", "P23274", "P28335", "O13092", "O43869", "P97520", "Q01337", "P30938", "P49685", "P11616", "P23275", "Q01338"] assert_equal(data.sort, @obj.list_truepositive.sort) end def test_list_falsenegative data = ["P18259", "Q13813", "Q55593", "Q00274", "P54466", "Q9HJA4", "P55687", "Q9W0K0", "Q42608", "P45873", "P45198", "P15828", "P18609", "Q51758", "P24151", "P23892", "P41510", "P22817", "P46457", "O15910", "P23515", "O59098", "P26560", "P26561", "P47551", "P22023", "P21503", "Q9VNB3", "P25147", "Q42675", "P21524", "P06882", "Q61647", "P42790", "Q10775", "O84877", "P51656", "P75548", "Q92839", "P51657", "P37274", "P34724", "P07751", "P00498", "P07886", "P26258", "O67284", "Q25410", "P46724", "P76097", "P16086", "P08032", "P14198", "P77916", "O60779", "P13688", "Q03834", "Q63912", "O68824", "P77932", "Q53547", "P77933", "P34529", "Q00126"] assert_equal(data.sort, @obj.list_falsenegative.sort) end def test_list_falsepositive data =["P41985", "P41986", "P17645", "Q60612", "Q60879", "P52592", "Q60882", "Q60883", "Q60884", "Q60885", "Q60886", "Q60887", "Q60888", "Q60889", "Q60890", "P49218", "Q60891", "Q60892", "P49220", "Q60893", "Q60894", "Q60895", "O70430", "O70432", "P51046", "P51047", "P51048", "P51049", "P51051", "P51052", "Q98913", "Q98914", "Q61616", "Q61618", "P79250", "P14803", "P49287", "Q28602", "P97267", "Q90305", "Q29006", "Q95252", "P34985", "Q90456", "Q95136", "Q95137", "Q62953", "Q95195"] assert_equal(data.sort, @obj.list_falsepositive.sort) end def test_list_potentialhit data = ["P41985", "P41986", "P17645", "Q60612", "Q60879", "P52592", "Q60882", "Q60883", 
"Q60884", "Q60885", "Q60886", "Q60887", "Q60888", "Q60889", "Q60890", "P49218", "Q60891", "Q60892", "P49220", "Q60893", "Q60894", "Q60895", "O70430", "O70432", "P51046", "P51047", "P51048", "P51049", "P51051", "P51052", "Q98913", "Q98914", "Q61616", "Q61618", "P79250", "P14803", "P49287", "Q28602", "P97267", "Q90305", "Q29006", "Q95252", "P34985", "Q90456", "Q95136", "Q95137", "Q62953", "Q95195"] assert_equal(data.sort, @obj.list_potentialhit.sort) end def test_list_unknown data = [] assert_equal(data, @obj.list_unknown) end def test_pdb_xref data = ["1BOJ", "1BOK", "1F88"] assert_equal(data, @obj.pdb_xref) end def test_pdoc_xref data = "PDOC00210" assert_equal(data, @obj.pdoc_xref) end def test_pa2re pa = '[AC]-x-V-x(4)-{ED}.' assert_equal(/[AC].V.{4}[^ED]/i, @obj.pa2re(pa)) end def test_self_pa2re pa = '[AC]-x-V-x(4)-{ED}.' assert_equal(/[AC].V.{4}[^ED]/i, Bio::PROSITE.pa2re(pa)) end end # class TestPROSITE end bio-1.4.3.0001/test/unit/bio/db/test_fasta.rb0000644000004100000410000002126412200110570020446 0ustar www-datawww-data# # test/unit/bio/db/test_fasta.rb - Unit test for Bio::FastaFormat # # Copyright:: Copyright (C) 2005 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/fasta' module Bio class TestFastaFormatConst < Test::Unit::TestCase def test_delimiter assert_equal("\n>", Bio::FastaFormat::DELIMITER) assert_equal("\n>", Bio::FastaFormat::RS) end end # class TestFastaFormatConst class TestFastaFormatSwissProt < Test::Unit::TestCase def setup text =<gi|1171674|sp|P42267|NDD_BPR69 NUCLEAR DISRUPTION PROTEIN MKYMTVTDLNNAGATVIGTIKGGEWFLGTPHKDILSKPGFYFLVSKLDGRPFSNPCVSARFYVGNQRSKQGFSAVLSHIR QRRSQLARTIANNNMVYTVFYLPASKMKPLTTGFGKGQLALAFTRNHHSEYQTLEEMNRMLADNFKFVLQAY END @obj = Bio::FastaFormat.new(text) end def test_locus 
assert_equal(nil, @obj.locus) end end class TestFastaFormatKeggGenesNT < Test::Unit::TestCase def setup text =<eco:b0001 thrL; thr operon leader peptide (N) atgaaacgcattagcaccaccattaccaccaccatcaccattaccacaggtaacggtgcg ggctga END @obj = Bio::FastaFormat.new(text) end def test_naseq_class assert_equal(Bio::Sequence::NA, @obj.naseq.class) end def test_naseq seq = 'atgaaacgcattagcaccaccattaccaccaccatcaccattaccacaggtaacggtgcgggctga' assert_equal(seq, @obj.naseq) end def test_nalen assert_equal(66, @obj.nalen) end end class TestFastaFormatKeggGenesAA < Test::Unit::TestCase def setup text =<sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST] MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEG VPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYME GIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNL KLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGC IFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFP QWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES >sce:YBR274W CHK1; probable serine/threonine-protein kinase [EC:2.7.1.-] [SP:KB9S_YEAST] MSLSQVSPLPHIKDVVLGDTVGQGAFACVKNAHLQMDPSIILAVKFIHVP TCKKMGLSDKDITKEVVLQSKCSKHPNVLRLIDCNVSKEYMWIILEMADG GDLFDKIEPDVGVDSDVAQFYFQQLVSAINYLHVECGVAHRDIKPENILL DKNGNLKLADFGLASQFRRKDGTLRVSMDQRGSPPYMAPEVLYSEEGYYA DRTDIWSIGILLFVLLTGQTPWELPSLENEDFVFFIENDGNLNWGPWSKI EFTHLNLLRKILQPDPNKRVTLKALKLHPWVLRRASFSGDDGLCNDPELL AKKLFSHLKVSLSNENYLKFTQDTNSNNRYISTQPIGNELAELEHDSMHF QTVSNTQRAFTSYDSNTNYNSGTGMTQEAKWTQFISYDIAALQFHSDEND CNELVKRHLQFNPNKLTKFYTLQPMDVLLPILEKALNLSQIRVKPDLFAN FERLCELLGYDNVFPLIINIKTKSNGGYQLCGSISIIKIEEELKSVGFER KTGDPLEWRRLFKKISTICRDIILIPN END @obj = Bio::FastaFormat.new(text) end def test_entry_id assert_equal('sce:YBR160W', @obj.entry_id) end def test_acc_version assert_equal(nil, @obj.acc_version) end def test_entry data = ">sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] 
[SP:CC28_YEAST]\nMSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEG\nVPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYME\nGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNL\nKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGC\nIFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFP\nQWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES\n" assert_equal(data, @obj.entry) end def test_definition data = "sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]" assert_equal(data, @obj.definition) end def test_data data = "\nMSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEG\nVPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYME\nGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNL\nKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGC\nIFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFP\nQWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES\n" assert_equal(data, @obj.data) end end class TestFastaFormat < Test::Unit::TestCase def setup text =<gi|55416189|gb|AAV50056.1| NADH dehydrogenase subunit 1 [Dasyurus hallucatus] MFTINLLIYIIPILLAVAFLTLIERKMLGYMQFRKGPNIVGPYGLLQPFADAVKLFTKEPLRPLTSSISIFIIAPILALT IALTIWTPLPMPNTLLDLNLGLIFILSLSGLSVYSILWSGWASNSKYALIGALRAVAQTISYEVSLAIILLSIMLINGSF TLKTLSITQENLWLIITTWPLAMMWYISTLAETNRAPFDLTEGESELVSGFNVEYAAGPFAMFFLAEYANIIAMNAITTI LFLGPSLTPNLSHLNTLSFMLKTLLLTMVFLWVRASYPRFRYDQLMHLLWKNFLPMTLAMCLWFISLPIALSCIPPQL >gi|55416190|gb|AAV50057.1| NADH dehydrogenase subunit 2 [Dasyurus hallucatus] MSPYVLMILTLSLFIGTCLTIFSNHWFTAWMGLEINTLAIIPLMTAPNNPRSTEAATKYFLTQATASMLMMFAIIYNAWS TNQWALPQLSDDWISLLMTVALAIKLGLAPFHFWVPEVTQGIPLLTGMILLTWQKIAPTAILFQIAPYLNMKFLVILAIL STLVGGWGGLNQTHLRKILAYSSIAHMGWMIIIVQINPTLSIFTLTIYVMATLTTFLTLNLSNSTKIKSLGNLWNKSATA TIIIFLTLLSLGGLPPLTGFMPKWLILQELINNGNIITATMMALSALLNLFFYMRLIYASSLTMFPSINNSKMQWYNNSM KTTTLIPTATVISSLLLPLTPLFVTLY END @obj = Bio::FastaFormat.new(text) end def test_entry data = ">gi|55416189|gb|AAV50056.1| NADH dehydrogenase subunit 1 [Dasyurus 
hallucatus]\nMFTINLLIYIIPILLAVAFLTLIERKMLGYMQFRKGPNIVGPYGLLQPFADAVKLFTKEPLRPLTSSISIFIIAPILALT\nIALTIWTPLPMPNTLLDLNLGLIFILSLSGLSVYSILWSGWASNSKYALIGALRAVAQTISYEVSLAIILLSIMLINGSF\nTLKTLSITQENLWLIITTWPLAMMWYISTLAETNRAPFDLTEGESELVSGFNVEYAAGPFAMFFLAEYANIIAMNAITTI\nLFLGPSLTPNLSHLNTLSFMLKTLLLTMVFLWVRASYPRFRYDQLMHLLWKNFLPMTLAMCLWFISLPIALSCIPPQL\n" assert_equal(data, @obj.entry) end def test_entry_overrun data =<gi|55416190|gb|AAV50057.1| NADH dehydrogenase subunit 2 [Dasyurus hallucatus] MSPYVLMILTLSLFIGTCLTIFSNHWFTAWMGLEINTLAIIPLMTAPNNPRSTEAATKYFLTQATASMLMMFAIIYNAWS TNQWALPQLSDDWISLLMTVALAIKLGLAPFHFWVPEVTQGIPLLTGMILLTWQKIAPTAILFQIAPYLNMKFLVILAIL STLVGGWGGLNQTHLRKILAYSSIAHMGWMIIIVQINPTLSIFTLTIYVMATLTTFLTLNLSNSTKIKSLGNLWNKSATA TIIIFLTLLSLGGLPPLTGFMPKWLILQELINNGNIITATMMALSALLNLFFYMRLIYASSLTMFPSINNSKMQWYNNSM KTTTLIPTATVISSLLLPLTPLFVTLY END assert_equal(data, @obj.entry_overrun) end class DummyFactory def query(str) @query_str = str "DummyFactoryResult#{str.length}" end attr_reader :query_str end #class DummyFactory def test_query data =<gi|55416189|gb|AAV50056.1| NADH dehydrogenase subunit 1 [Dasyurus hallucatus] MFTINLLIYIIPILLAVAFLTLIERKMLGYMQFRKGPNIVGPYGLLQPFADAVKLFTKEPLRPLTSSISIFIIAPILALT IALTIWTPLPMPNTLLDLNLGLIFILSLSGLSVYSILWSGWASNSKYALIGALRAVAQTISYEVSLAIILLSIMLINGSF TLKTLSITQENLWLIITTWPLAMMWYISTLAETNRAPFDLTEGESELVSGFNVEYAAGPFAMFFLAEYANIIAMNAITTI LFLGPSLTPNLSHLNTLSFMLKTLLLTMVFLWVRASYPRFRYDQLMHLLWKNFLPMTLAMCLWFISLPIALSCIPPQL END factory = DummyFactory.new assert_equal("DummyFactoryResult401", @obj.query(factory)) assert_equal(data, factory.query_str) end def test_entry_id assert_equal('gi|55416189', @obj.entry_id) end def test_definition data = "gi|55416189|gb|AAV50056.1| NADH dehydrogenase subunit 1 [Dasyurus hallucatus]" assert_equal(data, @obj.definition) end def test_data data = 
"\nMFTINLLIYIIPILLAVAFLTLIERKMLGYMQFRKGPNIVGPYGLLQPFADAVKLFTKEPLRPLTSSISIFIIAPILALT\nIALTIWTPLPMPNTLLDLNLGLIFILSLSGLSVYSILWSGWASNSKYALIGALRAVAQTISYEVSLAIILLSIMLINGSF\nTLKTLSITQENLWLIITTWPLAMMWYISTLAETNRAPFDLTEGESELVSGFNVEYAAGPFAMFFLAEYANIIAMNAITTI\nLFLGPSLTPNLSHLNTLSFMLKTLLLTMVFLWVRASYPRFRYDQLMHLLWKNFLPMTLAMCLWFISLPIALSCIPPQL\n" assert_equal(data, @obj.data) end def test_seq seq = 'MFTINLLIYIIPILLAVAFLTLIERKMLGYMQFRKGPNIVGPYGLLQPFADAVKLFTKEPLRPLTSSISIFIIAPILALTIALTIWTPLPMPNTLLDLNLGLIFILSLSGLSVYSILWSGWASNSKYALIGALRAVAQTISYEVSLAIILLSIMLINGSFTLKTLSITQENLWLIITTWPLAMMWYISTLAETNRAPFDLTEGESELVSGFNVEYAAGPFAMFFLAEYANIIAMNAITTILFLGPSLTPNLSHLNTLSFMLKTLLLTMVFLWVRASYPRFRYDQLMHLLWKNFLPMTLAMCLWFISLPIALSCIPPQL' assert_equal(seq, @obj.seq) end def test_length assert_equal(318, @obj.length) end def test_aaseq seq = "MFTINLLIYIIPILLAVAFLTLIERKMLGYMQFRKGPNIVGPYGLLQPFADAVKLFTKEPLRPLTSSISIFIIAPILALTIALTIWTPLPMPNTLLDLNLGLIFILSLSGLSVYSILWSGWASNSKYALIGALRAVAQTISYEVSLAIILLSIMLINGSFTLKTLSITQENLWLIITTWPLAMMWYISTLAETNRAPFDLTEGESELVSGFNVEYAAGPFAMFFLAEYANIIAMNAITTILFLGPSLTPNLSHLNTLSFMLKTLLLTMVFLWVRASYPRFRYDQLMHLLWKNFLPMTLAMCLWFISLPIALSCIPPQL" assert_equal(seq, @obj.aaseq) end def test_aalen assert_equal(318, @obj.aalen) end def test_identifiers assert_equal(Bio::FastaDefline, @obj.identifiers.class) end def test_gi assert_equal('55416189', @obj.gi) end def test_accession assert_equal('AAV50056', @obj.accession) end def test_accessions assert_equal(['AAV50056'], @obj.accessions) end def test_acc_version assert_equal('AAV50056.1', @obj.acc_version) end end # class TestFastaFormat end bio-1.4.3.0001/test/unit/bio/db/embl/0000755000004100000410000000000012200110570016676 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/db/embl/test_uniprot.rb0000644000004100000410000000136212200110570021764 0ustar www-datawww-data# # test/unit/bio/db/embl/test_uniprot.rb - Unit test for Bio::UniProt # # Copyright:: Copyright (C) 2005 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper 
routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/embl/uniprot' module Bio class TestUniProt < Test::Unit::TestCase def setup data = File.read(File.join(BioRubyTestDataPath, 'uniprot', 'p53_human.uniprot')) @obj = Bio::UniProt.new(data) end def test_gene_name assert_equal('TP53', @obj.gene_name) end end end bio-1.4.3.0001/test/unit/bio/db/embl/test_common.rb0000644000004100000410000000470712200110570021562 0ustar www-datawww-data# # test/unit/bio/db/embl/common.rb - Unit test for Bio::EMBL::COMMON module # # Copyright:: Copyright (C) 2005 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/embl/common' module Bio # TestClass EMBLDB Inherited class EMBL_API < EMBLDB include Bio::EMBLDB::Common end class TestEMBLCommon < Test::Unit::TestCase def setup @obj = Bio::EMBLDB::Common end def test_ac assert(@obj.instance_methods.find {|x| x.to_s == 'ac' }) end def test_accessions assert(@obj.instance_methods.find {|x| x.to_s == 'accessions' }) end def test_accession assert(@obj.instance_methods.find {|x| x.to_s == 'accession' }) end def test_de assert(@obj.instance_methods.find {|x| x.to_s == 'de' }) end def test_description assert(@obj.instance_methods.find {|x| x.to_s == 'description' }) end def test_definition assert(@obj.instance_methods.find {|x| x.to_s == 'definition' }) end def test_os assert(@obj.instance_methods.find {|x| x.to_s == 'os' }) end def test_og assert(@obj.instance_methods.find {|x| x.to_s == 'og' }) end def test_oc assert(@obj.instance_methods.find {|x| x.to_s == 'oc' }) end def test_kw assert(@obj.instance_methods.find {|x| x.to_s == 'kw' }) 
end def test_keywords assert(@obj.instance_methods.find {|x| x.to_s == 'keywords' }) end def test_ref assert(@obj.instance_methods.find {|x| x.to_s == 'ref' }) end def test_references assert(@obj.instance_methods.find {|x| x.to_s == 'references' }) end def test_dr assert(@obj.instance_methods.find {|x| x.to_s == 'dr' }) end end class TestEMBLAPI < Test::Unit::TestCase def setup data =< # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/embl/embl' module Bio class TestEMBL < Test::Unit::TestCase def setup output = File.read(File.join(BioRubyTestDataPath, 'embl', 'AB090716.embl')) @obj = Bio::EMBL.new(output) end def test_id_line assert(@obj.id_line) end def test_id_line_iterator assert(@obj.id_line {|key, value| }) end def test_id_line_entry_name assert_equal('AB090716', @obj.id_line('ENTRY_NAME')) end def test_id_line_data_class assert_equal('standard', @obj.id_line('DATA_CLASS')) end def test_id_line_molecule_type assert_equal('genomic DNA', @obj.id_line('MOLECULE_TYPE')) end def test_id_line_division assert_equal('VRT', @obj.id_line('DIVISION')) end def test_id_line_sequence_length assert_equal(166, @obj.id_line('SEQUENCE_LENGTH')) end def test_entry entry_id = 'AB090716' assert_equal(entry_id, @obj.entry) assert_equal(entry_id, @obj.entry_name) assert_equal(entry_id, @obj.entry_id) end def test_molecule molecule = 'genomic DNA' assert_equal(molecule, @obj.molecule) assert_equal(molecule, @obj.molecule_type) end def test_division assert_equal('VRT', @obj.division) end def test_sequence_length seqlen = 166 assert_equal(seqlen, @obj.sequence_length) assert_equal(seqlen, @obj.seqlen) end # Bio::EMBLDB::COMMON#ac def test_ac ac = ['AB090716'] assert_equal(ac, @obj.ac) assert_equal(ac, @obj.accessions) end # Bio::EMBLDB::COMMON#accession def 
test_accession assert_equal('AB090716', @obj.accession) end def test_sv assert_equal('AB090716.1', @obj.sv) end def test_version assert_equal(1, @obj.version) end def test_dt assert(@obj.dt) end def test_dt_iterator assert(@obj.dt {|key, value| }) end def test_dt_created assert_equal('25-OCT-2002 (Rel. 73, Created)', @obj.dt('created')) end def test_dt_updated assert_equal('29-NOV-2002 (Rel. 73, Last updated, Version 2)', @obj.dt('updated')) end # Bio::EMBLDB::COMMON#de def test_de assert_equal("Haplochromis sp. 'muzu, rukwa' LWS gene for long wavelength-sensitive opsin, partial cds, specimen_voucher:specimen No. HT-9361.", @obj.de) end # Bio::EMBLDB::COMMON#kw def test_kw k = [] assert_equal([], @obj.kw) assert_equal([], @obj.keywords) end def test_os assert_equal("Haplochromis sp. 'muzu, rukwa'", @obj.os[0]['os']) assert_nil(@obj.os[0]['name']) end # Bio::EMBLDB::COMMON#oc def test_oc assert_equal('Eukaryota', @obj.oc.first) end # Bio::EMBLDB::COMMON#og def test_og assert_equal([], @obj.og) end # Bio::EMBLDB::COMMON#ref def test_ref assert_equal(2, @obj.ref.size) end # Bio::EMBLDB::COMMON#references def test_references assert_equal(Array, @obj.references.class) end # Bio::EMBLDB::COMMON#dr def test_dr assert_equal({}, @obj.dr) end def test_fh assert_equal('Key Location/Qualifiers', @obj.fh) end def test_ft assert_equal(Array, @obj.ft.class) end def test_ft_iterator @obj.ft.each do |feature| assert_equal(Bio::Feature, feature.class) end end def test_ft_accessor assert_equal('CDS', @obj.ft[1].feature) end def test_each_cds @obj.each_cds do |x| assert_equal('CDS', x.feature) end end def test_each_gene @obj.each_gene do |x| assert_equal('gene', x.feature) end end def test_cc assert_equal('', @obj.cc) end # def test_xx # end def test_sq data = {"a"=>29, "c"=>42, "ntlen"=>166, "g"=>41, "t"=>54, "other"=>0} assert_equal(data, @obj.sq) end def test_sq_get assert_equal(29, @obj.sq("a")) end def test_seq seq = 
'gttctggcctcatggactgaagacttcctgtggacctgatgtgttcagtggaagtgaagaccctggagtacagtcctacatgattgttctcatgattacttgctgtttcatccccctggctatcatcatcctgtgctaccttgctgtgtggatggccatccgtgct' assert_equal(seq, @obj.seq) assert_equal(seq, @obj.naseq) assert_equal(seq, @obj.ntseq) end end end bio-1.4.3.0001/test/unit/bio/db/embl/test_embl_to_bioseq.rb0000644000004100000410000001403512200110570023250 0ustar www-datawww-data# # test/unit/bio/db/embl/test_embl_to_bioseq.rb - Unit test for Bio::EMBL to Bio::Sequence data converter # # Copyright:: Copyright (C) 2005, 2008 # Mitsuteru Nakao # Jan Aerts # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/embl/embl' module Bio class TestEMBLToBioSequence < Test::Unit::TestCase def setup input = File.read(File.join(BioRubyTestDataPath, 'embl', 'AB090716.embl.rel89')) embl_object = Bio::EMBL.new(input) embl_object.instance_eval { @data['OS'] = "Haplochromis sp. 'muzu rukwa'" } @bio_seq = embl_object.to_biosequence end def test_entry_id assert_equal('AB090716', @bio_seq.entry_id) end def test_primary_accession assert_equal('AB090716', @bio_seq.primary_accession) end def test_secondary_accessions assert_equal([], @bio_seq.secondary_accessions) end def test_molecule_type assert_equal('genomic DNA', @bio_seq.molecule_type) end def test_definition assert_equal("Haplochromis sp. 'muzu, rukwa' LWS gene for long wavelength-sensitive opsin, partial cds, specimen_voucher:specimen No. HT-9361.", @bio_seq.definition) end def test_topology assert_equal('linear', @bio_seq.topology) end def test_date_created # '25-OCT-2002 (Rel. 73, Created)' assert_equal(Date.parse('25-OCT-2002'), @bio_seq.date_created) end def test_date_modified # '14-NOV-2006 (Rel. 
89, Last updated, Version 3)' assert_equal(Date.parse('14-NOV-2006'), @bio_seq.date_modified) end def test_release_created assert_equal('73', @bio_seq.release_created) end def test_release_modified assert_equal('89', @bio_seq.release_modified) end def test_entry_version assert_equal('3', @bio_seq.entry_version) end def test_division assert_equal('VRT', @bio_seq.division) end def test_sequence_version assert_equal(1, @bio_seq.sequence_version) end def test_keywords assert_equal([], @bio_seq.keywords) end def test_species assert_equal("Haplochromis sp. 'muzu, rukwa'", @bio_seq.species) end def test_classification assert_equal(['Eukaryota','Metazoa','Chordata','Craniata','Vertebrata','Euteleostomi','Actinopterygii','Neopterygii','Teleostei','Euteleostei','Neoteleostei','Acanthomorpha','Acanthopterygii','Percomorpha','Perciformes','Labroidei','Cichlidae','African cichlids','Pseudocrenilabrinae','Haplochromini','Haplochromis'], @bio_seq.classification) end def test_references assert_equal(2, @bio_seq.references.length) assert_equal(Bio::Reference, @bio_seq.references[0].class) end def test_features assert_equal(3, @bio_seq.features.length) assert_equal(Bio::Feature, @bio_seq.features[0].class) end end # To really test the Bio::EMBL to Bio::Sequence conversion, we need to test if # that Bio::Sequence can be made into a valid Bio::EMBL again. class TestEMBLToBioSequenceRoundTrip < Test::Unit::TestCase def setup input = File.read(File.join(BioRubyTestDataPath, 'embl', 'AB090716.embl.rel89')) embl_object_1 = Bio::EMBL.new(input) embl_object_1.instance_eval { @data['OS'] = "Haplochromis sp. 
'muzu rukwa'" } @bio_seq_1 = embl_object_1.to_biosequence embl_object_2 = Bio::EMBL.new(@bio_seq_1.output(:embl)) @bio_seq_2 = embl_object_2.to_biosequence end def test_entry_id assert_equal('AB090716', @bio_seq_2.entry_id) end def test_primary_accession assert_equal('AB090716', @bio_seq_2.primary_accession) end def test_secondary_accessions assert_equal([], @bio_seq_2.secondary_accessions) end def test_molecule_type assert_equal('genomic DNA', @bio_seq_2.molecule_type) end def test_definition assert_equal("Haplochromis sp. 'muzu, rukwa' LWS gene for long wavelength-sensitive opsin, partial cds, specimen_voucher:specimen No. HT-9361.", @bio_seq_2.definition) end def test_topology assert_equal('linear', @bio_seq_2.topology) end def test_date_created # '25-OCT-2002 (Rel. 73, Created)' assert_equal(Date.parse('25-OCT-2002'), @bio_seq_2.date_created) end def test_date_modified # '14-NOV-2006 (Rel. 89, Last updated, Version 3)' assert_equal(Date.parse('14-NOV-2006'), @bio_seq_2.date_modified) end def test_release_created assert_equal('73', @bio_seq_2.release_created) end def test_release_modified assert_equal('89', @bio_seq_2.release_modified) end def test_entry_version assert_equal('3', @bio_seq_2.entry_version) end def test_division assert_equal('VRT', @bio_seq_2.division) end def test_sequence_version assert_equal(1, @bio_seq_2.sequence_version) end def test_keywords assert_equal([], @bio_seq_2.keywords) end def test_species assert_equal("Haplochromis sp. 
'muzu, rukwa'", @bio_seq_2.species)
    end

    # Full lineage expected after the round trip, outermost taxon first.
    def test_classification
      lineage = [
        'Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata',
        'Euteleostomi', 'Actinopterygii', 'Neopterygii', 'Teleostei',
        'Euteleostei', 'Neoteleostei', 'Acanthomorpha', 'Acanthopterygii',
        'Percomorpha', 'Perciformes', 'Labroidei', 'Cichlidae',
        'African cichlids', 'Pseudocrenilabrinae', 'Haplochromini',
        'Haplochromis'
      ]
      assert_equal(lineage, @bio_seq_2.classification)
    end

    def test_references
      refs = @bio_seq_2.references
      assert_equal(2, refs.length)
      assert_equal(Bio::Reference, refs[0].class)
    end

    def test_features
      feats = @bio_seq_2.features
      assert_equal(3, feats.length)
      assert_equal(Bio::Feature, feats[0].class)
    end

  end
end
bio-1.4.3.0001/test/unit/bio/db/embl/test_sptr.rb0000644000004100000410000024000712200110570021255 0ustar www-datawww-data
#
# test/unit/bio/db/embl/test_sptr.rb - Unit test for Bio::SPTR
#
# Copyright::  Copyright (C) 2005 Mitsuteru Nakao
# License::    The Ruby License
#
#  $Id:$
#

# loading helper routine for testing bioruby
require 'pathname'
load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
                            'bioruby_test_helper.rb')).cleanpath.to_s

# libraries needed for the tests
require 'test/unit'
require 'bio/db/embl/sptr'

module Bio
  # Checks Bio::SPTR accessors against the p53_human.uniprot fixture.
  class TestSPTR < Test::Unit::TestCase

    # Parses the fixture entry before each test.
    def setup
      data = File.read(File.join(BioRubyTestDataPath, 'uniprot', 'p53_human.uniprot'))
      @obj = Bio::SPTR.new(data)
    end

    def test_id_line
      assert(@obj.id_line)
    end

    def test_id_line_entry_name
      assert_equal('P53_HUMAN', @obj.id_line('ENTRY_NAME'))
    end

    def test_id_line_data_class
      assert_equal('STANDARD', @obj.id_line('DATA_CLASS'))
    end

    def test_id_line_molecule_type
      assert_equal('PRT', @obj.id_line('MOLECULE_TYPE'))
    end

    def test_id_line_sequence_length
      assert_equal(393, @obj.id_line('SEQUENCE_LENGTH'))
    end

    # entry, entry_name and entry_id must all return the same identifier.
    def test_entry
      expected_entry = 'P53_HUMAN'
      assert_equal(expected_entry, @obj.entry)
      assert_equal(expected_entry, @obj.entry_name)
      assert_equal(expected_entry, @obj.entry_id)
    end

    def test_molecule
      assert_equal('PRT', @obj.molecule)
      assert_equal('PRT', @obj.molecule_type)
end def test_sequence_length seqlen = 393 assert_equal(seqlen, @obj.sequence_length) assert_equal(seqlen, @obj.aalen) end def test_ac acs = ["P04637", "Q15086", "Q15087", "Q15088", "Q16535", "Q16807", "Q16808", "Q16809", "Q16810", "Q16811", "Q16848", "Q86UG1", "Q8J016", "Q99659", "Q9BTM4", "Q9HAQ8", "Q9NP68", "Q9NPJ2", "Q9NZD0", "Q9UBI2", "Q9UQ61"] assert_equal(acs, @obj.ac) assert_equal(acs, @obj.accessions) end def test_accession assert_equal('P04637', @obj.accession) end def test_dr assert_equal(17, @obj.dr.size) assert_equal(27, @obj.dr['GO'].size) assert_equal([["IPR002117", "P53"], ["IPR011615", "P53_DNA_bd"], ["IPR012346", "P53_RUNT_DNA_bd"], ["IPR010991", "p53_tetrameristn"]], @obj.dr['InterPro']) end def test_dr_with_key pfam = [ { " " => "1", "Version" => "P53", "Accession" => "PF00870", "Molecular Type" => nil }, { " " => "1", "Version" => "P53_tetramer", "Accession" => "PF07710", "Molecular Type" => nil } ] assert_equal(pfam, @obj.dr('Pfam')) embl3 = { " " => "JOINED", "Version" => "AAA59987.1", "Accession" => "M13113", "Molecular Type" => "Genomic_DNA" } assert_equal(embl3, @obj.dr('EMBL')[3]) end def test_dr_with_key_empty assert_equal([], @obj.dr('NOT_A_DATABASE')) end def test_dt assert(@obj.dt) end def test_dt_created assert_equal('13-AUG-1987 (Rel. 05, Created)', @obj.dt('created')) end def test_dt_sequence assert_equal('01-MAR-1989 (Rel. 10, Last sequence update)', @obj.dt('sequence')) end def test_dt_annotation assert_equal('13-SEP-2005 (Rel. 
48, Last annotation update)', @obj.dt('annotation')) end def test_de assert(@obj.de) end def test_protein_name assert_equal("Cellular tumor antigen p53", @obj.protein_name) end def test_synonyms ary = ["Tumor suppressor p53", "Phosphoprotein p53", "Antigen NY-CO-13"] assert_equal(ary, @obj.synonyms) end def test_gn assert_equal([{:orfs=>[], :synonyms=>["P53"], :name=>"TP53", :loci=>[]}], @obj.gn) end def test_gn_uniprot_parser gn_uniprot_data = '' assert_equal([{:orfs=>[], :loci=>[], :name=>"TP53", :synonyms=>["P53"]}], @obj.instance_eval("gn_uniprot_parser")) end def test_gn_old_parser gn_old_data = '' assert_equal([["Name=TP53; Synonyms=P53;"]], @obj.instance_eval("gn_old_parser")) end def test_gene_names assert_equal(["TP53"], @obj.gene_names) end def test_gene_name assert_equal('TP53', @obj.gene_name) end def test_os assert(@obj.os) end def test_os_access assert_equal("Homo sapiens (Human)", @obj.os(0)) end def test_os_access2 assert_equal({"name"=>"(Human)", "os"=>"Homo sapiens"}, @obj.os[0]) end def test_og_1 og = "OG Plastid; Chloroplast." ary = ['Plastid', 'Chloroplast'] @obj.instance_eval("@orig['OG'] = '#{og}'") assert_equal(ary, @obj.og) end def test_og_2 og = "OG Mitochondrion." ary = ['Mitochondrion'] @obj.instance_eval("@orig['OG'] = '#{og}'") assert_equal(ary, @obj.og) end def test_og_3 og = "OG Plasmid sym pNGR234a." ary = ["Plasmid sym pNGR234a"] @obj.instance_eval("@orig['OG'] = '#{og}'") assert_equal(ary, @obj.og) end def test_og_4 og = "OG Plastid; Cyanelle." ary = ['Plastid', 'Cyanelle'] @obj.instance_eval("@orig['OG'] = '#{og}'") assert_equal(ary, @obj.og) end def test_og_5 og = "OG Plasmid pSymA (megaplasmid 1)." ary = ["Plasmid pSymA (megaplasmid 1)"] @obj.instance_eval("@orig['OG'] = '#{og}'") assert_equal(ary, @obj.og) end def test_og_6 og = "OG Plasmid pNRC100, Plasmid pNRC200, and Plasmid pHH1." 
ary = ['Plasmid pNRC100', 'Plasmid pNRC200', 'Plasmid pHH1'] @obj.instance_eval("@orig['OG'] = '#{og}'") assert_equal(ary, @obj.og) end def test_oc assert_equal(["Eukaryota", "Metazoa", "Chordata", "Craniata", "Vertebrata", "Euteleostomi", "Mammalia", "Eutheria", "Euarchontoglires", "Primates", "Catarrhini", "Hominidae", "Homo"], @obj.oc) end def test_ox assert_equal({"NCBI_TaxID"=>["9606"]}, @obj.ox) end def test_ref # Bio::SPTR#ref assert_equal(Array, @obj.ref.class) end def test_cc assert_equal(Hash, @obj.cc.class) end def test_cc_database db = [{"NAME" => "IARC TP53 mutation database", "WWW" => "http://www.iarc.fr/p53/", "FTP" => nil, "NOTE" => "IARC db of somatic p53 mutations"}, {"NAME" => "Tokyo p53", "WWW" => "http://p53.genome.ad.jp/", "FTP" => nil, "NOTE" => "University of Tokyo db of p53 mutations"}, {"NAME" => "p53 web site at the Institut Curie", "WWW" => "http://p53.curie.fr/", "FTP" => nil, "NOTE" => nil}, {"NAME" => "Atlas Genet. Cytogenet. Oncol. Haematol.", "WWW" => "http://www.infobiogen.fr/services/chromcancer/Genes/P53ID88.html", "FTP" => nil, "NOTE" => nil}] assert_equal(db, @obj.cc('DATABASE')) end def test_cc_alternative_products ap = {"Comment" => "", "Named isoforms" => "2", "Variants" => [{"IsoId" => ["P04637-1"], "Name" => "1", "Synonyms" => [], "Sequence" => ["Displayed"]}, {"IsoId" => ["P04637-2"], "Name" => "2", "Synonyms" => ["I9RET"], "Sequence" => ["VSP_006535", "VSP_006536"]}], "Event" => ["Alternative splicing"]} assert_equal(ap, @obj.cc('ALTERNATIVE PRODUCTS')) end def test_cc_mass_spectrometry assert_equal(nil, @obj.cc('MASS SPECTROMETRY')) end def test_kw keywords = ["3D-structure", "Acetylation", "Activator", "Alternative splicing", "Anti-oncogene", "Apoptosis", "Cell cycle", "Disease mutation", "DNA-binding", "Glycoprotein", "Li-Fraumeni syndrome", "Metal-binding", "Nuclear protein", "Phosphorylation", "Polymorphism", "Transcription", "Transcription regulation", "Zinc"] assert_equal(keywords, @obj.kw) end def test_ft 
assert(@obj.ft) name = 'DNA_BIND' assert_equal([{"FTId"=>"", "From"=>102, "diff"=>[], "To"=>292, "Description"=>"", "original" => ['DNA_BIND', '102', '292', '', '']}], @obj.ft[name]) end def test_sq assert_equal({"CRC64"=>"AD5C149FD8106131", "aalen"=>393, "MW"=>43653}, @obj.sq) end def test_sq_crc64 assert_equal("AD5C149FD8106131", @obj.sq('CRC64')) end def test_sq_mw mw = 43653 assert_equal(mw, @obj.sq('mw')) assert_equal(mw, @obj.sq('molecular')) assert_equal(mw, @obj.sq('weight')) end def test_sq_len length = 393 assert_equal(length, @obj.sq('len')) assert_equal(length, @obj.sq('length')) assert_equal(length, @obj.sq('AA')) end def test_seq seq = 'MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD' assert_equal(seq, @obj.seq) assert_equal(seq, @obj.aaseq) end end # class TestSPTR class TestSPTRCC < Test::Unit::TestCase def test_allergen # ALLERGEN Information relevant to allergenic proteins data = 'CC -!- ALLERGEN: Causes an allergic reaction in human.' 
sp = Bio::SPTR.new(data) assert_equal(['Causes an allergic reaction in human.'], sp.cc['ALLERGEN']) assert_equal(['Causes an allergic reaction in human.'], sp.cc('ALLERGEN')) end def test_alternative_products_access_as_hash data = "CC -!- ALTERNATIVE PRODUCTS: CC Event=Alternative initiation; Named isoforms=2; CC Name=Long; CC IsoId=P68250-1; Sequence=Displayed; CC Name=Short; CC IsoId=P68250-2; Sequence=VSP_018631; CC Note=Contains a N-acetylmethionine at position 1 (By CC similarity);" res = ["Event=Alternative initiation; Named isoforms=2; Name=Long; IsoId=P68250-1; Sequence=Displayed; Name=Short; IsoId=P68250-2; Sequence=VSP_018631; Note=Contains a N-acetylmethionine at position 1 (By similarity);"] sp = Bio::SPTR.new(data) assert_equal(res, sp.cc['ALTERNATIVE PRODUCTS']) end def test_alternative_products_ai # ALTERNATIVE PRODUCTS Description of the existence of related protein sequence(s) produced by alternative splicing of the same gene, alternative promoter usage, ribosomal frameshifting or by the use of alternative initiation codons; see 3.21.15 # Alternative promoter usage, Alternative splicing, Alternative initiation, Ribosomal frameshifting data = "CC -!- ALTERNATIVE PRODUCTS: CC Event=Alternative initiation; Named isoforms=2; CC Name=Long; CC IsoId=P68250-1; Sequence=Displayed; CC Name=Short; CC IsoId=P68250-2; Sequence=VSP_018631; CC Note=Contains a N-acetylmethionine at position 1 (By CC similarity);" sp = Bio::SPTR.new(data) assert_equal({"Comment"=>"", "Named isoforms"=>"2", "Variants"=> [{"IsoId"=>["P68250-1"], "Name"=>"Long", "Synonyms" => [], "Sequence"=>["Displayed"]}, {"IsoId"=>["P68250-2"], "Name"=>"Short", "Synonyms" => [], "Sequence"=>["VSP_018631"]}], "Event"=>["Alternative initiation"]}, sp.cc('ALTERNATIVE PRODUCTS')) end def test_alternative_products_as data = "CC -!- ALTERNATIVE PRODUCTS: CC Event=Alternative splicing; Named isoforms=2; CC Name=1; CC IsoId=P04637-1; Sequence=Displayed; CC Name=2; Synonyms=I9RET; CC IsoId=P04637-2; 
Sequence=VSP_006535, VSP_006536; CC Note=Seems to be non-functional. Expressed in quiescent CC lymphocytes;" sp = Bio::SPTR.new(data) assert_equal({"Comment"=>"", "Named isoforms"=>"2", "Variants"=> [{"Name"=>"1", "IsoId"=>["P04637-1"], "Synonyms"=>[], "Sequence"=>["Displayed"]}, {"IsoId"=>["P04637-2"], "Name"=>"2", "Synonyms"=>["I9RET"], "Sequence"=>["VSP_006535", "VSP_006536"]}], "Event"=>["Alternative splicing"]}, sp.cc('ALTERNATIVE PRODUCTS')) end def test_alternative_products_apu data = "CC -!- ALTERNATIVE PRODUCTS: CC Event=Alternative promoter usage, Alternative splicing; Named isoforms=5; CC Comment=Additional isoforms (AAT-1L and AAT-1S) may exist; CC Name=1; Synonyms=AAT-1M; CC IsoId=Q7Z4T9-1; Sequence=Displayed; CC Name=2; CC IsoId=Q7Z4T9-2; Sequence=VSP_014910, VSP_014911; CC Note=No experimental confirmation available; CC Name=3; CC IsoId=Q7Z4T9-3; Sequence=VSP_014907, VSP_014912; CC Name=4; Synonyms=AAT1-alpha; CC IsoId=Q7Z4T9-4; Sequence=VSP_014908; CC Note=May be produced by alternative promoter usage; CC Name=5; Synonyms=AAT1-beta, AAT1-gamma; CC IsoId=Q7Z4T9-5; Sequence=VSP_014909; CC Note=May be produced by alternative promoter usage;" sp = Bio::SPTR.new(data) assert_equal({"Comment"=>"Additional isoforms (AAT-1L and AAT-1S) may exist", "Named isoforms"=>"5", "Variants"=> [{"Name"=>"1", "IsoId"=>["Q7Z4T9-1"], "Synonyms"=>["AAT-1M"], "Sequence"=>["Displayed"]}, {"Name"=>"2", "IsoId"=>["Q7Z4T9-2"], "Synonyms" => [], "Sequence"=>["VSP_014910", "VSP_014911"]}, {"Name"=>"3", "IsoId"=>["Q7Z4T9-3"], "Synonyms" => [], "Sequence"=>["VSP_014907", "VSP_014912"]}, {"Name"=>"4", "IsoId"=>["Q7Z4T9-4"], "Synonyms"=>["AAT1-alpha"], "Sequence"=>["VSP_014908"]}, {"Name"=>"5", "IsoId"=>["Q7Z4T9-5"], "Synonyms"=>["AAT1-beta", "AAT1-gamma"], "Sequence"=>["VSP_014909"]}], "Event"=>["Alternative promoter usage", "Alternative splicing"]}, sp.cc('ALTERNATIVE PRODUCTS')) end def test_alternative_products_rf data = "" sp = Bio::SPTR.new(data) assert_equal({}, 
sp.cc('ALTERNATIVE PRODUCTS')) end def test_biophysicochemical_properties # BIOPHYSICOCHEMICAL PROPERTIES Description of the information relevant to biophysical and physicochemical data and information on pH dependence, temperature dependence, kinetic parameters, redox potentials, and maximal absorption; see 3.21.8 # data = 'CC -!- BIOPHYSICOCHEMICAL PROPERTIES: CC Kinetic parameters: CC KM=45 uM for AdoMet; CC Vmax=32 uM/h/mg enzyme; CC pH dependence: CC Optimum pH is 8.2;' sp = Bio::SPTR.new(data) assert_equal(["Kinetic parameters: KM=45 uM for AdoMet; Vmax=32 uM/h/mg enzyme; pH dependence: Optimum pH is 8.2;"], sp.cc['BIOPHYSICOCHEMICAL PROPERTIES']) assert_equal({"Redox potential" => "", "Temperature dependence" => "", "Kinetic parameters" => {"KM" => "45 uM for AdoMet", "Vmax" => "32 uM/h/mg enzyme"}, "Absorption" => {}, "pH dependence" => "Optimum pH is 8.2"}, sp.cc('BIOPHYSICOCHEMICAL PROPERTIES')) # 3.12.2. Syntax of the topic 'BIOPHYSICOCHEMICAL PROPERTIES' data = "CC -!- BIOPHYSICOCHEMICAL PROPERTIES: CC Absorption: CC Abs(max)=xx nm; CC Note=free_text; CC Kinetic parameters: CC KM=xx unit for substrate [(free_text)]; CC Vmax=xx unit enzyme [free_text]; CC Note=free_text; CC pH dependence: CC free_text; CC Redox potential: CC free_text; CC Temperature dependence: CC free_text;" sp = Bio::SPTR.new(data) assert_equal({"Redox potential"=>"free_text", "Temperature dependence"=>"free_text", "Kinetic parameters"=> {"KM"=>"xx unit for substrate [(free_text)]", "Note"=>"free_text", "Vmax"=>"xx unit enzyme [free_text]"}, "Absorption"=>{"Note"=>"free_text", "Abs(max)"=>"xx nm"}, "pH dependence"=>"free_text"}, sp.cc('BIOPHYSICOCHEMICAL PROPERTIES')) end def test_biotechnology # BIOTECHNOLOGY Description of the use of a specific protein in a biotechnological process data = 'CC -!- BIOTECHNOLOGY: Introduced by genetic manipulation and expressed in CC improved ripening tomato by Monsanto. 
ACC is the immediate CC precursor of the phytohormone ethylene which is involved in the CC control of ripening. ACC deaminase reduces ethylene biosynthesis CC and thus extends the shelf life of fruits and vegetables.' sp = Bio::SPTR.new(data) assert_equal(["Introduced by genetic manipulation and expressed in improved ripening tomato by Monsanto. ACC is the immediate precursor of the phytohormone ethylene which is involved in the control of ripening. ACC deaminase reduces ethylene biosynthesis and thus extends the shelf life of fruits and vegetables."], sp.cc['BIOTECHNOLOGY']) end def test_catalytic_activity # CATALYTIC ACTIVITY Description of the reaction(s) catalyzed by an enzyme [1] data = 'CC -!- CATALYTIC ACTIVITY: Hydrolysis of alkylated DNA, releasing 3- CC methyladenine, 3-methylguanine, 7-methylguanine and 7- CC methyladenine.' sp = Bio::SPTR.new(data) assert_equal(["Hydrolysis of alkylated DNA, releasing 3-methyladenine, 3-methylguanine, 7-methylguanine and 7-methyladenine."], sp.cc['CATALYTIC ACTIVITY']) end def test_caution # CAUTION Warning about possible errors and/or grounds for confusion data = 'CC -!- CAUTION: Ref.1 sequence differs from that shown due to a Leu codon CC in position 480 which was translated as a stop codon to shorten CC the sequence.' sp = Bio::SPTR.new(data) assert_equal(["Ref.1 sequence differs from that shown due to a Leu codon in position 480 which was translated as a stop codon to shorten the sequence."], sp.cc['CAUTION']) assert_equal("Ref.1 sequence differs from that shown due to a Leu codon in position 480 which was translated as a stop codon to shorten the sequence.", sp.cc('CAUTION')) end def test_cofactor # COFACTOR Description of any non-protein substance required by an enzyme for its catalytic activity data = 'CC -!- COFACTOR: Cl(-). Is unique in requiring Cl(-) for its activity. CC -!- COFACTOR: Mg(2+).' sp = Bio::SPTR.new(data) assert_equal(["Cl(-). 
Is unique in requiring Cl(-) for its activity.", "Mg(2+)."], sp.cc['COFACTOR']) assert_equal(["Cl(-). Is unique in requiring Cl(-) for its activity.", "Mg(2+)."], sp.cc('COFACTOR')) end def test_developmental_stage # DEVELOPMENTAL STAGE Description of the developmentally-specific expression of mRNA or protein data = 'CC -!- DEVELOPMENTAL STAGE: In females, isoform 1 is expressed at day 35 CC with higher levels detected at day 56. Isoform 1 is not detected CC in males of any age.' sp = Bio::SPTR.new(data) assert_equal(["In females, isoform 1 is expressed at day 35 with higher levels detected at day 56. Isoform 1 is not detected in males of any age."], sp.cc['DEVELOPMENTAL STAGE']) assert_equal("In females, isoform 1 is expressed at day 35 with higher levels detected at day 56. Isoform 1 is not detected in males of any age.", sp.cc('DEVELOPMENTAL STAGE')) end def test_disease # DISEASE Description of the disease(s) associated with a deficiency of a protein data = 'CC -!- DISEASE: Defects in APP are a cause of hereditary cerebral CC hemorrhage with amyloidosis (HCHWAD) [MIM:609065, 104760]. This CC disorder is characterized by amyloid deposits in cerebral vessels. CC The principal clinical characteristics are recurring cerebral CC hemorrhages, sometimes preceded by migrainous headaches or mental CC cleavage. Various types of HCHWAD are known. They differ in onset CC and aggressiveness of the disease. The Iowa type demonstrated no CC cerebral hemorrhaging but is characterized by progressive CC cognitive decline. Beta-APP40 is the predominant form of CC cerebrovascular amyloid.' sp = Bio::SPTR.new(data) assert_equal(["Defects in APP are a cause of hereditary cerebral hemorrhage with amyloidosis (HCHWAD) [MIM:609065, 104760]. This disorder is characterized by amyloid deposits in cerebral vessels. The principal clinical characteristics are recurring cerebral hemorrhages, sometimes preceded by migrainous headaches or mental cleavage. Various types of HCHWAD are known. 
They differ in onset and aggressiveness of the disease. The Iowa type demonstrated no cerebral hemorrhaging but is characterized by progressive cognitive decline. Beta-APP40 is the predominant form of cerebrovascular amyloid."], sp.cc['DISEASE']) assert_equal("Defects in APP are a cause of hereditary cerebral hemorrhage with amyloidosis (HCHWAD) [MIM:609065, 104760]. This disorder is characterized by amyloid deposits in cerebral vessels. The principal clinical characteristics are recurring cerebral hemorrhages, sometimes preceded by migrainous headaches or mental cleavage. Various types of HCHWAD are known. They differ in onset and aggressiveness of the disease. The Iowa type demonstrated no cerebral hemorrhaging but is characterized by progressive cognitive decline. Beta-APP40 is the predominant form of cerebrovascular amyloid.", sp.cc('DISEASE')) end def test_domain # DOMAIN Description of the domain structure of a protein data = 'CC -!- DOMAIN: The basolateral sorting signal (BaSS) is required for CC sorting of membrane proteins to the basolateral surface of CC epithelial cells. CC -!- DOMAIN: The NPXY sequence motif found in many tyrosine- CC phosphorylated proteins is required for the specific binding of CC the PID domain. However, additional amino acids either N- or C- CC terminal to the NPXY motif are often required for complete CC interaction. The PID domain-containing proteins which bind APP CC require the YENPTY motif for full interaction. These interactions CC are independent of phosphorylation on the terminal tyrosine CC residue. The NPXY site is also involved in clathrin-mediated CC endocytosis (By similarity).' sp = Bio::SPTR.new(data) assert_equal(["The basolateral sorting signal (BaSS) is required for sorting of membrane proteins to the basolateral surface of epithelial cells.", "The NPXY sequence motif found in many tyrosine-phosphorylated proteins is required for the specific binding of the PID domain. 
However, additional amino acids either N-or C-terminal to the NPXY motif are often required for complete interaction. The PID domain-containing proteins which bind APP require the YENPTY motif for full interaction. These interactions are independent of phosphorylation on the terminal tyrosine residue. The NPXY site is also involved in clathrin-mediated endocytosis (By similarity)."], sp.cc['DOMAIN']) assert_equal(["The basolateral sorting signal (BaSS) is required for sorting of membrane proteins to the basolateral surface of epithelial cells.", "The NPXY sequence motif found in many tyrosine-phosphorylated proteins is required for the specific binding of the PID domain. However, additional amino acids either N-or C-terminal to the NPXY motif are often required for complete interaction. The PID domain-containing proteins which bind APP require the YENPTY motif for full interaction. These interactions are independent of phosphorylation on the terminal tyrosine residue. The NPXY site is also involved in clathrin-mediated endocytosis (By similarity)."], sp.cc('DOMAIN')) end def test_enzyme_regulation # ENZYME REGULATION Description of an enzyme regulatory mechanism data = 'CC -!- ENZYME REGULATION: Insensitive to calcium/calmodulin. Stimulated CC by the G protein beta and gamma subunit complex.' sp = Bio::SPTR.new(data) assert_equal(["Insensitive to calcium/calmodulin. Stimulated by the G protein beta and gamma subunit complex."], sp.cc['ENZYME REGULATION']) assert_equal("Insensitive to calcium/calmodulin. Stimulated by the G protein beta and gamma subunit complex.", sp.cc('ENZYME REGULATION')) end def test_function # FUNCTION General description of the function(s) of a protein data = 'CC -!- FUNCTION: May play a fundamental role in situations where fine CC interplay between intracellular calcium and cAMP determines the CC cellular function. May be a physiologically relevant docking site CC for calcineurin (By similarity).' 
sp = Bio::SPTR.new(data) assert_equal(["May play a fundamental role in situations where fine interplay between intracellular calcium and cAMP determines the cellular function. May be a physiologically relevant docking site for calcineurin (By similarity)."], sp.cc['FUNCTION']) assert_equal("May play a fundamental role in situations where fine interplay between intracellular calcium and cAMP determines the cellular function. May be a physiologically relevant docking site for calcineurin (By similarity).", sp.cc('FUNCTION')) end def test_induction # INDUCTION Description of the compound(s) or condition(s) that regulate gene expression data = 'CC -!- INDUCTION: By pheromone (alpha-factor).' sp = Bio::SPTR.new(data) assert_equal(["By pheromone (alpha-factor)."], sp.cc['INDUCTION']) assert_equal("By pheromone (alpha-factor).", sp.cc('INDUCTION')) end def test_interaction # INTERACTION Conveys information relevant to binary protein-protein interaction 3.21.12 data = 'CC -!- INTERACTION: CC P62158:CALM1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397435; CC P62155:calm1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397568;' sp = Bio::SPTR.new(data) assert_equal(["P62158:CALM1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397435; P62155:calm1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397568;"], sp.cc['INTERACTION']) assert_equal([{'SP_Ac' => 'P62158', 'identifier' => 'CALM1', 'optional_identifier' => '(xeno)', 'NbExp' => '1', 'IntAct' => ['EBI-457011', 'EBI-397435']}, {'SP_Ac' => 'P62155', 'identifier' => 'calm1', 'optional_identifier' => '(xeno)', 'NbExp' => '1', 'IntAct' => ['EBI-457011', 'EBI-397568']}], sp.cc('INTERACTION')) end def test_mass_spectrometry # MASS SPECTROMETRY Reports the exact molecular weight of a protein or part of a protein as determined by mass spectrometric methods; see 3.21.23 data = "CC -!- MASS SPECTROMETRY: MW=2894.9; MW_ERR=3; METHOD=MALDI; RANGE=1-29; CC NOTE=Ref.1. CC -!- MASS SPECTROMETRY: MW=2892.2; METHOD=Electrospray; RANGE=1-29; CC NOTE=Ref.2." 
sp = Bio::SPTR.new(data) assert_equal(["MW=2894.9; MW_ERR=3; METHOD=MALDI; RANGE=1-29; NOTE=Ref.1.", "MW=2892.2; METHOD=Electrospray; RANGE=1-29; NOTE=Ref.2."], sp.cc['MASS SPECTROMETRY']) assert_equal([{'MW' => '2894.9', 'MW_ERR' => '3', 'METHOD' => 'MALDI', 'RANGE' => '1-29', 'NOTE' => 'Ref.1'}, {'MW' => '2892.2', 'METHOD' => 'Electrospray', 'MW_ERR' => nil, 'RANGE' => '1-29', 'NOTE' => 'Ref.2'}], sp.cc('MASS SPECTROMETRY')) end def test_miscellaneous # MISCELLANEOUS Any comment which does not belong to any of the other defined topics data = 'CC -!- MISCELLANEOUS: There are two isozymes; a cytoplasmic one and a CC mitochondrial one.' sp = Bio::SPTR.new(data) assert_equal(["There are two isozymes; a cytoplasmic one and a mitochondrial one."], sp.cc['MISCELLANEOUS']) end def test_pathway # PATHWAY Description of the metabolic pathway(s) with which a protein is associated data = 'CC -!- PATHWAY: Carbohydrate degradation; glycolysis; D-glyceraldehyde 3- CC phosphate and glycerone phosphate from D-glucose: step 4.' sp = Bio::SPTR.new(data) assert_equal(["Carbohydrate degradation; glycolysis; D-glyceraldehyde 3-phosphate and glycerone phosphate from D-glucose: step 4."], sp.cc['PATHWAY']) assert_equal(["Carbohydrate degradation", 'glycolysis', 'D-glyceraldehyde 3-phosphate', 'glycerone phosphate from D-glucose', 'step 4'], sp.cc('PATHWAY')) end def test_pharmaceutical # PHARMACEUTICAL Description of the use of a protein as a pharmaceutical drug data = 'CC -!- PHARMACEUTICAL: Available under the names Factrel (Ayerst Labs), CC Lutrepulse or Lutrelef (Ferring Pharmaceuticals) and Relisorm CC (Serono). Used in evaluating hypothalamic-pituitary gonadotropic CC function.' sp = Bio::SPTR.new(data) assert_equal(["Available under the names Factrel (Ayerst Labs), Lutrepulse or Lutrelef (Ferring Pharmaceuticals) and Relisorm (Serono). 
Used in evaluating hypothalamic-pituitary gonadotropic function."], sp.cc['PHARMACEUTICAL']) end def test_polymorphism # POLYMORPHISM Description of polymorphism(s) data = 'CC -!- POLYMORPHISM: Position 161 is associated with platelet-specific CC alloantigen Siba. Siba(-) has Thr-161 and Siba(+) has Met-161. CC Siba is involved in neonatal alloimmune thrombocytopenia (NATP). CC -!- POLYMORPHISM: Polymorphisms arise from a variable number of tandem CC 13-amino acid repeats of S-E-P-A-P-S-P-T-T-P-E-P-T in the mucin- CC like macroglycopeptide (Pro/Thr-rich) domain. Allele D (shown CC here) contains one repeat starting at position 415, allele C CC contains two repeats, allele B contains three repeats and allele A CC contains four repeats.' sp = Bio::SPTR.new(data) assert_equal(["Position 161 is associated with platelet-specific alloantigen Siba. Siba(-) has Thr-161 and Siba(+) has Met-161. Siba is involved in neonatal alloimmune thrombocytopenia (NATP).", "Polymorphisms arise from a variable number of tandem 13-amino acid repeats of S-E-P-A-P-S-P-T-T-P-E-P-T in the mucin-like macroglycopeptide (Pro/Thr-rich) domain. Allele D (shown here) contains one repeat starting at position 415, allele C contains two repeats, allele B contains three repeats and allele A contains four repeats."], sp.cc['POLYMORPHISM']) end def test_ptm # PTM Description of any chemical alternation of a polypeptide (proteolytic cleavage, amino acid modifications including crosslinks). This topic complements information given in the feature table or indicates polypeptide modifications for which position-specific data is not available. data = 'CC -!- PTM: N-glycosylated, contains approximately 8 kDa of N-linked CC carbohydrate. CC -!- PTM: Palmitoylated.' 
sp = Bio::SPTR.new(data) assert_equal(["N-glycosylated, contains approximately 8 kDa of N-linked carbohydrate.", "Palmitoylated."], sp.cc['PTM']) end def test_rna_editing # RNA EDITING Description of any type of RNA editing that leads to one or more amino acid changes data = 'CC -!- RNA EDITING: Modified_positions=50, 59, 78, 87, 104, 132, 139, CC 146, 149, 160, 170, 177, 185, 198, 208, 223, 226, 228, 243, 246, CC 252, 260, 264, 277, 285, 295; Note=The nonsense codons at CC positions 50, 78, 104, 260 and 264 are modified to sense codons.' data = 'CC -!- RNA EDITING: Modified_positions=607; Note=Fully edited in the CC brain. Heteromerically expressed edited GLUR2 (R) receptor CC complexes are impermeable to calcium, whereas the unedited (Q) CC forms are highly permeable to divalent ions (By similarity).' sp = Bio::SPTR.new(data) assert_equal(["Modified_positions=607; Note=Fully edited in the brain. Heteromerically expressed edited GLUR2 (R) receptor complexes are impermeable to calcium, whereas the unedited (Q) forms are highly permeable to divalent ions (By similarity)."], sp.cc['RNA EDITING']) assert_equal({"Modified_positions" => ['607'], "Note" => "Fully edited in the brain. Heteromerically expressed edited GLUR2 (R) receptor complexes are impermeable to calcium, whereas the unedited (Q) forms are highly permeable to divalent ions (By similarity)."}, sp.cc('RNA EDITING')) end def test_similarity # SIMILARITY Description of the similaritie(s) (sequence or structural) of a protein with other proteins data = 'CC -!- SIMILARITY: Contains 1 protein kinase domain. CC -!- SIMILARITY: Contains 1 RGS domain.' sp = Bio::SPTR.new(data) assert_equal(["Contains 1 protein kinase domain.", "Contains 1 RGS domain."], sp.cc['SIMILARITY']) end def test_subcellular_location # SUBCELLULAR LOCATION Description of the subcellular location of the mature protein data = 'CC -!- SUBCELLULAR LOCATION: Or: Cytoplasm. Or: Secreted protein. 
May be CC secreted by a non-classical secretory pathway.' data = "CC -!- SUBCELLULAR LOCATION: Cytoplasmic or may be secreted by a non- CC classical secretory pathway (By similarity)." data = "CC -!- SUBCELLULAR LOCATION: Cytoplasm. In neurons, axonally transported CC to the nerve terminals." data = "CC -!- SUBCELLULAR LOCATION: Cell wall. Probably the external side of the CC cell wall." data = "CC -!- SUBCELLULAR LOCATION: Endosome; late endosome; late endosomal CC membrane; single-pass type I membrane protein. Lysosome; lysosomal CC membrane; single-pass type I membrane protein. Localizes to late CC endocytic compartment. Associates with lysosome membranes." data = "CC -!- SUBCELLULAR LOCATION: Plastid; chloroplast; chloroplast membrane; CC peripheral membrane protein. Plastid; chloroplast; chloroplast CC stroma." sp = Bio::SPTR.new(data) assert_equal(["Plastid; chloroplast; chloroplast membrane; peripheral membrane protein. Plastid; chloroplast; chloroplast stroma."], sp.cc['SUBCELLULAR LOCATION']) assert_equal([["Plastid", "chloroplast", "chloroplast membrane", "peripheral membrane protein"], ["Plastid", "chloroplast", "chloroplast stroma"]], sp.cc('SUBCELLULAR LOCATION')) end def test_subunit # SUBUNIT Description of the quaternary structure of a protein and any kind of interactions with other proteins or protein complexes; except for receptor-ligand interactions, which are described in the topic FUNCTION. data = 'CC -!- SUBUNIT: Interacts with BTK. Interacts with all isoforms of MAPK8, CC MAPK9, MAPK10 and MAPK12.' data = 'CC -!- SUBUNIT: Homotetramer.' sp = Bio::SPTR.new(data) assert_equal(["Homotetramer."], sp.cc['SUBUNIT']) end def test_tissue_specificity # TISSUE SPECIFICITY Description of the tissue-specific expression of mRNA or protein data = "CC -!- TISSUE SPECIFICITY: Heart, brain and liver mitochondria." data = "CC -!- TISSUE SPECIFICITY: Widely expressed with highest expression in CC thymus, testis, embryo and proliferating blood lymphocytes." 
data = "CC -!- TISSUE SPECIFICITY: Isoform 2 is highly expressed in the brain, CC heart, spleen, kidney and blood. Isoform 2 is expressed (at CC protein level) in the spleen, skeletal muscle and gastrointestinal CC epithelia." sp = Bio::SPTR.new(data) assert_equal(["Isoform 2 is highly expressed in the brain, heart, spleen, kidney and blood. Isoform 2 is expressed (at protein level) in the spleen, skeletal muscle and gastrointestinal epithelia."], sp.cc['TISSUE SPECIFICITY']) end def test_toxic_dose # TOXIC DOSE Description of the lethal dose (LD), paralytic dose (PD) or effective dose of a protein data = 'CC -!- TOXIC DOSE: LD(50) is 12 mg/kg by intraperitoneal injection.' sp = Bio::SPTR.new(data) assert_equal(["LD(50) is 12 mg/kg by intraperitoneal injection."], sp.cc['TOXIC DOSE']) end def test_web_resource # WEB RESOURCE Description of a cross-reference to a network database/resource for a specific protein; see 3.21.34 data = 'CC -!- WEB RESOURCE: NAME=Inherited peripheral neuropathies mutation db; CC URL="http://www.molgen.ua.ac.be/CMTMutations/". CC -!- WEB RESOURCE: NAME=Connexin-deafness homepage; CC URL="http://www.crg.es/deafness/". CC -!- WEB RESOURCE: NAME=GeneReviews; CC URL="http://www.genetests.org/query?gene=GJB1".' 
sp = Bio::SPTR.new(data) assert_equal(['NAME=Inherited peripheral neuropathies mutation db; URL="http://www.molgen.ua.ac.be/CMTMutations/".', 'NAME=Connexin-deafness homepage; URL="http://www.crg.es/deafness/".', 'NAME=GeneReviews; URL="http://www.genetests.org/query?gene=GJB1".'], sp.cc['WEB RESOURCE']) assert_equal([{'Name' => "Inherited peripheral neuropathies mutation db", 'URL' => 'http://www.molgen.ua.ac.be/CMTMutations/', 'Note' => nil}, {'Name' => "Connexin-deafness homepage", 'URL' => 'http://www.crg.es/deafness/', 'Note' => nil}, {'Name' => "GeneReviews", 'URL' => 'http://www.genetests.org/query?gene=GJB1', 'Note' => nil}], sp.cc('WEB RESOURCE')) end end # class TestSPTRCC # http://br.expasy.org/sprot/userman.html#Ref_line class TestSPTRRef < Test::Unit::TestCase def setup data = 'RN [1] RP NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C), FUNCTION, INTERACTION RP WITH PKC-3, SUBCELLULAR LOCATION, TISSUE SPECIFICITY, DEVELOPMENTAL RP STAGE, AND MUTAGENESIS OF PHE-175 AND PHE-221. RC STRAIN=Bristol N2; RX PubMed=11134024; DOI=10.1074/jbc.M008990200; RG The mouse genome sequencing consortium; RA Galinier A., Bleicher F., Negre D., Perriere G., Duclos B., RA Cozzone A.J., Cortay J.-C.; RT "A novel adapter protein employs a phosphotyrosine binding domain and RT exceptionally basic N-terminal domains to capture and localize an RT atypical protein kinase C: characterization of Caenorhabditis elegans RT C kinase adapter 1, a protein that avidly binds protein kinase C3."; RL J. Biol. Chem. 276:10463-10475(2001).' @obj = SPTR.new(data) end def test_ref res = {"RT" => "A novel adapter protein employs a phosphotyrosine binding domain and exceptionally basic N-terminal domains to capture and localize an atypical protein kinase C: characterization of Caenorhabditis elegans C kinase adapter 1, a protein that avidly binds protein kinase C3.", "RL" => "J. Biol. Chem. 
276:10463-10475(2001).", "RA" => "Galinier A., Bleicher F., Negre D., Perriere G., Duclos B., Cozzone A.J., Cortay J.-C.", "RX" => {"MEDLINE" => nil, "DOI" => "10.1074/jbc.M008990200", "PubMed" => "11134024"}, "RC" => [{"Text" => "Bristol N2", "Token" => "STRAIN"}], "RN" => "[1]", "RP" => ["NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C)", "FUNCTION", "INTERACTION WITH PKC-3", "SUBCELLULAR LOCATION", "TISSUE SPECIFICITY", "DEVELOPMENTAL STAGE", "MUTAGENESIS OF PHE-175 AND PHE-221"], "RG" => ["The mouse genome sequencing consortium"]} assert_equal(res, @obj.ref.first) end def test_RN assert_equal("[1]", @obj.ref.first['RN']) end def test_RP assert_equal(["NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C)", "FUNCTION", "INTERACTION WITH PKC-3", "SUBCELLULAR LOCATION", "TISSUE SPECIFICITY", "DEVELOPMENTAL STAGE", "MUTAGENESIS OF PHE-175 AND PHE-221"], @obj.ref.first['RP']) end def test_RC assert_equal([{"Text"=>"Bristol N2", "Token"=>"STRAIN"}], @obj.ref.first['RC']) end def test_RX assert_equal({'MEDLINE' => nil, 'PubMed' => '11134024', 'DOI' => '10.1074/jbc.M008990200'}, @obj.ref.first['RX']) end def test_RG assert_equal(["The mouse genome sequencing consortium"], @obj.ref.first['RG']) end def test_RA assert_equal("Galinier A., Bleicher F., Negre D., Perriere G., Duclos B., Cozzone A.J., Cortay J.-C.", @obj.ref.first['RA']) end def test_RT assert_equal("A novel adapter protein employs a phosphotyrosine binding domain and exceptionally basic N-terminal domains to capture and localize an atypical protein kinase C: characterization of Caenorhabditis elegans C kinase adapter 1, a protein that avidly binds protein kinase C3.", @obj.ref.first['RT']) end def test_RL assert_equal("J. Biol. Chem. 
276:10463-10475(2001).", @obj.ref.first['RL']) end end # class TestSPTRReferences # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.0 class TestSPTRSwissProtRel41_0 < Test::Unit::TestCase # Progress in the conversion of Swiss-Prot to mixed-case characters # Multiple RP lines def test_multiple_RP_lines data = "RN [1] RP SEQUENCE FROM N.A., SEQUENCE OF 23-42 AND 351-365, AND RP CHARACTERIZATION." sp = SPTR.new(data) assert_equal(['SEQUENCE FROM N.A.', 'SEQUENCE OF 23-42 AND 351-365', 'CHARACTERIZATION'], sp.ref.first['RP']) end end # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.1 class TestSPTRSwissProtRel41_1 < Test::Unit::TestCase # New syntax of the CC line topic ALTERNATIVE PRODUCTS def test_alternative_products data = "ID TEST_ENTRY STANDARD; PRT; 393 AA. CC -!- ALTERNATIVE PRODUCTS: CC Event=Alternative promoter; CC Comment=Free text; CC Event=Alternative splicing; Named isoforms=2; CC Comment=Optional free text; CC Name=Isoform_1; Synonyms=Synonym_1; CC IsoId=Isoform_identifier_1; CC Sequence=Displayed; CC Note=Free text; CC Name=Isoform_2; Synonyms=Synonym_1, Synonym_2; CC IsoId=Isoform_identifier_1, Isoform_identifer_2; CC Sequence=VSP_identifier_1, VSP_identifier_2; CC Note=Free text; CC Event=Alternative initiation; CC Comment=Free text;" sp = SPTR.new(data) res = {"Comment" => "Free text", "Named isoforms" => "2", "Variants" => [{"Name" => "Isoform_1", "Synonyms" => ["Synonym_1"], "IsoId" => ["Isoform_identifier_1"], "Sequence" => ["Displayed"] }, {"Name" => "Isoform_2", "Synonyms" => ["Synonym_1", "Synonym_2"], "IsoId" => ["Isoform_identifier_1", "Isoform_identifer_2"], "Sequence" => ["VSP_identifier_1", "VSP_identifier_2"]}], "Event" => ["Alternative promoter"]} assert_equal(res, sp.cc('ALTERNATIVE PRODUCTS')) end def test_alternative_products_with_ft data = "ID TEST_ENTRY STANDARD; PRT; 393 AA. 
CC -!- ALTERNATIVE PRODUCTS: CC Event=Alternative splicing; Named isoforms=6; CC Name=1; CC IsoId=Q15746-4; Sequence=Displayed; CC Name=2; CC IsoId=Q15746-5; Sequence=VSP_000040; CC Name=3A; CC IsoId=Q15746-6; Sequence=VSP_000041, VSP_000043; CC Name=3B; CC IsoId=Q15746-7; Sequence=VSP_000040, VSP_000041, VSP_000042; CC Name=4; CC IsoId=Q15746-8; Sequence=VSP_000041, VSP_000042; CC Name=del-1790; CC IsoId=Q15746-9; Sequence=VSP_000044; FT VARSPLIC 437 506 VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKA FT RTRDSGTYSCTASNAQGQVSCSWTLQVER -> G (in FT isoform 2 and isoform 3B). FT /FTId=VSP_004791. FT VARSPLIC 1433 1439 DEVEVSD -> MKWRCQT (in isoform 3A, FT isoform 3B and isoform 4). FT /FTId=VSP_004792. FT VARSPLIC 1473 1545 Missing (in isoform 4). FT /FTId=VSP_004793. FT VARSPLIC 1655 1705 Missing (in isoform 3A and isoform 3B). FT /FTId=VSP_004794. FT VARSPLIC 1790 1790 Missing (in isoform Del-1790). FT /FTId=VSP_004795." sp = SPTR.new(data) assert_equal({"Comment" => "", "Named isoforms" => "6", "Variants" => [{"IsoId"=>["Q15746-4"], "Name"=>"1", "Synonyms"=>[], "Sequence"=>["Displayed"]}, {"IsoId"=>["Q15746-5"], "Name"=>"2", "Synonyms"=>[], "Sequence"=>["VSP_000040"]}, {"IsoId"=>["Q15746-6"], "Name"=>"3A", "Synonyms"=>[], "Sequence"=>["VSP_000041", "VSP_000043"]}, {"IsoId"=>["Q15746-7"], "Name"=>"3B", "Synonyms"=>[], "Sequence"=>["VSP_000040", "VSP_000041", "VSP_000042"]}, {"IsoId"=>["Q15746-8"], "Name"=>"4", "Synonyms"=>[], "Sequence"=>["VSP_000041", "VSP_000042"]}, {"IsoId"=>["Q15746-9"], "Name"=>"del-1790", "Synonyms"=>[], "Sequence"=>["VSP_000044"]}], "Event"=>["Alternative splicing"]}, sp.cc('ALTERNATIVE PRODUCTS')) assert_equal([{"FTId"=>"VSP_004791", "From"=>437, "To"=>506, "Description"=>"VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKA RTRDSGTYSCTASNAQGQVSCSWTLQVER -> G (in isoform 2 and isoform 3B).", "diff"=> ["VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKARTRDSGTYSCTASNAQGQVSCSWTLQVER", "G"], "original"=> ["VARSPLIC", "437", "506", 
"VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKA RTRDSGTYSCTASNAQGQVSCSWTLQVER -> G (in isoform 2 and isoform 3B).", "/FTId=VSP_004791."]}, {"FTId"=>"VSP_004792", "From"=>1433, "diff"=>["DEVEVSD", "MKWRCQT"], "To"=>1439, "original"=> ["VARSPLIC", "1433", "1439", "DEVEVSD -> MKWRCQT (in isoform 3A, isoform 3B and isoform 4).", "/FTId=VSP_004792."], "Description"=>"DEVEVSD -> MKWRCQT (in isoform 3A, isoform 3B and isoform 4)."}, {"FTId"=>"VSP_004793", "From"=>1473, "diff"=>[nil, nil], "To"=>1545, "original"=> ["VARSPLIC", "1473", "1545", "Missing (in isoform 4).", "/FTId=VSP_004793."], "Description"=>"Missing (in isoform 4)."}, {"FTId"=>"VSP_004794", "From"=>1655, "diff"=>[nil, nil], "To"=>1705, "original"=> ["VARSPLIC", "1655", "1705", "Missing (in isoform 3A and isoform 3B).", "/FTId=VSP_004794."], "Description"=>"Missing (in isoform 3A and isoform 3B)."}, {"FTId"=>"VSP_004795", "From"=>1790, "diff"=>[nil, nil], "To"=>1790, "original"=>["VARSPLIC", "1790", "1790", "Missing (in isoform Del-1790).", "/FTId=VSP_004795."], "Description"=>"Missing (in isoform Del-1790)."}], sp.ft['VARSPLIC']) end end # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.10 class TestSPTRSwissProtRel41_10 < Test::Unit::TestCase # Reference Comment (RC) line topics may span lines def test_RC_lines data = "RN [1] RC STRAIN=AZ.026, DC.005, GA.039, GA2181, IL.014, IN.018, KY.172, KY2.37, RC LA.013, MN.001, MNb027, MS.040, NY.016, OH.036, TN.173, TN2.38, RC UT.002, AL.012, AZ.180, MI.035, VA.015, and IL2.17;" sp = SPTR.new(data) assert_equal([{"Text"=>"AZ.026", "Token"=>"STRAIN"}, {"Text"=>"DC.005", "Token"=>"STRAIN"}, {"Text"=>"GA.039", "Token"=>"STRAIN"}, {"Text"=>"GA2181", "Token"=>"STRAIN"}, {"Text"=>"IL.014", "Token"=>"STRAIN"}, {"Text"=>"IN.018", "Token"=>"STRAIN"}, {"Text"=>"KY.172", "Token"=>"STRAIN"}, {"Text"=>"KY2.37", "Token"=>"STRAIN"}, {"Text"=>"LA.013", "Token"=>"STRAIN"}, {"Text"=>"MN.001", "Token"=>"STRAIN"}, {"Text"=>"MNb027", "Token"=>"STRAIN"}, {"Text"=>"MS.040", 
"Token"=>"STRAIN"}, {"Text"=>"NY.016", "Token"=>"STRAIN"}, {"Text"=>"OH.036", "Token"=>"STRAIN"}, {"Text"=>"TN.173", "Token"=>"STRAIN"}, {"Text"=>"TN2.38", "Token"=>"STRAIN"}, {"Text"=>"UT.002", "Token"=>"STRAIN"}, {"Text"=>"AL.012", "Token"=>"STRAIN"}, {"Text"=>"AZ.180", "Token"=>"STRAIN"}, {"Text"=>"MI.035", "Token"=>"STRAIN"}, {"Text"=>"VA.015", "Token"=>"STRAIN"}, {"Text"=>"IL2.17", "Token"=>"STRAIN"}], sp.ref.first['RC']) end end # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.20 class TestSPTRSwissProtRel41_20 < Test::Unit::TestCase # Case and wording change for submissions to Swiss-Prot in reference location (RL) lines def test_RL_lines data = "RL Submitted (MAY-2002) to the SWISS-PROT data bank." sp = SPTR.new(data) assert_equal('', sp.ref.first['RL']) end # New comment line (CC) topic ALLERGEN def test_CC_allergen data = "CC -!- ALLERGEN: Causes an allergic reaction in human. Binds IgE. It is a CC partially heat-labile allergen that may cause both respiratory and CC food-allergy symptoms in patients with the bird-egg syndrome." sp = SPTR.new(data) assert_equal(["Causes an allergic reaction in human. Binds IgE. It is a partially heat-labile allergen that may cause both respiratory and food-allergy symptoms in patients with the bird-egg syndrome."], sp.cc("ALLERGEN")) end end # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel42.6 class TestSPTRSwissProtRel42_6 < Test::Unit::TestCase # New comment line (CC) topic RNA EDITING def test_CC_rna_editing data = "CC -!- RNA EDITING: Modified_positions=393, 431, 452, 495." sp = SPTR.new(data) assert_equal({"Note"=>"", "Modified_positions"=>['393', '431', '452', '495']}, sp.cc("RNA EDITING")) data = "CC -!- RNA EDITING: Modified_positions=59, 78, 94, 98, 102, 121; Note=The CC stop codon at position 121 is created by RNA editing. The nonsense CC codon at position 59 is modified to a sense codon." 
sp = SPTR.new(data) assert_equal({"Note"=>"The stop codon at position 121 is created by RNA editing. The nonsense codon at position 59 is modified to a sense codon.", "Modified_positions"=>['59', '78', '94', '98', '102', '121']}, sp.cc("RNA EDITING")) data = "CC -!- RNA EDITING: Modified_positions=Not_applicable; Note=Some CC positions are modified by RNA editing via nucleotide insertion or CC deletion. The initiator methionine is created by RNA editing." sp = SPTR.new(data) assert_equal({'Modified_positions' => ['Not_applicable'], 'Note' => "Some positions are modified by RNA editing via nucleotide insertion or deletion. The initiator methionine is created by RNA editing."}, sp.cc("RNA EDITING")) end end # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel1_12 class TestSPTRUniProtRel1_12 < Test::Unit::TestCase # Digital Object Identifier (DOI) in the RX line def test_DOI_in_RX_line # RX [MEDLINE=Medline_identifier; ][PubMed=Pubmed_identifier; ][DOI=Digital_object_identifier;] data = " RN [1] RX MEDLINE=97291283; PubMed=9145897; DOI=10.1007/s00248-002-2038-4;" sp = SPTR.new(data) assert_equal({'MEDLINE' => '97291283', 'PubMed' => '9145897', 'DOI' => '10.1007/s00248-002-2038-4'}, sp.ref.first['RX']) end # New line type: RG (Reference Group) def test_RG_line data = " RN [1] RG The C. elegans sequencing consortium; RG The Brazilian network for HIV isolation and characterization;" sp = SPTR.new(data) assert_equal(['The C. 
elegans sequencing consortium', 'The Brazilian network for HIV isolation and characterization'], sp.ref.first['RG']) end end # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel2_0 class TestSPTRUniProtRel2_0 < Test::Unit::TestCase # New format for the GN (Gene Name) line # GN Name=; Synonyms=[, ...]; OrderedLocusNames=[, ...]; # xsGN ORFNames=[, ...]; def test_GN_line data = "GN Name=atpG; Synonyms=uncG, papC; GN OrderedLocusNames=b3733, c4659, z5231, ECs4675, SF3813, S3955;" sp = SPTR.new(data) assert_equal([{:orfs => [], :loci => ["b3733", "c4659", "z5231", "ECs4675", "SF3813", "S3955"], :name => "atpG", :synonyms => ["uncG", "papC"]}], sp.gn) data = "GN ORFNames=SPAC1834.11c;" sp = SPTR.new(data) assert_equal([{:orfs => ['SPAC1834.11c'], :loci => [], :name => '', :synonyms => []}], sp.gn) data = "GN Name=cysA1; Synonyms=cysA; OrderedLocusNames=Rv3117, MT3199; GN ORFNames=MTCY164.27; GN and GN Name=cysA2; OrderedLocusNames=Rv0815c, MT0837; ORFNames=MTV043.07c;" sp = SPTR.new(data) assert_equal([{:orfs => ["MTCY164.27"], :loci => ["Rv3117", "MT3199"], :name => "cysA1", :synonyms => ["cysA"]}, {:orfs => ["MTV043.07c"], :loci => ["Rv0815c", "MT0837"], :name => "cysA2", :synonyms => []}], sp.gn) end end # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel2_1 class TestSPTRUniProtRel2_1 < Test::Unit::TestCase # Format change in the comment line (CC) topic: MASS SPECTROMETRY def test_CC_mass_spectrometry data = "CC -!- MASS SPECTROMETRY: MW=32875.93; METHOD=MALDI; CC RANGE=1-284 (Isoform 3); NOTE=Ref.6." sp = SPTR.new(data) assert_equal([{"RANGE"=>"1-284", "METHOD"=>"MALDI", "MW_ERR"=>nil, "NOTE"=>"Ref.6", "MW"=>"32875.93"}], sp.cc("MASS SPECTROMETRY")) end end # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel2_3 class TestSPTRUniProtRel2_3 < Test::Unit::TestCase # New RL line structure for electronic publications def test_RL_line data = "RL Submitted (XXX-YYYY) to the HIV data bank." 
sp = SPTR.new(data) assert_equal('', sp.ref.first['RL']) end # Format change in the cross-reference to PDB def test_DR_PDB data = "DR PDB; 1NB3; X-ray; A/B/C/D=116-335, P/R/S/T=98-105." sp = SPTR.new(data) assert_equal([["1NB3", "X-ray", "A/B/C/D=116-335, P/R/S/T=98-105"]], sp.dr['PDB']) end end # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel3_4 class TestSPTRUniProtRel3_4 < Test::Unit::TestCase # Changes in the RP (Reference Position) line def test_RP_line data = " RN [1] RP NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 1), PROTEIN SEQUENCE RP OF 108-131; 220-231 AND 349-393, CHARACTERIZATION, AND MUTAGENESIS OF RP ARG-336." sp = SPTR.new(data) assert_equal(['NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 1)', 'PROTEIN SEQUENCE OF 108-131; 220-231 AND 349-393', 'CHARACTERIZATION', 'MUTAGENESIS OF ARG-336'], sp.ref.first['RP']) data = " RN [1] RP NUCLEOTIDE SEQUENCE [GENOMIC DNA / MRNA]." sp = SPTR.new(data) assert_equal(['NUCLEOTIDE SEQUENCE [GENOMIC DNA / MRNA]'], sp.ref.first['RP']) end # New comment line (CC) topic: BIOPHYSICOCHEMICAL PROPERTIES def test_CC_biophysiochemical_properties data = "CC -!- BIOPHYSICOCHEMICAL PROPERTIES: CC Absorption: CC Abs(max)=395 nm; CC Note=Exhibits a smaller absorbance peak at 470 nm. The CC fluorescence emission spectrum peaks at 509 nm with a shoulder CC at 540 nm;" sp = SPTR.new(data) assert_equal({"Redox potential" => "", "Temperature dependence" => "", "Kinetic parameters" => {}, "Absorption" => {"Note" => "Exhibits a smaller absorbance peak at 470 nm. 
The fluorescence emission spectrum peaks at 509 nm with a shoulder at 540 nm", "Abs(max)" => "395 nm"}, "pH dependence" => ""}, sp.cc("BIOPHYSICOCHEMICAL PROPERTIES")) data = "CC -!- BIOPHYSICOCHEMICAL PROPERTIES: CC Kinetic parameters: CC KM=62 mM for glucose; CC KM=90 mM for maltose; CC Vmax=0.20 mmol/min/mg enzyme with glucose as substrate; CC Vmax=0.11 mmol/min/mg enzyme with maltose as substrate; CC Note=Acetylates glucose, maltose, mannose, galactose, and CC fructose with a decreasing relative rate of 1, 0.55, 0.20, 0.07, CC 0.04;" sp = SPTR.new(data) assert_equal({"Redox potential" => "", "Temperature dependence" => "", "Kinetic parameters" => {"KM" => "62 mM for glucose; KM=90 mM for maltose", "Note" => "Acetylates glucose, maltose, mannose, galactose, and fructose with a decreasing relative rate of 1, 0.55, 0.20, 0.07, 0.04", "Vmax" => "0.20 mmol/min/mg enzyme with glucose as substrate"}, "Absorption" => {}, "pH dependence" => ""}, sp.cc("BIOPHYSICOCHEMICAL PROPERTIES")) data = "CC -!- BIOPHYSICOCHEMICAL PROPERTIES: CC Kinetic parameters: CC KM=1.76 uM for chlorophyll; CC pH dependence: CC Optimum pH is 7.5. Active from pH 5.0 to 9.0; CC Temperature dependence: CC Optimum temperature is 45 degrees Celsius. Active from 30 to 60 CC degrees Celsius;" sp = SPTR.new(data) assert_equal({"Redox potential" => "", "Temperature dependence" => "Optimum temperature is 45 degrees Celsius. Active from 30 to 60 degrees Celsius", "Kinetic parameters" => {}, "Absorption" => {}, "pH dependence" => "Optimum pH is 7.5. 
Active from pH 5.0 to 9.0"}, sp.cc("BIOPHYSICOCHEMICAL PROPERTIES")) end end # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel3_5 class TestSPTRUniProtRel3_5 < Test::Unit::TestCase # Extension of the Swiss-Prot entry name format def test_entry_name_format # TBD end end # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel4_0 class TestSPTRUniProtRel4_0 < Test::Unit::TestCase # Extension of the TrEMBL entry name format # Change of the entry name in many Swiss-Prot entries # New comment line (CC) topic: INTERACTION def test_CC_interaction data = "CC -!- INTERACTION: CC P11450:fcp3c; NbExp=1; IntAct=EBI-126914, EBI-159556;" sp = SPTR.new(data) assert_equal([{"SP_Ac" => "P11450", "identifier" => "fcp3c", "optional_identifier" => nil, "NbExp" => "1", "IntAct" => ["EBI-126914", "EBI-159556"]}], sp.cc("INTERACTION")) end def test_CC_interaction_isoform data = "CC -!- INTERACTION: CC Q9W1K5-1:cg11299; NbExp=1; IntAct=EBI-133844, EBI-212772;" sp = SPTR.new(data) assert_equal([{"SP_Ac" => 'Q9W1K5-1', "identifier" => 'cg11299', "optional_identifier" => nil, "NbExp" => "1", "IntAct" => ["EBI-133844", "EBI-212772"]}], sp.cc("INTERACTION")) end def test_CC_interaction_no_gene_name data = "CC -!- INTERACTION: CC Q8NI08:-; NbExp=1; IntAct=EBI-80809, EBI-80799;" sp = SPTR.new(data) assert_equal([{"SP_Ac" => 'Q8NI08', "identifier" => '-', "optional_identifier" => nil, "NbExp" => "1", "IntAct" => ["EBI-80809", "EBI-80799"]}], sp.cc("INTERACTION")) end def test_CC_interaction_self_association data = "ID TEST_ENTRY STANDARD; PRT; 393 AA. 
CC -!- INTERACTION: CC Self; NbExp=1; IntAct=EBI-123485, EBI-123485;" sp = SPTR.new(data) assert_equal([{"SP_Ac" => 'TEST_ENTRY', "identifier" => 'TEST_ENTRY', "optional_identifier" => nil, "NbExp" => "1", "IntAct" => ["EBI-123485", "EBI-123485"]}], sp.cc("INTERACTION")) end def test_CC_interaction_The_source_organisms_of_the_interacting_proteins_are_different data = "CC -!- INTERACTION: CC Q8C1S0:2410018m14rik (xeno); NbExp=1; IntAct=EBI-394562, EBI-398761;" sp = SPTR.new(data) assert_equal([{"SP_Ac" => 'Q8C1S0', "identifier" => '2410018m14rik', "optional_identifier" => '(xeno)', "NbExp" => "1", "IntAct" => ["EBI-394562", "EBI-398761"]}], sp.cc("INTERACTION")) end def test_CC_interaction_Different_isoforms_of_the_current_protein_are_shown_to_interact_with_the_same_protein data = "CC -!- INTERACTION: CC P51617:irak1; NbExp=1; IntAct=EBI-448466, EBI-358664; CC P51617:irak1; NbExp=1; IntAct=EBI-448472, EBI-358664;" sp = SPTR.new(data) assert_equal([{"SP_Ac" => "P51617", "identifier" => "irak1", "optional_identifier" => nil, "NbExp" => "1", "IntAct" => ["EBI-448466", "EBI-358664"]}, {"SP_Ac" => "P51617", "identifier" => "irak1", "optional_identifier" => nil, "NbExp" => "1", "IntAct" => ["EBI-448472", "EBI-358664"]}], sp.cc("INTERACTION")) end end # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel5_0 class TestSPTRUniProtRel5_0 < Test::Unit::TestCase # Format change in the DR line # DR DATABASE_IDENTIFIER; PRIMARY_IDENTIFIER; SECONDARY_IDENTIFIER[; TERTIARY_IDENTIFIER][; QUATERNARY_IDENTIFIER]. def test_DR_line data = " DR EMBL; M68939; AAA26107.1; -; Genomic_DNA. DR EMBL; U56386; AAB72034.1; -; mRNA." 
sp = SPTR.new(data) assert_equal([["M68939", "AAA26107.1", "-", "Genomic_DNA"], ["U56386", "AAB72034.1", "-", "mRNA"]], sp.dr['EMBL']) assert_equal([{" "=>"-", "Version"=>"AAA26107.1", "Accession"=>"M68939", "Molecular Type"=>"Genomic_DNA"}, {" "=>"-", "Version"=>"AAB72034.1", "Accession"=>"U56386", "Molecular Type"=>"mRNA"}], sp.dr('EMBL')) end # New feature (FT) keys and redefinition of existing FT keys end # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel5_4 class TestSPTRUniProtRel5_4 < Test::Unit::TestCase # Multiple comment line (CC) topics COFACTOR def test_multiple_cofactors data = "CC -!- COFACTOR: Binds 1 2Fe-2S cluster per subunit (By similarity). CC -!- COFACTOR: Binds 1 Fe(2+) ion per subunit (By similarity)." sp = SPTR.new(data) assert_equal(["Binds 1 2Fe-2S cluster per subunit (By similarity).", "Binds 1 Fe(2+) ion per subunit (By similarity)."], sp.cc['COFACTOR']) assert_equal(["Binds 1 2Fe-2S cluster per subunit (By similarity).", "Binds 1 Fe(2+) ion per subunit (By similarity)."], sp.cc('COFACTOR')) end end # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel6_0 class TestSPTRUniProtRel6_0 < Test::Unit::TestCase # Changes in the OG (OrGanelle) line def test_OG_line data = "OG Plastid." sp = SPTR.new(data) assert_equal(['Plastid'], sp.og) data = "OG Plastid; Apicoplast." sp = SPTR.new(data) assert_equal(['Plastid', 'Apicoplast'], sp.og) data = "OG Plastid; Chloroplast." sp = SPTR.new(data) assert_equal(['Plastid', 'Chloroplast'], sp.og) data = "OG Plastid; Cyanelle." sp = SPTR.new(data) assert_equal(['Plastid', 'Cyanelle'], sp.og) data = "OG Plastid; Non-photosynthetic plastid." sp = SPTR.new(data) assert_equal(['Plastid', 'Non-photosynthetic plastid'], sp.og) end end # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel6_1 class TestSPTRUniProtRel6_1 < Test::Unit::TestCase # Annotation changes concerning the feature key METAL def test_FT_metal old_data = "FT METAL 61 61 Copper and zinc." 
sp = SPTR.new(old_data) assert_equal([{'From' => 61, 'To' => 61, 'Description' => 'Copper and zinc.', 'FTId' =>'', 'diff' => [], 'original' => ["METAL", "61", "61", "Copper and zinc.", ""]}], sp.ft['METAL']) new_data = "FT METAL 61 61 Copper. FT METAL 61 61 Zinc." sp = SPTR.new(new_data) assert_equal([{"From" => 61, "To" => 61, "Description" => "Copper.", "FTId" => "", "diff" => [], "original" => ["METAL", "61", "61", "Copper.", ""]}, {"From" => 61, "To" => 61, "Description" => "Zinc.", "FTId" => "", "diff" => [], "original" => ["METAL", "61", "61", "Zinc.", ""]}], sp.ft['METAL']) end end # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel6_5 class TestSPTRUniProtRel6_5 < Test::Unit::TestCase # Changes in the keywlist.txt file # * Modification of the HI line format: def test_HI_line # HI Category: Keyword_1; ...; Keyword_n; Described_Keyword. # The first term listed in an HI line is a category. It is followed by a hierarchical list of keywords of that category and ends with the described keyword. There can be more than one HI line of the same category in one keyword entry. data = "HI Molecular function: Ionic channel; Calcium channel. HI Biological process: Transport; Ion transport; Calcium transport; Calcium channel. HI Ligand: Calcium; Calcium channel." sp = SPTR.new(data) assert_equal([{'Category' => 'Molecular function', 'Keywords' => ['Ionic channel'], 'Keyword' => 'Calcium channel'}, {'Category' => 'Biological process', 'Keywords' => ['Transport', 'Ion transport', 'Calcium transport'], 'Keyword' => 'Calcium channel'}, {'Category' => 'Ligand', 'Keywords' => ['Calcium'], 'Keyword' => 'Calcium channel'}], sp.hi) end end # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel7.0 class TestSPTRUniProtRel7_0 < Test::Unit::TestCase # Changes concerning dates and versions numbers (DT lines) def test_DT_line up_sp_data = "DT 01-JAN-1998, integrated into UniProtKB/Swiss-Prot. DT 15-OCT-2001, sequence version 3. DT 01-APR-2004, entry version 14." 
sp = SPTR.new(up_sp_data) assert_equal({"sequence" => "15-OCT-2001, sequence version 3.", "annotation" => "01-APR-2004, entry version 14.", "created" => "01-JAN-1998, integrated into UniProtKB/Swiss-Prot."}, sp.dt) up_tr_data = "DT 01-FEB-1999, integrated into UniProtKB/TrEMBL. DT 15-OCT-2000, sequence version 2. DT 15-DEC-2004, entry version 5." sp = SPTR.new(up_tr_data) assert_equal({"sequence" => "15-OCT-2000, sequence version 2.", "annotation" => "15-DEC-2004, entry version 5.", "created" => "01-FEB-1999, integrated into UniProtKB/TrEMBL."}, sp.dt) end # Addition of a feature (FT) key CHAIN over the whole sequence length # Changes concerning the copyright statement def test_CC_copyright_statement data = "CC ----------------------------------------------------------------------- CC Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms CC Distributed under the Creative Commons Attribution-NoDerivs License CC -----------------------------------------------------------------------" sp = SPTR.new(data) assert_equal({}, sp.cc) end end # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel7.6 class TestSPTRUniProtRel7_6 < Test::Unit::TestCase # Sequences with over 10000 amino acids in UniProtKB/Swiss-Prot def test_10000aa entry_id = 'Q09165' data = ["SQ SEQUENCE 393 AA; 43653 MW; AD5C149FD8106131 CRC64;\n", " MEEPQSDPSV EPPLSQETFS DLWKLLPENN VLSPLPSQAM DDLMLSPDDI EQWFTEDPGP\n" * 200, "//\n"].join sp = SPTR.new(data) assert_equal(12000, sp.seq.size) end end # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel8.0 class TestSPTRUniProtRel8_0 < Test::Unit::TestCase # Replacement of the feature key VARSPLIC by VAR_SEQ def test_FT_VER_SEQ data = "FT VAR_SEQ 1 34 Missing (in isoform 3). FT /FTId=VSP_004099." 
sp = SPTR.new(data) res = [{'From' => 1, 'To' => 34, 'Description' => 'Missing (in isoform 3).', 'diff' => ['', nil], 'FTId' => 'VSP_004099', 'original' => ["VAR_SEQ", "1", "34", "Missing (in isoform 3).", "/FTId=VSP_004099."]}] assert_equal(res, sp.ft('VAR_SEQ')) end # Syntax modification of the comment line (CC) topic ALTERNATIVE PRODUCTS def test_CC_alternative_products # CC -!- ALTERNATIVE PRODUCTS: # CC Event=Event(, Event)*; Named isoforms=Number_of_isoforms; # (CC Comment=Free_text;)? # (CC Name=Isoform_name;( Synonyms=Synonym(, Synonym)*;)? # CC IsoId=Isoform_identifier(, Isoform_identifer)*; # CC Sequence=(Displayed|External|Not described|Feature_identifier(, Feature_identifier)*); # (CC Note=Free_text;)?)+ # Note: Variable values are represented in italics. Perl-style multipliers indicate whether a pattern (as delimited by parentheses) is optional (?), may occur 0 or more times (*), or 1 or more times (+). Alternative values are separated by a pipe symbol (|). data = "CC -!- ALTERNATIVE PRODUCTS: CC Event=Alternative splicing, Alternative initiation; Named isoforms=3; CC Comment=Isoform 1 and isoform 2 arise due to the use of two CC alternative first exons joined to a common exon 2 at the same CC acceptor site but in different reading frames, resulting in two CC completely different isoforms; CC Name=1; Synonyms=p16INK4a; CC IsoId=O77617-1; Sequence=Displayed; CC Name=3; CC IsoId=O77617-2; Sequence=VSP_004099; CC Note=Produced by alternative initiation at Met-35 of isoform 1; CC Name=2; Synonyms=p19ARF; CC IsoId=O77618-1; Sequence=External; FT VAR_SEQ 1 34 Missing (in isoform 3). FT /FTId=VSP_004099." 
sp = SPTR.new(data) assert_equal({"Comment" => "Isoform 1 and isoform 2 arise due to the use of two alternative first exons joined to a common exon 2 at the same acceptor site but in different reading frames, resulting in two completely different isoforms", "Named isoforms" => "3", "Variants" => [{"IsoId" => ["O77617-1"], "Name" => "1", "Synonyms" => ["p16INK4a"], "Sequence" => ["Displayed"]}, {"IsoId" => ["O77617-2"], "Name" => "3", "Synonyms" => [], "Sequence" => ["VSP_004099"]}, {"IsoId" => ["O77618-1"], "Name" => "2", "Synonyms" => ["p19ARF"], "Sequence" => ["External"]}], "Event" => ["Alternative splicing", "Alternative initiation"]}, sp.cc("ALTERNATIVE PRODUCTS")) assert_equal([{"From" => 1, "To" => 34, "Description"=>"Missing (in isoform 3).", "FTId" => "VSP_004099", "diff" => ["", nil], "original"=> ["VAR_SEQ", "1", "34", "Missing (in isoform 3).", "/FTId=VSP_004099."]}], sp.ft("VAR_SEQ")) end # Replacement of the comment line (CC) topic DATABASE by WEB RESOURCE def test_CC_web_resource # CC -!- DATABASE: NAME=ResourceName[; NOTE=FreeText][; WWW=WWWAddress][; FTP=FTPAddress]. # CC -!- WEB RESOURCE: NAME=ResourceName[; NOTE=FreeText]; URL=WWWAddress. # The length of these lines may exceed 75 characters because long URL addresses are not wrapped into multiple lines. assert(true) end # Introduction of the new line type OH (Organism Host) for viral hosts def test_OH_lines data = 'OS Tomato black ring virus (strain E) (TBRV). OC Viruses; ssRNA positive-strand viruses, no DNA stage; Comoviridae; OC Nepovirus; Subgroup B. OX NCBI_TaxID=12277; OH NCBI_TaxID=4681; Allium porrum (Leek). OH NCBI_TaxID=4045; Apium graveolens (Celery). OH NCBI_TaxID=161934; Beta vulgaris (Sugar beet). OH NCBI_TaxID=38871; Fraxinus (ash trees). OH NCBI_TaxID=4236; Lactuca sativa (Garden lettuce). OH NCBI_TaxID=4081; Lycopersicon esculentum (Tomato). OH NCBI_TaxID=39639; Narcissus pseudonarcissus (Daffodil). OH NCBI_TaxID=3885; Phaseolus vulgaris (Kidney bean) (French bean). 
OH NCBI_TaxID=35938; Robinia pseudoacacia (Black locust). OH NCBI_TaxID=23216; Rubus (bramble). OH NCBI_TaxID=4113; Solanum tuberosum (Potato). OH NCBI_TaxID=13305; Tulipa. OH NCBI_TaxID=3603; Vitis.' res = [{'NCBI_TaxID' => '4681', 'HostName' => 'Allium porrum (Leek)'}, {'NCBI_TaxID' => '4045', 'HostName' => 'Apium graveolens (Celery)'}, {'NCBI_TaxID' => '161934', 'HostName' => 'Beta vulgaris (Sugar beet)'}, {'NCBI_TaxID' => '38871', 'HostName' => 'Fraxinus (ash trees)'}, {'NCBI_TaxID' => '4236', 'HostName' => 'Lactuca sativa (Garden lettuce)'}, {'NCBI_TaxID' => '4081', 'HostName' => 'Lycopersicon esculentum (Tomato)'}, {'NCBI_TaxID' => '39639', 'HostName' => 'Narcissus pseudonarcissus (Daffodil)'}, {'NCBI_TaxID' => '3885', 'HostName' => 'Phaseolus vulgaris (Kidney bean) (French bean)'}, {'NCBI_TaxID' => '35938', 'HostName' => 'Robinia pseudoacacia (Black locust)'}, {'NCBI_TaxID' => '23216', 'HostName' => 'Rubus (bramble)'}, {'NCBI_TaxID' => '4113', 'HostName' => 'Solanum tuberosum (Potato)'}, {'NCBI_TaxID' => '13305', 'HostName' => 'Tulipa'}, {'NCBI_TaxID' => '3603', 'HostName' => 'Vitis'}] sp = SPTR.new(data) assert_equal(res, sp.oh) end def test_OH_line_exception data = "ID TEST_ENTRY STANDARD; PRT; 393 AA. OH NCBI_TaxID=23216x: Rubus (bramble)." sp = SPTR.new(data) assert_raise(ArgumentError) { sp.oh } end end class TestOSLine < Test::Unit::TestCase def test_uncapitalized_letter_Q32725_9POAL data = "OS unknown cyperaceous sp.\n" sp = SPTR.new(data) assert_equal('unknown cyperaceous sp.', sp.os.first['os']) end def test_period_trancation_O63147 data = "OS Hippotis sp. Clark and Watts 825.\n" sp = SPTR.new(data) assert_equal('Hippotis sp. 
Clark and Watts 825.', sp.os.first['os']) end end end # module Bio bio-1.4.3.0001/test/unit/bio/db/embl/test_embl_rel89.rb0000644000004100000410000001153112200110570022225 0ustar www-datawww-data# # test/unit/bio/db/embl/test_embl_rel89.rb - Unit test for Bio::EMBL # # Copyright:: Copyright (C) 2007 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/embl/embl' module Bio class TestEMBL89 < Test::Unit::TestCase def setup output = File.read(File.join(BioRubyTestDataPath, 'embl', 'AB090716.embl.rel89')) @obj = Bio::EMBL.new(output) end # http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#s_3_4_1 def test_id_line assert(@obj.id_line) end def test_id_line_iterator assert(@obj.id_line {|key, value| }) end def test_id_line_entry_name assert_equal('AB090716', @obj.id_line('ENTRY_NAME')) end def test_id_line_data_class assert_equal('STD', @obj.id_line('DATA_CLASS')) end def test_id_line_sequence_version assert_equal('1', @obj.id_line('SEQUENCE_VERSION')) end def test_id_line_molecule_type assert_equal('genomic DNA', @obj.id_line('MOLECULE_TYPE')) end def test_id_line_division assert_equal('VRT', @obj.id_line('DIVISION')) end def test_id_line_sequence_length assert_equal(166, @obj.id_line('SEQUENCE_LENGTH')) end def test_entry entry_id = 'AB090716' assert_equal(entry_id, @obj.entry) assert_equal(entry_id, @obj.entry_name) assert_equal(entry_id, @obj.entry_id) end def test_molecule molecule = 'genomic DNA' assert_equal(molecule, @obj.molecule) assert_equal(molecule, @obj.molecule_type) end def test_division assert_equal('VRT', @obj.division) end def test_sequence_length seqlen = 166 assert_equal(seqlen, @obj.sequence_length) assert_equal(seqlen, @obj.seqlen) end # Bio::EMBLDB::COMMON#ac def test_ac ac = ['AB090716'] 
assert_equal(ac, @obj.ac) assert_equal(ac, @obj.accessions) end # Bio::EMBLDB::COMMON#accession def test_accession assert_equal('AB090716', @obj.accession) end def test_sv assert_equal('AB090716.1', @obj.sv) end def test_version assert_equal(1, @obj.version) end def test_dt assert(@obj.dt) end def test_dt_iterator assert(@obj.dt {|key, value| }) end def test_dt_created assert_equal('25-OCT-2002 (Rel. 73, Created)', @obj.dt('created')) end def test_dt_updated assert_equal('14-NOV-2006 (Rel. 89, Last updated, Version 3)', @obj.dt('updated')) end # Bio::EMBLDB::COMMON#de def test_de assert_equal("Haplochromis sp. 'muzu, rukwa' LWS gene for long wavelength-sensitive opsin, partial cds, specimen_voucher:specimen No. HT-9361.", @obj.de) end # Bio::EMBLDB::COMMON#kw def test_kw k = [] assert_equal([], @obj.kw) assert_equal([], @obj.keywords) end def test_os assert_equal("Haplochromis sp. 'muzu, rukwa'", @obj.os[0]['os']) assert_nil(@obj.os[0]['name']) end # Bio::EMBLDB::COMMON#oc def test_oc assert_equal('Eukaryota', @obj.oc.first) end # Bio::EMBLDB::COMMON#og def test_og assert_equal([], @obj.og) end # Bio::EMBLDB::COMMON#ref def test_ref assert_equal(2, @obj.ref.size) end # Bio::EMBLDB::COMMON#references def test_references assert_equal(Array, @obj.references.class) end # Bio::EMBLDB::COMMON#dr def test_dr assert_equal({}, @obj.dr) end def test_fh assert_equal('Key Location/Qualifiers', @obj.fh) end def test_ft assert_equal(Array, @obj.ft.class) end def test_ft_iterator @obj.ft.each do |feature| assert_equal(Bio::Feature, feature.class) end end def test_ft_accessor assert_equal('CDS', @obj.ft[1].feature) end def test_each_cds @obj.each_cds do |x| assert_equal('CDS', x.feature) end end def test_each_gene @obj.each_gene do |x| assert_equal('gene', x.feature) end end def test_cc assert_equal('', @obj.cc) end # def test_xx # end def test_sq data = {"a"=>29, "c"=>42, "ntlen"=>166, "g"=>41, "t"=>54, "other"=>0} assert_equal(data, @obj.sq) end def test_sq_get assert_equal(29, 
@obj.sq("a")) end def test_seq seq = 'gttctggcctcatggactgaagacttcctgtggacctgatgtgttcagtggaagtgaagaccctggagtacagtcctacatgattgttctcatgattacttgctgtttcatccccctggctatcatcatcctgtgctaccttgctgtgtggatggccatccgtgct' assert_equal(seq, @obj.seq) assert_equal(seq, @obj.naseq) assert_equal(seq, @obj.ntseq) end end end bio-1.4.3.0001/test/unit/bio/db/embl/test_uniprot_new_part.rb0000644000004100000410000001344612200110570023671 0ustar www-datawww-data# # test/unit/bio/db/embl/test_uniprot_new_part.rb - Unit test for Bio::UniProt for new file formats using part of psudo entries # # Copyright:: Copyright (C) 2011 Naohisa Goto # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/embl/uniprot' module Bio class TestUniProt_ID_since_rel9_0 < Test::Unit::TestCase def setup text = "ID ABC_DEFGH Reviewed; 256 AA.\n" @obj = Bio::UniProt.new(text) end def test_id_line expected = { "ENTRY_NAME" => "ABC_DEFGH", "DATA_CLASS" => "Reviewed", "SEQUENCE_LENGTH" => 256, "MOLECULE_TYPE" => nil } assert_equal(expected, @obj.id_line) end def test_entry_id assert_equal("ABC_DEFGH", @obj.entry_id) end def test_entry_name assert_equal("ABC_DEFGH", @obj.entry_name) end def test_entry assert_equal("ABC_DEFGH", @obj.entry) end def test_sequence_length assert_equal(256, @obj.sequence_length) end def test_aalen assert_equal(256, @obj.aalen) end def test_molecule assert_nil(@obj.molecule) end end #class TestUniProt_ID_since_rel9_0 class TestUniProt_DE_since_rel14_0 < Test::Unit::TestCase def setup text = < "BioRuby web site", "Note" => "BioRuby main web site located in Tokyo, Japan", "URL" => "http://bioruby.org" }, { "Name" => "official mirror of BioRuby web site hosted in the Open Bioinformatics Foundation", "Note" => nil, "URL" => "http://bioruby.open-bio.org/" }, { "Name" => "BioRuby Wiki 
site", "Note" => nil, "URL" => "http://bioruby.open-bio.org/wiki/" } ] assert_equal(expected, @obj.cc('WEB RESOURCE')) end end #class TestUniProt_CC_WEB_RESOURCE_since_rel12_2 end #module Bio bio-1.4.3.0001/test/unit/bio/db/test_phyloxml.rb0000644000004100000410000006041612200110570021226 0ustar www-datawww-data# # = test/unit/bio/db/test_phyloxml.rb - Unit test for Bio::PhyloXML::Parser # # Copyright:: Copyright (C) 2009 # Diana Jaunzeikare # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' begin require 'libxml' rescue LoadError end if defined?(LibXML) then require 'bio/db/phyloxml/phyloxml_parser' end module Bio class TestPhyloXML_Check_LibXML < Test::Unit::TestCase def test_libxml assert(defined?(LibXML), "Error: libxml-ruby library is not present. Please install libxml-ruby library. It is needed for Bio::PhyloXML module. 
Unit test for PhyloXML will not be performed.") end end #class TestPhyloXML_LibXMLCheck end #module Bio module Bio module TestPhyloXMLData PHYLOXML_TEST_DATA = Pathname.new(File.join(BioRubyTestDataPath, 'phyloxml')).cleanpath.to_s def self.example_xml File.join PHYLOXML_TEST_DATA, 'phyloxml_examples.xml' #If you want to test the output of writer, then do this: #File.join PHYLOXML_TEST_DATA, 'phyloxml_examples_test.xml' # But make sure you run ruby test/unit/bio/db/test_phyloxml_writer.rb before end def self.made_up_xml File.join PHYLOXML_TEST_DATA, 'made_up.xml' #If you want to test the output of writer, then do this: #File.join PHYLOXML_TEST_DATA, 'made_up_test.xml' # But make sure you run ruby test/unit/bio/db/test_phyloxml_writer.rb before end def self.metazoa_xml File.join PHYLOXML_TEST_DATA, 'ncbi_taxonomy_metazoa.xml' end def self.mollusca_xml File.join PHYLOXML_TEST_DATA, 'ncbi_taxonomy_mollusca.xml' end def self.life_xml File.join PHYLOXML_TEST_DATA, 'tol_life_on_earth_1.xml' end def self.dollo_xml File.join PHYLOXML_TEST_DATA, 'o_tol_332_d_dollo.xml' end def self.mollusca_short_xml File.join PHYLOXML_TEST_DATA, 'ncbi_taxonomy_mollusca_short.xml' end end #end module TestPhyloXMLData class TestPhyloXML_class_methods < Test::Unit::TestCase def test_open filename = TestPhyloXMLData.example_xml assert_instance_of(Bio::PhyloXML::Parser, phyloxml = Bio::PhyloXML::Parser.open(filename)) common_test_next_tree(phyloxml) phyloxml.close end def test_open_with_block filename = TestPhyloXMLData.example_xml phyloxml_bak = nil ret = Bio::PhyloXML::Parser.open(filename) do |phyloxml| assert_instance_of(Bio::PhyloXML::Parser, phyloxml) common_test_next_tree(phyloxml) phyloxml_bak = phyloxml "ok" end assert_equal("ok", ret) assert_equal(true, phyloxml_bak.closed?) 
end def test_new str = File.read(TestPhyloXMLData.example_xml) assert_instance_of(Bio::PhyloXML::Parser, phyloxml = Bio::PhyloXML::Parser.new(str)) common_test_next_tree(phyloxml) end def test_for_io io = File.open(TestPhyloXMLData.example_xml) assert_instance_of(Bio::PhyloXML::Parser, phyloxml = Bio::PhyloXML::Parser.for_io(io)) common_test_next_tree(phyloxml) io.close end def common_test_next_tree(phyloxml) tree = phyloxml.next_tree tree_arr = [] while tree != nil do tree_arr[tree_arr.length] = tree.name tree = phyloxml.next_tree end assert_equal(13, tree_arr.length) end private :common_test_next_tree end #class TestPhyloXML_class_methods class TestPhyloXML_private_methods < Test::Unit::TestCase def setup @phyloxml = Bio::PhyloXML::Parser.open(TestPhyloXMLData.example_xml) end def teardown @phyloxml.close end def test__validate assert_nothing_raised { @phyloxml.instance_eval { _validate(:file, TestPhyloXMLData.example_xml) } } end def test__validate_string assert_nothing_raised { @phyloxml.instance_eval { _validate(:string, '') } } end def test__validate_validation_error libxml_set_handler_quiet assert_raise(RuntimeError) { @phyloxml.instance_eval { _validate(:string, 'test') } } libxml_set_handler_verbose end def test__schema s = @phyloxml.instance_eval { _schema } assert_instance_of(LibXML::XML::Schema, s) end def test__secure_filename assert_equal('http:/bioruby.org/test.xml', @phyloxml.instance_eval { _secure_filename('http://bioruby.org/test.xml') }) end def test__secure_filename_unchanged assert_equal('test/test.xml', @phyloxml.instance_eval { _secure_filename('test/test.xml') }) end def test_ClosedPhyloXMLParser cp = Bio::PhyloXML::Parser::ClosedPhyloXMLParser.new assert_raise(LibXML::XML::Error) { cp.next_tree } end private def libxml_set_handler_quiet # Sets quiet handler. # Note that there are no way to get current handler. 
LibXML::XML::Error.set_handler(&LibXML::XML::Error::QUIET_HANDLER) end def libxml_set_handler_verbose # Sets verbose handler (default LibXML error handler). # Note that there are no way to get current handler. LibXML::XML::Error.set_handler(&LibXML::XML::Error::VERBOSE_HANDLER) end end #class TestPhyloXML_private_methods class TestPhyloXML_close < Test::Unit::TestCase def phyloxml_open(&block) Bio::PhyloXML::Parser.open(TestPhyloXMLData.example_xml, &block) end private :phyloxml_open def test_close phyloxml = phyloxml_open phyloxml.next_tree assert_nil(phyloxml.close) end def test_closed? phyloxml = phyloxml_open assert_equal(false, phyloxml.closed?) phyloxml.next_tree assert_equal(false, phyloxml.closed?) phyloxml.close assert_equal(true, phyloxml.closed?) end def test_closed_with_block ret = phyloxml_open do |phyloxml| assert_equal(false, phyloxml.closed?) phyloxml.next_tree assert_equal(false, phyloxml.closed?) phyloxml end assert_equal(true, ret.closed?) end def test_close_after_close phyloxml = phyloxml_open phyloxml.close assert_raise(LibXML::XML::Error) { phyloxml.close } end def test_next_tree_after_close phyloxml = phyloxml_open phyloxml.close assert_raise(LibXML::XML::Error) { phyloxml.next_tree } end def test_next_tree_after_open_with_block phyloxml = phyloxml_open { |arg| arg } assert_raise(LibXML::XML::Error) { phyloxml.next_tree } end def test_close_after_open_with_block phyloxml = phyloxml_open { |arg| arg } assert_raise(LibXML::XML::Error) { phyloxml.close } end def test_close_in_open_with_block phyloxml = phyloxml_open do |arg| ret = arg assert_nil(arg.close) ret end assert_raise(LibXML::XML::Error) { phyloxml.close } end def test_close_does_not_affect_io io = File.open(TestPhyloXMLData.example_xml) phyloxml = Bio::PhyloXML::Parser.for_io(io) phyloxml.next_tree phyloxml.close assert(!io.closed?) 
end end #class TestPhyloXML_close class TestPhyloXML1 < Test::Unit::TestCase def setup @phyloxml = Bio::PhyloXML::Parser.open(TestPhyloXMLData.example_xml) end def teardown @phyloxml.close end def test_initialize assert_instance_of(Bio::PhyloXML::Parser, @phyloxml) end def test_next_tree() tree = @phyloxml.next_tree tree_arr = [] while tree != nil do tree_arr[tree_arr.length] = tree.name tree = @phyloxml.next_tree end assert_equal(13, tree_arr.length) end end #class TestPhyloXML1 class TestPhyloXML2 < Test::Unit::TestCase #setup is called before and every time any function es executed. def setup @phyloxml = Bio::PhyloXML::Parser.open(TestPhyloXMLData.example_xml) @tree = @phyloxml.next_tree end def test_tree_name assert_equal("example from Prof. Joe Felsenstein's book \"Inferring Phylogenies\"", @tree.name) end def test_tree_description assert_equal("phyloXML allows to use either a \"branch_length\" attribute or element to indicate branch lengths.", @tree.description) end def test_branch_length_attribute assert_equal(0.792, @tree.total_distance) end def test_rooted_atr assert_equal(true, @tree.rooted) end def test_branch_length_tag @tree = @phyloxml.next_tree assert_equal(0.792, @tree.total_distance) end def test_bootstrap #iterate throuch first 2 trees to get to the third @tree = @phyloxml.next_tree @tree = @phyloxml.next_tree node = @tree.get_node_by_name("AB") assert_equal('bootstrap', node.confidences[0].type) assert_equal(89, node.confidences[0].value) end def test_to_biotreenode_bootstrap #iterate throuch first 2 trees to get to the third @tree = @phyloxml.next_tree @tree = @phyloxml.next_tree node = @tree.get_node_by_name("AB") bionode = node.to_biotreenode assert_equal(89, bionode.bootstrap) end def test_duplications 4.times do @tree = @phyloxml.next_tree end node = @tree.root assert_equal(1, node.events.speciations) end def test_taxonomy_scientific_name 3.times do @tree = @phyloxml.next_tree end t = @tree.get_node_by_name('A').taxonomies[0] 
assert_equal('E. coli', t.scientific_name) assert_equal("J. G. Cooper, 1863", t.authority) t = @tree.get_node_by_name('C').taxonomies[0] assert_equal('C. elegans', t.scientific_name) end def test_taxonomy_id 5.times do @tree = @phyloxml.next_tree end leaves = @tree.leaves codes = [] ids = [] #id_types = [] leaves.each { |node| codes[codes.length] = node.taxonomies[0].code ids[ids.length] = node.taxonomies[0].taxonomy_id #id_types[id_types.length] = node.taxonomy.id_type } assert_equal(["CLOAB", "DICDI", "OCTVU"], codes.sort) #@todo assert ids, id_types. or create new class for id. end def test_taxonomy2 9.times do @tree = @phyloxml.next_tree end taxonomy = @tree.root.taxonomies[0] assert_equal("8556", taxonomy.taxonomy_id.value) assert_equal("NCBI", taxonomy.taxonomy_id.provider) assert_equal("Varanus", taxonomy.scientific_name) assert_equal("genus", taxonomy.rank) assert_equal("EMBL REPTILE DATABASE", taxonomy.uri.desc) assert_equal("http://www.embl-heidelberg.de/~uetz/families/Varanidae.html", taxonomy.uri.uri) end def test_distribution_desc 9.times do @tree = @phyloxml.next_tree end leaves = @tree.leaves descrs = [] leaves.each { |node| descrs << node.distributions[0].desc } assert_equal(['Africa', 'Asia', 'Australia'], descrs.sort) end def test_distribution_point 10.times do @tree = @phyloxml.next_tree end point = @tree.get_node_by_name('A').distributions[0].points[0] assert_equal("WGS84", point.geodetic_datum) assert_equal(47.481277, point.lat) assert_equal(8.769303, point.long) assert_equal(472, point.alt) point = @tree.get_node_by_name('B').distributions[0].points[0] assert_equal("WGS84", point.geodetic_datum) assert_equal(35.155904, point.lat) assert_equal(136.915863, point.long) assert_equal(10, point.alt) end def test_sequence 3.times do @tree = @phyloxml.next_tree end sequence_a = @tree.get_node_by_name('A').sequences[0] assert_equal('alcohol dehydrogenase', sequence_a.annotations[0].desc) assert_equal("probability", 
sequence_a.annotations[0].confidence.type) assert_equal(0.99, sequence_a.annotations[0].confidence.value) sequence_b = @tree.get_node_by_name('B').sequences[0] assert_equal('alcohol dehydrogenase', sequence_b.annotations[0].desc) assert_equal("probability", sequence_b.annotations[0].confidence.type) assert_equal(0.91, sequence_b.annotations[0].confidence.value) sequence_c = @tree.get_node_by_name('C').sequences[0] assert_equal('alcohol dehydrogenase', sequence_c.annotations[0].desc) assert_equal("probability", sequence_c.annotations[0].confidence.type) assert_equal(0.67, sequence_c.annotations[0].confidence.value) end def test_sequence2 4.times do @tree = @phyloxml.next_tree end leaves = @tree.leaves leaves.each { |node| #just test one node for now if node.sequences[0].id_source == 'x' assert_equal('adhB', node.sequences[0].symbol) assert_equal("ncbi", node.sequences[0].accession.source) assert_equal('AAB80874', node.sequences[0].accession.value) assert_equal('alcohol dehydrogenase', node.sequences[0].name) end if node.sequences[0].id_source == 'z' assert_equal("InterPro:IPR002085", node.sequences[0].annotations[0].ref) end } end def test_sequence3 5.times do @tree = @phyloxml.next_tree end @tree.leaves.each { |node| if node.sequences[0].symbol == 'ADHX' assert_equal('UniProtKB', node.sequences[0].accession.source) assert_equal('P81431', node.sequences[0].accession.value) assert_equal('Alcohol dehydrogenase class-3', node.sequences[0].name) assert_equal(true, node.sequences[0].is_aligned) assert_equal(true, node.sequences[0].is_aligned?) 
assert_equal('TDATGKPIKCMAAIAWEAKKPLSIEEVEVAPPKSGEVRIKILHSGVCHTD', node.sequences[0].mol_seq) assert_equal('EC:1.1.1.1', node.sequences[0].annotations[0].ref) assert_equal('GO:0004022', node.sequences[0].annotations[1].ref) end } end def test_to_biosequence 5.times do @tree = @phyloxml.next_tree end @tree.leaves.each { |node| if node.sequences[0].symbol =='ADHX' seq = node.sequences[0].to_biosequence assert_equal('Alcohol dehydrogenase class-3', seq.definition) assert_equal('UniProtKB', seq.id_namespace) assert_equal('P81431', seq.entry_id) assert_equal('TDATGKPIKCMAAIAWEAKKPLSIEEVEVAPPKSGEVRIKILHSGVCHTD', seq.seq.to_s) end } end def test_extract_biosequence 5.times do @tree = @phyloxml.next_tree end @tree.leaves.each { |node| if node.sequences[0].symbol == 'ADHX' seq = node.extract_biosequence assert_equal('Alcohol dehydrogenase class-3', seq.definition) assert_equal('TDATGKPIKCMAAIAWEAKKPLSIEEVEVAPPKSGEVRIKILHSGVCHTD', seq.seq.to_s) assert_equal('Octopus vulgaris', seq.classification[0]) end } end def test_date 11.times do @tree = @phyloxml.next_tree end date_a = @tree.get_node_by_name('A').date assert_equal('mya', date_a.unit) assert_equal("Silurian", date_a.desc) assert_equal(425, date_a.value) date_b = @tree.get_node_by_name('B').date assert_equal('mya', date_b.unit) assert_equal("Devonian", date_b.desc) assert_equal(320, date_b.value) date_c = @tree.get_node_by_name('C').date assert_equal('mya', date_c.unit) assert_equal('Ediacaran', date_c.desc) assert_equal(600, date_c.value) assert_equal(570, date_c.minimum) assert_equal(630, date_c.maximum) end def test_property 7.times do @tree = @phyloxml.next_tree end property = @tree.get_node_by_name('A').properties[0] assert_equal('xsd:integer', property.datatype) assert_equal('NOAA:depth', property.ref) assert_equal('clade', property.applies_to) assert_equal('METRIC:m', property.unit) assert_equal(' 1200 ', property.value) end def test_uri 9.times do @tree = @phyloxml.next_tree end uri = @tree.root.taxonomies[0].uri 
assert_equal("EMBL REPTILE DATABASE", uri.desc) assert_equal("http://www.embl-heidelberg.de/~uetz/families/Varanidae.html", uri.uri) end end #class TestPhyloXML2 class TestPhyloXML3 < Test::Unit::TestCase TEST_STRING = """ same example, with support of type \"bootstrap\" AB 89 A B C """ def setup phyloxml = Bio::PhyloXML::Parser.new(TEST_STRING) @tree = phyloxml.next_tree() end def test_children node = @tree.get_node_by_name("AB") # nodes = @tree.children(node).sort { |a,b| a.name <=> b.name } node_names = [] @tree.children(node).each { |children| node_names[node_names.length] = children.name } node_names.sort! assert_equal(["A", "B"], node_names) end end # class class TestPhyloXML4 < Test::Unit::TestCase #test cases what pertain to tree def test_clade_relation @phyloxml = Bio::PhyloXML::Parser.open(TestPhyloXMLData.example_xml) 7.times do @tree = @phyloxml.next_tree end cr = @tree.clade_relations[0] assert_equal("b", cr.id_ref_0) assert_equal("c", cr.id_ref_1) assert_equal("network_connection", cr.type) end def test_sequence_realations @phyloxml = Bio::PhyloXML::Parser.open(TestPhyloXMLData.example_xml) 5.times do @tree = @phyloxml.next_tree end sr = @tree.sequence_relations[0] assert_equal("x", sr.id_ref_0) assert_equal("y", sr.id_ref_1) assert_equal("paralogy", sr.type) end end class TestPhyloXML5 < Test::Unit::TestCase #testing file made_up.xml def setup @phyloxml = Bio::PhyloXML::Parser.open(TestPhyloXMLData.made_up_xml) end def test_phylogeny_confidence tree = @phyloxml.next_tree() assert_equal("bootstrap", tree.confidences[0].type) assert_equal(89, tree.confidences[0].value) assert_equal("probability", tree.confidences[1].type) assert_equal(0.71, tree.confidences[1].value) end def test_to_biotreenode_probability tree = @phyloxml.next_tree() node = tree.get_node_by_name('c').to_biotreenode assert_equal(nil, node.bootstrap) end def test_polygon 2.times do @tree = @phyloxml.next_tree end polygon = @tree.get_node_by_name('A').distributions[0].polygons[0] 
assert_equal(3, polygon.points.length) assert_equal(47.481277, polygon.points[0].lat) assert_equal("m", polygon.points[0].alt_unit) assert_equal(136.915863, polygon.points[1].long) assert_equal(452, polygon.points[2].alt) polygon = @tree.get_node_by_name('A').distributions[0].polygons[1] #making sure can read in second polygon assert_equal(3, polygon.points.length) assert_equal(40.481277, polygon.points[0].lat) end def test_reference 3.times do @tree = @phyloxml.next_tree end references = @tree.get_node_by_name('A').references assert_equal("10.1093/bioinformatics/btm619", references[0].doi) assert_equal("Phyutility: a phyloinformatics tool for trees, alignments and molecular data", references[0].desc) assert_equal("10.1186/1471-2105-9-S1-S23", references[1].doi) end def test_single_clade 4.times do @tree = @phyloxml.next_tree() end assert_equal("A", @tree.root.name) end def test_domain_architecture 5.times {@tree = @phyloxml.next_tree()} node = @tree.get_node_by_name("22_MOUSE") assert_equal("22_MOUSE", node.name) assert_equal("MOUSE", node.taxonomies[0].code) domain_arch = node.sequences[0].domain_architecture assert_equal(1249, domain_arch.length) assert_equal(6, domain_arch.domains[0].from) assert_equal(90, domain_arch.domains[0].to) assert_in_delta(7.0E-26, domain_arch.domains[0].confidence, 1E-26) assert_equal("CARD", domain_arch.domains[0].value) assert_equal("x", domain_arch.domains[0].id) assert_equal(733, domain_arch.domains[5].from) assert_equal(771, domain_arch.domains[5].to) assert_in_delta(4.7E-14, domain_arch.domains[5].confidence, 1E-15) assert_equal("WD40", domain_arch.domains[5].value) assert_equal(1168, domain_arch.domains.last.from) assert_equal(1204, domain_arch.domains.last.to) assert_equal(0.3, domain_arch.domains.last.confidence) assert_equal("WD40", domain_arch.domains.last.value) end def test_clade_width @tree = @phyloxml.next_tree assert_equal(0.2, @tree.root.width) end def test_binary_characters 6.times do @tree = @phyloxml.next_tree end 
bc =@tree.get_node_by_name("cellular_organisms").binary_characters assert_equal("parsimony inferred", bc.bc_type) assert_equal(0, bc.lost_count) assert_equal(0, bc.gained_count) assert_equal([], bc.lost) bc2 = @tree.get_node_by_name("Eukaryota").binary_characters assert_equal(2, bc2.gained_count) assert_equal(["Cofilin_ADF", "Gelsolin"], bc2.gained) assert_equal(["Cofilin_ADF", "Gelsolin"], bc2.present) end def test_rerootable2 6.times do @tree = @phyloxml.next_tree end assert_equal(false, @tree.rerootable) end def test_phylogeny_attributes @tree = @phyloxml.next_tree assert_equal(true, @tree.rooted) assert_equal(false, @tree.rerootable) #@todo make this test pass #assert_equal("1", @tree.branch_length_unit) end def test_taxonomy_synonym 5.times do @tree = @phyloxml.next_tree end node = @tree.get_node_by_name('22_MOUSE') t = node.taxonomies[0] assert_equal("murine", t.synonyms[0]) assert_equal("vermin", t.synonyms[1]) end def test_annotation_property 5.times do @tree =@phyloxml.next_tree end node = @tree.get_node_by_name('22_MOUSE') prop = node.sequences[0].annotations[0].properties[0] assert_equal("1200", prop.value) end end class TestPhyloXML5 < Test::Unit::TestCase def test_each phyloxml = Bio::PhyloXML::Parser.open(TestPhyloXMLData.example_xml) count = 0 phyloxml.each do |tree| count +=1 end assert_equal(13, count) end def test_other phyloxml = Bio::PhyloXML::Parser.open(TestPhyloXMLData.example_xml) assert_equal(nil, phyloxml.other[0]) phyloxml.each do |tree| #iterate through all trees, to get to the end end o = phyloxml.other[0] assert_equal('align:alignment', o.element_name) assert_equal('seq', o.children[0].element_name) assert_equal('aggtcgcggcctgtggaagtcctctcct', o.children[1].value) assert_equal("C", o.children[2].attributes["name"]) end def test_array_behaviour phyloxml = Bio::PhyloXML::Parser.open(TestPhyloXMLData.example_xml) tree = phyloxml[2] assert_equal("same example, with support of type \"bootstrap\"", tree.name) end # def test_get_tree_by_name 
# @phyloxml = Bio::PhyloXML::Parser.open(TestPhyloXMLData.made_up_xml) # tree = @phyloxml.get_tree_by_name "testing confidence" # # end end end if defined?(LibXML) #end module Bio bio-1.4.3.0001/test/unit/bio/db/test_lasergene.rb0000644000004100000410000002060412200110570021312 0ustar www-datawww-data# # test/unit/bio/db/test_lasergene.rb - Unit test for Bio::Lasergene # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2007 Center for Biomedical Research Informatics, University of Minnesota (http://cbri.umn.edu) # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/sequence' require 'bio/db/lasergene' module Bio #:nodoc: class TestLasergene < Test::Unit::TestCase #:nodoc: def setup file_format_1 = < # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/genbank/common' require 'bio/reference' require 'bio/compat/references' # Bio::References in this file seems to be obsolete, but Bio::NCBIDB::Common should require it. require 'bio/feature' require 'bio/compat/features' # Bio::Features in this file seems to be obsolete, but Bio::NCBIDB::Common should require it. # - This class has low coverage, because a sample entry used in it lacks a lot of fields. # - There are all the methods for test. module Bio class NCBIDB class TestCommon < Test::Unit::TestCase #Mock Class including the target module. #BioNCBIDBCommon is used for the test. 
class BioNCBIDBCommon < Bio::NCBIDB include Bio::NCBIDB::Common end #a sample entry is a part of data/genbank/SCU49845.gb def setup entry =<"Saccharomyces cerevisiae", "common_name"=>"Saccharomyces cerevisiae (baker's yeast)", "taxonomy"=> "Eukaryota; Fungi; Dikarya; Ascomycota; Saccharomyceta; Saccharomycotina; Saccharomycetes; Saccharomycetales; Saccharomycetaceae; Saccharomyces."} assert_equal(expected, @obj.source) #another pattern to pass line 103-105 source_pattern2 =<"Saccharomyces cerevisiae", "common_name"=>"Saccharomyces cerevisiae (baker's yeast)", "taxonomy"=>"Saccharomyces."} assert_equal(expected2, obj2.source) #the other pattern to pass line 106-109 source_pattern3 =<"Saccharomyces cerevisiae", "common_name"=>"Saccharomyces cerevisiae (baker's yeast)", "taxonomy"=>""} assert_equal(expected3, obj3.source) end def test_common_name expected = "Saccharomyces cerevisiae (baker's yeast)" assert_equal(expected, @obj.common_name) end def test_organism expected = "Saccharomyces cerevisiae" assert_equal(expected, @obj.organism) end def test_taxonomy expected = "Eukaryota; Fungi; Dikarya; Ascomycota; Saccharomyceta; Saccharomycotina; Saccharomycetes; Saccharomycetales; Saccharomycetaceae; Saccharomyces." assert_equal(expected, @obj.taxonomy) end def test_references str=<[], :volume=>"239", :doi=>nil, :pages=>"665-671", :embl_gb_record_number=>2, :pubmed=>"20034106", :abstract=>"", :issue=>"2", :year=>"2010", :sequence_position=>"1-2264", :affiliations=>[], :journal=>"Dev. 
Dyn.", :title=> "Retinoic acid controls expression of tissue remodeling genes Hmgn1 and Fgf18 at the digit-interdigit junction", :authors=>["Zhao, X.", "Brade, T.", "Cunningham, T.J.", "Duester, G."], :medline=>"", :url=>nil, :comments=> ["GeneRIF: limited to the digit-interdigit junction rather than being expressed throughout the interdigital zone"]} actual = {:abstract => obj[0].abstract, :affiliations => obj[0].affiliations, :authors => obj[0].authors, :comments => obj[0].comments, :doi => obj[0].doi, :embl_gb_record_number => obj[0].embl_gb_record_number, :issue => obj[0].issue, :journal => obj[0].journal, :medline => obj[0].medline, :mesh => obj[0].mesh, :pages => obj[0].pages, :pubmed => obj[0].pubmed, :sequence_position => obj[0].sequence_position, :title => obj[0].title, :url => obj[0].url, :volume => obj[0].volume, :year => obj[0].year} assert_equal(expected, actual) actual2 = "" com.references do |reference| actual2 = reference.authors break end assert_equal(["Zhao, X.", "Brade, T.", "Cunningham, T.J.", "Duester, G."],actual2) #the other pattern where a journal doesn't match the regexp. 
ref=<"CDS", :position=>"<1..206", :qualifiers=> [{:qualifier=>"product", :value=>"TCP1-beta"}, {:value=>3, :qualifier=>"codon_start"}, {:qualifier=>"translation", :value=> "SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM"}]} fet =<obj.features[0].feature, :position=>obj.features[0].position, :qualifiers=> [{:qualifier=>obj.features[0].qualifiers[0].qualifier, :value=>obj.features[0].qualifiers[0].value}, {:qualifier=>obj.features[0].qualifiers[1].qualifier, :value=> obj.features[0].qualifiers[1].value}, {:qualifier=>obj.features[0].qualifiers[2].qualifier, :value=> obj.features[0].qualifiers[2].value}]} assert_equal(expected, actual) actual2 = "" obj.features do |feature| actual2 = feature.feature end assert_equal("CDS", actual2) end def test_origin expected = "" assert_equal(expected, @obj.origin) end end end end bio-1.4.3.0001/test/unit/bio/db/genbank/test_genbank.rb0000644000004100000410000006131512200110570022363 0ustar www-datawww-data# # test/unit/bio/db/genbank/test_genbank.rb - Unit test for Bio::GenBank # # Copyright:: Copyright (C) 2010 Kazuhiro Hayashi # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/sequence' require 'bio/reference' require 'bio/feature' require 'bio/compat/features' require 'bio/compat/references' require 'bio/db/genbank/genbank' require 'bio/db/genbank/genbank_to_biosequence' module Bio class TestBioGenBank < Test::Unit::TestCase def setup filename = File.join(BioRubyTestDataPath, 'genbank', 'SCU49845.gb') @obj = Bio::GenBank.new(File.read(filename)) end def test_locus_class expected = Bio::GenBank::Locus assert_equal(expected, @obj.locus.class) locus_rel126 = "LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999"#another type of LOCUS line.(release 126) obj_rel126 = Bio::GenBank.new(locus_rel126) 
assert_equal(Bio::GenBank::Locus, obj_rel126.locus.class) end def test_locus_circular expected = "linear" assert_equal(expected, @obj.locus.circular) locus_rel126 = "LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999" obj_rel126 = Bio::GenBank.new(locus_rel126) assert_equal("circular", obj_rel126.locus.circular) end def test_locus_date expected = "23-MAR-2010" assert_equal(expected, @obj.locus.date) locus_rel126 = "LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999" obj_rel126 = Bio::GenBank.new(locus_rel126) assert_equal("05-FEB-1999", obj_rel126.locus.date) end def test_locus_division expected = "PLN" assert_equal(expected, @obj.locus.division) locus_rel126 = "LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999" obj_rel126 = Bio::GenBank.new(locus_rel126) assert_equal("VRL", obj_rel126.locus.division) end def test_locus_entry_id expected = "SCU49845" assert_equal(expected, @obj.locus.entry_id) locus_rel126 = "LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999" obj_rel126 = Bio::GenBank.new(locus_rel126) assert_equal("AB000383", obj_rel126.locus.entry_id) end def test_locus_length expected = 5028 assert_equal(expected, @obj.locus.length) locus_rel126 = "LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999" obj_rel126 = Bio::GenBank.new(locus_rel126) assert_equal(5423, obj_rel126.locus.length) end def test_locus_natype expected = "DNA" assert_equal(expected, @obj.locus.natype) locus_rel126 = "LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999" obj_rel126 = Bio::GenBank.new(locus_rel126) assert_equal("DNA", obj_rel126.locus.natype) end def test_locus_strand expected = "" assert_equal(expected, @obj.locus.strand) locus_rel126 = "LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999" obj_rel126 = Bio::GenBank.new(locus_rel126) assert_equal("", obj_rel126.locus.strand) end def test_entry_id assert_equal("SCU49845", @obj.entry_id) end def test_length assert_equal(5028, @obj.length) end def test_circular assert_equal("linear", @obj.circular) end def test_division 
assert_equal("PLN", @obj.division) end def test_date assert_equal("23-MAR-2010", @obj.date) end def test_strand assert_equal("", @obj.strand) end def test_natype assert_equal("DNA", @obj.natype) end def test_each_cds_feature @obj.each_cds do |feature| assert_equal("CDS", feature.feature) end end =begin def test_each_cds_qualifiers @obj.each_cds do |feature| feature.qualifiers do |qualifier| assert_equal(Bio::Feature::Qualifier, qualifier.class) end end end =end def test_each_cds_qualifiers expected = [[["codon_start", 3], ["product", "TCP1-beta"], ["protein_id", "AAA98665.1"], ["db_xref", "GI:1293614"], ["translation", "SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM"]], [["gene", "AXL2"], ["note", "plasma membrane glycoprotein"], ["codon_start", 1], ["product", "Axl2p"], ["protein_id", "AAA98666.1"], ["db_xref", "GI:1293615"], ["translation", "MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML"]], [["gene", "REV7"], ["codon_start", 1], ["product", "Rev7p"], ["protein_id", "AAA98667.1"], ["db_xref", "GI:1293616"], ["translation", 
"MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF"]]] actual = [] @obj.each_cds do |feature| tmp = [] feature.qualifiers.each{|qualifier| tmp << [qualifier.qualifier, qualifier.value] } actual << tmp end assert_equal(expected, actual) end def test_each_gene expected_position = ["<687..>3158", "complement(<3300..>4037)"] expected_gene = [["gene","AXL2"], ["gene","REV7"]] actual_position = [] actual_gene = [] @obj.each_gene do |gene| assert_equal("gene", gene.feature) actual_position << gene.position gene.qualifiers.each do |qualifier| actual_gene << [qualifier.qualifier, qualifier.value] end end assert_equal(expected_position,actual_position) assert_equal(expected_gene, actual_gene) end def test_basecount assert_equal({}, @obj.basecount) end def test_seq expected = "gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcac
gtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatt
tttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttctatatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttctataaaataaaatcaaattaatgtagcattttaagtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgat
c" assert_equal(expected, @obj.seq) end def test_seq_len assert_equal(5028, @obj.seq_len) end def test_date_modified assert_equal(Date, @obj.date_modified.class) assert_equal('2010-03-23', @obj.date_modified.to_s) end def test_classification expected = ["Eukaryota", "Fungi", "Dikarya", "Ascomycota", "Saccharomyceta", "Saccharomycotina", "Saccharomycetes", "Saccharomycetales", "Saccharomycetaceae", "Saccharomyces"] assert_equal(expected, @obj.classification) end def test_strandedness assert_equal(nil, @obj.strandedness) end #test for bio_to_sequence def test_to_biosequence seq = @obj.to_biosequence expected_seq = "gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaa
ggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctga
ttaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttctatatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttctataaaataaaatcaaattaatgtagcattttaagtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgatc" expected_id_namespace = "GenBank" expected_entry_id = "SCU49845" expected_primary_accession = "U49845" expected_secondary_accessions = [] expected_other_seqids = ["1293613", "GI", []] expected_molecule_type = "DNA" expected_division = "PLN" expected_topology = "linear" expected_strandedness = nil expected_keywords = [] expected_sequence_version = 
"1" expected_date_modified = "2010-03-23" expected_definition = "Saccharomyces cerevisiae TCP1-beta gene, partial cds; and Axl2p (AXL2) and Rev7p (REV7) genes, complete cds." expected_species = [] expected_classification= ["Eukaryota", "Fungi", "Dikarya", "Ascomycota", "Saccharomyceta", "Saccharomycotina", "Saccharomycetes", "Saccharomycetales", "Saccharomycetaceae", "Saccharomyces"] expected_comments = "" expected_references = [{ :abstract=>"", :affiliations=>[], :authors=>["Roemer, T.", "Madden, K.", "Chang, J.", "Snyder, M."], :comments=>nil, :doi=>nil, :embl_gb_record_number=>1, :issue=>"7", :journal=>"Genes Dev.", :medline=>"", :mesh=>[], :pages=>"777-793", :pubmed=>"8846915", :sequence_position=>"1-5028", :title=> "Selection of axial growth sites in yeast requires Axl2p, a novel plasma membrane glycoprotein", :url=>nil, :volume=>"10", :year=>"1996"}, {:abstract=>"", :affiliations=>[], :authors=>["Roemer, T."], :comments=>nil, :doi=>nil, :embl_gb_record_number=>2, :issue=>"", :journal=> "Submitted (22-FEB-1996) Biology, Yale University, New Haven, CT 06520, USA", :medline=>"", :mesh=>[], :pages=>"", :pubmed=>"", :sequence_position=>"1-5028", :title=>"Direct Submission", :url=>nil, :volume=>"", :year=>""}] expected_features = [ {:feature=>"source", :position=>"1..5028", :qualifiers=> [{:qualifier=>"organism", :value=>"Saccharomyces cerevisiae"}, {:qualifier=>"mol_type", :value=>"genomic DNA"}, {:qualifier=>"db_xref", :value=>"taxon:4932"}, {:qualifier=>"chromosome", :value=>"IX"}]}, {:feature=>"mRNA", :position=>"<1..>206", :qualifiers=> [{ :qualifier=>"product", :value=>"TCP1-beta"}]}, {:feature=>"CDS", :position=>"<1..206", :qualifiers=> [{:qualifier=>"codon_start", :value=>3}, {:qualifier=>"product", :value=>"TCP1-beta"}, {:qualifier=>"protein_id", :value=>"AAA98665.1"}, {:qualifier=>"db_xref", :value=>"GI:1293614"}, {:qualifier=>"translation", :value=> "SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM"}]}, {:feature=>"gene", 
:position=>"<687..>3158", :qualifiers=> [{:qualifier=>"gene", :value=>"AXL2"}]}, {:feature=>"mRNA", :position=>"<687..>3158", :qualifiers=> [{:qualifier=>"gene", :value=>"AXL2"}, {:qualifier=>"product", :value=>"Axl2p"}]}, {:feature=>"CDS", :position=>"687..3158", :qualifiers=> [{:qualifier=>"gene", :value=>"AXL2"}, {:qualifier=>"note", :value=>"plasma membrane glycoprotein"}, {:qualifier=>"codon_start", :value=>1}, {:qualifier=>"product", :value=>"Axl2p"}, {:qualifier=>"protein_id", :value=>"AAA98666.1"}, {:qualifier=>"db_xref", :value=>"GI:1293615"}, {:qualifier=>"translation", :value=> "MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML"}]}, {:feature=>"gene", :position=>"complement(<3300..>4037)", :qualifiers=> [{:qualifier=>"gene", :value=>"REV7"}]}, {:feature=>"mRNA", :position=>"complement(<3300..>4037)", :qualifiers=> [{:qualifier=>"gene", :value=>"REV7"}, {:qualifier=>"product", :value=>"Rev7p"}]}, {:feature=>"CDS", :position=>"complement(3300..4037)", :qualifiers=> [{:qualifier=>"gene", :value=>"REV7"}, {:qualifier=>"codon_start", :value=>1}, {:qualifier=>"product", :value=>"Rev7p"}, {:qualifier=>"protein_id", :value=>"AAA98667.1"}, {:qualifier=>"db_xref", :value=>"GI:1293616"}, 
{:qualifier=>"translation", :value=> "MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF"}]}] assert_equal(expected_seq, seq.seq) assert_equal(expected_id_namespace, seq.id_namespace) assert_equal(expected_entry_id, seq.entry_id) assert_equal(expected_primary_accession, seq.primary_accession) assert_equal(expected_secondary_accessions, seq.secondary_accessions) seqids = seq.other_seqids.first actual_other_seqids = [seqids.id, seqids.database, seqids.secondary_ids] assert_equal(expected_other_seqids, actual_other_seqids) assert_equal(expected_division, seq.division) assert_equal(expected_strandedness, seq.strandedness) assert_equal(expected_keywords, seq.keywords) assert_equal(expected_classification, seq.classification) assert_equal(expected_comments, seq.comments) refs = seq.references actual_references = [] refs.each do |ref| actual_references << {:abstract => ref.abstract, :affiliations => ref.affiliations, :authors => ref.authors, :comments => ref.comments, :doi => ref.doi, :embl_gb_record_number => ref.embl_gb_record_number, :issue => ref.issue, :journal => ref.journal, :medline => ref.medline, :mesh => ref.mesh, :pages => ref.pages, :pubmed => ref.pubmed, :sequence_position => ref.sequence_position, :title => ref.title, :url => ref.url, :volume => ref.volume, :year => ref.year} end assert_equal(expected_references, actual_references) fets = seq.features actual_features = [] fets.each do |fet| feature = fet.feature position = fet.position quals = [] fet.qualifiers.each do |qual| quals << {:qualifier => qual.qualifier, :value => qual.value} end actual_features << {:feature => feature, :position => position, :qualifiers => quals} end assert_equal(expected_features, actual_features) # skip end end #class TestBioGenBank end #module Bio 
bio-1.4.3.0001/test/unit/bio/db/genbank/test_genpept.rb0000644000004100000410000000402612200110570022414 0ustar www-datawww-data# # test/unit/bio/db/genbank/test_genpept.rb - Unit test for Bio::GenPept # # Copyright:: Copyright (C) 2010 Kazuhiro Hayashi # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/genbank/genpept' #The coverage of this class is 100% #It tests only the methods descripbed in the soruce class.(It dosen't test the inherited methods from NCBIDB) module Bio class TestBioGenPept < Test::Unit::TestCase def setup filename = File.join(BioRubyTestDataPath, 'genbank', 'CAA35997.gp') @obj = Bio::GenPept.new(File.read(filename)) end def test_locus expected = {:circular=>"linear", :date=>"12-SEP-1993", :division=>"MAM", :entry_id=>"CAA35997", :length=>100} locus = @obj.locus actual = {:entry_id=>locus.entry_id, :circular=>locus.circular, :date=>locus.date, :division=>locus.division, :length=>locus.length} assert_equal(expected, actual) end def test_entry_id assert_equal("CAA35997", @obj.entry_id) end def test_length assert_equal(100, @obj.length) end def test_circular assert_equal("linear", @obj.circular) end def test_division assert_equal("MAM", @obj.division) end def test_date assert_equal("12-SEP-1993", @obj.date) end def test_seq expected = "MRTPMLLALLALATLCLAGRADAKPGDAESGKGAAFVSKQEGSEVVKRLRRYLDHWLGAPAPYPDPLEPKREVCELNPDCDELADHIGFQEAYRRFYGPV" assert_equal(expected, @obj.seq) end def test_seq_len assert_equal(100, @obj.seq_len) end def test_dbsource expected = "DBSOURCE embl accession X51700.1\n" assert_equal(expected, @obj.dbsource) end end #class TestBioGenPept end #module Bio bio-1.4.3.0001/test/unit/bio/db/test_go.rb0000644000004100000410000001515712200110570017761 0ustar www-datawww-data# # test/unit/bio/db/test_go.rb - Unit test for 
Bio::GO # # Copyright:: Copyright (C) 2010 Kazuhiro Hayashi # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/go' module Bio class TestBioGOOntology < Test::Unit::TestCase TestDataFileName = File.join(BioRubyTestDataPath, 'go', 'selected_component.ontology') def setup @obj = Bio::GO::Ontology.new(File.read(TestDataFileName)) end def test_dag_edit_format_parser obj = Bio::GO::Ontology.new(File.read(TestDataFileName)) assert_equal(Bio::GO::Ontology,obj.class) end def test_goid2term assert_equal('cellular_component', @obj.goid2term('0005575')) assert_equal('cellular_component', @obj.goid2term('0008372')) end def test_parse_goids actual = Bio::GO::Ontology.parse_goids(""GO:0003845", :db=>"Wikipedia", :db_id=>"11beta-hydroxysteroid_dehydrogenase", :go_term=>"11-beta-hydroxysteroid dehydrogenase activity"}, {:go_id=>"GO:0047414", :db=>"Wikipedia", :db_id=> "2-(hydroxymethyl)-3-(acetamidomethylene)succinate_amidohydrolase_(deaminating\\,_decarboxylating)", :go_term=> "2-(hydroxymethyl)-3-(acetamidomethylene)succinate hydrolase activity"}, {:go_id=>"GO:0043718", :db=>"Wikipedia", :db_id=>"2-hydroxymethylglutarate_dehydrogenase", :go_term=>"2-hydroxymethylglutarate dehydrogenase activity"}] file = File.read(TestDataFileName) e2g = Bio::GO::External2go.parser(file) assert_equal(expected, e2g) assert_raise(RuntimeError){ Bio::GO::External2go.parser("probably this occurs error")} end def test_set_date e2g = Bio::GO::External2go.new e2g.set_date("$Date: 2010/06/11 01:01:37 $") assert_equal("$Date: 2010/06/11 01:01:37 $",e2g.header[:date]) end def test_set_desc e2g = Bio::GO::External2go.new e2g.set_desc([" Mapping of Gene Ontology terms to Wikipedia entries."," Wikipedia: http://en.wikipedia.org"]) assert_equal([" Mapping of Gene Ontology terms to Wikipedia 
entries."," Wikipedia: http://en.wikipedia.org"],e2g.header[:desc]) end def test_to_str assert_equal("!date: \n! version: $Revision: 1.17 $\n! date: $Date: 2010/06/11 01:01:37 $\n!\n! Generated from file ontology/editors/gene_ontology_write.obo,\n! CVS revision: 1.1296; date: 10:06:2010 16:16\n!\n! Mapping of Gene Ontology terms to Wikipedia entries.\n! Wikipedia: http://en.wikipedia.org\n! Last update at Thu Jun 10 17:21:44 2010 by the script /users/cjm/cvs/go-moose/bin/daily_from_obo.pl\n!\nWikipedia:11beta-hydroxysteroid_dehydrogenase > GO:11-beta-hydroxysteroid dehydrogenase activity ; GO:0003845\nWikipedia:2-(hydroxymethyl)-3-(acetamidomethylene)succinate_amidohydrolase_(deaminating\\,_decarboxylating) > GO:2-(hydroxymethyl)-3-(acetamidomethylene)succinate hydrolase activity ; GO:0047414\nWikipedia:2-hydroxymethylglutarate_dehydrogenase > GO:2-hydroxymethylglutarate dehydrogenase activity ; GO:0043718", @e2g.to_str) end def test_dbs assert_equal(["Wikipedia"], @e2g.dbs) end def test_db_ids assert_equal(["11beta-hydroxysteroid_dehydrogenase", "2-(hydroxymethyl)-3-(acetamidomethylene)succinate_amidohydrolase_(deaminating\\,_decarboxylating)", "2-hydroxymethylglutarate_dehydrogenase"], @e2g.db_ids) end def test_go_terms assert_equal(["11-beta-hydroxysteroid dehydrogenase activity", "2-(hydroxymethyl)-3-(acetamidomethylene)succinate hydrolase activity", "2-hydroxymethylglutarate dehydrogenase activity"], @e2g.go_terms) end def test_go_ids assert_equal(["GO:0003845", "GO:0047414", "GO:0043718"], @e2g.go_ids) end end end bio-1.4.3.0001/test/unit/bio/db/test_medline.rb0000644000004100000410000002134412200110570020764 0ustar www-datawww-data# # test/unit/bio/db/test_medline.rb - Unit test for Bio::MEDLINE # # Copyright:: Copyright (C) 2008 Collaborative Drug Discovery, Inc. 
# Exercises Bio::MEDLINE against one stored PubMed record (PMID 20146148),
# read from medline/20146148_modified.medline under the test data directory.
# Every test pins a single accessor to the value expected for that record.
class TestMEDLINE_20146148 < Test::Unit::TestCase

  # Parses the fixture file afresh before each test.
  def setup
    path = File.join(BioRubyTestDataPath, 'medline', '20146148_modified.medline')
    @obj = Bio::MEDLINE.new(File.read(path))
  end

  def test_self_new
    assert_instance_of(Bio::MEDLINE, @obj)
  end

  # The record converted to a Bio::Reference must equal a reference built
  # by hand from the same field values.
  def test_reference
    fields = {
      'authors'      => author_names,
      'affiliations' => affiliation_list,
      'journal'      => 'Traffic Inj Prev',
      'title'        => article_title,
      'pages'        => '87-95',
      'issue'        => '1',
      'volume'       => '11',
      'year'         => '2010',
      'pubmed'       => '20146148',
      'doi'          => '10.1080/15389580903471126',
      'mesh'         => mesh_headings,
      'abstract'     => abstract_placeholder
    }
    assert_equal(Bio::Reference.new(fields), @obj.reference)
  end

  def test_pmid
    assert_equal('20146148', @obj.pmid)
  end

  def test_ui
    assert_equal('', @obj.ui)
  end

  def test_ta
    assert_equal('Traffic Inj Prev', @obj.ta)
  end

  def test_vi
    assert_equal('11', @obj.vi)
  end

  def test_ip
    assert_equal('1', @obj.ip)
  end

  def test_pg
    assert_equal('87-95', @obj.pg)
  end

  def test_pages
    assert_equal('87-95', @obj.pages)
  end

  def test_dp
    assert_equal('2010 Feb', @obj.dp)
  end

  def test_year
    assert_equal('2010', @obj.year)
  end

  def test_ti
    assert_equal(article_title, @obj.ti)
  end

  def test_ab
    assert_equal(abstract_placeholder, @obj.ab)
  end

  # "au" is expected as the short author names joined by newlines.
  def test_au
    assert_equal("Mattsson M\nSummala H", @obj.au)
  end

  def test_authors
    assert_equal(author_names, @obj.authors)
  end

  def test_so
    assert_equal('Traffic Inj Prev. 2010 Feb;11(1):87-95.', @obj.so)
  end

  def test_mh
    assert_equal(mesh_headings, @obj.mh)
  end

  def test_ad
    assert_equal(affiliation_list, @obj.ad)
  end

  def test_doi
    assert_equal('10.1080/15389580903471126', @obj.doi)
  end

  def test_pii
    assert_equal('919158438', @obj.pii)
  end

  def test_pt
    assert_equal(['Journal Article', "Research Support, Non-U.S. Gov't"], @obj.pt)
  end

  private

  # The helpers below return a fresh object on every call, so no expected
  # value is shared between assertions.

  # Author list in "Last, F." form.
  def author_names
    ['Mattsson, M.', 'Summala, H.']
  end

  # Single-element affiliation list of the record.
  def affiliation_list
    ['Traffic Research Unit, Department of Psychology, University of Helsinki, Finland. markus.mattsson@helsinki.fi']
  end

  # Article title.
  def article_title
    'With power comes responsibility: motorcycle engine power and power-to-weight ratio in relation to accident risk.'
  end

  # MeSH headings of the record, in record order.
  def mesh_headings
    [
      'Accidents, Traffic/mortality/*statistics & numerical data',
      'Adult',
      'Age Distribution',
      'Body Weight',
      'Female',
      'Finland/epidemiology',
      'Humans',
      'Linear Models',
      'Male',
      'Motorcycles/classification/legislation & jurisprudence/*statistics & numerical data',
      'Questionnaires',
      'Risk',
      'Social Responsibility',
      'Young Adult'
    ]
  end

  # Placeholder text stored in the fixture in place of the real abstract.
  def abstract_placeholder
    "(The abstract is omitted to avoid copyright issues. Please find the abstract at http://www.ncbi.nlm.nih.gov/pubmed/20146148. We believe that other information in this entry is within public domain, according to \"Copyright and Disclaimers\" in http://www.ncbi.nlm.nih.gov/About/disclaimer.html.)"
  end
end #class TestMEDLINE_20146148
RESULTS: We have found that agile methods are well suited to the exploratory and iterative nature of scientific inquiry. They provide a robust framework for reproducing scientific results and for developing clinical support systems. The agile development approach also provides a model for collaboration between software engineers and researchers. We present our experience using agile methodologies in projects at six different biomedical software development organizations. The organizations include academic, commercial and government development teams, and included both bioinformatics and clinical support applications. We found that agile practices were a match for the needs of our biomedical projects and contributed to the success of our organizations. CONCLUSION: We found that the agile development approach was a good fit for our organizations, and that these practices should be applicable and valuable to other biomedical software development efforts. Although we found differences in how agile methods were used, we were also able to identify a set of core practices that were common to all of the groups, and that could be a focus for others seeking to adopt these methods. AD - SRA International, 4300 Fair Lakes Court, Fairfax, VA 22033, USA. david_kane@sra.com FAU - Kane, David W AU - Kane DW FAU - Hohman, Moses M AU - Hohman MM FAU - Cerami, Ethan G AU - Cerami EG FAU - McCormick, Michael W AU - McCormick MW FAU - Kuhlmann, Karl F AU - Kuhlmann KF FAU - Byrd, Jeff A AU - Byrd JA LA - eng GR - U01 MH061915-03/MH/NIMH NIH HHS/United States GR - U01 MH061915-04/MH/NIMH NIH HHS/United States GR - U01 MH61915/MH/NIMH NIH HHS/United States PT - Journal Article PT - Research Support, N.I.H., Extramural PT - Research Support, Non-U.S. 
Gov't DEP - 20060530 PL - England TA - BMC Bioinformatics JT - BMC bioinformatics JID - 100965194 SB - IM MH - Algorithms MH - Automation MH - Computational Biology/*methods MH - Computers MH - Database Management Systems MH - Databases, Genetic MH - Diffusion of Innovation MH - Hospital Information Systems MH - Hospitals MH - Humans MH - Medical Informatics MH - Multicenter Studies as Topic MH - Programming Languages MH - Software MH - *Software Design MH - Systems Integration PMC - PMC1539031 OID - NLM: PMC1539031 EDAT- 2006/06/01 09:00 MHDA- 2006/09/29 09:00 CRDT- 2006/06/01 09:00 PHST- 2005/11/17 [received] PHST- 2006/05/30 [accepted] PHST- 2006/05/30 [aheadofprint] AID - 1471-2105-7-273 [pii] AID - 10.1186/1471-2105-7-273 [doi] PST - epublish SO - BMC Bioinformatics. 2006 May 30;7:273. EOMED end end bio-1.4.3.0001/test/unit/bio/db/test_newick.rb0000644000004100000410000002365712200110570020640 0ustar www-datawww-data# # = test/bio/db/newick.rb - Unit test for Bio::Newick # # Copyright:: Copyright (C) 2004-2006 # Daniel Amelang # Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/tree' require 'bio/db/newick' module Bio class TestNewick < Test::Unit::TestCase TREE_STRING = <<-END_OF_TREE_STRING ( ( HexLEZ35:0.00263, HexMCZ42:0.00788 ):0.00854, ( HexFLZ48:0.00457, ( HexFLZ83:0.00217, HexFLZ13:0.00574 ):0.00100 ):0.04692, HexLEZ73:0.00268 )[0.1250]; END_OF_TREE_STRING def setup @newick = Bio::Newick.new(TREE_STRING) end def test_string_tree tree = @newick.tree assert_equal(3, tree.children(tree.root).size) assert_equal(9, tree.descendents(tree.root).size) assert_equal(6, tree.leaves.size) leaf = tree.get_node_by_name('HexFLZ83') assert_equal(3, tree.ancestors(leaf).size) assert_equal(tree.path(tree.root, leaf)[1], 
tree.ancestors(leaf)[1]) assert_equal(0.00217, tree.get_edge(leaf, tree.parent(leaf)).distance) assert_equal("HexFLZ83", leaf.name) end def test_reparse tree = @newick.tree tree_text = tree.output(:newick) assert_equal(@newick, @newick.reparse) assert_equal(tree_text, @newick.tree.output(:newick)) end def test_reparse_before_lazy_parsing # not to use @newick to guarantee that the Newick object # is before lazy parsing. newick = Bio::Newick.new(TREE_STRING) assert_equal(newick, newick.reparse) end end #class TestNewick class TestNewick2 < Test::Unit::TestCase TREE_STRING = <<-END_OF_TREE_STRING ( ( 'this is test':0.0625, 'test2 (abc, def)':0.125 ) 'internal node''s name' : 0.25, ( '''':0.03125, ( 'ABCAC_HUMAN [ABC superfamily]':0.015625, hypothetical_protein:0.5 ) ABC : 0.25 [99] ) test3 :0.5 )root; END_OF_TREE_STRING def setup @newick = Bio::Newick.new(TREE_STRING) end def test_string_tree tree = @newick.tree assert_equal('root', tree.root.name) assert_equal([ "this is test", "test2 (abc, def)", "internal node\'s name", "\'", "ABCAC_HUMAN [ABC superfamily]", "hypothetical protein", "ABC", "test3", "root" ].sort, tree.nodes.collect { |x| x.name }.sort) assert_equal(tree.children(tree.root).collect { |x| x.name }.sort, [ "internal node\'s name", "test3" ]) node = tree.get_node_by_name('ABC') assert_equal(99, node.bootstrap) assert_equal(1.5625, tree.distance(tree.get_node_by_name('hypothetical protein'), tree.get_node_by_name('this is test'))) end end #class TestNewick2 class TestNewickPrivate < Test::Unit::TestCase def setup @newick = Bio::Newick.new('') # dummy data end def test_parse_newick_leaf leaf_tokens = [ "A:B _C(D,E)F\'s G[H]", :":", '0.5', :"[", "&&NHX", :":", "S=human", :":", "E=1.1.1.1", :"]" ] node = Bio::Tree::Node.new edge = Bio::Tree::Edge.new options = {} assert_equal(true, @newick.instance_eval do __parse_newick_leaf(leaf_tokens, node, edge, options) end) assert_equal(:nhx, @newick.options[:original_format]) assert_equal("A:B _C(D,E)F\'s G[H]", 
# Tests for the Newick formatting helpers of Bio::Tree:
# __to_newick_format_label, __to_newick_format_leaf and
# __to_newick_format_leaf_NHX.  The helpers are invoked through __send__,
# which reaches them whatever their visibility is.
class TestBioTreeOutputPrivate < Test::Unit::TestCase

  def setup
    @tree = Bio::Tree.new
  end

  def test_to_newick_format_label
    # Each row: expected output, input label, formatter options.
    rows = [
      ['ABC',      'ABC',    {}],                      # unquoted_label
      ['A_B_C',    'A B C',  {}],                      # unquoted_label, replaces blank to underscore
      ["'A B_C'",  'A B_C',  {}],                      # quoted_label example 1
      ["'A(B),C'", 'A(B),C', {}],                      # quoted_label example 2
      ["'A_B_C'",  'A_B_C',  {}],                      # normal formatter
      ['A_B_C',    'A_B_C',  { :parser => :naive }]    # naive formatter
    ]
    rows.each do |expected, label, options|
      assert_equal(expected, newick_label(label, options))
    end
  end

  def test_to_newick_format_leaf
    named  = Bio::Tree::Node.new('ABC')
    branch = Bio::Tree::Edge.new(0.5)

    assert_equal('ABC:0.5', newick_leaf(named, branch, {}))
    # disable branch length
    assert_equal('ABC',
                 newick_leaf(named, branch, { :branch_length_style => :disabled }))

    # From here on the node carries a bootstrap value.
    named.bootstrap = 98
    [
      [{},                                   'ABC:0.5[98]'],  # default: molphy style bootstrap
      [{ :bootstrap_style => :molphy },      'ABC:0.5[98]'],  # force molphy style bootstrap
      [{ :bootstrap_style => :disabled },    'ABC:0.5'],      # disable bootstrap output
      [{ :bootstrap_style => :traditional }, 'ABC98:0.5']     # force traditional bootstrap style
    ].each do |options, expected|
      assert_equal(expected, newick_leaf(named, branch, options))
    end

    # normally, when traditional style, no node name allowed for the node
    unnamed = Bio::Tree::Node.new
    unnamed.bootstrap = 98
    assert_equal('98:0.5',
                 newick_leaf(unnamed, branch, { :bootstrap_style => :traditional }))
  end

  def test_to_newick_format_leaf_NHX
    leaf   = Bio::Tree::Node.new('ADH')
    branch = Bio::Tree::Edge.new(0.5)

    leaf.bootstrap = 98
    leaf.ec_number = '1.1.1.1'
    leaf.scientific_name = 'human'
    leaf.taxonomy_id = '9606'
    leaf.events.push :gene_duplication
    branch.log_likelihood = 1.5
    branch.width = 3

    expected = 'ADH:0.5[&&NHX:B=98:D=Y:E=1.1.1.1:L=1.5:S=human:T=9606:W=3]'
    actual = @tree.__send__(:__to_newick_format_leaf_NHX, leaf, branch, {})
    assert_equal(expected, actual)
  end

  private

  # Calls Bio::Tree#__to_newick_format_label on the tree under test.
  def newick_label(label, options)
    @tree.__send__(:__to_newick_format_label, label, options)
  end

  # Calls Bio::Tree#__to_newick_format_leaf on the tree under test.
  def newick_leaf(node, edge, options)
    @tree.__send__(:__to_newick_format_leaf, node, edge, options)
  end
end #class TestBioTreeOutputPrivate
# Checks Bio::FastaDefline on an NCBI-style definition line that carries
# three entries (sp, pir and gb), each introduced by a gi number and
# separated from the next by a Ctrl-A ("\001") character.
class TestFastaDeflineGIMultiple < Test::Unit::TestCase

  def setup
    # Joining the entries with "\001" produces one definition-line string
    # with Ctrl-A separators between the entries.
    entries = [
      ">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)",
      "gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig",
      "gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]"
    ]
    @defline = FastaDefline.new(entries.join("\001"))
  end

  def test_entry_id
    assert_equal("gi|2495000", @defline.entry_id)
  end

  def test_gi
    assert_equal("2495000", @defline.gi)
  end

  def test_accession
    assert_equal("AAB29504", @defline.accession)
  end

  def test_accessions
    assert_equal(["Q63931", "AAB29504"], @defline.accessions)
  end

  def test_acc_version
    assert_equal("AAB29504.1", @defline.acc_version)
  end

  def test_locus
    # assert_nil rather than assert_equal(nil, ...): clearer failure message
    # and no deprecation warning when test/unit is backed by Minitest.
    assert_nil(@defline.locus)
  end

  def test_list_ids
    expected = [
      ["gi", "2495000"], ["sp", "Q63931", "CCKR_CAVPO"],
      ["gi", "2147182"], ["pir", nil, "I51898"],
      ["gi", "544724"], ["gb", "AAB29504.1", nil],
      ["Cavia"]
    ]
    assert_equal(expected, @defline.list_ids)
  end

  def test_description
    assert_equal("CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)",
                 @defline.description)
  end

  def test_descriptions
    expected = [
      "CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)",
      "cholecystokinin A receptor - guinea pig",
      "cholecystokinin A receptor; CCK-A receptor [Cavia]"
    ]
    assert_equal(expected, @defline.descriptions)
  end

  def test_words
    expected = ["cavia", "cck-a", "cck-ar", "cholecystokinin",
                "guinea", "pig", "receptor", "type"]
    assert_equal(expected, @defline.words)
  end

  def test_id_strings
    expected = ["2495000", "Q63931", "CCKR_CAVPO", "2147182",
                "I51898", "544724", "AAB29504.1", "Cavia"]
    assert_equal(expected, @defline.id_strings)
  end

  def test_get_all_by_type
    assert_equal([], @defline.get_all_by_type)
  end
end
# Checks Bio::FastaDefline on a KEGG-style definition line
# ("sce:YBR160W ...") that has no gi number or accession, but carries
# bracketed EC and SP cross-references at the end of the description.
class TestFastaDeflineSce < Test::Unit::TestCase

  def setup
    definition_line = '>sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]'
    @defline = FastaDefline.new(definition_line)
  end

  def test_entry_id
    assert_equal("sce:YBR160W", @defline.entry_id)
  end

  # The nil expectations below use assert_nil rather than
  # assert_equal(nil, ...): the failure message is clearer and no
  # deprecation warning is raised when test/unit is backed by Minitest.

  def test_gi
    assert_nil(@defline.gi)
  end

  def test_accession
    assert_nil(@defline.accession)
  end

  def test_accessions
    assert_equal([], @defline.accessions)
  end

  def test_acc_version
    assert_nil(@defline.acc_version)
  end

  def test_locus
    assert_nil(@defline.locus)
  end

  def test_list_ids
    expected = [["sce", "YBR160W"], ["EC", "2.7.1.-"], ["SP", "CC28_YEAST"]]
    assert_equal(expected, @defline.list_ids)
  end

  def test_description
    expected = "CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]"
    assert_equal(expected, @defline.description)
  end

  def test_descriptions
    expected = ["CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]"]
    assert_equal(expected, @defline.descriptions)
  end

  def test_words
    expected = ["catalytic", "cyclin-dependent", "kinase", "srm5", "subunit"]
    assert_equal(expected, @defline.words)
  end

  def test_id_strings
    # "CC28_YEAST" is expected twice in the returned list.
    expected = ["YBR160W", "2.7.1.-", "CC28_YEAST", "CC28_YEAST", "CDC28"]
    assert_equal(expected, @defline.id_strings)
  end

  def test_get_all_by_type
    assert_equal([], @defline.get_all_by_type)
  end
end
# Checks Bio::FastaDefline on a UniProtKB/Swiss-Prot style definition line
# ("sp|accession|entry_name description OS=... GN=... PE=... SV=...").
class TestFastaDeflineSwissProt < Test::Unit::TestCase

  def setup
    definition_line = '>sp|P05100|3MG1_ECOLI DNA-3-methyladenine glycosylase 1 OS=Escherichia coli (strain K12) GN=tag PE=1 SV=1'
    @defline = FastaDefline.new(definition_line)
  end

  def test_entry_id
    assert_equal('sp|P05100|3MG1_ECOLI', @defline.entry_id)
  end

  def test_get
    assert_equal('3MG1_ECOLI', @defline.get('sp'))
  end

  def test_sp
    assert_equal('3MG1_ECOLI', @defline.sp)
  end

  def test_accession
    assert_equal("P05100", @defline.accession)
  end

  def test_accessions
    assert_equal(["P05100"], @defline.accessions)
  end

  # The nil expectations below use assert_nil rather than
  # assert_equal(nil, ...): the failure message is clearer and no
  # deprecation warning is raised when test/unit is backed by Minitest.

  def test_acc_version
    assert_nil(@defline.acc_version)
  end

  def test_locus
    assert_nil(@defline.locus)
  end

  def test_list_ids
    assert_equal([["sp", "P05100", "3MG1_ECOLI"]], @defline.list_ids)
  end

  def test_description
    expected = "DNA-3-methyladenine glycosylase 1 OS=Escherichia coli (strain K12) GN=tag PE=1 SV=1"
    assert_equal(expected, @defline.description)
  end

  def test_descriptions
    expected = ["DNA-3-methyladenine glycosylase 1 OS=Escherichia coli (strain K12) GN=tag PE=1 SV=1"]
    assert_equal(expected, @defline.descriptions)
  end

  def test_words
    expected = ["coli", "dna-3-methyladenine", "glycosylase", "gn=tag",
                "os=escherichia", "pe=1", "sv=1"]
    assert_equal(expected, @defline.words)
  end

  def test_id_strings
    assert_equal(["P05100", "3MG1_ECOLI", "K12"], @defline.id_strings)
  end

  def test_get_all_by_type
    assert_equal([], @defline.get_all_by_type)
  end
end
(strain 11128 / EHEC) GN=ECO111_p3-39 PE=4 SV=1' @defline = Bio::FastaDefline.new(definition_line) end def test_entry_id assert_equal('tr|C8URF0|C8URF0_ECO1A', @defline.entry_id ) end def test_get assert_equal('C8URF0_ECO1A', @defline.get('tr') ) end def test_tr assert_equal('C8URF0_ECO1A', @defline.tr ) end def test_accession assert_equal("C8URF0", @defline.accession) end def test_accessions assert_equal(["C8URF0"], @defline.accessions) end def test_acc_version assert_equal(nil, @defline.acc_version) end def test_locus assert_equal(nil, @defline.locus) end def test_list_ids assert_equal([["tr", "C8URF0", "C8URF0_ECO1A"]], @defline.list_ids) end def test_description assert_equal("Conserved predicted plasmid protein ECsL50 OS=Escherichia coli O111:H- (strain 11128 / EHEC) GN=ECO111_p3-39 PE=4 SV=1", @defline.description) end def test_descriptions assert_equal(["Conserved predicted plasmid protein ECsL50 OS=Escherichia coli O111:H- (strain 11128 / EHEC) GN=ECO111_p3-39 PE=4 SV=1"], @defline.descriptions) end def test_words assert_equal(["11128", "coli", "conserved", "ehec", "gn=eco111_p3-39", "os=escherichia", "pe=4", "plasmid", "predicted", "sv=1"], @defline.words) end def test_id_strings assert_equal(["C8URF0", "C8URF0_ECO1A", "ECsL50", "O111"], @defline.id_strings) end def test_get_all_by_type assert_equal([], @defline.get_all_by_type) end end end bio-1.4.3.0001/test/unit/bio/db/fasta/test_format_qual.rb0000644000004100000410000003040712200110570022757 0ustar www-datawww-data# # test/unit/bio/db/fasta/test_format_qual.rb - Unit test for Bio::Sequence::Format::Formatter::Fasta_numeric and Qual # # Copyright:: Copyright (C) 2009 Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/sequence' require 'bio/db/fasta/format_qual' module 
Bio class TestSequenceFormatFormatterFasta_numeric < Test::Unit::TestCase def setup @sequence = Bio::Sequence.new('acgt' * 50 + 'a') @sequence.quality_scores = (-100..100).to_a.freeze @sequence.entry_id = 'TEST0001' @sequence.definition = 'this is test' end def test_output expected = <<_END_EXPECTED_ >TEST0001 this is test -100 -99 -98 -97 -96 -95 -94 -93 -92 -91 -90 -89 -88 -87 -86 -85 -84 -83 -82 -81 -80 -79 -78 -77 -76 -75 -74 -73 -72 -71 -70 -69 -68 -67 -66 -65 -64 -63 -62 -61 -60 -59 -58 -57 -56 -55 -54 -53 -52 -51 -50 -49 -48 -47 -46 -45 -44 -43 -42 -41 -40 -39 -38 -37 -36 -35 -34 -33 -32 -31 -30 -29 -28 -27 -26 -25 -24 -23 -22 -21 -20 -19 -18 -17 -16 -15 -14 -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 _END_EXPECTED_ str = @sequence.output(:fasta_numeric) assert_equal(expected, str) # default width is 70 str = @sequence.output(:fasta_numeric, { :width => 70 }) assert_equal(expected, str) # Modifying the sequence does not affect the output. 
@sequence.delete!('a') str = @sequence.output(:fasta_numeric) assert_equal(expected, str) end def test_output_width_35 expected = <<_END_OF_EXPECTED_ >TEST0001 this is test -100 -99 -98 -97 -96 -95 -94 -93 -92 -91 -90 -89 -88 -87 -86 -85 -84 -83 -82 -81 -80 -79 -78 -77 -76 -75 -74 -73 -72 -71 -70 -69 -68 -67 -66 -65 -64 -63 -62 -61 -60 -59 -58 -57 -56 -55 -54 -53 -52 -51 -50 -49 -48 -47 -46 -45 -44 -43 -42 -41 -40 -39 -38 -37 -36 -35 -34 -33 -32 -31 -30 -29 -28 -27 -26 -25 -24 -23 -22 -21 -20 -19 -18 -17 -16 -15 -14 -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 _END_OF_EXPECTED_ str = @sequence.output(:fasta_numeric, { :width => 35 }) assert_equal(expected, str) end def test_output_width_nil expected = ">TEST0001 this is test\n" + (-100..100).collect { |x| x.to_s }.join(' ') + "\n" str = @sequence.output(:fasta_numeric, { :width => nil }) assert_equal(expected, str) end end #clsaa TestSequenceFormatFormatterFasta_numeric class TestSequenceFormatFormatterQual < Test::Unit::TestCase def setup @sequence = Bio::Sequence.new('acgt' * 28) @sequence.quality_scores = [ -100, *(-10..100).to_a ].freeze @sequence.entry_id = 'TEST0001' @sequence.definition = 'this is test' end def test_output expected = <<_END_EXPECTED_ >TEST0001 this is test -100 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 _END_EXPECTED_ str = @sequence.output(:qual) assert_equal(expected, str) # default width is 70 str = @sequence.output(:qual, { 
:width => 70 }) assert_equal(expected, str) end def test_output_width45 expected = <<_END_EXPECTED_ >TEST0001 this is test -100 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 _END_EXPECTED_ str = @sequence.output(:qual, { :width => 45 }) assert_equal(expected, str) end def test_output_after_truncating_sequence expected = <<_END_EXPECTED_ >TEST0001 this is test -100 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 _END_EXPECTED_ # Modifying the sequence affects the output. @sequence.delete!('a') str = @sequence.output(:qual) assert_equal(expected, str) end def test_output_after_adding_sequence expected = <<_END_EXPECTED_ >TEST0001 this is test -100 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 _END_EXPECTED_ # Modifying the sequence affects the output. 
@sequence.gsub!(/a/, 'at') str = @sequence.output(:qual) assert_equal(expected, str) end def test_output_with_default_score expected = <<_END_EXPECTED_ >TEST0001 this is test -100 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 -10 -10 -10 -10 -10 -10 -10 -10 _END_EXPECTED_ # Modifying the sequence affects the output. @sequence.concat('aaaatttt') str = @sequence.output(:qual, { :default_score => -10 }) assert_equal(expected, str) end def test_output_with_converting_score_solexa2phred expected = <<_END_EXPECTED_ >TEST0001 this is test 0 0 1 1 1 1 1 1 2 2 3 3 4 4 5 5 6 7 8 9 10 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 -99 -99 -99 -99 _END_EXPECTED_ @sequence.quality_score_type = :solexa @sequence.concat('aaaa') str = @sequence.output(:qual, { :default_score => -99 }) assert_equal(expected, str) end def test_output_with_converting_score_phred2solexa expected = <<_END_EXPECTED_ >TEST0001 this is test -6 -2 0 2 3 5 6 7 8 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 _END_EXPECTED_ @sequence.quality_score_type = :phred @sequence.quality_scores = @sequence.quality_scores.find_all { |x| x > 0 } str = @sequence.output(:qual, { :default_score => -99, :quality_score_type => :solexa }) 
assert_equal(expected, str) # If @sequence.quality_score_type == nil, :phred is assumed. @sequence.quality_score_type = nil str = @sequence.output(:qual, { :default_score => -75, :quality_score_type => :solexa }) expected2 = expected.gsub(/ \-99/, ' -75') assert_equal(expected2, str) end def test_output_from_error_probabilities # @sequence.quality_scores expected_qsc = <<_END_EXPECTED_ >TEST0001 this is test -100 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 _END_EXPECTED_ # @sequence.error_probabilities to phred score expected_ep_phred = <<_END_EXPECTED_ >TEST0001 this is test 0 3 10 18 28 39 51 63 76 90 104 119 134 149 165 181 197 213 230 247 264 282 300 317 336 354 372 391 409 428 447 467 486 505 525 545 565 585 605 625 645 666 686 707 727 748 769 790 811 832 854 875 897 918 940 962 983 1005 1027 1049 1071 1093 1116 1138 1160 1183 1205 1228 1250 1273 1296 1319 1342 1365 1388 1411 1434 1457 1480 1503 1527 1550 1574 1597 1621 1644 1668 1692 1715 1739 1763 1787 1811 1835 1859 1883 1907 1931 1956 1980 2004 2029 2053 2078 2102 2127 2151 2176 2200 2225 2250 2275 _END_EXPECTED_ # @sequence.error_probabilities to phred score expected_ep_solexa = <<_END_EXPECTED_ >TEST0001 this is test -80 0 9 18 28 39 51 63 76 90 104 119 134 149 165 181 197 213 230 247 264 282 300 317 336 354 372 391 409 428 447 467 486 505 525 545 565 585 605 625 645 666 686 707 727 748 769 790 811 832 854 875 897 918 940 962 983 1005 1027 1049 1071 1093 1116 1138 1160 1183 1205 1228 1250 1273 1296 1319 1342 1365 1388 1411 1434 1457 1480 1503 1527 1550 1574 1597 1621 1644 1668 1692 1715 1739 1763 1787 1811 1835 1859 1883 1907 1931 1956 1980 2004 2029 2053 2078 2102 2127 2151 2176 2200 2225 2250 2275 _END_EXPECTED_ 
@sequence.error_probabilities = (0...(@sequence.length)).collect { |i| ((i + 1) ** -i) } # Because Solexa score does not allow 1. @sequence.error_probabilities[0] = 0.99999999 # @sequence.quality_score_type: nil # output :qual, :quality_score_type => (not set) # # ==> using @sequence.quality_scores # @sequence.quality_score_type = nil str = @sequence.output(:qual) assert_equal(expected_qsc, str) # @sequence.quality_score_type: :phred # output :qual, :quality_score_type => (not set) # # ==> using @sequence.error_probabilities # @sequence.quality_score_type = :phred str = @sequence.output(:qual) assert_equal(expected_ep_phred, str) # @sequence.quality_score_type: nil # output :qual, :quality_score_type => :phred # # ==> using @sequence.error_probabilities # @sequence.quality_score_type = nil str = @sequence.output(:qual, :quality_score_type => :phred) assert_equal(expected_ep_phred, str) # @sequence.quality_score_type: :phred # output :qual, :quality_score_type => :solexa # # ==> using @sequence.error_probabilities # @sequence.quality_score_type = :phred str = @sequence.output(:qual, :quality_score_type => :solexa) assert_equal(expected_ep_solexa, str) # @sequence.quality_score_type: :solexa # output :qual, :quality_score_type => :phred # # ==> using @sequence.error_probabilities # @sequence.quality_score_type = :solexa str = @sequence.output(:qual, :quality_score_type => :phred) assert_equal(expected_ep_phred, str) # @sequence.quality_score_type: :phred # output :qual, :quality_score_type => :phred # # ==> using @sequence.quality_scores # @sequence.quality_score_type = :phred str = @sequence.output(:qual, :quality_score_type => :phred) assert_equal(expected_qsc, str) # After removing @sequence.quality_scores: # @sequence.quality_score_type: :phred # output :qual, :quality_score_type => :phred # # ==> using @sequence.error_probabilities # @sequence.quality_scores = nil @sequence.quality_score_type = :phred str = @sequence.output(:qual, :quality_score_type => :phred) 
assert_equal(expected_ep_phred, str) end end #class TestSequenceFormatFormatterQual end #module Bio bio-1.4.3.0001/test/unit/bio/db/fasta/test_defline.rb0000644000004100000410000001742312200110570022056 0ustar www-datawww-data# # test/unit/bio/db/fasta/test_defline.rb - Unit test for Bio::FastaDefline # # Copyright:: Copyright (C) 2010 Kazuhiro Hayashi # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/fasta/defline' module Bio class TestBioFastaDefline < Test::Unit::TestCase def setup #test for all the patterns. @rub = Bio::FastaDefline.new('>gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]') @ckr = Bio::FastaDefline.new(">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)\001gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig\001gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]") #from an exaple in the class file @sce = Bio::FastaDefline.new(">sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]") #from an exaple in the class file @emb = Bio::FastaDefline.new(">emb:CACDC28 [X80034] C.albicans CDC28 gene") #from an exaple in the class file @abc = Bio::FastaDefline.new(">ABC12345 this is test") #from an exaple in the class file @etc = Bio::FastaDefline.new(">fasta1") # In this case, the defline has only a id like string? 
#test for the other formats end def test_entry_id assert_equal("gi|671595", @rub.entry_id) assert_equal("gi|2495000", @ckr.entry_id) assert_equal("sce:YBR160W", @sce.entry_id) assert_equal("emb:CACDC28", @emb.entry_id) assert_equal("ABC12345", @abc.entry_id) assert_equal("fasta1", @etc.entry_id) end def test_to_s assert_equal("gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]", @rub.to_s) assert_equal("gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)\001gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig\001gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]", @ckr.to_s) assert_equal("sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]", @sce.to_s) assert_equal("emb:CACDC28 [X80034] C.albicans CDC28 gene", @emb.to_s) assert_equal("ABC12345 this is test", @abc.to_s) assert_equal("fasta1", @etc.to_s) end def test_description assert_equal("rubisco large subunit [Perovskia abrotanoides]", @rub.description) assert_equal("CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)", @ckr.description) assert_equal("CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]", @sce.description) assert_equal("[X80034] C.albicans CDC28 gene", @emb.description) assert_equal("this is test", @abc.description) assert_equal("", @etc.description) end def test_descriptions assert_equal(["rubisco large subunit [Perovskia abrotanoides]"], @rub.descriptions) assert_equal(["CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)", "cholecystokinin A receptor - guinea pig", "cholecystokinin A receptor; CCK-A receptor [Cavia]"], @ckr.descriptions) assert_equal(["CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]"], @sce.descriptions) assert_equal(["[X80034] C.albicans CDC28 gene"], @emb.descriptions) assert_equal("this is test", @abc.description) assert_equal("", 
@etc.description) #this result that return a string is correct? end def test_id_strings assert_equal(["671595", "CAA85678.1"], @rub.id_strings) assert_equal(["2495000", "Q63931", "CCKR_CAVPO", "2147182", "I51898", "544724", "AAB29504.1", "Cavia"], @ckr.id_strings) assert_equal(["YBR160W", "2.7.1.-", "CC28_YEAST", "CC28_YEAST", "CDC28"], @sce.id_strings) assert_equal(["CACDC28", "X80034", "CDC28", "X80034"] , @emb.id_strings) #this result that return "X80034" twice is correct? assert_equal(["ABC12345"], @abc.id_strings) assert_equal(["fasta1"], @etc.id_strings) end def test_words assert_equal(["abrotanoides", "large", "perovskia", "rubisco", "subunit"], @rub.words) assert_equal(["cavia", "cck-a", "cck-ar", "cholecystokinin", "guinea", "pig", "receptor", "type"], @ckr.words) assert_equal(["catalytic", "cyclin-dependent", "kinase", "srm5", "subunit"], @sce.words) assert_equal(["albicans"], @emb.words) #this result that return "X80034" twice is correct? assert_equal(["test"], @abc.words) assert_equal([], @etc.words) assert_equal(["CCK-A", "CCK-AR", "CHOLECYSTOKININ", "Cavia", "RECEPTOR", "TYPE", "cholecystokinin", "guinea", "pig", "receptor"], @ckr.words(true)) #case sensitive #probably, it need not check changes in second and third arguments. end def test_get #get each db from each pattern except the duplicate. assert_equal("671595", @rub.get("gi")) assert_equal("CCKR_CAVPO", @ckr.get("sp")) assert_equal("I51898", @ckr.get("pir")) assert_equal("AAB29504.1", @ckr.get("gb")) assert_equal("YBR160W", @sce.get("sce")) assert_equal("2.7.1.-", @sce.get("EC")) assert_equal("CC28_YEAST", @sce.get("SP")) assert_equal("CACDC28", @emb.get("emb")) #the other dbs end def test_get_by_type #specify each type in each pattern while refering to NSIDs. 
assert_equal("671595", @rub.get_by_type("gi")) assert_equal("CAA85678.1", @rub.get_by_type("acc_version")) assert_equal(nil, @rub.get_by_type("locus")) assert_equal("Q63931", @ckr.get_by_type("accession")) assert_equal("CCKR_CAVPO", @ckr.get_by_type("entry_id")) end def test_get_all_by_type #specify each type in each pattern while refering to NSIDs. assert_equal(["671595", "CAA85678.1"], @rub.get_all_by_type("gi","acc_version","locus")) assert_equal(["Q63931", "CCKR_CAVPO", "I51898"], @ckr.get_all_by_type("accession","entry_id")) end def test_locus #Any of the examples don't have the locus information ... assert_equal(nil, @rub.locus) end def test_gi assert_equal("671595", @rub.gi) assert_equal("2495000", @ckr.gi) assert_equal(nil, @sce.gi) #sce dosen't have "gi" in the type. end def test_acc_version assert_equal("CAA85678.1", @rub.acc_version) assert_equal("AAB29504.1", @ckr.acc_version) assert_equal("CACDC28", @emb.acc_version) end def test_accessions assert_equal(["CACDC28"], @emb.accessions) assert_equal(["CAA85678"], @rub.accessions) assert_equal(["Q63931", "AAB29504"], @ckr.accessions) assert_raise(RuntimeError){@sce.accesions} #sce dosen't have "accession" in the type. end def test_accession assert_equal("CACDC28", @emb.accession) assert_equal("CAA85678", @rub.accession) assert_equal("AAB29504", @ckr.accession) assert_raise(RuntimeError){@sce.accesion} #sce dosen't have "accession" in the type. 
# to cover the else statement ckr2 = Bio::FastaDefline.new(">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)") #from an exaple in the class file assert_equal("Q63931", ckr2.accession) end def test_method_missing #Methods specified with the types are tested only in this test metho.d assert_equal("CCKR_CAVPO", @ckr.sp) assert_equal("I51898", @ckr.pir) assert_equal("AAB29504.1", @ckr.gb) assert_equal("YBR160W", @sce.sce) assert_equal("2.7.1.-", @sce.EC) assert_equal("CC28_YEAST", @sce.SP) assert_equal("CACDC28", @emb.emb) end end #class TestBioFastaDefline end #module Bio bio-1.4.3.0001/test/unit/bio/db/test_aaindex.rb0000644000004100000410000001405512200110570020761 0ustar www-datawww-data# # test/unit/bio/db/test_aaindex.rb - Unit test for Bio::AAindex # # Copyright:: Copyright (C) 2006 # Mitsuteru C. Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/aaindex' module Bio class DataAAindex TestDataAAindex = Pathname.new(File.join(BioRubyTestDataPath, 'aaindex')).cleanpath.to_s def self.aax1 File.read(File.join(TestDataAAindex, "PRAM900102")) end def self.aax2 File.read(File.join(TestDataAAindex, "DAYM780301")) end end # A super class for Bio::AAindex1 and Bio::AAindex2 class TestAAindexConstant < Test::Unit::TestCase def test_delimiter rs = "\n//\n" assert_equal(rs, Bio::AAindex::DELIMITER) assert_equal(rs, Bio::AAindex::RS) end def test_tagsize assert_equal(2, Bio::AAindex::TAGSIZE) end end class TestAAindex < Test::Unit::TestCase def test_api api_methods = ['entry_id', 'definition', 'dblinks', 'author', 'title', 'journal', 'comment'] api_methods.each do |m| end end def test_auto_aax1 assert_equal(Bio::AAindex1, Bio::AAindex.auto(DataAAindex.aax1).class) end def test_auto_aax2 
assert_equal(Bio::AAindex2, Bio::AAindex.auto(DataAAindex.aax2).class) end end class TestAAindex1 < Test::Unit::TestCase def setup str = DataAAindex.aax1 @obj = Bio::AAindex1.new(str) end def test_entry_id assert_equal('PRAM900102', @obj.entry_id) end def test_definition assert_equal('Relative frequency in alpha-helix (Prabhakaran, 1990)', @obj.definition) end def test_dblinks assert_equal(['LIT:1614053b', 'PMID:2390062'], @obj.dblinks) end def test_author assert_equal('Prabhakaran, M.', @obj.author) end def test_title assert_equal('The distribution of physical, chemical and conformational properties in signal and nascent peptides', @obj.title) end def test_journal assert_equal('Biochem. J. 269, 691-696 (1990) Original reference of these three data: Creighton, T.E. In "Protein Structure and Melecular Properties", (Freeman, W.H., ed.), San Francisco P.235 (1983)', @obj.journal) end def test_comment assert_equal("", @obj.comment) end def test_correlation_coefficient # str = "LEVM780101 1.000 LEVM780104 0.964 PALJ810101 0.943 KANM800101 0.942 ISOY800101 0.929 MAXF760101 0.924 ROBB760101 0.916 GEIM800101 0.912 GEIM800104 0.907 RACS820108 0.904 PALJ810102 0.902 PALJ810109 0.898 NAGK730101 0.894 CRAJ730101 0.887 CHOP780201 0.873 TANS770101 0.854 KANM800103 0.850 QIAN880107 0.829 QIAN880106 0.827 BURA740101 0.805 NAGK730103 -0.809" # assert_equal(str, @obj.correlation_coefficient) # to be this ? 
hash = {'LEVM780101' => 1.000, 'LEVM780104' => 0.964, 'PALJ810101' => 0.943, 'KANM800101' => 0.942, 'ISOY800101' => 0.929, 'MAXF760101' => 0.924, 'ROBB760101' => 0.916, 'GEIM800101' => 0.912, 'GEIM800104' => 0.907, 'RACS820108' => 0.904, 'PALJ810102' => 0.902, 'PALJ810109' => 0.898, 'NAGK730101' => 0.894, 'CRAJ730101' => 0.887, 'CHOP780201' => 0.873, 'TANS770101' => 0.854, 'KANM800103' => 0.850, 'QIAN880107' => 0.829, 'QIAN880106' => 0.827, 'BURA740101' => 0.805, 'NAGK730103' => -0.809} assert_equal(hash, @obj.correlation_coefficient) end def test_index hash = {"V"=>0.91, "K"=>1.23, "W"=>0.99, "L"=>1.3, "A"=>1.29, "M"=>1.47, "Y"=>0.72, "C"=>1.11, "N"=>0.9, "D"=>1.04, "P"=>0.52, "E"=>1.44, "F"=>1.07, "Q"=>1.27, "G"=>0.56, "R"=>0.96, "S"=>0.82, "H"=>1.22, "T"=>0.82, "I"=>0.97} assert_equal(hash, @obj.index) end end class TestAAindex2 < Test::Unit::TestCase def setup str = DataAAindex.aax2 @obj = Bio::AAindex2.new(str) end def test_entry_id assert_equal('DAYM780301', @obj.entry_id) end def test_definition assert_equal('Log odds matrix for 250 PAMs (Dayhoff et al., 1978)', @obj.definition) end def test_dblinks assert_equal([], @obj.dblinks) end def test_author assert_equal("Dayhoff, M.O., Schwartz, R.M. 
and Orcutt, B.C.", @obj.author) end def test_title assert_equal("A model of evolutionary change in proteins", @obj.title) end def test_journal assert_equal('In "Atlas of Protein Sequence and Structure", Vol.5, Suppl.3 (Dayhoff, M.O., ed.), National Biomedical Research Foundation, Washington, D.C., p.352 (1978)', @obj.journal) end def test_comment assert_equal("", @obj.comment) end def test_rows ary = ["A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V"] assert_equal(ary, @obj.rows) end def test_cols ary = ["A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V"] assert_equal(ary, @obj.cols) end def test_matrix assert_equal(Matrix, @obj.matrix.class) end def test_matrix_2_2 assert_equal(2.0, @obj.matrix[2, 2]) end def test_matrix_1_2 assert_equal(0.0, @obj.matrix[1, 2]) end def test_access_A_R assert_equal(-2.0, @obj['A', 'R']) end def test_access_R_A assert_equal(-2.0, @obj['R', 'A']) end def test_matrix_A_R assert_equal(-2.0, @obj.matrix('A', 'R')) end def test_matrix_R_A assert_equal(-2.0, @obj.matrix('R', 'A')) end def test_matrix_determinant assert_in_delta(27926521998.0, @obj.matrix.determinant, 1e-3) end def test_matrix_rank assert_equal(20, @obj.matrix.rank) end def test_matrix_transpose assert_equal(@obj.matrix, @obj.matrix.transpose) ary = Matrix.row_vector([2.0, -2.0, 0.0, 0.0, -2.0, 0.0, 0.0, 1.0, -1.0, -1.0, -2.0, -1.0, -1.0, -4.0, 1.0, 1.0, 1.0, -6.0, -3.0, 0.0]).row(0) assert_equal(ary, @obj.matrix.transpose.row(0)) end end end bio-1.4.3.0001/test/unit/bio/db/test_litdb.rb0000644000004100000410000000511212200110570020440 0ustar www-datawww-data# # test/unit/bio/db/litdb.rb - Unit test for Bio::LITDB # # Copyright:: Copyright (C) 2010 Kazuhiro Hayashi # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries 
needed for the tests require 'test/unit' require 'bio/db/litdb' require 'bio/reference' module Bio class TestBioLITDB < Test::Unit::TestCase def setup filename = File.join(BioRubyTestDataPath, 'litdb', '1717226.litdb') @obj = Bio::LITDB.new(File.read(filename)) end # it return the reference infromation formatted as part of a Bio::Reference object. def test_reference expected = {:authors=> ["Boyd, L.A.", "Adam, L.", "Pelcher, L.E.", "McHughen, A.", "Hirji, R.", " Selvaraj, G."], :issue=>"1", :journal=>"Gene", :pages=>"45-52", :title=> "Characterization of an Escherichia coli gene encoding betaine aldehyde dehydrogenase (BADH). Structural similarity to mammalian ALDHs and a plant BADH.", :volume=>"103", :year=>"(1991)"} litdb_ref = @obj.reference actual = {:authors=>litdb_ref.authors, :journal=> litdb_ref.journal, :pages=> litdb_ref.pages, :volume=>litdb_ref.volume, :year=>litdb_ref.year, :issue=>litdb_ref.issue, :title=>litdb_ref.title } assert_equal(expected, actual) end #access to the each field with field_fetch method. #most methods are the same as values of Bio::Refence object. def test_entry_id assert_equal("1717226", @obj.entry_id) end def test_title expected = "Characterization of an Escherichia coli gene encoding betaine aldehyde dehydrogenase (BADH). Structural similarity to mammalian ALDHs and a plant BADH." assert_equal(expected, @obj.title) end def test_field assert_equal("q (sequence analysis)", @obj.field) end def test_journal assert_equal("Gene", @obj.journal) end def test_volume assert_equal("Vol.103, No.1, 45-52 (1991)", @obj.volume) end def test_keyword expected = ["*Betaine Aldehyde Dehydrogenase", "*betB Gene;E.coli", "Seq Determination;1854bp;491AAs", "Hydropathy Plot;*EC1.2.1.8", "Seq Comparison"] assert_equal(expected, @obj.keyword) end def test_author expected = "Boyd,L.A.;Adam,L.;Pelcher,L.E.;McHughen,A.;Hirji,R.; Selvaraj,G." 
assert_equal(expected, @obj.author) end end #class TestBioLITDB end #module Bio bio-1.4.3.0001/test/unit/bio/db/test_phyloxml_writer.rb0000644000004100000410000002457412200110570022627 0ustar www-datawww-data# # = test/unit/bio/db/test_phyloxml_writer.rb - Unit test for Bio::PhyloXML::Writer # # Copyright:: Copyright (C) 2009 # Diana Jaunzeikare # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'singleton' require 'bio/command' begin require 'libxml' rescue LoadError end if defined?(LibXML) then require 'bio/db/phyloxml/phyloxml_writer' end module Bio class TestPhyloXMLWriter_Check_LibXML < Test::Unit::TestCase def test_libxml assert(defined?(LibXML), "Error: libxml-ruby library is not present. Please install libxml-ruby library. It is needed for Bio::PhyloXML module. Unit test for PhyloXML will not be performed.") end end #class TestPhyloXMLWriter_Check_LibXML end #module Bio module Bio module TestPhyloXMLWriterData PHYLOXML_WRITER_TEST_DATA = Pathname.new(File.join(BioRubyTestDataPath, 'phyloxml')).cleanpath.to_s def self.example_xml File.join PHYLOXML_WRITER_TEST_DATA, 'phyloxml_examples.xml' end def self.mollusca_short_xml File.join PHYLOXML_WRITER_TEST_DATA, 'ncbi_taxonomy_mollusca_short.xml' end def self.made_up_xml File.join PHYLOXML_WRITER_TEST_DATA, 'made_up.xml' end end #end module TestPhyloXMLWriterData class TestPhyloXMLWriter < Test::Unit::TestCase # helper class to write files using temporary directory class WriteTo include Singleton def initialize @leave_tmpdir = ENV['BIORUBY_TEST_DEBUG'].to_s.empty? ? 
false : true @tests = nil @tests_passed = 0 @tmpdir = nil end attr_accessor :tests def test_passed @tests_passed += 1 if !@leave_tmpdir and @tmpdir and @tests and @tests_passed >= @tests then #$stderr.print "Removing #{@tmpdir.path}\n" @tmpdir.close! @tmpdir = nil @tests_passed = 0 end end def tmpdir @tmpdir ||= Bio::Command::Tmpdir.new('PhyloXML') @tmpdir end def file(f) File.join(self.tmpdir.path, f) end def example_xml_test self.file('phyloxml_examples_written.xml') end end #class WriteTo def setup @writeto = WriteTo.instance @writeto.tests ||= self.methods.collect { |x| x.to_s }.find_all { |y| /\Atest\_/ =~ y }.size end def teardown @writeto.test_passed end # def test_write # # @todo this is test for Tree.write # tree = Bio::PhyloXML::Tree.new # filename = @writeto.file('test.xml') # tree.write(filename) # end def test_init filename = @writeto.file("test2.xml") writer = Bio::PhyloXML::Writer.new(filename) tree = Bio::PhyloXML::Parser.open(TestPhyloXMLWriterData.mollusca_short_xml) { |px| px.next_tree } writer.write(tree) assert_nothing_thrown do Bio::PhyloXML::Parser.open(filename) { |px| true } end #File.delete(filename) end def test_simple_xml filename = @writeto.file("sample.xml") writer = Bio::PhyloXML::Writer.new(filename) tree = Bio::PhyloXML::Tree.new tree.rooted = true tree.name = "Test tree" root_node = Bio::PhyloXML::Node.new tree.root = root_node root_node.name = "A" #root_node.taxonomies[0] = Bio::PhyloXML::Taxonomy.new root_node.taxonomies << Bio::PhyloXML::Taxonomy.new root_node.taxonomies[0].scientific_name = "Animal animal" node2 = Bio::PhyloXML::Node.new node2.name = "B" tree.add_node(node2) tree.add_edge(root_node, node2) writer.write(tree) lines = File.readlines(filename) assert_equal("", lines[1].chomp) assert_equal(" ", lines[2].chomp) assert_equal(" Test tree", lines[3].chomp) assert_equal(" ", lines[4].chomp) assert_equal(" A", lines[5].chomp) assert_equal(" ", lines[6].chomp) assert_equal(" Animal animal", lines[7].chomp) assert_equal(" 
", lines[8].chomp) assert_equal(" B", lines[10].chomp) assert_equal(" ", lines[12].chomp) assert_equal(" ", lines[13].chomp) assert_equal("", lines[14].chomp) #File.delete(filename) end def test_phyloxml_examples_tree1 tree = Bio::PhyloXML::Parser.open(TestPhyloXMLWriterData.example_xml) { |px| px.next_tree } filename = @writeto.file('example_tree1.xml') writer = Bio::PhyloXML::Writer.new(filename) writer.write_branch_length_as_subelement = false writer.write(tree) assert_nothing_thrown do tree2 = Bio::PhyloXML::Parser.open(filename) { |px| true } end #File.delete(filename) #@todo check if branch length is written correctly end def test_phyloxml_examples_tree2 phyloxml = Bio::PhyloXML::Parser.open(TestPhyloXMLWriterData.example_xml) 2.times do @tree = phyloxml.next_tree end phyloxml.close filename = @writeto.file('example_tree2.xml') writer = Bio::PhyloXML::Writer.new(filename) writer.write(@tree) assert_nothing_thrown do tree2 = Bio::PhyloXML::Parser.open(filename) { |px| true } end #File.delete(filename) end def test_phyloxml_examples_tree4 phyloxml = Bio::PhyloXML::Parser.open(TestPhyloXMLWriterData.example_xml) 4.times do @tree = phyloxml.next_tree end phyloxml.close #@todo tree = phyloxml[4] filename = @writeto.file('example_tree4.xml') writer = Bio::PhyloXML::Writer.new(filename) writer.write(@tree) assert_nothing_thrown do @tree2 = Bio::PhyloXML::Parser.open(filename) { |px| px.next_tree } end assert_equal(@tree.name, @tree2.name) assert_equal(@tree.get_node_by_name('A').taxonomies[0].scientific_name, @tree2.get_node_by_name('A').taxonomies[0].scientific_name) assert_equal(@tree.get_node_by_name('B').sequences[0].annotations[0].desc, @tree2.get_node_by_name('B').sequences[0].annotations[0].desc) # assert_equal(@tree.get_node_by_name('B').sequences[0].annotations[0].confidence.value,@tree2.get_node_by_name('B').sequences[0].annotations[0].confidence.value) #File.delete(filename) end def test_phyloxml_examples_sequence_relation phyloxml = 
Bio::PhyloXML::Parser.open(TestPhyloXMLWriterData.example_xml) filename = @writeto.example_xml_test writer = Bio::PhyloXML::Writer.new(filename) phyloxml.each do |tree| writer.write(tree) end phyloxml.close assert_nothing_thrown do @phyloxml_test = Bio::PhyloXML::Parser.open(filename) end 5.times do @tree = @phyloxml_test.next_tree end @phyloxml_test.close assert_equal("x", @tree.sequence_relations[0].id_ref_0) assert_equal("z", @tree.sequence_relations[1].id_ref_1) assert_equal(nil, @tree.sequence_relations[2].distance) assert_equal("orthology", @tree.sequence_relations[2].type) #File.delete(filename) end def test_generate_xml_with_sequence tree = Bio::PhyloXML::Tree.new r = Bio::PhyloXML::Node.new tree.add_node(r) tree.root = r n = Bio::PhyloXML::Node.new tree.add_node(n) tree.add_edge(tree.root, n) tree.rooted = true n.name = "A" seq = PhyloXML::Sequence.new n.sequences[0] = seq seq.annotations[0] = PhyloXML::Annotation.new seq.annotations[0].desc = "Sample annotation" seq.name = "sequence name" seq.location = "somewhere" seq.accession = PhyloXML::Accession.new seq.accession.source = "ncbi" seq.accession.value = "AAB80874" seq.symbol = "adhB" seq.mol_seq = "TDATGKPIKCMAAIAWEAKKPLSIEEVEVAPPKSGEVRIKILHSGVCHTD" seq.uri = PhyloXML::Uri.new seq.uri.desc = "EMBL REPTILE DATABASE" seq.uri.uri = "http://www.embl-heidelberg.de/~uetz/families/Varanidae.html" seq.domain_architecture = PhyloXML::DomainArchitecture.new seq.domain_architecture.length = 1249 domain1 = PhyloXML::ProteinDomain.new seq.domain_architecture.domains << domain1 domain1.from = 6 domain1.to = 90 domain1.confidence = "7.0E-26" domain1.value = "CARD" domain2 = PhyloXML::ProteinDomain.new seq.domain_architecture.domains << domain2 domain2.from = 109 domain2.to = 414 domain2.confidence = "7.2E-117" domain2.value = "NB-ARC" filename = @writeto.file('sequence.xml') Bio::PhyloXML::Writer.new(filename).write(tree) assert_nothing_thrown do Bio::PhyloXML::Parser.open(filename) { |px| px.next_tree } end 
#File.delete(filename) end def test_phyloxml_examples_file outputfn = "phyloxml_examples_generated_in_test.xml" phyloxml = Bio::PhyloXML::Parser.open(TestPhyloXMLWriterData.example_xml) filename = @writeto.file(outputfn) writer = Bio::PhyloXML::Writer.new(filename) phyloxml.each do |tree| writer.write(tree) end writer.write_other(phyloxml.other) assert_nothing_thrown do Bio::PhyloXML::Parser.open(filename) { |px| true } end # The output file is not deleted since it might be used in the phyloxml # parser test. But since the order of tests can't be assumed, I can't # hard code it in. end def test_made_up_xml_file phyloxml = Bio::PhyloXML::Parser.open(TestPhyloXMLWriterData.made_up_xml) filename = @writeto.file("made_up_generated_in_test.xml") writer = Bio::PhyloXML::Writer.new(filename) # The output file is not deleted since it might be used in the phyloxml # parser test. But since the order of tests can't be assumed, I can't # hard code it in. phyloxml.each do |tree| writer.write(tree) end phyloxml.close end end end if defined?(LibXML) #end module Bio bio-1.4.3.0001/test/unit/bio/db/biosql/0000755000004100000410000000000012200110570017250 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/db/biosql/ts_suite_biosql.rb0000644000004100000410000000030212200110570023000 0ustar www-datawww-data# To change this template, choose Tools | Templates # and open the template in the editor. $:.unshift File.dirname(__FILE__) require 'test/unit' # Add your testcases here require 'tc_biosql' bio-1.4.3.0001/test/unit/bio/db/biosql/tc_biosql.rb0000644000004100000410000001143412200110570021557 0ustar www-datawww-data# To change this template, choose Tools | Templates # and open the template in the editor. 
# loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio' module Bio class TestBiosqlIO < Test::Unit::TestCase def setup @connection = Bio::SQL.establish_connection({'development'=>{'hostname'=>'localhost','database'=>"bioseq", 'adapter'=>"jdbcmysql", 'username'=>"febo", 'password'=>nil}},'development') @str_genbank=<Bio::GenBank.new(@str_genbank).to_biosequence, :biodatabase=>Bio::SQL::Biodatabase.find(:first)) assert_not_nil(@@x) end def test_03_input_output bioseq = Bio::SQL.fetch_accession("X64011") assert_not_nil bioseq assert_equal(@str_genbank, bioseq.to_biosequence.output(:genbank)) end def test_04_bioentry_data_format assert_equal('26-SEP-2006', @@x.date_modified.to_s) end def test_05_title assert_equal('Cloning of a superoxide dismutase gene from Listeria ivanovii by functional complementation in Escherichia coli and characterization of the gene product',@@x.references.first.title) end def test_99_delete_bioentry assert_not_nil(@@x.delete) end end end bio-1.4.3.0001/test/unit/bio/db/test_soft.rb0000644000004100000410000001512212200110570020317 0ustar www-datawww-data# # test/unit/bio/db/test_soft.rb - Unit test for Bio::SOFT # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/soft' module Bio #:nodoc: class TestSOFT < Test::Unit::TestCase #:nodoc: def setup test_data_path = Pathname.new(File.join(BioRubyTestDataPath, 'soft')).cleanpath.to_s series_filename = File.join(test_data_path, 'GSE3457_family_partial.soft') dataset_filename = 
File.join(test_data_path, 'GDS100_partial.soft') @obj_series = Bio::SOFT.new( IO.readlines(series_filename)) @obj_dataset = Bio::SOFT.new( IO.readlines(dataset_filename)) end def test_series assert_equal( @obj_series.platform[:geo_accession], 'GPL2092') assert_equal( @obj_series.platform[:organism], 'Populus') assert_equal( @obj_series.platform[:contributor], ["Jingyi,,Li", "Olga,,Shevchenko", "Steve,H,Strauss", "Amy,M,Brunner"]) assert_equal( @obj_series.platform[:data_row_count], '240') assert_equal( @obj_series.platform.keys.sort {|a,b| a.to_s <=> b.to_s}[0..2], [:contact_address, :contact_city, :contact_country]) assert_equal( @obj_series.platform[:"contact_zip/postal_code"], '97331') assert_equal( @obj_series.platform[:table].header, ["ID", "GB_ACC", "SPOT_ID", "Function/Family", "ORGANISM", "SEQUENCE"]) assert_equal( @obj_series.platform[:table].header_description, {"ORGANISM"=>"sequence sources", "SEQUENCE"=>"oligo sequence used", "Function/Family"=>"gene functions and family", "ID"=>"", "SPOT_ID"=>"", "GB_ACC"=>"Gene bank accession number"}) assert_equal( @obj_series.platform[:table].rows.size, 240) assert_equal( @obj_series.platform[:table].rows[5], ["A039P68U", "AI163321", "", "TF, flowering protein CONSTANS", "P. tremula x P. tremuloides", "AGAAAATTCGATATACTGTCCGTAAAGAGGTAGCACTTAGAATGCAACGGAATAAAGGGCAGTTCACCTC"]) assert_equal( @obj_series.platform[:table].rows[5][4], 'P. tremula x P. tremuloides') assert_equal( @obj_series.platform[:table].rows[5][:organism], 'P. tremula x P. tremuloides') assert_equal( @obj_series.platform[:table].rows[5]['ORGANISM'], 'P. tremula x P. 
tremuloides') assert_equal( @obj_series.series[:geo_accession], 'GSE3457') assert_equal( @obj_series.series[:contributor], ["Jingyi,,Li", "Olga,,Shevchenko", "Ove,,Nilsson", "Steve,H,Strauss", "Amy,M,Brunner"]) assert_equal( @obj_series.series[:platform_id], 'GPL2092') assert_equal( @obj_series.series[:sample_id].size, 74) assert_equal( @obj_series.series[:sample_id][0..4], ["GSM77557", "GSM77558", "GSM77559", "GSM77560", "GSM77561"]) assert_equal( @obj_series.database[:name], 'Gene Expression Omnibus (GEO)') assert_equal( @obj_series.database[:ref], 'Nucleic Acids Res. 2005 Jan 1;33 Database Issue:D562-6') assert_equal( @obj_series.database[:institute], 'NCBI NLM NIH') assert_equal( @obj_series.samples.size, 2) assert_equal( @obj_series.samples[:GSM77557][:series_id], 'GSE3457') assert_equal( @obj_series.samples['GSM77557'][:series_id], 'GSE3457') assert_equal( @obj_series.samples[:GSM77557][:platform_id], 'GPL2092') assert_equal( @obj_series.samples[:GSM77557][:type], 'RNA') assert_equal( @obj_series.samples[:GSM77557][:title], 'fb6a1') assert_equal( @obj_series.samples[:GSM77557][:table].header, ["ID_REF", "VALUE"]) assert_equal( @obj_series.samples[:GSM77557][:table].header_description, {"ID_REF"=>"", "VALUE"=>"normalized signal intensities"}) assert_equal( @obj_series.samples[:GSM77557][:table].rows.size, 217) assert_equal( @obj_series.samples[:GSM77557][:table].rows[5], ["A039P68U", "5.36"]) assert_equal( @obj_series.samples[:GSM77557][:table].rows[5][0], 'A039P68U') assert_equal( @obj_series.samples[:GSM77557][:table].rows[5][:id_ref], 'A039P68U') assert_equal( @obj_series.samples[:GSM77557][:table].rows[5]['ID_REF'], 'A039P68U') end def test_dataset assert_equal( @obj_dataset.database[:name], 'Gene Expression Omnibus (GEO)') assert_equal( @obj_dataset.database[:ref], 'Nucleic Acids Res. 
2005 Jan 1;33 Database Issue:D562-6') assert_equal( @obj_dataset.database[:institute], 'NCBI NLM NIH') assert_equal( @obj_dataset.subsets.size, 8) assert_equal( @obj_dataset.subsets.keys.sort, ["GDS100_1", "GDS100_2", "GDS100_3", "GDS100_4", "GDS100_5", "GDS100_6", "GDS100_7", "GDS100_8"]) assert_equal( @obj_dataset.subsets[:GDS100_7], {:sample_id=>"GSM548,GSM543", :dataset_id=>"GDS100", :description=>"60 minute", :type=>"time"}) assert_equal( @obj_dataset.subsets['GDS100_7'][:sample_id], 'GSM548,GSM543') assert_equal( @obj_dataset.subsets[:GDS100_7][:sample_id], 'GSM548,GSM543') assert_equal( @obj_dataset.subsets[:GDS100_7][:dataset_id], 'GDS100') assert_equal( @obj_dataset.dataset[:order], 'none') assert_equal( @obj_dataset.dataset[:sample_organism], 'Escherichia coli') assert_equal( @obj_dataset.dataset[:table].header, ["ID_REF", "IDENTIFIER", "GSM549", "GSM542", "GSM543", "GSM547", "GSM544", "GSM545", "GSM546", "GSM548"]) assert_equal( @obj_dataset.dataset[:table].rows.size, 15) assert_equal( @obj_dataset.dataset[:table].rows[5], ["6", "EMPTY", "0.097", "0.217", "0.242", "0.067", "0.104", "0.162", "0.104", "0.154"]) assert_equal( @obj_dataset.dataset[:table].rows[5][4], '0.242') assert_equal( @obj_dataset.dataset[:table].rows[5][:gsm549], '0.097') assert_equal( @obj_dataset.dataset[:table].rows[5][:GSM549], '0.097') assert_equal( @obj_dataset.dataset[:table].rows[5]['GSM549'], '0.097') end end end bio-1.4.3.0001/test/unit/bio/db/kegg/0000755000004100000410000000000012200110570016674 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/db/kegg/test_enzyme.rb0000644000004100000410000002151112200110570021567 0ustar www-datawww-data# # test/unit/bio/db/kegg/test_enzyme.rb - Unit test for Bio::KEGG::ENZYME # # Copyright:: Copyright (C) 2009 Naohisa Goto # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries 
needed for the tests require 'test/unit' require 'digest/sha1' require 'bio/db/kegg/enzyme' module Bio class TestKeggEnzyme < Test::Unit::TestCase def setup testdata_kegg = Pathname.new(File.join(BioRubyTestDataPath, 'KEGG')).cleanpath.to_s entry = File.read(File.join(testdata_kegg, "1.1.1.1.enzyme")) @obj = Bio::KEGG::ENZYME.new(entry) end def test_entry assert_equal("EC 1.1.1.1 Enzyme", @obj.entry) end def test_entry_id assert_equal("1.1.1.1", @obj.entry_id) end def test_obsolete? assert_equal(false, @obj.obsolete?) end def test_names expected = [ "alcohol dehydrogenase", "aldehyde reductase", "ADH", "alcohol dehydrogenase (NAD)", "aliphatic alcohol dehydrogenase", "ethanol dehydrogenase", "NAD-dependent alcohol dehydrogenase", "NAD-specific aromatic alcohol dehydrogenase", "NADH-alcohol dehydrogenase", "NADH-aldehyde dehydrogenase", "primary alcohol dehydrogenase", "yeast alcohol dehydrogenase" ] assert_equal(expected, @obj.names) end def test_name assert_equal("alcohol dehydrogenase", @obj.name) end def test_classes assert_equal([ "Oxidoreductases;", "Acting on the CH-OH group of donors;", "With NAD+ or NADP+ as acceptor" ], @obj.classes) end def test_sysname assert_equal("alcohol:NAD+ oxidoreductase", @obj.sysname) end def test_reaction expected = "an alcohol + NAD+ = an aldehyde or ketone + NADH + H+ [RN:R07326 R07327]" assert_equal(expected, @obj.reaction) end def test_all_reac expected = "R07326 > R00623 R00754 R02124 R04805 R04880 R05233 R05234 R06917 R06927 R08281 R08306 R08557 R08558; R07327 > R00624 R08310; (other) R07105" assert_equal(expected, @obj.all_reac) end def test_iubmb_reactions expected = [ "R07326 > R00623 R00754 R02124 R04805 R04880 R05233 R05234 R06917 R06927 R08281 R08306 R08557 R08558", "R07327 > R00624 R08310" ] assert_equal(expected, @obj.iubmb_reactions) end def test_kegg_reactions assert_equal(["R07105"], @obj.kegg_reactions) end def test_substrates expected = [ "alcohol [CPD:C00069]", "NAD+ [CPD:C00003]" ] assert_equal(expected, 
@obj.substrates) end def test_products expected = [ "aldehyde [CPD:C00071]", "ketone [CPD:C01450]", "NADH [CPD:C00004]", "H+ [CPD:C00080]" ] assert_equal(expected, @obj.products) end def test_inhibitors assert_equal([], @obj.inhibitors) end def test_cofactors assert_equal(["Zinc [CPD:C00038]"], @obj.cofactors) end def test_comment expected = "A zinc protein. Acts on primary or secondary alcohols or hemi-acetals; the animal, but not the yeast, enzyme acts also on cyclic secondary alcohols." assert_equal(expected, @obj.comment) end def test_pathways_as_strings expected = [ "PATH: ec00010 Glycolysis / Gluconeogenesis", "PATH: ec00071 Fatty acid metabolism", "PATH: ec00260 Glycine, serine and threonine metabolism", "PATH: ec00350 Tyrosine metabolism", "PATH: ec00624 1- and 2-Methylnaphthalene degradation", "PATH: ec00641 3-Chloroacrylic acid degradation", "PATH: ec00830 Retinol metabolism", "PATH: ec00980 Metabolism of xenobiotics by cytochrome P450", "PATH: ec00982 Drug metabolism - cytochrome P450", "PATH: ec01100 Metabolic pathways" ] assert_equal(expected, @obj.pathways_as_strings) end def test_pathways_as_hash expected = { "ec01100" => "Metabolic pathways", "ec00982" => "Drug metabolism - cytochrome P450", "ec00641" => "3-Chloroacrylic acid degradation", "ec00830" => "Retinol metabolism", "ec00071" => "Fatty acid metabolism", "ec00260" => "Glycine, serine and threonine metabolism", "ec00624" => "1- and 2-Methylnaphthalene degradation", "ec00350" => "Tyrosine metabolism", "ec00010" => "Glycolysis / Gluconeogenesis", "ec00980" => "Metabolism of xenobiotics by cytochrome P450" } assert_equal(expected, @obj.pathways_as_hash) assert_equal(expected, @obj.pathways) end def test_orthologs_as_strings expected = [ "KO: K00001 alcohol dehydrogenase", "KO: K11440 choline dehydrogenase" ] assert_equal(expected, @obj.orthologs_as_strings) end def test_orthologs_as_hash expected = { "K11440" => "choline dehydrogenase", "K00001" => "alcohol dehydrogenase" } assert_equal(expected, 
@obj.orthologs_as_hash) assert_equal(expected, @obj.orthologs) end def test_genes_as_strings assert_equal(759, @obj.genes_as_strings.size) assert_equal("0b01addd884266d7e80fdc34f112b9a89b90cc54", Digest::SHA1.hexdigest(@obj.genes_as_strings.join("\n"))) end def test_genes_as_hash assert_equal(759, @obj.genes_as_hash.size) assert_equal("025e77f866a7edb0eccaaabcff31df90d8e1fca1", Digest::SHA1.hexdigest(@obj.genes_as_hash.keys.sort.join(";"))) assert_equal(["124", "125", "126", "127", "128", "130", "131"], @obj.genes_as_hash['hsa']) assert_equal(["BSU18430", "BSU26970", "BSU31050"], @obj.genes_as_hash['bsu']) assert_equal(["Tpen_1006", "Tpen_1516"], @obj.genes_as_hash['tpe']) end def test_genes assert_equal(759, @obj.genes.size) assert_equal("025e77f866a7edb0eccaaabcff31df90d8e1fca1", Digest::SHA1.hexdigest(@obj.genes.keys.sort.join(";"))) assert_equal(["124", "125", "126", "127", "128", "130", "131"], @obj.genes['hsa']) assert_equal(["BSU18430", "BSU26970", "BSU31050"], @obj.genes['bsu']) assert_equal(["Tpen_1006", "Tpen_1516"], @obj.genes['tpe']) end def test_diseases assert_equal([], @obj.diseases) end def test_motifs assert_equal([], @obj.motifs) end def test_structures expected = ["1A4U", "1A71", "1A72", "1ADB", "1ADC", "1ADF", "1ADG", "1AGN", "1AXE", "1AXG", "1B14", "1B15", "1B16", "1B2L", "1BTO", "1CDO", "1D1S", "1D1T", "1DEH", "1E3E", "1E3I", "1E3L", "1EE2", "1H2B", "1HDX", "1HDY", "1HDZ", "1HET", "1HEU", "1HF3", "1HLD", "1HSO", "1HSZ", "1HT0", "1HTB", "1JU9", "1JVB", "1LDE", "1LDY", "1LLU", "1M6H", "1M6W", "1MA0", "1MC5", "1MG0", "1MG5", "1MGO", "1MP0", "1N8K", "1N92", "1NTO", "1NVG", "1O2D", "1P1R", "1QLH", "1QLJ", "1QV6", "1QV7", "1R37", "1RJW", "1SBY", "1TEH", "1U3T", "1U3U", "1U3V", "1U3W", "1VJ0", "1YE3", "2EER", "2FZE", "2FZW", "2HCY", "2JHF", "2JHG", "2OHX", "2OXI", "3BTO", "3COS", "3HUD", "3I4C", "5ADH", "6ADH", "7ADH"] assert_equal(expected, @obj.structures) end def test_dblinks_as_strings expected = [ "ExplorEnz - The Enzyme Database: 1.1.1.1", 
"IUBMB Enzyme Nomenclature: 1.1.1.1", "ExPASy - ENZYME nomenclature database: 1.1.1.1", "UM-BBD (Biocatalysis/Biodegradation Database): 1.1.1.1", "BRENDA, the Enzyme Database: 1.1.1.1", "CAS: 9031-72-5" ] assert_equal(expected, @obj.dblinks_as_strings) end def test_dblinks_as_hash expected = { "UM-BBD (Biocatalysis/Biodegradation Database)" => [ "1.1.1.1" ], "ExPASy - ENZYME nomenclature database" => [ "1.1.1.1" ], "IUBMB Enzyme Nomenclature" => [ "1.1.1.1" ], "BRENDA, the Enzyme Database" => [ "1.1.1.1" ], "ExplorEnz - The Enzyme Database" => [ "1.1.1.1" ], "CAS" => [ "9031-72-5" ] } assert_equal(expected, @obj.dblinks_as_hash) assert_equal(expected, @obj.dblinks) end end #class TestKeggEnzyme < Test::Unit::TestCase end #module Bio bio-1.4.3.0001/test/unit/bio/db/kegg/test_drug.rb0000644000004100000410000001400212200110570021216 0ustar www-datawww-data# # test/unit/bio/db/kegg/test_drug.rb - Unit test for Bio::KEGG::DRUG # # Copyright:: Copyright (C) 2009 Naohisa Goto # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/kegg/drug' module Bio class TestBioKeggDRUG < Test::Unit::TestCase def setup filename = File.join(BioRubyTestDataPath, 'KEGG/D00063.drug') @obj = Bio::KEGG::DRUG.new(File.read(filename)) end def test_dblinks_as_hash expected = { "ChEBI"=>["28864"], "PubChem"=>["7847131"], "NIKKAJI"=>["J4.533K"], "PDB-CCD"=>["TOY"], "DrugBank"=>["DB00684"], "LigandBox"=>["D00063"], "CAS"=>["32986-56-4"] } assert_equal(expected, @obj.dblinks_as_hash) assert_equal(expected, @obj.dblinks) end def test_pathways_as_hash expected = {"map07021"=>"Aminoglycosides"} assert_equal(expected, @obj.pathways_as_hash) assert_equal(expected, @obj.pathways) end def test_entry_id assert_equal("D00063", @obj.entry_id) end def test_names expected = [ "Tobramycin 
(JP15/USP)", "TOB", "Tobracin (TN)", "Tobrex (TN)" ] assert_equal(expected, @obj.names) end def test_name expected = "Tobramycin (JP15/USP)" assert_equal(expected, @obj.name) end def test_formula assert_equal("C18H37N5O9", @obj.formula) end def test_mass assert_equal(467.2591, @obj.mass) end def test_activity expected = "Antibacterial" assert_equal(expected, @obj.activity) end def test_remark expected = "Same as: C00397 Therapeutic category: 1317 6123 ATC code: J01GB01 S01AA12" assert_equal(expected, @obj.remark) end def test_pathways_as_strings expected = [ "PATH: map07021 Aminoglycosides" ] assert_equal(expected, @obj.pathways_as_strings) end def test_dblinks_as_strings expected = [ "CAS: 32986-56-4", "PubChem: 7847131", "ChEBI: 28864", "DrugBank: DB00684", "PDB-CCD: TOY", "LigandBox: D00063", "NIKKAJI: J4.533K" ] assert_equal(expected, @obj.dblinks_as_strings) end def test_kcf expected = < # License:: The Ruby License # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/kegg/reaction' module Bio class TestKeggReaction < Test::Unit::TestCase def setup testdata_kegg = Pathname.new(File.join(BioRubyTestDataPath, 'KEGG')).cleanpath.to_s entry = File.read(File.join(testdata_kegg, "R00006.reaction")) @obj = Bio::KEGG::REACTION.new(entry) end def test_entry_id assert_equal('R00006', @obj.entry_id) end def test_name assert_equal('pyruvate:pyruvate acetaldehydetransferase (decarboxylating); 2-acetolactate pyruvate-lyase (carboxylating)', @obj.name) end def test_definition assert_equal('2-Acetolactate + CO2 <=> 2 Pyruvate', @obj.definition) end def test_equation assert_equal('C00900 + C00011 <=> 2 C00022', @obj.equation) end def test_rpairs_as_hash expected = { "RP00440" => [ "C00022_C00900", "main" ], "RP05698" => [ "C00011_C00022", "leave" ], "RP12733" => [ "C00022_C00900", "trans" ] } 
assert_equal(expected, @obj.rpairs_as_hash) assert_equal(expected, @obj.rpairs) end def test_rpairs_as_strings expected = [ 'RP: RP00440 C00022_C00900 main', 'RP: RP05698 C00011_C00022 leave', 'RP: RP12733 C00022_C00900 trans' ] assert_equal(expected, @obj.rpairs_as_strings) end def test_rpairs_as_tokens expected = %w( RP: RP00440 C00022_C00900 main RP: RP05698 C00011_C00022 leave RP: RP12733 C00022_C00900 trans ) assert_equal(expected, @obj.rpairs_as_tokens) end def test_pathways_as_strings assert_equal([ "PATH: rn00770 Pantothenate and CoA biosynthesis" ], @obj.pathways_as_strings) end def test_pathways_as_hash expected = { "rn00770" => "Pantothenate and CoA biosynthesis" } assert_equal(expected, @obj.pathways_as_hash) assert_equal(expected, @obj.pathways) end def test_enzymes assert_equal(["2.2.1.6"], @obj.enzymes) end def test_orthologs_as_strings assert_equal(["KO: K01652 acetolactate synthase I/II/III large subunit [EC:2.2.1.6]", "KO: K01653 acetolactate synthase I/III small subunit [EC:2.2.1.6]"], @obj.orthologs_as_strings) end def test_orthologs_as_hash expected = { 'K01652'=>"acetolactate synthase I/II/III large subunit [EC:2.2.1.6]", 'K01653'=>"acetolactate synthase I/III small subunit [EC:2.2.1.6]" } assert_equal(expected, @obj.orthologs_as_hash) assert_equal(expected, @obj.orthologs) end end end bio-1.4.3.0001/test/unit/bio/db/kegg/test_genes.rb0000644000004100000410000002465212200110570021372 0ustar www-datawww-data# # test/unit/bio/db/kegg/test_genes.rb - Unit test for Bio::KEGG::GENES # # Copyright:: Copyright (C) 2005 Mitsuteru Nakao # Copyright (C) 2010 Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/kegg/genes' module Bio class TestKeggGenesStructure < Test::Unit::TestCase def setup entry =<["16128513"], 
"UniProt"=>["P24186"], "NCBI-GeneID"=>["945221"], "ECOCYC"=>["EG10328"], "EcoGene"=>["EG10328"], "RegulonDB"=>["B0529"] } assert_equal(expected, @obj.dblinks_as_hash) end def test_pathways_as_hash expected = { "eco00630" => "Glyoxylate and dicarboxylate metabolism", "eco00670" => "One carbon pool by folate", "eco01100" => "Metabolic pathways" } assert_equal(expected, @obj.pathways_as_hash) end def test_orthologs_as_hash expected = { "K01491" => "methylenetetrahydrofolate dehydrogenase (NADP+) / methenyltetrahydrofolate cyclohydrolase [EC:1.5.1.5 3.5.4.9]" } assert_equal(expected, @obj.orthologs_as_hash) end def test_new assert_instance_of(Bio::KEGG::GENES, @obj) end def test_entry expected = {"organism"=>"E.coli", "division"=>"CDS", "id"=>"b0529"} assert_equal(expected, @obj.entry) end def test_entry_id assert_equal("b0529", @obj.entry_id) end def test_division assert_equal("CDS", @obj.division) end def test_organism assert_equal("E.coli", @obj.organism) end def test_name assert_equal("folD, ads, ECK0522, JW0518", @obj.name) end def test_names_as_array expected = ["folD", "ads", "ECK0522", "JW0518"] assert_equal(expected, @obj.names_as_array) assert_equal(expected, @obj.names) end def test_genes expected = ["folD", "ads", "ECK0522", "JW0518"] assert_equal(expected, @obj.genes) end def test_gene assert_equal("folD", @obj.gene) end def test_definition expected = "bifunctional 5,10-methylene-tetrahydrofolate dehydrogenase/5,10-methylene-tetrahydrofolate cyclohydrolase (EC:1.5.1.5 3.5.4.9)" assert_equal(expected, @obj.definition) end def test_eclinks assert_equal(["1.5.1.5", "3.5.4.9"], @obj.eclinks) end def test_orthologs_as_strings expected = ["K01491 methylenetetrahydrofolate dehydrogenase (NADP+) / methenyltetrahydrofolate cyclohydrolase [EC:1.5.1.5 3.5.4.9]"] assert_equal(expected, @obj.orthologs_as_strings) end def test_pathway expected = "eco00630 Glyoxylate and dicarboxylate metabolism eco00670 One carbon pool by folate eco01100 Metabolic pathways" 
assert_equal(expected, @obj.pathway) end def test_pathways_as_strings expected = [ "eco00630 Glyoxylate and dicarboxylate metabolism", "eco00670 One carbon pool by folate", "eco01100 Metabolic pathways" ] assert_equal(expected, @obj.pathways_as_strings) end def test_position assert_equal("complement(556098..556964)", @obj.position) end def test_chromosome assert_equal(nil, @obj.chromosome) end def test_gbposition assert_equal("complement(556098..556964)", @obj.gbposition) end def test_locations expected = Bio::Locations.new("complement(556098..556964)") assert_equal(expected, @obj.locations) end def test_motifs_as_strings expected = [ "Pfam: THF_DHG_CYH_C THF_DHG_CYH Amidohydro_1", "PROSITE: THF_DHG_CYH_1 THF_DHG_CYH_2" ] assert_equal(expected, @obj.motifs_as_strings) end def test_motifs_as_hash expected = { "Pfam" => ["THF_DHG_CYH_C", "THF_DHG_CYH", "Amidohydro_1"], "PROSITE" => ["THF_DHG_CYH_1", "THF_DHG_CYH_2"] } assert_equal(expected, @obj.motifs_as_hash) assert_equal(expected, @obj.motifs) assert_equal(expected, @obj.motif) end def test_dblinks_as_strings expected = [ "NCBI-GI: 16128513", "NCBI-GeneID: 945221", "RegulonDB: B0529", "EcoGene: EG10328", "ECOCYC: EG10328", "UniProt: P24186" ] assert_equal(expected, @obj.dblinks_as_strings) end def test_structure assert_equal(["1B0A"], @obj.structure) end def test_codon_usage expected = { "gcg"=>nil, "gtc"=>nil, "cat"=>nil, "ctg"=>nil, "tac"=>nil, "gga"=>nil, "agg"=>nil, "aaa"=>nil, "acc"=>nil, "att"=>nil, "cca"=>nil, "tgt"=>nil, "tta"=>nil, "gag"=>nil, "gct"=>nil, "tcg"=>nil, "ggc"=>nil, "agt"=>nil, "aac"=>nil, "ata"=>nil, "cgg"=>nil, "caa"=>nil, "ccc"=>nil, "ctt"=>nil, "tga"=>nil, "ttc"=>nil, "gat"=>nil, "gtg"=>nil, "tag"=>nil, "gca"=>nil, "aga"=>nil, "acg"=>nil, "atc"=>nil, "cgt"=>nil, "cac"=>nil, "cta"=>nil, "tgc"=>nil, "tct"=>nil, "ggg"=>nil, "gaa"=>nil, "gcc"=>nil, "gtt"=>nil, "agc"=>nil, "aag"=>nil, "act"=>nil, "cga"=>nil, "ccg"=>nil, "ctc"=>nil, "tat"=>nil, "tca"=>nil, "ttg"=>nil, "ggt"=>nil, "gac"=>nil, 
"gta"=>nil, "aat"=>nil, "aca"=>nil, "atg"=>nil, "cgc"=>nil, "cag"=>nil, "cct"=>nil, "tgg"=>nil, "taa"=>nil, "tcc"=>nil, "ttt"=>nil } assert_equal(expected, @obj.codon_usage) end def test_cu_list assert_equal([], @obj.cu_list) end def test_aaseq expected = "MAAKIIDGKTIAQQVRSEVAQKVQARIAAGLRAPGLAVVLVGSNPASQIYVASKRKACEEVGFVSRSYDLPETTSEAELLELIDTLNADNTIDGILVQLPLPAGIDNVKVLERIHPDKDVDGFHPYNVGRLCQRAPRLRPCTPRGIVTLLERYNIDTFGLNAVVIGASNIVGRPMSMELLLAGCTTTVTHRFTKNLRHHVENADLLIVAVGKPGFIPGDWIKEGAIVIDVGINRLENGKVVGDVVFEDAAKRASYITPVPGGVGPMTVATLIENTLQACVEYHDPQDE" assert_equal(expected, @obj.aaseq) end def test_aalen assert_equal(288, @obj.aalen) end def test_ntseq expected = "atggcagcaaagattattgacggtaaaacgattgcgcagcaggtgcgctctgaagttgctcaaaaagttcaggcgcgtattgcagccggactgcgggcaccaggactggccgttgtgctggtgggtagtaaccctgcatcgcaaatttatgtcgcaagcaaacgcaaggcttgtgaagaagtcgggttcgtctcccgctcttatgacctcccggaaaccaccagcgaagcggagctgctggagcttatcgatacgctgaatgccgacaacaccatcgatggcattctggttcaactgccgttaccggcgggtattgataacgtcaaagtgctggaacgtattcatccggacaaagacgtggacggtttccatccttacaacgtcggtcgtctgtgccagcgcgcgccgcgtctgcgtccctgcaccccgcgcggtatcgtcacgctgcttgagcgttacaacattgataccttcggcctcaacgccgtggtgattggcgcatcgaatatcgttggccgcccgatgagcatggaactgctgctggcaggttgcaccactacagtgactcaccgcttcactaaaaatctgcgtcatcacgtagaaaatgccgatctattgatcgttgccgttggcaagccaggctttattcccggtgactggatcaaagaaggcgcaattgtgattgatgtcggcatcaaccgtctggaaaatggcaaagttgtgggcgacgtcgtgtttgaagacgcggctaaacgcgcctcatacattacgcctgttcccggcggcgttggcccgatgacggttgccacgctgattgaaaacacgctacaggcgtgcgttgaatatcatgatccacaggatgagtaa" assert_equal(expected, @obj.ntseq) end def test_ntlen assert_equal(867, @obj.ntlen) end def test_pathway_after_pathways_as_strings str = "eco00630 Glyoxylate and dicarboxylate metabolism eco00670 One carbon pool by folate eco01100 Metabolic pathways" strary = [ "eco00630 Glyoxylate and dicarboxylate metabolism", "eco00670 One carbon pool by folate", "eco01100 Metabolic pathways" ] 2.times { assert_equal(str, @obj.pathway) assert_equal(strary, 
@obj.pathways_as_strings) } end def test_pathway_before_pathways_as_strings str = "eco00630 Glyoxylate and dicarboxylate metabolism eco00670 One carbon pool by folate eco01100 Metabolic pathways" strary = [ "eco00630 Glyoxylate and dicarboxylate metabolism", "eco00670 One carbon pool by folate", "eco01100 Metabolic pathways" ] 2.times { assert_equal(strary, @obj.pathways_as_strings) assert_equal(str, @obj.pathway) } end def test_keggclass expected = "Metabolism; Carbohydrate Metabolism; Glyoxylate and dicarboxylate metabolism [PATH:eco00630] Metabolism; Metabolism of Cofactors and Vitamins; One carbon pool by folate [PATH:eco00670]" assert_equal(expected, @obj.keggclass) end def test_keggclasses expected = [ "Metabolism; Carbohydrate Metabolism; Glyoxylate and dicarboxylate metabolism", "Metabolism; Metabolism of Cofactors and Vitamins; One carbon pool by folate" ] assert_equal(expected, @obj.keggclasses) end end #class TestBioKEGGGENES_b0529 end #module Bio bio-1.4.3.0001/test/unit/bio/db/kegg/test_kgml.rb0000644000004100000410000006401212200110570021215 0ustar www-datawww-data# # test/unit/bio/db/kegg/test_kgml.rb - Unit test for Bio::KEGG::KGML # # Copyright (C) 2012 Naohisa Goto # License:: The Ruby License # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/kegg/kgml' module Bio; module TestKeggKGML filename = File.join(BioRubyTestDataPath, 'KEGG', 'test.kgml') KGMLTestXMLstr = File.read(filename).freeze class TestKGMLPathway < Test::Unit::TestCase def setup xmlstr = KGMLTestXMLstr @obj = Bio::KEGG::KGML.new(xmlstr) end def test_name assert_equal 'path:xxx09876', @obj.name end def test_org assert_equal 'xxx', @obj.org end def test_number assert_equal '09876', @obj.number end def test_title assert_equal 'This is test title', @obj.title end def test_image assert_equal 
'http://example.com/pathway/ec/09876.png', @obj.image end def test_link assert_equal 'http://example.com/show_pathway?xxx09876', @obj.link end def test_entries__size assert_equal 3, @obj.entries.size end def test_relations__size assert_equal 1, @obj.relations.size end def test_reactions__size assert_equal 1, @obj.reactions.size end def test_entries=() a = [ nil, nil, nil ] b = [ nil, nil, nil, nil ] assert_equal a, (@obj.entries = a) assert_equal a, @obj.entries assert_equal b, (@obj.entries = b) assert_equal b, @obj.entries end def test_relations=() a = [ nil, nil, nil ] b = [ nil, nil, nil, nil ] assert_equal a, (@obj.relations = a) assert_equal a, @obj.relations assert_equal b, (@obj.relations = b) assert_equal b, @obj.relations end def test_reactions=() a = [ nil, nil, nil ] b = [ nil, nil, nil, nil ] assert_equal a, (@obj.reactions = a) assert_equal a, @obj.reactions assert_equal b, (@obj.reactions = b) assert_equal b, @obj.reactions end end #class TestKGMLPathway class TestKGMLEntrySetter < Test::Unit::TestCase def setup @obj = Bio::KEGG::KGML::Entry.new end def test_id=() assert_nil @obj.id assert_equal 1234, (@obj.id = 1234) assert_equal 1234, @obj.id assert_equal 4567, (@obj.id = 4567) assert_equal 4567, @obj.id end def test_name=() assert_nil @obj.name assert_equal 'cpd:C99999', (@obj.name = 'cpd:C99999') assert_equal 'cpd:C99999', @obj.name assert_equal 'cpd:C98765', (@obj.name = 'cpd:C98765') assert_equal 'cpd:C98765', @obj.name end def test_type=() assert_equal 'compound', (@obj.type = 'compound') assert_equal 'compound', @obj.type assert_equal 'enzyme', (@obj.type = 'enzyme') assert_equal 'enzyme', @obj.type end def test_link=() str1 = 'http://example.com/dbget-bin/www_bget?C99999'.freeze str2 = 'http://example.com/dbget-bin/www_bget?C98765'.freeze assert_equal str1, (@obj.link = str1) assert_equal str1, @obj.link assert_equal str2, (@obj.link = str2) assert_equal str2, @obj.link end def test_reaction=() assert_equal "rn:R99999", (@obj.reaction = 
'rn:R99999') assert_equal "rn:R99999", @obj.reaction assert_equal "rn:R98765", (@obj.reaction = 'rn:R98765') assert_equal "rn:R98765", @obj.reaction end def test_graphics=() a = [ nil, nil ] b = [ nil, nil, nil ] assert_equal a, (@obj.graphics = a) assert_equal a, @obj.graphics assert_equal b, (@obj.graphics = b) assert_equal b, @obj.graphics end def test_components=() a = [ nil, nil ] b = [ nil, nil, nil ] assert_equal a, (@obj.components = a) assert_equal a, @obj.components assert_equal b, (@obj.components = b) assert_equal b, @obj.components end end #class TestKGMLEntrySetter # for deprecated methods/attributes class TestKGMLEntrySetterDeprecated < Test::Unit::TestCase def setup @obj = Bio::KEGG::KGML::Entry.new end def test_entry_id=() assert_nil @obj.entry_id assert_equal 1234, (@obj.entry_id = 1234) assert_equal 1234, @obj.entry_id assert_equal 4567, (@obj.entry_id = 4567) assert_equal 4567, @obj.entry_id assert_equal 4567, @obj.id @obj.id = 7890 assert_equal 7890, @obj.entry_id end def test_category=() assert_nil @obj.category assert_equal 'compound', (@obj.category = 'compound') assert_equal 'compound', @obj.category assert_equal 'enzyme', (@obj.category = 'enzyme') assert_equal 'enzyme', @obj.category assert_equal 'enzyme', @obj.type @obj.type = 'gene' assert_equal 'gene', @obj.category end def test_pathway=() assert_nil @obj.pathway assert_equal 'deprecated', (@obj.pathway = 'deprecated') assert_equal 'deprecated', @obj.pathway assert_equal "don't use", (@obj.pathway = "don't use") assert_equal "don't use", @obj.pathway end def test_label=() assert_nil @obj.label assert_equal 'deprecated', (@obj.label = 'deprecated') assert_equal 'deprecated', @obj.label assert_equal "don't use", (@obj.label = "don't use") assert_equal "don't use", @obj.label assert_equal "don't use", @obj.graphics[-1].name @obj.graphics[-1].name = 'test' assert_equal 'test', @obj.label end def test_shape=() assert_nil @obj.shape assert_equal 'deprecated', (@obj.shape = 'deprecated') 
assert_equal 'deprecated', @obj.shape assert_equal "don't use", (@obj.shape = "don't use") assert_equal "don't use", @obj.shape assert_equal "don't use", @obj.graphics[-1].type @obj.graphics[-1].type = 'test' assert_equal 'test', @obj.shape end def test_x=() assert_equal 123, (@obj.x = 123) assert_equal 123, @obj.x assert_equal 456, (@obj.x = 456) assert_equal 456, @obj.x assert_equal 456, @obj.graphics[-1].x @obj.graphics[-1].x = 789 assert_equal 789, @obj.x end def test_y=() assert_equal 123, (@obj.y = 123) assert_equal 123, @obj.y assert_equal 456, (@obj.y = 456) assert_equal 456, @obj.y assert_equal 456, @obj.graphics[-1].y @obj.graphics[-1].y = 789 assert_equal 789, @obj.y end def test_width=() assert_equal 123, (@obj.width = 123) assert_equal 123, @obj.width assert_equal 456, (@obj.width = 456) assert_equal 456, @obj.width assert_equal 456, @obj.graphics[-1].width @obj.graphics[-1].width = 789 assert_equal 789, @obj.width end def test_height=() assert_equal 123, (@obj.height = 123) assert_equal 123, @obj.height assert_equal 456, (@obj.height = 456) assert_equal 456, @obj.height assert_equal 456, @obj.graphics[-1].height @obj.graphics[-1].height = 789 assert_equal 789, @obj.height end def test_fgcolor=() assert_equal "#E0E0E0", (@obj.fgcolor = "#E0E0E0") assert_equal "#E0E0E0", @obj.fgcolor assert_equal "#FFFFFF", (@obj.fgcolor = "#FFFFFF") assert_equal "#FFFFFF", @obj.fgcolor assert_equal "#FFFFFF", @obj.graphics[-1].fgcolor @obj.graphics[-1].fgcolor = "#99CCFF" assert_equal "#99CCFF", @obj.fgcolor end def test_bgcolor=() assert_equal "#E0E0E0", (@obj.bgcolor = "#E0E0E0") assert_equal "#E0E0E0", @obj.bgcolor assert_equal "#FFFFFF", (@obj.bgcolor = "#FFFFFF") assert_equal "#FFFFFF", @obj.bgcolor assert_equal "#FFFFFF", @obj.graphics[-1].bgcolor @obj.graphics[-1].bgcolor = "#99CCFF" assert_equal "#99CCFF", @obj.bgcolor end end #class TestKGMLEntrySetterDeprecated class TestKGMLEntry1234 < Test::Unit::TestCase def setup xmlstr = KGMLTestXMLstr @obj = 
Bio::KEGG::KGML.new(xmlstr).entries[0] end def test_id assert_equal 1234, @obj.id end def test_name assert_equal 'cpd:C99999', @obj.name end def test_type assert_equal 'compound', @obj.type end def test_link assert_equal 'http://example.com/dbget-bin/www_bget?C99999', @obj.link end def test_reaction assert_equal nil, @obj.reaction end def test_graphics__size assert_equal 1, @obj.graphics.size end def test_components assert_equal nil, @obj.components end end #class TestKGMLEntry1234 class TestKGMLEntry1 < Test::Unit::TestCase def setup xmlstr = KGMLTestXMLstr @obj = Bio::KEGG::KGML.new(xmlstr).entries[1] end def test_id assert_equal 1, @obj.id end def test_name assert_equal 'ec:1.1.1.1', @obj.name end def test_type assert_equal 'enzyme', @obj.type end def test_link assert_equal 'http://example.com/dbget-bin/www_bget?1.1.1.1', @obj.link end def test_reaction assert_equal 'rn:R99999', @obj.reaction end def test_graphics__size assert_equal 2, @obj.graphics.size end def test_components assert_equal nil, @obj.components end end #class TestKGMLEntry1 # for deprecated methods/attributes class TestKGMLEntry1Deprecated < Test::Unit::TestCase def setup xmlstr = KGMLTestXMLstr @obj = Bio::KEGG::KGML.new(xmlstr).entries[1] end def test_entry_id assert_equal 1, @obj.entry_id end def test_category assert_equal 'enzyme', @obj.category end def test_label=() assert_equal '1.1.1.1', @obj.label assert_equal '1.2.3.4', (@obj.label = '1.2.3.4') assert_equal '1.2.3.4', @obj.label assert_equal '1.2.3.4', @obj.graphics[-1].name assert_equal '9.8.7.6', (@obj.graphics[-1].name = '9.8.7.6') assert_equal '9.8.7.6', @obj.label # check if it doesn't modify graphics[0] assert_equal '1.1.1.1', @obj.graphics[0].name end def test_shape=() assert_equal 'line', @obj.shape assert_equal 'circle', (@obj.shape = 'circle') assert_equal 'circle', @obj.shape assert_equal 'circle', @obj.graphics[-1].type assert_equal 'rectangle', (@obj.graphics[-1].type = 'rectangle') assert_equal 'rectangle', @obj.shape # 
check if it doesn't modify graphics[0] assert_equal 'line', @obj.graphics[0].type end def test_x=() assert_equal 0, @obj.x assert_equal 123, (@obj.x = 123) assert_equal 123, @obj.x assert_equal 456, (@obj.x = 456) assert_equal 456, @obj.x assert_equal 456, @obj.graphics[-1].x @obj.graphics[-1].x = 789 assert_equal 789, @obj.x # check if it doesn't modify graphics[0] assert_equal 0, @obj.graphics[0].x end def test_y=() assert_equal 0, @obj.y assert_equal 123, (@obj.y = 123) assert_equal 123, @obj.y assert_equal 456, (@obj.y = 456) assert_equal 456, @obj.y assert_equal 456, @obj.graphics[-1].y @obj.graphics[-1].y = 789 assert_equal 789, @obj.y # check if it doesn't modify graphics[0] assert_equal 0, @obj.graphics[0].y end def test_width=() assert_equal 0, @obj.width assert_equal 123, (@obj.width = 123) assert_equal 123, @obj.width assert_equal 456, (@obj.width = 456) assert_equal 456, @obj.width assert_equal 456, @obj.graphics[-1].width @obj.graphics[-1].width = 789 assert_equal 789, @obj.width # check if it doesn't modify graphics[0] assert_equal 0, @obj.graphics[0].width end def test_height=() assert_equal 0, @obj.height assert_equal 123, (@obj.height = 123) assert_equal 123, @obj.height assert_equal 456, (@obj.height = 456) assert_equal 456, @obj.height assert_equal 456, @obj.graphics[-1].height @obj.graphics[-1].height = 789 assert_equal 789, @obj.height # check if it doesn't modify graphics[0] assert_equal 0, @obj.graphics[0].height end def test_fgcolor=() assert_equal '#FF99CC', @obj.fgcolor assert_equal "#E0E0E0", (@obj.fgcolor = "#E0E0E0") assert_equal "#E0E0E0", @obj.fgcolor assert_equal "#E0E0E0", @obj.graphics[-1].fgcolor @obj.graphics[-1].fgcolor = "#C0C0C0" assert_equal "#C0C0C0", @obj.fgcolor # check if it doesn't modify graphics[0] assert_equal "#99CCFF", @obj.graphics[0].fgcolor end def test_bgcolor=() assert_equal "#CC99FF", @obj.bgcolor assert_equal "#E0E0E0", (@obj.bgcolor = "#E0E0E0") assert_equal "#E0E0E0", @obj.bgcolor assert_equal "#E0E0E0", 
@obj.graphics[-1].bgcolor @obj.graphics[-1].bgcolor = "#C0C0C0" assert_equal "#C0C0C0", @obj.bgcolor # check if it doesn't modify graphics[0] assert_equal "#FFFFFF", @obj.graphics[0].bgcolor end end #class TestKGMLEntry1Deprecated class TestKGMLEntry567 < Test::Unit::TestCase def setup xmlstr = KGMLTestXMLstr @obj = Bio::KEGG::KGML.new(xmlstr).entries[2] end def test_id assert_equal 567, @obj.id end def test_name assert_equal 'undefined', @obj.name end def test_type assert_equal 'group', @obj.type end def test_link assert_equal nil, @obj.link end def test_reaction assert_equal nil, @obj.reaction end def test_graphics__size assert_equal 1, @obj.graphics.size end def test_components assert_equal [ 34, 56, 78, 90 ], @obj.components end end #class TestKGMLEntry567 class TestKGMLGraphicsSetter < Test::Unit::TestCase def setup @obj = Bio::KEGG::KGML::Graphics.new end def test_name=() assert_equal '1.1.1.1', (@obj.name = '1.1.1.1') assert_equal '1.1.1.1', @obj.name assert_equal 'C99999', (@obj.name = 'C99999') assert_equal 'C99999', @obj.name end def test_type=() assert_equal 'line', (@obj.type = 'line') assert_equal 'line', @obj.type assert_equal 'circle', (@obj.type = 'circle') assert_equal 'circle', @obj.type end def test_x=() assert_equal 123, (@obj.x = 123) assert_equal 123, @obj.x assert_equal 456, (@obj.x = 456) assert_equal 456, @obj.x end def test_y=() assert_equal 123, (@obj.y = 123) assert_equal 123, @obj.y assert_equal 456, (@obj.y = 456) assert_equal 456, @obj.y end def test_width=() assert_equal 123, (@obj.width = 123) assert_equal 123, @obj.width assert_equal 456, (@obj.width = 456) assert_equal 456, @obj.width end def test_height=() assert_equal 123, (@obj.height = 123) assert_equal 123, @obj.height assert_equal 456, (@obj.height = 456) assert_equal 456, @obj.height end def test_fgcolor=() assert_equal "#E0E0E0", (@obj.fgcolor = "#E0E0E0") assert_equal "#E0E0E0", @obj.fgcolor assert_equal "#FFFFFF", (@obj.fgcolor = "#FFFFFF") assert_equal "#FFFFFF", 
@obj.fgcolor end def test_bgcolor=() assert_equal "#E0E0E0", (@obj.bgcolor = "#E0E0E0") assert_equal "#E0E0E0", @obj.bgcolor assert_equal "#FFFFFF", (@obj.bgcolor = "#FFFFFF") assert_equal "#FFFFFF", @obj.bgcolor end def test_coords=() a = [[1, 2], [3, 4]] b = [[5, 6], [7, 8], [9, 10]] assert_equal a, (@obj.coords = a) assert_equal a, @obj.coords assert_equal b, (@obj.coords = b) assert_equal b, @obj.coords end end #class TestKGMLGraphicsSetter class TestKGMLGraphics1234 < Test::Unit::TestCase def setup xmlstr = KGMLTestXMLstr @obj = Bio::KEGG::KGML.new(xmlstr).entries[0].graphics[0] end def test_name assert_equal 'C99999', @obj.name end def test_type assert_equal 'circle', @obj.type end def test_fgcolor assert_equal "#E0E0E0", @obj.fgcolor end def test_bgcolor assert_equal "#D0E0F0", @obj.bgcolor end def test_x assert_equal 1314, @obj.x end def test_y assert_equal 1008, @obj.y end def test_width assert_equal 14, @obj.width end def test_height assert_equal 28, @obj.height end def test_coords assert_equal nil, @obj.coords end end #class TestKGMLGraphics1234 class TestKGMLGraphics1_0 < Test::Unit::TestCase def setup xmlstr = KGMLTestXMLstr @obj = Bio::KEGG::KGML.new(xmlstr).entries[1].graphics[0] end def test_name assert_equal '1.1.1.1', @obj.name end def test_type assert_equal 'line', @obj.type end def test_fgcolor assert_equal "#99CCFF", @obj.fgcolor end def test_bgcolor assert_equal "#FFFFFF", @obj.bgcolor end def test_x assert_equal 0, @obj.x end def test_y assert_equal 0, @obj.y end def test_width assert_equal 0, @obj.width end def test_height assert_equal 0, @obj.height end def test_coords assert_equal [[314,159], [265,358], [979,323]], @obj.coords end end #class TestKGMLGraphics1_0 class TestKGMLRelationSetter < Test::Unit::TestCase def setup @obj = Bio::KEGG::KGML::Relation.new end def test_entry1=() assert_nil @obj.entry1 assert_equal 123, (@obj.entry1 = 123) assert_equal 123, @obj.entry1 assert_equal 456, (@obj.entry1 = 456) assert_equal 456, @obj.entry1 
end def test_entry2=() assert_nil @obj.entry2 assert_equal 123, (@obj.entry2 = 123) assert_equal 123, @obj.entry2 assert_equal 456, (@obj.entry2 = 456) assert_equal 456, @obj.entry2 end def test_type=() assert_nil @obj.type assert_equal "ECrel", (@obj.type = "ECrel") assert_equal "ECrel", @obj.type assert_equal "maplink", (@obj.type = "maplink") assert_equal "maplink", @obj.type end def test_name=() assert_nil @obj.name assert_equal "hidden compound", (@obj.name = "hidden compound") assert_equal "hidden compound", @obj.name assert_equal "indirect effect", (@obj.name = "indirect effect") assert_equal "indirect effect", @obj.name end def test_value=() assert_nil @obj.value assert_equal "123", (@obj.value = "123") assert_equal "123", @obj.value assert_equal "-->", (@obj.value = "-->") assert_equal "-->", @obj.value end end #class TestKGMLRelationSetter # for deprecated methods/attributes class TestKGMLRelationDeprecated < Test::Unit::TestCase def setup @obj = Bio::KEGG::KGML::Relation.new end def test_node1=() assert_nil @obj.node1 assert_equal 123, (@obj.node1 = 123) assert_equal 123, @obj.node1 assert_equal 456, (@obj.node1 = 456) assert_equal 456, @obj.node1 assert_equal 456, @obj.entry1 @obj.entry1 = 789 assert_equal 789, @obj.node1 end def test_node2=() assert_nil @obj.node2 assert_equal 123, (@obj.node2 = 123) assert_equal 123, @obj.node2 assert_equal 456, (@obj.node2 = 456) assert_equal 456, @obj.node2 assert_equal 456, @obj.entry2 @obj.entry2 = 789 assert_equal 789, @obj.node2 end def test_rel=() assert_nil @obj.rel assert_equal "ECrel", (@obj.rel = "ECrel") assert_equal "ECrel", @obj.rel assert_equal "maplink", (@obj.rel = "maplink") assert_equal "maplink", @obj.rel assert_equal "maplink", @obj.type @obj.type = "PCrel" assert_equal "PCrel", @obj.rel end def test_edge @obj.value = "123" assert_equal 123, @obj.edge end end #class TestKGMLRelationDeprecated class TestKGMLRelation < Test::Unit::TestCase def setup xmlstr = KGMLTestXMLstr @obj = 
Bio::KEGG::KGML.new(xmlstr).relations[0] end def test_entry1 assert_equal 109, @obj.entry1 end def test_entry2 assert_equal 87, @obj.entry2 end def test_type assert_equal "ECrel", @obj.type end def test_name assert_equal "compound", @obj.name end def test_value assert_equal "100", @obj.value end end #class TestKGMLRelation class TestKGMLReactionSetter < Test::Unit::TestCase def setup @obj = Bio::KEGG::KGML::Reaction.new end def test_id=() assert_nil @obj.id assert_equal 1234, (@obj.id = 1234) assert_equal 1234, @obj.id assert_equal 4567, (@obj.id = 4567) assert_equal 4567, @obj.id end def test_name=() assert_nil @obj.name assert_equal 'rn:R99999 rn:R99998', (@obj.name = 'rn:R99999 rn:R99998') assert_equal 'rn:R99999 rn:R99998', @obj.name assert_equal 'rn:R98765 rn:R98764', (@obj.name = 'rn:R98765 rn:R98764') assert_equal 'rn:R98765 rn:R98764', @obj.name end def test_type=() assert_nil @obj.type assert_equal 'reversible', (@obj.type = 'reversible') assert_equal 'reversible', @obj.type assert_equal 'irreversible', (@obj.type = 'irreversible') assert_equal 'irreversible', @obj.type end def test_substraces=() assert_nil @obj.substrates a = [ nil, nil ] b = [ nil, nil, nil ] assert_equal a, (@obj.substrates = a) assert_equal a, @obj.substrates assert_equal b, (@obj.substrates = b) assert_equal b, @obj.substrates end def test_products=() assert_nil @obj.products a = [ nil, nil ] b = [ nil, nil, nil ] assert_equal a, (@obj.products = a) assert_equal a, @obj.products assert_equal b, (@obj.products = b) assert_equal b, @obj.products end # TODO: add tests for alt end #class TestKGMLReactionSetter class TestKGMLReactionSetterDeprecated < Test::Unit::TestCase def setup @obj = Bio::KEGG::KGML::Reaction.new end def test_entry_id=() assert_nil @obj.entry_id assert_equal "rn:R99999 rn:R99998", (@obj.entry_id = "rn:R99999 rn:R99998") assert_equal "rn:R99999 rn:R99998", @obj.entry_id assert_equal "rn:R99990 rn:R99991", (@obj.entry_id = "rn:R99990 rn:R99991") assert_equal "rn:R99990 
rn:R99991", @obj.entry_id assert_equal "rn:R99990 rn:R99991", @obj.name @obj.name = "rn:R98765 rn:R98766" assert_equal "rn:R98765 rn:R98766", @obj.entry_id end def test_direction=() assert_nil @obj.direction assert_equal 'reversible', (@obj.direction = 'reversible') assert_equal 'reversible', @obj.direction assert_equal 'irreversible', (@obj.direction = 'irreversible') assert_equal 'irreversible', @obj.direction assert_equal 'irreversible', @obj.type @obj.type = 'this is test' assert_equal 'this is test', @obj.direction end end #class TestKGMLReactionSetterDreprecated class TestKGMLReaction < Test::Unit::TestCase def setup xmlstr = KGMLTestXMLstr @obj = Bio::KEGG::KGML.new(xmlstr).reactions[0] end def test_id assert_equal 3, @obj.id end def test_name assert_equal "rn:R99999 rn:R99998", @obj.name end def test_type assert_equal "reversible", @obj.type end def test_substrates assert_equal [ "cpd:C99990", "cpd:C99991" ], @obj.substrates.collect { |x| x.name } assert_equal [ 3330, 3331 ], @obj.substrates.collect { |x| x.id } end def test_products assert_equal [ "cpd:C99902", "cpd:C99903" ], @obj.products.collect { |x| x.name } assert_equal [ 3332, 3333 ], @obj.products.collect { |x| x.id } end end #class TestKGMLReaction class TestKGMLSubstrate < Test::Unit::TestCase def setup xmlstr = KGMLTestXMLstr @obj0 = Bio::KEGG::KGML.new(xmlstr).reactions[0].substrates[0] @obj1 = Bio::KEGG::KGML.new(xmlstr).reactions[0].substrates[1] end def test_id assert_equal 3330, @obj0.id assert_equal 3331, @obj1.id end def test_name assert_equal 'cpd:C99990', @obj0.name assert_equal 'cpd:C99991', @obj1.name end end #class TestKGMLSubstrate class TestKGMLProduct < Test::Unit::TestCase def setup xmlstr = KGMLTestXMLstr @obj0 = Bio::KEGG::KGML.new(xmlstr).reactions[0].products[0] @obj1 = Bio::KEGG::KGML.new(xmlstr).reactions[0].products[1] end def test_id assert_equal 3332, @obj0.id assert_equal 3333, @obj1.id end def test_name assert_equal 'cpd:C99902', @obj0.name assert_equal 'cpd:C99903', 
@obj1.name end end #class TestKGMLProduct module TestKGMLSubstrateProductSetterMethods def test_initialize_0 assert_nil @obj.id assert_nil @obj.name end def test_initialize_1 obj = Bio::KEGG::KGML::SubstrateProduct.new(123) assert_equal 123, obj.id assert_nil obj.name end def test_initialize_2 obj = Bio::KEGG::KGML::SubstrateProduct.new(123, 'test') assert_equal 123, obj.id assert_equal 'test', obj.name end def test_id=() assert_nil @obj.id assert_equal 123, (@obj.id = 123) assert_equal 123, @obj.id assert_equal 456, (@obj.id = 456) assert_equal 456, @obj.id end def test_name=() assert_nil @obj.name assert_equal "cpd:C99990", (@obj.name = "cpd:C99990") assert_equal "cpd:C99990", @obj.name assert_equal "cpd:C99902", (@obj.name = "cpd:C99902") assert_equal "cpd:C99902", @obj.name end end #module TestKGMLSubstrateProductSetterMethods class TestKGMLSubstrateProductSetter < Test::Unit::TestCase include TestKGMLSubstrateProductSetterMethods def setup @obj = Bio::KEGG::KGML::SubstrateProduct.new end end # class TestKGMLSubstrateProductSetter class TestKGMLSubstrateSetter < Test::Unit::TestCase include TestKGMLSubstrateProductSetterMethods def setup @obj = Bio::KEGG::KGML::Substrate.new end end # class TestKGMLSubstrateSetter class TestKGMLProductSetter < Test::Unit::TestCase include TestKGMLSubstrateProductSetterMethods def setup @obj = Bio::KEGG::KGML::Product.new end end # class TestKGMLProductSetter end; end #module TestKeggKGML; #module Bio bio-1.4.3.0001/test/unit/bio/db/kegg/test_glycan.rb0000644000004100000410000001764212200110570021547 0ustar www-datawww-data# # test/unit/bio/db/kegg/test_glycan.rb - Unit test for Bio::KEGG::GLYCAN # # Copyright:: Copyright (C) 2009 Naohisa Goto # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/kegg/glycan' module 
Bio class TestBioKeggGLYCAN < Test::Unit::TestCase def setup filename = File.join(BioRubyTestDataPath, 'KEGG/G00024.glycan') @obj = Bio::KEGG::GLYCAN.new(File.read(filename)) end def test_dblinks_as_hash expected = { "JCGGDB"=>["JCGG-STR025711"], "GlycomeDB"=>["475"], "CCSD" => %w( 98 99 100 2225 2236 2237 2238 2239 2240 2241 2242 2243 3406 5035 5038 5887 14321 18613 25363 27572 28182 29046 29092 29175 29393 29521 29554 30734 30735 30848 30849 30850 30917 32646 33022 33851 33878 33952 34823 34829 34986 34995 35029 35050 35107 35108 35805 35833 35991 36236 36826 36863 37982 38587 38640 38672 42797 43915 44029 44775 45346 46438 46466 47186 48015 48891 49283 49293 50466 50469 50477 ) } assert_equal(expected, @obj.dblinks_as_hash) assert_equal(expected, @obj.dblinks) end def test_pathways_as_hash expected = { "ko01100" => "Metabolic pathways", "ko00512" => "O-Glycan biosynthesis" } assert_equal(expected, @obj.pathways_as_hash) assert_equal(expected, @obj.pathways) end def test_orthologs_as_hash expected = { "K00780" => "beta-galactoside alpha-2,3-sialyltransferase (sialyltransferase 4A) [EC:2.4.99.4]", "K00727" => "beta-1,3-galactosyl-O-glycosyl-glycoprotein beta-1,6-N-acetylglucosaminyltransferase [EC:2.4.1.102]", "K03368" => "beta-galactoside alpha-2,3-sialyltransferase (sialyltransferase 4B) [EC:2.4.99.4]", "K00731" => "glycoprotein-N-acetylgalactosamine 3-beta-galactosyltransferase [EC:2.4.1.122]" } assert_equal(expected, @obj.orthologs_as_hash) assert_equal(expected, @obj.orthologs) end def test_entry_id assert_equal("G00024", @obj.entry_id) end def test_name assert_equal("T antigen", @obj.name) end def test_composition expected = {"Ser/Thr"=>1, "Gal"=>1, "GalNAc"=>1} assert_equal(expected, @obj.composition) end def test_mass assert_equal(365.3, @obj.mass) end def test_keggclass expected = "Glycoprotein; O-Glycan Neoglycoconjugate" assert_equal(expected, @obj.keggclass) end def test_compounds assert_equal([], @obj.compounds) end def test_reactions expected = 
["R05908", "R05912", "R05913", "R06140"] assert_equal(expected, @obj.reactions) end def test_pathways_as_strings expected = [ "PATH: ko00512 O-Glycan biosynthesis", "PATH: ko01100 Metabolic pathways" ] assert_equal(expected, @obj.pathways_as_strings) end def test_enzymes expected = ["2.4.1.102", "2.4.1.122", "2.4.99.4", "3.2.1.97"] assert_equal(expected, @obj.enzymes) end def test_orthologs_as_strings expected = [ "KO: K00727 beta-1,3-galactosyl-O-glycosyl-glycoprotein beta-1,6-N-acetylglucosaminyltransferase [EC:2.4.1.102]", "KO: K00731 glycoprotein-N-acetylgalactosamine 3-beta-galactosyltransferase [EC:2.4.1.122]", "KO: K00780 beta-galactoside alpha-2,3-sialyltransferase (sialyltransferase 4A) [EC:2.4.99.4]", "KO: K03368 beta-galactoside alpha-2,3-sialyltransferase (sialyltransferase 4B) [EC:2.4.99.4]" ] assert_equal(expected, @obj.orthologs_as_strings) end def test_comment assert_equal("", @obj.comment) end def test_remark assert_equal("Same as: C04750 C04776", @obj.remark) end def test_references expected = [ "1 [PMID:12950230] Backstrom M, Link T, Olson FJ, Karlsson H, Graham R, Picco G, Burchell J, Taylor-Papadimitriou J, Noll T, Hansson GC. Recombinant MUC1 mucin with a breast cancer-like O-glycosylation produced in large amounts in Chinese-hamster ovary cells. Biochem. J. 376 (2003) 677-86.", "2 [PMID:14631106] Wu AM. Carbohydrate structural units in glycoproteins and polysaccharides as important ligands for Gal and GalNAc reactive lectins. J. Biomed. Sci. 10 (2003) 676-88." 
] assert_equal(expected, @obj.references) end def test_dblinks_as_strings expected = [ "CCSD: 98 99 100 2225 2236 2237 2238 2239 2240 2241 2242 2243 3406 5035 5038 5887 14321 18613 25363 27572 28182 29046 29092 29175 29393 29521 29554 30734 30735 30848 30849 30850 30917 32646 33022 33851 33878 33952 34823 34829 34986 34995 35029 35050 35107 35108 35805 35833 35991 36236 36826 36863 37982 38587 38640 38672 42797 43915 44029 44775 45346 46438 46466 47186 48015 48891 49283 49293 50466 50469 50477", "GlycomeDB: 475", "JCGGDB: JCGG-STR025711" ] assert_equal(expected, @obj.dblinks_as_strings) end def test_kcf expected = <["JCGG-STR026574"], "GlycomeDB"=>["5567"], "CCSD"=>["2549", "2550", "16559", "25204"] } assert_equal(expected, @obj.dblinks_as_hash) assert_equal(expected, @obj.dblinks) end def test_pathways_as_hash assert_equal({}, @obj.pathways_as_hash) assert_equal({}, @obj.pathways) end def test_orthologs_as_hash assert_equal({}, @obj.orthologs_as_hash) assert_equal({}, @obj.orthologs) end def test_entry_id assert_equal("G01366", @obj.entry_id) end def test_name assert_equal("", @obj.name) end def test_composition expected = {"GlcNAc"=>1, "4dlyxHex"=>1, "Man"=>2} assert_equal(expected, @obj.composition) end def test_mass assert_equal(691.6, @obj.mass) end def test_keggclass expected = "Glycoprotein; N-Glycan" assert_equal(expected, @obj.keggclass) end def test_compounds assert_equal([], @obj.compounds) end def test_reactions assert_equal([], @obj.reactions) end def test_pathways_as_strings assert_equal([], @obj.pathways_as_strings) end def test_enzymes assert_equal([], @obj.enzymes) end def test_orthologs_as_strings assert_equal([], @obj.orthologs_as_strings) end def test_comment expected = "synthetic (CCSD:2549)" assert_equal(expected, @obj.comment) end def test_remark assert_equal("", @obj.remark) end def test_references assert_equal([], @obj.references) end def test_dblinks_as_strings expected = [ "CCSD: 2549 2550 16559 25204", "GlycomeDB: 5567", "JCGGDB: 
JCGG-STR026574" ] assert_equal(expected, @obj.dblinks_as_strings) end def test_kcf expected = < # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/kegg/genome' module Bio class TestBioKEGGGENOME_T00005 < Test::Unit::TestCase def setup filename = File.join(BioRubyTestDataPath, 'KEGG/T00005.genome') @obj = Bio::KEGG::GENOME.new(File.read(filename)) end def test_new assert_instance_of(Bio::KEGG::GENOME, @obj) end def test_entry_id assert_equal("T00005", @obj.entry_id) end def test_name expected = "sce, S.cerevisiae, YEAST, 4932" assert_equal(expected, @obj.name) end def test_definition expected = "Saccharomyces cerevisiae S288C" assert_equal(expected, @obj.definition) end def test_taxonomy expected = { "lineage"=> "Eukaryota; Fungi; Dikarya; Ascomycota; Saccharomycotina; Saccharomycetes; Saccharomycetales; Saccharomycetaceae; Saccharomyces", "taxid"=>"TAX:4932" } assert_equal(expected, @obj.taxonomy) end def test_taxid assert_equal("TAX:4932", @obj.taxid) end def test_lineage expected = "Eukaryota; Fungi; Dikarya; Ascomycota; Saccharomycotina; Saccharomycetes; Saccharomycetales; Saccharomycetaceae; Saccharomyces" assert_equal(expected, @obj.lineage) end def test_data_source assert_equal("RefSeq", @obj.data_source) end def test_original_db assert_equal("SGD MIPS", @obj.original_db) end def test_original_databases assert_equal([ "SGD", "MIPS" ], @obj.original_databases) end def test_disease assert_equal("", @obj.disease) end def test_comment assert_equal("", @obj.comment) end def test_references data = [ { "authors" => ["Goffeau A", "et al."], "comments" => nil, "journal" => "Science", "pages" => "546-67", "pubmed" => "8849441", "title" => "Life with 6000 genes.", "volume" => "274", "year" => "1996", }, { "authors" => ["Bussey H", "et al."], "comments" => 
["(chromosome I)"], "journal" => "Proc Natl Acad Sci U S A", "pages" => "3809-13", "pubmed" => "7731988", "title" => "The nucleotide sequence of chromosome I from Saccharomyces cerevisiae.", "volume" => "92", "year" => "1995", }, { "authors" => ["Feldmann, H.", "et al."], "comments" => ["(chromosome II)"], "journal" => "EMBO J", "pages" => "5795-809", "pubmed" => "7813418", "title" => "Complete DNA sequence of yeast chromosome II.", "volume" => "13", "year" => "1994", }, { "authors" => ["Oliver, S.G.", "et al."], "comments" => ["(chromosome III)"], "journal" => "Nature", "pages" => "38-46", "pubmed" => "1574125", "title" => "The complete DNA sequence of yeast chromosome III.", "volume" => "357", "year" => "1992", }, { "authors" => ["Jacq C", "et al."], "comments" => ["(chromosome IV)"], "journal" => "Nature", "pages" => "75-8", "pubmed" => "9169867", "title" => "The nucleotide sequence of Saccharomyces cerevisiae chromosome IV.", "volume" => "387(6632 Suppl)", "year" => "1997", }, { "authors" => ["Dietrich FS", "et al."], "comments" => ["(chromosome V)"], "journal" => "Nature", "pages" => "78-81", "pubmed" => "9169868", "title" => "The nucleotide sequence of Saccharomyces cerevisiae chromosome V.", "volume" => "387(6632 Suppl)", "year" => "1997", }, { "authors" => ["Murakami, Y.", "et al."], "comments" => ["(chromosome VI)"], "journal" => "Nat Genet", "pages" => "261-8", "pubmed" => "7670463", "title" => "Analysis of the nucleotide sequence of chromosome VI from Saccharomyces cerevisiae.", "volume" => "10", "year" => "1995", }, { "authors" => ["Tettelin H", "et al."], "comments" => ["(chromosome VII)"], "journal" => "Nature", "pages" => "81-4", "pubmed" => "9169869", "title" => "The nucleotide sequence of Saccharomyces cerevisiae chromosome VII.", "volume" => "387(6632 Suppl)", "year" => "1997", }, { "authors" => ["Johnston, M.", "et al."], "comments" => ["(chromosome VIII)"], "journal" => "Science", "pages" => "2077-82", "pubmed" => "8091229", "title" => "Complete 
nucleotide sequence of Saccharomyces cerevisiae chromosome VIII.", "volume" => "265", "year" => "1994", }, { "authors" => ["Churcher C", "et al."], "comments" => ["(chromosome IX)"], "journal" => "Nature", "pages" => "84-7", "pubmed" => "9169870", "title" => "The nucleotide sequence of Saccharomyces cerevisiae chromosome IX.", "volume" => "387(6632 Suppl)", "year" => "1997", }, { "authors" => ["Galibert, F.", "et al."], "comments" => ["(chromosome X)"], "journal" => "EMBO J", "pages" => "2031-49", "pubmed" => "8641269", "title" => "Complete nucleotide sequence of Saccharomyces cerevisiae chromosome X.", "volume" => "15", "year" => "1996", }, { "authors" => ["Dujon, B.", "et al."], "comments" => ["(chromosome XI)"], "journal" => "Nature", "pages" => "371-8", "pubmed" => "8196765", "title" => "Complete DNA sequence of yeast chromosome XI.", "volume" => "369", "year" => "1994", }, { "authors" => ["Johnston M", "et al."], "comments" => ["(chromosome XII)"], "journal" => "Nature", "pages" => "87-90", "pubmed" => "9169871", "title" => "The nucleotide sequence of Saccharomyces cerevisiae chromosome XII.", "volume" => "387(6632 Suppl)", "year" => "1997", }, { "authors" => ["Bowman S", "et al."], "comments" => ["(chromosome XIII)"], "journal" => "Nature", "pages" => "90-3", "pubmed" => "9169872", "title" => "The nucleotide sequence of Saccharomyces cerevisiae chromosome XIII.", "volume" => "387(6632 Suppl)", "year" => "1997", }, { "authors" => ["Philippsen P", "et al."], "comments" => ["(chromosome XIV)"], "journal" => "Nature", "pages" => "93-8", "pubmed" => "9169873", "title" => "The nucleotide sequence of Saccharomyces cerevisiae chromosome XIV and its evolutionary implications.", "volume" => "387(6632 Suppl)", "year" => "1997", }, { "authors" => ["Dujon B", "et al."], "comments" => ["(chromosome XV)"], "journal" => "Nature", "pages" => "98-102", "pubmed" => "9169874", "title" => "The nucleotide sequence of Saccharomyces cerevisiae chromosome XV.", "volume" => "387(6632 
Suppl)", "year" => "1997", }, { "authors" => ["Bussey H", "et al."], "comments" => ["(chromosome XVI)"], "journal" => "Nature", "pages" => "103-5", "pubmed" => "9169875", "title" => "The nucleotide sequence of Saccharomyces cerevisiae chromosome XVI.", "volume" => "387(6632 Suppl)", "year" => "1997", } ] expected = data.collect { |h| Bio::Reference.new(h) } #assert_equal(expected, @obj.references) expected.each_with_index do |x, i| assert_equal(x, @obj.references[i]) end end def test_chromosomes expected = [{"SEQUENCE"=>"RS:NC_001133", "LENGTH"=>"230208", "CHROMOSOME"=>"I"}, {"SEQUENCE"=>"RS:NC_001134", "LENGTH"=>"813178", "CHROMOSOME"=>"II"}, {"SEQUENCE"=>"RS:NC_001135", "LENGTH"=>"316617", "CHROMOSOME"=>"III"}, {"SEQUENCE"=>"RS:NC_001136", "LENGTH"=>"1531919", "CHROMOSOME"=>"IV"}, {"SEQUENCE"=>"RS:NC_001137", "LENGTH"=>"576869", "CHROMOSOME"=>"V"}, {"SEQUENCE"=>"RS:NC_001138", "LENGTH"=>"270148", "CHROMOSOME"=>"VI"}, {"SEQUENCE"=>"RS:NC_001139", "LENGTH"=>"1090947", "CHROMOSOME"=>"VII"}, {"SEQUENCE"=>"RS:NC_001140", "LENGTH"=>"562643", "CHROMOSOME"=>"VIII"}, {"SEQUENCE"=>"RS:NC_001141", "LENGTH"=>"439885", "CHROMOSOME"=>"IX"}, {"SEQUENCE"=>"RS:NC_001142", "LENGTH"=>"745741", "CHROMOSOME"=>"X"}, {"SEQUENCE"=>"RS:NC_001143", "LENGTH"=>"666454", "CHROMOSOME"=>"XI"}, {"SEQUENCE"=>"RS:NC_001144", "LENGTH"=>"1078175", "CHROMOSOME"=>"XII"}, {"SEQUENCE"=>"RS:NC_001145", "LENGTH"=>"924429", "CHROMOSOME"=>"XIII"}, {"SEQUENCE"=>"RS:NC_001146", "LENGTH"=>"784333", "CHROMOSOME"=>"XIV"}, {"SEQUENCE"=>"RS:NC_001147", "LENGTH"=>"1091289", "CHROMOSOME"=>"XV"}, {"SEQUENCE"=>"RS:NC_001148", "LENGTH"=>"948062", "CHROMOSOME"=>"XVI"}, {"SEQUENCE"=>"RS:NC_001224", "LENGTH"=>"85779", "CHROMOSOME"=>"MT (mitochondrion); Circular"} ] assert_equal(expected, @obj.chromosomes) end def test_plasmids assert_equal([], @obj.plasmids) end def test_statistics expected = {"num_rna"=>414, "num_nuc"=>12156676, "num_gene"=>5881} assert_equal(expected, @obj.statistics) end def test_nalen 
assert_equal(12156676, @obj.nalen) end def test_num_gene assert_equal(5881, @obj.num_gene) end def test_num_rna assert_equal(414, @obj.num_rna) end end #class TestBioKEGGGENOME_T00005 class TestBioKEGGGENOME_T00070 < Test::Unit::TestCase def setup filename = File.join(BioRubyTestDataPath, 'KEGG/T00070.genome') @obj = Bio::KEGG::GENOME.new(File.read(filename)) end def test_new assert_instance_of(Bio::KEGG::GENOME, @obj) end def test_entry_id assert_equal("T00070", @obj.entry_id) end def test_name expected = "atu, A.tumefaciens, AGRT5, 176299" assert_equal(expected, @obj.name) end def test_definition expected = "Agrobacterium tumefaciens C58" assert_equal(expected, @obj.definition) end def test_taxonomy expected = { "lineage"=> "Bacteria; Proteobacteria; Alphaproteobacteria; Rhizobiales; Rhizobiaceae; Rhizobium/Agrobacterium group; Agrobacterium", "taxid" => "TAX:176299" } assert_equal(expected, @obj.taxonomy) end def test_taxid assert_equal("TAX:176299", @obj.taxid) end def test_lineage expected = "Bacteria; Proteobacteria; Alphaproteobacteria; Rhizobiales; Rhizobiaceae; Rhizobium/Agrobacterium group; Agrobacterium" assert_equal(expected, @obj.lineage) end def test_data_source assert_equal("RefSeq", @obj.data_source) end def test_original_db assert_equal("UWash", @obj.original_db) end def test_original_databases assert_equal([ "UWash" ], @obj.original_databases) end def test_disease expected = "Crown gall disease in plants" assert_equal(expected, @obj.disease) end def test_comment expected = "Originally called Agrobacterium tumefaciens C58 (U.Washington/Dupont) to distinguish from Agrobacterium tumefaciens C58 (Cereon) [GN:atc]" assert_equal(expected, @obj.comment) end def test_references h = { "authors" => [ "Wood DW", "et al." 
], "journal" => "Science", "pages" => "2317-23", "pubmed" => "11743193", "title" => "The genome of the natural genetic engineer Agrobacterium tumefaciens C58.", "volume" => "294", "year" => "2001" } expected = [ Bio::Reference.new(h) ] assert_equal(expected, @obj.references) end def test_chromosomes expected = [ { "SEQUENCE" => "RS:NC_003062", "LENGTH" => "2841580", "CHROMOSOME" => "Circular"}, { "SEQUENCE" => "RS:NC_003063", "LENGTH" => "2075577", "CHROMOSOME" => "L (linear chromosome)"} ] assert_equal(expected, @obj.chromosomes) end def test_plasmids expected = [ { "SEQUENCE" => "RS:NC_003065", "LENGTH" => "214233", "PLASMID" => "Ti; Circular" }, { "SEQUENCE" => "RS:NC_003064", "LENGTH" => "542868", "PLASMID" => "AT; Circular" } ] assert_equal(expected, @obj.plasmids) end def test_statistics expected = {"num_rna"=>74, "num_nuc"=>5674258, "num_gene"=>5355} assert_equal(expected, @obj.statistics) end def test_nalen assert_equal(5674258, @obj.nalen) end def test_num_gene assert_equal(5355, @obj.num_gene) end def test_num_rna assert_equal(74, @obj.num_rna) end end #class TestBioKEGGGENOME_T00070 end #module Bio bio-1.4.3.0001/test/unit/bio/db/kegg/test_compound.rb0000644000004100000410000001755612200110570022122 0ustar www-datawww-data# # test/unit/bio/db/kegg/test_compound.rb - Unit test for Bio::KEGG::COMPOUND # # Copyright:: Copyright (C) 2009 Kozo Nishida # License:: The Ruby License # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/kegg/compound' module Bio class TestKeggCompound < Test::Unit::TestCase def setup testdata_kegg = Pathname.new(File.join(BioRubyTestDataPath, 'KEGG')).cleanpath.to_s entry = File.read(File.join(testdata_kegg, "C00025.compound")) @obj = Bio::KEGG::COMPOUND.new(entry) end def test_entry_id assert_equal('C00025', @obj.entry_id) end def test_name 
assert_equal("L-Glutamate", @obj.name) end def test_names assert_equal(["L-Glutamate", "L-Glutamic acid", "L-Glutaminic acid", "Glutamate"], @obj.names) end def test_formula assert_equal("C5H9NO4", @obj.formula) end def test_mass assert_equal(147.0532, @obj.mass) end def test_remark assert_equal("Same as: D00007", @obj.remark) end def test_reactions assert_equal(["R00021", "R00093", "R00114", "R00239", "R00241", "R00243", "R00245", "R00248", "R00250", "R00251", "R00253", "R00254", "R00256", "R00257", "R00258", "R00259", "R00260", "R00261", "R00262", "R00263", "R00355", "R00372", "R00411", "R00457", "R00494", "R00525", "R00573", "R00575", "R00578", "R00609", "R00667", "R00668", "R00684", "R00694", "R00707", "R00708", "R00734", "R00768", "R00894", "R00895", "R00908", "R00942", "R00986", "R01072", "R01090", "R01155", "R01161", "R01214", "R01231", "R01339", "R01585", "R01586", "R01648", "R01654", "R01684", "R01716", "R01939", "R01956", "R02040", "R02077", "R02199", "R02237", "R02274", "R02282", "R02283", "R02285", "R02287", "R02313", "R02315", "R02433", "R02619", "R02700", "R02772", "R02773", "R02929", "R02930", "R03053", "R03120", "R03189", "R03207", "R03243", "R03248", "R03266", "R03651", "R03905", "R03916", "R03952", "R03970", "R03971", "R04028", "R04029", "R04051", "R04171", "R04173", "R04188", "R04212", "R04217", "R04234", "R04241", "R04269", "R04338", "R04438", "R04463", "R04467", "R04475", "R04529", "R04558", "R04776", "R05052", "R05085", "R05197", "R05207", "R05224", "R05225", "R05507", "R05578", "R05815", "R06423", "R06426", "R06844", "R06977", "R07275", "R07276", "R07277", "R07396", "R07414", "R07419", "R07456", "R07613", "R07643", "R07659", "R08244"], @obj.reactions) end def test_rpairs assert_equal([], @obj.rpairs) end def test_pathways_as_strings assert_equal(["PATH: ko00250 Alanine, aspartate and glutamate metabolism", "PATH: ko00330 Arginine and proline metabolism", "PATH: ko00340 Histidine metabolism", "PATH: ko00471 D-Glutamine and D-glutamate 
metabolism", "PATH: ko00480 Glutathione metabolism", "PATH: ko00650 Butanoate metabolism", "PATH: ko00660 C5-Branched dibasic acid metabolism", "PATH: ko00860 Porphyrin and chlorophyll metabolism", "PATH: ko00910 Nitrogen metabolism", "PATH: ko00970 Aminoacyl-tRNA biosynthesis", "PATH: map01060 Biosynthesis of plant secondary metabolites", "PATH: ko01064 Biosynthesis of alkaloids derived from ornithine, lysine and nicotinic acid", "PATH: ko01100 Metabolic pathways", "PATH: ko02010 ABC transporters", "PATH: ko04080 Neuroactive ligand-receptor interaction", "PATH: ko04540 Gap junction", "PATH: ko04720 Long-term potentiation", "PATH: ko04730 Long-term depression", "PATH: ko04742 Taste transduction", "PATH: ko05014 Amyotrophic lateral sclerosis (ALS)", "PATH: ko05016 Huntington's disease"], @obj.pathways_as_strings) end def test_enzymes assert_equal(["1.4.1.2", "1.4.1.3", "1.4.1.4", "1.4.1.13", "1.4.1.14", "1.4.3.11", "1.4.7.1", "1.5.1.9", "1.5.1.10", "1.5.1.12", "1.5.99.5", "2.1.1.21", "2.1.2.5", "2.3.1.1", "2.3.1.14", "2.3.1.35", "2.3.2.2", "2.3.2.-", "2.4.2.14", "2.4.2.-", "2.6.1.1", "2.6.1.2", "2.6.1.3", "2.6.1.4", "2.6.1.5", "2.6.1.6", "2.6.1.7", "2.6.1.8", "2.6.1.9", "2.6.1.11", "2.6.1.13", "2.6.1.16", "2.6.1.17", "2.6.1.19", "2.6.1.22", "2.6.1.23", "2.6.1.24", "2.6.1.26", "2.6.1.27", "2.6.1.29", "2.6.1.33", "2.6.1.34", "2.6.1.36", "2.6.1.38", "2.6.1.39", "2.6.1.40", "2.6.1.42", "2.6.1.48", "2.6.1.49", "2.6.1.52", "2.6.1.55", "2.6.1.57", "2.6.1.59", "2.6.1.65", "2.6.1.67", "2.6.1.68", "2.6.1.72", "2.6.1.75", "2.6.1.76", "2.6.1.79", "2.6.1.80", "2.6.1.81", "2.6.1.82", "2.6.1.83", "2.6.1.85", "2.6.1.-", "2.7.2.11", "2.7.2.13", "3.5.1.2", "3.5.1.38", "3.5.1.55", "3.5.1.65", "3.5.1.68", "3.5.1.87", "3.5.1.94", "3.5.1.96", "3.5.2.9", "3.5.3.8", "4.1.1.15", "4.1.3.27", "4.1.3.-", "5.1.1.3", "5.4.99.1", "6.1.1.17", "6.1.1.24", "6.3.1.2", "6.3.1.6", "6.3.1.11", "6.3.1.-", "6.3.2.2", "6.3.2.12", "6.3.2.17", "6.3.2.18", "6.3.4.2", "6.3.4.12", "6.3.5.1", "6.3.5.2", 
"6.3.5.3", "6.3.5.4", "6.3.5.5", "6.3.5.6", "6.3.5.7", "6.3.5.9", "6.3.5.10"], @obj.enzymes) end def test_dblinks_as_strings assert_equal([ "CAS: 56-86-0", "PubChem: 3327", "ChEBI: 16015", "KNApSAcK: C00001358", "PDB-CCD: GLU", "3DMET: B00007", "NIKKAJI: J9.171E" ], @obj.dblinks_as_strings) end def test_dblinks_as_hash expected = { "CAS" => [ "56-86-0" ], "PubChem" => [ "3327" ], "ChEBI" => [ "16015" ], "KNApSAcK" => [ "C00001358" ], "PDB-CCD" => [ "GLU" ], "3DMET" => [ "B00007" ], "NIKKAJI" => [ "J9.171E" ] } assert_equal(expected, @obj.dblinks_as_hash) assert_equal(expected, @obj.dblinks) end def test_pathways_as_hash expected = { "ko00250" => "Alanine, aspartate and glutamate metabolism", "ko00330" => "Arginine and proline metabolism", "ko00340" => "Histidine metabolism", "ko00471" => "D-Glutamine and D-glutamate metabolism", "ko00480" => "Glutathione metabolism", "ko00650" => "Butanoate metabolism", "ko00660" => "C5-Branched dibasic acid metabolism", "ko00860" => "Porphyrin and chlorophyll metabolism", "ko00910" => "Nitrogen metabolism", "ko00970" => "Aminoacyl-tRNA biosynthesis", "map01060" => "Biosynthesis of plant secondary metabolites", "ko01064" => "Biosynthesis of alkaloids derived from ornithine, lysine and nicotinic acid", "ko01100" => "Metabolic pathways", "ko02010" => "ABC transporters", "ko04080" => "Neuroactive ligand-receptor interaction", "ko04540" => "Gap junction", "ko04720" => "Long-term potentiation", "ko04730" => "Long-term depression", "ko04742" => "Taste transduction", "ko05014" => "Amyotrophic lateral sclerosis (ALS)", "ko05016" => "Huntington's disease" } assert_equal(expected, @obj.pathways_as_hash) assert_equal(expected, @obj.pathways) end def test_kcf assert_equal("ATOM 10 1 C1c C 23.8372 -17.4608 2 C1b C 25.0252 -16.7233 3 C6a C 22.6023 -16.7994 4 N1a N 23.8781 -18.8595 5 C1b C 26.2601 -17.3788 6 O6a O 21.4434 -17.5954 7 O6a O 22.6198 -15.4007 8 C6a C 27.4482 -16.6414 9 O6a O 28.6830 -17.3028 10 O6a O 27.4714 -15.2426 BOND 9 1 1 2 1 2 
1 3 1 3 1 4 1 #Down 4 2 5 1 5 3 6 1 6 3 7 2 7 5 8 1 8 8 9 1 9 8 10 2", @obj.kcf) end def test_comment assert_equal('The name "glutamate" also means DL-Glutamate (see [CPD:C00302])', @obj.comment) end end end bio-1.4.3.0001/test/unit/bio/db/kegg/test_orthology.rb0000644000004100000410000001266012200110570022313 0ustar www-datawww-data# # test/unit/bio/db/kegg/test_orthology.rb - Unit test for Bio::KEGG::ORTHOLOGY # # Copyright:: Copyright (C) 2009 Kozo Nishida # License:: The Ruby License # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/reference' require 'bio/db/kegg/orthology' module Bio class TestKeggOrthology < Test::Unit::TestCase def setup testdata_kegg = Pathname.new(File.join(BioRubyTestDataPath, 'KEGG')).cleanpath.to_s entry = File.read(File.join(testdata_kegg, "K02338.orthology")) @obj = Bio::KEGG::ORTHOLOGY.new(entry) end def test_entry_id assert_equal('K02338', @obj.entry_id) end def test_name assert_equal("DPO3B, dnaN", @obj.name) end def test_names assert_equal(["DPO3B", "dnaN"], @obj.names) end def test_definition assert_equal("DNA polymerase III subunit beta [EC:2.7.7.7]", @obj.definition) end def test_dblinks_as_hash assert_equal({"COG"=>["COG0592"], "RN"=>["R00375", "R00376", "R00377", "R00378"], "GO"=>["0003887"]}, @obj.dblinks_as_hash) end def test_dblinks expected = { "COG" => ["COG0592"], "RN" => ["R00375", "R00376", "R00377", "R00378"], "GO" => ["0003887"] } assert_equal(expected, @obj.dblinks) end def test_genes_as_hash assert_equal(1000, @obj.genes_as_hash.size) assert_equal(["BSU00020"], @obj.genes_as_hash["bsu"]) assert_equal(["SynWH7803_0001"], @obj.genes_as_hash["syx"]) end def test_modules_as_hash expected = {"M00597"=>"DNA polymerase III complex"} assert_equal(expected, @obj.modules_as_hash) end def test_modules expected = {"M00597"=>"DNA polymerase 
III complex"} assert_equal(expected, @obj.modules) end def test_references data = [ { "authors" => [ "Stillman B." ], "journal" => "Cell", "pages" => "725-8", "pubmed" => "8087839", "title" => "Smart machines at the DNA replication fork.", "volume" => "78", "year" => "1994" } ] expected = data.collect { |h| Bio::Reference.new(h) } assert_equal(expected, @obj.references) end def test_keggclass expected = "Metabolism; Nucleotide Metabolism; Purine metabolism [PATH:ko00230] Metabolism; Nucleotide Metabolism; Pyrimidine metabolism [PATH:ko00240] Genetic Information Processing; Replication and Repair; DNA replication [PATH:ko03030] Genetic Information Processing; Replication and Repair; DNA replication proteins [BR:ko03032] Genetic Information Processing; Replication and Repair; Mismatch repair [PATH:ko03430] Genetic Information Processing; Replication and Repair; Homologous recombination [PATH:ko03440] Genetic Information Processing; Replication and Repair; DNA repair and recombination proteins [BR:ko03400]" assert_equal(expected, @obj.keggclass) end def test_keggclasses expected = [ "Metabolism; Nucleotide Metabolism; Purine metabolism", "Metabolism; Nucleotide Metabolism; Pyrimidine metabolism", "Genetic Information Processing; Replication and Repair; DNA replication", "Genetic Information Processing; Replication and Repair; DNA replication proteins", "Genetic Information Processing; Replication and Repair; Mismatch repair", "Genetic Information Processing; Replication and Repair; Homologous recombination", "Genetic Information Processing; Replication and Repair; DNA repair and recombination proteins" ] assert_equal(expected, @obj.keggclasses) end def test_pathways_as_strings expected = ["ko00230 Purine metabolism", "ko00240 Pyrimidine metabolism", "ko03030 DNA replication", "ko03430 Mismatch repair", "ko03440 Homologous recombination"] assert_equal(expected, @obj.pathways_as_strings) end def test_pathways_in_keggclass expected = ["ko00230", "ko00240", "ko03030", 
"ko03430", "ko03440"] assert_equal(expected, @obj.pathways_in_keggclass) end def test_modules_as_strings expected = ["M00597 DNA polymerase III complex"] assert_equal(expected, @obj.modules_as_strings) end def test_dblinks_as_strings expected = [ "RN: R00375 R00376 R00377 R00378", "COG: COG0592", "GO: 0003887" ] assert_equal(expected, @obj.dblinks_as_strings) end def test_genes_as_strings assert_equal(1000, @obj.genes_as_strings.size) assert_equal("ECO: b3701(dnaN)", @obj.genes_as_strings[0]) assert_equal("BPN: BPEN_015(dnaN)", @obj.genes_as_strings[100]) assert_equal("SVO: SVI_0032(dnaN)", @obj.genes_as_strings[200]) assert_equal("RFR: Rfer_0002 Rfer_4311", @obj.genes_as_strings[300]) assert_equal("OTS: OTBS_0002(dnaN)", @obj.genes_as_strings[400]) assert_equal("ACR: Acry_1437", @obj.genes_as_strings[500]) assert_equal("SPD: SPD_0002(dnaN)", @obj.genes_as_strings[600]) assert_equal("TEX: Teth514_0002", @obj.genes_as_strings[700]) assert_equal("FAL: FRAAL0004(dnaN) FRAAL1257", @obj.genes_as_strings[800]) assert_equal("AMU: Amuc_0816", @obj.genes_as_strings[900]) assert_equal("DAP: Dacet_2869", @obj.genes_as_strings[-1]) end end end bio-1.4.3.0001/test/unit/bio/db/kegg/test_pathway.rb0000644000004100000410000012624412200110570021746 0ustar www-datawww-data# # test/unit/bio/db/kegg/test_pathway.rb - Unit test for Bio::KEGG::PATHWAY # # Copyright:: Copyright (C) 2010 Kozo Nishida # Copyright (C) 2010 Naohisa Goto # License:: The Ruby License # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/kegg/pathway' module Bio class TestKeggPathway_map00052 < Test::Unit::TestCase def setup testdata_kegg = Pathname.new(File.join(BioRubyTestDataPath, 'KEGG')).cleanpath.to_s entry = File.read(File.join(testdata_kegg, "map00052.pathway")) @obj = Bio::KEGG::PATHWAY.new(entry) end def test_entry_id 
assert_equal('map00052', @obj.entry_id) end def test_name assert_equal('Galactose metabolism', @obj.name) end def test_keggclass assert_equal('Metabolism; Carbohydrate Metabolism', @obj.keggclass) end def test_modules_as_hash expected = { "M00097"=>"UDP-glucose and UDP-galactose biosynthesis, Glc-1P/Gal-1P => UDP-Glc/UDP-Gal", "M00614"=>"PTS system, N-acetylgalactosamine-specific II component", "M00616"=>"PTS system, galactitol-specific II component", "M00618"=>"PTS system, lactose-specific II component", "M00624"=>"PTS system, galactosamine-specific II component" } assert_equal(expected, @obj.modules_as_hash) assert_equal(expected, @obj.modules) end def test_modules_as_strings expected = [ "M00097 UDP-glucose and UDP-galactose biosynthesis, Glc-1P/Gal-1P => UDP-Glc/UDP-Gal", "M00614 PTS system, N-acetylgalactosamine-specific II component", "M00616 PTS system, galactitol-specific II component", "M00618 PTS system, lactose-specific II component", "M00624 PTS system, galactosamine-specific II component" ] assert_equal(expected, @obj.modules_as_strings) end def test_rel_pathways_as_strings expected = [ "map00010 Glycolysis / Gluconeogenesis", "map00040 Pentose and glucuronate interconversions", "map00051 Fructose and mannose metabolism", "map00520 Amino sugar and nucleotide sugar metabolism" ] assert_equal(expected, @obj.rel_pathways_as_strings) end def test_rel_pathways_as_hash expected = { "map00010"=>"Glycolysis / Gluconeogenesis", "map00040"=>"Pentose and glucuronate interconversions", "map00051"=>"Fructose and mannose metabolism", "map00520"=>"Amino sugar and nucleotide sugar metabolism" } assert_equal(expected, @obj.rel_pathways_as_hash) assert_equal(expected, @obj.rel_pathways) end def test_references assert_equal([], @obj.references) end def test_dblinks_as_strings assert_equal([], @obj.dblinks_as_strings) end def test_dblinks_as_hash assert_equal({}, @obj.dblinks_as_hash) end def test_pathways_as_strings expected = ["map00052 Galactose metabolism"] 
assert_equal(expected, @obj.pathways_as_strings) end def test_pathways_as_hash expected = {"map00052"=>"Galactose metabolism"} assert_equal(expected, @obj.pathways_as_hash) end def test_orthologs_as_strings assert_equal([], @obj.orthologs_as_strings) end def test_orthologs_as_hash assert_equal({}, @obj.orthologs_as_hash) end def test_genes_as_strings assert_equal([], @obj.genes_as_strings) end def test_genes_as_hash assert_equal({}, @obj.genes_as_hash) end def test_diseases_as_strings assert_equal([], @obj.diseases_as_strings) end def test_diseases_as_hash assert_equal({}, @obj.diseases_as_hash) end def test_enzymes_as_strings assert_equal([], @obj.enzymes_as_strings) end def test_reactions_as_strings assert_equal([], @obj.reactions_as_strings) end def test_reactions_as_hash assert_equal({}, @obj.reactions_as_hash) end def test_compounds_as_strings assert_equal([], @obj.compounds_as_strings) end def test_compounds_as_hash assert_equal({}, @obj.compounds_as_hash) end def test_description assert_equal("", @obj.description) end def test_organism assert_equal("", @obj.organism) end def test_ko_pathway assert_equal("", @obj.ko_pathway) end end #class TestKeggPathway_map00052 class TestBioKEGGPATHWAY_map00030 < Test::Unit::TestCase def setup filename = File.join(BioRubyTestDataPath, 'KEGG/map00030.pathway') @obj = Bio::KEGG::PATHWAY.new(File.read(filename)) end def test_references data = [ { "authors" => [ "Nishizuka Y (ed)." ], "comments" => [ "(map 3)" ], "journal" => "Tokyo Kagaku Dojin", "title" => "[Metabolic Maps] (In Japanese)", "year" => "1980" }, { "authors" => [ "Nishizuka Y", "Seyama Y", "Ikai A", "Ishimura Y", "Kawaguchi A (eds)." ], "comments" => [ "(map 4)" ], "journal" => "Tokyo Kagaku Dojin", "title"=>"[Cellular Functions and Metabolic Maps] (In Japanese)", "year" => "1997" }, { "authors" => [ "Michal G." 
], "journal" => "Wiley", "title" => "Biochemical Pathways", "year" => "1999" }, { "authors" => [ "Hove-Jensen B", "Rosenkrantz TJ", "Haldimann A", "Wanner BL." ], "journal" => "J Bacteriol", "pages" => "2793-801", "pubmed" => "12700258", "title" => "Escherichia coli phnN, encoding ribose 1,5-bisphosphokinase activity (phosphoribosyl diphosphate forming): dual role in phosphonate degradation and NAD biosynthesis pathways.", "volume" => "185", "year" => "2003" } ] expected = data.collect { |h| Bio::Reference.new(h) } assert_equal(expected, @obj.references) end def test_new assert_instance_of(Bio::KEGG::PATHWAY, @obj) end def test_entry_id assert_equal("map00030", @obj.entry_id) end def test_name assert_equal("Pentose phosphate pathway", @obj.name) end def test_description expected = "The pentose phosphate pathway is a process of glucose turnover that produces NADPH as reducing equivalents and pentoses as essential parts of nucleotides. There are two different phases in the pathway. One is irreversible oxidative phase in which glucose-6P is converted to ribulose-5P by oxidative decarboxylation, and NADPH is generated [MD:M00006]. The other is reversible non-oxidative phase in which phosphorylated sugars are interconverted to generate xylulose-5P, ribulose-5P, and ribose-5P [MD:M00007]. Phosphoribosyl pyrophosphate (PRPP) formed from ribose-5P [MD:M00005] is an activated compound used in the biosynthesis of histidine and purine/pyrimidine nucleotides. This pathway map also shows the Entner-Doudoroff pathway where 6-P-gluconate is dehydrated and then cleaved into pyruvate and glyceraldehyde-3P [MD:M00008]." 
assert_equal(expected, @obj.description) end def test_keggclass expected = "Metabolism; Carbohydrate Metabolism" assert_equal(expected, @obj.keggclass) end def test_modules_as_strings expected = [ "M00004 Pentose phosphate pathway (Pentose phosphate cycle) [PATH:map00030]", "M00005 PRPP biosynthesis, ribose 5P -> PRPP [PATH:map00030]", "M00006 Pentose phosphate pathway, oxidative phase, glucose 6P => ribulose 5P [PATH:map00030]", "M00007 Pentose phosphate pathway, non-oxidative phase, fructose 6P => ribose 5P [PATH:map00030]", "M00008 Entner-Doudoroff pathway, glucose-6P => glyceraldehyde-3P + pyruvate [PATH:map00030]", "M00680 Semi-phosphorylative Entner-Doudoroff pathway, gluconate => glyceraldehyde-3P + pyruvate [PATH:map00030]", "M00681 Non-phosphorylative Entner-Doudoroff pathway, gluconate => glyceraldehyde + pyruvate [PATH:map00030]" ] assert_equal(expected, @obj.modules_as_strings) end def test_modules_as_hash expected = { "M00008" => "Entner-Doudoroff pathway, glucose-6P => glyceraldehyde-3P + pyruvate [PATH:map00030]", "M00680" => "Semi-phosphorylative Entner-Doudoroff pathway, gluconate => glyceraldehyde-3P + pyruvate [PATH:map00030]", "M00681" => "Non-phosphorylative Entner-Doudoroff pathway, gluconate => glyceraldehyde + pyruvate [PATH:map00030]", "M00004" => "Pentose phosphate pathway (Pentose phosphate cycle) [PATH:map00030]", "M00005" => "PRPP biosynthesis, ribose 5P -> PRPP [PATH:map00030]", "M00006" => "Pentose phosphate pathway, oxidative phase, glucose 6P => ribulose 5P [PATH:map00030]", "M00007" => "Pentose phosphate pathway, non-oxidative phase, fructose 6P => ribose 5P [PATH:map00030]" } assert_equal(expected, @obj.modules_as_hash) assert_equal(expected, @obj.modules) end def test_rel_pathways_as_strings expected = [ "map00010 Glycolysis / Gluconeogenesis", "map00040 Pentose and glucuronate interconversions", "map00230 Purine metabolism", "map00240 Pyrimidine metabolism", "map00340 Histidine metabolism" ] assert_equal(expected, 
@obj.rel_pathways_as_strings) end def test_rel_pathways_as_hash expected = { "map00240" => "Pyrimidine metabolism", "map00340" => "Histidine metabolism", "map00230" => "Purine metabolism", "map00010" => "Glycolysis / Gluconeogenesis", "map00040" => "Pentose and glucuronate interconversions" } assert_equal(expected, @obj.rel_pathways_as_hash) assert_equal(expected, @obj.rel_pathways) end def test_dblinks_as_strings assert_equal(["GO: 0006098"], @obj.dblinks_as_strings) end def test_dblinks_as_hash assert_equal({"GO"=>["0006098"]}, @obj.dblinks_as_hash) end def test_pathways_as_strings expected = ["map00030 Pentose phosphate pathway"] assert_equal(expected, @obj.pathways_as_strings) end def test_pathways_as_hash expected = {"map00030"=>"Pentose phosphate pathway"} assert_equal(expected, @obj.pathways_as_hash) end def test_orthologs_as_strings assert_equal([], @obj.orthologs_as_strings) end def test_orthologs_as_hash assert_equal({}, @obj.orthologs_as_hash) end def test_genes_as_strings assert_equal([], @obj.genes_as_strings) end def test_genes_as_hash assert_equal({}, @obj.genes_as_hash) end def test_diseases_as_strings expected = ["H00196 Phosphoribosylpyrophosphate synthetase I superactivity"] assert_equal(expected, @obj.diseases_as_strings) end def test_diseases_as_hash expected = {"H00196"=>"Phosphoribosylpyrophosphate synthetase I superactivity"} assert_equal(expected, @obj.diseases_as_hash) end def test_enzymes_as_strings assert_equal([], @obj.enzymes_as_strings) end def test_reactions_as_strings assert_equal([], @obj.reactions_as_strings) end def test_reactions_as_hash assert_equal({}, @obj.reactions_as_hash) end def test_compounds_as_strings assert_equal([], @obj.compounds_as_strings) end def test_compounds_as_hash assert_equal({}, @obj.compounds_as_hash) end def test_organism assert_equal("", @obj.organism) end def test_ko_pathway assert_equal("ko00030", @obj.ko_pathway) end end #class TestBioKEGGPATHWAY class TestBioKeggPathway_rn00250 < 
Test::Unit::TestCase def setup filename = File.join(BioRubyTestDataPath, 'KEGG', 'rn00250.pathway') @obj = Bio::KEGG::PATHWAY.new(File.read(filename)) end def test_dblinks_as_hash expected = {"GO"=>["0006522", "0006531", "0006536"]} assert_equal(expected, @obj.dblinks_as_hash) end def test_pathways_as_hash expected = {"rn00250"=>"Alanine, aspartate and glutamate metabolism"} assert_equal(expected, @obj.pathways_as_hash) end def test_orthologs_as_hash assert_equal({}, @obj.orthologs_as_hash) end def test_genes_as_hash assert_equal({}, @obj.genes_as_hash) end def test_references data = [ { "authors" => [ "Nishizuka Y", "Seyama Y", "Ikai A", "Ishimura Y", "Kawaguchi A (eds)." ], "journal" => "Tokyo Kagaku Dojin", "title"=>"[Cellular Functions and Metabolic Maps] (In Japanese)", "year" => "1997" }, { "authors" => [ "Wu G" ], "journal" => "J Nutr", "pages" => "1249-52", "pubmed" => "9687539", "title" => "Intestinal mucosal amino acid catabolism.", "volume" => "128", "year" => "1998" } ] expected = data.collect { |h| Bio::Reference.new(h) } assert_equal(expected, @obj.references) end def test_modules_as_hash expected = { "M00019"=> "Glutamate biosynthesis, oxoglutarete => glutamate (glutamate synthase) [PATH:rn00250]", "M00021"=> "Aspartate biosynthesis, oxaloacetate => aspartate [PATH:rn00250]", "M00044"=> "Aspartate degradation, aspartate => fumarate [PATH:rn00250]", "M00022"=> "Asparagine biosynthesis, aspartate => asparagine [PATH:rn00250]", "M00045"=> "Aspartate degradation, aspartate => oxaloacetate [PATH:rn00250]", "M00046"=> "Asparagine degradation, asparagine => aspartate +NH3 [PATH:rn00250]", "M00026"=> "Alanine biosynthesis, pyruvate => alanine [PATH:rn00250]", "M00038"=> "Glutamine degradation, glutamine => glutamate + NH3 [PATH:rn00250]", "M00040"=> "GABA (gamma-Aminobutyrate) shunt [PATH:rn00250]", "M00017"=> "Glutamate biosynthesis, oxoglutarate => glutamate (glutamate dehydrogenase) [PATH:rn00250]", "M00018"=> "Glutamine biosynthesis, glutamate => 
glutamine [PATH:rn00250]" } assert_equal(expected, @obj.modules_as_hash) end def test_new assert_kind_of(Bio::KEGG::PATHWAY, @obj) end def test_entry_id assert_equal("rn00250", @obj.entry_id) end def test_name expected = "Alanine, aspartate and glutamate metabolism" assert_equal(expected, @obj.name) end def test_description assert_equal("", @obj.description) end def test_keggclass expected = "Metabolism; Amino Acid Metabolism" assert_equal(expected, @obj.keggclass) end def test_pathways_as_strings expected = ["rn00250 Alanine, aspartate and glutamate metabolism"] assert_equal(expected, @obj.pathways_as_strings) end def test_modules_as_strings expected = [ "M00017 Glutamate biosynthesis, oxoglutarate => glutamate (glutamate dehydrogenase) [PATH:rn00250]", "M00018 Glutamine biosynthesis, glutamate => glutamine [PATH:rn00250]", "M00019 Glutamate biosynthesis, oxoglutarete => glutamate (glutamate synthase) [PATH:rn00250]", "M00021 Aspartate biosynthesis, oxaloacetate => aspartate [PATH:rn00250]", "M00022 Asparagine biosynthesis, aspartate => asparagine [PATH:rn00250]", "M00026 Alanine biosynthesis, pyruvate => alanine [PATH:rn00250]", "M00038 Glutamine degradation, glutamine => glutamate + NH3 [PATH:rn00250]", "M00040 GABA (gamma-Aminobutyrate) shunt [PATH:rn00250]", "M00044 Aspartate degradation, aspartate => fumarate [PATH:rn00250]", "M00045 Aspartate degradation, aspartate => oxaloacetate [PATH:rn00250]", "M00046 Asparagine degradation, asparagine => aspartate +NH3 [PATH:rn00250]" ] assert_equal(expected, @obj.modules_as_strings) end def test_diseases_as_strings expected = [ "H00074 Canavan disease (CD)", "H00185 Citrullinemia (CTLN)", "H00197 Adenylosuccinate lyase deficiency" ] assert_equal(expected, @obj.diseases_as_strings) end def test_diseases_as_hash expected = { "H00197"=>"Adenylosuccinate lyase deficiency", "H00074"=>"Canavan disease (CD)", "H00185"=>"Citrullinemia (CTLN)" } assert_equal(expected, @obj.diseases_as_hash) end def test_dblinks_as_strings 
expected = ["GO: 0006522 0006531 0006536"] assert_equal(expected, @obj.dblinks_as_strings) end def test_orthologs_as_strings assert_equal([], @obj.orthologs_as_strings) end def test_organism assert_equal("", @obj.organism) end def test_genes_as_strings assert_equal([], @obj.genes_as_strings) end def test_enzymes_as_strings assert_equal([], @obj.enzymes_as_strings) end def test_reactions_as_strings expected = [ "R00093 L-glutamate:NAD+ oxidoreductase (transaminating)", "R00114 L-Glutamate:NADP+ oxidoreductase (transaminating)", "R00149 Carbon-dioxide:ammonia ligase (ADP-forming,carbamate-phosphorylating)", "R00243 L-Glutamate:NAD+ oxidoreductase (deaminating)", "R00248 L-Glutamate:NADP+ oxidoreductase (deaminating)", "R00253 L-Glutamate:ammonia ligase (ADP-forming)", "R00256 L-Glutamine amidohydrolase", "R00258 L-Alanine:2-oxoglutarate aminotransferase", "R00261 L-glutamate 1-carboxy-lyase (4-aminobutanoate-forming)", "R00269 2-Oxoglutaramate amidohydrolase", "R00348 2-Oxosuccinamate amidohydrolase", "R00355 L-Aspartate:2-oxoglutarate aminotransferase", "R00357 L-Aspartic acid:oxygen oxidoreductase (deaminating)", "R00359 D-Aspartate:oxygen oxidoreductase (deaminating)", "R00369 L-Alanine:glyoxylate aminotransferase", "R00396 L-Alanine:NAD+ oxidoreductase (deaminating)", "R00397 L-aspartate 4-carboxy-lyase (L-alanine-forming)", "R00400 L-alanine:oxaloacetate aminotransferase", "R00483 L-aspartate:ammonia ligase (AMP-forming)", "R00484 N-Carbamoyl-L-aspartate amidohydrolase", "R00485 L-Asparagine amidohydrolase", "R00487 Acetyl-CoA:L-aspartate N-acetyltransferase", "R00488 N-Acetyl-L-aspartate amidohydrolase", "R00490 L-Aspartate ammonia-lyase", "R00491 aspartate racemase", "R00575 hydrogen-carbonate:L-glutamine amido-ligase (ADP-forming, carbamate-phosphorylating)", "R00576 L-Glutamine:pyruvate aminotransferase", "R00578 L-aspartate:L-glutamine amido-ligase (AMP-forming)", "R00707 (S)-1-pyrroline-5-carboxylate:NAD+ oxidoreductase", "R00708 
(S)-1-pyrroline-5-carboxylate:NADP+ oxidoreductase", "R00713 Succinate-semialdehyde:NAD+ oxidoreductase", "R00714 Succinate-semialdehyde:NADP+ oxidoreductase", "R00768 L-glutamine:D-fructose-6-phosphate isomerase (deaminating)", "R01072 5-phosphoribosylamine:diphosphate phospho-alpha-D-ribosyltransferase (glutamate-amidating)", "R01083 N6-(1,2-dicarboxyethyl)AMP AMP-lyase (fumarate-forming)", "R01086 2-(Nomega-L-arginino)succinate arginine-lyase (fumarate-forming)", "R01135 IMP:L-aspartate ligase (GDP-forming)", "R01346 L-Asparagine:2-oxo-acid aminotransferase", "R01397 carbamoyl-phosphate:L-aspartate carbamoyltransferase", "R01648 4-Aminobutanoate:2-oxoglutarate aminotransferase", "R01954 L-Citrulline:L-aspartate ligase (AMP-forming)" ] assert_equal(expected, @obj.reactions_as_strings) end def test_reactions_as_hash expected = { "R01648"=>"4-Aminobutanoate:2-oxoglutarate aminotransferase", "R00485"=>"L-Asparagine amidohydrolase", "R00397"=>"L-aspartate 4-carboxy-lyase (L-alanine-forming)", "R00243"=>"L-Glutamate:NAD+ oxidoreductase (deaminating)", "R01397"=>"carbamoyl-phosphate:L-aspartate carbamoyltransferase", "R00707"=>"(S)-1-pyrroline-5-carboxylate:NAD+ oxidoreductase", "R00575"=> "hydrogen-carbonate:L-glutamine amido-ligase (ADP-forming, carbamate-phosphorylating)", "R00487"=>"Acetyl-CoA:L-aspartate N-acetyltransferase", "R00355"=>"L-Aspartate:2-oxoglutarate aminotransferase", "R00256"=>"L-Glutamine amidohydrolase", "R01135"=>"IMP:L-aspartate ligase (GDP-forming)", "R00708"=>"(S)-1-pyrroline-5-carboxylate:NADP+ oxidoreductase", "R00576"=>"L-Glutamine:pyruvate aminotransferase", "R00488"=>"N-Acetyl-L-aspartate amidohydrolase", "R00400"=>"L-alanine:oxaloacetate aminotransferase", "R00114"=>"L-Glutamate:NADP+ oxidoreductase (transaminating)", "R00093"=>"L-glutamate:NAD+ oxidoreductase (transaminating)", "R00490"=>"L-Aspartate ammonia-lyase", "R00357"=>"L-Aspartic acid:oxygen oxidoreductase (deaminating)", "R00269"=>"2-Oxoglutaramate amidohydrolase", 
"R00258"=>"L-Alanine:2-oxoglutarate aminotransferase", "R01346"=>"L-Asparagine:2-oxo-acid aminotransferase", "R01083"=>"N6-(1,2-dicarboxyethyl)AMP AMP-lyase (fumarate-forming)", "R01072"=> "5-phosphoribosylamine:diphosphate phospho-alpha-D-ribosyltransferase (glutamate-amidating)", "R00578"=>"L-aspartate:L-glutamine amido-ligase (AMP-forming)", "R00491"=>"aspartate racemase", "R00369"=>"L-Alanine:glyoxylate aminotransferase", "R00248"=>"L-Glutamate:NADP+ oxidoreductase (deaminating)", "R00149"=> "Carbon-dioxide:ammonia ligase (ADP-forming,carbamate-phosphorylating)", "R00359"=>"D-Aspartate:oxygen oxidoreductase (deaminating)", "R00348"=>"2-Oxosuccinamate amidohydrolase", "R00261"=>"L-glutamate 1-carboxy-lyase (4-aminobutanoate-forming)", "R01954"=>"L-Citrulline:L-aspartate ligase (AMP-forming)", "R01086"=>"2-(Nomega-L-arginino)succinate arginine-lyase (fumarate-forming)", "R00768"=>"L-glutamine:D-fructose-6-phosphate isomerase (deaminating)", "R00713"=>"Succinate-semialdehyde:NAD+ oxidoreductase", "R00483"=>"L-aspartate:ammonia ligase (AMP-forming)", "R00714"=>"Succinate-semialdehyde:NADP+ oxidoreductase", "R00484"=>"N-Carbamoyl-L-aspartate amidohydrolase", "R00396"=>"L-Alanine:NAD+ oxidoreductase (deaminating)", "R00253"=>"L-Glutamate:ammonia ligase (ADP-forming)" } assert_equal(expected, @obj.reactions_as_hash) end def test_compounds_as_strings expected = [ "C00014 NH3", "C00022 Pyruvate", "C00025 L-Glutamate", "C00026 2-Oxoglutarate", "C00036 Oxaloacetate", "C00041 L-Alanine", "C00042 Succinate", "C00049 L-Aspartate", "C00064 L-Glutamine", "C00122 Fumarate", "C00152 L-Asparagine", "C00169 Carbamoyl phosphate", "C00232 Succinate semialdehyde", "C00334 4-Aminobutanoate", "C00352 D-Glucosamine 6-phosphate", "C00402 D-Aspartate", "C00438 N-Carbamoyl-L-aspartate", "C00940 2-Oxoglutaramate", "C01042 N-Acetyl-L-aspartate", "C02362 2-Oxosuccinamate", "C03090 5-Phosphoribosylamine", "C03406 N-(L-Arginino)succinate", "C03794 N6-(1,2-Dicarboxyethyl)-AMP", "C03912 
(S)-1-Pyrroline-5-carboxylate" ] assert_equal(expected, @obj.compounds_as_strings) end def test_compounds_as_hash expected = { "C02362"=>"2-Oxosuccinamate", "C01042"=>"N-Acetyl-L-aspartate", "C00041"=>"L-Alanine", "C03912"=>"(S)-1-Pyrroline-5-carboxylate", "C03406"=>"N-(L-Arginino)succinate", "C00438"=>"N-Carbamoyl-L-aspartate", "C00152"=>"L-Asparagine", "C00064"=>"L-Glutamine", "C00042"=>"Succinate", "C00352"=>"D-Glucosamine 6-phosphate", "C00022"=>"Pyruvate", "C03794"=>"N6-(1,2-Dicarboxyethyl)-AMP", "C03090"=>"5-Phosphoribosylamine", "C00232"=>"Succinate semialdehyde", "C00122"=>"Fumarate", "C00036"=>"Oxaloacetate", "C00025"=>"L-Glutamate", "C00014"=>"NH3", "C00334"=>"4-Aminobutanoate", "C00169"=>"Carbamoyl phosphate", "C00026"=>"2-Oxoglutarate", "C00940"=>"2-Oxoglutaramate", "C00049"=>"L-Aspartate", "C00402"=>"D-Aspartate" } assert_equal(expected, @obj.compounds_as_hash) end def test_rel_pathways_as_strings expected = [ "rn00010 Glycolysis / Gluconeogenesis", "rn00020 Citrate cycle (TCA cycle)", "rn00230 Purine metabolism", "rn00240 Pyrimidine metabolism", "rn00253 Tetracycline biosynthesis", "rn00260 Glycine, serine and threonine metabolism", "rn00300 Lysine biosynthesis", "rn00330 Arginine and proline metabolism", "rn00340 Histidine metabolism", "rn00410 beta-Alanine metabolism", "rn00460 Cyanoamino acid metabolism", "rn00471 D-Glutamine and D-glutamate metabolism", "rn00473 D-Alanine metabolism", "rn00480 Glutathione metabolism", "rn00650 Butanoate metabolism", "rn00660 C5-Branched dibasic acid metabolism", "rn00760 Nicotinate and nicotinamide metabolism", "rn00770 Pantothenate and CoA biosynthesis", "rn00860 Porphyrin and chlorophyll metabolism", "rn00910 Nitrogen metabolism" ] assert_equal(expected, @obj.rel_pathways_as_strings) end def test_rel_pathways_as_hash expected = { "rn00770"=>"Pantothenate and CoA biosynthesis", "rn00660"=>"C5-Branched dibasic acid metabolism", "rn00473"=>"D-Alanine metabolism", "rn00330"=>"Arginine and proline metabolism", 
"rn00253"=>"Tetracycline biosynthesis", "rn00760"=>"Nicotinate and nicotinamide metabolism", "rn00650"=>"Butanoate metabolism", "rn00860"=>"Porphyrin and chlorophyll metabolism", "rn00410"=>"beta-Alanine metabolism", "rn00300"=>"Lysine biosynthesis", "rn00480"=>"Glutathione metabolism", "rn00260"=>"Glycine, serine and threonine metabolism", "rn00910"=>"Nitrogen metabolism", "rn00471"=>"D-Glutamine and D-glutamate metabolism", "rn00460"=>"Cyanoamino acid metabolism", "rn00240"=>"Pyrimidine metabolism", "rn00020"=>"Citrate cycle (TCA cycle)", "rn00340"=>"Histidine metabolism", "rn00230"=>"Purine metabolism", "rn00010"=>"Glycolysis / Gluconeogenesis"} assert_equal(expected, @obj.rel_pathways_as_hash) end def test_ko_pathway assert_equal("ko00250", @obj.ko_pathway) end end #class TestBioKeggPathway_rn00250 class TestBioKEGGPATHWAY_ec00072 < Test::Unit::TestCase def setup filename = File.join(BioRubyTestDataPath, 'KEGG', 'ec00072.pathway') @obj = Bio::KEGG::PATHWAY.new(File.read(filename)) end def test_dblinks_as_hash assert_equal({}, @obj.dblinks_as_hash) end def test_pathways_as_hash expected = {"ec00072"=>"Synthesis and degradation of ketone bodies"} assert_equal(expected, @obj.pathways_as_hash) end def test_orthologs_as_hash assert_equal({}, @obj.orthologs_as_hash) end def test_genes_as_hash assert_equal({}, @obj.genes_as_hash) end def test_references assert_equal([], @obj.references) end def test_modules_as_hash expected = { "M00177" => "Ketone body biosynthesis, acetyl-CoA => acetoacetate/3-hydroxybutyrate/acetone [PATH:ec00072]" } assert_equal(expected, @obj.modules_as_hash) end def test_new assert_kind_of(Bio::KEGG::PATHWAY, @obj) end def test_entry_id assert_equal("ec00072", @obj.entry_id) end def test_name expected = "Synthesis and degradation of ketone bodies" assert_equal(expected, @obj.name) end def test_description assert_equal("", @obj.description) end def test_keggclass expected = "Metabolism; Lipid Metabolism" assert_equal(expected, @obj.keggclass) end 
def test_pathways_as_strings expected = ["ec00072 Synthesis and degradation of ketone bodies"] assert_equal(expected, @obj.pathways_as_strings) end def test_modules_as_strings expected = ["M00177 Ketone body biosynthesis, acetyl-CoA => acetoacetate/3-hydroxybutyrate/acetone [PATH:ec00072]"] assert_equal(expected, @obj.modules_as_strings) end def test_diseases_as_strings assert_equal([], @obj.diseases_as_strings) end def test_diseases_as_hash assert_equal({}, @obj.diseases_as_hash) end def test_dblinks_as_strings assert_equal([], @obj.dblinks_as_strings) end def test_orthologs_as_strings assert_equal([], @obj.orthologs_as_strings) end def test_organism assert_equal("", @obj.organism) end def test_genes_as_strings assert_equal([], @obj.genes_as_strings) end def test_enzymes_as_strings expected = [ "1.1.1.30", "2.3.1.9", "2.3.3.10", "2.8.3.5", "4.1.1.4", "4.1.3.4" ] assert_equal(expected, @obj.enzymes_as_strings) end def test_reactions_as_strings assert_equal([], @obj.reactions_as_strings) end def test_reactions_as_hash assert_equal({}, @obj.reactions_as_hash) end def test_compounds_as_strings expected = [ "C00024 Acetyl-CoA", "C00164 Acetoacetate", "C00207 Acetone", "C00332 Acetoacetyl-CoA", "C00356 (S)-3-Hydroxy-3-methylglutaryl-CoA", "C01089 (R)-3-Hydroxybutanoate" ] assert_equal(expected, @obj.compounds_as_strings) end def test_compounds_as_hash expected = { "C00207"=>"Acetone", "C00164"=>"Acetoacetate", "C01089"=>"(R)-3-Hydroxybutanoate", "C00332"=>"Acetoacetyl-CoA", "C00024"=>"Acetyl-CoA", "C00356"=>"(S)-3-Hydroxy-3-methylglutaryl-CoA" } assert_equal(expected, @obj.compounds_as_hash) end def test_rel_pathways_as_strings expected = [ "ec00010 Glycolysis / Gluconeogenesis", "ec00071 Fatty acid metabolism", "ec00620 Pyruvate metabolism", "ec00650 Butanoate metabolism" ] assert_equal(expected, @obj.rel_pathways_as_strings) end def test_rel_pathways_as_hash expected = { "ec00620"=>"Pyruvate metabolism", "ec00071"=>"Fatty acid metabolism", "ec00010"=>"Glycolysis / 
Gluconeogenesis", "ec00650"=>"Butanoate metabolism" } assert_equal(expected, @obj.rel_pathways_as_hash) end def test_ko_pathway assert_equal("ko00072", @obj.ko_pathway) end end #class TestBioKEGGPATHWAY_ec00072 class TestBioKEGGPATHWAY_hsa00790 < Test::Unit::TestCase def setup filename = File.join(BioRubyTestDataPath, 'KEGG', 'hsa00790.pathway') @obj = Bio::KEGG::PATHWAY.new(File.read(filename)) end def test_dblinks_as_hash assert_equal({"GO"=>["0046656"]}, @obj.dblinks_as_hash) end def test_pathways_as_hash expected = {"hsa00790"=>"Folate biosynthesis"} assert_equal(expected, @obj.pathways_as_hash) end def test_orthologs_as_hash assert_equal({}, @obj.orthologs_as_hash) end def test_genes_as_hash expected = { "248" => "ALPI, IAP [KO:K01077] [EC:3.1.3.1]", "6697" => "SPR, SDR38C1 [KO:K00072] [EC:1.1.1.153]", "249" => "ALPL, AP-TNAP, APTNAP, FLJ40094, FLJ93059, HOPS, MGC161443, MGC167935, TNAP, TNSALP [KO:K01077] [EC:3.1.3.1]", "2356" => "FPGS [KO:K01930] [EC:6.3.2.17]", "250" => "ALPP, ALP, FLJ61142, PALP, PLAP [KO:K01077] [EC:3.1.3.1]", "1719" => "DHFR, DHFRP1, DYR [KO:K00287] [EC:1.5.1.3]", "251" => "ALPPL2, ALPG, ALPPL, GCAP [KO:K01077] [EC:3.1.3.1]", "2643" => "GCH1, DYT14, DYT5, DYT5a, GCH, GTP-CH-1, GTPCH1, HPABH4B [KO:K01495] [EC:3.5.4.16]", "8836" => "GGH, GH [KO:K01307] [EC:3.4.19.9]", "5860" => "QDPR, DHPR, FLJ42391, PKU2, SDR33C1 [KO:K00357] [EC:1.5.1.34]", "5805" => "PTS, FLJ97081, PTPS [KO:K01737] [EC:4.2.3.12]" } assert_equal(expected, @obj.genes_as_hash) end def test_references assert_equal([], @obj.references) end def test_modules_as_hash expected = { "M00251"=>"Folate biosynthesis, GTP => THF [PATH:hsa00790]", "M00304"=>"Methanogenesis [PATH:hsa00790]" } assert_equal(expected, @obj.modules_as_hash) end def test_new assert_instance_of(Bio::KEGG::PATHWAY, @obj) end def test_entry_id assert_equal("hsa00790", @obj.entry_id) end def test_name expected = "Folate biosynthesis - Homo sapiens (human)" assert_equal(expected, @obj.name) end def 
test_description assert_equal("", @obj.description) end def test_keggclass expected = "Metabolism; Metabolism of Cofactors and Vitamins" assert_equal(expected, @obj.keggclass) end def test_pathways_as_strings expected = ["hsa00790 Folate biosynthesis"] assert_equal(expected, @obj.pathways_as_strings) end def test_modules_as_strings expected = ["M00251 Folate biosynthesis, GTP => THF [PATH:hsa00790]", "M00304 Methanogenesis [PATH:hsa00790]"] assert_equal(expected, @obj.modules_as_strings) end def test_diseases_as_strings expected = [ "H00167 Phenylketonuria (PKU)", "H00213 Hypophosphatasia" ] assert_equal(expected, @obj.diseases_as_strings) end def test_diseases_as_hash expected = { "H00167"=>"Phenylketonuria (PKU)", "H00213"=>"Hypophosphatasia" } assert_equal(expected, @obj.diseases_as_hash) end def test_dblinks_as_strings assert_equal(["GO: 0046656"], @obj.dblinks_as_strings) end def test_orthologs_as_strings assert_equal([], @obj.orthologs_as_strings) end def test_organism expected = "Homo sapiens (human) [GN:hsa]" assert_equal(expected, @obj.organism) end def test_genes_as_strings expected = [ "2643 GCH1, DYT14, DYT5, DYT5a, GCH, GTP-CH-1, GTPCH1, HPABH4B [KO:K01495] [EC:3.5.4.16]", "248 ALPI, IAP [KO:K01077] [EC:3.1.3.1]", "249 ALPL, AP-TNAP, APTNAP, FLJ40094, FLJ93059, HOPS, MGC161443, MGC167935, TNAP, TNSALP [KO:K01077] [EC:3.1.3.1]", "250 ALPP, ALP, FLJ61142, PALP, PLAP [KO:K01077] [EC:3.1.3.1]", "251 ALPPL2, ALPG, ALPPL, GCAP [KO:K01077] [EC:3.1.3.1]", "1719 DHFR, DHFRP1, DYR [KO:K00287] [EC:1.5.1.3]", "2356 FPGS [KO:K01930] [EC:6.3.2.17]", "8836 GGH, GH [KO:K01307] [EC:3.4.19.9]", "5805 PTS, FLJ97081, PTPS [KO:K01737] [EC:4.2.3.12]", "6697 SPR, SDR38C1 [KO:K00072] [EC:1.1.1.153]", "5860 QDPR, DHPR, FLJ42391, PKU2, SDR33C1 [KO:K00357] [EC:1.5.1.34]" ] assert_equal(expected, @obj.genes_as_strings) end def test_enzymes_as_strings assert_equal([], @obj.enzymes_as_strings) end def test_reactions_as_strings assert_equal([], @obj.reactions_as_strings) end def 
test_reactions_as_hash assert_equal({}, @obj.reactions_as_hash) end def test_compounds_as_strings expected = [ "C00044 GTP", "C00101 Tetrahydrofolate", "C00251 Chorismate", "C00266 Glycolaldehyde", "C00268 Dihydrobiopterin", "C00272 Tetrahydrobiopterin", "C00415 Dihydrofolate", "C00504 Folate", "C00568 4-Aminobenzoate", "C00921 Dihydropteroate", "C01217 5,6,7,8-Tetrahydromethanopterin", "C01300 2-Amino-4-hydroxy-6-hydroxymethyl-7,8-dihydropteridine", "C03541 Tetrahydrofolyl-[Glu](n)", "C03684 6-Pyruvoyltetrahydropterin", "C04244 6-Lactoyl-5,6,7,8-tetrahydropterin", "C04807 2-Amino-7,8-dihydro-4-hydroxy-6-(diphosphooxymethyl)pteridine", "C04874 2-Amino-4-hydroxy-6-(D-erythro-1,2,3-trihydroxypropyl)-7,8-dihydropteridine", "C04895 2-Amino-4-hydroxy-6-(erythro-1,2,3-trihydroxypropyl)dihydropteridine triphosphate", "C05922 Formamidopyrimidine nucleoside triphosphate", "C05923 2,5-Diaminopyrimidine nucleoside triphosphate", "C05924 Molybdopterin", "C05925 Dihydroneopterin phosphate", "C05926 Neopterin", "C05927 7,8-Dihydromethanopterin", "C06148 2,5-Diamino-6-(5'-triphosphoryl-3',4'-trihydroxy-2'-oxopentyl)-amino-4-oxopyrimidine", "C06149 6-(3'-Triphosphoryl-1'-methylglyceryl)-7-methyl-7,8-dihydrobiopterin", "C09332 Tetrahydrofolyl-[Glu](2)", "C11355 4-Amino-4-deoxychorismate" ] assert_equal(expected, @obj.compounds_as_strings) end def test_compounds_as_hash expected = { "C05925"=>"Dihydroneopterin phosphate", "C04244"=>"6-Lactoyl-5,6,7,8-tetrahydropterin", "C01217"=>"5,6,7,8-Tetrahydromethanopterin", "C00568"=>"4-Aminobenzoate", "C05926"=>"Neopterin", "C00921"=>"Dihydropteroate", "C00415"=>"Dihydrofolate", "C00272"=>"Tetrahydrobiopterin", "C05927"=>"7,8-Dihydromethanopterin", "C04895"=> "2-Amino-4-hydroxy-6-(erythro-1,2,3-trihydroxypropyl)dihydropteridine triphosphate", "C04807"=>"2-Amino-7,8-dihydro-4-hydroxy-6-(diphosphooxymethyl)pteridine", "C00504"=>"Folate", "C00251"=>"Chorismate", "C06148"=> 
"2,5-Diamino-6-(5'-triphosphoryl-3',4'-trihydroxy-2'-oxopentyl)-amino-4-oxopyrimidine", "C04874"=> "2-Amino-4-hydroxy-6-(D-erythro-1,2,3-trihydroxypropyl)-7,8-dihydropteridine", "C06149"=> "6-(3'-Triphosphoryl-1'-methylglyceryl)-7-methyl-7,8-dihydrobiopterin", "C00044"=>"GTP", "C03684"=>"6-Pyruvoyltetrahydropterin", "C03541"=>"Tetrahydrofolyl-[Glu](n)", "C01300"=>"2-Amino-4-hydroxy-6-hydroxymethyl-7,8-dihydropteridine", "C00266"=>"Glycolaldehyde", "C00101"=>"Tetrahydrofolate", "C05922"=>"Formamidopyrimidine nucleoside triphosphate", "C00268"=>"Dihydrobiopterin", "C11355"=>"4-Amino-4-deoxychorismate", "C05923"=>"2,5-Diaminopyrimidine nucleoside triphosphate", "C09332"=>"Tetrahydrofolyl-[Glu](2)", "C05924"=>"Molybdopterin" } assert_equal(expected, @obj.compounds_as_hash) end def test_rel_pathways_as_strings expected = [ "hsa00230 Purine metabolism", "hsa00400 Phenylalanine, tyrosine and tryptophan biosynthesis", "hsa00670 One carbon pool by folate", "hsa00680 Methane metabolism" ] assert_equal(expected, @obj.rel_pathways_as_strings) end def test_rel_pathways_as_hash expected = { "hsa00680"=>"Methane metabolism", "hsa00670"=>"One carbon pool by folate", "hsa00230"=>"Purine metabolism", "hsa00400"=>"Phenylalanine, tyrosine and tryptophan biosynthesis" } assert_equal(expected, @obj.rel_pathways_as_hash) end def test_ko_pathway assert_equal("ko00790", @obj.ko_pathway) end end #class TestBioKEGGPATHWAY_hsa00790 class TestBioKEGGPATHWAY_ko00312 < Test::Unit::TestCase def setup filename = File.join(BioRubyTestDataPath, 'KEGG', 'ko00312.pathway') @obj = Bio::KEGG::PATHWAY.new(File.read(filename)) end def test_dblinks_as_hash assert_equal({}, @obj.dblinks_as_hash) end def test_pathways_as_hash expected = {"ko00312"=>"beta-Lactam resistance"} assert_equal(expected, @obj.pathways_as_hash) end def test_orthologs_as_hash expected = { "K02545"=>"penicillin-binding protein 2 prime", "K02172"=>"bla regulator protein blaR1", "K02546"=>"methicillin resistance regulatory protein", 
"K02547"=>"methicillin resistance protein", "K02352"=>"drp35", "K01467"=>"beta-lactamase [EC:3.5.2.6]", "K02171"=>"penicillinase repressor" } assert_equal(expected, @obj.orthologs_as_hash) end def test_genes_as_hash assert_equal({}, @obj.genes_as_hash) end def test_references assert_equal([], @obj.references) end def test_modules_as_hash assert_equal({}, @obj.modules_as_hash) end def test_new assert_instance_of(Bio::KEGG::PATHWAY, @obj) end def test_entry_id assert_equal("ko00312", @obj.entry_id) end def test_name assert_equal("beta-Lactam resistance", @obj.name) end def test_description assert_equal("", @obj.description) end def test_keggclass expected = "Metabolism; Biosynthesis of Other Secondary Metabolites" assert_equal(expected, @obj.keggclass) end def test_pathways_as_strings expected = ["ko00312 beta-Lactam resistance"] assert_equal(expected, @obj.pathways_as_strings) end def test_modules_as_strings assert_equal([], @obj.modules_as_strings) end def test_diseases_as_strings assert_equal([], @obj.diseases_as_strings) end def test_diseases_as_hash assert_equal({}, @obj.diseases_as_hash) end def test_dblinks_as_strings assert_equal([], @obj.dblinks_as_strings) end def test_orthologs_as_strings expected = [ "K02172 bla regulator protein blaR1", "K02171 penicillinase repressor", "K01467 beta-lactamase [EC:3.5.2.6]", "K02352 drp35", "K02547 methicillin resistance protein", "K02546 methicillin resistance regulatory protein", "K02545 penicillin-binding protein 2 prime" ] assert_equal(expected, @obj.orthologs_as_strings) end def test_organism assert_equal("", @obj.organism) end def test_genes_as_strings assert_equal([], @obj.genes_as_strings) end def test_enzymes_as_strings assert_equal([], @obj.enzymes_as_strings) end def test_reactions_as_strings assert_equal([], @obj.reactions_as_strings) end def test_reactions_as_hash assert_equal({}, @obj.reactions_as_hash) end def test_compounds_as_strings expected = ["C00039 DNA", "C03438 beta-Lactam antibiotics"] 
assert_equal(expected, @obj.compounds_as_strings) end def test_compounds_as_hash expected = {"C03438"=>"beta-Lactam antibiotics", "C00039"=>"DNA"} assert_equal(expected, @obj.compounds_as_hash) end def test_rel_pathways_as_strings expected = ["ko00311 Penicillin and cephalosporin biosynthesis"] assert_equal(expected, @obj.rel_pathways_as_strings) end def test_rel_pathways_as_hash expected = {"ko00311"=>"Penicillin and cephalosporin biosynthesis"} assert_equal(expected, @obj.rel_pathways_as_hash) end def test_ko_pathway assert_equal("", @obj.ko_pathway) end end #class TestBioKEGGPATHWAY_ko00312 end #module Bio bio-1.4.3.0001/test/unit/bio/db/kegg/test_module.rb0000644000004100000410000002153212200110570021550 0ustar www-datawww-data# # test/unit/bio/db/kegg/test_module.rb - Unit test for Bio::KEGG::MODULE # # Copyright:: Copyright (C) 2010 Kozo Nishida # Copyright (C) 2010 Naohisa Goto # License:: The Ruby License # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/kegg/module' module Bio class TestKeggModule < Test::Unit::TestCase def setup filename = File.join(BioRubyTestDataPath, 'KEGG/M00118.module') entry = File.read(filename) @obj = Bio::KEGG::MODULE.new(entry) end def test_new assert_instance_of(Bio::KEGG::MODULE, @obj) end def test_entry_id assert_equal('M00118', @obj.entry_id) end def test_name assert_equal('Pentose interconversion, arabinose/ribulose/xylulose/xylose', @obj.name) end def test_definition expected = "K00011 K01804 K00853 (K01786,K03080) K03331 K05351 K00854 K00011 K01805 K01783 (K00853,K00875) K00039" assert_equal(expected, @obj.definition) end def test_keggclass assert_equal('Metabolism; Central metabolism; Other carbohydrate metabolism', @obj.keggclass) end def test_pathways_as_strings expected = 
["ko00040(K00011+K01804+K00853+K01786+K03080+K03331+K05351+K00854+K00011+K01805+K01783+K00853+K00875+K00039) Pentose and glucuronate interconversions"] assert_equal(expected, @obj.pathways_as_strings) end def test_pathways_as_hash expected = { "ko00040(K00011+K01804+K00853+K01786+K03080+K03331+K05351+K00854+K00011+K01805+K01783+K00853+K00875+K00039)" => "Pentose and glucuronate interconversions" } assert_equal(expected, @obj.pathways_as_hash) end def test_pathways expected = { "ko00040(K00011+K01804+K00853+K01786+K03080+K03331+K05351+K00854+K00011+K01805+K01783+K00853+K00875+K00039)" => "Pentose and glucuronate interconversions" } assert_equal(expected, @obj.pathways) end def test_orthologs_as_strings expected = [ "K00011 aldehyde reductase [EC:1.1.1.21] [RN:R01758 R01759]", "K01804 L-arabinose isomerase [EC:5.3.1.4] [RN:R01761]", "K00853 L-ribulokinase [EC:2.7.1.16] [RN:R02439]", "K01786,K03080 L-ribulose-5-phosphate 4-epimerase [EC:5.1.3.4] [RN:R05850]", "K03331 L-xylulose reductase [EC:1.1.1.10] [RN:R01904]", "K05351 D-xylulose reductase [EC:1.1.1.9] [RN:R01896]", "K00854 xylulokinase [EC:2.7.1.17] [RN:R01639]", "K00011 aldehyde reductase [EC:1.1.1.21] [RN:R01431]", "K01805 xylose isomerase [EC:5.3.1.5] [RN:R01432]", "K01783 ribulose-phosphate 3-epimerase [EC:5.1.3.1] [RN:R01529]", "K00853,K00875 ribulokinase [EC:2.7.1.16 2.7.1.47] [RN:R01526]", "K00039 ribitol 2-dehydrogenase [EC:1.1.1.56] [RN:R01895]" ] assert_equal(expected, @obj.orthologs_as_strings) end def test_orthologs_as_hash expected = { "K00039" => "ribitol 2-dehydrogenase [EC:1.1.1.56] [RN:R01895]", "K00853" => "L-ribulokinase [EC:2.7.1.16] [RN:R02439]", "K00854" => "xylulokinase [EC:2.7.1.17] [RN:R01639]", "K05351" => "D-xylulose reductase [EC:1.1.1.9] [RN:R01896]", "K00853,K00875" => "ribulokinase [EC:2.7.1.16 2.7.1.47] [RN:R01526]", "K03331" => "L-xylulose reductase [EC:1.1.1.10] [RN:R01904]", "K00011" => "aldehyde reductase [EC:1.1.1.21] [RN:R01431]", "K01786,K03080" => "L-ribulose-5-phosphate 
4-epimerase [EC:5.1.3.4] [RN:R05850]", "K01804" => "L-arabinose isomerase [EC:5.3.1.4] [RN:R01761]", "K01783" => "ribulose-phosphate 3-epimerase [EC:5.1.3.1] [RN:R01529]", "K01805" => "xylose isomerase [EC:5.3.1.5] [RN:R01432]" } assert_equal(expected, @obj.orthologs_as_hash) end def test_orthologs expected = { "K00039" => "ribitol 2-dehydrogenase [EC:1.1.1.56] [RN:R01895]", "K00853" => "L-ribulokinase [EC:2.7.1.16] [RN:R02439]", "K00854" => "xylulokinase [EC:2.7.1.17] [RN:R01639]", "K05351" => "D-xylulose reductase [EC:1.1.1.9] [RN:R01896]", "K00853,K00875" => "ribulokinase [EC:2.7.1.16 2.7.1.47] [RN:R01526]", "K03331" => "L-xylulose reductase [EC:1.1.1.10] [RN:R01904]", "K00011" => "aldehyde reductase [EC:1.1.1.21] [RN:R01431]", "K01786,K03080" => "L-ribulose-5-phosphate 4-epimerase [EC:5.1.3.4] [RN:R05850]", "K01804" => "L-arabinose isomerase [EC:5.3.1.4] [RN:R01761]", "K01783" => "ribulose-phosphate 3-epimerase [EC:5.1.3.1] [RN:R01529]", "K01805" => "xylose isomerase [EC:5.3.1.5] [RN:R01432]" } assert_equal(expected, @obj.orthologs) end def test_orthologs_as_array expected = [ "K00011", "K00039", "K00853", "K00854", "K00875", "K01783", "K01786", "K01804", "K01805", "K03080", "K03331", "K05351" ] assert_equal(expected, @obj.orthologs_as_array) end def test_reactions_as_strings expected = [ "R01903 C00312 -> C00532", "R01758,R01759 C00532 -> C00259", "R01761 C00259 -> C00508", "R02439 C00508 -> C01101", "R05850 C01101 -> C00231", "R01904 C00312 -> C00379", "R01896 C00379 -> C00310", "R01639 C00310 -> C00231", "R01431 C00379 -> C00181", "R01432 C00181 -> C00310", "R01529 C00199 -> C00231", "R01526 C00231 -> C00309", "R01895 C00309 -> C00474" ] assert_equal(expected, @obj.reactions_as_strings) end def test_reactions_as_hash expected = { "R01529" => "C00199 -> C00231", "R01431" => "C00379 -> C00181", "R01639" => "C00310 -> C00231", "R01761" => "C00259 -> C00508", "R01903" => "C00312 -> C00532", "R01904" => "C00312 -> C00379", "R01432" => "C00181 -> C00310", 
"R01758,R01759" => "C00532 -> C00259", "R01895" => "C00309 -> C00474", "R01896" => "C00379 -> C00310", "R02439" => "C00508 -> C01101", "R05850" => "C01101 -> C00231", "R01526" => "C00231 -> C00309" } assert_equal(expected, @obj.reactions_as_hash) end def test_reactions expected = { "R01529" => "C00199 -> C00231", "R01431" => "C00379 -> C00181", "R01639" => "C00310 -> C00231", "R01761" => "C00259 -> C00508", "R01903" => "C00312 -> C00532", "R01904" => "C00312 -> C00379", "R01432" => "C00181 -> C00310", "R01758,R01759" => "C00532 -> C00259", "R01895" => "C00309 -> C00474", "R01896" => "C00379 -> C00310", "R02439" => "C00508 -> C01101", "R05850" => "C01101 -> C00231", "R01526" => "C00231 -> C00309" } assert_equal(expected, @obj.reactions) end def test_compounds_as_strings expected = [ "C00312 L-Xylulose", "C00532 L-Arabitol", "C00259 L-Arabinose", "C00508 L-Ribulose", "C01101 L-Ribulose 5-phosphate", "C00231 D-Xylulose 5-phosphate", "C00379 Xylitol", "C00310 D-Xylulose", "C00181 D-Xylose", "C00199 D-Ribulose 5-phosphate", "C00309 D-Ribulose", "C00474 Ribitol" ] assert_equal(expected, @obj.compounds_as_strings) end def test_compounds_as_hash expected = { "C00231" => "D-Xylulose 5-phosphate", "C00474" => "Ribitol", "C00309" => "D-Ribulose", "C00199" => "D-Ribulose 5-phosphate", "C01101" => "L-Ribulose 5-phosphate", "C00310" => "D-Xylulose", "C00508" => "L-Ribulose", "C00532" => "L-Arabitol", "C00312" => "L-Xylulose", "C00181" => "D-Xylose", "C00379" => "Xylitol", "C00259" => "L-Arabinose" } assert_equal(expected, @obj.compounds_as_hash) end def test_compounds expected = { "C00231" => "D-Xylulose 5-phosphate", "C00474" => "Ribitol", "C00309" => "D-Ribulose", "C00199" => "D-Ribulose 5-phosphate", "C01101" => "L-Ribulose 5-phosphate", "C00310" => "D-Xylulose", "C00508" => "L-Ribulose", "C00532" => "L-Arabitol", "C00312" => "L-Xylulose", "C00181" => "D-Xylose", "C00379" => "Xylitol", "C00259" => "L-Arabinose" } assert_equal(expected, @obj.compounds) end end end 
bio-1.4.3.0001/test/unit/bio/db/test_gff.rb0000644000004100000410000012706112200110570020114 0ustar www-datawww-data# # test/unit/bio/db/test_gff.rb - Unit test for Bio::GFF # # Copyright:: Copyright (C) 2005, 2008 # Mitsuteru Nakao # Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'digest/sha1' require 'bio/db/gff' module Bio class TestGFF < Test::Unit::TestCase def setup data = <'"Chromosome I Centromere"', "Gene"=>'"CEN1"'} assert_equal(at, @obj.attributes) end def test_comment assert_equal(nil, @obj.comment) end end # class TestGFFRecord class TestGFFRecordConstruct < Test::Unit::TestCase def setup @obj = Bio::GFF.new end def test_add_seqname name = "test" record = Bio::GFF::Record.new("") record.seqname = name @obj.records << record assert_equal(name, @obj.records[0].seqname) end end # class TestGFFRecordConstruct class TestGFF2 < Test::Unit::TestCase def setup data = < Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55']), 'E_value' => '0.0003', 'Align' => Bio::GFF::GFF2::Record::Value.new(['101', '11']), 'Comment' => Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"]), 'Note' => '', 'Misc' => 'IdString' } assert_equal(hash, @obj.attributes_to_hash) end def test_attributes attributes = [ [ 'Target', Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55']) ], [ 'E_value', '0.0003' ], [ 'Align', Bio::GFF::GFF2::Record::Value.new(['101', '11']) ], [ 'Align', Bio::GFF::GFF2::Record::Value.new(['179', '36']) ], [ 'Comment', Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"]) ], [ 
'Note', '' ], [ 'Misc', 'IdString' ], [ 'Misc', 'free text' ], [ 'Misc', '5678' ] ] assert_equal(attributes, @obj.attributes) end def test_attribute val_Target = Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55']) assert_equal(val_Target, @obj.attribute('Target')) assert_equal('0.0003', @obj.attribute('E_value')) val_Align0 = Bio::GFF::GFF2::Record::Value.new(['101', '11']) val_Align1 = Bio::GFF::GFF2::Record::Value.new(['179', '36']) assert_equal(val_Align0, @obj.attribute('Align')) val_Comment = Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"]) assert_equal(val_Comment, @obj.attribute('Comment')) assert_equal('', @obj.attribute('Note')) assert_equal('IdString', @obj.attribute('Misc')) end def test_attribute_nonexistent assert_equal(nil, @obj.attribute('NonExistent')) end def test_get_attribute val_Target = Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55']) assert_equal(val_Target, @obj.get_attribute('Target')) assert_equal('0.0003', @obj.get_attribute('E_value')) val_Align0 = Bio::GFF::GFF2::Record::Value.new(['101', '11']) val_Align1 = Bio::GFF::GFF2::Record::Value.new(['179', '36']) assert_equal(val_Align0, @obj.get_attribute('Align')) val_Comment = Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"]) assert_equal(val_Comment, @obj.get_attribute('Comment')) assert_equal('', @obj.get_attribute('Note')) assert_equal('IdString', @obj.get_attribute('Misc')) end def test_get_attribute_nonexistent assert_equal(nil, @obj.get_attribute('NonExistent')) end def test_get_attributes val_Target = Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55']) assert_equal([ val_Target ], @obj.get_attributes('Target')) assert_equal([ '0.0003' ], @obj.get_attributes('E_value')) val_Align0 = 
Bio::GFF::GFF2::Record::Value.new(['101', '11']) val_Align1 = Bio::GFF::GFF2::Record::Value.new(['179', '36']) assert_equal([ val_Align0, val_Align1 ], @obj.get_attributes('Align')) val_Comment = Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"]) assert_equal([ val_Comment ], @obj.get_attributes('Comment')) assert_equal([ '' ], @obj.get_attributes('Note')) assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) end def test_get_attributes_nonexistent assert_equal([], @obj.get_attributes('NonExistent')) end def test_set_attribute assert_equal('0.0003', @obj.attribute('E_value')) assert_equal('1e-10', @obj.set_attribute('E_value', '1e-10')) assert_equal('1e-10', @obj.attribute('E_value')) end def test_set_attribute_multiple assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal('Replaced', @obj.set_attribute('Misc', 'Replaced')) assert_equal([ 'Replaced', 'free text', '5678' ], @obj.get_attributes('Misc')) end def test_set_attribute_nonexistent assert_equal(nil, @obj.attribute('NonExistent')) assert_equal('test', @obj.set_attribute('NonExistent', 'test')) assert_equal('test', @obj.attribute('NonExistent')) end def test_replace_attributes assert_equal([ '0.0003' ], @obj.get_attributes('E_value')) assert_equal(@obj, @obj.replace_attributes('E_value', '1e-10')) assert_equal([ '1e-10' ], @obj.get_attributes('E_value')) end def test_replace_attributes_single_multiple assert_equal([ '0.0003' ], @obj.get_attributes('E_value')) assert_equal(@obj, @obj.replace_attributes('E_value', '1e-10', '3.14', '2.718')) assert_equal([ '1e-10', '3.14', '2.718' ], @obj.get_attributes('E_value')) end def test_replace_attributes_multiple_single assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal(@obj, @obj.replace_attributes('Misc', 'Replaced_All')) assert_equal([ 
'Replaced_All' ], @obj.get_attributes('Misc')) end def test_replace_attributes_multiple_multiple_two assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal(@obj, @obj.replace_attributes('Misc', 'Replaced', 'test2')) assert_equal([ 'Replaced', 'test2' ], @obj.get_attributes('Misc')) end def test_replace_attributes_multiple_multiple_same assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal(@obj, @obj.replace_attributes('Misc', 'Replaced', 'test2', 'test3')) assert_equal([ 'Replaced', 'test2', 'test3' ], @obj.get_attributes('Misc')) end def test_replace_attributes_multiple_multiple_over assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal(@obj, @obj.replace_attributes('Misc', 'Replaced', 'test2', 'test3', '4')) assert_equal([ 'Replaced', 'test2', 'test3', '4' ], @obj.get_attributes('Misc')) end def test_replace_attributes_nonexistent assert_equal(nil, @obj.attribute('NonExistent')) assert_equal(@obj, @obj.replace_attributes('NonExistent', 'test')) assert_equal([ 'test' ], @obj.get_attributes('NonExistent')) end def test_replace_attributes_nonexistent_multiple assert_equal(nil, @obj.attribute('NonExistent')) assert_equal(@obj, @obj.replace_attributes('NonExistent', 'test', 'gff2', 'attr')) assert_equal([ 'test', 'gff2', 'attr' ], @obj.get_attributes('NonExistent')) end def test_delete_attribute assert_equal('0.0003', @obj.attribute('E_value')) assert_equal('0.0003', @obj.delete_attribute('E_value', '0.0003')) assert_equal(nil, @obj.attribute('E_value')) end def test_delete_attribute_nil assert_equal('0.0003', @obj.attribute('E_value')) assert_equal(nil, @obj.delete_attribute('E_value', '3')) assert_equal('0.0003', @obj.attribute('E_value')) end def test_delete_attribute_multiple assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal('free text', @obj.delete_attribute('Misc', 'free text')) assert_equal([ 'IdString', 
'5678' ], @obj.get_attributes('Misc')) end def test_delete_attribute_multiple2 assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal('IdString', @obj.delete_attribute('Misc', 'IdString')) assert_equal([ 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal('5678', @obj.delete_attribute('Misc', '5678')) assert_equal([ 'free text' ], @obj.get_attributes('Misc')) end def test_delete_attribute_multiple_nil assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal(nil, @obj.delete_attribute('Misc', 'test')) assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) end def test_delete_attribute_nonexistent assert_equal(nil, @obj.attribute('NonExistent')) assert_equal(nil, @obj.delete_attribute('NonExistent', 'test')) assert_equal([], @obj.get_attributes('NonExistent')) end def test_delete_attributes assert_equal('0.0003', @obj.attribute('E_value')) assert_equal(@obj, @obj.delete_attributes('E_value')) assert_equal(nil, @obj.attribute('E_value')) end def test_delete_attributes_multiple assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal(@obj, @obj.delete_attributes('Misc')) assert_equal([], @obj.get_attributes('Misc')) end def test_delete_attributes_nonexistent assert_equal(nil, @obj.attribute('NonExistent')) assert_equal(nil, @obj.delete_attributes('NonExistent')) assert_equal([], @obj.get_attributes('NonExistent')) end def test_sort_attributes_by_tag! 
tags = %w( Comment Align E_value Note ) assert_equal(@obj, @obj.sort_attributes_by_tag!(tags)) assert_equal(%w( Comment Align Align E_value Note Target Misc Misc Misc ), @obj.attributes.collect { |x| x[0] }) # check if the order of 'Misc' is not changed assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) end def test_sort_attributes_by_tag_bang_test2 tags = %w( E_value Misc Note Target ) assert_equal(@obj, @obj.sort_attributes_by_tag!(tags)) assert_equal(%w( E_value Misc Misc Misc Note Target Align Align Comment ), @obj.attributes.collect { |x| x[0] }) # check if the order of 'Misc' is not changed assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) end def test_sort_attributes_by_tag_bang_with_block assert_equal(@obj, @obj.sort_attributes_by_tag! { |x, y| x <=> y }) assert_equal(%w( Align Align Comment E_value Misc Misc Misc Note Target ), @obj.attributes.collect { |x| x[0] }) # check if the order of 'Misc' is not changed assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) end end #class TestGFF2Record class TestGFF2RecordEmpty < Test::Unit::TestCase def setup @obj = Bio::GFF::GFF2::Record.new('# test comment') end def test_comment_only? assert_equal(true, @obj.comment_only?) end def test_comment_only_false @obj.seqname = 'test' assert_equal(false, @obj.comment_only?) 
end def test_to_s assert_equal("# test comment\n", @obj.to_s) end def test_to_s_not_empty @obj.seqname = 'test' @obj.feature = 'region' @obj.start = 1 @obj.end = 100 assert_equal("test\t.\tregion\t1\t100\t.\t.\t.\t\t# test comment\n", @obj.to_s) @obj.add_attribute('Gene', 'unknown') assert_equal("test\t.\tregion\t1\t100\t.\t.\t.\tGene unknown\t# test comment\n", @obj.to_s) end def test_comment assert_equal(' test comment', @obj.comment) end def test_comment_eq assert_equal('changed the comment', @obj.comment = 'changed the comment') end end #class TestGFF2RecordEmpty class TestGFF2ComplexAttributes < Test::Unit::TestCase # The test string comes from the Popular genome annotation from the JGI. # ftp://ftp.jgi-psf.org/pub/JGI_data/Poplar/annotation/v1.1/Poptr1_1.JamboreeModels.gff.gz # Thanks to Tomoaki NISHIYAMA who picks up the example line. def test_attributes_case1 str = "LG_I\tJGI\tCDS\t11052\t11064\t.\t-\t0\tname \"grail3.0116000101\"; proteinId 639579; exonNumber 3\n" attributes = [ [ "name", "grail3.0116000101" ], [ "proteinId", "639579" ], [ "exonNumber", "3" ] ] record = Bio::GFF::GFF2::Record.new(str) assert_equal(attributes, record.attributes) end # The test string is modified from that of test_attributes_case1. def test_attributes_case2 str = "LG_I\tJGI\tCDS\t11052\t11064\t.\t-\t0\tname \"grail3.0116000101\"; proteinId 639579; exonNumber 3; Note \"Semicolons ; and \;, and quote \\\" can be OK\"; Comment \"This is the \\\"comment\\\"\"\n" attributes = [ [ "name", "grail3.0116000101" ], [ "proteinId", "639579" ], [ "exonNumber", "3" ], [ "Note", "Semicolons ; and ;, and quote \" can be OK" ], [ "Comment", "This is the \"comment\"" ] ] record = Bio::GFF::GFF2::Record.new(str) assert_equal(attributes, record.attributes) end def test_attributes_incompatible_backslash_semicolon # No special treatments for backslash-semicolon outside the free text. 
str =<test01 ACGAAGATTTGTATGACTGATTTATCCTGGACAGGCATTGGTCAGATGTCTCCTTCCGTATCGTCGTTTA GTTGCAAATCCGAGTGTTCGGGGGTATTGCTATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACA CCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGAT AATGGGTACTGCACCCCTCGTCCTGTAGAGACGTCACAGCCAACGTGCCTTCTTATCTTGATACATTAGT GCCCAAGAATGCGATCCCAGAAGTCTTGGTTCTAAAGTCGTCGGAAAGATTTGAGGAACTGCCATACAGC CCGTGGGTGAAACTGTCGACATCCATTGTGCGAATAGGCCTGCTAGTGAC END_OF_DATA @gff3 = Bio::GFF::GFF3.new(@data) end def test_const_version assert_equal(3, Bio::GFF::GFF3::VERSION) end def test_sequence_regions region = Bio::GFF::GFF3::SequenceRegion.new('test01', 1, 400) assert_equal([ region ], @gff3.sequence_regions) end def test_gff_version assert_equal('3', @gff3.gff_version) end def test_records assert_equal(7, @gff3.records.size) r_test01 = Bio::GFF::GFF3::Record.new('test01', 'RANDOM', 'contig', 1, 400, nil, '+', nil, [ ['ID', 'test01'], ['Note', 'this is test'] ]) r_mrna01 = Bio::GFF::GFF3::Record.new('test01', nil, 'mRNA', 101, 230, nil, '+', nil, [ ['ID', 'mrna01'], ['Name', 'testmRNA'], ['Note', 'this is test mRNA'] ]) r_exon01 = Bio::GFF::GFF3::Record.new('test01', nil, 'exon', 101, 160, nil, '+', nil, [ ['ID', 'exon01'], ['Name', 'exon01'], ['Alias', 'exon 1'], ['Parent', 'mrna01'], ['Parent', 'mrna01a'] ]) target = Bio::GFF::GFF3::Record::Target.new('EST101', 1, 21) gap = Bio::GFF::GFF3::Record::Gap.new('M8 D3 M6 I1 M6') r_match01 =Bio::GFF::GFF3::Record.new('test01', nil, 'Match', 101, 123, nil, nil, nil, [ ['ID', 'match01'], ['Name', 'match01'], ['Target', target], ['Gap', gap] ]) assert_equal(r_test01, @gff3.records[0]) assert_equal(r_mrna01, @gff3.records[1]) assert_equal(r_exon01, @gff3.records[3]) assert_equal(r_match01, @gff3.records[6]) end def test_sequences assert_equal(1, @gff3.sequences.size) assert_equal('test01', @gff3.sequences[0].entry_id) assert_equal('3510a3c4f66f9c2ab8d4d97446490aced7ed1fa4', Digest::SHA1.hexdigest(@gff3.sequences[0].seq.to_s)) end def test_to_s assert_equal(@data, 
@gff3.to_s) end end #class TestGFF3 class TestGFF3Record < Test::Unit::TestCase def setup data =<B%09C=100%25;d=e,f,g h', @obj.instance_eval { escape(str) }) end def test_escape_attribute str = @str assert_equal('A>B%09C%3D100%25%3Bd%3De%2Cf%2Cg h', @obj.instance_eval { escape_attribute(str) }) end def test_escape_seqid str = @str assert_equal('A%3EB%09C%3D100%25%3Bd%3De%2Cf%2Cg%20h', @obj.instance_eval { escape_seqid(str) }) end def test_unescape escaped_str = 'A%3EB%09C%3D100%25%3Bd%3De%2Cf%2Cg%20h' assert_equal(@str, @obj.instance_eval { unescape(escaped_str) }) end end #class TestGFF3RecordEscape class TestGFF3RecordTarget < Test::Unit::TestCase def setup @target = [ Bio::GFF::GFF3::Record::Target.new('ABCD1234', 123, 456, '+'), Bio::GFF::GFF3::Record::Target.new(">X Y=Z;P%,Q\tR", 78, 90), Bio::GFF::GFF3::Record::Target.new(nil, nil, nil), ] end def test_parse strings = [ 'ABCD1234 123 456 +', '%3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90', '' ] @target.each do |target| str = strings.shift assert_equal(target, Bio::GFF::GFF3::Record::Target.parse(str)) end end def test_target_id assert_equal('ABCD1234', @target[0].target_id) assert_equal(">X Y=Z;P%,Q\tR", @target[1].target_id) assert_equal(nil, @target[2].target_id) end def test_start assert_equal(123, @target[0].start) assert_equal(78, @target[1].start) assert_nil(@target[2].start) end def test_end assert_equal(456, @target[0].end) assert_equal(90, @target[1].end) assert_nil(@target[2].end) end def test_strand assert_equal('+', @target[0].strand) assert_nil(@target[1].strand) assert_nil(@target[2].strand) end def test_to_s assert_equal('ABCD1234 123 456 +', @target[0].to_s) assert_equal('%3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90', @target[1].to_s) assert_equal('. . 
.', @target[2].to_s) end end #class TestGFF3RecordTarget class TestGFF3RecordGap < Test::Unit::TestCase def setup # examples taken from http://song.sourceforge.net/gff3.shtml @gaps_src = [ 'M8 D3 M6 I1 M6', 'M3 I1 M2 F1 M4', 'M3 I1 M2 R1 M4' ] @gaps = @gaps_src.collect { |x| Bio::GFF::GFF3::Record::Gap.new(x) } end def test_to_s @gaps_src.each do |src| assert_equal(src, @gaps.shift.to_s) end end def test_eqeq gap = Bio::GFF::GFF3::Record::Gap.new('M8 D3 M6 I1 M6') assert(gap == @gaps[0]) assert_equal(false, gap == @gaps[1]) end def test_process_sequences_na ref = 'CAAGACCTAAACTGGATTCCAAT' tgt = 'CAAGACCTCTGGATATCCAAT' ref_aligned = 'CAAGACCTAAACTGGAT-TCCAAT' tgt_aligned = 'CAAGACCT---CTGGATATCCAAT' assert_equal([ ref_aligned, tgt_aligned ], @gaps[0].process_sequences_na(ref, tgt)) end def test_process_sequences_na_tooshort ref = 'CAAGACCTAAACTGGATTCCAA' tgt = 'CAAGACCTCTGGATATCCAA' assert_raise(RuntimeError) { @gaps[0].process_sequences_na(ref, tgt) } ref = 'c' tgt = 'c' assert_raise(RuntimeError) { @gaps[0].process_sequences_na(ref, tgt) } end def test_process_sequences_na_aa ref1 = 'atgaaggaggttattgaatgtcggcggt' tgt1 = 'MKEVVINVGG' ref1_aligned = 'atgaaggag---gttattgaatgtcggcggt' tgt1_aligned = 'M K E V V I >N V G G ' assert_equal([ ref1_aligned, tgt1_aligned ], @gaps[1].process_sequences_na_aa(ref1, tgt1)) end def test_process_sequences_na_aa_reverse_frameshift ref2 = 'atgaaggaggttataatgtcggcggt' tgt2 = 'MKEVVINVGG' ref2_aligned = 'atgaaggag---gttatX Y=Z;P%,Q\tR", 78, 90), Bio::GFF::GFF3::SequenceRegion.new(nil, nil, nil), ] end def test_parse strings = [ '##sequence-region ABCD1234 123 456', '##sequence-region %3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90', '##sequence-region' ] @data.each do |reg| str = strings.shift assert_equal(reg, Bio::GFF::GFF3::SequenceRegion.parse(str)) end end def test_seqid assert_equal('ABCD1234', @data[0].seqid) assert_equal(">X Y=Z;P%,Q\tR", @data[1].seqid) assert_equal(nil, @data[2].seqid) end def test_start assert_equal(123, @data[0].start) 
assert_equal(78, @data[1].start) assert_nil(@data[2].start) end def test_end assert_equal(456, @data[0].end) assert_equal(90, @data[1].end) assert_nil(@data[2].end) end def test_to_s assert_equal("##sequence-region ABCD1234 123 456\n", @data[0].to_s) assert_equal("##sequence-region %3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90\n", @data[1].to_s) assert_equal("##sequence-region . . .\n", @data[2].to_s) end end #class TestGFF3SequenceRegion class TestGFF3MetaData < Test::Unit::TestCase def setup @data = Bio::GFF::GFF3::MetaData.new('feature-ontology', 'http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12') end def test_parse assert_equal(@data, Bio::GFF::GFF3::MetaData.parse('##feature-ontology http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12')) end def test_directive assert_equal('feature-ontology', @data.directive) end def test_data assert_equal('http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12', @data.data) end end #class TestGFF3MetaData end #module Bio bio-1.4.3.0001/test/unit/bio/db/test_qual.rb0000644000004100000410000000326712200110570020315 0ustar www-datawww-data# # test/unit/bio/db/test_qual.rb - Unit test for Bio::FastaNumericFormat # # Copyright:: Copyright (C) 2005 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/fasta/qual' module Bio class TestFastaNumericFormat < Test::Unit::TestCase DATA = [24, 15, 23, 29, 20, 13, 20, 21, 21, 23, 22, 25, 13, 22, 17, 15, 25, 27, 32, 26, 32, 29, 29, 25].freeze def setup text =<CRA3575282.F 24 15 23 29 20 13 20 21 21 23 22 25 13 22 17 15 25 27 32 26 32 29 29 25 END @obj = Bio::FastaNumericFormat.new(text) end def test_entry assert_equal(">CRA3575282.F\n24 15 23 29 20 13 20 21 21 23 22 25 13 22 17 15 
25 27 32 26 \n32 29 29 25\n", @obj.entry) end def test_entry_id assert_equal('CRA3575282.F', @obj.entry_id) end def test_definition assert_equal('CRA3575282.F', @obj.definition) end def test_data assert_equal(DATA, @obj.data) end def test_length assert_equal(24, @obj.length) end def test_each assert(@obj.each {|x| }) end def test_arg assert(@obj[0], '') assert(@obj[-1], '') end def test_to_biosequence assert_instance_of(Bio::Sequence, s = @obj.to_biosequence) assert_equal(Bio::Sequence::Generic.new(''), s.seq) assert_equal(DATA, s.quality_scores) assert_equal(nil, s.quality_score_type) end end #class TestFastaNumericFormat end #module Bio bio-1.4.3.0001/test/unit/bio/db/test_nexus.rb0000644000004100000410000003754212200110570020520 0ustar www-datawww-data# # = test/bio/db/nexus.rb - Unit test for Bio::Nexus # # Copyright:: Copyright (C) 2006 Christian M Zmasek # # License:: The Ruby License # # $Id:$ # # == Description # # This file contains unit tests for Bio::Nexus. # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/nexus' module Bio class TestNexus < Test::Unit::TestCase NEXUS_STRING_1 = <<-END_OF_NEXUS_STRING #NEXUS Begin Taxa; Dimensions [[comment]] ntax=4; TaxLabels "hag fish" [comment] 'african frog' [lots of different comment follow] [] [a] [[a]] [ a ] [[ a ]] [ [ a ] ] [a ] [[a ]] [ [a ] ] [ a] [[ a]] [ [ a] ] [ ] [[ ]] [ [ ] ] [ a b ] [[ a b ]] [ [ a b ] ] [x[ x [x[ x[[x[[xx[x[ x]] ]x ] []]][x]]x]]] [comment_1 comment_3] "rat snake" 'red mouse'; End; [yet another comment End; ] Begin Characters; Dimensions nchar=20 ntax=4; [ ntax=1000; ] Format DataType=DNA Missing=x Gap=- MatchChar=.; Matrix [comment] fish ACATA GAGGG TACCT CTAAG frog ACTTA GAGGC TACCT CTAGC snake ACTCA CTGGG TACCT TTGCG mouse ACTCA GACGG TACCT TTGCG; End; Begin Trees; [comment] Tree 
best=(fish,(frog,(snake,mo use))); [some long comment] Tree other=(snake, (frog,(fish,mo use ))); End; Begin Trees; [comment] Tree worst=(A,(B,(C,D ))); Tree bad=(a, (b,(c , d ) ) ); End; Begin Distances; Dimensions nchar=20 ntax=5; Format Triangle=Both; Matrix taxon_1 0.0 1.0 2.0 4.0 7.0 taxon_2 1.0 0.0 3.0 5.0 8.0 taxon_3 3.0 4.0 0.0 6.0 9.0 taxon_4 7.0 3.0 2.0 0.0 9.5 taxon_5 1.2 1.3 1.4 1.5 0.0; End; Begin Data; Dimensions ntax=5 nchar=14; Format Datatype=RNA gap=# MISSING=x MatchChar=^; TaxLabels ciona cow [comment1 commentX] ape 'purple urchin' "green lizard"; Matrix [ comment [old comment] ] taxon_1 A- CCGTCGA-GTTA taxon_2 T- CCG-CGA-GATC taxon_3 A- C-GTCGA-GATG taxon_4 A- C C TC G A - -G T T T taxon_5 T-CGGTCGT-CTTA; End; Begin Private1; Something foo=5 bar=20; Format Datatype=DNA; Matrix taxon_1 1111 1111111111 taxon_2 2222 2222222222 taxon_3 3333 3333333333 taxon_4 4444 4444444444 taxon_5 5555 5555555555; End; Begin Private1; some [boring] interesting [ outdated ] data be here End; END_OF_NEXUS_STRING DATA_BLOCK_OUTPUT_STRING = <<-DATA_BLOCK_OUTPUT_STRING Begin Data; Dimensions NTax=5 NChar=14; Format DataType=RNA Missing=x Gap=# MatchChar=^; TaxLabels ciona cow ape purple_urchin green_lizard; Matrix taxon_1 A-CCGTCGA-GTTA taxon_2 T-CCG-CGA-GATC taxon_3 A-C-GTCGA-GATG taxon_4 A-CCTCGA--GTTT taxon_5 T-CGGTCGT-CTTA; End; DATA_BLOCK_OUTPUT_STRING def test_nexus nexus = Bio::Nexus.new( NEXUS_STRING_1 ) blocks = nexus.get_blocks assert_equal( 8, blocks.size ) private_blocks = nexus.get_blocks_by_name( "private1" ) data_blocks = nexus.get_data_blocks character_blocks = nexus.get_characters_blocks trees_blocks = nexus.get_trees_blocks distances_blocks = nexus.get_distances_blocks taxa_blocks = nexus.get_taxa_blocks assert_equal( 2, private_blocks.size ) assert_equal( 1, data_blocks.size ) assert_equal( 1, character_blocks.size ) assert_equal( 2, trees_blocks.size ) assert_equal( 1, distances_blocks.size ) assert_equal( 1, taxa_blocks.size ) taxa_block = 
taxa_blocks[ 0 ] assert_equal( taxa_block.get_number_of_taxa.to_i , 4 ) assert_equal( taxa_block.get_taxa[ 0 ], "hag_fish" ) assert_equal( taxa_block.get_taxa[ 1 ], "african_frog" ) assert_equal( taxa_block.get_taxa[ 2 ], "rat_snake" ) assert_equal( taxa_block.get_taxa[ 3 ], "red_mouse" ) chars_block = character_blocks[ 0 ] assert_equal( chars_block.get_number_of_taxa.to_i, 4 ) assert_equal( chars_block.get_number_of_characters.to_i, 20 ) assert_equal( chars_block.get_datatype, "DNA" ) assert_equal( chars_block.get_match_character, "." ) assert_equal( chars_block.get_missing, "x" ) assert_equal( chars_block.get_gap_character, "-" ) assert_equal( chars_block.get_matrix.get_value( 0, 0 ), "fish" ) assert_equal( chars_block.get_matrix.get_value( 1, 0 ), "frog" ) assert_equal( chars_block.get_matrix.get_value( 2, 0 ), "snake" ) assert_equal( chars_block.get_matrix.get_value( 3, 0 ), "mouse" ) assert_equal( chars_block.get_matrix.get_value( 0, 20 ), "G" ) assert_equal( chars_block.get_matrix.get_value( 1, 20 ), "C" ) assert_equal( chars_block.get_matrix.get_value( 2, 20 ), "G" ) assert_equal( chars_block.get_matrix.get_value( 3, 20 ), "G" ) assert_equal( chars_block.get_characters_strings_by_name( "fish" )[ 0 ], "ACATAGAGGGTACCTCTAAG" ) assert_equal( chars_block.get_characters_strings_by_name( "frog" )[ 0 ], "ACTTAGAGGCTACCTCTAGC" ) assert_equal( chars_block.get_characters_strings_by_name( "snake" )[ 0 ], "ACTCACTGGGTACCTTTGCG" ) assert_equal( chars_block.get_characters_strings_by_name( "mouse" )[ 0 ], "ACTCAGACGGTACCTTTGCG" ) assert_equal( chars_block.get_characters_string( 0 ), "ACATAGAGGGTACCTCTAAG" ) assert_equal( chars_block.get_characters_string( 1 ), "ACTTAGAGGCTACCTCTAGC" ) assert_equal( chars_block.get_characters_string( 2 ), "ACTCACTGGGTACCTTTGCG" ) assert_equal( chars_block.get_characters_string( 3 ), "ACTCAGACGGTACCTTTGCG" ) assert_equal( chars_block.get_row_name( 1 ), "frog" ) assert_equal( chars_block.get_sequences_by_name( "fish" )[ 0 ].seq.to_s.downcase, 
"ACATAGAGGGTACCTCTAAG".downcase ) assert_equal( chars_block.get_sequences_by_name( "frog" )[ 0 ].seq.to_s.downcase, "ACTTAGAGGCTACCTCTAGC".downcase ) assert_equal( chars_block.get_sequences_by_name( "snake" )[ 0 ].seq.to_s.downcase, "ACTCACTGGGTACCTTTGCG".downcase ) assert_equal( chars_block.get_sequences_by_name( "mouse" )[ 0 ].seq.to_s.downcase, "ACTCAGACGGTACCTTTGCG".downcase ) assert_equal( chars_block.get_sequences_by_name( "fish" )[ 0 ].definition, "fish" ) assert_equal( chars_block.get_sequences_by_name( "frog" )[ 0 ].definition, "frog" ) assert_equal( chars_block.get_sequences_by_name( "snake" )[ 0 ].definition, "snake" ) assert_equal( chars_block.get_sequences_by_name( "mouse" )[ 0 ].definition, "mouse" ) assert_equal( chars_block.get_sequence( 0 ).seq.to_s.downcase, "ACATAGAGGGTACCTCTAAG".downcase ) assert_equal( chars_block.get_sequence( 1 ).seq.to_s.downcase, "ACTTAGAGGCTACCTCTAGC".downcase ) assert_equal( chars_block.get_sequence( 2 ).seq.to_s.downcase, "ACTCACTGGGTACCTTTGCG".downcase ) assert_equal( chars_block.get_sequence( 3 ).seq.to_s.downcase, "ACTCAGACGGTACCTTTGCG".downcase ) assert_equal( chars_block.get_sequence( 0 ).definition, "fish" ) assert_equal( chars_block.get_sequence( 1 ).definition, "frog" ) assert_equal( chars_block.get_sequence( 2 ).definition, "snake" ) assert_equal( chars_block.get_sequence( 3 ).definition, "mouse" ) tree_block_0 = trees_blocks[ 0 ] tree_block_1 = trees_blocks[ 1 ] assert_equal( tree_block_0.get_tree_names[ 0 ], "best" ) assert_equal( tree_block_0.get_tree_names[ 1 ], "other" ) assert_equal( tree_block_0.get_tree_strings_by_name( "best" )[ 0 ], "(fish,(frog,(snake,mouse)));" ) assert_equal( tree_block_0.get_tree_strings_by_name( "other" )[ 0 ], "(snake,(frog,(fish,mouse)));" ) best_tree = tree_block_0.get_trees_by_name( "best" )[ 0 ] other_tree = tree_block_0.get_trees_by_name( "other" )[ 0 ] worst_tree = tree_block_1.get_tree( 0 ) bad_tree = tree_block_1.get_tree( 1 ) assert_equal( 6, best_tree.descendents( 
best_tree.root ).size ) assert_equal( 4, best_tree.leaves.size) assert_equal( 6, other_tree.descendents( other_tree.root ).size ) assert_equal( 4, other_tree.leaves.size) fish_leaf_best = best_tree.nodes.find { |x| x.name == 'fish' } assert_equal( 1, best_tree.ancestors( fish_leaf_best ).size ) fish_leaf_other = other_tree.nodes.find { |x| x.name == 'fish' } assert_equal( 3, other_tree.ancestors( fish_leaf_other ).size ) a_leaf_worst = worst_tree.nodes.find { |x| x.name == 'A' } assert_equal( 1, worst_tree.ancestors( a_leaf_worst ).size ) c_leaf_bad = bad_tree.nodes.find { |x| x.name == 'c' } assert_equal( 3, bad_tree.ancestors( c_leaf_bad ).size ) dist_block = distances_blocks[ 0 ] assert_equal( dist_block.get_number_of_taxa.to_i, 5 ) assert_equal( dist_block.get_number_of_characters.to_i, 20 ) assert_equal( dist_block.get_triangle, "Both" ) assert_equal( dist_block.get_matrix.get_value( 0, 0 ), "taxon_1" ) assert_equal( dist_block.get_matrix.get_value( 1, 0 ), "taxon_2" ) assert_equal( dist_block.get_matrix.get_value( 2, 0 ), "taxon_3" ) assert_equal( dist_block.get_matrix.get_value( 3, 0 ), "taxon_4" ) assert_equal( dist_block.get_matrix.get_value( 4, 0 ), "taxon_5" ) assert_equal( dist_block.get_matrix.get_value( 0, 5 ).to_f, 7.0 ) assert_equal( dist_block.get_matrix.get_value( 1, 5 ).to_f, 8.0 ) assert_equal( dist_block.get_matrix.get_value( 2, 5 ).to_f, 9.0 ) assert_equal( dist_block.get_matrix.get_value( 3, 5 ).to_f, 9.5 ) assert_equal( dist_block.get_matrix.get_value( 4, 5 ).to_f, 0.0 ) data_block = data_blocks[ 0 ] assert_equal( data_block.get_number_of_taxa.to_i, 5 ) assert_equal( data_block.get_number_of_characters.to_i, 14 ) assert_equal( data_block.get_datatype, "RNA" ) assert_equal( data_block.get_match_character, "^" ) assert_equal( data_block.get_missing, "x" ) assert_equal( data_block.get_gap_character, "#" ) assert_equal( data_block.get_matrix.get_value( 0, 0 ), "taxon_1" ) assert_equal( data_block.get_matrix.get_value( 1, 0 ), "taxon_2" ) 
assert_equal( data_block.get_matrix.get_value( 2, 0 ), "taxon_3" ) assert_equal( data_block.get_matrix.get_value( 3, 0 ), "taxon_4" ) assert_equal( data_block.get_matrix.get_value( 4, 0 ), "taxon_5" ) assert_equal( data_block.get_matrix.get_value( 0, 14 ), "A" ) assert_equal( data_block.get_matrix.get_value( 1, 14 ), "C" ) assert_equal( data_block.get_matrix.get_value( 2, 14 ), "G" ) assert_equal( data_block.get_matrix.get_value( 3, 14 ), "T" ) assert_equal( data_block.get_matrix.get_value( 4, 14 ), "A" ) assert_equal( data_block.get_taxa[ 0 ], "ciona" ) assert_equal( data_block.get_taxa[ 1 ], "cow" ) assert_equal( data_block.get_taxa[ 2 ], "ape" ) assert_equal( data_block.get_taxa[ 3 ], "purple_urchin" ) assert_equal( data_block.get_taxa[ 4 ], "green_lizard" ) assert_equal( data_block.get_characters_strings_by_name( "taxon_1" )[ 0 ], "A-CCGTCGA-GTTA" ) assert_equal( data_block.get_characters_strings_by_name( "taxon_2" )[ 0 ], "T-CCG-CGA-GATC" ) assert_equal( data_block.get_characters_strings_by_name( "taxon_3" )[ 0 ], "A-C-GTCGA-GATG" ) assert_equal( data_block.get_characters_strings_by_name( "taxon_4" )[ 0 ], "A-CCTCGA--GTTT" ) assert_equal( data_block.get_characters_strings_by_name( "taxon_5" )[ 0 ], "T-CGGTCGT-CTTA" ) assert_equal( data_block.get_characters_string( 0 ), "A-CCGTCGA-GTTA" ) assert_equal( data_block.get_characters_string( 1 ), "T-CCG-CGA-GATC" ) assert_equal( data_block.get_characters_string( 2 ), "A-C-GTCGA-GATG" ) assert_equal( data_block.get_characters_string( 3 ), "A-CCTCGA--GTTT" ) assert_equal( data_block.get_characters_string( 4 ), "T-CGGTCGT-CTTA" ) assert_equal( data_block.get_row_name( 0 ), "taxon_1" ) assert_equal( data_block.get_row_name( 1 ), "taxon_2" ) assert_equal( data_block.get_row_name( 2 ), "taxon_3" ) assert_equal( data_block.get_row_name( 3 ), "taxon_4" ) assert_equal( data_block.get_row_name( 4 ), "taxon_5" ) assert_equal( data_block.get_sequences_by_name( "taxon_1" )[ 0 ].seq.to_s.downcase, "A-CCGTCGA-GTTA".downcase ) 
assert_equal( data_block.get_sequences_by_name( "taxon_2" )[ 0 ].seq.to_s.downcase, "T-CCG-CGA-GATC".downcase ) assert_equal( data_block.get_sequences_by_name( "taxon_3" )[ 0 ].seq.to_s.downcase, "A-C-GTCGA-GATG".downcase ) assert_equal( data_block.get_sequences_by_name( "taxon_4" )[ 0 ].seq.to_s.downcase, "A-CCTCGA--GTTT".downcase ) assert_equal( data_block.get_sequences_by_name( "taxon_5" )[ 0 ].seq.to_s.downcase, "T-CGGTCGT-CTTA".downcase ) assert_equal( data_block.get_sequences_by_name( "taxon_1" )[ 0 ].definition, "taxon_1" ) assert_equal( data_block.get_sequences_by_name( "taxon_2" )[ 0 ].definition, "taxon_2" ) assert_equal( data_block.get_sequences_by_name( "taxon_3" )[ 0 ].definition, "taxon_3" ) assert_equal( data_block.get_sequences_by_name( "taxon_4" )[ 0 ].definition, "taxon_4" ) assert_equal( data_block.get_sequences_by_name( "taxon_5" )[ 0 ].definition, "taxon_5" ) assert_equal( data_block.get_sequence( 0 ).seq.to_s.downcase, "A-CCGTCGA-GTTA".downcase ) assert_equal( data_block.get_sequence( 1 ).seq.to_s.downcase, "T-CCG-CGA-GATC".downcase ) assert_equal( data_block.get_sequence( 2 ).seq.to_s.downcase, "A-C-GTCGA-GATG".downcase ) assert_equal( data_block.get_sequence( 3 ).seq.to_s.downcase, "A-CCTCGA--GTTT".downcase ) assert_equal( data_block.get_sequence( 4 ).seq.to_s.downcase, "T-CGGTCGT-CTTA".downcase ) assert_equal( data_block.get_sequence( 0 ).definition, "taxon_1" ) assert_equal( data_block.get_sequence( 1 ).definition, "taxon_2" ) assert_equal( data_block.get_sequence( 2 ).definition, "taxon_3" ) assert_equal( data_block.get_sequence( 3 ).definition, "taxon_4" ) assert_equal( data_block.get_sequence( 4 ).definition, "taxon_5" ) assert_equal( DATA_BLOCK_OUTPUT_STRING, data_block.to_nexus() ) generic_0 = private_blocks[ 0 ] generic_1 = private_blocks[ 1 ] assert_equal( generic_0.get_tokens[ 0 ], "Something" ) assert_equal( generic_0.get_tokens[ 1 ], "foo" ) assert_equal( generic_0.get_tokens[ 2 ], "5" ) assert_equal( generic_0.get_tokens[ 3 ], 
"bar" ) assert_equal( generic_0.get_tokens[ 4 ], "20" ) assert_equal( generic_0.get_tokens[ 5 ], "Format" ) assert_equal( generic_0.get_tokens[ 6 ], "Datatype" ) assert_equal( generic_0.get_tokens[ 7 ], "DNA" ) assert_equal( generic_0.get_tokens[ 8 ], "Matrix" ) assert_equal( generic_0.get_tokens[ 9 ], "taxon_1" ) assert_equal( generic_0.get_tokens[10 ], "1111" ) assert_equal( generic_1.get_tokens[ 0 ], "some" ) assert_equal( generic_1.get_tokens[ 1 ], "interesting" ) assert_equal( generic_1.get_tokens[ 2 ], "data" ) assert_equal( generic_1.get_tokens[ 3 ], "be" ) assert_equal( generic_1.get_tokens[ 4 ], "here" ) end # test_nexus end # class TestNexus end # module Bio bio-1.4.3.0001/test/unit/bio/db/sanger_chromatogram/0000755000004100000410000000000012200110570022001 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/db/sanger_chromatogram/test_abif.rb0000644000004100000410000000702212200110570024267 0ustar www-datawww-data# # test/unit/bio/db/sanger_chromatogram/test_abif.rb - Unit test for Bio::Abif # # Copyright:: Copyright (C) 2009 Anthony Underwood , # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/sanger_chromatogram/chromatogram' require 'bio/db/sanger_chromatogram/abif' module Bio module TestAbifData DataPath = Pathname.new(File.join(BioRubyTestDataPath, 'sanger_chromatogram')).cleanpath.to_s def self.abif fn = File.join(DataPath, 'test_chromatogram_abif.ab1') File.open(fn, 'rb') { |f| f.read } end end #module TestAbifData class TestAbif < Test::Unit::TestCase Abif_sequence = 
"nnnnnnnnnnnttggttggttcgctataaaaactcttattttggataatttgtttagctgttgcaatataaattgacccatttaatttataaattggattctcgttgcaataaatttccagatcctgaaaaagctctggcttaaccaaattgccttggctatcaatgcttctacaccaagaaggctttaaagagataggactaactgaaacgacactttttcccgttgcttgatgtatttcaacagcatgtcttatggtttctggcttcctgaatggagaagttggttgtaaaagcaatacactgtcaaaaaaaacctccatttgctgaaacttaaacaggaggtcaataacagtatgaatcacatccgaagtatccgtggctaaatcttccgatcttagccaaggtactgaagccccatattgaacn".freeze Abif_RC_sequence = "ngttcaatatggggcttcagtaccttggctaagatcggaagatttagccacggatacttcggatgtgattcatactgttattgacctcctgtttaagtttcagcaaatggaggttttttttgacagtgtattgcttttacaaccaacttctccattcaggaagccagaaaccataagacatgctgttgaaatacatcaagcaacgggaaaaagtgtcgtttcagttagtcctatctctttaaagccttcttggtgtagaagcattgatagccaaggcaatttggttaagccagagctttttcaggatctggaaatttattgcaacgagaatccaatttataaattaaatgggtcaatttatattgcaacagctaaacaaattatccaaaataagagtttttatagcgaaccaaccaannnnnnnnnnn".freeze Abif_first_10_peak_indices = [3, 16,38,61,66,91,105,115,138,151].freeze Abif_last_10_peak_indices = [5070,5081,5094,5107,5120,5133,5145,5157,5169,5182].freeze Abif_atrace_size = 5236 Abif_RC_first_10_peak_indices = Abif_last_10_peak_indices.collect{|index| Abif_atrace_size - index}.reverse.freeze Abif_RC_last_10_peak_indices = Abif_first_10_peak_indices.collect{|index| Abif_atrace_size - index}.reverse.freeze def setup @abi = Abif.new(TestAbifData.abif) end def test_seq assert_equal(Abif_sequence, @abi.seq.to_s) end def test_to_biosequence assert_equal(Abif_sequence, @abi.to_biosequence.to_s) end def test_complement @RC_chromatogram = @abi.complement # check reverse complemented sequence assert_equal(Abif_RC_sequence, @RC_chromatogram.sequence) # check reverse complemented peak indices assert_equal(Abif_RC_first_10_peak_indices, @RC_chromatogram.peak_indices.slice(0,10)) assert_equal(Abif_RC_last_10_peak_indices, @RC_chromatogram.peak_indices.slice(-10..-1)) # check reverse complemented traces assert_equal(@abi.atrace.slice(0,10), @RC_chromatogram.ttrace.slice(-10..-1).reverse) 
assert_equal(@abi.ctrace.slice(0,10), @RC_chromatogram.gtrace.slice(-10..-1).reverse) assert_equal(@abi.gtrace.slice(0,10), @RC_chromatogram.ctrace.slice(-10..-1).reverse) assert_equal(@abi.ttrace.slice(0,10), @RC_chromatogram.atrace.slice(-10..-1).reverse) assert_equal(@abi.qualities.slice(0,10), @RC_chromatogram.qualities.slice(-10..-1).reverse) end end end bio-1.4.3.0001/test/unit/bio/db/sanger_chromatogram/test_scf.rb0000644000004100000410000001072312200110570024143 0ustar www-datawww-data# # test/unit/bio/db/sanger_chromatogram/test_scf.rb - Unit test for Bio::Scf # # Copyright:: Copyright (C) 2009 Anthony Underwood , # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db/sanger_chromatogram/scf' module Bio module TestScfData DataPath = Pathname.new(File.join(BioRubyTestDataPath, 'sanger_chromatogram')).cleanpath.to_s def self.scf_version_2 fn = File.join(DataPath, 'test_chromatogram_scf_v2.scf') File.open(fn, "rb") { |f| f.read } end def self.scf_version_3 fn = File.join(DataPath, 'test_chromatogram_scf_v3.scf') File.open(fn, "rb") { |f| f.read } end end #module TestScfData module TestScf_common Scf_sequence = "attaacgtaaaaggtttggttggttcgctataaaaactcttattttggataatttgtttagctgttgcaatataaattgacccatttaatttataaattggattctcgttgcaataaatttccagatcctgaaaaagctctggcttaaccaaattgccttggctatcaatgcttctacaccaagaaggctttaaagagataggactaactgaaacgacactttttcccgttgcttgatgtatttcaacagcatgtcttatggtttctggcttcctgaatggagaagttggttgtaaaagcaatacactgtcaaaaaaaacctccatttgctgaaacttaaacaggaggtcaataacagtatgaatcacatccgaagtatccgtggctaaatcttccgatcttagccaaggtactgaagccccatattgaacggann".freeze Scf_RC_sequence = 
"nntccgttcaatatggggcttcagtaccttggctaagatcggaagatttagccacggatacttcggatgtgattcatactgttattgacctcctgtttaagtttcagcaaatggaggttttttttgacagtgtattgcttttacaaccaacttctccattcaggaagccagaaaccataagacatgctgttgaaatacatcaagcaacgggaaaaagtgtcgtttcagttagtcctatctctttaaagccttcttggtgtagaagcattgatagccaaggcaatttggttaagccagagctttttcaggatctggaaatttattgcaacgagaatccaatttataaattaaatgggtcaatttatattgcaacagctaaacaaattatccaaaataagagtttttatagcgaaccaaccaaaccttttacgttaat".freeze Scf_first_10_peak_indices = [16,24,37,49,64,64,80,92,103,113].freeze Scf_last_10_peak_indices = [5120,5132,5145,5157,5169,5182,5195,5207,5219,5231].freeze Scf_atrace_size = 5236 Scf_RC_first_10_peak_indices = Scf_last_10_peak_indices.collect{|index| Scf_atrace_size - index}.reverse.freeze Scf_RC_last_10_peak_indices = Scf_first_10_peak_indices.collect{|index| Scf_atrace_size - index}.reverse.freeze def test_seq assert_equal(Scf_sequence, @scf.seq.to_s) end def test_to_biosequence assert_equal(Scf_sequence, @scf.to_biosequence.to_s) end def test_complement @RC_chromatogram = @scf.complement # check reverse complemented sequence assert_equal(Scf_RC_sequence, @RC_chromatogram.sequence) # check reverse complemented peak indices assert_equal(Scf_RC_first_10_peak_indices, @RC_chromatogram.peak_indices.slice(0,10)) assert_equal(Scf_RC_last_10_peak_indices, @RC_chromatogram.peak_indices.slice(-10..-1)) # check reverse complemented traces assert_equal(@scf.atrace.slice(0,10), @RC_chromatogram.ttrace.slice(-10..-1).reverse) assert_equal(@scf.ctrace.slice(0,10), @RC_chromatogram.gtrace.slice(-10..-1).reverse) assert_equal(@scf.gtrace.slice(0,10), @RC_chromatogram.ctrace.slice(-10..-1).reverse) assert_equal(@scf.ttrace.slice(0,10), @RC_chromatogram.atrace.slice(-10..-1).reverse) # check reverse complemented individual and combined qualities #if @RC_chromatogram.chromatogram_type == ".scf" assert_equal(@scf.aqual.slice(0,10), @RC_chromatogram.tqual.slice(-10..-1).reverse) assert_equal(@scf.cqual.slice(0,10), 
@RC_chromatogram.gqual.slice(-10..-1).reverse) assert_equal(@scf.gqual.slice(0,10), @RC_chromatogram.cqual.slice(-10..-1).reverse) assert_equal(@scf.tqual.slice(0,10), @RC_chromatogram.aqual.slice(-10..-1).reverse) #end assert_equal(@scf.qualities.slice(0,10), @RC_chromatogram.qualities.slice(-10..-1).reverse) end end #module TestScf_common class TestScf_version_2 < Test::Unit::TestCase include TestScf_common def setup @scf = Scf.new(TestScfData.scf_version_2) end end class TestScf_version_3 < Test::Unit::TestCase include TestScf_common def setup @scf = Scf.new(TestScfData.scf_version_3) end end #class TestScf_version_3 end #module Bio bio-1.4.3.0001/test/unit/bio/db/test_fastq.rb0000644000004100000410000007402112200110570020465 0ustar www-datawww-data# # test/unit/bio/db/test_fastq.rb - Unit test for Bio::Fastq # # Copyright:: Copyright (C) 2009 # Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/io/flatfile' require 'bio/db/fastq' module Bio module TestFastq TestFastqDataDir = Pathname.new(File.join(BioRubyTestDataPath, 'fastq')).cleanpath.to_s # A module providing methods to compare float arrays module FloatArrayComparison private def float_array_equivalent?(expected, actual, *arg) assert_equal(expected.size, actual.size, *arg) dt = Float::EPSILON * 1024 (0...(expected.size)).each do |i| e = expected[i] a = actual[i] #assert_equal(e, a) assert_in_delta(e, a, e.abs * dt) end end end #module FloatArrayComparison # Tests using 'longreads_original_sanger.fastq' class TestFastq_longreads_original_sanger < Test::Unit::TestCase include FloatArrayComparison SEQS = [ 'tcagTTAAGATGGGATAATATCCTCAGATTGCGTGATGAACTTTGTTCTGGTGGAGGAGA AGGAAGTGCATTCGACGTATGCCCGTTTGTCGATATTTGtatttaaagtaatccgtcaca 
aatcagtgacataaatattatttagatttcgggagcaactttatttattccacaagcagg tttaaattttaaatttaaattattgcagaagactttaaattaacctcgttgtcggagtca tttgttcggttattggtcgaaagtaaccncgggaagtgccgaaaactaacaaacaaaaga agatagtgaaattttaattaaaanaaatagccaaacgtaactaactaaaacggacccgtc gaggaactgccaacggacgacacagggagtagnnn', 'tcagCCAGCAATTCCGACTTAATTGTTCTTCTTCCATCATTCATCTCGACTAACAGTTCT ACGATTAATGAGTTTGGCtttaatttgttgttcattattgtcacaattacactactgaga ctgccaaggcacncagggataggnn', 'tcagTTTTCTTAAATTACTTGAATCTGTTGAAGTGGATGTCCACTTTTGTATGCCAAATA TGCCCAGCGTATACGATCTTGGCCACATCTCCACATAATCATCAGTCGGATGCAAAAAGC GATTAAACTAAAAATGAATGCGTTTTTAGATGAGTAAATAGGTAATACTTTGTTTAAATA ATAAATGTCACAAACAGAACGCGGATTACAGTACCTGAAAATAGTTGTACTGTATCTGTG CCGGCACTTCCTCGGCCCTGAGAAGTTGTCCCGTTGTTTCCATTCGCACCATCCAATGGC CAAAGTTTGCGAAGAATCTGTTCCGTTCCATTACCAATTGTTTTTCCATGctgagactgc caaggcacacaggggataggnn', 'tcagTTTTTGGAGAATTCCGTCAGGGACGGCATGGCATATTTGTGGGTTCGGCACGGCGT CCTGGCCAAGAAGAAGAAGACGAATTAGCCCGTTAATTTAATGACACCTTCCCCAATTTT GCAGCAATGATTGGTTCATTCTTGGCGGTGCGTTTTTGTGCTTCGTCGAATTGTTGGCCA TTTTGGTCCACCGGCCATCATCTTTACGCTATCCGACTGATTGGAAATCACCGCCTAGCA TTTTGCCGAAGATTGTTGCGTTGTACGGCCATGTGCTGATTGTTTACATTGGCATTCTTG GCAATTTGTCCTTGGTCGGCTTTGACGGCAAATTTGCGGTGTTAAGTctgagactgccaa ggcacacagggggatagggnn', 'tcagTTGACCGGCGTTGTGTAACAATAATTCATTATTCTGAGACGATGCCAATGTAATCG ACGGTTTATGCCCAATTATTCCCATCTATGCTTAACTGATCAAATACTATTTGCATTACG TCACGAAATTGCGCGAACACCGCCGGCCGACAATAATTTATACCGGACATACCGGAGTTG ATGGTAATCGGTAAAGAGTTTTATTTAATTATntattatcnctattaattattgttanca acaatgtgcacgctntgccgcccgccgccgccgtgtcggtaggaccccggacggacccgg acccggttcgggtacccgttttcgggttcccggaaccgtttttcgggtacccggtttttt cggggggccccccggtaaaaaaccggggaaccccctaaaacgggtaaacgtaccgtaagg gaccccctaaacgggggccccgaaaaaccgggacccaaaccggggggaaacggttaaagg ggggggaagtaggngnnnnnnnnnnnn', 'tcagTTATTGCAGTCGTTCCGCGCCATCGCCGGTAACCGTCCGCGTGTTATTCTGTGTAT CGGCCAACCTTCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACGATCTATAC CGGCGAAACTCAGCCGAAAGGTCTCGCGGTAGAGCCTATGAGCTGCCCGACCGATGCATT TAAATTTCCGGGGATCGtcgctgatctgagactgccaaaggcacactagggggataggnn 
nnnnnnnnnnnnnnnnnn', 'tcagGTTTTAAATCGCTTTCCAAGGAATTTGAGTCTAAATCCGGTGGATCCCATCAGTAC AAATGCGGCGACAAGGCCGTGAAAACACTGCTTAATTCTTTGCACTTTTTGGCCACCTTT TTGGAAATGTTGTTTTGTGTTCTCAAAATTTTCCATCTCAGAACAAACATTCCATCGGGC TGATGTTGTGGCTTTTGGCGCGCGAAGTGCTGCTACTGCGCGGCAAAATCAGTCGCCAGA CCGGTTTTGTTGTGGACGACAAAGTGATCATGCCTGACTTGTACTTCTACCGCGATCCGC AAGCGCGAATTGGTCACATAGTTATAGAATTTTTGAGCCTTTTTCTTGACATAAAAAGTG TGGTTTTAAAAATTTCCTGGCAGGACCCACGCCAACGTTCAGGAATAATATCTTTTAAAA AGctgagactgccaaggcacacaggggataggn', 'tcagTTTAATTTGGTGCTTCCTTTCAATTCCTTAGTTTAAACTTGGCACTGAAGTCTCGC ATTTATAACTAGAGCCCGGATTTTAGAGGCTAAAAAGTTTTCCAGATTTCAAAATTTATT TCGAAACTATTTTTCTGATTGTGATGTGACGGATTTCTAAATTAAATCGAAATGATGTGT ATTGAACTTAACAAGTGATTTTTATCAGATTTTGTCAATGAATAAATTTTAATTTAAATC TCTTTCTAACACTTTCATGATTAAAATCTAACAAAGCGCGACCAGTATGTGAGAAGAGCA AAAACAACAAAAAGTGCTAGCACTAAAGAAGGTTCGAACCCAACACATAACGTAAGAGTT ACCGGGAAGAAAACCACTctgagactgccaaggcacacagggggataggnn', 'tcagTTTTCAAATTTTCCGAAATTTGCTGTTTGGTAGAAGGCAAATTATTTGATTGAATT TTGTATTTATTTAAAACAATTTATTTTAAAATAATAATTTTCCATTGACTTTTTACATTT AATTGATTTTATTATGCATTTTATATTTGTTTTCTAAATATTCGTTTGCAAACTCACGTT GAAATTGTATTAAACTCGAAATTAGAGTTTTTGAAATTAATTTTTATGTAGCATAATATT TTAAACATATTGGAATTTTATAAAACATTATATTTTTctgagactgccaaggcacacagg gggataggn', 'tcagTTTTGATCTTTTAATAATGAATTTTAATGTGTTAAAATGATTGCATTGATGGCATA ACCGCATTTAAATTAATTACATGAAGTGTAAGTATGAAATTTTCCTTTCCAAATTGCAAA AACTAAAATTTAAAATTTATCGTAAAAATTAACATATATTTTAAACGATTTTAAGAAACA TTTGTAAATTATATTTTTGTGAAGCGTTCAAACAAAAATAAACAATAAAATATTTTTCTA TTTAATAGCAAAACATTTGACGATGAAAAGGAAAATGCGGGTTTGAAAATGGGCTTTGCC ATGCTATTTTCATAATAACATATTTTTATTATGAATAATAAATTTACATACAATATATAC AGTCTTAAATTTATTCATAATATTTTTGAGAATctgagactgccaaggcacacaggggat aggn' ].collect { |x| x.gsub(/\s/, '').freeze }.freeze IDLINES = [ 'FSRRS4401BE7HA [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95]', 'FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74]', 'FSRRS4401B64ST [length=382] [gc=40.58] [flows=800] [phred_min=0] 
[phred_max=40] [trimmed_length=346]', 'FSRRS4401EJ0YH [length=381] [gc=48.29] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=343]', 'FSRRS4401BK0IB [length=507] [gc=49.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=208]', 'FSRRS4401ARCCB [length=258] [gc=46.90] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=193]', 'FSRRS4401CM938 [length=453] [gc=44.15] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=418]', 'FSRRS4401EQLIK [length=411] [gc=34.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=374]', 'FSRRS4401AOV6A [length=309] [gc=22.98] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=273]', 'FSRRS4401EG0ZW [length=424] [gc=23.82] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=389]', ].collect { |x| x.freeze }.freeze ENTRY_IDS = [ 'FSRRS4401BE7HA', 'FSRRS4401BRRTC', 'FSRRS4401B64ST', 'FSRRS4401EJ0YH', 'FSRRS4401BK0IB', 'FSRRS4401ARCCB', 'FSRRS4401CM938', 'FSRRS4401EQLIK', 'FSRRS4401AOV6A', 'FSRRS4401EG0ZW' ].collect { |x| x.freeze }.freeze QUALITY_STRINGS = [ <<'_0_', <<'_1_', <<'_2_', <<'_3_', <<'_4_', <<'_5_', <<'_6_', <<'_7_', <<'_8_', <<'_9_' ].collect { |x| x.delete("\r\n").freeze }.freeze FFFDDDDDDDA666?688FFHGGIIIIIIIIIIIIIIIII IHHHIIIIIIIIIGHGFFFFF====DFFFFFFFFFFFFFF D???:3104/76=:5...4.3,,,366////4<;!!! _0_ FFFFFFFFFDDDDFFFFGFDDDDBAAAAA=<4444@@B=5 55:BBBBB@@?8:8<<;;;;9944/!/4,,,57855!! _1_ IIIICCCCI??666IIIIIIIIIIIIIIIIIIIIIIIIII IIII6666IAIIIII???IIIICCCIIIIIIIIIIIIIII IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII66333EI CE::338=/----,8=>>??:2-////7>CEEIEIHHHII IIIIIIIE;;9911199B???IBCHIIIIIIHHHIIHHHI IIIIIIIIIIIIIIIIIBBCCIIIIIIIIIIIIIIIIIII IIIIIIIIIIIIIIIGGGIIIIIIIIID?===DIIIHHHI IIIIIIIIHHHIIIIIIIIIIHHHIHHHIIIIIIIIIIII IIIIIIIIII?>;9988==5----.@@AEGIIIIIIIIIH H????EIIIFF999;EIIBB!! 
_2_ IIII?????IIIIIIIIIIIIIIHHHIIIIIIIIIIIIIH HHIIHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII IIIIIIIIHHHIIIIIHHHIIIIIIIIIIIAAAAII>>>> IIIIIIIIIIIIIIIIIIIIIIIIIIEEIEE;33333D7I IIIIIIIIIIIIIIIIIIIICC@@HHIIIIIIIIIIIIII IIHHHIIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIIIII BBBBIHCDCHIIIIIIIIIIIIIIIIIIIIIIIIIIIIII IIHHHIIIHHCCDIIIIIIHHHIICCCH=CCIIIIIIIII GGGIIIIIIHHHHHHIIIIIIIIIIIIIIIHHHIIHHE?? >>?EFEE?/////;:80--!! _3_ FFFA@@FFFFFFFFFFHHB:::@BFFFFGGHIHIIIIIII IIIIIIIIIIIIIIIIFFFFFFFFF?=BA@11188011<< 88;?AABDDC???DDAAAADA666D?DDD=====AA>?>> <<<=<11188<>?@@1114 2::DDA???DFFFFFFFFFFFFFBAAAA<<0000.22=// //8,--111111!23--/24!37:6666<;822/..4!46 521177553.-.23!231121112,,-,,211==5----- -,12,,,,,,-,,,-1,,,,-,,155--,,,,13111.,, ,,,,,,++111..11..1,,,,,,,,,+3,,,,,--22-- ---//----55//**/--22--**,,,,**,,,,,,.1., *,,,,***,,,,,,,,,,,,,,,,,,,,,,,),,-,,,,, ,),,,,,**//.),,,///,,,,,,,,,,,.))33---,, ,,,,,,,,(0,,,!.!!!!!!!!!!!! _4_ FFF<8::@DFFFFFFFGGFDCAAAAAB@@000046<;663 22366762243348<<=??4445::>ABAAA@<<==B=:5 55:BBD??=BDDDDFFFCCCCCCCFFCDDDFFFFFDBAA= =88880004><<<99688;889<889?BBBBA=???DDBB B@@??88889---237771,,,,,,,,--1152<<00158 A@><<<<<43277711,,,--37===75,----34666!! !!!!!!!!!!!!!!!!!! _5_ IIIIICC>>666IIIICCCIIIIIIIIHHHIIIIIG666I IIIIIIIIIHHHIIIIIIIICCCIIIIIIIIIIIIIIIII I@@@@IIIIIIIIIIIIIHHHIIII???=;IIEEI::/// //7544:?IBB72244E8EECEBC=@@@@@@@HHIIIIII IIIIBBBIIIIIIIIIHHHIIIIIIIIIIIIICCCCIIII IIIIIIIIIIIIIIIIIIIIIIII6666DEIIHEB??D@7 77772222D89EEIIIIIIIHHHIIIIIIIIHHHIIIIII IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHHIIIIII IIIIIIIII==?==IIIII???=;I63DDD82--,,,38= =::----,,---+++33066;@6380008/:889<:BGII IIIIIIIFE?@C<<7999EEE EEE@@@@EEEEE! 
_6_ III?666??HHHIIIIIIIIIGGGIIIIIIIIIIIGGGHH HIIIIIIIIIIIIIIIIIIIIGGGIIIIIIIIIIHHHIII @@@@IIIIEIE111100----22?=8---:-------,,, ,33---5:3,----:1BBEEEHIIIIIIIIIIIB??A122 000...:?=024GIIIIIIIIIIIIIIIIIIECCHHB=// -,,21??<5-002=6FBB?:9<=11/4444//-//77??G EIEEHIACCIIIHHHIIIIIIICCCAIIIHHHHHHIIIII IIIIIIIIIIIIIIIIIEE1//--822;----.777@EII IIII???IIIIIIIIIIIHHHIIIIIIIIIIIIIIIIIII I994227775555AE;IEEEEEIIIII??9755>@==:3, ,,,,33336!! _7_ IIIICCCCI;;;CCCCIII???HHHIIIIHHHIIIIIIII IIHHHIIIHHHIIIIIII@@@@IFICCCICAA;;;;ED?B @@D66445555<<>AAIIIIIIII;;;::III??? CCCIII;;;;IFFIIIIICCCBIBIEEDC4444?4BBBE? EIIICHHII;;;HIIIIIIHH;;;HHIIIII;;;IIIIHH HIIIIII>>??>IEEBGG::1111/46FBFBB?=;=A?97 771119:EAAADDBD7777=/111122DA@@B68;;;I8H HIIIII;;;;?>IECCCB/////;745=! _8_ IIA94445EEII===>IIIIIIIIICCCCIIHIIICC;;; ;IIIIIIIIIIIIIIIIIIIIIIIIIF;;666DDIIIIII IIIIIIIIIIIIIEE94442244@@666CC<>==HH;;IIIIIICC @@???III@@@@IC?666HIDDCI?B??CC////-=323?423,,,/=1,,,,-:4E ;??EIIIIICCCCI>;;;IIIIIII<<@@?=////7=A99 988<<4455IEEEIIIIIIIIIIIII<999HIIIIIIIII II?????IIIIIIIIIIICAC;55539EIIIIIIIIIIII IIIIHH999HHHIA=AEEFF@=.....AD@@@DDEEEEFI II;;;977FFCCC@24449?FDD! _9_ QUALITY_SCORES = QUALITY_STRINGS.collect { |str| str.unpack('C*').collect { |i| i - 33 }.freeze }.freeze ERROR_PROBABILITIES = QUALITY_SCORES.collect { |ary| ary.collect { |q| 10 ** (- q / 10.0) }.freeze }.freeze def setup fn = File.join(TestFastqDataDir, 'longreads_original_sanger.fastq') @ff = Bio::FlatFile.open(Bio::Fastq, fn) end def test_validate_format @ff.each do |e| assert(e.validate_format) end assert(@ff.eof?) end def test_validate_format_with_array @ff.each do |e| a = [] assert(e.validate_format(a)) assert(a.empty?) end end def test_to_s ids = IDLINES.dup seqs = SEQS.dup qstrs = QUALITY_STRINGS.dup ent = [] while !ids.empty? ent.push "@#{ids.shift}\n#{seqs.shift}\n+\n#{qstrs.shift}\n" end @ff.each do |e| assert_equal(ent.shift, e.to_s) end assert(ent.empty?) 
end def test_definition ids = IDLINES.dup @ff.each do |e| assert_equal(ids.shift, e.definition) end assert(ids.empty?) end def test_entry_id ids = ENTRY_IDS.dup @ff.each do |e| assert_equal(ids.shift, e.entry_id) end assert(ids.empty?) end def test_sequence_string seqs = SEQS.dup @ff.each do |e| s = seqs.shift assert_equal(s, e.sequence_string) end assert(seqs.empty?) end def test_seq seqs = SEQS.collect { |x| Bio::Sequence::Generic.new(x) } @ff.each do |e| s = seqs.shift assert_equal(s, e.seq) end assert(seqs.empty?) end def test_naseq seqs = SEQS.collect { |x| Bio::Sequence::NA.new(x) } @ff.each do |e| s = seqs.shift assert_equal(s, e.naseq) end assert(seqs.empty?) end def test_nalen lengths = SEQS.collect { |x| Bio::Sequence::NA.new(x).length } @ff.each do |e| i = lengths.shift assert_equal(i, e.nalen) end assert(lengths.empty?) end def test_quality_string qualities = QUALITY_STRINGS.dup @ff.each do |e| assert_equal(qualities.shift, e.quality_string) end assert(qualities.empty?) end def test_quality_scores qualities = QUALITY_SCORES.dup @ff.each do |e| assert_equal(qualities.shift, e.quality_scores) end assert(qualities.empty?) end def test_error_probabilities probs = ERROR_PROBABILITIES.dup @ff.each do |e| float_array_equivalent?(probs.shift, e.error_probabilities) end assert(probs.empty?) 
end def test_to_biosequence @ff.each_with_index do |e, i| s = nil assert_nothing_raised { s = e.to_biosequence } assert_equal(Bio::Sequence::Generic.new(SEQS[i]), s.seq) assert_equal(IDLINES[i], s.definition) assert_equal(ENTRY_IDS[i], s.entry_id) assert_equal(:phred, s.quality_score_type) assert_equal(QUALITY_SCORES[i], s.quality_scores) float_array_equivalent?(ERROR_PROBABILITIES[i], s.error_probabilities) end end def test_to_biosequence_and_output @ff.each_with_index do |e, i| id_line = IDLINES[i] seq_line = SEQS[i] qual_line = QUALITY_STRINGS[i] # Changed default width to nil (no wrapping) expected = "@#{id_line}\n#{seq_line}\n+\n#{qual_line}\n" actual = e.to_biosequence.output(:fastq_sanger) assert_equal(expected, actual) end end def test_roundtrip @ff.each_with_index do |e, i| str_orig = @ff.entry_raw s = e.to_biosequence str = s.output(:fastq_sanger, { :repeat_title => true, :width => 80 }) assert_equal(str_orig, str) e2 = Bio::Fastq.new(str) assert_equal(e.sequence_string, e2.sequence_string) assert_equal(e.quality_string, e2.quality_string) assert_equal(e.definition, e2.definition) assert_equal(e.quality_scores, e2.quality_scores) float_array_equivalent?(e.error_probabilities, e2.error_probabilities) end end end #class TestFastq_longreads_original_sanger # common methods to read *_full_range_as_*.fastq and test quality scores # and error probabilities module TestFastq_full_range include FloatArrayComparison private def read_file(fn, format) path = File.join(TestFastqDataDir, fn) entries = Bio::FlatFile.open(Bio::Fastq, path) { |ff| ff.to_a } entries.each { |e| e.format=format } entries end def scores_through(range) range.to_a end def scores_phred2solexa(range) min = -5 max = 62 sc = range.collect do |q| tmp = 10 ** (q / 10.0) - 1 if tmp <= 0 then min else r = (10 * Math.log10(tmp)).round if r < min then min elsif r > max then max else r end end end sc end def scores_phred2illumina(range) min = 0 max = 62 sc = range.collect do |q| if q < min then min elsif 
q > max then max else q end end sc end def scores_phred2sanger(range) min = 0 max = 93 sc = range.collect do |q| if q < min then min elsif q > max then max else q end end sc end def scores_solexa2phred(range) sc = range.collect do |q| r = 10 * Math.log10(10 ** (q / 10.0) + 1) r.round end sc end def scores_solexa2sanger(range) scores_phred2sanger(scores_solexa2phred(range)) end def scores_solexa2illumina(range) scores_phred2illumina(scores_solexa2phred(range)) end def common_test_quality_scores(scores, filename, format) entries = read_file(filename, format) assert_equal(scores, entries[0].quality_scores) assert_equal(scores.reverse, entries[1].quality_scores) end def common_test_error_probabilities(probabilities, filename, format) entries = read_file(filename, format) float_array_equivalent?(probabilities, entries[0].error_probabilities) float_array_equivalent?(probabilities.reverse, entries[1].error_probabilities) end def common_test_validate_format(filename, format) entries = read_file(filename, format) assert(entries[0].validate_format) assert(entries[1].validate_format) end def phred_q2p(scores) scores.collect { |q| 10 ** (-q / 10.0) } end def solexa_q2p(scores) scores.collect do |q| t = 10 ** (-q / 10.0) t / (1.0 + t) end end public def test_validate_format common_test_validate_format(self.class::FILENAME_AS_SANGER, 'fastq-sanger') common_test_validate_format(self.class::FILENAME_AS_SOLEXA, 'fastq-solexa') common_test_validate_format(self.class::FILENAME_AS_ILLUMINA, 'fastq-illumina') end def test_quality_scores_as_sanger scores = scores_to_sanger(self.class::RANGE) common_test_quality_scores(scores, self.class::FILENAME_AS_SANGER, 'fastq-sanger') end def test_error_probabilities_as_sanger scores = scores_to_sanger(self.class::RANGE) probs = phred_q2p(scores) common_test_error_probabilities(probs, self.class::FILENAME_AS_SANGER, 'fastq-sanger') end def test_quality_scores_as_solexa scores = scores_to_solexa(self.class::RANGE) common_test_quality_scores(scores, 
self.class::FILENAME_AS_SOLEXA, 'fastq-solexa') end def test_error_probabilities_as_solexa scores = scores_to_solexa(self.class::RANGE) probs = solexa_q2p(scores) common_test_error_probabilities(probs, self.class::FILENAME_AS_SOLEXA, 'fastq-solexa') end def test_quality_scores_as_illumina scores = scores_to_illumina(self.class::RANGE) common_test_quality_scores(scores, self.class::FILENAME_AS_ILLUMINA, 'fastq-illumina') end def test_error_probabilities_as_illumina scores = scores_to_illumina(self.class::RANGE) probs = phred_q2p(scores) common_test_error_probabilities(probs, self.class::FILENAME_AS_ILLUMINA, 'fastq-illumina') end end #module TestFastq_full_range class TestFastq_sanger_full_range < Test::Unit::TestCase include TestFastq_full_range RANGE = 0..93 FILENAME_AS_SANGER = 'sanger_full_range_as_sanger.fastq' FILENAME_AS_SOLEXA = 'sanger_full_range_as_solexa.fastq' FILENAME_AS_ILLUMINA = 'sanger_full_range_as_illumina.fastq' alias scores_to_sanger scores_through alias scores_to_solexa scores_phred2solexa alias scores_to_illumina scores_phred2illumina end #class TestFastq_sanger_full_range class TestFastq_solexa_full_range < Test::Unit::TestCase include TestFastq_full_range RANGE = (-5)..62 FILENAME_AS_SANGER = 'solexa_full_range_as_sanger.fastq' FILENAME_AS_SOLEXA = 'solexa_full_range_as_solexa.fastq' FILENAME_AS_ILLUMINA = 'solexa_full_range_as_illumina.fastq' alias scores_to_sanger scores_solexa2sanger alias scores_to_solexa scores_through alias scores_to_illumina scores_solexa2illumina end #class TestFastq_solexa_full_range class TestFastq_illumina_full_range < Test::Unit::TestCase include TestFastq_full_range RANGE = 0..62 FILENAME_AS_SANGER = 'illumina_full_range_as_sanger.fastq' FILENAME_AS_SOLEXA = 'illumina_full_range_as_solexa.fastq' FILENAME_AS_ILLUMINA = 'illumina_full_range_as_illumina.fastq' alias scores_to_sanger scores_phred2sanger alias scores_to_solexa scores_phred2solexa alias scores_to_illumina scores_through end #class 
TestFastq_illumina_full_range # common methods for testing error_*.fastq module TestFastq_error FILENAME = nil PRE_SKIP = 2 POST_SKIP = 2 ERRORS = [] def do_test_validate_format(ff) e = ff.next_entry #p e a = [] assert_equal(false, e.validate_format(a)) assert_equal(self.class::ERRORS.size, a.size) self.class::ERRORS.each do |ex| obj = a.shift assert_kind_of(ex.class, obj) assert_equal(ex.message, obj.message) end end private :do_test_validate_format def test_validate_format path = File.join(TestFastqDataDir, self.class::FILENAME) Bio::FlatFile.open(Bio::Fastq, path) do |ff| self.class::PRE_SKIP.times { ff.next_entry } do_test_validate_format(ff) self.class::POST_SKIP.times { ff.next_entry } assert(ff.eof?) end end end #module TestFastq_error class TestFastq_error_diff_ids < Test::Unit::TestCase include TestFastq_error FILENAME = 'error_diff_ids.fastq' PRE_SKIP = 2 POST_SKIP = 2 ERRORS = [ Bio::Fastq::Error::Diff_ids.new ] end #class TestFastq_error_diff_ids class TestFastq_error_double_qual < Test::Unit::TestCase include TestFastq_error FILENAME = 'error_double_qual.fastq' PRE_SKIP = 2 POST_SKIP = 2 ERRORS = [ Bio::Fastq::Error::Long_qual.new ] end #class TestFastq_error_double_qual class TestFastq_error_double_seq < Test::Unit::TestCase include TestFastq_error FILENAME = 'error_double_seq.fastq' PRE_SKIP = 3 POST_SKIP = 0 ERRORS = [ Bio::Fastq::Error::Long_qual.new ] end #class TestFastq_error_double_seq class TestFastq_error_long_qual < Test::Unit::TestCase include TestFastq_error FILENAME = 'error_long_qual.fastq' PRE_SKIP = 3 POST_SKIP = 1 ERRORS = [ Bio::Fastq::Error::Long_qual.new ] end #class TestFastq_error_long_qual class TestFastq_error_no_qual < Test::Unit::TestCase include TestFastq_error FILENAME = 'error_no_qual.fastq' PRE_SKIP = 0 POST_SKIP = 0 private def do_test_validate_format(ff) 2.times do e = ff.next_entry a = [] e.validate_format(a) assert_equal(1, a.size) assert_kind_of(Bio::Fastq::Error::Long_qual, a[0]) end 1.times do e = ff.next_entry a = 
[] e.validate_format(a) assert_equal(1, a.size) assert_kind_of(Bio::Fastq::Error::Short_qual, a[0]) end end end #class TestFastq_error_no_qual class TestFastq_error_qual_del < Test::Unit::TestCase include TestFastq_error FILENAME = 'error_qual_del.fastq' PRE_SKIP = 3 POST_SKIP = 1 ERRORS = [ Bio::Fastq::Error::Qual_char.new(12) ] end #class TestFastq_error_qual_del class TestFastq_error_qual_escape < Test::Unit::TestCase include TestFastq_error FILENAME = 'error_qual_escape.fastq' PRE_SKIP = 4 POST_SKIP = 0 ERRORS = [ Bio::Fastq::Error::Qual_char.new(7) ] end #class TestFastq_error_qual_escape class TestFastq_error_qual_null < Test::Unit::TestCase include TestFastq_error FILENAME = 'error_qual_null.fastq' PRE_SKIP = 0 POST_SKIP = 4 ERRORS = [ Bio::Fastq::Error::Qual_char.new(3) ] end #class TestFastq_error_qual_null class TestFastq_error_qual_space < Test::Unit::TestCase include TestFastq_error FILENAME = 'error_qual_space.fastq' PRE_SKIP = 3 POST_SKIP = 1 ERRORS = [ Bio::Fastq::Error::Qual_char.new(18) ] end #class TestFastq_error_qual_space class TestFastq_error_qual_tab < Test::Unit::TestCase include TestFastq_error FILENAME = 'error_qual_tab.fastq' PRE_SKIP = 4 POST_SKIP = 0 ERRORS = [ Bio::Fastq::Error::Qual_char.new(10) ] end #class TestFastq_error_qual_tab class TestFastq_error_qual_unit_sep < Test::Unit::TestCase include TestFastq_error FILENAME = 'error_qual_unit_sep.fastq' PRE_SKIP = 2 POST_SKIP = 2 ERRORS = [ Bio::Fastq::Error::Qual_char.new(5) ] end #class TestFastq_error_qual_unit_sep class TestFastq_error_qual_vtab < Test::Unit::TestCase include TestFastq_error FILENAME = 'error_qual_vtab.fastq' PRE_SKIP = 0 POST_SKIP = 4 ERRORS = [ Bio::Fastq::Error::Qual_char.new(10) ] end #class TestFastq_error_qual_vtab class TestFastq_error_short_qual < Test::Unit::TestCase include TestFastq_error FILENAME = 'error_short_qual.fastq' PRE_SKIP = 2 POST_SKIP = 1 ERRORS = [ Bio::Fastq::Error::Long_qual.new ] end #class TestFastq_error_short_qual class 
TestFastq_error_spaces < Test::Unit::TestCase include TestFastq_error FILENAME = 'error_spaces.fastq' PRE_SKIP = 0 POST_SKIP = 0 ERRORS = [ Bio::Fastq::Error::Seq_char.new(9), Bio::Fastq::Error::Seq_char.new(20), Bio::Fastq::Error::Qual_char.new(9), Bio::Fastq::Error::Qual_char.new(20) ] private def do_test_validate_format(ff) 5.times do e = ff.next_entry a = [] e.validate_format(a) assert_equal(4, a.size) self.class::ERRORS.each do |ex| obj = a.shift assert_kind_of(ex.class, obj) assert_equal(ex.message, obj.message) end end end end #class TestFastq_error_spaces class TestFastq_error_tabs < TestFastq_error_spaces FILENAME = 'error_tabs.fastq' end #class TestFastq_error_tabs class TestFastq_error_trunc_at_plus < Test::Unit::TestCase include TestFastq_error FILENAME = 'error_trunc_at_plus.fastq' PRE_SKIP = 4 POST_SKIP = 0 ERRORS = [ Bio::Fastq::Error::No_qual.new ] end #class TestFastq_error_trunc_at_plus class TestFastq_error_trunc_at_qual < TestFastq_error_trunc_at_plus FILENAME = 'error_trunc_at_qual.fastq' end #class TestFastq_error_trunc_at_qual class TestFastq_error_trunc_at_seq < Test::Unit::TestCase include TestFastq_error FILENAME = 'error_trunc_at_seq.fastq' PRE_SKIP = 4 POST_SKIP = 0 ERRORS = [ Bio::Fastq::Error::No_qual.new ] end #class TestFastq_error_trunc_at_seq # Unit tests for Bio::Fastq#mask. 
class TestFastq_mask < Test::Unit::TestCase def setup fn = File.join(TestFastqDataDir, 'wrapping_original_sanger.fastq') Bio::FlatFile.open(Bio::Fastq, fn) do |ff| @entry = ff.next_entry end @entry.format = :fastq_sanger end def test_mask_60 expected = 'n' * 135 assert_equal(expected, @entry.mask(60).seq) end def test_mask_20 expected = "GAAnTTnCAGGnCCACCTTTnnnnnGATAGAATAATGGAGAAnnTTAAAnGCTGTACATATACCAATGAACAATAAnTCAATACATAAAnnnGGAGAAGTnGGAACCGAAnGGnTTnGAnTTCAAnCCnTTnCGn" assert_equal(expected, @entry.mask(20).seq) end def test_mask_20_with_x expected = "GAAxTTxCAGGxCCACCTTTxxxxxGATAGAATAATGGAGAAxxTTAAAxGCTGTACATATACCAATGAACAATAAxTCAATACATAAAxxxGGAGAAGTxGGAACCGAAxGGxTTxGAxTTCAAxCCxTTxCGx" assert_equal(expected, @entry.mask(20, 'x').seq) end def test_mask_20_with_empty_string expected = "GAATTCAGGCCACCTTTGATAGAATAATGGAGAATTAAAGCTGTACATATACCAATGAACAATAATCAATACATAAAGGAGAAGTGGAACCGAAGGTTGATTCAACCTTCG" assert_equal(expected, @entry.mask(20, '').seq) end def test_mask_20_with_longer_string expected = "GAA-*-TT-*-CAGG-*-CCACCTTT-*--*--*--*--*-GATAGAATAATGGAGAA-*--*-TTAAA-*-GCTGTACATATACCAATGAACAATAA-*-TCAATACATAAA-*--*--*-GGAGAAGT-*-GGAACCGAA-*-GG-*-TT-*-GA-*-TTCAA-*-CC-*-TT-*-CG-*-" assert_equal(expected, @entry.mask(20, '-*-').seq) end end #class TestFastq_mask end #module TestFastq end #module Bio bio-1.4.3.0001/test/unit/bio/data/0000755000004100000410000000000012200110570016303 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/data/test_codontable.rb0000644000004100000410000000451312200110570022004 0ustar www-datawww-data# # test/unit/bio/data/test_codontable.rb - Unit test for Bio::CodonTable # # Copyright:: Copyright (C) 2005 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/data/codontable' module Bio class 
TestCodonTableConstants < Test::Unit::TestCase def test_Definitions assert(Bio::CodonTable::DEFINITIONS) assert(Bio::CodonTable::DEFINITIONS[1], "Standard (Eukaryote)") end def test_Starts assert(Bio::CodonTable::STARTS) assert_equal(['ttg', 'ctg', 'atg', 'gtg'], Bio::CodonTable::STARTS[1]) end def test_stops assert(Bio::CodonTable::STOPS) assert_equal(['taa', 'tag', 'tga'], Bio::CodonTable::STOPS[1]) end def test_Tables assert(Bio::CodonTable::TABLES) end end class TestCodonTable < Test::Unit::TestCase def setup @ct = Bio::CodonTable[1] end def test_self_accessor assert(Bio::CodonTable[1]) end def test_self_copy assert(Bio::CodonTable.copy(1)) end def test_table assert(@ct.table) end def test_definition assert_equal("Standard (Eukaryote)", @ct.definition) end def test_start assert_equal(['ttg', 'ctg', 'atg', 'gtg'], @ct.start) end def test_stop assert_equal(['taa', 'tag', 'tga'], @ct.stop) end def test_accessor #[] assert_equal('M', @ct['atg']) end def test_set_accessor #[]= alternative = 'Y' @ct['atg'] = alternative assert_equal(alternative, @ct['atg']) @ct['atg'] = 'M' assert_equal('M', @ct['atg']) end def test_each assert(@ct.each {|x| }) end def test_revtrans assert_equal(['atg'], @ct.revtrans('M')) end def test_start_codon? assert_equal(true, @ct.start_codon?('atg')) assert_equal(false, @ct.start_codon?('taa')) end def test_stop_codon? 
assert_equal(false, @ct.stop_codon?('atg')) assert_equal(true, @ct.stop_codon?('taa')) end def test_Tables assert_equal(@ct.table, Bio::CodonTable::TABLES[1]) end end end # module Bio bio-1.4.3.0001/test/unit/bio/data/test_aa.rb0000644000004100000410000000355612200110570020261 0ustar www-datawww-data# # test/unit/bio/data/test_aa.rb - Unit test for Bio::AminoAcid # # Copyright:: Copyright (C) 2005 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/data/aa' module Bio class TestAAConstants < Test::Unit::TestCase def test_bio_aminoacid assert_equal('Ala', Bio::AminoAcid['A']) end end class TestAA < Test::Unit::TestCase def setup @obj = Bio::AminoAcid.new end def test_13 assert_equal("Ala", @obj['A']) end def test_1n assert_equal('alanine', @obj.name('A')) end def test_to_1_name assert_equal('A', @obj.to_1('alanine')) end def test_to_1_3 assert_equal('A', @obj.to_1('Ala')) end def test_to_1_1 assert_equal('A', @obj.to_1('A')) end def test_to_3_name assert_equal('Ala', @obj.to_3('alanine')) end def test_to_3_3 assert_equal('Ala', @obj.to_3('Ala')) end def test_to_3_1 assert_equal('Ala', @obj.to_3('A')) end def test_one2three assert_equal('Ala', @obj.one2three('A')) end def test_three2one assert_equal('A', @obj.three2one('Ala')) end def test_one2name assert_equal('alanine', @obj.one2name('A')) end def test_name2one assert_equal('A', @obj.name2one('alanine')) end def test_three2name assert_equal('alanine', @obj.three2name('Ala')) end def test_name2three assert_equal('Ala', @obj.name2three('alanine')) end def test_to_re assert_equal(/[DNB][EQZ]ACDEFGHIKLMNPQRSTVWYU/, @obj.to_re('BZACDEFGHIKLMNPQRSTVWYU')) end end end bio-1.4.3.0001/test/unit/bio/data/test_na.rb0000644000004100000410000000353112200110570020267 0ustar www-datawww-data# 
# test/unit/bio/data/test_na.rb - Unit test for Bio::NucleicAcid # # Copyright:: Copyright (C) 2005,2006 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/data/na' module Bio class TestNAConstants < Test::Unit::TestCase def test_NAMES assert_equal('a', Bio::NucleicAcid::NAMES['a']) end def test_NAMES_1_to_name assert_equal('Adenine', Bio::NucleicAcid::NAMES['A']) end def test_WEIGHT mw = 135.15 assert_equal(mw, Bio::NucleicAcid::WEIGHT['a']) assert_equal(mw, Bio::NucleicAcid::WEIGHT[:adenine]) end end class TestNA < Test::Unit::TestCase def setup @obj = Bio::NucleicAcid.new end def test_to_re re = /[tcy][agr][atw][gcs][tgk][acm][tgcyskb][atgrwkd][agcmrsv][atgcyrwskmbdhvn]atgc/ str = 'yrwskmbdvnatgc' str0 = str.clone assert_equal(re, @obj.to_re(str)) assert_equal(str0, str) assert_equal(re, Bio::NucleicAcid.to_re(str)) end def test_weight mw = 135.15 assert_equal(mw, @obj.weight('a')) assert_equal(mw, Bio::NucleicAcid.weight('a')) end def test_weight_rna mw = 135.15 assert_equal(mw, @obj.weight('A', true)) assert_equal(mw, Bio::NucleicAcid.weight('A', true)) end def test_accessor assert_equal('Adenine', @obj['A']) end def test_names assert_equal(Bio::NucleicAcid::NAMES, @obj.names) end def test_na assert_equal(Bio::NucleicAcid::NAMES, @obj.na) end def test_name assert_equal('Adenine', @obj.name('A')) end end end bio-1.4.3.0001/test/unit/bio/test_feature.rb0000644000004100000410000000624512200110570020420 0ustar www-datawww-data# # test/unit/bio/test_feature.rb - Unit test for Features/Feature classes # # Copyright:: Copyright (C) 2005 # Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 2, 
'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/feature' require 'bio/compat/features' module Bio class TestQualifier < Test::Unit::TestCase def setup qualifier = 'gene' value = 'CDS' @obj = Bio::Feature::Qualifier.new(qualifier, value) end def test_qualifier assert_equal('gene', @obj.qualifier) end def test_value assert_equal('CDS', @obj.value) end end class TestFeature < Test::Unit::TestCase def setup @qualifier = Bio::Feature::Qualifier.new('organism', 'Arabidopsis thaliana') feature = "source" position = '1..615' qualifiers = [@qualifier] @obj = Bio::Feature.new(feature, position, qualifiers) end def test_new assert(Bio::Feature.new) end def test_feature assert_equal("source", @obj.feature) end def test_position assert_equal('1..615', @obj.position) end def test_qualifiers assert_equal([@qualifier], @obj.qualifiers) end def test_locations assert_equal(1, @obj.locations.first.from) assert_equal(615, @obj.locations.first.to) end def test_append_nil assert(@obj.append(nil)) assert_equal(1, @obj.qualifiers.size) end def test_append qualifier = Bio::Feature::Qualifier.new('db_xref', 'taxon:3702') assert(@obj.append(qualifier)) assert_equal('db_xref', @obj.qualifiers.last.qualifier) end def test_each @obj.each do |qua| assert_equal('Arabidopsis thaliana', qua.value) end end def test_assoc @obj.append(Bio::Feature::Qualifier.new("organism", "Arabidopsis thaliana")) assert_equal({"organism" => "Arabidopsis thaliana"}, @obj.assoc) end end class TestFeatures < Test::Unit::TestCase class NullStderr def initialize @log = [] end def write(*arg) #p arg @log.push([ :write, *arg ]) nil end def method_missing(*arg) #p arg @log.push arg nil end end #class NullStderr def setup # To suppress warning messages, $stderr is replaced by dummy object. 
@stderr_orig = $stderr $stderr = NullStderr.new @obj = Bio::Features.new([Bio::Feature.new('gene', '1..615', [])]) end def teardown # bring back $stderr $stderr = @stderr_orig end def test_features assert_equal(1, @obj.features.size) end def test_append assert(@obj.append(Bio::Feature.new('gene', '1..615', []))) assert_equal(2, @obj.features.size) end def test_each @obj.each do |feature| assert_equal('gene', feature.feature) end end def test_arg # def [](*arg) assert_equal('gene', @obj[0].feature) end end end bio-1.4.3.0001/test/unit/bio/test_db.rb0000644000004100000410000000342312200110570017345 0ustar www-datawww-data# # test/unit/bio/test_db.rb - Unit test for Bio::DB # # Copyright:: Copyright (C) 2005 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 2, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/db' module Bio class TestDB < Test::Unit::TestCase def setup @obj = Bio::DB.new @obj.instance_eval { @orig = {"TAG" => "TAG value1\n value2"} } end def test_open assert(Bio::DB.respond_to?(:open)) end def test_entry_id assert_raises(NotImplementedError) { @obj.entry_id } end def test_tags assert_equal(["TAG"], @obj.tags) end def test_exists assert_equal(true, @obj.exists?("TAG")) end def test_get assert_equal("TAG value1\n value2", @obj.get("TAG")) end def test_fetch assert(@obj.fetch("TAG")) assert(@obj.fetch("TAG", 1)) end end class TestNCBIDB < Test::Unit::TestCase def setup entry =< # 2005 Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 2, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/alignment' module Bio class TestAlignmentPropertyMethods < Test::Unit::TestCase def setup @obj = 
Object.new @obj.extend(Alignment::PropertyMethods) end def test_is_gap_default_false assert_equal(false, @obj.is_gap?('a'), "\"a\" isn't a gap") end def test_is_gap_default_true assert_equal(true, @obj.is_gap?('-'), '"-" is a gap') end def test_gap_regexp assert_not_nil(@obj.gap_regexp) end def test_gap_regexp_never_nil @obj.gap_regexp = nil assert_not_nil(@obj.gap_regexp) end def test_gap_regexp=() @obj.gap_regexp = /[^a-zA-Z0-9]/ assert_equal(/[^a-zA-Z0-9]/, @obj.gap_regexp) end def test_is_gap_nodefault_false @obj.gap_regexp = /[^a-zA-Z0-9]/ assert_equal(false, @obj.is_gap?('3')) end def test_is_gap_nodefault_true @obj.gap_regexp = /[^atgc]/ assert_equal(true, @obj.is_gap?('z')) end def test_gap_char_default assert_not_nil(@obj.gap_char) end def test_gap_char_never_nil @obj.gap_char = nil assert_not_nil(@obj.gap_char) end def test_gap_char=() @obj.gap_char = '#' assert_equal('#', @obj.gap_char) end def test_missing_char_default assert_not_nil(@obj.missing_char) end def test_missing_char_never_nil @obj.missing_char = nil assert_not_nil(@obj.missing_char) end def test_missing_char=() @obj.missing_char = '_' assert_equal('_', @obj.missing_char) end def test_seqclass_default assert_not_nil(@obj.seqclass) end def test_seqclass_never_nil @obj.seqclass = nil assert_not_nil(@obj.seqclass) end def test_seqclass=() @obj.seqclass = Sequence::NA assert_equal(Sequence::NA, @obj.seqclass) end def test_get_all_property_default assert_equal({}, @obj.get_all_property) end def test_get_all_property_nodefault @obj.gap_regexp = /[^acgt]/ @obj.gap_char = '#' @obj.missing_char = '_' @obj.seqclass = Sequence::NA assert_equal({ :gap_regexp => /[^acgt]/, :gap_char => '#', :missing_char => '_', :seqclass => Sequence::NA }, @obj.get_all_property) end def test_set_all_property h = { :gap_regexp => /[^acgt]/, :gap_char => '#', :missing_char => '_', :seqclass => Sequence::NA } @obj.set_all_property(h) assert_equal(h, @obj.get_all_property) end end #class TestAlignmentPropertyMethods # This 
is a unit test of Bio::Alignment::Site class and # Bio::Alignment::SiteMethods module. # Since Bio::Alignment::Site includes Bio::Alignment::SiteMethods, # we can test both at a time. class TestAlignmentSite < Test::Unit::TestCase def test_has_gap_true site = Alignment::Site[ 'a', '-', 'c', 'g', 't' ] assert_equal(true, site.has_gap?) end def test_has_gap_false site = Alignment::Site[ 'a', 'c', 'g', 't' ] assert_equal(false, site.has_gap?) end def test_remove_gaps! site = Alignment::Site[ 'a', '-', 'c', '-' ] assert_equal(Alignment::Site['a', 'c'], site.remove_gaps!) end def test_remove_gaps_bang_not_removed site = Alignment::Site[ 'a', 'c'] assert_equal(nil, site.remove_gaps!) end def test_consensus_string_default site = Alignment::Site[ 'a', 'a', 'a', 'a'] assert_equal('a', site.consensus_string) end def test_consensus_string_default_nil site = Alignment::Site[ 'a', 'a', 'a', 'c'] assert_nil(site.consensus_string) end def test_consensus_string_50percent site = Alignment::Site[ 'a', 'a', 'c', 'g'] assert_equal('a', site.consensus_string(0.5)) end def test_consensus_string_50percent_nil site = Alignment::Site[ 'a', 'c', 'g', 't'] assert_nil(site.consensus_string(0.5)) end def test_consensus_iupac data = { 'a' => [ 'a' ], 'c' => [ 'c' ], 'g' => [ 'g' ], 't' => [ 't' ], 't' => [ 't', 'u' ], 'm' => [ 'a', 'c' ], 'r' => [ 'a', 'g' ], 'w' => [ 'a', 't' ], 's' => [ 'c', 'g' ], 'y' => [ 'c', 't' ], 'k' => [ 'g', 't' ], 'v' => [ 'a', 'c', 'g' ], 'h' => [ 'a', 'c', 't' ], 'd' => [ 'a', 'g', 't' ], 'b' => [ 'c', 'g', 't' ], 'n' => [ 'a', 'c', 'g', 't' ], nil => [ 'z', 'a' ] } data.each do |cons, testdata| site = Alignment::Site[ *testdata ] assert_equal(cons, site.consensus_iupac, "IUPAC consensus of #{testdata.join(',')} is #{cons}") end end def test_match_line_amino_missing site = Alignment::Site[ 'P', 'Q', 'R', 'S' ] assert_equal(' ', site.match_line_amino) end def test_match_line_amino_100percent site = Alignment::Site[ 'M', 'M', 'M', 'M' ] assert_equal('*', 
site.match_line_amino) end def test_match_line_amino_strong site = Alignment::Site[ 'N', 'E', 'Q', 'K' ] assert_equal(':', site.match_line_amino) end def test_match_line_amino_weak site = Alignment::Site[ 'S', 'G', 'N', 'D' ] assert_equal('.', site.match_line_amino) end def test_match_line_nuc_missing site = Alignment::Site[ 'A', 'C', 'G', 'T' ] assert_equal(' ', site.match_line_nuc) end def test_match_line_nuc_100percent site = Alignment::Site[ 'G', 'G', 'G', 'G' ] assert_equal('*', site.match_line_nuc) end end #class TestAlignmentSite # This is sample class for testing Bio::Alignment::EnumerableExtension. class A < Array include Alignment::EnumerableExtension end class TestAlignmentEnumerableExtension < Test::Unit::TestCase def test_each_seq expected_results = [ 'atg', 'aag', 'acg' ] a = A[ *expected_results ] a.each_seq do |x| assert_equal(expected_results.shift, x) end assert(expected_results.empty?) end def test_seqclass_default a = A.new assert_equal(String, a.seqclass) end def test_seqclass a = A[ Bio::Sequence::NA.new('atg') ] assert_equal(Bio::Sequence::NA, a.seqclass) end def test_seqclass=() a = A.new assert_equal(String, a.seqclass) a << Bio::Sequence::NA.new('a') assert_equal(Bio::Sequence::NA, a.seqclass) a.seqclass = Bio::Sequence::AA assert_equal(Bio::Sequence::AA, a.seqclass) end def test_alignment_length a = A[ 'a', 'at', 'atgc', 'atg', '' ] assert_equal(4, a.alignment_length) end def test_private_alignment_site a = A[ 'a', 'at', 'atgc', 'atg', '' ] assert_equal(Alignment::Site[ '-', 't', 't', 't', '-' ], a.instance_eval { _alignment_site(1) }) end def test_alignment_site a = A[ 'a', 'at', 'atgc', 'atg', '' ] assert_equal(Alignment::Site[ '-', 't', 't', 't', '-' ], a.__send__(:_alignment_site, 1)) end def test_each_site expected_results = [ Alignment::Site[ 'a', 'a', 'a', 'a', '-' ], Alignment::Site[ '-', 't', 't', 't', '-' ], Alignment::Site[ '-', '-', 'g', 'g', '-' ], Alignment::Site[ '-', '-', 'c', '-', '-' ] ] a = A[ 'a', 'at', 'atgc', 'atg', 
'' ] a.each_site do |site| assert_equal(expected_results.shift, site) end assert(expected_results.empty?) end def test_each_site_step expected_results = [ Alignment::Site[ '-', 't', 't', 't', '-' ], # site 1 Alignment::Site[ '-', 'a', 'g', 't', '-' ], # site 3 ] a = A[ 'a', 'atgatc', 'atggcc', 'atgtga', '' ] a.each_site_step(1, 4, 2) do |site| assert_equal(expected_results.shift, site) end assert(expected_results.empty?) end def test_alignment_collect a = A[ 'a', 'at', 'atgc', 'atg', '' ] assert_equal(Alignment::SequenceArray[ 'a', 'au', 'augc', 'aug', '' ], a.alignment_collect { |x| x.gsub(/t/, 'u') }) end def test_alignment_window a = A[ 'a', 'at', 'atgca', 'atg', '' ] assert_equal(Alignment::SequenceArray[ '', 't', 'tgc', 'tg', '' ], a.alignment_window(1, 3)) end def test_each_window expected_results = [ Alignment::SequenceArray[ 'atg', 'tcg', '' ], # 0..2 Alignment::SequenceArray[ 'gca', 'gat', '' ], # 2..4 Alignment::SequenceArray[ 'atg', 'tgc', '' ], # 4..6 Alignment::SequenceArray[ 'c', 'a', '' ] # 7..7 ] a = A[ 'atgcatgc', 'tcgatgca', '' ] r = a.each_window(3, 2) do |x| assert_equal(expected_results.shift, x) end assert_equal(expected_results.shift, r) assert(expected_results.empty?) end def test_collect_each_site a = A[ 'a', 'at', 'atgc', 'atg', '' ] assert_equal(["aaaa-", "-ttt-", "--gg-", "--c--" ], a.collect_each_site { |x| x.join('') }) end def test_consensus_each_site_default expected_results = [ Alignment::Site[ 'a', 'a', 'a', 'a', 'a' ], Alignment::Site[ 'a', 'c', 'g', 't', '-' ] ] a = A[ 'aa', 'ac', 'ag', 'at', 'a-' ] result = a.consensus_each_site do |site| assert_equal(expected_results.shift, site) 'x' end assert_equal('xx', result) assert(expected_results.empty?) 
end def test_consensus_each_site_gap_mode_1 expected_results = [ Alignment::Site[ 'a', 'a', 'a', 'a', 'a' ] ] a = A[ 'aa', 'ac', 'ag', 'at', 'a-' ] result = a.consensus_each_site(:gap_mode => 1) do |site| assert_equal(expected_results.shift, site) 'x' end assert_equal('x-', result) assert(expected_results.empty?) end def test_consensus_each_site_gap_mode_minus1 expected_results = [ Alignment::Site[ 'a', 'a', 'a', 'a', 'a' ], Alignment::Site[ 'a', 'c', 'g', 't' ] ] a = A[ 'aa', 'ac', 'ag', 'at', 'a-' ] result = a.consensus_each_site(:gap_mode => -1) do |site| assert_equal(expected_results.shift, site) 'x' end assert_equal('xx', result) assert(expected_results.empty?) end def test_consensus_string_default a = A[ 'ata', 'aac', 'aag', 'aat' ] assert_equal('a??', a.consensus_string) end def test_consensus_string_half a = A[ 'ata', 'aac', 'aag', 'aat' ] assert_equal('aa?', a.consensus_string(0.5)) end def test_consensus_iupac a = A[ 'acgtaaaccgaaacaz', 'acgtaaaccgccggcz', 'acgtcgtgttgtttgz', 'acgtcgtgttaaactz' ] assert_equal('acgtmrwsykvhdbn?', a.consensus_iupac) end def test_match_line_amino a = A[ 'M-SNNNQMMHF-CASSSSSNNFH-AW', 'M-TEHDHIIYY-STATTGNDEVF-FW', 'M-AQQERLLHW-AVGNPNDEQLY-HW', 'M-SKKQKVFYF-CASKADEQHIH-LW', 'M-TNNNQMMHY-STASSSQHRMF-QW', 'M-AEHDHIIYW-AVGTTGKKKFY-YW' #* ::::::::: ........... * ] assert_equal('* ::::::::: ........... *', a.match_line_amino) end def test_match_line_nuc a = A[ 'aaa', 'aa-','aac', 'at-' ] assert_equal('* ', a.match_line_nuc) end def test_match_line a = A[ Sequence::AA.new('MNSA'), Sequence::AA.new('MHTL'), Sequence::AA.new('MQNV'), Sequence::AA.new('MKKW') ] assert_equal('*:. ', a.match_line) assert_equal('*:. 
', a.match_line(:type => :aa)) assert_equal('* ', a.match_line(:type => :na)) end def test_convert_match a = A[ 'aaaa', 'accc', 'acac', 'actc' ] a.convert_match assert_equal(A[ 'aaaa', '.ccc', '.c.c', '.ctc' ], a) end def test_convert_unmatch a = A[ 'aaaa', '.ccc', '.c.c', '.ctc' ] a.convert_unmatch assert_equal(A[ 'aaaa', 'accc', 'acac', 'actc' ], a) end def test_alignment_normalize! a = A[ 'a', 'atg', 'atgc', '' ] a.alignment_normalize! assert_equal(A[ 'a---', 'atg-', 'atgc', '----'], a) end def test_alignment_rstrip! a = A[ '--aaa--', '--t-t--', '---g---', '--t' ] assert(a.alignment_rstrip!) assert_equal(A[ '--aaa', '--t-t', '---g-', '--t' ], a) end def test_alignment_rstrip_nil a = A[ 'aa', '-a', 'a-' ] assert_nil(a.alignment_rstrip!) assert_equal(A[ 'aa', '-a', 'a-' ], a) end def test_alignment_lstrip! a = A[ '--aaa--', '--t-t--', '---g---', '--t' ] assert(a.alignment_lstrip!) assert_equal(A[ 'aaa--', 't-t--', '-g---', 't' ], a) end def test_alignment_lstrip_nil a = A[ 'aa', '-a', 'a-' ] assert_nil(a.alignment_lstrip!) assert_equal(A[ 'aa', '-a', 'a-' ], a) end def test_alignment_strip! a = A[ '--aaa--', '--t-t--', '---g---', '--t' ] assert(a.alignment_strip!) assert_equal(A[ 'aaa', 't-t', '-g-', 't' ], a) end def test_alignment_strip_nil a = A[ 'aa', '-a', 'a-' ] assert_nil(a.alignment_strip!) assert_equal(A[ 'aa', '-a', 'a-' ], a) end def test_remove_all_gaps! a = A[ '--aaa--', '--t-t--', '---g---', '--t' ] assert(a.remove_all_gaps!) assert_equal(A[ 'aaa', 'tt', 'g', 't' ], a) end # test of alignment_slice. # Please also refer alignment_window. 
def test_alignment_slice a = A[ 'a', 'at', 'atgca', 'atg', '' ] assert_equal(Alignment::SequenceArray[ '', 't', 'tgc', 'tg', nil ], a.alignment_slice(1, 3)) end def test_alignment_subseq a = A[ Sequence::NA.new('a'), Sequence::NA.new('at'), Sequence::NA.new('atgca'), Sequence::NA.new('atg'), Sequence::NA.new('') ] assert_equal(Alignment::SequenceArray[ Sequence::NA.new(''), Sequence::NA.new('t'), Sequence::NA.new('tgc'), Sequence::NA.new('tg'), nil ], a.alignment_subseq(2,4)) end def test_alignment_concat a = A[ 'aaa', 'c', 'gg', 't' ] a.alignment_concat(A[ 'ttt', 'gg', 'aa', 'cc', 'aa' ]) assert_equal(A[ 'aaattt', 'cgg', 'ggaa', 'tcc' ], a) a.alignment_concat([ 'c', 't' ]) assert_equal(A[ 'aaatttc', 'cggt', 'ggaa', 'tcc' ], a) end end #class TestAlignmentEnumerableExtension class TestAlignmentOutput < Test::Unit::TestCase def setup @obj = Object.new @obj.extend(Alignment::Output) end def test_clustal_have_same_name_true assert_equal([ 0, 1 ], @obj.instance_eval { __clustal_have_same_name?([ 'ATP ATG', 'ATP ATA', 'BBB' ]) }) end def test_have_same_name_false assert_equal(false, @obj.instance_eval { __clustal_have_same_name?([ 'GTP ATG', 'ATP ATA', 'BBB' ]) }) end def test_avoid_same_name assert_equal([ 'ATP_ATG', 'ATP_ATA', 'BBB' ], @obj.instance_eval { __clustal_avoid_same_name([ 'ATP ATG', 'ATP ATA', 'BBB' ]) }) end def test_avoid_same_name_numbering assert_equal([ '0_ATP', '1_ATP', '2_BBB' ], @obj.instance_eval { __clustal_avoid_same_name([ 'ATP', 'ATP', 'BBB' ]) }) end end #class TestAlignmentOutput class TestAlignment < Test::Unit::TestCase # testing helper method def build_na_alignment(*sequences) sequences.inject(Alignment.new) { |alignment, sequence| alignment << Sequence::NA.new(sequence) } end private :build_na_alignment def test_equals alignment1 = Alignment.new([Sequence::NA.new("agct"), Sequence::NA.new("tagc")]) alignment2 = Alignment.new([Sequence::NA.new("agct"), Sequence::NA.new("tagc")]) assert_equal(alignment1, alignment2) end # Alignment#store 
def test_store_cannot_override_key alignment = Alignment.new alignment.store("Cat DNA", Sequence::NA.new("cat")) alignment.store("Cat DNA", Sequence::NA.new("gcat")) assert_equal("cat", alignment["Cat DNA"]) end def test_store_with_nil_key_uses_next_number_for_key alignment = Alignment.new alignment.store(nil, Sequence::NA.new("cat")) alignment.store(nil, Sequence::NA.new("gat")) alignment.store(nil, Sequence::NA.new("tat")) assert_equal({0=>"cat",1=>"gat",2=>"tat"}, alignment.to_hash) end def test_store_with_default_keys_and_user_defined_keys alignment = Alignment.new alignment.store("cat key", Sequence::NA.new("cat")) alignment.store(nil, Sequence::NA.new("cag")) alignment.store("gat key", Sequence::NA.new("gat")) alignment.store(nil, Sequence::NA.new("gag")) assert_equal({"gat key"=>"gat",1=>"cag",3=>"gag","cat key"=>"cat"}, alignment.to_hash) end # Test append operator def test_seqclass_when_sequence_used alignment = Alignment.new alignment << Sequence::NA.new("cat") assert_equal({0=>"cat"}, alignment.to_hash) end # Test seqclass def test_seqclass_when_sequence_used_no_seqclass_set alignment = Alignment.new alignment << Sequence::NA.new("cat") assert_equal(Sequence::NA, alignment.seqclass) end def test_seqclass_String_seq_not_present_no_seqclass_set alignment = Alignment.new alignment << nil assert_equal(String, alignment.seqclass) end def test_seqclass_when_seqclass_set alignment = Alignment.new alignment.seqclass = Fixnum alignment << "this doesn't really make sense" assert_equal(Fixnum, alignment.seqclass) end # Alignment#gap_char def test_default_gap_char alignment = Alignment.new assert_equal("-", alignment.gap_char) end def test_set_and_get_gap_char alignment = Alignment.new alignment.gap_char = "+" assert_equal("+", alignment.gap_char) end # Alignment#gap_regexp def test_default_gap_regexp_matches_default_gap_char alignment = Alignment.new assert(alignment.gap_regexp.match(alignment.gap_char)) end # Alignment#missing_char def test_default_missing_char 
alignment = Alignment.new assert_equal("?", alignment.missing_char) end # Alignment#seq_length def test_seq_length_when_one_sequence alignment = build_na_alignment("agt") assert_equal(3, alignment.seq_length) end def test_seq_length_is_max_seq_length alignment = build_na_alignment("agt", "agtaa", "agta") assert_equal(5, alignment.seq_length) end # Alignment#each_site def test_each_site_equal_length alignment = build_na_alignment("acg", "gta") expected_sites = [["a", "g"], ["c", "t"], ["g", "a"]] alignment.each_site do |site| assert_equal expected_sites.shift, site, "site ##{3-expected_sites.size} wrong" end end def test_each_site_unequal_length alignment = build_na_alignment("ac", "gta") expected_sites = [["a", "g"], ["c", "t"], ["-", "a"]] alignment.each_site do |site| assert_equal expected_sites.shift, site, "site ##{3-expected_sites.size} wrong" end end #TODO: Lots of stuff needing tests here # Alignment#add_seq def test_add_seq_no_key alignment = Alignment.new alignment.add_seq("agct") assert_equal(String, alignment.seqclass, "wrong class") assert_equal({0=>"agct"}, alignment.to_hash, "wrong hash") end def test_add_seq_using_seq_with_seq_method seq = "agtc" class <"agtc"}, alignment.to_hash, "wrong hash") end def test_add_seq_using_seq_with_naseq_method seq = "agtc" class <"agtc"}, alignment.to_hash, "wrong hash") end def test_add_seq_using_seq_with_aaseq_method seq = "AVGR" class <"AVGR"}, alignment.to_hash, "wrong hash") end def test_add_seq_using_seq_with_definition_method seq = "atgc" class <"atgc"}, alignment.to_hash, "wrong hash") end def test_add_seq_using_seq_with_entry_id_method seq = "atgc" class <"atgc"}, alignment.to_hash, "wrong hash") end # Alignment#consensus_string def test_consensus_string_no_gaps alignment = build_na_alignment("agtcgattaa", "tttcgatgcc") assert_equal("??tcgat???", alignment.consensus_string) end def test_consensus_threshold_two_sequences alignment = build_na_alignment("agtcgattaa", "tttcgatgcc") # the threshold is the fraction 
of sequences in which a symbol must # occur at a given position to be considered the consensus symbol assert_equal("agtcgattaa", alignment.consensus(0.5)) assert_equal("??tcgat???", alignment.consensus(0.500000001)) end def test_consensus_threshold_four_sequences alignment = build_na_alignment("agtg", "ttag", "actc", "tatc") # ties go to the symbol that occurs in the earliest sequence assert_equal("agtg", alignment.consensus(0.25)) assert_equal("a?tg", alignment.consensus(0.26)) end def test_consensus_opt_gap_mode alignment = build_na_alignment("gt-gt-a", "ttcggc-", "ttcggc-") # using threshold = 0.5, that is a symbol must occur >= half the time in order to be consensus # gap_mode -1 means gaps are ignored assert_equal("ttcggca", alignment.consensus(0.5, :gap_mode => -1), "gap mode -1") # gap_mode 0 means gaps are treated like regular symbols, yielding a gap in the last position assert_equal("ttcggc-", alignment.consensus(0.5, :gap_mode => 0), "gap mode 0") # gap_mode 1 means gaps take precedence over any other symbol, yielding two more gaps assert_equal("tt-gg--", alignment.consensus(0.5, :gap_mode => 1), "gap mode 1") end def test_consensus_opt_missing_char alignment = build_na_alignment("agtcgattaa", "tttcgatgcc") assert_equal("**tcgat***", alignment.consensus(1, :missing_char => "*")) end # Alignment#consensus_iupac def test_consensus_iupac_no_gaps alignment = build_na_alignment("agtcgattaa", "tttcgatgcc") assert_equal("wktcgatkmm", alignment.consensus_iupac) end def test_consensus_iupac_of_ambiguous_bases alignment = build_na_alignment("tmrwsykvhdbnd", "uaaaccgaaacab") assert_equal("tmrwsykvhdbnn", alignment.consensus_iupac) end def test_consensus_iupac_gap_modes alignment = build_na_alignment("a-t", "acc") # gap_mode -1 means gaps are ignored assert_equal("acy", alignment.consensus_iupac(:gap_mode => -1)) # gap_mode 0 means gaps are treated as normal characters, yielding a missing symbol assert_equal("a?y", alignment.consensus_iupac(:gap_mode => 0)) # 
gap_mode 1 means gaps take precedence over everything, yielding a gap assert_equal("a-y", alignment.consensus_iupac(:gap_mode => 1)) end def test_consensus_iupac_yields_correct_ambiguous_bases assert_equal "t", build_na_alignment("t", "u").consensus_iupac # not really IUPAC # m = a c assert_equal "m", build_na_alignment("a", "c").consensus_iupac, "m #1" assert_equal "m", build_na_alignment("m", "c").consensus_iupac, "m #2" assert_equal "m", build_na_alignment("a", "m").consensus_iupac, "m #3" assert_equal "m", build_na_alignment("m", "a", "c").consensus_iupac, "m #4" # r = a g assert_equal "r", build_na_alignment("a", "g").consensus_iupac, "r #1" assert_equal "r", build_na_alignment("r", "g").consensus_iupac, "r #2" assert_equal "r", build_na_alignment("a", "r").consensus_iupac, "r #3" assert_equal "r", build_na_alignment("a", "r", "g").consensus_iupac, "r #4" # w = a t/u assert_equal "w", build_na_alignment("a", "t").consensus_iupac, "w #1" assert_equal "w", build_na_alignment("a", "u").consensus_iupac, "w #2" assert_equal "w", build_na_alignment("w", "a").consensus_iupac, "w #3" assert_equal "w", build_na_alignment("t", "w").consensus_iupac, "w #4" assert_equal "w", build_na_alignment("w", "u").consensus_iupac, "w #5" assert_equal "w", build_na_alignment("u", "t", "a").consensus_iupac, "w #6" assert_equal "w", build_na_alignment("w", "u", "t", "a").consensus_iupac, "w #7" # s = c g assert_equal "s", build_na_alignment("c", "g").consensus_iupac, "s #1" assert_equal "s", build_na_alignment("s", "g").consensus_iupac, "s #2" assert_equal "s", build_na_alignment("c", "s").consensus_iupac, "s #3" assert_equal "s", build_na_alignment("c", "s", "g").consensus_iupac, "s #4" # y = c t/u assert_equal "y", build_na_alignment("c", "t").consensus_iupac, "y #1" assert_equal "y", build_na_alignment("c", "u").consensus_iupac, "y #2" assert_equal "y", build_na_alignment("y", "c").consensus_iupac, "y #3" assert_equal "y", build_na_alignment("t", "y").consensus_iupac, "y #4" 
assert_equal "y", build_na_alignment("y", "u").consensus_iupac, "y #5" assert_equal "y", build_na_alignment("u", "t", "c").consensus_iupac, "y #6" assert_equal "y", build_na_alignment("y", "u", "t", "c").consensus_iupac, "y #7" # k = g t/u assert_equal "k", build_na_alignment("g", "t").consensus_iupac, "k #1" assert_equal "k", build_na_alignment("g", "u").consensus_iupac, "k #2" assert_equal "k", build_na_alignment("k", "g").consensus_iupac, "k #3" assert_equal "k", build_na_alignment("t", "k").consensus_iupac, "k #4" assert_equal "k", build_na_alignment("k", "u").consensus_iupac, "k #5" assert_equal "k", build_na_alignment("u", "t", "g").consensus_iupac, "k #6" assert_equal "k", build_na_alignment("k", "u", "t", "g").consensus_iupac, "k #7" # v = a c g m r s assert_equal "v", build_na_alignment("a", "c", "g").consensus_iupac, "v #1" assert_equal "v", build_na_alignment("g", "m").consensus_iupac, "v #2" assert_equal "v", build_na_alignment("a", "s").consensus_iupac, "v #3" assert_equal "v", build_na_alignment("c", "r").consensus_iupac, "v #4" assert_equal "v", build_na_alignment("m", "s").consensus_iupac, "v #5" assert_equal "v", build_na_alignment("m", "r").consensus_iupac, "v #6" assert_equal "v", build_na_alignment("s", "r").consensus_iupac, "v #7" assert_equal "v", build_na_alignment("s", "r", "m").consensus_iupac, "v #8" assert_equal "v", build_na_alignment("s", "r", "m", "a", "c", "g").consensus_iupac, "v #9" assert_equal "v", build_na_alignment("v", "g").consensus_iupac, "v #10" # alright, enough # b = t/u c g s y k assert_equal "b", build_na_alignment("t", "c", "g").consensus_iupac, "b #1" assert_equal "b", build_na_alignment("g", "y").consensus_iupac, "b #2" assert_equal "b", build_na_alignment("t", "s").consensus_iupac, "b #3" assert_equal "b", build_na_alignment("c", "k").consensus_iupac, "b #4" assert_equal "b", build_na_alignment("y", "s").consensus_iupac, "b #5" assert_equal "b", build_na_alignment("y", "k").consensus_iupac, "b #6" assert_equal "b", 
build_na_alignment("s", "k").consensus_iupac, "b #7" assert_equal "b", build_na_alignment("s", "k", "y").consensus_iupac, "b #8" assert_equal "b", build_na_alignment("s", "k", "y", "u", "c", "g").consensus_iupac, "b #9" assert_equal "b", build_na_alignment("b", "g").consensus_iupac, "b #10" # h = t/u c a y w m assert_equal "h", build_na_alignment("t", "c", "a").consensus_iupac, "h #1" assert_equal "h", build_na_alignment("a", "y").consensus_iupac, "h #2" assert_equal "h", build_na_alignment("c", "w").consensus_iupac, "h #3" assert_equal "h", build_na_alignment("u", "m").consensus_iupac, "h #4" assert_equal "h", build_na_alignment("y", "w").consensus_iupac, "h #5" assert_equal "h", build_na_alignment("y", "m").consensus_iupac, "h #6" assert_equal "h", build_na_alignment("y", "w").consensus_iupac, "h #7" assert_equal "h", build_na_alignment("w", "m", "y").consensus_iupac, "h #8" assert_equal "h", build_na_alignment("w", "m", "y", "t", "c", "a").consensus_iupac, "h #9" assert_equal "h", build_na_alignment("h", "t").consensus_iupac, "h #10" # d = t/u g a r w k assert_equal "d", build_na_alignment("t", "g", "a").consensus_iupac, "d #1" assert_equal "d", build_na_alignment("r", "t").consensus_iupac, "d #2" assert_equal "d", build_na_alignment("w", "g").consensus_iupac, "d #3" assert_equal "d", build_na_alignment("k", "a").consensus_iupac, "d #4" assert_equal "d", build_na_alignment("k", "r").consensus_iupac, "d #5" assert_equal "d", build_na_alignment("k", "w").consensus_iupac, "d #6" assert_equal "d", build_na_alignment("r", "w").consensus_iupac, "d #7" assert_equal "d", build_na_alignment("r", "w", "k").consensus_iupac, "d #8" assert_equal "d", build_na_alignment("k", "r", "w", "t", "g", "a").consensus_iupac, "d #9" assert_equal "d", build_na_alignment("d", "t").consensus_iupac, "d #10" # n = anything assert_equal "n", build_na_alignment("a", "g", "c", "t").consensus_iupac, "n #1" assert_equal "n", build_na_alignment("a", "g", "c", "u").consensus_iupac, "n #2" 
assert_equal "n", build_na_alignment("w", "s").consensus_iupac, "n #3" assert_equal "n", build_na_alignment("k", "m").consensus_iupac, "n #4" assert_equal "n", build_na_alignment("r", "y").consensus_iupac, "n #5" end def test_consensus_iupac_missing_char alignment = build_na_alignment("a??", "ac?") assert_equal("a??", alignment.consensus_iupac()) end def test_consensus_iupac_missing_char_option alignment = build_na_alignment("a**t", "ac**") assert_equal("a***", alignment.consensus_iupac(:missing_char => "*")) end # Alignment#convert_match def test_convert_match alignment = Alignment.new alignment << Sequence::NA.new("agtcgattaa") alignment << Sequence::NA.new("tttcgatgcc") match = alignment.convert_match assert_equal(alignment[0], match[0], "first sequence altered") assert_equal("tt.....gcc", match[1], "wrong match") end # Alignment#convert_unmatch def test_convert_unmatch alignment = Alignment.new alignment << Sequence::NA.new("agtcgattaa") alignment << Sequence::NA.new("tt.....gcc") unmatched = alignment.convert_unmatch assert_equal("agtcgattaa", unmatched[0], "first changed") assert_equal("tttcgatgcc", unmatched[1], "second wrong") end def test_convert_unmatch_multiple_sequences alignment = Alignment.new alignment << Sequence::NA.new("agtcgattaa") alignment << Sequence::NA.new("tt.....gcc") alignment << Sequence::NA.new("c...c..g.c") unmatched = alignment.convert_unmatch assert_equal("agtcgattaa", unmatched[0], "first changed") assert_equal("tttcgatgcc", unmatched[1], "second wrong") assert_equal("cgtccatgac", unmatched[2], "third wrong") end def test_convert_unmatch_different_length_sequences_truncates_seq_if_last_matched alignment = Alignment.new alignment << Sequence::NA.new("agtcgatta") alignment << Sequence::NA.new("tt.....gc.") unmatched = alignment.convert_unmatch assert_equal("agtcgatta", unmatched[0], "first changed") assert_equal("tttcgatgc", unmatched[1], "second wrong") #TODO: verify this is correct, and not . 
at end end def test_convert_unmatch_different_match_char alignment = Alignment.new alignment << Sequence::NA.new("agtcga") alignment << Sequence::NA.new("tt====") unmatched = alignment.convert_unmatch('=') assert_equal("agtcga", unmatched[0], "first changed") assert_equal("tttcga", unmatched[1], "second wrong") end # Alignment#match_line def test_match_line_protein alignment = Alignment.new alignment << Sequence::AA.new("AELFMCF") alignment << Sequence::AA.new("AKLVNNF") assert_equal "*:*. *", alignment.match_line end #TODO: lots more on the consensus, match, etc. # Alignment#normalize def test_normalizebang_extends_sequences_with_gaps alignment = build_na_alignment("a", "ag", "agc", "agct") alignment.normalize! assert_equal({0=>"a---",1=>"ag--",2=>"agc-",3=>"agct"}, alignment.to_hash) end # Alignment#to_clustal end end bio-1.4.3.0001/test/unit/bio/test_map.rb0000644000004100000410000002014512200110570017535 0ustar www-datawww-data# # = test/unit/bio/test_map.rb - Unit test for Bio::Map # # Copyright:: Copyright (C) 2006 # Jan Aerts # License:: The Ruby License # # $Id:$ # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 2, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/map' module Bio class TestMapSimple < Test::Unit::TestCase def setup @marker1 = Bio::Map::Marker.new('marker1') @marker2 = Bio::Map::Marker.new('marker2') @map1 = Bio::Map::SimpleMap.new('map1', 'some_type', 500, 'some_unit') end def test_attributes assert_equal("marker1", @marker1.name) assert_equal("marker2", @marker2.name) assert_equal([], @marker1.mappings_as_marker) assert_equal([], @marker2.mappings_as_marker) assert_equal("map1", @map1.name) assert_equal("some_unit", @map1.units) assert_equal("some_type", @map1.type) assert_equal([], @map1.mappings_as_map) end end class TestMapping < Test::Unit::TestCase def setup @marker1 = Bio::Map::Marker.new('marker1') 
@marker2 = Bio::Map::Marker.new('marker2') @marker3 = Bio::Map::Marker.new('marker3') @map1 = Bio::Map::SimpleMap.new('map1', 'some_type', 'some_unit') @map2 = Bio::Map::SimpleMap.new('map2', 'some_other_type', 'some_other_unit') end def test_add_mapping_as_map @map1.add_mapping_as_map(@marker2, '5') assert_equal(1, @map1.mappings_as_map.length) assert_equal(1, @marker2.mappings_as_marker.length) assert_equal(0, @marker1.mappings_as_marker.length) assert_kind_of(Bio::Locations, @map1.mappings_as_map[0].location) assert_kind_of(Bio::Locations, @marker2.mappings_as_marker[0].location) end def test_add_mapping_as_marker @marker1.add_mapping_as_marker(@map1, '5') assert_equal(1, @map1.mappings_as_map.length, 'Mapping as map') assert_equal(1, @marker1.mappings_as_marker.length, 'Mapping as marker') assert_kind_of(Bio::Locations, @map1.mappings_as_map[0].location) assert_kind_of(Bio::Locations, @marker1.mappings_as_marker[0].location) end def test_mapped_to? @marker1.add_mapping_as_marker(@map1, '5') assert_equal(true, @marker1.mapped_to?(@map1)) assert_equal(false, @marker3.mapped_to?(@map1)) end def test_contains_marker? 
@marker1.add_mapping_as_marker(@map1, '5') assert_equal(true, @map1.contains_marker?(@marker1)) assert_equal(false, @map1.contains_marker?(@marker3)) end def test_mappings_as_map_each @map1.add_mapping_as_map(@marker1, '5') @marker2.add_mapping_as_marker(@map1, '7') mappings = 0 @map1.mappings_as_map.each do |mapping| mappings += 1 end assert_equal(2, mappings) end def test_mappings_as_marker_each @map1.add_mapping_as_map(@marker1, '5') @marker1.add_mapping_as_marker(@map1, '7') mappings = 0 @marker1.mappings_as_marker.each do |mapping| mappings += 1 end assert_equal(2, mappings) end def test_multiple_mappings_between_same_marker_and_map @map1.add_mapping_as_map(@marker1, '5') @map1.add_mapping_as_map(@marker1, '37') @marker1.add_mapping_as_marker(@map1, '53') assert_equal(3, @marker1.mappings_as_marker.length) @marker1.add_mapping_as_marker(@map1, '53') # This mapping should _not_ be added, because it's already defined. assert_equal(3, @marker1.mappings_as_marker.length) @map1.add_mapping_as_map(@marker1, '53') assert_equal(3, @marker1.mappings_as_marker.length) end def test_positions_on @map1.add_mapping_as_map(@marker1, '5') assert_equal(1, @marker1.mappings_as_marker.length) assert_equal('5', @marker1.positions_on(@map1).collect{|p| p.first.from.to_s}.join(',')) # FIXME: Test is not correct (uses Location.first) @map1.add_mapping_as_map(@marker1, '37') assert_equal('5,37', @marker1.positions_on(@map1).collect{|p| p.first.from.to_s}.sort{|a,b| a.to_i <=> b.to_i}.join(',')) # FIXME: Test is not correct (uses Location.first) end def test_mappings_on @map1.add_mapping_as_map(@marker1, '5') @map1.add_mapping_as_map(@marker1, '37') assert_equal('5,37', @marker1.mappings_on(@map1).sort{|a,b| a.location[0].from.to_i <=> b.location[0].from.to_i}.collect{|m| m.location[0].from}.join(',')) # FIXME: Test is not correct (uses Location.first) end def test_mapping_location_comparison @map1.add_mapping_as_map(@marker1, '5') @map1.add_mapping_as_map(@marker2, '5') 
@map1.add_mapping_as_map(@marker3, '17') mapping1 = @marker1.mappings_on(@map1)[0] mapping2 = @marker2.mappings_on(@map1)[0] mapping3 = @marker3.mappings_on(@map1)[0] assert_equal(true, mapping1 == mapping2) assert_equal(false, mapping1 < mapping2) assert_equal(false, mapping1 > mapping2) assert_equal(false, mapping1 == mapping3) assert_equal(true, mapping1 < mapping3) assert_equal(false, mapping1 > mapping3) @map2.add_mapping_as_map(@marker1, '23') mapping4 = @marker1.mappings_on(@map2)[0] assert_raise(RuntimeError) { mapping2 < mapping4 } end def test_raise_error_kind_of marker_without_class = 'marker1' assert_raise(RuntimeError) { @map1.add_mapping_as_map(marker_without_class, '5') } assert_raise(RuntimeError) { @map1.contains_marker?(marker_without_class) } map_without_class = 'map1' assert_raise(RuntimeError) { @marker1.add_mapping_as_marker(map_without_class, '5') } assert_raise(RuntimeError) { @marker1.mapped_to?(map_without_class) } assert_raise(RuntimeError) { @marker1.positions_on(map_without_class) } assert_raise(RuntimeError) { @marker1.mappings_on(map_without_class) } @map1.add_mapping_as_map(@marker1, '5') mapping1 = @marker1.mappings_on(@map1)[0] assert_raise(RuntimeError) { mapping1 > 'some_mapping' } end end class CloneToActLikeMap include Bio::Map::ActsLikeMap def initialize @mappings_as_map = Array.new end attr_accessor :mappings_as_map end class TestActsLikeMap < Test::Unit::TestCase def setup @clone = CloneToActLikeMap.new end def test_mixin assert_instance_of(CloneToActLikeMap, @clone) assert_respond_to(@clone, 'contains_marker?') assert_respond_to(@clone, 'add_mapping_as_map') assert_equal(0, @clone.mappings_as_map.length) end end class CloneToActLikeMarker include Bio::Map::ActsLikeMarker def initialize @mappings_as_marker = Array.new end attr_accessor :mappings_as_marker end class TestActsLikeMarker < Test::Unit::TestCase def setup @clone = CloneToActLikeMarker.new end def test_mixin assert_instance_of(CloneToActLikeMarker, @clone) 
assert_respond_to(@clone, 'mapped_to?') assert_respond_to(@clone, 'add_mapping_as_marker') end end class CloneToActLikeMapAndMarker include Bio::Map::ActsLikeMap include Bio::Map::ActsLikeMarker def initialize @mappings_as_map = Array.new @mappings_as_marker = Array.new end attr_accessor :mappings_as_map, :mappings_as_marker end class TestActsLikeMapAndMarker < Test::Unit::TestCase def setup @clone_a = CloneToActLikeMapAndMarker.new @clone_b = CloneToActLikeMapAndMarker.new @clone_a.add_mapping_as_map(@clone_b, nil) end def test_mixin assert_instance_of(CloneToActLikeMapAndMarker, @clone_a) assert_respond_to(@clone_a, 'contains_marker?') assert_respond_to(@clone_a, 'add_mapping_as_map') assert_respond_to(@clone_a, 'mapped_to?') assert_respond_to(@clone_a, 'add_mapping_as_marker') assert_equal(1, @clone_a.mappings_as_map.length) assert_equal(0, @clone_a.mappings_as_marker.length) assert_equal(0, @clone_b.mappings_as_map.length) assert_equal(1, @clone_b.mappings_as_marker.length) end end end bio-1.4.3.0001/test/unit/bio/appl/0000755000004100000410000000000012200110570016326 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/appl/sosui/0000755000004100000410000000000012200110570017470 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/appl/sosui/test_report.rb0000644000004100000410000000336412200110570022375 0ustar www-datawww-data# # test/unit/bio/appl/sosui/test_report.rb - Unit test for Bio::SOSUI::Report # # Copyright:: Copyright (C) 2005 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/sosui/report' module Bio test_data = Pathname.new(File.join(BioRubyTestDataPath, 'SOSUI')).cleanpath.to_s SOSUIReport = File.open(File.join(test_data, 'sample.report')).read class TestSOSUIReportConst < Test::Unit::TestCase def 
test_delimiter assert_equal("\n>", Bio::SOSUI::Report::DELIMITER) end def test_rs assert_equal("\n>", Bio::SOSUI::Report::RS) end end class TestSOSUIReport < Test::Unit::TestCase def setup @obj = Bio::SOSUI::Report.new(SOSUIReport) end def test_entry_id assert_equal('Q9HC19', @obj.entry_id) end def test_prediction assert_equal('MEMBRANE PROTEIN', @obj.prediction) end def test_tmhs assert_equal(Array, @obj.tmhs.class) assert_equal(Bio::SOSUI::Report::TMH, @obj.tmhs[0].class) end def test_tmh assert_equal(7, @obj.tmhs.size) end end # class TestSOSUIReport class TestSOSUITMH < Test::Unit::TestCase def setup @obj = Bio::SOSUI::Report.new(SOSUIReport).tmhs.first end def test_range assert_equal(31..53, @obj.range) end def test_grade assert_equal('SECONDARY', @obj.grade) end def test_sequence assert_equal('HIRMTFLRKVYSILSLQVLLTTV', @obj.sequence) end end # class TestSOSUITMH end bio-1.4.3.0001/test/unit/bio/appl/sim4/0000755000004100000410000000000012200110570017202 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/appl/sim4/test_report.rb0000644000004100000410000005266612200110570022120 0ustar www-datawww-data# # test/unit/bio/appl/sim4/test_report.rb - Unit test for Bio::Sim4 # # Copyright:: Copyright (C) 2009 # Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/sim4/report' module Bio class TestDataForSim4Report DataPath = Pathname.new(File.join(BioRubyTestDataPath, 'sim4')).cleanpath.to_s def initialize(filename) @filename = filename end def read File.read(File.join(DataPath,@filename)) end def report Bio::Sim4::Report.new(self.read) end def self.report(filename) self.new(filename).report end def self.report1 filename = "simple-A4.sim4" self.new(filename).report end def self.report2 filename = "simple2-A4.sim4" 
self.new(filename).report end def self.report4 filename = "complement-A4.sim4" self.new(filename).report end end #class TestDataForSim4Report class TestSim4Report < Test::Unit::TestCase def setup @sim4 = TestDataForSim4Report.report1 end def test_hits assert_kind_of(Array, @sim4.hits) assert_equal(1, @sim4.hits.size) assert_instance_of(Bio::Sim4::Report::Hit, @sim4.hits[0]) end def test_all_hits assert_kind_of(Array, @sim4.all_hits) assert_equal(1, @sim4.all_hits.size) assert_instance_of(Bio::Sim4::Report::Hit, @sim4.all_hits[0]) end def exec_test_seq1_len(sd) assert_equal(94, sd.len) end private :exec_test_seq1_len def test_seq1 sd = @sim4.seq1 assert_instance_of(Bio::Sim4::Report::SeqDesc, sd) assert_equal('mrna1', sd.entry_id) assert_equal('mrna1', sd.definition) assert_equal('sample10-1.fst', sd.filename) exec_test_seq1_len(sd) end def exec_test_each(meth) count = 0 assert_nothing_raised { @sim4.__send__(meth) do |x| count += 1 end } assert_equal(1, count) @sim4.__send__(meth) do |x| assert_instance_of(Bio::Sim4::Report::Hit, x) end end private :exec_test_each def test_each exec_test_each(:each) end def test_each_hit exec_test_each(:each_hit) end def test_num_hits assert_equal(1, @sim4.num_hits) end def test_query_def assert_equal('mrna1', @sim4.query_def) end def test_query_id assert_equal('mrna1', @sim4.query_id) end def test_query_len assert_equal(94, @sim4.query_len) end end #class TestSim4Report class TestSim4ReportHit < Test::Unit::TestCase def setup @hit = TestDataForSim4Report.report1.hits.first end def test_align a = [ [ "TTGTTTCCGTCGCTGGTTATTGTCTAGAACGCAAAAATAG", "||||||||||||||||||||||||||||||||||||||||", "TTGTTTCCGTCGCTGGTTATTGTCTAGAACGCAAAAATAG" ], [ " ", "<<<...<<<", "CTG...TAC" ], [ "TCTACACATCACTAGCGTGGGTGGGCGGAAAGAGCAGCTCGCCACT CAAGCTAA", "|||||||||||||||| |||||||||||||-|||||||||||||||-||||||||", "TCTACACATCACTAGCCTGGGTGGGCGGAA GAGCAGCTCGCCACTTCAAGCTAA" ] ] assert_equal(a, @hit.align) end def test_complement? assert_equal(nil, @hit.complement?) 
end def test_definition assert_equal('genome1', @hit.definition) end def test_each count = 0 assert_nothing_raised { @hit.each do |x| count += 1 end } assert_equal(2, count) @hit.each do |x| assert_instance_of(Bio::Sim4::Report::SegmentPair, x) end end def exec_test_exons(meth) assert_kind_of(Array, @hit.__send__(meth)) assert_equal(2, @hit.__send__(meth).size) @hit.__send__(meth).each do |x| assert_instance_of(Bio::Sim4::Report::SegmentPair, x) end end private :exec_test_exons def test_exons exec_test_exons(:exons) end def test_hit_id assert_equal('genome1', @hit.hit_id) end def test_hsps exec_test_exons(:hsps) end def test_introns assert_kind_of(Array, @hit.introns) assert_equal(1, @hit.introns.size) @hit.introns.each do |x| assert_instance_of(Bio::Sim4::Report::SegmentPair, x) end end def test_len assert_equal(599, @hit.len) end def test_query_def assert_equal('mrna1', @hit.query_def) end def test_query_id assert_equal('mrna1', @hit.query_id) end def test_query_len assert_equal(94, @hit.query_len) end def test_segmentpairs assert_kind_of(Array, @hit.segmentpairs) assert_equal(3, @hit.segmentpairs.size) @hit.segmentpairs.each do |x| assert_instance_of(Bio::Sim4::Report::SegmentPair, x) end end def exec_test_seq1_len(sd) assert_equal(94, sd.len) end private :exec_test_seq1_len def test_seq1 sd = @hit.seq1 assert_instance_of(Bio::Sim4::Report::SeqDesc, sd) assert_equal('mrna1', sd.entry_id) assert_equal('mrna1', sd.definition) assert_equal('sample10-1.fst', sd.filename) exec_test_seq1_len(sd) end def test_seq2 sd = @hit.seq2 assert_instance_of(Bio::Sim4::Report::SeqDesc, sd) assert_equal('genome1', sd.entry_id) assert_equal('genome1', sd.definition) assert_equal(599, sd.len) #assert_equal('sample10-2.fst', sd.filename) assert_equal('sample10-2.fst (genome1)', sd.filename) end def test_target_def assert_equal('genome1', @hit.target_def) end def test_target_id assert_equal('genome1', @hit.target_id) end def test_target_len assert_equal(599, @hit.target_len) end end 
#class TestSim4ReportHit class TestSim4ReportSegmentPair_exon < Test::Unit::TestCase def setup @exon = TestDataForSim4Report.report1.hits[0].exons[1] end def test_align_len assert_equal(55, @exon.align_len) end def test_direction assert_equal("", @exon.direction) end def test_hit_from assert_equal(404, @exon.hit_from) end def test_hit_to assert_equal(457, @exon.hit_to) end def test_hseq hseq = "TCTACACATCACTAGCCTGGGTGGGCGGAA GAGCAGCTCGCCACTTCAAGCTAA" assert_equal(hseq, @exon.hseq) end def test_midline midline = "|||||||||||||||| |||||||||||||-|||||||||||||||-||||||||" assert_equal(midline, @exon.midline) end def test_percent_identity #assert_equal(94, @exon.percent_identity) assert_equal("94", @exon.percent_identity) end def test_qseq qseq = "TCTACACATCACTAGCGTGGGTGGGCGGAAAGAGCAGCTCGCCACT CAAGCTAA" assert_equal(qseq, @exon.qseq) end def test_query_from assert_equal(41, @exon.query_from) end def test_query_to assert_equal(94, @exon.query_to) end def exec_test_seq1_from_to(seg) assert_equal(41, seg.from) assert_equal(94, seg.to) end private :exec_test_seq1_from_to def test_seq1 assert_instance_of(Bio::Sim4::Report::Segment, @exon.seq1) assert_equal("TCTACACATCACTAGCGTGGGTGGGCGGAAAGAGCAGCTCGCCACT CAAGCTAA", @exon.seq1.seq) exec_test_seq1_from_to(@exon.seq1) end def test_seq2 assert_instance_of(Bio::Sim4::Report::Segment, @exon.seq2) assert_equal(404, @exon.seq2.from) assert_equal(457, @exon.seq2.to) assert_equal("TCTACACATCACTAGCCTGGGTGGGCGGAA GAGCAGCTCGCCACTTCAAGCTAA", @exon.seq2.seq) end end #class TestSim4ReportSegmentPair_exon class TestSim4ReportSegmentPair_intron < Test::Unit::TestCase def setup @intron = TestDataForSim4Report.report1.hits[0].introns[0] end def test_align_len assert_equal(9, @intron.align_len) end def test_direction assert_equal(nil, @intron.direction) end def test_hit_from assert_equal(185, @intron.hit_from) end def test_hit_to assert_equal(403, @intron.hit_to) end def test_hseq hseq = "CTG...TAC" assert_equal(hseq, @intron.hseq) end def 
test_midline midline = "<<<...<<<" assert_equal(midline, @intron.midline) end def test_percent_identity assert_equal(nil, @intron.percent_identity) end def test_qseq qseq = " " assert_equal(qseq, @intron.qseq) end def test_query_from assert_equal(0, @intron.query_from) end def test_query_to assert_equal(0, @intron.query_to) end def test_seq1 assert_instance_of(Bio::Sim4::Report::Segment, @intron.seq1) assert_equal(0, @intron.seq1.from) assert_equal(0, @intron.seq1.to) assert_equal(" ", @intron.seq1.seq) end def test_seq2 assert_instance_of(Bio::Sim4::Report::Segment, @intron.seq2) assert_equal(185, @intron.seq2.from) assert_equal(403, @intron.seq2.to) assert_equal("CTG...TAC", @intron.seq2.seq) end end #class TestSim4ReportSegmentPair_intron class TestSim4Report2 < TestSim4Report def setup @sim4 = TestDataForSim4Report.report2 end def test_query_len assert_equal(96, @sim4.query_len) end def exec_test_seq1_len(sd) assert_equal(96, sd.len) end private :exec_test_seq1_len end #class TestSim4Report2 class TestSim4ReportHit2 < TestSim4ReportHit def setup @hit = TestDataForSim4Report.report2.hits.first end def test_align a = [ [ "AGTTGTTTCCGTCGCTGGTTATTGTCTAGAACGCAAAAATAG", "||||||||||||||||||||||||||||||||||||||||||", "AGTTGTTTCCGTCGCTGGTTATTGTCTAGAACGCAAAAATAG" ], [ " ", "<<<...<<<", "CTG...TAC" ], [ "TCTACACATCACTAGCGTGGGTGGGCGGAAAGAGCAGCTCGCCACT CAAGCTAA", "|||||||||||||||| |||||||||||||-|||||||||||||||-||||||||", "TCTACACATCACTAGCCTGGGTGGGCGGAA GAGCAGCTCGCCACTTCAAGCTAA" ] ] assert_equal(a, @hit.align) end def test_query_len assert_equal(96, @hit.query_len) end def exec_test_seq1_len(sd) assert_equal(96, sd.len) end private :exec_test_seq1_len end #class TestSim4ReportHit2 class TestSim4ReportSegmentPair2_exon < TestSim4ReportSegmentPair_exon def setup @exon = TestDataForSim4Report.report2.hits[0].exons[1] end def test_query_from assert_equal(43, @exon.query_from) end def test_query_to assert_equal(96, @exon.query_to) end def exec_test_seq1_from_to(seg) 
assert_equal(43, seg.from) assert_equal(96, seg.to) end private :exec_test_seq1_from_to end #class TestSim4ReportSegmentPair2_exon class TestSim4ReportSegmentPair2_intron < TestSim4ReportSegmentPair_intron def setup @intron = TestDataForSim4Report.report2.hits[0].introns[0] end end #class TestSim4ReportSegmentPair2_intron class TestSim4Report4 < TestSim4Report def setup @sim4 = TestDataForSim4Report.report4 end def exec_test_seq1_len(sd) assert_equal(284, sd.len) end private :exec_test_seq1_len def test_seq1 sd = @sim4.seq1 assert_instance_of(Bio::Sim4::Report::SeqDesc, sd) assert_equal('mrna4c', sd.entry_id) assert_equal('mrna4c', sd.definition) assert_equal('sample41-1c.fst', sd.filename) exec_test_seq1_len(sd) end def test_query_def assert_equal('mrna4c', @sim4.query_def) end def test_query_id assert_equal('mrna4c', @sim4.query_id) end def test_query_len assert_equal(284, @sim4.query_len) end end #class TestSim4Report4 class TestSim4ReportHit4 < TestSim4ReportHit def setup @hit = TestDataForSim4Report.report4.hits.first end def test_align a = [ [ "TTTTAGCCGGCACGAGATTG AGCGTATGATCACGCGCGCGGCCTCCT CAGAGTGATGCATGATACAACTT AT ", "||||||||||||||||||||-||||-||||||||||||||||||||||-|-|||| ||||||||||||||||- |-", "TTTTAGCCGGCACGAGATTGCAGCG ATGATCACGCGCGCGGCCTCCTAC GAGTCATGCATGATACAACTTCTTG"], [ " ", ">>>...>>>", "GTT...GAT" ], [ "ATATGTACTTAGCTGGCAACCGAGATTTACTTTCGAAGCACTGTGATGAACCCGCGGCCCTTTGAGCGCT", "|||||||||||||-|||||||||||||||||||||||| |||||||||||||||||-|||||||||||||", "ATATGTACTTAGC GGCAACCGAGATTTACTTTCGAAGGACTGTGATGAACCCGCG CCCTTTGAGCGCT" ], [ "", "", "" ], [ "TATATATGTACTTAGCGG ACACCGAGATTTACTTTCGAAGGACTGTGGATGAACCCGCGCCCTTTGAGCGCT", "||||||||||||||||||-|-|||||||||||||||||||||||||||-||||||||||||||||||||||||", "TATATATGTACTTAGCGGCA ACCGAGATTTACTTTCGAAGGACTGTG ATGAACCCGCGCCCTTTGAGCGCT" ] ] assert_equal(a, @hit.align) end def test_complement? assert_equal(true, @hit.complement?) 
end def test_definition assert_equal('genome4', @hit.definition) end def test_each count = 0 assert_nothing_raised { @hit.each do |x| count += 1 end } assert_equal(3, count) @hit.each do |x| assert_instance_of(Bio::Sim4::Report::SegmentPair, x) end end def exec_test_exons(meth) assert_kind_of(Array, @hit.__send__(meth)) assert_equal(3, @hit.__send__(meth).size) @hit.__send__(meth).each do |x| assert_instance_of(Bio::Sim4::Report::SegmentPair, x) end end private :exec_test_exons def test_hit_id assert_equal('genome4', @hit.hit_id) end def test_introns assert_kind_of(Array, @hit.introns) assert_equal(2, @hit.introns.size) @hit.introns.each do |x| assert_instance_of(Bio::Sim4::Report::SegmentPair, x) end end def test_len assert_equal(770, @hit.len) end def test_query_def assert_equal('mrna4c', @hit.query_def) end def test_query_id assert_equal('mrna4c', @hit.query_id) end def test_query_len assert_equal(284, @hit.query_len) end def test_segmentpairs assert_kind_of(Array, @hit.segmentpairs) assert_equal(5, @hit.segmentpairs.size) @hit.segmentpairs.each do |x| assert_instance_of(Bio::Sim4::Report::SegmentPair, x) end end def exec_test_seq1_len(sd) assert_equal(284, sd.len) end private :exec_test_seq1_len def test_seq1 sd = @hit.seq1 assert_instance_of(Bio::Sim4::Report::SeqDesc, sd) assert_equal('mrna4c', sd.entry_id) assert_equal('mrna4c', sd.definition) assert_equal('sample41-1c.fst', sd.filename) exec_test_seq1_len(sd) end def test_seq2 sd = @hit.seq2 assert_instance_of(Bio::Sim4::Report::SeqDesc, sd) assert_equal('genome4', sd.entry_id) assert_equal('genome4', sd.definition) assert_equal(770, sd.len) #assert_equal('sample40-2.fst', sd.filename) assert_equal('sample40-2.fst (genome4)', sd.filename) end def test_target_def assert_equal('genome4', @hit.target_def) end def test_target_id assert_equal('genome4', @hit.target_id) end def test_target_len assert_equal(770, @hit.target_len) end end #class TestSim4ReportHit4 class TestSim4ReportSegmentPair4_exon < 
TestSim4ReportSegmentPair_exon def setup @exon = TestDataForSim4Report.report4.hits[0].exons[1] end def test_align_len assert_equal(70, @exon.align_len) end def test_direction assert_equal("==", @exon.direction) end def test_hit_from assert_equal(563, @exon.hit_from) end def test_hit_to assert_equal(630, @exon.hit_to) end def test_hseq hseq = "ATATGTACTTAGC GGCAACCGAGATTTACTTTCGAAGGACTGTGATGAACCCGCG CCCTTTGAGCGCT" assert_equal(hseq, @exon.hseq) end def test_midline midline = "|||||||||||||-|||||||||||||||||||||||| |||||||||||||||||-|||||||||||||" assert_equal(midline, @exon.midline) end def test_percent_identity #assert_equal(95, @exon.percent_identity) assert_equal("95", @exon.percent_identity) end def test_qseq qseq = "ATATGTACTTAGCTGGCAACCGAGATTTACTTTCGAAGCACTGTGATGAACCCGCGGCCCTTTGAGCGCT" assert_equal(qseq, @exon.qseq) end def test_query_from assert_equal(73, @exon.query_from) end def test_query_to assert_equal(142, @exon.query_to) end def exec_test_seq1_from_to(seg) assert_equal(73, seg.from) assert_equal(142, seg.to) end private :exec_test_seq1_from_to def test_seq1 assert_instance_of(Bio::Sim4::Report::Segment, @exon.seq1) assert_equal("ATATGTACTTAGCTGGCAACCGAGATTTACTTTCGAAGCACTGTGATGAACCCGCGGCCCTTTGAGCGCT", @exon.seq1.seq) exec_test_seq1_from_to(@exon.seq1) end def test_seq2 assert_instance_of(Bio::Sim4::Report::Segment, @exon.seq2) assert_equal(563, @exon.seq2.from) assert_equal(630, @exon.seq2.to) assert_equal("ATATGTACTTAGC GGCAACCGAGATTTACTTTCGAAGGACTGTGATGAACCCGCG CCCTTTGAGCGCT", @exon.seq2.seq) end end #class TestSim4ReportSegmentPair4_exon class TestSim4ReportSegmentPair4_intron < TestSim4ReportSegmentPair_intron def setup @intron = TestDataForSim4Report.report4.hits[0].introns[0] end def test_hit_from assert_equal(425, @intron.hit_from) end def test_hit_to assert_equal(562, @intron.hit_to) end def test_hseq hseq = "GTT...GAT" assert_equal(hseq, @intron.hseq) end def test_midline midline = ">>>...>>>" assert_equal(midline, @intron.midline) end def 
test_seq2 assert_instance_of(Bio::Sim4::Report::Segment, @intron.seq2) assert_equal(425, @intron.seq2.from) assert_equal(562, @intron.seq2.to) assert_equal("GTT...GAT", @intron.seq2.seq) end end #class TestSim4ReportSegmentPair4_intron class TestSim4ReportSegmentPair4_intron1 < Test::Unit::TestCase def setup @intron = TestDataForSim4Report.report4.hits[0].introns[1] end def test_align_len assert_equal(0, @intron.align_len) end def test_direction assert_equal(nil, @intron.direction) end def test_hit_from assert_equal(631, @intron.hit_from) end def test_hit_to assert_equal(699, @intron.hit_to) end def test_hseq assert_equal("", @intron.hseq) end def test_midline assert_equal("", @intron.midline) end def test_percent_identity assert_equal(nil, @intron.percent_identity) end def test_qseq assert_equal("", @intron.qseq) end def test_query_from assert_equal(143, @intron.query_from) end def test_query_to assert_equal(212, @intron.query_to) end def test_seq1 assert_instance_of(Bio::Sim4::Report::Segment, @intron.seq1) assert_equal(143, @intron.seq1.from) assert_equal(212, @intron.seq1.to) assert_equal("", @intron.seq1.seq) end def test_seq2 assert_instance_of(Bio::Sim4::Report::Segment, @intron.seq2) assert_equal(631, @intron.seq2.from) assert_equal(699, @intron.seq2.to) assert_equal("", @intron.seq2.seq) end end #class TestSim4ReportSegmentPair4_intron1 class TestSim4ReportSeqDesc < Test::Unit::TestCase def setup @str1 = 'seq1 = c_NC_000011.5_101050001-101075000.fst, 25000 bp' @str2 = '>ref|NC_000011.5|NC_000011:c101075000-101050001 Homo sapiens chromosome 11, complete sequence' @seqdesc = Bio::Sim4::Report::SeqDesc.parse(@str1, @str2) end def test_entry_id assert_equal('ref|NC_000011.5|NC_000011:c101075000-101050001', @seqdesc.entry_id) end def test_definition assert_equal("ref|NC_000011.5|NC_000011:c101075000-101050001 Homo sapiens chromosome 11, complete sequence", @seqdesc.definition) end def test_len assert_equal(25000, @seqdesc.len) end def test_filename 
assert_equal('c_NC_000011.5_101050001-101075000.fst', @seqdesc.filename) end def test_self_parse assert_instance_of(Bio::Sim4::Report::SeqDesc, Bio::Sim4::Report::SeqDesc.parse(@str1, @str2)) assert_instance_of(Bio::Sim4::Report::SeqDesc, Bio::Sim4::Report::SeqDesc.parse(@str1)) end def test_self_new assert_instance_of(Bio::Sim4::Report::SeqDesc, Bio::Sim4::Report::SeqDesc.new('SEQID', 'SEQDEF', 123, 'file.sim4')) end end #class TestSim4ReportSeqDesc class TestSim4ReportSegment < Test::Unit::TestCase def setup @seq = "TCTACACATCACTAGCGTGGGTGGGCGGAAAGAGCAGCTCGCCACT CAAGCTAA".freeze @segment = Bio::Sim4::Report::Segment.new("123", "176", @seq.dup) end def test_from assert_equal(123, @segment.from) end def test_to assert_equal(176, @segment.to) end def test_seq assert_equal(@seq, @segment.seq) end def test_self_new assert_instance_of(Bio::Sim4::Report::Segment, Bio::Sim4::Report::Segment.new(1,9)) assert_instance_of(Bio::Sim4::Report::Segment, Bio::Sim4::Report::Segment.new(2,4, "ATG")) end end #class TestSim4ReportSegment end #module Bio bio-1.4.3.0001/test/unit/bio/appl/gcg/0000755000004100000410000000000012200110570017066 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/appl/gcg/test_msf.rb0000644000004100000410000001072012200110570021237 0ustar www-datawww-data# # = test/unit/bio/appl/gcg/test_msf.rb - Unit test for Bio::GCG::Msf # # Copyright:: Copyright (C) 2009 Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/sequence' require 'bio/alignment' require 'bio/appl/gcg/seq' require 'bio/appl/gcg/msf' module Bio module TestGCGMsfData Filename_PileupAA = Pathname.new(File.join(BioRubyTestDataPath, 'gcg', 'pileup-aa.msf')).cleanpath.to_s PileupAA = File.read(Filename_PileupAA).freeze mfst = <<__END_OF_MFST__ >xx_3183087 
~MAFLGLFSLLVLQSMATGA.TGEDENILFQKEIRHSMGYDSLKNGE.EF SNYINKWVENNTRTFSF.TKDDEVQIPMMYQQGEFYYGEFSDGSNEAGGI YQVLEIPYEGDEISMMLVLSRQEVPLATLEPLVKAQLVEEWANSVKKQKV EVYLPRFTVEQEIDLKDVLKALGITEIFIKD.ANLTG....LSDNKEIFL SKAIHKSFLEVNEEGSEAAAVSGMIAISRMAVLYP.....QVIVDHPFFF LIRNRRTGTILFMGRVMHPETMNTSGHDFEEL >xx_3183086 ~MYFLGLLSLLVLPSKAFKA.AREDENILFLKEIRHSLGFDSLKNGE.EF TTHINKWVENNTRTFSF.TKDDEVQIPMMYQQGEFYYGEFSDGSNEAGGI YQVLEIPYEGDEISMMIVLSRQEVPLVTLEPLVKASLINEWANSVKKQKV EVYLPRFTVEQEIDLKDVLKGLGITEVFSRS.ADLTA....MSDNKELYL AKAFHKAFLEVNEEGSEAAAASGMIAISRMAVLYP.....QVIVDHPFFF LVRNRRTGTVLFMGRVMHPEAMNTSGHDFEEL >xx_192453532 MLLLVVLPPLLLLRGCFCQAISSGEENIIFLQEIRQAVGYSHFREDE.EF SERINSWVLNNTRTFSF.TRDDGVQTLMMYQQGDFYYGEFSDGTTEAGGV YQVLEMLYEGEDMSMMIVLPRQEVPLASLEPIIKAPLLEEWANNVKRQKV EVYLPRFKVEQKIDLRESLQQLGIRSIFSKD.ADLSAMTAQMTDGQDLFI GKAVQKAYLEVTEEGAEGAAGSGMIALTRTLVLYP.....QVMADHPFFF IIRNRKTGSILFMGRVMNPELIDPFDNNFDM~ >xx_72157730 ~~~~~~~~MAFSKQQDISGQDERRGTNLFFATQIADVFRFNQVDQDQLHG TKSINDWVSKNTTQETFKVLDERVPVSLMIQKGKYALAV..DNTNDC... .LVLEMPYQGRNLSLLIALPVKDDGLGQLETKLSADILQSWDAGLKSRQV NVLLPKFKLEAQFQLKEFLQRMGMSDAFDEDRANFEGISG...DRE.LHI SAVIHKAFVDVNEEGSEAAAATAVVMMRRCAPPREPEKPILFRADHPFIF MIRHRPTKSVLFMGRMMDPS~~~~~~~~~~~~ >xx_210090185 ~~~~~~~~~~~~~MRSSTSQEKDHPENIFFAQQMSRVLRFHKMDASDLHM RQTINSWVEERTRLGTFHI.SRDVEVPMMHQQGRFKLAY..DEDLNC... .QILEMPYRGKHLSMVVVLPDKMDDLSAIETSLTPDLLRHWRKSMSEEST MVQIPKFKVEQDFLLKEKLAEMGMTDLFSMADADLSGITG...SRD.LHV SHVVHKAFVEVNEEGSEAAAATAVNMMKRSL...DGE...MFFADHPFLF LIRDNDSNSVLFLGRLVRPEGHTTKDEL~~~~ >xx_45552463 ~~~~~~~~~~~~~MADAAGQKP..GENIVFATQLDQGLGLASSDPEQ... .ATINNWVEQLTRPDTFH.LDGEVQVPMMSLKERFRYAD..LPALDA... 
.MALELPYKDSDLSMLIVLPNTKTGLPALEEKLRLTTLSQITQSLYETKV ALKLPRFKAEFQVELSEVFQKLGMSRMFS.DQAEFGKMLQ...SPEPLKV SAIIHKAFIEVNEEGTEAAAATGMVMCYASMLTFEPQ.PVQFHVQHPFNY YIINKDS.TILFAGRINKF~~~~~~~~~~~~~ __END_OF_MFST__ seqs = mfst.split(/^\>.*/).collect { |x| x.gsub(/\s+/, '').freeze } seqs.shift # removes the first empty string names = mfst.scan(/^\>.*/).collect { |x| x.sub(/\A\>/, '').freeze } PileupAA_seqs = seqs.freeze PileupAA_names = names.freeze end #module TestGCGMsfData class TestGCGMsf < Test::Unit::TestCase def setup @paa = Bio::GCG::Msf.new(TestGCGMsfData::PileupAA) end def test_alignment seqs = TestGCGMsfData::PileupAA_seqs.dup names = TestGCGMsfData::PileupAA_names aln = nil assert_nothing_raised { aln = @paa.alignment } assert_equal(names, aln.keys) aln.each do |s| assert_equal(seqs.shift, s) end assert(seqs.empty?) end def test_checksum assert_equal(5701, @paa.checksum) end def test_date assert_equal('April 22, 2009 22:31', @paa.date) end def test_description assert_equal("PileUp of: @/home/ngoto/.seqlab-localhost/pileup_24.list\n\n Symbol comparison table: GenRunData:blosum62.cmp CompCheck: 1102\n\n GapWeight: 8\n GapLengthWeight: 2 \n\n", @paa.description) end def test_entry_id assert_equal('pileup_24.msf', @paa.entry_id) end def test_heading assert_equal('!!AA_MULTIPLE_ALIGNMENT 1.0', @paa.heading) end def test_length assert_equal(282, @paa.length) end def test_seq_type assert_equal('P', @paa.seq_type) end def test_compcheck assert_equal(1102, @paa.compcheck) end def test_gap_length_weight assert_equal("2", @paa.gap_length_weight) end def test_gap_weight assert_equal("8", @paa.gap_weight) end def test_symbol_comparison_table assert_equal('GenRunData:blosum62.cmp', @paa.symbol_comparison_table) end def test_validate_checksum assert_equal(true, @paa.validate_checksum) end end #class TestGCGMsf_PileupAA end #module Bio bio-1.4.3.0001/test/unit/bio/appl/test_blast.rb0000644000004100000410000001615612200110570021030 0ustar www-datawww-data# # 
test/unit/bio/appl/test_blast.rb - Unit test for Bio::Blast # # Copyright:: Copyright (C) 2005 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/blast' module Bio class TestBlastData TestDataBlast = Pathname.new(File.join(BioRubyTestDataPath, 'blast')).cleanpath.to_s def self.input File.open(File.join(TestDataBlast, 'b0002.faa')).read end def self.output(format = '7') case format when '0' File.open(File.join(TestDataBlast, 'b0002.faa.m0')).read when '7' File.open(File.join(TestDataBlast, 'b0002.faa.m7')).read when '8' File.open(File.join(TestDataBlast, 'b0002.faa.m8')).read end end end class TestBlast < Test::Unit::TestCase def setup @program = 'blastp' @db = 'test' @option = [] @server = 'localhost' @blast = Bio::Blast.new(@program, @db, @option, @server) end def test_new blast = Bio::Blast.new(@program, @db) assert_equal(@program, blast.program) assert_equal(@db, blast.db) assert(blast.options) assert_equal('local', blast.server) assert_equal('blastall', blast.blastall) end def test_new_opt_string blast = Bio::Blast.new(@program, @db, '-m 7 -F F') assert_equal(['-m', '7', '-F', 'F'], blast.options) end def test_program assert_equal(@program, @blast.program) end def test_db assert_equal(@db, @blast.db) end def test_options assert_equal([], @blast.options) end def test_option assert_equal('', @blast.option) end def test_option_set @blast.option = '-m 7 -p T' assert_equal('-m 7 -p T', @blast.option) end def test_option_set_m0 @blast.option = '-m 0' assert_equal('-m 0', @blast.option) end def test_server assert_equal(@server, @blast.server) end def test_blastll assert_equal('blastall', @blast.blastall) end def test_matrix assert_equal(nil, @blast.matrix) end def test_filter assert_equal(nil, @blast.filter) end def 
test_parser assert_equal(nil, @blast.instance_eval { @parser }) end def test_output assert_equal('', @blast.output) end def test_format assert(@blast.format) end def test_self_local assert(Bio::Blast.local(@program, @db, @option)) end def test_self_remote assert(Bio::Blast.remote(@program, @db, @option)) end def test_query # to be tested in test/functional/bio/test_blast.rb end def test_blast_reports_xml ret = Bio::Blast.reports_xml(TestBlastData.output) assert_instance_of(Array, ret) count = 0 ret.each do |report| count += 1 assert_instance_of(Bio::Blast::Report, report) end assert_equal(1, count) end def test_blast_reports_xml_with_block count = 0 Bio::Blast.reports_xml(TestBlastData.output) do |report| count += 1 assert_instance_of(Bio::Blast::Report, report) end assert_equal(1, count) end def test_blast_reports_format0 ret = Bio::Blast.reports(TestBlastData.output('0')) assert_instance_of(Array, ret) count = 0 ret.each do |report| count += 1 assert_instance_of(Bio::Blast::Default::Report, report) end assert_equal(1, count) end def test_blast_reports_format7 ret = Bio::Blast.reports(TestBlastData.output('7')) assert_instance_of(Array, ret) count = 0 ret.each do |report| count += 1 assert_instance_of(Bio::Blast::Report, report) end assert_equal(1, count) end def test_blast_reports_format8 ret = Bio::Blast.reports(TestBlastData.output('8')) assert_instance_of(Array, ret) count = 0 ret.each do |report| count += 1 assert_kind_of(Bio::Blast::Report, report) end assert_equal(1, count) end def test_blast_reports_format0_with_block count = 0 Bio::Blast.reports(TestBlastData.output('0')) do |report| count += 1 assert_instance_of(Bio::Blast::Default::Report, report) end assert_equal(1, count) end def test_blast_reports_format7_with_block count = 0 Bio::Blast.reports(TestBlastData.output('7')) do |report| count += 1 assert_instance_of(Bio::Blast::Report, report) end assert_equal(1, count) end def test_blast_reports_format8_with_block count = 0 
Bio::Blast.reports(TestBlastData.output('8')) do |report| count += 1 assert_kind_of(Bio::Blast::Report, report) end assert_equal(1, count) end def test_blast_reports_format7_with_parser ret = Bio::Blast.reports(TestBlastData.output('7'), :rexml) assert_instance_of(Array, ret) count = 0 ret.each do |report| count += 1 assert_instance_of(Bio::Blast::Report, report) end assert_equal(1, count) end def test_blast_reports_format8_with_parser ret = Bio::Blast.reports(TestBlastData.output('8'), :tab) assert_instance_of(Array, ret) count = 0 ret.each do |report| count += 1 assert_kind_of(Bio::Blast::Report, report) end assert_equal(1, count) end def test_blast_reports_format7_with_parser_with_block count = 0 Bio::Blast.reports(TestBlastData.output('7'), :rexml) do |report| count += 1 assert_instance_of(Bio::Blast::Report, report) end assert_equal(1, count) end def test_blast_reports_format8_with_parser_with_block count = 0 Bio::Blast.reports(TestBlastData.output('8'), :tab) do |report| count += 1 assert_kind_of(Bio::Blast::Report, report) end assert_equal(1, count) end def test_make_command_line @blast = Bio::Blast.new(@program, @db, '-m 7 -F F') assert_equal(["blastall", "-p", "blastp", "-d", "test", "-m", "7", "-F", "F"], @blast.instance_eval { make_command_line }) end def test_make_command_line_2 @blast = Bio::Blast.new(@program, @db, '-m 0 -F F') assert_equal(["blastall", "-p", "blastp", "-d", "test", "-m", "0", "-F", "F"], @blast.instance_eval { make_command_line }) end def test_parse_result assert(@blast.instance_eval { parse_result(TestBlastData.output) }) end def test_exec_local # to be tested in test/functional/bio/test_blast.rb end def test_exec_genomenet # to be tested in test/functional/bio/test_blast.rb end def test_exec_ncbi # to be tested in test/functional/bio/test_blast.rb end end end bio-1.4.3.0001/test/unit/bio/appl/bl2seq/0000755000004100000410000000000012200110570017516 5ustar 
www-datawww-databio-1.4.3.0001/test/unit/bio/appl/bl2seq/test_report.rb0000644000004100000410000000702012200110570022414 0ustar www-datawww-data# # test/unit/bio/appl/bl2seq/test_report.rb - Unit test for # Bio::Blast::Bl2seq::Report # # Copyright:: Copyright (C) 2006 # Mitsuteru C. Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/bl2seq/report' module Bio class TestBl2seqReportData TestDataBl2seq = Pathname.new(File.join(BioRubyTestDataPath, 'bl2seq')).cleanpath.to_s def self.output(format = 7) case format when 'empty' File.open(File.join(TestDataBl2seq, 'cd8a_p53_e-5blastp.bl2seq')).read when 'blastp' File.open(File.join(TestDataBl2seq, 'cd8a_cd8b_blastp.bl2seq')).read when 'blastn' when 'blastx' when 'tblastn' when 'tblastx' end end end class TestBl2seqReportConstants < Test::Unit::TestCase def test_rs rs = nil assert_equal(nil, Bio::Blast::Bl2seq::Report::RS) assert_equal(nil, Bio::Blast::Bl2seq::Report::DELIMITER) end end class TestBl2seqReport < Test::Unit::TestCase def setup @empty = Bio::Blast::Bl2seq::Report.new(Bio::TestBl2seqReportData.output('empty')) @blastp = Bio::Blast::Bl2seq::Report.new(Bio::TestBl2seqReportData.output('blastp')) end def test_new assert(@empty) assert(@blastp) end def test_undefed_methods methods = ['format0_parse_header', 'program', 'version', 'version_number', 'version_date', 'message', 'converged?', 'reference', 'db'] methods.each do |x| assert_equal(false, @empty.methods.include?(x), "undefined? : #{x}") end methods.each do |x| assert_equal(false, @blastp.methods.include?(x), "undefined? 
: #{x}") end end # TestF0dbstat < Test::Unit::TestCase def test_db_num assert_equal(0, @empty.db_num) assert_equal(0, @blastp.db_num) end def test_db_len assert_equal(393, @empty.db_len) assert_equal(210, @blastp.db_len) end # TestIteration < Test::Unit::TestCase def test_undefed_methods_for_iteration methods = ['message', 'pattern_in_database', 'pattern', 'pattern_positions', 'hits_found_again', 'hits_newly_found', 'hits_for_pattern', 'parse_hitlist', 'converged?'] methods.each do |x| assert_equal(false, @empty.iterations.first.methods.include?(x), "undefined? : #{x}") end methods.each do |x| assert_equal(false, @blastp.iterations.first.methods.include?(x), "undefined? : #{x}") end end end class TestBl2seqReportHit < Test::Unit::TestCase def setup @empty = Bio::Blast::Bl2seq::Report.new(Bio::TestBl2seqReportData.output('empty')) @blastp = Bio::Blast::Bl2seq::Report.new(Bio::TestBl2seqReportData.output('blastp')) @empty_hit = @empty.hits.first @blastp_hit = @blastp.hits.first end def test_empty_hits assert_equal(0, @empty.hits.size) end def test_hits assert_equal(Bio::Blast::Bl2seq::Report::Hit, @blastp.hits.first.class) assert_equal(1, @blastp.hits.size) end end end # module Bio bio-1.4.3.0001/test/unit/bio/appl/hmmer/0000755000004100000410000000000012200110570017436 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/appl/hmmer/test_report.rb0000644000004100000410000002101012200110570022327 0ustar www-datawww-data# # test/unit/bio/appl/hmmer/test_report.rb - Unit test for Bio::HMMER::Report # # Copyright:: Copyright (C) 2006 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/hmmer/report' module Bio class TestHMMERReportData TestDataHMMER = Pathname.new(File.join(BioRubyTestDataPath, 'HMMER')).cleanpath.to_s def 
self.hmmpfam File.open(File.join(TestDataHMMER, 'hmmpfam.out')).read end def self.output self.hmmpfam end def self.hmmsearch File.open(File.join(TestDataHMMER, 'hmmsearch.out')).read end end class TestHMMERReportClassMethods < Test::Unit::TestCase def test_reports_ary ary = Bio::HMMER.reports(Bio::TestHMMERReportData.output) assert_equal(Array, ary.class) end def test_reports_ary_contents Bio::HMMER.reports(Bio::TestHMMERReportData.output).each do |report| assert_equal(Bio::HMMER::Report, report.class) end end end class TestHMMERReportConstants < Test::Unit::TestCase def test_rs assert_equal("\n//\n", Bio::HMMER::Report::RS) assert_equal("\n//\n", Bio::HMMER::Report::DELIMITER) end end class TestHMMERReportHmmpfam < Test::Unit::TestCase def setup @obj = Bio::HMMER::Report.new(Bio::TestHMMERReportData.hmmpfam) end def test_program assert_equal(Hash, @obj.program.class) assert_equal("hmmpfam - search one or more sequences against HMM database", @obj.program['name']) assert_equal("HMMER 2.3.2 (Oct 2003)", @obj.program['version']) assert_equal("Copyright (C) 1992-2003 HHMI/Washington University School of Medicine", @obj.program['copyright']) assert_equal("Freely distributed under the GNU General Public License (GPL)", @obj.program['license']) end def test_parameter assert_equal(Hash, @obj.parameter.class) assert_equal("/Users/nakao/Sites/iprscan/tmp/20050517/iprscan-20050517-16244071/chunk_1/iprscan-20050517-16244071.nocrc", @obj.parameter["Sequence file"]) assert_equal("/Users/nakao/Sites/iprscan/data/Pfam", @obj.parameter['HMM file']) end def test_query_info assert_equal(Hash, @obj.query_info.class) assert_equal("104K_THEPA", @obj.query_info["Query sequence"]) assert_equal("[none]", @obj.query_info["Accession"]) assert_equal("[none]", @obj.query_info["Description"]) end def test_hits assert_equal(Bio::HMMER::Report::Hit, @obj.hits.first.class) end def test_hsps assert_equal(Bio::HMMER::Report::Hsp, @obj.hsps.first.class) end def test_histogram assert_equal(nil, 
@obj.histogram) end def test_statistical_detail assert_equal(nil, @obj.statistical_detail) end def test_total_seq_searched assert_equal(nil, @obj.total_seq_searched) end def test_whole_seq_top_hits assert_equal(nil, @obj.whole_seq_top_hits) end def test_domain_top_hits assert_equal(nil, @obj.domain_top_hits) end def test_each @obj.each do |hit| assert_equal(Bio::HMMER::Report::Hit, hit.class) end end def test_each_hit @obj.each_hit do |hit| assert_equal(Bio::HMMER::Report::Hit, hit.class) end end end class TestHMMERReportHit < Test::Unit::TestCase def setup @obj = Bio::HMMER::Report.new(Bio::TestHMMERReportData.output).hits.first end def test_hit assert_equal(Bio::HMMER::Report::Hit, @obj.class) end def test_hsps assert_equal(Bio::HMMER::Report::Hsp, @obj.hsps.first.class) end def test_accession assert_equal("PF04385.4", @obj.accession) end def test_target_id assert_equal("PF04385.4", @obj.target_id) end def test_hit_id assert_equal("PF04385.4", @obj.hit_id) end def test_entry_id assert_equal("PF04385.4", @obj.entry_id) end def test_description assert_equal("Domain of unknown function, DUF529", @obj.description) end def test_definition assert_equal("Domain of unknown function, DUF529", @obj.definition) end def test_score assert_equal(259.3, @obj.score) end def test_bit_score assert_equal(259.3, @obj.bit_score) end def test_evalue assert_equal(6.6e-75, @obj.evalue) end def test_num assert_equal(4, @obj.num) end def test_each @obj.each do |hsp| assert_equal(Bio::HMMER::Report::Hsp, hsp.class) end end def test_each_hsp @obj.each_hsp do |hsp| assert_equal(Bio::HMMER::Report::Hsp, hsp.class) end end def test_target_def assert_equal("<4> Domain of unknown function, DUF529", @obj.target_def) end def test_append_hsp hsp = @obj.hsps.first assert_equal(5, @obj.append_hsp(hsp).size) end end class TestHMMERReportHsp < Test::Unit::TestCase def setup @obj = Bio::HMMER::Report.new(Bio::TestHMMERReportData.output).hits.first.hsps.first end def test_hsp 
assert_equal(Bio::HMMER::Report::Hsp, @obj.class) end def test_accession assert_equal("PF04385.4", @obj.accession) end def test_domain assert_equal("1/4", @obj.domain) end def test_seq_f assert_equal(36, @obj.seq_f) end def test_seq_t assert_equal(111, @obj.seq_t) end def test_seq_ft assert_equal("..", @obj.seq_ft) end def test_hmm_f assert_equal(1, @obj.hmm_f) end def test_hmm_t assert_equal(80, @obj.hmm_t) end def test_score assert_equal(65.0, @obj.score) end def test_bit_score assert_equal(65.0, @obj.bit_score) end def test_evalue assert_equal(2.0e-16, @obj.evalue) end def test_midline assert_equal("t+D+n++++ f +v+++g+++ + ++ ++v+++++++Gn+v+We++ + +l++ ++++++++++++++++ +++", @obj.midline) end def test_hmmseq assert_equal("tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvWeseddpefglivtlsfyldsnkfLvlllintak", @obj.hmmseq) end def test_flatseq assert_equal("TFDINSNQTG-PAFLTAVEMAGVKYLQVQHGSNVNIHRLVEGNVVIWENA---STPLYTGAIVTNNDGPYMAYVEVLGDP", @obj.flatseq) end def test_query_frame assert_equal(1, @obj.query_frame) end def test_target_frame assert_equal(1, @obj.target_frame) end def test_csline assert_equal(nil, @obj.csline) end def test_rfline assert_equal(nil, @obj.rfline) end def test_set_alignment end def test_query_seq assert_equal("TFDINSNQTG-PAFLTAVEMAGVKYLQVQHGSNVNIHRLVEGNVVIWENA---STPLYTGAIVTNNDGPYMAYVEVLGDP", @obj.query_seq) end def test_target_seq assert_equal("tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvWeseddpefglivtlsfyldsnkfLvlllintak", @obj.target_seq) end def test_target_from assert_equal(1, @obj.target_from) end def test_targat_to assert_equal(80, @obj.target_to) end def test_query_from assert_equal(36, @obj.query_from) end def test_query_to assert_equal(111, @obj.query_to) end end class TestHMMERReportHmmsearch < Test::Unit::TestCase def setup @obj = Bio::HMMER::Report.new(Bio::TestHMMERReportData.hmmsearch) end def test_histogram hist = "score obs exp (one = represents 1 sequences)\n----- --- ---\n 377 1 0|=" assert_equal(hist, @obj.histogram) end def 
test_statistical_detail hash = {"P(chi-square)" => 0.0, "chi-sq statistic" => 0.0, "lambda" => 0.7676, "mu" => -10.6639} assert_equal(hash, @obj.statistical_detail) hash.keys.each do |key| assert_equal(hash[key], @obj.statistical_detail[key]) end end def test_total_seq_searched assert_equal(1, @obj.total_seq_searched) end def test_whole_seq_top_hit hash = {"Total memory" => "16K", "Satisfying E cutoff" => 1, "Total hits" => 1} assert_equal(hash, @obj.whole_seq_top_hits) hash.keys.each do |key| assert_equal(hash[key], @obj.whole_seq_top_hits[key]) end end def test_domain_top_hits hash = {"Total memory" => "17K", "Satisfying E cutoff" => 1, "Total hits" => 1} assert_equal(hash, @obj.domain_top_hits) hash.keys.each do |key| assert_equal(hash[key], @obj.domain_top_hits[key]) end end end end # module Bio bio-1.4.3.0001/test/unit/bio/appl/test_fasta.rb0000644000004100000410000000624612200110570021020 0ustar www-datawww-data# # test/unit/bio/appl/test_fasta.rb - Unit test for Bio::Fasta # # Copyright:: Copyright (C) 2006 # Mitsuteru C. 
Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/fasta' module Bio class TestFastaInitialize < Test::Unit::TestCase def test_new_1 program = 'string' db = 'string' option = ['-e', '0.001'] server = 'local' assert_raise(ArgumentError) { Bio::Fasta.new() } assert_raise(ArgumentError) { Bio::Fasta.new(program) } assert(Bio::Fasta.new(program, db)) assert(Bio::Fasta.new(program, db, option)) assert(Bio::Fasta.new(program, db, option, server)) assert_raise(ArgumentError) { Bio::Fasta.new(program, db, option, server, nil) } end def test_option_backward_compatibility fasta = Bio::Fasta.new('program', 'db', "-e 10") assert_equal([ '-Q', '-H', '-m','10', '-e', '10'], fasta.options) end def test_option fasta = Bio::Fasta.new('program', 'db', ["-e", "10"]) assert_equal([ '-Q', '-H', '-m','10', '-e', '10'], fasta.options) end end class TestFasta < Test::Unit::TestCase def setup program = 'ssearch' db = 'nr' option = ['-e', '10'] @obj = Bio::Fasta.new(program, db, option) end def test_program assert_equal('ssearch', @obj.program) @obj.program = 'lalign' assert_equal('lalign', @obj.program) end def test_db assert_equal('nr', @obj.db) @obj.db = 'refseq' assert_equal('refseq', @obj.db) end def test_options assert_equal(["-Q", "-H", "-m", "10", "-e", "10"], @obj.options) @obj.options = ['-Q', '-H', '-m', '8'] assert_equal(['-Q', '-H', '-m', '8'], @obj.options) end def test_server assert_equal('local', @obj.server) @obj.server = 'genomenet' assert_equal('genomenet', @obj.server) end def test_ktup assert_equal(nil, @obj.ktup) @obj.ktup = 6 assert_equal(6, @obj.ktup) end def test_matrix assert_equal(nil, @obj.matrix) @obj.matrix = 'PAM120' assert_equal('PAM120', @obj.matrix) end def test_output assert_equal('', @obj.output) # 
assert_raise(NoMethodError) { @obj.output = "" } end def test_option option = ['-M'].join(' ') assert(@obj.option = option) assert_equal(option, @obj.option) end def test_format assert_equal(10, @obj.format) end def test_format_arg_str assert(@obj.format = '1') assert_equal(1, @obj.format) end def test_format_arg_integer assert(@obj.format = 2) assert_equal(2, @obj.format) end end class TestFastaQuery < Test::Unit::TestCase def test_self_parser end def test_self_local # test/functional/bio/test_fasta.rb end def test_self_remote # test/functional/bio/test_fasta.rb end def test_query end end end bio-1.4.3.0001/test/unit/bio/appl/clustalw/0000755000004100000410000000000012200110570020164 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/appl/clustalw/test_report.rb0000644000004100000410000001037712200110570023073 0ustar www-datawww-data# # test/unit/bio/appl/clustalw/test_report.rb - Unit test for Bio::ClustalW::Report # # Copyright:: Copyright (C) 2010 Pjotr Prins # License:: The Ruby License # require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s require 'test/unit' require 'bio/appl/clustalw/report' module Bio class TestClustalWReport < Test::Unit::TestCase def setup test_data_path = Pathname.new(File.join(BioRubyTestDataPath, 'clustalw')).cleanpath.to_s aln_filename = File.join(test_data_path, 'example1.aln') text = File.read(aln_filename) @aln = Bio::ClustalW::Report.new(text) end # CLUSTAL 2.0.9 multiple sequence alignment # # # The alignment reads like: # # query -MKNTLLKLGVCVSLLGITPFVSTISSVQAERTVEHKVIKNETGTISISQ # gi|115023|sp|P10425| MKKNTLLKVGLCVSLLGTTQFVSTISSVQASQKVEQIVIKNETGTISISQ # .: : # # query LNKNVWVHTELGYFSG-EAVPSNGLVLNTSKGLVLVDSSWDDKLTKELIE # gi|115023|sp|P10425| LNKNVWVHTELGYFNG-EAVPSNGLVLNTSKGLVLVDSSWDNKLTKELIE # *: . . **. . .. ::*: . 
* : def test_header assert_equal('CLUSTAL 2.0.9 multiple sequence alignment',@aln.header) end def test_sequences seq = @aln.get_sequence(0) assert_equal('query',seq.definition) assert_equal("-MKNTLLKLGVCVSLLGITPFVSTISSVQAERTVEHKVIKNETGTISISQLNKNVWVHTELGYFSG-EAVPSNGLVLNTSKGLVLVDSSWDDKLTKELIEMVEKKFKKRVTDVIITHAHADRIGGMKTLKERGIKAHSTALTAELAKKNG--------------------YEEPLGDLQSVTNLKFGN----MKVETFYPGKGHTEDNIVVWLPQYQILAGGCLVKSASSKDLGNVADAYVNEWSTSIENVLKRYGNINLVVPGHGEVGDR-----GLLLHTLDLLK---------------------------------------------------------------------",seq.to_s) seq = @aln.get_sequence(1) assert_equal('gi|115023|sp|P10425|',seq.definition) assert_equal("MKKNTLLKVGLCVSLLGTTQFVSTISSVQASQKVEQIVIKNETGTISISQLNKNVWVHTELGYFNG-EAVPSNGLVLNTSKGLVLVDSSWDNKLTKELIEMVEKKFQKRVTDVIITHAHADRIGGITALKERGIKAHSTALTAELAKKSG--------------------YEEPLGDLQTVTNLKFGN----TKVETFYPGKGHTEDNIVVWLPQYQILAGGCLVKSAEAKNLGNVADAYVNEWSTSIENMLKRYRNINLVVPGHGKVGDK-----GLLLHTLDLLK---------------------------------------------------------------------",seq.to_s) end def test_alignment assert_equal("???????????SN?????????????D??????????L??????????????????H?H?D",@aln.alignment.consensus[60..120]) end def test_match_line assert_equal(" .: : *: . . **. . .. ::*: . * : : . .: .* * * * : * . : . . * : .: . .: .*: ::***:* .:* .* :: . . ::.: * : . 
" ,@aln.match_line) end end # class TestClustalwFormat class TestClustalWReportWith2ndArgument < Test::Unit::TestCase def setup aln_filename = File.join(BioRubyTestDataPath, 'clustalw', 'example1.aln') text = File.read(aln_filename) @aln = Bio::ClustalW::Report.new(text, "PROTEIN") end def test_sequences seq = @aln.get_sequence(0) assert_equal('query',seq.definition) assert_equal("-MKNTLLKLGVCVSLLGITPFVSTISSVQAERTVEHKVIKNETGTISISQLNKNVWVHTELGYFSG-EAVPSNGLVLNTSKGLVLVDSSWDDKLTKELIEMVEKKFKKRVTDVIITHAHADRIGGMKTLKERGIKAHSTALTAELAKKNG--------------------YEEPLGDLQSVTNLKFGN----MKVETFYPGKGHTEDNIVVWLPQYQILAGGCLVKSASSKDLGNVADAYVNEWSTSIENVLKRYGNINLVVPGHGEVGDR-----GLLLHTLDLLK---------------------------------------------------------------------",seq.to_s) seq = @aln.get_sequence(1) assert_equal('gi|115023|sp|P10425|',seq.definition) assert_equal("MKKNTLLKVGLCVSLLGTTQFVSTISSVQASQKVEQIVIKNETGTISISQLNKNVWVHTELGYFNG-EAVPSNGLVLNTSKGLVLVDSSWDNKLTKELIEMVEKKFQKRVTDVIITHAHADRIGGITALKERGIKAHSTALTAELAKKSG--------------------YEEPLGDLQTVTNLKFGN----TKVETFYPGKGHTEDNIVVWLPQYQILAGGCLVKSAEAKNLGNVADAYVNEWSTSIENMLKRYRNINLVVPGHGKVGDK-----GLLLHTLDLLK---------------------------------------------------------------------",seq.to_s) end end #class TestClustalWReportWith2ndArgument end bio-1.4.3.0001/test/unit/bio/appl/iprscan/0000755000004100000410000000000012200110570017765 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/appl/iprscan/test_report.rb0000644000004100000410000002211212200110570022662 0ustar www-datawww-data# # test/unit/bio/appl/iprscan/test_report.rb - Unit test for Bio::InterProScan::Report # # Copyright (C) 2006 Mitsuteru Nakao # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/iprscan/report' module Bio class TestIprscanData TestDataIprscan = 
Pathname.new(File.join(BioRubyTestDataPath, "iprscan")).cleanpath.to_s def self.raw_format File.open(File.join(TestDataIprscan, "merged.raw")) end def self.txt_format File.open(File.join(TestDataIprscan, "merged.txt")) end end class TestIprscanPTxtReport < Test::Unit::TestCase def setup test_entry=<<-END slr0002\t860 InterPro\tIPR001264\tGlycosyl transferase, family 51 BlastProDom\tPD001895\tsp_Q55683_SYNY3_Q55683\t2e-37\t292-370 HMMPfam\tPF00912\tTransglycosyl\t8e-104\t204-372 InterPro\tIPR001460\tPenicillin-binding protein, transpeptidase domain HMMPfam\tPF00905\tTranspeptidase\t5.7e-30\t451-742 InterPro\tNULL\tNULL ProfileScan\tPS50310\tALA_RICH\t10.224\t805-856 // END @obj = Bio::Iprscan::Report.parse_ptxt_entry(test_entry) end def test_query_id assert_equal('slr0002', @obj.query_id) end def test_query_length assert_equal(860, @obj.query_length) end def test_matches_size assert_equal(4, @obj.matches.size) end def test_match_ipr_id assert_equal('IPR001264', @obj.matches.first.ipr_id) end def test_match_ipr_description assert_equal('Glycosyl transferase, family 51', @obj.matches.first.ipr_description) end def test_match_method assert_equal('BlastProDom', @obj.matches.first.method_name) end def test_match_accession assert_equal('PD001895', @obj.matches.first.accession) end def test_match_description assert_equal('sp_Q55683_SYNY3_Q55683', @obj.matches.first.description) end def test_match_evalue assert_equal('2e-37', @obj.matches.first.evalue) end def test_match_match_start assert_equal(292, @obj.matches.first.match_start) end def test_match_match_end assert_equal(370, @obj.matches.first.match_end) end end # TestIprscanPTxtReport class TestIprscanTxtEntry < Test::Unit::TestCase def setup test_txt = Bio::TestIprscanData.txt_format.read.split(/\n\nSequence/)[0] @obj = Bio::Iprscan::Report.parse_txt_entry(test_txt) end def test_iprscan_report_class assert_equal(Bio::Iprscan::Report, @obj.class) end def test_query_id assert_equal('Q9RHD9', @obj.query_id) end def 
test_query_length assert_equal(267, @obj.query_length) end def test_matches_size assert_equal(16, @obj.matches.size) end def test_match_ipr_id assert_equal('IPR000110', @obj.matches.first.ipr_id) end def test_match_ipr_description assert_equal('Ribosomal protein S1', @obj.matches.first.ipr_description) end def test_match_method assert_equal('FPrintScan', @obj.matches.first.method_name) end def test_match_accession assert_equal('PR00681', @obj.matches.first.accession) end def test_match_description assert_equal('RIBOSOMALS1', @obj.matches.first.description) end def test_match_evalue assert_equal('1.5e-17', @obj.matches.first.evalue) end def test_match_status assert_equal('T', @obj.matches.first.status) end def test_match_date assert_equal(nil, @obj.matches.first.date) end def test_match_match_start assert_equal(6, @obj.matches.first.match_start) end def test_match_match_end assert_equal(27, @obj.matches.first.match_end) end def test_match_go_terms ary = [["Molecular Function", "RNA binding", "GO:0003723"], ["Molecular Function", "structural constituent of ribosome", "GO:0003735"], ["Cellular Component", "ribosome", "GO:0005840"], ["Biological Process", "protein biosynthesis", "GO:0006412"]] assert_equal(ary, @obj.matches.first.go_terms) end end # TestIprscanTxtEntry class TestIprscanTxtEntryList < Test::Unit::TestCase def setup test_txt = Bio::TestIprscanData.txt_format.read.split(/\n\nSequence/)[0] @obj = Bio::Iprscan::Report.parse_txt_entry(test_txt) end def test_to_hash hsh = {"IPR008994" => [12, 13, 14].map {|x| @obj.matches[x] }, "IPR000110" => [0, 1, 2].map {|x| @obj.matches[x] }, "IPR003029" => [3, 4, 5, 6, 7, 8, 9, 10, 11].map {|x| @obj.matches[x] }, "NULL" => [15].map {|x| @obj.matches[x] }} assert_equal(hsh.keys.sort, @obj.to_hash.keys.sort) assert_equal(hsh, @obj.to_hash) end def test_to_hash_match? 
@obj.to_hash.each do |ipr_id, matches| matches.each do |match| assert_equal(ipr_id, match.ipr_id) end end end end # TestIprscanTxtEntryList class TestIprscanTxtReport < Test::Unit::TestCase def setup @test_txt = Bio::TestIprscanData.txt_format end def test_parse_txt Bio::Iprscan::Report.parse_txt(@test_txt) do |report| assert_equal(Bio::Iprscan::Report, report.class) end end end # TestIprscanTxtReport class TestIprscanRawReport < Test::Unit::TestCase def setup test_raw = Bio::TestIprscanData.raw_format entry = '' @obj = [] while line = test_raw.gets if entry.split("\t").first == line.split("\t").first entry << line elsif entry != '' and entry.split("\t").first != line.split("\t").first @obj << Bio::Iprscan::Report.parse_raw_entry(entry) entry = '' else entry << line end end @obj << Bio::Iprscan::Report.parse_raw_entry(entry) end def test_self_reports_in_raw io = File.open(File.join(Bio::TestIprscanData::TestDataIprscan, "merged.raw")) result = [] Bio::Iprscan::Report.parse_raw(io) {|x| result << x } assert_equal(@obj.size, result.size) assert_equal(@obj.first.query_id, result.first.query_id) assert_equal(@obj.first.query_id, result.first.query_id) assert_equal(@obj[2].query_id, result[2].query_id) assert_equal(@obj.last.query_id, result.last.query_id) end def test_obj assert_equal(3, @obj.size) end def test_query_id assert_equal('Q9RHD9', @obj.first.query_id) end def test_entry_id assert_equal('Q9RHD9', @obj.first.entry_id) end def test_query_length assert_equal(267, @obj.first.query_length) end def test_match_query_id assert_equal('Q9RHD9', @obj.first.matches.first.query_id) end def test_match_crc64 assert_equal('D44DAE8C544CB7C1', @obj.first.matches.first.crc64) end def test_match_query_length assert_equal(267, @obj.first.matches.first.query_length) end def test_match_method assert_equal('HMMPfam', @obj.first.matches.first.method_name) end def test_match_accession assert_equal('PF00575', @obj.first.matches.first.accession) end def test_match_description 
assert_equal('S1', @obj.first.matches.first.description) end def test_match_match_start assert_equal(1, @obj.first.matches.first.match_start) end def test_match_match_end assert_equal(55, @obj.first.matches.first.match_end) end def test_match_evalue assert_equal('3.3E-6', @obj.first.matches.first.evalue) end def test_match_status assert_equal('T', @obj.first.matches.first.status) end def test_match_date assert_equal('11-Nov-2005', @obj.first.matches.first.date) end def test_match_ipr_id assert_equal('IPR003029', @obj.first.matches.first.ipr_id) end def test_match_ipr_description assert_equal('RNA binding S1', @obj.first.matches.first.ipr_description) end def test_match_go_terms ary = ["Biological Process:phosphorylation (GO:0016310)", "Molecular Function:transferase activity, transferring phosphorus-containing groups (GO:0016772)"] assert_equal(ary, @obj.last.matches.last.go_terms) end end class TestIprscanReport < Test::Unit::TestCase def setup @test_txt = Bio::TestIprscanData.txt_format.read.split(/\n\nSequence/)[0] @obj = Bio::Iprscan::Report.parse_txt_entry(@test_txt) @test_raw = Bio::TestIprscanData.raw_format.read.split("RS16_ECOLI")[0] end def test_to_raw # assert_equal(@test_raw.split("\n").sort, # @obj.format_raw.split("\n").sort) end def test_output_raw # assert_equal(@test_raw.split("\n").sort, # @obj.output(:raw).split("\n").sort) # assert_equal(@test_raw.split("\n").sort, # @obj.output('raw').split("\n").sort) end end # TestIprscanReport end bio-1.4.3.0001/test/unit/bio/appl/genscan/0000755000004100000410000000000012200110570017744 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/appl/genscan/test_report.rb0000644000004100000410000001154112200110570022645 0ustar www-datawww-data# # test/unit/bio/appl/genscan/test_report.rb - Unit test for Bio::Genscan::Report # # Copyright:: Copyright (C) 2005 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load 
Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/genscan/report' module Bio class TestGenscanReport < Test::Unit::TestCase def setup test_data = Pathname.new(File.join(BioRubyTestDataPath, 'genscan')).cleanpath.to_s report = File.open(File.join(test_data, 'sample.report')).read @obj = Bio::Genscan::Report.new(report) end def test_genscan_version assert_equal('1.0', @obj.genscan_version) end def test_date_run assert_equal('30-May-103', @obj.date_run) end def test_time assert_equal("14:06:28", @obj.time) end def test_query_name assert_equal('HUMRASH', @obj.query_name) end def test_length assert_equal(12942, @obj.length) end def test_gccontent assert_equal(68.17, @obj.gccontent) end def test_isochore assert_equal('4 (57 - 100 C+G%)', @obj.isochore) end def test_matrix assert_equal('HumanIso.smat', @obj.matrix) end def test_predictions_size assert_equal(2, @obj.predictions.size) end end # TestGenscanReport class TestGenscanReportGene < Test::Unit::TestCase def setup test_data = Pathname.new(File.join(BioRubyTestDataPath, 'genscan')).cleanpath.to_s report = File.open(File.join(test_data, 'sample.report')).read @obj = Bio::Genscan::Report.new(report).predictions end def test_number assert_equal(1, @obj.first.number) end def test_aaseq assert_equal(Bio::FastaFormat, @obj.first.aaseq.class) seq = "MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDLAARTVESRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQHKLRKLNPPDESGPGCMSCKCVLS" assert_equal(seq, @obj.first.aaseq.seq) definition = "HUMRASH|GENSCAN_predicted_peptide_1|189_aa" assert_equal(definition, @obj.first.aaseq.definition) end def test_naseq assert_equal(Bio::FastaFormat, @obj.first.naseq.class) seq = 
"atgacggaatataagctggtggtggtgggcgccggcggtgtgggcaagagtgcgctgaccatccagctgatccagaaccattttgtggacgaatacgaccccactatagaggattcctaccggaagcaggtggtcattgatggggagacgtgcctgttggacatcctggataccgccggccaggaggagtacagcgccatgcgggaccagtacatgcgcaccggggagggcttcctgtgtgtgtttgccatcaacaacaccaagtcttttgaggacatccaccagtacagggagcagatcaaacgggtgaaggactcggatgacgtgcccatggtgctggtggggaacaagtgtgacctggctgcacgcactgtggaatctcggcaggctcaggacctcgcccgaagctacggcatcccctacatcgagacctcggccaagacccggcagggagtggaggatgccttctacacgttggtgcgtgagatccggcagcacaagctgcggaagctgaaccctcctgatgagagtggccccggctgcatgagctgcaagtgtgtgctctcctga" assert_equal(seq, @obj.first.naseq.seq) definition = "HUMRASH|GENSCAN_predicted_CDS_1|570_bp" assert_equal(definition, @obj.first.naseq.definition) end def test_promoter assert_equal(Bio::Genscan::Report::Exon, @obj.last.promoter.class) assert_equal("Prom", @obj.last.promoter.exon_type) end def test_polyA assert_equal(Bio::Genscan::Report::Exon, @obj.first.polyA.class) assert_equal('PlyA', @obj.first.polyA.exon_type) end end # TestGenscanReportGene class TestGenscanReportExon < Test::Unit::TestCase def setup test_data = Pathname.new(File.join(BioRubyTestDataPath, 'genscan')).cleanpath.to_s report = File.open(File.join(test_data, 'sample.report')).read @obj = Bio::Genscan::Report.new(report).predictions.first.exons.first end def test_number assert_equal(1, @obj.number) end def test_exon_type assert_equal('Init', @obj.exon_type) end def test_exon_type_long assert_equal('Initial exon', @obj.exon_type_long) end def test_strand assert_equal('+', @obj.strand) end def test_first assert_equal(1664, @obj.first) end def test_last assert_equal(1774, @obj.last) end def test_range assert_equal(1664..1774, @obj.range) end def test_phase assert_equal('0', @obj.phase) end def test_acceptor_score assert_equal(94, @obj.acceptor_score) end def test_donor_score assert_equal(83, @obj.donor_score) end def test_initiation_score assert_equal(94, @obj.initiation_score) end def test_termination_score assert_equal(83, 
@obj.termination_score) end def test_score assert_equal(212, @obj.score) end def test_p_value assert_equal(0.997, @obj.p_value) end def test_t_score assert_equal(21.33, @obj.t_score) end end # TestGenscanReportExon end bio-1.4.3.0001/test/unit/bio/appl/targetp/0000755000004100000410000000000012200110570017774 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/appl/targetp/test_report.rb0000644000004100000410000000734112200110570022700 0ustar www-datawww-data# # test/unit/bio/appl/targetp/test_report.rb - Unit test for Bio::TargetP::Report # # Copyright: Copyright (C) 2005 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/targetp/report' module Bio TargetPReport_plant =<"MGI_2141503", "Loc."=>"_", "RC"=>3, "SP"=>0.271, "other"=>0.844, "mTP"=>0.161, "cTP"=>0.031, "Length"=>640} assert_equal(hash, @obj.pred) assert_equal(hash, @obj.prediction) end def test_cutoff hash = {"SP"=>0.0, "other"=>0.0, "mTP"=>0.0, "cTP"=>0.0} assert_equal(hash, @obj.cutoff) end def test_entry_id assert_equal('MGI_2141503', @obj.entry_id) end def test_name assert_equal('MGI_2141503', @obj.name) end def test_query_len assert_equal(640, @obj.query_len) end def test_length assert_equal(640, @obj.length) end def test_loc assert_equal('_', @obj.loc) end def test_rc assert_equal(3, @obj.rc) end end # class TestTargetPReport end bio-1.4.3.0001/test/unit/bio/appl/tmhmm/0000755000004100000410000000000012200110570017450 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/appl/tmhmm/test_report.rb0000644000004100000410000000632112200110570022351 0ustar www-datawww-data# # test/unit/bio/appl/tmhmm/test_report.rb - Unit test for Bio::TMHMM::Report # # Copyright:: Copyright (C) 2005 Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper 
routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/tmhmm/report' module Bio test_data = Pathname.new(File.join(BioRubyTestDataPath, 'TMHMM')).cleanpath.to_s TMHMMReport = File.open(File.join(test_data, 'sample.report')).read class TestTMHMMReport_reports < Test::Unit::TestCase def test_reports assert(Bio::TMHMM.reports("")) end end class TestTMHMMReport < Test::Unit::TestCase def setup @obj = Bio::TMHMM::Report.new(TMHMMReport) end def test_entry_id assert_equal('O42385', @obj.entry_id) end def test_query_len assert_equal(423, @obj.query_len) end def test_predicted_tmhs assert_equal(7, @obj.predicted_tmhs) end def test_tmhs assert_equal(Array, @obj.tmhs.class) assert_equal(15, @obj.tmhs.size) end def test_exp_aas_in_tmhs assert_equal(157.40784, @obj.exp_aas_in_tmhs) end def test_exp_first_60aa assert_equal(13.85627, @obj.exp_first_60aa) end def test_total_prob_of_N_in assert_equal(0.00993, @obj.total_prob_of_N_in) end def test_helix assert_equal(7, @obj.helix.size) assert_equal(Bio::TMHMM::TMH, @obj.helix[0].class) end def test_to_s str = ["# O42385\tLength:\t423", "# O42385\tNumber of predicted TMHs:\t7", "# O42385\tExp number of AAs in THMs:\t157.40784", "# O42385\tExp number, first 60 AAs:\t13.85627", "# O42385\tTotal prob of N-in:\t0.00993", "O42385\tTMHMM2.0\toutside\t1\t46", "O42385\tTMHMM2.0\tTMhelix\t47\t69", "O42385\tTMHMM2.0\tinside\t70\t81", "O42385\tTMHMM2.0\tTMhelix\t82\t104", "O42385\tTMHMM2.0\toutside\t105\t118", "O42385\tTMHMM2.0\tTMhelix\t119\t141", "O42385\tTMHMM2.0\tinside\t142\t161", "O42385\tTMHMM2.0\tTMhelix\t162\t184", "O42385\tTMHMM2.0\toutside\t185\t205", "O42385\tTMHMM2.0\tTMhelix\t206\t228", "O42385\tTMHMM2.0\tinside\t229\t348", "O42385\tTMHMM2.0\tTMhelix\t349\t371", "O42385\tTMHMM2.0\toutside\t372\t380", "O42385\tTMHMM2.0\tTMhelix\t381\t403", 
"O42385\tTMHMM2.0\tinside\t404\t423"].join("\n") assert_equal(str, @obj.to_s) end end # TestTMHMMReport class TestTMHMMTMH < Test::Unit::TestCase def setup @obj = Bio::TMHMM::Report.new(TMHMMReport).tmhs.first end def test_entry_id assert_equal('O42385', @obj.entry_id) end def test_version assert_equal('TMHMM2.0', @obj.version) end def test_status assert_equal('outside', @obj.status) end def test_range assert_equal(1..46, @obj.range) end def test_pos assert_equal(1..46, @obj.pos) end end # class TestTMHMMTMH end bio-1.4.3.0001/test/unit/bio/appl/blast/0000755000004100000410000000000012200110570017433 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/appl/blast/test_report.rb0000644000004100000410000011242512200110570022337 0ustar www-datawww-data# # test/unit/bio/appl/blast/test_report.rb - Unit test for Bio::Blast::Report # # Copyright:: Copyright (C) 2005, 2008 # Mitsuteru Nakao , # Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/blast/report' module Bio module TestBlastReportHelper TestDataBlast = Pathname.new(File.join(BioRubyTestDataPath, 'blast')).cleanpath.to_s private def get_input_data(basename = 'b0002.faa') File.open(File.join(TestDataBlast, basename)).read end def get_output_data(basename = 'b0002.faa', format = 7) fn = basename + ".m#{format.to_i}" # available filenames: # 'b0002.faa.m0' # 'b0002.faa.m7' # 'b0002.faa.m8' File.open(File.join(TestDataBlast, fn)).read end def create_report_object(basename = 'b0002.faa') case self.class.name.to_s when /XMLParser/i text = get_output_data(basename, 7) Bio::Blast::Report.new(text, :xmlparser) when /REXML/i text = get_output_data(basename, 7) Bio::Blast::Report.new(text, :rexml) when /Default/i text = get_output_data(basename, 0) 
Bio::Blast::Default::Report.new(text) when /Tab/i text = get_output_data(basename, 8) Bio::Blast::Report.new(text) else text = get_output_data(basename, 7) Bio::Blast::Report.new(text) end end end #module TestBlastReportHelper class TestBlastReport < Test::Unit::TestCase include TestBlastReportHelper def setup @report = create_report_object end def test_iterations assert(@report.iterations) end def test_parameters assert_equal('BLOSUM62', @report.parameters['matrix']) assert_equal(10, @report.parameters['expect']) assert_equal(11, @report.parameters['gap-open']) assert_equal(1, @report.parameters['gap-extend']) assert_equal('S', @report.parameters['filter']) end def test_program assert_equal('blastp', @report.program) end def test_version assert_equal('blastp 2.2.10 [Oct-19-2004]', @report.version) end def test_reference xml_quoted_str = "~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~"Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs", Nucleic Acids Res. 25:3389-3402." text_str = '~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~"Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs", Nucleic Acids Res. 25:3389-3402.' 
# assert_equal(xml_quoted_str, @report.reference) assert_equal(text_str, @report.reference) end def test_db assert_equal('b0002.faa', @report.db) end def test_query_id assert_equal('lcl|QUERY', @report.query_id) end def test_query_def assert_equal('eco:b0002 thrA, Hs, thrD, thrA2, thrA1; bifunctional: aspartokinase I (N-terminal); homoserine dehydrogenase I (C-terminal) [EC:2.7.2.4 1.1.1.3]; K00003 homoserine dehydrogenase; K00928 aspartate kinase (A)', @report.query_def) end def test_query_len assert_equal(820, @report.query_len) end def test_matrix assert_equal('BLOSUM62', @report.matrix) end def test_expect assert_equal(10, @report.expect) end def test_inclusion assert_nothing_raised { @report.inclusion } end def test_sc_match assert_nothing_raised { @report.sc_match } end def test_sc_mismatch assert_nothing_raised { @report.sc_mismatch } end def test_gap_open assert_equal(11, @report.gap_open) end def test_gap_extend assert_equal(1, @report.gap_extend) end def test_filter assert_equal('S', @report.filter) end def test_pattern assert_equal(nil, @report.pattern) end def test_entrez_query assert_equal(nil, @report.entrez_query) end def test_each_iteration assert_nothing_raised { @report.each_iteration { |itr| } } end def test_each_hit assert_nothing_raised { @report.each_hit { |hit| } } end def test_hits assert(@report.hits) end def test_statistics assert_equal({"kappa"=>0.041, "db-num"=>1, "eff-space"=>605284.0, "hsp-len"=>42, "db-len"=>820, "lambda"=>0.267, "entropy"=>0.14}, @report.statistics) end def test_db_num assert_equal(1, @report.db_num) end def test_db_len assert_equal(820, @report.db_len) end def test_hsp_len assert_equal(42, @report.hsp_len) end def test_eff_space assert_equal(605284, @report.eff_space) end def test_kappa assert_equal(0.041, @report.kappa) end def test_lambda assert_equal(0.267, @report.lambda) end def test_entropy assert_equal(0.14, @report.entropy) end def test_message assert_equal(nil, @report.message) end end class 
TestBlastReportIteration < Test::Unit::TestCase include TestBlastReportHelper def setup report = create_report_object @itr = report.iterations.first end def test_hits assert(@itr.hits) end def test_statistics stat = {"kappa" => 0.041, "eff-space" => 605284, "db-num" => 1, "hsp-len" => 42, "db-len" => 820, "lambda" => 0.267, "entropy" => 0.14} assert_equal(stat, @itr.statistics) end def test_num assert_equal(1, @itr.num) end def test_message assert_equal(nil, @itr.message) end end class TestBlastReportHit < Test::Unit::TestCase include TestBlastReportHelper def setup report = create_report_object @hit = report.hits.first end def test_Hit_hsps assert(@hit.hsps) end def test_Hit_query_id assert_equal('lcl|QUERY', @hit.query_id) end def test_Hit_query_def assert_equal('eco:b0002 thrA, Hs, thrD, thrA2, thrA1; bifunctional: aspartokinase I (N-terminal); homoserine dehydrogenase I (C-terminal) [EC:2.7.2.4 1.1.1.3]; K00003 homoserine dehydrogenase; K00928 aspartate kinase (A)', @hit.query_def) end def test_Hit_query_len assert_equal(820, @hit.query_len) end def test_Hit_num assert(@hit.num) end def test_Hit_hit_id assert_equal('gnl|BL_ORD_ID|0', @hit.hit_id) end def test_Hit_len assert_equal(820, @hit.len) end def test_Hit_target_len assert_equal(820, @hit.target_len) end def test_Hit_definition assert(@hit.definition) end def test_Hit_taeget_def assert(@hit.target_def) end def test_Hit_accession assert(@hit.accession) end def test_Hit_target_id assert(@hit.target_id) end def test_Hit_evalue assert_equal(0, @hit.evalue) end def test_Hit_bit_score assert_equal(1567.75, @hit.bit_score) end def test_Hit_identity assert_equal(820, @hit.identity) end def test_Hit_overlap assert_equal(820, @hit.overlap) end def test_Hit_query_seq seq = 
'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV' assert_equal(seq, @hit.query_seq) end def test_Hit_target_seq seq = 'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV' assert_equal(seq, @hit.target_seq) end def test_Hit_midline seq = 
'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV' assert_equal(seq, @hit.midline) end def test_Hit_query_start assert_equal(1, @hit.query_start) # assert_equal(1, @hit.query_from) end def test_Hit_query_end assert_equal(820, @hit.query_end) # assert_equal(820, @hit.query_to) end def test_Hit_target_start assert_equal(1, @hit.target_start) # assert_equal(1, @hit.hit_from) end def test_Hit_target_end assert_equal(820, @hit.target_end) # assert_equal(820, @hit.hit_to) end def test_Hit_lap_at assert_equal([1, 820, 1, 820], @hit.lap_at) end end class TestBlastReportHsp < Test::Unit::TestCase include TestBlastReportHelper def setup report = create_report_object @hsp = report.hits.first.hsps.first end def test_Hsp_num assert_equal(1, @hsp.num) end def test_Hsp_bit_score assert_equal(1567.75, @hsp.bit_score) end def test_Hsp_score assert_equal(4058, @hsp.score) end def test_Hsp_evalue assert_equal(0, @hsp.evalue) end def test_Hsp_identity assert_equal(820, @hsp.identity) end def test_Hsp_gaps assert_nothing_raised { @hsp.gaps } end def test_Hsp_positive assert_equal(820, @hsp.positive) end def test_Hsp_align_len assert_equal(820, @hsp.align_len) end def test_Hsp_density assert(@hsp.density) end def 
test_Hsp_query_frame assert_equal(1, @hsp.query_frame) end def test_Hsp_query_from assert_equal(1, @hsp.query_from) end def test_Hsp_query_to assert_equal(820, @hsp.query_to) end def test_Hsp_hit_frame assert_equal(1, @hsp.hit_frame) end def test_Hsp_hit_from assert_equal(1, @hsp.hit_from) end def test_Hsp_hit_to assert_equal(820, @hsp.hit_to) end def test_Hsp_pattern_from assert_nothing_raised { @hsp.pattern_from } end def test_Hsp_pattern_to assert_nothing_raised { @hsp.pattern_to } end def test_Hsp_qseq seq = 'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV' assert_equal(seq, @hsp.qseq) end def test_Hsp_midline seq = 
'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV' assert_equal(seq, @hsp.midline) end def test_Hsp_hseq seq = 'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV' assert_equal(seq, @hsp.hseq) end def test_Hsp_percent_identity assert_nothing_raised { @hsp.percent_identity } end def test_Hsp_mismatch_count assert_nothing_raised { @hsp.mismatch_count } end end class TestBlastReportREXML < TestBlastReport end class TestBlastReportIterationREXML < 
TestBlastReportIteration end class TestBlastReportHitREXML < TestBlastReportHit end class TestBlastReportHspREXML < TestBlastReportHsp end if defined? XMLParser then class TestBlastReportXMLParser < TestBlastReport end class TestBlastReportIterationXMLParser < TestBlastReportIteration end class TestBlastReportHitXMLParser < TestBlastReportHit end class TestBlastReportHspXMLParser < TestBlastReportHsp end end #if defined? XMLParser class TestBlastReportDefault < TestBlastReport undef test_entrez_query undef test_filter undef test_hsp_len undef test_inclusion undef test_parameters undef test_query_id undef test_statistics def test_program assert_equal('BLASTP', @report.program) end def test_reference text_str = 'Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.' assert_equal(text_str, @report.reference) end def test_version assert_equal('BLASTP 2.2.10 [Oct-19-2004]', @report.version) end def test_kappa assert_equal(0.134, @report.kappa) end def test_lambda assert_equal(0.319, @report.lambda) end def test_entropy assert_equal(0.383, @report.entropy) end def test_gapped_kappa assert_equal(0.0410, @report.gapped_kappa) end def test_gapped_lambda assert_equal(0.267, @report.gapped_lambda) end def test_gapped_entropy assert_equal(0.140, @report.gapped_entropy) end end class TestBlastReportIterationDefault < TestBlastReportIteration undef test_statistics end class TestBlastReportHitDefault < TestBlastReportHit undef test_Hit_accession undef test_Hit_hit_id undef test_Hit_num undef test_Hit_query_def undef test_Hit_query_id undef test_Hit_query_len def setup @filtered_query_sequence = 
'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQxxxxxxxxxxxxxxALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV' super end def test_Hit_bit_score # differs from XML because of truncation in the default format assert_equal(1567.0, @hit.bit_score) end def test_Hit_identity # differs from XML because filtered residues are not counted in the # default format assert_equal(806, @hit.identity) end def test_Hit_midline # differs from XML because filtered residues are not specified in XML seq = @filtered_query_sequence.gsub(/x/, ' ') assert_equal(seq, @hit.midline) end def test_Hit_query_seq # differs from XML because filtered residues are not specified in XML seq = @filtered_query_sequence.gsub(/x/, 'X') assert_equal(seq, @hit.query_seq) end end class TestBlastReportHspDefault < TestBlastReportHsp undef test_Hsp_density undef test_Hsp_mismatch_count undef test_Hsp_num undef test_Hsp_pattern_from undef test_Hsp_pattern_to def setup @filtered_query_sequence = 
'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQxxxxxxxxxxxxxxALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV' super end def test_Hsp_identity # differs from XML because filtered residues are not counted in the # default format assert_equal(806, @hsp.identity) end def test_Hsp_positive # differs from XML because filtered residues are not counted in the # default format assert_equal(806, @hsp.positive) end def test_Hsp_midline # differs from XML because filtered residues are not specified in XML seq = @filtered_query_sequence.gsub(/x/, ' ') assert_equal(seq, @hsp.midline) end def test_Hsp_qseq # differs from XML because filtered residues are not specified in XML seq = @filtered_query_sequence.gsub(/x/, 'X') assert_equal(seq, @hsp.qseq) end def test_Hsp_bit_score # differs from XML because of truncation in the default format assert_equal(1567.0, @hsp.bit_score) end def test_Hsp_hit_frame # differs from XML because not available in the default BLASTP format assert_equal(nil, @hsp.hit_frame) end def test_Hsp_query_frame # differs from XML because not available in the default BLASTP format assert_equal(nil, @hsp.query_frame) end end module TestBlastReportTabularHelper def def_test_assert_nil(test_method) str = test_method.to_s name = str.sub(/test(\_(Hit|Hsp))?\_/, '') 
method = name.intern instance = case self.to_s when /Iteration/ :@itr when /Hit/ :@hit when /Hsp/ :@hsp else :@report end define_method(test_method) do assert_nil(instance_variable_get(instance).__send__(method)) end end end #module class TestBlastReportTabular < TestBlastReport extend TestBlastReportTabularHelper def_test_assert_nil :test_db def_test_assert_nil :test_db_len def_test_assert_nil :test_db_num def_test_assert_nil :test_eff_space def_test_assert_nil :test_entropy def_test_assert_nil :test_expect def_test_assert_nil :test_filter def_test_assert_nil :test_gap_extend def_test_assert_nil :test_gap_open def_test_assert_nil :test_hsp_len def_test_assert_nil :test_kappa def_test_assert_nil :test_lambda def_test_assert_nil :test_matrix def_test_assert_nil :test_program def_test_assert_nil :test_query_len def_test_assert_nil :test_reference def_test_assert_nil :test_version # No parameters information available in the "-m 8" format def test_parameters assert_equal({}, @report.parameters) end def test_query_def # differs from XML because of truncation in the "-m 8" format assert_equal("eco:b0002", @report.query_def) end def test_query_id # differs from XML because of the limited data assert_equal("eco:b0002", @report.query_id) end # No statistics information available in the "-m 8" format def test_statistics assert_equal({}, @report.statistics) end end #class TestBlastReportTabular class TestBlastReportIterationTabular < TestBlastReportIteration # No statistics information available in the "-m 8" format def test_statistics assert_equal({}, @itr.statistics) end end #class TestBlastReportIterationTabular class TestBlastReportHitTabular < TestBlastReportHit extend TestBlastReportTabularHelper def_test_assert_nil :test_Hit_hit_id def_test_assert_nil :test_Hit_identity def_test_assert_nil :test_Hit_len def_test_assert_nil :test_Hit_midline def_test_assert_nil :test_Hit_query_len def_test_assert_nil :test_Hit_query_seq def_test_assert_nil :test_Hit_target_len 
def_test_assert_nil :test_Hit_target_seq def test_Hit_bit_score # differs from XML because of truncation in the "-m 8" format assert_equal(1567.0, @hit.bit_score) end def test_Hit_query_def # differs from XML because of truncation in the "-m 8" format assert_equal("eco:b0002", @hit.query_def) end def test_Hit_query_id # differs from XML because of the limited data assert_equal("eco:b0002", @hit.query_id) end end #class TestBlastReportHitTabular class TestBlastReportHspTabular < TestBlastReportHsp extend TestBlastReportTabularHelper def_test_assert_nil :test_Hsp_density def_test_assert_nil :test_Hsp_hit_frame def_test_assert_nil :test_Hsp_hseq def_test_assert_nil :test_Hsp_identity def_test_assert_nil :test_Hsp_midline def_test_assert_nil :test_Hsp_positive def_test_assert_nil :test_Hsp_qseq def_test_assert_nil :test_Hsp_query_frame def_test_assert_nil :test_Hsp_score def test_Hsp_bit_score # differs from XML because of truncation in the "-m 8" format assert_equal(1567.0, @hsp.bit_score) end end #class TestBlastReportHspTabular ######################################################################## # Tests for new BLAST XML format (blastall 2.2.14 or later) # with the result of multiple query sequences ######################################################################## class TestBlastReportMulti < Test::Unit::TestCase include TestBlastReportHelper def setup @report = create_report_object('blastp-multi') @overall = [ @report ] + @report.reports end def test_reports assert_equal(5, @report.reports.size) end def test_iterations assert_equal(1, @report.iterations.size) assert_equal([ 1, 1, 1, 1, 1], @report.reports.collect { |x| x.iterations.size }) end def test_parameters @overall.each do |r| assert_equal('BLOSUM62', r.parameters['matrix']) assert_equal(0.001, r.parameters['expect']) assert_equal(11, r.parameters['gap-open']) assert_equal(1, r.parameters['gap-extend']) assert_equal('F', r.parameters['filter']) end end def test_program @overall.each do |r| 
assert_equal('blastp', r.program) end end def test_version @overall.each do |r| assert_equal('blastp 2.2.18 [Mar-02-2008]', r.version) end end def test_reference text_str = '~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~"Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs", Nucleic Acids Res. 25:3389-3402.' @overall.each do |r| assert_equal(text_str, r.reference) end end def test_db @overall.each do |r| assert_equal('BA000007.faa', r.db) end end def test_query_id qids = [ 'lcl|1_0', 'lcl|2_0', 'lcl|3_0', nil, 'lcl|5_0' ] assert_equal(qids[0], @report.query_id) assert_equal(qids, @report.reports.collect { |r| r.query_id }) end def test_query_def qdefs = [ 'gi|1790845|gb|AAC77338.1| predicted DNA-binding transcriptional regulator [Escherichia coli str. K-12 substr. MG1655]', 'gi|1790846|gb|AAC77339.1| lipoate-protein ligase A [Escherichia coli str. K-12', 'gi|1790847|gb|AAC77340.1| conserved protein [Escherichia coli str. K-12 substr. MG1655]', nil, 'gi|1790849|gb|AAC77341.1| 3-phosphoserine phosphatase [Escherichia coli str. K-12 substr. 
MG1655]' ] assert_equal(qdefs[0], @report.query_def) assert_equal(qdefs, @report.reports.collect { |r| r.query_def }) end def test_query_len qlens = [ 443, 346, 214, nil, 322 ] assert_equal(qlens[0], @report.query_len) assert_equal(qlens, @report.reports.collect { |r| r.query_len }) end def test_matrix @overall.each do |r| assert_equal('BLOSUM62', r.matrix) end end def test_expect @overall.each do |r| assert_equal(0.001, r.expect) end end def test_gap_open @overall.each do |r| assert_equal(11, r.gap_open) end end def test_gap_extend @overall.each do |r| assert_equal(1, r.gap_extend) end end def test_filter @overall.each do |r| assert_equal('F', r.filter) end end def test_pattern @overall.each do |r| assert_equal(nil, r.pattern) end end def test_each_iteration @overall.each do |r| count = 0 assert_nothing_raised { r.each_iteration { |itr| count += 1 } } assert_equal(1, count) end end def test_each_hit @overall.each do |r| assert_nothing_raised { r.each_hit { |hit| } } end end def test_hits hsizes = [ 0, 1, 1, 0, 2 ] assert_equal(hsizes[0], @report.hits.size) assert_equal(hsizes, @report.reports.collect { |r| r.hits.size }) end def test_statistics assert_equal({}, @report.statistics) stat = { "kappa" => 0.041, "eff-space" => 0, "db-num" => 5361, "hsp-len" => 0, "db-len" => 1609188, "lambda" => 0.267, "entropy" => 0.14 } stats = [ {}, stat, stat, {}, stat ] @report.reports.each do |r| assert_equal(stats.shift, r.statistics) end end def test_db_num assert_equal(nil, @report.db_num) ary = [ nil, 5361, 5361, nil, 5361 ] @report.reports.each do |r| assert_equal(ary.shift, r.db_num) end end def test_db_len assert_equal(nil, @report.db_len) ary = [ nil, 1609188, 1609188, nil, 1609188 ] @report.reports.each do |r| assert_equal(ary.shift, r.db_len) end end def test_hsp_len assert_equal(nil, @report.hsp_len) ary = [ nil, 0, 0, nil, 0 ] @report.reports.each do |r| assert_equal(ary.shift, r.hsp_len) end end def test_eff_space assert_equal(nil, @report.eff_space) ary = [ nil, 0, 
0, nil, 0 ] @report.reports.each do |r| assert_equal(ary.shift, r.eff_space) end end def test_kappa assert_equal(nil, @report.kappa) ary = [ nil, 0.041, 0.041, nil, 0.041 ] @report.reports.each do |r| assert_equal(ary.shift, r.kappa) end end def test_lambda assert_equal(nil, @report.lambda) ary = [ nil, 0.267, 0.267, nil, 0.267 ] @report.reports.each do |r| assert_equal(ary.shift, r.lambda) end end def test_entropy assert_equal(nil, @report.entropy) ary = [ nil, 0.14, 0.14, nil, 0.14 ] @report.reports.each do |r| assert_equal(ary.shift, r.entropy) end end def test_message @overall.each do |r| assert_equal(nil, r.message) end end end class TestBlastReportIterationMulti < Test::Unit::TestCase include TestBlastReportHelper def setup report = create_report_object('blastp-multi') @itr = report.reports[4].iterations[0] end def test_query_id assert_equal('lcl|5_0', @itr.query_id) end def test_query_def assert_equal('gi|1790849|gb|AAC77341.1| 3-phosphoserine phosphatase [Escherichia coli str. K-12 substr. MG1655]', @itr.query_def) end def test_query_len assert_equal(322, @itr.query_len) end def test_hits assert_equal(2, @itr.hits.size) end def test_each count = 0 assert_nothing_raised { @itr.each { |hit| count += 1 } } assert_equal(2, count) end def test_statistics stat = { "kappa" => 0.041, "eff-space" => 0, "db-num" => 5361, "hsp-len" => 0, "db-len" => 1609188, "lambda" => 0.267, "entropy" => 0.14 } assert_equal(stat, @itr.statistics) end def test_num assert_equal(5, @itr.num) end def test_message assert_equal(nil, @itr.message) end end class TestBlastReportHitMulti < Test::Unit::TestCase include TestBlastReportHelper def setup report = create_report_object('blastp-multi') @hit = report.reports[4].iterations[0].hits[1] end def test_Hit_hsps assert_equal(1, @hit.hsps.size) end def test_Hit_query_id assert_equal('lcl|5_0', @hit.query_id) end def test_Hit_query_def assert_equal('gi|1790849|gb|AAC77341.1| 3-phosphoserine phosphatase [Escherichia coli str. K-12 substr. 
MG1655]', @hit.query_def) end def test_Hit_query_len assert_equal(322, @hit.query_len) end def test_Hit_num assert_equal(2, @hit.num) end def test_Hit_hit_id assert_equal('gi|13363792|dbj|BAB37741.1|', @hit.hit_id) end def test_Hit_len assert_equal(732, @hit.len) end def test_Hit_target_len assert_equal(732, @hit.target_len) end def test_Hit_definition assert_equal('zinc-transporting ATPase [Escherichia coli O157:H7 str. Sakai]', @hit.definition) end def test_Hit_taeget_def assert_equal('zinc-transporting ATPase [Escherichia coli O157:H7 str. Sakai]', @hit.target_def) end def test_Hit_accession assert_equal('BAB37741', @hit.accession) end def test_Hit_target_id #assert_equal('gi|13363792|dbj|BAB37741.1|', @hit.target_id) assert_equal('BAB37741', @hit.target_id) end def test_Hit_evalue assert_equal(0.000899657, @hit.evalue) end def test_Hit_bit_score assert_equal(38.1206, @hit.bit_score) end def test_Hit_identity assert_equal(39, @hit.identity) end def test_Hit_overlap # alignment length assert_equal(123, @hit.overlap) end def test_Hit_query_seq seq = 'VLKLETLGWKVAIASGGFTFFAEYLRDKLRLTAVVANELEIMDGKFTGNVIGDIVDAQYKAKTLTRLAQEYEIPLAQTVAIGDGANDLPMIKAAGLGIAYHAKPKVN-EKAEVTIRHADLMGV' assert_equal(seq, @hit.query_seq) end def test_Hit_target_seq seq = 'ISELNALGVKGVILTG----------DNPRAAAAIAGELGL---EFKAGLL-----PEDKVKAVTELNQHA--PLAM---VGDGINDAPAMKAAAIGIAMGSGTDVALETADAALTHNHLRGL' assert_equal(seq, @hit.target_seq) end def test_Hit_midline seq = '+ +L LG K I +G D R A +A EL + +F ++ + K K +T L Q PLA +GDG ND P +KAA +GIA + V E A+ + H L G+' assert_equal(seq, @hit.midline) end def test_Hit_query_start assert_equal(190, @hit.query_start) # assert_equal(190, @hit.query_from) end def test_Hit_query_end assert_equal(311, @hit.query_end) # assert_equal(311, @hit.query_to) end def test_Hit_target_start assert_equal(569, @hit.target_start) # assert_equal(569, @hit.hit_from) end def test_Hit_target_end assert_equal(668, @hit.target_end) # assert_equal(668, @hit.hit_to) end def test_Hit_lap_at 
assert_equal([190, 311, 569, 668], @hit.lap_at) end end class TestBlastReportHspMulti < Test::Unit::TestCase include TestBlastReportHelper def setup report = create_report_object('blastp-multi') @hsp = report.reports[4].iterations[0].hits[1].hsps[0] end def test_Hsp_num assert_equal(1, @hsp.num) end def test_Hsp_bit_score assert_equal(38.1206, @hsp.bit_score) end def test_Hsp_score assert_equal(87, @hsp.score) end def test_Hsp_evalue assert_equal(0.000899657, @hsp.evalue) end def test_Hsp_identity assert_equal(39, @hsp.identity) end def test_Hsp_gaps assert_equal(24, @hsp.gaps) end def test_Hsp_positive assert_equal(56, @hsp.positive) end def test_Hsp_align_len assert_equal(123, @hsp.align_len) end def test_Hsp_density assert_nothing_raised { @hsp.density } end def test_Hsp_query_frame assert_equal(1, @hsp.query_frame) end def test_Hsp_query_from assert_equal(190, @hsp.query_from) end def test_Hsp_query_to assert_equal(311, @hsp.query_to) end def test_Hsp_hit_frame assert_equal(1, @hsp.hit_frame) end def test_Hsp_hit_from assert_equal(569, @hsp.hit_from) end def test_Hsp_hit_to assert_equal(668, @hsp.hit_to) end def test_Hsp_pattern_from assert_nothing_raised { @hsp.pattern_from } end def test_Hsp_pattern_to assert_nothing_raised { @hsp.pattern_to } end def test_Hsp_qseq seq = 'VLKLETLGWKVAIASGGFTFFAEYLRDKLRLTAVVANELEIMDGKFTGNVIGDIVDAQYKAKTLTRLAQEYEIPLAQTVAIGDGANDLPMIKAAGLGIAYHAKPKVN-EKAEVTIRHADLMGV' assert_equal(seq, @hsp.qseq) end def test_Hsp_midline seq = '+ +L LG K I +G D R A +A EL + +F ++ + K K +T L Q PLA +GDG ND P +KAA +GIA + V E A+ + H L G+' assert_equal(seq, @hsp.midline) end def test_Hsp_hseq seq = 'ISELNALGVKGVILTG----------DNPRAAAAIAGELGL---EFKAGLL-----PEDKVKAVTELNQHA--PLAM---VGDGINDAPAMKAAAIGIAMGSGTDVALETADAALTHNHLRGL' assert_equal(seq, @hsp.hseq) end def test_Hsp_percent_identity assert_nothing_raised { @hsp.percent_identity } end def test_Hsp_mismatch_count assert_nothing_raised { @hsp.mismatch_count } end end # Tests for REXML version class 
TestBlastReportMultiREXML < TestBlastReportMulti end class TestBlastReportIterationMultiREXML < TestBlastReportIterationMulti end class TestBlastReportHitMultiREXML < TestBlastReportHitMulti end class TestBlastReportHspMultiREXML < TestBlastReportHspMulti end # Tests for XMLParser version if defined? XMLParser then class TestBlastReportMultiXMLParser < TestBlastReportMulti end class TestBlastReportIterationMultiXMLParser < TestBlastReportIterationMulti end class TestBlastReportHitMultiXMLParser < TestBlastReportHitMulti end class TestBlastReportHspMultiXMLParser < TestBlastReportHspMulti end end #if defined? XMLParser end # module Bio bio-1.4.3.0001/test/unit/bio/appl/blast/test_rpsblast.rb0000644000004100000410000003421412200110570022655 0ustar www-datawww-data# # test/unit/bio/appl/blast/test_rpsblast.rb - Unit test for Bio::Blast::RPSBlast::Report # # Copyright:: Copyright (C) 2008 # Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'digest/sha1' require 'bio/io/flatfile' require 'bio/appl/blast/rpsblast' module Bio module TestRPSBlast TestFileName = Pathname.new(File.join(BioRubyTestDataPath, 'rpsblast', 'misc.rpsblast')).cleanpath.to_s class TestRPSBlastSplitter < Test::Unit::TestCase def setup @io = File.open(TestFileName) @io.binmode @bstream = Bio::FlatFile::BufferedInputStream.new(@io, TestFileName) @klass = Bio::Blast::RPSBlast::Report @splitter = Bio::Blast::RPSBlast::RPSBlastSplitter.new(@klass, @bstream) end def teardown @io.close end def test_skip_leader assert_equal(nil, @splitter.skip_leader) assert_equal(0, @bstream.pos) # force to push back white spaces @bstream.ungets(" \n\n \t\t \n") assert_equal(nil, @splitter.skip_leader) assert_equal("RPS-BLAST 2.2.18 [Mar-02-2008]\n", @bstream.gets) end def test_rewind 
assert_nothing_raised { @splitter.rewind } end def test_get_entry assert(raw = @splitter.get_entry) assert_equal(4388, raw.size) assert_equal('12201ff286b16f8578e2a3b0778c721438ac8278', Digest::SHA1.hexdigest(raw)) assert(raw = @splitter.get_entry) assert_equal(245, raw.size) assert_equal('f5fb1ac1aa62ba65a68c5c7c8240c0a9fc047a46', Digest::SHA1.hexdigest(raw)) assert(raw = @splitter.get_entry) assert_equal(3144, raw.size) assert_equal('db0ff4bf9901186758b2a0d6e94734a53733631f', Digest::SHA1.hexdigest(raw)) assert_nil(@splitter.get_entry) end def test_entry_pos @splitter.entry_pos_flag = true @splitter.get_entry assert_equal(0, @splitter.entry_start_pos) assert_equal(4388, @splitter.entry_ended_pos) @splitter.get_entry assert_equal(4388, @splitter.entry_start_pos) assert_equal(4461, @splitter.entry_ended_pos) @splitter.get_entry assert_equal(4461, @splitter.entry_start_pos) assert_equal(7433, @splitter.entry_ended_pos) end end #class TestRPSBlastSplitter class TestRPSBlastReport < Test::Unit::TestCase def setup @flatfile = Bio::FlatFile.open(Bio::Blast::RPSBlast::Report, TestFileName) @obj = @flatfile.next_entry end def teardown @flatfile.close end def test_program assert_equal('RPS-BLAST', @obj.program) end def test_version exp = 'RPS-BLAST 2.2.18 [Mar-02-2008]' assert_equal(exp, @obj.version) end def test_version_number assert_equal('2.2.18', @obj.version_number) end def test_version_date assert_equal('Mar-02-2008', @obj.version_date) end def test_db assert_equal('Pfam.v.22.0', @obj.db) end def test_query_def ary = [ 'TestSequence mixture of globin and rhodopsin (computationally randomly concatenated)', 'randomseq3', 'gi|6013469|gb|AAD49229.2|AF159462_1 EHEC factor for adherence [Escherichia coli]' ] @flatfile.rewind @flatfile.each do |rep| assert_equal(ary.shift, rep.query_def) end assert(ary.empty?) end def test_query_len ary = [ 495, 1087, 3223 ] @flatfile.rewind @flatfile.each do |rep| assert_equal(ary.shift, rep.query_len) end assert(ary.empty?) 
end def test_hits_size ary = [ 3, 0, 2 ] @flatfile.rewind @flatfile.each do |rep| assert_equal(ary.shift, rep.hits.size) end assert(ary.empty?) end def test_iterations_size ary = [ 1, 1, 1 ] @flatfile.rewind @flatfile.each do |rep| assert_equal(ary.shift, rep.iterations.size) end assert(ary.empty?) end end #class TestRPSBlastReport class TestRPSBlastReportHit < Test::Unit::TestCase def setup flatfile = Bio::FlatFile.open(Bio::Blast::RPSBlast::Report, TestFileName) @hits = flatfile.next_entry.hits flatfile.close end def test_hsps_size ary = [ 1, 2, 1 ] @hits.each do |h| assert_equal(ary.shift, h.hsps.size) end assert(ary.empty?) end def test_len assert_equal(110, @hits[0].len) assert_equal(258, @hits[1].len) assert_equal(336, @hits[2].len) end def test_target_len assert_equal(110, @hits[0].target_len) assert_equal(258, @hits[1].target_len) assert_equal(336, @hits[2].target_len) end def test_target_def assert_equal('gnl|CDD|84466 pfam00042, Globin, Globin..', @hits[0].target_def) assert_equal("gnl|CDD|84429 pfam00001, 7tm_1, 7 transmembrane receptor (rhodopsin family). This" \ " family contains, amongst other G-protein-coupled" \ " receptors (GCPRs), members of the opsin family, which" \ " have been considered to be typical members of the" \ " rhodopsin superfamily. They share several motifs, mainly" \ " the seven transmembrane helices, GCPRs of the rhodopsin" \ " superfamily. All opsins bind a chromophore, such as" \ " 11-cis-retinal. The function of most opsins other than" \ " the photoisomerases is split into two steps: light" \ " absorption and G-protein activation. Photoisomerases, on" \ " the other hand, are not coupled to G-proteins - they are" \ " thought to generate and supply the chromophore that is" \ " used by visual opsins..", @hits[1].target_def) assert_equal("gnl|CDD|87195 pfam06976, DUF1300, Protein of unknown function (DUF1300). 
This" \ " family represents a conserved region approximately 80" \ " residues long within a number of proteins of unknown" \ " function that seem to be specific to C. elegans. Some" \ " family members contain more than one copy of this" \ " region..", @hits[2].target_def) end def test_definition assert_equal('gnl|CDD|84466 pfam00042, Globin, Globin..', @hits[0].definition) assert_equal("gnl|CDD|84429 pfam00001, 7tm_1, 7 transmembrane receptor (rhodopsin family). This" \ " family contains, amongst other G-protein-coupled" \ " receptors (GCPRs), members of the opsin family, which" \ " have been considered to be typical members of the" \ " rhodopsin superfamily. They share several motifs, mainly" \ " the seven transmembrane helices, GCPRs of the rhodopsin" \ " superfamily. All opsins bind a chromophore, such as" \ " 11-cis-retinal. The function of most opsins other than" \ " the photoisomerases is split into two steps: light" \ " absorption and G-protein activation. Photoisomerases, on" \ " the other hand, are not coupled to G-proteins - they are" \ " thought to generate and supply the chromophore that is" \ " used by visual opsins..", @hits[1].definition) assert_equal("gnl|CDD|87195 pfam06976, DUF1300, Protein of unknown function (DUF1300). This" \ " family represents a conserved region approximately 80" \ " residues long within a number of proteins of unknown" \ " function that seem to be specific to C. elegans. 
Some" \ " family members contain more than one copy of this" \ " region..", @hits[2].definition) end def test_evalue assert_equal(2.0e-25, @hits[0].evalue) assert_equal(2.0e-19, @hits[1].evalue) assert_equal(0.003, @hits[2].evalue) end def test_bit_score assert_equal(110.0, @hits[0].bit_score) assert_equal(90.8, @hits[1].bit_score) assert_equal(37.1, @hits[2].bit_score) end def test_identity assert_equal(50, @hits[0].identity) assert_equal(37, @hits[1].identity) assert_equal(32, @hits[2].identity) end def test_overlap assert_equal(110, @hits[0].overlap) assert_equal(162, @hits[1].overlap) assert_equal(145, @hits[2].overlap) end def test_query_seq assert_equal("EKQLITGLWGKV--NVAECGAEALARLLIVYPWTQRFFASFGNLSSPTAILGNPMVRAHGKKVLTSFGDAVKNLDN---IKNTFSQLSELHCDKLHVDPENFRLLGDILI", @hits[0].query_seq) assert_equal("HAIMGVAFTWVMALACAAPPLAGWSRY-IPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTV----KEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFY--IFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIY", @hits[1].query_seq) assert_equal("IDYYTLKPEVNNESFVIYMFV--VHFT-IPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIM----MNKQFRNCMLTTICCGKN", @hits[2].query_seq) end def test_target_seq assert_equal("QKALVKASWGKVKGNAPEIGAEILARLFTAYPDTKAYFPKFGDLSTAEALKSSPKFKAHGKKVLAALGEAVKHLDDDGNLKAALKKLGARHAKRGHVDPANFKLFGEALL", @hits[0].target_seq) assert_equal("RAKVLILLVWVLALLLSLPPLLFSWLRTVEEGNVTTCLIDFPEESLLR---SYTLLSTLLGFVLPLLVILVCYTRILRTLRRRARSGASIARSLKRRSSSERKAAKMLLVVVVVFVLCWLPYHIVLLLDSLCLLSIIRVLPTALLITLWLAYVNSCLNPIIY", @hits[1].target_seq) assert_equal("IEYIIETTELFGSSYEILLLIEGILFKLIPSIILPIATILLIFQLKKNKKVSSRSSTSSSSNDRSTKLVTFVTISFLIATVPLGILYLIKFFVFEYEGLVMIIDKLAIIFTFLSTINGTIHFLICYFMSSQYRNTVREMFGRKKK", @hits[2].target_seq) end def test_midline assert_equal("+K L+ WGKV N E GAE LARL YP T+ +F FG+LS+ A+ +P +AHGKKVL + G+AVK+LD+ +K +L H + HVDP NF+L G+ L+", @hits[0].midline) assert_equal(" A + + WV+AL + PPL + EG +C ID+ S+ + ++ F +P+++I CY +++ T+ + A+ + +E++ +M++++V+ F++CW+PY V + P + I + 
A + NP+IY", @hits[1].midline) assert_equal("I+Y E+ S+ I + + + F IP II+ L+F +K+ S+T+ + T++V + I+FLI VP + F + + A + N I+ + M+ Q+RN + K ", @hits[2].midline) end def test_query_start assert_equal(148, @hits[0].query_start) assert_equal(299, @hits[1].query_start) assert_equal(336, @hits[2].query_start) end def test_query_end assert_equal(252, @hits[0].query_end) assert_equal(453, @hits[1].query_end) assert_equal(473, @hits[2].query_end) end def test_target_start assert_equal(1, @hits[0].target_start) assert_equal(100, @hits[1].target_start) assert_equal(192, @hits[2].target_start) end def test_target_end assert_equal(110, @hits[0].target_end) assert_equal(258, @hits[1].target_end) assert_equal(336, @hits[2].target_end) end def test_lap_at assert_equal([148, 252, 1, 110], @hits[0].lap_at) assert_equal([299, 453, 100, 258], @hits[1].lap_at) assert_equal([336, 473, 192, 336], @hits[2].lap_at) end end #class TestRPSBlastHit class TestRPSBlastHSP < Test::Unit::TestCase def setup flatfile = Bio::FlatFile.open(Bio::Blast::RPSBlast::Report, TestFileName) @hsps = flatfile.next_entry.hits[1].hsps flatfile.close end def test_bit_score assert_equal(90.8, @hsps[0].bit_score) assert_equal(73.4, @hsps[1].bit_score) end def test_score assert_equal(225, @hsps[0].score) assert_equal(180, @hsps[1].score) end def test_evalue assert_equal(2.0e-19, @hsps[0].evalue) assert_equal(3.0e-14, @hsps[1].evalue) end def test_identity assert_equal(37, @hsps[0].identity) assert_equal(32, @hsps[1].identity) end def test_gaps assert_equal(10, @hsps[0].gaps) assert_equal(nil, @hsps[1].gaps) end def test_positive assert_equal(76, @hsps[0].positive) assert_equal(47, @hsps[1].positive) end def test_align_len assert_equal(162, @hsps[0].align_len) assert_equal(86, @hsps[1].align_len) end def test_query_from assert_equal(299, @hsps[0].query_from) assert_equal(55, @hsps[1].query_from) end def test_query_to assert_equal(453, @hsps[0].query_to) assert_equal(140, @hsps[1].query_to) end def test_hit_from 
assert_equal(100, @hsps[0].hit_from) assert_equal(2, @hsps[1].hit_from) end def test_hit_to assert_equal(258, @hsps[0].hit_to) assert_equal(87, @hsps[1].hit_to) end def test_qseq assert_equal("HAIMGVAFTWVMALACAAPPLAGWSRY-IPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTV----KEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFY--IFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIY", @hsps[0].qseq) assert_equal("NFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVC", @hsps[1].qseq) end def test_hseq assert_equal("RAKVLILLVWVLALLLSLPPLLFSWLRTVEEGNVTTCLIDFPEESLLR---SYTLLSTLLGFVLPLLVILVCYTRILRTLRRRARSGASIARSLKRRSSSERKAAKMLLVVVVVFVLCWLPYHIVLLLDSLCLLSIIRVLPTALLITLWLAYVNSCLNPIIY", @hsps[0].hseq) assert_equal("NLLVILVILRTKRLRTPTNIFLLNLAVADLLFLLTLPPWALYYLVGGDWPFGDALCKLVGALFVVNGYASILLLTAISIDRYLAIV", @hsps[1].hseq) end def test_midline assert_equal(" A + + WV+AL + PPL + EG +C ID+ S+ + ++ F +P+++I CY +++ T+ + A+ + +E++ +M++++V+ F++CW+PY V + P + I + A + NP+IY", @hsps[0].midline) assert_equal("N L + V ++ K+LRTP N LLNLAVADL +L LY + G + FG C L G + G ++ L ++I+RY+ + ", @hsps[1].midline) end def test_percent_identity assert_equal(22, @hsps[0].percent_identity) assert_equal(37, @hsps[1].percent_identity) end end #class TestRPSBlastHSP end #module TestRPSBlast end #module Bio bio-1.4.3.0001/test/unit/bio/appl/blast/test_ncbioptions.rb0000644000004100000410000000636112200110570023354 0ustar www-datawww-data# # = test/unit/bio/appl/blast/test_ncbioptions.rb - Unit test for Bio::Blast::NCBIOptions # # Copyright:: Copyright (C) 2008 Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/blast/ncbioptions' module Bio class TestBlastNCBIOptions < Test::Unit::TestCase def setup @str = '-p blastn -m0 -m 1 -m2 -m 3 -F T -m 4 m5 -pblastx -m 6 -m 7' 
@options = %w( -p blastn -m0 -m 1 -m2 -m 3 -F T -m 4 m5 -pblastx -m 6 -m 7 ) @normalized_options = %w( -F T m5 -p blastx -m 7 ) @obj = Bio::Blast::NCBIOptions.parse(@str) end def test_parse str = '-p tblastx -d cdna_human -i est001.fst -o test.blastn -e 0.1' options = %w( -p tblastx -d cdna_human -i est001.fst -o test.blastn -e 0.1 ) obj = Bio::Blast::NCBIOptions.parse(str) assert_equal(options, obj.options) end def test_normalize! assert_nothing_raised { @obj.normalize! } assert_equal(@normalized_options, @obj.options) end def test_get assert_equal('blastx', @obj.get('-p')) assert_equal('blastx', @obj.get('p')) assert_equal('7', @obj.get('-m')) assert_equal('7', @obj.get('m')) assert_equal('T', @obj.get('-F')) assert_equal('T', @obj.get('F')) assert_nil(@obj.get('-X')) end def test_delete assert_equal('blastx', @obj.delete('-p')) assert_nil(@obj.delete('p')) assert_equal('7', @obj.delete('-m')) assert_nil(@obj.delete('m')) assert_equal('T', @obj.delete('F')) assert_nil(@obj.delete('-F')) assert_nil(@obj.delete('-X')) end def test_set assert_equal('blastx', @obj.set('-p', 'blastp')) assert_equal('blastp', @obj.set('p', 'tblastx')) assert_equal('tblastx',@obj.get('p')) assert_equal('7', @obj.set('m', '8')) assert_equal('8', @obj.set('-m', '0')) assert_equal('0', @obj.get('m')) assert_equal('T', @obj.set('-F', 'F')) assert_equal('F', @obj.get('F')) assert_nil(@obj.set('-d', 'nr')) assert_equal('nr', @obj.get('d')) assert_nil(@obj.set('i', 'test.fst')) assert_equal('test.fst', @obj.get('-i')) end def test_equal_equal obj1 = Bio::Blast::NCBIOptions.parse(@str) assert_equal(true, @obj == obj1) obj2 = Bio::Blast::NCBIOptions.parse('-F F') assert_equal(false, @obj == obj2) assert_equal(false, @obj == 12345) end def test_add_options opts = %w( -p tblastx -m 8 -d cdna -i est.fst -o test.blast -e 0.01 ) result_opts = %w( -F T m5 ) + opts assert_nothing_raised { @obj.add_options(opts) } assert_equal(result_opts, @obj.options) end def test_make_command_line_options opts = %w( 
-p tblastx -d cdna -i est.fst -o test.blast -e 0.01 ) result_opts = opts + %w( -m 0 -m 1 -m 2 -m 3 -F T -m 4 m5 -m 6 -m 7 ) assert_equal(result_opts, @obj.make_command_line_options(opts)) end end #class TestBlastNCBIOptions end #module Bio bio-1.4.3.0001/test/unit/bio/appl/test_pts1.rb0000644000004100000410000000337712200110570020613 0ustar www-datawww-data# # = test/unit/bio/appl/test_pts1.rb - Unit test for Bio::PTS1 # # Copyright:: Copyright (C) 2006 # Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/pts1' module Bio class TestPTS1Constant < Test::Unit::TestCase def test_FUNCTION keys = ['METAZOA-specific','FUNGI-specific','GENERAL'].sort assert_equal(keys, Bio::PTS1::FUNCTION.keys.sort) end end class TestPTS1New < Test::Unit::TestCase def test_metazoa pts1 = Bio::PTS1.new_with_metazoa_function assert_equal('METAZOA-specific', pts1.function) end def test_fungi pts1 = Bio::PTS1.new_with_fungi_function assert_equal('FUNGI-specific', pts1.function) end def test_general pts1 = Bio::PTS1.new_with_general_function assert_equal('GENERAL', pts1.function) end end class TestPTS1 < Test::Unit::TestCase def setup @serv = Bio::PTS1.new end def test_function_set @serv.function("GENERAL") assert_equal("GENERAL", @serv.function) end def test_function_show assert_equal("METAZOA-specific", @serv.function) end def test_function_set_number_1 @serv.function(1) assert_equal("METAZOA-specific", @serv.function) end def test_function_set_number_2 @serv.function(2) assert_equal("FUNGI-specific", @serv.function) end def test_function_set_number_3 @serv.function(3) assert_equal("GENERAL", @serv.function) end end end bio-1.4.3.0001/test/unit/bio/appl/mafft/0000755000004100000410000000000012200110570017423 5ustar 
www-datawww-databio-1.4.3.0001/test/unit/bio/appl/mafft/test_report.rb0000644000004100000410000000373712200110570022334 0ustar www-datawww-data# # test/unit/bio/appl/mafft/test_report.rb - Unit test for Bio::Alignment::MultiFastaFormat # # Copyright:: Copyright (C) 2007 # 2005 Naohisa Goto # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/mafft/report' module Bio class TestAlignmentMultiFastaFormat < Test::Unit::TestCase def setup @na = Bio::Alignment::MultiFastaFormat.new <<__END_OF_NASEQS__ >naseq1 TAGATTTCGAATTTCTAGnGAACCGAACCGkACAGCCTTACATyATTCAGACCAATGTGT TACCAATTCGAGTATACAAGAACAGTGATAAGGTACCAAACAACGACTTCTTCCCGAACC >naseq2 TAGATTTCGAATCTAGGGAATCCGATACGGACAGCCTTACATTATTCAGACCAATGTGTA TACCAATTCGAGAATACAAGAACGTGATAAGGTACCCAAACAACGACTTCTTCCCGAACC >naseq3 TAGATTTCGAATCTAGGGAATCCGATACCGGACAGCCTTACATTATTCAGACCAATGTGT TACCAATTCGAGAATACAAGAACGTGATAAGGTACCCAAACAACGACTTCTTCCCGAACC __END_OF_NASEQS__ @aa = Bio::Alignment::MultiFastaFormat.new <<__END_OF_AASEQS__ >aaseq1 MVHWTAEEKQLITGLWGKVNVAECGAEALARLLIVYPWTQRFFASFGNLSSPTAILGNPMVRAHGKKVLT >aaseq2 MLTAEEKAAVTGFWGKVKVDEVGAEALGRLLVVYPWTQRFFEHFGDLSSADAVMNNAKVKAHGKKVLDSF >aaseq3 MVHLTDAEKSAVSCLWAKVNPDEVGGEALGRLLVVYPWTQRYFDSFGDLSSASAIMGNPKVKAHGKKVIT >aaseq4 MVHLTDAEKAAVNGLWGKVNPDDVGGEALGRLLVVYPWTQRYFDSFGDLSSASAIMGNPKVKAHGKKVIN __END_OF_AASEQS__ end #def setup def test_alignment assert_equal(120, @na.alignment.alignment_length) assert_equal(70, @aa.alignment.alignment_length) end def test_entries assert_equal(3, @na.entries.size) assert_equal(4, @aa.entries.size) end def test_determine_seq_method @na.alignment assert_equal(:naseq, @na.instance_eval { @seq_method }) @aa.alignment assert_equal(:aaseq, @aa.instance_eval { @seq_method }) end end #class TestAlignmentMultiFastaFormat end #module Bio 
bio-1.4.3.0001/test/unit/bio/appl/meme/0000755000004100000410000000000012200110570017251 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/appl/meme/mast/0000755000004100000410000000000012200110570020215 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/appl/meme/mast/test_report.rb0000644000004100000410000000222612200110570023116 0ustar www-datawww-data# # test/unit/bio/appl/meme/test_mast.rb - Unit test for Bio::Meme::Mast::Report # # Copyright:: Copyright (C) 2008 Adam Kraut # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/meme/mast/report' module Bio module TestMastReportData TEST_DATA = Pathname.new(File.join(BioRubyTestDataPath, 'meme')).cleanpath.to_s def self.example_mast_output File.join TEST_DATA, 'mast.out' end end class TestMastReport < Test::Unit::TestCase TEST_DATA = TestMastReportData::TEST_DATA def setup @report = Meme::Mast::Report.new(File.read(TestMastReportData.example_mast_output)) end def test_report_has_motifs obj = @report.motifs.first assert_kind_of(Meme::Motif, obj) end def test_parse_hit_list_with_bad_data data = "#heres\n2 bad data lines\n" assert_raises(RuntimeError) { Meme::Mast::Report.new(data) } end end # TestMastReport end # Bio bio-1.4.3.0001/test/unit/bio/appl/meme/test_motif.rb0000644000004100000410000000177512200110570021765 0ustar www-datawww-data# # test/unit/bio/appl/meme/test_motif.rb - Unit test for Bio::Meme::Motif # # Copyright:: Copyright (C) 2008 Adam Kraut # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/meme/motif' module Bio class TestMotif < Test::Unit::TestCase def setup 
@motif = Meme::Motif.new("P12345", "A", "1", "10", "30", "1.0e-100") end def test_creation_and_attributes assert_equal("P12345", @motif.sequence_name) assert_equal("A", @motif.strand) assert_equal(1, @motif.motif) assert_equal(10, @motif.start_pos) assert_equal(30, @motif.end_pos) assert_equal(1.0e-100, @motif.pvalue) end def test_length assert_equal(20, @motif.length) end end # TestMotif end # Bio bio-1.4.3.0001/test/unit/bio/appl/meme/test_mast.rb0000644000004100000410000000537612200110570021614 0ustar www-datawww-data# # test/unit/bio/appl/meme/test_mast.rb - Unit test for Bio::Meme::Mast # # Copyright:: Copyright (C) 2008 Adam Kraut # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/meme/mast' module Bio module TestMastData TEST_DATA = Pathname.new(File.join(BioRubyTestDataPath, 'meme')).cleanpath.to_s def self.example_mfile File.join TEST_DATA, 'meme.out' end def self.dummy_binary File.join TEST_DATA, 'mast' end def self.dummy_db File.join TEST_DATA, 'db' end end class TestMast < Test::Unit::TestCase TEST_DATA = TestMastData::TEST_DATA def setup @example_mfile = TestMastData.example_mfile @binary = TestMastData.dummy_binary @db = TestMastData.dummy_db @mast = Meme::Mast.new(@binary) end def test_config_defaults assert_equal(true, @mast.options[:hit_list]) assert_equal(true, @mast.options[:stdout]) assert_equal(true, @mast.options[:nostatus]) end def test_minimal_config options = {:mfile => @example_mfile, :d => @db} @mast.config(options) assert_equal(@db, @mast.options[:d]) assert_equal(@example_mfile, @mast.options[:mfile]) end def test_more_config options = {:mfile => @example_mfile, :d => @db, :dna => true} @mast.config(options) assert_equal(true, @mast.options[:dna]) end def test_check_options_with_valid_opts options = {:mfile => 
@example_mfile, :d => @db} @mast.config(options) assert_nothing_raised { @mast.check_options } end def test_check_options_with_invalid_opts options = {:mfile => @example_mfile, :d => @db, :bad => "option"} @mast.config(options) assert_raises(ArgumentError) { @mast.check_options } end def test_check_options_with_empty_opts # and <-d> are required options = {} @mast.config(options) assert_raises(ArgumentError) { @mast.check_options } end # this is ugly def test_command_to_be_run options = {:mfile => @example_mfile, :d => @db} @mast.config(options) assert_equal(true, @mast.cmd.include?("#{@binary} #{@example_mfile} -d #{@db}") ) assert_equal(true, @mast.cmd.include?('-hit_list') ) assert_equal(true, @mast.cmd.include?('-stdout') ) assert_equal(true, @mast.cmd.include?('-nostatus') ) end # this would require a working executable and a database def test_run # options = {:mfile => @example_mfile, :d => @db} # @mast.config(options) # report = @mast.run # assert_kind_of(Meme::Mast::Report, report) end end # TestMast end # Bio bio-1.4.3.0001/test/unit/bio/appl/paml/0000755000004100000410000000000012200110570017257 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/appl/paml/test_codeml.rb0000644000004100000410000001247712200110570022121 0ustar www-datawww-data# # test/unit/bio/appl/paml/test_codeml.rb - Unit test for Bio::PAML::Codeml # # Copyright:: Copyright (C) 2008 Michael D. 
Barton # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/paml/codeml' module Bio; module TestPAMLCodeml module TestCodemlData TEST_DATA = Pathname.new(File.join(BioRubyTestDataPath, 'paml', 'codeml')).cleanpath.to_s def self.example_control File.join TEST_DATA, 'control_file.txt' end end #module TestCodemlData class TestCodemlInitialize < Test::Unit::TestCase def test_new_with_one_argument factory = Bio::PAML::Codeml.new('echo') assert_instance_of(Bio::PAML::Codeml, factory) assert_equal('echo', factory.instance_eval { @program }) end def test_new_with_two_argument factory = Bio::PAML::Codeml.new('echo', { :test => 'value' }) assert_instance_of(Bio::PAML::Codeml, factory) assert_equal('echo', factory.instance_eval { @program }) assert_equal('value', factory.parameters[:test]) end def test_new_with_parameters factory = Bio::PAML::Codeml.new(nil, { :test => 'value' }) assert_instance_of(Bio::PAML::Codeml, factory) assert_equal('codeml', factory.instance_eval { @program }) assert_equal('value', factory.parameters[:test]) end def test_new_without_argument factory = Bio::PAML::Codeml.new assert_instance_of(Bio::PAML::Codeml, factory) assert_equal('codeml', factory.instance_eval { @program }) end end #class TestCodemlInitialize class TestCodeml < Test::Unit::TestCase def setup @codeml = Bio::PAML::Codeml.new end def test_parameters params = { :verbose => 1 } @codeml.parameters = params assert_equal(params, @codeml.parameters) end def test_load_parameters str = " seqfile = test.aa \n verbose = 1 \n" params = { :seqfile => 'test.aa', :verbose => '1' } assert_equal(params, @codeml.load_parameters(str)) end def test_set_default_parameters assert_equal(Bio::PAML::Codeml::DEFAULT_PARAMETERS, @codeml.set_default_parameters) # modifying parameters should not 
affect DEFAULT_PARAMETERS @codeml.parameters[:only_for_test] = 'this is test' assert_not_equal(Bio::PAML::Codeml::DEFAULT_PARAMETERS, @codeml.parameters) end def test_dump_parameters params = { :seqfile => 'test.aa', :verbose => '1' } @codeml.parameters = params assert_equal("seqfile = test.aa\nverbose = 1\n", @codeml.dump_parameters) end end #class TestCodeml class TestCodemlControlGeneration < Test::Unit::TestCase TEST_DATA = TestCodemlData::TEST_DATA def generate_control_file @tempfile_control = Tempfile.new('codeml_control') @tempfile_control.close(false) @tempfile_outfile = Tempfile.new('codeml_test') @tempfile_outfile.close(false) test_control = @tempfile_control.path Bio::PAML::Codeml.create_control_file({ :model => 1, :fix_kappa => 1, :aaRatefile => File.join(TEST_DATA, 'wag.dat'), :seqfile => File.join(TEST_DATA, 'abglobin.aa'), :treefile => File.join(TEST_DATA, 'abglobin.trees'), :outfile => @tempfile_outfile.path, }, test_control) test_control end private :generate_control_file def setup @example_control = generate_control_file end def teardown @tempfile_control.close(true) @tempfile_outfile.close(true) end def test_control_file_generated assert_not_nil(File.size?(@example_control)) end def test_expected_parameters_set_in_control_file produced_control = File.open(@example_control) do |f| f.inject(Hash.new) do |hash,line| hash.store(*line.strip.split(' = ')) hash end end assert_equal(File.join(TEST_DATA, 'abglobin.aa'), produced_control['seqfile']) assert_equal('1', produced_control['fix_kappa']) assert_equal('1', produced_control['model']) end end #class TestCodemlControlGeneration class TestControlFileUsage < Test::Unit::TestCase def setup @codeml = Bio::PAML::Codeml.new @codeml.load_parameters(File.read(TestCodemlData.example_control)) end def test_parameters_should_be_loaded_from_control assert_not_nil(@codeml.parameters) end def test_correct_parameters_should_be_loaded assert_equal('abglobin.aa', @codeml.parameters[:seqfile]) assert_equal('1', 
@codeml.parameters[:fix_kappa]) assert_equal('1', @codeml.parameters[:model]) end end #class TestControlFileUsage class TestExpectedErrorsThrown < Test::Unit::TestCase def test_error_thrown_if_seqfile_does_not_specified codeml = Bio::PAML::Codeml.new('echo') codeml.load_parameters(File.read(TestCodemlData.example_control)) codeml.parameters[:seqfile] = nil assert_raises RuntimeError do codeml.query_by_string() end end end #class TestExpectedErrorsThrown end; end #module TestPAMLCodeml; module Bio bio-1.4.3.0001/test/unit/bio/appl/paml/codeml/0000755000004100000410000000000012200110570020522 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/appl/paml/codeml/test_rates.rb0000644000004100000410000000263112200110570023226 0ustar www-datawww-data# # test/unit/bio/appl/paml/codeml/test_rates.rb - Unit test for Bio::PAML::Codeml::Rates # # Copyright:: Copyright (C) 2008 Michael D. Barton # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/paml/codeml/rates' module Bio; module TestPAMLCodeml class TestCodemlRates < Test::Unit::TestCase TEST_DATA = Pathname.new(File.join(BioRubyTestDataPath, 'paml', 'codeml')).cleanpath.to_s def setup str = File.read(File.join(TEST_DATA, 'rates')) @example_rates = Bio::PAML::Codeml::Rates.new(str) end def test_rates_first_position assert_equal('***M', @example_rates.first[:data]) assert_equal(1, @example_rates.first[:rate]) assert_equal(1, @example_rates.first[:freq]) end def test_rates_hundred_and_fiftieth_position assert_equal('GGGG', @example_rates[150][:data]) assert_equal(0.828, @example_rates[150][:rate]) assert_equal(8, @example_rates[150][:freq]) end def test_rates_last_position assert_equal('PHPP', @example_rates.last[:data]) assert_equal(1.752, @example_rates.last[:rate]) assert_equal(1, 
@example_rates.last[:freq]) end end end; end #module TestPAMLCodeml; module Bio bio-1.4.3.0001/test/unit/bio/appl/paml/codeml/test_report_single.rb0000644000004100000410000000245312200110570024766 0ustar www-datawww-data# # test/unit/bio/appl/paml/codeml/test_report_single.rb - Unit test for Bio::PAML::Codeml::Report # # Copyright:: Copyright (C) 2008 Michael D. Barton # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/paml/codeml/report' module Bio; module TestPAMLCodeml class TestCodemlReport < Test::Unit::TestCase TEST_DATA = Pathname.new(File.join(BioRubyTestDataPath, 'paml', 'codeml')).cleanpath.to_s def setup str = File.read(File.join(TEST_DATA, 'output.txt')) @example_report = Bio::PAML::Codeml::Report.new(str) end def test_tree_log_likelihood assert_equal(-1817.465211, @example_report.tree_log_likelihood) end def test_tree_length assert_equal(0.77902, @example_report.tree_length) end def test_alpha assert_equal(0.58871, @example_report.alpha) end def test_tree tree = "(((rabbit: 0.082889, rat: 0.187866): 0.038008, human: 0.055050): 0.033639, goat-cow: 0.096992, marsupial: 0.284574);" assert_equal(tree, @example_report.tree) end end end; end #module TestPAMLCodeml; module Bio bio-1.4.3.0001/test/unit/bio/appl/paml/codeml/test_report.rb0000644000004100000410000001607712200110570023434 0ustar www-datawww-data# # = test/unit/bio/appl/paml/codeml/test_report.rb - Unit tests for Codeml report parser # # Copyright:: Copyright (C) 2008-2010 # Michael D. 
Barton , # Pjotr Prins # # License:: The Ruby License # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/appl/paml/codeml/report' module Bio # The test code is copied from the examples of Bio::PAML::Codeml::Report # described in lib/bio/appl/paml/codeml/report.rb. module TestPAMLCodemlReportWithModels FILENAME_M0M3 = File.join(BioRubyTestDataPath, 'paml/codeml/models/results0-3.txt') class TestCodemlReportM0M3 < Test::Unit::TestCase def setup buf = File.read(FILENAME_M0M3) @c = Bio::PAML::Codeml::Report.new(buf) end # Invoke Bioruby's PAML codeml parser, after having read the contents # of the codeml result file into _buf_ (for example using File.read) def test_initialize assert_instance_of(Bio::PAML::Codeml::Report, @c) end # Do we have two models? def test_models assert_equal(2, @c.models.size) assert_equal("M0", @c.models[0].name) assert_equal("M3", @c.models[1].name) end # Check the general information def test_num_sequences assert_equal(6, @c.num_sequences) end def test_num_codons assert_equal(134, @c.num_codons) end def test_descr assert_equal("M0-3", @c.descr) end # Test whether the second model M3 is significant over M0 def test_significant assert_equal(true, @c.significant) end # Next take the overall posterior analysis def test_nb_sites assert_equal(44, @c.nb_sites.size) assert_equal([17, "I", 0.988, 3.293], @c.nb_sites[0].to_a) end # We also provide the raw buffers to adhere to the principle of # unexpected use. 
Test the raw buffers for content: def test_header assert_equal(1, @c.header.to_s =~ /seed/) end def test_footer assert_equal(16, @c.footer.to_s =~ /Bayes/) end end #class TestCodemlReportM0M3 class TestCodemlModelM0M3 < Test::Unit::TestCase # Now fetch the results of the first model M0, and check its values def setup buf = File.read(FILENAME_M0M3) c = Bio::PAML::Codeml::Report.new(buf) @m0 = c.models[0] @m3 = c.models[1] end def test_tree_length assert_equal(1.90227, @m0.tree_length) end def test_lnL assert_equal(-1125.800375, @m0.lnL) end def test_omega assert_equal(0.58589, @m0.omega) end def test_dN_dS assert_equal(0.58589, @m0.dN_dS) end def test_kappa assert_equal(2.14311, @m0.kappa) end def test_alpha assert_equal(nil, @m0.alpha) end # We also have a tree (as a string) def test_tree str = "((((PITG_23265T0: 0.000004, PITG_23253T0: 0.400074): 0.000004, PITG_23257T0: 0.952614): 0.000004, PITG_23264T0: 0.445507): 0.000004, PITG_23267T0: 0.011814, PITG_23293T0: 0.092242);" assert_equal(str, @m0.tree) end # Check the M3 and its specific values def test_m3_lnL assert_equal(-1070.964046, @m3.lnL) end def test_m3_classes assert_equal(3, @m3.classes.size) assert_equal({:w=>0.00928, :p=>0.56413}, @m3.classes[0]) end def test_m3_tree str = "((((PITG_23265T0: 0.000004, PITG_23253T0: 0.762597): 0.000004, PITG_23257T0: 2.721710): 0.000004, PITG_23264T0: 0.924326): 0.014562, PITG_23267T0: 0.000004, PITG_23293T0: 0.237433);" assert_equal(str, @m3.tree) end def test_to_s assert_equal(3, @m0.to_s =~ /one-ratio/) end def test_m3_to_s assert_equal(3, @m3.to_s =~ /discrete/) end end #class TestCodemlModelM0M3 class TestCodemlPositiveSiteM0M3 < Test::Unit::TestCase def setup buf = File.read(FILENAME_M0M3) c = Bio::PAML::Codeml::Report.new(buf) @codon = c.nb_sites[0] end def test_position assert_equal(17, @codon.position) end def test_probability assert_equal(0.988, @codon.probability) end def test_dN_dS assert_equal(3.293, @codon.dN_dS) end # with aliases def test_p 
assert_equal(0.988, @codon.p) end def test_w assert_equal(3.293, @codon.w) end end #class TestCodemlPositiveSiteM0M3 class TestCodemlPositiveSitesM0M3 < Test::Unit::TestCase def setup buf = File.read(FILENAME_M0M3) c = Bio::PAML::Codeml::Report.new(buf) @nb_sites = c.nb_sites end # Now we generate special string 'graph' for positive selection. The # following returns a string the length of the input alignment and # shows the locations of positive selection: def test_graph str = " ** * * *" assert_equal(str, @nb_sites.graph[0..32]) end # And with dN/dS (high values are still an asterisk *) def test_graph_omega str = " 3* 6 6 2" assert_equal(str, @nb_sites.graph_omega[0..32]) end end #class TestCodemlPositiveSitesM0M3 # Finally we do a test on an M7+M8 run. FILENAME_M7M8 = File.join(BioRubyTestDataPath, 'paml/codeml/models/results7-8.txt') class TestCodemlReportM7M8 < Test::Unit::TestCase def setup buf = File.read(FILENAME_M7M8) @c = Bio::PAML::Codeml::Report.new(buf) end # Do we have two models? def test_models assert_equal(2, @c.models.size) assert_equal("M7", @c.models[0].name) assert_equal("M8", @c.models[1].name) end # Assert the results are significant def test_significant assert_equal(true, @c.significant) end # Compared to M0/M3 there are some differences. The important ones # are the parameters and the full Bayesian result available for M7/M8. # This is the naive Bayesian result: def test_nb_sites assert_equal(10, @c.nb_sites.size) end # And this is the full Bayesian result: def test_sites assert_equal(30, @c.sites.size) array = [17, "I", 0.672, 2.847] assert_equal(array, @c.sites[0].to_a) str = " ** * * *" assert_equal(str, @c.sites.graph[0..32]) # Note the differences of omega with earlier M0-M3 naive Bayesian # analysis: str2 = " 24 3 3 2" assert_equal(str2, @c.sites.graph_omega[0..32]) # The locations are the same, but the omega differs. 
end end #class TestCodemlReportM7M8 end #module TestPAMLCodemlReportWithModels end #module Bio bio-1.4.3.0001/test/unit/bio/test_pathway.rb0000644000004100000410000003442012200110570020436 0ustar www-datawww-data# # test/bio/tc_pathway.rb - Unit test for Bio::Pathway # # Copyright:: Copyright (C) 2004 # Moses Hohman # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 2, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/pathway' module Bio class TestMyGraph < Test::Unit::TestCase def test_cliquishness graph = Pathway.new([ Relation.new(1, 3, 1), Relation.new(2, 3, 1), Relation.new(1, 5, 1), Relation.new(2, 6, 1), Relation.new(3, 6, 1), Relation.new(4, 6, 1), Relation.new(5, 6, 1), ], true) assert_equal(0, graph.cliquishness(1), "1's cliquishness wrong") assert_equal(1, graph.cliquishness(2), "2's cliquishness wrong") assert_in_delta(0.33, graph.cliquishness(3), 0.01, "3's cliquishness wrong") # Because cliquishness (clustering coefficient) for a node # that has only one neighbor node is undefined, test for # node 4 is commented out. 
#assert_equal(1, graph.cliquishness(4), "4's cliquishness wrong") assert_equal(0, graph.cliquishness(5), "5's cliquishness wrong") assert_in_delta(0.16, graph.cliquishness(6), 0.01, "6's cliquishness wrong") end end class TestRelation < Test::Unit::TestCase def test_comparison_operator r1 = Relation.new('a', 'b', 1) r2 = Relation.new('b', 'a', 1) r3 = Relation.new('b', 'a', 2) r4 = Relation.new('a', 'b', 1) assert(r1 === r2, "r1 === r2 not true, === not symmetric wrt nodes") assert(!(r1 === r3), "r1 === r3 not false, === does not take edge into account") assert(r1 === r4, "r1 === r4 not true, === is not reflexive wrt nodes") assert_equal([r1, r3], [ r1, r2, r3, r4 ].uniq, "uniq did not have expected effect") assert(r1.eql?(r2), "r1 not eql r2") assert(!r3.eql?(r2), "r3 eql to r2") end end class TestSampleGraph < Test::Unit::TestCase TheInfinity = 1/0.0 # Sample Graph : # +----------------+ # | | # v | # +---------(q)-->(t)------->(y)<----(r) # | | | ^ | # v | v | | # +--(s)<--+ | (x)<---+ (u)<-----+ # | | | | | # v | | v | # (v)----->(w)<---+ (z)----+ def setup @data = [ [ 'q', 's', 1, ], [ 'q', 't', 1, ], [ 'q', 'w', 1, ], [ 'r', 'u', 1, ], [ 'r', 'y', 1, ], [ 's', 'v', 1, ], [ 't', 'x', 1, ], [ 't', 'y', 1, ], [ 'u', 'y', 1, ], [ 'v', 'w', 1, ], [ 'w', 's', 1, ], [ 'x', 'z', 1, ], [ 'y', 'q', 1, ], [ 'z', 'x', 1, ], ] @graph = Pathway.new(@data.collect { |x| Relation.new(*x) }) end def test_to_matrix matrix = @graph.to_matrix(0) index = @graph.index # expected values source_matrix = [ #v w x y z q r s t u [0, 1, 0, 0, 0, 0, 0, 0, 0, 0], #v [0, 0, 0, 0, 0, 0, 0, 1, 0, 0], #w [0, 0, 0, 0, 1, 0, 0, 0, 0, 0], #x [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], #y [0, 0, 1, 0, 0, 0, 0, 0, 0, 0], #z [0, 1, 0, 0, 0, 0, 0, 1, 1, 0], #q [0, 0, 0, 1, 0, 0, 0, 0, 0, 1], #r [1, 0, 0, 0, 0, 0, 0, 0, 0, 0], #s [0, 0, 1, 1, 0, 0, 0, 0, 0, 0], #t [0, 0, 0, 1, 0, 0, 0, 0, 0, 0] #u ] source_index = { "v"=>0, "w"=>1, "x"=>2, "y"=>3, "z"=>4, "q"=>5, "r"=>6, "s"=>7, "t"=>8, "u"=>9 } # test index size 
assert_equal(10, source_index.size) # test index keys assert_equal(source_index.keys.sort, index.keys.sort) # test index values assert_equal(source_index.values.sort, index.values.sort) # prepare expected matrix ary = Array.new(index.size) ary.collect! { |a| Array.new(index.size) } index.each do |row_k, row_v| src_row = source_index[row_k] index.each do |col_k, col_v| src_col = source_index[col_k] ary[row_v][col_v] = source_matrix[src_row][src_col] end end expected_matrix = Matrix.rows(ary) # test the matrix assert_equal(expected_matrix, matrix, "matrix wrong") end def test_to_matrix_fixed_index # begin workaround removing depencency to order of Hash#each %w( v w x y z q r s t u ).each_with_index do |x, i| @graph.index[x] = i end # end workaround removing depencency to order of Hash#each assert_equal(Matrix[ [0, 1, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 1, 1, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0] ], @graph.to_matrix(0), "matrix wrong") assert_equal({"v"=>0,"w"=>1,"x"=>2,"y"=>3,"z"=>4,"q"=>5,"r"=>6,"s"=>7,"t"=>8,"u"=>9}, @graph.index, "node --> matrix index order wrong") end def test_dump_matrix # begin workaround removing depencency to order of Hash#each %w( v w x y z q r s t u ).each_with_index do |x, i| @graph.index[x] = i end # end workaround removing depencency to order of Hash#each dumped = "[" + "# v, w, x, y, z, q, r, s, t, u\n" + " [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],\n" + # v " [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],\n" + # w " [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],\n" + # x " [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],\n" + # y " [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],\n" + # z " [0, 1, 0, 0, 0, 0, 0, 1, 1, 0],\n" + # q " [0, 0, 0, 1, 0, 0, 0, 0, 0, 1],\n" + # r " [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n" + # s " [0, 0, 1, 1, 0, 0, 0, 0, 0, 0],\n" + # t " [0, 0, 0, 1, 0, 0, 0, 
0, 0, 0]\n]" # u assert_equal(dumped, @graph.dump_matrix(0)) end def test_dump_list # begin workaround removing depencency to order of Hash#each %w( v w x y z q r s t u ).each_with_index do |x, i| @graph.index[x] = i end # end workaround removing depencency to order of Hash#each dumped = "v => w (1)\n" + "w => s (1)\n" + "x => z (1)\n" + "y => q (1)\n" + "z => x (1)\n" + "q => w (1), s (1), t (1)\n" + "r => y (1), u (1)\n" + "s => v (1)\n" + "t => x (1), y (1)\n" + "u => y (1)\n" assert_equal(dumped, @graph.dump_list) end def test_extract_subgraph_by_label hash = { 'q' => "L1", 's' => "L2", 'v' => "L3", 'w' => "L4" } @graph.label = hash subgraph = @graph.subgraph # begin workaround removing depencency to order of Hash#each %w( v w q s ).each_with_index do |x, i| subgraph.index[x] = i end # end workaround removing depencency to order of Hash#each dumped = "v => w (1)\n" + "w => s (1)\n" + "q => w (1), s (1)\n" + "s => v (1)\n" assert_equal(dumped, subgraph.dump_list) end def test_extract_subgraph_by_list subgraph = @graph.subgraph(['q', 't', 'x', 'y', 'z']) # begin workaround removing depencency to order of Hash#each %w( x y z q t ).each_with_index do |x, i| subgraph.index[x] = i end # end workaround removing depencency to order of Hash#each dumped = "x => z (1)\n" + "y => q (1)\n" + "z => x (1)\n" + "q => t (1)\n" + "t => x (1), y (1)\n" assert_equal(dumped, subgraph.dump_list) end def test_extract_subgraph_retains_disconnected_nodes assert_equal(4, @graph.subgraph(['r', 's', 'v', 'w']).nodes, "wrong number of nodes") end # Sample Graph : # +----------------+ # | | # v | # +---------(q)-->(t)------->(y)<----(r) # | | | ^ | # v | v | | # +--(s)<--+ | (x)<---+ (u)<-----+ # | | | | | # v | | v | # (v)----->(w)<---+ (z)----+ def test_undirected_cliquishness @graph.undirected assert_in_delta(0.33, @graph.cliquishness('q'), 0.01) end def test_small_world_aka_node_degree_histogram expected = {1=>7, 2=>2, 3=>1} expected.default = 0 assert_equal(expected, 
@graph.small_world) end # Sample Graph : # +----------------+ # | | # v | # +---------(q)-->(t)------->(y)<----(r) # | | | ^ | # v | v | | # +--(s)<--+ | (x)<---+ (u)<-----+ # | | | | | # v | | v | # (v)----->(w)<---+ (z)----+ def test_breadth_first_search distances, predecessors = @graph.breadth_first_search('q') assert_equal({ "v"=>2, "w"=>1, "x"=>2, "y"=>2, "z"=>3, "q"=>0, "s"=>1, "t"=>1}, distances, "distances wrong") assert_equal({ "v"=>"s", "w"=>"q", "x"=>"t", "y"=>"t", "z"=>"x", "q"=>nil, "s"=>"q", "t"=>"q"}, predecessors, "predecessors wrong") end def test_bfs_shortest_path step, path = @graph.bfs_shortest_path('y', 'w') assert_equal(2, step, "wrong # of steps") assert_equal(["y", "q", "w"], path, "wrong path") end def test_depth_first_search # fixing node order to aviod dependency of Hash#each_key %w( v w x y z q r s t u ).each_with_index do |x, i| @graph.index[x] = i end # exec dfs timestamp, tree, back, cross, forward = @graph.depth_first_search assert_equal({ "v"=>[1, 6], "w"=>[2, 5], "x"=>[7, 10], "y"=>[11, 16], "z"=>[8, 9], "q"=>[12, 15], "r"=>[17, 20], "s"=>[3, 4], "t"=>[13, 14], "u"=>[18, 19]}, timestamp, "timestamps wrong") assert_equal({ "w"=>"v", "z"=>"x", "q"=>"y", "s"=>"w", "t"=>"q", "u"=>"r"}, tree, "tree edges wrong") assert_equal({ "z"=>"x", "s"=>"v", "t"=>"y"}, back, "back edges wrong") assert_equal({ "q"=>"s", "r"=>"y", "t"=>"x", "u"=>"y"}, cross, "cross edges wrong") assert_equal({}, forward, "forward edges wrong") end # Sample Graph : # +----------------+ # | | # v | # +---------(q)-->(t)------->(y)<----(r) # | | | ^ | # v | v | | # +--(s)<--+ | (x)<---+ (u)<-----+ # | | | | | # v | | v | # (v)----->(w)<---+ (z)----+ def test_dijkstra distances, predecessors = @graph.dijkstra('q') assert_equal({ "v"=>2, "w"=>1, "x"=>2, "y"=>2, "z"=>3, "q"=>0, "r"=>TheInfinity, "s"=>1, "t"=>1, "u"=>TheInfinity}, distances, "distances wrong") assert_equal({ "v"=>"s", "w"=>"q", "x"=>"t", "y"=>"t", "z"=>"x", "q"=>nil, "r"=>nil, "s"=>"q", "t"=>"q", "u"=>nil}, 
predecessors, "predecessors wrong") end def test_bellman_ford distances, predecessors = @graph.bellman_ford('q') assert_equal({ "v"=>2, "w"=>1, "x"=>2, "y"=>2, "z"=>3, "q"=>0, "r"=>TheInfinity, "s"=>1, "t"=>1, "u"=>TheInfinity}, distances, "distances wrong") assert_equal({ "v"=>"s", "w"=>"q", "x"=>"t", "y"=>"t", "z"=>"x", "q"=>nil, "r"=>nil, "s"=>"q", "t"=>"q", "u"=>nil}, predecessors, "predecessors wrong") end end class TestTopologicalSort < Test::Unit::TestCase # # Professor Bumstead topologically sorts his clothing when getting dressed. # # "undershorts" "socks" # | | | # v | v "watch" # "pants" --+-------> "shoes" # | # v # "belt" <----- "shirt" ----> "tie" ----> "jacket" # | ^ # `---------------------------------------' # def test_dfs_topological_sort dag = Pathway.new([ Relation.new("undershorts", "pants", true), Relation.new("undershorts", "shoes", true), Relation.new("socks", "shoes", true), Relation.new("watch", "watch", true), Relation.new("pants", "belt", true), Relation.new("pants", "shoes", true), Relation.new("shirt", "belt", true), Relation.new("shirt", "tie", true), Relation.new("tie", "jacket", true), Relation.new("belt", "jacket", true), ]) sorted = dag.dfs_topological_sort assert(sorted.index("socks") < sorted.index("shoes"), "socks >= shoes") assert(sorted.index("undershorts") < sorted.index("pants"), "undershorts >= pants") assert(sorted.index("undershorts") < sorted.index("shoes"), "undershorts >= shoes") assert(sorted.index("pants") < sorted.index("shoes"), "pants >= shoes") assert(sorted.index("pants") < sorted.index("belt"), "pants >= belt") assert(sorted.index("shirt") < sorted.index("belt"), "shirt >= belt") assert(sorted.index("shirt") < sorted.index("tie"), "shirt >= tie") assert(sorted.index("belt") < sorted.index("jacket"), "belt >= jacket") assert(sorted.index("tie") < sorted.index("jacket"), "tie >= jacket") end end #TODO: verify the below class TestWeightedGraph < Test::Unit::TestCase # 'a' --> 'b' # | 1 | 3 # |5 v # `----> 'c' def 
setup r1 = Relation.new('a', 'b', 1) r2 = Relation.new('a', 'c', 5) r3 = Relation.new('b', 'c', 3) @w_graph = Pathway.new([r1, r2, r3]) end def test_dijkstra_on_weighted_graph distances, predecessors = @w_graph.dijkstra('a') assert_equal({ "a"=>0, "b"=>1, "c"=>4}, distances, "distances wrong") assert_equal({ "a"=>nil, "b"=>"a", "c"=>"b"}, predecessors, "predecessors wrong") end def test_bellman_ford_on_negative_weighted_graph # ,-- 'a' --> 'b' # | | 1 | 3 # | |5 v # | `----> 'c' # | ^ # |2 | -5 # `--> 'd' ----' r4 = Relation.new('a', 'd', 2) r5 = Relation.new('d', 'c', -5) @w_graph.append(r4) @w_graph.append(r5) distances, predecessors = @w_graph.bellman_ford('a') assert_equal({ "a"=>0, "b"=>1, "c"=>-3, "d"=>2}, distances, "distances wrong") assert_equal({ "a"=>nil, "b"=>"a", "c"=>"d", "d"=>"a"}, predecessors, "predecessors wrong") end end end bio-1.4.3.0001/test/unit/bio/test_sequence.rb0000644000004100000410000002240312200110570020567 0ustar www-datawww-data# # test/unit/bio/test_sequence.rb - Unit test for Bio::Sequencce # # Copyright:: Copyright (C) 2004 # Moses Hohman # 2006 Mitsuteru C. 
Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 2, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/sequence' module Bio class TestSequence < Test::Unit::TestCase def setup @na = Sequence::NA.new('atgcatgcatgcatgcaaaa') @rna = Sequence::NA.new('augcaugcaugcaugcaaaa') @aa = Sequence::AA.new('ACDEFGHIKLMNPQRSTVWYU') end # "main" method tests translated into unit tests # Test Sequence::NA.new def test_DNA_new_blank_sequence sequence = Sequence::NA.new('') assert_equal(0, sequence.size) end def test_DNA_new_sequence_downcases_symbols string = 'atgcatgcATGCATGCAAAA' sequence = Sequence::NA.new(string) assert_equal(string.downcase, sequence.to_s) end def test_RNA_new_sequence string = 'augcaugcaugcaugcaaaa' sequence = Sequence::NA.new(string) assert_equal(string, sequence.to_s) end # added def test_DNA_new_sequence_removes_whitespace sequence = Sequence::NA.new("a g\tc\nt\ra") assert_equal("agcta", sequence) end # Test Sequence::AA.new def test_AA_new_blank_sequence sequence = Sequence::AA.new('') assert_equal(0, sequence.size) end def test_AA_new_sequence_all_legal_symbols string = 'ACDEFGHIKLMNPQRSTVWYU' sequence = Sequence::AA.new(string) assert_equal(string, sequence.to_s) end # added def test_AA_new_sequence_upcases_symbols string = 'upcase' sequence = Sequence::AA.new(string) assert_equal(string.upcase, sequence.to_s) end def test_AA_new_sequence_removes_whitespace sequence = Sequence::AA.new("S T\tR\nI\rP") assert_equal("STRIP", sequence) end # test element indexing def test_element_reference_operator_with_two_arguments sequence = Sequence::NA.new("atggggggtc") assert_equal("gggggg", sequence[2,6]) end # added def test_element_reference_operator_with_one_argument sequence = Sequence::NA.new("atggggggtc") assert_equal(?t, sequence[1]) end # Test Sequence#total def test_total sequence = 
Sequence::NA.new("catccagtccctggt") assert_equal(2346, sequence.total({'a'=>1000, 'g'=>100, 't'=>10, 'c'=>1})) end # Test Sequence#composition def test_dna_composition sequence = Sequence::NA.new("aggtttcccc") expected = {'a'=>1,'g'=>2,'t'=>3,'c'=>4} expected.default = 0 assert_equal(expected, sequence.composition) end def test_rna_composition sequence = Sequence::NA.new("agguuucccc") expected = {'a'=>1,'g'=>2,'u'=>3,'c'=>4} expected.default = 0 assert_equal(expected, sequence.composition) end # I don't get splicing # Test Sequence::NA#complement def test_dna_sequence_complement assert_equal('ttttgcatgcatgcatgcat', @na.complement) end def test_rna_sequence_complement assert_equal('uuuugcaugcaugcaugcau', @rna.complement) end def test_ambiguous_dna_sequence_complement assert_equal("nwsbvhdkmyrcgta", Sequence::NA.new('tacgyrkmhdbvswn').complement) end def test_ambiguous_rna_sequence_complement assert_equal("nwsbvhdkmyrcgua", Sequence::NA.new('uacgyrkmhdbvswn').complement) end # Test Sequence::NA#translate def test_dna_sequence_translate assert_equal("MHACMQ", @na.translate) end def test_rna_sequence_translate assert_equal("MHACMQ", @rna.translate) end # Test Sequence::NA#gc_percent def test_dna_gc_percent assert_equal(40, @na.gc_percent) end def test_rna_gc_percent assert_equal(40, @rna.gc_percent) end # Test Sequence::NA#illegal_bases def test_valid_dna_sequence_illegal_bases assert_equal([], @na.illegal_bases) end def test_invalid_nucleic_acid_illegal_bases string = 'tacgyrkmhdbvswn' expected = [] string[4..-1].each_byte { |val| expected << val.chr } assert_equal(expected.sort, Sequence::NA.new(string).illegal_bases) end def test_invalid_nucleic_acid_illegal_bases_more string = ('abcdefghijklmnopqrstuvwxyz-!%#$@') expected = [] 'bdefhijklmnopqrsvwxyz-!%#$@'.each_byte { |val| expected << val.chr } assert_equal(expected.sort, Sequence::NA.new(string).illegal_bases) end # Test Sequence::NA#molecular_weight def test_dna_molecular_weight assert_in_delta(6174.3974, 
@na.molecular_weight, 1e-5) end def test_rna_molecular_weight assert_in_delta(6438.2774, @rna.molecular_weight, 1e-5) end # Test Sequence::NA#to_re def test_dna_to_re assert_equal(/atgc[agr][tcy][acm][tgk][atgrwkd][atcwmyh][agcmrsv][tgcyskb][gcs][atw][atgcyrwskmbdhvn]/, Sequence::NA.new('atgcrymkdhvbswn').to_re) end def test_rna_to_re assert_equal(/augc[agr][ucy][acm][ugk][augrwkd][aucwmyh][agcmrsv][ugcyskb][gcs][auw][augcyrwskmbdhvn]/, Sequence::NA.new('augcrymkdhvbswn').to_re) end # Test Sequence::NA#names def test_nucleic_acid_names # It is a Bio::NucleicAcid feature. # assert_equal(["adenine", "cytosine", "guanine", "thymine", "uracil"], Sequence::NA.new('acgtu').names) assert(Sequence::NA.new('acgtu').names) end # Test Sequence::NA#pikachu def test_dna_pikachu assert_equal("pika", Sequence::NA.new('atgc').pikachu) end def test_rna_pikachu assert_equal("pika", Sequence::NA.new('augc').pikachu) end # Test Sequence::NA#randomize def test_randomize_dna_retains_composition assert_equal(@na.composition, @na.randomize.composition) end # this test has a neglibly small chance of failure def test_two_consecutive_dna_randomizations_not_equal assert_not_equal(@na.randomize, @na.randomize) end def test_randomize_dna_can_be_chained assert_equal(@na.composition, @na.randomize.randomize.composition) end def test_randomize_dna_with_block appended = "" @na.randomize {|x| appended << x} assert_equal(@na.composition, Sequence::NA.new(appended).composition) end # Test Sequence::NA.randomize(counts) def test_NA_randomize_with_counts counts = {'a'=>10,'c'=>20,'g'=>30,'u'=>40} counts.default = 0 assert_equal(counts, Sequence::NA.randomize(counts).composition) end def test_NA_randomize_with_counts_and_block appended = "" counts = {'a'=>10,'c'=>20,'g'=>30,'u'=>40} counts.default = 0 Sequence::NA.randomize(counts) {|x| appended << x} assert_equal(counts, Sequence::NA.new(appended).composition) end # Test Sequence::AA#codes def test_amino_acid_codes assert_equal(["Ala", "Cys", "Asp", 
"Glu", "Phe", "Gly", "His", "Ile", "Lys", "Leu", "Met", "Asn", "Pro", "Gln", "Arg", "Ser", "Thr", "Val", "Trp", "Tyr", "Sec"], @aa.codes) end # Test Sequence::AA#names def test_amino_acid_names assert_equal(["alanine", "cysteine", "aspartic acid", "glutamic acid", "phenylalanine", "glycine", "histidine", "isoleucine", "lysine", "leucine", "methionine", "asparagine", "proline", "glutamine", "arginine", "serine", "threonine", "valine", "tryptophan", "tyrosine", "selenocysteine"], @aa.names) end # Test Sequence::AA#molecular_weight def test_amino_acid_molecular_weight assert_in_delta(2395.725, @aa.subseq(1,20).molecular_weight, 0.0001) end #Test Sequence::AA#randomize def test_amino_acid_randomize_has_same_composition aaseq = 'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA' s = Sequence::AA.new(aaseq) assert_equal(s.composition, s.randomize.composition) end # this test has a neglibly small chance of failure def test_consecutive_amino_acid_randomizes_are_not_equal aaseq = 'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA' s = Sequence::AA.new(aaseq) assert_not_equal(s.randomize, s.randomize) end def test_amino_acid_randomize_can_be_chained aaseq = 'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA' s = Sequence::AA.new(aaseq) assert_equal(s.randomize.composition, s.randomize.randomize.composition) end end class TestNATranslate < Test::Unit::TestCase def setup @obj = Bio::Sequence::NA.new("AAA") end def test_translate assert_equal("K", @obj.translate) end def test_translate_1 assert_equal("K", @obj.translate(1)) end def test_translate_2 assert_equal("", @obj.translate(2)) end def test_translate_3 assert_equal("", @obj.translate(3)) end def test_translate_4 assert_equal("F", @obj.translate(4)) end def test_translate_5 assert_equal("", @obj.translate(5)) end def test_translate_6 assert_equal("", @obj.translate(6)) end end end bio-1.4.3.0001/test/unit/bio/shell/0000755000004100000410000000000012200110570016501 5ustar 
www-datawww-databio-1.4.3.0001/test/unit/bio/shell/plugin/0000755000004100000410000000000012200110570017777 5ustar www-datawww-databio-1.4.3.0001/test/unit/bio/shell/plugin/test_seq.rb0000644000004100000410000001061612200110570022157 0ustar www-datawww-data# # test/unit/bio/shell/plugin/test_seq.rb - Unit test for Bio::Shell plugin for biological sequence manipulations # # Copyright:: Copyright (C) 2005 # Mitsuteru Nakao # License:: The Ruby License # # $Id:$ # # loading helper routine for testing bioruby require 'pathname' load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'bioruby_test_helper.rb')).cleanpath.to_s # libraries needed for the tests require 'test/unit' require 'bio/shell' require 'bio/shell/plugin/seq' module Bio class TestShellPluginSeq < Test::Unit::TestCase include Bio::Shell Bio::Shell.config = {:color => false} def test_htmlseq str = 'ATGC' html =< a t g c
    END assert_equal(html, htmlseq(str)) end def test_naseq str = 'ACGT' assert_equal(Bio::Sequence, getseq(str).class) assert_equal(Bio::Sequence::NA, getseq(str).moltype) assert_equal('acgt', getseq(str).seq) end def test_aaseq str = 'WD' assert_equal(Bio::Sequence, getseq(str).class) assert_equal(Bio::Sequence::AA, getseq(str).moltype) assert_equal('WD', getseq(str).seq) end def test_na_seqstat naseq = 'atgcatgcatgc' output =<3' sequence : atgcatgcatgc 3'->5' sequence : gcatgcatgcat Translation 1 : MHAC Translation 2 : CMH Translation 3 : ACM Translation -1 : ACMH Translation -2 : HAC Translation -3 : MHA Length : 12 bp GC percent : 50 % Composition : a - 3 ( 25.00 %) c - 3 ( 25.00 %) g - 3 ( 25.00 %) t - 3 ( 25.00 %) Codon usage : *---------------------------------------------* | | 2nd | | | 1st |-------------------------------| 3rd | | | U | C | A | G | | |-------+-------+-------+-------+-------+-----| | U U |F 0.0%|S 0.0%|Y 0.0%|C 0.0%| u | | U U |F 0.0%|S 0.0%|Y 0.0%|C 25.0%| c | | U U |L 0.0%|S 0.0%|* 0.0%|* 0.0%| a | | UUU |L 0.0%|S 0.0%|* 0.0%|W 0.0%| g | |-------+-------+-------+-------+-------+-----| | CCCC |L 0.0%|P 0.0%|H 25.0%|R 0.0%| u | | C |L 0.0%|P 0.0%|H 0.0%|R 0.0%| c | | C |L 0.0%|P 0.0%|Q 0.0%|R 0.0%| a | | CCCC |L 0.0%|P 0.0%|Q 0.0%|R 0.0%| g | |-------+-------+-------+-------+-------+-----| | A |I 0.0%|T 0.0%|N 0.0%|S 0.0%| u | | A A |I 0.0%|T 0.0%|N 0.0%|S 0.0%| c | | AAAAA |I 0.0%|T 0.0%|K 0.0%|R 0.0%| a | | A A |M 25.0%|T 0.0%|K 0.0%|R 0.0%| g | |-------+-------+-------+-------+-------+-----| | GGGG |V 0.0%|A 0.0%|D 0.0%|G 0.0%| u | | G |V 0.0%|A 0.0%|D 0.0%|G 0.0%| c | | G GGG |V 0.0%|A 25.0%|E 0.0%|G 0.0%| a | | GG G |V 0.0%|A 0.0%|E 0.0%|G 0.0%| g | *---------------------------------------------* Molecular weight : 3701.61444 Protein weight : 460.565 // END $str = '' alias puts_orig puts def puts(*args) args.each do |obj| $str << obj.to_s end end seqstat(naseq) undef puts alias puts puts_orig assert_equal(output, $str) end def 
test_aa_seqstat aaseq = 'WD' output =<C sequence : WD Length : 2 aa Composition : D Asp - 1 ( 50.00 %) aspartic acid W Trp - 1 ( 50.00 %) tryptophan Protein weight : 319.315 // END $str = '' alias puts_orig puts def puts(*args) args.each do |obj| $str << obj.to_s end end seqstat(aaseq) undef puts alias puts puts_orig assert_equal(output, $str) end def test_doublehelix seq = 'ACGTACGTACGTACGT' output = < Date: Fri Aug 26 15:03:14 2011 +0900 BioRuby 1.4.2 is released. ChangeLog | 954 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 954 insertions(+), 0 deletions(-) commit 3acc1e098839cacbe85b5c23367ab14e0c4fe3ea Author: Naohisa Goto Date: Fri Aug 26 15:01:49 2011 +0900 Preparation for bioruby-1.4.2 release. bioruby.gemspec | 2 +- lib/bio/version.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit bf69125192fa01ae3495e094e7ef1b5e895954ad Author: Naohisa Goto Date: Fri Aug 26 14:42:02 2011 +0900 updated bioruby.gemspec bioruby.gemspec | 3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) commit e0a3ead917812199c6a0e495f3afa6a636bbf0c5 Author: Naohisa Goto Date: Fri Aug 26 14:39:54 2011 +0900 Added PLUGIN section to README.rdoc, and some changes made. README.rdoc | 20 +++++++++++++++++--- 1 files changed, 17 insertions(+), 3 deletions(-) commit 1da0a1ce6eddcef8f8fc811b0a2cf8d58f880642 Author: Naohisa Goto Date: Fri Aug 26 14:38:13 2011 +0900 updated doc/Tutorial.rd.html doc/Tutorial.rd.html | 41 ++++++++++++++++++++--------------------- 1 files changed, 20 insertions(+), 21 deletions(-) commit a8b90367b830e58b397536be3dada10cdde97aab Author: Naohisa Goto Date: Fri Aug 26 14:36:24 2011 +0900 Removed sections contain obsolete (404 Not Found) URL in Tutorial.rd. 
doc/Tutorial.rd | 12 ------------ 1 files changed, 0 insertions(+), 12 deletions(-) commit e17546cb90a012cd1f51674ceb4c8da5dd516bdf Author: Michael O'Keefe Date: Tue Aug 23 20:15:44 2011 -0400 Updated tutorial * Updated tutorial (original commit id: 7b9108657961cf2354278e04971c32059b3ed4e2 and some preceding commits) doc/Tutorial.rd | 55 ++++++++++++++++++++++++++++++++----------------------- 1 files changed, 32 insertions(+), 23 deletions(-) commit de8a394129c752a0b9a5975a73c5eb582d9681d3 Author: Naohisa Goto Date: Fri Aug 26 13:24:27 2011 +0900 fix typo and change order of lines RELEASE_NOTES.rdoc | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) commit 1cf2a11199655e4c9f5fc49c5a588b99c18ab7ca Author: Naohisa Goto Date: Fri Aug 26 13:16:11 2011 +0900 RELEASE_NOTE.rdoc modified to reflect recent changes RELEASE_NOTES.rdoc | 14 ++++++++++++++ 1 files changed, 14 insertions(+), 0 deletions(-) commit b44871a5866eeb2d379f080b39b09693c9e9e3cc Author: Naohisa Goto Date: Fri Aug 26 13:15:14 2011 +0900 In BioRuby Shell, getent() fails when EMBOSS seqret does not found. lib/bio/shell/plugin/entry.rb | 10 +++++++--- 1 files changed, 7 insertions(+), 3 deletions(-) commit 179e7506b008a220d5dd42ce1a6c7ce589c3fcda Author: Naohisa Goto Date: Fri Aug 26 12:26:52 2011 +0900 New methods Bio::NCBI::REST::EFetch.nucleotide and protein * New methods Bio::NCBI::REST::EFetch.nucleotide and protein, to get data from "nucleotide" and "protein" database respectively. Because NCBI changed not to accept "gb" format for the database "sequence", the two new methods are added for convenience. * In BioRuby Shell, efetch method uses the above new methods. lib/bio/io/ncbirest.rb | 122 +++++++++++++++++++++++++++++++++++++- lib/bio/shell/plugin/ncbirest.rb | 6 ++- 2 files changed, 126 insertions(+), 2 deletions(-) commit 99b31379bb41c7cad34c1e7dc00f802da37de1cd Author: Naohisa Goto Date: Thu Aug 25 19:03:43 2011 +0900 New method Bio::Fastq#to_s * New method Bio::Fastq#to_s. 
Thanks to Tomoaki NISHIYAMA who wrote a patch. (https://github.com/bioruby/bioruby/pull/37) lib/bio/db/fastq.rb | 14 ++++++++++++++ test/unit/bio/db/test_fastq.rb | 14 ++++++++++++++ 2 files changed, 28 insertions(+), 0 deletions(-) commit 8ab772b37850c3874b55cf37d091046394cda5bd Author: Naohisa Goto Date: Thu Aug 25 15:23:00 2011 +0900 RELEASE_NOTES.rdoc changed to reflect recent changes. RELEASE_NOTES.rdoc | 16 ++++++++++++++++ 1 files changed, 16 insertions(+), 0 deletions(-) commit 8db6abdcc81db6a58bdd99e7f8d410b1a74496b1 Author: Naohisa Goto Date: Thu Aug 25 14:28:42 2011 +0900 A test connecting to DDBJ BLAST web service is enabled. test/functional/bio/appl/test_blast.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit 121ad93c0c1f018ee389972ac5e5e8cc395f00d1 Author: Naohisa Goto Date: Thu Aug 25 14:15:23 2011 +0900 Bio::DDBJ::REST::*. new classes for DDBJ REST web services. * Bio::DDBJ::REST::*: new classes for DDBJ REST web services (WABI). Currently not all services are covered. (lib/bio/io/ddbjrest.rb) * autoload of the above (lib/bio/db/genbank/ddbj.rb, lib/bio.rb) * Tests for the above (but still incomplete) (test/functional/bio/io/test_ddbjrest.rb) * Remote BLAST using DDBJ server now uses REST interface instead of SOAP, for Ruby 1.9.x support. (lib/bio/appl/blast/ddbj.rb) lib/bio.rb | 1 + lib/bio/appl/blast/ddbj.rb | 33 +--- lib/bio/db/genbank/ddbj.rb | 3 +- lib/bio/io/ddbjrest.rb | 344 +++++++++++++++++++++++++++++++ test/functional/bio/io/test_ddbjrest.rb | 47 +++++ 5 files changed, 399 insertions(+), 29 deletions(-) create mode 100644 lib/bio/io/ddbjrest.rb create mode 100644 test/functional/bio/io/test_ddbjrest.rb commit 7e8ba7c1388204daa5245d2128d01f6f40298185 Author: Naohisa Goto Date: Thu Aug 18 00:08:51 2011 +0900 In Fastq formatter, default width value changed to nil * In Bio::Sequence#output(:fastq) (Fastq output formatter), default width value is changed from 70 to nil, which means "without wrapping". 
close [Feature #3191] (https://redmine.open-bio.org/issues/3191) RELEASE_NOTES.rdoc | 8 ++++++-- lib/bio/db/fastq/format_fastq.rb | 4 ++-- test/unit/bio/db/test_fastq.rb | 12 ++++++++++++ 3 files changed, 20 insertions(+), 4 deletions(-) commit 0fb65211519febff18413c589fe7af753ee2e61d Author: Naohisa Goto Date: Wed Aug 17 22:02:03 2011 +0900 Bug fix: Bio::SPTR follow-up of UniProtKB format changes * Bug fix: Bio::SPTR follow-up of UniProtKB format changes. * Tests are added about the fix. * Bug fix: Bio::SPTR#cc_web_resource should be private. * Incompatible changes in Bio::SPTR#cc("WEB RESOURCE") is documented in RELEASE_NOTES.rdoc. * KNOWN_ISSUES.rdoc: description about incompleteness of the fix. * Thanks to Nicholas Letourneau who reports the issue. (https://github.com/bioruby/bioruby/pull/36) KNOWN_ISSUES.rdoc | 5 + RELEASE_NOTES.rdoc | 20 ++- lib/bio/db/embl/sptr.rb | 214 +++++++++++++++++++++--- test/unit/bio/db/embl/test_sptr.rb | 12 +- test/unit/bio/db/embl/test_uniprot_new_part.rb | 208 +++++++++++++++++++++++ 5 files changed, 430 insertions(+), 29 deletions(-) create mode 100644 test/unit/bio/db/embl/test_uniprot_new_part.rb commit 0d066ab6b8fc19f1cf6e66e07c2065775739cccd Author: Naohisa Goto Date: Sat Aug 13 00:58:51 2011 +0900 preparation for release: alpha test version 1.4.2-alpha1 bioruby.gemspec | 23 +++++++++++++++++++++-- lib/bio/version.rb | 4 ++-- 2 files changed, 23 insertions(+), 4 deletions(-) commit 55ece17775f5d24cf62f93d54ded5dc6eed53584 Author: Naohisa Goto Date: Fri Aug 12 21:57:25 2011 +0900 Test bug fix: use sort command in PATH * Test bug fix: FuncTestCommandQuery: use sort command in PATH. Thanks to Tomoaki Nishiyama who reports the issue. 
(https://github.com/bioruby/bioruby/pull/13) test/functional/bio/test_command.rb | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) commit 2f464aae016387cd50031f9d9664e78e220e2d01 Author: Naohisa Goto Date: Fri Aug 12 20:37:18 2011 +0900 RELEASE_NOTES.rdoc is updated following recent changes. RELEASE_NOTES.rdoc | 21 ++++++++++++++------- 1 files changed, 14 insertions(+), 7 deletions(-) commit d1a193684afdfd4c632ef75a978d4f3680d1bdf3 Author: Naohisa Goto Date: Fri Aug 12 20:30:53 2011 +0900 README.rdoc: changed required Ruby version etc. * README.rdoc: now Ruby 1.8.6 or later is required. * README.rdoc: removed old obsolete descriptions. * README.rdoc: modified about RubyGems. * KNOWN_ISSUES.rdoc: moved descriptions about older RubyGems and CVS from README.rdoc. * KNOWN_ISSUES.rdoc: modified about end-of-life Ruby versions. KNOWN_ISSUES.rdoc | 40 ++++++++++++++++++++++++++++++---- README.rdoc | 61 ++++++++++++++++++++-------------------------------- 2 files changed, 59 insertions(+), 42 deletions(-) commit b5cbdc6ab7e81aae4db9aeb708fac57ffbce5636 Author: Naohisa Goto Date: Sat Jul 16 00:12:17 2011 +0900 Added topics for the release notes RELEASE_NOTES.rdoc | 39 ++++++++++++++++++++++++++++++++++++++- 1 files changed, 38 insertions(+), 1 deletions(-) commit f062b5f37a6d8ad35b5b10c942fd61e1a4d37e08 Author: Naohisa Goto Date: Sat Jul 2 01:05:42 2011 +0900 Speedup of Bio::RestrictionEnzyme::Analysis.cut. * Speedup of Bio::RestrictionEnzyme::Analysis.cut. The new code is 50 to 80 fold faster than the previous code when cutting 1Mbp sequence running on Ruby 1.9.2p180. * Thanks to Tomoaki NISHIYAMA who wrote the first version of the patch. Thanks to ray1729 (on GitHub) who reports the issue. 
(https://github.com/bioruby/bioruby/issues/10) lib/bio/util/restriction_enzyme.rb | 3 + .../restriction_enzyme/range/sequence_range.rb | 14 ++-- .../range/sequence_range/calculated_cuts.rb | 75 +++++++++++++++----- .../range/sequence_range/fragment.rb | 4 +- 4 files changed, 69 insertions(+), 27 deletions(-) commit 735379421d9d6b7ceb06b91dcfcca6d5ff841236 Author: Naohisa Goto Date: Sat Jul 2 00:59:58 2011 +0900 New classes (for internal use only) for restriction enzyme classes * New classes Bio::RestrictionEnzyme::SortedNumArray and Bio::RestrictionEnzyme::DenseIntArray. Both of them are for Bio::RestrictionEnzyme internal use only. They will be used for the speedup of restriction enzyme analysis. lib/bio/util/restriction_enzyme/dense_int_array.rb | 195 ++++++++++++++ .../util/restriction_enzyme/sorted_num_array.rb | 219 +++++++++++++++ .../restriction_enzyme/test_dense_int_array.rb | 201 ++++++++++++++ .../restriction_enzyme/test_sorted_num_array.rb | 281 ++++++++++++++++++++ 4 files changed, 896 insertions(+), 0 deletions(-) create mode 100644 lib/bio/util/restriction_enzyme/dense_int_array.rb create mode 100644 lib/bio/util/restriction_enzyme/sorted_num_array.rb create mode 100644 test/unit/bio/util/restriction_enzyme/test_dense_int_array.rb create mode 100644 test/unit/bio/util/restriction_enzyme/test_sorted_num_array.rb commit 6cbb0c230d1a0bf3125c3b0fdb9ec3333d9564f8 Author: Naohisa Goto Date: Thu Jun 30 20:47:26 2011 +0900 A sample benchmark script for Bio::RestrictionEnzyme::Analysis.cut sample/test_restriction_enzyme_long.rb | 4403 ++++++++++++++++++++++++++++++++ 1 files changed, 4403 insertions(+), 0 deletions(-) create mode 100644 sample/test_restriction_enzyme_long.rb commit 413442bd7424f837c73d8170ced8e01a01f87a59 Author: Naohisa Goto Date: Tue May 24 23:26:41 2011 +0900 Added a test for Bio::FastaFormat#entry_overrun etc. * Added a test for Bio::FastaFormat#entry_overrun. * Removed a void test class. 
test/unit/bio/db/test_fasta.rb | 24 ++++++++++++------------ 1 files changed, 12 insertions(+), 12 deletions(-) commit b74020ff9b5c9fc8531c584898a329987008870e Author: Naohisa Goto Date: Tue May 24 22:21:17 2011 +0900 Bug fix: Bio::FastaFormat#query passes nil to the given factory * Bug fix: Bio::FastaFormat#query passes nil to the given factory object. Thanks to Philipp Comans who reports the bug. (https://github.com/bioruby/bioruby/issues/35) * Test method for Bio::FastaFormat#query is added. lib/bio/db/fasta.rb | 2 +- test/unit/bio/db/test_fasta.rb | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletions(-) commit 80e49373e0e9013442680ba33499be80c58471db Author: Naohisa Goto Date: Tue May 17 22:33:56 2011 +0900 Changed database name in the example. * Changed database name in the example. Thanks to Philipp Comans who reports the issue. lib/bio/appl/blast.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit 7427d1f1355a6c190c6bf8522978e462dea64134 Author: Naohisa Goto Date: Thu May 12 22:15:37 2011 +0900 Bug fix: changed GenomeNet remote BLAST URL. * Bug fix: changed GenomeNet remote BLAST host name and path. Thanks to Philipp Comans who reports the bug. 
( https://github.com/bioruby/bioruby/issues/34 ) lib/bio/appl/blast/genomenet.rb | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) commit c1c231b0a17c06ec042534245ed903e0256a59ed Author: Naohisa Goto Date: Tue May 10 20:57:17 2011 +0900 updated doc/Tutorial.rd.html doc/Tutorial.rd.html | 34 ++++++++++++++++++---------------- 1 files changed, 18 insertions(+), 16 deletions(-) commit 5261c926cae8dac890d7d0380e84f2eb88912417 Author: Pjotr Prins Date: Thu May 5 12:07:54 2011 +0200 Tutorial: Fixed URL and the intro doc/Tutorial.rd | 34 ++++++++++++++++++++-------------- 1 files changed, 20 insertions(+), 14 deletions(-) commit 71de394053376f4759d705c52e6f16eca3da9d62 Author: Pjotr Prins Date: Wed Mar 9 10:26:53 2011 +0100 Tutorial: Added a commnet for rubydoctest, changed Ruby version * Added a comment for rubydoctest * Changed example Ruby version representation * This is part of commit ba5b9c2d29223860252451110a99d4ff0250395d and modified to merge with the current HEAD. doc/Tutorial.rd | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) commit 1d27153065b8e8595a470b2201961b0a39bf8ca1 Author: Naohisa Goto Date: Thu Apr 28 23:58:57 2011 +0900 updated doc/Tutorial.rd.html doc/Tutorial.rd.html | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit ae9beff3bc43db3724a292b10a214583d9fbc111 Author: Michael O'Keefe Date: Wed Apr 6 11:46:54 2011 -0400 Updated through the section on Homology searching with BLAST doc/Tutorial.rd | 64 ++++++++++++++++++++-------------------------- doc/Tutorial.rd.html | 68 ++++++++++++++++++++++---------------------------- 2 files changed, 58 insertions(+), 74 deletions(-) commit 971da799b16628a927abd7dd6c218994506f8fd8 Author: Michael O'Keefe Date: Thu Mar 24 18:29:20 2011 -0400 Updated the html file generated from the RDoc doc/Tutorial.rd.html | 224 ++++++++++++++++++++++++++------------------------ 1 files changed, 117 insertions(+), 107 deletions(-) commit c6afd7eeed121926b56d300cb4170b5024f29eb0 Author: Michael 
O'Keefe Date: Thu Mar 24 18:08:17 2011 -0400 Finished updating the tutorial doc/Tutorial.rd | 16 ++++++++-------- 1 files changed, 8 insertions(+), 8 deletions(-) commit 7349ac550ec03e2c5266496297becbdb3f4e0edd Author: Michael O'Keefe Date: Thu Mar 24 15:56:37 2011 -0400 Edited tutorial up through the extra stuff section doc/Tutorial.rd | 28 ++++++++++++++-------------- 1 files changed, 14 insertions(+), 14 deletions(-) commit 32ba3b15ad00d02b12ef2b44636505e23caaf620 Author: Michael O'Keefe Date: Thu Mar 24 15:26:48 2011 -0400 Updated tutorial up through BioSQL doc/Tutorial.rd | 172 ++++++++++++++++++------------------------------------- 1 files changed, 57 insertions(+), 115 deletions(-) commit 249580edb49a13545708fdcb559104217e37f162 Author: Michael O'Keefe Date: Thu Mar 24 12:09:03 2011 -0400 Updated tutorial through the section on alignments doc/Tutorial.rd | 40 +++++++++++++++++++++++++++++++++------- 1 files changed, 33 insertions(+), 7 deletions(-) commit 54f7b54044bb245ec5953dc7426f1c434b41f24f Author: Michael O'Keefe Date: Thu Mar 24 11:51:23 2011 -0400 Updated the tutorial (mostly grammar fixes) up until GenBank doc/Tutorial.rd | 31 +++++++++++++++---------------- 1 files changed, 15 insertions(+), 16 deletions(-) commit f046a52081a8af0e9afbf65fd2673c29689be769 Author: Naohisa Goto Date: Tue Feb 8 12:58:50 2011 +0900 Added a test protein sequence data for BLAST test. test/data/fasta/EFTU_BACSU.fasta | 8 ++++++++ 1 files changed, 8 insertions(+), 0 deletions(-) create mode 100644 test/data/fasta/EFTU_BACSU.fasta commit f61a5f4bdde16fa051f43cbe3efef4570b415a6a Author: Anthony Underwood Date: Mon Jan 31 12:44:55 2011 +0000 Bug fix: GenBank sequence output should format date as 27-JAN-2011 * Bug fix: GenBank sequence output should format date as 27-JAN-2011 rather than 2011-01-27 as specified by offical GenBank specs. 
lib/bio/db/genbank/format_genbank.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit be144e75a059058ab000a55d7bf535597e7e2617 Author: Naohisa Goto Date: Thu Feb 3 20:28:03 2011 +0900 Added tests for remote BLAST execution via GenomeNet and DDBJ. * Added tests for remote BLAST execution via GenomeNet and DDBJ. Currently, a test for DDBJ BLAST web API is disabled because it takes relatively long time. * Tests to retrieve remote BLAST database information for GenomeNet and DDBJ servers are also added. test/functional/bio/appl/blast/test_remote.rb | 93 +++++++++++++++++++++++++ test/functional/bio/appl/test_blast.rb | 61 ++++++++++++++++ 2 files changed, 154 insertions(+), 0 deletions(-) create mode 100644 test/functional/bio/appl/blast/test_remote.rb create mode 100644 test/functional/bio/appl/test_blast.rb commit 67314f1f1a248954c030f7ffe048faf862bf07d2 Author: Naohisa Goto Date: Thu Feb 3 20:19:11 2011 +0900 Updated _parse_databases following the changes in the DDBJ server * Updated _parse_databases following the changes in the DDBJ server. Changed to use (NT) or (AA) in the tail of each description. Thanks to DDBJ to improve their web service API. lib/bio/appl/blast/ddbj.rb | 29 ++++++++++++++++++++--------- 1 files changed, 20 insertions(+), 9 deletions(-) commit d6aad2f4cc53c1227c86b6b573644cca15c9ed82 Author: Naohisa Goto Date: Wed Feb 2 00:02:32 2011 +0900 Release notes for the next release is added. 
RELEASE_NOTES.rdoc | 38 ++++++++++++++++++++++++++++++++++++++ 1 files changed, 38 insertions(+), 0 deletions(-) create mode 100644 RELEASE_NOTES.rdoc commit b4a30cc8ac9472b9e1c2a298afc624d0229c64c9 Author: Naohisa Goto Date: Tue Feb 1 23:33:18 2011 +0900 Bug fix: Execution failure due to the changes of DDBJ BLAST server lib/bio/appl/blast/ddbj.rb | 4 +++- 1 files changed, 3 insertions(+), 1 deletions(-) commit d30cb5975febd8b526088612c4fb9689a6cc46ba Author: Naohisa Goto Date: Tue Feb 1 23:01:34 2011 +0900 Support for database "mine-aa" and "mine-nt" with KEGG organism codes * Added support for database "mine-aa" and "mine-nt" combined with KEGG organism codes. When database name starts with mine-aa or mine-nt, space-separated list of KEGG organism codes can be given. For example, "mine-aa eco bsu hsa". lib/bio/appl/blast/genomenet.rb | 11 +++++++++++ 1 files changed, 11 insertions(+), 0 deletions(-) commit abcba798ccf57894dcd570a6578ef78db30a3e25 Author: Naohisa Goto Date: Tue Feb 1 22:20:02 2011 +0900 RELEASE_NOTES.rdoc is renamed to doc/RELEASE_NOTES-1.4.1.rdoc RELEASE_NOTES.rdoc | 104 ------------------------------------------ doc/RELEASE_NOTES-1.4.1.rdoc | 104 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 104 deletions(-) delete mode 100644 RELEASE_NOTES.rdoc create mode 100644 doc/RELEASE_NOTES-1.4.1.rdoc commit 8719cf4e06fc8a8cd0564aeb0b95372a7a0bcefb Author: Naohisa Goto Date: Tue Feb 1 22:07:32 2011 +0900 Bug: Options "-v" and "-b" should be used for the limit of hits. * Bug: Options "-v" and "-b" should be used for the limit of hits, and "-V" and "-B" should not be used for the purpose. lib/bio/appl/blast/genomenet.rb | 8 ++++---- 1 files changed, 4 insertions(+), 4 deletions(-) commit 974b640badae9837fe9fc173d690b27c9b045454 Author: Naohisa Goto Date: Tue Feb 1 20:34:50 2011 +0900 Bug fix: Workaround for a change in the GenomeNet BLAST site. * Bug fix: Workaround for a change in the GenomeNet BLAST site. 
Thanks to the anonymous reporter. The patch was originally made by Toshiaki Katayama. lib/bio/appl/blast/genomenet.rb | 7 +++---- 1 files changed, 3 insertions(+), 4 deletions(-) commit 001d3e3570c77185cece4aed1be5be2ed6f94f7e Author: Naohisa Goto Date: Thu Jan 6 23:39:19 2011 +0900 Added tests to check the previous Bio::Reference#endnote fix. test/unit/bio/test_reference.rb | 30 ++++++++++++++++++++++++++++++ 1 files changed, 30 insertions(+), 0 deletions(-) commit e1cd766abe24dbcc08a42103127c75ad0ab929aa Author: Naohisa Goto Date: Thu Jan 6 23:07:35 2011 +0900 Bio::Reference#pubmed_url is updated to follow recent NCBI changes. lib/bio/reference.rb | 5 ++--- test/unit/bio/test_reference.rb | 5 +++++ 2 files changed, 7 insertions(+), 3 deletions(-) commit 48024313a7568a38f4291618708541ae1dac312c Author: Naohisa Goto Date: Thu Jan 6 22:56:37 2011 +0900 Bug fix: Bio::Reference#endnote fails when url is not set * Bug fix: Bio::Reference#endnote fails when url is not set. Thanks to Warren Kibbe who reports the bug. (https://github.com/bioruby/bioruby/issues#issue/15) lib/bio/reference.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit 577278a95340abfa32d3e67415d3a10bc74b82c0 Author: Pjotr Prins Date: Fri Dec 17 12:16:31 2010 +0100 Bug fix: In Bio::MEDLINE#reference, doi field should be filled. * Bug fix: In Bio::MEDLINE#reference, doi field should be filled. (https://github.com/bioruby/bioruby/issues#issue/29) lib/bio/db/medline.rb | 1 + test/unit/bio/db/test_medline.rb | 1 + 2 files changed, 2 insertions(+), 0 deletions(-) commit daa20c85681576d3bfbdc8f87580a4b6227b122c Author: Naohisa Goto Date: Thu Jan 6 20:25:03 2011 +0900 Bug fix: Bio::Newick#reparse failure * Bug fix: Bio::Newick#reparse failure. Thanks to jdudley who reports the bug. (https://github.com/bioruby/bioruby/issues#issue/28) * Tests are added to confirm the bug fix.
lib/bio/db/newick.rb | 4 +++- test/unit/bio/db/test_newick.rb | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletions(-) commit 16117aefdf57ac3ae16b5568f462f7b919ef005f Author: Naohisa Goto Date: Thu Jan 6 20:14:18 2011 +0900 Use setup for the preparation of adding more test methods. test/unit/bio/db/test_newick.rb | 14 ++++++++++---- 1 files changed, 10 insertions(+), 4 deletions(-) commit e5dc5896a5c6249e2a6cb03d63a3c2ade36b67e7 Author: Naohisa Goto Date: Fri Nov 19 21:07:13 2010 +0900 Ruby 1.9 support: suppressed warning "mismatched indentations" test/unit/bio/db/pdb/test_pdb.rb | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) commit 1bce41c7ebed46ac6cf433b047fe6a4c3a538089 Author: Naohisa Goto Date: Fri Nov 19 21:05:06 2010 +0900 Ruby 1.9 support: Suppressed warning "shadowing outer local variable" lib/bio/db/pdb/residue.rb | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) commit 0f31727769833ccf9d6891ae192da1bc180223e0 Author: Naohisa Goto Date: Fri Nov 19 21:04:07 2010 +0900 Ruby 1.9 support: Suppressed warning "shadowing outer local variable" lib/bio/location.rb | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) commit 70c1135b171fcf47dd9dc1bc396d15d1c3acfa62 Author: Naohisa Goto Date: Fri Nov 19 21:02:57 2010 +0900 Ruby 1.9 support: Suppressed warning "shadowing outer local variable" lib/bio/db/pdb/pdb.rb | 14 +++++++------- 1 files changed, 7 insertions(+), 7 deletions(-) commit a77e4ab78211a85aa052ca6645a2051a4f3b76d8 Author: Naohisa Goto Date: Fri Nov 19 20:42:22 2010 +0900 Ruby 1.9 support: use Array#join instead of Array#to_s lib/bio/db/pdb/pdb.rb | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) commit 734c2e54613e3ed5efd95e1212feab8f014d5f19 Author: Naohisa Goto Date: Fri Nov 19 20:38:11 2010 +0900 Changed to use assert_instance_of * Changed to use assert_instance_of(klass, obj) instead of assert_equal(klass, obj.class). 
test/unit/bio/db/pdb/test_pdb.rb | 16 ++++++++-------- 1 files changed, 8 insertions(+), 8 deletions(-) commit 490286018c8ce314da441c646ea9c5fb3f765c95 Author: Naohisa Goto Date: Fri Nov 19 20:24:36 2010 +0900 Float::EPSILON was too small for the delta tolerance. test/unit/bio/db/pdb/test_pdb.rb | 20 ++++++++++---------- 1 files changed, 10 insertions(+), 10 deletions(-) commit e92d225cadabe63fe23c7c32a4d1d50a371366cc Author: Naohisa Goto Date: Fri Nov 19 20:16:30 2010 +0900 Ruby 1.9 support: use assert_in_delta test/unit/bio/db/pdb/test_pdb.rb | 24 +++++++++++++++--------- 1 files changed, 15 insertions(+), 9 deletions(-) commit 41452971a132ef55de3486022962fa2c333b4c85 Author: Naohisa Goto Date: Fri Nov 19 13:19:39 2010 +0900 Fixed Object#id problem and suppressed warning messages. * Changed not to call nil.id (==4) invoked from chain.id. * Suppressed warning message about useless use of a variable. * Suppressed waring about conflict of IDs when testing addResidue, addLigand and addChain methods. test/unit/bio/db/pdb/test_pdb.rb | 119 +++++++++++++++++++------------------- 1 files changed, 59 insertions(+), 60 deletions(-) commit b4af5826f77002933de9d3c2ddfcc5a7cb5629db Author: Naohisa Goto Date: Wed Nov 17 22:26:26 2010 +0900 Adjusted copyright line test/unit/bio/db/pdb/test_pdb.rb | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) commit 06fd989072b2287a3accbb60684b8a029bfc0ac3 Author: Naohisa Goto Date: Wed Nov 17 21:54:05 2010 +0900 A module name is changed to avoid potential name conflict. * A module name is changed to avoid potential name conflict. * Removed a Shift_JIS character (Zenkaku space) in a comment line. test/unit/bio/db/pdb/test_pdb.rb | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) commit 09007f93abe8c9c5e7561f082b55ca307a7d4a1e Author: Kazuhiro Hayashi Date: Thu Jul 15 21:06:28 2010 +0900 Added more unit tests for Bio::PDB * Added more unit tests for Bio::PDB. 
* This is part of combinations of the 13 commits: * 555f7b49a43e7c35c82cd48b199af96ca93d4179 * 2b5c87a9ada248597e0517e22191bb4c88be2a42 * a16e24fa35debdcacd11cf36fdf0e60fe82b3684 * e3607c0f7154a4156fd53ed17470aa3628cd2586 * 4e74c9107f09c5eb3fc7fc9ec38d9d773fe89367 * 605fb0a222f70eeaa1e576d31af484a9a6a2ac27 * 2c8b2b5496fee04b886bfcbd11fb99948816cc28 * 202cf2b1b57fbcac215aa135cf6343af6a49d2ef * f13c070c763c9035451c3639e6e29c9a156947cd * 843378e608bd1ef27a713d9be2d50f0f56915b0b * a9078b8a586b66d8026af612d9a5999c6c77de33 * f0174a8ca3ee136271c51047fce12597d3fbb58c * 6675fd930718e41ad009f469b8167f81c9b2ad52 test/unit/bio/db/pdb/test_pdb.rb | 3281 +++++++++++++++++++++++++++++++++++++- 1 files changed, 3276 insertions(+), 5 deletions(-) commit 2f6a1d29b14d89ac39b408582c9865ad06560ae1 Author: Naohisa Goto Date: Sat Nov 6 00:41:21 2010 +0900 Adjusted test data file path, required files and header descriptions. test/unit/bio/db/test_litdb.rb | 8 ++++---- 1 files changed, 4 insertions(+), 4 deletions(-) commit d3394e69c98b3be63c8287af84dc530830fb977a Author: Kazuhiro Hayashi Date: Fri Jun 18 17:11:25 2010 +0900 added unit test for Bio::LITDB with a sample file test/data/litdb/1717226.litdb | 13 +++++ test/unit/bio/db/test_litdb.rb | 95 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 0 deletions(-) create mode 100644 test/data/litdb/1717226.litdb create mode 100644 test/unit/bio/db/test_litdb.rb commit cef1d2c824f138fe268d20eaa9ffd85223c85ef9 Author: Naohisa Goto Date: Fri Nov 5 23:54:31 2010 +0900 Adjusted test data file path, required files and header descriptions. test/unit/bio/db/test_nbrf.rb | 8 ++++---- 1 files changed, 4 insertions(+), 4 deletions(-) commit e892e7f9b9b4daadeff44c9e479e6f51f02e383e Author: Kazuhiro Hayashi Date: Fri Jun 25 13:20:46 2010 +0900 Added unit tests for Bio::NBRF with test data. * Added unit tests for Bio::NBRF with test data. 
* This is part of combinations of the two commits: * 53873a82182e072e738da20381dcb2bfd8bc9e96 (Modified the unit test for Bio::NBRF) * 4675cf85aa9c0b4de9f527f9c6bb80804fdaaaa9 (Modified Bio::TestNBRF and Bio::TestTRANSFAC.) test/data/pir/CRAB_ANAPL.pir | 6 +++ test/unit/bio/db/test_nbrf.rb | 82 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 0 deletions(-) create mode 100644 test/data/pir/CRAB_ANAPL.pir create mode 100644 test/unit/bio/db/test_nbrf.rb commit 4922d5151138312d5a09ac60a06419c23978ba3c Author: Naohisa Goto Date: Fri Nov 5 23:07:13 2010 +0900 Mock class for testing is moved under the test class * Mock class for testing is moved under the test class, to avoid potential name conflicts. test/unit/bio/db/genbank/test_common.rb | 25 ++++++++++++------------- 1 files changed, 12 insertions(+), 13 deletions(-) commit 8f630700bc6dd8c183d08291c66c665394873586 Author: Naohisa Goto Date: Fri Nov 5 23:01:32 2010 +0900 Adjusted header descriptions. test/unit/bio/db/genbank/test_common.rb | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) commit ab3d6384ca721fb6004efb5988461eecefad4d6b Author: Naohisa Goto Date: Fri Nov 5 22:58:21 2010 +0900 Adjusted test data file path and header descriptions. test/unit/bio/db/genbank/test_genpept.rb | 8 ++++---- 1 files changed, 4 insertions(+), 4 deletions(-) commit 1d24aecfac9dbc190cdc3eef0956451cc88cfe4f Author: Naohisa Goto Date: Fri Nov 5 22:54:19 2010 +0900 Adjusted test data file path, required files and header descriptions. test/unit/bio/db/genbank/test_genbank.rb | 12 ++++++++---- 1 files changed, 8 insertions(+), 4 deletions(-) commit 09275d8661f3d49a7e40be59a086ba33659b2448 Author: Kazuhiro Hayashi Date: Thu Jun 17 22:08:15 2010 +0900 Added unit tests for Bio::GenPept newly. * Added unit tests for Bio::GenPept newly. * This is part of the commit 8e46ff42b627791f259033d5a20c1610e32cfa62 (Added unit tests for NBRF and GenPept newly.) 
test/data/genbank/CAA35997.gp | 48 ++++++++++++++++++ test/unit/bio/db/genbank/test_genpept.rb | 81 ++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+), 0 deletions(-) create mode 100644 test/data/genbank/CAA35997.gp create mode 100644 test/unit/bio/db/genbank/test_genpept.rb commit 2cde22cb358a2b7ec8197866fe35a0b46ebf9b00 Author: Kazuhiro Hayashi Date: Thu Jun 24 18:52:37 2010 +0900 Added unit tests for Bio::NCBIDB::Common * Added unit tests for Bio::NCBIDB::Common. * This is part of combination of the 4 commits: * 7da8d557e8ee53da9d93c6fadfd0d8f493977c81 (added test/unit/bio/db/genbank/test_common.rb newly) * 2b5c87a9ada248597e0517e22191bb4c88be2a42 (Modified a few lines of Bio::NCBIDB::TestCommon, Bio::TestPDBRecord and Bio::TestPDB) * 10c043535dd7bf5b9682b4060183f494742c53df (Modified unit test for Bio::GenBank::Common) * 0af08fb988e08948a54e33273861b5460b7f6b2d (Modified the unit test for Bio::GenBank) test/unit/bio/db/genbank/test_common.rb | 275 +++++++++++++++++++++++++++++++ 1 files changed, 275 insertions(+), 0 deletions(-) create mode 100644 test/unit/bio/db/genbank/test_common.rb commit f775d9b7f7deda2e30d4196d4cf507b59936a654 Author: Kazuhiro Hayashi Date: Sat Jun 26 17:54:32 2010 +0900 Added unit tests for Bio::GenBank with test data. * Added unit tests for Bio::GenBank with test data. * This is part of combination of the two commits: * 555f7b49a43e7c35c82cd48b199af96ca93d4179 (added test_genbank.rb and test_go.rb with the test files. 
modified test_pdb.rb) * a46f895bf378ce08143ff031ddda302f970c270a (Modified Bio::GenBank and Bio::Nexus) test/data/genbank/SCU49845.gb | 167 +++++++++++++ test/unit/bio/db/genbank/test_genbank.rb | 397 ++++++++++++++++++++++++++++++ 2 files changed, 564 insertions(+), 0 deletions(-) create mode 100644 test/data/genbank/SCU49845.gb create mode 100644 test/unit/bio/db/genbank/test_genbank.rb commit 33621c1f4c16173efd05861759f577b2c4733a53 Author: Naohisa Goto Date: Fri Oct 29 16:45:22 2010 +0900 Bio::BIORUBY_EXTRA_VERSION is changed to ".5000". bioruby.gemspec | 2 +- lib/bio/version.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit cfb2c744f3762689077f5bf2092f715d25e066ed Author: Naohisa Goto Date: Fri Oct 22 13:02:03 2010 +0900 BioRuby 1.4.1 is released. ChangeLog | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 55 insertions(+), 0 deletions(-) commit 92cfda14c08b270ed1beca33153125141f88510e Author: Naohisa Goto Date: Fri Oct 22 13:00:09 2010 +0900 Preparation for bioruby-1.4.1 release. bioruby.gemspec | 2 +- lib/bio/version.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit d7999539392bba617b041e3120b5b2d785301f24 Author: Naohisa Goto Date: Fri Oct 22 10:27:02 2010 +0900 Newly added issue is copied from KNOWN_ISSUES.rdoc to the release note. RELEASE_NOTES.rdoc | 15 +++++++++++++++ 1 files changed, 15 insertions(+), 0 deletions(-) commit a9f287658441038a4e9bb220502523de039417f9 Author: Naohisa Goto Date: Fri Oct 22 10:26:44 2010 +0900 updated description of an issue KNOWN_ISSUES.rdoc | 12 +++++++----- 1 files changed, 7 insertions(+), 5 deletions(-) commit bb946d1c97d1eb0de62c8b509bbfb02d67efffeb Author: Naohisa Goto Date: Thu Oct 21 23:17:25 2010 +0900 Added an issue about command-line string escaping on Windows with Ruby 1.9. 
KNOWN_ISSUES.rdoc | 8 ++++++++ 1 files changed, 8 insertions(+), 0 deletions(-) commit fe7d26516cc6b9a3cf8c16e6f8204a4d5eb5e5ae Author: Naohisa Goto Date: Thu Oct 21 20:34:32 2010 +0900 Added descriptions. RELEASE_NOTES.rdoc | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 52 insertions(+), 0 deletions(-) commit fd5da3b47ebce1df46922f20d013439faef483e9 Author: Naohisa Goto Date: Thu Oct 21 18:27:44 2010 +0900 ChangeLog is updated. ChangeLog | 1657 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 1657 insertions(+), 0 deletions(-) commit fab16977d23bb3a5fdfc976eece14dfdabdcac4d Author: Naohisa Goto Date: Thu Oct 21 18:07:43 2010 +0900 preparation for release candidate 1.4.1-rc1 bioruby.gemspec | 40 ++++++++++++++++++++++++++++++++++++++-- lib/bio/version.rb | 4 ++-- 2 files changed, 40 insertions(+), 4 deletions(-) commit 119cc3bf5582735a5df574450ec685fd2f989b5d Author: Naohisa Goto Date: Thu Oct 21 18:05:13 2010 +0900 Temporarily removed for packaging new version. It will be reverted later. doc/howtos/sequence_codon.txt | 38 -------------------------------------- 1 files changed, 0 insertions(+), 38 deletions(-) delete mode 100644 doc/howtos/sequence_codon.txt commit 1b1b3752e3c98a29caf837bfc12c1ed79a04dba2 Author: Naohisa Goto Date: Thu Oct 21 16:48:43 2010 +0900 Fixed typo, reported by Tomoaki NISHIYAMA. 
KNOWN_ISSUES.rdoc | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit 47ed7e5eaca4a261ef0fd4f76909c930e52aadd5 Merge: c002142 548cb58 Author: Naohisa Goto Date: Thu Oct 21 16:17:59 2010 +0900 Merge branch 'test-defline-by-jtprince' commit 548cb58aaad06bb9161e09f7b4ae45729898ca5e Author: Naohisa Goto Date: Thu Oct 21 16:16:28 2010 +0900 adjusted filename in header test/unit/bio/db/fasta/test_defline_misc.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit 95be260708ef21be7848a5d4b7c494cc6bb3d81f Author: Naohisa Goto Date: Thu Oct 21 16:14:58 2010 +0900 Renamed to test_defline_misc.rb to resolve the file name conflict. test/unit/bio/db/fasta/test_defline.rb | 490 --------------------------- test/unit/bio/db/fasta/test_defline_misc.rb | 490 +++++++++++++++++++++++++++ 2 files changed, 490 insertions(+), 490 deletions(-) delete mode 100644 test/unit/bio/db/fasta/test_defline.rb create mode 100644 test/unit/bio/db/fasta/test_defline_misc.rb commit 1e7628e2c396330743d4904b100d62d2c2773bf0 Author: Naohisa Goto Date: Thu Oct 21 16:11:14 2010 +0900 Test bug fix: mistake in test_get method in two classes. test/unit/bio/db/fasta/test_defline.rb | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) commit c479f56f14fb531e31c7e5fdd02f6c934ac468fa Author: Naohisa Goto Date: Thu Oct 21 16:06:46 2010 +0900 Test bug fix: test classes should inherit Test::Unit::TestCase. test/unit/bio/db/fasta/test_defline.rb | 10 +++++----- 1 files changed, 5 insertions(+), 5 deletions(-) commit 0e8ea46e5a239df5c1da3c63e602376c04191ef4 Author: Naohisa Goto Date: Thu Oct 21 15:55:02 2010 +0900 Bug fix: syntax error in Ruby 1.8.7 due to a comma. 
test/unit/bio/db/fasta/test_defline.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit 62a2c1d7c47fbef7a7e7c4f1c079f98fa74e5099 Author: John Prince Date: Tue Oct 19 11:20:16 2010 -0600 added individual unit tests for Bio::FastaDefline test/unit/bio/db/fasta/test_defline.rb | 490 ++++++++++++++++++++++++++++++++ 1 files changed, 490 insertions(+), 0 deletions(-) create mode 100644 test/unit/bio/db/fasta/test_defline.rb commit c002142cdb478b0ad08b7bd5e3331c7b643222f1 Author: Naohisa Goto Date: Thu Oct 21 15:36:58 2010 +0900 Adjusted file paths and the copyright line. test/unit/bio/db/fasta/test_defline.rb | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) commit a5818c5f8ae07e4ec4bdcc2229df9a59bded63f0 Author: Kazuhiro Hayashi Date: Thu Jun 17 11:54:15 2010 +0900 Newly added unit tests for Bio::FastaDefline * Newly added unit tests for Bio::FastaDefline. * This is part of combination of the two commits: bd2452caf0768e7000d19d462465b1772e3c030b (modified test file for Bio::FastaDefline) cae1b6c00cdb9044cb0dfb4db58e6acfe9b7d246 (Added test/unit/bio/db/fasta/test_defline.rb and test/unit/bio/db/kegg/test_kgml.rb with the sample file newly.) test/unit/bio/db/fasta/test_defline.rb | 160 ++++++++++++++++++++++++++++++++ 1 files changed, 160 insertions(+), 0 deletions(-) create mode 100644 test/unit/bio/db/fasta/test_defline.rb commit 4addb906df442adf4ed20275428070b651abbf07 Author: Naohisa Goto Date: Thu Oct 21 15:08:46 2010 +0900 Added note for a dead link, updated a URL, and added a new reference. lib/bio/db/fasta/defline.rb | 8 ++++++-- 1 files changed, 6 insertions(+), 2 deletions(-) commit e636f123adf28688748cc5bbbc6e0c817358d475 Author: John Prince Date: Thu Oct 14 15:38:07 2010 -0600 included TREMBL prefix to list of NSIDs (tr|) * Included TREMBL prefix to list of NSIDs (tr|). This is a standard prefix found in UniprotKB FASTA files. 
lib/bio/db/fasta/defline.rb | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) commit 5277eb0b5376a0dc217dc051c49993c505956400 Author: Naohisa Goto Date: Thu Oct 21 14:16:03 2010 +0900 Bug fix: Bio::ClustalW::Report#get_sequence may fail * Bug fix: Bio::ClustalW::Report#get_sequence may fail when the second argument of Bio::ClustalW::Report.new is specified. lib/bio/appl/clustalw/report.rb | 5 +++-- 1 files changed, 3 insertions(+), 2 deletions(-) commit 81b9238abb643573a4051dc0f10c4f9a2cff40fa Author: Naohisa Goto Date: Thu Oct 21 14:21:52 2010 +0900 Added a test class to test the second argument of Bio::ClustalW::Report.new. test/unit/bio/appl/clustalw/test_report.rb | 19 +++++++++++++++++++ 1 files changed, 19 insertions(+), 0 deletions(-) commit 3e9b149aec91585732a34efaa960c96bcec2eef8 Author: Naohisa Goto Date: Thu Oct 21 13:22:38 2010 +0900 Ruby 1.9.2 support: defined Bio::RestrictionEnzyme::DoubleStranded::AlignedStrands#initialize * Bio::RestrictionEnzyme::DoubleStranded::AlignedStrands#initialize is explicitly defined, due to the behavior change of argument number check in the default initialize method in Ruby 1.9.2. .../double_stranded/aligned_strands.rb | 5 +++++ 1 files changed, 5 insertions(+), 0 deletions(-) commit cfe31c02d4bd0d97e588d25dc30188da6be81e85 Author: Naohisa Goto Date: Thu Oct 21 11:49:08 2010 +0900 Ruby 1.9.2 support: assert_in_delta for a float value. test/unit/bio/db/test_aaindex.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit 3f5d2ccb9ac8bc44febc88d441f47aeddf7f12ff Author: Naohisa Goto Date: Thu Oct 21 11:41:38 2010 +0900 Ruby 1.9.2 support: using assert_in_delta for float values. * Ruby 1.9.2 support: using assert_in_delta for float values. The patch is written by Tomoaki NISHIYAMA during BH2010.10. 
test/unit/bio/util/test_contingency_table.rb | 14 +++++++------- 1 files changed, 7 insertions(+), 7 deletions(-) commit f357929bc5dcf8295b0a11a09b4025e3592d9eda Author: Naohisa Goto Date: Thu Oct 21 11:16:37 2010 +0900 Small changes for README.rdoc. README.rdoc | 5 +++-- 1 files changed, 3 insertions(+), 2 deletions(-) commit c6c567fe9602ae8d7d343a5773f51d8aa22c8876 Author: Naohisa Goto Date: Thu Oct 21 11:12:17 2010 +0900 Shows message when running "ruby setup.rb test" with Ruby1.9. setup.rb | 5 +++++ 1 files changed, 5 insertions(+), 0 deletions(-) commit 9b66463c4150a679e63289e0cee3c4d1200c7d0f Author: Naohisa Goto Date: Wed Oct 20 17:53:36 2010 +0900 Added description about the incompatible change in Bio::AAindex2. RELEASE_NOTES.rdoc | 6 ++++++ 1 files changed, 6 insertions(+), 0 deletions(-) commit 327ea878d4e15b99711d8121a54698da29d4b0aa Author: Naohisa Goto Date: Wed Oct 20 17:35:53 2010 +0900 Changed the expected return values in the unit tests, following the last change to Bio::AAindex2. * Changed the expected return values in the unit tests, following the last change to Bio::AAindex2. * The patch is written by Tomoaki NISHIYAMA during BH2010.10. test/unit/bio/db/test_aaindex.rb | 15 ++++++++------- 1 files changed, 8 insertions(+), 7 deletions(-) commit 31963b43daab2801087f5f6d23b04e357bb7b1e2 Author: Naohisa Goto Date: Wed Oct 20 17:32:26 2010 +0900 Ruby 1.9.2 support: Incompatible change: the symmetric elements for triangular matrix should be copied * Ruby 1.9.2 support: Incompatible change: the symmetric elements for triangular matrix should be copied. The patch is written by Tomoaki NISHIYAMA during BH2010.10. lib/bio/db/aaindex.rb | 12 +++++++++++- 1 files changed, 11 insertions(+), 1 deletions(-) commit e8a1d65984781466eff9d5a262f18cb1c3e01056 Author: Naohisa Goto Date: Wed Oct 20 16:10:51 2010 +0900 Test bug fix: confusion between assert and assert_equal * Test bug fix: the assert should be assert_equal. The bug was found with Ruby 1.9.2-p0. 
test/unit/bio/db/embl/test_sptr.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit feb2cda47beab91e2fc3dddf99d5cc1cacf3fbae Author: Naohisa Goto Date: Wed Oct 20 14:59:40 2010 +0900 Test bug fix: confusion between assert and assert_equal, and apparently wrong expected values. * Test bug fix: the assert should be assert_equal. The bug was found with Ruby 1.9.2-p0. * In the test_rates_hundred_and_fiftieth_position method, the index for @example_rates and the expected value of the third assertion were apparently wrong. * Reported by Tomoaki NISHIYAMA during BH2010.10. test/unit/bio/appl/paml/codeml/test_rates.rb | 12 ++++++------ 1 files changed, 6 insertions(+), 6 deletions(-) commit ffc03a11a4ef7b36ea78de58d4c8d4e9259093c4 Author: Naohisa Goto Date: Sat Oct 16 01:06:36 2010 +0900 Tests for Bio::KEGG::PATHWAY are improved with new test data. test/data/KEGG/ec00072.pathway | 23 + test/data/KEGG/hsa00790.pathway | 59 ++ test/data/KEGG/ko00312.pathway | 16 + test/data/KEGG/rn00250.pathway | 114 ++++ test/unit/bio/db/kegg/test_pathway.rb | 1055 +++++++++++++++++++++++++++++++++ 5 files changed, 1267 insertions(+), 0 deletions(-) create mode 100644 test/data/KEGG/ec00072.pathway create mode 100644 test/data/KEGG/hsa00790.pathway create mode 100644 test/data/KEGG/ko00312.pathway create mode 100644 test/data/KEGG/rn00250.pathway commit 1e1d974c2c72ddf5a45e41c6f2510729fb65a4ad Author: Toshiaki Katayama Date: Tue Jul 20 11:46:52 2010 +0900 Added methods for parsing KEGG PATHWAY fields. * Added methods for parsing KEGG PATHWAY fields. * This is part of commit e2abe5764f3ded91c82689245f19a0412d3a7afb and modified to merge with the current HEAD (original commit message: Changes for TogoWS). 
lib/bio/db/kegg/pathway.rb | 146 +++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 145 insertions(+), 1 deletions(-) commit 957c8ee630538a8c49c52339cb3c0364e5328378 Author: Naohisa Goto Date: Sat Oct 16 00:50:42 2010 +0900 Private method strings_as_hash is moved to Bio::KEGG::Common::StringsAsHash. lib/bio/db/kegg/common.rb | 18 ++++++++++++++++++ lib/bio/db/kegg/module.rb | 19 +++++-------------- 2 files changed, 23 insertions(+), 14 deletions(-) commit b7fd85382bccceaa29958d5daf98ca0a513e5a9a Author: Naohisa Goto Date: Fri Oct 15 22:01:05 2010 +0900 Renamed Bio::KEGG::*#pathway_modules to modules, etc. * Renamed following methods in Bio::KEGG::ORTHOLOGY and Bio::KEGG:PATHWAY classes: pathway_modules to modules, pathway_modules_as_strings to modules_as_strings, and pathway_modules_as_hash to modules_as_hash. * Unit tests are also modified. lib/bio/db/kegg/common.rb | 18 +++++++++--------- lib/bio/db/kegg/orthology.rb | 10 +++++----- lib/bio/db/kegg/pathway.rb | 10 +++++----- test/unit/bio/db/kegg/test_orthology.rb | 12 ++++++------ test/unit/bio/db/kegg/test_pathway.rb | 20 ++++++++++---------- 5 files changed, 35 insertions(+), 35 deletions(-) commit 02aea9f18ff6e3079309a76d04d02ea1f2902e7b Author: Naohisa Goto Date: Fri Oct 15 00:20:48 2010 +0900 Modified tests for Bio::KEGG::GENES following the changes of the class. test/unit/bio/db/kegg/test_genes.rb | 30 +++++++++++++++++++++++++++++- 1 files changed, 29 insertions(+), 1 deletions(-) commit f4b45ea629734ecff820483475d83fef6cbe068e Author: Naohisa Goto Date: Thu Oct 14 23:57:18 2010 +0900 Reverted Bio::KEGG::GENES#genes, gene and motif methods and modified. * Reverted Bio::KEGG::GENES#genes, gene and motif methods which are removed in the last commit. To avoid code duplication, they are also modified to use other methods, and RDoc is added about the deprecation or change of the methods. * Modified RDoc. 
lib/bio/db/kegg/genes.rb | 32 +++++++++++++++++++++++++++++++- 1 files changed, 31 insertions(+), 1 deletions(-) commit dd987911cb4a84e23565bb37707611d054c22101 Author: Toshiaki Katayama Date: Tue Jul 20 11:46:52 2010 +0900 New methods Bio::KEGG::GENES#keggclass etc. * New methods and aliases are added: Bio::KEGG::GENES#keggclass, keggclasses, names_as_array, names, motifs_as_strings, motifs_as_hash, motifs. * Removed Bio::KEGG::GENES#genes, gene and motif methods. * Added a comment about deprecation of CODON_USAGE lines. * This is part of commit e2abe5764f3ded91c82689245f19a0412d3a7afb (original commit message: Changes for TogoWS). lib/bio/db/kegg/genes.rb | 41 ++++++++++++++++++++++++++--------------- 1 files changed, 26 insertions(+), 15 deletions(-) commit b2575f5acfeca269c93a35baa3809fdac17a7271 Author: Naohisa Goto Date: Wed Oct 13 23:12:33 2010 +0900 Release notes for the upcoming release version. RELEASE_NOTES.rdoc | 31 +++++++++++++++++++++++++++++++ 1 files changed, 31 insertions(+), 0 deletions(-) create mode 100644 RELEASE_NOTES.rdoc commit 83992875c45a1fdd54d042c923dee51119026e49 Author: Naohisa Goto Date: Wed Oct 13 23:11:21 2010 +0900 Renamed RELEASE_NOTES.rdoc to doc/RELEASE_NOTES-1.4.0.rdoc. RELEASE_NOTES.rdoc | 167 ------------------------------------------ doc/RELEASE_NOTES-1.4.0.rdoc | 167 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 167 insertions(+), 167 deletions(-) delete mode 100644 RELEASE_NOTES.rdoc create mode 100644 doc/RELEASE_NOTES-1.4.0.rdoc commit f649629eb6216aeabbd2020bcac9b7f870b12395 Author: Naohisa Goto Date: Wed Oct 13 21:58:58 2010 +0900 Added acknowledgement to Kozo Nishida for KEGG parsers. RELEASE_NOTES.rdoc | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) commit 379f177edc0f95dee1ec0c2d2cf679c27918e41b Author: Naohisa Goto Date: Fri Oct 8 16:31:58 2010 +0900 Fixed a variable name mistake in Bio::Command, and English grammar fix. * Fixed a variable name mistake in Bio::Command#no_fork?. 
* English grammar fix for comments. Thanks to Andrew Grimm who reports the fix. lib/bio/command.rb | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) commit 1344c27c90438d8c8840ee507d0ab43224f89054 Author: Naohisa Goto Date: Wed Oct 6 23:47:26 2010 +0900 Bug fix: fork(2) is called on platforms that do not support it. * Bug fix: fork(2) is called on platforms that do not support it. Thanks to Andrew Grimm who reports the bug (fork() is called on platforms that do not support it; http://github.com/bioruby/bioruby/issues#issue/6). * Bio::Command#call_command and query_command can now fall back into using popen when fork(2) is not implemented. * Detection of Windows platform is improved. The idea of the code is taken from Redmine's platform.rb. lib/bio/command.rb | 98 +++++++++++++++++++++++++++++------ test/functional/bio/test_command.rb | 9 +-- 2 files changed, 84 insertions(+), 23 deletions(-) commit 0bfa1c3a8d7b8d03919d54a2a241ca96a79bad83 Author: Naohisa Goto Date: Wed Oct 6 15:49:51 2010 +0900 Bug fix: Bio::MEDLINE#reference is changed not to put empty values * Bug fix: Bio::MEDLINE#reference is changed not to put empty values in the returned Bio::Reference object. I think the original behavior is a bug. This is an incompatible change but the effect is very small. lib/bio/db/medline.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit 8e0bc03d79a1f20743c29f0a44e273d362eaa2cd Author: Naohisa Goto Date: Wed Oct 6 15:39:34 2010 +0900 Bug fix: Bio::MEDLINE#initialize should handle continuation of lines. * Bug fix: Bio::MEDLINE#initialize should handle continuation of lines. Thanks to Steven Bedrick who reports the bug (Bio::MEDLINE#initialize handles multi-line MeSH terms incorrectly; http://github.com/bioruby/bioruby/issues#issue/7). 
lib/bio/db/medline.rb | 6 +++++- 1 files changed, 5 insertions(+), 1 deletions(-) commit 728de78b438108e44066a7ce7490632c81108fb6 Author: Naohisa Goto Date: Wed Oct 6 15:29:10 2010 +0900 Added unit tests for Bio::MEDLINE with test data. * Added unit tests for Bio::MEDLINE with test data. The data is taken from NCBI and the abstract was removed to avoid possible copyright problem. The choice of the data (PMID: 20146148) is suggested by Steven Bedrick in a bug report (Bio::MEDLINE#initialize handles multi-line MeSH terms incorrectly). test/data/medline/20146148_modified.medline | 54 ++++++++++ test/unit/bio/db/test_medline.rb | 148 +++++++++++++++++++++++++++ 2 files changed, 202 insertions(+), 0 deletions(-) create mode 100644 test/data/medline/20146148_modified.medline commit 930095817ce60793ac909a4d01731d1f97bc4fa5 Author: Naohisa Goto Date: Wed Sep 29 20:51:12 2010 +0900 Bug fix: NoMethodError in Bio::Tree#collect_edge! * Bug fix: NoMethodError in Bio::Tree#collect_edge!. Thanks to Kazuhiro Hayashi who reports the bug. lib/bio/tree.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit c7927ec4743ddc4ec4501790bbed097b69f616e7 Author: Naohisa Goto Date: Wed Sep 29 20:49:44 2010 +0900 Modified and improved tests for Bio::Tree. test/unit/bio/test_tree.rb | 393 +++++++++++++++++++++++++++----------------- 1 files changed, 240 insertions(+), 153 deletions(-) commit 0161148c9b4d9ea404af92b4baf8241239a283de Author: Kazuhiro Hayashi Date: Fri Jul 16 00:09:39 2010 +0900 Modified unit tests for Bio::Tree * Modified unit tests for Bio::Tree. 
* This is part of combination of the two commits: * 6675fd930718e41ad009f469b8167f81c9b2ad52 (Modified unit tests and classes) * a6dc63ffe3460ea8d8980b3d6c641356881e0862 (Modified unit test for Bio::Tree) test/unit/bio/test_tree.rb | 174 +++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 173 insertions(+), 1 deletions(-) commit 31ded691a9329e45fe563e5f70138648d3b30bbf Author: Kazuhiro Hayashi Date: Thu Jul 15 21:06:28 2010 +0900 Bug fix: Bio::Tree#remove_edge_if did not work. * Bug fix: Bio::Tree#remove_edge_if did not work. * This is part of commit 6675fd930718e41ad009f469b8167f81c9b2ad52 (original commit message: Modified unit tests and classes) and removed a comment line. lib/bio/tree.rb | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) commit d0a3af23c74004688a8fc0b5be3d09f7144e33a1 Author: Naohisa Goto Date: Wed Sep 22 19:23:50 2010 +0900 Renamed test/data/go/part_of_* to avoid possible confusion * Renamed test/data/go/part_of_* to selected_* to avoid possible confusion: The word "part_of" is a keyword in Gene Ontology. 
test/data/go/part_of_component.ontology | 12 ---------- test/data/go/part_of_gene_association.sgd | 31 ---------------------------- test/data/go/part_of_wikipedia2go | 13 ----------- test/data/go/selected_component.ontology | 12 ++++++++++ test/data/go/selected_gene_association.sgd | 31 ++++++++++++++++++++++++++++ test/data/go/selected_wikipedia2go | 13 +++++++++++ test/unit/bio/db/test_go.rb | 6 ++-- 7 files changed, 59 insertions(+), 59 deletions(-) delete mode 100644 test/data/go/part_of_component.ontology delete mode 100644 test/data/go/part_of_gene_association.sgd delete mode 100644 test/data/go/part_of_wikipedia2go create mode 100644 test/data/go/selected_component.ontology create mode 100644 test/data/go/selected_gene_association.sgd create mode 100644 test/data/go/selected_wikipedia2go commit e4f82da52402f8175bd92b50209b09bc83bfddd6 Author: Naohisa Goto Date: Wed Sep 22 19:21:36 2010 +0900 Removed unused test/data/go/wikipedia2go.txt. test/data/go/wikipedia2go.txt | 728 ----------------------------------------- 1 files changed, 0 insertions(+), 728 deletions(-) delete mode 100644 test/data/go/wikipedia2go.txt commit 5003fd53b0a3852fa23b76ad6ec8e9e76d5850fc Author: Naohisa Goto Date: Thu Sep 16 22:37:31 2010 +0900 Adjusted test data file paths and header lines in test_go.rb. * Adjusted test data file paths. * Adjusted copyright and description in the header. test/unit/bio/db/test_go.rb | 26 +++++++++++++++++--------- 1 files changed, 17 insertions(+), 9 deletions(-) commit 540cb7ab27e79634f5436476cce51cc20ca0f70f Author: Kazuhiro Hayashi Date: Thu Jul 15 21:06:28 2010 +0900 Added tests for Bio::GO classes. * Added tests for Bio::GO classes. * This is part of combination of the three commits: * 555f7b49a43e7c35c82cd48b199af96ca93d4179 (added test_genbank.rb and test_go.rb with the test files. 
modified test_pdb.rb) * e966f17546427b8ad39cb9942807ceb8a068d746 (modified test/unit/bio/db/test_go.rb and added the test files for each GO class) * 6675fd930718e41ad009f469b8167f81c9b2ad52 (Modified unit tests and classes) test/unit/bio/db/test_go.rb | 163 +++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 163 insertions(+), 0 deletions(-) create mode 100644 test/unit/bio/db/test_go.rb commit 5ff01f7dfbc3661d8c66b44874a2ba4ff2f96b56 Author: Kazuhiro Hayashi Date: Fri Jun 11 21:02:29 2010 +0900 Added test data for Bio::GO classes. * Added test data for Bio::GO classes. * This is part of combination of the three commits: * 555f7b49a43e7c35c82cd48b199af96ca93d4179 (added test_genbank.rb and test_go.rb with the test files. modified test_pdb.rb) * e966f17546427b8ad39cb9942807ceb8a068d746 (modified test/unit/bio/db/test_go.rb and added the test files for each GO class) * 6675fd930718e41ad009f469b8167f81c9b2ad52 (Modified unit tests and classes) * License for the test data is the public domain. ( http://wiki.geneontology.org/index.php/Legal_FAQ ) test/data/go/part_of_component.ontology | 12 + test/data/go/part_of_gene_association.sgd | 31 ++ test/data/go/part_of_wikipedia2go | 13 + test/data/go/wikipedia2go.txt | 728 +++++++++++++++++++++++++++++ 4 files changed, 784 insertions(+), 0 deletions(-) create mode 100644 test/data/go/part_of_component.ontology create mode 100644 test/data/go/part_of_gene_association.sgd create mode 100644 test/data/go/part_of_wikipedia2go create mode 100644 test/data/go/wikipedia2go.txt commit d4210673a1a696bfb02c93b7743e60dea1a5fcc8 Author: Kazuhiro Hayashi Date: Thu Jul 15 21:06:28 2010 +0900 Bug fix: Typo and missing field in Bio::GO::GeneAssociation#to_str. * Bug fix: Typo and missing field in Bio::GO::GeneAssociation#to_str. * This is part of commit 6675fd930718e41ad009f469b8167f81c9b2ad52 (original commit message: Modified unit tests and classes) and modified. 
The bug is also reported by Ralf Stephan ([BioRuby] [PATCH] GO annotations fixes and improvements). lib/bio/db/go.rb | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) commit acab0bb4a4e0f970f8f6be3aea2c371f63a49fa7 Author: Naohisa Goto Date: Wed Aug 25 22:58:42 2010 +0900 Database names used in tests are changed, following the change of TogoWS. * Database names used in tests are changed, following the change of TogoWS: "gene" to "kegg-genes" and "enzyme" to "kegg-enzyme". test/functional/bio/io/test_togows.rb | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) commit 1fb1f1cc5ca3edb42de03874b3527ce0cf0de294 Author: Toshiaki Katayama Date: Tue Jul 20 11:46:52 2010 +0900 Database name used in tests is changed, following the change of TogoWS. * The database name "genbank" is changed to "nucleotide", following the change in TogoWS. * This is part of commit e2abe5764f3ded91c82689245f19a0412d3a7afb. (Original commit message: Changes for TogoWS) test/functional/bio/io/test_togows.rb | 10 +++++----- 1 files changed, 5 insertions(+), 5 deletions(-) commit 1db4b8011f4fee158aeb78ec2d76c76688714788 Author: Naohisa Goto Date: Wed Aug 11 23:33:49 2010 +0900 New method Bio::Fastq#mask for masking low score regions. * New method Bio::Fastq#mask for masking low score regions is added with unit tests. This method is implemented as a shortcut of Bio::Sequence#mask_with_quality_score method. lib/bio/db/fastq.rb | 15 +++++++++++++++ test/unit/bio/db/test_fastq.rb | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 0 deletions(-) commit 72b47b2391a01c5f4214fd188abe0857cd3ed166 Author: Naohisa Goto Date: Wed Aug 11 23:04:57 2010 +0900 New module Bio::Sequence::SequenceMasker to help masking a sequence. * New module Bio::Sequence::SequenceMasker to help masking a sequence. The module is only expected to be included in Bio::Sequence. 
In the future, methods in this module might be moved to Bio::Sequence or other module and this module might be removed. * Unit tests for Bio::Sequence::SequenceMasker are also added. lib/bio/sequence.rb | 2 + lib/bio/sequence/sequence_masker.rb | 95 +++++++++++++ test/unit/bio/sequence/test_sequence_masker.rb | 169 ++++++++++++++++++++++++ 3 files changed, 266 insertions(+), 0 deletions(-) create mode 100644 lib/bio/sequence/sequence_masker.rb create mode 100644 test/unit/bio/sequence/test_sequence_masker.rb commit a2b21fa31c87fc47ae375380fb34958460414107 Author: Naohisa Goto Date: Wed Aug 11 22:59:46 2010 +0900 New method Bio::Sequence#output_fasta, a replacement for to_fasta. * New method Bio::Sequence#output_fasta, a replacement for Bio::Sequence#to_fasta. This is also implemented as a shortcut of Bio::Sequence#output(:fasta). lib/bio/sequence/format.rb | 14 ++++++++++++++ 1 files changed, 14 insertions(+), 0 deletions(-) commit d139a1e3e7f77317102eaa24649515541761a212 Author: Toshiaki Katayama Date: Tue Jul 20 11:46:52 2010 +0900 File format autodetection for Bio::KEGG::PATHWAY and Bio::KEGG::MODULE. * Added file format autodetection for Bio::KEGG::PATHWAY and Bio::KEGG::MODULE. * This is part of commit e2abe5764f3ded91c82689245f19a0412d3a7afb. (Original commit message: Changes for TogoWS) lib/bio/io/flatfile/autodetection.rb | 6 ++++++ 1 files changed, 6 insertions(+), 0 deletions(-) commit 920d92c13b44921a3f58ddbd8566e7a90dd59996 Author: Toshiaki Katayama Date: Tue Jul 20 11:46:52 2010 +0900 Added autoload for Bio::KEGG::PATHWAY and Bio::KEGG::MODULE. * Added autoload for Bio::KEGG::PATHWAY and Bio::KEGG::MODULE. * This is part of commit e2abe5764f3ded91c82689245f19a0412d3a7afb. (Original commit message: Changes for TogoWS) lib/bio.rb | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) commit bf342c28e0c75c9b48770144f421dd12babd9d0e Author: Naohisa Goto Date: Tue Aug 17 23:19:47 2010 +0900 Unit tests for Bio::KEGG::MODULE is modified and improved. 
test/unit/bio/db/kegg/test_module.rb | 194 ++++++++++++++++++++++++++++++---- 1 files changed, 173 insertions(+), 21 deletions(-) commit 1742568a4f27e75a19441e4a4437ca3f1c0251f8 Author: Naohisa Goto Date: Tue Aug 17 23:15:44 2010 +0900 In Bio::KEGG::MODULE, an internal-only method is changed to private. lib/bio/db/kegg/module.rb | 4 ++++ 1 files changed, 4 insertions(+), 0 deletions(-) commit 8f5ff66cca678ac6be75a7dda1ff840ac3111f42 Author: Naohisa Goto Date: Tue Aug 17 23:10:39 2010 +0900 Removed unused comments. lib/bio/db/kegg/module.rb | 32 -------------------------------- 1 files changed, 0 insertions(+), 32 deletions(-) commit f9d23fb32eeb15dc57580e25653d3f2fff5fa1dc Author: Naohisa Goto Date: Tue Aug 17 22:59:18 2010 +0900 Reverted Bio::KEGG::MODULE#keggclass. * Reverted Bio::KEGG::MODULE#keggclass. * Removed keggclasses and keggclasses_as_array methods, because they are inconsistent with Bio::KEGG::ORTHOLOGY#keggclasses. lib/bio/db/kegg/module.rb | 6 +----- 1 files changed, 1 insertions(+), 5 deletions(-) commit 94188d23ad843c7cb998c99f46371e540ce457dc Author: Toshiaki Katayama Date: Tue Jul 20 11:41:50 2010 +0900 For Bio::KEGG::MODULE, methods are added and modified. * For Bio::KEGG::MODULE, methods are added and modified. * New methods: definition, etc. * Removed methods: pathway, orthologies, keggclass, etc. * Changed methods: reactions, compounds, etc. * (Original commit message: Changes for TogoWS) lib/bio/db/kegg/module.rb | 136 +++++++++++++++++++++++++++++++++++++++++--- 1 files changed, 126 insertions(+), 10 deletions(-) commit 92efc03707a49fa0b2c02e7b2f8b53749a75ad59 Author: Kozo Nishida Date: Thu Feb 4 22:59:20 2010 +0900 New class Bio::KEGG::MODULE, parser for KEGG MODULE (Pathway Module). 
lib/bio/db/kegg/module.rb | 83 ++++++++++++++++++++++++++++++ test/data/KEGG/M00118.module | 44 ++++++++++++++++ test/unit/bio/db/kegg/test_module.rb | 94 ++++++++++++++++++++++++++++++++++ 3 files changed, 221 insertions(+), 0 deletions(-) create mode 100644 lib/bio/db/kegg/module.rb create mode 100644 test/data/KEGG/M00118.module create mode 100644 test/unit/bio/db/kegg/test_module.rb commit b7c75cc6023d5dc9096111fde99a6e89db2e4bdc Author: Naohisa Goto Date: Wed May 12 01:13:52 2010 +0900 Improvement of tests for Bio::KEGG::ORTHOLOGY using updated test data. test/unit/bio/db/kegg/test_orthology.rb | 95 +++++++++++++++++++++++++++++++ 1 files changed, 95 insertions(+), 0 deletions(-) commit f61c232371f6f673044960b0626486b2e8e160b8 Author: Naohisa Goto Date: Wed May 12 01:11:16 2010 +0900 Updated test data K02338.orthology to follow KEGG format changes. test/data/KEGG/K02338.orthology | 232 ++++++++++++++++++++++++++++++--------- 1 files changed, 180 insertions(+), 52 deletions(-) commit 2aa060f42263392877683a47ef9bd744ef4de7f8 Author: Naohisa Goto Date: Wed May 12 01:03:18 2010 +0900 Incompatible change of Bio::KEGG::ORTHOLOGY#pathways, and added new methods * Incompatible change of Bio::KEGG::ORTHOLOGY#pathways due to the changes of KEGG ORTHOLOGY format changes: Because PATHWAY field is added, the method is changed to return a hash. The pathway method of old behavior is renamed to pathways_in_keggclass for compatibility. * New methods are added to Bio::KEGG::ORTHOLOGY: references, pathways_as_strings, pathways_as_hash, pathway_modules, pathway_modules_as_hash, pathway_modules_as_strings. lib/bio/db/kegg/orthology.rb | 41 ++++++++++++++++++++++++++++++++++++++++- 1 files changed, 40 insertions(+), 1 deletions(-) commit 2e6754f2598f66f29afb573c3dc83592089b411c Author: Naohisa Goto Date: Wed May 12 00:59:26 2010 +0900 Changed to use Bio::KEGG::Common::PathwayModulesAsHash. 
lib/bio/db/kegg/pathway.rb | 25 ++++++++----------------- 1 files changed, 8 insertions(+), 17 deletions(-) commit 527920da990f4374e20333d6852b810ea73ead02 Author: Naohisa Goto Date: Wed May 12 00:55:10 2010 +0900 New module Bio::KEGG::Common::PathwayModulesAsHash (internal use only) * New module Bio::KEGG::Common::PathwayModulesAsHash is added, based on Bio::KEGG::PATHWAY#pathway_modules_as_hash method. Note that the method is Bio::KEGG::* internal use only. lib/bio/db/kegg/common.rb | 22 ++++++++++++++++++++++ 1 files changed, 22 insertions(+), 0 deletions(-) commit 36041377dbafce642180eb1c664ee36ef21d3bfb Author: Naohisa Goto Date: Fri Mar 19 23:25:56 2010 +0900 New method Bio::KEGG::PATHWAY#references. * New method Bio::KEGG::PATHWAY#references. * Additional unit tests for Bio::KEGG::PATHWAY with test data. lib/bio/db/kegg/pathway.rb | 8 ++ test/data/KEGG/map00030.pathway | 37 ++++++++++ test/unit/bio/db/kegg/test_pathway.rb | 120 ++++++++++++++++++++++++++++++++- 3 files changed, 162 insertions(+), 3 deletions(-) create mode 100644 test/data/KEGG/map00030.pathway commit d743892d298786eb9e88e2a51ac9f7774785848f Author: Naohisa Goto Date: Fri Mar 19 00:06:20 2010 +0900 Improvement of Bio::KEGG::Common::References#references. * Improvement of Bio::KEGG::Common::References#references: added support for parsing "journal (year)" style. lib/bio/db/kegg/common.rb | 4 ++++ 1 files changed, 4 insertions(+), 0 deletions(-) commit 35807ae22c9ad9a3ce37ed5c655d1c080f8d2334 Author: Naohisa Goto Date: Fri Mar 19 00:02:10 2010 +0900 Implementation of Bio::KEGG::GENOME#references is moved. * Implementation of Bio::KEGG::GENOME#references is moved to Bio::KEGG::Common::References#references, which will be shared with Bio::KEGG::Pathway and other classes. 
lib/bio/db/kegg/common.rb | 61 +++++++++++++++++++++++++++++++++++++++++++- lib/bio/db/kegg/genome.rb | 62 +++++--------------------------------------- 2 files changed, 67 insertions(+), 56 deletions(-) commit 263c37a07203b87e3b33d35adef3aa3ddcf89601 Author: Naohisa Goto Date: Wed Mar 17 00:26:56 2010 +0900 Bug fix: Bio::KEGG::GENES#pathway may fail, and other parse issues due to the format changes of KEGG GENES. * Bug fix: Bio::KEGG::GENES#pathway may return unexpected value after calling pathways, pathways_as_hash or pathways_as_string methods. * Bio::KEGG::GENES#eclinks, Bio::KEGG::Common::PathwaysAsHash, and Bio::KEGG::Common::OrthologsAsHash are modified due to the file format changes of KEGG::GENES. lib/bio/db/kegg/common.rb | 9 +++++---- lib/bio/db/kegg/genes.rb | 17 +++++++++++------ 2 files changed, 16 insertions(+), 10 deletions(-) commit 364cd405a10d0742091281c5a16b77cb54a8087e Author: Naohisa Goto Date: Wed Mar 17 00:25:51 2010 +0900 New methods Bio::Location#== and Bio::Locations#==. lib/bio/location.rb | 39 +++++++++++++++++++++++++++++++++++++++ 1 files changed, 39 insertions(+), 0 deletions(-) commit 2c7ffd6808e572cf35b82d6e74790447d44d08cc Author: Naohisa Goto Date: Wed Mar 17 00:23:00 2010 +0900 Improved unit tests for Bio::KEGG::GENES with new test data. test/data/KEGG/b0529.gene | 47 +++++++ test/unit/bio/db/kegg/test_genes.rb | 254 ++++++++++++++++++++++++++++++++++- 2 files changed, 300 insertions(+), 1 deletions(-) create mode 100644 test/data/KEGG/b0529.gene commit 764869fd42d1e3f96885b3499844bf4fadde80f1 Author: Naohisa Goto Date: Wed Mar 17 00:10:51 2010 +0900 Bug fix: Bio::KEGG::GENOME parser issues for PLASMID, REFERENCE, and ORIGINAL_DB fields. * Bug fix: Fixed parse error for PLASMID fields due to the changes of the KEGG GENOME file format. For the bug fix, tag_get and tag_cut methods are redefined. * Bug fix: Fixed parse error for REFERENCE fields due to the changes of the file format. 
* New method Bio::KEGG::GENOME#original_databases is added to get ORIGINAL_DB record as an Array of String objects. lib/bio/db/kegg/genome.rb | 69 +++++++++++++++++++++++++++++++++++++++----- 1 files changed, 61 insertions(+), 8 deletions(-) commit 75db7c6c7132f19e212be36d06643a0f48a7df44 Author: Naohisa Goto Date: Wed Mar 17 00:09:09 2010 +0900 New method Bio::Reference#==. lib/bio/reference.rb | 24 ++++++++++++++++++++++++ 1 files changed, 24 insertions(+), 0 deletions(-) commit 64a6bfb52ca1bb27bd38c86c060e2925f38924fb Author: Naohisa Goto Date: Wed Mar 17 00:07:43 2010 +0900 Newly added unit tests for Bio::KEGG::GENOME with test data. test/data/KEGG/T00005.genome | 140 ++++++++++++ test/data/KEGG/T00070.genome | 34 +++ test/unit/bio/db/kegg/test_genome.rb | 408 ++++++++++++++++++++++++++++++++++ 3 files changed, 582 insertions(+), 0 deletions(-) create mode 100644 test/data/KEGG/T00005.genome create mode 100644 test/data/KEGG/T00070.genome create mode 100644 test/unit/bio/db/kegg/test_genome.rb commit 21c92bb991c83dce27a4411382c456cdd6029a82 Author: Naohisa Goto Date: Tue Mar 9 23:24:15 2010 +0900 Renamed Bio::KEGG::PATHWAY#keggmodules to pathway_modules_as_strings, etc. * Bio::KEGG::PATHWAY#keggmodules is renamed to pathway_modules_as_strings. * New method pathway_modules_as_hash and its alias method pathway_modules is added. * Bio::KEGG::PATHWAY#rel_pathways is renamed to rel_pathways_as_strings. * New method rel_pathways_as_hash is added, and rel_pathways is changed to be the alias of the rel_pathways_as_hash method. * Unit tests are also changed. lib/bio/db/kegg/pathway.rb | 42 +++++++++++++++++++++++++++++++- test/unit/bio/db/kegg/test_pathway.rb | 29 ++++++++++++++++++++-- 2 files changed, 66 insertions(+), 5 deletions(-) commit fa10f38716ec2eecd6fa8e8b027085377e9ee421 Author: Naohisa Goto Date: Tue Mar 9 21:13:18 2010 +0900 Fixed text indentations. 
lib/bio/db/kegg/pathway.rb | 4 ++-- test/unit/bio/db/kegg/test_pathway.rb | 25 +++++++++++++------------ 2 files changed, 15 insertions(+), 14 deletions(-) commit 0916b0cac5d17ce47ef5cc3382e3167293bcf4c2 Author: Kozo Nishida Date: Tue Feb 2 17:34:17 2010 +0900 Newly added Bio::KEGG::PATHWAY with test code and test data. lib/bio/db/kegg/pathway.rb | 73 +++++++++++++++++++++++++++++++++ test/data/KEGG/map00052.pathway | 13 ++++++ test/unit/bio/db/kegg/test_pathway.rb | 57 +++++++++++++++++++++++++ 3 files changed, 143 insertions(+), 0 deletions(-) create mode 100644 lib/bio/db/kegg/pathway.rb create mode 100644 test/data/KEGG/map00052.pathway create mode 100644 test/unit/bio/db/kegg/test_pathway.rb commit c3ceea339164754071f03ce13da4f65e08230f40 Author: Naohisa Goto Date: Fri Feb 19 00:43:38 2010 +0900 Tutorial.rd.html is regenerated. doc/Tutorial.rd.html | 55 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 54 insertions(+), 1 deletions(-) commit 315da0213edfece696d22cc4648cb7a74f18ad34 Author: Ra Date: Sun Feb 7 10:38:36 2010 +0100 Added BioSQL docs links doc/Tutorial.rd | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) commit 22374415873906f4bcd3e84950c14b5f0b6c7e61 Author: Ra Date: Sun Feb 7 02:39:16 2010 +0100 Added link to BioSQL install doc. doc/Tutorial.rd | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit 4d18dd2f5a3f18348e5f4aa07b14c104d3a65f5b Author: Ra Date: Fri Feb 5 21:30:36 2010 +0100 Added other examples about BioSQL doc/Tutorial.rd | 36 ++++++++++++++++++++++++++++-------- 1 files changed, 28 insertions(+), 8 deletions(-) commit b704f01cd0799ab1a7e3975119e9d6139ddfbd51 Author: Ra Date: Wed Jan 27 21:08:25 2010 +0100 BioSQL tutorial continue... doc/Tutorial.rd | 18 ++++++++++++++++-- 1 files changed, 16 insertions(+), 2 deletions(-) commit 1993a1566b5ade937703d0291c4eaf2de673d170 Author: Ra Date: Wed Jan 27 20:32:23 2010 +0100 BioSQL tutorial initial draft.
doc/Tutorial.rd | 25 ++++++++++++++++++++++++- 1 files changed, 24 insertions(+), 1 deletions(-) commit 09047b664a03492d7546d92b619faacee72d0cd5 Author: Jan Aerts Date: Sun Feb 7 17:58:59 2010 +0900 Added code example that will serve as basis for sequence/codon howto doc/howtos/sequence_codon.txt | 38 ++++++++++++++++++++++++++++++++++++++ 1 files changed, 38 insertions(+), 0 deletions(-) create mode 100644 doc/howtos/sequence_codon.txt commit c1e2165ba801cccd52135b13ed36713517e1fa8a Author: Naohisa Goto Date: Fri Feb 5 12:50:38 2010 +0900 Suppressed "warning: parenthesize argument(s) for future version" in Ruby 1.8.5. lib/bio/appl/paml/codeml/report.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit f7ce9ba6a2f4e680ee40017a21aa95d05baf34f4 Author: Naohisa Goto Date: Thu Feb 4 20:26:00 2010 +0900 Added :startdoc: and removed an empty line for RDoc. lib/bio/appl/paml/codeml/report.rb | 4 +++- 1 files changed, 3 insertions(+), 1 deletions(-) commit 6d4d2e1f37efb1e53091fbc9a0977568996788ff Author: Naohisa Goto Date: Thu Feb 4 16:58:10 2010 +0900 New unit test for Bio::PAML::Codeml::Report. * New unit test for Bio::PAML::Codeml::Report and related classes. The test code is copied from the examples described in lib/bio/appl/paml/codeml/report.rb and modified for the unit test. test/unit/bio/appl/paml/codeml/test_report.rb | 253 +++++++++++++++++++++++++ 1 files changed, 253 insertions(+), 0 deletions(-) create mode 100644 test/unit/bio/appl/paml/codeml/test_report.rb commit 8418549811293c3e20b91d4e95da2cb2a282a064 Author: Naohisa Goto Date: Thu Feb 4 16:47:37 2010 +0900 Changes due to the rename to report_single.rb. .../bio/appl/paml/codeml/test_report_single.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit 7bfb428da237709b243c8d0e4646bd41710d1519 Author: Naohisa Goto Date: Thu Feb 4 16:39:10 2010 +0900 Renamed codeml/test_report.rb to codeml/test_report_single.rb. 
* Renamed test/unit/bio/appl/paml/codeml/test_report.rb to test_report_single.rb. test/unit/bio/appl/paml/codeml/test_report.rb | 46 -------------------- .../bio/appl/paml/codeml/test_report_single.rb | 46 ++++++++++++++++++++ 2 files changed, 46 insertions(+), 46 deletions(-) delete mode 100644 test/unit/bio/appl/paml/codeml/test_report.rb create mode 100644 test/unit/bio/appl/paml/codeml/test_report_single.rb commit 762d38b1564da7d846e3dcd461cf465aa685a1ae Author: Pjotr Prins Date: Tue Jan 12 10:13:35 2010 +0100 Modified output of Bio::PAML::Codeml::PositiveSites#graph_to_s * Modified output of Bio::PAML::Codeml::PositiveSites#graph_to_s. (Part of commit ea350da85e5db2ba35cb8dd1e86e3d4323ee3fd1. Original commit message is: HtmlPaml: fixed some missing output use real greek omega in output) lib/bio/appl/paml/codeml/report.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit f88645cd783b7027950133c0badb0a8da8e4fb95 Author: Pjotr Prins Date: Tue Jan 12 09:24:46 2010 +0100 Codeml: no negative gaps lib/bio/appl/paml/codeml/report.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit 978b21cf90d0280e6e6c7d6e4fa65c49692bdd69 Author: Pjotr Prins Date: Mon Jan 11 17:31:45 2010 +0100 Codeml: always raise an error when significance can not be calculated lib/bio/appl/paml/codeml/report.rb | 15 ++++++++++----- 1 files changed, 10 insertions(+), 5 deletions(-) commit 12b5895f6f1819252d616bb0a38aa88a7828daff Author: Pjotr Prins Date: Mon Jan 11 17:22:34 2010 +0100 Codeml: oops lib/bio/appl/paml/codeml/report.rb | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) commit a8ff0a07fdbef72f72103f0bceb9c24a63162fc6 Author: Pjotr Prins Date: Mon Jan 11 17:19:26 2010 +0100 Codeml: added significance testing for a few model combinations lib/bio/appl/paml/codeml/report.rb | 57 +++++++++++++++++++++++++++++++++++- 1 files changed, 56 insertions(+), 1 deletions(-) commit 0e11af19450faca3568f89b23d5bd764688f75c0 Author: Pjotr Prins Date: Mon Jan 11 16:24:51 
2010 +0100 Codeml: raise error instead of a 'nil' error when buffer is incomplete lib/bio/appl/paml/codeml/report.rb | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) commit 1cb2aaaa701a1613812dd479201d27c1d7dcf016 Author: Pjotr Prins Date: Mon Jan 11 14:37:52 2010 +0100 Bio::PAML::Codeml::PositiveSites#graph_to_s gets fill character * Bio::PAML::Codeml::PositiveSites#graph_to_s gets fill character as an argument. (Part of commit d67259c9f203dc92c68ad04b4112329a7093a259. Original commit message is: HtmlPaml: show colors for probabilities of positive selection) lib/bio/appl/paml/codeml/report.rb | 15 +++++++++------ 1 files changed, 9 insertions(+), 6 deletions(-) commit ae5b9cf9ee697cc237c77335b12b57709a0e7a46 Author: Pjotr Prins Date: Mon Jan 11 13:37:52 2010 +0100 Codeml: return correct buffer lib/bio/appl/paml/codeml/report.rb | 10 +++++----- 1 files changed, 5 insertions(+), 5 deletions(-) commit 44f2e28c3e0d382505b067ec3c7aa55cbb9f0a38 Author: Pjotr Prins Date: Mon Jan 11 13:10:40 2010 +0100 Improvement of Bio::PAML::Codeml::PositiveSites#initialize, etc * Improved target analysis location detection in Bio::PAML::Codeml::PositiveSites#initialize. * Changed description inside Bio::PAML::Codeml::Report#nb_sites and sites methods. * This is part of commit e88ff474748b3295a8a4089356d3086638200d64. 
(Original commit message: HtmlPaml: improved output) lib/bio/appl/paml/codeml/report.rb | 20 ++++++++++++-------- 1 files changed, 12 insertions(+), 8 deletions(-) commit ee8973696d0434c591ceaffc580f1aa30fd036f9 Author: Pjotr Prins Date: Mon Jan 11 12:54:40 2010 +0100 Codeml: fixed doctests lib/bio/appl/paml/codeml/report.rb | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) commit f7bbb0859e28cb51137d0a8f8d962821eb67db91 Author: Pjotr Prins Date: Mon Jan 11 12:51:25 2010 +0100 New method Bio::PAML::Codeml::PositiveSites#to_s * New method Bio::PAML::Codeml::PositiveSites#to_s (part of the commit 82e933fd1961a2b31873bc37cbf3205adbf0a6de, original commit message: HtmlPaml: add facility for color output) lib/bio/appl/paml/codeml/report.rb | 6 ++++++ 1 files changed, 6 insertions(+), 0 deletions(-) commit d4f3dbaf78f623d870a1a76ab1353d786e0fb73b Author: Pjotr Prins Date: Mon Jan 11 12:34:59 2010 +0100 Codeml: HtmlPaml: minor tweaks lib/bio/appl/paml/codeml/report.rb | 7 ++----- 1 files changed, 2 insertions(+), 5 deletions(-) commit 7d41b6acb41c5913622fde127a030f940a432cc5 Author: Pjotr Prins Date: Mon Jan 11 12:19:15 2010 +0100 Codeml: add short description to positive sites line lib/bio/appl/paml/codeml/report.rb | 13 +++++++++++++ 1 files changed, 13 insertions(+), 0 deletions(-) commit a9d6765b3a5d23be7e8cf59954d67cd2354e5878 Author: Pjotr Prins Date: Mon Jan 11 12:13:55 2010 +0100 Codeml: fixed bug in graph output lib/bio/appl/paml/codeml/report.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit af124dcaa2adb446e273456f8dd0b84aff9b00db Author: Pjotr Prins Date: Mon Jan 11 12:11:00 2010 +0100 Codeml: added graph_seq, which shows the AA of the first sequence at positive sites lib/bio/appl/paml/codeml/report.rb | 11 +++++++++-- 1 files changed, 9 insertions(+), 2 deletions(-) commit 1924dcd951a9e655726bc1af72626526ed223258 Author: Pjotr Prins Date: Fri Jan 8 10:13:37 2010 +0100 Codeml: added :stopdoc: directive for rdoc 
lib/bio/appl/paml/codeml/report.rb | 21 ++++++++++++--------- 1 files changed, 12 insertions(+), 9 deletions(-) commit 7b68fd9d785723935f90144f67999fcf74bcc7c0 Author: Pjotr Prins Date: Fri Jan 8 10:01:49 2010 +0100 Codeml: fixed the doctests and added some info. all tests pass & lib/bio/appl/paml/codeml/report.rb | 43 ++++++++++++++++++++++++----------- 1 files changed, 29 insertions(+), 14 deletions(-) commit ce212c507e9e81120e7ad12be6df955e90d0ad33 Author: Pjotr Prins Date: Fri Jan 8 09:46:05 2010 +0100 Codeml: exclude TestFile class from RDoc generated documentation lib/bio/appl/paml/codeml/report.rb | 9 ++++++--- 1 files changed, 6 insertions(+), 3 deletions(-) commit 9d64a70b628cb3cda9cc1576b9966dae242e5230 Author: Pjotr Prins Date: Fri Jan 8 09:39:02 2010 +0100 Codeml: added many comments lib/bio/appl/paml/codeml/report.rb | 87 +++++++++++++++++++++++++++++------ 1 files changed, 72 insertions(+), 15 deletions(-) commit 2b92df64016865a2ab40c93650409cfd67a2a98e Author: Pjotr Prins Date: Mon Jan 4 17:57:32 2010 +0100 codeml: Added parser for full Bayesian sites all tests pass & lib/bio/appl/paml/codeml/report.rb | 52 ++++++++++++++++++++++++++++++----- 1 files changed, 44 insertions(+), 8 deletions(-) commit 198f0c014f7993fbe22825d0be67e9b1aa19d2de Author: Pjotr Prins Date: Mon Jan 4 17:37:15 2010 +0100 codeml: show graph lib/bio/appl/paml/codeml/report.rb | 46 ++++++++++++++++++++++++++++++++++- 1 files changed, 44 insertions(+), 2 deletions(-) commit 0843bb4d79dc4d94a22d79a797a39dc2866222c5 Author: Pjotr Prins Date: Mon Jan 4 17:09:27 2010 +0100 Codeml: added full support for positive selection sites doctests + unit tests pass & lib/bio/appl/paml/codeml/report.rb | 150 ++++++++++++++++++++++++++++-------- 1 files changed, 118 insertions(+), 32 deletions(-) commit 1610c5d86cddd53f7f0300d0f7a137daaa61ef94 Author: Pjotr Prins Date: Mon Jan 4 12:33:20 2010 +0100 codeml: added M3 classes lib/bio/appl/paml/codeml/report.rb | 28 +++++++++++++++++++++++++--- 1 files 
changed, 25 insertions(+), 3 deletions(-) commit efc939fbd300ceb371b342172382cdec9fcc74b7 Author: Pjotr Prins Date: Mon Jan 4 12:02:47 2010 +0100 codeml: adding compatibility layer for single model (old type) unit tests pass & lib/bio/appl/paml/codeml/report.rb | 44 +++++++++++++++++++++++++++++++----- 1 files changed, 38 insertions(+), 6 deletions(-) commit b89990daa3ea26a9f9195ec16044fb2070bcdd1a Author: Pjotr Prins Date: Sun Jan 3 12:34:53 2010 +0100 Implementation parsing one model - doctests for M0 pass lib/bio/appl/paml/codeml/report.rb | 90 ++++++++++++++++++++++++++++++++---- 1 files changed, 81 insertions(+), 9 deletions(-) commit dce447d3e81e738323e6fb6b2d28324e1fa62e7d Author: Pjotr Prins Date: Sun Jan 3 11:27:54 2010 +0100 Codeml: use BioTestFile for locating test data in the doctest lib/bio/appl/paml/codeml/report.rb | 9 ++++++++- 1 files changed, 8 insertions(+), 1 deletions(-) commit 27a7b558d60b7ec127df2c351542433c321704ac Author: Pjotr Prins Date: Sat Jan 2 23:36:47 2010 +0100 Codeml: split new type report and old type report lib/bio/appl/paml/codeml/report.rb | 19 +++++++++++++++---- 1 files changed, 15 insertions(+), 4 deletions(-) commit 027808e4723ca77af3e15b461ddcc09faf692732 Author: Pjotr Prins Date: Sat Jan 2 23:25:22 2010 +0100 Added example files for PAML codeml dual model runs test/data/paml/codeml/models/aa.aln | 26 ++ test/data/paml/codeml/models/aa.dnd | 13 + test/data/paml/codeml/models/aa.ph | 13 + test/data/paml/codeml/models/alignment.phy | 49 ++++ test/data/paml/codeml/models/results0-3.txt | 312 ++++++++++++++++++++++++ test/data/paml/codeml/models/results7-8.txt | 340 +++++++++++++++++++++++++++ 6 files changed, 753 insertions(+), 0 deletions(-) create mode 100644 test/data/paml/codeml/models/aa.aln create mode 100644 test/data/paml/codeml/models/aa.dnd create mode 100644 test/data/paml/codeml/models/aa.ph create mode 100644 test/data/paml/codeml/models/alignment.phy create mode 100644 test/data/paml/codeml/models/results0-3.txt 
create mode 100644 test/data/paml/codeml/models/results7-8.txt commit 1d35e616ce411bf643ab6dcb7126a6e1aca1e186 Author: Pjotr Prins Date: Sat Jan 2 17:04:56 2010 +0100 Codeml::Report Added new description and reference lib/bio/appl/paml/codeml/report.rb | 113 ++++++++++++++++++++++++++++++------ 1 files changed, 96 insertions(+), 17 deletions(-) commit d21b26044e776fab44dbc95f181afd04b67abe28 Author: Naohisa Goto Date: Mon Feb 1 22:31:21 2010 +0900 Bug fix and Ruby 1.9 support: Bio::Command.call_command_fork etc. * Bug fix: In Bio::Command.call_command_fork, thread switching is disabled in the child process. Thanks to Andrew Grimm who reports the bug ([BioRuby] Thread-safety of alignment). Note that call_command_fork no longer works in Ruby 1.9 because it is changed to use Thread.critical which is removed in Ruby 1.9. * Ruby 1.9 support: In Ruby 1.9, Bio::Command.call_command_popen bypasses shell execution by passing command-line as an Array, which is a new feature added in Ruby 1.9. Now, call_command_popen is safe and robust enough with Ruby 1.9. * Ruby 1.9 support: In Ruby 1.9, Bio::Command.call_command and query_command use call_command_popen and query_command_popen, respectively. * RDoc for the above and related methods are modified. lib/bio/command.rb | 80 +++++++++++++++++++++++++++++++---- test/functional/bio/test_command.rb | 4 ++ 2 files changed, 76 insertions(+), 8 deletions(-) commit 981dc1c89049bf00e56a9e83ef352cb4c4b45d6a Author: Naohisa Goto Date: Tue Feb 2 22:47:36 2010 +0900 Bug fix: Bio::FastaNumericFormat#to_biosequence bug fix * Bug fix: New method Bio::FastaNumericFormat#to_biosequence is defined to avoid NomethodError occurred in the superclass'es method. For the purpose, a new module Bio::Sequence::Adapter::FastaNumericFormat is added. Thanks to Hiroyuki MISHIMA who reports the bug ([BioRuby] trouble on the FASTA.QUAL format (Bio::FastaNumericFormat)). * Newly added unit test for Bio::FastaNumericFormat#to_biosequence. 
lib/bio/db/fasta/qual.rb | 24 ++++++++++++++++++++++++ lib/bio/db/fasta/qual_to_biosequence.rb | 29 +++++++++++++++++++++++++++++ lib/bio/sequence/adapter.rb | 1 + test/unit/bio/db/test_qual.rb | 11 +++++++++-- 4 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 lib/bio/db/fasta/qual_to_biosequence.rb commit 29ed6870e453f54aac2ce9dcb7891186eb01c40d Author: Ben J Woodcroft Date: Wed Jan 13 14:38:13 2010 +1000 Bug fix: fixed uniprot GN parsing issue lib/bio/db/embl/sptr.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit a3002a79ec012559f5847ba8ebe4faf6e7fa609e Author: Naohisa Goto Date: Fri Jan 8 22:14:15 2010 +0900 Tutorial.rd.html is regenerated. doc/Tutorial.rd.html | 29 ++++++++++++++--------------- 1 files changed, 14 insertions(+), 15 deletions(-) commit 9238c3cb0e8f1156d23a5dfb3ce4e299a91b9f23 Author: Pjotr Prins Date: Fri Jan 8 09:04:23 2010 +0100 Tutorial: removed bad links doc/Tutorial.rd | 10 +--------- 1 files changed, 1 insertions(+), 9 deletions(-) commit 60542fd9863c5fc1240a15cc76f8fa90644a15c8 Author: Naohisa Goto Date: Wed Jan 6 20:38:25 2010 +0900 Changed header and the depth of loading helper due to the rename. test/unit/bio/appl/clustalw/test_report.rb | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) commit 68924e736df76fe3c77d9fe132b6df01fc0621fe Author: Naohisa Goto Date: Wed Jan 6 20:34:10 2010 +0900 Renamed test/unit/bio/db/test_clustalw.rb to test/unit/bio/appl/clustalw/test_report.rb. 
test/unit/bio/appl/clustalw/test_report.rb | 61 ++++++++++++++++++++++++++++ test/unit/bio/db/test_clustalw.rb | 61 ---------------------------- 2 files changed, 61 insertions(+), 61 deletions(-) create mode 100644 test/unit/bio/appl/clustalw/test_report.rb delete mode 100644 test/unit/bio/db/test_clustalw.rb commit 8368eee50de51f6218ffc7b1bf1aad332702c4ba Author: Pjotr Prins Date: Tue Jan 5 12:54:43 2010 +0100 Clustal: unit tests according to Naohisa lib/bio/appl/clustalw/report.rb | 6 +++--- test/unit/bio/db/test_clustalw.rb | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) commit ad525a01fa17052e9b7e9b7f30639c48596552ba Author: Pjotr Prins Date: Tue Jan 5 12:50:17 2010 +0100 Clustal: unit test uses File.read test/unit/bio/db/test_clustalw.rb | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) commit 7ab517e05cc470b9ca57273092599adb8c00dc11 Author: Pjotr Prins Date: Tue Jan 5 12:49:21 2010 +0100 Clustal: unit test, changed class name and copyright header test/unit/bio/db/test_clustalw.rb | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) commit 0829ee91a97976eb6671a2feec7edfc524f44b2c Author: Pjotr Prins Date: Tue Jan 5 12:46:37 2010 +0100 Clustal: Changed [] to get_sequence, with method description * Clustal: Added copyright. * Changed [] to get_sequence, with method description. 
lib/bio/appl/clustalw/report.rb | 12 ++++++++++-- 1 files changed, 10 insertions(+), 2 deletions(-) commit 3b8968a6b7b98e0f03b0822849594262a8f4ac99 Author: Pjotr Prins Date: Sun Dec 27 16:44:30 2009 +0100 ClustalW: Added [] method to reach sequence + definition lib/bio/appl/clustalw/report.rb | 9 +++++++++ test/unit/bio/db/test_clustalw.rb | 6 ++---- 2 files changed, 11 insertions(+), 4 deletions(-) commit 3926fabbcc0636c6e4ed08233af3d647c620cd5b Author: Pjotr Prins Date: Sun Dec 27 16:22:38 2009 +0100 ClustalW: Add ALN parser unit test test/data/clustalw/example1.aln | 58 ++++++++++++++++++++++++++++++++++ test/unit/bio/db/test_clustalw.rb | 63 +++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 0 deletions(-) create mode 100644 test/data/clustalw/example1.aln create mode 100644 test/unit/bio/db/test_clustalw.rb commit 2ef97f945b122dc279eb0ec0a34a2adb0c5f0cff Author: Pjotr Prins Date: Sat Jan 2 13:24:33 2010 +0100 Tutorial: Fixed URLs doc/Tutorial.rd | 10 +++++----- 1 files changed, 5 insertions(+), 5 deletions(-) commit 567ca8b010e15cbea9398ee74c78eae01fc6671d Author: Pjotr Prins Date: Fri Jan 1 12:08:50 2010 +0100 Tutorial: Added info on gem install doc/Tutorial.rd | 9 +++++++-- 1 files changed, 7 insertions(+), 2 deletions(-) commit 21070ab4928d9c7446d58f3003d43ee6235046aa Author: Pjotr Prins Date: Thu Dec 31 11:41:54 2009 +0100 Tutorial.rd: Added Naohisa's Ruby replacement for sed conversion doc/Tutorial.rd | 6 +++++- 1 files changed, 5 insertions(+), 1 deletions(-) commit ebded2364f716fa03b0fdbec9887f807836eb789 Author: Naohisa Goto Date: Wed Jan 6 10:59:39 2010 +0900 Bio::BIORUBY_EXTRA_VERSION is changed to ".5000". bioruby.gemspec | 2 +- lib/bio/version.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit a1bda9088662edec55af0106b4292c39e51c8b7b Author: Naohisa Goto Date: Mon Dec 28 21:56:33 2009 +0900 BioRuby 1.4.0 is released. 
ChangeLog | 32 ++++++++++++++++++++++++++++++++ bioruby.gemspec | 3 ++- 2 files changed, 34 insertions(+), 1 deletions(-) commit 5c88896357e1eff0686ceb06cbec0a7837f85050 Author: Naohisa Goto Date: Mon Dec 28 21:55:41 2009 +0900 Preparation for bioruby-1.4.0 release. lib/bio/version.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit 11f56d3d8efc2cf5d9408da865af044fa099b925 Author: Naohisa Goto Date: Mon Dec 28 21:52:25 2009 +0900 Added about ChangeLog which is replaced by git-log. RELEASE_NOTES.rdoc | 5 +++++ 1 files changed, 5 insertions(+), 0 deletions(-) commit 17d5b1825b6c73d710d72903d8710caa9996353a Author: Naohisa Goto Date: Mon Dec 28 20:11:49 2009 +0900 ChangeLog is autogenerated from git log. * ChangeLog is autogenerated from git log with the following command: % git log --stat --summary \ 3d1dfcc0e13ad582b9c70c7fdde3a89d0bacdc80..HEAD > ChangeLog ChangeLog | 2306 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 2306 insertions(+), 0 deletions(-) create mode 100644 ChangeLog commit 02bf77af589ea62df81e9634df6fe949df2fd3ef Author: Naohisa Goto Date: Mon Dec 28 19:25:39 2009 +0900 test_output_size is disabled because it depends on html decorations test/functional/bio/appl/test_pts1.rb | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) commit 5781fb402e85e73fd47948b4466c8129355b714b Author: Naohisa Goto Date: Mon Dec 28 19:21:21 2009 +0900 The PTS1 Predictor's URL had been changed. * The PTS1 Predictor's URL had been changed. * Changed to use @uri instead of @host and @cgi_path. lib/bio/appl/pts1.rb | 6 ++---- 1 files changed, 2 insertions(+), 4 deletions(-) commit a4e691d913e1ae51eadb1a871efc2c8718ef5587 Author: Naohisa Goto Date: Mon Dec 28 18:33:00 2009 +0900 Preparation of ChangeLog autogeneration: old ChangeLog is moved to doc/ChangeLog-before-1.3.1. 
ChangeLog | 3961 -------------------------------------------- doc/ChangeLog-before-1.3.1 | 3961 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 3961 insertions(+), 3961 deletions(-) delete mode 100644 ChangeLog create mode 100644 doc/ChangeLog-before-1.3.1 commit c011604766baa3cdf5ca2f4a776aa67c56460d29 Author: Naohisa Goto Date: Mon Dec 28 17:53:51 2009 +0900 Tutorial.rd.html is regenerated. doc/Tutorial.rd.html | 70 +++++++++++++------------------------------------ 1 files changed, 19 insertions(+), 51 deletions(-) commit 6e2cdd13d61970aa4704475bfb5aefb70719c2e1 Author: Naohisa Goto Date: Mon Dec 28 17:42:25 2009 +0900 Added Bio::NCBI.default_email= in the example, and examples using deprecated Bio::PubMed methods are temporarily commented out. doc/Tutorial.rd | 8 ++++++++ 1 files changed, 8 insertions(+), 0 deletions(-) commit 8e6d5e9baf98be7e58f4dda8b5d043a42149874b Author: Naohisa Goto Date: Mon Dec 28 17:15:09 2009 +0900 Reinserted "==>" for Blast example, and removed duplicated Ruby Ensembl API example. doc/Tutorial.rd | 25 ++----------------------- 1 files changed, 2 insertions(+), 23 deletions(-) commit 849edd7e8c5b26923cab47e7f5542948fab2b1fb Author: Pjotr Prins Date: Sun Dec 27 09:49:14 2009 +0100 Tutorial: Added info on how to run rubydoctest Removed bioruby> prefix for one failing BLAST test doc/Tutorial.rd | 69 ++++++++++++++++++++++++++++++++++++++---------------- 1 files changed, 48 insertions(+), 21 deletions(-) commit a39fcf0ca1a5265789110f42cc616fc5d3c16414 Author: Naohisa Goto Date: Fri Dec 25 12:30:18 2009 +0900 Modified for release notes and fixed typo. RELEASE_NOTES.rdoc | 29 +++++++++++++++-------------- 1 files changed, 15 insertions(+), 14 deletions(-) commit 3fa8b68f19fc2b6aaf8f54eb10517cc761b2193b Author: Naohisa Goto Date: Fri Dec 25 12:10:34 2009 +0900 Changes following the rename to RELEASE_NOTES.rdoc. 
README.rdoc | 2 +- bioruby.gemspec | 6 +++--- bioruby.gemspec.erb | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) commit fd692a1165d368b9bdbe068ea6bf63fd91c9925c Author: Naohisa Goto Date: Fri Dec 25 12:03:41 2009 +0900 Renamed doc/Changes-1.4.rdoc to RELEASE_NOTES.rdoc. RELEASE_NOTES.rdoc | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++ doc/Changes-1.4.rdoc | 160 -------------------------------------------------- 2 files changed, 160 insertions(+), 160 deletions(-) create mode 100644 RELEASE_NOTES.rdoc delete mode 100644 doc/Changes-1.4.rdoc commit 0e37f04dd8d34517693fdd4bc27f8bdada7c2f13 Author: Naohisa Goto Date: Thu Dec 24 21:48:52 2009 +0900 Changed Bio::PhyloXML::Parser.new to open, and regenerated html. doc/Tutorial.rd | 10 ++-- doc/Tutorial.rd.html | 125 ++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 112 insertions(+), 23 deletions(-) commit aeacbbd425c2e88369c171bd92c60bf8e520a9e5 Author: Naohisa Goto Date: Thu Dec 24 19:26:49 2009 +0900 bioruby.gemspec is regenerated bioruby.gemspec | 8 +++++++- 1 files changed, 7 insertions(+), 1 deletions(-) commit 1034205c199a638c359780922293f8b39c467356 Author: Naohisa Goto Date: Thu Dec 24 19:24:56 2009 +0900 Version number changed to 1.4.0-rc1 lib/bio/version.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit 04bf2da43f78fbb702b67323f3be1fe3bd2d0351 Author: Naohisa Goto Date: Thu Dec 24 19:22:41 2009 +0900 Issues added and modified. KNOWN_ISSUES.rdoc | 35 +++++++++++++++++++++++++++++++++-- 1 files changed, 33 insertions(+), 2 deletions(-) commit f1a76157b009fb0ca94d9a0e0f8a85522c383b19 Author: Naohisa Goto Date: Thu Dec 24 19:22:19 2009 +0900 Added news and incompatible changes. 
doc/Changes-1.4.rdoc | 102 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 files changed, 98 insertions(+), 4 deletions(-) commit 9c8ef18a20c49f17d5b89aa1db5819b2c8ee9b1d Author: Naohisa Goto Date: Thu Dec 24 19:10:02 2009 +0900 Email address for NCBI Entrez is given with Bio::NCBI.default_email=. bin/bioruby | 5 ++++- sample/demo_ncbi_rest.rb | 2 ++ sample/demo_pubmed.rb | 2 ++ sample/pmfetch.rb | 2 ++ sample/pmsearch.rb | 2 ++ test/functional/bio/io/test_pubmed.rb | 4 ++++ 6 files changed, 16 insertions(+), 1 deletions(-) commit 7a7179665694da35ab0970909bfbda9ad1b057da Author: Naohisa Goto Date: Thu Dec 24 19:09:09 2009 +0900 Changed autoload hierarchy of Bio::NCBI. lib/bio.rb | 10 ++++++---- lib/bio/io/ncbisoap.rb | 3 ++- 2 files changed, 8 insertions(+), 5 deletions(-) commit f8dc0268d9edf699fd3f0cf18dd55a2b10ec3bcc Author: Naohisa Goto Date: Thu Dec 24 18:58:18 2009 +0900 New singleton methods Bio::NCBI.default_email=, default_tool=, etc. * New singleton methods Bio::NCBI.default_email=, default_email, default_tools=, default_tools, etc., because email and tool parameters will be mandatory in Entrez eUtils. * Changed to raise error when email or tool is empty. Note that default email is nil and library users should always set their email address. * Default tool name is changed to include $0 and bioruby version ID. * Added multi-thread support for Bio::NCBI::REST#ncbi_access_wait. lib/bio/io/ncbirest.rb | 161 ++++++++++++++++++++++++++++++++++++++--------- 1 files changed, 130 insertions(+), 31 deletions(-) commit 2e311dc44290ef6bda48f0bcba09a3c22bf32d9a Author: Naohisa Goto Date: Mon Dec 21 22:24:52 2009 +0900 Description about the incompatible change of Bio::KEGG::REACTION#rpairs. doc/Changes-1.4.rdoc | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) commit d57ace3a89077caae3c681743da4b92d16b90af8 Author: Naohisa Goto Date: Mon Dec 21 22:17:46 2009 +0900 Bio::KEGG::REACTION#rpairs is changed to return a hash.
lib/bio/db/kegg/reaction.rb | 65 ++++++++++++++++++++++++-------- test/unit/bio/db/kegg/test_reaction.rb | 27 ++++++++++++- 2 files changed, 74 insertions(+), 18 deletions(-) commit 60e4c77d184ee81c51668b446518cfbc9256be50 Author: Naohisa Goto Date: Mon Dec 21 22:15:44 2009 +0900 Document bug fix: return value mistake. lib/bio/db/kegg/genes.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit 6376dd55aa4995769746e556ca719d37f02975d6 Author: Naohisa Goto Date: Sun Dec 20 17:32:52 2009 +0900 Added README.txt for FASTQ example data. test/data/fastq/README.txt | 109 ++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 109 insertions(+), 0 deletions(-) create mode 100644 test/data/fastq/README.txt commit 8dec18794c846726733d66c5a22170f5b2c4bb1a Author: Naohisa Goto Date: Tue Dec 15 13:51:13 2009 +0900 Newly added unit tests for Bio::KEGG::GLYCAN with test data. test/data/KEGG/G00024.glycan | 47 ++++++ test/data/KEGG/G01366.glycan | 18 +++ test/unit/bio/db/kegg/test_glycan.rb | 260 ++++++++++++++++++++++++++++++++++ 3 files changed, 325 insertions(+), 0 deletions(-) create mode 100644 test/data/KEGG/G00024.glycan create mode 100644 test/data/KEGG/G01366.glycan create mode 100644 test/unit/bio/db/kegg/test_glycan.rb commit 90b97bfbcfb3f7e3d5c28b195bdb9b9c058df887 Author: Naohisa Goto Date: Tue Dec 15 11:42:39 2009 +0900 Newly added unit test for Bio::KEGG::DRUG with test data. test/data/KEGG/D00063.drug | 104 +++++++++++++++++++ test/unit/bio/db/kegg/test_drug.rb | 194 ++++++++++++++++++++++++++++++++++++ 2 files changed, 298 insertions(+), 0 deletions(-) create mode 100644 test/data/KEGG/D00063.drug create mode 100644 test/unit/bio/db/kegg/test_drug.rb commit 443f778795b82a7f572cb8b85d2a8a8b3cea1334 Author: Naohisa Goto Date: Tue Dec 15 11:38:59 2009 +0900 New method Bio::KEGG::DRUG#products * New method Bio::KEGG::DRUG#products. * Improved RDoc. 
lib/bio/db/kegg/drug.rb | 50 +++++++++++++++++++++++++++++++++++++--------- 1 files changed, 40 insertions(+), 10 deletions(-) commit 48184d96b989f909ac0effb759cbc4b1ddc98dd1 Author: Naohisa Goto Date: Fri Dec 11 01:36:54 2009 +0900 Methods in Bio::KEGG::Common::* are changed to cache return values in instance variables. lib/bio/db/kegg/common.rb | 62 ++++++++++++++++++++++++++------------------ 1 files changed, 37 insertions(+), 25 deletions(-) commit f364ea609f1e01ca5270a5bd7404e0bbf752bc89 Author: Naohisa Goto Date: Fri Dec 11 01:23:42 2009 +0900 Version is changed to 1.4.0-alpha1, and bioruby.gemspec is regenerated. bioruby.gemspec | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++- bioruby.gemspec.erb | 4 +- lib/bio/version.rb | 4 +- 3 files changed, 145 insertions(+), 5 deletions(-) commit 096b5fbf6b7ff906203aabf93eb9a0bd56ae9ba2 Author: Naohisa Goto Date: Fri Dec 11 01:22:59 2009 +0900 Added documents about Bio::KEGG incompatible changes. doc/Changes-1.4.rdoc | 48 ++++++++++++++++++++++++++++++++++++++++++------ 1 files changed, 42 insertions(+), 6 deletions(-) commit 72ed277fe30bb1033cbc16d462f137510afb84e6 Author: Naohisa Goto Date: Fri Dec 11 01:21:26 2009 +0900 Newly added unit tests for Bio::KEGG::ENZYME with test data. test/data/KEGG/1.1.1.1.enzyme | 935 ++++++++++++++++++++++++++++++++++ test/unit/bio/db/kegg/test_enzyme.rb | 241 +++++++++ 2 files changed, 1176 insertions(+), 0 deletions(-) create mode 100644 test/data/KEGG/1.1.1.1.enzyme create mode 100644 test/unit/bio/db/kegg/test_enzyme.rb commit b99fcb39f7c5d2857cbb65283d85ea868ae8561d Author: Naohisa Goto Date: Fri Dec 11 01:09:03 2009 +0900 Changed Bio::KEGG::*#dblinks, pathways, orthologs, genes methods. * In Bio::KEGG::COMPOUND, DRUG, ENZYME, GLYCAN and ORTHOLOGY, the method dblinks is changed to return a Hash. The old methods are renamed to dblinks_as_strings. * In Bio::KEGG::COMPOUND, DRUG, ENZYME, GENES, GLYCAN and REACTION, the method pathways is changed to return a Hash. 
The old methods are renamed to pathways_as_strings except for GENES. * In Bio::KEGG::ENZYME, GENES, GLYCAN and REACTION, the method orthologs is changed to return a Hash. The old methods are renamed to orthologs_as_strings. * Bio::KEGG::ENZYME#genes and Bio::KEGG::ORTHOLOGY#genes is changed to return a Hash. The old methods are renamed to genes_as_strings. * Added Bio::KEGG::REACTION#rpairs_as_tokens, older behavior of rpairs. * Modules in lib/bio/db/kegg/common.rb are moved under Bio::KEGG::Common namespace. * Refactoring. * Added documents. * Tests modified. lib/bio/db/kegg/common.rb | 40 +++++++++++++++++++------ lib/bio/db/kegg/compound.rb | 10 ++++--- lib/bio/db/kegg/drug.rb | 27 +++++++++++++++------ lib/bio/db/kegg/enzyme.rb | 31 ++++++++++++++++++++---- lib/bio/db/kegg/genes.rb | 39 +++++++++++++++++++------------ lib/bio/db/kegg/glycan.rb | 22 +++++++++++++++-- lib/bio/db/kegg/orthology.rb | 25 +++++++------------ lib/bio/db/kegg/reaction.rb | 16 +++++++++--- test/unit/bio/db/kegg/test_compound.rb | 27 ++++++++++++-------- test/unit/bio/db/kegg/test_reaction.rb | 13 +++++---- 10 files changed, 170 insertions(+), 80 deletions(-) commit 2cc9d4e2f28f6b2bbcb8f714f9e2eb144c594fbf Author: Naohisa Goto Date: Thu Dec 10 16:02:54 2009 +0900 Bio::KEGG::GENES#structure no more adds PDB: prefix. * Bio::KEGG::GENES#structure no more adds PDB: prefix. * Added Bio::KEGG::GENES#structures as an alias of structure. lib/bio/db/kegg/genes.rb | 7 +++---- test/unit/bio/db/kegg/test_genes.rb | 7 ++++--- 2 files changed, 7 insertions(+), 7 deletions(-) commit a8ceb23bdf19d6649aa4d879cba76a9e3f91d1d4 Author: Naohisa Goto Date: Thu Dec 10 15:28:33 2009 +0900 Refactoring of Bio::KEGG::Orthology#dblinks and genes. * Refactoring of Bio::KEGG::Orthology#dblinks and genes: no need to treat @data because lines_fetch internally does so.
lib/bio/db/kegg/orthology.rb | 10 ++-------- 1 files changed, 2 insertions(+), 8 deletions(-) commit 720e0bccdfdc6fac6222cac1a9f05d6e2419896c Author: Naohisa Goto Date: Wed Dec 9 16:39:03 2009 +0900 Changed dummy lines for RDoc. lib/bio/db/kegg/compound.rb | 4 ++-- lib/bio/db/kegg/orthology.rb | 2 +- lib/bio/db/kegg/reaction.rb | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) commit 20f8c03af92e5cfedcb49e8ed9fc6fda2b86e9c9 Author: Naohisa Goto Date: Wed Dec 9 15:17:39 2009 +0900 Refactoring of Bio::KEGG::REACTION#orthologs. * Refactoring of Bio::KEGG::REACTION#orthologs: no need to treat @data because lines_fetch internally does so. lib/bio/db/kegg/reaction.rb | 5 +---- 1 files changed, 1 insertions(+), 4 deletions(-) commit b924601bacd643f66b37dd991913e6862df704a9 Author: Naohisa Goto Date: Sun Dec 6 15:51:03 2009 +0900 Bio::KEGG::GENES#pathways is changed to return raw lines as an Array of strings. * Bio::KEGG::GENES#pathways is changed to return raw lines as an Array of strings. * RDoc is added for Bio::KEGG::GENES. lib/bio/db/kegg/genes.rb | 99 ++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 96 insertions(+), 3 deletions(-) commit 4c840dc6a539db1d854b23991269b3e6515f637e Author: Kozo Nishida Date: Wed Dec 2 17:02:00 2009 +0900 Added test methods. test/unit/bio/db/kegg/test_compound.rb | 47 ++++++++++++++++++++++++++++++++ 1 files changed, 47 insertions(+), 0 deletions(-) commit 105efa1ecd1bc99a54aac32710a97df15035119d Author: Naohisa Goto Date: Wed Dec 2 23:31:07 2009 +0900 Refactoring: to use lib/bio/db/kegg/common.rb for dblinks_as_hash method. lib/bio/db/kegg/orthology.rb | 16 +++++----------- 1 files changed, 5 insertions(+), 11 deletions(-) commit c394ead051c3a13ceb534f93816af7ad35be932a Author: Naohisa Goto Date: Wed Dec 2 23:07:23 2009 +0900 Bio::KEGG::REACTION#orthologies is renamed to orthologs_as_hash with changing its return value to a hash. 
* Bio::KEGG::REACTION#orthologies is renamed to orthologs_as_hash with changing its return value to a hash. * The code of the orthologs_as_hash method is moved to lib/bio/db/kegg/common.rb. * Added new method Bio::KEGG::REACTION#orthologs, copied from lib/bio/db/kegg/glycan.rb. lib/bio/db/kegg/common.rb | 18 +++++++++++++++++- lib/bio/db/kegg/reaction.rb | 14 ++++++-------- sample/demo_kegg_reaction.rb | 6 ++++-- test/unit/bio/db/kegg/test_reaction.rb | 12 ++++++++++-- 4 files changed, 37 insertions(+), 13 deletions(-) commit 4e01fda27166faf066104ab9897904fd46f57123 Author: Naohisa Goto Date: Wed Dec 2 22:48:06 2009 +0900 Added Bio::KEGG::REACTION#pathways_as_hash and reverted pathways method. * New method Bio::KEGG::REACTION#pathways_as_hash, using a module in lib/bio/db/kegg/common.rb. * Bio::KEGG::REACTION#pathways is reverted to return an array of string. lib/bio/db/kegg/reaction.rb | 18 +++++++++++------- test/unit/bio/db/kegg/test_reaction.rb | 8 +++++++- 2 files changed, 18 insertions(+), 8 deletions(-) commit 0c2ce4b8462792d496ab3f58206fdbd47143e280 Author: Naohisa Goto Date: Wed Dec 2 22:35:21 2009 +0900 New methods Bio::KEGG::COMPOUND#dblinks_as_hash and pathways_as_hash, using modules in lib/bio/db/kegg/common.rb. lib/bio/db/kegg/compound.rb | 14 +++++++++++ test/unit/bio/db/kegg/test_compound.rb | 38 ++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 0 deletions(-) commit 63df07e030120eb43de22555277529822b072270 Author: Naohisa Goto Date: Wed Dec 2 22:25:20 2009 +0900 Methods commonly used from Bio::KEGG::* classes. * Modules containing methods commonly used from Bio::KEGG::* classes. The "dblinks_as_hash" method is copied from lib/bio/db/kegg/orthology.rb. The "pathways_as_hash" method is derived from the dblinks_as_hash and Bio::KEGG::REACTION#pathways methods. 
lib/bio/db/kegg/common.rb | 60 +++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 60 insertions(+), 0 deletions(-) create mode 100644 lib/bio/db/kegg/common.rb commit 0e55c6701b09a52356ac55300181ee656773826f Author: Naohisa Goto Date: Wed Dec 2 21:39:06 2009 +0900 Bio::KEGG::COMPOUND#dblinks is reverted to return an array of string. lib/bio/db/kegg/compound.rb | 11 ++--------- test/unit/bio/db/kegg/test_compound.rb | 8 +++++++- 2 files changed, 9 insertions(+), 10 deletions(-) commit a05adcddf6c7ed67c042f31ecd86848af1ba8a22 Author: Naohisa Goto Date: Wed Dec 2 21:13:39 2009 +0900 Bug fix: fixed a copy-and-paste mistake. lib/bio/db/kegg/drug.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit 86925f3c80730e3ea3377a23a70cadb3876258c4 Author: Naohisa Goto Date: Tue Dec 1 21:31:40 2009 +0900 Bio::KEGG::ORTHOLOGY#dblinks_as_hash should preserve database names. doc/Changes-1.4.rdoc | 4 ++++ lib/bio/db/kegg/orthology.rb | 2 +- test/unit/bio/db/kegg/test_orthology.rb | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) commit 60847cd2d0701fa38a499578649cb216c93993a2 Author: Naohisa Goto Date: Tue Dec 1 20:41:51 2009 +0900 Test class names are changed to avoid potential class name conflict. test/unit/bio/db/kegg/test_compound.rb | 2 +- test/unit/bio/db/kegg/test_genes.rb | 4 ++-- test/unit/bio/db/kegg/test_orthology.rb | 2 +- test/unit/bio/db/kegg/test_reaction.rb | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) commit 2bda62af7a020c22379dd9ec3a42496d2a5b94cb Author: Kozo Nishida Date: Tue Dec 1 04:38:27 2009 +0900 Added unit tests for Bio::KEGG::ORTHOLOGY. 
test/data/KEGG/K02338.orthology | 902 +++++++++++++++++++++++++++++++ test/unit/bio/db/kegg/test_orthology.rb | 50 ++ 2 files changed, 952 insertions(+), 0 deletions(-) create mode 100644 test/data/KEGG/K02338.orthology create mode 100644 test/unit/bio/db/kegg/test_orthology.rb commit acad9497caf5d737394568e911691fdad11ca091 Author: Naohisa Goto Date: Mon Nov 30 21:39:32 2009 +0900 Changed to use BioRubyTestDataPath instead of __FILE__. test/unit/bio/db/kegg/test_compound.rb | 3 +-- test/unit/bio/db/kegg/test_reaction.rb | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) commit 8e95f3fb60cd61b2bfad8e66caf03d3ff02a6dca Author: Naohisa Goto Date: Sun Nov 29 16:37:21 2009 +0900 Bio::Fastq::QualityScore is renamed to Bio::Sequence::QualityScore. * Bio::Fastq::QualityScore is renamed to Bio::Sequence::QualityScore. * Changes of filenames due to the previous file move. lib/bio/db/fasta/format_qual.rb | 18 ++++++++-------- lib/bio/db/fastq.rb | 7 ++--- lib/bio/sequence.rb | 3 +- lib/bio/sequence/quality_score.rb | 25 +++++++++++------------ test/unit/bio/sequence/test_quality_score.rb | 28 +++++++++++++------------- 5 files changed, 40 insertions(+), 41 deletions(-) commit 2b29654c1d7e927e445e7acdd525835a873c2a2a Author: Naohisa Goto Date: Sun Nov 29 16:15:42 2009 +0900 lib/bio/db/fastq/quality_score.rb is moved to lib/bio/sequence/. The unit test is also moved. * lib/bio/db/fastq/quality_score.rb is moved to lib/bio/sequence/. * test/unit/bio/db/fastq/test_quality_score.rb is moved to test/unit/bio/sequence/. * The file contents will be modified with the following commit. 
lib/bio/db/fastq/quality_score.rb | 206 ---------------- lib/bio/sequence/quality_score.rb | 206 ++++++++++++++++ test/unit/bio/db/fastq/test_quality_score.rb | 330 -------------------------- test/unit/bio/sequence/test_quality_score.rb | 330 ++++++++++++++++++++++++++ 4 files changed, 536 insertions(+), 536 deletions(-) delete mode 100644 lib/bio/db/fastq/quality_score.rb create mode 100644 lib/bio/sequence/quality_score.rb delete mode 100644 test/unit/bio/db/fastq/test_quality_score.rb create mode 100644 test/unit/bio/sequence/test_quality_score.rb commit aa8d49bf31f90dd2796c18ee0aa6291979284ec2 Author: Naohisa Goto Date: Sun Nov 29 15:20:36 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_gff1.rb. lib/bio/db/gff.rb | 17 ----------------- sample/demo_gff1.rb | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 17 deletions(-) create mode 100644 sample/demo_gff1.rb commit 76fffd2d2429346478fb3d8c88cdcd878a1047b1 Author: Naohisa Goto Date: Sun Nov 29 15:06:41 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_tmhmm_report.rb. lib/bio/appl/tmhmm/report.rb | 36 ---------------------- sample/demo_tmhmm_report.rb | 68 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 36 deletions(-) create mode 100644 sample/demo_tmhmm_report.rb commit dfafb0a2bcec4c0b4cd3640374e151e2039056dc Author: Naohisa Goto Date: Sun Nov 29 14:59:27 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_targetp_report.rb. lib/bio/appl/targetp/report.rb | 105 +------------------------------ sample/demo_targetp_report.rb | 135 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+), 104 deletions(-) create mode 100644 sample/demo_targetp_report.rb commit 75f7c8527546f8ea3079f53b90a9b4d8260b4de0 Author: Naohisa Goto Date: Sun Nov 29 14:33:28 2009 +0900 Follow-up of the SOSUI server URL change. 
lib/bio/appl/sosui/report.rb | 6 ++++-- 1 files changed, 4 insertions(+), 2 deletions(-) commit 8022696295dc296462f73b40cc74ad5259bee387 Author: Naohisa Goto Date: Sun Nov 29 14:32:11 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_sosui_report.rb. lib/bio/appl/sosui/report.rb | 53 +------------------------ sample/demo_sosui_report.rb | 89 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 52 deletions(-) create mode 100644 sample/demo_sosui_report.rb commit 4acfe7f565039b34a036682912a75f55da808b45 Author: Naohisa Goto Date: Sun Nov 29 14:02:32 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_hmmer_report.rb. lib/bio/appl/hmmer/report.rb | 100 ---------------------------- sample/demo_hmmer_report.rb | 149 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+), 100 deletions(-) create mode 100644 sample/demo_hmmer_report.rb commit 4f7bd1b7628d90661d8b557ca854b14cc44fb99c Author: Naohisa Goto Date: Thu Nov 26 15:49:21 2009 +0900 Demo codes in the "if __FILE__ == $0" are removed because they are very short. lib/bio/appl/fasta/format10.rb | 14 -------------- lib/bio/appl/hmmer.rb | 16 +--------------- lib/bio/io/flatfile.rb | 8 +------- 3 files changed, 2 insertions(+), 36 deletions(-) commit c2a72d195189755532e7e206af34d152ab6332d8 Author: Naohisa Goto Date: Thu Nov 26 15:20:28 2009 +0900 Bug fix: Failure of Bio::Fasta.remote due to the remote site changes. lib/bio/appl/fasta.rb | 5 ++++- 1 files changed, 4 insertions(+), 1 deletions(-) commit 549112fb4dfb5f6b2fe3491fb161887a9f5262ac Author: Naohisa Goto Date: Thu Nov 26 15:13:10 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_fasta_remote.rb. 
lib/bio/appl/fasta.rb | 18 --------------- sample/demo_fasta_remote.rb | 51 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 18 deletions(-) create mode 100644 sample/demo_fasta_remote.rb commit 0e4ca0db83692fdbbe93e90272a07bcbac89192c Author: Naohisa Goto Date: Thu Nov 26 10:17:47 2009 +0900 Text indents for some comment lines are changed. sample/demo_blast_report.rb | 4 ++-- sample/demo_kegg_compound.rb | 4 ++-- sample/demo_prosite.rb | 4 ++-- sample/demo_sirna.rb | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) commit c0cf91fe2a9247bc3705b20515f9d4fa14288d5a Author: Naohisa Goto Date: Thu Nov 26 10:13:26 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_keggapi.rb. * Demo codes in the "if __FILE__ == $0" are moved to sample/demo_keggapi.rb. * Commented out demonstrations of deprecated methods: get_neighbors_by_gene, get_similarity_between_genes, get_ko_members, get_oc_members_by_gene, get_pc_members_by_gene. * Commented out demonstrations of methods internally using the deprecated methods: get_all_neighbors_by_gene, get_all_oc_members_by_gene, get_all_pc_members_by_gene. lib/bio/io/keggapi.rb | 442 ------------------------------------------ sample/demo_keggapi.rb | 502 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 502 insertions(+), 442 deletions(-) create mode 100644 sample/demo_keggapi.rb commit 8b8206c1d8ee699185fdd19d3329311c85ee003c Author: Naohisa Goto Date: Thu Nov 26 01:50:06 2009 +0900 Fixed the license line. lib/bio/db/prosite.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit ebfeec8243abd4e2f65335fda1ead18efff66897 Author: Naohisa Goto Date: Thu Nov 26 01:41:58 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_ncbi_rest.rb. 
lib/bio/io/ncbirest.rb | 101 ------------------------------------ sample/demo_ncbi_rest.rb | 128 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+), 101 deletions(-) create mode 100644 sample/demo_ncbi_rest.rb commit 5a0f8379a374650d12fc88fbbd5b28c38ae96395 Author: Naohisa Goto Date: Thu Nov 26 01:33:07 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_prosite.rb. lib/bio/db/prosite.rb | 95 +------------------------------------- sample/demo_prosite.rb | 120 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 94 deletions(-) create mode 100644 sample/demo_prosite.rb commit c560a5d0ba9d4919dbcca156ea620056dcb8f725 Author: Naohisa Goto Date: Thu Nov 26 01:14:37 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_psort.rb. lib/bio/appl/psort.rb | 111 --------------------------------------- sample/demo_psort.rb | 138 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+), 111 deletions(-) create mode 100644 sample/demo_psort.rb commit 1299a55d214784a536ae3cd8bfabdfd61fe1da86 Author: Naohisa Goto Date: Thu Nov 26 01:04:29 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_psort_report.rb. * Demo codes in the "if __FILE__ == $0" are moved to sample/demo_psort_report.rb, without any checks. lib/bio/appl/psort/report.rb | 46 +--------------------------- sample/demo_psort_report.rb | 70 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 45 deletions(-) create mode 100644 sample/demo_psort_report.rb commit a2686fe3c5a93947c94d4602514a62a808c182d5 Author: Naohisa Goto Date: Thu Nov 26 00:53:54 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_genscan_report.rb. 
lib/bio/appl/genscan/report.rb | 176 ---------------------------------- sample/demo_genscan_report.rb | 202 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 202 insertions(+), 176 deletions(-) create mode 100644 sample/demo_genscan_report.rb commit 22f662ba69dd2d4a2273562dd7ea921f5cdd84bd Author: Naohisa Goto Date: Thu Nov 26 00:28:01 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_ddbjxml.rb. lib/bio/io/ddbjxml.rb | 182 +----------------------------------------- sample/demo_ddbjxml.rb | 212 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 213 insertions(+), 181 deletions(-) create mode 100644 sample/demo_ddbjxml.rb commit ed3b34b6598f632c7b9b3f1a17b42406c19ca32d Author: Naohisa Goto Date: Thu Nov 26 00:12:33 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_pubmed.rb. * Demo codes in the "if __FILE__ == $0" are moved to sample/demo_pubmed.rb. * Codes using Entrez CGI are disabled in the demo. lib/bio/io/pubmed.rb | 88 ------------------------------------- sample/demo_pubmed.rb | 116 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 88 deletions(-) create mode 100644 sample/demo_pubmed.rb commit 9e6d720f383e88e247eacab6f0e43f38140a62f2 Author: Naohisa Goto Date: Wed Nov 25 23:59:10 2009 +0900 Demo codes in the "if __FILE__ == $0" are removed. * Demo codes in the "if __FILE__ == $0" are removed because their function have already been moved to sample/demo_blast_report.rb. 
lib/bio/appl/blast/format0.rb | 193 -------------------------------------- lib/bio/appl/blast/report.rb | 149 +----------------------------- lib/bio/appl/blast/wublast.rb | 208 ----------------------------------------- 3 files changed, 2 insertions(+), 548 deletions(-) commit bbba2812fa9131d01fc655eb174d84f06facd8b8 Author: Naohisa Goto Date: Wed Nov 25 23:49:36 2009 +0900 New demo code of BLAST parser based on codes in "if __FILE__ ==$0" * Newly added sample/demo_blast_report.rb, demonstration of BLAST parsers Bio::Blast::Report, Bio::Blast::Default::Report, and Bio::Blast::WU::Report. It is based on the demonstration codes in the "if __FILE__ == $0" in lib/bio/appl/blast/report.rb, lib/bio/appl/blast/format0.rb, and lib/bio/appl/blast/wublast.rb. sample/demo_blast_report.rb | 285 +++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 285 insertions(+), 0 deletions(-) create mode 100644 sample/demo_blast_report.rb commit 5235ed15db8d3ba3e59d8dc3bbbcf1b5b9c58281 Author: Naohisa Goto Date: Wed Nov 25 21:57:08 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_das.rb. * Demo codes in the "if __FILE__ == $0" are moved to sample/demo_das.rb. * Demo codes using UCSC DAS server is added. * Demo using the WormBase DAS server is temporarily disabled because it does not work well possibly because of the server trouble. lib/bio/io/das.rb | 44 ---------------------- sample/demo_das.rb | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 44 deletions(-) create mode 100644 sample/demo_das.rb commit b7b0f7bef0505b9678673e54bb863d4ff7897dd5 Author: Naohisa Goto Date: Wed Nov 25 20:58:35 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_kegg_taxonomy.rb, although it does not work correctly now. 
lib/bio/db/kegg/taxonomy.rb | 53 +----------------------- sample/demo_kegg_taxonomy.rb | 92 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 52 deletions(-) create mode 100644 sample/demo_kegg_taxonomy.rb commit 23da98ca19fce1f0b487e1f955ef4cd896839590 Author: Naohisa Goto Date: Wed Nov 25 20:11:12 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_kegg_reaction.rb. lib/bio/db/kegg/reaction.rb | 16 +---------- sample/demo_kegg_reaction.rb | 64 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 15 deletions(-) create mode 100644 sample/demo_kegg_reaction.rb commit 9c0bfb857a6b41d8e6a42ff2cbf7b06ca1d38d78 Author: Naohisa Goto Date: Wed Nov 25 19:12:00 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_kegg_orthology.rb. lib/bio/db/kegg/orthology.rb | 23 +-------------- sample/demo_kegg_orthology.rb | 62 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 22 deletions(-) create mode 100644 sample/demo_kegg_orthology.rb commit 6f6f1eb3d87dea588ea333708c4d4486ac7136b6 Author: Naohisa Goto Date: Wed Nov 25 12:19:26 2009 +0900 Commented out demo for nonexistent method "bindings". sample/demo_kegg_glycan.rb | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) commit 98a6d904058b5af4808f16bcb710d73bd97c9764 Author: Naohisa Goto Date: Wed Nov 25 12:18:31 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_kegg_glycan.rb. lib/bio/db/kegg/glycan.rb | 21 ------------- sample/demo_kegg_glycan.rb | 72 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 21 deletions(-) create mode 100644 sample/demo_kegg_glycan.rb commit d26e835ca9def2287f1050f1b048892e3cafdaa0 Author: Naohisa Goto Date: Wed Nov 25 11:49:05 2009 +0900 Added references. 
lib/bio/db/kegg/genome.rb | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) commit c3c460462481b5b8d6e9441216bcf6370b4890ef Author: Naohisa Goto Date: Wed Nov 25 11:45:31 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_kegg_genome.rb. lib/bio/db/kegg/genome.rb | 42 +------------------------ sample/demo_kegg_genome.rb | 74 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 41 deletions(-) create mode 100644 sample/demo_kegg_genome.rb commit 0d8e709b66bf18ead5944c27a50eb6cf2c47862f Author: Naohisa Goto Date: Wed Nov 25 11:43:24 2009 +0900 Added document about downloading sample data. sample/demo_kegg_drug.rb | 13 ++++++++++++- 1 files changed, 12 insertions(+), 1 deletions(-) commit 0608893198e9bc88521b6c013069d8c7a13bb0e5 Author: Naohisa Goto Date: Wed Nov 25 00:10:48 2009 +0900 Added documents. lib/bio/db/kegg/drug.rb | 15 +++++++++++++++ 1 files changed, 15 insertions(+), 0 deletions(-) commit 0ecdc1ee0460f16dba1e4cd5ab575c92e1c6b1ac Author: Naohisa Goto Date: Wed Nov 25 00:06:02 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_kegg_drug.rb. lib/bio/db/kegg/drug.rb | 18 +-------------- sample/demo_kegg_drug.rb | 54 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 17 deletions(-) create mode 100644 sample/demo_kegg_drug.rb commit b0c349103f01a26f4741999bd696bf5b1c032e06 Author: Naohisa Goto Date: Tue Nov 24 23:51:13 2009 +0900 Added documents. lib/bio/db/kegg/compound.rb | 10 ++++++++++ 1 files changed, 10 insertions(+), 0 deletions(-) commit e965b454c553ed9670bc83962a2a9d7c5de49929 Author: Naohisa Goto Date: Tue Nov 24 23:45:15 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_kegg_compound.rb. 
lib/bio/db/kegg/compound.rb | 19 +------------- sample/demo_kegg_compound.rb | 57 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 18 deletions(-) create mode 100644 sample/demo_kegg_compound.rb commit 7454db7c8b8ef7202736d311356d4ca350af336f Author: Naohisa Goto Date: Tue Nov 24 23:06:21 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_litdb.rb. lib/bio/db/litdb.rb | 17 +---------------- sample/demo_litdb.rb | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 16 deletions(-) create mode 100644 sample/demo_litdb.rb commit fde284248e013e44184ee2ba7da85e5b83155a69 Author: Naohisa Goto Date: Tue Nov 24 22:57:01 2009 +0900 Ruby 1.9 support: String#each_line instead of String#each lib/bio/db/go.rb | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) commit 8b60099615790fe372b4fde27a391dedc767aab2 Author: Naohisa Goto Date: Tue Nov 24 22:53:12 2009 +0900 Sample code bug fix: fixed method names, and workaround for Zlib error. * Sample code bug fix: Following method name changes. * Workaround for Zlib::DataError. sample/demo_go.rb | 13 +++++++++---- 1 files changed, 9 insertions(+), 4 deletions(-) commit 737fec3db555811d127d2356e5ceef63b0413fb8 Author: Naohisa Goto Date: Tue Nov 24 19:47:14 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_go.rb. lib/bio/db/go.rb | 70 +--------------------------------------- sample/demo_go.rb | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 69 deletions(-) create mode 100644 sample/demo_go.rb commit 8264b15690132d9e766f16d0829bb12cd122b900 Author: Naohisa Goto Date: Tue Nov 24 19:20:52 2009 +0900 Document bug fix: Changed Bio::Bl2seq to Bio::Blast::Bl2seq in the RDoc. * Document bug fix: Changed Bio::Bl2seq to Bio::Blast::Bl2seq in the RDoc. * Modified copyright line. 
lib/bio/appl/bl2seq/report.rb | 18 +++++++++--------- 1 files changed, 9 insertions(+), 9 deletions(-) commit c572ff022fee43505355608f0a0e3ba2181e87e2 Author: Naohisa Goto Date: Tue Nov 24 19:17:04 2009 +0900 Bug fix: Failed to read Bio::Blast::Bl2seq::Report data by using Bio::FlatFile. lib/bio/appl/bl2seq/report.rb | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) commit 4f6b080623442dfcc5864e2aefde7e53ace068e8 Author: Naohisa Goto Date: Tue Nov 24 19:15:11 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_bl2seq_report.rb. lib/bio/appl/bl2seq/report.rb | 194 +------------------------------------ sample/demo_bl2seq_report.rb | 220 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 221 insertions(+), 193 deletions(-) create mode 100644 sample/demo_bl2seq_report.rb commit 2f03e8757383e0d1a26c0f6942c74a30f3b26d90 Author: Naohisa Goto Date: Tue Nov 24 18:24:43 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_genbank.rb. * Demo codes in the "if __FILE__ == $0" are moved to sample/demo_genbank.rb, and modified as below. * To get sequences from the NCBI web service. * By default, arguments are sequence IDs (accession numbers). * New option "--files" (or "-files", "--file", or "-file") to read sequences from file(s). lib/bio/db/genbank/genbank.rb | 87 +-------------------------- sample/demo_genbank.rb | 132 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+), 86 deletions(-) create mode 100644 sample/demo_genbank.rb commit a2981c28fdb629a655c71c920f6588f8b80aff06 Author: Naohisa Goto Date: Tue Nov 24 15:06:50 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_aaindex.rb. 
lib/bio/db/aaindex.rb | 39 +--------------------------- sample/demo_aaindex.rb | 67 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 38 deletions(-) create mode 100644 sample/demo_aaindex.rb commit b741d17ec5c5ac234bab35b8716fee072635de1a Author: Naohisa Goto Date: Tue Nov 24 12:45:43 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_sirna.rb * Demo codes in the "if __FILE__ == $0" are moved to sample/demo_sirna.rb, and modified for reading normal sequence files instead of a raw sequence. lib/bio/util/sirna.rb | 24 +------------------ sample/demo_sirna.rb | 63 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 23 deletions(-) create mode 100644 sample/demo_sirna.rb commit 7cc778e78bc63ef73796ee15d6f0db8d6967aefe Author: Naohisa Goto Date: Mon Nov 23 23:00:42 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_pathway.rb. lib/bio/pathway.rb | 171 ----------------------------------------- sample/demo_pathway.rb | 196 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 196 insertions(+), 171 deletions(-) create mode 100644 sample/demo_pathway.rb commit 7e5510587abc0b50b6851f005a3236bf9dc79d08 Author: Naohisa Goto Date: Mon Nov 23 22:49:13 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_locations.rb. lib/bio/location.rb | 73 ---------------------------------- sample/demo_locations.rb | 99 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 73 deletions(-) create mode 100644 sample/demo_locations.rb commit f1c02666f4b11d5cf208d6beb592d8ac962ce2da Author: Naohisa Goto Date: Mon Nov 23 22:35:50 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_codontable.rb. 
lib/bio/data/codontable.rb | 96 +----------------------------------- sample/demo_codontable.rb | 119 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+), 95 deletions(-) create mode 100644 sample/demo_codontable.rb commit c11a7793f85faf3d66d630833c38358ffa34a698 Author: Naohisa Goto Date: Mon Nov 23 16:35:16 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_nucleicacid.rb. lib/bio/data/na.rb | 27 +----------------------- sample/demo_nucleicacid.rb | 49 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 26 deletions(-) create mode 100644 sample/demo_nucleicacid.rb commit de41b67c3f65baa0f122689b2e9f479d8a247934 Author: Naohisa Goto Date: Mon Nov 23 16:25:05 2009 +0900 Demo codes in the "if __FILE__ == $0" are moved to sample/demo_aminoacid.rb. lib/bio/data/aa.rb | 78 +----------------------------------- sample/demo_aminoacid.rb | 101 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 77 deletions(-) create mode 100644 sample/demo_aminoacid.rb commit e652dd44ecb6b6dad652e33a398f92bb8373e7dd Author: Naohisa Goto Date: Mon Nov 23 16:12:48 2009 +0900 Added an error message about encoding in Ruby 1.9.1 KNOWN_ISSUES.rdoc | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) commit 003133b0d4e2234c27927c9d10b75185c354102e Author: Naohisa Goto Date: Mon Nov 23 15:52:21 2009 +0900 changed recommended Ruby version README.rdoc | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) commit 408483d36b713678361cecf6c77ff7a2098f71fc Author: Naohisa Goto Date: Sun Nov 22 17:07:37 2009 +0900 added information about doc/Changes-1.4.rdoc README.rdoc | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) commit ef342933839e8c6cef9883045fcaf468aff5da23 Author: Naohisa Goto Date: Sun Nov 22 16:50:55 2009 +0900 In PhyloXML support, added a link to GNOME Libxml2 and fixed RDoc syntax. 
README.rdoc | 6 ++++-- 1 files changed, 4 insertions(+), 2 deletions(-) commit 0237ef42d60c7a76cadf8ea78f4251bcfe89c95f Author: Naohisa Goto Date: Thu Nov 19 09:43:15 2009 +0900 Ruby 1.9 support: String#each_line instead of String#each lib/bio/appl/meme/mast/report.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit ec935ea9b19415bf3325bcc0763fbc22f3c71a3d Author: Naohisa Goto Date: Thu Nov 19 09:40:49 2009 +0900 The "libpath magic" is replaced by loading helper routine. test/unit/bio/appl/meme/mast/test_report.rb | 11 ++++++----- test/unit/bio/appl/meme/test_mast.rb | 11 ++++++----- test/unit/bio/appl/meme/test_motif.rb | 8 +++++--- 3 files changed, 17 insertions(+), 13 deletions(-) commit 3f65eeb503f3b2ef866cab4c73d2d700ca572835 Author: Adam Kraut Date: Tue Mar 17 19:41:31 2009 -0400 Added basic support for MEME/MAST applications lib/bio/appl/meme/mast.rb | 156 +++++++++++++++++++++++++++ lib/bio/appl/meme/mast/report.rb | 91 ++++++++++++++++ lib/bio/appl/meme/motif.rb | 48 ++++++++ test/data/meme/mast.out | 13 +++ test/data/meme/meme.out | 3 + test/unit/bio/appl/meme/mast/test_report.rb | 45 ++++++++ test/unit/bio/appl/meme/test_mast.rb | 102 +++++++++++++++++ test/unit/bio/appl/meme/test_motif.rb | 36 ++++++ 8 files changed, 494 insertions(+), 0 deletions(-) create mode 100644 lib/bio/appl/meme/mast.rb create mode 100644 lib/bio/appl/meme/mast/report.rb create mode 100644 lib/bio/appl/meme/motif.rb create mode 100644 test/data/meme/db create mode 100644 test/data/meme/mast create mode 100644 test/data/meme/mast.out create mode 100644 test/data/meme/meme.out create mode 100644 test/unit/bio/appl/meme/mast/test_report.rb create mode 100644 test/unit/bio/appl/meme/test_mast.rb create mode 100644 test/unit/bio/appl/meme/test_motif.rb commit 3862f54fda0caec2a07e563a1f8a11913baca2e3 Author: Naohisa Goto Date: Wed Nov 18 20:29:56 2009 +0900 New version of PhyloXML schema, version 1.10. 
* Upgraded to New version of PhyloXML schema, version 1.10, developed by Christian M Zmasek. lib/bio/db/phyloxml/phyloxml.xsd | 1155 +++++++++++++++++++------------------- 1 files changed, 582 insertions(+), 573 deletions(-) commit 45ffd9228d513b3dbf29e1011c6a6689a8bd1b08 Author: Naohisa Goto Date: Wed Nov 18 00:26:44 2009 +0900 Newly added sample script to test big PhyloXML data * Newly added a sample script to test big PhyloXML data based on Diana Jaunzeikare's work. (http://github.com/latvianlinuxgirl/bioruby/blob/ 20627fc5a443d6c2e3dc73ed50e9c578ffcbc330/ test/unit/bio/db/test_phyloxml_big.rb). sample/test_phyloxml_big.rb | 205 +++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 205 insertions(+), 0 deletions(-) create mode 100644 sample/test_phyloxml_big.rb commit 828a8971e057919b80508cf29fd9518828b74a2f Author: Naohisa Goto Date: Tue Nov 17 23:54:37 2009 +0900 Speed up of Bio::Tree#children and parent: caching node's parent. * For speed up of Bio::Tree#children and parent, internal cache of the parent for each node is added. The cache is automatically cleared when the tree is modified. Note that the cache can only be accessed from inside Bio::Tree. * Bio::Tree#parent is changed to directly raise IndexError when both of the root specified in the argument and preset in the tree are nil (previously, the same error was raised in the path method which is internally called from the parent method). * Bio::Tree#path is changed not to call bfs_shortest_path if the node1 and node2 are adjacent. lib/bio/tree.rb | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 files changed, 70 insertions(+), 5 deletions(-) commit 75862212e6bb807a570338e39e19d527219b6f13 Author: Naohisa Goto Date: Mon Nov 16 22:11:15 2009 +0900 Documented incompatible changes of Bio::KEGG::COMPOUND and Bio::KEGG::REACTION. 
doc/Changes-1.4.rdoc | 10 ++++++++++ 1 files changed, 10 insertions(+), 0 deletions(-) commit c74cfabd6414c8b50db0251739f967accd90773f Author: Naohisa Goto Date: Mon Nov 16 21:20:42 2009 +0900 Ruby compatibility issue: Enumerable#each_slice(4).each does not work in Ruby 1.8.5. lib/bio/db/kegg/reaction.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit e6a920e401a2b06c355174ccdc9b993a38f9d7ec Author: Mitsuteru Nakao Date: Wed Jul 22 22:50:22 2009 +0900 Added new method Bio::KEGG::GENES#structure with the unit tests. lib/bio/db/kegg/genes.rb | 12 ++++++++++++ test/unit/bio/db/kegg/test_genes.rb | 25 +++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 0 deletions(-) commit 9fee0c133d069348857014410983f682e468c1c7 Author: Naohisa Goto Date: Mon Nov 16 21:04:02 2009 +0900 The "libpath magic" is replaced by loading helper routine. test/unit/bio/db/kegg/test_compound.rb | 6 ++++-- test/unit/bio/db/kegg/test_reaction.rb | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) commit 1199330eab95b8434303e92c5f792818e96db814 Author: Kozo Nishida Date: Sat Nov 14 09:03:50 2009 +0900 Newly added unit tests for Bio::KEGG::COMPOUND and Bio::KEGG::REACTION * Newly added unit tests for Bio::KEGG::COMPOUND and Bio::KEGG::REACTION with test data. (Note that this is a combination of several commits made by Kozo Nishida, merged from git://github.com/kozo2/bioruby.git ). 
test/data/KEGG/C00025.compound | 102 ++++++++++++++++++++++++++++++++ test/data/KEGG/R00006.reaction | 14 ++++ test/unit/bio/db/kegg/test_compound.rb | 49 +++++++++++++++ test/unit/bio/db/kegg/test_reaction.rb | 57 ++++++++++++++++++ 4 files changed, 222 insertions(+), 0 deletions(-) create mode 100644 test/data/KEGG/C00025.compound create mode 100644 test/data/KEGG/R00006.reaction create mode 100644 test/unit/bio/db/kegg/test_compound.rb create mode 100644 test/unit/bio/db/kegg/test_reaction.rb commit 1b47640665d4332bafd9e9709628ee9722f1f3f4 Author: Kozo Nishida Date: Sat Nov 14 09:03:50 2009 +0900 Bio::KEGG::COMPOUND#dblinks changed to return hash list * Bio::KEGG::COMPOUND#dblinks is changed to return hash list (array containing hashes). lib/bio/db/kegg/compound.rb | 11 +++++++++-- 1 files changed, 9 insertions(+), 2 deletions(-) commit 2aa43a0aa765ee4502923c2102e352826a9a7abd Author: Kozo Nishida Date: Sat Nov 14 07:29:19 2009 +0900 Bio::KEGG::REACTION#rpair and pathways changed to return hash list, and added orthologies method. * New method: Bio::KEGG::REACTION#orthologies * Bio::KEGG::REACTION#rpair and pathways are changed to return hash list (array containing hashes). lib/bio/db/kegg/reaction.rb | 33 ++++++++++++++++++++++++++++++--- 1 files changed, 30 insertions(+), 3 deletions(-) commit a82f5d228370beeeb397be07e07394652fd7837e Author: Naohisa Goto Date: Mon Nov 16 20:03:19 2009 +0900 Changed not to modify given argument lib/bio/util/restriction_enzyme/single_strand.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit ff10d5540759a5e7eaaa71da020d95170b98e007 Author: Naohisa Goto Date: Mon Nov 16 19:50:08 2009 +0900 Newly added a document for incompatible and/or important changes of the new release version. * Newly added a document for incompatible and/or important changes of the new release version. * Added description about Bio::RestrictionEnzyme validation is disabled (although very small change). 
doc/Changes-1.4.rdoc | 16 ++++++++++++++++ 1 files changed, 16 insertions(+), 0 deletions(-) create mode 100644 doc/Changes-1.4.rdoc commit 629e537f90e0825fadeec2e0207f8caddfbed59a Author: trevor <> Date: Sat Sep 19 11:03:23 2009 -0500 speed-up serial calls to RestrictionEnzyme lib/bio/db/rebase.rb | 2 +- lib/bio/util/restriction_enzyme/single_strand.rb | 3 +- .../util/restriction_enzyme/test_single_strand.rb | 24 ++++++++++--------- .../test_single_strand_complement.rb | 24 ++++++++++--------- 4 files changed, 29 insertions(+), 24 deletions(-) commit 9b55a92d5300294bef7b624d0f9aa3edd3e8d7fc Author: trevor <> Date: Sat Sep 19 10:46:21 2009 -0500 speed-up rebase library lib/bio/db/rebase.rb | 9 ++++----- 1 files changed, 4 insertions(+), 5 deletions(-) commit 4aaa24b3fc3cf2d1f7cf8b6d974d2115958b5a1b Author: Naohisa Goto Date: Mon Nov 16 15:08:31 2009 +0900 Ruby 1.9 support: Array#to_s is changed to join('') lib/bio/db/sanger_chromatogram/scf.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit 1cf924b81150545e807169144aeaca6a75f9731c Author: Naohisa Goto Date: Mon Nov 16 12:59:10 2009 +0900 Ruby 1.9 support: Array#nitems (counts the number of non-nil elements) is removed in 1.9. * Ruby 1.9 support: Array#nitems (counts the number of non-nil elements) is removed in Ruby 1.9. In scf.rb, it seems that nil would never be included in the array, and simply replaced by Array#size. lib/bio/db/sanger_chromatogram/scf.rb | 8 ++++---- 1 files changed, 4 insertions(+), 4 deletions(-) commit a95994d9cfbf4fc89fa716358ac5b92d42a1307b Author: Naohisa Goto Date: Sun Nov 15 19:23:12 2009 +0900 Bug fix: error when quality_scores are larger than the sequence length, and added a require line. * Bug fix: error when sequence.quality_scores are larger than the sequence length. * Added a require line. 
lib/bio/db/fasta/format_qual.rb | 5 ++++- 1 files changed, 4 insertions(+), 1 deletions(-) commit c5aafca19b58b1651080e81699b7020cd3fd3f47 Author: Naohisa Goto Date: Sun Nov 15 19:21:49 2009 +0900 Newly added unit tests for Bio::Sequence::Format::Formatter::Fasta_numeric and Qual. test/unit/bio/db/fasta/test_format_qual.rb | 346 ++++++++++++++++++++++++++++ 1 files changed, 346 insertions(+), 0 deletions(-) create mode 100644 test/unit/bio/db/fasta/test_format_qual.rb commit 6e24170e29d2576ff69b18eaadc94e9769b8612a Author: Naohisa Goto Date: Sat Nov 14 02:41:13 2009 +0900 Newly added Bio::Sequence::Format::Formatter::Qual and Fasta_numeric, formatter for Qual format and FastaNumericFormat. lib/bio/db/fasta/format_qual.rb | 201 +++++++++++++++++++++++++++++++++++++++ lib/bio/sequence/format.rb | 7 ++ 2 files changed, 208 insertions(+), 0 deletions(-) create mode 100644 lib/bio/db/fasta/format_qual.rb commit 6959fd359040b6ca9570111d515118dc2d472029 Author: Naohisa Goto Date: Sat Nov 14 02:19:17 2009 +0900 Split quality score methods in Bio::Fastq::FormatData into separate modules * Quality score calculation methods in Bio::Fastq::FormatData in lib/bio/db/fastq.rb are split into separate modules Bio::Fastq::QualityScore::Converter, Phred, and Solexa in lib/bio/db/fastq/quality_score.rb. * Unit tests for Bio::Fastq::QualityScore::* are newly added in test/unit/bio/db/fastq/test_quality_score.rb. * Possible bug fix: probability should be 0 <= p <= 1. 
lib/bio/db/fastq.rb | 112 +-------- lib/bio/db/fastq/quality_score.rb | 206 ++++++++++++++++ test/unit/bio/db/fastq/test_quality_score.rb | 330 ++++++++++++++++++++++++++ 3 files changed, 544 insertions(+), 104 deletions(-) create mode 100644 lib/bio/db/fastq/quality_score.rb create mode 100644 test/unit/bio/db/fastq/test_quality_score.rb commit 98f7703c28f0c2c34e4fe1631de227e20b9666c3 Author: Naohisa Goto Date: Fri Nov 13 23:48:19 2009 +0900 When no error_probabilities in the sequence and quality_score_type is nil, Fastq formatter implicitly assumes that the quality_score_type is :phred. * When no error_probabilities in the sequence and quality_score_type is nil, Fastq formatter implicitly assumes that the quality_score_type is :phred. * Bug fix: fixed typo in lib/bio/db/fastq/format_fastq.rb. lib/bio/db/fastq/format_fastq.rb | 5 ++++- lib/bio/sequence.rb | 3 +++ 2 files changed, 7 insertions(+), 1 deletions(-) commit f85a6aee9827bc573dcb735f4a1a1827926cc66c Author: Naohisa Goto Date: Fri Nov 13 23:29:19 2009 +0900 Bug fix: fixed typo for Bio::Sequence#quality_score_type. lib/bio/sequence.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit d81a611d7c7c46a789b86e99cebe064ba559e3e0 Author: Naohisa Goto Date: Fri Nov 13 20:42:12 2009 +0900 Splitting lib/bio/db/fasta.rb: FastaNumericFormat is moved to a new file, etc. * Splitting lib/bio/db/fasta.rb as follows: * Bio::FastaNumericFormat is moved to lib/bio/db/fasta/qual.rb. * Demo codes in the "if __FILE__ == $0" are moved to sample/demo_fastaformat.rb. * Unit tests for Bio::FastaNumericFormat are moved from test/unit/bio/db/test_fasta.rb to test/unit/bio/db/test_qual.rb. * lib/bio.rb is also modified for the autoload. * Bug fix: fixed incorrect autoload path for Bio::FastaDefline. 
lib/bio.rb | 4 +- lib/bio/db/fasta.rb | 135 +--------------------------------------- lib/bio/db/fasta/qual.rb | 102 ++++++++++++++++++++++++++++++ sample/demo_fastaformat.rb | 105 +++++++++++++++++++++++++++++++ test/unit/bio/db/test_fasta.rb | 43 ------------- test/unit/bio/db/test_qual.rb | 63 +++++++++++++++++++ 6 files changed, 273 insertions(+), 179 deletions(-) create mode 100644 lib/bio/db/fasta/qual.rb create mode 100644 sample/demo_fastaformat.rb create mode 100644 test/unit/bio/db/test_qual.rb commit c70bed5c3f828c94084fdeabe255fbb3930097d0 Author: Andrew Grimm Date: Sun Aug 16 19:49:38 2009 +1000 Removed use of uninitialized variable in FastaNumericFormat. lib/bio/db/fasta.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit ce6dcc344a3d7beec544d7164308dd97bafa8a19 Author: Naohisa Goto Date: Fri Nov 13 13:04:11 2009 +0900 User data type should be stored as is, even if unknown data type. lib/bio/db/sanger_chromatogram/abif.rb | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) commit ca96e59f151c2e10b5cd8c0690b8297979e52036 Author: Naohisa Goto Date: Fri Nov 13 12:50:59 2009 +0900 Removed Bio::Abif#method_missing and added alternative method * Removed Bio::Abif#method_missing, because method_missing can hide many errors related to method calls e.g. method name typo, and it is not suitable for only getting data. * New method Bio::Abif#data is added to get data (alternative of the method_missing). lib/bio/db/sanger_chromatogram/abif.rb | 19 ++++++++++++------- 1 files changed, 12 insertions(+), 7 deletions(-) commit 8b0da27523998cb9a9df07f5e907cda6e3cef0dc Author: Naohisa Goto Date: Fri Nov 13 12:04:41 2009 +0900 removed a non-ascii character in comment lib/bio/db/sanger_chromatogram/chromatogram.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit 55c1a180fec97338bae8e3c5b5d5ceec64aed0f6 Author: Naohisa Goto Date: Fri Nov 13 12:02:53 2009 +0900 Bug fix: Bio::SangerChromatogram#complement fails when the object is frozen. 
lib/bio/db/sanger_chromatogram/chromatogram.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit 4e7d8b0ba304d1ff01364fad68035f7ec9463fb9 Author: Naohisa Goto Date: Thu Nov 12 22:23:47 2009 +0900 fixed a typo in a copyright line test/unit/bio/util/test_sirna.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit f376124112d23ba9b0491dbd427d328edc81d872 Author: Naohisa Goto Date: Thu Nov 12 22:05:37 2009 +0900 The "libpath magic" in tests are replaced by the load of helper routine. * In all unit tests, the "libpath magic" are replaced by the load of helper routine. * Changed to use a constant BioRubyTestDataPath for generating test data file path. * Some "require" lines are modified. * "File.open(...).read" in some tests are replaced by "File.read(...)". * Header comment lines of some tests with wrong filename and/or class/module name information are fixed. test/functional/bio/appl/test_pts1.rb | 6 ++++-- test/functional/bio/io/test_ensembl.rb | 7 ++++--- test/functional/bio/io/test_pubmed.rb | 8 +++++--- test/functional/bio/io/test_soapwsdl.rb | 9 +++++---- test/functional/bio/io/test_togows.rb | 9 +++++---- test/functional/bio/sequence/test_output_embl.rb | 10 ++++++---- test/functional/bio/test_command.rb | 10 +++++----- test/runner.rb | 8 +++++--- test/unit/bio/appl/bl2seq/test_report.rb | 9 +++++---- test/unit/bio/appl/blast/test_ncbioptions.rb | 6 ++++-- test/unit/bio/appl/blast/test_report.rb | 9 +++++---- test/unit/bio/appl/blast/test_rpsblast.rb | 9 +++++---- test/unit/bio/appl/gcg/test_msf.rb | 10 +++++----- test/unit/bio/appl/genscan/test_report.rb | 17 ++++++++--------- test/unit/bio/appl/hmmer/test_report.rb | 9 +++++---- test/unit/bio/appl/iprscan/test_report.rb | 11 ++++++----- test/unit/bio/appl/mafft/test_report.rb | 11 ++++++----- test/unit/bio/appl/paml/codeml/test_rates.rb | 9 +++++---- test/unit/bio/appl/paml/codeml/test_report.rb | 9 +++++---- test/unit/bio/appl/paml/test_codeml.rb | 9 +++++---- 
test/unit/bio/appl/sim4/test_report.rb | 9 +++++---- test/unit/bio/appl/sosui/test_report.rb | 11 ++++++----- test/unit/bio/appl/targetp/test_report.rb | 8 +++++--- test/unit/bio/appl/test_blast.rb | 9 +++++---- test/unit/bio/appl/test_fasta.rb | 6 ++++-- test/unit/bio/appl/test_pts1.rb | 6 ++++-- test/unit/bio/appl/tmhmm/test_report.rb | 11 ++++++----- test/unit/bio/data/test_aa.rb | 8 +++++--- test/unit/bio/data/test_codontable.rb | 9 +++++---- test/unit/bio/data/test_na.rb | 8 +++++--- test/unit/bio/db/biosql/tc_biosql.rb | 6 +++++- test/unit/bio/db/embl/test_common.rb | 6 ++++-- test/unit/bio/db/embl/test_embl.rb | 12 ++++++------ test/unit/bio/db/embl/test_embl_rel89.rb | 12 ++++++------ test/unit/bio/db/embl/test_embl_to_bioseq.rb | 15 +++++++-------- test/unit/bio/db/embl/test_sptr.rb | 14 ++++++-------- test/unit/bio/db/embl/test_uniprot.rb | 11 ++++++----- test/unit/bio/db/kegg/test_genes.rb | 8 +++++--- test/unit/bio/db/pdb/test_pdb.rb | 6 ++++-- test/unit/bio/db/sanger_chromatogram/test_abif.rb | 3 ++- test/unit/bio/db/sanger_chromatogram/test_scf.rb | 3 ++- test/unit/bio/db/test_aaindex.rb | 12 ++++++------ test/unit/bio/db/test_fasta.rb | 8 +++++--- test/unit/bio/db/test_fastq.rb | 10 +++++----- test/unit/bio/db/test_gff.rb | 6 ++++-- test/unit/bio/db/test_lasergene.rb | 12 +++++++----- test/unit/bio/db/test_medline.rb | 6 ++++-- test/unit/bio/db/test_newick.rb | 12 ++++++------ test/unit/bio/db/test_nexus.rb | 6 ++++-- test/unit/bio/db/test_phyloxml.rb | 14 +++++++------- test/unit/bio/db/test_phyloxml_writer.rb | 15 +++++++-------- test/unit/bio/db/test_prosite.rb | 11 ++++++----- test/unit/bio/db/test_rebase.rb | 8 +++++--- test/unit/bio/db/test_soft.rb | 13 +++++++------ test/unit/bio/io/flatfile/test_autodetection.rb | 13 ++++++------- test/unit/bio/io/flatfile/test_buffer.rb | 11 ++++++----- test/unit/bio/io/flatfile/test_splitter.rb | 8 ++++---- test/unit/bio/io/test_ddbjxml.rb | 7 ++++--- test/unit/bio/io/test_ensembl.rb | 8 +++++--- 
test/unit/bio/io/test_fastacmd.rb | 7 ++++--- test/unit/bio/io/test_flatfile.rb | 11 ++++++----- test/unit/bio/io/test_soapwsdl.rb | 7 ++++--- test/unit/bio/io/test_togows.rb | 6 ++++-- test/unit/bio/sequence/test_aa.rb | 8 +++++--- test/unit/bio/sequence/test_common.rb | 6 ++++-- test/unit/bio/sequence/test_compat.rb | 6 ++++-- test/unit/bio/sequence/test_dblink.rb | 8 +++++--- test/unit/bio/sequence/test_na.rb | 6 ++++-- test/unit/bio/shell/plugin/test_seq.rb | 8 +++++--- test/unit/bio/test_alignment.rb | 8 +++++--- test/unit/bio/test_command.rb | 7 ++++--- test/unit/bio/test_db.rb | 8 +++++--- test/unit/bio/test_feature.rb | 6 ++++-- test/unit/bio/test_location.rb | 6 ++++-- test/unit/bio/test_map.rb | 8 +++++--- test/unit/bio/test_pathway.rb | 6 ++++-- test/unit/bio/test_reference.rb | 6 ++++-- test/unit/bio/test_sequence.rb | 8 +++++--- test/unit/bio/test_shell.rb | 8 +++++--- test/unit/bio/test_tree.rb | 12 ++++++------ .../analysis/test_calculated_cuts.rb | 6 ++++-- .../restriction_enzyme/analysis/test_cut_ranges.rb | 6 ++++-- .../analysis/test_sequence_range.rb | 6 ++++-- .../double_stranded/test_aligned_strands.rb | 6 ++++-- .../double_stranded/test_cut_location_pair.rb | 6 ++++-- .../test_cut_location_pair_in_enzyme_notation.rb | 6 ++++-- .../double_stranded/test_cut_locations.rb | 6 ++++-- .../test_cut_locations_in_enzyme_notation.rb | 6 ++++-- .../test_cut_locations_in_enzyme_notation.rb | 6 ++++-- .../bio/util/restriction_enzyme/test_analysis.rb | 6 ++++-- .../bio/util/restriction_enzyme/test_cut_symbol.rb | 6 ++++-- .../restriction_enzyme/test_double_stranded.rb | 6 ++++-- .../util/restriction_enzyme/test_single_strand.rb | 6 ++++-- .../test_single_strand_complement.rb | 6 ++++-- .../restriction_enzyme/test_string_formatting.rb | 6 ++++-- test/unit/bio/util/test_color_scheme.rb | 8 +++++--- test/unit/bio/util/test_contingency_table.rb | 8 +++++--- test/unit/bio/util/test_restriction_enzyme.rb | 6 ++++-- test/unit/bio/util/test_sirna.rb | 8 +++++--- 99 
files changed, 479 insertions(+), 343 deletions(-) commit f4fa0a5edc6ff6fc35577d84bda86363014a57a4 Author: Naohisa Goto Date: Wed Nov 11 17:04:43 2009 +0900 test_chromatogram.rb is split into test_abif.rb and test_scf.rb test/unit/bio/db/sanger_chromatogram/test_abif.rb | 75 +++++++++++++++ .../db/sanger_chromatogram/test_chromatogram.rb | 101 -------------------- test/unit/bio/db/sanger_chromatogram/test_scf.rb | 97 +++++++++++++++++++ 3 files changed, 172 insertions(+), 101 deletions(-) create mode 100644 test/unit/bio/db/sanger_chromatogram/test_abif.rb delete mode 100644 test/unit/bio/db/sanger_chromatogram/test_chromatogram.rb create mode 100644 test/unit/bio/db/sanger_chromatogram/test_scf.rb commit d9cc613273cadc7f9fdfe2bafbd933efb1f403ca Author: Naohisa Goto Date: Wed Nov 11 17:01:37 2009 +0900 Newly added unit test helper routine which aims to replace the libpath magic test/bioruby_test_helper.rb | 61 +++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 61 insertions(+), 0 deletions(-) create mode 100644 test/bioruby_test_helper.rb commit 10e76db2a8ec37bde541157d0735303b4ca8b3b8 Author: Naohisa Goto Date: Tue Nov 10 20:59:01 2009 +0900 Bio::SangerChromatogram#to_s is renamed to sequence_string. lib/bio/db/sanger_chromatogram/chromatogram.rb | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) commit 0ec2c2f38c4b4a3e451841dc32540dfa10743bc2 Author: Naohisa Goto Date: Fri Oct 30 22:49:19 2009 +0900 Renamed/moved files/directories following the rename of class names. 
* renamed: lib/bio/db/chromatogram.rb -> lib/bio/db/sanger_chromatogram/chromatogram.rb * renamed: lib/bio/db/chromatogram/abi.rb -> lib/bio/db/sanger_chromatogram/abif.rb * renamed: lib/bio/db/chromatogram/scf.rb -> lib/bio/db/sanger_chromatogram/scf.rb * renamed: lib/bio/db/chromatogram/chromatogram_to_biosequence.rb -> lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb * renamed: test/unit/bio/db/test_chromatogram.rb -> test/unit/bio/db/sanger_chromatogram/test_chromatogram.rb * renamed: test/data/chromatogram/test_chromatogram_abi.ab1 -> test/data/sanger_chromatogram/test_chromatogram_abif.ab1 * renamed: test/data/chromatogram/*.scf -> test/data/sanger_chromatogram/*.scf lib/bio/db/chromatogram.rb | 133 ------------- lib/bio/db/chromatogram/abi.rb | 114 ----------- .../db/chromatogram/chromatogram_to_biosequence.rb | 32 --- lib/bio/db/chromatogram/scf.rb | 210 -------------------- lib/bio/db/sanger_chromatogram/abif.rb | 114 +++++++++++ lib/bio/db/sanger_chromatogram/chromatogram.rb | 133 +++++++++++++ .../chromatogram_to_biosequence.rb | 32 +++ lib/bio/db/sanger_chromatogram/scf.rb | 210 ++++++++++++++++++++ test/data/chromatogram/test_chromatogram_abi.ab1 | Bin 228656 -> 0 bytes .../data/chromatogram/test_chromatogram_scf_v2.scf | Bin 47503 -> 0 bytes .../data/chromatogram/test_chromatogram_scf_v3.scf | Bin 47503 -> 0 bytes .../sanger_chromatogram/test_chromatogram_abif.ab1 | Bin 0 -> 228656 bytes .../test_chromatogram_scf_v2.scf | Bin 0 -> 47503 bytes .../test_chromatogram_scf_v3.scf | Bin 0 -> 47503 bytes .../db/sanger_chromatogram/test_chromatogram.rb | 101 ++++++++++ test/unit/bio/db/test_chromatogram.rb | 101 ---------- 16 files changed, 590 insertions(+), 590 deletions(-) delete mode 100644 lib/bio/db/chromatogram.rb delete mode 100644 lib/bio/db/chromatogram/abi.rb delete mode 100644 lib/bio/db/chromatogram/chromatogram_to_biosequence.rb delete mode 100644 lib/bio/db/chromatogram/scf.rb create mode 100644 
lib/bio/db/sanger_chromatogram/abif.rb create mode 100644 lib/bio/db/sanger_chromatogram/chromatogram.rb create mode 100644 lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb create mode 100644 lib/bio/db/sanger_chromatogram/scf.rb delete mode 100644 test/data/chromatogram/test_chromatogram_abi.ab1 delete mode 100644 test/data/chromatogram/test_chromatogram_scf_v2.scf delete mode 100644 test/data/chromatogram/test_chromatogram_scf_v3.scf create mode 100644 test/data/sanger_chromatogram/test_chromatogram_abif.ab1 create mode 100644 test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf create mode 100644 test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf create mode 100644 test/unit/bio/db/sanger_chromatogram/test_chromatogram.rb delete mode 100644 test/unit/bio/db/test_chromatogram.rb commit 49bfe319e535c8414be32b47c07fe5204a24b398 Author: Naohisa Goto Date: Fri Oct 30 22:02:08 2009 +0900 Renamed Chromatogram to SangerChromatogram and Abi to Abif, and preparation of filename changes. * Renamed Chromatogram to SangerChromatogram because the word "chromatogram" may be used by various experimental methods other than the Sanger chromatogram. * Renamed Abi to Abif because Applied Biosystems who determined the file format says that its name is ABIF. * Preparation of changing filenames. However, filenames are not really changed now because of recording history of file contents modification. The paths shown in the "require" lines and test data paths may not exist now. 
lib/bio.rb | 6 ++-- lib/bio/db/chromatogram.rb | 24 +++++++------- lib/bio/db/chromatogram/abi.rb | 15 +++++---- .../db/chromatogram/chromatogram_to_biosequence.rb | 10 +++--- lib/bio/db/chromatogram/scf.rb | 15 +++++---- lib/bio/sequence/adapter.rb | 3 +- test/unit/bio/db/test_chromatogram.rb | 32 ++++++++++--------- 7 files changed, 57 insertions(+), 48 deletions(-) commit 6c020440663214014973ae8e5007ce2d31d8d45e Author: Naohisa Goto Date: Sat Oct 24 00:44:47 2009 +0900 New class method Bio::PhyloXML::Parser.open(filename) and API change of new(), etc. * New class methods to create parser object from various data source are added: Bio::PhyloXML::Parser.open(filename), for_io(io), open_uri(uri). * API change of Bio::PhyloXML::Parser.new(). Now, new(filename) is deprecated and it can only take a XML-formatted string. * Tests are added and modified to reflect the above changes. * test/unit/bio/db/test_phyloxml_writer.rb: avoid using WeakRef for temporary directory maintenance. lib/bio/db/phyloxml/phyloxml_parser.rb | 224 +++++++++++++++++++++++++++--- lib/bio/db/phyloxml/phyloxml_writer.rb | 4 +- test/unit/bio/db/test_phyloxml.rb | 178 ++++++++++++++++++++++-- test/unit/bio/db/test_phyloxml_writer.rb | 70 +++++----- 4 files changed, 408 insertions(+), 68 deletions(-) commit fca5e800fc051a38ac6d25652c684fdd4f9bff14 Author: Naohisa Goto Date: Fri Oct 23 15:13:25 2009 +0900 Rearrangement of require and autoload so as to correctly load PhyloXML classes lib/bio.rb | 11 +++++++---- lib/bio/db/phyloxml/phyloxml_elements.rb | 16 +++++++++++++++- lib/bio/db/phyloxml/phyloxml_parser.rb | 11 ++++++----- lib/bio/db/phyloxml/phyloxml_writer.rb | 5 ++++- test/unit/bio/db/test_phyloxml.rb | 5 ----- test/unit/bio/db/test_phyloxml_writer.rb | 3 --- 6 files changed, 32 insertions(+), 19 deletions(-) commit a291af62ef262ee04f3a0e1b6415d4e256c56a94 Author: Naohisa Goto Date: Fri Oct 23 00:08:44 2009 +0900 Fixed argument order of assert_equal(expected, actual), etc. 
* Test bug fix: Argument order of assert_equal must be assert_equal(expected, actual). * assert_instance_of() instead of assert_equal() in TestPhyloXML1#test_init. * Removed some commented-out tests which may not be needed. test/unit/bio/db/test_phyloxml.rb | 295 +++++++++++++++--------------- test/unit/bio/db/test_phyloxml_writer.rb | 8 +- 2 files changed, 147 insertions(+), 156 deletions(-) commit 152304dc9809102f56a2f1779c59111f84b9cd02 Author: Naohisa Goto Date: Sat Oct 17 01:40:49 2009 +0900 Improvement of tests for Bio::Fastq and related classes. test/unit/bio/db/test_fastq.rb | 372 ++++++++++++++++++++++++++-------------- 1 files changed, 245 insertions(+), 127 deletions(-) commit 61556223a469a5f8b1bb4f343eca92c88c66cb9a Author: Naohisa Goto Date: Sat Oct 17 01:38:52 2009 +0900 FASTQ output support is added to Bio::Sequence. lib/bio/db/fastq/format_fastq.rb | 172 ++++++++++++++++++++++++++++++++++++++ lib/bio/sequence/format.rb | 9 ++ 2 files changed, 181 insertions(+), 0 deletions(-) create mode 100644 lib/bio/db/fastq/format_fastq.rb commit ea4203ebb7ca268a5b6d6c50aeb63ed0eed5a803 Author: Naohisa Goto Date: Sat Oct 17 01:32:50 2009 +0900 New attributes for genome sequencer data are added to Bio::Sequence. * New attributes for genome sequencer data are added to Bio::Sequence class: quality_scores, quality_scores_type, error_probabilities. lib/bio/sequence.rb | 13 +++++++++++++ 1 files changed, 13 insertions(+), 0 deletions(-) commit fce158b2194519081361e12c170882ec2e87fc5e Author: Naohisa Goto Date: Sat Oct 17 01:13:27 2009 +0900 New methods Bio::Fastq#to_biosequence, etc. and improvement of tolerance for overflows * Bio::Fastq#to_biosequence is newly added. * New methods: Bio::Fastq#seq, entry_id, quality_score_type. * Default behavior of Bio::Fastq::FormatData#scores2str is changed not to raise error but to truncate saturated values. * Improvement of tolerance for overflows, and preventing to calculate log of negative number. 
lib/bio/db/fastq.rb | 105 ++++++++++++++++++++++++++++-- lib/bio/db/fastq/fastq_to_biosequence.rb | 40 +++++++++++ lib/bio/sequence/adapter.rb | 1 + 3 files changed, 139 insertions(+), 7 deletions(-) create mode 100644 lib/bio/db/fastq/fastq_to_biosequence.rb commit 0f189974d2027cecee575b27e969de7f62508309 Author: Naohisa Goto Date: Tue Oct 13 21:41:58 2009 +0900 Avoid using Numeric#fdiv because it can only be used in Ruby 1.8.7 or later lib/bio/db/fastq.rb | 6 +++--- test/unit/bio/db/test_fastq.rb | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) commit 42999fc6230e52c4f241f411d299db941196f62e Author: Naohisa Goto Date: Tue Oct 13 21:30:25 2009 +0900 Bio::Fastq#qualities is renamed to quality_scores. * Bio::Fastq#qualities is renamed to Bio::Fastq#quality_scores, and the original method name is changed to be an alias of the new name. lib/bio/db/fastq.rb | 16 +++++++++------- test/unit/bio/db/test_fastq.rb | 30 +++++++++++++++--------------- 2 files changed, 24 insertions(+), 22 deletions(-) commit cc0ee2169f298046c5e55fcbadfeaac01f6bf704 Author: Naohisa Goto Date: Sun Oct 11 19:19:18 2009 +0900 Newly added unit tests for Bio::Fastq with test data * Newly added unit tests for Bio::Fastq with test data. The test data is created by P.J.A. Cock et al., and is also used in Biopython and BioPerl. 
test/data/fastq/error_diff_ids.fastq | 20 + test/data/fastq/error_double_qual.fastq | 22 + test/data/fastq/error_double_seq.fastq | 22 + test/data/fastq/error_long_qual.fastq | 20 + test/data/fastq/error_no_qual.fastq | 20 + test/data/fastq/error_qual_del.fastq | 20 + test/data/fastq/error_qual_escape.fastq | 20 + test/data/fastq/error_qual_null.fastq | Bin 0 -> 610 bytes test/data/fastq/error_qual_space.fastq | 21 + test/data/fastq/error_qual_tab.fastq | 21 + test/data/fastq/error_qual_unit_sep.fastq | 20 + test/data/fastq/error_qual_vtab.fastq | 20 + test/data/fastq/error_short_qual.fastq | 20 + test/data/fastq/error_spaces.fastq | 20 + test/data/fastq/error_tabs.fastq | 21 + test/data/fastq/error_trunc_at_plus.fastq | 19 + test/data/fastq/error_trunc_at_qual.fastq | 19 + test/data/fastq/error_trunc_at_seq.fastq | 18 + test/data/fastq/error_trunc_in_plus.fastq | 19 + test/data/fastq/error_trunc_in_qual.fastq | 20 + test/data/fastq/error_trunc_in_seq.fastq | 18 + test/data/fastq/error_trunc_in_title.fastq | 17 + .../fastq/illumina_full_range_as_illumina.fastq | 8 + .../data/fastq/illumina_full_range_as_sanger.fastq | 8 + .../data/fastq/illumina_full_range_as_solexa.fastq | 8 + .../illumina_full_range_original_illumina.fastq | 8 + test/data/fastq/longreads_as_illumina.fastq | 40 ++ test/data/fastq/longreads_as_sanger.fastq | 40 ++ test/data/fastq/longreads_as_solexa.fastq | 40 ++ test/data/fastq/longreads_original_sanger.fastq | 120 ++++ test/data/fastq/misc_dna_as_illumina.fastq | 16 + test/data/fastq/misc_dna_as_sanger.fastq | 16 + test/data/fastq/misc_dna_as_solexa.fastq | 16 + test/data/fastq/misc_dna_original_sanger.fastq | 16 + test/data/fastq/misc_rna_as_illumina.fastq | 16 + test/data/fastq/misc_rna_as_sanger.fastq | 16 + test/data/fastq/misc_rna_as_solexa.fastq | 16 + test/data/fastq/misc_rna_original_sanger.fastq | 16 + .../data/fastq/sanger_full_range_as_illumina.fastq | 8 + test/data/fastq/sanger_full_range_as_sanger.fastq | 8 + 
test/data/fastq/sanger_full_range_as_solexa.fastq | 8 + .../fastq/sanger_full_range_original_sanger.fastq | 8 + .../data/fastq/solexa_full_range_as_illumina.fastq | 8 + test/data/fastq/solexa_full_range_as_sanger.fastq | 8 + test/data/fastq/solexa_full_range_as_solexa.fastq | 8 + .../fastq/solexa_full_range_original_solexa.fastq | 8 + test/data/fastq/wrapping_as_illumina.fastq | 12 + test/data/fastq/wrapping_as_sanger.fastq | 12 + test/data/fastq/wrapping_as_solexa.fastq | 12 + test/data/fastq/wrapping_original_sanger.fastq | 24 + test/unit/bio/db/test_fastq.rb | 711 ++++++++++++++++++++ 51 files changed, 1652 insertions(+), 0 deletions(-) create mode 100644 test/data/fastq/error_diff_ids.fastq create mode 100644 test/data/fastq/error_double_qual.fastq create mode 100644 test/data/fastq/error_double_seq.fastq create mode 100644 test/data/fastq/error_long_qual.fastq create mode 100644 test/data/fastq/error_no_qual.fastq create mode 100644 test/data/fastq/error_qual_del.fastq create mode 100644 test/data/fastq/error_qual_escape.fastq create mode 100644 test/data/fastq/error_qual_null.fastq create mode 100644 test/data/fastq/error_qual_space.fastq create mode 100644 test/data/fastq/error_qual_tab.fastq create mode 100644 test/data/fastq/error_qual_unit_sep.fastq create mode 100644 test/data/fastq/error_qual_vtab.fastq create mode 100644 test/data/fastq/error_short_qual.fastq create mode 100644 test/data/fastq/error_spaces.fastq create mode 100644 test/data/fastq/error_tabs.fastq create mode 100644 test/data/fastq/error_trunc_at_plus.fastq create mode 100644 test/data/fastq/error_trunc_at_qual.fastq create mode 100644 test/data/fastq/error_trunc_at_seq.fastq create mode 100644 test/data/fastq/error_trunc_in_plus.fastq create mode 100644 test/data/fastq/error_trunc_in_qual.fastq create mode 100644 test/data/fastq/error_trunc_in_seq.fastq create mode 100644 test/data/fastq/error_trunc_in_title.fastq create mode 100644 test/data/fastq/illumina_full_range_as_illumina.fastq 
create mode 100644 test/data/fastq/illumina_full_range_as_sanger.fastq create mode 100644 test/data/fastq/illumina_full_range_as_solexa.fastq create mode 100644 test/data/fastq/illumina_full_range_original_illumina.fastq create mode 100644 test/data/fastq/longreads_as_illumina.fastq create mode 100644 test/data/fastq/longreads_as_sanger.fastq create mode 100644 test/data/fastq/longreads_as_solexa.fastq create mode 100644 test/data/fastq/longreads_original_sanger.fastq create mode 100644 test/data/fastq/misc_dna_as_illumina.fastq create mode 100644 test/data/fastq/misc_dna_as_sanger.fastq create mode 100644 test/data/fastq/misc_dna_as_solexa.fastq create mode 100644 test/data/fastq/misc_dna_original_sanger.fastq create mode 100644 test/data/fastq/misc_rna_as_illumina.fastq create mode 100644 test/data/fastq/misc_rna_as_sanger.fastq create mode 100644 test/data/fastq/misc_rna_as_solexa.fastq create mode 100644 test/data/fastq/misc_rna_original_sanger.fastq create mode 100644 test/data/fastq/sanger_full_range_as_illumina.fastq create mode 100644 test/data/fastq/sanger_full_range_as_sanger.fastq create mode 100644 test/data/fastq/sanger_full_range_as_solexa.fastq create mode 100644 test/data/fastq/sanger_full_range_original_sanger.fastq create mode 100644 test/data/fastq/solexa_full_range_as_illumina.fastq create mode 100644 test/data/fastq/solexa_full_range_as_sanger.fastq create mode 100644 test/data/fastq/solexa_full_range_as_solexa.fastq create mode 100644 test/data/fastq/solexa_full_range_original_solexa.fastq create mode 100644 test/data/fastq/wrapping_as_illumina.fastq create mode 100644 test/data/fastq/wrapping_as_sanger.fastq create mode 100644 test/data/fastq/wrapping_as_solexa.fastq create mode 100644 test/data/fastq/wrapping_original_sanger.fastq create mode 100644 test/unit/bio/db/test_fastq.rb commit 951d8f7303a5c28783a2c8b25c9fb347730c1a8f Author: Naohisa Goto Date: Sun Oct 11 19:10:15 2009 +0900 Bio::Fastq API changed. * Bio::Fastq API changed. 
Removed methods: phred_quality, solexa_quality. New methods: qualities, error_probabilities, format, format=, validate_format. * New exception classes Bio::Fastq::Error::* for errors. * Internal structure is also changed. Internal only classes Bio::Fastq::FormatData::* which store parameters for format variants. lib/bio/db/fastq.rb | 519 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 files changed, 501 insertions(+), 18 deletions(-) commit 9bb7f6ca762c615e50d98c35b60982a4caeea323 Author: Naohisa Goto Date: Fri Sep 25 23:36:13 2009 +0900 Bug fix: infinite loop in Bio::Fastq.new. Thanks to Hiroyuki Mishima for reporting the bug. lib/bio/db/fastq.rb | 16 ++++++++++------ 1 files changed, 10 insertions(+), 6 deletions(-) commit fca6aa5333a95db4dc87e8fc814bd028d5720de4 Author: Naohisa Goto Date: Fri Mar 20 11:52:33 2009 +0900 Added file format autodetection for Bio::Fastq lib/bio/io/flatfile/autodetection.rb | 6 ++++++ 1 files changed, 6 insertions(+), 0 deletions(-) commit 1ba21545e7d49ae8b775fbed7a4e92b1daa54ac6 Author: Naohisa Goto Date: Fri Mar 20 11:48:59 2009 +0900 Added autoload for Bio::Fastq lib/bio.rb | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) commit 380b99106d4c7955b9d07ee8668b53d384c974f4 Author: Naohisa Goto Date: Thu Mar 19 17:07:25 2009 +0900 Newly added FASTQ format parser (still a prototype) lib/bio/db/fastq.rb | 162 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 162 insertions(+), 0 deletions(-) create mode 100644 lib/bio/db/fastq.rb commit 2c5df2a5f1b5ae1ea9e61c1dccc8bcd2f496f6ce Author: Naohisa Goto Date: Sun Sep 20 19:08:55 2009 +0900 Removed "require 'rubygems'". lib/bio/db/phyloxml/phyloxml_parser.rb | 2 -- 1 files changed, 0 insertions(+), 2 deletions(-) commit 67818d2550e5d53eeee0f3d710f66f7506fb8127 Author: Naohisa Goto Date: Sat Sep 19 17:06:21 2009 +0900 Use Bio::PubMed.esearch and efetch, etc. * Changed to use Bio::PubMed.esearch and efetch instead of deprecated methods. 
* Regular expression for extracting option is changed. sample/pmfetch.rb | 15 +++++++++++---- sample/pmsearch.rb | 17 +++++++++++++---- 2 files changed, 24 insertions(+), 8 deletions(-) commit 0c95889bf69e3140b5f09ade1203d50136aee014 Author: Naohisa Goto Date: Fri Sep 18 17:58:17 2009 +0900 Changed to use temporary directory when writing a file, etc. * To avoid unexpected file corruption and possibly security risk, changed to use temporary directory when writing files. The temporary directory is normally removed when all tests end. To prevent removing the directory, set environment variable BIORUBY_TEST_DEBUG. * To avoid test class name conflict, TestPhyloXMLData is renamed to TestPhyloXMLWriterData. * Added a new test to check existence of libxml-ruby, and removed code to raise error when it is not found. The code of the new test is completely the same as of in test_phyloxml.rb, but it is added for the purpose when test_phyloxml_writer.rb is called independently. test/unit/bio/db/test_phyloxml_writer.rb | 161 +++++++++++++++++++++--------- 1 files changed, 112 insertions(+), 49 deletions(-) commit 520d0f5ed535f621aed60b71d8765a99e97306a6 Author: Naohisa Goto Date: Sun Sep 20 18:34:19 2009 +0900 Newly added internal-only class Bio::Command::Tmpdir to handle temporary directory * Newly added internal-only class Bio::Command::Tmpdir to handle temporary directory. It is BioRuby library internal use only. * Bio::Command.mktmpdir is changed to be completely compatible with Ruby 1.9.x's Dir.mktmpdir. lib/bio/command.rb | 104 +++++++++++++++++++++++++++++++--- test/functional/bio/test_command.rb | 49 ++++++++++++++++ 2 files changed, 143 insertions(+), 10 deletions(-) commit c813b60ae62f44d9688b21d47c84e4b7083547e6 Author: Naohisa Goto Date: Fri Sep 18 17:55:14 2009 +0900 Added new test to check existence of libxml-ruby, instead of raising error. 
test/unit/bio/db/test_phyloxml.rb | 30 ++++++++++++++++++++---------- 1 files changed, 20 insertions(+), 10 deletions(-) commit 1b71dd9624640f3f775baab360eef0be92a86677 Author: Diana Jaunzeikare Date: Fri Sep 18 21:43:18 2009 -0400 Renamed output files generated by phyloxml_writer unit tests. test/unit/bio/db/test_phyloxml_writer.rb | 13 ++++++++++--- 1 files changed, 10 insertions(+), 3 deletions(-) commit f8e138cb9e28996f1024fa9cf7c68c8f08603941 Author: Diana Jaunzeikare Date: Fri Sep 18 21:33:35 2009 -0400 Added ncbi_taxonomy_mollusca_short.xml test file .../data/phyloxml/ncbi_taxonomy_mollusca_short.xml | 65 ++++++++++++++++++++ 1 files changed, 65 insertions(+), 0 deletions(-) create mode 100644 test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml commit be1be310b7581928581cde24303fe2e16c04e82f Author: Diana Jaunzeikare Date: Fri Sep 18 21:29:20 2009 -0400 Made the code compactible with libxml-ruby 1.1.3 (previous was 0.9.4) version. lib/bio/db/phyloxml/phyloxml_elements.rb | 58 +++++++++++++++--------------- lib/bio/db/phyloxml/phyloxml_parser.rb | 10 ++++- lib/bio/db/phyloxml/phyloxml_writer.rb | 8 +++-- 3 files changed, 42 insertions(+), 34 deletions(-) commit a3441afd5650069a5ada64b202a0714e8723e911 Author: Diana Jaunzeikare Date: Tue May 26 00:55:47 2009 -0400 Newly added PhyloXML support written by Diana Jaunzeikare. * Newly added PhyloXML support written by Diana Jaunzeikare. It have been written during the Google Summer of Code 2009 "Implementing phyloXML support in BioRuby", mentored by Christian Zmasek et al. with NESCent. For details of development, see git://github.com/latvianlinuxgirl/bioruby.git and BioRuby mailing list archives. * This is a combination of 119 commits. The last commit date was Mon Aug 17 10:30:10 2009 -0400. 
README.rdoc | 3 + doc/Tutorial.rd | 120 ++- lib/bio.rb | 6 + lib/bio/db/phyloxml/phyloxml.xsd | 573 ++++++++ lib/bio/db/phyloxml/phyloxml_elements.rb | 1160 +++++++++++++++++ lib/bio/db/phyloxml/phyloxml_parser.rb | 767 +++++++++++ lib/bio/db/phyloxml/phyloxml_writer.rb | 223 ++++ test/data/phyloxml/apaf.xml | 666 ++++++++++ test/data/phyloxml/bcl_2.xml | 2097 ++++++++++++++++++++++++++++++ test/data/phyloxml/made_up.xml | 144 ++ test/data/phyloxml/phyloxml_examples.xml | 415 ++++++ test/unit/bio/db/test_phyloxml.rb | 619 +++++++++ test/unit/bio/db/test_phyloxml_writer.rb | 258 ++++ 13 files changed, 7050 insertions(+), 1 deletions(-) create mode 100644 lib/bio/db/phyloxml/phyloxml.xsd create mode 100644 lib/bio/db/phyloxml/phyloxml_elements.rb create mode 100644 lib/bio/db/phyloxml/phyloxml_parser.rb create mode 100644 lib/bio/db/phyloxml/phyloxml_writer.rb create mode 100644 test/data/phyloxml/apaf.xml create mode 100644 test/data/phyloxml/bcl_2.xml create mode 100644 test/data/phyloxml/made_up.xml create mode 100644 test/data/phyloxml/phyloxml_examples.xml create mode 100644 test/unit/bio/db/test_phyloxml.rb create mode 100644 test/unit/bio/db/test_phyloxml_writer.rb commit fd8281f03423ddf23f7d409863b4df647f1b1564 Author: Naohisa Goto Date: Wed Sep 9 21:08:15 2009 +0900 Newly added Chromatogram classes contributed by Anthony Underwood. * Newly added Chromatogram classes contributed by Anthony Underwood. See git://github.com/aunderwo/bioruby.git for details of development before this merge. 
lib/bio.rb | 3 + lib/bio/db/chromatogram.rb | 133 +++++++++++++ lib/bio/db/chromatogram/abi.rb | 111 +++++++++++ .../db/chromatogram/chromatogram_to_biosequence.rb | 32 +++ lib/bio/db/chromatogram/scf.rb | 207 ++++++++++++++++++++ lib/bio/sequence/adapter.rb | 1 + test/data/chromatogram/test_chromatogram_abi.ab1 | Bin 0 -> 228656 bytes .../data/chromatogram/test_chromatogram_scf_v2.scf | Bin 0 -> 47503 bytes .../data/chromatogram/test_chromatogram_scf_v3.scf | Bin 0 -> 47503 bytes test/unit/bio/db/test_chromatogram.rb | 99 ++++++++++ 10 files changed, 586 insertions(+), 0 deletions(-) create mode 100644 lib/bio/db/chromatogram.rb create mode 100644 lib/bio/db/chromatogram/abi.rb create mode 100644 lib/bio/db/chromatogram/chromatogram_to_biosequence.rb create mode 100644 lib/bio/db/chromatogram/scf.rb create mode 100644 test/data/chromatogram/test_chromatogram_abi.ab1 create mode 100644 test/data/chromatogram/test_chromatogram_scf_v2.scf create mode 100644 test/data/chromatogram/test_chromatogram_scf_v3.scf create mode 100644 test/unit/bio/db/test_chromatogram.rb commit 78f9463b764687401ff4a7480c1383c5594e5133 Author: Naohisa Goto Date: Thu Sep 10 12:38:25 2009 +0900 Bio::BIORUBY_EXTRA_VERSION is changed to ".5000". bioruby.gemspec | 2 +- lib/bio/version.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit e731c6e52bc9a672e4546eeca4f2d2d968bdba09 Author: Naohisa Goto Date: Wed Sep 2 15:24:00 2009 +0900 BioRuby 1.3.1 is released. ChangeLog is modified, and bioruby.gemspec is regenerated. ChangeLog | 11 +++++++++++ bioruby.gemspec | 2 +- 2 files changed, 12 insertions(+), 1 deletions(-) bio-1.4.3.0001/doc/RELEASE_NOTES-1.4.0.rdoc0000644000004100000410000001422412200110570016772 0ustar www-datawww-data= BioRuby 1.4.0 RELEASE NOTES A lot of changes have been made to the BioRuby 1.4.0 after the version 1.3.1 is released. This document describes important and/or incompatible changes since the BioRuby 1.3.1 release. 
== New features === PhyloXML support Support for reading and writing PhyloXML file format is added. New classes Bio::PhyloXML::Parser and Bio::PhyloXML::Writer are used to read and write a PhyloXML file, respectively. The code is developed by Diana Jaunzeikare, mentored by Christian M Zmasek and co-mentors, supported by Google Summer of Code 2009 in collaboration with the National Evolutionary Synthesis Center (NESCent). === FASTQ file format support Support for reading and writing FASTQ file format is added. All of the three FASTQ format variants are supported. To read a FASTQ file, Bio::FlatFile can be used. File format auto-detection of the FASTQ format is supported (although the three format variants should be specified later by users if quality scores are needed). New class Bio::Fastq is the parser class for the FASTQ format. An object of the Bio::Fastq class can be converted to a Bio::Sequence object with the "to_biosequence" method. Bio::Sequence#output now supports output of the FASTQ format. The code is written by Naohisa Goto, with the help of discussions in the open-bio-l mailing list. The prototype of Bio::Fastq class was first developed during the BioHackathon 2009 held in Okinawa. === DNA chromatogram support Support for reading DNA chromatogram files is added. SCF and ABIF file formats are supported. The code is developed by Anthony Underwood. === MEME (motif-based sequence analysis tools) support Support for running MAST (Motif Alignment & Search Tool, part of the MEME Suite, motif-based sequence analysis tools) and parsing its results is added. The code is developed by Adam Kraut. === Improvement of KEGG parser classes Some new methods are added to parse new fields added to some KEGG file formats. Unit tests for KEGG parsers are also added and improved. In addition, return value types of some methods are also changed for unifying APIs among KEGG parser classes. See incompatible changes below for details.
Most of them are contributed by Kozo Nishida. === Many sample scripts are added Many sample scripts showing demonstrations of usages of classes are added. They are moved from primitive test codes for the classes described in the "if __FILE__ == $0" convention in the library files. === Unit tests can test installed BioRuby Mechanism to load library and to find test data in the unit tests are changed, and the library path and test data path can be specified with environment variables. BIORUBY_TEST_LIB is the path to be added to the Ruby's $LOAD_PATH. For example, to test BioRuby installed in /usr/local/lib/site_ruby/1.8, run env BIORUBY_TEST_LIB=/usr/local/lib/site_ruby/1.8 ruby test/runner.rb BIORUBY_TEST_DATA is the path of the test data, and BIORUBY_TEST_DEBUG is a flag to turn on debug of the tests. == Deprecated features === ChangeLog is replaced by git log ChangeLog is replaced by the output of git-log command, and ChangeLog before the 1.3.1 release is moved to doc/ChangeLog-before-1.3.1. === "if __FILE__ == $0" convention Primitive test codes in the "if __FILE__ == $0" convention are removed and the codes are moved to the sample scripts named sample/demo_*.rb (except some older or deprecated files). == Incompatible changes === Bio::NCBI::REST NCBI announces that all Entrez E-utility requests must contain email and tool parameters, and requests without them will return error after June 2010. To set default email address and tool name, following methods are added. * Bio::NCBI.default_email=(email) * Bio::NCBI.default_tool=(tool_name) For every query, Bio::NCBI::REST checks the email and tool parameters and raises error if they are empty. IMPORTANT NOTE: No default email address is preset in BioRuby. Programmers using BioRuby must set their own email address or implement to get user's email address in some way (from input form, configuration file, etc). Default tool name is set as "#{$0} (bioruby/#{Bio::BIORUBY_VERSION_ID})". 
For example, if you run "ruby my_script.rb" with BioRuby 1.4.0, the value is "my_script.rb (bioruby/1.4.0)". === Bio::KEGG ==== dblinks method In Bio::KEGG::COMPOUND, DRUG, ENZYME, GLYCAN and ORTHOLOGY, the method dblinks is changed to return a Hash. Each key of the hash is a database name and its value is an array of entry IDs in the database. If old behavior (returns raw entry lines as an array of strings) is needed, use dblinks_as_strings. ==== pathways method In Bio::KEGG::COMPOUND, DRUG, ENZYME, GENES, GLYCAN and REACTION, the method pathways is changed to return a Hash. Each key of the hash is a pathway ID and its value is the description of the pathway. In Bio::KEGG::GENES, if old behavior (returns pathway IDs as an Array) is needed, use pathways.keys. In Bio::KEGG::COMPOUND, DRUG, ENZYME, GLYCAN, and REACTION, if old behavior (returns raw entry lines as an array of strings) is needed, use pathways_as_strings. Note that Bio::KEGG::ORTHOLOGY#pathways is not changed (returns an array containing pathway IDs). ==== orthologs method In Bio::KEGG::ENZYME, GENES, GLYCAN and REACTION, the method orthologs is changed to return a Hash. Each key of the hash is an ortholog ID and its value is the name of the ortholog. If old behavior (returns raw entry lines as an array of strings) is needed, use orthologs_as_strings. ==== genes method Bio::KEGG::ENZYME#genes and Bio::KEGG::ORTHOLOGY#genes are changed to return a Hash that is the same as Bio::KEGG::ORTHOLOGY#genes_as_hash. If old behavior (returns raw entry lines as an array of strings) is needed, use genes_as_strings. ==== Bio::KEGG::REACTION#rpairs Bio::KEGG::REACTION#rpairs is changed to return a Hash. Each key of the hash is a KEGG Rpair ID and its value is an array containing name and type. If old behavior (returns as tokens) is needed, use rpairs_as_tokens. ==== Bio::KEGG::ORTHOLOGY Bio::KEGG::ORTHOLOGY#dblinks_as_hash does not lower-case database names.
=== Bio::RestrictionEnzyme Format validation when creating an object is turned off because of efficiency. == Known problems See KNOWN_ISSUES.rdoc for details. bio-1.4.3.0001/doc/Tutorial.rd.html0000644000004100000410000015550512200110570016540 0ustar www-datawww-data Tutorial.rd

    BioRuby Tutorial

    • Copyright (C) 2001-2003 KATAYAMA Toshiaki <k .at. bioruby.org>
    • Copyright (C) 2005-2011 Pjotr Prins, Naohisa Goto and others

    This document was last modified: 2011/10/14 Current editor: Michael O'Keefe <okeefm (at) rpi (dot) edu>

    The latest version resides in the GIT source code repository: ./doc/Tutorial.rd.

    Introduction

    This is a tutorial for using Bioruby. A basic knowledge of Ruby is required. If you want to know more about the programming language, we recommend the latest Ruby book Programming Ruby by Dave Thomas and Andy Hunt - the first edition can be read online here.

    For BioRuby you need to install Ruby and the BioRuby package on your computer.

    You can check whether Ruby is installed on your computer and what version it has with the

    % ruby -v

    command. You should see something like:

    ruby 1.9.2p290 (2011-07-09 revision 32553) [i686-linux]

    If you see no such thing you'll have to install Ruby using your installation manager. For more information see the Ruby website.

    With Ruby installed, download and install Bioruby using the links on the Bioruby website. The recommended installation is via RubyGems:

    gem install bio

    See also the Bioruby wiki.

    A lot of BioRuby's documentation exists in the source code and unit tests. To really dive in you will need the latest source code tree. The embedded rdoc documentation can be viewed online at bioruby's rdoc. But first, let's get started!

    Trying Bioruby

    Bioruby comes with its own shell. After unpacking the sources run one of the following commands:

    bioruby

    or, from the source tree

    cd bioruby
    ruby -I lib bin/bioruby

    and you should see a prompt

    bioruby>

    Now test the following:

    bioruby> require 'bio'
    bioruby> seq = Bio::Sequence::NA.new("atgcatgcaaaa")
    ==> "atgcatgcaaaa"
    
    bioruby> seq.complement
    ==> "ttttgcatgcat"

    See the Bioruby shell section below for more tweaking. If you have trouble running examples, also check the section below on troubleshooting. You can also post a question to the mailing list. BioRuby developers usually try to help.

    Working with nucleic / amino acid sequences (Bio::Sequence class)

    The Bio::Sequence class allows the usual sequence transformations and translations. In the example below the DNA sequence "atgcatgcaaaa" is converted into the complementary strand and spliced into a subsequence; next, the nucleic acid composition is calculated, the sequence is translated into the amino acid sequence, the molecular weight is calculated, and so on. When translating into amino acid sequences, the frame can be specified and optionally the codon table selected (as defined in codontable.rb).

    bioruby> seq = Bio::Sequence::NA.new("atgcatgcaaaa")
    ==> "atgcatgcaaaa"
    
    # complemental sequence (Bio::Sequence::NA object)
    bioruby> seq.complement
    ==> "ttttgcatgcat"
    
    bioruby> seq.subseq(3,8) # gets subsequence of positions 3 to 8 (starting from 1)
    ==> "gcatgc"
    bioruby> seq.gc_percent 
    ==> 33
    bioruby> seq.composition 
    ==> {"a"=>6, "c"=>2, "g"=>2, "t"=>2}
    bioruby> seq.translate 
    ==> "MHAK"
    bioruby> seq.translate(2)        # translate from frame 2
    ==> "CMQ"
    bioruby> seq.translate(1,11)     # codon table 11
    ==> "MHAK"
    bioruby> seq.translate.codes
    ==> ["Met", "His", "Ala", "Lys"]
    bioruby> seq.translate.names
    ==> ["methionine", "histidine", "alanine", "lysine"]
    bioruby>  seq.translate.composition
    ==> {"K"=>1, "A"=>1, "M"=>1, "H"=>1}
    bioruby> seq.translate.molecular_weight
    ==> 485.605
    bioruby> seq.complement.translate
    ==> "FCMH"

    Get a random sequence with the same NA count:

    bioruby> counts = {'a'=>seq.count('a'),'c'=>seq.count('c'),'g'=>seq.count('g'),'t'=>seq.count('t')}
    ==> {"a"=>6, "c"=>2, "g"=>2, "t"=>2}
    bioruby!> randomseq = Bio::Sequence::NA.randomize(counts) 
    ==!> "aaacatgaagtc"
    
    bioruby!> print counts
    a6c2g2t2  
    bioruby!> p counts
    {"a"=>6, "c"=>2, "g"=>2, "t"=>2}

    The p, print and puts methods are standard Ruby ways of outputting to the screen. If you want to know more about standard Ruby commands you can use the 'ri' command on the command line (or the help command in Windows). For example

    % ri puts
    % ri p
    % ri File.open

    Nucleic acid sequences are members of the Bio::Sequence::NA class, and amino acid sequences are members of the Bio::Sequence::AA class. Shared methods are in the parent Bio::Sequence class.

    As Bio::Sequence inherits Ruby's String class, you can use String class methods. For example, to get a subsequence, you can not only use subseq(from, to) but also String#[].

    Please note that Ruby's strings are zero-based - i.e. the first letter has index 0, for example:

    bioruby> s = 'abc'
    ==> "abc"
    bioruby> s[0].chr
    ==> "a"
    bioruby> s[0..1]
    ==> "ab"

    So when using String methods, you should subtract 1 from positions conventionally used in biology. (The subseq method will throw an exception if you specify a position smaller than or equal to 0 for either "from" or "to".)

    The window_search(window_size, step_size) method shows a typical Ruby way of writing concise and clear code using 'closures'. Each sliding window creates a subsequence which is supplied to the enclosed block through a variable named +s+.

    • Show average percentage of GC content for 20 bases (stepping the default one base at a time):

      bioruby> seq = Bio::Sequence::NA.new("atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa")
      ==> "atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa"
      
      bioruby> a=[]; seq.window_search(20) { |s| a.push s.gc_percent } 
      bioruby> a
      ==> [30, 35, 40, 40, 35, 35, 35, 30, 25, 30, 30, 30, 35, 35, 35, 35, 35, 40, 45, 45, 45, 45, 40, 35, 40, 40, 40, 40, 40, 35, 35, 35, 30, 30, 30]

    Since the class of each subsequence is the same as that of the original sequence (Bio::Sequence::NA or Bio::Sequence::AA or Bio::Sequence), you can use all methods on the subsequence. For example,

    • Shows translation results for 15 bases shifting a codon at a time

      bioruby> a = []
      bioruby> seq.window_search(15, 3) { | s | a.push s.translate }
      bioruby> a
      ==> ["MHAIK", "HAIKL", "AIKLI", "IKLIP", "KLIPI", "LIPIR", "IPIRS", "PIRSS", "IRSSR", "RSSRS", "SSRSS", "SRSSK", "RSSKK", "SSKKK"]

    Finally, the window_search method returns the last leftover subsequence. This allows for example

    • Divide a genome sequence into sections of 10000bp and output FASTA formatted sequences (line width 60 chars). The 1000bp at the start and end of each subsequence overlap with the neighboring subsequences. At the 3' end of the sequence the leftover is also added:

      i = 1
      textwidth=60
      remainder = seq.window_search(10000, 9000) do |s|
        puts s.to_fasta("segment #{i}", textwidth)
        i += 1
      end
      if remainder
        puts remainder.to_fasta("segment #{i}", textwidth) 
      end

    If you don't want the overlapping window, set window size and stepping size to equal values.

    Other examples

    • Count the codon usage

      bioruby> codon_usage = Hash.new(0)
      bioruby> seq.window_search(3, 3) { |s| codon_usage[s] += 1 }
      bioruby> codon_usage
      ==> {"cat"=>1, "aaa"=>3, "cca"=>1, "att"=>2, "aga"=>1, "atc"=>1, "cta"=>1, "gca"=>1, "cga"=>1, "tca"=>3, "aag"=>1, "tcc"=>1, "atg"=>1}
    • Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid)

      bioruby> a = []
      bioruby> seq.window_search(10, 10) { |s| a.push s.molecular_weight }
      bioruby> a
      ==> [3096.2062, 3086.1962, 3056.1762, 3023.1262, 3073.2262]

    In most cases, sequences are read from files or retrieved from databases. For example:

    require 'bio'
    
    input_seq = ARGF.read       # reads all files in arguments
    
    my_naseq = Bio::Sequence::NA.new(input_seq)
    my_aaseq = my_naseq.translate
    
    puts my_aaseq

    Save the program above as na2aa.rb. Prepare a nucleic acid sequence described below and save it as my_naseq.txt:

    gtggcgatctttccgaaagcgatgactggagcgaagaaccaaagcagtgacatttgtctg
    atgccgcacgtaggcctgataagacgcggacagcgtcgcatcaggcatcttgtgcaaatg
    tcggatgcggcgtga

    na2aa.rb translates a nucleic acid sequence to a protein sequence. For example, to translate my_naseq.txt:

    % ruby na2aa.rb my_naseq.txt

    or use a pipe!

    % cat my_naseq.txt|ruby na2aa.rb

    Outputs

    VAIFPKAMTGAKNQSSDICLMPHVGLIRRGQRRIRHLVQMSDAA*

    You can also write this, a bit fancifully, as a one-liner script.

    % ruby -r bio -e 'p Bio::Sequence::NA.new($<.read).translate' my_naseq.txt

    In the next section we will retrieve data from databases instead of using raw sequence files. One generic example of the above can be found in ./sample/na2aa.rb.

    Parsing GenBank data (Bio::GenBank class)

    We assume that you already have some GenBank data files. (If you don't, download some .seq files from ftp://ftp.ncbi.nih.gov/genbank/)

    As an example we will fetch the ID, definition and sequence of each entry from the GenBank format and convert it to FASTA. This is also an example script in the BioRuby distribution.

    A first attempt could be to use the Bio::GenBank class for reading in the data:

    #!/usr/bin/env ruby
    
    require 'bio'
    
    # Read all lines from STDIN split by the GenBank delimiter
    while entry = gets(Bio::GenBank::DELIMITER)
      gb = Bio::GenBank.new(entry)      # creates GenBank object
    
      print ">#{gb.accession} "         # Accession
      puts gb.definition                # Definition
      puts gb.naseq                     # Nucleic acid sequence 
                                        # (Bio::Sequence::NA object)
    end

    But that has the disadvantage that the code is tied to GenBank input. A more generic method is to use Bio::FlatFile, which allows you to use different input formats:

    #!/usr/bin/env ruby
    
    require 'bio'
    
    ff = Bio::FlatFile.new(Bio::GenBank, ARGF)
    ff.each_entry do |gb|
      definition = "#{gb.accession} #{gb.definition}"
      puts gb.naseq.to_fasta(definition, 60)
    end

    For example, in turn, reading FASTA format files:

    #!/usr/bin/env ruby
    
    require 'bio'
    
    ff = Bio::FlatFile.new(Bio::FastaFormat, ARGF)
    ff.each_entry do |f|
      puts "definition : " + f.definition
      puts "nalen      : " + f.nalen.to_s
      puts "naseq      : " + f.naseq
    end

    In the above two scripts, the first arguments of Bio::FlatFile.new are database classes of BioRuby. This is expanded on in a later section.

    Yet another option is to use the Bio::DB.open class method:

    #!/usr/bin/env ruby
    
    require 'bio'
    
    ff = Bio::GenBank.open("gbvrl1.seq")
    ff.each_entry do |gb|
      definition = "#{gb.accession} #{gb.definition}"
      puts gb.naseq.to_fasta(definition, 60)
    end

    Next, we are going to parse the GenBank 'features', which is normally very complicated:

    #!/usr/bin/env ruby
    
    require 'bio'
    
    ff = Bio::FlatFile.new(Bio::GenBank, ARGF)
    
    # iterates over each GenBank entry
    ff.each_entry do |gb|
    
      # shows accession and organism
      puts "# #{gb.accession} - #{gb.organism}"
    
      # iterates over each element in 'features'
      gb.features.each do |feature|
        position = feature.position
        hash = feature.assoc            # put into Hash
    
        # skips the entry if "/translation=" is not found
        next unless hash['translation']
    
        # collects gene name and so on and joins it into a string
        gene_info = [
          hash['gene'], hash['product'], hash['note'], hash['function']
        ].compact.join(', ')
    
        # shows nucleic acid sequence
        puts ">NA splicing('#{position}') : #{gene_info}"
        puts gb.naseq.splicing(position)
    
        # shows amino acid sequence translated from nucleic acid sequence
        puts ">AA translated by splicing('#{position}').translate"
        puts gb.naseq.splicing(position).translate
    
        # shows amino acid sequence in the database entry (/translation=)
        puts ">AA original translation"
        puts hash['translation']
      end
    end
    • Note: In this example Feature#assoc method makes a Hash from a feature object. It is useful because you can get data from the hash by using qualifiers as keys. But there is a risk some information is lost when two or more qualifiers are the same. Therefore an Array is returned by Feature#feature.

    Bio::Sequence#splicing splices subsequences from nucleic acid sequences according to location information used in GenBank, EMBL and DDBJ.

    When the specified translation table is different from the default (universal), or when the first codon is not "atg" or the protein contains selenocysteine, the two amino acid sequences will differ.

    The Bio::Sequence#splicing method takes not only DDBJ/EMBL/GenBank feature style location text but also Bio::Locations object. For more information about location format and Bio::Locations class, see bio/location.rb.

    • Splice according to location string used in a GenBank entry

      naseq.splicing('join(2035..2050,complement(1775..1818),13..345)')
    • Generate Bio::Locations object and pass the splicing method

      locs = Bio::Locations.new('join((8298.8300)..10206,1..855)')
      naseq.splicing(locs)

    You can also use this splicing method for amino acid sequences (Bio::Sequence::AA objects).

    • Splicing peptide from a protein (e.g. signal peptide)

      aaseq.splicing('21..119')

    More databases

    Databases in BioRuby are essentially accessed like that of GenBank with classes like Bio::GenBank, Bio::KEGG::GENES. A full list can be found in the ./lib/bio/db directory of the BioRuby source tree.

    In many cases the Bio::DatabaseClass acts as a factory pattern and recognises the database type automatically - returning a parsed object. For example using Bio::FlatFile class as described above. The first argument of the Bio::FlatFile.new is database class name in BioRuby (such as Bio::GenBank, Bio::KEGG::GENES and so on).

    ff = Bio::FlatFile.new(Bio::DatabaseClass, ARGF)

    Isn't it wonderful that Bio::FlatFile automagically recognizes each database class?

    #!/usr/bin/env ruby
    
    require 'bio'
    
    ff = Bio::FlatFile.auto(ARGF)
    ff.each_entry do |entry|
      p entry.entry_id          # identifier of the entry
      p entry.definition        # definition of the entry
      p entry.seq               # sequence data of the entry
    end

    An example that can take any input, filter using a regular expression and output to a FASTA file can be found in sample/any2fasta.rb. With this technique it is possible to write a Unix type grep/sort pipe for sequence information. One example using scripts in the BIORUBY sample folder:

    fastagrep.rb '/At|Dm/' database.seq | fastasort.rb

    greps the database for Arabidopsis and Drosophila entries and sorts the output to FASTA.

    Other methods to extract specific data from database objects can be different between databases, though some methods are common (see the guidelines for common methods in bio/db.rb).

    • entry_id --> gets ID of the entry
    • definition --> gets definition of the entry
    • reference --> gets references as Bio::Reference object
    • organism --> gets species
    • seq, naseq, aaseq --> returns sequence as corresponding sequence object

    Refer to the documents of each database to find the exact naming of the included methods.

    In general, BioRuby uses the following conventions: when a method name is plural, the method returns some object as an Array. For example, some classes have a "references" method which returns multiple Bio::Reference objects as an Array. And some classes have a "reference" method which returns a single Bio::Reference object.

    Alignments (Bio::Alignment)

    The Bio::Alignment class in bio/alignment.rb is a container class like Ruby's Hash and Array classes and BioPerl's Bio::SimpleAlign. A very simple example is:

    bioruby> seqs = [ 'atgca', 'aagca', 'acgca', 'acgcg' ]
    bioruby> seqs = seqs.collect{ |x| Bio::Sequence::NA.new(x) }
    # creates alignment object
    bioruby> a = Bio::Alignment.new(seqs)
    bioruby> a.consensus 
    ==> "a?gc?"
    # shows IUPAC consensus
    p a.consensus_iupac       # ==> "ahgcr"
    
    # iterates over each seq
    a.each { |x| p x }
      # ==>
      #    "atgca"
      #    "aagca"
      #    "acgca"
      #    "acgcg"
    # iterates over each site
    a.each_site { |x| p x }
      # ==>
      #    ["a", "a", "a", "a"]
      #    ["t", "a", "c", "c"]
      #    ["g", "g", "g", "g"]
      #    ["c", "c", "c", "c"]
      #    ["a", "a", "a", "g"]
    
    # doing alignment by using CLUSTAL W.
    # clustalw command must be installed.
    factory = Bio::ClustalW.new
    a2 = a.do_align(factory)

    Read a ClustalW or Muscle 'ALN' alignment file:

    bioruby> aln = Bio::ClustalW::Report.new(File.read('../test/data/clustalw/example1.aln'))
    bioruby> aln.header
    ==> "CLUSTAL 2.0.9 multiple sequence alignment"

    Fetch a sequence:

    bioruby> seq = aln.get_sequence(1)
    bioruby> seq.definition
    ==> "gi|115023|sp|P10425|"

    Get a partial sequence:

    bioruby> seq.to_s[60..120]
    ==> "LGYFNG-EAVPSNGLVLNTSKGLVLVDSSWDNKLTKELIEMVEKKFQKRVTDVIITHAHAD"

    Show the full alignment residue match information for the sequences in the set:

    bioruby> aln.match_line[60..120]
    ==> "     .     **. .   ..   ::*:       . * : : .        .: .* * *"

    Return a Bio::Alignment object:

    bioruby> aln.alignment.consensus[60..120]
    ==> "???????????SN?????????????D??????????L??????????????????H?H?D"

    Restriction Enzymes (Bio::RE)

    BioRuby has extensive support for restriction enzymes (REs). It contains a full library of commonly used REs (from REBASE) which can be used to cut single stranded RNA or double stranded DNA into fragments. To list all enzymes:

    rebase = Bio::RestrictionEnzyme.rebase
    rebase.each do |enzyme_name, info|
      p enzyme_name
    end

    and to cut a sequence with an enzyme follow up with:

    res = seq.cut_with_enzyme('EcoRII', {:max_permutations => 0}, 
      {:view_ranges => true})
    if res.kind_of? Symbol #error
       err = Err.find_by_code(res.to_s)
       unless err
         err = Err.new(:code => res.to_s)
       end
    end
    res.each do |frag|
       em = EnzymeMatch.new
    
       em.p_left = frag.p_left
       em.p_right = frag.p_right
       em.c_left = frag.c_left
       em.c_right = frag.c_right
    
       em.err = nil
       em.enzyme = ar_enz
       em.sequence = ar_seq
       p em
     end

    Sequence homology search by using the FASTA program (Bio::Fasta)

    Let's start with a query.pep file which contains a sequence in FASTA format. In this example we are going to execute a homology search from a remote internet site or on your local machine. Note that you can use the ssearch program instead of fasta when you run it on your local machine.

    using FASTA in local machine

    Install the fasta program on your machine (the command name looks like fasta34. FASTA can be downloaded from ftp://ftp.virginia.edu/pub/fasta/).

    First, you must prepare your FASTA-formatted database sequence file target.pep and FASTA-formatted query.pep.

    #!/usr/bin/env ruby
    
    require 'bio'
    
    # Creates FASTA factory object ("ssearch" instead of 
    # "fasta34" can also work)
    factory = Bio::Fasta.local('fasta34', ARGV.pop)
    (EDITOR's NOTE: not consistent pop command)
    
    ff = Bio::FlatFile.new(Bio::FastaFormat, ARGF)
    
    # Iterates over each entry. the variable "entry" is a 
    # Bio::FastaFormat object:
    ff.each do |entry|
      # shows definition line (begins with '>') to the standard error output
      $stderr.puts "Searching ... " + entry.definition
    
      # executes homology search. Returns Bio::Fasta::Report object.
      report = factory.query(entry)
    
      # Iterates over each hit
      report.each do |hit|
        # If E-value is smaller than 0.0001
        if hit.evalue < 0.0001
          # shows identifier of query and hit, E-value, start and 
          # end positions of homologous region 
          print "#{hit.query_id} : evalue #{hit.evalue}\t#{hit.target_id} at "
          p hit.lap_at
        end
      end
    end

    We named above script f_search.rb. You can execute it as follows:

    % ./f_search.rb query.pep target.pep > f_search.out

    In the above script, the variable "factory" is a factory object for executing FASTA many times easily. Instead of using the Fasta#query method, the Bio::Sequence#fasta method can be used.

    seq = ">test seq\nYQVLEEIGRGSFGSVRKVIHIPTKKLLVRKDIKYGHMNSKE"
    seq.fasta(factory)

    When you want to add options to FASTA commands, you can set the third argument of the Bio::Fasta.local method. For example, the following sets ktup to 1 and gets a list of the top 10 hits:

    factory = Bio::Fasta.local('fasta34', 'target.pep', '-b 10')
    factory.ktup = 1

    Bio::Fasta#query returns a Bio::Fasta::Report object. We can get almost all information described in FASTA report text with the Report object. For example, getting information for hits:

    report.each do |hit|
      puts hit.evalue           # E-value
      puts hit.sw               # Smith-Waterman score (*)
      puts hit.identity         # % identity
      puts hit.overlap          # length of overlapping region
      puts hit.query_id         # identifier of query sequence
      puts hit.query_def        # definition(comment line) of query sequence
      puts hit.query_len        # length of query sequence
      puts hit.query_seq        # sequence of homologous region
      puts hit.target_id        # identifier of hit sequence
      puts hit.target_def       # definition(comment line) of hit sequence
      puts hit.target_len       # length of hit sequence
      puts hit.target_seq       # hit of homologous region of hit sequence
      puts hit.query_start      # start position of homologous 
                                # region in query sequence
      puts hit.query_end        # end position of homologous region 
                                # in query sequence
      puts hit.target_start     # start position of homologous region 
                                # in hit(target) sequence
      puts hit.target_end       # end position of homologous region 
                                # in hit(target) sequence
      puts hit.lap_at           # array of above four numbers
    end

    Most of above methods are common to the Bio::Blast::Report described below. Please refer to the documentation of the Bio::Fasta::Report class for FASTA-specific details.

    If you need the original output text of FASTA program you can use the "output" method of the factory object after the "query" method.

    report = factory.query(entry)
    puts factory.output

    using FASTA from a remote internet site

    • Note: Currently, only GenomeNet (fasta.genome.jp) is supported. check the class documentation for updates.

    For accessing a remote site the Bio::Fasta.remote method is used instead of Bio::Fasta.local. When using a remote method, the databases available may be limited, but, otherwise, you can do the same things as with a local method.

    Available databases in GenomeNet:

    • Protein database
      • nr-aa, genes, vgenes.pep, swissprot, swissprot-upd, pir, prf, pdbstr
    • Nucleic acid database
      • nr-nt, genbank-nonst, gbnonst-upd, dbest, dbgss, htgs, dbsts, embl-nonst, embnonst-upd, genes-nt, genome, vgenes.nuc

    Select the databases you require. Next, choose the search program according to the type of the query sequence and the database.

    • When query is an amino acid sequence
      • When protein database, program is "fasta".
      • When nucleic database, program is "tfasta".
    • When query is a nucleic acid sequence
      • When nucleic database, program is "fasta".
      • (When protein database, the search would fail.)

    For example, run:

    program = 'fasta'
    database = 'genes'
    
    factory = Bio::Fasta.remote(program, database)

    and try out the same commands as with the local search shown earlier.

    Homology search by using BLAST (Bio::Blast class)

    The BLAST interface is very similar to that of FASTA and both local and remote execution are supported. Basically, replace Bio::Fasta with Bio::Blast in the above examples!

    For example the BLAST version of f_search.rb is:

    # create BLAST factory object
    factory = Bio::Blast.local('blastp', ARGV.pop)

    For remote execution of BLAST in GenomeNet, Bio::Blast.remote is used. The parameter "program" is different from FASTA - as you can expect:

    • When query is an amino acid sequence
      • When protein database, program is "blastp".
      • When nucleic database, program is "tblastn".
    • When query is a nucleic acid sequence
      • When protein database, program is "blastx"
      • When nucleic database, program is "blastn".
      • ("tblastx" for six-frame search.)

    Bio::Blast uses "-m 7" XML output of BLAST by default when either XMLParser or REXML (both of them are XML parser libraries for Ruby - of the two XMLParser is the faster) is installed on your computer. In Ruby version 1.8.0 or later, REXML is bundled with Ruby's distribution.

    When no XML parser library is present, Bio::Blast uses the "-m 8" tab-delimited format. Available information is limited with the "-m 8" format so installing an XML parser is recommended.

    Again, the methods in Bio::Fasta::Report and Bio::Blast::Report (and Bio::Fasta::Report::Hit and Bio::Blast::Report::Hit) are similar. There are some additional BLAST methods, for example, bit_score and midline.

    report.each do |hit|
      puts hit.bit_score       
      puts hit.query_seq       
      puts hit.midline         
      puts hit.target_seq      
    
      puts hit.evalue          
      puts hit.identity        
      puts hit.overlap         
      puts hit.query_id        
      puts hit.query_def       
      puts hit.query_len       
      puts hit.target_id       
      puts hit.target_def      
      puts hit.target_len      
      puts hit.query_start     
      puts hit.query_end       
      puts hit.target_start    
      puts hit.target_end      
      puts hit.lap_at          
    end

    For simplicity and API compatibility, some information such as score is extracted from the first Hsp (High-scoring Segment Pair).

    Check the documentation for Bio::Blast::Report to see what can be retrieved. For now suffice to say that Bio::Blast::Report has a hierarchical structure mirroring the general BLAST output stream:

    • In a Bio::Blast::Report object, @iterations is an array of Bio::Blast::Report::Iteration objects.
      • In a Bio::Blast::Report::Iteration object, @hits is an array of Bio::Blast::Report::Hit objects.
        • In a Bio::Blast::Report::Hit object, @hsps is an array of Bio::Blast::Report::Hsp objects.

    See bio/appl/blast.rb and bio/appl/blast/*.rb for more information.

    Parsing existing BLAST output files

    When you already have BLAST output files and you want to parse them, you can directly create Bio::Blast::Report objects without the Bio::Blast factory object. For this purpose use Bio::Blast.reports, which supports the "-m 0" default and "-m 7" XML type output format.

    • For example:

      blast_version = nil; result = []
      Bio::Blast.reports(File.new("../test/data/blast/blastp-multi.m7")) do |report|
        blast_version = report.version
        report.iterations.each do |itr|
          itr.hits.each do |hit|
            result.push hit.target_id
          end
        end
      end
      blast_version
      # ==> "blastp 2.2.18 [Mar-02-2008]"
      result
      # ==> ["BAB38768", "BAB38768", "BAB38769", "BAB37741"]
    • another example:

      require 'bio'
      Bio::Blast.reports(ARGF) do |report| 
        puts "Hits for " + report.query_def + " against " + report.db
        report.each do |hit|
          print hit.target_id, "\t", hit.evalue, "\n" if hit.evalue < 0.001
        end
      end

    Save the script as hits_under_0.001.rb and to process BLAST output files *.xml, you can run it with:

    % ruby hits_under_0.001.rb *.xml

    Sometimes BLAST XML output may be wrong and can not be parsed. Check whether blast is version 2.2.5 or later. See also blast --help.

    Bio::Blast loads the full XML file into memory. If this causes a problem you can split the BLAST XML file into smaller chunks using XML-Twig. An example can be found in Biotools.

    Add remote BLAST search sites

    Note: this section is an advanced topic

    Here is a more advanced application for using BLAST sequence homology search services. BioRuby currently only supports GenomeNet. If you want to add other sites, you must write the following:

    • the calling CGI (command-line options must be processed for the site).
    • make sure you get BLAST output text as supported format by BioRuby (e.g. "-m 8", "-m 7" or default("-m 0")).

    In addition, you must write a private class method in Bio::Blast named "exec_MYSITE" to get query sequence and to pass the result to Bio::Blast::Report.new(or Bio::Blast::Default::Report.new):

    factory = Bio::Blast.remote(program, db, option, 'MYSITE')

    When you write above routines, please send them to the BioRuby project, and they may be included in future releases.

    Generate a reference list using PubMed (Bio::PubMed)

    Nowadays using NCBI E-Utils is recommended. Use Bio::PubMed.esearch and Bio::PubMed.efetch.

    #!/usr/bin/env ruby
    
    require 'bio'
    
    # NCBI announces that queries without email address will return error
    # after June 2010. When you modify the script, please enter your email
    # address instead of the staff's.
    Bio::NCBI.default_email = 'staff@bioruby.org'
    
    keywords = ARGV.join(' ')
    
    options = {
      'maxdate' => '2003/05/31',
      'retmax' => 1000,
    }
    
    entries = Bio::PubMed.esearch(keywords, options)
    
    Bio::PubMed.efetch(entries).each do |entry|
      medline = Bio::MEDLINE.new(entry)
      reference = medline.reference
      puts reference.bibtex
    end

    The script works the same as pmsearch.rb. But, by using NCBI E-Utils, more options are available. For example, the range of publication dates to search and the maximum number of hits to show can be specified.

    See the help page of E-Utils for more details.

    More about BibTeX

    In this section, we explain the simple usage of TeX for the BibTeX format bibliography list collected by the above scripts. For example, to save BibTeX format bibliography data to a file named genoinfo.bib, run:

    % ./pmfetch.rb 10592173 >> genoinfo.bib
    % ./pmsearch.rb genome bioinformatics >> genoinfo.bib

    The BibTeX data can be used with TeX or LaTeX to add bibliography information to your journal article. For more information on using BibTeX see the BibTeX HowTo site. A quick example:

    Save this to hoge.tex:

    \documentclass{jarticle}
    \begin{document}
    \bibliographystyle{plain}
    foo bar KEGG database~\cite{PMID:10592173} baz hoge fuga.
    \bibliography{genoinfo}
    \end{document}

    Then,

    % latex hoge
    % bibtex hoge # processes genoinfo.bib
    % latex hoge  # creates bibliography list
    % latex hoge  # inserts correct bibliography reference

    Now, you get hoge.dvi and hoge.ps - the latter of which can be viewed with any Postscript viewer.

    Bio::Reference#bibitem

    When you don't want to create a bib file, you can use Bio::Reference#bibitem method instead of Bio::Reference#bibtex. In the above pmfetch.rb and pmsearch.rb scripts, change

    puts reference.bibtex

    to

    puts reference.bibitem

    Output documents should be bundled in \begin{thebibliography} and \end{thebibliography}. Save the following to hoge.tex

    \documentclass{jarticle}
    \begin{document}
    foo bar KEGG database~\cite{PMID:10592173} baz hoge fuga.
    
    \begin{thebibliography}{00}
    
    \bibitem{PMID:10592173}
    Kanehisa, M., Goto, S.
    KEGG: kyoto encyclopedia of genes and genomes.,
    {\em Nucleic Acids Res}, 28(1):27--30, 2000.
    
    \end{thebibliography}
    \end{document}

    and run

    % latex hoge   # creates bibliography list
    % latex hoge   # inserts correct bibliography reference

    OBDA

    OBDA (Open Bio Database Access) is a standardized method of sequence database access developed by the Open Bioinformatics Foundation. It was created during the BioHackathon by BioPerl, BioJava, BioPython, BioRuby and other projects' members (2002).

    • BioRegistry (Directory)
      • Mechanism to specify how and where to retrieve sequence data for each database.
    • BioFlat
      • Flatfile indexing by using binary tree or BDB(Berkeley DB).
    • BioFetch
      • Server-client model for getting entry from database via http.
    • BioSQL
      • Schemas to store sequence data to relational databases such as MySQL and PostgreSQL, and methods to retrieve entries from the database.

    This tutorial only gives a quick overview of OBDA. Check out the OBDA site for more extensive details.

    BioRegistry

    BioRegistry allows for locating retrieval methods and database locations through configuration files. The priorities are

    • The file specified with method's parameter
    • ~/.bioinformatics/seqdatabase.ini
    • /etc/bioinformatics/seqdatabase.ini
    • http://www.open-bio.org/registry/seqdatabase.ini

    Note that the last location refers to www.open-bio.org and is only used when no local configuration file is available.

    In the current BioRuby implementation all local configuration files are read. For databases with the same name, the settings encountered first are used. This means that if you don't like some settings of a database in the system's global configuration file (/etc/bioinformatics/seqdatabase.ini), you can easily override them by writing settings to ~/.bioinformatics/seqdatabase.ini.

    The syntax of the configuration file is called a stanza format. For example

    [DatabaseName]
    protocol=ProtocolName
    location=ServerName

    You can write a description like the above entry for every database.

    The database name is a local label for yourself, so you can name it freely and it can differ from the name of the actual databases. In the actual specification of BioRegistry where there are two or more settings for a database of the same name, it is proposed that connection to the database is tried sequentially with the order written in configuration files. However, this has not (yet) been implemented in BioRuby.

    In addition, for some protocols, you must set additional options other than locations (e.g. user name for MySQL). In the BioRegistry specification, the currently available protocols are:

    • index-flat
    • index-berkeleydb
    • biofetch
    • biosql
    • bsane-corba
    • xembl

    In BioRuby, you can use index-flat, index-berkeleydb, biofetch and biosql. Note that the BioRegistry specification sometimes gets updated and BioRuby does not always follow quickly.

    Here is an example. It creates a Bio::Registry object and reads the configuration files:

    reg = Bio::Registry.new
    
    # connects to the database "genbank"
    serv = reg.get_database('genbank')
    
    # gets entry of the ID
    entry = serv.get_by_id('AA2CG')

    The variable "serv" is a server object corresponding to the settings written in the configuration files. The class of the object is one of Bio::SQL, Bio::Fetch, and so on. Note that Bio::Registry#get_database("name") returns nil if no database is found.

    After that, you can use the get_by_id method and some specific methods. Please refer to the sections below for more information.

    BioFlat

    BioFlat is a mechanism to create index files of flat files and to retrieve these entries fast. There are two index types. index-flat is a simple index performing binary search without using any external libraries of Ruby. index-berkeleydb uses Berkeley DB for indexing - but requires installing bdb on your computer, as well as the BDB Ruby package. To create the index itself, you can use br_bioflat.rb command bundled with BioRuby.

    % br_bioflat.rb --makeindex database_name [--format data_format] filename...

    The format can be omitted because BioRuby has autodetection. If that doesn't work, you can try specifying the data format as the name of a BioRuby database class.

    Search and retrieve data from database:

    % br_bioflat.rb database_name identifier

    For example, to create an index of GenBank files gbbct*.seq and get the entry from the database:

    % br_bioflat.rb --makeindex my_bctdb --format GenBank gbbct*.seq
    % br_bioflat.rb my_bctdb A16STM262

    If you have Berkeley DB on your system and installed the bdb extension module of Ruby (see the BDB project page ), you can create and search indexes with Berkeley DB - a very fast alternative that uses little computer memory. When creating the index, use the "--makeindex-bdb" option instead of "--makeindex".

    % br_bioflat.rb --makeindex-bdb database_name [--format data_format] filename...

    BioFetch

    Note: this section is an advanced topic

    BioFetch is a database retrieval mechanism via CGI. CGI Parameters, options and error codes are standardized. Client access via http is possible giving the database name, identifiers and format to retrieve entries.

    The BioRuby project has a BioFetch server at bioruby.org. It uses GenomeNet's DBGET system as a backend. The source code of the server is in sample/ directory. Currently, there are only two BioFetch servers in the world: bioruby.org and EBI.

    Here are some methods to retrieve entries from our BioFetch server.

    1. Using a web browser

      http://bioruby.org/cgi-bin/biofetch.rb
    2. Using the br_biofetch.rb command

      % br_biofetch.rb db_name entry_id
    3. Directly using Bio::Fetch in a script

      serv = Bio::Fetch.new(server_url)
      entry = serv.fetch(db_name, entry_id)
    4. Indirectly using Bio::Fetch via BioRegistry in script

      reg = Bio::Registry.new
      serv = reg.get_database('genbank')
      entry = serv.get_by_id('AA2CG')

    If you want to use (4), you have to include some settings in seqdatabase.ini. For example:

    [genbank]
    protocol=biofetch
    location=http://bioruby.org/cgi-bin/biofetch.rb
    biodbname=genbank

    The combination of BioFetch, Bio::KEGG::GENES and Bio::AAindex1

    Bioinformatics is often about gluing things together. Here is an example that gets the bacteriorhodopsin gene (VNG1467G) of the archaea Halobacterium from KEGG GENES database and gets alpha-helix index data (BURA740101) from the AAindex (Amino acid indices and similarity matrices) database, and shows the helix score for each 15-aa length overlapping window.

    #!/usr/bin/env ruby
    
    require 'bio'
    
    entry = Bio::Fetch.query('hal', 'VNG1467G')
    aaseq = Bio::KEGG::GENES.new(entry).aaseq
    
    entry = Bio::Fetch.query('aax1', 'BURA740101')
    helix = Bio::AAindex1.new(entry).index
    
    position = 1
    win_size = 15
    
    aaseq.window_search(win_size) do |subseq|
      score = subseq.total(helix)
      puts [ position, score ].join("\t")
      position += 1
    end

    The special method Bio::Fetch.query uses the preset BioFetch server at bioruby.org. (The server internally gets data from GenomeNet. Because the KEGG/GENES database and AAindex database are not available from other BioFetch servers, we used the bioruby.org server with Bio::Fetch.query method.)

    BioSQL

    BioSQL is a well-known schema to store and retrieve biological sequences using an RDBMS like PostgreSQL or MySQL; note that SQLite is not supported. First of all, you must install a database engine or have access to a remote one. Then create the schema and populate it with the taxonomy. You can follow the Official Guide to accomplish these steps. The next step is to install these gems:

    • ActiveRecord
    • CompositePrimaryKeys (Rails doesn't handle by default composite primary keys)
    • The layer to communicate with your preferred RDBMS (postgresql, mysql, jdbcmysql in case you are running JRuby)

    You can find ActiveRecord's models in /bioruby/lib/bio/io/biosql

    When you have your database up and running, you can connect to it like this:

    #!/usr/bin/env ruby
    
    require 'bio'
    
    connection = Bio::SQL.establish_connection({'development'=>{'hostname'=>"YourHostname",
    'database'=>"CoolBioSeqDB",
    'adapter'=>"jdbcmysql",
    'username'=>"YourUser",
    'password'=>"YouPassword"
          }
      },
    'development')
    
    #The first parameter is the hash containing the description of the configuration; similar to database.yml in Rails applications, you can declare different environments.
    #The second parameter is the environment to use: 'development', 'test', or 'production'.
    
    #To store a sequence into the database you simply need a biosequence object.
    biosql_database = Bio::SQL::Biodatabase.find(:first)
    ff = Bio::GenBank.open("gbvrl1.seq")
    
    ff.each_entry do |gb|
      Bio::SQL::Sequence.new(:biosequence=>gb.to_biosequence, :biodatabase=>biosql_database)
    end
    
    #You can list all the entries in every database
    Bio::SQL.list_entries
    
    #list databases:
    Bio::SQL.list_databases
    
    #retrieving a generic accession
    bioseq = Bio::SQL.fetch_accession("YouAccession")
    
    #If you use biosequence objects, you will find all their methods mapped to BioSQL sequences.
    #But you can also access the models directly:
    
    #get the raw sequence associated with your accession
    bioseq.entry.biosequence 
    
    #get the length of your sequence; this is the explicit form of bioseq.length
    bioseq.entry.biosequence.length
    
    #convert the sequence into GenBank format
    bioseq.to_biosequence.output(:genbank)

    BioSQL's schema is not very intuitive for beginners, so spend some time on understanding it. In the end, if you know a little bit of Ruby on Rails, everything will go smoothly. You can find information on Annotation here. ToDo: add examples from George. I remember he did some cool posts on BioSQL and Rails.

    PhyloXML

    PhyloXML is an XML language for saving, analyzing and exchanging data of annotated phylogenetic trees. PhyloXML's parser in BioRuby is implemented in Bio::PhyloXML::Parser, and its writer in Bio::PhyloXML::Writer. More information can be found at www.phyloxml.org.

    Requirements

    In addition to BioRuby, you need the libxml Ruby bindings. To install, execute:

    % gem install -r libxml-ruby

    For more information see the libxml installer page

    Parsing a file

    require 'bio'
    
    # Create new phyloxml parser
    phyloxml = Bio::PhyloXML::Parser.open('example.xml')
    
    # Print the names of all trees in the file
    phyloxml.each do |tree|
      puts tree.name
    end

    If there are several trees in the file, you can access the one you wish by specifying its index:

    tree = phyloxml[3]

    You can use all Bio::Tree methods on the tree, since PhyloXML::Tree inherits from Bio::Tree. For example,

    tree.leaves.each do |node|
      puts node.name
    end

    PhyloXML files can hold additional information besides phylogenies at the end of the file. This info can be accessed through the 'other' array of the parser object.

    phyloxml = Bio::PhyloXML::Parser.open('example.xml')
    while tree = phyloxml.next_tree
      # do stuff with trees
    end 
    
    puts phyloxml.other

    Writing a file

    # Create new phyloxml writer
    writer = Bio::PhyloXML::Writer.new('tree.xml')
    
    # Write tree to the file tree.xml
    writer.write(tree1) 
    
    # Add another tree to the file
    writer.write(tree2)

    Retrieving data

    Here is an example of how to retrieve the scientific name of the clades included in each tree.

    require 'bio'
    
    phyloxml = Bio::PhyloXML::Parser.open('ncbi_taxonomy_mollusca.xml')
    phyloxml.each do |tree|
      tree.each_node do |node|
        print "Scientific name: ", node.taxonomies[0].scientific_name, "\n"
      end
    end

    Retrieving 'other' data

    require 'bio'
    
    phyloxml = Bio::PhyloXML::Parser.open('phyloxml_examples.xml')
    while tree = phyloxml.next_tree
     #do something with the trees
    end
    
    p phyloxml.other
    puts "\n"
    #=> output is an object representation
    
    #Print in a readable way
    puts phyloxml.other[0].to_xml, "\n"
    #=>:
    #
    #<align:alignment xmlns:align="http://example.org/align">
    #  <seq name="A">acgtcgcggcccgtggaagtcctctcct</seq>
    #  <seq name="B">aggtcgcggcctgtggaagtcctctcct</seq>
    #  <seq name="C">taaatcgc--cccgtgg-agtccc-cct</seq>
    #</align:alignment>
    
    #Once we know what's there, let's output just the sequences
    phyloxml.other[0].children.each do |node|
     puts node.value
    end
    #=>
    #
    #acgtcgcggcccgtggaagtcctctcct
    #aggtcgcggcctgtggaagtcctctcct
    #taaatcgc--cccgtgg-agtccc-cct

    The BioRuby example programs

    Some sample programs are stored in the ./sample/ directory. For example, the na2aa.rb program (transforms a nucleic acid sequence into an amino acid sequence) can be run using:

    ./sample/na2aa.rb test/data/fasta/example1.txt 

    Unit testing and doctests

    BioRuby comes with an extensive testing framework with over 1300 tests and 2700 assertions. To run the unit tests:

    cd test
    ruby runner.rb

    We have also started with doctest for Ruby. We are porting the examples in this tutorial to doctest - more info upcoming.

    Further reading

    See the BioRuby in anger Wiki. A lot of BioRuby's documentation exists in the source code and unit tests. To really dive in you will need the latest source code tree. The embedded rdoc documentation for the BioRuby source code can be viewed online at <URL:http://bioruby.org/rdoc/>.

    BioRuby Shell

    The BioRuby shell implementation is located in ./lib/bio/shell. It is very interesting as it uses IRB (the Ruby interpreter), which is a powerful environment described in Programming Ruby's IRB chapter. IRB commands can be typed directly into the shell, e.g.

    bioruby!> IRB.conf[:PROMPT_MODE]
    ==!> :PROMPT_C

    Additionally, you also may want to install the optional Ruby readline support - with Debian libreadline-ruby. To edit a previous line you may have to press line down (down arrow) first.

    Helpful tools

    Apart from rdoc you may also want to use rtags - which allows jumping around source code by clicking on class and method names.

    cd bioruby/lib
    rtags -R --vi

    For a tutorial see here

    APPENDIX

    Biogem: Additional BioRuby plugins

    Biogem is one of the exciting developments for Ruby in bioinformatics! Biogems add new functionality next to the BioRuby core project (BioRuby is a biogem itself). A biogem is simply installed with

    gem install bio                 # The core BioRuby gem
    gem install bio-core            # BioRuby + stable pure Ruby biogems
    gem install bio-core-ext        # bio-core + stable Ruby extensions

    For information on these biogems, and the many others available, see Biogems.info or gems.bioruby.org.

    KEGG API

    Please refer to KEGG_API.rd.ja (English version: <URL:http://www.genome.jp/kegg/soap/doc/keggapi_manual.html>).

    Ruby Ensembl API

    The Ruby Ensembl API is a Ruby API to the Ensembl database. It is NOT currently included in the BioRuby archives. To install it, see the Ruby-Ensembl Github for more information.

    Gene Ontology (GO) through the Ruby Ensembl API

    Gene Ontologies can be fetched through the Ruby Ensembl API package:

    require 'ensembl'
    Ensembl::Core::DBConnection.connect('drosophila_melanogaster')
    infile = IO.readlines(ARGV.shift) # reading your comma-separated accession mapping file (one line per mapping)
    infile.each do |line|
      accs = line.split(",")          # Split the comma-sep.entries into an array
      drosophila_acc = accs.shift     # the first entry is the Drosophila acc
      mosq_acc = accs.shift           # the second entry is your Mosq. acc
      gene = Ensembl::Core::Gene.find_by_stable_id(drosophila_acc)
      print "#{mosq_acc}"
      gene.go_terms.each do |go|
         print ",#{go}"
      end
    end

    Prints each mosq. accession/unique identifier and the GO terms from the Drosophila homologues.

    Using BioPerl or BioPython from Ruby

    A possible route is to opt for JRuby and Jython on the JAVA virtual machine (JVM).

    At the moment there is no easy way of accessing BioPerl or BioPython directly from Ruby. A possibility is to create a Perl or Python server that gets accessed through XML/RPC or SOAP.

    Installing required external libraries

    At this point for using BioRuby no additional libraries are needed, except if you are using the Bio::PhyloXML module; then you have to install libxml-ruby.

    This may change, so keep an eye on the Bioruby website. Also when a package is missing BioRuby should show an informative message.

    At this point installing third party Ruby packages can be a bit painful, as the gem standard for packages evolved late and some still force you to copy things by hand. Therefore carefully read the READMEs that come with each package.

    Installing libxml-ruby

    The simplest way is to use the RubyGems packaging system:

    gem install -r libxml-ruby

    If you get `require': no such file to load - mkmf (LoadError) error then do

    sudo apt-get install ruby-dev

    If you have other problems with installation, then see <URL:http://libxml.rubyforge.org/install.xml>.

    Trouble shooting

    • Error: in `require': no such file to load -- bio (LoadError)

    Ruby is failing to find the BioRuby libraries - add them to the RUBYLIB path, or pass the path to the interpreter. For example:

    ruby -I$BIORUBYPATH/lib yourprogram.rb

    Modifying this page

    IMPORTANT NOTICE: This page is maintained in the BioRuby source code repository. Please edit the file there otherwise changes may get lost. See BioRuby Developer Information for repository and mailing list access.

    bio-1.4.3.0001/doc/bioruby.css0000644000004100000410000000761612200110570015627 0ustar www-datawww-data/* body */ body { color: #000000; background-color: #ffffff; margin-left: 5%; margin-right: 5%; font-family: verdana, arial; } em { font-weight: bold; font-style: normal; } /* link */ a:link { color: #00ca65; text-decoration: none; } a:visited { color: #49ba18; text-decoration: none; } a:hover, a:focus { color: #c2fe20; background-color: #ffffff; text-decoration: underline; } /* header */ h1 { color: #000000; background-color: transparent; border-color: #49ba18; /* border-width: 1px 0px 1px 0px;*/ border-width: 0px 0px 5px 0px; border-style: solid; padding-bottom: 3px; padding-right: 10%; text-align: left; } h2 { color: #000000; /* background-color: transparent;*/ background-color: #b0ffb0; border-color: #b0ffb0; border-style: none; border-width: 1px 0px 1px 0px; margin-bottom: 0px; margin-top: 1em; padding: 3px; } h3 { padding-top: 2px; padding-left: 5px; font-weight: bold; border-style: solid; border-color: #d8ffd8; /* border-width: 2px 0px 0px 10px;*/ border-width: 2px 0px 0px 2px; } h4 { padding-bottom: 1px; padding-left: 5px; font-weight: bold; font-weight: bold; border-style: solid; border-color: #b0ffb0; border-width: 0px 0px 2px 0px; /* margin: 1.5em 10px 0px*/ } h5 { padding-top: 2px; padding-left: 5px; font-weight: bold; border-style: dotted; border-color: #d8ffd8; border-width: 2px 0px 0px 2px; /* margin: 1.5em 20px 0px*/ } h6 { padding-left: 5px; font-weight: bold; border-style: solid; border-color: #b0ffb0; border-width: 0px 0px 0px 5px; /* margin: 1.5em 20px 0px*/ } /* paragraph */ p { margin-left: 0em! 
important; text-indent: 0em } /* line */ hr { border-style: solid; border-color: #00ca65; border-width: 1px; } /* list */ dl { } dt { padding-left: 5px; font-size: 110%; border-style: solid; border-color: #B0FFB0; border-width: 0px 0px 0px 5px; font-weight: bold; } dd { } li { /* list-style-type: disc; */ } ul,ol{ } /* table */ th { padding: 5px; text-align: left; } td { padding: 5px; } /* quote */ pre { color: #000000; background-color: #d8ffd8; margin-left: 20px; padding: 8px; border-style: solid; border-color: #b0ffb0; border-width: 1px 5px 1px 5px; white-space: pre; } blockquote { color: #008080; background-color: #ffffff; margin-left: 20px; padding: 8px; border-style: solid; border-color: #38c868; border-width: 3px 1px 3px 1px; } /* image */ img { border-width: 0px; } /* form */ input, select { color: #000000; background-color: transparent; padding: 2px; border-style: solid; border-color: #71e63e; border-width: 1px; } textarea { color: #000000; background-color: #ffffff; border-style: solid; border-color: #00ca65; border-width: 1px; font-family: monospace; } /* reviz */ table.reviz { width: 100%; } th.file { background-color: #008080; width: 15%; } th.rev, th.age, th.author { background-color: #38c868; width: 10%; } th.log { background-color: #c2fe20; } td.dir { background-color: #b0ffb0; background-image: url(reviz/dir.gif); background-position: center left; background-repeat: no-repeat; text-indent: 15px; } td.file { background-color: #b0ffb0; background-image: url(reviz/file.gif); background-position: center left; background-repeat: no-repeat; text-indent: 15px; } td.rev, td.age, td.author, td.log { background-color: #d8ffd8; } /* rwiki */ .navi { text-align: right; } .headerURL { text-align: right; font-size: 10pt; } address { color: gray; text-align: right; font-style: normal; font-variant: normal; font-weight: normal; } /* hiki */ ins.added { font-weight: bold; } del.deleted { text-decoration: line-through; } form.update textarea.keyword { width: 15em; 
height: 3em; } div.adminmenu { text-align: right; } div.caption { text-align: right; } div.footer { text-align: right; } /* top */ span.title { font-size: +2 } span.lead { text-decoration: underline } span.expire { color: #c04040 } span.ruby { font-weight: bold } bio-1.4.3.0001/doc/KEGG_API.rd.ja0000644000004100000410000015502212200110570015563 0ustar www-datawww-data=begin $Id: KEGG_API.rd.ja,v 1.11 2006/12/27 13:40:45 k Exp $ Copyright (C) 2003-2006 Toshiaki Katayama = KEGG API KEGG API ϥץʤɤ KEGG Ѥ뤿Υ֥ӥǤ ȾǤϡKEGG ǡ١긡ꤹ뤿 KEGG API ȤˡޤȾΥե󥹤 KEGG API ǽ ⤷ޤȤƼ Ruby ȤäƲ⤷ޤSOAP WSDL ȤΤǤPerl, Python, Java ʤɡˤǤдñ KEGG API Ѥ뤳ȤǤޤ == ܼ * ((<ȥ>)) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * ((<Ѹ>)) * ((<ͤΥǡ>)) * (()), (()) * (()), (()) * (()), (()) * (()), (()) * (()), (()) * (()), (()) * (()), (()) * (()), (()) * ((<᥽åɰ>)) * ((<᥿>)) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * ((<ǡ١֤Υ>)) * (()) * (()) * ((<Ҥȹֹδط>)) * (()) * (()) * ((<ǡʪꥢδط>)) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * ((<ѥؤοŤ>)) * (()) * (()) * (()) * (()) * (()) * (()) * ((<ѥΥ֥ȴ֤δط>)) * (()) * ((<ѥΥ֥ȸ>)) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * ((<֥Ȥѥ>)) * (()) * (()) * (()) * (()) * (()) * (()) * ((<ѥ֤δط>)) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) == ȥ ֥ӥȤϡ饤Ȥ׵򥤥󥿡ͥåȤ𤷤ƥФ ꡢФץμ¹Է̤򥯥饤Ȥ֤ȤߤǡŪˤ ֥ڡǻȤ HTTP ץȥȡ¤ĥǡɽˡȤ ڤƤ XML ޡåʸѤΤؤޤ ֥ӥϥץफѤǤ뤿ᡢŪ˸Ԥäꡢ ͤѤ͡׵ưŪ˽ꤹΤ˸Ƥޤ Τᡢŷ䤤碌Google ؤʣ縡ʤɤǤȤ ޤ HTTP ѤåȤˤϡïǤȤ뤳Ȥեʤɤ¤ ˤȤꡢXML ˤϴϢѤ·äƤ뤳Ȥʣʥǡ ¤ɽǤȤäݥȤޤ ֥ӥǤ XML ϢѤǤ SOAP WSDL ȤȤ¿ʤä ޤSOAP ϥ饤ȤȥФȤꤹåɽˡ ɸಽΤǡ Simple Object Access Method άȤƤޤ ( Service Oriented Access Protocol ȤȤ⤢褦Ǥ) WSDL SOAP ˴Ťӥ򥳥ԥ塼ñѤǤ褦ˤ ΤΤǡWeb Service Description Language άȤʤäƤޤ KEGG API ϤεѤȤäơʬζ̣Ҥѥʤɤ ͳ˸ϤѤꤹ뤿μʤ󶡤ޤ桼 KEGG ¿εǽ򡢥֥ڡ򥯥å˼ʬΥץ 椫鼡ȼ¹Ԥ뤳ȤǤ褦ˤʤޤ KEGG API ˴ؤǿξϰʲ URL 뤳ȤǤޤ * (()) == KEGG API λȤ ʲǤ Ruby, Perl, Python, Java γƸˤ KEGG 
API δñʻȤ Ҳ𤷤ޤƸ SOAP WSDL 򰷤饤֥ɲå󥹥ȡ뤹 ɬפޤ === Ruby ξ Ruby 1.8.1 ʹߤǤϡɸ SOAP ȤǤޤΤɲå󥹥ȡ ɬפޤ Ruby 1.8.0 Ǥ (()), (()), (()) ʤɤΥ饤֥򥤥󥹥ȡ뤹ɬפޤ Ruby 1.6.8 ξϤ SOAP4R ɬפȤ¾Υ饤֥ (date2, uconv, XML Υѡʤ) ⥤󥹥ȡ뤹ɬפޤΤǡ餫 SOAP4R ΥɥȤ˽äƤޤ ʲΥץ륳ɤϡIJݤ b0002 ҤȺǤƱι⤤ Smith-Waterman ι⤤ 5 ĸɽץǤ #!/usr/bin/env ruby require 'soap/wsdlDriver' wsdl = "http://soap.genome.jp/KEGG.wsdl" serv = SOAP::WSDLDriverFactory.new(wsdl).create_rpc_driver serv.generate_explicit_type = true # SOAP Ruby ηѴͭˤ offset = 1 limit = 5 top5 = serv.get_best_neighbors_by_gene('eco:b0002', offset, limit) top5.each do |hit| print hit.genes_id1, "\t", hit.genes_id2, "\t", hit.sw_score, "\n" end ץ 'get_best_neighbors_by_gene' ϡKEGG SSDB ǡ ١Ȥä KEGG GENES ˴ޤޤƤʪ椫ǤƱ ι⤤ҤõƤ API Ǥ̤ϼΤ褦ɽޤ eco:b0002 eco:b0002 5283 eco:b0002 ecj:JW0001 5283 eco:b0002 sfx:S0002 5271 eco:b0002 sfl:SF0002 5271 eco:b0002 ecc:c0003 5269 ޤưʤϡ serv = SOAP::WSDLDriverFactory.new(wsdl).create_rpc_driver serv.wiredump_dev = STDERR # ιԤ­ serv.generate_explicit_type = true Τ褦 wiredump_dev STDERR ꤷԤɲäƼ¹Ԥ뤳Ȥǡ ФȤΤ꤬ɸ२顼˽Ϥޤ KEGG API v3.0 顢Фôڤ꥿ॢȤɤŪǡ ̤η̤֤᥽åɤˤ offset, limit Ƴ졢٤ ̤ο¤褦ˤʤޤKEGG API v3.0v5.0 Ǥ 줾 start, max_results ȸƤФƤޤKEGG API v6.0 offset, limit ̾ѹޤˡΤᡢΥ᥽åɤ Ƥη̤뤿ˤϥ롼פѤɬפޤ #!/usr/bin/env ruby require 'soap/wsdlDriver' wsdl = "http://soap.genome.jp/KEGG.wsdl" serv = SOAP::WSDLDriverFactory.new(wsdl).create_rpc_driver serv.generate_explicit_type = true offset = 1 limit = 100 loop do results = serv.get_best_neighbors_by_gene('eco:b0002', offset, limit) break unless results # ̤֤äƤʤнλ results.each do |hit| print hit.genes_id1, "\t", hit.genes_id2, "\t", hit.sw_score, "\n" end offset += limit end WSDL ѤƤ뤿ᡢǤ Ruby ξϽʬ˴ñ˽񤱤 (()) ȤȤ˥å ȤǤޤ #!/usr/bin/env ruby require 'bio' serv = Bio::KEGG::API.new results = serv.get_all_best_neighbors_by_gene('eco:b0002') results.each do |hit| print hit.genes_id1, "\t", hit.genes_id2, "\t", hit.sw_score, "\n" end BioRuby Ǥ 'get_all_best_neighbors_by_gene' ᥽åɤƤꡢ ưǾ嵭Υ롼פ󤷤Ƥη̤֤Ƥޤޤ ̾ΥꥹȤϤб֤ͤƤ filter ᥽ ɤȤȤǤޤ #!/usr/bin/env ruby require 'bio' serv = 
Bio::KEGG::API.new results = serv.get_all_best_neighbors_by_gene('eco:b0002') # ߤͤ̾Υڥ SW ξ fields = [:genes_id1, :genes_id2, :sw_score] results.each do |hit| puts hit.filter(fields).join("\t") end # 줾ΰҤǥ饤Ȥ줿ݥʤɤɽ fields1 = [:genes_id1, :start_position1, :end_position1, :best_flag_1to2] fields2 = [:genes_id2, :start_position2, :end_position2, :best_flag_2to1] results.each do |hit| print "> score: ", hit.sw_score, ", identity: ", hit.identity, "\n" print "1:\t", hit.filter(fields1).join("\t"), "\n" print "2:\t", hit.filter(fields2).join("\t"), "\n" end ϡIJ (eco) Ф KEGG ѥΰ֤Ǥ #!/usr/bin/env ruby require 'bio' serv = Bio::KEGG::API.new list = serv.list_pathways("eco") list.each do |path| print path.entry_id, "\t", path.definition, "\n" end ArrayOfDefinition ֤Τǡ줾ˤĤ Definition entry_id (ѥID) definition (ѥΥȥ) Фޤ SSDB ⡢¤ SSDBRelation genes_id1 sw_score ʤɤФƤΤǤˡ ǸϡIJݤΰ b1002 b2388 бܥå˿դ ѥ eco00010 βơե¸Ǥ #!/usr/bin/env ruby require 'bio' serv = Bio::KEGG::API.new genes = ["eco:b1002", "eco:b2388"] url = serv.mark_pathway_by_objects("path:eco00010", genes) puts url # BioRuby ξ硢¸Τ save_image ᥽åɤȤ serv.save_image(url, "filename.gif") === Perl ξ Perl ǤϡʲΥ饤֥ɲå󥹥ȡ뤷Ƥɬפޤ * (()) (Ver. 0.60 ưǧ) * 0.60 꿷СǤϤĤΥ᥽åɤȤʤ褦Ǥ * (()) * (()) * (()) ʲRuby κǽƱ¹Ԥ륵ץ륳ɤǤ #!/usr/bin/env perl use SOAP::Lite; $wsdl = 'http://soap.genome.jp/KEGG.wsdl'; $serv = SOAP::Lite -> service($wsdl); $offset = 1; $limit = 5; $top5 = $serv->get_best_neighbors_by_gene('eco:b0002', $offset, $limit); foreach $hit (@{$top5}) { print "$hit->{genes_id1}\t$hit->{genes_id2}\t$hit->{sw_score}\n"; } ƱIJݤ KEGG ѥΥꥹȤ֤Ǥ #!/usr/bin/env perl use SOAP::Lite; $wsdl = 'http://soap.genome.jp/KEGG.wsdl'; $results = SOAP::Lite -> service($wsdl) -> list_pathways("eco"); foreach $path (@{$results}) { print "$path->{entry_id}\t$path->{definition}\n"; } SOAP::Lite ǤϰϤˤϡ SOAP::Data->type(array => [value1, value2, .. 
]) Τ褦ѴɬפΤդɬפǤȤХѥؤο ŤǰҤΥꥹȤϤϡ #!/usr/bin/env perl use SOAP::Lite; $wsdl = 'http://soap.genome.jp/KEGG.wsdl'; $serv = SOAP::Lite -> service($wsdl); $genes = SOAP::Data->type(array => ["eco:b1002", "eco:b2388"]); $result = $serv -> mark_pathway_by_objects("path:eco00010", $genes); print $result; Τ褦ˤʤޤ === Perl KEGG API ˤʸͤ˼᥽åɤĤޤ Ѥ SOAP::Lite ΥС˱ơɬʲΤɤ餫н褬ɬפǤ ==== SOAP::Lite v0.60 ޤ Perl (array) ֥ȤȤ KEGG API Ϥϡ ɬʲΤ褦 SOAP ֥ȤŪѴɬפޤ SOAP::Data->type(array => [value1, value2, ... ]) ==== SOAP::Lite v0.61 ʹ SOAP::Lite v0.68 ޤǤϥХޤΤ v0.69 ʹߤѤ򤪴ᤷޤ Perl ʸ (string) (int) (array) ֥Ȥ ArrayOfstring ArrayOfint SOAP ֥ȤѴ ֥롼ɲäɬפޤ sub SOAP::Serializer::as_ArrayOfstring{ my ($self, $value, $name, $type, $attr) = @_; return [$name, {'xsi:type' => 'array', %$attr}, $value]; } sub SOAP::Serializer::as_ArrayOfint{ my ($self, $value, $name, $type, $attr) = @_; return [$name, {'xsi:type' => 'array', %$attr}, $value]; } ޤץˤ񤤤ƤȤǡ $genes = SOAP::Data->type(array => ["eco:b1002", "eco:b2388"]); ǤϤʤ $genes = ["eco:b1002", "eco:b2388"]; Τ褦˾άƽ񤯤ȤǤ褦ˤʤޤʾάʤƤ⹽ޤˡ ==== ƥȥץ SOAP::Lite v0.69 ArrayOfstring ѴޤƯƤ뤫ɤ ʲΥץǥƥȤǤޤ URL ɽ OK Ǥ #!/usr/bin/env perl use SOAP::Lite +trace => [qw(debug)]; print "SOAP::Lite = ", $SOAP::Lite::VERSION, "\n"; my $serv = SOAP::Lite -> service("http://soap.genome.jp/KEGG.wsdl"); my $result = $serv->mark_pathway_by_objects("map:eco00010", $genes); print $result, "\n"; # sub routines implicitly used in the above code sub SOAP::Serializer::as_ArrayOfstring{ my ($self, $value, $name, $type, $attr) = @_; return [$name, {'xsi:type' => 'array', %$attr}, $value]; } sub SOAP::Serializer::as_ArrayOfint{ my ($self, $value, $name, $type, $attr) = @_; return [$name, {'xsi:type' => 'array', %$attr}, $value]; } === Python ξ Python ǤϰʲΥ饤֥ɲå󥹥ȡ뤷Ƥɬפޤ * (()) ޤSOAPpy ¸Ƥ뤤ĤΥѥå (fpconst, PyXML ʤ) ɬפˤʤޤ ʲKEGG/PATHWAY 00020 ֤Υѥ˺ܤäƤIJݤΰҤ ꥹȤ֤ץ륳ɤǤ #!/usr/bin/env python from SOAPpy import WSDL wsdl = 'http://soap.genome.jp/KEGG.wsdl' serv = WSDL.Proxy(wsdl) results = serv.get_genes_by_pathway('path:eco00020') print 
results === Java ξ Java Ǥ Apache Axis 饤֥ axis-1.2alpha 꿷С (axis-1_1 ǤϤޤưޤˤꤷơɬפ jar եŬ ʥǥ쥯ȥ֤Ƥɬפޤ * (()) Ȥ Apache Axis С axis-1_2beta ΥХʥۤξ硢 axis-1_2beta/lib ʲˤ jar ե򥤥󥹥ȡΥǥ쥯 ˥ԡޤ % cp axis-1_2beta/lib/* /path/to/lib/ ʲΤ褦˼¹Ԥ WSDL KEGG API ѤΥ饹ưޤ ޤ줿եԶľˡ (()) ץȤꤷƤޤ % java -classpath /path/to/lib/axis.jar:/path/to/lib/jaxrpc.jar:/path/to/lib/commons-logging.jar:/path/to/lib/commons-discovery.jar:/path/to/lib/saaj.jar:/path/to/lib/wsdl4j.jar:. org.apache.axis.wsdl.WSDL2Java -p keggapi http://soap.genome.jp/KEGG.wsdl % perl -i axisfix.pl keggapi/KEGGBindingStub.java % javac -classpath /path/to/lib/axis.jar:/path/to/lib/jaxrpc.jar:/path/to/lib/wsdl4j.jar:. keggapi/KEGGLocator.java % jar cvf keggapi.jar keggapi/* % javadoc -classpath /path/to/lib/axis.jar:/path/to/lib/jaxrpc.jar -d keggapi_javadoc keggapi/*.java javadoc αѸǤɬפʾ javadoc -locale en_US ץ ĤƼ¹Ԥޤ ʲϡPython Ʊͤˡꤷ KEGG/PATHWAY ˺ܤäƤҤ ꥹȤɽ륵ץ륳ɤǤ import keggapi.*; class GetGenesByPathway { public static void main(String[] args) throws Exception { KEGGLocator locator = new KEGGLocator(); KEGGPortType serv = locator.getKEGGPort(); String query = args[0]; String[] results = serv.get_genes_by_pathway(query); for (int i = 0; i < results.length; i++) { System.out.println(results[i]); } } } ϡSSDBRelation äƤǤ import keggapi.*; class GetBestNeighborsByGene { public static void main(String[] args) throws Exception { KEGGLocator locator = new KEGGLocator(); KEGGPortType serv = locator.getKEGGPort(); String query = args[0]; SSDBRelation[] results = null; results = serv.get_best_neighbors_by_gene(query, 1, 50); for (int i = 0; i < results.length; i++) { String gene1 = results[i].getGenes_id1(); String gene2 = results[i].getGenes_id2(); int score = results[i].getSw_score(); System.out.println(gene1 + "\t" + gene2 + "\t" + score); } } } ΥץϰʲΤ褦 -classpath ץ keggapi.jar ե äƥѥ롢¹Ԥޤ % javac -classpath /path/to/lib/axis.jar:/path/to/lib/jaxrpc.jar:/path/to/lib/wsdl4j.jar:/path/to/keggapi.jar GetBestNeighborsByGene.java % java -classpath 
/path/to/lib/axis.jar:/path/to/lib/jaxrpc.jar:/path/to/lib/commons-logging.jar:/path/to/lib/commons-discovery.jar:/path/to/lib/saaj.jar:/path/to/lib/wsdl4j.jar:/path/to/keggapi.jar:. GetBestNeighborsByGene eco:b0002 Ķѿ CLASSPATH ꤷƤȡĹץɬפ ʤޤ bash ޤ zsh ξ硧 % for i in /path/to/lib/*.jar do CLASSPATH="${CLASSPATH}:${i}" done % export CLASSPATH tcsh ξ硧 % foreach i ( /path/to/lib/*.jar ) setenv CLASSPATH ${CLASSPATH}:${i} end ¾ͤȷȤͤμФʤɤˤĤƤϡWSDL2Java ˤ 줿ʲΥɥȤ򻲾ȤƤ * (()) == KEGG API ե ʲǤϡKEGG API ȤΤɬפʾƤΥ᥽åɤ⤷ޤ === WSDL ե SOAP ǤϡФɤΤ褦ʥ᥽åɤäƤ뤫ΤäƤɬפ ޤWSDL ȤȤμưǤޤWSDL ե ƥ饤ȥɥ饤ФȤޤǡ̾ SOAP/WSDL 饤֥꤬ƤϤǤKEGG API WSDL եϰʲ URL ˤޤ * (()) === Ѹ ʲβǽФƤ KEGG ϢѸ򤷤Ƥޤ * org KEGG ˴ޤޤƤʪ򤽤줾죳ʸɤ ɽΤǡeco IJݡsce вʤɤȤʤäƤޤ ʸɤΥꥹȤ list_organisms ᥽åɤʲΥڡ ȤƤ * (()) * db GenomeNet 󶡤Ƥǡ١̾Ǥǡ١̾ ꥹȤˤĤƤ list_databases ᥽åɤ򻲾ȤƤ * entry_id db_name ȥȥ̾ ':' Ƿ礷ƤΥǡ١֤ ˡ ID ǤȤ embl:J00231 EMBL Υȥ J00231 ؤޤentry_id ϡʲ genes_id, enzyme_id, compound_id, drug_id, glycan_id, reaction_id, pathway_id, motif_id ʤɤޤߤޤ * genes_id keggorg Ȱ̾ ':' Ƿ礷 KEGG ΰ ID Ǥ eco:b0001 IJݤΰ b0001 ؤޤ * enzyme_id ec: Ĥֹ ID Ǥec:1.1.1.1 Ϲֹ 1.1.1.1 ιǤǤ륢륳롦ǥҥɥʡؤޤ * compound_id cpd: Ĥʪ ID Ǥcpd:C00158 ϲʪֹ C00158 βʪǤ륯ؤޤ * drug_id dr: Ĥʪ ID Ǥdr:D00201 ϥɥåֹ D00201 ΥɥåǤƥȥ饵ؤޤ * glycan_id gl: Ĥʪ ID Ǥgl:G00050 ֹ G00050 Ǥ Paragloboside ؤޤ * reaction_id REACTION ǡ١Υȥֹǡrn:R00959 ꥢֹ R00959 ȿ (cpd:C00103 cpd:00668 ֤Ѵ) ؤޤ * pathway_id KEGG/PATHWAY ǡ١Υѥֹǡѥ ֹΥץեå map ξϥե󥹥ѥkeggorg ϤʪλİҤκܤäѥɽޤ㤨 path:map00020 ϥե󥹥ѥ 00020 ֤path:eco00020 IJݤΥѥ 00020 ֤ؤޤ * motif_id ϥեǡ١Υȥ̾ǡpf:DnaJ Pfam Υȥ DnaJ ؤޤ'pf' ¾'ps' PROSITE, 'bl' BLOCS, 'pr' PRINTS, 'pd' PRODOM ؤޤ * ko_id KO (KEGG Orthology) ǡ١Υȥֹǡko:K02598 KO ֹ K02598 nitrite transporter NirC Υʰ 롼פؤޤ * ko_class_id KO ŪʬषƳءʥ饹ˤ ID ǡ '01110' Carbohydrate Metabolism 饹ˤʤޤ KO 饹 KO ֹΥꥹȤϰʲΥڡ򻲾ȤƤ * (()) * offset, limit ϰ٤֤äƤ̤οꤹ륪ץǡ offset ܤ limit Ĥη̤ޤƤη̤ˤ offset = offset + limit Ȥƶ֤äƤޤǷ֤ ᥽åɤƤӤޤ * fg_color_list ϥѥؤοŤǥ֥ȤȤʸ ꤹǤ * fg_color_list ϥѥؤοŤǥ֥ȤȤطʤ ꤹǤ Ϣ URL * (()) === ͤΥǡ KEGG API Υ᥽åɤʸʤñ֤ͤΤǤʤʣʥǡ ¤ä֤ͤ⤢ꡢΤΥǡƤޤ API Υ᥽åɤˤʸʤɤΡ˷̤ʤä硢ˤäưʲ ֤ͤޤ * -- Ȥ ArrayOfOBJ 
ʥ֥Ȥˤ֤᥽åɤξ * ʸ -- Ȥ String ʸˤ֤᥽åɤξ * -1 -- Ȥ int ʿ͡ˤ֤᥽åɤξ * NULL -- ȤȰʲ줿֥ȤΤɤ줫֤᥽åɤξ + SSDBRelation SSDB ǡ١γƥեɤбͤäǡǤ genes_id1 ꡼ genes_id (string) genes_id2 åȤ genes_id (string) sw_score genes_id1 genes_id2 ֤ Smith-Waterman (int) bit_score genes_id1 genes_id2 ֤ bit (float) identity genes_id1 genes_id2 ֤ ǥƥƥ (float) overlap genes_id1 genes_id2 ΥСåΰĹ (int) start_position1 genes_id1 Υ饤ȤγϻĴ (int) end_position1 genes_id1 Υ饤ȤνüĴ (int) start_position2 genes_id2 Υ饤ȤγϻĴ (int) end_position2 genes_id2 Υ饤ȤνüĴ (int) best_flag_1to2 genes_id1 鸫 genes_id2 ٥ȥҥåȤ (boolean) best_flag_2to1 genes_id2 鸫 genes_id1 ٥ȥҥåȤ (boolean) definition1 genes_id1 Υǥե˥ʸ (string) definition2 genes_id2 Υǥե˥ʸ (string) length1 genes_id1 ΥߥλĹ (int) length2 genes_id2 ΥߥλĹ (int) + ArrayOfSSDBRelation ʣ SSDBRelation ǡޤǤ + MotifResult motif_id եǡ١Υȥ ID (string) definition դΥǥե˥ (string) genes_id դäƤҤ genes_id (string) start_position դγϻĴ (int) end_position դνλĴ (int) score (PROSITE Profile, TIGRFAM) Υ (float) evalue (Pfam) E-value (double) score evalue Τб̵ͤΤˤĤƤ -1 ֤ޤ + ArrayOfMotifResult ʣ MotifResult ǡޤǤ + Definition entry_id ǡ١ȥ꡼ID (string) definition ȥ꡼Υǥե˥ (string) + ArrayOfDefinition ʣ Definitioin ǡޤǤ + LinkDBRelation entry_id1 ǡ١Υȥ ID (string) entry_id2 ǡ١Υȥ ID (string) type "direct" ޤ "indirect" Υ󥯤μ (string) path 󥯤ηϩ (string) + ArrayOfLinkDBRelation ʣ LinkDBRelation ǡޤǤ + PathwayElement ѥƤġȢݤʤɤΥ֥ȤɽǡǤ element_id ѥΥ֥Ȥؤˡ ID (int) type ֥Ȥμ ("gene", "enzyme" ʤ) (string) names ֥ȤˤĤ줿̾Υꥹ (ArrayOfstring) components 롼פξʤɴޤޤ륪֥ȤΥꥹ (ArrayOfint) + ArrayOfPathwayElement ʣ PathwayElement ǡޤǤ + PathwayElementRelation PathwayElement ֤δطɽǡǤ element_id1 ѥΥ֥Ȥؤˡ ID (int) element_id2 ѥΥ֥Ȥؤˡ ID (int) type طμ ("ECrel", "maplink" ʤ) (string) subtypes ط˴ؤ륪֥Ȥ (ArrayOfSubtype) + ArrayOfPathwayElementRelation ʣ PathwayElementRelation ǡޤǤ ++ Subtype PathwayElementRelation ǻȤǤǡPathwayElement ֤ طŤ륪֥ȡʲʪʤɡˤɽǡǤ element_id ѥΥ֥Ȥؤˡ ID (int) relation طμ ("compound", "inhibition" ʤ) (string) type طε ("+p", "--|" ʤ) (string) ++ ArrayOfSubtype ʣ 
PathwayElementRelation ǡޤǤ + StructureAlignment 桼λꤷع¤ȥǡ١ι¤Ӥ 饤ȤΥбΡɡʸǡˤΥꥹȤɽ ǡǤ target_id ¤оݤΥȥ ID (string) score ¤ӤΥ (float) query_nodes Ϲ¤ǥ饤Ȥ줿Ρֹ (ArrayOfint) target_nodes оݹ¤бΡֹ (ArrayOfint) + ArrayOfStructureAlignment ʣ StructureAlignment ǡޤǤ === ᥽åɰ ʲKEGG API ᥽åɤΥꥹȤǤ᥽åɤˤϥ᥿֤Τ ƥǡ١ФΤޤߡKEGG ˤǡ١Τ KEGG API оݤȤʤäƤΤ SSDB, PATHWAY, GENES, LIGAND Ǥ ʳΥǡ١ؤб᥽åɤɲä缡ʤͽǤ ʲǤϡʤɤ Ruby ɽäƽ񤫤Ƥޤºݤ ä˥ꥹȤϤʤɡˤϻѤˤäưۤʤǽޤ ==== ᥿ ǿΥǡ١ʤɤ֤Υ᥽åɤǤ --- list_databases KEGG 󶡤Ƥ륲ΥͥåȤǸѤǤǡ١ΰ֤ޤ ͡ ArrayOfDefinition (db, definition) Ϣ URL * (()) * (()) (section 2.2) --- list_organisms KEGG ˴ޤޤƤʪ (org) ΥꥹȤ֤ޤ ͡ ArrayOfDefinition (org, definition) Ϣ URL * (()) * (()) --- list_pathways(string:org) KEGG ˴ޤޤƤꤷʪΥѥΥꥹȤ֤ޤ 'map' ȤʸͿȥե󥹥ѥΥꥹȤ֤ޤ ͡ ArrayOfDefinition (pathway_id, definition) Ϣ URL * (()) ==== DBGET DBGET ƥФ᥽åɤΰǤDBGET ˤĤƾܤϰʲ ڡ򻲾ȤƤ Ϣ URL * (()) * (()) --- binfo(string:db) ꤷǡ١Υȥ乹ʤɾܤǿ֤ޤ 'all' ϤѲǽƤΥǡ١ξ֤ޤ binfo ޥɤؤΰʸϤޤ ͡ string 㡧 # GenBank ǡ١κǿ binfo('gb') # ƤΥǡ١κǿ binfo('all') --- bfind(string:str) DBGET bfind ޥɤؤΥåѡǤɤˤꥨȥ 뤳ȤǤޤ٤Ϳ륭ɤο 100 İʲ ¤Ƥޤ ͡ string 㡧 # ǥե˥ E-cadherin human GenBank Υȥ򸡺 bfind("gb E-cadherin human") --- bget(string:str) ꤷ entry_id Υȥ֤ޤGENES ΰҥȥϤᡢ ΥͥåȤ DBGET ƥ󶡤Ƥ͡ʥǡ١ (list_databases 򻲾) ΥȥƼǤޤbget ޥɤؤ ޥɥ饤󥪥ץʸϤޤ٤˼Ǥ륨ȥ 100 İʲ¤Ƥޤ ͡ string 㡧 # ʣΥȥ bget("eco:b0002 bsu:BG10065 cpd:C00209") # FASTA եޥåȤΥߥλ bget("-f -n a eco:b0002 bsu:BG10065") # FASTA եޥåȤα bget("-f -n n eco:b0002 hin:tRNA-Cys-1") --- btit(string:str) DBGET btit ޥɤؤΥåѡǤꤷȥ ID б ǥե˥֤ޤ٤Ϳ륨ȥο 100 İʲ ¤Ƥޤ ͡ string 㡧 # 飴ĤΰҤΥǥե˥򸡺 btit("hsa:1798 mmu:13478 dme:CG5287-PA cel:Y60A3A.14") --- bconv(string:str) ǡ١ ID KEGG ID Ѵޤ ̤ϡ䤤碌 ID Ѵ ID ΥڤʸȤ֤ޤ ߡʲγǡ١бƤޤ ǡ١ ǡ١̾ prefix ---------------- ----------------------- NCBI GI ncbi-gi: NCBI GeneID ncbi-geneid: GenBank genbank: UniGene unigene: UniProt uniprot: OMIM omim: Ȥ UniProt ID ϡǤ KEGG GENES ˵ܤƤ UniProt ؤΥ󥯤¾EBI Genome Reviews ˵ܤƤ UniProt ID ȥ ID бѤơNCBI ȶ̤Υ ID KEGG GENES бɽȤѴƤޤ ͡ string 㡧 # NCBI GI Gene ID KEGG genes_id Ѵ serv.bconv("ncbi-gi:10047086 ncbi-gi:10047090 ncbi-geneid:14751") Ϣ URL * (()) (Gene name 
conversion) ==== LinkDB + ǡ١֤Υ --- get_linkdb_by_entry(string:entry_id, string:db, int:offset, int:limit) ꤷ entry_id ľܤޤϴŪ˥󥯤Ƥ륨ȥηϩ db ǻꤷǡ١ˤɤޤǸޤ ͡ ArrayOfLinkDBRelation 㡧 # E. coli ΰ b0002 󥯤Τɤ KEGG/PATHWAY Υȥ򸡺 get_linkdb_by_entry('eco:b0002', 'pathway', 1, 10) get_linkdb_by_entry('eco:b0002', 'pathway', 11, 10) Ϣ URL * (()) (Single entry to database) --- get_linkdb_between_databases(string:from_db, string:to_db, int:offset, int:limit) ꤷĤΥǡ١ǡȥ֤Υ󥯤Ƹޤ ͡ ArrayOfLinkDBRelation 㡧 # IJݤ KEGG GENES KEGG PATHWAY δ֤Υ󥯤Ƹ get_linkdb_between_databases("eco", "pathway", 1, 100) # Ruby Ǽ󥯤ɽ links = get_linkdb_between_databases("eco", "pathway", 1, 100) links.each do |link| puts link.entry_id1 # => "eco:b0084" puts link.entry_id2 # => "path:map00550" puts link.type # => "indirect" puts link.path # => "eco->ec->path" end Ϣ URL * (()) (Database to database) + Ҥȹֹδط --- get_genes_by_enzyme(string:enzyme_id, string:org) оʪˤơꤷֹİҤΥꥹȤ֤ޤ ͡ ArrayOfstring (genes_id) 㡧 # ֹ 1.1.1.1 IJݤΰҤΥꥹ get_genes_by_enzyme('ec:1.1.1.1', 'eco') --- get_enzymes_by_gene(string:genes_id) ꤷҤбֹΥꥹȤ֤ޤ ͡ ArrayOfstring (enzyme_id) 㡧 # IJݰ 'eco:b0002' ιֹΥꥹ get_enzymes_by_gene(eco:b0002) + ǡʪꥢδط --- get_enzymes_by_compound(string:compound_id) ꤷʪбֹΥꥹȤ֤ޤ ͡ ArrayOfstring (compound_id) 㡧 # ʪ 'cpd:C00345' դ˴ؤǤΥꥹ get_enzymes_by_compound('cpd:C00345') --- get_enzymes_by_glycan(string:compound_id) ꤷбֹΥꥹȤ֤ޤ ͡ ArrayOfstring (glycan_id) 㡧 # 'gl:G00001' դ˴ؤǤΥꥹ get_enzymes_by_glycan('gl:G00001') --- get_enzymes_by_reaction(string:reaction_id) ꤷꥢֹбֹΥꥹȤ֤ޤ ͡ ArrayOfstring (reaction_id) 㡧 # ꥢֹ R00100 ĹǤΥꥹ get_enzymes_by_reaction('rn:R00100') --- get_compounds_by_enzyme(string:enzyme_id) ꤷֹб벽ʪΥꥹȤ֤ޤ ͡ ArrayOfstring (compound_id) 㡧 # ֹ 'ec:2.7.1.12' դ˴ؤ벽ʪΥꥹ get_compounds_by_enzyme('ec:2.7.1.12') --- get_compounds_by_reaction(string:reaction_id) ꤷꥢб벽ʪΥꥹȤ֤ޤ ͡ ArrayOfstring (compound_id) 㡧 # ꥢֹ 'rn:R00100' ȿ˴ؤ벽ʪΥꥹ get_compounds_by_reaction('rn:R00100') --- get_glycans_by_enzyme(string:enzyme_id) ꤷֹбΥꥹȤ֤ޤ ͡ ArrayOfstring (glycan_id) 㡧 # ֹ 
'ec:2.4.1.141' դ˴ؤΥꥹ get_glycans_by_enzyme('ec:2.4.1.141') --- get_glycans_by_reaction(string:reaction_id) ꤷꥢбΥꥹȤ֤ޤ ͡ ArrayOfstring (glycan_id) 㡧 # ꥢֹ 'rn:R06164' ȿ˴ؤΥꥹ get_glycans_by_reaction('rn:R06164') --- get_reactions_by_enzyme(string:enzyme_id) ꤷֹбꥢΥꥹȤ֤ޤ ͡ ArrayOfstring (reaction_id) 㡧 # ֹ 'ec:2.7.1.12' ȿ˴ؤꥢֹΥꥹ get_reactions_by_enzyme('ec:2.7.1.12') --- get_reactions_by_compound(string:compound_id) ꤷʪбꥢΥꥹȤ֤ޤ ͡ ArrayOfstring (reaction_id) 㡧 # ʪ 'cpd:C00199' οȿ˴ؤꥢֹΥꥹ get_reactions_by_compound('cpd:C00199') --- get_reactions_by_glycan(string:glycan_id) ꤷбꥢΥꥹȤ֤ޤ ͡ ArrayOfstring (reaction_id) 㡧 # 'gl:G00001' οȿ˴ؤꥢֹΥꥹ get_reactions_by_glycan('gl:G00001') ==== SSDB SSDB ǡ١Ф᥽åɤΰǤSSDB KEGG/GENES ˴ޤޤ ʪҴ֤ ssearch Ѥ Smith-Waterman 르ꥺ 븡Ԥä̤ȡҤΥո̤Ͽǡ١ǡ ֤Τ׻餫ὪäƤ뤿®ʸǽˤʤäƤޤ KEGG Υηޤäʪ濴оݤȤƤ뤳ȤȡSmith- Waterman ˤӤǤ뤳Ȥ饪ѥطˤ ҤõʪͭΰҤθϤ͡ʱѤͤޤ SSDB ǡ١ˤĤƾܤϰʲΥڡ򻲾ȤƤ * (()) --- get_best_best_neighbors_by_gene(string:genes_id, int:offset, int:limit) ȥåȤ best-best δطˤҤ򸡺ޤ ͡ ArrayOfSSDBRelation 㡧 # IJݤΰ b0002 ʪ best-best δطˤ get_best_best_neighbors_by_gene('eco:b0002', 1, 10) get_best_best_neighbors_by_gene('eco:b0002', 11, 10) --- get_best_neighbors_by_gene(string:genes_id, int:offset, int:limit) ꤫鸫ƥ٥ȥҥåȤδطˤҤ򸡺ޤ ͡ ArrayOfSSDBRelation 㡧 # IJݤΰ b0002 ʪ best neighbor δطˤ get_best_neighbors_by_gene('eco:b0002', 1, 10) get_best_neighbors_by_gene('eco:b0002', 11, 10) --- get_reverse_best_neighbors_by_gene(string:genes_id, int:offset, int:limit) å¦ʪ狼鸫ƥ꤬٥ȥҥåȤȤʤҤ򸡺ޤ ͡ ArrayOfSSDBRelation 㡧 # IJݤΰ b0002 reverse best neighbor δطˤ get_reverse_best_neighbors_by_gene('eco:b0002', 1, 10) get_reverse_best_neighbors_by_gene('eco:b0002', 11, 10) --- get_paralogs_by_gene(string:genes_id, int:offset, int:limit) ƱʪǥѥҤ򸡺ޤ ͡ ArrayOfSSDBRelation 㡧 # IJݤΰ b0002 ȥѥδطˤ get_paralogs_by_gene('eco:b0002', 1, 10) get_paralogs_by_gene('eco:b0002', 11, 10) ==== Motif --- get_motifs_by_gene(string:genes_id, string:db) ꤷҤ¸ߤդΥꥹȤ֤ޤեǡ١ ΥꥹȤˤϡPfam (pfam), TIGRFAM (tfam), PROSITE pattern (pspt), PROSITE profile (pspf) ޤϤ (all) ޤ ͡ 
ArrayOfMotifResult 㡧 # IJݤΰ b0002 PfamդΥꥹ get_motifs_by_gene('eco:b0002', 'pfam') --- get_genes_by_motifs([string]:motif_id_list, int:offset, int:limit) ꤷդƻİҤ򸡺ޤ ͡ ArrayOfDefinition (genes_id, definition) 㡧 # Pfam DnaJ Prosite DNAJ_2 ˥ҥåȤҤ򸡺 list = ['pf:DnaJ', 'ps:DNAJ_2'] get_genes_by_motifs(list, 1, 10) get_genes_by_motifs(list, 11, 10) ==== KO KO (KEGG orthology), OC (KEGG ortholog cluster), PC (KEGG paralog cluster) 뤿Υ᥽åɤǤKO ϥ졼󤵤줿ҷ OC PC ϵŪ˥饹󥰤줿ƱΤҷΥǡ١Ǥ --- get_ko_by_gene(string:genes_id) ꤷҤ˥󤵤Ƥ KO Υȥֹ֤ޤ ͡ ArrayOfstring (ko_id) 㡧 # eco:b0002 Ҥ˥󤵤Ƥ KO Υꥹ get_ko_by_gene('eco:b0002') --- get_ko_by_ko_class(string:ko_class_id) ꤷ ko_class_id ˴ޤޤ ko_id ΥꥹȤ֤ޤ ͡ ArrayOfDefinition (ko_id) 㡧 # KO class '01196' ˴ޤޤ KO Υꥹ get_ko_by_ko_class('01196') --- get_genes_by_ko_class(string:ko_class_id, string:org, int:offset, int:limit) ꤷʪ ko_class_id ˴ޤޤҤΥꥹȤ֤ޤ ͡ ArrayOfDefinition (genes_id, definition) 㡧 # KO 饹 '00930' ˴ޤޤҥȰҤΥꥹ get_genes_by_ko_class('00903', 'hsa' , 1, 100) --- get_genes_by_ko(string:ko_id, string:org) ꤷʪ ko_id ˴ޤޤҤΥꥹȤ֤ޤ ʪ拾ɤ all ꤹʪΰҤ֤ޤ ͡ ArrayOfDefinition (genes_id, definition) 㡧 # KO ֹ 'K00001' ˴ޤޤIJݰҤΥꥹ get_genes_by_ko('ko:K00010', 'eco') # KO ֹ 'K00010' ˴ޤޤʪΰҥꥹ get_genes_by_ko('ko:K00010', 'all') ==== PATHWAY PATHWAY ǡ١Ф᥽åɤΰǤPATHWAY ǡ١ ĤƾܤϰʲΥڡ򻲾ȤƤ * (()) + ѥؤοŤ Ϣ URL * (()) --- mark_pathway_by_objects(string:pathway_id, [string]:object_id_list) ꤷʪǡͿ줿ѥޥåפͿ줿֥ ʰҡʪֹˤбȤ˿Ĥ URL ֤ޤ ͡ string (URL) 㡧 # IJݤΥѥ path:eco00260 ΰ eco:b0002 Homoserine # cpd:C00263 бܥå֤忧 URL obj_list = ['eco:b0002', 'cpd:C00263'] mark_pathway_by_objects('path:eco00260', obj_list) --- color_pathway_by_objects(string:pathway_id, [string]:object_id_list, [string]:fg_color_list, [string]:bg_color_list) ꤷѥͿ줿֥ȡʰҡʪǡˤФ ʸȤ fg_color_list ǻꤷطʤ bg_color_list ǻꤷ Ĥ URL ֤ޤobject_id_list fg_color_list, bg_color_list ǤοȽ֤·褦դɬפޤ ͡ string (URL) 㡧 # ѥ path:eco00053 ˺ܤäƤIJݤΰ eco:b0207 # طʤ֡ʸȤĤ忧eco:b1300 طʤ򲫿ʸȤФ # 忧 URL ֤ޤ obj_list = ['eco:b0207', 'eco:b1300'] fg_list = ['blue', '#00ff00'] bg_list = ['#ff0000', 'yellow'] 
color_pathway_by_objects('path:eco00053', obj_list, fg_list, bg_list) --- color_pathway_by_elements(string:pathway_id, [int]:element_id_list, [string]:fg_color_list, [string]:bg_color_list) ꤷ element_id бѥΥ֥ȡĹݤʤɡ ФʸȤ fg_color_list ǻꤷطʤ bg_color_list ꤷĤ URL ֤ޤobject_id_list fg_color_list, bg_color_list ǤοȽ֤·褦դ ɬפޤ KEGG PATHWAY ǤϡʣΰҤĤΥܥå˳ƤƤꡢ դˣĤΰҤʣΥܥå˳ƤƤ뤳Ȥޤ Τ褦ʾ color_pathway_by_objects ᥽åɤǤɤʬ뤳Ȥ ޤ󤬡color_pathway_by_elements ȤȤDzǤޤ element_id KEGG PATHWAY XML ɽǤ KGML ꤵ롢ѥΥեˤĤ줿ˡ ͤǤѥ element_id get_elements_by_pathway ᥽åɤ PathwayElement ֥ȤΥꥹȤȤ뤳ȤǤޤ PathwayElement type name °ˤҡǡʪʤɤ бꡢelement_id ͤȤäƥ֥Ȥꤷޤ KGML ˤĤƾܤϲΥڡ򻲾ȤƤ * (()) ͡ string (URL) 㡧 # ݤΥѥ path:bsu00010 ˺ܤäƤ롢 # bsu:BG11350 (element_id 78, ec:3.2.1.86) /طʤ # bsu:BG11203 (element_id 79, ec:3.2.1.86) /طʤ # bsu:BG11685 (element_id 51, ec:2.7.1.2) /طʤ # bsu:BG11685 (element_id 47, ec:2.7.1.2) /طʤ # 줾忧 URL ֤ޤ element_id_list = [ 78, 79, 51, 47 ] fg_list = [ '#ff0000', '#0000ff', '#ff0000', '#0000ff' ] bg_list = [ '#ffff00', '#ffff00', '#ffcc00', '#ffcc00' ] color_pathway_by_elements('path:bsu00010', element_id_list, fg_list, bg_list) --- get_html_of_marked_pathway_by_objects(string:pathway_id, [string]:object_id_list) ˥å֥ޥåפޤ HTML ڡ URL ֤ С 'mark_pathway_by_objects' ᥽åɤǤ ͡ string (URL) 㡧 # IJݤΥѥ '00970' Ρ 'eco:b4258'ʪ 'cpd:C00135' # KO ֹ 'ko:K01881' ֿǥޡĤΥå֥ޥåפ # ɽ HTML URL ֤ obj_list = ['eco:b4258', 'cpd:C00135', 'ko:K01881'] get_html_of_marked_pathway_by_objects('path:eco00970', obj_list) --- get_html_of_colored_pathway_by_objects(string:pathway_id, [string]:object_id_list, [string]:fg_color_list, [string]:bg_color_list) ˥å֥ޥåפޤ HTML ڡ URL ֤ С 'color_pathway_by_objects' ᥽åɤǤ ͡ string (URL) 㡧 # IJݤΥѥ '00970' Ρ 'eco:b4258' Ϥ˳ʪ # 'cpd:C00135' 򲫿ϤСKO ֹ 'ko:K01881' ϤĤοŤ򤷤 # Υå֥ޥåפɽ HTML URL ֤ obj_list = ['eco:b4258', 'cpd:C00135', 'ko:K01881'] fg_list = ['gray', '#00ff00', 'blue'] bg_list = ['#ff0000', 'yellow', 'orange'] get_html_of_colored_pathway_by_objects('path:eco00970', obj_list, fg_list, bg_list) --- 
get_html_of_colored_pathway_by_elements(string:pathway_id, [int]:element_id_list, [string]:fg_color_list, [string]:bg_color_list) ˥å֥ޥåפޤ HTML ڡ URL ֤ С 'color_pathway_by_elements' ᥽åɤǤ ͡ string (URL) 㡧 # ݤΥѥ path:bsu00010 ˺ܤäƤ롢 # bsu:BG11350 (element_id 78, ec:3.2.1.86) /طʤ # bsu:BG11203 (element_id 79, ec:3.2.1.86) /طʤ # bsu:BG11685 (element_id 51, ec:2.7.1.2) /طʤ # bsu:BG11685 (element_id 47, ec:2.7.1.2) /طʤ # 忧Υå֥ޥåפɽ HTML URL ֤ޤ element_id_list = [ 78, 79, 51, 47 ] fg_list = [ '#ff0000', '#0000ff', '#ff0000', '#0000ff' ] bg_list = [ '#ffff00', '#ffff00', '#ffcc00', '#ffcc00' ] color_pathway_by_elements('path:bsu00010', element_id_list, fg_list, bg_list) + ѥΥ֥ȴ֤δط --- get_element_relations_by_pathway(string:pathway_id) ꤷѥ˺ܤäƤ륪֥Ǥδ֤δط֤ޤ ѥˤƤطɽKGML ˤ ʥաˤ뤳ȤǤޤ get_elements_by_pathway ᥽åɤ⻲ȡ ͡ ArrayOfPathwayElementRelation 㡧 # ݤΥѥ path:bsu00010 ˺ܤäƤ PathwayElement ֤ # طǤ PathwayElementRelation ΥꥹȤ롣 relations = get_element_relations_by_pathway('path:bsu00010') # ꥹȤȤɽ롣 relations.each do |rel| puts rel.element_id1 puts rel.element_id2 puts rel.type rel.subtypes.each do |sub| puts sub.element_id puts sub.relation puts sub.type end end + ѥΥ֥ȸ --- get_elements_by_pathway(string:pathway_id) ꤷѥ˺ܤäƤ륪֥ǤΥꥹȤ֤ޤ ˡˤĤƤ color_pathway_by_elements ᥽åɤ򻲾ȤƤ ͡ ArrayOfPathwayElement 㡧 # ݤΥѥ path:bsu00010 ˺ܤäƤ PathwayElement # ꥹȤ롣 get_elements_by_pathway('path:bsu00010') # Ruby ǰ bsu:BG11350, bsu:BG11203 bsu:BG11685 element_id # Ĵ٤㡣 elems = serv.get_elements_by_pathway('path:bsu00010') genes = [ 'bsu:BG11350', 'bsu:BG11203', 'bsu:BG11685' ] elems.each do |elem| genes.each do |gene| if elem.names.include?(gene) puts gene, elem.element_id end end end --- get_genes_by_pathway(string:pathway_id) ꤷѥ˺ܤäƤҤΥꥹȤ֤ޤʪ̾ pathway_id ˴ޤޤ keggorg ǻꤷޤ ͡ ArrayOfstring (genes_id) 㡧 # IJݤΥѥ 00020 ֤˺ܤäƤҤΥꥹ get_genes_by_pathway('path:eco00020') --- get_enzymes_by_pathway(string:pathway_id) ꤷѥ˺ܤäƤֹΥꥹȤ֤ޤ ͡ ArrayOfstring (enzyme_id) 㡧 # IJݤΥѥ 00020 ֤˺ܤäƤֹΥꥹ get_enzymes_by_pathway('path:eco00020') --- 
get_compounds_by_pathway(string:pathway_id) ꤷѥ˺ܤäƤ벽ʪΥꥹȤ֤ޤ ͡ ArrayOfstring (compound_id) 㡧 # IJݤΥѥ 00020 ˺ܤäƤ벽ʪΥꥹ get_compounds_by_pathway('path:eco00020') --- get_glycans_by_pathway(string:pathway_id) ꤷѥ˺ܤäƤΥꥹȤ֤ޤ ͡ ArrayOfstring (glycan_id) 㡧 # IJݤΥѥ 00510 ˺ܤäƤΥꥹ get_glycans_by_pathway('path:eco00510') --- get_reactions_by_pathway(string:pathway_id) ꤷѥ˺ܤäƤꥢֹΥꥹȤ֤ޤ ͡ ArrayOfstring (reaction_id) 㡧 # IJݤΥѥ 00260 ֤˺ܤäƤꥢΥꥹ get_reactions_by_pathway('path:eco00260') --- get_kos_by_pathway(string:pathway_id) ꤷѥ˺ܤäƤ KO ֹΥꥹȤ֤ޤ ͡ ArrayOfstring (ko_id) 㡧 # ҥȤΥѥ 00010 ˺ܤäƤ KO ֹΥꥹ get_kos_by_pathway('path:hsa00010') + ֥Ȥѥ Ϣ URL * (()) --- get_pathways_by_genes([string]:genes_id_list) ꤷҤƺܤäƤѥΥꥹȤ֤ޤ ͡ ArrayOfstring (pathway_id) 㡧 # IJݤΰ b0077 b0078 ξܤäƤѥΥꥹ get_pathways_by_genes(['eco:b0077', 'eco:b0078']) --- get_pathways_by_enzymes([string]:enzyme_id_list) ꤷֹ椬ƺܤäƤѥΥꥹȤ֤ޤ ͡ ArrayOfstring (pathway_id) 㡧 # ֹ 1.3.99.1 ιǤܤäƤѥΥꥹ get_pathways_by_enzymes(['ec:1.3.99.1']) --- get_pathways_by_compounds([string]:compound_id_list) ꤷʪƺܤäƤѥΥꥹȤ֤ޤ ͡ ArrayOfstring (pathway_id) 㡧 # ʪ C00033 C00158 ξܤäƤѥΥꥹ get_pathways_by_compounds(['cpd:C00033', 'cpd:C00158']) --- get_pathways_by_glycans([string]:compound_id_list) ꤷƺܤäƤѥΥꥹȤ֤ޤ ͡ ArrayOfstring (pathway_id) 㡧 # G00009 G00011 ξܤäƤѥΥꥹ get_pathways_by_glycans(['gl:G00009', 'gl:G00011']) --- get_pathways_by_reactions([string]:reaction_id_list) ꤷꥢֹ椬ƺܤäƤѥΥꥹȤ֤ޤ ͡ ArrayOfstring (pathway_id) 㡧 # ꥢֹ rn:R00959, rn:R02740, rn:R00960, rn:R01786 Ƥ # ȿޤѥΥꥹ get_pathways_by_reactions(['rn:R00959', 'rn:R02740', 'rn:R00960', 'rn:R01786']) --- get_pathways_by_kos([string]:ko_id_list, string:org) ꤷʪ KO ֹ椬ƺܤäƤѥΥꥹȤ֤ޤ ͡ ArrayOfstring (pathway_id) 㡧 # KO ֹ 'ko:K00016' 'ko:K00382' ޤҥȤΥѥΥꥹ get_pathways_by_kos(['ko:K00016', 'ko:K00382'], 'hsa') # KO ֹ 'ko:K00016' 'ko:K00382' ޤʪΥѥΥꥹ get_pathways_by_kos(['ko:K00016', 'ko:K00382'], 'all') + ѥ֤δط --- get_linked_pathways(string:pathway_id) ꤷѥֹΥѥ󥯤Ƥѥ ꥹȤ֤ޤ ͡ ArrayOfstring (pathway_id) 㡧 # ѥ path:eco00620 󥯤ƤѥΥꥹ 
get_linked_pathways('path:eco00620') ==== GENES GENES ǡ١Ф᥽åɤΰǤGENES ǡ١ˤĤ ܤϰʲΥڡ򻲾ȤƤ * (()) --- get_genes_by_organism(string:org, int:offset, int:limit) ꤷʪ GENES ȥΤoffset ܤ limit ʬ ̤֤ޤ ͡ ArrayOfstring (genes_id) 㡧 # ե륨󥶶ݤΰҥꥹȤ 100 Ĥ get_genes_by_organism('hin', 1, 100) get_genes_by_organism('hin', 101, 100) ==== GENOME GENOME ǡ١Ф᥽åɤΰǤGENOME ǡ١ˤĤ ܤϰʲΥڡ򻲾ȤƤ * (()) --- get_number_of_genes_by_organism(string:org) ꤷʪ郎İҿ֤ޤ ͡ int 㡧 # IJݤİҤο get_number_of_genes_by_organism('eco') ==== LIGAND LIGAND ǡ١Ф᥽åɤΰǤ Ϣ URL * (()) --- convert_mol_to_kcf(string:mol) MOL եޥåȤΥȥ KCF եޥåȤѴޤ ͡ string 㡧 convert_mol_to_kcf(mol_str) --- search_compounds_by_name(string:name) ʪ̾Ǹޤ ͡ ArrayOfstring (compound_id) 㡧 search_compounds_by_name("shikimic acid") --- search_drugs_by_name(string:name) ɥå̾Ǹޤ ͡ ArrayOfstring (drug_id) 㡧 search_drugs_by_name("tetracyclin") --- search_glycans_by_name(string:name) ̾Ǹޤ ͡ ArrayOfstring 㡧 search_glycans_by_name("Paragloboside") --- search_compounds_by_composition(string:composition) ʪǸޤ ϸǤȸĿĤʤʸǻꤷޤ Ǥν̵֤طǤ ͡ ArrayOfstring (compound_id) 㡧 search_compounds_by_composition("C7H10O5") --- search_drugs_by_composition(string:composition) ɥåǸޤ ϸǤȸĿĤʤʸǻꤷޤ Ǥν̵֤طǤ ͡ ArrayOfstring (drug_id) 㡧 search_drugs_by_composition("HCl") --- search_glycans_by_composition(string:composition) Ǹޤ ϥådzäȸĿ򥹥ڡǶڤäʸǻꤷޤ ν֤ϼͳǤ ͡ ArrayOfstring 㡧 search_glycans_by_composition("(Man)4 (GalNAc)1") --- search_compounds_by_mass(float:mass, float:range) ʪʬ̤Ǹޤ mass 濴Ȥ range νŤβʪޤ ͡ ArrayOfstring (compound_id) 㡧 search_compounds_by_mass(174.05, 0.1) --- search_drugs_by_mass(float:mass, float:range) ɥåʬ̤Ǹޤ mass 濴Ȥ range νŤΥɥåޤ ͡ ArrayOfstring (drug_id) 㡧 search_drugs_by_mass(150, 1.0) --- search_glycans_by_mass(float:mass, float:range) ʬ̤Ǹޤ mass 濴Ȥ range νŤޤ ͡ ArrayOfstring 㡧 search_glycans_by_mass(1200, 0.5) --- search_compounds_by_subcomp(string:mol, int:offset, int:limit) ʬ¤IJʪ subcomp ץȤäƸޤ 饤Ȥ줿ʬΥΡֹ椬֤Τǡ 饤Ȥ줿ʪι¤ bget ޥɤ "-f m" ץĤ MOL եޥåȤǼбǧޤ ͡ ArrayOfStructureAlignment 㡧 mol = bget("-f m cpd:C00111") 
search_compounds_by_subcomp(mol, 1, 5) Ϣ URL * (()) --- search_drugs_by_subcomp(string:mol, int:offset, int:limit) ʬ¤ĥɥå subcomp ץȤäƸޤ 饤Ȥ줿ʬΥΡֹ椬֤Τǡ 饤Ȥ줿ɥåι¤ bget ޥɤ "-f m" ץĤ MOL եޥåȤǼбǧޤ ͡ ArrayOfStructureAlignment 㡧 mol = bget("-f m dr:D00201") search_drugs_by_subcomp(mol, 1, 5) Ϣ URL * (()) --- search_glycans_by_kcam(string:kcf, string:program, string:option, int:offset, int:limit) ʬ¤ KCaM ץȤäƸޤ program ˤ approximate ޥåԤ "gapped" ޤ exact ޥåԤ "ungapped" ꤷޤޤ option ˤ "global" ޤ "local" ꤷޤ 饤Ȥ줿ʬΥΡֹ椬֤Τǡ 饤Ȥ줿ι¤ bget ޥɤ "-f k" ץĤ KCF եޥåȤǼбǧޤ ͡ ArrayOfStructureAlignment 㡧 kcf = bget("-f k gl:G12922") search_glycans_by_kcam(kcf, "gapped", "local", 1, 5) Ϣ URL * (()) * (()) == Notes Last updated: December 27, 2006 =end bio-1.4.3.0001/doc/Tutorial.rd.ja0000644000004100000410000025104212200110570016157 0ustar www-datawww-data=begin # $Id:$ Copyright (C) 2001-2003, 2005, 2006 Toshiaki Katayama Copyright (C) 2005, 2006 Naohisa Goto = BioRuby λȤ BioRuby Ϲ񻺤ιⵡǽ֥Ȼظץȸ Ruby Τ ץ󥽡ʥХեޥƥѥ饤֥Ǥ Ruby Perl 椺ζϤʥƥȽȡ ץʬ䤹ʸˡꥢʥ֥Ȼظǽˤꡢ Ȥ褦ˤʤޤRuby ˤĤƾܤϡ֥ (()) Τν򻲾ȤƤ == Ϥ BioRuby Ѥˤ Ruby BioRuby 򥤥󥹥ȡ뤹ɬפޤ === Ruby Υ󥹥ȡ Ruby Mac OS X Ƕ UNIX ˤ̾磻󥹥ȡ뤵Ƥޤ Windows ξ⣱å󥹥ȡ ActiveScriptRuby ʤɤ ѰդƤޤޤ󥹥ȡ뤵Ƥʤ * (()) * (()) ʤɤ򻲹ͤˤƥ󥹥ȡ뤷ޤ礦 ʤΥԥ塼ˤɤΥС Ruby 󥹥ȡ뤵Ƥ뤫 åˤ % ruby -v ȥޥɤϤƤȡȤ ruby 1.8.2 (2004-12-25) [powerpc-darwin7.7.0] Τ褦ʴǥСɽޤС 1.8.5 ʹߤ򤪴ᤷޤ Ruby ɸΥ饹᥽åɤˤĤƤϡRuby Υե󥹥ޥ˥奢 ȤƤ * (()) * (()) ޥɥ饤ǥإפ򻲾ȤˤϡRuby ɸźդ ri ޥɤ䡢 ܸǤ refe ޥɤǤ * (()) === RubyGems Υ󥹥ȡ RubyGems ΥڡǿǤɤޤ * (()) Ÿƥ󥹥ȡ뤷ޤ % tar zxvf rubygems-x.x.x.tar.gz % cd rubygems-x.x.x % ruby setup.rb === BioRuby Υ󥹥ȡ BioRuby Υ󥹥ȡˡ (()) ǿǤưʲΤ褦˹Ԥޤ(1)ƱƤ README եˤ ܤ̤ĺΤǤʤȣˤʤ BioPerl ٤ BioRuby Υ󥹥ȡϤ˽ϤǤ % wget http://bioruby.org/archive/bioruby-x.x.x.tar.gz % tar zxvf bioruby-x.x.x.tar.gz % cd bioruby-x.x.x % su # ruby setup.rb RubyGems ȤĶǤ % gem install bio ǥ󥹥ȡǤޤΤ README ե˽񤫤Ƥ褦 bioruby-x.x.x/etc/bioinformatics/seqdatabase.ini Ȥեۡǥ쥯ȥ ~/.bioinformatics ˥ԡ Ȥ褤Ǥ礦RubyGems ξ /usr/local/lib/ruby/gems/1.8/gems/bio-x.x.x/ ʤɤˤϤǤ % mkdir ~/.bioinformatics % cp 
bioruby-x.x.x/etc/bioinformatics/seqdatabase.ini ~/.bioinformatics ޤEmacs ǥȤͤ Ruby ΥƱƤ misc/ruby-mode.el 򥤥󥹥ȡ뤷ƤȤ褤Ǥ礦 % mkdir -p ~/lib/lisp/ruby % cp ruby-x.x.x/misc/ruby-mode.el ~/lib/lisp/ruby ʤɤȤƤơ~/.emacs ˰ʲ­ޤ ; subdirs (let ((default-directory "~/lib/lisp")) (normal-top-level-add-subdirs-to-load-path) ; ruby-mode (autoload 'ruby-mode "ruby-mode" "Mode for editing ruby source files") (add-to-list 'auto-mode-alist '("\\.rb$" . rd-mode)) (add-to-list 'interpeter-mode-alist '("ruby" . ruby-mode)) == BioRuby BioRuby С 0.7 ʹߤǤϡñ BioRuby ȶ˥󥹥ȡ뤵 bioruby ޥɤǹԤȤǤޤbioruby ޥɤ Ruby ¢Ƥ 󥿥饯ƥ֥ irb ѤƤꡢRuby BioRuby ˤǤ뤳Ȥ ͳ˼¹Ԥ뤳ȤǤޤ % bioruby project1 ǻꤷ̾Υǥ쥯ȥ꤬졢DzϤԤޤ 嵭ξ project1 Ȥǥ쥯ȥ꤬졢˰ʲ ֥ǥ쥯ȥե뤬ޤ data/ 桼βϥե֤ plugin/ ɬפ˱ɲäΥץ饰֤ session/ 䥪֥ȡҥȥʤɤ¸ session/config 桼¸ե session/history 桼ϤޥɤΥҥȥ¸ե session/object ³줿֥ȤγǼե Τdata ǥ쥯ȥϥ桼ͳ˽񤭴ƹޤ ޤsession/history ե򸫤ȡĤɤΤ褦Ԥä ǧ뤳ȤǤޤ ܰʹߤϡƱͤ % bioruby project1 ȤƵưƤ⹽ޤ󤷡줿ǥ쥯ȥ˰ư % cd project1 % bioruby Τ褦˰ʤǵư뤳ȤǤޤ ¾script ޥɤǺ륹ץȥե䡢 web ޥɤǺ Rails Τեʤɤޤ ˤĤƤɬפ˱ƸҤޤ BioRuby ǤϥǥեȤǤĤʥ饤֥ɤ߹Ǥޤ 㤨 readline 饤֥꤬ȤĶǤ Tab ǥ᥽å̾ѿ̾ 䴰ϤǤopen-uri, pp, yaml ʤɤǽ餫ɤ߹ޤƤޤ === , ߥλ --- getseq(str) getseq ޥ(2)Ȥäʸ󤫤䥢ߥλ뤳Ȥ Ǥޤȥߥλ ATGC δ̤ 90% ʾ夫ɤǼưȽꤵޤ ǤϡǤ dna Ȥѿޤ bioruby> dna = getseq("atgcatgcaaaa") ѿȤǧˤ Ruby puts ᥽åɤȤޤ bioruby> puts dna atgcatgcaaaa ե̾Ϳȼ긵ˤե뤫뤳ȤǤޤ GenBank, EMBL, UniProt, FASTA ʤɼפեޥåȤϼưȽ̤ޤ ʳĥҤʤɤΥե̾ǤϤʤȥȤȽꤷޤˡ ʲ UniProt եޥåȤΥȥե뤫ɤ߹Ǥޤ ˡǤϡʣΥȥ꤬ǽΥȥɤ߹ޤޤ bioruby> cdc2 = getseq("p04551.sp") bioruby> puts cdc2 MENYQKVEKIGEGTYGVVYKARHKLSGRIVAMKKIRLEDESEGVPSTAIREISLLKEVNDENNRSN...(ά) ǡ١̾ȥȥ̾ʬäƤС󥿡ͥåȤ̤ ưŪ˼뤳ȤǤޤ bioruby> psaB = getseq("genbank:AB044425") bioruby> puts psaB actgaccctgttcatattcgtcctattgctcacgcgatttgggatccgcactttggccaaccagca...(ά) ɤΥǡ١ɤΤ褦ˡǥȥ뤫ϡBioPerl ʤɤȶ̤ OBDA ե ~/.bioinformatics/seqdatabase.ini Ѥƥǡ١Ȥ˻ꤹ뤳ȤǤޤʸҡˡ ޤEMBOSS seqret ޥɤˤˤбƤޤΤǡ EMBOSS USA ɽǤ⥨ȥǤޤEMBOSS Υޥ˥奢򻲾Ȥ ~/.embossrc ŬڤꤷƤ ɤˡǼ⡢getseq ޥɤˤä֤ϡ Ѥ󥯥饹 Bio::Sequence ˤʤޤ(3) 󤬱ȥߥλΤɤȽꤵƤΤϡ moltype ᥽åɤѤ bioruby> p cdc2.moltype Bio::Sequence::AA bioruby> p psaB.moltype Bio::Sequence::NA Τ褦Ĵ٤뤳ȤǤޤưȽְ꤬äƤʤɤˤ na, aa ᥽åɤǶŪѴǤޤʤΥ᥽åɤ Υ֥ȤŪ˽񤭴ޤ bioruby> dna.aa 
bioruby> p dna.moltype Bio::Sequence::AA bioruby> dna.na bioruby> p dna.moltype Bio::Sequence::NA ޤϡto_naseq, to_aaseq ᥽åɤǶŪѴ뤳ȤǤޤ bioruby> pep = dna.to_aaseq to_naseq, to_aaseq ᥽åɤ֤֥Ȥϡ줾졢 DNA Τ Bio::Sequence::NA 饹ߥλΤ Bio::Sequence::AA 饹Υ֥Ȥˤʤޤ 󤬤ɤΥ饹°뤫 Ruby class ᥽åɤѤ bioruby> p pep.class Bio::Sequence::AA Τ褦Ĵ٤뤳ȤǤޤ ŪѴˡBio::Sequence::NA 饹ޤ Bio::sequence::AA 饹 Τɤ餫Υ֥Ȥˤ seq ᥽åɤȤޤ(4) bioruby> pep2 = cdc2.seq bioruby> p pep2.class Bio::Sequence::AA ޤʲDz⤹ complement translate ʤɤΥ᥽åɤη̤ϡ ֤ȤԤ᥽åɤ Bio::Sequence::NA 饹 ߥλ֤ȤԤ᥽åɤ Bio::sequence::AA 饹 Υ֥Ȥˤʤޤ 䥢ߥλΥ饹 Ruby ʸ󥯥饹Ǥ String ѾƤޤޤBio::Sequence 饹Υ֥Ȥ String ֥ȤȸƱͤƯ褦˹פƤޤΤᡢ length ĹĴ٤ꡢ+ ­碌ꡢ* Ƿ֤ʤɡ Ruby ʸФƹԤѲǽǤ Τ褦ħϥ֥ȻظζϤ¦̤ΰĤȸǤ礦 bioruby> puts dna.length 12 bioruby> puts dna + dna atgcatgcaaaaatgcatgcaaaa bioruby> puts dna * 5 atgcatgcaaaaatgcatgcaaaaatgcatgcaaaaatgcatgcaaaaatgcatgcaaaa :complement 亿ˤϱ complement ᥽åɤƤӤޤ bioruby> puts dna.complement ttttgcatgcat :translate 򥢥ߥλˤ translate ᥽åɤȤޤ 줿ߥλ pep ȤѿƤߤޤ bioruby> pep = dna.translate bioruby> puts pep MHAK ե졼Ѥˤ bioruby> puts dna.translate(2) CMQ bioruby> puts dna.translate(3) ACK ʤɤȤޤ :molecular_weight ʬ̤ molecular_weight ᥽åɤɽޤ bioruby> puts dna.molecular_weight 3718.66444 bioruby> puts pep.molecular_weight 485.605 --- seqstat(seq) seqstat ޥɤȤȡʤɤξ٤ɽޤ bioruby> seqstat(dna) * * * Sequence statistics * * * 5'->3' sequence : atgcatgcaaaa 3'->5' sequence : ttttgcatgcat Translation 1 : MHAK Translation 2 : CMQ Translation 3 : ACK Translation -1 : FCMH Translation -2 : FAC Translation -3 : LHA Length : 12 bp GC percent : 33 % Composition : a - 6 ( 50.00 %) c - 2 ( 16.67 %) g - 2 ( 16.67 %) t - 2 ( 16.67 %) Codon usage : *---------------------------------------------* | | 2nd | | | 1st |-------------------------------| 3rd | | | U | C | A | G | | |-------+-------+-------+-------+-------+-----| | U U |F 0.0%|S 0.0%|Y 0.0%|C 0.0%| u | | U U |F 0.0%|S 0.0%|Y 0.0%|C 0.0%| c | | U U |L 0.0%|S 0.0%|* 0.0%|* 0.0%| a | | UUU |L 0.0%|S 0.0%|* 0.0%|W 0.0%| g | |-------+-------+-------+-------+-------+-----| 
| CCCC |L 0.0%|P 0.0%|H 25.0%|R 0.0%| u | | C |L 0.0%|P 0.0%|H 0.0%|R 0.0%| c | | C |L 0.0%|P 0.0%|Q 0.0%|R 0.0%| a | | CCCC |L 0.0%|P 0.0%|Q 0.0%|R 0.0%| g | |-------+-------+-------+-------+-------+-----| | A |I 0.0%|T 0.0%|N 0.0%|S 0.0%| u | | A A |I 0.0%|T 0.0%|N 0.0%|S 0.0%| c | | AAAAA |I 0.0%|T 0.0%|K 25.0%|R 0.0%| a | | A A |M 25.0%|T 0.0%|K 0.0%|R 0.0%| g | |-------+-------+-------+-------+-------+-----| | GGGG |V 0.0%|A 0.0%|D 0.0%|G 0.0%| u | | G |V 0.0%|A 0.0%|D 0.0%|G 0.0%| c | | G GGG |V 0.0%|A 25.0%|E 0.0%|G 0.0%| a | | GG G |V 0.0%|A 0.0%|E 0.0%|G 0.0%| g | *---------------------------------------------* Molecular weight : 3718.66444 Protein weight : 485.605 // ߥλξϰʲΤ褦ˤʤޤ bioruby> seqstat(pep) * * * Sequence statistics * * * N->C sequence : MHAK Length : 4 aa Composition : A Ala - 1 ( 25.00 %) alanine H His - 1 ( 25.00 %) histidine K Lys - 1 ( 25.00 %) lysine M Met - 1 ( 25.00 %) methionine Protein weight : 485.605 // :composition seqstat ɽƤ composition ᥽åɤ뤳ȤǤޤ ̤ʸǤϤʤ Hash ֤ΤǡȤꤢɽƤߤˤ puts p ޥɤȤɤǤ礦 bioruby> p dna.composition {"a"=>6, "c"=>2, "g"=>2, "t"=>2} ==== 󡢥ߥλΤ¾Υ᥽å ¾ˤ󡢥ߥλФƹԤϿȤޤ :subseq(from, to) ʬФˤ subseq ᥽åɤȤޤ bioruby> puts dna.subseq(1, 3) atg Ruby ʤ¿Υץߥ󥰸ʸ 1 ʸܤ 0 ޤ subseq ᥽åɤ 1 ڤФ褦ˤʤäƤޤ bioruby> puts dna[0, 3] atg Ruby String 饹 slice ᥽å str[] ŬȤʬ 褤Ǥ礦 :window_search(len, step) window_search ᥽åɤȤĹʬη֤ ñ˹ԤȤǤޤDNA 򥳥ɥ˽硢 ʸĤ餷ʤ飳ʸڤФФ褤ΤǰʲΤ褦ˤʤޤ bioruby> dna.window_search(3, 3) do |codon| bioruby+ puts "#{codon}\t#{codon.translate}" bioruby+ end atg M cat H gca A aaa K Υü 1000bp 򥪡Сåפʤ 11000bp Ȥ ֥ڤˤ FASTA եޥåȤϰʲΤ褦ˤʤޤ bioruby> seq.window_search(11000, 10000) do |subseq| bioruby+ puts subseq.to_fasta bioruby+ end Ǹ 10000bp ʤ 3' ü;֤ͤȤΤǡ ɬפʾӼäɽޤ bioruby> i = 1 bioruby> remainder = seq.window_search(11000, 10000) do |subseq| bioruby+ puts subseq.to_fasta("segment #{i*10000}", 60) bioruby+ i += 1 bioruby+ end bioruby> puts remainder.to_fasta("segment #{i*10000}", 60) :splicing(position) GenBank position ʸˤڤФ splicing ᥽åɤǹԤޤ bioruby> puts dna atgcatgcaaaa bioruby> 
puts dna.splicing("join(1..3,7..9)") atggca :randomize randomize ᥽åɤϡ¸ޤޥޤ bioruby> puts dna.randomize agcaatagatac :to_re to_re ᥽åɤϡۣʱɽޤ atgc ѥ󤫤ʤɽѴޤ bioruby> ambiguous = getseq("atgcyatgcatgcatgc") bioruby> p ambiguous.to_re /atgc[tc]atgcatgcatgc/ bioruby> puts ambiguous.to_re (?-mix:atgc[tc]atgcatgcatgc) seq ᥽åɤ ATGC δ̤ͭ 90% ʲȥߥλȤߤʤΤǡ ۣʱ¿ޤޤξ to_naseq ᥽åɤȤä Ū Bio::Sequence::NA ֥ȤѴɬפޤ bioruby> s = getseq("atgcrywskmbvhdn").to_naseq bioruby> p s.to_re /atgc[ag][tc][at][gc][tg][ac][tgc][agc][atc][atg][atgc]/ bioruby> puts s.to_re (?-mix:atgc[ag][tc][at][gc][tg][ac][tgc][agc][atc][atg][atgc]) :names ޤȤȤϤޤ󤬡̾䥢ߥλ̾Ѵ ᥽åɤǤ bioruby> p dna.names ["adenine", "thymine", "guanine", "cytosine", "adenine", "thymine", "guanine", "cytosine", "adenine", "adenine", "adenine", "adenine"] bioruby> p pep.names ["methionine", "histidine", "alanine", "lysine"] :codes ߥλʸɤѴ names Ȼ᥽åɤǤ bioruby> p pep.codes ["Met", "His", "Ala", "Lys"] :gc_percent GC ̤ gc_percent ᥽åɤޤ bioruby> p dna.gc_percent 33 :to_fasta FASTA եޥåȤѴˤ to_fasta ᥽åɤȤޤ bioruby> puts dna.to_fasta("dna sequence") >dna sequence aaccggttacgt === 䥢ߥλΥɡɥɽ򤢤Ĥ ߥλ𡢥ɥơ֥뤿 aminoacids, nucleicacids, codontables, codontable ޥɤҲ𤷤ޤ --- aminoacids ߥλΰ aminoacids ޥɤɽǤޤ bioruby> aminoacids ? 
Pyl pyrrolysine A Ala alanine B Asx asparagine/aspartic acid C Cys cysteine D Asp aspartic acid E Glu glutamic acid F Phe phenylalanine G Gly glycine H His histidine I Ile isoleucine K Lys lysine L Leu leucine M Met methionine N Asn asparagine P Pro proline Q Gln glutamine R Arg arginine S Ser serine T Thr threonine U Sec selenocysteine V Val valine W Trp tryptophan Y Tyr tyrosine Z Glx glutamine/glutamic acid ֤ͤûɽбĹɽΥϥåˤʤäƤޤ bioruby> aa = aminoacids bioruby> puts aa["G"] Gly bioruby> puts aa["Gly"] glycine --- nucleicacids ΰ nucleicacids ޥɤɽǤޤ bioruby> nucleicacids a a Adenine t t Thymine g g Guanine c c Cytosine u u Uracil r [ag] puRine y [tc] pYrimidine w [at] Weak s [gc] Strong k [tg] Keto m [ac] aroMatic b [tgc] not A v [agc] not T h [atc] not G d [atg] not C n [atgc] ֤ͤϱΣʸɽȳΥϥåˤʤäƤޤ bioruby> na = nucleicacids bioruby> puts na["r"] [ag] --- codontables ɥơ֥ΰ codontables ޥɤɽǤޤ bioruby> codontables 1 Standard (Eukaryote) 2 Vertebrate Mitochondrial 3 Yeast Mitochondorial 4 Mold, Protozoan, Coelenterate Mitochondrial and Mycoplasma/Spiroplasma 5 Invertebrate Mitochondrial 6 Ciliate Macronuclear and Dasycladacean 9 Echinoderm Mitochondrial 10 Euplotid Nuclear 11 Bacteria 12 Alternative Yeast Nuclear 13 Ascidian Mitochondrial 14 Flatworm Mitochondrial 15 Blepharisma Macronuclear 16 Chlorophycean Mitochondrial 21 Trematode Mitochondrial 22 Scenedesmus obliquus mitochondrial 23 Thraustochytrium Mitochondrial ֤ͤϥơֹ֥̾ΥϥåˤʤäƤޤ bioruby> ct = codontables bioruby> puts ct[3] Yeast Mitochondorial --- codontable(num) ɥɽΤ codontable ޥɤɽǤޤ bioruby> codontable(11) = Codon table 11 : Bacteria hydrophilic: H K R (basic), S T Y Q N S (polar), D E (acidic) hydrophobic: F L I M V P A C W G (nonpolar) *---------------------------------------------* | | 2nd | | | 1st |-------------------------------| 3rd | | | U | C | A | G | | |-------+-------+-------+-------+-------+-----| | U U | Phe F | Ser S | Tyr Y | Cys C | u | | U U | Phe F | Ser S | Tyr Y | Cys C | c | | U U | Leu L | Ser S | 
STOP | STOP | a | | UUU | Leu L | Ser S | STOP | Trp W | g | |-------+-------+-------+-------+-------+-----| | CCCC | Leu L | Pro P | His H | Arg R | u | | C | Leu L | Pro P | His H | Arg R | c | | C | Leu L | Pro P | Gln Q | Arg R | a | | CCCC | Leu L | Pro P | Gln Q | Arg R | g | |-------+-------+-------+-------+-------+-----| | A | Ile I | Thr T | Asn N | Ser S | u | | A A | Ile I | Thr T | Asn N | Ser S | c | | AAAAA | Ile I | Thr T | Lys K | Arg R | a | | A A | Met M | Thr T | Lys K | Arg R | g | |-------+-------+-------+-------+-------+-----| | GGGG | Val V | Ala A | Asp D | Gly G | u | | G | Val V | Ala A | Asp D | Gly G | c | | G GGG | Val V | Ala A | Glu E | Gly G | a | | GG G | Val V | Ala A | Glu E | Gly G | g | *---------------------------------------------* ֤ͤ Bio::CodonTable 饹Υ֥Ȥǡɥȥߥλ ѴǤǤʤʲΤ褦ʥǡ뤳ȤǤޤ bioruby> ct = codontable(2) bioruby> p ct["atg"] "M" :definition ɥɽ bioruby> puts ct.definition Vertebrate Mitochondrial :start ϥɥ bioruby> p ct.start ["att", "atc", "ata", "atg", "gtg"] :stop ߥɥ bioruby> p ct.stop ["taa", "tag", "aga", "agg"] :revtrans ߥλ򥳡ɤ륳ɥĴ٤ bioruby> p ct.revtrans("V") ["gtc", "gtg", "gtt", "gta"] === եåȥեΥȥ ǡ١Υȥȡեåȥե뤽ΤΤ򰷤ˡҲ𤷤ޤ GenBank ǡ١ǤϡեΥȥ꤬ޤޤ gbphg.seq ե륵ΤǡΥեȤƻȤޤ % wget ftp://ftp.hgc.jp/pub/mirror/ncbi/genbank/gbphg.seq.gz % gunzip gbphg.seq.gz --- getent(str) getseq ޥɤޤǤʤȥΤ ˤ getent ޥ(2)Ȥޤgetseq ޥƱ͡getent ޥɤǤ OBDA, EMBOSS, NCBI, EBI, TogoWS, KEGG API Υǡ١ѲǽǤ(5) ˤĤƤ getseq ޥɤ򻲾ȤƤ bioruby> entry = getent("genbank:AB044425") bioruby> puts entry LOCUS AB044425 1494 bp DNA linear PLN 28-APR-2001 DEFINITION Volvox carteri f. kawasakiensis chloroplast psaB gene for photosystem I P700 chlorophyll a apoprotein A2, strain:NIES-732. (ά) getent ޥɤΰˤ db:entry_id ʸEMBOSS USA ե롢IO Ϳ졢ǡ١Σȥʬʸ֤ޤ ǡ١˸¤餺¿Υǡ١ȥбƤޤ --- flatparse(str) ȥѡߤǡȤˤ flatparse ޥɤȤޤ bioruby> entry = getent("gbphg.seq") bioruby> gb = flatparse(entry) bioruby> puts gb.entry_id AB000833 bioruby> puts gb.definition Bacteriophage Mu DNA for ORF1, sheath protein gpL, ORF2, ORF3, complete cds. 
bioruby> puts psaB.naseq acggtcagacgtttggcccgaccaccgggatgaggctgacgcaggtcagaaatctttgtgacgacaaccgtatcaat (ά) --- getobj(str) getobj ޥ(2)ϡgetent ǥȥʸȤƼ flatparse ѡ֥ȤѴΤƱǤgetent ޥɤƱ դޤ getseqȥ getent ѡ֥Ȥ getobj ȤȤˤʤޤ bioruby> gb = getobj("gbphg.seq") bioruby> puts gb.entry_id AB000833 --- flatfile(file) getent ޥɤϣȥꤷʤᡢΥե򳫤 ƥȥ˽Ԥˤ flatfile ޥɤȤޤ bioruby> flatfile("gbphg.seq") do |entry| bioruby+ # do something on entry bioruby+ end ֥åꤷʤϡեκǽΥȥޤ bioruby> entry = flatfile("gbphg.seq") bioruby> gb = flatparse(entry) bioruby> puts gb.entry_id --- flatauto(file) ƥȥ flatparse Ʊͤ˥ѡ֤ǽ֤˽뤿ˤϡ flatfile ޥɤ flatauto ޥɤȤޤ bioruby> flatauto("gbphg.seq") do |entry| bioruby+ print entry.entry_id bioruby+ puts entry.definition bioruby+ end flatfile Ʊ֥͡åꤷʤϡեκǽΥȥ ѡ֥Ȥ֤ޤ bioruby> gb = flatfile("gbphg.seq") bioruby> puts gb.entry_id === եåȥեΥǥ EMBOSS dbiflat ˻ǽȤơBioRuby, BioPerl ʤɤ˶̤ BioFlat ȤǥåȤߤޤ٥ǥå ƤȥȥμФ®ưפ˹Ԥޤ ˤ꼫ʬѤΥǡ١ڤ˺뤳ȤǤޤ --- flatindex(db_name, *source_file_list) GenBank Υեե gbphg.seq äƤ륨ȥФ mydb Ȥǡ١̾ǥǥåޤ bioruby> flatindex("mydb", "gbphg.seq") Creating BioFlat index (.bioruby/bioflat/mydb) ... done --- flatsearch(db_name, entry_id) mydb ǡ١饨ȥȤФˤ flatsearch ޥɤ Ȥޤ bioruby> entry = flatsearch("mydb", "AB004561") bioruby> puts entry LOCUS AB004561 2878 bp DNA linear PHG 20-MAY-1998 DEFINITION Bacteriophage phiU gene for integrase, complete cds, integration site. ACCESSION AB004561 (ά) === ͡ DB FASTA եޥåȤѴ¸ FASTA եޥåȤǡɸŪѤƤեޥåȤǤ >׵ǤϤޤ룱ܤꡢܰʹߤ󤬤ĤŤޤ ζʸ̵뤵ޤ >entry_id definition ... 
ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT Ԥϡǽñ줬 ID ˤʤäƤ뤳Ȥ¿ΤǤ NCBI BLAST ѥǡ١ǤϤ˹٤ʹ¤ʤƤޤ * (()) * (()) * FASTA format (Wikipedia) (()) BioRuby Υǡ١ȥΥ饹ˤϥȥIDˤĤ ̤Υ᥽åɤѰդƤޤ * entry_id - ȥ ID * definition - ʸ * seq - ζ̥᥽åɤȤȡɤǡ١ȥǤ FASTA եޥåȤѴǤץबñ˺ޤ entry.seq.to_fasta("#{entry.entry_id} #{entry.definition}", 60) ˡBioRuby Ǥϥǡ١ηưȽ̤ǤޤΤǡ GenBank, UniProt ʤ¿μפǡ١Ǥ ե̾ꤹ FASTA եޥåȤѴǤޤ --- flatfasta(fasta_file, *source_file_list) ϥǡ١Υե̾ΥꥹȤ顢ꤷ FASTA եޥåȤ ե륳ޥɤǤǤϤĤ GenBank Υե FASTA եޥåȤѴmyfasta.fa Ȥե¸Ƥޤ bioruby> flatfasta("myfasta.fa", "gbphg.seq", "gbvrl1.seq", "gbvrl2.seq") Saving fasta file (myfasta.fa) ... converting -- gbphg.gbk converting -- gbvrl1.gbk converting -- gbvrl2.gbk done === KEGG API BioRuby Ǥ KEGG API Υ֥ӥñѤǤޤ --- keggdbs ΥͥåȤ KEGG API ̤Ѳǽʥǡ١ΥꥹȤɽޤ bioruby> keggdbs nt: Non-redundant nucleic acid sequence database aa: Non-redundant protein sequence database gb: GenBank nucleic acid sequence database (ά) --- keggorgs KEGG ˼ϿƤʪΥꥹȤɽޤ bioruby> keggorgs aae: Aquifex aeolicus aci: Acinetobacter sp. 
ADP1 afu: Archaeoglobus fulgidus (ά) --- keggpathways KEGG ˼ϿƤѥΥꥹȤɽޤ bioruby> keggpathways path:map00010: Glycolysis / Gluconeogenesis - Reference pathway path:map00020: Citrate cycle (TCA cycle) - Reference pathway path:map00030: Pentose phosphate pathway - Reference pathway (ά) ˣʸ KEGG ʪﵭ򤢤ȡʪѤǤ ѥΰ֤ޤIJ eco ξʲΤ褦ˤʤޤ bioruby> keggpathways("eco") path:eco00010: Glycolysis / Gluconeogenesis - Escherichia coli K-12 MG1655 path:eco00020: Citrate cycle (TCA cycle) - Escherichia coli K-12 MG1655 path:eco00030: Pentose phosphate pathway - Escherichia coli K-12 MG1655 (ά) --- keggapi ʳ KEGG API Υ᥽åɤϡkeggapi ³ƸƤӽФȤ ѤǤޤ bioruby> p keggapi.get_genes_by_pathway("path:eco00010") ["eco:b0114", "eco:b0115", "eco:b0116", "eco:b0356", "eco:b0688", (ά) Ѳǽʥ᥽åɤΰ KEGG API Υޥ˥奢򻲾ȤƤ * (()) === DBGET ΥͥåȤ DBGET ΥޥɤǤ binfo, bfind, bget, btit, bconv KEGG API ѤƤΤޤ޼¹ԤǤ褦ˤʤäƤޤ --- binfo bioruby> binfo *** Last database updates *** Date Database Release #Entries #Residues -------- ------------- ------------------------ ------------ ---------------- 05/12/06 nr-nt 05-12-04 (Dec 05) 63,078,043 111,609,773,616 05/12/06 nr-aa 05-12-05 (Dec 05) 2,682,790 890,953,839 05/10/25 genbank 150.0 (Oct 05) 49,152,445 53,655,236,500 05/12/06 genbank-upd 150.0+/12-04 (Dec 05) 7,470,976 6,357,888,366 (ά) binfo ޥɤ³ƥǡ١̾ꤹ뤳ȤǤܺ٤ʾ ɽޤ bioruby> binfo "genbank" genbank GenBank nucleic acid sequence database gb Release 150.0, Oct 05 National Center for Biotechnology Information 49,152,445 entries, 53,655,236,500 bases Last update: 05/10/25 --- bfind(keyword) bfind ޥɤǥǡ١Ф륭ɥԤȤǤޤ ǡ١̾ȸɤʸϤޤ bioruby> list = bfind "genbank ebola human" bioruby> puts list gb:BD177378 [BD177378] A monoclonal antibody recognizing ebola virus. gb:BD177379 [BD177379] A monoclonal antibody recognizing ebola virus. (ά) --- bget(entry_id) bget ޥɤǻꤷ db:entry_id Υǡ١ȥǤޤ bioruby> entry = bget "gb:BD177378" bioruby> puts entry LOCUS BD177378 24 bp DNA linear PAT 16-APR-2003 DEFINITION A monoclonal antibody recognizing ebola virus. 
(ά) === ץ ȼ򥹥ץȲ¸ƤȤǤޤ bioruby> script -- 8< -- 8< -- 8< -- Script -- 8< -- 8< -- 8< -- bioruby> seq = getseq("gbphg.seq") bioruby> p seq bioruby> p seq.translate bioruby> script -- >8 -- >8 -- >8 -- Script -- >8 -- >8 -- >8 -- Saving script (script.rb) ... done 줿 script.rb ϰʲΤ褦ˤʤޤ #!/usr/bin/env bioruby seq = getseq("gbphg.seq") p seq p seq.translate ΥץȤ bioruby ޥɤǼ¹Ԥ뤳ȤǤޤ % bioruby script.rb === ʰץ뵡ǽ --- cd(dir) ȥǥ쥯ȥѹޤ bioruby> cd "/tmp" "/tmp" ۡǥ쥯ȥˤϰĤ cd ¹Ԥޤ bioruby> cd "/home/k" --- pwd ȥǥ쥯ȥɽޤ bioruby> pwd "/home/k" --- dir ȥǥ쥯ȥΥեɽޤ bioruby> dir UGO Date Byte File ------ ---------------------------- ----------- ------------ 40700 Tue Dec 06 07:07:35 JST 2005 1768 "Desktop" 40755 Tue Nov 29 16:55:20 JST 2005 2176 "bin" 100644 Sat Oct 15 03:01:00 JST 2005 42599518 "gbphg.seq" (ά) bioruby> dir "gbphg.seq" UGO Date Byte File ------ ---------------------------- ----------- ------------ 100644 Sat Oct 15 03:01:00 JST 2005 42599518 "gbphg.seq" --- head(file, lines = 10) ƥȥե䥪֥ȤƬ 10 Ԥɽޤ bioruby> head "gbphg.seq" GBPHG.SEQ Genetic Sequence Data Bank October 15 2005 NCBI-GenBank Flat File Release 150.0 Phage Sequences 2713 loci, 16892737 bases, from 2713 reported sequences ɽԿꤹ뤳ȤǤޤ bioruby> head "gbphg.seq", 2 GBPHG.SEQ Genetic Sequence Data Bank October 15 2005 ƥȤäƤѿƬ򸫤뤳ȤǤޤ bioruby> entry = getent("gbphg.seq") bioruby> head entry, 2 GBPHG.SEQ Genetic Sequence Data Bank October 15 2005 --- disp(obj) ƥȥե䥪֥ȤȤڡ㡼ɽޤ ǻѤڡ㡼 pager ޥɤѹ뤳ȤǤޤʸҡˡ bioruby> disp "gbphg.seq" bioruby> disp entry bioruby> disp [1, 2, 3] * 4 === ѿ --- ls å˺ѿʥ֥ȡˤΰɽޤ bioruby> ls ["entry", "seq"] bioruby> a = 123 ["a", "entry", "seq"] --- rm(symbol) ѿõޤ bioruby> rm "a" bioruby> ls ["entry", "seq"] --- savefile(filename, object) ѿ¸ƤƤƥȥե¸ޤ bioruby> savefile "testfile.txt", entry Saving data (testfile.txt) ... done bioruby> disp "testfile.txt" === Ƽ ³λȤߤȤ BioRuby 뽪λ session ǥ쥯ȥ ҥȥꡢ֥ȡĿ꤬ͤ¸졢ư˼ưŪ ɤ߹ޤޤ --- config BioRuby γƼɽޤ bioruby> config message = "...BioRuby in the shell..." 
marshal = [4, 8] color = false pager = nil echo = false echo ɽ뤫ɤڤؤޤon ξϡputs p ʤɤ ĤʤƤɾ̤ͤɽޤ irb ޥɤξϽ꤬ on ˤʤäƤޤbioruby ޥɤǤ Ĺ䥨ȥʤĹʸ򰷤Ȥ¿ᡢǤ off ˤƤޤ bioruby> config :echo Echo on ==> nil bioruby> config :echo Echo off ɥɽʤɡǽʾ˥顼ɽ뤫ɤڤؤޤ 顼ɽξ硢ץץȤˤ⿧ĤޤΤȽ̤Ǥޤ bioruby> config :color bioruby> codontable (դ) ¹Ԥ뤿Ӥ꤬ڤؤޤ bioruby> config :color bioruby> codontable (ʤ) BioRuby 뵯ưɽ륹ץåå㤦ʸ ѹޤβϥץѤΥǥ쥯ȥ꤫ꤷƤΤ 褤Ǥ礦 bioruby> config :message, "Kumamushi genome project" K u m a m u s h i g e n o m e p r o j e c t Version : BioRuby 0.8.0 / Ruby 1.8.4 ǥեȤʸ᤹ˤϡʤǼ¹Ԥޤ bioruby> config :message BioRuby 뵯ưɽ륹ץååݥ ˥᡼ɽ뤫ɤڤؤޤ ¹Ԥ뤿Ӥ꤬ڤؤޤ bioruby> config :splash Splash on --- pager(command) disp ޥɤǼºݤѤڡ㡼ڤؤޤ bioruby> pager "lv" Pager is set to 'lv' bioruby> pager "less -S" Pager is set to 'less -S' ڡ㡼ѤʤˤϰʤǼ¹Ԥޤ bioruby> pager Pager is set to 'off' ڡ㡼 off λ˰ʤǼ¹ԤȴĶѿ PAGER ͤѤޤ bioruby> pager Pager is set to 'less' === ҥ --- doublehelix(sequence) DNA 򥢥Ȥɽ륪ޥǽޤ Ŭʱ seq äݤɽƤߤޤ礦 bioruby> dna = getseq("atgc" * 10).randomize bioruby> doublehelix dna ta t--a a---t a----t a----t t---a g--c cg gc a--t g---c c----g c----g (ά) === Ҳ --- midifile(midifile, sequence) DNA MIDI եѴ륪ޥǽޤ Ŭʱ seq Ȥä midifile.mid MIDI ץ쥤䡼DZդƤߤޤ礦 bioruby> midifile("midifile.mid", seq) Saving MIDI file (midifile.mid) ... 
done ʾ BioRuby β򽪤ꡢʲǤ BioRuby 饤֥꼫Τ Ԥޤ == 𡦥ߥλ (Bio::Sequence 饹) Bio::Sequence 饹ϡФ͡ԤȤǤޤ ñȤơû atgcatgcaaaa ȤäơؤѴ ʬڤФη׻ߥλؤʬ̷׻ʤɤ ԤʤäƤߤޤߥλؤǤϡɬפ˱Ʋܤ Ϥ뤫ե졼ꤷꡢcodontable.rb Ƥ륳ɥơ ֥椫ѤΤꤷꤹǤޤʥɥơ֥ ֹ (()) 򻲾ȡˡ #!/usr/bin/env ruby require 'bio' seq = Bio::Sequence::NA.new("atgcatgcaaaa") puts seq # puts seq.complement # (Bio::Sequence::NA) puts seq.subseq(3,8) # 3 ܤ 8 ܤޤ p seq.gc_percent # GC γ (Integer) p seq.composition # (Hash) puts seq.translate # (Bio::Sequence::AA) puts seq.translate(2) # ʸܤ̤ϣ puts seq.translate(1,9) # ֤Υɥơ֥ p seq.translate.codes # ߥλʸɤɽ (Array) p seq.translate.names # ߥλ̾ɽ (Array) p seq.translate.composition # ߥλ (Hash) p seq.translate.molecular_weight # ʬ̤׻ (Float) puts seq.complement.translate # print, puts, p Ƥ̤ɽ뤿 Ruby ɸ᥽åɤǤ ܤȤʤ print ٤ơputs ϲԤưǤĤƤ롢 p ʸʳΥ֥Ȥʹ֤䤹褦ɽƤ롢 ȤħޤΤŬȤʬޤˡ require 'pp' ȤлȤ褦ˤʤ pp ᥽åɤϡp ɽ䤹ʤޤ Bio::Sequence::NA 饹Ρߥλ Bio::Sequence::AA 饹Υ֥Ȥˤʤޤ줾 Bio::Sequence 饹Ѿ Ƥ뤿ᡢ¿Υ᥽åɤ϶̤Ǥ Bio::Sequence::NA, AA 饹 Ruby String 饹ѾƤΤ String 饹ĥ᥽åɤȤǤޤ㤨ʬڤФˤ Bio::Sequence 饹 subseq(from,to) ᥽åɤ¾ˡString 饹 [] ᥽åɤȤȤǤޤ Ruby ʸ 1 ʸܤ 0 ܤȤƿˤդɬפǤȤС puts seq.subseq(1, 3) puts seq[0, 3] Ϥɤ seq κǽΣʸ atg ɽޤ Τ褦ˡString Υ᥽åɤȤϡʪؤ̻Ѥ 1 ʸܤ 1 ܤȤƿ 1 ɬפޤsubseq ᥽åɤ ǤäƤޤޤfrom, to Τɤ餫Ǥ 0 ʲξ 㳰ȯ褦ˤʤäƤޤˡ ޤǤν BioRuby ǻȰʲΤ褦ˤʤޤ # ιԤ seq = seq("atgcatgcaaaa") Ǥ褤 bioruby> seq = Bio::Sequence::NA.new("atgcatgcaaaa") # ɽ bioruby> puts seq atgcatgcaaaa # ɽ bioruby> puts seq.complement ttttgcatgcat # ʬɽʣܤ飸ܤޤǡ bioruby> puts seq.subseq(3,8) gcatgc # GC% ɽ bioruby> p seq.gc_percent 33 # ɽ bioruby> p seq.composition {"a"=>6, "c"=>2, "g"=>2, "t"=>2} # ߥλؤ bioruby> puts seq.translate MHAK # 򳫻ϱȤ bioruby> puts seq.translate(2) CMQ # ֤Υɥơ֥Ѥ bioruby> puts seq.translate(1,9) MHAN # 줿ߥλʸɤɽ bioruby> p seq.translate.codes ["Met", "His", "Ala", "Lys"] # 줿ߥλ򥢥ߥλ̾ɽ bioruby> p seq.translate.names ["methionine", "histidine", "alanine", "lysine"] # 줿ߥλɽ bioruby> p seq.translate.composition {"K"=>1, "A"=>1, "M"=>1, "H"=>1} # 줿ߥλʬ̤ɽ bioruby> p seq.translate.molecular_weight 485.605 # bioruby> puts seq.complement.translate FCMH # 
ʬʣܤ飳ܤޤǡ bioruby> puts seq.subseq(1, 3) atg # ʬʣܤ飳ܤޤǡ bioruby> puts seq[0, 3] atg window_search(window_size, step_size) ᥽åɤȤȡФƥ ɥ򤺤餷ʤ餽줾ʬФԤȤǤޤ Ruby ĹΤҤȤĤǤ֥֥åפˤäơ֤줾Фפ ʷ餫Ƥ˽񤯤ȤǽǤʲǤϡsubseq Ȥѿˤ줾 ʬʤ֥å򷫤֤¹Ԥ뤳Ȥˤʤޤ * 100 𤴤Ȥˡ1𤺤Ĥ餷ʤʿ GC% ׻ɽ seq.window_search(100) do |subseq| puts subseq.gc_percent end ֥åǼʬ⡢Ʊ Bio::Sequence::NA ޤ Bio::Sequence::AA 饹Υ֥ȤʤΤǡ󥯥饹λƤΥ åɤ¹Ԥ뤳ȤǤޤ ޤܤΰ˰ưꤹ뤳Ȥ褦ˤʤäƤΤǡ * ɥñ̤Ǥ餷ʤ 15 5 ĴΥڥץɤɽ seq.window_search(15, 3) do |subseq| puts subseq.translate end ȤäȤǤޤ˰ưʤüʬ᥽å Τ֤ͤȤ᤹褦ˤʤäƤΤǡ * Υ 10000bp Ȥ˥֥ڤˤ FASTA եޥåȤ ΤȤü 1000bp ϥСåפ10000bp ʤ 3' ü Ӽäɽ i = 1 remainder = seq.window_search(10000, 9000) do |subseq| puts subseq.to_fasta("segment #{i}", 60) i += 1 end puts remainder.to_fasta("segment #{i}", 60) Τ褦ʻȴñˤǤޤ ɥȰưƱˤȥСåפʤɥ ǤΤǡ * ɥ٤ codon_usage = Hash.new(0) seq.window_search(3, 3) do |subseq| codon_usage[subseq] += 1 end * 10 Ĵ𤺤ʬ̤׻ seq.window_search(10, 10) do |subseq| puts subseq.molecular_weight end ȤäѤͤޤ ºݤˤ Bio::Sequence::NA ֥Ȥϥե뤫ɤ߹ʸ ꡢǡ١ΤȤäꤷޤȤС #!/usr/bin/env ruby require 'bio' input_seq = ARGF.read # Ϳ줿եԤɤ߹ my_naseq = Bio::Sequence::NA.new(input_seq) my_aaseq = my_naseq.translate puts my_aaseq Υץ na2aa.rb Ȥơʲα gtggcgatctttccgaaagcgatgactggagcgaagaaccaaagcagtgacatttgtctg atgccgcacgtaggcctgataagacgcggacagcgtcgcatcaggcatcttgtgcaaatg tcggatgcggcgtga 񤤤ե my_naseq.txt ɤ߹ % ./na2aa.rb my_naseq.txt VAIFPKAMTGAKNQSSDICLMPHVGLIRRGQRRIRHLVQMSDAA* Τ褦ˤʤޤʤߤˡΤ餤ʤûȣԤǽ񤱤ޤ % ruby -r bio -e 'p Bio::Sequence::NA.new($<.read).translate' my_naseq.txt եΤݤʤΤǡϥǡ١ɬפ Ƥߤޤ == GenBank Υѡ (Bio::GenBank 饹) GenBank ΥեѰդƤʼ긵ˤʤϡ ftp://ftp.ncbi.nih.gov/genbank/ .seq եɤޤˡ % wget ftp://ftp.hgc.jp/pub/mirror/ncbi/genbank/gbphg.seq.gz % gunzip gbphg.seq.gz ޤϡƥȥ꤫ ID ʸФ FASTA Ѵ ߤޤ礦 Bio::GenBank::DELIMITER GenBank 饹Ƥǡ ǡ١Ȥ˰ۤʤ륨ȥζڤʸʤȤ GenBank ξ // ФƤʤƤɤ褦ˤʤäƤޤ #!/usr/bin/env ruby require 'bio' while entry = gets(Bio::GenBank::DELIMITER) gb = Bio::GenBank.new(entry) # GenBank ֥ print ">#{gb.accession} " # ACCESSION ֹ puts gb.definition # DEFINITION puts gb.naseq # Sequence::NA ֥ȡ end νǤ GenBank եΥǡ¤˰¸Ƥޤ ե뤫ΥǡϤ򰷤饹 Bio::FlatFile 
Ѥ뤳Ȥǡ ʲΤ褦˶ڤʸʤɤ򵤤ˤ񤯤ȤǤޤ #!/usr/bin/env ruby require 'bio' ff = Bio::FlatFile.new(Bio::GenBank, ARGF) ff.each_entry do |gb| definition = "#{gb.accession} #{gb.definition}" puts gb.naseq.to_fasta(definition, 60) end ΰ㤦ǡȤFASTAեޥåȤΥեɤ߹ȤǤ⡢ #!/usr/bin/env ruby require 'bio' ff = Bio::FlatFile.new(Bio::FastaFormat, ARGF) ff.each_entry do |f| puts "definition : " + f.definition puts "nalen : " + f.nalen.to_s puts "naseq : " + f.naseq end Τ褦ˡƱ褦ʽǺѤޤޤ ˡ Bio::DB 饹 open ᥽åɤƱͤΤȤǤޤȤС #!/usr/bin/env ruby require 'bio' ff = Bio::GenBank.open("gbvrl1.seq") ff.each_entry do |gb| definition = "#{gb.accession} #{gb.definition}" puts gb.naseq.to_fasta(definition, 60) end ʤɤȽ񤯤ȤǤޤʤνϤޤȤƤޤ) ˡGenBank ʣ FEATURES ѡɬפʾФޤ ޤ /tranlation="ߥλ" Ȥ Qualifier ߥλФɽƤߤޤ #!/usr/bin/env ruby require 'bio' ff = Bio::FlatFile.new(Bio::GenBank, ARGF) # GenBank ΣȥꤴȤ ff.each_entry do |gb| # FEATURES ǤĤĽ gb.features.each do |feature| # Feature ˴ޤޤ Qualifier ƥϥåѴ hash = feature.to_hash # Qualifier translation if hash['translation'] # ȥΥåֹɽ puts ">#{gb.accession} puts hash['translation'] end end end ˡFeature Υݥ˽񤫤Ƥ󤫤饨ȥα ץ饤󥰤Τ /translation= ˽񤫤Ƥ ξɽ٤Ƥߤޤ礦 #!/usr/bin/env ruby require 'bio' ff = Bio::FlatFile.new(Bio::GenBank, ARGF) # GenBank ΣȥꤴȤ ff.each_entry do |gb| # ACCESSION ֹʪ̾ɽ puts "### #{gb.accession} - #{gb.organism}" # FEATURES ǤĤĽ gb.features.each do |feature| # Feature position (join ...ʤ) Ф position = feature.position # Feature ˴ޤޤ Qualifier ƥϥåѴ hash = feature.to_hash # /translation= ʤХå next unless hash['translation'] # /gene=, /product= ʤɤ Qualifier ̾ʤɤξ򽸤 gene_info = [ hash['gene'], hash['product'], hash['note'], hash['function'] ].compact.join(', ') puts "## #{gene_info}" # position ξˤäƥץ饤󥰡 puts ">NA splicing('#{position}')" puts gb.naseq.splicing(position) # ߥλʥץ饤󥰤󤫤 puts ">AA translated by splicing('#{position}').translate" puts gb.naseq.splicing(position).translate # ߥλ/translation= ˽񤫤ƤΤΡ puts ">AA original translation" puts hash['translation'] end end ⤷ѤƤ륳ɥơ֥뤬ǥե (universal) Ȱäꡢ ǽΥɥ "atg" ʳäꡢΥƥ󤬴ޤޤƤꡢ 뤤 
BioRuby ˥ХСɽ룲ĤΥߥλ ۤʤˤʤޤ ǻѤƤ Bio::Sequence#splicing ᥽åɤϡGenBank, EMBL, DDBJ եޥåȤǻȤƤ Location ɽ򸵤ˡ󤫤 ʬڤФϤʥ᥽åɤǤ splicing ᥽åɤΰˤ GenBank Location ʸʳ BioRuby Bio::Locations ֥ȤϤȤǽǤ ̾ϸƤ Location ʸʬ䤹Τޤ Location ʸΥեޥåȤ Bio::Locations ˤĤƾܤΤꤿ BioRuby bio/location.rb 򸫤Ƥ * GenBank Υǡ Feature ǻȤƤ Location ʸ naseq.splicing('join(2035..2050,complement(1775..1818),13..345') * 餫 Locations ֥ȤѴƤϤƤ褤 locs = Bio::Locations.new('join((8298.8300)..10206,1..855)') naseq.splicing(locs) ʤߤˡߥλ (Bio::Sequence::AA) ˤĤƤ splicing ᥽å ѤʬФȤǽǤ * ߥλʬڤФʥʥڥץɤʤɡ aaseq.splicing('21..119') === GenBank ʳΥǡ١ BioRuby ǤϡGenBank ʳΥǡ١ˤĤƤŪʰƱǡ ǡ١Σȥʬʸбǡ١Υ饹ϤС ѡ줿̤֥Ȥˤʤä֤äƤޤ ǡ١Υեåȥե뤫飱ȥꤺļФƥѡ줿 ֥ȤФˤϡˤФƤ Bio::FlatFile Ȥޤ Bio::FlatFile.new ΰˤϥǡ١б BioRuby ǤΥ饹 ̾ (Bio::GenBank Bio::KEGG::GENES ʤ) ꤷޤ ff = Bio::FlatFile.new(Bio::ǡ١饹̾, ARGF) Ф餷Ȥˡ¤ FlatFile 饹ϥǡ١μưǧ ǤޤΤǡ ff = Bio::FlatFile.auto(ARGF) ȤΤִñǤ #!/usr/bin/env ruby require 'bio' ff = Bio::FlatFile.auto(ARGF) ff.each_entry do |entry| p entry.entry_id # ȥ ID p entry.definition # ȥʸ p entry.seq # ǡ١ξ end ff.close ˡǡ١Ĥ˺ʤˤ Ruby Υ֥å ѤưʲΤ褦˽񤯤Τ褤Ǥ礦 #!/usr/bin/env ruby require 'bio' Bio::FlatFile.auto(ARGF) do |ff| ff.each_entry do |entry| p entry.entry_id # ȥ ID p entry.definition # ȥʸ p entry.seq # ǡ١ξ end end ѡ줿֥Ȥ顢ȥΤ줾ʬФ ᥽åɤϥǡ١˰ۤʤޤ褯ܤˤĤƤ * entry_id ᥽å ȥ ID ֹ椬֤ * definition ᥽å ȥԤ֤ * reference ᥽å ե󥹥֥Ȥ֤ * organism ᥽å ʪ̾ * seq naseq aaseq ᥽å б󥪥֥Ȥ֤ ʤɤΤ褦˶̲褦ȤƤޤƤΥ᥽åɤƤ ǤϤޤʶ̲λؿˤ bio/db.rb ȡˡޤ٤ʬϳ ǡ١ѡ˰ۤʤΤǡ줾ΥɥȤ˽ޤ §Ȥơ᥽å̾ʣξϡ֥ȤȤ֤ޤ Ȥ references ᥽åɤĥ饹ʣ Bio::Reference ֥ Ȥ Array ˤ֤ޤ̤Υ饹Ǥñ reference ᥽å ʤĤ Bio::Reference ֥Ȥ֤ȤäǤ == PDB Υѡ (Bio::PDB 饹) Bio::PDB ϡPDB ɤ߹िΥ饹ǤPDB ǡ١ PDB, mmCIF, XML (PDBML) ΣΥեޥåȤ󶡤Ƥޤ Τ BioRuby бƤΤ PDB եޥåȤǤ PDB եޥåȤλͤϡʲ Protein Data Bank Contents Guide ȤƤ * (()) === PDB ǡɤ߹ PDB Σȥ꤬ 1bl8.pdb Ȥե˳ǼƤϡ Ruby Υեɤ߹ߵǽȤä entry = File.read("1bl8.pdb") Τ褦ˤ뤳ȤǡȥƤʸȤ entry Ȥѿ 뤳ȤǤޤȥƤѡˤ pdb = Bio::PDB.new(entry) Ȥޤǥȥ꤬ Bio::PDB ֥ȤȤʤꡢǤդΥǡ Ф褦ˤʤޤ PDB եޥåȤ Bio::FlatFile ˤ뼫ưǧǽǤߤ եʣȥޤˤбƤޤ Bio::FlatFile Ȥäƣȥʬɤ߹ˤϡ pdb = Bio::FlatFile.auto("1bl8.pdb") { |ff| ff.next_entry } ȤޤɤˡǤѿ pdb ˤƱ̤ޤ === ֥Ȥγع¤ PDB 
ȥϡѿʸʤ ID դƤޤ Bio::PDB ֥Ȥ ID Фˤ entry_id ᥽åɤȤޤ p pdb.entry_id # => "1BL8" ȥγפ˴ؤб᥽åɤǼФȤǤޤ p pdb.definition # => "POTASSIUM CHANNEL (KCSA) FROM STREPTOMYCES LIVIDANS" p pdb.keywords # => ["POTASSIUM CHANNEL", "INTEGRAL MEMBRANE PROTEIN"] ¾ˡϿԤʸ¸ˡʤɤξǤޤʤ줾 authors, jrnl, method ᥽åɡˡ PDB ǡϡŪˤϣԤĤΥ쥳ɤƤޤ Ԥ꤭ʤǡʣԤ˳Ǽ continuation Ȥ ȤߤѰդƤޤܤϣԣ쥳ɤǤ ƹԤƬʸιԤΥǡμ򼨤̾ʥ쥳ɡˤˤʤޤ BioRuby ǤϡHEADER 쥳ɤФƤ Bio::PDB::Record::HEADER 饹 TITLE 쥳ɤФƤ Bio::PDB::Record::TITLE 饹Ȥ褦 Ūˤϳƥ쥳ɤб륯饹򣱤ѰդƤޤ REMARK JRNL 쥳ɤ˴ؤƤϡ줾ʣΥեޥåȤ ¸ߤ뤿ᡢʣΥ饹ѰդƤޤ ƥ쥳ɤ˥äȤñˡ record ᥽åɤǤ pdb.record("HELIX") Τ褦ˤȡ PDB ȥ˴ޤޤƤ HELIX 쥳ɤ Bio::PDB::Record::HELIX 饹Υ֥ȤȤƼǤޤ ΤȤդޤʲǤϡPDB ȥΥᥤƤǤΩι¤ ؤǡ¤ΰ򸫤Ƥޤ ==== : Bio::PDB::Record::ATOM, Bio::PDB::Record::HETATM 饹 PDB ȥϡѥ˻DNA,RNAˤ䤽¾ʬҤΩι¤ ŪˤϸҤΣɸޤǤޤ ѥޤϳ˻θҤκɸϡATOM 쥳ɤ˳ǼƤޤ б륯饹ϡBio::PDB::Record::ATOM 饹Ǥ ѥ˻ʳθҤκɸϡHETATM 쥳ɤ˳ǼƤޤ б륯饹ϡBio::PDB::Record::HETATM 饹Ǥ HETATM饹 ATOM 饹ѾƤ뤿ᡢATOM HETATM ᥽åɤλȤϤޤäƱǤ ==== ߥλĴʤޤϱ: Bio::PDB::Residue 饹 ߥλޤϣñ̤ǸҤޤȤ᤿Τ Bio::PDB::Residue Ǥ Bio::PDB::Residue ֥Ȥϡİʾ Bio::PDB::Record::ATOM ֥Ȥޤߤޤ ==== ʪ: Bio::PDB::Heterogen 饹 ѥ˻ʳʬҤθҤϡŪˤʬñ̤ Bio::PDB::Heterogen ˤޤȤƤޤ Bio::PDB::Heterogen ֥Ȥϡİʾ Bio::PDB::Record::HETATM ֥Ȥޤߤޤ ==== ʥ: Bio::PDB::Chain 饹 Bio::PDB::Chain ϡʣ Bio::PDB::Residue ֥Ȥʤ ĤΥѥޤϳ˻ȡʣ Bio::PDB::Heterogen ֥ ʤ룱İʾΤʳʬҤǼǡ¤Ǥ ʤȾξϡѥ˻Bio::PDB::Residueˤ ʳʬҡBio::PDB::HeterogenˤΤɤ餫षޤ Chain ҤȤĤޤޤʤ PDB ȥǤξľ礬褦Ǥ Chain ˤϡѿʸ ID դƤޤChain ҤȤĤ ޤޤʤ PDB ȥξ϶ʸΤȤ⤢ޤˡ ==== ǥ: Bio::PDB::Model İʾ Bio::PDB::Chain ޤäΤ Bio::PDB::Model Ǥ 뾽¤ξ硢Model ̾ĤǤNMR ¤ξ硢 ʣ Model ¸ߤ뤳Ȥޤ ʣ Model ¸ߤ硢 Model ˤϥꥢֹ椬դޤ ơİʾ Model ޤäΤBio::PDB ֥Ȥˤʤޤ === Ҥ˥᥽å Bio::PDB#each_atom Ƥ ATOM ֤ˣĤé륤ƥ졼Ǥ pdb.each_atom do |atom| p atom.xyz end each_atom ᥽åɤ Model, Chain, Residue ֥ȤФƤ Ѥ뤳ȤǤ줾졢 Model, Chain, Residue Τ٤Ƥ ATOM 򤿤ɤ륤ƥ졼ȤƯޤ Bio::PDB#atoms Ƥ ATOM Ȥ֤᥽åɤǤ p pdb.atoms.size # => 2820 Ĥ ATOM ޤޤ뤳Ȥ狼 each_atom Ʊͤ atoms ᥽åɤ Model, Chain, Residue ֥ ФƻѲǽǤ pdb.chains.each do |chain| p chain.atoms.size # => Chain ATOM ɽ end Bio::PDB#each_hetatm ϡƤ HETATM ֤ˣĤé륤ƥ졼Ǥ pdb.each_hetatm do |hetatm| p hetatm.xyz end Bio::PDB#hetatms Ƥ HETATM Ȥ֤Τ hetatms ᥽åɤǤ p pdb.hetatms.size 
atoms ξƱͤˡModel, Chain, Heterogen ֥Ȥ ФƻѲǽǤ ==== Bio::PDB::Record::ATOM, Bio::PDB::Record::HETATM 饹λȤ ATOM ϥѥ˻DNARNAˤ븶ҡHETATM Ϥʳ ҤǼ뤿Υ饹ǤHETATM ATOM 饹ѾƤ뤿 Υ饹ǥ᥽åɤλȤϤޤäƱǤ p atom.serial # ꥢֹ p atom.name # ̾ p atom.altLoc # Alternate location indicator p atom.resName # ߥλ̾ޤϲʪ̾ p atom.chainID # Chain ID p atom.resSeq # ߥλĴΥֹ p atom.iCode # Code for insertion of residues p atom.x # X ɸ p atom.y # Y ɸ p atom.z # Z ɸ p atom.occupancy # Occupancy p atom.tempFactor # Temperature factor p atom.segID # Segment identifier p atom.element # Element symbol p atom.charge # Charge on the atom Υ᥽å̾ϡ§Ȥ Protein Data Bank Contents Guide ܤ˹碌Ƥޤ᥽å̾ resName resSeq Ȥä̾ˡ CamelCaseˤѤƤΤϤΤǤ 줾Υ᥽åɤ֤ǡΰ̣ϡͽ򻲹ͤˤƤ ¾ˤ⡢Ĥʥ᥽åɤѰդƤޤ xyz ᥽åɤϡɸ򣳼Υ٥ȥȤ֤᥽åɤǤ Υ᥽åɤϡRuby Vector 饹ѾƣΥ٥ȥ ò Bio::PDB::Coordinate 饹Υ֥Ȥ֤ޤ : VectorѾ饹ΤϤޤ侩ʤ褦ʤΤǡ 衢Vector饹Υ֥Ȥ֤褦ѹ뤫⤷ޤˡ p atom.xyz ٥ȥʤΤǡ­Ѥʤɤ뤳ȤǤޤ # Ҵ֤εΥ p (atom1.xyz - atom2.xyz).r # r ϥ٥ȥͤ᥽å # Ѥ p atom1.xyz.inner_product(atom2.xyz) ¾ˤϡθҤб TER, SIGATM, ANISOU 쥳ɤ ter, sigatm, anisou ᥽åɤѰդƤޤ === ߥλĴ (Residue) ˥᥽å Bio::PDB#each_residue ϡƤ Residue ֤é륤ƥ졼Ǥ each_residue ᥽åɤϡModel, Chain ֥ȤФƤ Ѥ뤳ȤǤ줾 Model, Chain ˴ޤޤƤ Residue é륤ƥ졼ȤƯޤ pdb.each_residue do |residue| p residue.resName end Bio::PDB#residues ϡƤ Residue Ȥ֤᥽åɤǤ each_residue ƱͤˡModel, Chain ֥ȤФƤѲǽǤ p pdb.residues.size === ʪ (Heterogen) ˥᥽å Bio::PDB#each_heterogen Ƥ Heterogen ֤ˤɤ륤ƥ졼 Bio::PDB#heterogens Ƥ Heterogen Ȥ֤᥽åɤǤ pdb.each_heterogen do |heterogeon| p heterogen.resName end p pdb.heterogens.size Υ᥽åɤ Residue Ʊͤ Model, Chain ֥ȤФƤ ѲǽǤ === Chain, Model ˥᥽å ƱͤˡBio::PDB#each_chain Ƥ Chain ֤ˤɤ륤ƥ졼 Bio::PDB#chains Ƥ Chain Ȥ֤᥽åɤǤ Υ᥽åɤ Model ֥ȤФƤѲǽǤ Bio::PDB#each_model Ƥ Model ֤ˤɤ륤ƥ졼 Bio::PDB#models Ƥ Model Ȥ֤᥽åɤǤ === PDB Chemical Component Dictionary Υǡɤ߹ Bio::PDB::ChemicalComponent 饹ϡPDB Chemical Component Dictionary ʵ̾ HET Group DictionaryˤΥѡǤ PDB Chemical Component Dictionary ˤĤƤϰʲΥڡ򻲾ȤƤ * (()) ǡϰʲǥɤǤޤ * (()) Υ饹ϡRESIDUE ϤޤäƶԤǽ룱ȥѡޤ PDB եޥåȤˤΤбƤޤˡ Bio::FlatFile ˤեưȽ̤бƤޤ Υ饹Τ ID 鲽ʪ򸡺ꤹ뵡ǽϻäƤޤ br_bioflat.rb ˤ륤ǥåˤбƤޤΤǡ ɬפʤ餽ѤƤ 
Bio::FlatFile.auto("het_dictionary.txt") |ff| ff.each do |het| p het.entry_id # ID p het.hetnam # HETNAM 쥳ɡʲʪ̾Ρ p het.hetsyn # HETSYM 쥳ɡʲʪ̾ p het.formul # FORMUL 쥳ɡʲʪ p het.conect # CONECT 쥳 end end Ǹ conect ᥽åɤϡʪη Hash Ȥ֤ޤ ȤСΡΥȥϼΤ褦ˤʤޤ RESIDUE EOH 9 CONECT C1 4 C2 O 1H1 2H1 CONECT C2 4 C1 1H2 2H2 3H2 CONECT O 2 C1 HO CONECT 1H1 1 C1 CONECT 2H1 1 C1 CONECT 1H2 1 C2 CONECT 2H2 1 C2 CONECT 3H2 1 C2 CONECT HO 1 O END HET EOH 9 HETNAM EOH ETHANOL FORMUL EOH C2 H6 O1 ΥȥФ conect ᥽åɤƤ֤ { "C1" => [ "C2", "O", "1H1", "2H1" ], "C2" => [ "C1", "1H2", "2H2", "3H2" ], "O" => [ "C1", "HO" ], "1H1" => [ "C1" ], "1H2" => [ "C2" ], "2H1" => [ "C1" ], "2H2" => [ "C2" ], "3H2" => [ "C2" ], "HO" => [ "O" ] } Ȥ Hash ֤ޤ ޤǤν BioRuby ǻȰʲΤ褦ˤʤޤ # PDB ȥ 1bl8 ͥåȥͳǼ bioruby> ent_1bl8 = getent("pdb:1bl8") # ȥȤǧ bioruby> head ent_1bl8 # ȥե¸ bioruby> savefile("1bl8.pdb", ent_1bl8) # ¸줿եȤǧ bioruby> disp "data/1bl8.pdb" # PDB ȥѡ bioruby> pdb_1bl8 = flatparse(ent_1bl8) # PDB Υȥ ID ɽ bioruby> pdb_1bl8.entry_id # getent("pdb:1bl8") flatparse ˡʲǤOK bioruby> obj_1bl8 = getobj("pdb:1bl8") bioruby> obj_1bl8.entry_id # HETEROGEN Ȥ˻Ĵ̾ɽ bioruby> pdb_1bl8.each_heterogen { |heterogen| p heterogen.resName } # PDB Chemical Component Dictionary bioruby> het_dic = open("http://deposit.pdb.org/het_dictionary.txt").read # եΥХȿǧ bioruby> het_dic.size # ե¸ bioruby> savefile("data/het_dictionary.txt", het_dic) # եȤǧ bioruby> disp "data/het_dictionary.txt" # Τ˥ǥå het_dic Ȥǡ١ bioruby> flatindex("het_dic", "data/het_dictionary.txt") # ID EOH ΥΡΥȥ򸡺 bioruby> ethanol = flatsearch("het_dic", "EOH") # ȥѡ bioruby> osake = flatparse(ethanol) # Ҵ֤ηơ֥ɽ bioruby> sake.conect == 饤 (Bio::Alignment 饹) Bio::Alignment 饹Υ饤ȤǼ뤿ΥƥʤǤ Ruby Hash Array ˻ǽǡBioPerl Bio::SimpleAlign ˤʤäƤޤʲ˴ñʻȤ򼨤ޤ require 'bio' seqs = [ 'atgca', 'aagca', 'acgca', 'acgcg' ] seqs = seqs.collect{ |x| Bio::Sequence::NA.new(x) } # 饤ȥ֥Ȥ a = Bio::Alignment.new(seqs) # 󥻥󥵥ɽ p a.consensus # ==> "a?gc?" 
# IUPAC ɸۣʱѤ󥻥󥵥ɽ p a.consensus_iupac # ==> "ahgcr" # ˤĤƷ֤ a.each { |x| p x } # ==> # "atgca" # "aagca" # "acgca" # "acgcg" # ƥȤˤĤƷ֤ a.each_site { |x| p x } # ==> # ["a", "a", "a", "a"] # ["t", "a", "c", "c"] # ["g", "g", "g", "g"] # ["c", "c", "c", "c"] # ["a", "a", "a", "g"] # Clustal W Ѥƥ饤ȤԤ # 'clustalw' ޥɤƥ˥󥹥ȡ뤵Ƥɬפ롣 factory = Bio::ClustalW.new a2 = a.do_align(factory) == FASTA ˤƱԤBio::Fasta 饹 FASTA ե query.pep ФơʬΥޥ()뤤 󥿡ͥåȾΥ(⡼) FASTA ˤƱԤˡǤ ξ SSEARCH ʤɤƱͤ˻ȤȤǤޤ === ξ FASTA 󥹥ȡ뤵Ƥ뤳ȤǧƤʲǤϡ ޥ̾ fasta34 ǥѥ̤äǥ쥯ȥ˥󥹥ȡ ƤꤷƤޤ * (()) оݤȤ FASTA Υǡ١ե target.pep ȡFASTA 䤤碌󤬤Ĥäե query.pep ޤ Ǥϡ䤤碌󤴤Ȥ FASTA ¹ԤҥåȤ evalue 0.0001 ʲΤΤɽޤ #!/usr/bin/env ruby require 'bio' # FASTA ¹ԤĶ֥Ȥssearch ʤɤǤɤ factory = Bio::Fasta.local('fasta34', ARGV.pop) # եåȥեɤ߹ߡFastaFormat ֥ȤΥꥹȤˤ ff = Bio::FlatFile.new(Bio::FastaFormat, ARGF) # ȥꤺĤ FastaFormat ֥ȤФ ff.each do |entry| # '>' ǻϤޤ륳ȹԤƤʹԾɸ२顼Ϥɽ $stderr.puts "Searching ... " + entry.definition # FASTA ˤƱ¹ԡ̤ Fasta::Report ֥ report = factory.query(entry) # ҥåȤΤ줾Ф report.each do |hit| # evalue 0.0001 ʲξ if hit.evalue < 0.0001 # evalue ȡ̾Сåΰɽ print "#{hit.query_id} : evalue #{hit.evalue}\t#{hit.target_id} at " p hit.lap_at end end end factory Ϸ֤ FASTA ¹Ԥ뤿ˡ餫äƤ ¹ԴĶǤ 嵭ΥץȤ search.rb Ȥȡ䤤碌ȥǡ١ ե̾ˤơʲΤ褦˼¹Ԥޤ % ruby search.rb query.pep target.pep > search.out FASTA ޥɤ˥ץͿ硢ܤΰ FASTA ޥɥ饤󥪥ץ񤤤Ϥޤktup ͤ ᥽åɤȤäƻꤹ뤳ȤˤʤäƤޤ Ȥ ktup ͤ 1 ˤơȥå 10 ̰ΥҥåȤ ץϡʲΤ褦ˤʤޤ factory = Bio::Fasta.local('fasta34', 'target.pep', '-b 10') factory.ktup = 1 Bio::Fasta#query ᥽åɤʤɤ֤ͤ Bio::Fasta::Report ֥ Ǥ Report ֥Ȥ顢͡ʥ᥽åɤ FASTA νϷ̤ ۤƤͳ˼Ф褦ˤʤäƤޤȤСҥåȤ˴ؤ ʤɤμʾϡ report.each do |hit| puts hit.evalue # E-value puts hit.sw # Smith-Waterman (*) puts hit.identity # % identity puts hit.overlap # СåפƤΰĹ puts hit.query_id # 䤤碌 ID puts hit.query_def # 䤤碌Υ puts hit.query_len # 䤤碌Ĺ puts hit.query_seq # 䤤碌 puts hit.target_id # ҥåȤ ID puts hit.target_def # ҥåȤΥ puts hit.target_len # ҥåȤĹ puts hit.target_seq # ҥåȤ puts hit.query_start # Ʊΰ䤤碌ǤγϻĴ puts hit.query_end # Ʊΰ䤤碌ǤνλĴ puts hit.target_start # ƱΰΥåǤγϻĴ puts hit.target_end # ƱΰΥåǤνλĴ 
puts hit.lap_at # 嵭֤οͤ end ʤɤΥ᥽åɤǸƤӽФޤΥ᥽åɤ¿ϸ Bio::Blast::Report 饹ȶ̤ˤƤޤ嵭ʳΥ᥽åɤ FASTA ͭͤФ᥽åɤɬפʾϡBio::Fasta::Report 饹ΥɥȤ򻲾ȤƤ ⤷ѡμäƤʤ fasta ޥɤμ¹Է̤ɬפ ˤϡ report = factory.query(entry) puts factory.output Τ褦ˡquery ᥽åɤ¹Ԥ factory ֥Ȥ output ᥽åɤȤäƼФȤǤޤ === ⡼Ȥξ ΤȤ GenomeNet (fasta.genome.jp) ǤθΤߥݡȤƤޤ ⡼ȤξϻѲǽʸоݥǡ١ޤäƤޤ ˤĤƤ Bio::Fasta.remote Bio::Fasta.local Ʊ褦˻Ȥ ȤǤޤ GenomeNet ǻѲǽʸоݥǡ١ * ߥλǡ١ * nr-aa, genes, vgenes.pep, swissprot, swissprot-upd, pir, prf, pdbstr * ǡ١ * nr-nt, genbank-nonst, gbnonst-upd, dbest, dbgss, htgs, dbsts, embl-nonst, embnonst-upd, genes-nt, genome, vgenes.nuc ޤ椫鸡ǡ١򤷤ޤ䤤碌μ ȸǡ١μˤäƥץϷޤޤ * 䤤碌󤬥ߥλΤȤ * оݥǡ١ߥλǡ١ξ硢program 'fasta' * оݥǡ١˻ǡ١ξ硢program 'tfasta' * 䤤碌󤬳˻ΤȤ * оݥǡ١˻ǡ١ξ硢program 'fasta' * (оݥǡ١ߥλǡ١ξϸǽ?) ץȥǡ١Ȥ߹礻ޤä program = 'fasta' database = 'genes' factory = Bio::Fasta.remote(program, database) Ȥƥեȥ꡼ꡢξƱ褦 factory.query ʤ Υ᥽åɤǸ¹Ԥޤ == BLAST ˤƱԤBio::Blast 饹 BLAST GenomeNet (blast.genome.jp) Ǥθ򥵥ݡȤ ޤǤ Bio::Fasta API ̤ˤƤޤΤǡ嵭 Bio::Blast Ƚ񤭴Ǥפʾ礬¿Ǥ ȤС f_search.rb # BLAST ¹ԤĶ֥Ȥ factory = Bio::Blast.local('blastp', ARGV.pop) ѹƱ褦˼¹ԤǤޤ ƱͤˡGenomeNet ѤBLASTԤˤ Bio::Blast.remote Ȥޤ ξ硢programλƤ FASTA Ȱۤʤޤ * 䤤碌󤬥ߥλΤȤ * оݥǡ١ߥλǡ١ξ硢program 'blastp' * оݥǡ١˻ǡ١ξ硢program 'tblastn' * 䤤碌󤬱ΤȤ * оݥǡ١ߥλǡ١ξ硢program 'blastx' * оݥǡ١ǡ١ξ硢program 'blastn' * (䤤碌ǡ١6ե졼Ԥ 'tblastx') 򤽤줾ꤷޤ ȤǡBLAST Ǥ "-m 7" ץˤ XML ϥեޥååȤ ˭٤ʤᡢBio::Blast Ruby Ѥ XML 饤֥Ǥ XMLParser ޤ REXML ѲǽʾϡXML ϤѤޤ ξѲǽʾ硢XMLParser Τۤ®ʤΤͥŪ˻Ѥޤ ʤRuby 1.8.0 ʹߤǤ REXML Ruby ΤɸźդƤޤ ⤷ XML 饤֥꤬󥹥ȡ뤵Ƥʤ "-m 8" Υֶڤ Ϸ򰷤褦ˤƤޤΥեޥåȤǤ ǡ¤Τǡ"-m 7" XML νϤȤȤ򤪴ᤷޤ Ǥ˸褦 Bio::Fasta::Report Bio::Blast::Report Hit ֥ ȤϤĤ̤Υ᥽åɤäƤޤBLAST ͭΥ᥽åɤɤ ʤΤˤ bit_score midline ʤɤޤ report.each do |hit| puts hit.bit_score # bit (*) puts hit.query_seq # 䤤碌 puts hit.midline # 饤Ȥ midline ʸ (*) puts hit.target_seq # ҥåȤ puts hit.evalue # E-value puts hit.identity # % identity puts hit.overlap # СåפƤΰĹ puts hit.query_id # 䤤碌 ID puts hit.query_def # 䤤碌Υ puts hit.query_len # 䤤碌Ĺ puts hit.target_id # ҥåȤ ID puts hit.target_def # ҥåȤΥ puts hit.target_len # ҥåȤĹ puts hit.query_start # Ʊΰ䤤碌ǤγϻĴ puts 
hit.query_end # Ʊΰ䤤碌ǤνλĴ puts hit.target_start # ƱΰΥåǤγϻĴ puts hit.target_end # ƱΰΥåǤνλĴ puts hit.lap_at # 嵭֤οͤ end FASTAȤAPI̲ΤȴؤΤᡢʤɤĤξ1ܤ Hsp (High-scoring segment pair) ͤHit֤褦ˤƤޤ Bio::Blast::Report ֥Ȥϡʲ˼褦ʡBLASTη̽Ϥ ǡ¤򤽤ΤޤȿǤŪʥǡ¤äƤޤŪˤ * Bio::Blast::Report ֥Ȥ @iteratinos * Bio::Blast::Report::Iteration ֥Ȥ Array äƤ Bio::Blast::Report::Iteration ֥Ȥ @hits * Bio::Blast::Report::Hits ֥Ȥ Array äƤ Bio::Blast::Report::Hits ֥Ȥ @hsps * Bio::Blast::Report::Hsp ֥Ȥ Array äƤ Ȥع¤ˤʤäƤꡢ줾줬ͤФΥ᥽åɤ äƤޤΥ᥽åɤξܺ٤䡢BLAST ¹Ԥ׾ʤɤͤ ɬפʾˤϡ bio/appl/blast/*.rb ΥɥȤƥȥɤ ȤƤ === ¸ BLAST ϥեѡ BLAST ¹Ԥ̥ե뤬Ǥ¸ƤäơϤ ˤϡBio::Blast ֥Ȥ餺ˡ Bio::Blast::Report ֥ ȤꤿȤȤˤʤޤˤ Bio::Blast.reports ᥽å ȤޤбƤΤ ǥեȽϥեޥå("-m 0") ޤ "-m 7" ץ XML եޥåȽϤǤ #!/usr/bin/env ruby require 'bio' # BLASTϤ˥ѡ Bio::Blast::Report ֥Ȥ֤ Bio::Blast.reports(ARGF) do |report| puts "Hits for " + report.query_def + " against " + report.db report.each do |hit| print hit.target_id, "\t", hit.evalue, "\n" if hit.evalue < 0.001 end end Τ褦ʥץ hits_under_0.001.rb 񤤤ơ % ./hits_under_0.001.rb *.xml ʤɤȼ¹ԤСͿ BLAST η̥ե *.xml ֤˽ ޤ Blast ΥС OS ʤɤˤäƽϤ XML ηۤʤǽ ꡢ XML ΥѡޤȤʤȤ褦Ǥξ Blast 2.2.5 ʹߤΥС򥤥󥹥ȡ뤹뤫 -D -m ʤɤΥץ Ȥ߹礻ѤƻƤߤƤ === ⡼ȸȤɲäˤ : ΥϾ桼ǤǽǤ SOAP ʤɤˤ ֥ӥѤ褤Ǥ礦 Blast NCBI Ϥ͡ʥȤǥӥƤޤΤȤ BioRuby Ǥ GenomeNet ʳˤбƤޤ󡣤ΥȤϡ * CGI ƤӽФʥޥɥ饤󥪥ץϤΥѤ˽ * -m 8 ʤ BioRuby ѡäƤϥեޥåȤ blast ϤФ ȤǤСquery äƸ̤ Bio::Blast::Report.new Ϥ褦ʥ᥽åɤǻȤ褦ˤʤޤŪˤϡ ᥽åɤexec_̾פΤ褦̾ Bio::Blast private ᥽å ȤϿȡܤΰˡ֥̾פꤷ factory = Bio::Blast.remote(program, db, option, '̾') Τ褦˸ƤӽФ褦ˤʤäƤޤ BioRuby ץȤ äƤ館мޤĺޤ == PubMed ưʸꥹȤ (Bio::PubMed 饹) ϡNCBI ʸǡ١ PubMed 򸡺ưʸꥹȤǤ #!/usr/bin/env ruby require 'bio' ARGV.each do |id| entry = Bio::PubMed.query(id) # PubMed 륯饹᥽å medline = Bio::MEDLINE.new(entry) # Bio::MEDLINE ֥ reference = medline.reference # Bio::Reference ֥ puts reference.bibtex # BibTeX եޥåȤǽ end ΥץȤ pmfetch.rb ʤɹ̾¸ % ./pmfetch.rb 11024183 10592278 10592173 ʤɰѤʸ PubMed ID (PMID) ¤٤ NCBI ˥ MEDLINE եޥåȤѡ BibTeX եޥåȤѴƽϤ ϤǤ ¾ˡɤǸ뵡ǽ⤢ޤ #!/usr/bin/env ruby require 'bio' # ޥɥ饤ͿɤΥꥹȤ򣱤Ĥʸˤ keywords = ARGV.join(' ') # PubMed 򥭡ɤǸ entries = 
Bio::PubMed.search(keywords) entries.each do |entry| medline = Bio::MEDLINE.new(entry) # Bio::MEDLINE ֥ reference = medline.reference # Bio::Reference ֥ puts reference.bibtex # BibTeX եޥåȤǽ end ΥץȤ pmsearch.rb ʤɹ̾¸ % ./pmsearch.rb genome bioinformatics ʤɸɤ¤٤Ƽ¹ԤȡPubMed 򥭡 ƥҥåȤʸΥꥹȤ BibTeX եޥåȤǽϤޤ ǶǤϡNCBI E-Utils Ȥ֥ץꥱȤȤ 侩ƤΤǡ Bio::PubMed.esearch ᥽åɤ Bio::PubMed.efetch ᥽åɤȤɤǤ礦 #!/usr/bin/env ruby require 'bio' keywords = ARGV.join(' ') options = { 'maxdate' => '2003/05/31', 'retmax' => 1000, } entries = Bio::PubMed.esearch(keywords, options) Bio::PubMed.efetch(entries).each do |entry| medline = Bio::MEDLINE.new(entry) reference = medline.reference puts reference.bibtex end ΥץȤǤϡ嵭 pmsearch.rb ȤۤƱ褦ưޤˡ NCBI E-Utils Ѥ뤳Ȥˤꡢоݤդҥåȷʤɤ Ǥ褦ˤʤäƤΤǡⵡǽǤץͿ ˤĤƤ (()) 򻲾ȤƤ ʤߤˡǤ bibtex ᥽åɤ BibTeX եޥåȤѴƤޤ ҤΤ褦 bibitem ᥽åɤȤ¾ʶĴ䥤åʤ ʸνϤǤޤ󤬡nature ᥽åɤ nar ʤɡĤλ եޥåȤˤбƤޤ === BibTeX λȤΥ 嵭ǽ᤿ BibTeX եޥåȤΥꥹȤ TeX ǻȤˡñˤ ȤƤޤѤʸ % ./pmfetch.rb 10592173 >> genoinfo.bib % ./pmsearch.rb genome bioinformatics >> genoinfo.bib ʤɤȤ genoinfo.bib ե˽¸Ƥ \documentclass{jarticle} \begin{document} \bibliographystyle{plain} ۤˤ KEGG ǡ١~\cite{PMID:10592173}ϤդۤǤ롣 \bibliography{genoinfo} \end{document} Ȥե hoge.tex 񤤤ơ % platex hoge % bibtex hoge # genoinfo.bib ν % platex hoge # ʸꥹȤκ % platex hoge # ʸֹ Ȥ̵ hoge.dvi Ǥޤ === bibitem λȤΥ ʸѤ̤ .bib եꤿʤ Reference#bibitem ᥽ ɤνϤȤޤ嵭 pmfetch.rb pmsearch.rb puts reference.bibtex ιԤ puts reference.bibitem ˽񤭴ʤɤơϷ̤ \documentclass{jarticle} \begin{document} ۤˤ KEGG ǡ١~\cite{PMID:10592173}ϤդۤǤ롣 \begin{thebibliography}{00} \bibitem{PMID:10592173} Kanehisa, M., Goto, S. KEGG: kyoto encyclopedia of genes and genomes., {\em Nucleic Acids Res}, 28(1):27--30, 2000. 
\end{thebibliography} \end{document} Τ褦 \begin{thebibliography} ǰϤߤޤ hoge.tex Ȥ % platex hoge # ʸꥹȤκ % platex hoge # ʸֹ ȣФǤǤ = OBDA OBDA (Open Bio Database Access) ȤϡOpen Bioinformatics Foundation ˤäꤵ줿ǡ١ؤζ̥ˡǤϡ 2002 ǯ12 Arizona Cape Town ˤƳŤ줿 BioHackathon ˤơBioPerl, BioJava, BioPython, BioRuby ʤɤγƥץȤ Сäƺޤ * BioRegistry (Directory) * ǡ١ɤˤɤΤ褦˼˹ԤꤹȤ * BioFlat * եåȥե 2 ʬڤޤ BDB Ȥäǥå * BioFetch * HTTP ͳǥǡ١饨ȥ륵Фȥ饤 * BioSQL * MySQL PostgreSQL ʤɤδطǡ١ǡǼ schema ȡȥФΥ᥽å ܺ٤ (()) 򻲾ȤƤ 줾λͽ cvs.open-bio.org CVSݥȥ֤Ƥޤ ޤϡ(()) 黲ȤǤޤ == BioRegistry BioRegistryȤϡեˤäƳƥǡ١Υȥˡ ꤹ뤳ȤˤꡢɤˡȤäƤ뤫ۤȤɰռǡ 뤳ȤǽȤ뤿λȤߤǤ ե̤ͥ * (᥽åɤΥѥ᡼)ꤷե * ~/.bioinformatics/seqdatabase.ini * /etc/bioinformatics/seqdatabase.ini * http://www.open-bio.org/registry/seqdatabase.ini Ǹ open-bio.org ϡե뤬Ĥʤ Ȥޤ BioRuby θߤμǤϡ٤ƤΥեɤ߹ߡ Ʊ꤬̾ʣ¸ߤϡǽ˸ĤäѤޤ ѤȡȤСƥԤ /etc/bioinformatics/ ֤ ΤĿŪѹΤ ~/.bioinformatics/ Ǿ񤭤뤳Ȥ Ǥޤץ seqdatabase.ini ե뤬 bioruby Υ˴ޤޤ ޤΤǻȤƤ եȤ stanza եޥåȤȸƤФ񼰤ǵҤޤ [ǡ١̾] protocol=ץȥ̾ location=̾ Τ褦ʥȥƥǡ١ˤĤƵҤ뤳Ȥˤʤޤ ǡ١̾ϡʬѤ뤿Υ٥ʤΤʬ䤹Τ ĤɤºݤΥǡ١̾ȰۤʤäƤƤ⹽ʤ褦Ǥ Ʊ̾Υǡ١ʣȤϺǽ˽񤫤ƤΤ ³褦˻ͽǤƤƤޤΤȤ BioRuby Ǥ ˤбƤޤ ޤץȥμˤäƤ location ʳˤMySQL Υ桼̾ʤɡ ɲäΥץ򵭽ҤɬפޤߤΤȤͽǵꤵ Ƥ protocol ȤƤϰʲΤΤޤ * index-flat * index-berkeleydb * biofetch * biosql * bsane-corba * xembl ΤȤ BioRuby ǻѲǽʤΤ index-flat, index-berkleydb, biofetch biosql ǤޤBioRegistryƥץȥλͤѹ뤳 ޤBioRubyϤɽǤƤʤ⤷ޤ BioRegistry Ȥˤϡޤ Bio::Registry֥Ȥޤ ȡե뤬ɤ߹ޤޤ reg = Bio::Registry.new # ե˽񤤤ǡ١̾ǥФ³ serv = reg.get_database('genbank') # ID ꤷƥȥ entry = serv.get_by_id('AA2CG') serv ե [genbank] ǻꤷ protocol ץȥ б륵Х֥ȤǡBio::SQL Bio::Fetch ʤɤΥ󥹥 󥹤֤äƤϤǤʥǡ١̾Ĥʤä nilˡ Ȥ OBDA ̤Υȥ᥽å get_by_id ƤꡢХ ֥˸ͭΥ᥽åɤƤ֤ȤˤʤޤΤǡʲ BioFetch BioSQL β򻲾ȤƤ == BioFlat BioFlat ϥեåȥեФƥǥåȥ® ФȤߤǤǥåμϡRUbyγĥ饤֥˰¸ʤ index-flat Berkeley DB (bdb) Ȥä index-berkeleydb 2ब¸ ޤʤindex-berkeleydb ѤˤϡBDB Ȥ Ruby γĥ 饤֥ӥ󥹥ȡ뤹ɬפޤǥåκˤ bioruby ѥå° br_bioflat.rb ޥɤȤäơ % br_bioflat.rb --makeindex ǡ١̾ [--format 饹̾] ե̾ Τ褦ˤޤBioRubyϥǡեޥåȤμưǧǽܤƤ Τ --format ץϾάǽǤ줦ޤǧʤä BioRuby γƥǡ١Υ饹̾ꤷƤϡ % bioflat ǡ١̾ ȥID ȤޤŪ GenBank gbbct*.seq ե˥ǥå Ƹ硢 % bioflat --makeindex my_bctdb --format GenBank gbbct*.seq % bioflat my_bctdb 
A16STM262 Τ褦ʴˤʤޤ Ruby bdb ĥ⥸塼(ܺ٤ http://raa.ruby-lang.org/project/bdb/ ) 󥹥ȡ뤵Ƥ Berkeley DB Ѥƥǥå ȤǤޤξ硢 % bioflat --makeindex-bdb ǡ١̾ [--format 饹̾] ե̾ Τ褦 "--makeindex" Τ "--makeindex-bdb" ꤷޤ == BioFetch BioFetch CGI ͳƥФǡ١Υȥ ǡФ CGI Υץ̾顼ɤʤɤƤ ޤ饤Ȥ HTTP Ȥäƥǡ١IDեޥåȤʤɤ ꤷȥޤ BioRuby ץȤǤ GenomeNet DBGET ƥХåɤȤ BioFetch ФƤꡢbioruby.org DZѤƤޤΥФ ɤ BioRuby sample/ ǥ쥯ȥäƤޤߤΤȤ BioFetch ФϤ bioruby.org ΤΤ EBI 󤫽ꤷޤ BioFetch ȤäƥȥˤϡĤˡޤ (1) ֥֥饦鸡ˡʰʲΥڡ򳫤 http://bioruby.org/cgi-bin/biofetch.rb (2) BioRuby° br_biofetch.rb ޥɤѤˡ % br_biofetch.rb db_name entry_id (3) ץȤ椫 Bio::Fetch 饹ľܻȤˡ serv = Bio::Fetch.new(server_url) entry = serv.fetch(db_name, entry_id) (4) ץȤ BioRegistry ͳ Bio::Fetch 饹Ū˻Ȥˡ reg = Bio::Registry.new serv = reg.get_database('genbank') entry = serv.get_by_id('AA2CG') ⤷ (4) Ȥ seqdatabase.ini [genbank] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb biodbname=genbank ʤɤȻꤷƤɬפޤ === BioFetch Bio::KEGG::GENES, Bio::AAindex1 Ȥ߹碌 ΥץϡBioFetch Ȥä KEGG GENES ǡ١źٶ Halobacterium ΥХƥꥢɥץ (VNG1467G) äƤơƱ 褦˥ߥλɸǡ١Ǥ AAindex إå ɸ (BURA740101) Ȥäơ 15 ĴΥɥ򤹤Ǥ #!/usr/bin/env ruby require 'bio' entry = Bio::Fetch.query('hal', 'VNG1467G') aaseq = Bio::KEGG::GENES.new(entry).aaseq entry = Bio::Fetch.query('aax1', 'BURA740101') helix = Bio::AAindex1.new(entry).index position = 1 win_size = 15 aaseq.window_search(win_size) do |subseq| score = subseq.total(helix) puts [ position, score ].join("\t") position += 1 end ǻȤäƤ륯饹᥽å Bio::Fetch.query ϰۤ bioruby.org BioFetch ФȤѤΥ硼ȥåȤǤʤΥФŪˤ ΥͥåȤǡƤޤKEGG/GENES ǡ١ hal AAindex ǡ١ aax1 Υȥϡ¾ BioFetch ФǤϼǤ ʤȤ⤢äơ query ᥽åɤȤäƤޤ == BioSQL to be written... == BioRuby ΥץץλȤ BioRuby Υѥåˤ samples/ ǥ쥯ȥʲˤĤΥץ बޤޤƤޤŤΤ⺮äƤޤ̤ȤƤ⽽ʬȤ ʤΤǡŪ򤤥ץ󶡤ϴޤǤ to be written... 
== ʤ ¾Υ塼ȥꥢŪʥɥȤȤƤϡBioRuby Wiki֤Ƥ BioRuby in Anger ޤ == * (1) BioRuby 1.2.1 ΥСǤϡsetup.rb Τ install.rb ѤޤޤʲΤ褦3ʳƧɬפޤ % ruby install.rb config % ruby install.rb setup # ruby install.rb install * (2) BioRuby 1.0.0 ΥСǤϡgetseq, getent, getobj γƥޥɤΤˡseq, ent, obj γƥޥɤѤƤ * (3) BioRuby 0.7.1 ΥСǤϡBio::Sequence::NA 饹 Bio::sequence::AA 饹Τɤ餫Υ֥Ȥˤʤޤ 󤬤ɤΥ饹°뤫 Ruby class ᥽åɤѤ bioruby> p cdc2.class Bio::Sequence::AA bioruby> p psaB.class Bio::Sequence::NA Τ褦Ĵ٤뤳ȤǤޤưȽְ꤬äƤʤɤˤ to_naseq, to_aaseq ᥽åɤǶŪѴǤޤ * (4) seq ᥽åɤϡɤ߹ǡμˤäƤϡ𡦥ߥλ ɤˤƤϤޤʤΤ Bio::Sequence::Generic 饹 String 饹Υ֥Ȥ֤礬뤫⤷ޤ * (5) NCBI, EBI, TogoWS ̵̤ getseq, getent, getobj ޥ ѲǽȤʤäΤ BioRuby 1.3.0 ʹߤǤ =end bio-1.4.3.0001/doc/Changes-1.3.rdoc0000644000004100000410000002272212200110570016155 0ustar www-datawww-data= Incompatible and important changes since the BioRuby 1.2.1 release A lot of changes have been made to the BioRuby after the version 1.2.1 is released. == New features === Support for sequence output with improvements of Bio::Sequence The outputting of EMBL and GenBank formatted text are now supported in the Bio::Sequence class. See the document of Bio::Sequence#output for details. You can also create Bio::Sequence objects from many kinds of data such as Bio::GenBank, Bio::EMBL, and Bio::FastaFormat by using the to_biosequence method. === BioSQL support BioSQL support is completely rewritten by using ActiveRecord. === Bio::Blast Bio::Blast#reports can parse NCBI default (-m 0) format and tabular (-m 8) format, in addition to XML (-m 7) format. Bio::Blast::Report now supports XML format with multiple query sequences generated by blastall 2.2.14 or later. Bio::Blast.remote supports DDBJ, in addition to GenomeNet. In addition, a list of available blast databases on remote sites can be obtained by using Bio::Blast::Remote::DDBJ.databases and Bio::Blast::Remote::GenomeNet.databases methods. Note that the above remote blast methods may be changed in the future to support NCBI. 
Bio::Blast::RPSBlast::Report is newly added, a parser for NCBI RPS Blast (Reversed Position Specific Blast) default (-m 0 option) results. === Bio::GFF::GFF2 and Bio::GFF::GFF3 The outputting of GFF2/GFF3-formatted text is now supported. However, many incompatible changes have been made (see below for details). === Bio::Hinv H-Invitational Database web service (REST) client class is newly added. === Bio::NCBI::REST NCBI E-Utilities client class is newly added. === Bio::PAML::Codeml and Bio::PAML::Codeml::Report Bio::PAML::Codeml, wrapper for PAML codeml program, and Bio::PAML::Codeml::Report, parser for codeml result are newly added, though some of them are still under construction and too specific to particular use cases. === Bio::Locations New method Bio::Locations#to_s is added to support output of features. === Bio::TogoWS::REST TogoWS REST client class is newly added. Information about TogoWS REST service can be found on http://togows.dbcls.jp/site/en/rest.html. == Deprecated classes === Bio::Features Bio::Features is obsoleted and changed to an array of Bio::Feature objects with some backward compatibility methods. The backward compatibility methods will be removed in the future. === Bio::References Bio::References is obsoleted and changed to an array of Bio::Reference objects with some backward compatibility methods. The backward compatibility methods will be removed in the future. == Incompatible changes === Bio::BIORUBY_VERSION Definition of the constant Bio::BIORUBY_VERSION is moved from lib/bio.rb to lib/bio/version.rb. Normally, the autoload mechanism of Ruby correctly loads the version.rb, but special scripts directly using bio.rb may need to be changed. Bio::BIORUBY_VERSION is changed to be frozen. New constants Bio::BIORUBY_EXTRA_VERSION and Bio::BIORUBY_VERSION_ID are added. See their RDoc for details. === Bio::Sequence Bio::Sequence#date is removed. Alternatively, date_created or date_modified can be used. 
Bio::Sequence#taxonomy is changed to be an alias of classification, and the data type is changed to an array of strings. === Bio::Locations and Bio::Location A caret in a location (e.g. "123^124") is now parsed, instead of being replaced by "..". To distinguish from normal "..", a new attribute Bio::Location#carat is used. "order(...)" or "group(...)" are also parsed, instead of being regarded as "join(...)". To distinguish from "join(...)", a new attribute Bio::Locations#operator is used. When "order(...)" or "group(...)" is used, the attribute is set to :order or :group, respectively. Note that "group(...)" is already deprecated in EMBL/GenBank/DDBJ. === Bio::Blast Return value of Bio::Blast#exec_* is changed to String instead of Report object. Parsing the string is now processed in Bio::Blast#query method. Bio::Blast#exec_genomenet_tab and Bio::Blast#server="genomenet_tab" are deprecated. Bio::Blast#options=() can now change the following attributes: program, db, format, matrix, and filter. Bio::Blast.reports now supports default (-m 0) and tabular (-m 8) formats. Old implementation (only supports XML) is renamed to Bio::Blast.reports_xml, to keep compatibility for older BLAST XML documents which might not be parsed by the new Bio::Blast.reports nor Bio::FlatFile, although we are not sure whether such documents really exist or not. === Bio::Blast::Default::Report and Bio::Blast::WU::Report Iteration#lambda, #kappa, #entropy, #gapped_lambda, #gapped_kappa, and #gapped_entropy, and the same methods in the Report class are changed to return float or nil instead of string or nil. === Bio::Blat When reading BLAT psl (or pslx) data by using Bio::FlatFile, it checks each query name and returns a new entry object when the query name is changed from previous queries. That is, data is stored in two or more Bio::Blat::Report objects, instead of the previous version's behavior (always reads all data at once and stores to a Bio::Blat::Report object). 
=== Bio::GFF, Bio::GFF::GFF2 and Bio::GFF::GFF3 Bio::GFF::Record#comments is renamed to #comment, and #comments= is renamed to #comment=, because they only allow a single String (or nil) and the plural form "comments" may be confusing. The "comments" and "comments=" methods can still be used, but warning messages will be shown when they are used in GFF2::Record and GFF3::Record objects. See below about GFF2 and/or GFF3 specific changes. === Bio::GFF::GFF2 and Bio::GFF::GFF3 Bio::GFF::GFF2::Record.new and Bio::GFF::GFF3::Record.new can also take 9 arguments corresponding to GFF columns, which helps to create a Record object directly without formatted text. Bio::GFF::GFF2::Record#start, #end, and #frame return Integer or nil, and #score returns Float or nil, instead of String or nil. The same changes are also made to Bio::GFF::GFF3::Record. Bio::GFF::GFF2::Record#attributes and Bio::GFF::GFF3::Record#attributes are changed to return a nested Array, containing [ tag, value ] pairs, in order to support multiple tags with the same tag name. If you want to get a Hash, use Record#attributes_to_hash method, though some tag-value pairs with the same tag name may be lost. Note that Bio::GFF::Record#attribute still returns a Hash for compatibility. New methods for getting, setting and manipulating attributes are added to Bio::GFF::GFF2::Record and Bio::GFF::GFF3::Record classes: attribute, get_attribute, get_attributes, set_attribute, replace_attributes, add_attribute, delete_attribute, delete_attributes, sort_attributes_by_tag!. It is recommended to use these methods instead of directly manipulating the array returned by Record#attributes. Bio::GFF::GFF2#to_s, Bio::GFF::GFF3#to_s, Bio::GFF::GFF2::Record#to_s, and Bio::GFF::GFF3::Record#to_s are added to support output of GFF2/GFF3 data. === Bio::GFF::GFF2 GFF2 attribute values are now automatically unescaped. 
In addition, if a value of an attribute consists of two or more tokens delimited by spaces, an object of the new class Bio::GFF::GFF2::Record::Value is returned instead of String. The new class Bio::GFF::GFF2::Record::Value aims to store a parsed value of an attribute. If you really want to get the unparsed string, Bio::GFF::GFF2::Record::Value#to_s can be used. The metadata (lines beginning with "##") are parsed to Bio::GFF::GFF2::MetaData objects and are stored to Bio::GFF::GFF2#metadata as an array, except the "##gff-version" line. The "##gff-version" version string is stored to the Bio::GFF::GFF2#gff_version as a string. === Bio::GFF::GFF3 Aliases of columns which are renamed in the GFF3 specification are added to the Bio::GFF::GFF3::Record class: seqid (column 1; alias of "seqname"), feature_type (column 3; alias of "feature"; in the GFF3 spec, it is called "type", but because "type" is already used by Ruby, we use "feature_type"), phase (column 8; formerly "frame"). Original names can still be used because they are only aliases. Sequences bundled within GFF3 after "##FASTA" are now supported (Bio::GFF::GFF3#sequences). GFF3 attribute keys and values are automatically unescaped. Each attribute value is stored as a string, except for special attributes listed below: * Bio::GFF::GFF3::Record::Target to store a "Target" attribute. * Bio::GFF::GFF3::Record::Gap to store a "Gap" attribute. The metadata (lines beginning with "##") are parsed to Bio::GFF::GFF3::MetaData objects and stored to Bio::GFF::GFF3#metadata as an array, except "##gff-version", "##sequence-region", "###", and "##FASTA" lines. * "##gff-version" version string is stored to Bio::GFF::GFF3#gff_version. * "##sequence-region" lines are parsed to Bio::GFF::GFF3::SequenceRegion objects and stored to Bio::GFF::GFF3#sequence_regions as an array. * "###" lines are parsed to Bio::GFF::GFF3::RecordBoundary objects. * "##FASTA" is regarded as the beginning of bundled sequences.
=== Bio::Pathway Bio::Pathway#cliquishness is changed to calculate cliquishness (clustering coefficient) for not only undirected graphs but also directed graphs. In Bio::Pathway#to_matrix, dump_matrix, dump_list, and depth_first_search methods, to avoid dependency on the order of objects in Hash#each (and each_keys etc.), Bio::Pathway#index is used to specify preferences of nodes in a graph. === Bio::SQL and BioSQL related classes BioSQL support is completely rewritten by using ActiveRecord. See documents in lib/bio/io/sql.rb, lib/bio/io/biosql, and lib/bio/db/biosql for details of changes and usage of the classes/modules. bio-1.4.3.0001/doc/KEGG_API.rd0000644000004100000410000016101712200110570015173 0ustar www-datawww-data=begin $Id: KEGG_API.rd,v 1.5 2006/12/27 13:40:45 k Exp $ Copyright (C) 2003-2006 Toshiaki Katayama = KEGG API KEGG API is a web service to use the KEGG system from your program via SOAP/WSDL. We have been making the (()) system available at (()). KEGG is a suite of databases including GENES, SSDB, PATHWAY, LIGAND, LinkDB, etc. for genome research and related research areas in molecular and cellular biology. These databases and associated computation services are available via WWW and the user interfaces are built on web browsers. Thus, the interfaces are designed to be accessed by humans, not by machines, which means that it is troublesome for the researchers who want to use KEGG in an automated manner. Besides, from the database developer's side, it is impossible to prepare all the CGI programs that satisfy a variety of users' needs. In recent years, the Internet technology for application-to-application communication referred to as the (()) is improving at a rapid rate. For example, Google, a popular Internet search engine, provides the web service called the (()). The service enables users to develop software that accesses and manipulates a massive amount of web documents that are constantly refreshed.
In the field of genome research, a similar kind of web service called (()) (distributed annotation system) has been used on several web sites, including (()), (()), (()), (()), (()). With the background and the trends noted above, we have started developing a new web service called KEGG API using (()) and (()). The service has been tested with (()) (Ruby 1.8.2 or Ruby 1.6.8 with (()) version 1.4.8.1) and (()) ((()) version 0.55) languages. Although the service has not been tested with clients written in other languages, it should work if the language can treat SOAP/WSDL. The (()) project prepared a Ruby library to handle the KEGG API, so users of the Ruby language should check out the latest release of the BioRuby distribution. For the general information on KEGG API, see the following page at GenomeNet: * (()) == Table of contents * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()), (()) * (()), (()) * (()), (()) * (()), (()) * (()), (()) * (()), (()) * (()), (()) * (()), (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) * (()) == Introduction This guide explains how to use the KEGG API in your programs for searching and retrieving data from the KEGG database. == KEGG API Quick Start As always, the best way to become familar with it is by looking at an example. In this document, sample codes written in several languages are shown. After understanding the first exsample, try other APIs. 
Firstly, you have to install the SOAP related libraries for the programming language of your choice. === Quick Start with Perl In the case of Perl, you need to install the following packages: * (()) (tested with 0.60) * Note: SOAP Lite > 0.60 is reported to have errors in some methods for now. * (()) * (()) * (()) Here's a first example in Perl language. #!/usr/bin/env perl use SOAP::Lite; $wsdl = 'http://soap.genome.jp/KEGG.wsdl'; $serv = SOAP::Lite->service($wsdl); $offset = 1; $limit = 5; $top5 = $serv->get_best_neighbors_by_gene('eco:b0002', $offset, $limit); foreach $hit (@{$top5}) { print "$hit->{genes_id1}\t$hit->{genes_id2}\t$hit->{sw_score}\n"; } The output will be eco:b0002 eco:b0002 5283 eco:b0002 ecj:JW0001 5283 eco:b0002 sfx:S0002 5271 eco:b0002 sfl:SF0002 5271 eco:b0002 ecc:c0003 5269 showing that eco:b0002 has Smith-Waterman score 5271 with sfl:SF0002 as a 4th hit among the entire KEGG/GENES database (here, "eco" means E. coli K-12 MG1655 and "sfl" means Shigella flexneri 2457T in the KEGG organism codes). The method internally searches the KEGG/SSDB (Sequence Similarity Database) database which contains information about the amino acid sequence similarities among all protein coding genes in the complete genomes, together with information about best hits and bidirectional best hits (best-best hits). The relation of gene x in genome A and gene y in genome B is called bidirectional best hits, when x is the best hit of query y against all genes in A and vice versa, and it is often used as an operational definition of ortholog. Next example simply lists PATHWAYs for E. coli ("eco") in KEGG database. #!/usr/bin/env perl use SOAP::Lite; $wsdl = 'http://soap.genome.jp/KEGG.wsdl'; $results = SOAP::Lite -> service($wsdl) -> list_pathways("eco"); foreach $path (@{$results}) { print "$path->{entry_id}\t$path->{definition}\n"; } This example colors the boxes corresponding to the E. coli genes b1002 and b2388 on a Glycolysis pathway of E. coli (path:eco00010). 
#!/usr/bin/env perl use SOAP::Lite; $wsdl = 'http://soap.genome.jp/KEGG.wsdl'; $serv = SOAP::Lite -> service($wsdl); $genes = SOAP::Data->type(array => ["eco:b1002", "eco:b2388"]); $result = $serv -> mark_pathway_by_objects("path:eco00010", $genes); print $result; # URL of the generated image === Perl FAQ If you use the KEGG API methods which require arguments of the ArrayOfstring datatype, you need the following modifications depending on the version of SOAP::Lite. ==== SOAP::Lite version <= 0.60 As you see in the above example, you always need to convert a Perl array into a SOAP object explicitly in SOAP::Lite by SOAP::Data->type(array => [value1, value2, .. ]) when you pass an array as the argument for any KEGG API method. ==== SOAP::Lite version > 0.60 You should use version >= 0.69 as the versions between 0.61-0.68 contain bugs. You need to add the following code to your program to pass the array of string and/or int data to the SOAP server. sub SOAP::Serializer::as_ArrayOfstring{ my ($self, $value, $name, $type, $attr) = @_; return [$name, {'xsi:type' => 'array', %$attr}, $value]; } sub SOAP::Serializer::as_ArrayOfint{ my ($self, $value, $name, $type, $attr) = @_; return [$name, {'xsi:type' => 'array', %$attr}, $value]; } By adding the above, you can write $genes = ["eco:b1002", "eco:b2388"]; instead of the following (writing as follows is also permitted). $genes = SOAP::Data->type(array => ["eco:b1002", "eco:b2388"]); ==== Sample program You can test with the following script for the SOAP::Lite v0.69. If it works, a URL of the generated image will be returned.
#!/usr/bin/env perl use SOAP::Lite +trace => [qw(debug)]; print "SOAP::Lite = ", $SOAP::Lite::VERSION, "\n"; my $serv = SOAP::Lite -> service("http://soap.genome.jp/KEGG.wsdl"); my $genes = ["eco:b1002", "eco:b2388"]; my $result = $serv->mark_pathway_by_objects("path:eco00010", $genes); print $result, "\n"; # sub routines implicitly used in the above code sub SOAP::Serializer::as_ArrayOfstring{ my ($self, $value, $name, $type, $attr) = @_; return [$name, {'xsi:type' => 'array', %$attr}, $value]; } sub SOAP::Serializer::as_ArrayOfint{ my ($self, $value, $name, $type, $attr) = @_; return [$name, {'xsi:type' => 'array', %$attr}, $value]; } === Quick Start with Ruby If you are using Ruby 1.8.1 or later, you are ready to use KEGG API as Ruby already supports SOAP in its standard library. If your Ruby is 1.6.8 or older, you need to install the following: * (()) 1.5.1 or later * One of the following XML processing libraries * (()) * (()) * (()) * (()) * (()) * (()) * (()) Here's a sample code for Ruby having the same functionality as Perl's first example shown above. #!/usr/bin/env ruby require 'soap/wsdlDriver' wsdl = "http://soap.genome.jp/KEGG.wsdl" serv = SOAP::WSDLDriverFactory.new(wsdl).create_rpc_driver serv.generate_explicit_type = true # if uncommented, you can see transactions for debug #serv.wiredump_dev = STDERR offset = 1 limit = 5 top5 = serv.get_best_neighbors_by_gene('eco:b0002', offset, limit) top5.each do |hit| print hit.genes_id1, "\t", hit.genes_id2, "\t", hit.sw_score, "\n" end You may need to iterate to obtain all the results by increasing offset and/or limit.
#!/usr/bin/env ruby require 'soap/wsdlDriver' wsdl = "http://soap.genome.jp/KEGG.wsdl" serv = SOAP::WSDLDriverFactory.new(wsdl).create_rpc_driver serv.generate_explicit_type = true offset = 1 limit = 100 loop do results = serv.get_best_neighbors_by_gene('eco:b0002', offset, limit) break unless results results.each do |hit| print hit.genes_id1, "\t", hit.genes_id2, "\t", hit.sw_score, "\n" end offset += limit end It is automatically done by using (()) library, which implements get_all_* methods for this. BioRuby also provides filtering functionality for selecting needed fields from the complex data type. #!/usr/bin/env ruby require 'bio' serv = Bio::KEGG::API.new results = serv.get_all_best_neighbors_by_gene('eco:b0002') results.each do |hit| print hit.genes_id1, "\t", hit.genes_id2, "\t", hit.sw_score, "\n" end # Same as above but using filter to select fields fields = [:genes_id1, :genes_id2, :sw_score] results.each do |hit| puts hit.filter(fields).join("\t") end # Different filters to pick additional fields for each amino acid sequence fields1 = [:genes_id1, :start_position1, :end_position1, :best_flag_1to2] fields2 = [:genes_id2, :start_position2, :end_position2, :best_flag_2to1] results.each do |hit| print "> score: ", hit.sw_score, ", identity: ", hit.identity, "\n" print "1:\t", hit.filter(fields1).join("\t"), "\n" print "2:\t", hit.filter(fields2).join("\t"), "\n" end The equivalent for the Perl's second example described above will be #!/usr/bin/env ruby require 'bio' serv = Bio::KEGG::API.new list = serv.list_pathways("eco") list.each do |path| print path.entry_id, "\t", path.definition, "\n" end and equivalent for the last example is as follows. 
#!/usr/bin/env ruby require 'bio' serv = Bio::KEGG::API.new genes = ["eco:b1002", "eco:b2388"] result = serv.mark_pathway_by_objects("path:eco00010", genes) print result # URL of the generated image === Quick Start with Python In the case of Python, you have to install * (()) plus some extra packages required for SOAPpy ( (()), (()) etc.). Here's a sample code using KEGG API with Python. #!/usr/bin/env python from SOAPpy import WSDL wsdl = 'http://soap.genome.jp/KEGG.wsdl' serv = WSDL.Proxy(wsdl) results = serv.get_genes_by_pathway('path:eco00020') print results === Quick Start with Java In the case of Java, you need to obtain Apache Axis library version axis-1_2alpha or newer (axis-1_1 doesn't work properly for KEGG API) * (()) and put required jar files in an appropriate directory. For the binary distribution of the Apache axis-1_2alpha release, copy the jar files stored under the axis-1_2alpha/lib/ to the directory of your choice. % cp axis-1_2alpha/lib/*.jar /path/to/lib/ You can use WSDL2Java coming with Apache Axis to generate classes needed for the KEGG API automatically. To generate classes and documents for the KEGG API, download the script (()) and follow the steps below: % java -classpath /path/to/lib/axis.jar:/path/to/lib/jaxrpc.jar:/path/to/lib/commons-logging.jar:/path/to/lib/commons-discovery.jar:/path/to/lib/saaj.jar:/path/to/lib/wsdl4j.jar:. org.apache.axis.wsdl.WSDL2Java -p keggapi http://soap.genome.jp/KEGG.wsdl % perl -i axisfix.pl keggapi/KEGGBindingStub.java % javac -classpath /path/to/lib/axis.jar:/path/to/lib/jaxrpc.jar:/path/to/lib/wsdl4j.jar:. keggapi/KEGGLocator.java % jar cvf keggapi.jar keggapi/* % javadoc -classpath /path/to/lib/axis.jar:/path/to/lib/jaxrpc.jar -d keggapi_javadoc keggapi/*.java This program will do the same job as the Python's example (extended to accept a pathway_id as the argument). 
import keggapi.*; class GetGenesByPathway { public static void main(String[] args) throws Exception { KEGGLocator locator = new KEGGLocator(); KEGGPortType serv = locator.getKEGGPort(); String query = args[0]; String[] results = serv.get_genes_by_pathway(query); for (int i = 0; i < results.length; i++) { System.out.println(results[i]); } } } This is another example which uses ArrayOfSSDBRelation data type. import keggapi.*; class GetBestNeighborsByGene { public static void main(String[] args) throws Exception { KEGGLocator locator = new KEGGLocator(); KEGGPortType serv = locator.getKEGGPort(); String query = args[0]; SSDBRelation[] results = null; results = serv.get_best_neighbors_by_gene(query, 1, 50); for (int i = 0; i < results.length; i++) { String gene1 = results[i].getGenes_id1(); String gene2 = results[i].getGenes_id2(); int score = results[i].getSw_score(); System.out.println(gene1 + "\t" + gene2 + "\t" + score); } } } Compile and execute this program (don't forget to include keggapi.jar file in your classpath) as follows: % javac -classpath /path/to/lib/axis.jar:/path/to/lib/jaxrpc.jar:/path/to/lib/wsdl4j.jar:/path/to/keggapi.jar GetBestNeighborsByGene.java % java -classpath /path/to/lib/axis.jar:/path/to/lib/jaxrpc.jar:/path/to/lib/commons-logging.jar:/path/to/lib/commons-discovery.jar:/path/to/lib/saaj.jar:/path/to/lib/wsdl4j.jar:/path/to/keggapi.jar:. GetBestNeighborsByGene eco:b0002 You may wish to set the CLASSPATH environmental variable. bash/zsh: % for i in /path/to/lib/*.jar do CLASSPATH="${CLASSPATH}:${i}" done % export CLASSPATH tcsh: % foreach i ( /path/to/lib/*.jar ) setenv CLASSPATH ${CLASSPATH}:${i} end For the other cases, consult the javadoc pages generated by WSDL2Java. * (()) == KEGG API Reference === WSDL file Users can use a WSDL file to create a SOAP client driver. The WSDL file for the KEGG API can be found at: * (()) === Terminology * 'org' is a three-letter (or four-letter) organism code used in KEGG. 
The list can be found at (see the description of the list_organisms method below): * (()) * 'db' is a database name used in GenomeNet service. See the description of the list_databases method below. * 'entry_id' is a unique identifier of which format is the combination of the database name and the identifier of an entry joined by a colon sign as 'database:entry' (e.g. 'embl:J00231' means an EMBL entry 'J00231'). 'entry_id' includes 'genes_id', 'enzyme_id', 'compound_id', 'drug_id', 'glycan_id', 'reaction_id', 'pathway_id' and 'motif_id' described in below. * 'genes_id' is a gene identifier used in KEGG/GENES which consists of 'keggorg' and a gene name (e.g. 'eco:b0001' means an E. coli gene 'b0001'). * 'enzyme_id' is an enzyme identifier consisting of database name 'ec' and an enzyme code used in KEGG/LIGAND ENZYME database. (e.g. 'ec:1.1.1.1' means an alcohol dehydrogenase enzyme) * 'compound_id' is a compound identifier consisting of database name 'cpd' and a compound number used in KEGG COMPOUND / LIGAND database (e.g. 'cpd:C00158' means a citric acid). Note that some compounds also have 'glycan_id' and both IDs are accepted and converted internally by the corresponding methods. * 'drug_id' is a drug identifier consisting of database name 'dr' and a compound number used in KEGG DRUG / LIGAND database (e.g. 'dr:D00201' means a tetracycline). * 'glycan_id' is a glycan identifier consisting of database name 'gl' and a glycan number used in KEGG GLYCAN database (e.g. 'gl:G00050' means a Paragloboside). Note that some glycans also have 'compound_id' and both IDs are accepted and converted internally by the corresponding methods. * 'reaction_id' is a reaction identifier consisting of database name 'rn' and a reaction number used in KEGG/REACTION (e.g. 'rn:R00959' is a reaction which catalyze cpd:C00103 into cpd:C00668) * 'pathway_id' is a pathway identifier consisting of 'path' and a pathway number used in KEGG/PATHWAY. 
Pathway numbers prefixed by 'map' specify the reference pathway and pathways prefixed by the 'keggorg' specify pathways specific to the organism (e.g. 'path:map00020' means a reference pathway for the citrate cycle and 'path:eco00020' means the same pathway in which E. coli genes are marked). * 'motif_id' is a motif identifier consisting of motif database names ('ps' for prosite, 'bl' for blocks, 'pr' for prints, 'pd' for prodom, and 'pf' for pfam) and a motif entry name. (e.g. 'pf:DnaJ' means a Pfam database entry 'DnaJ'). * 'ko_id' is a KO identifier consisting of 'ko' and a ko number used in KEGG/KO. KO (KEGG Orthology) is a classification of orthologous genes defined by KEGG (e.g. 'ko:K02598' means a KO group for nitrite transporter NirC genes). * 'ko_class_id' is a KO class identifier which is used to classify 'ko_id' hierarchically (e.g. '01110' means a 'Carbohydrate Metabolism' class). * (()) * 'offset' and 'limit' are both integers used to control the number of the results returned at once. Methods having these arguments will return first 'limit' results starting from 'offset'th. * 'fg_color_list' is a list of colors for the foreground (corresponding to the texts and borders of the objects on the KEGG pathway map). * 'bg_color_list' is a list of colors for the background (corresponding to the inside of the objects on the KEGG pathway map). Related site: * (()) === Returned values Many of the KEGG API methods will return a set of values in a complex data structure as described below. This section summarizes all of these data types.
Note that the returned values for the empty result will be * an empty array -- for the methods which return ArrayOf'OBJ' * an empty string -- for the methods which return String * -1 -- for the methods which return int * NULL -- for the methods which return any other 'OBJ' + SSDBRelation SSDBRelation data type contains the following fields: genes_id1 genes_id of the query (string) genes_id2 genes_id of the target (string) sw_score Smith-Waterman score between genes_id1 and genes_id2 (int) bit_score bit score between genes_id1 and genes_id2 (float) identity identity between genes_id1 and genes_id2 (float) overlap overlap length between genes_id1 and genes_id2 (int) start_position1 start position of the alignment in genes_id1 (int) end_position1 end position of the alignment in genes_id1 (int) start_position2 start position of the alignment in genes_id2 (int) end_position2 end position of the alignment in genes_id2 (int) best_flag_1to2 best flag from genes_id1 to genes_id2 (boolean) best_flag_2to1 best flag from genes_id2 to genes_id1 (boolean) definition1 definition string of the genes_id1 (string) definition2 definition string of the genes_id2 (string) length1 amino acid length of the genes_id1 (int) length2 amino acid length of the genes_id2 (int) + ArrayOfSSDBRelation ArrayOfSSDBRelation data type is a list of the SSDBRelation data type. + MotifResult MotifResult data type contains the following fields: motif_id motif_id of the motif (string) definition definition of the motif (string) genes_id genes_id of the gene containing the motif (string) start_position start position of the motif match (int) end_position end position of the motif match (int) score score of the motif match for TIGRFAM and PROSITE (float) evalue E-value of the motif match for Pfam (double) Note: 'score' and/or 'evalue' is set to -1 if the corresponding value is not applicable. + ArrayOfMotifResult ArrayOfMotifResult data type is a list of the MotifResult data type.
+ Definition Definition data type contains the following fields: entry_id database entry_id (string) definition definition of the entry (string) + ArrayOfDefinition ArrayOfDefinition data type is a list of the Definition data type. + LinkDBRelation LinkDBRelation data type contains the following fields: entry_id1 entry_id of the starting entry (string) entry_id2 entry_id of the terminal entry (string) type type of the link as "direct" or "indirect" (string) path link path information across the databases (string) + ArrayOfLinkDBRelation ArrayOfLinkDBRelation data type is a list of the LinkDBRelation data type. + PathwayElement PathwayElement represents the object on the KEGG PATHWAY map. PathwayElement data type contains the following fields: element_id unique identifier of the object on the pathway (int) type type of the object ("gene", "enzyme" etc.) (string) names array of names of the object (ArrayOfstring) components array of element_ids of the group components (ArrayOfint) + ArrayOfPathwayElement ArrayOfPathwayElement data type is a list of the PathwayElement data type. + PathwayElementRelation PathwayElementRelation represents the relationship between PathwayElements. PathwayElementRelation data type contains the following fields: element_id1 unique identifier of the object on the pathway (int) element_id2 unique identifier of the object on the pathway (int) type type of relation ("ECrel", "maplink" etc.) (string) subtypes array of objects involved in the relation (ArrayOfSubtype) + ArrayOfPathwayElementRelation ArrayOfPathwayElementRelation data type is a list of the PathwayElementRelation data type. ++ Subtype Subtype is used in the PathwayElementRelation data type to represent the object involved in the relation. Subtype data type contains the following fields: element_id unique identifier of the object on the pathway (int) relation kind of relation ("compound", "inhibition" etc.) (string) type type of relation ("+p", "--|" etc.) 
(string) ++ ArrayOfSubtype ArrayOfSubtype data type is a list of the Subtype data type. + StructureAlignment StructureAlignment represents structural alignment of nodes between two molecules with score. StructureAlignment data type contains the following fields: target_id entry_id of the target (string) score alignment score (float) query_nodes indices of aligned nodes in the query molecule (ArrayOfint) target_nodes indices of aligned nodes in the target molecule (ArrayOfint) + ArrayOfStructureAlignment ArrayOfStructureAlignment data type is a list of the StructureAlignment data type. === Methods ==== Meta information This section describes the APIs for retrieving the general information concerning latest version of the KEGG database. --- list_databases List of database names and its definitions available on the GenomeNet is returned. Return value: ArrayOfDefinition (db, definition) Related site: * (()) * (()) (section 2.2) --- list_organisms List up the organisms in the KEGG/GENES database. 'org' code and the organism's full name is returned in the Definition data type. Return value: ArrayOfDefinition (org, definition) Related site: * (()) * (()) --- list_pathways(string:org) List up the pathway maps of the given organism in the KEGG/PATHWAY database. Passing the string "map" as its argument, this method returns a list of the reference pathways. Return value: ArrayOfDefinition (pathway_id, definition) Related site: * (()) ==== DBGET This section describes the wrapper methods for DBGET system developed at the GenomeNet. For more details on DBGET system, see: Related site: * (()) * (()) --- binfo(string:db) Show the version information of the specified database. Passing the string "all" as its argument, this method returns the version information of all databases available on the GenomeNet. Return value: string Example: # Show the information of the latest GenBank database. binfo("gb") --- bfind(string:str) Wrapper method for bfind command. 
bfind is used for searching entries by keywords. Users need to specify a database from those which are supported by DBGET system before keywords. Number of keywords given at a time is restricted up to 100. Return value: string Example: # Returns the IDs and definitions of entries which have definition # including the word 'E-cadherin' and 'human' from GenBank. bfind("gb E-cadherin human") --- bget(string:str) The bget command is used for retrieving database entries specified by a list of 'entry_id'. This method accepts all the bget command line options as a string. Number of entries retrieved at a time is restricted up to 100. Return value: string Example: # retrieve two KEGG/GENES entries bget("eco:b0002 hin:tRNA-Cys-1") # retrieve nucleic acid sequences in a FASTA format bget("-f -n n eco:b0002 hin:tRNA-Cys-1") # retrieve amino acid sequence in a FASTA format bget("-f -n a eco:b0002") --- btit(string:str) Wrapper method for btit command. btit is used for retrieving the definitions by given database entries. Number of entries given at a time is restricted up to 100. Return value: string Example: # Returns the ids and definitions of four GENES entries "hsa:1798", # "mmu:13478", "dme:CG5287-PA" and "cel:Y60A3A.14". btit("hsa:1798 mmu:13478 dme:CG5287-PA cel:Y60A3A.14") --- bconv(string:str) The bconv command converts external IDs to KEGG IDs. Currently, the following external databases are available. External database Database prefix ----------------- --------------- NCBI GI ncbi-gi: NCBI GeneID ncbi-geneid: GenBank genbank: UniGene unigene: UniProt uniprot: OMIM omim: The result is a tab separated pair of the given ID and the converted ID in each line.
Return value: string Example: # Convert NCBI GI and NCBI GeneID to KEGG genes_id serv.bconv("ncbi-gi:10047086 ncbi-gi:10047090 ncbi-geneid:14751") Related site: * (()) (Gene name conversion section) ==== LinkDB + Database cross references --- get_linkdb_by_entry(string:entry_id, string:db, int:offset, int:limit) Retrieve the database entries linked from the user specified database entry. It can also be specified the targeted database. Return value: ArrayOfLinkDBRelation Example: # Get the entries of KEGG/PATHWAY database linked from the entry 'eco:b0002'. get_linkdb_by_entry('eco:b0002', 'pathway', 1, 10) get_linkdb_by_entry('eco:b0002', 'pathway', 11, 10) Related site: * (()) (Single entry to database) --- get_linkdb_between_databases(string:from_db, string:to_db, int:offset, int:limit) Retrieve all links between entries among the given two databases. Return value: ArrayOfLinkDBRelation Example: # Get all links from "eco" (KEGG GENES) to "pathway" (KEGG PATHWAY) # databases. get_linkdb_between_databases("eco", "pathway", 1, 100) # Print the contents of obtained links in Ruby language links = get_linkdb_between_databases("eco", "pathway", 1, 100) links.each do |link| puts link.entry_id1 # => "eco:b0084" puts link.entry_id2 # => "path:map00550" puts link.type # => "indirect" puts link.path # => "eco->ec->path" end Related site: * (()) (Database to database) + Relation among genes and enzymes --- get_genes_by_enzyme(string:enzyme_id, string:org) Retrieve all genes of the given organism. Return value: ArrayOfstring (genes_id) Example: # Returns all the GENES entry IDs in E.coli genome which are assigned # EC number ec:1.2.1.1 get_genes_by_enzyme('ec:1.2.1.1', 'eco') --- get_enzymes_by_gene(string:genes_id) Retrieve all the EC numbers which are assigned to the given gene. 
Return value: ArrayOfstring (enzyme_id) Example: # Returns the EC numbers which are assigned to E.coli genes b0002 get_enzymes_by_gene('eco:b0002') + Relation among enzymes, compounds and reactions --- get_enzymes_by_compound(string:compound_id) Retrieve all enzymes which have a link to the given compound_id. Return value: ArrayOfstring (enzyme_id) Example: # Returns the ENZYME entry IDs which have a link to the COMPOUND entry, # 'cpd:C00345' get_enzymes_by_compound('cpd:C00345') --- get_enzymes_by_glycan(string:glycan_id) Retrieve all enzymes which have a link to the given glycan_id. Return value: ArrayOfstring (enzyme_id) Example # Returns the ENZYME entry IDs which have a link to the GLYCAN entry, # 'gl:G00001' get_enzymes_by_glycan('gl:G00001') --- get_enzymes_by_reaction(string:reaction_id) Retrieve all enzymes which have a link to the given reaction_id. Return value: ArrayOfstring (enzyme_id) Example: # Returns the ENZYME entry IDs which have a link to the REACTION entry, # 'rn:R00100'. get_enzymes_by_reaction('rn:R00100') --- get_compounds_by_enzyme(string:enzyme_id) Retrieve all compounds which have a link to the given enzyme_id. Return value: ArrayOfstring (compound_id) Example: # Returns the COMPOUND entry IDs which have a link to the ENZYME entry, # 'ec:2.7.1.12'. get_compounds_by_enzyme('ec:2.7.1.12') --- get_compounds_by_reaction(reaction_id) Retrieve all compounds which have a link to the given reaction_id. Return value: ArrayOfstring (compound_id) Example: # Returns the COMPOUND entry IDs which have a link to the REACTION entry, # 'rn:R00100' get_compounds_by_reaction('rn:R00100') --- get_glycans_by_enzyme(string:enzyme_id) Retrieve all glycans which have a link to the given enzyme_id. 
Return value: ArrayOfstring (glycan_id) Example # Returns the GLYCAN entry IDs which have a link to the ENZYME entry, # 'ec:2.4.1.141' get_glycans_by_enzyme('ec:2.4.1.141') --- get_glycans_by_reaction(string:reaction_id) Retrieve all glycans which have a link to the given reaction_id. Return value: ArrayOfstring (glycan_id) Example # Returns the GLYCAN entry IDs which have a link to the REACTION entry, # 'rn:R06164' get_glycans_by_reaction('rn:R06164') --- get_reactions_by_enzyme(string:enzyme_id) Retrieve all reactions which have a link to the given enzyme_id. Return value: ArrayOfstring (reaction_id) Example: # Returns the REACTION entry IDs which have a link to the ENZYME entry, # 'ec:2.7.1.12' get_reactions_by_enzyme('ec:2.7.1.12') --- get_reactions_by_compound(string:compound_id) Retrieve all reactions which have a link to the given compound_id. Return value: ArrayOfstring (reaction_id) Example: # Returns the REACTION entry IDs which have a link to the COMPOUND entry, # 'cpd:C00199' get_reactions_by_compound('cpd:C00199') --- get_reactions_by_glycan(string:glycan_id) Retrieve all reactions which have a link to the given glycan_id. Return value: ArrayOfstring (reaction_id) Example # Returns the REACTION entry IDs which have a link to the GLYCAN entry, # 'gl:G00001' get_reactions_by_glycan('gl:G00001') ==== SSDB This section describes the APIs for SSDB database. For more details on SSDB, see: * (()) --- get_best_best_neighbors_by_gene(string:genes_id, int:offset, int:limit) Search best-best neighbor of the gene in all organisms. Return value: ArrayOfSSDBRelation Example: # List up best-best neighbors of 'eco:b0002'. get_best_best_neighbors_by_gene('eco:b0002', 1, 10) get_best_best_neighbors_by_gene('eco:b0002', 11, 10) --- get_best_neighbors_by_gene(string:genes_id, int:offset, int:limit) Search best neighbors in all organism. Return value: ArrayOfSSDBRelation Example: # List up best neighbors of 'eco:b0002'. 
get_best_neighbors_by_gene('eco:b0002', 1, 10) get_best_neighbors_by_gene('eco:b0002', 11, 10) --- get_reverse_best_neighbors_by_gene(string:genes_id, int:offset, int:limit) Search reverse best neighbors in all organisms. Return value: ArrayOfSSDBRelation Example: # List up reverse best neighbors of 'eco:b0002'. get_reverse_best_neighbors_by_gene('eco:b0002', 1, 10) get_reverse_best_neighbors_by_gene('eco:b0002', 11, 10) --- get_paralogs_by_gene(string:genes_id, int:offset, int:limit) Search paralogous genes of the given gene in the same organism. Return value: ArrayOfSSDBRelation Example: # List up paralogous genes of 'eco:b0002'. get_paralogs_by_gene('eco:b0002', 1, 10) get_paralogs_by_gene('eco:b0002', 11, 10) ==== Motif --- get_motifs_by_gene(string:genes_id, string:db) Search motifs in the specified gene. As for 'db', user can specify one of the four database; Pfam, TIGRFAM, PROSITE pattern, PROSITE profile as 'pfam', 'tfam', 'pspt', 'pspf', respectively. You can also use 'all' to specify all of the four databases above. Return value: ArrayOfMotifResult Example: # Returns the all pfam motifs in the E. coli gene 'b0002' get_motifs_by_gene('eco:b0002', 'pfam') --- get_genes_by_motifs([string]:motif_id_list, int:offset, int:limit) Search all genes which contains all of the specified motifs. Return value: ArrayOfDefinition (genes_id, definition) Example: # Returns all genes which have Pfam 'DnaJ' and Prosite 'DNAJ_2' motifs. list = ['pf:DnaJ', 'ps:DNAJ_2'] get_genes_by_motifs(list, 1, 10) get_genes_by_motifs(list, 11, 10) ==== KO --- get_ko_by_gene(string:genes_id) Search all KOs to which given genes_id belongs. Return value: ArrayOfstring (ko_id) Example: # Returns ko_ids to which GENES entry 'eco:b0002' belongs. get_ko_by_gene('eco:b0002') --- get_ko_by_ko_class(string:ko_class_id) Return all KOs which belong to the given ko_class_id. Return value: ArrayOfDefinition (ko_id, definition) Example: # Returns ko_ids which belong to the KO class '01196'. 
get_ko_by_ko_class('01196') --- get_genes_by_ko_class(string:ko_class_id, string:org, int:offset, int:limit) Retrieve all genes of the specified organism which are classified under the given ko_class_id. Return value: ArrayOfDefinition (genes_id, definition) Example: # Returns first 100 human genes which belong to the KO class '00903' get_genes_by_ko_class('00903', 'hsa' , 1, 100) --- get_genes_by_ko(string:ko_id, string:org) Retrieve all genes of the specified organism which belong to the given ko_id. Return value: ArrayOfDefinition (genes_id, definition) Example: # Returns E.coli genes which belong to the KO 'K00001' get_genes_by_ko('ko:K00001', 'eco') # Returns genes of all organisms which are assigned to the KO 'K00010' get_genes_by_ko('ko:K00010', 'all') ==== PATHWAY This section describes the APIs for PATHWAY database. For more details on PATHWAY database, see: * (()) + Coloring pathways Related site: * (()) --- mark_pathway_by_objects(string:pathway_id, [string]:object_id_list) Mark the given objects on the given pathway map and return the URL of the generated image. Return value: string (URL) Example: # Returns the URL of the generated image for the given map 'path:eco00260' # with objects corresponding to 'eco:b0002' and 'cpd:C00263' colored in red. obj_list = ['eco:b0002', 'cpd:C00263'] mark_pathway_by_objects('path:eco00260', obj_list) --- color_pathway_by_objects(string:pathway_id, [string]:object_id_list, [string]:fg_color_list, [string]:bg_color_list) Color the given objects on the pathway map with the specified colors and return the URL of the colored image. In the KEGG pathway maps, a gene or enzyme is represented by a rectangle and a compound is shown as a small circle. 'fg_color_list' is used for specifying the color of text and border of the given objects and 'bg_color_list' is used for its background area. The order of colors in these lists corresponds to the order of objects in the 'object_id_list' list.
Return value: string (URL) Example: # Returns the URL for the given pathway 'path:eco00260' with genes # 'eco:b0514' colored in red with yellow background and # 'eco:b2913' colored in green with yellow background. obj_list = ['eco:b0514', 'eco:b2913'] fg_list = ['#ff0000', '#00ff00'] bg_list = ['#ffff00', 'yellow'] color_pathway_by_objects('path:eco00260', obj_list, fg_list, bg_list) --- color_pathway_by_elements(string:pathway_id, [int]:element_id_list, [string]:fg_color_list, [string]:bg_color_list) Color the objects (rectangles and circles on a pathway map) corresponding to the given 'element_id_list' with the specified colors and return the URL of the colored image. 'fg_color_list' is used for specifying the color of text and border of the objects with given 'element_id_list' and 'bg_color_list' is used for its background area. The order of colors in these lists corresponds to the order of objects in the 'element_id_list' list. This method is useful to specify which graphical object on the pathway to be colored as there are some cases that multiple genes are assigned to one rectangle or a gene is assigned to more than one rectangle on the pathway map. The 'element_id' is a unique numerical identifier on the pathway defined by the KGML (XML representation of the KEGG PATHWAY) in the 'entry' tag. List of the 'element_id's can be obtained by the 'get_elements_by_pathway' method.
For more details on KGML, see: * (()) Return value: string (URL) Example: # Returns the URL of the colored image of given pathway 'path:bsu00010' with # * gene bsu:BG11350 (element_id 78, ec:3.2.1.86) colored in red on yellow # * gene bsu:BG11203 (element_id 79, ec:3.2.1.86) colored in blue on yellow # * gene bsu:BG11685 (element_id 51, ec:2.7.1.2) colored in red on orange # * gene bsu:BG11685 (element_id 47, ec:2.7.1.2) colored in blue on orange element_id_list = [ 78, 79, 51, 47 ] fg_list = [ '#ff0000', '#0000ff', '#ff0000', '#0000ff' ] bg_list = [ '#ffff00', '#ffff00', '#ffcc00', '#ffcc00' ] color_pathway_by_elements('path:bsu00010', element_id_list, fg_list, bg_list) --- get_html_of_marked_pathway_by_objects(string:pathway_id, [string]:object_id_list) HTML version of the 'mark_pathway_by_objects' method. Mark the given objects on the given pathway map and return the URL of the HTML with the generated image as a clickable map. Return value: string (URL) Example: # Returns the URL of the HTML which can be passed to the web browser # as a clickable map of the generated image of the given pathway # 'path:eco00970' with three objects corresponding to 'eco:b4258', # 'cpd:C00135' and 'ko:K01881' colored in red. obj_list = ['eco:b4258', 'cpd:C00135', 'ko:K01881'] get_html_of_marked_pathway_by_objects('path:eco00970', obj_list) --- get_html_of_colored_pathway_by_objects(string:pathway_id, [string]:object_id_list, [string]:fg_color_list, [string]:bg_color_list) HTML version of the 'color_pathway_by_object' method. Color the given objects on the pathway map with the specified colors and return the URL of the HTML containing the colored image as a clickable map. 
Return value: string (URL) Example: # Returns the URL of the HTML which can be passed to the web browser # as a clickable map of colored image of the given pathway 'path:eco00970' # with a gene 'eco:b4258' colored in gray/red, a compound 'cpd:C00135' # colored in green/yellow and a KO 'ko:K01881' colored in blue/orange. obj_list = ['eco:b4258', 'cpd:C00135', 'ko:K01881'] fg_list = ['gray', '#00ff00', 'blue'] bg_list = ['#ff0000', 'yellow', 'orange'] get_html_of_colored_pathway_by_objects('path:eco00970', obj_list, fg_list, bg_list) --- get_html_of_colored_pathway_by_elements(string:pathway_id, [int]:element_id_list, [string]:fg_color_list, [string]:bg_color_list) HTML version of the 'color_pathway_by_elements' method. Color the objects corresponding to the given 'element_id_list' on the pathway map with the specified colors and return the URL of the HTML containing the colored image as a clickable map. Return value: string (URL) Example: # Returns the URL of the HTML which can be passed to the web browser as a # clickable map of colored image of the given pathway 'path:bsu00010' with # * gene bsu:BG11350 (element_id 78, ec:3.2.1.86) colored in red on yellow # * gene bsu:BG11203 (element_id 79, ec:3.2.1.86) colored in blue on yellow # * gene bsu:BG11685 (element_id 51, ec:2.7.1.2) colored in red on orange # * gene bsu:BG11685 (element_id 47, ec:2.7.1.2) colored in blue on orange element_id_list = [ 78, 79, 51, 47 ] fg_list = [ '#ff0000', '#0000ff', '#ff0000', '#0000ff' ] bg_list = [ '#ffff00', '#ffff00', '#ffcc00', '#ffcc00' ] get_html_of_colored_pathway_by_elements('path:bsu00010', element_id_list, fg_list, bg_list) + Relations of objects on the pathway --- get_element_relations_by_pathway(string:pathway_id) Search all relations of the objects on the specified pathway. 
Return value: ArrayOfPathwayElementRelation Example: # Returns list of PathwayElementRelation on the pathway map 'path:bsu00010' relations = get_element_relations_by_pathway('path:bsu00010') # Print the contents of obtained relations in Ruby language relations.each do |rel| puts rel.element_id1 puts rel.element_id2 puts rel.type rel.subtypes.each do |sub| puts sub.element_id puts sub.relation puts sub.type end end + Objects on the pathway --- get_elements_by_pathway(string:pathway_id) Search all objects on the specified pathway. This method will be used in combination with the color_pathway_by_elements method to distinguish graphical objects on the pathway sharing the same name. Return value: ArrayOfPathwayElement Example: # Returns list of PathwayElement on the pathway map 'path:bsu00010' get_elements_by_pathway('path:bsu00010') # Find element_ids for genes 'bsu:BG11350', 'bsu:BG11203' and 'bsu:BG11685' # in Ruby language elems = serv.get_elements_by_pathway('path:bsu00010') genes = [ 'bsu:BG11350', 'bsu:BG11203', 'bsu:BG11685' ] elems.each do |elem| genes.each do |gene| if elem.names.include?(gene) puts gene, elem.element_id end end end --- get_genes_by_pathway(string:pathway_id) Search all genes on the specified pathway. Organism name is given by the name of the pathway map. Return value: ArrayOfstring (genes_id) Example: # Returns all E. coli genes on the pathway map '00020'. get_genes_by_pathway('path:eco00020') --- get_enzymes_by_pathway(string:pathway_id) Search all enzymes on the specified pathway. Return value: ArrayOfstring (enzyme_id) Example: # Returns all E. coli enzymes on the pathway map '00020'. get_enzymes_by_pathway('path:eco00020') --- get_compounds_by_pathway(string:pathway_id) Search all compounds on the specified pathway. Return value: ArrayOfstring (compound_id) Example: # Returns all E. coli compounds on the pathway map '00020'.
get_compounds_by_pathway('path:eco00020') --- get_glycans_by_pathway(string:pathway_id) Search all glycans on the specified pathway. Return value: ArrayOfstring (glycan_id) Example # Returns all E. coli glycans on the pathway map '00510' get_glycans_by_pathway('path:eco00510') --- get_reactions_by_pathway(string:pathway_id) Retrieve all reactions on the specified pathway. Return value: ArrayOfstring (reaction_id) Example: # Returns all E. coli reactions on the pathway map '00260' get_reactions_by_pathway('path:eco00260') --- get_kos_by_pathway(string:pathway_id) Retrieve all KOs on the specified pathway. Return value: ArrayOfstring (ko_id) Example: # Returns all ko_ids on the pathway map 'path:hsa00010' get_kos_by_pathway('path:hsa00010') + Pathways by objects Related site: * (()) --- get_pathways_by_genes([string]:genes_id_list) Search all pathways which include all the given genes. How to pass the list of genes_id will depend on the language specific implementations. Return value: ArrayOfstring (pathway_id) Example: # Returns all pathways including E. coli genes 'b0077' and 'b0078' get_pathways_by_genes(['eco:b0077' , 'eco:b0078']) --- get_pathways_by_enzymes([string]:enzyme_id_list) Search all pathways which include all the given enzymes. Return value: ArrayOfstring (pathway_id) Example: # Returns all pathways including an enzyme '1.3.99.1' get_pathways_by_enzymes(['ec:1.3.99.1']) --- get_pathways_by_compounds([string]:compound_id_list) Search all pathways which include all the given compounds. Return value: ArrayOfstring (pathway_id) Example: # Returns all pathways including compounds 'C00033' and 'C00158' get_pathways_by_compounds(['cpd:C00033', 'cpd:C00158']) --- get_pathways_by_glycans([string]:glycan_id_list) Search all pathways which include all the given glycans. 
Return value: ArrayOfstring (pathway_id) Example: # Returns all pathways including glycans 'G00009' and 'G00011' get_pathways_by_glycans(['gl:G00009', 'gl:G00011']) --- get_pathways_by_reactions([string]:reaction_id_list) Retrieve all pathways which include all the given reaction_ids. Return value: ArrayOfstring (pathway_id) Example: # Returns all pathways including reactions 'rn:R00959', 'rn:R02740', # 'rn:R00960' and 'rn:R01786' get_pathways_by_reactions(['rn:R00959', 'rn:R02740', 'rn:R00960', 'rn:R01786']) --- get_pathways_by_kos([string]:ko_id_list, string:org) Retrieve all pathways of the organisms which include all the given KO IDs. Return value: ArrayOfstring (pathway_id) Example: # Returns all human pathways including 'ko:K00016' and 'ko:K00382' get_pathways_by_kos(['ko:K00016', 'ko:K00382'], 'hsa') # Returns pathways of all organisms including 'ko:K00016' and 'ko:K00382' get_pathways_by_kos(['ko:K00016', 'ko:K00382'], 'all') + Relation among pathways --- get_linked_pathways(string:pathway_id) Retrieve all pathways which are linked from a given pathway_id. Return value: ArrayOfstring (pathway_id) Example: # Returns IDs of PATHWAY entries linked from 'path:eco00620'. get_linked_pathways('path:eco00620') ==== GENES This section describes the APIs for GENES database. For more details on GENES database, see: * (()) --- get_genes_by_organism(string:org, int:offset, int:limit) Retrieve all genes of the specified organism. Return value: ArrayOfstring (genes_id) Example: # Retrieve a hundred H. influenzae genes at a time. get_genes_by_organism('hin', 1, 100) get_genes_by_organism('hin', 101, 100) ==== GENOME This section describes the APIs for GENOME database. For more details on GENOME database, see: * (()) --- get_number_of_genes_by_organism(string:org) Get the number of genes coded in the specified organism's genome. Return value: int Example: # Get the number of the genes on the E.coli genome.
get_number_of_genes_by_organism('eco') ==== LIGAND This section describes the APIs for LIGAND database. Related site: * (()) --- convert_mol_to_kcf(string:mol) Convert a MOL format into the KCF format. Return value: string Example: convert_mol_to_kcf(mol_str) --- search_compounds_by_name(string:name) Returns a list of compounds having the specified name. Return value: ArrayOfstring (compound_id) Example: search_compounds_by_name("shikimic acid") --- search_drugs_by_name(string:name) Returns a list of drugs having the specified name. Return value: ArrayOfstring (drug_id) Example: search_drugs_by_name("tetracyclin") --- search_glycans_by_name(string:name) Returns a list of glycans having the specified name. Return value: ArrayOfstring (glycan_id) Example: search_glycans_by_name("Paragloboside") --- search_compounds_by_composition(string:composition) Returns a list of compounds containing elements indicated by the composition. Order of the elements is insensitive. Return value: ArrayOfstring (compound_id) Example: search_compounds_by_composition("C7H10O5") --- search_drugs_by_composition(string:composition) Returns a list of drugs containing elements indicated by the composition. Order of the elements is insensitive. Return value: ArrayOfstring (drug_id) Example: search_drugs_by_composition("HCl") --- search_glycans_by_composition(string:composition) Returns a list of glycans containing sugars indicated by the composition. Order of the sugars (in parenthesis with number) is insensitive. Return value: ArrayOfstring (glycan_id) Example: search_glycans_by_composition("(Man)4 (GalNAc)1") --- search_compounds_by_mass(float:mass, float:range) Returns a list of compounds having the molecular weight around 'mass' with some ambiguity (range). Return value: ArrayOfstring (compound_id) Example: search_compounds_by_mass(174.05, 0.1) --- search_drugs_by_mass(float:mass, float:range) Returns a list of drugs having the molecular weight around 'mass' with some ambiguity (range). 
Return value: ArrayOfstring (drug_id) Example: search_drugs_by_mass(150, 1.0) --- search_glycans_by_mass(float:mass, float:range) Returns a list of glycans having a molecular weight around 'mass' with some ambiguity (range). Return value: ArrayOfstring (glycan_id) Example: search_glycans_by_mass(174.05, 0.1) --- search_compounds_by_subcomp(string:mol, int:offset, int:limit) Returns a list of compounds with the alignment having common sub-structure calculated by the subcomp program. You can obtain a MOL formatted structural data of matched compounds using bget method with the "-f m" option to confirm the alignment. Return value: ArrayOfStructureAlignment Example: mol = bget("-f m cpd:C00111") search_compounds_by_subcomp(mol, 1, 5) Related site: * (()) --- search_drugs_by_subcomp(string:mol, int:offset, int:limit) Returns a list of drugs with the alignment having common sub-structure calculated by the subcomp program. You can obtain a MOL formatted structural data of matched drugs using bget method with the "-f m" option to confirm the alignment. Return value: ArrayOfStructureAlignment Example: mol = bget("-f m dr:D00201") search_drugs_by_subcomp(mol, 1, 5) Related site: * (()) --- search_glycans_by_kcam(string:kcf, string:program, string:option, int:offset, int:limit) Returns a list of glycans with the alignment having common sub-structure calculated by the KCaM program. The argument 'program' can be 'gapped' or 'ungaped'. The next argument 'option' can be 'global' or 'local'. You can obtain a KCF formatted structural data of matched glycans using bget method with the "-f k" option to confirm the alignment. Return value: ArrayOfStructureAlignment Example: kcf = bget("-f k gl:G12922") search_glycans_by_kcam(kcf, "gapped", "local", 1, 5) Related site: * (()) * (()) == Notes Last updated: December 27, 2006 =end bio-1.4.3.0001/doc/Tutorial.rd.ja.html0000644000004100000410000030372012200110570017123 0ustar www-datawww-data doc/Tutorial.rd.ja
    Copyright (C) 2001-2003, 2005, 2006 Toshiaki Katayama <k@bioruby.org>
    Copyright (C) 2005, 2006 Naohisa Goto <ng@bioruby.org>

    BioRuby の使い方

    BioRuby は国産の高機能オブジェクト指向スクリプト言語 Ruby のための オープンソースなバイオインフォマティクス用ライブラリです。

    Ruby Perl 椺ζϤʥƥȽȡ ץʬ䤹ʸˡꥢʥ֥Ȼظǽˤꡢ Ȥ褦ˤʤޤRuby ˤĤƾܤϡ֥ <URL:http://www.ruby-lang.org/> Τν򻲾ȤƤ

    はじめに

    BioRuby を使用するには Ruby と BioRuby をインストールする必要があります。

    Ruby のインストール

    Ruby Mac OS X Ƕ UNIX ˤ̾磻󥹥ȡ뤵Ƥޤ Windows ξ⣱å󥹥ȡ ActiveScriptRuby ʤɤ ѰդƤޤޤ󥹥ȡ뤵Ƥʤ

    ʤɤ򻲹ͤˤƥ󥹥ȡ뤷ޤ礦

    ʤΥԥ塼ˤɤΥС Ruby 󥹥ȡ뤵Ƥ뤫 åˤ

    % ruby -v

    ȥޥɤϤƤȡȤ

    ruby 1.8.2 (2004-12-25) [powerpc-darwin7.7.0]

    Τ褦ʴǥСɽޤС 1.8.5 ʹߤ򤪴ᤷޤ

    Ruby ɸΥ饹᥽åɤˤĤƤϡRuby Υե󥹥ޥ˥奢 ȤƤ

    ޥɥ饤ǥإפ򻲾ȤˤϡRuby ɸźդ ri ޥɤ䡢 ܸǤ refe ޥɤǤ

    RubyGems のインストール

    RubyGems ΥڡǿǤɤޤ

    Ÿƥ󥹥ȡ뤷ޤ

    % tar zxvf rubygems-x.x.x.tar.gz
    % cd rubygems-x.x.x
    % ruby setup.rb

    BioRuby のインストール

    BioRuby Υ󥹥ȡˡ <URL:http://bioruby.org/archive/> ǿǤưʲΤ褦˹Ԥޤ(1)ƱƤ README եˤ ܤ̤ĺΤǤʤȣˤʤ BioPerl ٤ BioRuby Υ󥹥ȡϤ˽ϤǤ

    % wget http://bioruby.org/archive/bioruby-x.x.x.tar.gz
    % tar zxvf bioruby-x.x.x.tar.gz
    % cd bioruby-x.x.x
    % su
    # ruby setup.rb

    RubyGems ȤĶǤ

    % gem install bio

    ǥ󥹥ȡǤޤΤ README ե˽񤫤Ƥ褦

    bioruby-x.x.x/etc/bioinformatics/seqdatabase.ini

    Ȥեۡǥ쥯ȥ ~/.bioinformatics ˥ԡ Ȥ褤Ǥ礦RubyGems ξ

    /usr/local/lib/ruby/gems/1.8/gems/bio-x.x.x/

    ʤɤˤϤǤ

    % mkdir ~/.bioinformatics
    % cp bioruby-x.x.x/etc/bioinformatics/seqdatabase.ini ~/.bioinformatics

    ޤEmacs ǥȤͤ Ruby ΥƱƤ misc/ruby-mode.el 򥤥󥹥ȡ뤷ƤȤ褤Ǥ礦

    % mkdir -p ~/lib/lisp/ruby
    % cp ruby-x.x.x/misc/ruby-mode.el ~/lib/lisp/ruby

    ʤɤȤƤơ~/.emacs ˰ʲ­ޤ

    ; subdirs 
    (let ((default-directory "~/lib/lisp"))
      (normal-top-level-add-subdirs-to-load-path))
    
    ; ruby-mode 
    (autoload 'ruby-mode "ruby-mode" "Mode for editing ruby source files")
    (add-to-list 'auto-mode-alist '("\\.rb$" . ruby-mode))
    (add-to-list 'interpreter-mode-alist '("ruby" . ruby-mode))

    BioRuby

    BioRuby С 0.7 ʹߤǤϡñ BioRuby ȶ˥󥹥ȡ뤵 bioruby ޥɤǹԤȤǤޤbioruby ޥɤ Ruby ¢Ƥ 󥿥饯ƥ֥ irb ѤƤꡢRuby BioRuby ˤǤ뤳Ȥ ͳ˼¹Ԥ뤳ȤǤޤ

    % bioruby project1

    ǻꤷ̾Υǥ쥯ȥ꤬졢DzϤԤޤ 嵭ξ project1 Ȥǥ쥯ȥ꤬졢˰ʲ ֥ǥ쥯ȥե뤬ޤ

    data/           桼βϥե֤
    plugin/         ɬפ˱ɲäΥץ饰֤
    session/        䥪֥ȡҥȥʤɤ¸
    session/config  桼¸ե
    session/history 桼ϤޥɤΥҥȥ¸ե
    session/object  ³줿֥ȤγǼե

    Τdata ǥ쥯ȥϥ桼ͳ˽񤭴ƹޤ ޤsession/history ե򸫤ȡĤɤΤ褦Ԥä ǧ뤳ȤǤޤ

    ܰʹߤϡƱͤ

    % bioruby project1

    ȤƵưƤ⹽ޤ󤷡줿ǥ쥯ȥ˰ư

    % cd project1
    % bioruby

    Τ褦˰ʤǵư뤳ȤǤޤ

    ¾script ޥɤǺ륹ץȥե䡢 web ޥɤǺ Rails Τեʤɤޤ ˤĤƤɬפ˱ƸҤޤ

    BioRuby ǤϥǥեȤǤĤʥ饤֥ɤ߹Ǥޤ 㤨 readline 饤֥꤬ȤĶǤ Tab ǥ᥽å̾ѿ̾ 䴰ϤǤopen-uri, pp, yaml ʤɤǽ餫ɤ߹ޤƤޤ

    , ߥλ

    getseq(str)

    getseq ޥ(2)Ȥäʸ󤫤䥢ߥλ뤳Ȥ Ǥޤȥߥλ ATGC δ̤ 90% ʾ夫ɤǼưȽꤵޤ ǤϡǤ dna Ȥѿޤ

    bioruby> dna = getseq("atgcatgcaaaa")

    ѿȤǧˤ Ruby puts ᥽åɤȤޤ

    bioruby> puts dna
    atgcatgcaaaa

    ե̾Ϳȼ긵ˤե뤫뤳ȤǤޤ GenBank, EMBL, UniProt, FASTA ʤɼפեޥåȤϼưȽ̤ޤ ʳĥҤʤɤΥե̾ǤϤʤȥȤȽꤷޤˡ ʲ UniProt եޥåȤΥȥե뤫ɤ߹Ǥޤ ˡǤϡʣΥȥ꤬ǽΥȥɤ߹ޤޤ

    bioruby> cdc2 = getseq("p04551.sp")
    bioruby> puts cdc2
    MENYQKVEKIGEGTYGVVYKARHKLSGRIVAMKKIRLEDESEGVPSTAIREISLLKEVNDENNRSN...(ά)

    ǡ١̾ȥȥ̾ʬäƤС󥿡ͥåȤ̤ ưŪ˼뤳ȤǤޤ

    bioruby> psaB = getseq("genbank:AB044425")
    bioruby> puts psaB
    actgaccctgttcatattcgtcctattgctcacgcgatttgggatccgcactttggccaaccagca...(ά)

    ɤΥǡ١ɤΤ褦ˡǥȥ뤫ϡBioPerl ʤɤȶ̤ OBDA ե ~/.bioinformatics/seqdatabase.ini Ѥƥǡ١Ȥ˻ꤹ뤳ȤǤޤʸҡˡ ޤEMBOSS seqret ޥɤˤˤбƤޤΤǡ EMBOSS USA ɽǤ⥨ȥǤޤEMBOSS Υޥ˥奢򻲾Ȥ ~/.embossrc ŬڤꤷƤ

    ɤˡǼ⡢getseq ޥɤˤä֤ϡ Ѥ󥯥饹 Bio::Sequence ˤʤޤ(3)

    󤬱ȥߥλΤɤȽꤵƤΤϡ moltype ᥽åɤѤ

    bioruby> p cdc2.moltype
    Bio::Sequence::AA
    
    bioruby> p psaB.moltype
    Bio::Sequence::NA

    Τ褦Ĵ٤뤳ȤǤޤưȽְ꤬äƤʤɤˤ na, aa ᥽åɤǶŪѴǤޤʤΥ᥽åɤ Υ֥ȤŪ˽񤭴ޤ

    bioruby> dna.aa
    bioruby> p dna.moltype
    Bio::Sequence::AA
    
    bioruby> dna.na
    bioruby> p dna.moltype
    Bio::Sequence::NA

    ޤϡto_naseq, to_aaseq ᥽åɤǶŪѴ뤳ȤǤޤ

    bioruby> pep = dna.to_aaseq

    to_naseq, to_aaseq ᥽åɤ֤֥Ȥϡ줾졢 DNA Τ Bio::Sequence::NA 饹ߥλΤ Bio::Sequence::AA 饹Υ֥Ȥˤʤޤ 󤬤ɤΥ饹°뤫 Ruby class ᥽åɤѤ

    bioruby> p pep.class
    Bio::Sequence::AA

    Τ褦Ĵ٤뤳ȤǤޤ

    ŪѴˡBio::Sequence::NA 饹ޤ Bio::sequence::AA 饹 Τɤ餫Υ֥Ȥˤ seq ᥽åɤȤޤ(4)

    bioruby> pep2 = cdc2.seq
    bioruby> p pep2.class
    Bio::Sequence::AA

    ޤʲDz⤹ complement translate ʤɤΥ᥽åɤη̤ϡ ֤ȤԤ᥽åɤ Bio::Sequence::NA 饹 ߥλ֤ȤԤ᥽åɤ Bio::sequence::AA 饹 Υ֥Ȥˤʤޤ

    䥢ߥλΥ饹 Ruby ʸ󥯥饹Ǥ String ѾƤޤޤBio::Sequence 饹Υ֥Ȥ String ֥ȤȸƱͤƯ褦˹פƤޤΤᡢ length ĹĴ٤ꡢ+ ­碌ꡢ* Ƿ֤ʤɡ Ruby ʸФƹԤѲǽǤ Τ褦ħϥ֥ȻظζϤ¦̤ΰĤȸǤ礦

    bioruby> puts dna.length
    12
    
    bioruby> puts dna + dna
    atgcatgcaaaaatgcatgcaaaa
    
    bioruby> puts dna * 5
    atgcatgcaaaaatgcatgcaaaaatgcatgcaaaaatgcatgcaaaaatgcatgcaaaa
    complement

    亿ˤϱ complement ᥽åɤƤӤޤ

    bioruby> puts dna.complement
    ttttgcatgcat
    translate

    򥢥ߥλˤ translate ᥽åɤȤޤ 줿ߥλ pep ȤѿƤߤޤ

    bioruby> pep = dna.translate
    bioruby> puts pep
    MHAK

    ե졼Ѥˤ

    bioruby> puts dna.translate(2)
    CMQ
    bioruby> puts dna.translate(3)
    ACK

    ʤɤȤޤ

    molecular_weight

    ʬ̤ molecular_weight ᥽åɤɽޤ

    bioruby> puts dna.molecular_weight
    3718.66444
    
    bioruby> puts pep.molecular_weight
    485.605
    seqstat(seq)

    seqstat ޥɤȤȡʤɤξ٤ɽޤ

    bioruby> seqstat(dna)
    
    * * * Sequence statistics * * *
    
    5'->3' sequence   : atgcatgcaaaa
    3'->5' sequence   : ttttgcatgcat
    Translation   1   : MHAK
    Translation   2   : CMQ
    Translation   3   : ACK
    Translation  -1   : FCMH
    Translation  -2   : FAC
    Translation  -3   : LHA
    Length            : 12 bp
    GC percent        : 33 %
    Composition       : a -  6 ( 50.00 %)
                        c -  2 ( 16.67 %)
                        g -  2 ( 16.67 %)
                        t -  2 ( 16.67 %)
    Codon usage       :
    
     *---------------------------------------------*
     |       |              2nd              |     |
     |  1st  |-------------------------------| 3rd |
     |       |  U    |  C    |  A    |  G    |     |
     |-------+-------+-------+-------+-------+-----|
     | U   U |F  0.0%|S  0.0%|Y  0.0%|C  0.0%|  u  |
     | U   U |F  0.0%|S  0.0%|Y  0.0%|C  0.0%|  c  |
     | U   U |L  0.0%|S  0.0%|*  0.0%|*  0.0%|  a  |
     |  UUU  |L  0.0%|S  0.0%|*  0.0%|W  0.0%|  g  |
     |-------+-------+-------+-------+-------+-----|
     |  CCCC |L  0.0%|P  0.0%|H 25.0%|R  0.0%|  u  |
     | C     |L  0.0%|P  0.0%|H  0.0%|R  0.0%|  c  |
     | C     |L  0.0%|P  0.0%|Q  0.0%|R  0.0%|  a  |
     |  CCCC |L  0.0%|P  0.0%|Q  0.0%|R  0.0%|  g  |
     |-------+-------+-------+-------+-------+-----|
     |   A   |I  0.0%|T  0.0%|N  0.0%|S  0.0%|  u  |
     |  A A  |I  0.0%|T  0.0%|N  0.0%|S  0.0%|  c  |
     | AAAAA |I  0.0%|T  0.0%|K 25.0%|R  0.0%|  a  |
     | A   A |M 25.0%|T  0.0%|K  0.0%|R  0.0%|  g  |
     |-------+-------+-------+-------+-------+-----|
     |  GGGG |V  0.0%|A  0.0%|D  0.0%|G  0.0%|  u  |
     | G     |V  0.0%|A  0.0%|D  0.0%|G  0.0%|  c  |
     | G GGG |V  0.0%|A 25.0%|E  0.0%|G  0.0%|  a  |
     |  GG G |V  0.0%|A  0.0%|E  0.0%|G  0.0%|  g  |
     *---------------------------------------------*
    
    Molecular weight  : 3718.66444
    Protein weight    : 485.605
    //

    ߥλξϰʲΤ褦ˤʤޤ

    bioruby> seqstat(pep)
    
    * * * Sequence statistics * * *
    
    N->C sequence     : MHAK
    Length            : 4 aa
    Composition       : A Ala - 1 ( 25.00 %) alanine
                        H His - 1 ( 25.00 %) histidine
                        K Lys - 1 ( 25.00 %) lysine
                        M Met - 1 ( 25.00 %) methionine
    Protein weight    : 485.605
    //
    composition

    seqstat ɽƤ composition ᥽åɤ뤳ȤǤޤ ̤ʸǤϤʤ Hash ֤ΤǡȤꤢɽƤߤˤ puts p ޥɤȤɤǤ礦

    bioruby> p dna.composition
    {"a"=>6, "c"=>2, "g"=>2, "t"=>2}

    󡢥ߥλΤ¾Υ᥽å

    ¾ˤ󡢥ߥλФƹԤϿȤޤ

    subseq(from, to)

    ʬФˤ subseq ᥽åɤȤޤ

    bioruby> puts dna.subseq(1, 3)
    atg

    Ruby ʤ¿Υץߥ󥰸ʸ 1 ʸܤ 0 ޤ subseq ᥽åɤ 1 ڤФ褦ˤʤäƤޤ

    bioruby> puts dna[0, 3]
    atg

    Ruby String 饹 slice ᥽å str[] ŬȤʬ 褤Ǥ礦

    window_search(len, step)

    window_search ᥽åɤȤĹʬη֤ ñ˹ԤȤǤޤDNA 򥳥ɥ˽硢 ʸĤ餷ʤ飳ʸڤФФ褤ΤǰʲΤ褦ˤʤޤ

    bioruby> dna.window_search(3, 3) do |codon|
    bioruby+   puts "#{codon}\t#{codon.translate}"
    bioruby+ end
    atg     M
    cat     H
    gca     A
    aaa     K

    Υü 1000bp 򥪡Сåפʤ 11000bp Ȥ ֥ڤˤ FASTA եޥåȤϰʲΤ褦ˤʤޤ

    bioruby> seq.window_search(11000, 10000) do |subseq|
    bioruby+   puts subseq.to_fasta
    bioruby+ end

    Ǹ 10000bp ʤ 3' ü;֤ͤȤΤǡ ɬפʾӼäɽޤ

    bioruby> i = 1
    bioruby> remainder = seq.window_search(11000, 10000) do |subseq|
    bioruby+   puts subseq.to_fasta("segment #{i*10000}", 60)
    bioruby+   i += 1
    bioruby+ end
    bioruby> puts remainder.to_fasta("segment #{i*10000}", 60)
    splicing(position)

    GenBank position ʸˤڤФ splicing ᥽åɤǹԤޤ

    bioruby> puts dna
    atgcatgcaaaa
    bioruby> puts dna.splicing("join(1..3,7..9)")
    atggca
    randomize

    randomize ᥽åɤϡ¸ޤޥޤ

    bioruby> puts dna.randomize
    agcaatagatac
    to_re

    to_re ᥽åɤϡۣʱɽޤ atgc ѥ󤫤ʤɽѴޤ

    bioruby> ambiguous = getseq("atgcyatgcatgcatgc")
    
    bioruby> p ambiguous.to_re
    /atgc[tc]atgcatgcatgc/
    
    bioruby> puts ambiguous.to_re
    (?-mix:atgc[tc]atgcatgcatgc)

    seq ᥽åɤ ATGC δ̤ͭ 90% ʲȥߥλȤߤʤΤǡ ۣʱ¿ޤޤξ to_naseq ᥽åɤȤä Ū Bio::Sequence::NA ֥ȤѴɬפޤ

    bioruby> s = getseq("atgcrywskmbvhdn").to_naseq
    bioruby> p s.to_re
    /atgc[ag][tc][at][gc][tg][ac][tgc][agc][atc][atg][atgc]/
    
    bioruby> puts s.to_re
    (?-mix:atgc[ag][tc][at][gc][tg][ac][tgc][agc][atc][atg][atgc])
    names

    ޤȤȤϤޤ󤬡̾䥢ߥλ̾Ѵ ᥽åɤǤ

    bioruby> p dna.names
    ["adenine", "thymine", "guanine", "cytosine", "adenine", "thymine",
    "guanine", "cytosine", "adenine", "adenine", "adenine", "adenine"]
    
    bioruby> p pep.names
    ["methionine", "histidine", "alanine", "lysine"]
    codes

    ߥλʸɤѴ names Ȼ᥽åɤǤ

    bioruby> p pep.codes
    ["Met", "His", "Ala", "Lys"]
    gc_percent

    GC ̤ gc_percent ᥽åɤޤ

    bioruby> p dna.gc_percent
    33
    to_fasta

    FASTA եޥåȤѴˤ to_fasta ᥽åɤȤޤ

    bioruby> puts dna.to_fasta("dna sequence")
    >dna sequence
    atgcatgcaaaa

    䥢ߥλΥɡɥɽ򤢤Ĥ

    ߥλ𡢥ɥơ֥뤿 aminoacids, nucleicacids, codontables, codontable ޥɤҲ𤷤ޤ

    aminoacids

    ߥλΰ aminoacids ޥɤɽǤޤ

    bioruby> aminoacids
    ?       Pyl     pyrrolysine
    A       Ala     alanine
    B       Asx     asparagine/aspartic acid
    C       Cys     cysteine
    D       Asp     aspartic acid
    E       Glu     glutamic acid
    F       Phe     phenylalanine
    G       Gly     glycine
    H       His     histidine
    I       Ile     isoleucine
    K       Lys     lysine
    L       Leu     leucine
    M       Met     methionine
    N       Asn     asparagine
    P       Pro     proline
    Q       Gln     glutamine
    R       Arg     arginine
    S       Ser     serine
    T       Thr     threonine
    U       Sec     selenocysteine
    V       Val     valine
    W       Trp     tryptophan
    Y       Tyr     tyrosine
    Z       Glx     glutamine/glutamic acid

    ֤ͤûɽбĹɽΥϥåˤʤäƤޤ

    bioruby> aa = aminoacids
    bioruby> puts aa["G"]
    Gly
    bioruby> puts aa["Gly"]
    glycine
    nucleicacids

    ΰ nucleicacids ޥɤɽǤޤ

    bioruby> nucleicacids
    a       a       Adenine
    t       t       Thymine
    g       g       Guanine
    c       c       Cytosine
    u       u       Uracil
    r       [ag]    puRine
    y       [tc]    pYrimidine
    w       [at]    Weak
    s       [gc]    Strong
    k       [tg]    Keto
    m       [ac]    aroMatic
    b       [tgc]   not A
    v       [agc]   not T
    h       [atc]   not G
    d       [atg]   not C
    n       [atgc]  

    ֤ͤϱΣʸɽȳΥϥåˤʤäƤޤ

    bioruby> na = nucleicacids
    bioruby> puts na["r"]
    [ag]
    codontables

    ɥơ֥ΰ codontables ޥɤɽǤޤ

    bioruby> codontables
    1       Standard (Eukaryote)
    2       Vertebrate Mitochondrial
    3       Yeast Mitochondorial
    4       Mold, Protozoan, Coelenterate Mitochondrial and Mycoplasma/Spiroplasma
    5       Invertebrate Mitochondrial
    6       Ciliate Macronuclear and Dasycladacean
    9       Echinoderm Mitochondrial
    10      Euplotid Nuclear
    11      Bacteria
    12      Alternative Yeast Nuclear
    13      Ascidian Mitochondrial
    14      Flatworm Mitochondrial
    15      Blepharisma Macronuclear
    16      Chlorophycean Mitochondrial
    21      Trematode Mitochondrial
    22      Scenedesmus obliquus mitochondrial
    23      Thraustochytrium Mitochondrial

    ֤ͤϥơֹ֥̾ΥϥåˤʤäƤޤ

    bioruby> ct = codontables
    bioruby> puts ct[3]
    Yeast Mitochondorial
    codontable(num)

    ɥɽΤ codontable ޥɤɽǤޤ

    bioruby> codontable(11)
    
     = Codon table 11 : Bacteria
    
       hydrophilic: H K R (basic), S T Y Q N S (polar), D E (acidic)
       hydrophobic: F L I M V P A C W G (nonpolar)
    
     *---------------------------------------------*
     |       |              2nd              |     |
     |  1st  |-------------------------------| 3rd |
     |       |  U    |  C    |  A    |  G    |     |
     |-------+-------+-------+-------+-------+-----|
     | U   U | Phe F | Ser S | Tyr Y | Cys C |  u  |
     | U   U | Phe F | Ser S | Tyr Y | Cys C |  c  |
     | U   U | Leu L | Ser S | STOP  | STOP  |  a  |
     |  UUU  | Leu L | Ser S | STOP  | Trp W |  g  |
     |-------+-------+-------+-------+-------+-----|
     |  CCCC | Leu L | Pro P | His H | Arg R |  u  |
     | C     | Leu L | Pro P | His H | Arg R |  c  |
     | C     | Leu L | Pro P | Gln Q | Arg R |  a  |
     |  CCCC | Leu L | Pro P | Gln Q | Arg R |  g  |
     |-------+-------+-------+-------+-------+-----|
     |   A   | Ile I | Thr T | Asn N | Ser S |  u  |
     |  A A  | Ile I | Thr T | Asn N | Ser S |  c  |
     | AAAAA | Ile I | Thr T | Lys K | Arg R |  a  |
     | A   A | Met M | Thr T | Lys K | Arg R |  g  |
     |-------+-------+-------+-------+-------+-----|
     |  GGGG | Val V | Ala A | Asp D | Gly G |  u  |
     | G     | Val V | Ala A | Asp D | Gly G |  c  |
     | G GGG | Val V | Ala A | Glu E | Gly G |  a  |
     |  GG G | Val V | Ala A | Glu E | Gly G |  g  |
     *---------------------------------------------*

    ֤ͤ Bio::CodonTable 饹Υ֥Ȥǡɥȥߥλ ѴǤǤʤʲΤ褦ʥǡ뤳ȤǤޤ

    bioruby> ct = codontable(2)
    bioruby> p ct["atg"]
    "M"
    definition

    ɥɽ

    bioruby> puts ct.definition
    Vertebrate Mitochondrial
    start

    ϥɥ

    bioruby> p ct.start
    ["att", "atc", "ata", "atg", "gtg"]
    stop

    ߥɥ

    bioruby> p ct.stop
    ["taa", "tag", "aga", "agg"]
    revtrans

    ߥλ򥳡ɤ륳ɥĴ٤

    bioruby> p ct.revtrans("V")
    ["gtc", "gtg", "gtt", "gta"]

    եåȥեΥȥ

    ǡ١Υȥȡեåȥե뤽ΤΤ򰷤ˡҲ𤷤ޤ GenBank ǡ١ǤϡեΥȥ꤬ޤޤ gbphg.seq ե륵ΤǡΥեȤƻȤޤ

    % wget ftp://ftp.hgc.jp/pub/mirror/ncbi/genbank/gbphg.seq.gz
    % gunzip gbphg.seq.gz
    getent(str)

    getseq ޥɤޤǤʤȥΤ ˤ getent ޥ(2)Ȥޤgetseq ޥƱ͡getent ޥɤǤ OBDA, EMBOSS, NCBI, EBI, TogoWS, KEGG API Υǡ١ѲǽǤ(5) ˤĤƤ getseq ޥɤ򻲾ȤƤ

    bioruby> entry = getent("genbank:AB044425")
    bioruby> puts entry
    LOCUS       AB044425                1494 bp    DNA     linear   PLN 28-APR-2001
    DEFINITION  Volvox carteri f. kawasakiensis chloroplast psaB gene for
                photosystem I P700 chlorophyll a apoprotein A2,
                strain:NIES-732.
    (ά)

    getent ޥɤΰˤ db:entry_id ʸEMBOSS USA ե롢IO Ϳ졢ǡ١Σȥʬʸ֤ޤ ǡ١˸¤餺¿Υǡ١ȥбƤޤ

    flatparse(str)

    ȥѡߤǡȤˤ flatparse ޥɤȤޤ

    bioruby> entry = getent("gbphg.seq")
    bioruby> gb = flatparse(entry)
    bioruby> puts gb.entry_id
    AB000833
    bioruby> puts gb.definition
    Bacteriophage Mu DNA for ORF1, sheath protein gpL, ORF2, ORF3, complete cds.
    bioruby> puts gb.naseq
    acggtcagacgtttggcccgaccaccgggatgaggctgacgcaggtcagaaatctttgtgacgacaaccgtatcaat
    (ά)
    getobj(str)

    getobj ޥ(2)ϡgetent ǥȥʸȤƼ flatparse ѡ֥ȤѴΤƱǤgetent ޥɤƱ դޤ getseqȥ getent ѡ֥Ȥ getobj ȤȤˤʤޤ

    bioruby> gb = getobj("gbphg.seq")
    bioruby> puts gb.entry_id
    AB000833
    flatfile(file)

    getent ޥɤϣȥꤷʤᡢΥե򳫤 ƥȥ˽Ԥˤ flatfile ޥɤȤޤ

    bioruby> flatfile("gbphg.seq") do |entry|
    bioruby+   # do something on entry
    bioruby+ end

    ֥åꤷʤϡեκǽΥȥޤ

    bioruby> entry = flatfile("gbphg.seq")
    bioruby> gb = flatparse(entry)
    bioruby> puts gb.entry_id
    flatauto(file)

    ƥȥ flatparse Ʊͤ˥ѡ֤ǽ֤˽뤿ˤϡ flatfile ޥɤ flatauto ޥɤȤޤ

    bioruby> flatauto("gbphg.seq") do |entry|
    bioruby+   print entry.entry_id
    bioruby+   puts  entry.definition
    bioruby+ end

    flatfile Ʊ֥͡åꤷʤϡեκǽΥȥ ѡ֥Ȥ֤ޤ

    bioruby> gb = flatauto("gbphg.seq")
    bioruby> puts gb.entry_id

    եåȥեΥǥ

    EMBOSS dbiflat ˻ǽȤơBioRuby, BioPerl ʤɤ˶̤ BioFlat ȤǥåȤߤޤ٥ǥå ƤȥȥμФ®ưפ˹Ԥޤ ˤ꼫ʬѤΥǡ١ڤ˺뤳ȤǤޤ

    flatindex(db_name, *source_file_list)

    GenBank Υեե gbphg.seq äƤ륨ȥФ mydb Ȥǡ١̾ǥǥåޤ

    bioruby> flatindex("mydb", "gbphg.seq")
    Creating BioFlat index (.bioruby/bioflat/mydb) ... done
    flatsearch(db_name, entry_id)

    mydb ǡ١饨ȥȤФˤ flatsearch ޥɤ Ȥޤ

    bioruby> entry = flatsearch("mydb", "AB004561")
    bioruby> puts entry
    LOCUS       AB004561                2878 bp    DNA     linear   PHG 20-MAY-1998
    DEFINITION  Bacteriophage phiU gene for integrase, complete cds, integration
                site.
    ACCESSION   AB004561
    (ά)

    ͡ DB FASTA եޥåȤѴ¸

    FASTA եޥåȤǡɸŪѤƤեޥåȤǤ >׵ǤϤޤ룱ܤꡢܰʹߤ󤬤ĤŤޤ ζʸ̵뤵ޤ

    >entry_id definition ...
    ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
    ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT

    Ԥϡǽñ줬 ID ˤʤäƤ뤳Ȥ¿ΤǤ NCBI BLAST ѥǡ١ǤϤ˹٤ʹ¤ʤƤޤ

    BioRuby Υǡ١ȥΥ饹ˤϥȥIDˤĤ ̤Υ᥽åɤѰդƤޤ

    • entry_id - ȥ ID
    • definition - ʸ
    • seq -

    ζ̥᥽åɤȤȡɤǡ١ȥǤ FASTA եޥåȤѴǤץबñ˺ޤ

    entry.seq.to_fasta("#{entry.entry_id} #{entry.definition}", 60)

    ˡBioRuby Ǥϥǡ١ηưȽ̤ǤޤΤǡ GenBank, UniProt ʤ¿μפǡ١Ǥ ե̾ꤹ FASTA եޥåȤѴǤޤ

    flatfasta(fasta_file, *source_file_list)

    ϥǡ١Υե̾ΥꥹȤ顢ꤷ FASTA եޥåȤ ե륳ޥɤǤǤϤĤ GenBank Υե FASTA եޥåȤѴmyfasta.fa Ȥե¸Ƥޤ

    bioruby> flatfasta("myfasta.fa", "gbphg.seq", "gbvrl1.seq", "gbvrl2.seq")
    Saving fasta file (myfasta.fa) ... 
      converting -- gbphg.seq
      converting -- gbvrl1.seq
      converting -- gbvrl2.seq
    done

    KEGG API

    BioRuby Ǥ KEGG API Υ֥ӥñѤǤޤ

    keggdbs

    ΥͥåȤ KEGG API ̤Ѳǽʥǡ١ΥꥹȤɽޤ

    bioruby> keggdbs
    nt:     Non-redundant nucleic acid sequence database
    aa:     Non-redundant protein sequence database
    gb:     GenBank nucleic acid sequence database
    (ά)
    keggorgs

    KEGG ˼ϿƤʪΥꥹȤɽޤ

    bioruby> keggorgs
    aae:    Aquifex aeolicus
    aci:    Acinetobacter sp. ADP1
    afu:    Archaeoglobus fulgidus
    (ά)
    keggpathways

    KEGG ˼ϿƤѥΥꥹȤɽޤ

    bioruby> keggpathways
    path:map00010:  Glycolysis / Gluconeogenesis - Reference pathway
    path:map00020:  Citrate cycle (TCA cycle) - Reference pathway
    path:map00030:  Pentose phosphate pathway - Reference pathway
    (ά)

    ˣʸ KEGG ʪﵭ򤢤ȡʪѤǤ ѥΰ֤ޤIJ eco ξʲΤ褦ˤʤޤ

    bioruby> keggpathways("eco")
    path:eco00010:  Glycolysis / Gluconeogenesis - Escherichia coli K-12 MG1655
    path:eco00020:  Citrate cycle (TCA cycle) - Escherichia coli K-12 MG1655
    path:eco00030:  Pentose phosphate pathway - Escherichia coli K-12 MG1655
    (ά)
    keggapi

    ʳ KEGG API Υ᥽åɤϡkeggapi ³ƸƤӽФȤ ѤǤޤ

    bioruby> p keggapi.get_genes_by_pathway("path:eco00010")
    ["eco:b0114", "eco:b0115", "eco:b0116", "eco:b0356", "eco:b0688", (ά)

    Ѳǽʥ᥽åɤΰ KEGG API Υޥ˥奢򻲾ȤƤ

    DBGET

    ΥͥåȤ DBGET ΥޥɤǤ binfo, bfind, bget, btit, bconv KEGG API ѤƤΤޤ޼¹ԤǤ褦ˤʤäƤޤ

    binfo
    bioruby> binfo
                             *** Last database updates ***
    Date      Database              Release              #Entries      #Residues
    --------  -------------   ------------------------  ------------  ----------------
     05/12/06  nr-nt                 05-12-04 (Dec 05)    63,078,043   111,609,773,616
     05/12/06  nr-aa                 05-12-05 (Dec 05)     2,682,790       890,953,839
     05/10/25  genbank                  150.0 (Oct 05)    49,152,445    53,655,236,500
     05/12/06  genbank-upd       150.0+/12-04 (Dec 05)     7,470,976     6,357,888,366
     (ά)

    binfo ޥɤ³ƥǡ١̾ꤹ뤳ȤǤܺ٤ʾ ɽޤ

    bioruby> binfo "genbank"
    genbank          GenBank nucleic acid sequence database
    gb               Release 150.0, Oct 05
                     National Center for Biotechnology Information
                     49,152,445 entries, 53,655,236,500 bases
                     Last update:  05/10/25
                     <dbget> <fasta> <blast>
    bfind(keyword)

    bfind ޥɤǥǡ١Ф륭ɥԤȤǤޤ ǡ١̾ȸɤʸϤޤ

    bioruby> list = bfind "genbank ebola human"
    bioruby> puts list
    gb:BD177378 [BD177378] A monoclonal antibody recognizing ebola virus.
    gb:BD177379 [BD177379] A monoclonal antibody recognizing ebola virus.
    (ά)
    bget(entry_id)

    bget ޥɤǻꤷ db:entry_id Υǡ١ȥǤޤ

    bioruby> entry = bget "gb:BD177378"
    bioruby> puts entry
    LOCUS       BD177378                  24 bp    DNA     linear   PAT 16-APR-2003
    DEFINITION  A monoclonal antibody recognizing ebola virus.
    (ά)

    ץ

    ȼ򥹥ץȲ¸ƤȤǤޤ

    bioruby> script
    -- 8< -- 8< -- 8< --  Script  -- 8< -- 8< -- 8< --
    bioruby> seq = getseq("gbphg.seq")
    bioruby> p seq
    bioruby> p seq.translate
    bioruby> script
    -- >8 -- >8 -- >8 --  Script  -- >8 -- >8 -- >8 --
    Saving script (script.rb) ... done

    줿 script.rb ϰʲΤ褦ˤʤޤ

    #!/usr/bin/env bioruby
    
    seq = getseq("gbphg.seq")
    p seq
    p seq.translate

    ΥץȤ bioruby ޥɤǼ¹Ԥ뤳ȤǤޤ

    % bioruby script.rb

    ʰץ뵡ǽ

    cd(dir)

    ȥǥ쥯ȥѹޤ

    bioruby> cd "/tmp"
    "/tmp"

    ۡǥ쥯ȥˤϰĤ cd ¹Ԥޤ

    bioruby> cd
    "/home/k"
    pwd

    ȥǥ쥯ȥɽޤ

    bioruby> pwd
    "/home/k"
    dir

    ȥǥ쥯ȥΥեɽޤ

    bioruby> dir
       UGO  Date                                 Byte  File
    ------  ----------------------------  -----------  ------------
     40700  Tue Dec 06 07:07:35 JST 2005         1768  "Desktop"
     40755  Tue Nov 29 16:55:20 JST 2005         2176  "bin"
    100644  Sat Oct 15 03:01:00 JST 2005     42599518  "gbphg.seq"
    (ά)
    
    bioruby> dir "gbphg.seq"
       UGO  Date                                 Byte  File
    ------  ----------------------------  -----------  ------------
    100644  Sat Oct 15 03:01:00 JST 2005     42599518  "gbphg.seq"
    head(file, lines = 10)

    ƥȥե䥪֥ȤƬ 10 Ԥɽޤ

    bioruby> head "gbphg.seq"
    GBPHG.SEQ            Genetic Sequence Data Bank
                              October 15 2005
    
                    NCBI-GenBank Flat File Release 150.0
    
                              Phage Sequences         
    
        2713 loci,    16892737 bases, from     2713 reported sequences

    ɽԿꤹ뤳ȤǤޤ

    bioruby> head "gbphg.seq", 2
    GBPHG.SEQ            Genetic Sequence Data Bank
                              October 15 2005

    ƥȤäƤѿƬ򸫤뤳ȤǤޤ

    bioruby> entry = getent("gbphg.seq")
    bioruby> head entry, 2
    GBPHG.SEQ            Genetic Sequence Data Bank
                              October 15 2005
    disp(obj)

    ƥȥե䥪֥ȤȤڡ㡼ɽޤ ǻѤڡ㡼 pager ޥɤѹ뤳ȤǤޤʸҡˡ

    bioruby> disp "gbphg.seq"
    bioruby> disp entry
    bioruby> disp [1, 2, 3] * 4

    ѿ

    ls

    å˺ѿʥ֥ȡˤΰɽޤ

    bioruby> ls
    ["entry", "seq"]
    
    bioruby> a = 123
    bioruby> ls
    ["a", "entry", "seq"]
    rm(symbol)

    ѿõޤ

    bioruby> rm "a"
    
    bioruby> ls
    ["entry", "seq"]
    savefile(filename, object)

    ѿ¸ƤƤƥȥե¸ޤ

    bioruby> savefile "testfile.txt", entry
    Saving data (testfile.txt) ... done
    
    bioruby> disp "testfile.txt"

    Ƽ

    ³λȤߤȤ BioRuby 뽪λ session ǥ쥯ȥ ҥȥꡢ֥ȡĿ꤬ͤ¸졢ư˼ưŪ ɤ߹ޤޤ

    config

    BioRuby γƼɽޤ

    bioruby> config
    message = "...BioRuby in the shell..."
    marshal = [4, 8]
    color   = false
    pager   = nil
    echo    = false

    echo ɽ뤫ɤڤؤޤon ξϡputs p ʤɤ ĤʤƤɾ̤ͤɽޤ irb ޥɤξϽ꤬ on ˤʤäƤޤbioruby ޥɤǤ Ĺ䥨ȥʤĹʸ򰷤Ȥ¿ᡢǤ off ˤƤޤ

    bioruby> config :echo
    Echo on
      ==> nil
    
    bioruby> config :echo
    Echo off

    ɥɽʤɡǽʾ˥顼ɽ뤫ɤڤؤޤ 顼ɽξ硢ץץȤˤ⿧ĤޤΤȽ̤Ǥޤ

    bioruby> config :color
    bioruby> codontable
    (դ)

    ¹Ԥ뤿Ӥ꤬ڤؤޤ

    bioruby> config :color
    bioruby> codontable
    (ʤ)

    BioRuby 뵯ưɽ륹ץåå㤦ʸ ѹޤβϥץѤΥǥ쥯ȥ꤫ꤷƤΤ 褤Ǥ礦

    bioruby> config :message, "Kumamushi genome project"
    
    K u m a m u s h i   g e n o m e   p r o j e c t
    
      Version : BioRuby 0.8.0 / Ruby 1.8.4

    ǥեȤʸ᤹ˤϡʤǼ¹Ԥޤ

    bioruby> config :message

    BioRuby 뵯ưɽ륹ץååݥ ˥᡼ɽ뤫ɤڤؤޤ ¹Ԥ뤿Ӥ꤬ڤؤޤ

    bioruby> config :splash
    Splash on
    pager(command)

    disp ޥɤǼºݤѤڡ㡼ڤؤޤ

    bioruby> pager "lv"
    Pager is set to 'lv'
    
    bioruby> pager "less -S"
    Pager is set to 'less -S'

    ڡ㡼ѤʤˤϰʤǼ¹Ԥޤ

    bioruby> pager
    Pager is set to 'off'

    ڡ㡼 off λ˰ʤǼ¹ԤȴĶѿ PAGER ͤѤޤ

    bioruby> pager
    Pager is set to 'less'

    ҥ

    doublehelix(sequence)

    DNA 򥢥Ȥɽ륪ޥǽޤ Ŭʱ seq äݤɽƤߤޤ礦

    bioruby> dna = getseq("atgc" * 10).randomize
    bioruby> doublehelix dna
         ta
        t--a
       a---t
      a----t
     a----t
    t---a
    g--c
     cg
     gc
    a--t
    g---c
     c----g
      c----g
    (ά)

    Ҳ

    midifile(midifile, sequence)

    DNA MIDI եѴ륪ޥǽޤ Ŭʱ seq Ȥä midifile.mid MIDI ץ쥤䡼DZդƤߤޤ礦

    bioruby> midifile("midifile.mid", seq)
    Saving MIDI file (midifile.mid) ... done

    ʾ BioRuby β򽪤ꡢʲǤ BioRuby 饤֥꼫Τ Ԥޤ

    𡦥ߥλ (Bio::Sequence 饹)

    Bio::Sequence 饹ϡФ͡ԤȤǤޤ ñȤơû atgcatgcaaaa ȤäơؤѴ ʬڤФη׻ߥλؤʬ̷׻ʤɤ ԤʤäƤߤޤߥλؤǤϡɬפ˱Ʋܤ Ϥ뤫ե졼ꤷꡢcodontable.rb Ƥ륳ɥơ ֥椫ѤΤꤷꤹǤޤʥɥơ֥ ֹ <URL:http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi> 򻲾ȡˡ

    #!/usr/bin/env ruby
    
    require 'bio'
    
    seq = Bio::Sequence::NA.new("atgcatgcaaaa")
    
    puts seq                            # 
    puts seq.complement                 #  (Bio::Sequence::NA)
    puts seq.subseq(3,8)                # 3 ܤ 8 ܤޤ
    
    p seq.gc_percent                    # GC γ (Integer)
    p seq.composition                   #  (Hash)
    
    puts seq.translate                  #  (Bio::Sequence::AA)
    puts seq.translate(2)               # ʸܤ̤ϣ
    puts seq.translate(1,9)             # ֤Υɥơ֥
    
    p seq.translate.codes               # ߥλʸɤɽ (Array)
    p seq.translate.names               # ߥλ̾ɽ (Array)
    p seq.translate.composition         # ߥλ (Hash)
    p seq.translate.molecular_weight    # ʬ̤׻ (Float)
    
    puts seq.complement.translate       # 

    print, puts, p Ƥ̤ɽ뤿 Ruby ɸ᥽åɤǤ ܤȤʤ print ٤ơputs ϲԤưǤĤƤ롢 p ʸʳΥ֥Ȥʹ֤䤹褦ɽƤ롢 ȤħޤΤŬȤʬޤˡ

    require 'pp'

    ȤлȤ褦ˤʤ pp ᥽åɤϡp ɽ䤹ʤޤ

    Bio::Sequence::NA 饹Ρߥλ Bio::Sequence::AA 饹Υ֥Ȥˤʤޤ줾 Bio::Sequence 饹Ѿ Ƥ뤿ᡢ¿Υ᥽åɤ϶̤Ǥ

    Bio::Sequence::NA, AA 饹 Ruby String 饹ѾƤΤ String 饹ĥ᥽åɤȤǤޤ㤨ʬڤФˤ Bio::Sequence 饹 subseq(from,to) ᥽åɤ¾ˡString 饹 [] ᥽åɤȤȤǤޤ

    Ruby ʸ 1 ʸܤ 0 ܤȤƿˤդɬפǤȤС

    puts seq.subseq(1, 3)
    puts seq[0, 3]

    Ϥɤ seq κǽΣʸ atg ɽޤ

    Τ褦ˡString Υ᥽åɤȤϡʪؤ̻Ѥ 1 ʸܤ 1 ܤȤƿ 1 ɬפޤsubseq ᥽åɤ ǤäƤޤޤfrom, to Τɤ餫Ǥ 0 ʲξ 㳰ȯ褦ˤʤäƤޤˡ

    ޤǤν BioRuby ǻȰʲΤ褦ˤʤޤ

    # ιԤ seq = seq("atgcatgcaaaa") Ǥ褤
    bioruby> seq = Bio::Sequence::NA.new("atgcatgcaaaa")
    # ɽ
    bioruby> puts seq
    atgcatgcaaaa
    # ɽ
    bioruby> puts seq.complement
    ttttgcatgcat
    # ʬɽʣܤ飸ܤޤǡ
    bioruby> puts seq.subseq(3,8)
    gcatgc
    #  GC% ɽ
    bioruby> p seq.gc_percent
    33
    # ɽ
    bioruby> p seq.composition
    {"a"=>6, "c"=>2, "g"=>2, "t"=>2}
    # ߥλؤ
    bioruby> puts seq.translate
    MHAK
    # 򳫻ϱȤ
    bioruby> puts seq.translate(2)
    CMQ
    # ֤Υɥơ֥Ѥ
    bioruby> puts seq.translate(1,9)
    MHAN
    # 줿ߥλʸɤɽ
    bioruby> p seq.translate.codes
    ["Met", "His", "Ala", "Lys"]
    # 줿ߥλ򥢥ߥλ̾ɽ
    bioruby> p seq.translate.names
    ["methionine", "histidine", "alanine", "lysine"]
    # 줿ߥλɽ
    bioruby> p seq.translate.composition
    {"K"=>1, "A"=>1, "M"=>1, "H"=>1}
    # 줿ߥλʬ̤ɽ
    bioruby> p seq.translate.molecular_weight
    485.605
    # 
    bioruby> puts seq.complement.translate
    FCMH
    # ʬʣܤ飳ܤޤǡ
    bioruby> puts seq.subseq(1, 3)
    atg
    # ʬʣܤ飳ܤޤǡ
    bioruby> puts seq[0, 3]
    atg

    window_search(window_size, step_size) ᥽åɤȤȡФƥ ɥ򤺤餷ʤ餽줾ʬФԤȤǤޤ Ruby ĹΤҤȤĤǤ֥֥åפˤäơ֤줾Фפ ʷ餫Ƥ˽񤯤ȤǽǤʲǤϡsubseq Ȥѿˤ줾 ʬʤ֥å򷫤֤¹Ԥ뤳Ȥˤʤޤ

    • 100 𤴤Ȥˡ1𤺤Ĥ餷ʤʿ GC% ׻ɽ

      seq.window_search(100) do |subseq|
        puts subseq.gc_percent
      end

    ֥åǼʬ⡢Ʊ Bio::Sequence::NA ޤ Bio::Sequence::AA 饹Υ֥ȤʤΤǡ󥯥饹λƤΥ åɤ¹Ԥ뤳ȤǤޤ

    ޤܤΰ˰ưꤹ뤳Ȥ褦ˤʤäƤΤǡ

    • ɥñ̤Ǥ餷ʤ 15 5 ĴΥڥץɤɽ

      seq.window_search(15, 3) do |subseq|
        puts subseq.translate
      end

    ȤäȤǤޤ˰ưʤüʬ᥽å Τ֤ͤȤ᤹褦ˤʤäƤΤǡ

    • Υ 10000bp Ȥ˥֥ڤˤ FASTA եޥåȤ ΤȤü 1000bp ϥСåפ10000bp ʤ 3' ü Ӽäɽ

      i = 1
      remainder = seq.window_search(10000, 9000) do |subseq|
        puts subseq.to_fasta("segment #{i}", 60)
        i += 1
      end
      puts remainder.to_fasta("segment #{i}", 60)

    Τ褦ʻȴñˤǤޤ

    ɥȰưƱˤȥСåפʤɥ ǤΤǡ

    • ɥ٤

      codon_usage = Hash.new(0)
      seq.window_search(3, 3) do |subseq|
        codon_usage[subseq] += 1
      end
    • 10 Ĵ𤺤ʬ̤׻

      seq.window_search(10, 10) do |subseq|
        puts subseq.molecular_weight
      end

    ȤäѤͤޤ

    ºݤˤ Bio::Sequence::NA ֥Ȥϥե뤫ɤ߹ʸ ꡢǡ١ΤȤäꤷޤȤС

    #!/usr/bin/env ruby
    
    require 'bio'
    
    input_seq = ARGF.read       # Ϳ줿եԤɤ߹
    
    my_naseq = Bio::Sequence::NA.new(input_seq)
    my_aaseq = my_naseq.translate
    
    puts my_aaseq

    Υץ na2aa.rb Ȥơʲα

    gtggcgatctttccgaaagcgatgactggagcgaagaaccaaagcagtgacatttgtctg
    atgccgcacgtaggcctgataagacgcggacagcgtcgcatcaggcatcttgtgcaaatg
    tcggatgcggcgtga

    񤤤ե my_naseq.txt ɤ߹

    % ./na2aa.rb my_naseq.txt
    VAIFPKAMTGAKNQSSDICLMPHVGLIRRGQRRIRHLVQMSDAA*

    Τ褦ˤʤޤʤߤˡΤ餤ʤûȣԤǽ񤱤ޤ

    % ruby -r bio -e 'p Bio::Sequence::NA.new($<.read).translate' my_naseq.txt

    եΤݤʤΤǡϥǡ١ɬפ Ƥߤޤ

    GenBank Υѡ (Bio::GenBank 饹)

    GenBank ΥեѰդƤʼ긵ˤʤϡ ftp://ftp.ncbi.nih.gov/genbank/ .seq եɤޤˡ

    % wget ftp://ftp.hgc.jp/pub/mirror/ncbi/genbank/gbphg.seq.gz
    % gunzip gbphg.seq.gz

    ޤϡƥȥ꤫ ID ʸФ FASTA Ѵ ߤޤ礦

    Bio::GenBank::DELIMITER GenBank 饹Ƥǡ ǡ١Ȥ˰ۤʤ륨ȥζڤʸʤȤ GenBank ξ // ФƤʤƤɤ褦ˤʤäƤޤ

    #!/usr/bin/env ruby
    
    require 'bio'
    
    while entry = gets(Bio::GenBank::DELIMITER)
      gb = Bio::GenBank.new(entry)      # GenBank ֥
    
      print ">#{gb.accession} "         # ACCESSION ֹ
      puts gb.definition                # DEFINITION 
      puts gb.naseq                     # Sequence::NA ֥ȡ
    end

    νǤ GenBank եΥǡ¤˰¸Ƥޤ ե뤫ΥǡϤ򰷤饹 Bio::FlatFile Ѥ뤳Ȥǡ ʲΤ褦˶ڤʸʤɤ򵤤ˤ񤯤ȤǤޤ

    #!/usr/bin/env ruby
    
    require 'bio'
    
    ff = Bio::FlatFile.new(Bio::GenBank, ARGF)
    ff.each_entry do |gb|
      definition = "#{gb.accession} #{gb.definition}"
      puts gb.naseq.to_fasta(definition, 60)
    end

    ΰ㤦ǡȤFASTAեޥåȤΥեɤ߹ȤǤ⡢

    #!/usr/bin/env ruby
    
    require 'bio'
    
    ff = Bio::FlatFile.new(Bio::FastaFormat, ARGF)
    ff.each_entry do |f|
      puts "definition : " + f.definition
      puts "nalen      : " + f.nalen.to_s
      puts "naseq      : " + f.naseq
    end

    Τ褦ˡƱ褦ʽǺѤޤޤ

    ˡ Bio::DB 饹 open ᥽åɤƱͤΤȤǤޤȤС

    #!/usr/bin/env ruby
    
    require 'bio'
    
    ff = Bio::GenBank.open("gbvrl1.seq")
    ff.each_entry do |gb|
      definition = "#{gb.accession} #{gb.definition}"
      puts gb.naseq.to_fasta(definition, 60)    
    end

    ʤɤȽ񤯤ȤǤޤʤνϤޤȤƤޤ)

    ˡGenBank ʣ FEATURES ѡɬפʾФޤ ޤ /tranlation="ߥλ" Ȥ Qualifier ߥλФɽƤߤޤ

    #!/usr/bin/env ruby
    
    require 'bio'
    
    ff = Bio::FlatFile.new(Bio::GenBank, ARGF)
    
    # GenBank ΣȥꤴȤ
    ff.each_entry do |gb|
    
      # FEATURES ǤĤĽ
      gb.features.each do |feature|
    
        # Feature ˴ޤޤ Qualifier ƥϥåѴ
        hash = feature.to_hash
    
        # Qualifier  translation 
        if hash['translation']
          # ȥΥåֹɽ
          puts ">#{gb.accession}"
          puts hash['translation']
        end
      end
    end

    ˡFeature Υݥ˽񤫤Ƥ󤫤饨ȥα ץ饤󥰤Τ /translation= ˽񤫤Ƥ ξɽ٤Ƥߤޤ礦

    #!/usr/bin/env ruby
    
    require 'bio'
    
    ff = Bio::FlatFile.new(Bio::GenBank, ARGF)
    
    # GenBank ΣȥꤴȤ
    ff.each_entry do |gb|
    
      # ACCESSION ֹʪ̾ɽ
      puts "### #{gb.accession} - #{gb.organism}"
    
      # FEATURES ǤĤĽ
      gb.features.each do |feature|
    
        # Feature  position (join ...ʤ) Ф
        position = feature.position
    
        # Feature ˴ޤޤ Qualifier ƥϥåѴ
        hash = feature.to_hash
    
        # /translation= ʤХå
        next unless hash['translation']
    
        # /gene=, /product= ʤɤ Qualifier ̾ʤɤξ򽸤
        gene_info = [
          hash['gene'], hash['product'], hash['note'], hash['function']
        ].compact.join(', ')
        puts "## #{gene_info}"
    
        # position ξˤäƥץ饤󥰡
        puts ">NA splicing('#{position}')"
        puts gb.naseq.splicing(position)
    
        # ߥλʥץ饤󥰤󤫤
        puts ">AA translated by splicing('#{position}').translate"
        puts gb.naseq.splicing(position).translate
    
        # ߥλ/translation= ˽񤫤ƤΤΡ
        puts ">AA original translation"
        puts hash['translation']
      end
    end

    ⤷ѤƤ륳ɥơ֥뤬ǥե (universal) Ȱäꡢ ǽΥɥ "atg" ʳäꡢΥƥ󤬴ޤޤƤꡢ 뤤 BioRuby ˥ХСɽ룲ĤΥߥλ ۤʤˤʤޤ

    ǻѤƤ Bio::Sequence#splicing ᥽åɤϡGenBank, EMBL, DDBJ եޥåȤǻȤƤ Location ɽ򸵤ˡ󤫤 ʬڤФϤʥ᥽åɤǤ

    splicing ᥽åɤΰˤ GenBank Location ʸʳ BioRuby Bio::Locations ֥ȤϤȤǽǤ ̾ϸƤ Location ʸʬ䤹Τޤ Location ʸΥեޥåȤ Bio::Locations ˤĤƾܤΤꤿ BioRuby bio/location.rb 򸫤Ƥ

    • GenBank Υǡ Feature ǻȤƤ Location ʸ

      naseq.splicing('join(2035..2050,complement(1775..1818),13..345)')
    • 餫 Locations ֥ȤѴƤϤƤ褤

      locs = Bio::Locations.new('join((8298.8300)..10206,1..855)')
      naseq.splicing(locs)

    ʤߤˡߥλ (Bio::Sequence::AA) ˤĤƤ splicing ᥽å ѤʬФȤǽǤ

    • ߥλʬڤФʥʥڥץɤʤɡ

      aaseq.splicing('21..119')

    GenBank ʳΥǡ١

    BioRuby ǤϡGenBank ʳΥǡ١ˤĤƤŪʰƱǡ ǡ١Σȥʬʸбǡ١Υ饹ϤС ѡ줿̤֥Ȥˤʤä֤äƤޤ

    ǡ١Υեåȥե뤫飱ȥꤺļФƥѡ줿 ֥ȤФˤϡˤФƤ Bio::FlatFile Ȥޤ Bio::FlatFile.new ΰˤϥǡ١б BioRuby ǤΥ饹 ̾ (Bio::GenBank Bio::KEGG::GENES ʤ) ꤷޤ

    ff = Bio::FlatFile.new(Bio::ǡ١饹̾, ARGF)

    Ф餷Ȥˡ¤ FlatFile 饹ϥǡ١μưǧ ǤޤΤǡ

    ff = Bio::FlatFile.auto(ARGF)

    ȤΤִñǤ

    #!/usr/bin/env ruby
    
    require 'bio'
    
    ff = Bio::FlatFile.auto(ARGF)
    
    ff.each_entry do |entry|
      p entry.entry_id          # ȥ ID
      p entry.definition        # ȥʸ
      p entry.seq               # ǡ١ξ
    end
    
    ff.close

    ˡǡ١Ĥ˺ʤˤ Ruby Υ֥å ѤưʲΤ褦˽񤯤Τ褤Ǥ礦

    #!/usr/bin/env ruby
    
    require 'bio'
    
    Bio::FlatFile.auto(ARGF) do |ff|
      ff.each_entry do |entry|
        p entry.entry_id          # ȥ ID
        p entry.definition        # ȥʸ
        p entry.seq               # ǡ١ξ
      end
    end

    ѡ줿֥Ȥ顢ȥΤ줾ʬФ ᥽åɤϥǡ١˰ۤʤޤ褯ܤˤĤƤ

    • entry_id ᥽å ȥ ID ֹ椬֤
    • definition ᥽å ȥԤ֤
    • reference ᥽å ե󥹥֥Ȥ֤
    • organism ᥽å ʪ̾
    • seq naseq aaseq ᥽å б󥪥֥Ȥ֤

    ʤɤΤ褦˶̲褦ȤƤޤƤΥ᥽åɤƤ ǤϤޤʶ̲λؿˤ bio/db.rb ȡˡޤ٤ʬϳ ǡ١ѡ˰ۤʤΤǡ줾ΥɥȤ˽ޤ

    §Ȥơ᥽å̾ʣξϡ֥ȤȤ֤ޤ Ȥ references ᥽åɤĥ饹ʣ Bio::Reference ֥ Ȥ Array ˤ֤ޤ̤Υ饹Ǥñ reference ᥽å ʤĤ Bio::Reference ֥Ȥ֤ȤäǤ

    PDB Υѡ (Bio::PDB 饹)

    Bio::PDB ϡPDB ɤ߹िΥ饹ǤPDB ǡ١ PDB, mmCIF, XML (PDBML) ΣΥեޥåȤ󶡤Ƥޤ Τ BioRuby бƤΤ PDB եޥåȤǤ

    PDB եޥåȤλͤϡʲ Protein Data Bank Contents Guide ȤƤ

    PDB ǡɤ߹

    PDB Σȥ꤬ 1bl8.pdb Ȥե˳ǼƤϡ Ruby Υեɤ߹ߵǽȤä

    entry = File.read("1bl8.pdb")

    Τ褦ˤ뤳ȤǡȥƤʸȤ entry Ȥѿ 뤳ȤǤޤȥƤѡˤ

    pdb = Bio::PDB.new(entry)

    Ȥޤǥȥ꤬ Bio::PDB ֥ȤȤʤꡢǤդΥǡ Ф褦ˤʤޤ

    PDB եޥåȤ Bio::FlatFile ˤ뼫ưǧǽǤߤ եʣȥޤˤбƤޤ Bio::FlatFile Ȥäƣȥʬɤ߹ˤϡ

    pdb = Bio::FlatFile.auto("1bl8.pdb") { |ff| ff.next_entry }

    ȤޤɤˡǤѿ pdb ˤƱ̤ޤ

    ֥Ȥγع¤

    PDB ȥϡѿʸʤ ID դƤޤ Bio::PDB ֥Ȥ ID Фˤ entry_id ᥽åɤȤޤ

    p pdb.entry_id   # => "1BL8"

    ȥγפ˴ؤб᥽åɤǼФȤǤޤ

    p pdb.definition # => "POTASSIUM CHANNEL (KCSA) FROM STREPTOMYCES LIVIDANS"
    p pdb.keywords   # => ["POTASSIUM CHANNEL", "INTEGRAL MEMBRANE PROTEIN"]

    ¾ˡϿԤʸ¸ˡʤɤξǤޤʤ줾 authors, jrnl, method ᥽åɡˡ

    PDB ǡϡŪˤϣԤĤΥ쥳ɤƤޤ Ԥ꤭ʤǡʣԤ˳Ǽ continuation Ȥ ȤߤѰդƤޤܤϣԣ쥳ɤǤ

    ƹԤƬʸιԤΥǡμ򼨤̾ʥ쥳ɡˤˤʤޤ BioRuby ǤϡHEADER 쥳ɤФƤ Bio::PDB::Record::HEADER 饹 TITLE 쥳ɤФƤ Bio::PDB::Record::TITLE 饹Ȥ褦 Ūˤϳƥ쥳ɤб륯饹򣱤ѰդƤޤ REMARK JRNL 쥳ɤ˴ؤƤϡ줾ʣΥեޥåȤ ¸ߤ뤿ᡢʣΥ饹ѰդƤޤ

    ƥ쥳ɤ˥äȤñˡ record ᥽åɤǤ

    pdb.record("HELIX")

    Τ褦ˤȡ PDB ȥ˴ޤޤƤ HELIX 쥳ɤ Bio::PDB::Record::HELIX 饹Υ֥ȤȤƼǤޤ

    ΤȤդޤʲǤϡPDB ȥΥᥤƤǤΩι¤ ؤǡ¤ΰ򸫤Ƥޤ

    : Bio::PDB::Record::ATOM, Bio::PDB::Record::HETATM 饹

    PDB ȥϡѥ˻DNA,RNAˤ䤽¾ʬҤΩι¤ ŪˤϸҤΣɸޤǤޤ

    ѥޤϳ˻θҤκɸϡATOM 쥳ɤ˳ǼƤޤ б륯饹ϡBio::PDB::Record::ATOM 饹Ǥ

    ѥ˻ʳθҤκɸϡHETATM 쥳ɤ˳ǼƤޤ б륯饹ϡBio::PDB::Record::HETATM 饹Ǥ

    HETATM饹 ATOM 饹ѾƤ뤿ᡢATOM HETATM ᥽åɤλȤϤޤäƱǤ

    ߥλĴʤޤϱ: Bio::PDB::Residue 饹

    ߥλޤϣñ̤ǸҤޤȤ᤿Τ Bio::PDB::Residue Ǥ Bio::PDB::Residue ֥Ȥϡİʾ Bio::PDB::Record::ATOM ֥Ȥޤߤޤ

    ʪ: Bio::PDB::Heterogen 饹

    ѥ˻ʳʬҤθҤϡŪˤʬñ̤ Bio::PDB::Heterogen ˤޤȤƤޤ Bio::PDB::Heterogen ֥Ȥϡİʾ Bio::PDB::Record::HETATM ֥Ȥޤߤޤ

    ʥ: Bio::PDB::Chain 饹

    Bio::PDB::Chain ϡʣ Bio::PDB::Residue ֥Ȥʤ ĤΥѥޤϳ˻ȡʣ Bio::PDB::Heterogen ֥ ʤ룱İʾΤʳʬҤǼǡ¤Ǥ

    ʤȾξϡѥ˻Bio::PDB::Residueˤ ʳʬҡBio::PDB::HeterogenˤΤɤ餫षޤ Chain ҤȤĤޤޤʤ PDB ȥǤξľ礬褦Ǥ

    Chain ˤϡѿʸ ID դƤޤChain ҤȤĤ ޤޤʤ PDB ȥξ϶ʸΤȤ⤢ޤˡ

    ǥ: Bio::PDB::Model

    İʾ Bio::PDB::Chain ޤäΤ Bio::PDB::Model Ǥ 뾽¤ξ硢Model ̾ĤǤNMR ¤ξ硢 ʣ Model ¸ߤ뤳Ȥޤ ʣ Model ¸ߤ硢 Model ˤϥꥢֹ椬դޤ

    ơİʾ Model ޤäΤBio::PDB ֥Ȥˤʤޤ

    Ҥ˥᥽å

    Bio::PDB#each_atom Ƥ ATOM ֤ˣĤé륤ƥ졼Ǥ

    pdb.each_atom do |atom|
      p atom.xyz
    end

    each_atom ᥽åɤ Model, Chain, Residue ֥ȤФƤ Ѥ뤳ȤǤ줾졢 Model, Chain, Residue Τ٤Ƥ ATOM 򤿤ɤ륤ƥ졼ȤƯޤ

    Bio::PDB#atoms Ƥ ATOM Ȥ֤᥽åɤǤ

    p pdb.atoms.size        # => 2820 Ĥ ATOM ޤޤ뤳Ȥ狼

    each_atom Ʊͤ atoms ᥽åɤ Model, Chain, Residue ֥ ФƻѲǽǤ

    pdb.chains.each do |chain|
      p chain.atoms.size    # =>  Chain  ATOM ɽ
    end

    Bio::PDB#each_hetatm ϡƤ HETATM ֤ˣĤé륤ƥ졼Ǥ

    pdb.each_hetatm do |hetatm|
      p hetatm.xyz
    end

    Bio::PDB#hetatms Ƥ HETATM Ȥ֤Τ hetatms ᥽åɤǤ

    p pdb.hetatms.size

    atoms ξƱͤˡModel, Chain, Heterogen ֥Ȥ ФƻѲǽǤ

    Bio::PDB::Record::ATOM, Bio::PDB::Record::HETATM 饹λȤ

    ATOM ϥѥ˻DNARNAˤ븶ҡHETATM Ϥʳ ҤǼ뤿Υ饹ǤHETATM ATOM 饹ѾƤ뤿 Υ饹ǥ᥽åɤλȤϤޤäƱǤ

    p atom.serial       # ꥢֹ
    p atom.name         # ̾
    p atom.altLoc       # Alternate location indicator
    p atom.resName      # ߥλ̾ޤϲʪ̾
    p atom.chainID      # Chain  ID
    p atom.resSeq       # ߥλĴΥֹ
    p atom.iCode        # Code for insertion of residues
    p atom.x            # X ɸ
    p atom.y            # Y ɸ
    p atom.z            # Z ɸ
    p atom.occupancy    # Occupancy
    p atom.tempFactor   # Temperature factor
    p atom.segID        # Segment identifier
    p atom.element      # Element symbol
    p atom.charge       # Charge on the atom

    Υ᥽å̾ϡ§Ȥ Protein Data Bank Contents Guide ܤ˹碌Ƥޤ᥽å̾ resName resSeq Ȥä̾ˡ CamelCaseˤѤƤΤϤΤǤ 줾Υ᥽åɤ֤ǡΰ̣ϡͽ򻲹ͤˤƤ

    ¾ˤ⡢Ĥʥ᥽åɤѰդƤޤ xyz ᥽åɤϡɸ򣳼Υ٥ȥȤ֤᥽åɤǤ Υ᥽åɤϡRuby Vector 饹ѾƣΥ٥ȥ ò Bio::PDB::Coordinate 饹Υ֥Ȥ֤ޤ : VectorѾ饹ΤϤޤ侩ʤ褦ʤΤǡ 衢Vector饹Υ֥Ȥ֤褦ѹ뤫⤷ޤˡ

    p atom.xyz

    ٥ȥʤΤǡ­Ѥʤɤ뤳ȤǤޤ

    # Ҵ֤εΥ
    p (atom1.xyz - atom2.xyz).r  # r ϥ٥ȥͤ᥽å
    
    # Ѥ
    p atom1.xyz.inner_product(atom2.xyz)

    ¾ˤϡθҤб TER, SIGATM, ANISOU 쥳ɤ ter, sigatm, anisou ᥽åɤѰդƤޤ

    ߥλĴ (Residue) ˥᥽å

    Bio::PDB#each_residue ϡƤ Residue ֤é륤ƥ졼Ǥ each_residue ᥽åɤϡModel, Chain ֥ȤФƤ Ѥ뤳ȤǤ줾 Model, Chain ˴ޤޤƤ Residue é륤ƥ졼ȤƯޤ

    pdb.each_residue do |residue|
      p residue.resName
    end

    Bio::PDB#residues ϡƤ Residue Ȥ֤᥽åɤǤ each_residue ƱͤˡModel, Chain ֥ȤФƤѲǽǤ

    p pdb.residues.size

    ʪ (Heterogen) ˥᥽å

    Bio::PDB#each_heterogen Ƥ Heterogen ֤ˤɤ륤ƥ졼 Bio::PDB#heterogens Ƥ Heterogen Ȥ֤᥽åɤǤ

    pdb.each_heterogen do |heterogen|
      p heterogen.resName
    end
    
    p pdb.heterogens.size

    Υ᥽åɤ Residue Ʊͤ Model, Chain ֥ȤФƤ ѲǽǤ

    Chain, Model ˥᥽å

    ƱͤˡBio::PDB#each_chain Ƥ Chain ֤ˤɤ륤ƥ졼 Bio::PDB#chains Ƥ Chain Ȥ֤᥽åɤǤ Υ᥽åɤ Model ֥ȤФƤѲǽǤ

    Bio::PDB#each_model Ƥ Model ֤ˤɤ륤ƥ졼 Bio::PDB#models Ƥ Model Ȥ֤᥽åɤǤ

    PDB Chemical Component Dictionary Υǡɤ߹

    Bio::PDB::ChemicalComponent 饹ϡPDB Chemical Component Dictionary ʵ̾ HET Group DictionaryˤΥѡǤ

    PDB Chemical Component Dictionary ˤĤƤϰʲΥڡ򻲾ȤƤ

    ǡϰʲǥɤǤޤ

    Υ饹ϡRESIDUE ϤޤäƶԤǽ룱ȥѡޤ PDB եޥåȤˤΤбƤޤˡ

    Bio::FlatFile ˤեưȽ̤бƤޤ Υ饹Τ ID 鲽ʪ򸡺ꤹ뵡ǽϻäƤޤ br_bioflat.rb ˤ륤ǥåˤбƤޤΤǡ ɬפʤ餽ѤƤ

    Bio::FlatFile.auto("het_dictionary.txt") do |ff|
      ff.each do |het|
        p het.entry_id  # ID
        p het.hetnam    # HETNAM 쥳ɡʲʪ̾Ρ
        p het.hetsyn    # HETSYM 쥳ɡʲʪ̾
        p het.formul    # FORMUL 쥳ɡʲʪ
        p het.conect    # CONECT 쥳
      end
    end

    Ǹ conect ᥽åɤϡʪη Hash Ȥ֤ޤ ȤСΡΥȥϼΤ褦ˤʤޤ

    RESIDUE   EOH      9
    CONECT      C1     4 C2   O   1H1  2H1
    CONECT      C2     4 C1  1H2  2H2  3H2
    CONECT      O      2 C1   HO
    CONECT     1H1     1 C1
    CONECT     2H1     1 C1
    CONECT     1H2     1 C2
    CONECT     2H2     1 C2
    CONECT     3H2     1 C2
    CONECT      HO     1 O
    END
    HET    EOH              9
    HETNAM     EOH ETHANOL
    FORMUL      EOH    C2 H6 O1

    ΥȥФ conect ᥽åɤƤ֤

    { "C1"  => [ "C2", "O", "1H1", "2H1" ], 
      "C2"  => [ "C1", "1H2", "2H2", "3H2" ], 
      "O"   => [ "C1", "HO" ], 
      "1H1" => [ "C1" ], 
      "1H2" => [ "C2" ], 
      "2H1" => [ "C1" ], 
      "2H2" => [ "C2" ], 
      "3H2" => [ "C2" ], 
      "HO"  => [ "O" ] }

    Ȥ Hash ֤ޤ

    ޤǤν BioRuby ǻȰʲΤ褦ˤʤޤ

    # PDB ȥ 1bl8 ͥåȥͳǼ
    bioruby> ent_1bl8 = getent("pdb:1bl8")
    # ȥȤǧ
    bioruby> head ent_1bl8
    # ȥե¸
    bioruby> savefile("1bl8.pdb", ent_1bl8)
    # ¸줿եȤǧ
    bioruby> disp "data/1bl8.pdb"
    # PDB ȥѡ
    bioruby> pdb_1bl8 = flatparse(ent_1bl8)
    # PDB Υȥ ID ɽ
    bioruby> pdb_1bl8.entry_id
    # getent("pdb:1bl8")  flatparse ˡʲǤOK
    bioruby> obj_1bl8 = getobj("pdb:1bl8")
    bioruby> obj_1bl8.entry_id
    #  HETEROGEN Ȥ˻Ĵ̾ɽ
    bioruby> pdb_1bl8.each_heterogen { |heterogen| p heterogen.resName }
    
    # PDB Chemical Component Dictionary 
    bioruby> het_dic = open("http://deposit.pdb.org/het_dictionary.txt").read
    # եΥХȿǧ
    bioruby> het_dic.size
    # ե¸
    bioruby> savefile("data/het_dictionary.txt", het_dic)
    # եȤǧ
    bioruby> disp "data/het_dictionary.txt"
    # Τ˥ǥå het_dic Ȥǡ١
    bioruby> flatindex("het_dic", "data/het_dictionary.txt")
    # ID  EOH ΥΡΥȥ򸡺
    bioruby> ethanol = flatsearch("het_dic", "EOH")
    # ȥѡ
    bioruby> osake = flatparse(ethanol)
    # Ҵ֤ηơ֥ɽ
    bioruby> osake.conect

    饤 (Bio::Alignment 饹)

    Bio::Alignment 饹Υ饤ȤǼ뤿ΥƥʤǤ Ruby Hash Array ˻ǽǡBioPerl Bio::SimpleAlign ˤʤäƤޤʲ˴ñʻȤ򼨤ޤ

    require 'bio'
    
    seqs = [ 'atgca', 'aagca', 'acgca', 'acgcg' ]
    seqs = seqs.collect{ |x| Bio::Sequence::NA.new(x) }
    
    # 饤ȥ֥Ȥ
    a = Bio::Alignment.new(seqs)
    
    # 󥻥󥵥ɽ
    p a.consensus             # ==> "a?gc?"
    
    # IUPAC ɸۣʱѤ󥻥󥵥ɽ
    p a.consensus_iupac       # ==> "ahgcr"
    
    # ˤĤƷ֤
    a.each { |x| p x }
      # ==>
      #    "atgca"
      #    "aagca"
      #    "acgca"
      #    "acgcg"
    
    # ƥȤˤĤƷ֤
    a.each_site { |x| p x }
      # ==>
      #    ["a", "a", "a", "a"]
      #    ["t", "a", "c", "c"]
      #    ["g", "g", "g", "g"]
      #    ["c", "c", "c", "c"]
      #    ["a", "a", "a", "g"]
    
    # Clustal W Ѥƥ饤ȤԤ
    # 'clustalw' ޥɤƥ˥󥹥ȡ뤵Ƥɬפ롣
    factory = Bio::ClustalW.new
    a2 = a.do_align(factory)

    FASTA ˤƱԤBio::Fasta 饹

    FASTA ե query.pep ФơʬΥޥ()뤤 󥿡ͥåȾΥ(⡼) FASTA ˤƱԤˡǤ ξ SSEARCH ʤɤƱͤ˻ȤȤǤޤ

    ξ

    FASTA 󥹥ȡ뤵Ƥ뤳ȤǧƤʲǤϡ ޥ̾ fasta34 ǥѥ̤äǥ쥯ȥ˥󥹥ȡ ƤꤷƤޤ

    оݤȤ FASTA Υǡ١ե target.pep ȡFASTA 䤤碌󤬤Ĥäե query.pep ޤ

    Ǥϡ䤤碌󤴤Ȥ FASTA ¹ԤҥåȤ evalue 0.0001 ʲΤΤɽޤ

    #!/usr/bin/env ruby
    
    require 'bio'
    
    # FASTA ¹ԤĶ֥Ȥssearch ʤɤǤɤ
    factory = Bio::Fasta.local('fasta34', ARGV.pop)
    
    # եåȥեɤ߹ߡFastaFormat ֥ȤΥꥹȤˤ
    ff = Bio::FlatFile.new(Bio::FastaFormat, ARGF)
    
    # ȥꤺĤ FastaFormat ֥ȤФ
    ff.each do |entry|
      # '>' ǻϤޤ륳ȹԤƤʹԾɸ२顼Ϥɽ
      $stderr.puts "Searching ... " + entry.definition
    
      # FASTA ˤƱ¹ԡ̤ Fasta::Report ֥
      report = factory.query(entry)
    
      # ҥåȤΤ줾Ф
      report.each do |hit|
        # evalue  0.0001 ʲξ
        if hit.evalue < 0.0001
          #  evalue ȡ̾Сåΰɽ
          print "#{hit.query_id} : evalue #{hit.evalue}\t#{hit.target_id} at "
          p hit.lap_at
        end
      end
    end

    factory Ϸ֤ FASTA ¹Ԥ뤿ˡ餫äƤ ¹ԴĶǤ

    嵭ΥץȤ search.rb Ȥȡ䤤碌ȥǡ١ ե̾ˤơʲΤ褦˼¹Ԥޤ

    % ruby search.rb query.pep target.pep > search.out

    FASTA ޥɤ˥ץͿ硢ܤΰ FASTA ޥɥ饤󥪥ץ񤤤Ϥޤktup ͤ ᥽åɤȤäƻꤹ뤳ȤˤʤäƤޤ Ȥ ktup ͤ 1 ˤơȥå 10 ̰ΥҥåȤ ץϡʲΤ褦ˤʤޤ

    factory = Bio::Fasta.local('fasta34', 'target.pep', '-b 10')
    factory.ktup = 1

    Bio::Fasta#query ᥽åɤʤɤ֤ͤ Bio::Fasta::Report ֥ Ǥ Report ֥Ȥ顢͡ʥ᥽åɤ FASTA νϷ̤ ۤƤͳ˼Ф褦ˤʤäƤޤȤСҥåȤ˴ؤ ʤɤμʾϡ

    report.each do |hit|
      puts hit.evalue           # E-value
      puts hit.sw               # Smith-Waterman  (*)
      puts hit.identity         # % identity
      puts hit.overlap          # СåפƤΰĹ 
      puts hit.query_id         # 䤤碌 ID
      puts hit.query_def        # 䤤碌Υ
      puts hit.query_len        # 䤤碌Ĺ
      puts hit.query_seq        # 䤤碌
      puts hit.target_id        # ҥåȤ ID
      puts hit.target_def       # ҥåȤΥ
      puts hit.target_len       # ҥåȤĹ
      puts hit.target_seq       # ҥåȤ
      puts hit.query_start      # Ʊΰ䤤碌ǤγϻĴ
      puts hit.query_end        # Ʊΰ䤤碌ǤνλĴ
      puts hit.target_start     # ƱΰΥåǤγϻĴ
      puts hit.target_end       # ƱΰΥåǤνλĴ
      puts hit.lap_at           # 嵭֤οͤ
    end

    ʤɤΥ᥽åɤǸƤӽФޤΥ᥽åɤ¿ϸ Bio::Blast::Report 饹ȶ̤ˤƤޤ嵭ʳΥ᥽åɤ FASTA ͭͤФ᥽åɤɬפʾϡBio::Fasta::Report 饹ΥɥȤ򻲾ȤƤ

    ⤷ѡμäƤʤ fasta ޥɤμ¹Է̤ɬפ ˤϡ

    report = factory.query(entry)
    puts factory.output

    Τ褦ˡquery ᥽åɤ¹Ԥ factory ֥Ȥ output ᥽åɤȤäƼФȤǤޤ

    ⡼Ȥξ

    ΤȤ GenomeNet (fasta.genome.jp) ǤθΤߥݡȤƤޤ ⡼ȤξϻѲǽʸоݥǡ١ޤäƤޤ ˤĤƤ Bio::Fasta.remote Bio::Fasta.local Ʊ褦˻Ȥ ȤǤޤ

    GenomeNet ǻѲǽʸоݥǡ١

    • ߥλǡ١
      • nr-aa, genes, vgenes.pep, swissprot, swissprot-upd, pir, prf, pdbstr
    • ǡ١
      • nr-nt, genbank-nonst, gbnonst-upd, dbest, dbgss, htgs, dbsts, embl-nonst, embnonst-upd, genes-nt, genome, vgenes.nuc

    ޤ椫鸡ǡ١򤷤ޤ䤤碌μ ȸǡ١μˤäƥץϷޤޤ

    • 䤤碌󤬥ߥλΤȤ
      • оݥǡ١ߥλǡ١ξ硢program 'fasta'
      • оݥǡ١˻ǡ١ξ硢program 'tfasta'
    • 䤤碌󤬳˻ΤȤ
      • оݥǡ١˻ǡ١ξ硢program 'fasta'
      • (оݥǡ١ߥλǡ١ξϸǽ?)

    ץȥǡ١Ȥ߹礻ޤä

    program = 'fasta'
    database = 'genes'
    
    factory = Bio::Fasta.remote(program, database)

    Ȥƥեȥ꡼ꡢξƱ褦 factory.query ʤ Υ᥽åɤǸ¹Ԥޤ

    BLAST ˤƱԤBio::Blast 饹

    BLAST GenomeNet (blast.genome.jp) Ǥθ򥵥ݡȤ ޤǤ Bio::Fasta API ̤ˤƤޤΤǡ嵭 Bio::Blast Ƚ񤭴Ǥפʾ礬¿Ǥ

    ȤС f_search.rb

    # BLAST ¹ԤĶ֥Ȥ
    factory = Bio::Blast.local('blastp', ARGV.pop) 

    ѹƱ褦˼¹ԤǤޤ

    ƱͤˡGenomeNet ѤBLASTԤˤ Bio::Blast.remote Ȥޤ ξ硢programλƤ FASTA Ȱۤʤޤ

    • 䤤碌󤬥ߥλΤȤ
      • оݥǡ١ߥλǡ١ξ硢program 'blastp'
      • оݥǡ١˻ǡ١ξ硢program 'tblastn'
    • 䤤碌󤬱ΤȤ
      • оݥǡ١ߥλǡ١ξ硢program 'blastx'
      • оݥǡ١ǡ١ξ硢program 'blastn'
      • (䤤碌ǡ١6ե졼Ԥ 'tblastx')

    򤽤줾ꤷޤ

    ȤǡBLAST Ǥ "-m 7" ץˤ XML ϥեޥååȤ ˭٤ʤᡢBio::Blast Ruby Ѥ XML 饤֥Ǥ XMLParser ޤ REXML ѲǽʾϡXML ϤѤޤ ξѲǽʾ硢XMLParser Τۤ®ʤΤͥŪ˻Ѥޤ ʤRuby 1.8.0 ʹߤǤ REXML Ruby ΤɸźդƤޤ ⤷ XML 饤֥꤬󥹥ȡ뤵Ƥʤ "-m 8" Υֶڤ Ϸ򰷤褦ˤƤޤΥեޥåȤǤ ǡ¤Τǡ"-m 7" XML νϤȤȤ򤪴ᤷޤ

    Ǥ˸褦 Bio::Fasta::Report Bio::Blast::Report Hit ֥ ȤϤĤ̤Υ᥽åɤäƤޤBLAST ͭΥ᥽åɤɤ ʤΤˤ bit_score midline ʤɤޤ

    report.each do |hit|
      puts hit.bit_score        # bit  (*)
      puts hit.query_seq        # 䤤碌
      puts hit.midline          # 饤Ȥ midline ʸ (*)
      puts hit.target_seq       # ҥåȤ
    
      puts hit.evalue           # E-value
      puts hit.identity         # % identity
      puts hit.overlap          # СåפƤΰĹ 
      puts hit.query_id         # 䤤碌 ID
      puts hit.query_def        # 䤤碌Υ
      puts hit.query_len        # 䤤碌Ĺ
      puts hit.target_id        # ҥåȤ ID
      puts hit.target_def       # ҥåȤΥ
      puts hit.target_len       # ҥåȤĹ
      puts hit.query_start      # Ʊΰ䤤碌ǤγϻĴ
      puts hit.query_end        # Ʊΰ䤤碌ǤνλĴ
      puts hit.target_start     # ƱΰΥåǤγϻĴ
      puts hit.target_end       # ƱΰΥåǤνλĴ
      puts hit.lap_at           # 嵭֤οͤ
    end

    FASTAȤAPI̲ΤȴؤΤᡢʤɤĤξ1ܤ Hsp (High-scoring segment pair) ͤHit֤褦ˤƤޤ

    Bio::Blast::Report ֥Ȥϡʲ˼褦ʡBLASTη̽Ϥ ǡ¤򤽤ΤޤȿǤŪʥǡ¤äƤޤŪˤ

    • Bio::Blast::Report オブジェクトの @iterations に
      • Bio::Blast::Report::Iteration ֥Ȥ Array äƤ Bio::Blast::Report::Iteration ֥Ȥ @hits
        • Bio::Blast::Report::Hits ֥Ȥ Array äƤ Bio::Blast::Report::Hits ֥Ȥ @hsps
          • Bio::Blast::Report::Hsp ֥Ȥ Array äƤ

    Ȥع¤ˤʤäƤꡢ줾줬ͤФΥ᥽åɤ äƤޤΥ᥽åɤξܺ٤䡢BLAST ¹Ԥ׾ʤɤͤ ɬפʾˤϡ bio/appl/blast/*.rb ΥɥȤƥȥɤ ȤƤ

    ¸ BLAST ϥեѡ

    BLAST ¹Ԥ̥ե뤬Ǥ¸ƤäơϤ ˤϡBio::Blast ֥Ȥ餺ˡ Bio::Blast::Report ֥ ȤꤿȤȤˤʤޤˤ Bio::Blast.reports ᥽å ȤޤбƤΤ ǥեȽϥեޥå("-m 0") ޤ "-m 7" ץ XML եޥåȽϤǤ

    #!/usr/bin/env ruby
    
    require 'bio'
    
    # BLASTϤ˥ѡ Bio::Blast::Report ֥Ȥ֤
    Bio::Blast.reports(ARGF) do |report|
      puts "Hits for " + report.query_def + " against " + report.db
      report.each do |hit|
        print hit.target_id, "\t", hit.evalue, "\n" if hit.evalue < 0.001
      end
    end

    Τ褦ʥץ hits_under_0.001.rb 񤤤ơ

    % ./hits_under_0.001.rb *.xml

    ʤɤȼ¹ԤСͿ BLAST η̥ե *.xml ֤˽ ޤ

    Blast ΥС OS ʤɤˤäƽϤ XML ηۤʤǽ ꡢ XML ΥѡޤȤʤȤ褦Ǥξ Blast 2.2.5 ʹߤΥС򥤥󥹥ȡ뤹뤫 -D -m ʤɤΥץ Ȥ߹礻ѤƻƤߤƤ

    ⡼ȸȤɲäˤ

    : ΥϾ桼ǤǽǤ SOAP ʤɤˤ ֥ӥѤ褤Ǥ礦

    Blast NCBI Ϥ͡ʥȤǥӥƤޤΤȤ BioRuby Ǥ GenomeNet ʳˤбƤޤ󡣤ΥȤϡ

    • CGI ƤӽФʥޥɥ饤󥪥ץϤΥѤ˽
    • -m 8 ʤ BioRuby ѡäƤϥեޥåȤ blast ϤФ

    ȤǤСquery äƸ̤ Bio::Blast::Report.new Ϥ褦ʥ᥽åɤǻȤ褦ˤʤޤŪˤϡ ᥽åɤexec_̾פΤ褦̾ Bio::Blast private ᥽å ȤϿȡܤΰˡ֥̾פꤷ

    factory = Bio::Blast.remote(program, db, option, '̾')

    Τ褦˸ƤӽФ褦ˤʤäƤޤ BioRuby ץȤ äƤ館мޤĺޤ

    PubMed ưʸꥹȤ (Bio::PubMed 饹)

    ϡNCBI ʸǡ١ PubMed 򸡺ưʸꥹȤǤ

    #!/usr/bin/env ruby
    
    require 'bio'
    
    ARGV.each do |id|
      entry = Bio::PubMed.query(id)     # PubMed 륯饹᥽å
      medline = Bio::MEDLINE.new(entry) # Bio::MEDLINE ֥
      reference = medline.reference     # Bio::Reference ֥
      puts reference.bibtex             # BibTeX եޥåȤǽ
    end

    ΥץȤ pmfetch.rb ʤɹ̾¸

    % ./pmfetch.rb 11024183 10592278 10592173

    ʤɰѤʸ PubMed ID (PMID) ¤٤ NCBI ˥ MEDLINE եޥåȤѡ BibTeX եޥåȤѴƽϤ ϤǤ

    ¾ˡɤǸ뵡ǽ⤢ޤ

    #!/usr/bin/env ruby
    
    require 'bio'
    
    # ޥɥ饤ͿɤΥꥹȤ򣱤Ĥʸˤ
    keywords = ARGV.join(' ')
    
    # PubMed 򥭡ɤǸ
    entries = Bio::PubMed.search(keywords)
    
    entries.each do |entry|
      medline = Bio::MEDLINE.new(entry) # Bio::MEDLINE ֥
      reference = medline.reference     # Bio::Reference ֥
      puts reference.bibtex             # BibTeX եޥåȤǽ
    end

    ΥץȤ pmsearch.rb ʤɹ̾¸

    % ./pmsearch.rb genome bioinformatics

    ʤɸɤ¤٤Ƽ¹ԤȡPubMed 򥭡 ƥҥåȤʸΥꥹȤ BibTeX եޥåȤǽϤޤ

    ǶǤϡNCBI E-Utils Ȥ֥ץꥱȤȤ 侩ƤΤǡ Bio::PubMed.esearch ᥽åɤ Bio::PubMed.efetch ᥽åɤȤɤǤ礦

    #!/usr/bin/env ruby
    
    require 'bio'
    
    keywords = ARGV.join(' ')
    
    options = {
      'maxdate' => '2003/05/31',
      'retmax' => 1000,
    }
    
    entries = Bio::PubMed.esearch(keywords, options)
    
    Bio::PubMed.efetch(entries).each do |entry|
      medline = Bio::MEDLINE.new(entry)
      reference = medline.reference
      puts reference.bibtex
    end

    ΥץȤǤϡ嵭 pmsearch.rb ȤۤƱ褦ưޤˡ NCBI E-Utils Ѥ뤳Ȥˤꡢоݤդҥåȷʤɤ Ǥ褦ˤʤäƤΤǡⵡǽǤץͿ ˤĤƤ E-Utils Υإץڡ 򻲾ȤƤ

    ʤߤˡǤ bibtex ᥽åɤ BibTeX եޥåȤѴƤޤ ҤΤ褦 bibitem ᥽åɤȤ¾ʶĴ䥤åʤ ʸνϤǤޤ󤬡nature ᥽åɤ nar ʤɡĤλ եޥåȤˤбƤޤ

    BibTeX λȤΥ

    嵭ǽ᤿ BibTeX եޥåȤΥꥹȤ TeX ǻȤˡñˤ ȤƤޤѤʸ

    % ./pmfetch.rb 10592173 >> genoinfo.bib
    % ./pmsearch.rb genome bioinformatics >> genoinfo.bib

    ʤɤȤ genoinfo.bib ե˽¸Ƥ

    \documentclass{jarticle}
    \begin{document}
    \bibliographystyle{plain}
    ۤˤ KEGG ǡ١~\cite{PMID:10592173}ϤդۤǤ롣
    \bibliography{genoinfo}
    \end{document}

    Ȥե hoge.tex 񤤤ơ

    % platex hoge
    % bibtex hoge   #  genoinfo.bib ν
    % platex hoge   #  ʸꥹȤκ
    % platex hoge   #  ʸֹ

    Ȥ̵ hoge.dvi Ǥޤ

    bibitem λȤΥ

    ʸѤ̤ .bib եꤿʤ Reference#bibitem ᥽ ɤνϤȤޤ嵭 pmfetch.rb pmsearch.rb

    puts reference.bibtex

    ιԤ

    puts reference.bibitem

    ˽񤭴ʤɤơϷ̤

    \documentclass{jarticle}
    \begin{document}
    ۤˤ KEGG ǡ١~\cite{PMID:10592173}ϤդۤǤ롣
    
    \begin{thebibliography}{00}
    
    \bibitem{PMID:10592173}
    Kanehisa, M., Goto, S.
    KEGG: kyoto encyclopedia of genes and genomes.,
    {\em Nucleic Acids Res}, 28(1):27--30, 2000.
    
    \end{thebibliography}
    \end{document}

    Τ褦 \begin{thebibliography} ǰϤߤޤ hoge.tex Ȥ

    % platex hoge   #  ʸꥹȤκ
    % platex hoge   #  ʸֹ

    ȣФǤǤ

    OBDA

    OBDA (Open Bio Database Access) ȤϡOpen Bioinformatics Foundation ˤäꤵ줿ǡ١ؤζ̥ˡǤϡ 2002 ǯ12 Arizona Cape Town ˤƳŤ줿 BioHackathon ˤơBioPerl, BioJava, BioPython, BioRuby ʤɤγƥץȤ Сäƺޤ

    • BioRegistry (Directory)
      • ǡ١ɤˤɤΤ褦˼˹ԤꤹȤ
    • BioFlat
      • եåȥե 2 ʬڤޤ BDB Ȥäǥå
    • BioFetch
      • HTTP ͳǥǡ١饨ȥ륵Фȥ饤
    • BioSQL
      • MySQL PostgreSQL ʤɤδطǡ١ǡǼ schema ȡȥФΥ᥽å

    ܺ٤ <URL:http://obda.open-bio.org/> 򻲾ȤƤ 줾λͽ cvs.open-bio.org CVSݥȥ֤Ƥޤ ޤϡ<URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/?cvsroot=obf-common> 黲ȤǤޤ

    BioRegistry

    BioRegistryȤϡեˤäƳƥǡ١Υȥˡ ꤹ뤳ȤˤꡢɤˡȤäƤ뤫ۤȤɰռǡ 뤳ȤǽȤ뤿λȤߤǤ ե̤ͥ

    • (᥽åɤΥѥ᡼)ꤷե
    • ~/.bioinformatics/seqdatabase.ini
    • /etc/bioinformatics/seqdatabase.ini
    • http://www.open-bio.org/registry/seqdatabase.ini

    Ǹ open-bio.org ϡե뤬Ĥʤ Ȥޤ

    BioRuby θߤμǤϡ٤ƤΥեɤ߹ߡ Ʊ꤬̾ʣ¸ߤϡǽ˸ĤäѤޤ ѤȡȤСƥԤ /etc/bioinformatics/ ֤ ΤĿŪѹΤ ~/.bioinformatics/ Ǿ񤭤뤳Ȥ Ǥޤץ seqdatabase.ini ե뤬 bioruby Υ˴ޤޤ ޤΤǻȤƤ

    եȤ stanza եޥåȤȸƤФ񼰤ǵҤޤ

    [ǡ١̾]
    protocol=ץȥ̾
    location=̾

    Τ褦ʥȥƥǡ١ˤĤƵҤ뤳Ȥˤʤޤ ǡ١̾ϡʬѤ뤿Υ٥ʤΤʬ䤹Τ ĤɤºݤΥǡ١̾ȰۤʤäƤƤ⹽ʤ褦Ǥ Ʊ̾Υǡ١ʣȤϺǽ˽񤫤ƤΤ ³褦˻ͽǤƤƤޤΤȤ BioRuby Ǥ ˤбƤޤ

    ޤץȥμˤäƤ location ʳˤMySQL Υ桼̾ʤɡ ɲäΥץ򵭽ҤɬפޤߤΤȤͽǵꤵ Ƥ protocol ȤƤϰʲΤΤޤ

    • index-flat
    • index-berkeleydb
    • biofetch
    • biosql
    • bsane-corba
    • xembl

    ΤȤ BioRuby ǻѲǽʤΤ index-flat, index-berkleydb, biofetch biosql ǤޤBioRegistryƥץȥλͤѹ뤳 ޤBioRubyϤɽǤƤʤ⤷ޤ

    BioRegistry Ȥˤϡޤ Bio::Registry֥Ȥޤ ȡե뤬ɤ߹ޤޤ

    reg = Bio::Registry.new
    
    # ե˽񤤤ǡ١̾ǥФ³
    serv = reg.get_database('genbank')
    
    # ID ꤷƥȥ
    entry = serv.get_by_id('AA2CG')

    serv ե [genbank] ǻꤷ protocol ץȥ б륵Х֥ȤǡBio::SQL Bio::Fetch ʤɤΥ󥹥 󥹤֤äƤϤǤʥǡ١̾Ĥʤä nilˡ

    Ȥ OBDA ̤Υȥ᥽å get_by_id ƤꡢХ ֥˸ͭΥ᥽åɤƤ֤ȤˤʤޤΤǡʲ BioFetch BioSQL β򻲾ȤƤ

    BioFlat

    BioFlat ϥեåȥեФƥǥåȥ® ФȤߤǤǥåμϡRUbyγĥ饤֥˰¸ʤ index-flat Berkeley DB (bdb) Ȥä index-berkeleydb 2ब¸ ޤʤindex-berkeleydb ѤˤϡBDB Ȥ Ruby γĥ 饤֥ӥ󥹥ȡ뤹ɬפޤǥåκˤ bioruby ѥå° br_bioflat.rb ޥɤȤäơ

    % br_bioflat.rb --makeindex ǡ١̾ [--format 饹̾] ե̾

    Τ褦ˤޤBioRubyϥǡեޥåȤμưǧǽܤƤ Τ --format ץϾάǽǤ줦ޤǧʤä BioRuby γƥǡ١Υ饹̾ꤷƤϡ

    % bioflat ǡ١̾ ȥID

    ȤޤŪ GenBank gbbct*.seq ե˥ǥå Ƹ硢

    % bioflat --makeindex my_bctdb --format GenBank gbbct*.seq
    % bioflat my_bctdb A16STM262

    Τ褦ʴˤʤޤ

    Ruby bdb ĥ⥸塼(ܺ٤ http://raa.ruby-lang.org/project/bdb/ ) 󥹥ȡ뤵Ƥ Berkeley DB Ѥƥǥå ȤǤޤξ硢

    % bioflat --makeindex-bdb ǡ١̾ [--format 饹̾] ե̾

    Τ褦 "--makeindex" Τ "--makeindex-bdb" ꤷޤ

    BioFetch

    BioFetch CGI ͳƥФǡ١Υȥ ǡФ CGI Υץ̾顼ɤʤɤƤ ޤ饤Ȥ HTTP Ȥäƥǡ١IDեޥåȤʤɤ ꤷȥޤ

    BioRuby ץȤǤ GenomeNet DBGET ƥХåɤȤ BioFetch ФƤꡢbioruby.org DZѤƤޤΥФ ɤ BioRuby sample/ ǥ쥯ȥäƤޤߤΤȤ BioFetch ФϤ bioruby.org ΤΤ EBI 󤫽ꤷޤ

    BioFetch ȤäƥȥˤϡĤˡޤ

    1. ֥֥饦鸡ˡʰʲΥڡ򳫤

      http://bioruby.org/cgi-bin/biofetch.rb
    2. BioRuby° br_biofetch.rb ޥɤѤˡ

      % br_biofetch.rb db_name entry_id
    3. ץȤ椫 Bio::Fetch 饹ľܻȤˡ

      serv = Bio::Fetch.new(server_url)
      entry = serv.fetch(db_name, entry_id)
    4. ץȤ BioRegistry ͳ Bio::Fetch 饹Ū˻Ȥˡ

      reg = Bio::Registry.new
      serv = reg.get_database('genbank')
      entry = serv.get_by_id('AA2CG')

    ⤷ (4) Ȥ seqdatabase.ini

    [genbank]
    protocol=biofetch
    location=http://bioruby.org/cgi-bin/biofetch.rb
    biodbname=genbank

    ʤɤȻꤷƤɬפޤ

    BioFetch Bio::KEGG::GENES, Bio::AAindex1 Ȥ߹碌

    ΥץϡBioFetch Ȥä KEGG GENES ǡ١źٶ Halobacterium ΥХƥꥢɥץ (VNG1467G) äƤơƱ 褦˥ߥλɸǡ١Ǥ AAindex إå ɸ (BURA740101) Ȥäơ 15 ĴΥɥ򤹤Ǥ

    #!/usr/bin/env ruby
    
    require 'bio'
    
    entry = Bio::Fetch.query('hal', 'VNG1467G')
    aaseq = Bio::KEGG::GENES.new(entry).aaseq
    
    entry = Bio::Fetch.query('aax1', 'BURA740101')
    helix = Bio::AAindex1.new(entry).index
    
    position = 1
    win_size = 15
    
    aaseq.window_search(win_size) do |subseq|
      score = subseq.total(helix)
      puts [ position, score ].join("\t")
      position += 1
    end

    ǻȤäƤ륯饹᥽å Bio::Fetch.query ϰۤ bioruby.org BioFetch ФȤѤΥ硼ȥåȤǤʤΥФŪˤ ΥͥåȤǡƤޤKEGG/GENES ǡ١ hal AAindex ǡ١ aax1 Υȥϡ¾ BioFetch ФǤϼǤ ʤȤ⤢äơ query ᥽åɤȤäƤޤ

    BioSQL

    to be written...

    BioRuby ΥץץλȤ

    BioRuby Υѥåˤ samples/ ǥ쥯ȥʲˤĤΥץ बޤޤƤޤŤΤ⺮äƤޤ̤ȤƤ⽽ʬȤ ʤΤǡŪ򤤥ץ󶡤ϴޤǤ

    to be written...

    ʤ

    ¾Υ塼ȥꥢŪʥɥȤȤƤϡBioRuby Wiki֤Ƥ BioRuby in Anger ޤ

    • (1) BioRuby 1.2.1 ΥСǤϡsetup.rb Τ install.rb ѤޤޤʲΤ褦3ʳƧɬפޤ

      % ruby install.rb config
      % ruby install.rb setup
      # ruby install.rb install
    • (2) BioRuby 1.0.0 ΥСǤϡgetseq, getent, getobj γƥޥɤΤˡseq, ent, obj γƥޥɤѤƤ
    • (3) BioRuby 0.7.1 ΥСǤϡBio::Sequence::NA 饹 Bio::sequence::AA 饹Τɤ餫Υ֥Ȥˤʤޤ 󤬤ɤΥ饹°뤫 Ruby class ᥽åɤѤ

      bioruby> p cdc2.class
      Bio::Sequence::AA
      
      bioruby> p psaB.class
      Bio::Sequence::NA

      Τ褦Ĵ٤뤳ȤǤޤưȽְ꤬äƤʤɤˤ to_naseq, to_aaseq ᥽åɤǶŪѴǤޤ

    • (4) seq ᥽åɤϡɤ߹ǡμˤäƤϡ𡦥ߥλ ɤˤƤϤޤʤΤ Bio::Sequence::Generic 饹 String 饹Υ֥Ȥ֤礬뤫⤷ޤ
    • (5) NCBI, EBI, TogoWS ̵̤ getseq, getent, getobj ޥ ѲǽȤʤäΤ BioRuby 1.3.0 ʹߤǤ
    bio-1.4.3.0001/doc/RELEASE_NOTES-1.4.1.rdoc0000644000004100000410000000603512200110570016774 0ustar www-datawww-data= BioRuby 1.4.1 RELEASE NOTES A lot of changes have been made to the BioRuby 1.4.1 after the version 1.4.0 is released. This document describes important and/or incompatible changes since the BioRuby 1.4.0 release. For known problems, see KNOWN_ISSUES.rdoc. == New features === PAML Codeml support is significantly improved PAML Codeml result parser is completely rewritten and is significantly improved. The code is developed by Pjotr Prins. === KEGG PATHWAY and KEGG MODULE parser Parsers for KEGG PATHWAY and KEGG MODULE data are added. The code is developed by Kozo Nishida and Toshiaki Katayama. === Bio::KEGG improvements Following new methods are added. * Bio::KEGG::GENES#keggclass, keggclasses, names_as_array, names, motifs_as_strings, motifs_as_hash, motifs * Bio::KEGG::GENOME#original_databases === Test codes are added and improved. Test codes are added and improved. Tney are developed by Kazuhiro Hayashi, Kozo Nishida, John Prince, and Naohisa Goto. === Other new methods * Bio::Fastq#mask * Bio::Sequence#output_fasta * Bio::ClustalW::Report#get_sequence * Bio::Reference#== * Bio::Location#== * Bio::Locations#== * Bio::FastaNumericFormat#to_biosequence == Bug fixes === Bio::Tree Following methods did not work correctly. * Bio::Tree#collect_edge! * Bio::Tree#remove_edge_if === Bio::KEGG::GENES and Bio::KEGG::GENOME * Fixed bugs in Bio::KEGG::GENES#pathway. * Fixed parser errors due to the format changes of KEGG GENES and KEGG GENOME. === Other bug fixes * In Bio::Command, changed not to call fork(2) on platforms that do not support it. * Bio::MEDLINE#initialize should handle continuation of lines. * Typo and a missing field in Bio::GO::GeneAssociation#to_str. * Bug fix of Bio::FastaNumericFormat#to_biosequence. * Fixed UniProt GN parsing issue in Bio::SPTR. == Incompatible changes === Bio::PAML::Codeml::Report The code is completely rewritten. 
See the RDoc for details. === Bio::KEGG::ORTHOLOGY Bio::KEGG::ORTHOLOGY#pathways is changed to return a hash. The old pathway method is renamed to pathways_in_keggclass for compatibility. === Bio::AAindex2 Bio::AAindex2 now copies each symmetric element for lower triangular matrix to the upper right part, because the Matrix class in Ruby 1.9.2 no longer accepts any dimension mismatches. We think the previous behavior is a bug. === Bio::MEDLINE Bio::MEDLINE#reference no longer puts empty values in the returned Bio::Reference object. We think the previous behavior is a bug. We also think the effect is very small. == Known issues The following issues are added or updated. See KNOWN_ISSUES.rdoc for other already known issues. === String escaping of command-line arguments in Ruby 1.9.X on Windows After BioRuby 1.4.1, in Ruby 1.9.X running on Windows, escaping of command-line arguments are processed by the Ruby interpreter. Before BioRuby 1.4.0, the escaping is executed in Bio::Command#escape_shell_windows, and the behavior is different from the Ruby interpreter's one. Curreltly, due to the change, test/functional/bio/test_command.rb may fail on Windows with Ruby 1.9.X. bio-1.4.3.0001/doc/RELEASE_NOTES-1.4.2.rdoc0000644000004100000410000001224012200110570016770 0ustar www-datawww-data= BioRuby 1.4.2 RELEASE NOTES A lot of changes have been made to the BioRuby 1.4.2 after the version 1.4.1 is released. This document describes important and/or incompatible changes since the BioRuby 1.4.1 release. For known problems, see KNOWN_ISSUES.rdoc. == New features === Speed-up of Bio::RestrictionEnzyme::Analysis.cut The running speed of Bio::RestrictionEnzyme::Analysis.cut is significantly increased. The new code is 50 to 80 fold faster than the previous code when cutting 1Mbp sequence running on Ruby 1.9.2p180. The code is written by Tomoaki NISHIYAMA and Naohisa Goto. 
=== New classes Bio::DDBJ::REST, REST interface for DDBJ web service For DDBJ Web API for Biology (WABI) web service, in addition to SOAP, REST (REpresentational State Transfer) interface is added as Bio::DDBJ::REST. Currently, only selected APIs are implemented. === Bio::Blast with remote DDBJ server uses REST instead of SOAP Bio::Blast with remote DDBJ server uses REST instead of SOAP, because Soap4r (SOAP library for Ruby) does not work well with Ruby 1.9. We can now use remote DDBJ BLAST server with Ruby 1.9. === Tutorial is updated The Tutorial.rd is updated by Pjotr Prins and Michael O'Keefe. === Many unit tests are added Added many unit tests for Bio::GenBank, Bio::GenPept, Bio::NBRF, Bio::PDB and so on. Most of them are developed by Kazuhiro Hayashi during the Google Summer of Code 2010. === Other new features * New method Bio::Fastq#to_s for convenience. Note that the use of the method may cause loss of performance. To get each input sequence entry as-is, consider using Bio::FlatFile#entry_raw. To output fastq format data, consider using Bio::Sequence#output(:fastq). * New methods Bio::NCBI::REST::EFetch.nucleotide and protein, to get data from "nucleotide" and "protein" database respectively. Because NCBI changed not to accept "gb" format for the database "sequence", the two new methods are added for convenience. * In BioRuby Shell, efetch method uses the above new methods. * In GenomeNet remote BLAST execution, database "mine-aa" and "mine-nt" with KEGG organism codes are now supported. * Support for Ruby 1.9.2 / 1.9.3 is improved. == Bug fixes === Bio::Blast * Failure of remote BLAST execution is fixed, due to the changes in GenomeNet and DDBJ. * When executing remote BLAST with "genomenet" server, options "-b" and "-v" are now correctly used to limit the number of hits to be reported. === Bio::SPTR (Bio::UniProt) * Due to the UniProtKB format changes, ID, DE, and WEB RESOURCE of CC lines were not correctly parsed.
See also below about incompatible change of the fix. === Other bug fixes * Bio::Reference#pubmed_url is updated to follow recent NCBI changes. * Fixed: Bio::Newick#reparse failure. * Fixed: In Bio::MEDLINE#reference, doi field should be filled. * Fixed: Bio::Reference#endnote fails when url is not set. * Fixed: Bio::FastaFormat#query passes nil to the given factory object. * Fixed: In BioRuby Shell, efetch() with no additional arguments fails because of the NCBI site changes. * Fixed: In BioRuby Shell, getent() fails when EMBOSS seqret is not found. * Fixed: In BioRuby Shell, demo() fails due to the above two issues. == Incompatible changes === Bio::Sequence#output(:fastq) In Fastq output formatter, default width value is changed from 70 to nil. The nil means "without wrapping". The new default behavior without wrapping is generally good with many recent applications that read fastq. === Bio::SPTR CC line topic "WEB RESOURCE" In the return value of Bio::SPTR#cc('WEB RESOURCE'), "NAME" and "NOTE" are now renamed to "Name" and "Note", respectively. The change is due to the UniProt format change since UniProtKB release 12.2 of 11-Sep-2007. (See http://www.uniprot.org/docs/sp_news.htm#rel12.2 for details.) Note that "Name" and "Note" are used even when parsing older format. The change would also affect Marshal.dump (and YAML.dump) data. === Bio::Blast with the remote GenomeNet server When executing remote BLAST with "genomenet" server, options "-b" and "-v" are now correctly used to limit the number of hits to be reported. In 1.4.1 and before, "-B" and "-V" were mistakenly used for the purpose. === Bio::Blast with the remote DDBJ server Bio::Blast with remote DDBJ server uses REST instead of SOAP. 
=== Bio::RestrictionEnzyme internal data structure change Due to the speedup, internal data structure of the following classes are changed: Bio::RestrictionEnzyme::Range::SequenceRange, Bio::RestrictionEnzyme::Range::SequenceRange::CalculatedCuts, Bio::RestrictionEnzyme::Range::SequenceRange::Fragment. This indicates that Marshal.dump (and YAML.dump) data generated by older versions cannot be loaded by the new version, and vice versa, although public APIs of the classes keep compatibility. == Known issues The following issues are added or updated. See KNOWN_ISSUES.rdoc for other already known issues. * Bio::SPTR should be updated to follow UniProtKB format changes. * Problems observed only with Ruby 1.8.5 or earlier will not be fixed. * Descriptions about very old RubyGems 0.8.11 or earlier and about CVS repository are moved from README.rdoc. == Other important news * Required ruby version is now Ruby 1.8.6 or later (except 1.9.0). bio-1.4.3.0001/doc/ChangeLog-before-1.3.10000644000004100000410000037566112200110570017122 0ustar www-datawww-data2009-09-02 Naohisa Goto * BioRuby 1.3.1 is released. 2009-09-02 Naohisa Goto * lib/bio/version.rb Preparation for bioruby-1.3.1 release. (commit 3d86bc6d519c4c3319e5a1b2ca36f8f5177f127f) 2009-08-31 Naohisa Goto * lib/bio/sequence/compat.rb Document bug fix: Bio::Sequence::(NA|AA|Generic)#to_fasta are currently not deprecated. (commit 0e0f888a73a60c0f0a7b103019aeb82c8f063c4e) 2009-08-28 Naohisa Goto * lib/bio/appl/sim4/report.rb Bug fix: parse error when unaligned regions exist. Thanks to Tomoaki NISHIYAMA who reports the bug ([BioRuby] SIM4 parser). * test/unit/bio/appl/sim4/test_report.rb, test/data/sim4/complement-A4.sim4 To confirm the bug fix, tests are added with new test data. (commit 02d531e36ecf789f232cf3e05f85391b60279f00) 2009-08-27 Naohisa Goto * lib/bio/appl/sim4/report.rb Bug fix: parse errpr when the alignment of an intron is splitted into two lines. 
Thanks to Tomoaki NISHIYAMA who sent the patch ([BioRuby] SIM4 parser). (commit 137ec4c3099236c89ac4a0157d0c77ba13d1875c) 2009-08-27 Naohisa Goto * lib/bio/appl/sim4/report.rb Ruby 1.9 support: String#each_line instead of String#each (commit b65f176f3be74c21a8bb8fc2a6f204fb8ab08fd6) 2009-08-27 Naohisa Goto * test/unit/bio/appl/sim4/test_report.rb, test/data/sim4/simple-A4.sim4, test/data/sim4/simple2-A4.sim4 Newly added unit tests for Bio::Sim4::Report with test data. The test data is based on the data provided by Tomoaki NISHIYAMA ([BioRuby] SIM4 parser), and most of the sequence data is replaced by random sequence. (commit 0f53916dd728b871f02d1caf0c5105a2e1c58bc4) 2009-08-18 Naohisa Goto * COPYING, COPYING.ja, GPL, LGPL, LEGAL License files are added. COPYING, COPYING.ja, GPL, LGPL are taken from Ruby's svn repository. LEGAL is written for BioRuby. (commit c65531331e840562ac7342f1896f7e2a3aac6c88) * README.rdoc Added descriptions about license to refer COPYING and LEGAL. (commit d88015a2e3b2c5f7c2a931261819b908084d0179) * COPYING Modified COPYING for BioRuby, following Matz's recommendation in [ruby-list:46293]. (commit 2c30e7342e33c878bd7132a302974364c54caad9) 2009-05-06 Naohisa Goto * lib/bio/appl/fasta.rb, lib/bio/appl/fasta/format10.rb Restored Bio::Fasta.parser for keeping compatibility, and added forgotten require. (commit 97b9284109c9a4431b92eab208509e1df6069b4b) 2009-05-02 Naohisa Goto * lib/bio/appl/fasta.rb * Bug fix: Bio::Fasta::Report should be autoloaded. * Removed useless method Bio::Fasta::Report.parser because only the "format10" parser is available for a long time and dynamic require is a potential security hole. * Removed "require" lines in Bio::Fasta#parse_result. (commit 3d3edc44127f4fd97abcc17a859e36623facdc7c) 2009-05-02 Naohisa Goto * lib/bio/appl/fasta/format10.rb Bug Fix: stack overflow problem, and added support for multiple query sequences. * Bug fix: stack overflow problem. 
Thanks to Fredrik Johansson who reports the bug ([BioRuby] Made a change in format10.rb). * Changed to set @entry_overrun when a report containing multiple query sequences' results is given. * New methods Bio::Fasta::Report#query_def and query_len. * To support reading a search result with multiple query sequences by using Bio::FlatFile, a flatfile splitter class Bio::Fasta::Report::FastaFormat10Splitter is newly added. (commit e57349594427ad1a51979c9d4e0c3efcffd160c2) 2009-04-27 Naohisa Goto * test/unit/bio/test_feature.rb, test/unit/bio/test_reference.rb class name conflict of NullStderr (commit 1607b60d905eb8cb5ca289e357cbb2cbb7a118ff) * test/unit/bio/appl/test_blast.rb Bug fix: method redefined: TestBlast#test_self_local (commit 9caa4c9d94126b3568c439878876062c84afbdec) * test/unit/bio/appl/hmmer/test_report.rb Bug fix: method name conflict: TestHMMERReportClassMethods#test_reports_ary (commit cc3e1b85cf885736a7b1293c7e0951e099cd7e6b) * test/unit/bio/appl/bl2seq/test_report.rb * Bug fix: method redefined: TestBl2seqReport#test_undefed_methods. To fix the bug, the second "test_undefed_methods" is renamed to "test_undefed_methods_for_iteration". * Assertions are changed in the first "test_undefed_methods". * Fixed typo. (commit 7e1a550de3dffde3fd8808803e44f35072e4d40b) 2009-04-27 Naohisa Goto * lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb Bug fix: attribute "strands_for_display" is disabled because the method definition with the same name overwrites the attribute definition. (commit af07e2784faacc51366ddfab5bedd45841734f53) * lib/bio/db/embl/embl.rb Bug fix: removed duplicated alias (commit 65c360f39580322b5eee64b7c2d8274ff7b8dfff) * lib/bio/appl/pts1.rb Bug fix: removed unused attribute "function" in Bio::PTS1 because method definition with the same name appeared later wipes out the attribute definition. 
(commit 81cbe9da55217d186e6dc9c1bfb56a39fba73590) 2009-04-27 Naohisa Goto * lib/bio/appl/blast/format0.rb Bug fix: forgotten "if false #dummy" in the attribute "query_to". (commit 7c2e8d0d11baf8cb9e25207ba5b27d4e9d756054) * lib/bio/appl/blast.rb Bug fix: suppressing warning messages when $VERBOSE=true. * To suppress warning message "lib/bio/appl/blast.rb:402: warning: useless use of :: in void context", a dummy variable is added. * The attribute "server" is changed to attr_reader because "server=" is defined later. (commit f9404276d2ddcf15966cab74c419733ccd748af2) 2009-04-27 Naohisa Goto * test/unit/bio/test_sequence.rb Fixed test name overwriting another test name in TestSequence. Fixed by Andrew Grimm at git://github.com/agrimm/bioruby.git in Thu Feb 19 22:30:26 2009 +1100. (commit a6c39a719b284a43fe8c67edc1f2826d2941647f) 2009-04-26 Naohisa Goto * test/unit/bio/appl/gcg/test_msf.rb, test/data/gcg/pileup-aa.msf Newly added unit tests for Bio::GCG::Msf with test data (commit a1819cd3b772300ef5bea2ebb63376e5b9fc64da) 2009-04-23 Naohisa Goto * lib/bio/appl/gcg/msf.rb * Bug fix: incorrect parsing of GCG clustalw+ results. * Small refactoring of codes (commit 2eae8f722aa888c85d54aa958eb117d49ce42f8b) 2009-04-21 Naohisa Goto * lib/bio/appl/gcg/msf.rb * Bug fix: Bio::GCG::Msf fails parsing when two dots appear at the end of a line. Thanks to Fredrik Johansson who reports the bug and sent the patch ([BioRuby] Parsing MSF alignment file). * bug fix: misspelling of "ALIGNMENT". (commit 44ca52443e0249f54c43f92d08cf083cdd12c692) 2009-04-21 Naohisa Goto * lib/bio/io/pubmed.rb Bug fix: Bio::PubMed#efetch should return an array of string objects. Thanks to Masahide Kikkawa and Fredrik Johansson who report the bug (in "[BioRuby] Bio::PubMed.efetch, bug?" and "[BioRuby] PubMed.efetch error"). (commit a48a9a35b87dead069fe328ba7086977304af995) * test/functional/bio/io/test_pubmed.rb Newly added functional test for Bio::PubMed.
(commit bf5ba6d4503f3ddb0ca31673882f5b396a932bbe) 2009-04-21 Naohisa Goto * lib/bio/io/ncbirest.rb * Bug fix: Bio::NCBI::REST#esearch ignores hash["retstart"]. * In Bio::NCBI::REST#esearch, the priority of limit and hash["retmax"] is clarified: limit is used unless it is nil. In addition, default value of limit is changed to nil. If both limit and hash["retmax"] are nil, default value 100 is used. * Bio::NCBI::REST::NCBI_INTERVAL is changed to 1. (commit fc0339fe8a42cd00199cfdc938590ae9626551bc) 2009-03-19 Naohisa Goto * lib/bio/io/ncbirest.rb Bug fix: Bio::PubMed.efetch/esearch ignores retmax after refactoring of Bio::PubMed. efetch/esearch methods in Bio::NCBI::REST are also affected. Thanks to Craig Knox who reports the bug ([BioRuby] efetch/esearch broken). Bug fix by Toshiaki Katayama. (commit 51c3223e033b2992a7bd95da282f88164406ff92) 2009-03-19 Naohisa Goto * doc/Tutorial.rd GO example using Ensembl API is moved to Appendix. (commit d677c3d7cbd2f4ff6193255e0e30366ecd0aa421) fixed RD text formatting issues (commit 642577ae70647f8bd0ae3bcc8ddc118cecc886c7) * doc/Tutorial.rd.html doc/Tutorial.rd.html is regenerated (commit dd878d3ecd83ad5e61a21bbf90d27d1c89d5f12d) 2009-03-18 Naohisa Goto * doc/Tutorial.rd Reverted a Blast example code because it aims to tell usage of blocks to explore a Blast report, and getting the "result" is only a side effect and not the main purpose. (commit db172eb1e5f1cbc17317bff8043cc07bf6597073) 2009-03-18 Pjotr Prins * doc/Tutorial.rd Updated Tutorial. (commit b3363ee94cfb86540a7d286ccac608b74737b30d) Updated tutorial with links and gene ontology example by Marc Hoeppner. (commit 27a5019ca7a41211055550f9731672aa71a3a4b3) Fixed doctests in documentation. (commit 9a21a1750a9584152fae669be132af89086e7d5f) Added working BLAST example. (commit 45c27f109f069db3b6208fd59cc2b683a5bca5a9) Added BLAST example. All doctests work again in Tutorial.rd. (commit 05edf3092d0322b8f2775e60448700024d8cb343) Slightly improved remarks. 
Tutorial.rd runs its doctests. (commit 541a4cf0d9d0d3904f1570e1258a847a22f9238b) reference to github.com/pjotrp/bioruby-support (commit ec5dfb1544e32034457b0dd36a9dc50fef6c0fbe) Added info on how to split large BLAST XML files. (commit 6c9a80cde4be6c4c3d02b77c44dfa8bfbf0a41ff) Updated Tutorial (commit 07267e2d9c5b774bb0f41b795f6be1f24ff175ba) 2009-03-17 Raoul Jean Pierre Bonnal * lib/bio/db/biosql/sequence.rb Fixed: taxonomy, do not report node_rank of type "class". GenBanks Tests I/O passed. (commit ba5400eaf6de0f38341825cb0fbc24ca1d99eeba) 2009-03-17 Raoul Jean Pierre Bonnal * test/unit/bio/db/biosql/tc_biosql.rb Removed last "\n" from reference GenBank string (commit 2de87ceef220056a502c5a9a3457abdf1d93fab0) 2009-03-17 Raoul Jean Pierre Bonnal * lib/bio/db/biosql/sequence.rb Fix: reference deletion from bioentry deletion, when reference is a leaf (no more bioentries connected to it) (commit 6f3195a023cab8ee64eb3e3bb9c491534cd80603) 2009-03-17 Raoul Jean Pierre Bonnal * lib/bio/io/biosql/ar-biosql.rb Added: relation between bioentry and references, also through references by bioentry_reference. This is useful to accomplish complete bioentry's delete. (commit d9e5876231d451c9ab1a2e75702f9fe70b1509b8) 2009-03-17 Raoul Jean Pierre Bonnal * test/unit/bio/db/biosql/tc_biosql.rb Fixed: title test (commit 45e2d5e21bc1f93240827dee2e46ac02d24cf696) 2009-03-17 Raoul Jean Pierre Bonnal * lib/bio/db/genbank/common.rb Fix: Delete added dot at the end of TITLE. (commit 2a29c9e7fd41da9d6bf065b3d6dbd473e4d03bbe) 2009-03-17 Raoul Jean Pierre Bonnal * lib/bio/db/biosql/sequence.rb Add: bioentry_qualifier_value recognize if it's handling data (reader) and format it accordingly with GenBank/EMBL format ex: 26-SEP-2006 . (commit 05ba3f1647d4cc71747ada95c9bb7f2a5a44b518) 2009-03-17 Raoul Jean Pierre Bonnal * lib/bio/db/biosql/biosql_to_biosequence.rb Fixed: date_modified, the code is moved to Bio::SQL::Sequence#bioentry_qualifier_anchor#method_reader.
It's most an exercise of style than good programming. date_modifier reader should be a method apart. (commit c9a980877c9222e05aa0d9163ba51aa2c77a7146) 2009-03-17 Raoul Jean Pierre Bonnal * test/unit/bio/db/biosql/tc_biosql.rb, test/unit/bio/db/biosql/test_biosql.rb, test/unit/bio/db/biosql/ts_suite_biosql.rb Add: BioSQL's TestSuite, alpha stage (commit be1839b3bf3008fe234e8f89d85302caef83398f) 2009-03-17 Raoul Jean Pierre Bonnal * lib/bio/db/biosql/biosql_to_biosequence.rb Fix: date_modifier biosequence adapter (commit a7c1c717e1684fd9117fc2d096e8d6e7c647b62d) 2009-03-17 Raoul Jean Pierre Bonnal * test/unit/bio/db/biosql/test_biosql.rb Added preliminar tests using connection with jdbcmysql. Test are focused on input/output coherence. (commit 0ada9f8b4bb8553bf076caca76bc76a4d6791c6b) 2009-03-17 Raoul Jean Pierre Bonnal * lib/bio/db/biosql/biosql_to_biosequence.rb Fixed: GI:xxxx reference on VERSION's line using biosql/to_biosequence.output(:genbank) (commit 35e1dce1a75ed967ec707457ed3655ce927f83c3) 2009-03-17 Raoul Jean Pierre Bonnal * lib/bio/db/biosql/sequence.rb added other_seqids as alias of identifier, for the adapter. Export problem of GI in output(:genbank) from biosql/biosequence. (commit 7f69ea73dcd28e76743bd5213c3719cf7d9d44a0) 2009-03-17 Raoul Jean Pierre Bonnal * lib/bio/db/biosql/biosql_to_biosequence.rb (Changed comments only) Added TODOs as comments: to_biosequence.output(:genbank) some major and minor problems. 1) Major. GI: is not exported IN(VERSION X64011.1 GI:44010), OUT(VERSION X64011.1) 1.1) Db storage is ok, GI is saved into identifier of bioentry 2) Moderate. date wrong format IN(26-SEP-2006), OUT(2006-09-26) 3) Minor. Organism in output as more terms. 4) Minor. 
Title has a dot at the end, input was without ref for GI in genbank are functions ncbi_gi_number/other_seqids (commit 03662955a45e1c3d5d32150b423a92d40c0c33c7) 2009-03-17 Raoul Jean Pierre Bonnal * lib/bio/io/sql.rb get and first converted from DataMapper to ActiveRecord (commit 78b37c61bbb0a16bbee6c3dd16bff7c292e77695) 2009-03-17 Raoul Jean Pierre Bonnal * lib/bio/db/biosql/sequence.rb converted syntax of first function from DataMapper to ActiveRecord (commit 822a35794b958906e5d4bfb6d5b9d74efb360ea7) converted .get! method in find with conditions (commit 1f3012ba93a9c462e8b1daa762372a55534db29c) 2009-03-17 Raoul Jean Pierre Bonnal * lib/bio/io/biosql/biosql.rb establish_connection rewrite and update Class.first call with ActiveRecord syntax. Coming from DataMapper. (commit 66fb6ff597a2ebf2f2dc1ebe7e505fbcc46c993c) 2009-03-17 Raoul Jean Pierre Bonnal * lib/bio/db/biosql/sequence.rb Version developed with DataMapper, need to be tested with ActiveRecord -current ORM-. (commit 7bf5d24364fce8f3a466697e479af5f28c672265) 2009-03-17 Raoul Jean Pierre Bonnal * lib/bio/io/biosql/config/database.yml Configured development database with jdbcmysql adapter. (commit 7e143b1d0451bce6865e560febc5c57048210416) 2009-03-17 Raoul Jean Pierre Bonnal * lib/bio/io/biosql/ar-biosql.rb Newly added lib/bio/io/biosql/ar-biosql.rb: In one file definition of all BioSQL's ActiveRecords classes. (commit 87f7bc6ac844583adc07e409c9fac7fa1f275d2b) class Bioentry: added has_many obejct_bioentry_path and subject_bioentry_path. (commit e969eae59d0de098e094ea21007c34371bab3bdd) class BioentryRelationship: added relation to Term class. (commit f77b28045f0631391c6f4ad4e9eed15d296bec95) class Biosequence: changed to composite primary keys, :bioentry_id, :version. (commit c6683346e4c13d8969bb859e882698b90d0828f1) class SeqfeatureQualifierValue: find function deleted, wrong here. 
(commit 89f64af363d0b204e50ea71924909724d56bccc4) * lib/bio/io/sql.rb Separated connection (see lib/bio/io/biosql.rb) from definition of public methods. (commit 24b9e6473ce36e3151c560ea26c3b95105656ef4) 2009-03-17 Raoul Jean Pierre Bonnal * lib/bio/io/biosql To integrate BioSQL ActiveRecords classes to one file, as the first step, following 28 files listed below are deleted. In the later commit, they will be integrated into one file, lib/bio/io/biosql/ar-biosql.rb. (commit 0ea9f08b36e10e50c855d4346194849e8e7a263b) * lib/bio/io/biosql/biodatabase.rb * lib/bio/io/biosql/bioentry.rb * lib/bio/io/biosql/bioentry_dbxref.rb * lib/bio/io/biosql/bioentry_path.rb * lib/bio/io/biosql/bioentry_qualifier_value.rb * lib/bio/io/biosql/bioentry_reference.rb * lib/bio/io/biosql/bioentry_relationship.rb * lib/bio/io/biosql/biosequence.rb * lib/bio/io/biosql/comment.rb * lib/bio/io/biosql/dbxref.rb * lib/bio/io/biosql/dbxref_qualifier_value.rb * lib/bio/io/biosql/location.rb * lib/bio/io/biosql/location_qualifier_value.rb * lib/bio/io/biosql/ontology.rb * lib/bio/io/biosql/reference.rb * lib/bio/io/biosql/seqfeature.rb * lib/bio/io/biosql/seqfeature_dbxref.rb * lib/bio/io/biosql/seqfeature_path.rb * lib/bio/io/biosql/seqfeature_qualifier_value.rb * lib/bio/io/biosql/seqfeature_relationship.rb * lib/bio/io/biosql/taxon.rb * lib/bio/io/biosql/taxon_name.rb * lib/bio/io/biosql/term.rb * lib/bio/io/biosql/term_dbxref.rb * lib/bio/io/biosql/term_path.rb * lib/bio/io/biosql/term_relationship.rb * lib/bio/io/biosql/term_relationship_term.rb * lib/bio/io/biosql/term_synonym.rb 2009-03-17 Naohisa Goto * Rakefile Rake::Task#execute now needs to take an argument. Currently, nil is given. 2009-02-20 Naohisa Goto * BioRuby 1.3.0 is released. 2009-02-19 Naohisa Goto * lib/bio/version.rb Preparation for bioruby-1.3.0 release. (commit fd7fc9f78bc5f4d9a10b3c0d457d9781c9ec2e49) * bioruby.gemspec.erb Fixed a logic to determine whether in git repository, and file lists are changed to be sorted. 
(commit ede0c0d7aeab078b6183c4e0e7c74faec32739f7) 2009-02-18 Naohisa Goto * README.rdoc Added list of document files bundled in the BioRuby distribution. (commit 92748f848e4708766e44c22b2f02ac662491971f) 2009-02-10 Naohisa Goto * KNOWN_ISSUES.rdoc Added details about the text mode issue on mswin32/mingw32/bccwin32 and about non-UNIX/Windows systems. (commit 342a167a23d3b078bd77b3f16f0ceb1aa071df66) 2009-02-09 Naohisa Goto * test/unit/bio/db/test_gff.rb Test bug fix: test_gff.rb failed in some environment (e.g. Windows) because the default formatting rule of Float#to_s depends on the libc implementation. (commit f39bf88ed6a41bd328372ee7de7a23902235f833) 2009-02-06 Naohisa Goto * lib/bio/db/gff.rb, test/unit/bio/db/test_gff.rb * Bug fix: Bio::GFF::GFF3::Record#id and #id= should be changed to follow the previous incompatible change of @attributes. Thanks to Tomoaki NISHIYAMA who reports the bug ([BioRuby] GFF3 status (possible bug?)). * Unit tests are added. (commit 5258d88ef98a12fd7829eb86aa8664a18a672a43) (commit c0c7708b3e91b0d2f2d0d50a4a0ba36928057cc8) 2009-02-05 Naohisa Goto * Rakefile New task "tutorial2html" to generate html from doc/Tutorial.rd and doc/Tutorial.rd.ja. (commit 8d66fae59477f01f12b2fa3509ea34c371102725) * doc/Tutorial.rd.html, doc/Tutorial.rd.ja.html Automatically generated tutorial html from RD formatted documents. (commit 90c4a23eea08b06dd758aaa0a53bea789602d252) * doc/bioruby.css Newly added stylesheet for the tutorial html files. The bioruby.css has been used in http://bioruby.org/ and has been maintained by Toshiaki Katayama. (commit b69dc243787525de065bdf2e6b7da68d6079ab91) * test/runner.rb Added workaround for test-unit-2.0.x. (commit 475ac6a6b38e8df30de3d9bf4c7e810759ab023d) 2009-02-04 Naohisa Goto * lib/bio/appl/blast/format0.rb Bug fix: a null line can be inserted after query name lines.
(commit bea9ce35b4177f407575ed0752c36bba8a50f502) 2009-02-03 Naohisa Goto * Tutorial.rd.ja * Document bug: BioRuby shell commands seq, ent, obj were renamed to getseq, getent, getobj, respectively. Thanks to Hiroyuki Mishima who reports the issue ([BioRuby-ja]). * Changes of returned value of getseq are also reflected to the document. * Recommended Ruby version and installation procedure are also changed. (commit 916e96ca549db71a550e7a5d3bd49a3149614313) * doc/Changes-0.7.rd Documentation forgotten in 1.1.0: rename of BioRuby shell commands. (commit 64113314caac3453b4cc3b80ece9b5fb5841e069) 2009-01-30 Naohisa Goto * lib/bio/appl/blast/format0.rb Bug fix: incorrect parsing of hit sequence's whole length. (commit 98e6f57630b2c3394a9403f58e76b102346c56ef) Bug fix: Whole length of a hit sequence is mistakenly parsed when it contains ",". WU-BLAST parser is also affected in addition to NCBI BLAST parser. * lib/bio/db/lasergene.rb, lib/bio/db/soft.rb, lib/bio/util/color_scheme.rb, lib/bio/util/contingency_table.rb, lib/bio/util/restriction_enzyme.rb Removed ":nodoc:" in in "module Bio" which prevents RDoc of the Bio module. (commit 458db79b467d40ed02db0d085218f611e7dd5e04) 2009-01-29 Naohisa Goto * doc/Changes-1.3.rdoc Added documents about Bio::TogoWS and Bio::BIORUBY_VERSION. * lib/bio/shell/plugin/entry.rb getent (BioRuby shell command) is changed to use EBI Dbfetch or TogoWS in addition to NCBI or KEGG API. (commit 0e172590f60dd5a5f27a24ecd230037a7909224c) * lib/bio/shell/plugin/togows.rb, lib/bio/shell.rb Added new shell plugin providing accesses to TogoWS REST services. (commit 03f6720b90e90703c23536a11b3f12c8155550ff) * lib/bio.rb Added autoload of Bio::TogoWS. (commit f8605e1234164a7aa7f236b4e96a4299229753d7) * test/functional/bio/io/test_togows.rb, test/unit/bio/io/test_togows.rb Newly added functional and unit tests for Bio::TogoWS::REST. 
(commit f04152b80d07f44f146fa3fa0729facede865aac) * lib/bio/io/togows.rb New class Bio::TogoWS::REST, a REST client for the TogoWS web service (http://togows.dbcls.jp/site/en/rest.html). (commit 652d2534163675182b9ce30cbb1dd5efff45cd60) * bin/br_pmfetch.rb Changed to use Bio::BIORUBY_VERSION_ID instead of CVS version ID. (commit f69d538ffa9ded00eb68dd306e65505d03b6c656) * lib/bio/shell/core.rb Changed to use BIORUBY_VERSION_ID. (commit 4ce11656a205e85cae64eca27cef7cd94eb80930) * bioruby.gemspec.erb Gem version is now determined from lib/bio/version.rb or BIORUBY_GEM_VERSION environment variable. (commit 1811e845e60bc2847ea5717ef936bad93f9f2c87) * Rakefile * Changed to use lib/bio/version.rb. * Environment variable BIORUBY_EXTRAVERSION is renamed to BIORUBY_EXTRA_VERSION. * Added dependency on lib/bio/version.rb to bioruby.gemspec. (commit fb27eaa584cda1bb4cb75e10085996503361c98a) * lib/bio.rb, lib/bio/version.rb Bio::BIORUBY_VERSION is split into lib/bio/version.rb. (commit 9779398c3fa0e9405a875b754a5243e0d6922c32) * New file lib/bio/version.rb contains BioRuby version information. * New constants: Bio::BIORUBY_EXTRA_VERSION stores extra version string (e.g. "-pre1") and Bio::BIORUBY_VERSION_ID stores BioRuby version string (e.g. "1.3.0-pre1"). * Bio::BIORUBY_VERSION is changed to be frozen. Above two constants also store frozen values. 2009-01-26 Naohisa Goto * KNOWN_ISSUES.rdoc Newly added KNOWN_ISSUES.rdoc that describes known issues and bugs in current BioRuby. (commit 06b10262be0bf797a3b133e4697e9b0955408944) (commit a65ad8b42613e46b0b4bb0650d6301da0dcc88c9) * lib/bio/shell/plugin/ncbirest.rb, lib/bio/shell.rb New shell plugin lib/bio/shell/plugin/ncbirest.rb, providing "efetch", "einfo", "esearch", and "esearch_count" methods. They act the same as those defined in Bio::NCBI::REST, except that "efetch" fetches entries with pre-defined databases depending on arguments. 
(commit c482e1864aa0dbca3727b1059d4fe3d0aefb3917) (commit 3360b8905fdbcd4ca050470fdb2f02a7387e8bb9) * lib/bio/shell/plugin/entry.rb Shell commands "getent" and "getseq" are changed to use "efetch" method when "gb" or some variant is specified as the database. (commit c482e1864aa0dbca3727b1059d4fe3d0aefb3917) (commit 3360b8905fdbcd4ca050470fdb2f02a7387e8bb9) * bioruby.gemspec.erb, bioruby.gemspec * Changed version to 1.2.9.9501. * Changed to use "git ls-files" instead of "git-ls-files", and changed not to redirect to /dev/null. * Special treatment of bioruby.gemspec is removed. * ChangeLog is included to RDoc. * Set RDoc title to "BioRuby API documentation". * Set "--line-numbers" and "--inline-source" to rdoc_options. (commit f014685090c38eeb64219603f2c7e90574849431) * added KNOWN_ISSUES.rdoc to files for no-git environment. (commit 06b10262be0bf797a3b133e4697e9b0955408944) * Ruby 1.9 support: command execution with shell can raise an error. (commit 3179de32f1dc746c8de975917b1718a523800d69) * bioruby.gemspec is generated from bioruby.gemspec.erb. (commit 4e1cd3bfb8207b357d5b71cc0fc8366f06491130) (commit 06b10262be0bf797a3b133e4697e9b0955408944) 2009-01-21 Naohisa Goto * ChangeLog Added recent changes and fixed typo for recent changes. 2009-01-20 Naohisa Goto * ChangeLog, doc/Changes-1.3.rdoc Added ChangeLog and doc/Changes-1.3.rdoc for recent changes. (commit be2254ddea152fddf51a2476eeb20d804b1e3123) * bioruby.gemspec Added bioruby.gemspec created from bioruby.gemspec.erb. (commit 4c54597eaf09107c34ad06bc5f5f9cead77a0198) * lib/bio/appl/blast/wublast.rb Bug fix: parsing of exit code failed when ignoring fatal errors (commit 44ed958acebe4324a9a48e7292c4f0ad5c0fb685) * Bug fix: could not get exit code in WU-BLAST results executed with a command line option "-nonnegok", "-novalidctxok", or "-shortqueryok". * New methods Bio::Blast::WU::Report#exit_code_message and #notes. * Rakefile Added package tasks and changed to use ERB instead of eruby. 
(commit 7b081c173d3b1cbc46034297ea802a4e06f85b2f) * bioruby.gemspec.erb Use git-ls-files command to obtain list of files when available. (commit 5d5cb24fdd56601bc43ee78facc255ca484245c0) 2009-01-17 Naohisa Goto * Rakefile Simple Rakefile for dynamic generation of bioruby.gemspec (commit d5161d164f3520db25bed9aececb962428b9d6bc) * bioruby.gemspec.erb bioruby.gemspec is renamed to bioruby.gemspec.erb with modification. (commit bef311668e4a3be30965ce94d41e7bde4a4e17f9) To prevent the error "Insecure operation - glob" in GitHub, bioruby.gemspec is renamed to bioruby.gemspec.erb, and modified to generate the file list by using eruby. 2009-01-15 Naohisa Goto * doc/Changes-1.3.rdoc Changes-1.3.rd is renamed to Changes-1.3.rdoc with format conversion, and fixed typo. (commit 1aef599650d14362ed233dcc9a7db8d3c1db1777) Added details about newly added classes etc. (commit eda9fd0abbb8e430810468d777d0b585e33c25d8) 2009-01-13 Naohisa Goto * bioruby.gemspec Changed version to 1.2.9001, set has_rdoc = true and rdoc options. (commit 1f63d3d5389dd3b0316e9f312b56e62371caa253) * Gem version number changed to 1.2.9.9001 for testing gem. * Changed to has_rdoc = true. * README.rdoc and README_DEV.rdoc are now included to gem's rdoc, and README.rdoc is set to the main page. * *.yaml is now excluded from rdoc. 2009-01-13 Jan Aerts * bioruby.gemspec Renamed gemspec.rb to bioruby.gemspec because so github builds the gem automatically (commit 561ae16d20f73dcd6fc3d47c41c97c32f9aadb1a) (committer: Naohisa Goto) (original commit date: Wed Jun 25 11:01:03 2008 +0100) Edited gemspec because github returned an error while building gem. 
(commit f0d91e07550872c2f0d5835e496af1add7759d42) (committer: Naohisa Goto) (original commit date: Wed Jun 25 11:03:04 2008 +0100) 2009-01-13 Naohisa Goto * README.rdoc Changed format from RD to RDoc with some additional URLs (commit cb8781d701f22cbaf16575bb237a9e0cbf8cd407) Clarified copyright of README.rdoc and BioRuby (commit acd9e6d6e6046281c6c9c03cff1021449b8e780f) Updated descriptions about RubyGems, and added Ruby 1.9 partial support (commit ff63658b255988bf0e7a9f5a2d1523d5104fe588) 2009-01-09 Naohisa Goto * test/runner.rb Ruby 1.9.1 support: using alternatives if no Test::Unit::AutoRunner (commit 5df2a9dc0642d4f1e9a4398d6af908780d622a6e) 2009-01-05 Naohisa Goto * lib/bio/db/fantom.rb Bug fix: incomplete cgi parameter escaping, and suppressing warnings. (commit 754d8815255a0f0db20df9dd74f9f146605d430e) * Bug fix: incomplete cgi parameter escaping for ID string in Bio::FANTOM.get_by_id (and Bio::FANTOM.query which internally calls the get_by_id method). * Warning message "Net::HTTP v1.1 style assignment found" when $VERBOSE=true is suppressed. * Removed obsolete "rescue LoadError" when require 'rexml/document'. * lib/bio/io/fetch.rb Bug fix: possible incomplete form key/value escaping. (commit ecaf2c66261e4ce19ab35f73e305468e1da412ed) * Bug fix: possible incomplete form key/value escaping * Refactoring: changed to use private methods _get and _get_single to access remote site. * lib/bio/io/pubmed.rb Bug fix: possible incomplete escaping of parameters, and suppressing warnings (commit 93daccabb1a82bb20e92798c1810182dfb836ba7) * Bug fix: possible incomplete string escaping of REST parameters in Bio::PubMed#query and #pmfetch. * Warning message "Net::HTTP v1.1 style assignment found" when $VERBOSE=true is suppressed. * Removed obsolete "unless defined?(CGI)". * lib/bio/command.rb, test/unit/bio/test_command.rb Bug fix: incomplete escaping in Bio::Command.make_cgi_params etc.
(commit 17c8f947e5d94012921f9252f71460e9d8f593e3) * Bug fix: in Bio::Command.make_cgi_params and make_cgi_params_key_value, string escaping of form keys and values is incomplete. * Warning message "useless use of :: in void context" is suppressed when running test/unit/bio/test_command.rb with $VERBOSE=true. * Unit tests are added. * lib/bio/appl/, lib/bio/io/ (9 files) Suppress warning message "Net::HTTP v1.1 style assignment found" when $VERBOSE = true. (commit a2985eb1f3aed383f1b1b391f2184317c7fd21c7) 2009-01-02 Naohisa Goto * README.rdoc Changing optional requirements, recommended Ruby version, and setup.rb credit. (commit a5462ab4bd403d2d833e5d6db26ae98ca763513c) 2008-12-30 Naohisa Goto * README.rdoc Fixed grammar and spelling in README.rdoc, indicated by Andrew Grimm at git://github.com/agrimm/bioruby.git in Sun Sep 21 19:59:03 2008 +1000. (commit 446918037bff392b9c6bc6828720c585733a8f4b) 2008-12-30 Naohisa Goto * lib/bio.rb Changed BIORUBY_VERSION to 1.3.0, which will be the next BioRuby release version number. (commit b000b1c4a5a136ab287b517b8b8c66e54f99a8a8). * doc/Changes-1.3.rd Added documents about changed points for 1.3.0 release. (commit 028e323e784eb60b18f941cce1e3752abff1433c) * lib/bio/appl/blast/format8.rb Ruby 1.9 support: String#each_line instead of String#each (commit 1bc59708137fd46911d5892e4712cc49c71fa031) * lib/bio/io/flatfile/splitter.rb Checks for undefined constants are added for running without "require 'bio'" in unit tests. (commit 311176d4d390e5948348f623ff3632454136a03f) * lib/bio/appl/blast.rb, lib/bio/appl/blast/report.rb, test/unit/bio/appl/test_blast.rb Support for default (-m 0) and tabular (-m 8) formats in Bio::Blast.reports. * Added support for default (-m 0) and tabular (-m 8) formats in Bio::Blast.reports method. For the purpose, Bio::Blast::Report_tab is added to read tabular format by using Bio::FlatFile. * Unit tests are added.
2008-12-26 Naohisa Goto * lib/bio/appl/paml/codeml/rates.rb Ruby 1.9 support: String#each_line instead of String#each (commit 1789a3975c4c82d3b45f545893be8f2a7bf47a01) 2008-12-26 Naohisa Goto * lib/bio/command.rb, lib/bio/appl/fasta.rb, lib/bio/appl/blast/genomenet.rb Refactoring and following the change of the remote site fasta.genome.jp. (commit 671092dff67890fc48dd7ff2f606c4cedc2eb02c) * New method Bio::Command.http_post_form. * Bio::Blast::Remote::GenomeNet#exec_genomenet and Bio::Fasta#exec_genomenet are changed to use the new method. * Changed a regexp. in Bio::Fasta#exec_genomenet is changed following the change of the remote GenomeNet (fasta.genome.jp). 2008-12-24 Naohisa Goto * lib/bio/location.rb, test/unit/bio/test_location.rb New method Bio::Locations#to_s with bug fix, etc. (commit 115b09456881e1d03730d0b9e7a61a65abf6a1fe) * New method Bio::Locations#to_s is added. * New attributes Bio::Locations#operator and Bio::Location#carat. * Changed not to substitute from "order(...)" or "group(...)" to "join(...)". * Bug fix: Bio::Locations.new(str) changes the argument string when the string contains whitespaces. * Unit tests for Bio::Locations#to_s are added. 2008-12-20 Naohisa Goto * test/functional/bio/appl/test_pts1.rb, test/unit/bio/appl/test_pts1.rb Moved part of test_pts1.rb using network from test/unit to test/functional. (commit 933ff3e7d615fe6521934f137519ea84b3b517f2) 2008-12-18 Naohisa Goto * test/unit/bio/io/test_soapwsdl.rb Ruby 1.9 support: following the change of Object#instance_methods (commit 008cf5f43786f6143f74889e0ec53d1c8a452aa2) Note that SOAP/WSDL library is no longer bundled with Ruby 1.9, and tests in test_soapwsdl.rb may fail. * test/unit/bio/io/test_ddbjxml.rb Ruby 1.9 support: following the change of Module::constants (commit ed1ad96e7ed9d6c7d67e5413a22ba935a3b36efa) * lib/bio/util/restriction_enzyme/single_strand.rb Ruby 1.9 support: changed Array#to_s to join, Symbol#to_i to __id__, etc. 
(commit a29debb8c03244c1ce61317d6df0a2c5d066de3d) * Ruby 1.9 support: in pattern method, changed to use Array#join instead of Array#to_s. * Ruby 1.9 support: in self.once method, changed to use Object#__id__ instead of Symbol#to_i. * self.once is changed to be a private class method. 2008-12-18 Naohisa Goto * lib/bio/db/rebase.rb Ruby 1.9 support: changed not to use String#each, etc. (commit 47ba6e9fcf864f5881211e766f2e47b60dde178a) * Ruby 1.9 support: In parse_enzymes, parse_references, and parse_suppliers methods, String#each is changed to each_line. * Changed to use require instead of autoload, to reduce support cost. 2008-12-16 Moses Hohman * lib/bio/db/medline.rb, test/unit/bio/db/test_medline.rb fix medline parsing of author last names that are all caps (commit 5f37d566fc2efa4878efbd19e83f909a58c4cb00) 2008-12-15 Mitsuteru Nakao * lib/bio/db/kegg/glycan.rb Bug fix in Bio::KEGG::GLYCAN#mass. Thanks to a reporter. (commit cb8f1acc4caebf1f04d4a6c141dd4477fcb5394b) (committer: Naohisa Goto) 2008-12-15 Naohisa Goto * lib/bio/pathway.rb, test/unit/bio/test_pathway.rb Fixed pending bugs described in unit test, and Ruby 1.9 support (commit 97b3cd4cf78eff8aede16369298aaacf1c319b68) * Pending bugs described in test/unit/bio/test_pathway.rb are fixed. Fixed a bug in subgraph: does not include nodes w/o edges. A bug in cliquishness depending on the subgraph bug is also fixed. * Bio::Pathway#cliquishness is changed to calculate cliquishness (clustering coefficient) for not only undirected graphs but also directed graphs. Note that pending proposed specification changes previously written in test_pathway.rb (raises error for directed graphs, and return 1 for a node that has only one neighbor node) are rejected. * Ruby 1.9 support: To avoid dependency to the order of objects in Hash#each (and each_keys, etc.), Bio::Pathway#index is used to specify preferences of nodes in a graph. Affected methods are: to_matrix, dump_matrix, dump_list, depth_first_search. 
* Bug fix in the libpath magic in test/unit/bio/test_pathway.rb. 2008-12-09 Naohisa Goto * lib/bio/db/newick.rb, lib/bio/tree.rb Ruby 1.9 support: suppressing "warning: shadowing outer local variable". (commit 6fe31f0a42a87631bdee3796cff65afb053b2add) 2008-12-05 Naohisa Goto * test/unit/bio/io/test_fastacmd.rb Ruby 1.9 support: changed to use respond_to?, etc. (commit 5d6c92c752c00f07ed856fd209c8078ef9fdf57a) * Following the change of Module#methods in Ruby 1.9, changed to use respond_to?(). * The test path '/tmp/test' is replaced with '/dev/null' * lib/bio/db/gff.rb Ruby 1.9 support: changes following the change of String#[] (commit c25cc506bffcf1f2397ac2210153cfbfbbcb4942) * lib/bio/reference.rb Ruby 1.9 support: using enumerator instead of String#collect (commit ea99242570fc8b2e2a869db84b7daaa7737f23e0) * test/unit/bio/test_location.rb Test bug fix: wrong number in libpath magic (commit aa45101246bc42f78a21ee110bc58e59f532e24a) * test/unit/bio/db/test_nexus.rb Test bug fix: missing libpath magic (commit d54eed426461f3a3148953fda1f7b428e74051c6) Thanks to Anthony Underwood who reports the bug in his Github repository. * test/unit/bio/db/pdb/test_pdb.rb Test bug fix: wrong number in libpath magic (commit b53d703a8dd72608ab5ea03457c2828470069f2f) 2008-12-04 Naohisa Goto * test/unit/bio/db/embl/test_embl_to_bioseq.rb Test bug fix: typing error (found by using Ruby 1.9) (commit fa52f99406ddd42221be354346f67245b3572510) * test/unit/bio/db/embl/test_common.rb Ruby 1.9 support: following the change of Module#instance_methods (commit d18fa7c1c3660cf04ec2a8a42d543a20a77cee2c) In Ruby 1.9, Module#instance_methods returns Array containing Symbol objects instead of String. To support both 1.8 and 1.9, "to_s" is added to every affected test method. 
* lib/bio/appl/tmhmm/report.rb Ruby 1.9 support: using enumerator if the entry is a string (commit 36968122b64b722e230e3e1b52d78221c0b60884) * lib/bio/appl/pts1.rb Ruby 1.9 support: String#each to each_line and Array#to_s to join('') (commit c4c251d5e94167512a0b8a38073a09b72994c08f) * test/unit/bio/appl/test_fasta.rb Ruby 1.9 support: changed to use Array#join instead of Array#to_s (commit bf8823014488166c6e1227dd26bdca344c9f07b7) * lib/bio/appl/blast.rb Ruby 1.9 support: String#each is changed to String#each_line (commit 3e177b9aecf6b54a5112fd81fc02386d18fc14b9) * lib/bio/appl/hmmer/report.rb Ruby 1.9 support: String#each is changed to String#each_line (commit 63bdb3a098bc447e7bd272b3be8f809b4b56d451) * lib/bio/appl/genscan/report.rb Ruby 1.9 support: String#each is changed to String#each_line (commit 082250786756de2b4171b3a00e0c4faaa816fc8f) * test/functional/bio/io/test_ensembl.rb Using jul2008.archive.ensembl.org for workaround of test failure. (commit 1d286f222cdc51cf1323d57c1c79e6943d574829) Due to the renewal of Ensembl web site, lib/bio/io/ensembl.rb does not work for the latest Ensembl. For a workaround of the failure of tests in test/functional/bio/io/test_ensembl.rb, tests for Ensembl#exportview are changed using Ensembl archive (http://jul2008.archive.ensembl.org/). 2008-12-03 Naohisa Goto * sample/demo_sequence.rb sample/demo_sequence.rb, example of sequence manipulation. (commit b7f52b47dbcc7d32f4eb7377d2b1510eb1991fd5) The content of this file is moved from previous version of lib/bio/sequence.rb (inside the "if __FILE__ == $0"). 2008-12-02 Naohisa Goto * lib/bio/appl/paml/baseml.rb, etc. (17 files) Support for baseml and yn00 (still under construction), and incompatible changes to Bio::PAML::Codeml. (commit d2571013409661b4d7be8c5c9db14dbe9a9daaaf) * Security fix: To prevent possible shell command injection, changed to use Bio::Command.query_command instead of %x. 
* Bug fix with incompatible changes: Using Tempfile.new.path as default values are removed because this can cause unexpected file loss during garbage collection. * Change of method/file names: The term "config file" is changed to "control file" because the term "config file" is never used in PAML documents. The term "options" is changed to "parameters" because the "options" have been used for command-line arguments in other wrappers (e.g. Bio::Blast, Bio::ClustalW). The term "parameters" is also used in BioPerl's Bio::Tools::Run::Phylo::PAML. * Bio::PAML::Codeml.create_config_file, create_control_file, Bio::PAML::Codeml#options, and #options= are now deprecated. They will be removed in the future. * New class Bio::PAML::Common, basic wrapper common to PAML programs. Bio::PAML::Codeml is changed to inherit the Common class. * New classes Bio::PAML::Baseml and Bio::PAML::Yn00, wrappers for baseml and yn00. * New classes Bio::PAML::Common::Report, Bio::PAML::Baseml::Report and Bio::PAML::Yn00::Report, but still under construction. * New methods Bio::PAML::Codeml#query(alignment, tree), etc. * test/data/paml/codeml/dummy_binary is removed because the default of Bio::PAML::Codeml.new is changed to use "codeml" command in PATH. * test/data/paml/codeml/config.missing_tree.txt is removed because treefile can be optional parameter depending on runmode. test/data/paml/codeml/config.missing_align.txt is also removed because test is changed to use normal control file parameters. * lib/bio/command.rb, test/functional/bio/test_command.rb Improvement of Bio::Command.query_command, call_command, etc. (commit e68ee45589f8063e5a648ab235d6c8bbc2c6e5ff) * Improvement of Bio::Command.query_command, call_command, query_command_popen, query_command_fork, call_command_popen, and call_command_fork: they can get an option :chdir => "path", specifying working directory of the child process. * New method Bio::Command.mktmpdir backported from Ruby 1.9.0. 
* New method Bio::Command.remove_entry_secure that simply calls FileUtils.remove_entry_secure or prints warning messages. * Tests are added in test/functional/bio/test_command.rb. * Ruby 1.9 followup: FuncTestCommandQuery#test_query_command_open3 failed in ruby 1.9 due to the change of Array#to_s. 2008-11-19 Naohisa Goto * test/data/paml/codeml/ Removed some files in test/data/paml/codeml/ because of potential copyright problem, because they are completely identical with those distributed in PAML 4.1. (commit 086b83d3e54f69d2b9e71af3f9647518768353b0) 2008-10-21 Naohisa Goto * lib/bio/sequence/compat.rb Bug fix: TypeError is raised in Bio::Sequence#to_s before Sequence#seq is called. (commit ea8e068a5b7f670ce62bc0d3d4b21639e3ca2714) Thanks to Anthony Underwood who reported the bug and sent the patch. 2008-10-19 Naohisa Goto * setup.rb, README.rdoc install.rb is replaced by new setup.rb. (commit 9def7df5b81340c49534ff0bb932de62402a1c8d) * install.rb is replaced by the latest version of setup.rb taken from the original author's svn repository (svn r2637, newer than version 3.4.1, latest release version. $ svn co http://i.loveruby.net/svn/public/setup/trunk setup). * README.rdoc is modified to follow the rename of install.rb to setup.rb. 2008-10-18 Toshiaki Katayama * lib/bio/io/ncbirest.rb * New methods: Bio::NCBI::REST#einfo, #esearch_count, etc. * New classes: Bio::NCBI::REST::ESearch, Bio::NCBI::REST::EFetch. (commit 637f97deefd6cc113ef18fe18ab628eb619f3dc1) (committer: Naohisa Goto) 2008-10-14 Naohisa Goto * lib/bio/sequence/common.rb, test/unit/bio/sequence/test_common.rb, test/unit/bio/sequence/test_compat.rb, test/unit/bio/sequence/test_na.rb Bug fix: Bio::Sequence::Common#randomize severely biased. (commit 02de70cbf036b41a50d770954f3b16ba2beca880) * Bug fix: Bio::Sequence::Common#randomize was severely biased. To fix the bug, it is changed to use Fisher-Yates shuffle, as suggested by Anders Jacobsen.
([BioRuby] Biased Bio::Sequence randomize()) * The module method Bio::Sequence::Common.randomize is removed because it is not used anymore. * Unit tests for Bio::Sequence::Common#randomize are added. * To avoid possible test class name conflicts, class/module names are changed in test_na.rb, test_compat.rb, and test_common.rb. 2008-10-14 Raoul Jean Pierre Bonnal * lib/bio/io/sql.rb Changed the demonstration code in the "if __FILE__ == $0". (commit 9942105920182c809564554bb0d1dba33fe4caab) * lib/bio/db/biosql/sequence.rb Fix: typing error (commit 67fbbb93adaa8b4b91de3703a235bc75eaef842a) 2008-10-14 Naohisa Goto * lib/bio/db/biosql/sequence.rb, lib/bio/io/sql.rb Merging patches by Raoul in commit 496561a70784d3a1a82bf3117b2d267c7625afac which are ignored when rebasing, probably because of manually editing during merge. (commit c699253d53510c0e76188a72004651a4635088b3) 2008-10-10 Raoul Jean Pierre Bonnal * lib/bio/db/biosql/sequence.rb Fix: check on nil objects (to_biosql) (commit f701e9a71f524ee4373c94ee1bd345e87f16f6ce) BugFix: ex. /focus="true" in output was /focus="t", qualifier.value.to_s fix the bug (commit f6e1530f3372c87031b551e5c76e24f264891e64) * lib/bio/io/biosql/seqfeature.rb BugFix: seqfeature_qualifier_value returned ordered only by rank (commit fb74009393eeca6743f78b7b45cb66858c41d733) * lib/bio/io/biosql/bioentry.rb BugFix: seqfeatures returned ordered by rank (commit 25a249d87d23bd9cb4e671053019675836fcd38c) * lib/bio/db/biosql/sequence.rb Fixed to suppress warnings: Bio::Features is obsoleted. (commit 198a1e893dd4515d61276c9cce8905f02130e721) * lib/bio/db/biosql/biosql_to_biosequence.rb Removed alias comment. (commit c037ec565987634b354ff6d77dbbe7c9d83a9e7c) * lib/bio/db/biosql/sequence.rb Implemented Entry's comments and reference's comments. Fixed species common name. 
(commit bd3b24ea53ebd9b0ec9dd9f15c27091fe6143e28) * lib/bio/io/biosql/bioentry.rb Cleaned, deleted pk and seq reference (commit 14bcf90334ec3c3f1c1784977b329ae641e9e106) * lib/bio/io/biosql/comment.rb cleaned codes (commit 54976693350ab0512cecf946999c2868b9e88007) * lib/bio/db/biosql/biosql_to_biosequence.rb Added comments, comment adapter. (commit 5394ecea34778c9f571eb35cfc16e3b1a6cb6d1b) 2008-10-09 Raoul Jean Pierre Bonnal * lib/bio/io/sql.rb Changed the demonstration code in the "if __FILE__ == $0". (commit efb61d7c21d229e882c6706838c284404343fa9c) * lib/bio/db/biosql/sequence.rb Added support for reference. ToDo: handling comments. (commit 29211059ee04214d7879f900ec563c0708d8c9d6) * lib/bio/io/biosql/bioentry_reference.rb Fix: composite primary keys :bioentry_id, :reference_id, :rank (commit eba61ba670c591f58866b37ababc4acac0cc7883) * lib/bio/io/biosql/dbxref.rb removed explicit pk and seq (commit e149f94484469fb3dfd881b45b14be7093b67e0d) 2008-10-09 Naohisa Goto * test/functional/bio/test_command.rb, test/data/command/echoarg2.bat Bug fix: tests in FuncTestCommandCall failed on mswin32, and URL changed. (commit 921292f1188d85994742ce4aa156b39d6e720aad) * Bug fix: tests in FuncTestCommandCall failed on mswin32. To fix the test bug, a batch file test/data/command/echoarg2.bat is newly added. This file is only used on mswin32 or bccwin32. * URL for test to fetch a web page is changed to http://bioruby.open-bio.org/. 2008-10-07 Naohisa Goto * test/unit/bio/appl/paml/test_codeml.rb Bug fix: error on mswin32 in test_expected_options_set_in_config_file. (commit 16b8f321c653502ef801d801383a019bc45f67de) Bug fix: On mswin32, test_expected_options_set_in_config_file in Bio::TestCodemlConfigGeneration failed with the error "Errno::EACCES: Permission denied" because it attempts to remove the temporary file that is previously opened but not explicitly closed, and, in Windows, the opened file is automatically locked and protected from being removed.
* lib/bio/command.rb, test/functional/bio/test_command.rb, test/unit/bio/test_command.rb Bio::Command improved, and added functional tests. (commit bb618cdfbfb56c40249aff81b6ef84742465851c) * In Bio::Command.call_command_* and Bio::Command.query_command_*, when giving command-line array with size 1, the command might passed to shell. To prevent this, changed to call a new method Bio::Command#safe_command_line_array internally. * Added test/functional/bio/test_command.rb, contains unit tests to call external commands and to access external web sites. 2008-10-06 Naohisa Goto * lib/bio/db/biosql/sequence.rb Bio::Sequence::SQL::Sequence#seq is changed to return a Bio::Sequence::Generic object, because of avoiding to create nested Bio::Sequence object in #to_biosequence and because Bio::FastaFormat#seq also returns a Bio::Sequence::Generic object. (commit 8fb944c964ab5e1ca8905e6c4ce8e68479952935) 2008-10-03 Raoul Jean Pierre Bonnal * lib/bio/io/biosql/taxon.rb Added has_one :taxon_genbank_common_name, :class_name => "TaxonName", :conditions => "name_class = 'genbank common name'" (commit dc7a18b17cad8e603e0d3c20a5a80bc2a6f0899c) * lib/bio/db/biosql/sequence.rb Fix taxon identification by splitting scientific name and genbank common name. Fix organism/source's name composed by scientific name and genbank common name. (commit 5d6abcc0dcd05d7083622360489a5f4c361e0cc7) * lib/bio/io/sql.rb Working on tests about format import/export. (commit d28a343e4bab3cc0c04ac65dce677cfee0f81a46) * lib/bio/io/biosql/term.rb Fix foreign keys (commit c19c8766c7c0bec7561727abf2ef1bdf47d4e032) * lib/bio/io/biosql/seqfeature_qualifier_value.rb added composite primary keys :seqfeature_id, :term_id, :rank (commit cdd6a3bfc1ab748acb0c0d9161ebeb3dc7a76544) * lib/bio/io/biosql/ontology.rb class cleaned. (commit 81eb2c246d01790db72f0b08929bec5d862c959e) * lib/bio/io/biosql/biodatabase.rb class cleaned. 
(commit 4aede5c5fee92c2f8cdf151a3e038025b6c7fd74) * lib/bio/db/biosql/sequence.rb to_biosequence: removed not adapter comments. (commit 591fda23464c7b7031db09a8ca85deca320a5c87) Removed main garbage comments. (commit c46d7a2b4e188a0592d5b49def17b9e6fd598268) feature= Fix creation of Ontology and Term. (commit 95fe6d1a65e94da502529e597b137d12c3fe2fc2) * lib/bio/db/biosql/biosql_to_biosequence.rb :seq cleaned. (commit d6f719693286b74c1a0ea8a42c09a12f775b74dc) 2008-10-01 Naohisa Goto * test/functional/bio/io/test_ensembl.rb Bug fix: 3 failures occurred in test_ensembl.rb because of recent changes in Ensembl database (the gene ENSG00000206158 used as an example in this file was removed from the Ensembl database). To fix this, the example gene is changed to ENSG00000172146 (OR1A1, olfactory receptor 1A1). (commit e20c86d2cd7d4fd1723762e8a5acc3bc311a5c1b) * lib/bio/db/embl/sptr.rb, test/unit/bio/db/embl/test_sptr.rb Ruby 1.9 support: in Bio::SPTR, avoid using String#each and Array#to_s. (commit 5ff56653cd7cc2520c2c04acbc9ce2bf2a0fae9a) * In Bio::SPTR#gn_uniprot_parser, String#each (which is removed in Ruby 1.9) is changed to each_line. * In Bio::SPTR#cc and cc_* (private) methods, Array#to_s (whose behavior is changed in Ruby 1.9) is changed to join(''). * Unit test for Bio::SPTR#dr method is added and changed. 2008-09-30 Naohisa Goto * lib/bio/db/embl/sptr.rb, test/unit/bio/db/embl/test_sptr.rb Bug fix in Bio::SPTR#dr: raised error when asked it to return a DR key that didn't exist in the uniprot entry. Thanks to Ben Woodcroft who reported the bug and sent a patch. ([BioRuby] Bio::SPTR bug and fix). (commit 3147683c0b41e3f9418e26b481bf8b3e9ce63b8c) * lib/bio.rb Added autoload of Bio::NCBI::REST, and BIORUBY_VERSION incremented. (commit d6a37b0fcf1fb2f6e134dcdb8e29e79ec2a8fea7) * Added autoload of Bio::NCBI::REST. * Added comments for autoloading Bio::Sequence and Bio::Blast.
* BIORUBY_VERSION is temporarily incremented to 1.2.2, though the version number will not be used in upcoming release. Upcoming release will probably be using larger version number. 2008-09-25 Raoul Jean Pierre Bonnal * lib/bio/db/biosql/sequence.rb Updated with adapter. Problem saving big sequences. (commit 82d87fbaf70f9a46c40dded0b2db510a40964e62) * lib/bio/io/biosql/* (25 files) AR: explicit class and foreign_key reference. (commit 70327998186c2f943addb5d46b4bda8007ed5444) 2008-09-24 Naohisa Goto * lib/bio/db/gff.rb, test/unit/bio/db/test_gff.rb Bug fix and incompatible changes in GFF2 and GFF3 attributes. (commit 7b174bb842d9dcf9fd7f4b59e8f3b13ebc0ff3d4) * Bug fix: GFF2 attributes parser misunderstand semicolons. * Incompatible change in Bio::GFF::GFF2::Record#attributes and Bio::GFF::GFF3::Record#attributes. Now, instead of Hash, the method is changed to return a nested Array, containing [ tag, value ] pairs, because of supporting multiple tags in same name. If you want to get a Hash, use Record#attributes_to_hash method, though some tag-value pairs in same tag name may not be included. * Bio::GFF::Record#attribute still returns a Hash for compatibility. * New methods for getting, setting and manipulating attributes: Bio::GFF::GFF2::Record#attribute, #get_attribute, #get_attributes, #set_attribute, #replace_attributes, #add_attribute, #delete_attribute, #delete_attributes, and #sort_attributes_by_tag! (These are also added to Bio::GFF::GFF3::Record). It is recommended to use these methods instead of directly manipulating the array returned by Record#attributes. * Incompatible change in GFF2 attributes parser: the priority of '"' (double quote) is greater than ';' (semicolon). Special treatment of '\;' in GFF2 is now removed. Unlike GFF2, in Bio::GFF, the '\;' can still be used for backward compatibility. * Incompatible changes in attribute values in Bio::GFF::GFF2. Now, GFF2 attribute values are automatically unescaped.
In addition, if a value of an attribute is consisted of two or more tokens delimited by spaces, an object of the new class Bio::GFF::GFF2::Record::Value is returned instead of String. The new class Bio::GFF::GFF2::Record::Value aims to store a parsed value of an attribute. If you really want to get unparsed string, Value#to_s can be used. * Incompatible changes about data type in GFF2 columns: Bio::GFF::GFF2::Record#start, #end, and #frame return Integer or nil, and #score returns Float or nil. * Incompatible changes about the metadata in GFF2. The "##gff-version" line is parsed and the version string is stored to Bio::GFF::GFF2#gff_version. Other metadata lines are stored in an array obtained with a new method Bio::GFF::GFF2#metadata. Each metadata is parsed to Bio::GFF::GFF2::MetaData object. * Bio::GFF::Record#comments is renamed to #comment, and #comments= is renamed to #comment=, because they only allow a single String (or nil) and the plural form "comments" may be confusable. The "comments" and "comments=" methods can still be used, but warning messages will be shown when using in GFF2::Record and GFF3::Record objects. * New methods Bio::GFF::GFF2#to_s, Bio::GFF::GFF2::Record#to_s. * New methods Bio::GFF::GFF2::Record#comment_only? (also added in Bio::GFF::GFF3::Record). * Unit tests are added and modified. 2008-09-18 Naohisa Goto * lib/bio/appl/blast/rpsblast.rb, lib/bio/appl/blast/format0.rb, lib/bio/io/flatfile/autodetection.rb, test/unit/bio/appl/blast/test_rpsblast.rb, test/data/rpsblast/misc.rpsblast Improved support for RPS-BLAST results from multi-fasta query sequences. (commit 11f1787cf93c046c06d4a33a554210d56866274e) * By using Bio::FlatFile (e.g. Bio::FlatFile.open), a rpsblast result generated from multiple query sequences is automatically split into multiple Bio::Blast::RPSBlast::Report objects corresponding to query sequences. For the purpose, new flatfile splitter class Bio::Blast::RPSBlast::RPSBlastSplitter is added. 
* File format autodetection for RPS-BLAST default report is added. * Bug fix: Bio::Blast::RPSBlast::Report#program returns incorrect value. To fix the bug, regular expression in Bio::Blast::Default::Report#format0_parse_header (private method) is changed. * Unit tests are added for Bio::Blast::RPSBlast. 2008-09-17 Naohisa Goto * lib/bio/io/flatfile/buffer.rb, test/unit/bio/io/flatfile/test_buffer.rb Bug fix in Bio::FlatFile::BufferedInputStream#gets. (commit e15012e2a94d05308d139cb010749a1829d5c57f) * Bug fix: Bio::FlatFile::BufferedInputStream#gets('') might not work correctly. Now, BufferedInputStream#gets is refactored. Note that when rs = '' (paragraph mode), the behavior may still differ from that of IO#gets(''). * Test methods are added to test_buffer.rb. 2008-09-16 Naohisa Goto * lib/bio/appl/blast/wublast.rb Bug fix: parse error or infinite loop for WU-BLAST reports. (commit 07d1554c945400f9202d7b856055743e11860752) * Bug fix in Bio::Blast::WU::Report: fixed parse errors (errors, infinite loop, and wrong results could be generated) when parsing WU-BLAST reports generated by recent version of WU-BLAST. * New methods Bio::Blast::WU::Report#query_record_number, #exit_code, and #fatal_errors. 2008-09-03 Naohisa Goto * lib/bio/appl/blat/report.rb Bug fix: headers were parsed incorrectly with warning. (commit 3ff940988b76bdff75679cdf0af4c836f76fa3a1) * lib/bio/io/flatfile/splitter.rb To suppress warning messages "warning: private attribute?", private attributes are explicitly specified by using "private". (commit 1440b766202a2b66ac7386b9b46928834a9c9873) 2008-09-01 Michael Barton * lib/bio/appl/paml/codeml/report.rb Added code to pull estimated tree from codeml report. (commit 64cc5ef6f2d949cc9193b08dfc3fde6b221950d7) 2008-09-01 Naohisa Goto * test/unit/bio/db/embl/test_embl_rel89.rb Changed test class name because of name conflict of Bio::TestEMBL. 
(commit 536cdf903a3c3908c117efd554d33117d91452f4) * test/unit/bio/util/restriction_enzyme/ To prevent possible test class name conflicts about restriction enzyme. (commit 0fe1e7d3ed02185632f4a34d8efe1f21f755b289) * Tests about restriction enzyme are moved under a new module Bio::TestRestrictionEnzyme to prevent possible name conflict. * Conflicted test class names are changed. 2008-08-31 Naohisa Goto * test/unit/bio/db/test_prosite.rb Fixed failed test due to the change of hash algorithm in Ruby 1.8.7. (Probably also affected in Ruby 1.9.0). (commit e86f8d757c45805389e154f06ccde5a3d9e8a557) 2008-08-29 Naohisa Goto * lib/bio/appl/blast.rb Bio::Blast.reports is changed to support new BLAST XML format. (commit 02cc0695b85f18e8254aefed78a912812fc896d6) * Bio::Blast.reports is changed to support new BLAST XML format. * Removed unused require. 2008-08-28 Naohisa Goto * lib/bio/appl/blast/report.rb, lib/bio/appl/blast/rexml.rb, lib/bio/appl/blast/xmlparser.rb, test/unit/bio/appl/blast/test_report.rb Support for BLAST XML format with multiple queries after blastall 2.2.14. (commit de7897b5690279aae14d9bded5e682458bc61f9c) * BLAST XML format with multiple query sequences generated by blastall 2.2.14 or later is now supported. * New methods Bio::Blast::Report#reports, stores Bio::Blast::Report objects corresponding to the multiple query sequences. * New methods Bio::Blast::Report::Iteration#query_id, query_def, and query_len, which are available only for the new format. * New class Bio::Blast::Report::BlastXmlSplitter, flatfile splitter for Bio::FlatFile system. * Bug fix: Bio::Blast::Report#expect returned incorrect value. * Fixed typo and added tests in test/unit/bio/appl/blast/test_report.rb. * Some RDoc documents are added/modified. 2008-08-19 Michael Barton * lib/bio/appl/paml/codeml/rates.rb Updated regex for rates parser to include columns that have a '*' character. * test/unit/bio/appl/paml/codeml/test_rates.rb Updated testing for new rates file with * characters. 
* test/data/paml/codeml/rates Added rates file that includes positions with * characters. 2008-08-18 Naohisa Goto * test/unit/bio/io/test_ddbjxml.rb Changed a failed test, and added a test for Bio::DDBJ::XML::RequestManager. 2008-08-16 Michael Barton * lib/bio/appl/paml/, test/unit/bio/appl/paml/, test/data/paml/ Wrapper and parser for PAML Codeml program is added (merged from git://github.com/michaelbarton/bioruby). After merging, some changes were made by Naohisa Goto. See git log for details. 2008-08-15 Naohisa Goto * lib/bio/appl/blast.rb, lib/bio/appl/blast/genomenet.rb "-m 0" (BLAST's default) format support is improved, and fixed wrong example in the RDoc of Bio::Blast#query. * Added support for "-m 0" (BLAST's default) format to the Bio::Blast factory. For the purpose, Bio::Blast#parse_result (private method) is changed. * Added support for "-m 0" (default) format to the GenomeNet BLAST factory (in Bio::Blast::Remote::GenomeNet). * Bug fix: wrong example in the RDoc in Bio::Blast#query is changed. * Bio::Blast#set_option (private method) is changed to determine format correctly. * lib/bio/appl/blast/ddbj.rb, lib/bio/io/ddbjxml.rb Changed always using REST version of RequestManager, and changed to raise error when busy. * In Bio::Blast::Remote::DDBJ, changed always to use REST version for RequestManager, because of suppressing warning messages. * In Bio::DDBJ::XML::RequestManager, module REST_RequestManager is changed to class REST. * In Bio::Blast::Remote::DDBJ#exec_ddbj, changed to raise RuntimeError when "The search and analysis service by WWW is very busy now" message is returned from the server (which implies invalid options or queries may be given). 2008-08-14 Naohisa Goto * lib/bio/appl/blast.rb, lib/bio/appl/blast/genomenet.rb, lib/bio/appl/blast/remote.rb Bio::Blast#exec_genomenet is moved to genomenet.rb, with bug fix. * Bio::Blast#exec_genomenet is moved to lib/bio/appl/blast/genomenet.rb. 
* Incompatible change: Bio::Blast#exec_* is changed to return String. Parsing the string is now processed in query method. * New module Bio::Blast::Remote, to store remote BLAST factories. * New module Bio::Blast::Remote::GenomeNet (and Genomenet for lazy including), to store exec_genomenet and other methods. In the future, it might be a standalone class (or something else). * New module methods Bio::Blast::Remote::GenomeNet.databases, nucleotide_databases, protein_databases, and database_description, to provide information of available databases. * Bug fix: remote BLAST on GenomeNet with long query sequences fails because of the change of the behavior of the remote site. * Incompatible change: Bio::Blast#options= can change program, db, format, matrix, and filter instance variables. * Bio::Blast#format= is added. * Bio::Blast.local changed to accept 4th argument: full path to the blastall command. * lib/bio/appl/blast/ddbj.rb, lib/bio/io/ddbjxml.rb, lib/bio/appl/blast/genomenet.rb, lib/bio/appl/blast/remote.rb, lib/bio/appl/blast.rb New module Bio::Blast::Remote::DDBJ, remote BLAST on DDBJ. * New module Bio::Blast::Remote::DDBJ, remote BLAST routine using DDBJ Web API for Biology (WABI). Now, Bio::Blast.new(program, db, options, 'ddbj') works. * New class Bio::DDBJ::XML::RequestManager. In this class, workaround for Ruby 1.8.5's bundled SOAP4R is made. * Some common codes are moved from Bio::Blast::Remote::GenomeNet::Information to Bio::Blast::Remote::Information. * lib/bio/io/ddbjxml.rb Changed to use DDBJ REST interface for a workaround instead of editing WSDL. (commit a64c8da5df5076c5f55b54b7f134d22a2e8d281c) 2008-08-09 Naohisa Goto * lib/bio/appl/blast.rb * Bug fix: Bio::Blast raises TypeError without "-m" option, reported by Natapol Pornputapong. * New class Bio::Blast::NCBIOptions to treat command-line options for blastall (and for other NCBI tools, e.g. formatdb). 
* Changed not to overwrite @filter, @matrix or @format unless '-F', '-M', or '-m' option is given, respectively. 2008-07-30 BioHackathon2008 participants from BioRuby project * Branch 'biohackathon2008' is merged. See doc/Changes-1.3.rd for incompatible changes. * lib/bio/sequence.rb, lib/bio/sequence/ * lib/bio/db/embl/ * lib/bio/db/genbank/ * lib/bio/db/fasta.rb, lib/bio/db/fasta/ A new method #to_biosequence is added to Bio::EMBL, Bio::GenBank and Bio::FastaFormat. Bio::FastaFormat#to_seq is now an alias of the #to_biosequence method. Bio::Sequence#output is added to output formatted text. Supported formats are: EMBL, GenBank, Fasta, or raw. Written by Naohisa Goto and Jan Aerts. * lib/bio/db/biosql/ * lib/bio/io/sql.rb, lib/bio/io/biosql/ New BioSQL implementation by Raoul Jean Pierre Bonnal. * lib/bio/reference.rb * lib/bio/feature.rb Bio::References and Bio::Features are obsoleted. For more information, see doc/Changes-1.3.rd. * (Many changes are not listed here. See git log for details.) 2008-07-30 Naohisa Goto * lib/bio/db/gff.rb, test/unit/bio/db/test_gff.rb Branch 'test-gff3' in git://github.com/ngoto/bioruby is merged. Fixed gff3 attribute bug, and many improvements are added. See doc/Changes-1.3.rd for incompatible changes. Thanks to Ben Woodcroft who reported the bug and contributed codes. 2008-07-29 Naohisa Goto * lib/bio/appl/blast/format0.rb Bug fix: fixed ScanError when bit score is in exponential notation such as 1.234e+5. Regular expressions for numerics including exponential notations are changed to get correct values. 2008-07-18 Naohisa Goto * lib/bio/appl/hmmer.rb Bug fix: ArgumentError caused by misspelling of a variable name. 2008-06-23 Jan Aerts * README.rdoc * README_DEV.rdoc * gemspec.rb Renamed README files to RDoc gets parsed on github website. (commit 34b7693f74de2358759e955d8ce36cfe15e64b54) Edited README.rdoc and README_DEV.rdoc to reflect move from CVS to git. 
(commit a61b16163d3ca74f3f7c8d8e8f03f5f8c68dee60) 2008-06-13 Naohisa Goto * lib/bio/reference.rb * test/unit/bio/test_reference.rb * New method Bio::Reference#pubmed_url added (renamed the url method in CVS revision 1.25). * Bio::Reference#endnote is changed not to overwrite url if url is already given by user. * Improvement of Bio::Reference#bibtex method. (Idea to improve bibtex method is originally made by Pjotr Prins.) * test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb "require 'bio/sequence'" is needed to run the tests in this file. (commit 735e3563b723645afa65f0e4213a7c92152f68ec) 2008-05-19 Pjotr Prins * sample/fastasort.rb Simple example for sorting a flatfile (commit 677ac7c0707860f0478e75f72f23faa05b29dc6d) * doc/Tutorial.rd * sample/fastagrep.rb * sample/fastasort.rb Piping FASTA files (examples and doc) (commit ecd5e04477246dcf6cac84a6fbd21fb59efa3cf0) 2008-05-14 Naohisa Goto * lib/bio/appl/blast/format0.rb Bug fix: Possibly because of the output format changes of PHI-BLAST, Bio::Blast::Default::Report::Iteration#eff_space (and the shortcut method in the Report class) failed for PHI-BLAST (blastpgp) results, and Iteration#pattern and #pattern_positions (and the shortcut methods in the Report class) returned incorrect values. 2008-05-12 Naohisa Goto * lib/bio/appl/blast/xmlparser.rb, lib/bio/appl/blast/rexml.rb Bug fix: unit test sometime fails due to improper treatment of some Blast parameters and difference between rexml and xmlparser. To fix the bug, types of some parameters may be changed, e.g. Bio::Blast::Report#expect is changed to return Float or nil. * lib/bio/appl/blast/format0.rb Bug fix: Bio::Blast::Default::Report#eff_space returns wrong value ("Effective length of database"). It should return the value of "Effective search space". * test/unit/bio/appl/blast/test_xmlparser.rb Bug fix: tests in test/unit/bio/appl/blast/test_report.rb were ignored because of conflicts of the names of test classes. 
Class name in test_xmlparser.rb is changed to fix the bug. 2008-04-23 Naohisa Goto * lib/bio/db/embl/common.rb Bug fix: Bio::EMBL#references failed to parse journal name, volume, issue, pages, and year. In addition, it might failed to parse PubMed ID. (commit c715f51729b115309a78cf29fdce7fef992da875) 2008-04-18 Naohisa Goto * lib/bio/db/embl/sptr.rb Bug fix: Bio::SPTR#references raises NoMethodError since lib/bio/db/embl/sptr.rb CVS version 1.34. (commit 1b3e484e19c9c547cecfe53858a646b525685e0d) 2008-04-15 Naohisa Goto * lib/bio/appl/blast/rpsblast.rb Newly added RPS-Blast default (-m 0) output parser. 2008-04-01 Naohisa Goto * lib/bio/appl/blast/format0.rb Fixed a bug: Failed to parse database name in some cases. Thanks to Tomoaki Nishiyama who reported the bug and sent patches ([BioRuby-ja] BLAST format0 parser fails header parsing output of specific databases). * lib/bio/db/pdb/chain.rb, lib/bio/db/pdb/pdb.rb Fixed bugs: Bio::PDB::Chain#aaseq failed for nucleotide chain; Failed to parse chains for some entries (e.g. 1B2M). Thanks to Semin Lee who reported the bugs and sent patches ([BioRuby] Bio::PDB parsing problem (1B2M)). 2008-02-19 Toshiaki Katayama * lib/bio/io/ncbirest.rb * lib/bio/io/pubmed.rb NCBI E-Utilities (REST) functionality is separated to ncbirest.rb and pubmed.rb is changed to utilize the Bio::NCBI::REST class for esearch and efetch. You can now search and retrieve any database in any format that NCBI supports by E-Utilities through the Bio::NCBI::REST interface (currently, only esearch and efetch methods are implemented). 
(commit 0677bb69044cf6cfba453420bc1bbeb422f691c1) (commit f60e9f8153efacff0c97d12fb5c0830ebeb02edd) (commit 6e4670ab5e67ca596788f4c26a95a9687d36ce84) 2008-02-13 Pjotr Prins * doc/Tutorial.rd (commit d7ee01d86d6982f6b8aa19eba9adac95bebb08e8) 2008-02-12 Naohisa Goto * lib/bio/appl/blast/format0.rb Fixed bugs: Failed to parse query length for long query (>= 10000 letters) as comma is inserted for digit separator by blastall; Failed to parse e-value for some BLASTX results. Thanks to Shuji Shigenobu who reported the bugs and sent patches. 2008-02-11 Pjotr Prins * doc/Tutorial.rd Expanding on the Tutorial (bdc1d14f497909041fa761f659a74d98702a335a) Minor adjustments to Tutorial (72b5f4f0667a3a0c44ca31b0ab8228381e37919c) 2008-02-06 Pjotr Prins * sample/na2aa.rb Simple example to translate any NA to AA fasta (commit 433f974219cf04342935c1760464af24a5696c49) 2008-02-05 Pjotr Prins * sample/gb2fasta.rb Fixed broken require in gb2fasta example (commit b55daed0d6cff2e45155be01ef2a946925c972cf) 2008-02-05 Pjotr Prins * doc/Tutorial.rd Minor tweak to Tutorial.rd (commit 75416d780f99de24498a47fd22703d74f9a22329) 2008-02-03 Pjotr Prins * doc/Tutorial.rd More doctests in Tutorial.rd (commit 39d182bb67977956c0f22631ac596d65ccce74ff) 2008-02-02 Pjotr Prins * doc/Tutorial.rd Tabs in the Tutorial broke the rd parser - the Wiki will be fixed now. (commit 49078a5dea4f16f44add1882c60bf75df67ea19b) Updating tutorial. (commit f2f2005c3964f37e2d65afef0d52e63950d6bcb7) (commit d2b05581953712d0ac67ba0de1aa43853ed4e27f) 2008-02-02 Toshiaki Katayama * lib/bio/shell/rails/vendor/plugins/ The 'generators' directory is moved under the 'bioruby' subdirectory so that 'bioruby --rails' command can work with Rails 2.x series in addition to the Rails 1.2.x series. 2008-01-30 Mitsuteru Nakao * lib/bio/appl/blast.rb Fixed the bug at building the blastall command line options ('-m 0'). 
(commit 61443d177847825505103488573186dfc4e7568e) 2008-01-10 Naohisa Goto * lib/bio/appl/emboss.rb Added a method Bio::EMBOSS.run(program, arguments...) and Bio::EMBOSS.new is obsoleted. (commit fa04d97b073aefe05edc34a84498ba0a57ff98d2) 2008-01-10 Toshiaki Katayama * lib/bio/io/hinv.rb Bio::Hinv to access the H-invitational DB (http://h-invitational.jp/) web service in REST mode is added. 2007-12-30 Toshiaki Katayama * BioRuby 1.2.1 released This version is not Ruby 1.9 (released a few days ago) compliant yet. 2007-12-28 Naohisa Goto * lib/bio/appl/blast/report/format0.rb Fixed parse error when composition-based statistics were enabled. In addition, Bio::Blast::Default::Report#references and Bio::Blast::Default::Report::HSP#stat_method methods are added. In NCBI BLAST 2.2.17, default option of composition-based statistics for blastp or tblastn are changed to be enabled by default. * lib/bio/appl/blast/report/wublast.rb Changed to follow the above changes in format0.rb. * lib/bio/sequence/common.rb Ruby 1.9 compliant: in window_search method, a local variable name outside the iterator loop is changed not to be shadowed by the iterator variable. * lib/bio/db/pdb/pdb.rb Ruby 1.9 compliant: changed to avoid "RuntimeError: implicit argument passing of super from method defined by define_method() is not supported. Specify all arguments explicitly." error. Ruby 1.9 compliant: Bio::PDB::Record.get_record_class and Bio::PDB::Record.create_definition_hash (Note: they should only be internally used by PDB parser and users should not call them) are changed to follow the change of Module#constants which returns an array of Symbol instead of String. 2007-12-26 Naohisa Goto * lib/bio/alignment.rb Ruby 1.9 compliant: in EnumerableExtension#each_window and OriginalAlignment#index methods, local variable names outside the iterator loops are changed not to be shadowed by iterator variables.
Warning messages for uninitialized instance variables of @gap_regexp, @gap_char, @missing_char, and @seqclass are suppressed. * test/unit/bio/test_alignment.rb Ruby 1.9 compliant: Ruby 1.9 compliant: The last comma in Array.[] is no longer allowed. (For example, class A < Array; end; A[ 1, 2, 3, ] raises syntax error in Ruby 1.9.) 2007-12-21 Toshiaki Katayama * lib/bio/db/medline.rb Added doi and pii methods to extract DOI and PII number from AID field 2007-12-18 Naohisa Goto * lib/bio/db/pdb/pdb.rb Bio::PDB#inspect is added to prevent memory exhaust problem. ([BioRuby] Parse big PDB use up all memory) * lib/bio/db/pdb/model.rb Bio::PDB::Model#inspect is added. * lib/bio/db/pdb/chain.rb Bio::PDB::Chain#inspect is added. * lib/bio/db/pdb/residue.rb Bio::PDB::Residue#inspect is added. This also affects Bio::PDB::Heterogen#inspect. 2007-12-15 Toshiaki Katayama * BioRuby 1.2.0 released * BioRuby shell is improved * file save functionality is fixed * deprecated require_gem is changed to gem to suppress warnings * deprecated end_form_tag is rewrited to suppress warnings * images for Rails shell are separated to the bioruby directory * spinner is shown during the evaluation * background image in the textarea is removed for the visibility * Bio::Blast is fixed to parse -m 8 formatted result correctly * Bio::PubMed is rewrited to enhance its functionality * e.g. 'rettype' => 'count' and 'retmode' => 'xml' are available * Bio::FlatFile is improved to accept recent MEDLINE format * Bio::KEGG::COMPOUND is enhanced to utilize REMARK field * Bio::KEGG::API is fixed to skip filter when the value is Fixnum * A number of minor bug fixes 2007-12-12 Naohisa Goto * lib/bio/db/newick.rb: Changed to be compliant with the Gary Olsen's Interpretation of the "Newick's 8:45" Tree Format Standard. * test/unit/bio/db/test_newick.rb More tests are added. * lib/bio/io/flatfile/indexer.rb Fixed a misspelling in Bio::FlatFileIndex.formatstring2class. 
2007-11-28 Toshiaki Katayama * lib/bio/io/pubmed.rb: Fixed search, query methods (but use of esearch and efetch is strongly recommended). efetch method is enhanced to accept any PubMed search options as a hash (to retrieve in XML format etc.) Changed to wait 3 seconds between each access by default to be compliant with the NCBI terms (Make no more than one request every 3 seconds). All Bio::PubMed.* class methods are changed to instance methods (the class-method interface is retained for backward compatibility). 2007-07-19 Toshiaki Katayama * BioRuby 1.1.0 released 2007-07-17 Toshiaki Katayama * lib/bio/io/das.rb Fixed the mapmaster method to return the correct value (mapmaster's URL). This bug is reported and fixed by Dave Thorne. 2007-07-16 Naohisa Goto * lib/bio/mafft/report.rb For generic multi-fasta formatted sequence alignment, Bio::Alignment::MultiFastaFormat is newly added based on Bio::MAFFT::Report class, and Bio::MAFFT::Report is changed to inherit the new class. Tests are added in test/unit/bio/appl/mafft/test_report.rb. * lib/bio/alignment.rb New modules and classes Bio::Alignment::FactoryTemplate::* are added. They are used by following three new classes. * lib/bio/appl/muscle.rb * lib/bio/appl/probcons.rb * lib/bio/appl/tcoffee.rb New classes Bio::Muscle, Bio::Probcons, and Bio::Tcoffee are added for MUSCLE, ProbCons, and T-Coffee multiple alignment programs. Contributed by Jeffrey Blakeslee and colleagues. * lib/bio/appl/clustalw.rb * lib/bio/appl/mafft.rb Interfaces of Bio::ClustalW and Bio::MAFFT are added/modified to follow Bio::Alignment::FactoryTemplate (but not yet changed to use it). 2007-07-09 Toshiaki Katayama * BioRuby shell on Rails has new CSS theme Completely new design for BioRuby shell on Rails translated from the 'DibdoltRed' theme on www.openwebdesign.org which is created by Darjan Panic and Brian Green as a public domain work!
2007-07-09 Toshiaki Katayama * lib/bio/db/kegg/taxonomy.rb Newly added KEGG taxonomy file parser which treats taxonomic tree structure of the KEGG organisms. The file is available at ftp://ftp.genome.jp/pub/kegg/genes/taxonomy and is a replacement of the previously used keggtab file (obsoleted). * lib/bio/db/kegg/keggtab.rb Bio::KEGG::Keggtab is obsoleted as the file is no longer provided. Use Bio::KEGG::Taxonomy (lib/bio/db/kegg/taxonomy.rb) instead. * lib/bio/shell/plugin/soap.rb Newly added web service plugins for BioRuby shell which supports NCBI SOAP, EBI SOAP and DDBJ XML in addition to the KEGG API. 2007-07-09 Naohisa Goto * lib/bio/db/pdb/pdb.rb Pdb_LString.new is changed not to raise error for nil. Fixed a bug when below records does not exist in a PDB entry: REMARK (remark), JRNL (jrnl), HELIX (helix), TURN (turn), SHEET (sheet), SSBOND (ssbond), SEQRES (seqres), DBREF (dbref), KEYWDS (keywords), AUTHOR (authors), HEADER (entry_id, accession, classification), TITLE (definition), and REVDAT (version) records (methods). Incompatible change: Bio::PDB#record is changed to return an empty array for nonexistent record. (reported by Mikael Borg) 2007-07-09 Naohisa Goto * lib/bio/io/flatfile.rb Bio::FlatFile.foreach is added (which is suggested by IO.foreach). 2007-06-28 Toshiaki Katayama * lib/bio/shell/setup.rb, core.rb Changed not to use Dir.chdir by caching full path of the save directory at a start up time, so that user can freely change the work directory without affecting object/history saving functionality. Bio::Shell.cache[:savedir] stores the session saving directory (session means shell/session/{config,history,object} files), Bio::Shell.cache[:workdir] stores the working directory at a start up time (can be same directory with the :savedir) and both are converted and stored as full path allowing user to use Dir.chdir in the shell session). 
If --rails (-r) option is applied, 'bioruby' command will run in the Rails server mode, and the server will start in the :savedir. (A) IRB mode 1. run in the current directory and the session will be saved in the ~/.bioruby directory % bioruby 2. run in the current directory and the session will be saved in the foo/bar directory % bioruby foo/bar 3. run in the current directory and the session will be saved in the /tmp/foo/bar directory % bioruby /tmp/foo/bar (B) Rails mode 4. run in the ~/.bioruby directory and the session will also be saved in the ~/.bioruby directory % bioruby -r 5. run in the foo/bar directory and the session will also be saved in the foo/bar directory % bioruby -r foo/bar 6. run in the /tmp/foo/bar directory and the session will also be saved in the /tmp/foo/bar directory % bioruby -r /tmp/foo/bar (C) Script mode 7. run in the current directory using the session saved in the ~/.bioruby directory % bioruby ~/.bioruby/shell/script.rb 8. run in the current directory using the session saved in the foo/bar directory % bioruby foo/bar/shell/script.rb 9. run in the current directory using the session saved in the /tmp/foo/bar directory % bioruby /tmp/foo/bar/shell/script.rb 2007-06-21 Toshiaki Katayama * lib/bio/shell/setup.rb If no directory is specified to the bioruby command, use ~/.bioruby directory as the default save directory instead of the current directory, as suggested by Jun Sese. User can use 'bioruby' command without botherd by directories and files previously created by the 'bioruby' command in the current directory even when not needed. 2007-05-19 Toshiaki Katayama * lib/bio/appl/fasta.rb Bug fixed that exec_local fails to exec when @ktup is nil. This problem is reported and fixed by Fredrik Johansson. * lib/bio/db/gff.rb parser_attributes method in GFF3 class is modified to use '=' char as a separator instead of ' ' (which is used in GFF2 spec). 
2007-04-06 Toshiaki Katayama * COPYING, COPYING.LIB are removed BioRuby is now distributed under the same terms as Ruby. On behalf of the BioRuby developer, I have asked all authors of the BioRuby code to change BioRuby's license from LGPL to Ruby's. We have finished changing the license of all modules in the BioRuby library. This means that Ruby users can freely use the BioRuby library without being annoyed by licensing issues. * lib/bio/db/kegg/ko.rb is renamed to lib/bio/db/kegg/ortholog.rb KEGG KO database is renamed to KEGG ORTHOLOG database, thus we follow the change. Bio::KEGG::KO is renamed to Bio::KEGG::ORTHOLOG. Bio::KEGG::ORTHOLOG#genes, dblinks methods are rewritten to use lines_fetch method. * lib/bio/data/aa.rb to_re method is changed so that the generated regexp includes the ambiguous amino acid itself - replacement of amino acid X should include X itself. 2007-04-05 Trevor Wennblom * License headers are completely rewritten to Ruby's. 2007-04-02 Naohisa Goto * lib/bio/appl/mafft.rb Incompatible change: Bio::MAFFT#output is changed to return a string of multi-fasta formatted text. To get an array of Bio::FastaFormat objects (as of 1.0 or before), please use report.data instead. 2007-03-29 Toshiaki Katayama * lib/bio/db/kegg/cell.rb Obsoleted as the KEGG CELL database is not publicly available any more. 2007-03-28 Toshiaki Katayama * lib/bio/shell/rails/.../bioruby_controller.rb BioRuby shell on Rails access is changed to be limited only from the localhost for security reason (if local_request?). * lib/bio/command.rb The post_form method is modified to accept URL as a string and extended to accept params as array of string, array of hash, array of array, or string, in addition to hash (also can be omitted if not needed - defaults to nil). Keys and parameters of params are forced to use to_s for sure. * lib/bio/io/ensembl.rb Re-designed to allow users to use Bio::Ensembl.new without creating inherited sub class.
Changed to use Bio::Command.post_form * lib/bio/das.rb Changed to use Bio::Command * lib/bio/shell/plugin/das.rb Newly added BioDAS client plugin for BioRuby shell. das.list_sequences das.dna das.features 2007-03-15 Toshiaki Katayama * lib/bio/shell/irb.rb Changed to load Rails environment when bioruby shell is invoked in the Rails project directory. This means that user can use 'bioruby' command as a better './script/console' which has persistent objects and shared history. 2007-03-08 Toshiaki Katayama * lib/bio/db/kegg/drug.rb Newly added KEGG DRUG database parser. * lib/bio/db/kegg/glycan.rb Bio::KEGG::GLYCAN#bindings method is removed. Bio::KEGG::GLYCAN#comment, remarks methods are added. Bio::KEGG::GLYCAN#orthologs and dblinks methods are changed to use lines_fetch method. * lib/bio/kegg/compound.rb Bio::KEGG::COMPOUND#glycans method is added Bio::KEGG::COMPOUND#names method is changed to return an array of stripped strings. * lib/bio/db/kegg/genes.rb Bio::KEGG::GENES#orthologs method is added. 2007-03-27 Naohisa Goto * lib/bio/command.rb Bio::Command.call_command_fork and query_command_fork methods are changed to handle all Ruby exceptions in the child process. * lib/bio/io/flatfile.rb UniProt format autodetection was changed to follow the change of UniProtKB release 9.0 of 31-Oct-2006. 2007-02-12 Naohisa Goto * lib/bio/io/flatfile.rb Exception class UnknownDataFormatError is added. It will be raised before reading data from IO when data format hasn't been specified due to failure of file format autodetection. 2007-02-12 Toshiaki Katayama * lib/bio/io/flatfile.rb Added support for KEGG EGENES. 2007-02-02 Trevor Wennblom * lib/bio/util/restriction_enzyme* Bio::RestrictionEnzyme stabilized. 2007-02-02 Trevor Wennblom * lib/bio/db/lasergene.rb Bio::Lasergene Interface for DNAStar Lasergene sequence file format 2007-02-02 Trevor Wennblom * lib/bio/db/soft.rb Bio::SOFT for reading SOFT formatted NCBI GEO files. 
2007-01-16 Toshiaki Katayama * BioRuby shell on Rails new features and fixes New features: * Input [#] is linked to action for filling textarea from history * [methods] is separated into columns for readability Fixes and improvements: * HIDE_VARIABLES is moved from helper to controller to avoid warning "already initialized constant - HIDE_VARIABLES" repeated on reload. *
    is renamed to "log_#" with number for future extension. *
    are inserted in the
    2007-01-15 Toshiaki Katayama * lib/bio/db.rb lines_fetch method (internally used various bio/db/*.rb modules) is rewrited to concatenate indented sub field. * lib/bio/db/kegg/compound.rb Bio::KEGG::COMPOUND#comment method which returns contents of the COMMENT line is added * lib/bio/db/kegg/enzyme.rb Bio::KEGG::ENZYME#entry_id is changed to return EC number only. Previous version of entry_id method is renamed to entry method which returns a "EC x.x.x.x Enzyme" style string. Bio::KEGG::ENZYME#obsolete? method is added which returns boolean value (true or false) according to the ENTRY line contains a string 'Obsolete' or not. Bio::KEGG::ENZYME#all_reac, iubmb_reactions, kegg_reactions methods are added to support newly added ALL_REAC field. Bio::KEGG::ENZYME#inhibitors and orthologs methods are added. Bio::KEGG::ENZYME#substrates, products, inhibitors, cofactors, pathways, orthologs, diseases, motifs methods are rewrited to utilizes new lines_fetch method in db.rb to process continuous sub field. * lib/bio/db/kegg/genome.rb Bio::KEGG::GENOME#scaffolds, gc, genomemap methods are obsoleted. Bio::KEGG::GENOME#distance, data_source, original_db methods are added. 2006-12-24 Toshiaki Katayama * bin/bioruby, lib/bio/shell/, lib/bio/shell/rails/ (lib/bio/shell/rails/vendor/plugins/generators/) Web functionallity of the BioRuby shell is completely rewrited to utilize generator of the Ruby on Rails. This means we don't need to have a copy of the rails installation in our code base any more. The shell now run in threads so user does not need to run 2 processes as before (drb and webrick). Most importantly, the shell is extended to have textarea to input any code and the evaluated result is returned with AJAX having various neat visual effects. * lib/bio.rb Extended to have Bio.command where command can be any BioRuby shell methods. ex. 
puts Bio.getseq("atgc" * 10).randomize.translate * lib/bio/shell/plugin/entry.rb, seq.rb seq, ent, obj commands are renamed to getseq, getent, getobj respectively. This getseq is also changed to return Bio::Sequence with @moltype = Bio::Sequence::NA object instead of Bio::Sequence::NA object. * lib/bio/db/kegg/kgml.rb Some method names are changed to avoid confusion: * entry_type is renamed to category () * map is renamed to pathway () 2006-12-19 Christian Zmasek * lib/bio/db/nexus.rb Bio::Nexus is newly developed during the Phyloinformatics hackathon. 2006-12-16 Toshiaki Katayama * lib/bio/io/sql.rb Updated to follow recent BioSQL schema contributed by Raoul Jean Pierre Bonnal. 2006-12-15 Mitsuteru Nakao * lib/bio/appl/iprscan/report.rb Bio::Iprscan::Report for InterProScan output is newly added. 2006-12-15 Naohisa Goto * lib/bio/appl/mafft/report.rb Bio::MAFFT::Report#initialize is changed to get a string of multi-fasta formmatted text instead of Array. 2006-12-14 Naohisa Goto * lib/bio/appl/phylip/alignment.rb Phylip format multiple sequence alignment parser class Bio::Phylip::PhylipFormat is newly added. * lib/bio/appl/phylip/distance_matrix.rb Bio::Phylip::DistanceMatrix, a parser for phylip distance matrix (generated by dnadist/protdist/restdist programs) is newly added. * lib/bio/appl/gcg/msf.rb, lib/bio/appl/gcg/seq.rb Bio::GCG::Msf in lib/bio/appl/gcg/msf.rb for GCG MSF multiple sequence alignment format parser, and Bio::GCG::Seq in lib/bio/appl/gcg/seq.rb for GCG sequence format parser are newly added. * lib/bio/alignment.rb Output of Phylip interleaved/non-interleaved format (.phy), Molphy alignment format (.mol), and GCG MSF format (.msf) are supported. Bio::Alignment::ClustalWFormatter is removed and methods in the module are renamed and moved to Bio::Alignment::Output. * lib/bio/appl/clustalw.rb, lib/bio/appl/mafft.rb, lib/bio/appl/sim4.rb Changed to use Bio::Command instead of Open3.popen3. 
2006-12-13 Naohisa Goto * lib/bio/tree.rb, lib/bio/db/newick.rb Bio::PhylogeneticTree is renamed to Bio::Tree, and lib/bio/phylogenetictree.rb is renamed to lib/bio/tree.rb. NHX (New Hampshire eXtended) parser/writer support is added. 2006-12-13 Toshiaki Katayama * doc/Desing.rd.ja, doc/TODO.rd.ja, doc/BioRuby.rd.ja are obsoleted. 2006-10-05 Naohisa Goto * lib/bio/db/newick.rb Bio::Newick for Newick standard phylogenetic tree parser is newly added (contributed by Daniel Amelang). * lib/bio/phylogenetictree.rb Bio::PhylogeneticTree for phylogenetic tree data structure is newly added. 2006-09-19 Toshiaki Katayama * lib/bio/io/soapwsdl.rb * lib/bio/io/ebisoap.rb * lib/bio/io/ncbisoap.rb Newly added web service modules. * lib/bio/db/kegg/kgml.rb Accessor for the attribute is added. * lib/bio/shell/plugin/codon.rb Support for Pyrrolysine and Selenocysteine is added in the BioRuby shell. * lib/bio/shell/plugin/seq.rb sixtrans, skip, step methods are added in the BioRuby shell. bioruby> seqtrans(seq) bioruby> seq.step(window_size) {|subseq| # do something on subseq } bioruby> seq.skip(window_size, step_size) {|subseq| # do something on subseq } 2006-07-26 Toshiaki Katayama * lib/bio/data/aa.rb Amino acids J (Xle: I/L), O (Pyl: pyrrolysine) and X (unknown) are added (now we have consumed 26 alphabets!). * lib/bio/io/fastacmd.rb Fixed that new version of fastacmd (in BLAST package) changed the option from '-D T' to '-D 1', contributed by the author of this module Shuji Shigenobu. * lib/bio/shell/plugin/psort.rb Newly added BioRuby shell plugin for PSORT * lib/bio/shell/plugin/blast.rb Newly added BioRuby shell plugin for BLAST search against KEGG GENES * lib/bio/db/prosite.rb PROSITE#re instance method is added to translate PATTERN of the entry to Regexp using PROSITE.pa2re class method.
* lib/bio/db/kegg/genes.rb Bio::KEGG::GENES#keggclass method is renamed to pathway Bio::KEGG::GENES#splinks method is removed Bio::KEGG::GENES#motifs method is added these reflect changes made in the original KEGG GENES database. Bio::KEGG::GENES#locations method is added to return Bio::Locations Bio::KEGG::GENES#codon_usage is renamed cu_list (returns as Array) Bio::KEGG::GENES#cu is renamed to codon_usage (returns as Hash) Bio::KEGG::GENES#aalen, nalen methods are changed to return the number written in the entry (use seq.length to obtain calculated number as before). * lib/bio/db/kegg/kgml.rb Names of some accessors have been changed (including bug fixes) and instance variable @dom is obsoleted. Here's a list of incompatible attribute names with KGML tags by this change: :id -> :entry_id :type -> :entry_type names() :name -> :label :type -> :shape :entry1 -> :node1 :entry2 -> :node2 :type -> :rel edge() :name -> :entry_id :type -> :direction * lib/bio/io/das.rb Bug fixed that the value of segment.stop was overwritten by segment.orientation. 2006-07-14 Naohisa Goto * lib/bio/command.rb Bio::Command::Tools and Bio::Command::NetTools are combined and re-constructed into a new Bio::Command module. lib/bio/appl/blast.rb, lib/bio/appl/fasta.rb, lib/bio/appl/emboss.rb, lib/bio/appl/psort.rb, lib/bio/appl/hmmer.rb, lib/bio/db/fantom.rb, lib/bio/io/fastacmd.rb, lib/bio/io/fetch.rb, lib/bio/io/keggapi.rb, lib/bio/io/pubmed.rb, and lib/bio/io/registry.rb are changed to use the new Bio::Command instead of old Bio::Command or Net::HTTP. 2006-06-29 Naohisa Goto * lib/bio/appl/blat/report.rb Bio::BLAT::Report::Hit#milli_bad, #percent_identity, #protein?, #score, and #psl_version methods/attributes are newly added, and psl files without headers are supported (discussed in bioruby-ja ML). 2006-06-27 Naohisa Goto * lib/bio/sequence/na.rb Bio::Sequence::NA#gc_content, #at_content, #gc_skew, #at_skew are newly added. 
Bio::Sequence::NA#gc_percent is changed not to raise ZeroDivisionError and to return 0 when the given sequence is empty. * lib/bio/db/pdb/pdb.rb Bio::PDB::ATOM#name, #resName, #iCode, #charge, #segID, and #element are changed to strip whitespaces when initializing. Bio::PDB::HETATM is also subject to the above changes. (suggested by Mikael Borg) 2006-06-12 Naohisa Goto * lib/bio/io/flatfile.rb Bug fix: Bio::FlatFile.open(klass, filename) didn't work. 2006-05-30 Toshiaki Katayama * lib/bio/io/soapwsdl.rb Generic list_methods method which extracts web service methods defined in the WSDL file is added. 2006-05-02 Mitsuteru Nakao * lib/bio/appl/pts1.rb Bio::PTS1 first commit. 2006-04-30 Naohisa Goto * lib/bio/appl/blast/format0.rb Bug fix: parse error for hits whose database sequence names contain 'Score', and subsequent hits after them would be lost (reported by Tomoaki NISHIYAMA). 2006-04-14 Mitsuteru Nakao * lib/bio/io/ensembl.rb Bio::Ensembl first commit. It is a client class for Ensembl Genome Browser. 2006-03-22 Naohisa Goto * lib/bio/io/flatfile.rb Bug fix: Bio::FlatFile raises error for pipes, ARGF, etc. The bug also affects bio/appl/mafft.rb, bio/appl/clustalw.rb, bio/appl/blast.rb, bio/io/fastacmd.rb, and so on. Bio::FlatFile#entry_start_pos and #entry_ended_pos are changed to be enabled only when Bio::FlatFile#entry_pos_flag is true. 2006-02-27 Toshiaki Katayama * BioRuby 1.0.0 released 2006-02-10 Toshiaki Katayama * BioRuby shell is changed to use session/ directory under the current or specified directory to store the session information instead of ./.bioruby directory. 2006-02-05 Toshiaki Katayama * License to be changed to Ruby's (not yet completed). 2006-02-01 Trevor Wennblom * Bio::RestrictionEnzyme first commit for comments.
* See lib/bio/util/restriction_enzyme.rb and test/unit/bio/util/restriction_enzyme 2006-01-28 Toshiaki Katayama * lib/bio/appl/emboss.rb EMBOSS USA format is now accepted via seqret/entret commands and also utilized in the BioRuby shell (lib/bio/shell.rb, plugin/entry.rb, plugin/emboss.rb). * lib/bio/io/brdb.rb is removed - unused Bio::BRDB (BioRuby DB) 2006-01-23 Toshiaki Katayama * lib/bio/sequence.rb Bio::Sequence is refactored to be a container class for any sequence annotations. Functionality is separated into several files under the lib/bio/sequence/ directory as common.rb, compat.rb, aa.rb, na.rb, format.rb 2006-01-20 Toshiaki Katayama * BioRuby 0.7.1 is released. 2006-01-12 Toshiaki Katayama * lib/bio/db.rb: fixed a bug of the tag_cut method introduced in 0.7.0 (reported by Alex Gutteridge) 2006-01-04 Naohisa Goto * Bio::PDB is refactored. See doc/Changes-0.7.rd for more details. 2005-12-19 Toshiaki Katayama * BioRuby 0.7.0 is released. See doc/Changes-0.7.rd file for major and incompatible changes. 2005-12-19 Naohisa Goto * lib/bio/db/pdb.rb, lib/bio/db/pdb/pdb.rb, lib/bio/db/pdb/*.rb * Many changes have been made. * Bio::PDB::FieldDef is removed and Bio::PDB::Record is completely changed. Now, Record is changed from hash to Struct, and method_missing is no longer used. * In the "MODEL" record, model_serial is changed to serial. * In any records, record_type is changed to record_name. * Most records containing real numbers are changed to return float values instead of strings. * Pdb_AChar, Pdb_Atom, Pdb_Character, Pdb_Continuation, Pdb_Date, Pdb_IDcode, Pdb_Integer, Pdb_LString, Pdb_List, Pdb_Real, Pdb_Residue_name, Pdb_SList, Pdb_Specification_list, Pdb_String, Pdb_StringRJ and Pdb_SymOP are moved under Bio::PDB::DataType. * There are more and more changes to be written... * lib/bio/db/pdb/atom.rb * Bio::PDB::Atom is removed. Instead, please use Bio::PDB::Record::ATOM and Bio::PDB::Record::HETATM.
2005-12-02 Naohisa Goto * lib/bio/alignment.rb * Old Bio::Alignment class is renamed to Bio::Alignment::OriginalAlignment. Now, new Bio::Alignment is a module. However, you don't mind so much because most of the class methods previously existed are defined to delegate to the new Bio::Alignment::OriginalAlignment class, for keeping backward compatibility. * New classes and modules are introduced. Please refer to RDoc. * each_site and some methods changed to return Bio::Alignment::Site, which inherits Array (previously returned Array). * consensus_iupac now returns only standard bases 'a', 'c', 'g', 't', 'm', 'r', 'w', 's', 'y', 'k', 'v', 'h', 'd', 'b', 'n', or nil (in SiteMethods#consensus_iupac) or '?' (or missing_char, in EnumerableExtension#consensus_iupac). Note that consensus_iupac now does not return u and invalid letters not defined in IUPAC standard even if all bases are equal. * There are more and more changes to be written... 2005-11-05 Toshiaki Katayama * lib/bio/sequence.rb Bio::Sequence.auto(str) method is added which auto-detects the molecular type of the string and then returns the Bio::Sequence::NA or Bio::Sequence::AA object. Bio::Sequence#blast and Bio::Sequence#fasta methods are removed. * lib/bio/shell/plugin/codon.rb Newly added plugin to treat codon table. ColoredCodonTable is ported from the codontable.rb 2005-11-01 Toshiaki Katayama * bin/bioruby, lib/bio/shell/ All methods are changed to private methods to avoid adding them in top level binding, which caused many unexpected behaviors, as advised by Koichi Sasada. The MIDI plugin is now able to select musical scales. 2005-10-23 Toshiaki Katayama * lib/bio/util/color_scheme Newly contributed Bio::ColorScheme * lib/bio/db/kegg/kgml.rb Newly added KEGG KGML parser. 2005-10-05 Toshiaki Katayama * lib/bio/shell/plugin/midi.rb Sequence to MIDI plugin is contributed by Natsuhiro Ichinose 2005-09-25 Toshiaki Katayama * README.DEV Newly added guideline document for the contributors.
* README Updated and added instructions on RubyGems. 2005-09-23 Toshiaki Katayama * bin/bioruby, lib/bio/shell.rb, lib/bio/shell/core.rb, lib/bio/shell/session.rb, lib/bio/shell/plugin/seq.rb, lib/bio/shell/flatfile.rb, lib/bio/shell/obda.rb Newly added BioRuby shell, the command line user interface. Try 'bioruby' command in your terminal. * doc/Changes-0.7.rd Newly added document describing incompatible and important changes between the BioRuby 0.6 and 0.7 versions. * lib/bio/sequence.rb Bio::Sequence.guess, Bio::Sequence#guess methods are added which guess the sequence type by the following formula (default value for the threshold is 0.9). (number of ATGC) / (number of other chars - number of N) > threshold 2005-09-10 Naohisa Goto * lib/bio.rb, lib/bio/appl/blast.rb, lib/bio/appl/blast/format0.rb, lib/bio/appl/blast/report.rb, lib/bio/appl/clustalw.rb, lib/bio/appl/fasta.rb, lib/bio/appl/fasta/format10.rb, lib/bio/appl/hmmer.rb, lib/bio/appl/hmmer/report.rb, lib/bio/appl/mafft.rb, lib/bio/appl/psort.rb, lib/bio/appl/psort/report.rb, lib/bio/appl/sim4.rb, lib/bio/db/genbank/ddbj.rb, lib/bio/io/flatfile/bdb.rb, lib/bio/io/flatfile/index.rb, lib/bio/io/flatfile/indexer.rb fixed autoload problem * lib/bio/appl/blast.rb, lib/bio/appl/blast/report.rb Bio::Blast.reports method was moved from lib/bio/appl/blast/report.rb to lib/bio/appl/blast.rb for autoload. 2005-08-31 Toshiaki Katayama * BioRuby 0.6.4 is released. * doc/KEGG_API.rd Newly added English version of the KEGG API manual. * lib/bio/aa.rb the 'one2name' method introduced in 0.6.3 is fixed and added 'one' and 'three' methods as aliases for 'to_1' and 'to_3' methods. 2005-08-31 Naohisa Goto * removed unused file lib/bio/appl/factory.rb (the functionality had been integrated into lib/bio/command.rb) * doc/Tutorial.rd Newly added an English translation of the Japanese tutorial. 2005-08-16 Naohisa Goto * lib/bio/command.rb Newly added Bio::Command::Tools module.
Bio::Command::Tools is a collection of useful methods for execution of external commands. * lib/bio/appl/blast.rb, lib/bio/appl/fasta.rb, lib/bio/appl/hmmer.rb, lib/bio/io/fastacmd.rb For security reason, shell special characters are escaped. * lib/bio/appl/blast.rb, lib/bio/appl/fasta.rb, lib/bio/appl/hmmer.rb Options are stored with an array (@options). #options and #options= methods are added. * lib/bio/appl/blast.rb, lib/bio/appl/fasta.rb Bio::Blast.remote and Bio::Fasta.remote are fixed to work with the recent change of the GenomeNet. 2005-08-11 Toshiaki Katayama * Sequence#to_re method to have compatibility with 0.6.2 for RNA * Fixed Bio::Fastacmd#fetch to work * Bio::Fastacmd and Bio::Bl2seq classes (introduced in 0.6.3) are renamed to Bio::Blast::Fastacmd, Bio::Blast::Bl2seq respectively. 2005-08-09 Toshiaki Katayama * BioRuby 0.6.3 is released. This version would be the final release to support Ruby 1.6 series (as long as no serious bug is found:). * lib/bio/util/sirna.rb: Newly added method for design of siRNA, contributed by Itoshi Nikaido. The lib/bio/util/ directory is reserved for bioinformatics algorithms implemented by pure Ruby. * lib/bio/io/fastacmd.rb: Newly added wrapper for NCBI fastacmd program, contributed by Shinji Shigenobu. * lib/bio/appl/hmmer/report.rb: Bug fixed by Masashi Fujita when the position of sequence rarely becomes '-' instead of digits. 2005-08-08 Mitsuteru Nakao * lib/bio/db/embl/sptr.rb: Added Bio::SPTR#protein_name and Bio::SPTR#synonyms methods. contributed by Luca Pireddu. Changed Bio::SPTR#gn, Bio::SPTR#gene_name and Bio::SPTR#gene_names methods. contributed by Luca Pireddu. 2005-08-08 Naohisa Goto * lib/bio/appl/bl2seq/report.rb: Newly added bl2seq (BLAST 2 sequences) output parser.
* lib/bio/appl/blast/format0.rb: Added `self.class::` before F0dbstat.new for bl2seq/report.rb 2005-08-07 Toshiaki Katayama * lib/bio/sequence.rb, lib/bio/data/na.rb, lib/bio/data/aa.rb: Bio::NucleicAcid, Bio::AminoAcid classes are refactored to have Data module, and this module is included and extended to make all methods as both of instance methods and class methods. Bio::Sequence::NA and AA classes are rewrited (molecular_weight, to_re methods) to use Bio::NucleicAcid. Bio::Sequence::NA#molecular_weight method is fixed to subtract two hydrogens per each base. * lib/bio/db/medline.rb: publication_type (pt) method is added. 2005-08-07 Naohisa Goto * lib/bio/db/genbank/common.rb: Avoid NoMethodError (private method `chomp` called for nil:NilClass) when parsing features of ftp://ftp.ncbi.nih.gov/genbank/genomes/Bacteria/ Salmonella_typhimurium_LT2/AE006468.gbk 2005-07-11 Toshiaki Katayama * bin/br_pmfetch.rb: Added sort by page option (--sort page) * lib/io/higet.rb: Newly added Bio::HGC::HiGet class for HiGet SOAP service. 2005-06-28 Toshiaki Katayama * gemspec.rb: newly added RubyGems spec file. 2005-06-21 Naohisa Goto * lib/bio/appl/blast/report.rb: Newly added support for reading BLAST -m 7 result files through Bio::FlatFile by adding DELIMITER = "\n" to Bio::Blast::Report class. (Note that tab-delimited format (-m 8 and -m 9) are not yet supported by Bio::FlatFile) * lib/bio/io/flatfile.rb: Added file format autodetection of BLAST XML format. 2005-06-20 Naohisa Goto * lib/bio/appl/blast/format0.rb: added 'to_s' to store original entry 2005-04-04 Mitsuteru Nakao * lib/bio/db/go.rb: Newly added Bio::GO::External2go class for parsing external2go file. 2005-03-10 Naohisa Goto * lib/bio/io/flatfile.rb: Added file format autodetection of Spidey (Bio::Spidey::Report). 2005-03-10 Naohisa Goto * lib/bio/io/flatfile.rb: Added file format autodetection for Bio::KEGG::KO, Bio::KEGG::GLYCAN, Bio::KEGG::REACTION, Bio::Blat::Report and Bio::Sim4::Report. 
In order to distinguish Bio::KEGG::REACTION and Bio::KEGG::COMPOUND, autodetection regexp. of Bio::KEGG::COMPOUND were modified. 2005-02-09 KATAYAMA Toshiaki * lib/bio/db/kegg/genes.rb: Added cu method which returns codon usage in Hash for the convenience (codon_usage method returns in Array or Fixnum). 2004-12-13 KATAYAMA Toshiaki * BioRuby 0.6.2 released. * test/all_tests.rb: Unit tests for some classes are newly incorporated by Moses Hohman. You can try it by 'ruby install.rb test' * lib/bio/appl/spidey/report.rb: Newly added Spidey result parser class. * lib/bio/appl/blat/report.rb: Newly added BLAT result parser class. * fixes and improvements: * lib/bio/appl/blast/blast/format0.rb * minor fix for the Blast default format parser * lib/bio/alignment.rb * Alignment class * lib/bio/db/prosite.rb * bug reported by Rolv Seehuus is fixed * some methods are added 2004-10-25 KATAYAMA Toshiaki * lib/bio/db/{compound.rb,reaction.rb,glycan.rb}: Newly added parser for KEGG REACTION and KEGG GLYCAN database entries, fix for KEGG COMPOUND parser to support the new format. 2004-10-09 GOTO Naohisa * lib/bio/appl/sim4.rb Newly added sim4 wrapper class. This is test version, specs would be changed frequently. * lib/bio/appl/sim4/report.rb Newly added sim4 result parser class. 2004-08-25 KATAYAMA Toshiaki * BioRuby 0.6.1 released. * fix for the packaging miss of 0.6.0 * bin/*.rb are renamed to bin/br_*.rb (similar to the BioPerl's convention: bp_*.pl) 2004-08-24 KATAYAMA Toshiaki * BioRuby 0.6.0 released. * many fixes for Ruby 1.8 * updated for genome.ad.jp -> genome.jp transition * lib/bio/db/pdb.rb Newly added parser for PDB contributed by Alex Gutteridge (EBI). * lib/bio/data/codontable.rb Bio::CodonTable is rewrited to be a class instead of static variable. Now it can hold table definition, start codons, stop codons and added methods to detect start/stop codons and reverse translation. Also includes sample code to show codon table in ANSI colored ascii art, have fun. 
* lib/bio/sequence.rb Bio::Sequence::NA#translate is rewrited to accept an user defined codon table as a Bio::CodonTable object and any character can be specified for the unknown codon. This method runs about 30% faster than ever before. Bio::Sequence::AA#to_re method is added for the symmetry. Bio::Seq will be changed to hold generic rich sequence features. This means Bio::Seq is no longer an alias of Bio::Sequence but is a sequence object model, something like contents of a GenBank entry, common in BioPerl, BioJava etc. * lib/bio/io/soapwsdl.rb Newly added common interface for SOAP/WSDL in BioRuby used by keggapi.rb, ddbjxml.rb. * lib/bio/io/keggapi.rb Completely rewrited to support KEGG API v3.0 * lib/bio/io/esoap.rb Newly added client library for Entrez Utilities SOAP interface. * lib/bio/db/genbank, lib/bio/db/embl Refactored to use common.rb as a common module. * bin/pmfetch.rb Newly added command to search PubMed. * bin/biofetch.rb, flatfile.rb, biogetseq.rb Renamed to have .rb suffix. * sample/biofetch.rb Rewrited to use KEGG API instead of DBGET 2003-10-13 KATAYAMA Toshiaki * BioRuby 0.5.3 released. Fixed bugs in Blast XML parsers: xmlparser.rb is fixed not to omit the string after ' and " in sequence definitions, rexml.rb is fixed not to raise NoMethodError as "undefined method `each_element_with_text' for nil:NilClass". 2003-10-07 GOTO Naohisa * lib/bio/db/nbrf.rb Newly added NBRF/PIR flatfile sequence format class. 2003-09-30 GOTO Naohisa * lib/bio/db/pdb.rb Newly added PDB database flatfile format class. This is pre-alpha version, specs shall be changed frequently. 2003-08-22 KATAYAMA Toshiaki * BioRuby 0.5.2 released. Fixed to be loaded in Ruby 1.8.0 without warnings. * doc/KEGG_API.rd.ja Newly added a Japanese document on the KEGG API. 2003-08-12 GOTO Naohisa * lib/bio/appl/blast/format0.rb Newly added NCBI BLAST default (-m 0) output parser, which may be 5-10x faster than BioPerl's parser. This is alpha version, specs may be frequently changed. 
PHI-BLAST support is still incomplete. Ruby 1.8 recommended. In ruby 1.6, you need strscan. * lib/bio/appl/blast/wublast.rb Newly added WU-BLAST default output parser. This is alpha version, specs may be frequently changed. Support for parameters and statistics are still incomplete. Ruby 1.8 recommended. In ruby 1.6, you need strscan. 2003-07-25 GOTO Naohisa * lib/bio/alignment.rb: Newly added multiple sequence alignment class. * lib/bio/appl/alignfactory.rb: Newly added template class for multiple alignment software. * lib/bio/appl/clustalw.rb: Newly added CLUSTAL W wrapper. * lib/bio/appl/clustalw/report.rb: Newly added CLUSTAL W result data (*.aln file) parser. * lib/bio/appl/mafft.rb, lib/bio/appl/mafft/report.rb: Newly added MAFFT wrapper and report parser. (MAFFT is a multiple sequence alignment program based on FFT.) 2003-07-16 KATAYAMA Toshiaki * BioRuby version 0.5.1 released. * lib/bio/sequence.rb: some methods (using 'rna?' internally) were temporally unusable by the changes in 0.5.0 is fixed. * lib/bio/io/flatfile.rb: autodetection failure of the fasta entry without sequence is fixed. FlatFile.auto method is added. * lib/bio/db.rb: sugtag2array fixed. DB.open now accepts IO/ARGF. * lib/bio/db/embl.rb: references method is added. 2003-06-25 KATAYAMA Toshiaki * BioRuby version 0.5.0 released. * lib/bio/appl/blast/report.rb: Refactored from xmlparser.rb, rexml.rb, and format8.rb files. Formats are auto detected and parsers are automatically selected by checking whether XMLParser or REXML are installed. You can call simply as Bio::Blast::Report.new(blastoutput) or you can choose parsers/format explicitly by Bio::Blast::Report.xmlparser(format7blastoutput) Bio::Blast::Report.rexml(fomat7blastoutput) Bio::Blast::Report.tab(format8blastoutput) You can also use newly added class method reports for multiple xml blast output. 
Bio::Blast.reports(output) # output can be IO or String * lib/bio/appl/fasta/report.rb: Refactored from format10.rb, format6.rb and sample/* files. * lib/bio/appl/hmmer/report.rb: Bug fix and clean up. * bin/biogetseq: Newly added OBDA (BioRegistry) entry retrieval command. * etc/bioinformatics/seqdatabase.ini, lib/bio/io/registry.rb: Updated for new OBDA spec (Singapore version). Including config file versioning and changes in tag names, support for OBDA_SEARCH_PATH environmental variable. * lib/bio/io/keggapi.rb: Newly added KEGG API client library. * lib/bio/io/ddbjxml.rb: Newly added DDBJ XML client library (test needed). * lib/bio/io/das.rb: Newly added BioDAS client library. * lib/bio/db/gff.rb: Newly added GFF format parser/store library. * lib/bio/appl/tmhmm/report.rb: Newly added TMHMM report parser. * lib/bio/appl/targetp/report.rb: Newly added TargetP report parser. * lib/bio/appl/sosui/report.rb: Newly added SOSUI report parser. * lib/bio/appl/psort/report.rb: Newly added PSORT report parser. , * lib/bio/appl/genscan/report.rb: Newly added GENSCAN report parser. * lib/bio/db/prosite.rb: bug fix in ps2re method. * lib/bio/db/fantom.rb: Newly added FANTOM database parser (XML). * lib/bio/db/go.rb: Newly added GO parser. * lib/bio/feature.rb: 'each' method now accepts an argument to select specific feature. * lib/bio/db/fasta.rb: definition=, data= to change comment line. * lib/bio/db/genbank.rb: References and features now accept a block. 'acc_version' method is added to return the Accsession.Version string. 'accession' method now returns Accession part of the acc_version. 'version' method now returns Version part of the acc_version as an integer. * lib/bio/db/keggtab.rb: Rewrited for bug fix and clean up (note: some methods renamed!) 
* gsub('abrev', 'abbrev') in method names * db_path_by_keggorg is changed to db_path_by_abbrev * @bio_root is changed to @bioroot (ENV['BIOROOT'] overrides) * Bio::KEGG::DBname is changed to Bio::KEGG::Keggtab::DB * @database is added (a hash with its key db_abbreb) * database, name, path methods added with its argument db_abbreb * lib/bio/io/flatfile.rb: Enumerable mix-in is included. * lib/bio/io/flatfile/indexer.rb: Indexing of the FASTA format file is now supported with various type of definition line. * bin/dbget: Removed (moved under sample directory because the port of the dbget server is now closed). * install.rb: Changed to use setup 3.1.4 to avoid installing CVS/ directory. * sample/goslim.rb: Added a sample to generate histogram from GO slim. * sample/tdiary.rb: Added for tDiary users. have fun. :) 2003-01-28 KATAYAMA Toshiaki * BioRuby version 0.4.0 released. * bin/bioflat: * newly added for the BioFlat indexing * lib/bio/io/flatfile.rb, flatfile/{indexer.rb,index.rb,bdb.rb}: * flatfile indexing is supported by N. Goto * lib/bio/db/genbank.rb: changed to contain common methods only * lib/bio/db/genbank/genbank.rb * lib/bio/db/genbank/genpept.rb * lib/bio/db/genbank/refseq.rb * lib/bio/db/genbank/ddbj.rb * lib/bio/db/embl.rb: changed to contain common methods only * lib/bio/db/embl/embl.rb * lib/bio/db/embl/sptr.rb * lib/bio/db/embl/swissprot.rb * lib/bio/db/embl/trembl.rb * lib/bio/appl/emboss.rb: * added - just a generic wrapper, no specific parsers yet. * lib/bio/appl/hmmer.rb: * added - execution wrapper * lib/bio/appl/hmmer/report.rb: * added - parsers for hmmsearch, hmmpfam contributed by H. Suga * lib/bio/db.rb: open method added for easy use of flatfile. * lib/bio/db/kegg/genes.rb: * fixed bug in codon_usage method in the case of long sequence >999 * eclinks, splinks, pathways, gbposition, chromosome methods added * lib/bio/db/aaindex.rb: * adapted for the new AAindex2 format (release >= 6.0). 
* lib/bio/db/fasta.rb: entry_id is changed to return first word only * lib/bio/data/na.rb, aa.rb, keggorg.rb: * moved under class NucleicAcid, AminoAcid, KEGG (!) * in the test codes, DBGET is replaced by BioFetch 2002-08-30 Yoshinori K. Okuji * lib/bio/matrix.rb: Removed. * lib/bio/db/aaindex.rb: Require matrix instead of bio/matrix. * lib/bio/db/transfac.rb: Likewise. * lib/bio/pathway.rb: Likewise. (Pathway#dump_matrix): Don't use Matrix#dump. 2002-07-30 KATAYAMA Toshiaki * BioRuby version 0.3.9 released. * lib/bio/location.rb: * Locations#length (size) methods added (contributed by N. Goto) * Locations#relative method added (contributed by N. Goto) * Locations#absolute method is renamed from offset * Locations#offset, offset_aa methods removed * use absolute/relative(n, :aa) for _aa * Locations#[], range methods added * Location#range method added * lib/bio/db/embl.rb: * fix accession method. * lib/bio/db/genpept.rb: * temporally added - in the next release, we will make refactoring. * lib/bio/reference.rb: * in bibtex and bibitem format, "PMIDnum" is changed to "PMID:num". * lib/bio/io/pubmed.rb: * esearch, efetch methods are added. * lib/bio/db/aaindex.rb: * fix serious bug in the index method to support negative values. * lib/bio/db.rb: * fix fetch method to cut tag without fail. * lib/bio/extend.rb: * added first_line_only option for the prefix in fill method. * doc/Tutorial.rd.ja: * added docs on BibTeX etc. 2002-06-26 KATAYAMA Toshiaki * BioRuby version 0.3.8 released. * lib/bio/sequence.rb: * normalize! method added for clean up the object itself. * 'to_seq' method was renamed to 'seq' (!) * to_xxxx should be used when the class of the object changes. * lib/bio/appl/blast/xmparser.rb: * each_iteration, each_hit, each, hits, statistics, message methods are added in Report class. * statistics, message methods are added in Iteration class. * methods compatible with Fasta::Report::Hit are added in Hit class. 
* lib/bio/appl/blast/rexml.rb: * many APIs were changed to follow the xmlparser.rb's. (!) * lib/bio/appl/{blast.rb,fasta.rb]: * class method parser() is added for loading specified Report class. * etc/bioinformatics/seqdatabase.ini: added for OBDA (!) * sample setup for BioRegistry - Open Bio Sequence Database Access. * lib/bio/extend.rb: added (!) * This module adds some functionarity to the existing classes and not loaded by default. User should require specifically if needed. * lib/bio/util/*: removed and merged into lib/bio/extend.rb (!) * lib/bio/id.rb: removed (!) * lib/bio/db/{embl.rb,sptr.rb,transfac.rb}: added entry_id * lib/bio/data/keggorg.rb: updated * sample/genes2* sample/genome2*: updated * doc/Tutrial.rd.ja: updated 2002-06-19 KATAYAMA Toshiaki * BioRuby version 0.3.7 released. * lib/bio/sequence.rb: Sequence inherits String again (!) * lib/bio/db.rb, db/embl.rb, db/sptr.rb: moved EMBL specific methods 2002-06-18 KATAYAMA Toshiaki * lib/bio/feature.rb: Bio::Feature#[] method added * doc/Tutrial.rd.ja: changed to use Feature class 2002-05-28 KATAYAMA Toshiaki * lib/bio/appl/fasta.rb: parser separated, API renewal (!) * lib/bio/appl/fasta/format10.rb: moved from fasta.rb * lib/bio/appl/blast.rb: parser separated, API renewal (!) 
* lib/bio/appl/blast/format8.rb: newly added * lib/bio/appl/blast/rexml.rb: newly added * lib/bio/appl/blast/xmlparser.rb: moved from blast.rb 2002-05-16 KATAYAMA Toshiaki * lib/bio/sequence.rb: added alias 'Seq' for class Sequence * lib/bio/db/fasta.rb: entry method added 2002-05-15 KATAYAMA Toshiaki * lib/bio/io/dbget.rb: bug fixed for pfam (was wrongly skip # lines) * lib/bio/location.rb: offset method added, eased range check 2002-04-26 KATAYAMA Toshiaki * sample/biofetch.rb: new 'info=' option added 2002-04-22 KATAYAMA Toshiaki * lib/bio/appl/fasta.rb: follow changes made at fasta.genome.ad.jp * sample/gb2tab.rb: fixed to use authors.inspect for reference 2002-04-15 KATAYAMA Toshiaki * sample/gb2fasta.rb: changed to follow new genbank.rb spec. * sample/gt2fasta.rb: changed to follow new genbank.rb spec. * sample/gbtab2mysql.rb: added for loading tab delimited data. 2002/04/08 * version 0.3.6 released -k * fixed inconsistency among db.rb, genbank.rb, genome.rb -k * lib/bio/db/genbank.rb : serious bug fixed in locus method -k * lib/bio/feature.rb : method name 'type' has changed -k 2002/03/27 * sample/gb2tab.rb changed to follow new genbank.rb w/ new schema -k 2002/03/26 * sample/gb2tab.rb use ruby instead of perl in the example -o * sample/gb2fasta.rb updated -o 2002/03/11 * version 0.3.5 released -k 2002/03/04 * lib/bio/sequence.rb to_a, to_ary methods renamed to names, codes -k * sample/biofetch.rb added for BioFetch server -k * bin/biofetch added for BioFetch client -k * lib/bio/io/fetch.rb added for BioFetch library -k * lib/bio/io/sql.rb added for BioSQL -k * lib/bio/io/registry.rb added for BioDirectory/Registry -k * lib/bio/feature.rb added for BioSQL, GenBank, EMBL etc. 
-k * lib/bio/db/genbank.rb rewrited to use Features, References -k * lib/bio/db/{genes,genome}.rb clean up -k * lib/bio/reference.rb added class References -k 2002/02/05 * changed to use 'cgi' instead of 'cgi-lib' -n,k 2002/01/31 * version 0.3.4 released -k * lib/bio/db/genbank.rb -k * fix for multiple 'allele' in the feature key. (thanx Lixin) 2002/01/07 * lib/bio/appl/blast.rb -n * remote blast support etc. 2001/12/18 * lib/bio/id.rb -k * newly created * lib/bio/io/brdb.rb -k * newly created * lib/bio/db.rb -k * template methods are deleted * detailed docuement added * lib/bio/sequence.rb -k * to_fasta, complement, translate fixed (due to the changes made in 0.3.3) * Sequence::NA#initialize doesn't replace 'u' with 't' any longer * gc_percent, complement, translate, to_re, molecular_weight methods are adapted to this change * molecular_weight changed to calculate more precisely * test code added * lib/bio.rb -k * rescue for require 'bio/appl/blast' is deleted 2001/12/15 * lib/bio/sequence.rb -o * Sequence#to_str added 2001/12/15 * version 0.3.3 released -k bio-1.4.3.0001/doc/Changes-0.7.rd0000644000004100000410000003141012200110570015630 0ustar www-datawww-data= Incompatible and important changes since the BioRuby 0.6.4 release A lot of changes have been made to the BioRuby after the version 0.6.4 is released. --- Ruby 1.6 series are no longer supported. We use autoload functionality and many standard (bundled) libraries (such as SOAP, open-uri, pp etc.) only in Ruby >1.8.2. --- BioRuby will be loaded about 30 times faster than before. As we changed to use autoload instead of require, time required to start up the BioRuby library made surprisingly faster. Other changes (including newly introduced BioRuby shell etc.) made in this series will be described in this file. == New features --- BioRuby shell A new command line user interface for the BioRuby is now included. 
You can invoke the shell by % bioruby --- UnitTest Test::Unit now covers wide range of the BioRuby library. You can run them by % ruby test/runner.rb or % ruby install.rb config % ruby install.rb setup % ruby install.rb test during the installation procedure. --- Documents README, README.DEV, doc/Tutorial.rd, doc/Tutorial.rd.ja etc. are updated or newly added. == Incompatible changes --- Bio::Sequence Bio::Sequence is completely refactored to be a container class for any sequence annotations. Functionalities are separated into several files under the lib/bio/sequence/ directory as * common.rb : module provides common methods for NA and AA sequences * compat.rb : methods for backward compatibility * aa.rb : Bio::Sequence::AA class * na.rb : Bio::Sequence::NA class * format.rb : module for format conversion Bio::Sequence is no longer a sub-class of String, instead, Bio::Sequence::NA and AA inherits String directly. * Bio::Sequence::NA#gc_percent returns integer instead of float * Bio::Sequence::NA#gc (was aliased to gc_percent) is removed Previously, GC% is rounded to one decimal place. However, how many digits should be left when rounding the value is not clear and as the GC% is an rough measure by its nature, we have changed to return integer part only. If you need a precise value, you can calculate it by values from the 'composition' method by your own criteria. Also, the 'gc' method is removed as the method name doesn't represent its value is ambiguous. * Bio::Sequence#blast * Bio::Sequence#fasta These two methods are removed. Use Bio::Blast and Bio::Fasta to execute BLAST and FASTA search. --- Bio::NucleicAcid Bio::NucleicAcid::Names and Bio::NucleicAcid::Weight no longer exists. Bio::NucleicAcid::Names is renamed to Bio::NucleicAcid::Data::NAMES and can be accessed by Bio::NucleicAcid#names, Bio::NucleicAcid.names methods and Bio::NucleicAcid::WEIGHT hash as the Data module is included. 
Bio::NucleicAcid::Weight is renamed to Bio::NucleicAcid::Data::Weight and can be accessed by Bio::NucleicAcid#weight, Bio::NucleicAcid.weight methods and Bio::NucleicAcid::WEIGHT hash as the Data module is included. --- Bio::AminoAcid Bio::AminoAcid::Names and Bio::AminoAcid::Weight no longer exists. Bio::AminoAcid::Names is renamed to Bio::AminoAcid::Data::NAMES and can be accessed by Bio::AminoAcid#names, Bio::AminoAcid.names methods and Bio::AminoAcid::WEIGHT hash as the Data module is included. Bio::AminoAcid::Weight is renamed to Bio::AminoAcid::Data::Weight and can be accessed by Bio::AminoAcid#weight, Bio::AminoAcid.weight methods and Bio::AminoAcid::WEIGHT hash as the Data module is included. --- Bio::CodonTable Bio::CodonTable::Tables, Bio::CodonTable::Definitions, Bio::CodonTable::Starts, and Bio::CodonTable::Stops are renamed to Bio::CodonTable::TABLES, Bio::CodonTable::DEFINITIONS, Bio::CodonTable::STARTS, and Bio::CodonTable::STOPS respectively. --- Bio::KEGG::Microarrays, Bio::KEGG::Microarray * lib/bio/db/kegg/microarray.rb is renamed to lib/bio/db/kegg/expression.rb * Bio::KEGG::Microarray is renamed to Bio::KEGG::EXPRESSION * Bio::KEGG::Microarrays is removed Bio::KEGG::Microarrays was intended to store a series of microarray expressions as a Hash of Array -like data structure, gene1 => [exp1, exp2, exp3, ... ] gene2 => [exp1, exp2, exp3, ... ] however, it is not utilized well and more suitable container class can be proposed. Until then, this class is removed. # # Following changes are suspended for a while (not yet introduced for now) # # --- Bio::Pathway # # * Bio::Pathway#nodes returns an Array of the node objects instead of # the number of the node objects. # * Bio::Pathway#edges returns an Array of the edge objects instead of # the number of the edge objects. # --- Bio::GenBank Bio::GenBank#gc is removed as the value can be calculated by the Bio::Sequence::NA#gc method and the method is also changed to return integer instead of float. 
Bio::GenBank#varnacular_name is renamed to Bio::GenBank#vernacular_name as it was a typo. --- Bio::GenBank::Common * lib/bio/db/genbank/common.rb is removed. Renamed to Bio::NCBIDB::Common to simplify the autoload dependency. --- Bio::EMBL::Common * lib/bio/db/embl/common.rb is removed. Renamed to Bio::EMBLDB::Common to simplify the autoload dependency. --- Bio::KEGG::GENES * lib/bio/db/kegg/genes.rb linkdb method is changed to return a Hash of an Array of entry IDs instead of a Hash of an entry ID string. --- Bio::TRANSFAC * Bio::TFMATRIX is renamed to Bio::TRANSFAC::MATRIX * Bio::TFSITE is renamed to Bio::TRANSFAC::SITE * Bio::TFFACTOR is renamed to Bio::TRANSFAC::FACTOR * Bio::TFCELL is renamed to Bio::TRANSFAC::CELL * Bio::TFCLASS is renamed to Bio::TRANSFAC::CLASS * Bio::TFGENE is renamed to Bio::TRANSFAC::GENE --- Bio::GFF * Bio::GFF2 is renamed to Bio::GFF::GFF2 * Bio::GFF3 is renamed to Bio::GFF::GFF3 --- Bio::Alignment In 0.7.0: * Old Bio::Alignment class is renamed to Bio::Alignment::OriginalAlignment. Now, new Bio::Alignment is a module. However, you don't mind so much because most of the class methods previously existed are defined to delegate to the new Bio::Alignment::OriginalAlignment class, for keeping backward compatibility. * New classes and modules are introduced. Please refer to RDoc. * each_site and some methods changed to return Bio::Alignment::Site, which inherits Array (previously returned Array). * consensus_iupac now returns only standard bases 'a', 'c', 'g', 't', 'm', 'r', 'w', 's', 'y', 'k', 'v', 'h', 'd', 'b', 'n', or nil (in SiteMethods#consensus_iupac) or '?' (or missing_char, in EnumerableExtension#consensus_iupac). Note that consensus_iupac now does not return u and invalid letters not defined in IUPAC standard even if all bases are equal. * There are more and more changes to be written... In 1.1.0: * Bio::Alignment::ClustalWFormatter is removed and methods in this module are renamed and moved to Bio::Alignment::Output.
--- Bio::PDB In 0.7.0: * Bio::PDB::Atom is removed. Instead, please use Bio::PDB::Record::ATOM and Bio::PDB::Record::HETATM. * Bio::PDB::FieldDef is removed and Bio::PDB::Record is completely changed. Now, records is changed from hash to Struct objects. (Note that method_missing is no longer used.) * In records, "do_parse" is now automatically called. Users don't need to call do_parse explicitly. (0.7.0 feature: "inspect" does not call do_parse.) (0.7.1 feature: "inspect" calls do_parse.) * In the "MODEL" record, model_serial is changed to serial. * In records, record_type is changed to record_name. * In most records contains real numbers, return values are changed to float instead of string. * Pdb_AChar, Pdb_Atom, Pdb_Character, Pdb_Continuation, Pdb_Date, Pdb_IDcode, Pdb_Integer, Pdb_LString, Pdb_List, Pdb_Real, Pdb_Residue_name, Pdb_SList, Pdb_Specification_list, Pdb_String, Pdb_StringRJ and Pdb_SymOP are moved under Bio::PDB::DataType. * There are more and more changes to be written... In 0.7.1: * Heterogens and HETATMs are completely separeted from residues and ATOMs. HETATMs (Bio::PDB::Record::HETATM objects) are stored in Bio::PDB::Heterogen (which inherits Bio::PDB::Residue). * Waters (resName=="HOH") are treated as normal heterogens. Model#solvents is still available but it will be deprecated. * In Bio::PDB::Chain, adding "LIGAND" to the heterogen id is no longer available. Instead, please use Chain#get_heterogen_by_id method. In addition, Bio::{PDB|PDB::Model::PDB::Chain}#heterogens, #each_heterogen, #find_heterogen, Bio::{PDB|PDB::Model::PDB::Chain::PDB::Heterogen}#hetatms, #each_hetatm, #find_hetatm methods are added. * Bio::PDB#seqres returns Bio::Sequence::NA object if the chain seems to be a nucleic acid sequence. * There are more and more changes to be written... In 1.1.0: * In Bio::PDB::ATOM#name, #resName, #iCode, and #charge, whitespaces are stripped during initializing. 
* In Bio::PDB::ATOM#segID, whitespaces are right-stripped during initializing. * In Bio::PDB::ATOM#element, whitespaces are left-stripped during initializing. * Bio::PDB::HETATM#name, #resName, #iCode, #charge, #segID, and #element are also subject to the above changes, because Bio::PDB::HETATM inherits Bio::PDB::ATOM. * Bio::PDB::Residue#[] and Bio::PDB::Heterogen#[] are changed to use the name field for selecting atoms, because the element field is not useful for selecting atoms and is not used in many pdb files. * Bio::PDB#record is changed to return an empty array instead of nil for a nonexistent record. --- Bio::FlatFile In 0.7.2: * Bio::FlatFile.open, Bio::FlatFile.auto and Bio::FlatFile.new are changed not to accept the last argument to specify raw mode, e.g. :raw => true, :raw => false, true or false. Instead, please use Bio::FlatFile#raw= method after creating a new object. * Now, first argument of Bio::FlatFile.open, which shall be a database class or nil, can be omitted, and you can do Bio::FlatFile.open(filename, ...). Note that Bio::FlatFile.open(dbclass, filaname, ...) is still available. * Bio::FlatFile#io is obsoleted. Please use Bio::FlatFile#to_io instead. * When reading GenBank or GenPept files, comments at the head of the file before the first "LOCUS" lines are now skipped by default. When reading other file formats, white space characters are skipped. * File format autodetection routine is completely rewritten. If it fails to determine data format which was previously determined, please report us with the data. * Internal structure is now completely changed. Codes depend on the internal structure (which is not recommended) would not work. In 1.1.0: * Bio::FlatFile#entry_start_pos and #entry_ended_pos are enabled only when Bio::FlatFile#entry_pos_flag is true. --- Bio::ClustalW, Bio::MAFFT, Bio::Sim4 In 1.1.0: * Bio::(ClustalW|MAFFT|Sim4)#option is changed to #options. * Bio::ClustalW::errorlog and Bio::(MAFFT|Sim4)#log are removed. 
No replacements/alternatives are available. --- Bio::ClustalW, Bio::MAFFT In 1.1.0: * Bio::(ClustalW|MAFFT)#query_align, #query_string, #query_by_filename are changed not to get second (and third, ...) arguments. * Bio::(ClustalW|MAFFT)#query, #query_string, #query_by_filename are changed not to try to guess whether given data is nucleotide or protein. * Return value of Bio::(ClustalW|MAFFT)#query with no arguments is changed. If the program exits normally (exit status is 0), returns true. Otherwise, returns false. --- Bio::MAFFT In 1.1.0: * Bio::MAFFT#output is changed to return a string of multi-fasta formatted text instead of Array of Bio::FastaFormat objects. To get an array of Bio::FastaFormat objects, please use report.data instead. --- Bio::MAFFT::Report In 1.1.0: * Bio::MAFFT::Report#initialize is changed to get a string of multi-fasta formatted text instead of Array. --- Bio::BLAST::Default::Report, Bio::BLAST::Default::Report::Hit, Bio::BLAST::Default::Report::HSP, Bio::BLAST::WU::Report, Bio::BLAST::WU::Report::Hit, Bio::BLAST::WU::Report::HSP In 1.1.0: * Hit#evalue, HSP#evalue, WU::Hit#pvalue, and WU::HSP#pvalue are changed to return a Float object instead of a String object. * Report#expect, Hit#bit_score, and HSP#bit_score are changed to return a Float object or nil instead of a String object or nil. * Following methods are changed to return an integer value or nil instead of a string or nil: score, percent_identity, percent_positive, percent_gaps. --- BioRuby Shell In 1.1.0: * Shell commands seq, ent, obj are renamed to getseq, getent, getobj, respectively. === Deleted files : lib/bio/db/genbank.rb : lib/bio/db/embl.rb These files are removed as we changed to use autoload. You can safely replace require 'bio/db/genbank' or require 'bio/db/embl' in your code to require 'bio' and this change will also speed up loading time even if you only need one of the sub classes under the genbank/ or embl/ directory.
: lib/bio/extend.rb This file contained some additional methods to String and Array classes. The methods added to Array are already included in Ruby itself since the version 1.8, and the methods added to String are moved to the BioRuby shell (lib/bio/shell/plugin/seq.rb). bio-1.4.3.0001/doc/Tutorial.rd0000644000004100000410000014431512200110570015572 0ustar www-datawww-data# This document is generated with a version of rd2html (part of Hiki) # # rd2 Tutorial.rd # # or with style sheet: # # rd2 -r rd/rd2html-lib.rb --with-css=bioruby.css Tutorial.rd > Tutorial.rd.html # # in Debian: # # rd2 -r rd/rd2html-lib --with-css="../lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css" Tutorial.rd > Tutorial.rd.html # # A common problem is tabs in the text file! TABs are not allowed. # # To add tests run Toshiaki's bioruby shell and paste in the query plus # results. # # To run the embedded Ruby doctests you can use the rubydoctest tool, though # it needs a little conversion. Like: # # cat Tutorial.rd | sed -e "s,bioruby>,>>," | sed "s,==>,=>," > Tutorial.rd.tmp # rubydoctest Tutorial.rd.tmp # # alternatively, the Ruby way is # # ruby -p -e '$_.sub!(/bioruby\>/, ">>"); $_.sub!(/\=\=\>/, "=>")' Tutorial.rd > Tutorial.rd.tmp # rubydoctest Tutorial.rd.tmp # # Rubydoctest is useful to verify an example in this document (still) works # # bioruby> $: << '../lib' # make sure rubydoctest finds bioruby/lib =begin #doctest Testing bioruby = BioRuby Tutorial * Copyright (C) 2001-2003 KATAYAMA Toshiaki * Copyright (C) 2005-2011 Pjotr Prins, Naohisa Goto and others This document was last modified: 2011/10/14 Current editor: Michael O'Keefe The latest version resides in the GIT source code repository: ./doc/(()). == Introduction This is a tutorial for using Bioruby. A basic knowledge of Ruby is required. 
If you want to know more about the programming language, we recommend the latest Ruby book (()) by Dave Thomas and Andy Hunt - the first edition can be read online (()). For BioRuby you need to install Ruby and the BioRuby package on your computer. You can check whether Ruby is installed on your computer and what version it has with the % ruby -v command. You should see something like: ruby 1.9.2p290 (2011-07-09 revision 32553) [i686-linux] If you see no such thing you'll have to install Ruby using your installation manager. For more information see the (()) website. With Ruby installed, download and install Bioruby using the links on the (()) website. The recommended installation is via RubyGems: gem install bio See also the Bioruby (()). A lot of BioRuby's documentation exists in the source code and unit tests. To really dive in you will need the latest source code tree. The embedded rdoc documentation can be viewed online at (()). But first let's start! == Trying Bioruby Bioruby comes with its own shell. After unpacking the sources run one of the following commands: bioruby or, from the source tree cd bioruby ruby -I lib bin/bioruby and you should see a prompt bioruby> Now test the following: bioruby> require 'bio' bioruby> seq = Bio::Sequence::NA.new("atgcatgcaaaa") ==> "atgcatgcaaaa" bioruby> seq.complement ==> "ttttgcatgcat" See the Bioruby shell section below for more tweaking. If you have trouble running examples also check the section below on trouble shooting. You can also post a question to the mailing list. BioRuby developers usually try to help. == Working with nucleic / amino acid sequences (Bio::Sequence class) The Bio::Sequence class allows the usual sequence transformations and translations.
In the example below the DNA sequence "atgcatgcaaaa" is converted into the complemental strand and spliced into a subsequence; next, the nucleic acid composition is calculated and the sequence is translated into the amino acid sequence, the molecular weight calculated, and so on. When translating into amino acid sequences, the frame can be specified and optionally the codon table selected (as defined in codontable.rb). bioruby> seq = Bio::Sequence::NA.new("atgcatgcaaaa") ==> "atgcatgcaaaa" # complemental sequence (Bio::Sequence::NA object) bioruby> seq.complement ==> "ttttgcatgcat" bioruby> seq.subseq(3,8) # gets subsequence of positions 3 to 8 (starting from 1) ==> "gcatgc" bioruby> seq.gc_percent ==> 33 bioruby> seq.composition ==> {"a"=>6, "c"=>2, "g"=>2, "t"=>2} bioruby> seq.translate ==> "MHAK" bioruby> seq.translate(2) # translate from frame 2 ==> "CMQ" bioruby> seq.translate(1,11) # codon table 11 ==> "MHAK" bioruby> seq.translate.codes ==> ["Met", "His", "Ala", "Lys"] bioruby> seq.translate.names ==> ["methionine", "histidine", "alanine", "lysine"] bioruby> seq.translate.composition ==> {"K"=>1, "A"=>1, "M"=>1, "H"=>1} bioruby> seq.translate.molecular_weight ==> 485.605 bioruby> seq.complement.translate ==> "FCMH" get a random sequence with the same NA count: bioruby> counts = {'a'=>seq.count('a'),'c'=>seq.count('c'),'g'=>seq.count('g'),'t'=>seq.count('t')} ==> {"a"=>6, "c"=>2, "g"=>2, "t"=>2} bioruby!> randomseq = Bio::Sequence::NA.randomize(counts) ==!> "aaacatgaagtc" bioruby!> print counts a6c2g2t2 bioruby!> p counts {"a"=>6, "c"=>2, "g"=>2, "t"=>2} The p, print and puts methods are standard Ruby ways of outputting to the screen. If you want to know more about standard Ruby commands you can use the 'ri' command on the command line (or the help command in Windows). For example % ri puts % ri p % ri File.open Nucleic acid sequence are members of the Bio::Sequence::NA class, and amino acid sequence are members of the Bio::Sequence::AA class. 
Shared methods are in the parent Bio::Sequence class. As Bio::Sequence inherits Ruby's String class, you can use String class methods. For example, to get a subsequence, you can not only use subseq(from, to) but also String#[]. Please take note that Ruby's strings are 0-based - i.e. the first letter has index 0, for example: bioruby> s = 'abc' ==> "abc" bioruby> s[0].chr ==> "a" bioruby> s[0..1] ==> "ab" So when using String methods, you should subtract 1 from positions conventionally used in biology. (subseq method will throw an exception if you specify positions smaller than or equal to 0 for either one of the "from" or "to".) The window_search(window_size, step_size) method shows a typical Ruby way of writing concise and clear code using 'closures'. Each sliding window creates a subsequence which is supplied to the enclosed block through a variable named +s+. * Show average percentage of GC content for 20 bases (stepping the default one base at a time): bioruby> seq = Bio::Sequence::NA.new("atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa") ==> "atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa" bioruby> a=[]; seq.window_search(20) { |s| a.push s.gc_percent } bioruby> a ==> [30, 35, 40, 40, 35, 35, 35, 30, 25, 30, 30, 30, 35, 35, 35, 35, 35, 40, 45, 45, 45, 45, 40, 35, 40, 40, 40, 40, 40, 35, 35, 35, 30, 30, 30] Since the class of each subsequence is the same as original sequence (Bio::Sequence::NA or Bio::Sequence::AA or Bio::Sequence), you can use all methods on the subsequence. For example, * Shows translation results for 15 bases shifting a codon at a time bioruby> a = [] bioruby> seq.window_search(15, 3) { | s | a.push s.translate } bioruby> a ==> ["MHAIK", "HAIKL", "AIKLI", "IKLIP", "KLIPI", "LIPIR", "IPIRS", "PIRSS", "IRSSR", "RSSRS", "SSRSS", "SRSSK", "RSSKK", "SSKKK"] Finally, the window_search method returns the last leftover subsequence.
This allows for example * Divide a genome sequence into sections of 10000bp and output FASTA formatted sequences (line width 60 chars). The 1000bp at the start and end of each subsequence overlapped. At the 3' end of the sequence the leftover is also added: i = 1 textwidth=60 remainder = seq.window_search(10000, 9000) do |s| puts s.to_fasta("segment #{i}", textwidth) i += 1 end if remainder puts remainder.to_fasta("segment #{i}", textwidth) end If you don't want the overlapping window, set window size and stepping size to equal values. Other examples * Count the codon usage bioruby> codon_usage = Hash.new(0) bioruby> seq.window_search(3, 3) { |s| codon_usage[s] += 1 } bioruby> codon_usage ==> {"cat"=>1, "aaa"=>3, "cca"=>1, "att"=>2, "aga"=>1, "atc"=>1, "cta"=>1, "gca"=>1, "cga"=>1, "tca"=>3, "aag"=>1, "tcc"=>1, "atg"=>1} * Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid) bioruby> a = [] bioruby> seq.window_search(10, 10) { |s| a.push s.molecular_weight } bioruby> a ==> [3096.2062, 3086.1962, 3056.1762, 3023.1262, 3073.2262] In most cases, sequences are read from files or retrieved from databases. For example: require 'bio' input_seq = ARGF.read # reads all files in arguments my_naseq = Bio::Sequence::NA.new(input_seq) my_aaseq = my_naseq.translate puts my_aaseq Save the program above as na2aa.rb. Prepare a nucleic acid sequence described below and save it as my_naseq.txt: gtggcgatctttccgaaagcgatgactggagcgaagaaccaaagcagtgacatttgtctg atgccgcacgtaggcctgataagacgcggacagcgtcgcatcaggcatcttgtgcaaatg tcggatgcggcgtga na2aa.rb translates a nucleic acid sequence to a protein sequence. For example, translates my_naseq.txt: % ruby na2aa.rb my_naseq.txt or use a pipe! % cat my_naseq.txt|ruby na2aa.rb Outputs VAIFPKAMTGAKNQSSDICLMPHVGLIRRGQRRIRHLVQMSDAA* You can also write this, a bit fancifully, as a one-liner script. 
% ruby -r bio -e 'p Bio::Sequence::NA.new($<.read).translate' my_naseq.txt In the next section we will retrieve data from databases instead of using raw sequence files. One generic example of the above can be found in ./sample/na2aa.rb. == Parsing GenBank data (Bio::GenBank class) We assume that you already have some GenBank data files. (If you don't, download some .seq files from ftp://ftp.ncbi.nih.gov/genbank/) As an example we will fetch the ID, definition and sequence of each entry from the GenBank format and convert it to FASTA. This is also an example script in the BioRuby distribution. A first attempt could be to use the Bio::GenBank class for reading in the data: #!/usr/bin/env ruby require 'bio' # Read all lines from STDIN split by the GenBank delimiter while entry = gets(Bio::GenBank::DELIMITER) gb = Bio::GenBank.new(entry) # creates GenBank object print ">#{gb.accession} " # Accession puts gb.definition # Definition puts gb.naseq # Nucleic acid sequence # (Bio::Sequence::NA object) end But that has the disadvantage the code is tied to GenBank input. A more generic method is to use Bio::FlatFile which allows you to use different input formats: #!/usr/bin/env ruby require 'bio' ff = Bio::FlatFile.new(Bio::GenBank, ARGF) ff.each_entry do |gb| definition = "#{gb.accession} #{gb.definition}" puts gb.naseq.to_fasta(definition, 60) end For example, in turn, reading FASTA format files: #!/usr/bin/env ruby require 'bio' ff = Bio::FlatFile.new(Bio::FastaFormat, ARGF) ff.each_entry do |f| puts "definition : " + f.definition puts "nalen : " + f.nalen.to_s puts "naseq : " + f.naseq end In the above two scripts, the first arguments of Bio::FlatFile.new are database classes of BioRuby. This is expanded on in a later section. 
Again another option is to use the Bio::DB.open class: #!/usr/bin/env ruby require 'bio' ff = Bio::GenBank.open("gbvrl1.seq") ff.each_entry do |gb| definition = "#{gb.accession} #{gb.definition}" puts gb.naseq.to_fasta(definition, 60) end Next, we are going to parse the GenBank 'features', which is normally very complicated: #!/usr/bin/env ruby require 'bio' ff = Bio::FlatFile.new(Bio::GenBank, ARGF) # iterates over each GenBank entry ff.each_entry do |gb| # shows accession and organism puts "# #{gb.accession} - #{gb.organism}" # iterates over each element in 'features' gb.features.each do |feature| position = feature.position hash = feature.assoc # put into Hash # skips the entry if "/translation=" is not found next unless hash['translation'] # collects gene name and so on and joins it into a string gene_info = [ hash['gene'], hash['product'], hash['note'], hash['function'] ].compact.join(', ') # shows nucleic acid sequence puts ">NA splicing('#{position}') : #{gene_info}" puts gb.naseq.splicing(position) # shows amino acid sequence translated from nucleic acid sequence puts ">AA translated by splicing('#{position}').translate" puts gb.naseq.splicing(position).translate # shows amino acid sequence in the database entry (/translation=) puts ">AA original translation" puts hash['translation'] end end * Note: In this example Feature#assoc method makes a Hash from a feature object. It is useful because you can get data from the hash by using qualifiers as keys. But there is a risk some information is lost when two or more qualifiers are the same. Therefore an Array is returned by Feature#feature. Bio::Sequence#splicing splices subsequences from nucleic acid sequences according to location information used in GenBank, EMBL and DDBJ. When the specified translation table is different from the default (universal), or when the first codon is not "atg" or the protein contains selenocysteine, the two amino acid sequences will differ. 
The Bio::Sequence#splicing method takes not only DDBJ/EMBL/GenBank feature style location text but also Bio::Locations object. For more information about location format and Bio::Locations class, see bio/location.rb. * Splice according to location string used in a GenBank entry naseq.splicing('join(2035..2050,complement(1775..1818),13..345') * Generate Bio::Locations object and pass the splicing method locs = Bio::Locations.new('join((8298.8300)..10206,1..855)') naseq.splicing(locs) You can also use this splicing method for amino acid sequences (Bio::Sequence::AA objects). * Splicing peptide from a protein (e.g. signal peptide) aaseq.splicing('21..119') === More databases Databases in BioRuby are essentially accessed like that of GenBank with classes like Bio::GenBank, Bio::KEGG::GENES. A full list can be found in the ./lib/bio/db directory of the BioRuby source tree. In many cases the Bio::DatabaseClass acts as a factory pattern and recognises the database type automatically - returning a parsed object. For example using Bio::FlatFile class as described above. The first argument of the Bio::FlatFile.new is database class name in BioRuby (such as Bio::GenBank, Bio::KEGG::GENES and so on). ff = Bio::FlatFile.new(Bio::DatabaseClass, ARGF) Isn't it wonderful that Bio::FlatFile automagically recognizes each database class? #!/usr/bin/env ruby require 'bio' ff = Bio::FlatFile.auto(ARGF) ff.each_entry do |entry| p entry.entry_id # identifier of the entry p entry.definition # definition of the entry p entry.seq # sequence data of the entry end An example that can take any input, filter using a regular expression and output to a FASTA file can be found in sample/any2fasta.rb. With this technique it is possible to write a Unix type grep/sort pipe for sequence information. One example using scripts in the BIORUBY sample folder: fastagrep.rb '/At|Dm/' database.seq | fastasort.rb greps the database for Arabidopsis and Drosophila entries and sorts the output to FASTA. 
Other methods to extract specific data from database objects can be different between databases, though some methods are common (see the guidelines for common methods in bio/db.rb). * entry_id --> gets ID of the entry * definition --> gets definition of the entry * reference --> gets references as Bio::Reference object * organism --> gets species * seq, naseq, aaseq --> returns sequence as corresponding sequence object Refer to the documents of each database to find the exact naming of the included methods. In general, BioRuby uses the following conventions: when a method name is plural, the method returns some object as an Array. For example, some classes have a "references" method which returns multiple Bio::Reference objects as an Array. And some classes have a "reference" method which returns a single Bio::Reference object. === Alignments (Bio::Alignment) The Bio::Alignment class in bio/alignment.rb is a container class like Ruby's Hash and Array classes and BioPerl's Bio::SimpleAlign. A very simple example is: bioruby> seqs = [ 'atgca', 'aagca', 'acgca', 'acgcg' ] bioruby> seqs = seqs.collect{ |x| Bio::Sequence::NA.new(x) } # creates alignment object bioruby> a = Bio::Alignment.new(seqs) bioruby> a.consensus ==> "a?gc?" # shows IUPAC consensus p a.consensus_iupac # ==> "ahgcr" # iterates over each seq a.each { |x| p x } # ==> # "atgca" # "aagca" # "acgca" # "acgcg" # iterates over each site a.each_site { |x| p x } # ==> # ["a", "a", "a", "a"] # ["t", "a", "c", "c"] # ["g", "g", "g", "g"] # ["c", "c", "c", "c"] # ["a", "a", "a", "g"] # doing alignment by using CLUSTAL W. # clustalw command must be installed. 
factory = Bio::ClustalW.new a2 = a.do_align(factory) Read a ClustalW or Muscle 'ALN' alignment file: bioruby> aln = Bio::ClustalW::Report.new(File.read('../test/data/clustalw/example1.aln')) bioruby> aln.header ==> "CLUSTAL 2.0.9 multiple sequence alignment" Fetch a sequence: bioruby> seq = aln.get_sequence(1) bioruby> seq.definition ==> "gi|115023|sp|P10425|" Get a partial sequence: bioruby> seq.to_s[60..120] ==> "LGYFNG-EAVPSNGLVLNTSKGLVLVDSSWDNKLTKELIEMVEKKFQKRVTDVIITHAHAD" Show the full alignment residue match information for the sequences in the set: bioruby> aln.match_line[60..120] ==> " . **. . .. ::*: . * : : . .: .* * *" Return a Bio::Alignment object: bioruby> aln.alignment.consensus[60..120] ==> "???????????SN?????????????D??????????L??????????????????H?H?D" == Restriction Enzymes (Bio::RE) BioRuby has extensive support for restriction enzymes (REs). It contains a full library of commonly used REs (from REBASE) which can be used to cut single stranded RNA or double stranded DNA into fragments. To list all enzymes: rebase = Bio::RestrictionEnzyme.rebase rebase.each do |enzyme_name, info| p enzyme_name end and to cut a sequence with an enzyme follow up with: res = seq.cut_with_enzyme('EcoRII', {:max_permutations => 0}, {:view_ranges => true}) if res.kind_of? Symbol #error err = Err.find_by_code(res.to_s) unless err err = Err.new(:code => res.to_s) end end res.each do |frag| em = EnzymeMatch.new em.p_left = frag.p_left em.p_right = frag.p_right em.c_left = frag.c_left em.c_right = frag.c_right em.err = nil em.enzyme = ar_enz em.sequence = ar_seq p em end == Sequence homology search by using the FASTA program (Bio::Fasta) Let's start with a query.pep file which contains a sequence in FASTA format. In this example we are going to execute a homology search from a remote internet site or on your local machine. Note that you can use the ssearch program instead of fasta when you use it in your local machine. 
=== using FASTA in local machine Install the fasta program on your machine (the command name looks like fasta34. FASTA can be downloaded from ftp://ftp.virginia.edu/pub/fasta/). First, you must prepare your FASTA-formatted database sequence file target.pep and FASTA-formatted query.pep. #!/usr/bin/env ruby require 'bio' # Creates FASTA factory object ("ssearch" instead of # "fasta34" can also work) factory = Bio::Fasta.local('fasta34', ARGV.pop) (EDITOR's NOTE: not consistent pop command) ff = Bio::FlatFile.new(Bio::FastaFormat, ARGF) # Iterates over each entry. the variable "entry" is a # Bio::FastaFormat object: ff.each do |entry| # shows definition line (begins with '>') to the standard error output $stderr.puts "Searching ... " + entry.definition # executes homology search. Returns Bio::Fasta::Report object. report = factory.query(entry) # Iterates over each hit report.each do |hit| # If E-value is smaller than 0.0001 if hit.evalue < 0.0001 # shows identifier of query and hit, E-value, start and # end positions of homologous region print "#{hit.query_id} : evalue #{hit.evalue}\t#{hit.target_id} at " p hit.lap_at end end end We named above script f_search.rb. You can execute it as follows: % ./f_search.rb query.pep target.pep > f_search.out In above script, the variable "factory" is a factory object for executing FASTA many times easily. Instead of using Fasta#query method, Bio::Sequence#fasta method can be used. seq = ">test seq\nYQVLEEIGRGSFGSVRKVIHIPTKKLLVRKDIKYGHMNSKE" seq.fasta(factory) When you want to add options to FASTA commands, you can set the third argument of the Bio::Fasta.local method. For example, the following sets ktup to 1 and gets a list of the top 10 hits: factory = Bio::Fasta.local('fasta34', 'target.pep', '-b 10') factory.ktup = 1 Bio::Fasta#query returns a Bio::Fasta::Report object. We can get almost all information described in FASTA report text with the Report object. 
For example, getting information for hits: report.each do |hit| puts hit.evalue # E-value puts hit.sw # Smith-Waterman score (*) puts hit.identity # % identity puts hit.overlap # length of overlapping region puts hit.query_id # identifier of query sequence puts hit.query_def # definition(comment line) of query sequence puts hit.query_len # length of query sequence puts hit.query_seq # sequence of homologous region puts hit.target_id # identifier of hit sequence puts hit.target_def # definition(comment line) of hit sequence puts hit.target_len # length of hit sequence puts hit.target_seq # sequence of homologous region of hit sequence puts hit.query_start # start position of homologous # region in query sequence puts hit.query_end # end position of homologous region # in query sequence puts hit.target_start # start position of homologous region # in hit(target) sequence puts hit.target_end # end position of homologous region # in hit(target) sequence puts hit.lap_at # array of above four numbers end Most of the above methods are common to the Bio::Blast::Report described below. Please refer to the documentation of the Bio::Fasta::Report class for FASTA-specific details. If you need the original output text of FASTA program you can use the "output" method of the factory object after the "query" method. report = factory.query(entry) puts factory.output === using FASTA from a remote internet site * Note: Currently, only GenomeNet (fasta.genome.jp) is supported. Check the class documentation for updates. For accessing a remote site the Bio::Fasta.remote method is used instead of Bio::Fasta.local. When using a remote method, the databases available may be limited, but, otherwise, you can do the same things as with a local method.
Available databases in GenomeNet: * Protein database * nr-aa, genes, vgenes.pep, swissprot, swissprot-upd, pir, prf, pdbstr * Nucleic acid database * nr-nt, genbank-nonst, gbnonst-upd, dbest, dbgss, htgs, dbsts, embl-nonst, embnonst-upd, genes-nt, genome, vgenes.nuc Select the databases you require. Next, give the search program from the type of query sequence and database. * When query is an amino acid sequence * When protein database, program is "fasta". * When nucleic database, program is "tfasta". * When query is a nucleic acid sequence * When nucleic database, program is "fasta". * (When protein database, the search would fail.) For example, run: program = 'fasta' database = 'genes' factory = Bio::Fasta.remote(program, database) and try out the same commands as with the local search shown earlier. == Homology search by using BLAST (Bio::Blast class) The BLAST interface is very similar to that of FASTA and both local and remote execution are supported. Basically, replace Bio::Fasta with Bio::Blast in the above examples! For example the BLAST version of f_search.rb is: # create BLAST factory object factory = Bio::Blast.local('blastp', ARGV.pop) For remote execution of BLAST in GenomeNet, Bio::Blast.remote is used. The parameter "program" is different from FASTA - as you can expect: * When query is an amino acid sequence * When protein database, program is "blastp". * When nucleic database, program is "tblastn". * When query is a nucleic acid sequence * When protein database, program is "blastx" * When nucleic database, program is "blastn". * ("tblastx" for six-frame search.) Bio::Blast uses "-m 7" XML output of BLAST by default when either XMLParser or REXML (both of them are XML parser libraries for Ruby - of the two XMLParser is the fastest) is installed on your computer. In Ruby version 1.8.0 or later, REXML is bundled with Ruby's distribution. When no XML parser library is present, Bio::Blast uses "-m 8" tabular delimited format.
Available information is limited with the "-m 8" format so installing an XML parser is recommended. Again, the methods in Bio::Fasta::Report and Bio::Blast::Report (and Bio::Fasta::Report::Hit and Bio::Blast::Report::Hit) are similar. There are some additional BLAST methods, for example, bit_score and midline. report.each do |hit| puts hit.bit_score puts hit.query_seq puts hit.midline puts hit.target_seq puts hit.evalue puts hit.identity puts hit.overlap puts hit.query_id puts hit.query_def puts hit.query_len puts hit.target_id puts hit.target_def puts hit.target_len puts hit.query_start puts hit.query_end puts hit.target_start puts hit.target_end puts hit.lap_at end For simplicity and API compatibility, some information such as score is extracted from the first Hsp (High-scoring Segment Pair). Check the documentation for Bio::Blast::Report to see what can be retrieved. For now suffice to say that Bio::Blast::Report has a hierarchical structure mirroring the general BLAST output stream: * In a Bio::Blast::Report object, @iterations is an array of Bio::Blast::Report::Iteration objects. * In a Bio::Blast::Report::Iteration object, @hits is an array of Bio::Blast::Report::Hits objects. * In a Bio::Blast::Report::Hits object, @hsps is an array of Bio::Blast::Report::Hsp objects. See bio/appl/blast.rb and bio/appl/blast/*.rb for more information. === Parsing existing BLAST output files When you already have BLAST output files and you want to parse them, you can directly create Bio::Blast::Report objects without the Bio::Blast factory object. For this purpose use Bio::Blast.reports, which supports the "-m 0" default and "-m 7" XML type output format. 
* For example: blast_version = nil; result = [] Bio::Blast.reports(File.new("../test/data/blast/blastp-multi.m7")) do |report| blast_version = report.version report.iterations.each do |itr| itr.hits.each do |hit| result.push hit.target_id end end end blast_version # ==> "blastp 2.2.18 [Mar-02-2008]" result # ==> ["BAB38768", "BAB38768", "BAB38769", "BAB37741"] * another example: require 'bio' Bio::Blast.reports(ARGF) do |report| puts "Hits for " + report.query_def + " against " + report.db report.each do |hit| print hit.target_id, "\t", hit.evalue, "\n" if hit.evalue < 0.001 end end Save the script as hits_under_0.001.rb and to process BLAST output files *.xml, you can run it with: % ruby hits_under_0.001.rb *.xml Sometimes BLAST XML output may be wrong and can not be parsed. Check whether blast is version 2.2.5 or later. See also blast --help. Bio::Blast loads the full XML file into memory. If this causes a problem you can split the BLAST XML file into smaller chunks using XML-Twig. An example can be found in (()). === Add remote BLAST search sites Note: this section is an advanced topic Here a more advanced application for using BLAST sequence homology search services. BioRuby currently only supports GenomeNet. If you want to add other sites, you must write the following: * the calling CGI (command-line options must be processed for the site). * make sure you get BLAST output text as supported format by BioRuby (e.g. "-m 8", "-m 7" or default("-m 0")). In addition, you must write a private class method in Bio::Blast named "exec_MYSITE" to get query sequence and to pass the result to Bio::Blast::Report.new(or Bio::Blast::Default::Report.new): factory = Bio::Blast.remote(program, db, option, 'MYSITE') When you write above routines, please send them to the BioRuby project, and they may be included in future releases. == Generate a reference list using PubMed (Bio::PubMed) Nowadays using NCBI E-Utils is recommended. Use Bio::PubMed.esearch and Bio::PubMed.efetch. 
#!/usr/bin/env ruby require 'bio' # NCBI announces that queries without email address will return error # after June 2010. When you modify the script, please enter your email # address instead of the staff's. Bio::NCBI.default_email = 'staff@bioruby.org' keywords = ARGV.join(' ') options = { 'maxdate' => '2003/05/31', 'retmax' => 1000, } entries = Bio::PubMed.esearch(keywords, options) Bio::PubMed.efetch(entries).each do |entry| medline = Bio::MEDLINE.new(entry) reference = medline.reference puts reference.bibtex end The script works same as pmsearch.rb. But, by using NCBI E-Utils, more options are available. For example published dates to search and maximum number of hits to show results can be specified. See the (()) for more details. === More about BibTeX In this section, we explain the simple usage of TeX for the BibTeX format bibliography list collected by above scripts. For example, to save BibTeX format bibliography data to a file named genoinfo.bib. % ./pmfetch.rb 10592173 >> genoinfo.bib % ./pmsearch.rb genome bioinformatics >> genoinfo.bib The BibTeX can be used with Tex or LaTeX to form bibliography information with your journal article. For more information on using BibTex see (()). A quick example: Save this to hoge.tex: \documentclass{jarticle} \begin{document} \bibliographystyle{plain} foo bar KEGG database~\cite{PMID:10592173} baz hoge fuga. \bibliography{genoinfo} \end{document} Then, % latex hoge % bibtex hoge # processes genoinfo.bib % latex hoge # creates bibliography list % latex hoge # inserts correct bibliography reference Now, you get hoge.dvi and hoge.ps - the latter of which can be viewed with any Postscript viewer. === Bio::Reference#bibitem When you don't want to create a bib file, you can use Bio::Reference#bibitem method instead of Bio::Reference#bibtex. 
In the above pmfetch.rb and pmsearch.rb scripts, change puts reference.bibtex to puts reference.bibitem Output documents should be bundled in \begin{thebibliography} and \end{thebibliography}. Save the following to hoge.tex \documentclass{jarticle} \begin{document} foo bar KEGG database~\cite{PMID:10592173} baz hoge fuga. \begin{thebibliography}{00} \bibitem{PMID:10592173} Kanehisa, M., Goto, S. KEGG: kyoto encyclopedia of genes and genomes., {\em Nucleic Acids Res}, 28(1):27--30, 2000. \end{thebibliography} \end{document} and run % latex hoge # creates bibliography list % latex hoge # inserts correct bibliography reference = OBDA OBDA (Open Bio Database Access) is a standardized method of sequence database access developed by the Open Bioinformatics Foundation. It was created during the BioHackathon by BioPerl, BioJava, BioPython, BioRuby and other projects' members (2002). * BioRegistry (Directory) * Mechanism to specify how and where to retrieve sequence data for each database. * BioFlat * Flatfile indexing by using binary tree or BDB(Berkeley DB). * BioFetch * Server-client model for getting entry from database via http. * BioSQL * Schemas to store sequence data to relational databases such as MySQL and PostgreSQL, and methods to retrieve entries from the database. This tutorial only gives a quick overview of OBDA. Check out (()) for more extensive details. == BioRegistry BioRegistry allows for locating retrieval methods and database locations through configuration files. The priorities are * The file specified with method's parameter * ~/.bioinformatics/seqdatabase.ini * /etc/bioinformatics/seqdatabase.ini * http://www.open-bio.org/registry/seqdatabase.ini Note that the last location refers to www.open-bio.org and is only used when no local configuration files are available. In the current BioRuby implementation all local configuration files are read. For databases with the same name settings encountered first are used.
This means that if you don't like some settings of a database in the system's global configuration file (/etc/bioinformatics/seqdatabase.ini), you can easily override them by writing settings to ~/.bioinformatics/seqdatabase.ini. The syntax of the configuration file is called a stanza format. For example [DatabaseName] protocol=ProtocolName location=ServerName You can write a description like the above entry for every database. The database name is a local label for yourself, so you can name it freely and it can differ from the name of the actual databases. In the actual specification of BioRegistry where there are two or more settings for a database of the same name, it is proposed that connection to the database is tried sequentially with the order written in configuration files. However, this has not (yet) been implemented in BioRuby. In addition, for some protocols, you must set additional options other than locations (e.g. user name for MySQL). In the BioRegistry specification, currently available protocols are: * index-flat * index-berkeleydb * biofetch * biosql * bsane-corba * xembl In BioRuby, you can use index-flat, index-berkeleydb, biofetch and biosql. Note that the BioRegistry specification sometimes gets updated and BioRuby does not always follow quickly. Here is an example. It creates a Bio::Registry object and reads the configuration files: reg = Bio::Registry.new # connects to the database "genbank" serv = reg.get_database('genbank') # gets entry of the ID entry = serv.get_by_id('AA2CG') The variable "serv" is a server object corresponding to the settings written in the configuration files. The class of the object is one of Bio::SQL, Bio::Fetch, and so on. Note that Bio::Registry#get_database("name") returns nil if no database is found. After that, you can use the get_by_id method and some specific methods. Please refer to the sections below for more information.
== BioFlat BioFlat is a mechanism to create index files of flat files and to retrieve these entries fast. There are two index types. index-flat is a simple index performing binary search without using any external libraries of Ruby. index-berkeleydb uses Berkeley DB for indexing - but requires installing bdb on your computer, as well as the BDB Ruby package. To create the index itself, you can use br_bioflat.rb command bundled with BioRuby. % br_bioflat.rb --makeindex database_name [--format data_format] filename... The format can be omitted because BioRuby has autodetection. If that doesn't work, you can try specifying the data format as the name of a BioRuby database class. Search and retrieve data from database: % br_bioflat.rb database_name identifier For example, to create an index of GenBank files gbbct*.seq and get the entry from the database: % br_bioflat.rb --makeindex my_bctdb --format GenBank gbbct*.seq % br_bioflat.rb my_bctdb A16STM262 If you have Berkeley DB on your system and installed the bdb extension module of Ruby (see (()) ), you can create and search indexes with Berkeley DB - a very fast alternative that uses little computer memory. When creating the index, use the "--makeindex-bdb" option instead of "--makeindex". % br_bioflat.rb --makeindex-bdb database_name [--format data_format] filename... == BioFetch Note: this section is an advanced topic BioFetch is a database retrieval mechanism via CGI. CGI Parameters, options and error codes are standardized. Client access via http is possible giving the database name, identifiers and format to retrieve entries. The BioRuby project has a BioFetch server at bioruby.org. It uses GenomeNet's DBGET system as a backend. The source code of the server is in sample/ directory. Currently, there are only two BioFetch servers in the world: bioruby.org and EBI. Here are some methods to retrieve entries from our BioFetch server. 
(1) Using a web browser http://bioruby.org/cgi-bin/biofetch.rb (2) Using the br_biofetch.rb command % br_biofetch.rb db_name entry_id (3) Directly using Bio::Fetch in a script serv = Bio::Fetch.new(server_url) entry = serv.fetch(db_name, entry_id) (4) Indirectly using Bio::Fetch via BioRegistry in script reg = Bio::Registry.new serv = reg.get_database('genbank') entry = serv.get_by_id('AA2CG') If you want to use (4), you have to include some settings in seqdatabase.ini. For example: [genbank] protocol=biofetch location=http://bioruby.org/cgi-bin/biofetch.rb biodbname=genbank === The combination of BioFetch, Bio::KEGG::GENES and Bio::AAindex1 Bioinformatics is often about gluing things together. Here is an example that gets the bacteriorhodopsin gene (VNG1467G) of the archaea Halobacterium from KEGG GENES database and gets alpha-helix index data (BURA740101) from the AAindex (Amino acid indices and similarity matrices) database, and shows the helix score for each 15-aa length overlapping window. #!/usr/bin/env ruby require 'bio' entry = Bio::Fetch.query('hal', 'VNG1467G') aaseq = Bio::KEGG::GENES.new(entry).aaseq entry = Bio::Fetch.query('aax1', 'BURA740101') helix = Bio::AAindex1.new(entry).index position = 1 win_size = 15 aaseq.window_search(win_size) do |subseq| score = subseq.total(helix) puts [ position, score ].join("\t") position += 1 end The special method Bio::Fetch.query uses the preset BioFetch server at bioruby.org. (The server internally gets data from GenomeNet. Because the KEGG/GENES database and AAindex database are not available from other BioFetch servers, we used the bioruby.org server with Bio::Fetch.query method.) == BioSQL BioSQL is a well known schema to store and retrieve biological sequences using a RDBMS like PostgreSQL or MySQL: note that SQLite is not supported. First of all, you must install a database engine or have access to a remote one. Then create the schema and populate with the taxonomy.
You can follow the (()) to accomplish these steps. The next step is to install these gems: * ActiveRecord * CompositePrimaryKeys (Rails doesn't handle composite primary keys by default) * The layer to communicate with your preferred RDBMS (postgresql, mysql, jdbcmysql in case you are running JRuby ) You can find ActiveRecord's models in /bioruby/lib/bio/io/biosql When you have your database up and running, you can connect to it like this: #!/usr/bin/env ruby require 'bio' connection = Bio::SQL.establish_connection({'development'=>{'hostname'=>"YourHostname", 'database'=>"CoolBioSeqDB", 'adapter'=>"jdbcmysql", 'username'=>"YourUser", 'password'=>"YourPassword" } }, 'development') #The first parameter is the hash containing the description of the configuration; similar to database.yml in Rails applications, you can declare different environments. #The second parameter is the environment to use: 'development', 'test', or 'production'. #To store a sequence into the database you simply need a biosequence object. biosql_database = Bio::SQL::Biodatabase.find(:first) ff = Bio::GenBank.open("gbvrl1.seq") ff.each_entry do |gb| Bio::SQL::Sequence.new(:biosequence=>gb.to_biosequence, :biodatabase=>biosql_database) end #You can list all the entries in every database Bio::SQL.list_entries #list databases: Bio::SQL.list_databases #retrieving a generic accession bioseq = Bio::SQL.fetch_accession("YourAccession") #If you use biosequence objects, you will find all their methods mapped to BioSQL sequences. #But you can also access the models directly: #get the raw sequence associated with your accession bioseq.entry.biosequence #get the length of your sequence; this is the explicit form of bioseq.length bioseq.entry.biosequence.length #convert the sequence into GenBank format bioseq.to_biosequence.output(:genbank) BioSQL's (()) is not very intuitive for beginners, so spend some time on understanding it. In the end if you know a little bit of Ruby on Rails, everything will go smoothly.
You can find information on Annotation (()). ToDo: add examples from George. I remember he did some cool posts on BioSQL and Rails. = PhyloXML PhyloXML is an XML language for saving, analyzing and exchanging data of annotated phylogenetic trees. PhyloXML's parser in BioRuby is implemented in Bio::PhyloXML::Parser, and its writer in Bio::PhyloXML::Writer. More information can be found at (()). == Requirements In addition to BioRuby, you need the libxml Ruby bindings. To install, execute: % gem install -r libxml-ruby For more information see the (()) == Parsing a file require 'bio' # Create new phyloxml parser phyloxml = Bio::PhyloXML::Parser.open('example.xml') # Print the names of all trees in the file phyloxml.each do |tree| puts tree.name end If there are several trees in the file, you can access the one you wish by specifying its index: tree = phyloxml[3] You can use all Bio::Tree methods on the tree, since PhyloXML::Tree inherits from Bio::Tree. For example, tree.leaves.each do |node| puts node.name end PhyloXML files can hold additional information besides phylogenies at the end of the file. This info can be accessed through the 'other' array of the parser object. phyloxml = Bio::PhyloXML::Parser.open('example.xml') while tree = phyloxml.next_tree # do stuff with trees end puts phyloxml.other == Writing a file # Create new phyloxml writer writer = Bio::PhyloXML::Writer.new('tree.xml') # Write tree to the file tree.xml writer.write(tree1) # Add another tree to the file writer.write(tree2) == Retrieving data Here is an example of how to retrieve the scientific name of the clades included in each tree.
require 'bio' phyloxml = Bio::PhyloXML::Parser.open('ncbi_taxonomy_mollusca.xml') phyloxml.each do |tree| tree.each_node do |node| print "Scientific name: ", node.taxonomies[0].scientific_name, "\n" end end == Retrieving 'other' data require 'bio' phyloxml = Bio::PhyloXML::Parser.open('phyloxml_examples.xml') while tree = phyloxml.next_tree #do something with the trees end p phyloxml.other puts "\n" #=> output is an object representation #Print in a readable way puts phyloxml.other[0].to_xml, "\n" #=>: # # # acgtcgcggcccgtggaagtcctctcct # aggtcgcggcctgtggaagtcctctcct # taaatcgc--cccgtgg-agtccc-cct # #Once we know what's there, let's output just sequences phyloxml.other[0].children.each do |node| puts node.value end #=> # #acgtcgcggcccgtggaagtcctctcct #aggtcgcggcctgtggaagtcctctcct #taaatcgc--cccgtgg-agtccc-cct == The BioRuby example programs Some sample programs are stored in ./sample/ directory. For example, the na2aa.rb program (transforms a nucleic acid sequence into an amino acid sequence) can be run using: ./sample/na2aa.rb test/data/fasta/example1.txt == Unit testing and doctests BioRuby comes with an extensive testing framework with over 1300 tests and 2700 assertions. To run the unit tests: cd test ruby runner.rb We have also started with doctest for Ruby. We are porting the examples in this tutorial to doctest - more info upcoming. == Further reading See the BioRuby in anger Wiki. A lot of BioRuby's documentation exists in the source code and unit tests. To really dive in you will need the latest source code tree. The embedded rdoc documentation for the BioRuby source code can be viewed online at (()). == BioRuby Shell The BioRuby shell implementation is located in ./lib/bio/shell. It is very interesting as it uses IRB (the Ruby interpreter) which is a powerful environment described in (()). IRB commands can be typed directly into the shell, e.g.
bioruby!> IRB.conf[:PROMPT_MODE] ==!> :PROMPT_C Additionally, you also may want to install the optional Ruby readline support - with Debian libreadline-ruby. To edit a previous line you may have to press line down (down arrow) first. = Helpful tools Apart from rdoc you may also want to use rtags - which allows jumping around source code by clicking on class and method names. cd bioruby/lib rtags -R --vi For a tutorial see (()) = APPENDIX == Biogem: Additional BioRuby plugins Biogem is one of the exciting developments for Ruby in bioinformatics! Biogems add new functionality next to the BioRuby core project (BioRuby is a biogem itself). A biogem is simply installed with gem install bio # The core BioRuby gem gem install bio-core # BioRuby + stable pure Ruby biogems gem install bio-core-ext # bio-core + stable Ruby extensions For information on these biogems, and the many others available, see (()) or (()). == KEGG API Please refer to KEGG_API.rd.ja (English version: (()) ) and * (()) == Ruby Ensembl API The Ruby Ensembl API is a Ruby API to the Ensembl database. It is NOT currently included in the BioRuby archives. To install it, see (()) for more information. === Gene Ontology (GO) through the Ruby Ensembl API Gene Ontologies can be fetched through the Ruby Ensembl API package: require 'ensembl' Ensembl::Core::DBConnection.connect('drosophila_melanogaster') infile = IO.readlines(ARGV.shift) # reading your comma-separated accession mapping file (one line per mapping) infile.each do |line| accs = line.split(",") # Split the comma-sep.entries into an array drosophila_acc = accs.shift # the first entry is the Drosophila acc mosq_acc = accs.shift # the second entry is your Mosq. acc gene = Ensembl::Core::Gene.find_by_stable_id(drosophila_acc) print "#{mosq_acc}" gene.go_terms.each do |go| print ",#{go}" end end Prints each mosq. accession/uniq identifier and the GO terms from the Drosophila homologues.
== Using BioPerl or BioPython from Ruby A possible route is to opt for JRuby and Jython on the JAVA virtual machine (JVM). At the moment there is no easy way of accessing BioPerl or BioPython directly from Ruby. A possibility is to create a Perl or Python server that gets accessed through XML/RPC or SOAP. == Installing required external libraries At this point for using BioRuby no additional libraries are needed, except if you are using the Bio::PhyloXML module; then you have to install libxml-ruby. This may change, so keep an eye on the BioRuby website. Also when a package is missing BioRuby should show an informative message. At this point installing third party Ruby packages can be a bit painful, as the gem standard for packages evolved late and some still force you to copy things by hand. Therefore read the READMEs carefully that come with each package. === Installing libxml-ruby The simplest way is to use the RubyGems packaging system: gem install -r libxml-ruby If you get `require': no such file to load - mkmf (LoadError) error then do sudo apt-get install ruby-dev If you have other problems with installation, then see (()). == Trouble shooting * Error: in `require': no such file to load -- bio (LoadError) Ruby is failing to find the BioRuby libraries - add it to the RUBYLIB path, or pass it to the interpreter. For example: ruby -I$BIORUBYPATH/lib yourprogram.rb == Modifying this page IMPORTANT NOTICE: This page is maintained in the BioRuby source code repository. Please edit the file there otherwise changes may get lost. See (()) for repository and mailing list access.
=end bio-1.4.3.0001/gemfiles/0000755000004100000410000000000012200110570014456 5ustar www-datawww-databio-1.4.3.0001/gemfiles/Gemfile.travis-ruby1.90000644000004100000410000000023512200110570020467 0ustar www-datawww-datasource :rubygems gem "rake" gem "xmlparser" gem "libxml-ruby" ## disabled because of "uninitialized constant XML::SaxParser" error #gem "soap4r-ruby1.9" bio-1.4.3.0001/gemfiles/modify-Gemfile.rb0000644000004100000410000000113612200110570017641 0ustar www-datawww-data# require 'pathname' envname_default_task = 'BIORUBY_RAKE_DEFAULT_TASK' gem_dir = Pathname.new(File.join(File.dirname(__FILE__), '..')).realpath case t = ENV[envname_default_task] when 'gem-test' # do nothing else $stderr.print "#{$0}: skipped: ENV[#{envname_default_task}]=#{t.inspect}\n" exit(0) end target = ENV['BUNDLE_GEMFILE'] unless target then $stderr.puts("Error: env BUNDLE_GEMFILE is not set.") end File.open(target, 'a') do |w| $stderr.puts "Add a line to #{target}" $stderr.puts "gem 'bio', :path => '#{gem_dir}'" w.puts "" w.puts "gem 'bio', :path => '#{gem_dir}'" end bio-1.4.3.0001/gemfiles/Gemfile.travis-jruby1.80000644000004100000410000000013712200110570020641 0ustar www-datawww-datasource :rubygems gem "rake" ## disabled because of build error on Travis #gem "libxml-ruby" bio-1.4.3.0001/gemfiles/prepare-gemspec.rb0000644000004100000410000000074312200110570020066 0ustar www-datawww-data# require 'pathname' require 'fileutils' envname_default_task = 'BIORUBY_RAKE_DEFAULT_TASK' gem_dir = Pathname.new(File.join(File.dirname(__FILE__), '..')).realpath case t = ENV[envname_default_task] when 'gem-test' # do nothing else $stderr.print "#{$0}: skipped: ENV[#{envname_default_task}]=#{t.inspect}\n" exit(0) end $stderr.puts "cd #{gem_dir}" Dir.chdir(gem_dir) args = [ 'bioruby.gemspec', '.gemspec' ] $stderr.puts(['cp', *args].join(" ")) FileUtils.cp(*args) bio-1.4.3.0001/gemfiles/Gemfile.travis-ruby1.80000644000004100000410000000010112200110570020456 0ustar www-datawww-datasource 
:rubygems gem "rake" gem "xmlparser" gem "libxml-ruby" bio-1.4.3.0001/gemfiles/Gemfile.travis-jruby1.90000644000004100000410000000027312200110570020643 0ustar www-datawww-datasource :rubygems gem "rake" ## disabled because of build error on Travis #gem "libxml-ruby" ## disabled because of "uninitialized constant XML::SaxParser" error #gem "soap4r-ruby1.9" bio-1.4.3.0001/README.rdoc0000644000004100000410000002455012200110570014477 0ustar www-datawww-data-- = README.rdoc - README for BioRuby Copyright:: Copyright (C) 2001-2007 Toshiaki Katayama , Copyright (C) 2008 Jan Aerts Copyright (C) 2011-2012 Naohisa Goto License:: The Ruby License * The above statement is limited to this file. See below about BioRuby's copyright and license. ++ = BioRuby Copyright (C) 2001-2012 Toshiaki Katayama BioRuby is an open source Ruby library for developing bioinformatics software. Object oriented scripting language Ruby has many features suitable for bioinformatics research, for example, clear syntax to express complex objects, regular expressions for text handling as powerful as Perl's, a wide variety of libraries including web service etc. As the syntax of the Ruby language is simple and very clean, we believe that it is easy to learn for beginners, easy to use for biologists, and also powerful enough for the software developers. In BioRuby, you can retrieve biological database entries from flat files, internet web servers and local relational databases. These database entries can be parsed to extract information you need. Biological sequences can be treated with the fulfilling methods of the Ruby's String class and with regular expressions. Daily tools like Blast, Fasta, Hmmer and many other software packages for biological analysis can be executed within the BioRuby script, and the results can be fully parsed to extract the portion you need. 
BioRuby supports major biological database formats and provides many ways for accessing them through flatfile indexing, SQL, web services etc. Various web services including KEGG API can be easily utilized by BioRuby. == FOR MORE INFORMATION See RELEASE_NOTES.rdoc for news and important changes in this version. === Documents in this distribution ==== Release notes, important changes and issues README.rdoc:: This file. General information and installation procedure. RELEASE_NOTES.rdoc:: News and important changes in this release. KNOWN_ISSUES.rdoc:: Known issues and bugs in BioRuby. doc/RELEASE_NOTES-1.4.2.rdoc:: News and incompatible changes from 1.4.1 to 1.4.2. doc/RELEASE_NOTES-1.4.1.rdoc:: News and incompatible changes from 1.4.0 to 1.4.1. doc/RELEASE_NOTES-1.4.0.rdoc:: News and incompatible changes from 1.3.1 to 1.4.0. doc/Changes-1.3.rdoc:: News and incompatible changes from 1.2.1 to 1.3.0. doc/Changes-0.7.rd:: News and incompatible changes from 0.6.4 to 1.2.1. ==== Tutorials and other useful information doc/Tutorial.rd:: BioRuby Tutorial. doc/Tutorial.rd.html:: HTML version of Tutorial.rd. doc/KEGG_API.rd:: Documents about KEGG API, including usage of Bio::KEGG::API. ==== BioRuby development ChangeLog:: History of changes. doc/ChangeLog-before-1.4.2:: changes before 1.4.2. doc/ChangeLog-before-1.3.1:: changes before 1.3.1. README_DEV.rdoc:: Describes ways to contribute to the BioRuby project, including coding styles and documentation guidelines. ==== Documents written in Japanese doc/Tutorial.rd.ja:: BioRuby Tutorial written in Japanese. doc/Tutorial.rd.ja.html:: HTML version of Tutorial.rd.ja. doc/KEGG_API.rd.ja:: Japanese translation of KEGG_API.rd. ==== Sample codes In sample/, There are many sample codes and demo scripts. === WWW BioRuby's official website is at http://bioruby.org/. You will find links to related resources including downloads, mailing lists, Wiki documentation etc. in the top page. 
* http://bioruby.org/ Mirror site is available, hosted on Open Bioinformatics Foundation (OBF). * http://bioruby.open-bio.org/ == WHERE TO OBTAIN === WWW The stable release is freely available from the BioRuby website. * http://bioruby.org/archive/ === RubyGems {RubyGems (packaging system for Ruby)}[http://rubygems.org/] version of the BioRuby package is also available for easy installation. * http://rubyforge.org/projects/bioruby/ === git If you need the latest development version, this is provided at * http://github.com/bioruby/bioruby and can be obtained by the following procedure: % git clone git://github.com/bioruby/bioruby.git == REQUIREMENTS * Ruby 1.8.6 or later (except Ruby 1.9.0) -- http://www.ruby-lang.org/ * Ruby 1.9.3-p194 or later, or Ruby 1.8.7-p358 or later is recommended. * See KNOWN_ISSUES.rdoc for Ruby version specific problems. == OPTIONAL REQUIREMENTS Some optional libraries can be utilized to extend BioRuby's functionality. If your needs meet the following conditions, install them by using RubyGems, or download and install from the "Ruby Application Archive" (RAA) at http://raa.ruby-lang.org/, RubyForge at http://rubyforge.org/, GitHub at http://github.com/, or the following web sites. For faster parsing of the BLAST XML output format: * {RAA:xmlparser}[http://raa.ruby-lang.org/project/xmlparser/] * For Ruby 1.8: {gem install xmlparser}[http://rubygems.org/gems/xmlparser] * For Ruby 1.9: http://www.yoshidam.net/Ruby.html#xmlparser * In both cases, {The Expat XML Parser}[http://expat.sourceforge.net/] and C compiler will be required. Creating faster flatfile index using Berkeley DB: * {GitHub:ruby-bdb}[https://github.com/knu/ruby-bdb] (which took over {RAA:bdb}[http://raa.ruby-lang.org/project/bdb/]) (No RubyGems available) * {Oracle Berkeley DB}[http://www.oracle.com/technetwork/database/berkeleydb/index.html] and C compiler will be required.
Accessing BioSQL database created by other Open Bio* libraries: * {gem install activerecord}[http://rubygems.org/gems/activerecord] and at least one driver (or adapter): * {gem install mysql}[http://rubygems.org/gems/mysql] or {gem install mysql2}[http://rubygems.org/gems/mysql2] * {gem install pg}[http://rubygems.org/gems/pg] * {gem install sqlite-ruby}[http://rubygems.org/gems/sqlite-ruby] or {gem install sqlite3}[http://rubygems.org/gems/sqlite3] * {gem install activerecord-oracle_enhanced-adapter}[http://rubygems.org/gems/activerecord-oracle_enhanced-adapter] For parsing PhyloXML format files: * {gem install libxml-ruby}[http://rubygems.org/gems/libxml-ruby] * {GNOME Libxml2 XML toolkit}[http://xmlsoft.org/] and C compiler will be required. (Only for Ruby 1.9.x) For using SOAP web services e.g. KEGG API: * {gem install soap4r-ruby1.9}[https://rubygems.org/gems/soap4r-ruby1.9] * For Ruby 1.8.x, SOAP4R is bundled within the Ruby 1.8.x release. == INSTALL === INSTALL by using RubyGems (recommended) If you are using RubyGems, just type % gem install bio Alternatively, manually download bio-X.X.X.gem from http://rubyforge.org/projects/bioruby/ and install it by using the gem command. RubyGems is bundled with Ruby 1.9.1 or later. For Ruby 1.8.7 or earlier, download and install RubyGems from http://rubygems.org/ . === INSTALL without RubyGems In the bioruby source directory (such as bioruby-x.x.x/), run setup.rb as follows: % su # ruby setup.rb This simple step installs this program under the default location of Ruby libraries. You can also install files into your favorite directory by supplying setup.rb some options. Try "ruby setup.rb --help". If your operating system supports 'sudo' command (such as Mac OS X), try the following procedure instead of the above. % sudo ruby setup.rb For older version users: "install.rb" is now renamed to "setup.rb". The options "config", "setup", and "install" are still available.
% ruby setup.rb config % ruby setup.rb setup % su # ruby setup.rb install You can run % ruby setup.rb --help for more details. === Running self-test To check if bioruby works fine on a machine, self-test codes are bundled. Note that some tests may need internet connection. To run tests, % ruby test/runner.rb If you are using Ruby 1.8.x and you want to use components installed by using RubyGems, explicit loading of RubyGems may be needed. % ruby -rubygems test/runner.rb Alternatively, testrb, the test runner command of ruby, can be used. % testrb test/ With testrb, you can select tests to run, for example, % testrb test/unit For those familiar with Rake, % rake test also works. Before reporting test failure, please check KNOWN_ISSUES.rdoc about known platform-dependent issues. We are happy if you write patches to solve the issues. == SETUP If you want to use the OBDA (Open Bio Database Access) to obtain database entries, copy a sample configuration file in the BioRuby distribution bioruby-x.x.x/etc/bioinformatics/seqdatabase.ini to /etc/bioinformatics/seqdatabase.ini (system wide configuration) or ~/.bioinformatics/seqdatabase.ini (personal configuration) and change the contents according to your preference. For more information on the OBDA, see http://obda.open-bio.org/ . == USAGE You can load all BioRuby classes just by requiring 'bio.rb'. All the BioRuby classes and modules are located under the module name 'Bio' to separate the name space. #!/usr/bin/env ruby require 'bio' You can also read other documentation in the 'doc' directory. bioruby-x.x.x/doc/ === RubyGems on Ruby 1.8.x With RubyGems on Ruby 1.8.x, you may need to load 'rubygems' library before using 'bio'. This may not be needed, depending on settings of Ruby. #!/usr/bin/env ruby require 'rubygems' require 'bio' == PLUGIN (Biogem) Many plugins (called Biogem) are now available. See http://biogems.info/ for list of plugins and related software utilizing BioRuby. 
* http://biogems.info/ To develop your own plugin, see "Plugins" pages of BioRuby Wiki. * http://bioruby.open-bio.org/wiki/Plugins == LICENSE BioRuby can be freely distributed under the same terms as Ruby. See the file COPYING (or COPYING.ja written in Japanese). As written in the file COPYING, see the file LEGAL for files distributed under different license. For example, setup.rb which comes from {RAA:setup}[http://raa.ruby-lang.org/project/setup/] developed by Minero Aoki (http://i.loveruby.net/en/projects/setup/) is licensed under LGPL 2.1. == REFERENCE If you use BioRuby in academic research, please consider citing the following publication. * BioRuby: Bioinformatics software for the Ruby programming language. Naohisa Goto, Pjotr Prins, Mitsuteru Nakao, Raoul Bonnal, Jan Aerts and Toshiaki Katayama. Bioinformatics (2010) 26(20): 2617-2619. * {doi: 10.1093/bioinformatics/btq475}[http://bioinformatics.oxfordjournals.org/content/26/20/2617] * {PMID: 20739307}[http://www.ncbi.nlm.nih.gov/pubmed/20739307] == CONTACT Current staff of the BioRuby project can be reached by sending e-mail to . 
bio-1.4.3.0001/etc/0000755000004100000410000000000012200110570013436 5ustar www-datawww-databio-1.4.3.0001/etc/bioinformatics/0000755000004100000410000000000012200110570016446 5ustar www-datawww-databio-1.4.3.0001/etc/bioinformatics/seqdatabase.ini0000644000004100000410000000075412200110570021432 0ustar www-datawww-dataVERSION=1.00 [embl] protocol=biofetch location=http://www.ebi.ac.uk/Tools/dbfetch/dbfetch dbname=embl [emblcds] protocol=biofetch location=http://www.ebi.ac.uk/Tools/dbfetch/dbfetch dbname=emblcds [uniprotkb] protocol=biofetch location=http://www.ebi.ac.uk/Tools/dbfetch/dbfetch dbname=uniprotkb [refseqn] protocol=biofetch location=http://www.ebi.ac.uk/Tools/dbfetch/dbfetch dbname=refseqn [refseqp] protocol=biofetch location=http://www.ebi.ac.uk/Tools/dbfetch/dbfetch dbname=refseqp bio-1.4.3.0001/Rakefile0000644000004100000410000002142212200110570014331 0ustar www-datawww-data# # = Rakefile - helper of developement and packaging # # Copyright:: Copyright (C) 2009, 2012 Naohisa Goto # License:: The Ruby License # require 'rubygems' require 'erb' require 'pathname' require 'fileutils' require 'tmpdir' require 'rake/testtask' require 'rake/packagetask' begin require 'rubygems/package_task' rescue LoadError # old RubyGems/Rake version require 'rake/gempackagetask' end begin require 'rdoc/task' rescue LoadError # old RDoc/Rake version require 'rake/rdoctask' end # workaround for new module name unless defined? Rake::GemPackageTask then Rake::GemPackageTask = Gem::PackageTask end load "./lib/bio/version.rb" BIO_VERSION_RB_LOADED = true # Version string for tar.gz, tar.bz2, or zip archive. # If nil, use the value in lib/bio.rb # Note that gem version is always determined from bioruby.gemspec.erb. version = ENV['BIORUBY_VERSION'] || Bio::BIORUBY_VERSION.join(".") version = nil if version.to_s.empty? extraversion = ENV['BIORUBY_EXTRA_VERSION'] || Bio::BIORUBY_EXTRA_VERSION extraversion = nil if extraversion.to_s.empty? 
BIORUBY_VERSION = version BIORUBY_EXTRA_VERSION = extraversion task :default => "see-env" Rake::TestTask.new do |t| t.test_files = FileList["test/{unit,functional}/**/test_*.rb"] end Rake::TestTask.new do |t| t.name = :"test-all" t.test_files = FileList["test/{unit,functional,network}/**/test_*.rb"] end Rake::TestTask.new do |t| t.name = :"test-network" t.test_files = FileList["test/network/**/test_*.rb"] end # files not included in gem but included in tar archive tar_additional_files = [] GEM_SPEC_FILE = "bioruby.gemspec" GEM_SPEC_TEMPLATE_FILE = "bioruby.gemspec.erb" # gets gem spec string gem_spec_string = File.open(GEM_SPEC_TEMPLATE_FILE, "rb") do |f| ERB.new(f.read).result end # gets gem spec object spec = eval(gem_spec_string) # adds notice of automatically generated file gem_spec_string = "# This file is automatically generated from #{GEM_SPEC_TEMPLATE_FILE} and\n# should NOT be edited by hand.\n# \n" + gem_spec_string # compares current gemspec file and newly generated gemspec string current_string = File.read(GEM_SPEC_FILE) rescue nil if current_string and current_string != gem_spec_string then #Rake::Task[GEM_SPEC_FILE].invoke flag_update_gemspec = true else flag_update_gemspec = false end desc "Update gem spec file" task :gemspec => GEM_SPEC_FILE desc "Force update gem spec file" task :regemspec do #rm GEM_SPEC_FILE, :force => true Rake::Task[GEM_SPEC_FILE].execute(nil) end desc "Update #{GEM_SPEC_FILE}" file GEM_SPEC_FILE => [ GEM_SPEC_TEMPLATE_FILE, 'Rakefile', 'lib/bio/version.rb' ] do |t| puts "creates #{GEM_SPEC_FILE}" File.open(t.name, 'wb') do |w| w.print gem_spec_string end end task :package => [ GEM_SPEC_FILE ] do Rake::Task[:regemspec].invoke if flag_update_gemspec end pkg_dir = "pkg" tar_version = (BIORUBY_VERSION || spec.version) + BIORUBY_EXTRA_VERSION.to_s tar_basename = "bioruby-#{tar_version}" tar_filename = "#{tar_basename}.tar.gz" tar_pkg_filepath = File.join(pkg_dir, tar_filename) gem_filename = spec.full_name + ".gem" gem_pkg_filepath 
= File.join(pkg_dir, gem_filename) Rake::PackageTask.new("bioruby") do |pkg| #pkg.package_dir = "./pkg" pkg.need_tar_gz = true pkg.package_files.import(spec.files) pkg.package_files.include(*tar_additional_files) pkg.version = tar_version end Rake::GemPackageTask.new(spec) do |pkg| #pkg.package_dir = "./pkg" end Rake::RDocTask.new do |r| r.rdoc_dir = "rdoc" r.rdoc_files.include(*spec.extra_rdoc_files) r.rdoc_files.import(spec.files.find_all {|x| /\Alib\/.+\.rb\z/ =~ x}) #r.rdoc_files.exclude /\.yaml\z" opts = spec.rdoc_options.to_a.dup if i = opts.index('--main') then main = opts[i + 1] opts.delete_at(i) opts.delete_at(i) else main = 'README.rdoc' end r.main = main r.options = opts end # Tutorial files TUTORIAL_RD = 'doc/Tutorial.rd' TUTORIAL_RD_JA = 'doc/Tutorial.rd.ja' TUTORIAL_RD_HTML = TUTORIAL_RD + '.html' TUTORIAL_RD_JA_HTML = TUTORIAL_RD_JA + '.html' HTMLFILES_TUTORIAL = [ TUTORIAL_RD_HTML, TUTORIAL_RD_JA_HTML ] # Formatting RD to html. def rd2html(src, dst) sh "rd2 -r rd/rd2html-lib.rb --with-css=bioruby.css #{src} > #{dst}" end # Tutorial.rd to Tutorial.rd.html file TUTORIAL_RD_HTML => TUTORIAL_RD do |t| rd2html(t.prerequisites[0], t.name) end # Tutorial.rd.ja to Tutorial.html.ja file TUTORIAL_RD_JA_HTML => TUTORIAL_RD_JA do |t| rd2html(t.prerequisites[0], t.name) end desc "Update doc/Tutorial*.html" task :tutorial2html => HTMLFILES_TUTORIAL desc "Force update doc/Tutorial*.html" task :retutorial2html do # safe_unlink HTMLFILES_TUTORIAL HTMLFILES_TUTORIAL.each do |x| Rake::Task[x].execute(nil) end end # ChangeLog desc "Force update ChangeLog using git log" task :rechangelog do # The tag name in the command line should be changed # after releasing new version, updating ChangeLog, # and doing "git mv ChangeLog doc/ChangeLog-X.X.X". 
sh "git log --stat --summary 1.4.2..HEAD > ChangeLog" end # define mktmpdir if true then # Note: arg is a subset of Dir.mktmpdir def mktmpdir(prefix) ## prepare temporary directory for testing top = Pathname.new(File.join(Dir.pwd, "tmp")).cleanpath.to_s begin Dir.mkdir(top) rescue Errno::EEXIST end ## prepare working directory flag = false dirname = nil ret = nil begin 10.times do |n| # following 3 lines are copied from Ruby 1.9.3's tmpdir.rb and modified t = Time.now.strftime("%Y%m%d") path = "#{prefix}#{t}-#{$$}-#{rand(0x100000000).to_s(36)}" path << "-#{n}" if n > 0 begin dirname = File.join(top, path) flag = Dir.mkdir(dirname) break if flag rescue SystemCallError end end raise "Couldn't create a directory under #{tmp}." unless flag ret = yield(dirname) ensure FileUtils.remove_entry_secure(dirname, true) if flag and dirname end ret end #def mktmpdir ## Currently, Dir.mktmpdir isn't used Because of JRuby's behavior. elsif Dir.respond_to?(:mktmpdir) then def self.mktmpdir(*arg, &block) Dir.mktmpdir(*arg, &block) end else load "lib/bio/command.rb" def mktmpdir(*arg, &block) Bio::Command.mktmpdir(*arg, &block) end end def chdir_with_message(dir) $stderr.puts("chdir #{dir}") Dir.chdir(dir) end # run in different directory def work_in_another_directory pwd = Dir.pwd ret = false mktmpdir("bioruby") do |dirname| begin chdir_with_message(dirname) ret = yield(dirname) ensure chdir_with_message(pwd) end end ret end desc "task specified with BIORUBY_RAKE_DEFAULT_TASK (default \"test\")" task :"see-env" do t = ENV["BIORUBY_RAKE_DEFAULT_TASK"] if t then Rake::Task[t].invoke else Rake::Task[:test].invoke end end desc "DANGER: build tar and install (GNU tar needed)" task :"tar-install" => [ :package ] do pwd = Dir.pwd work_in_another_directory do |dirname| begin # remove tar file in direname FileUtils.remove_entry_secure(tar_filename, true) # chdir to old pwd chdir_with_message(pwd) # copy (or link) tar file safe_ln(tar_pkg_filepath, dirname) # chdir to dirname again 
chdir_with_message(dirname) # remove a directory the tar file will contain FileUtils.remove_entry_secure(tar_basename, true) # extract tar sh("tar zxvf #{tar_filename}") # chdir to the directory chdir_with_message(tar_basename) # run tests ruby("setup.rb") ensure # cleanup chdir_with_message(dirname) FileUtils.remove_entry_secure(tar_basename, true) FileUtils.remove_entry_secure(tar_filename, true) end end end desc "test installed bioruby on system" task :"installed-test" do data_path = File.join(Dir.pwd, "test/data") test_runner = File.join(Dir.pwd, "test/runner.rb") data_path = Pathname.new(data_path).cleanpath.to_s test_runner = Pathname.new(test_runner).cleanpath.to_s ENV["BIORUBY_TEST_DATA"] = data_path ENV["BIORUBY_TEST_LIB"] = "" ENV["BIORUBY_TEST_GEM"] = nil work_in_another_directory do |dirname| ruby("-rbio", test_runner) end end desc "DANGER: build tar, install and run test" task :"tar-integration-test" => [ :"tar-install", :"installed-test" ] desc "test installed bioruby gem version #{spec.version.to_s}" task :"gem-test" do data_path = File.join(Dir.pwd, "test/data") test_runner = File.join(Dir.pwd, "test/runner.rb") data_path = Pathname.new(data_path).cleanpath.to_s test_runner = Pathname.new(test_runner).cleanpath.to_s ENV["BIORUBY_TEST_DATA"] = data_path ENV["BIORUBY_TEST_LIB"] = nil ENV["BIORUBY_TEST_GEM"] = spec.version.to_s work_in_another_directory do |dirname| ruby(test_runner) end end bio-1.4.3.0001/bioruby.gemspec0000644000004100000410000006535312200110570015717 0ustar www-datawww-data# This file is automatically generated from bioruby.gemspec.erb and # should NOT be edited by hand. # Gem::Specification.new do |s| s.name = 'bio' s.version = "1.4.3.0001" s.author = "BioRuby project" s.email = "staff@bioruby.org" s.homepage = "http://bioruby.org/" s.rubyforge_project = "bioruby" s.summary = "Bioinformatics library" s.description = "BioRuby is a library for bioinformatics (biology + information science)." 
s.platform = Gem::Platform::RUBY s.files = [ ".travis.yml", "COPYING", "COPYING.ja", "ChangeLog", "GPL", "KNOWN_ISSUES.rdoc", "LEGAL", "LGPL", "README.rdoc", "README_DEV.rdoc", "RELEASE_NOTES.rdoc", "Rakefile", "bin/bioruby", "bin/br_biofetch.rb", "bin/br_bioflat.rb", "bin/br_biogetseq.rb", "bin/br_pmfetch.rb", "bioruby.gemspec", "bioruby.gemspec.erb", "doc/ChangeLog-before-1.3.1", "doc/ChangeLog-before-1.4.2", "doc/Changes-0.7.rd", "doc/Changes-1.3.rdoc", "doc/KEGG_API.rd", "doc/KEGG_API.rd.ja", "doc/RELEASE_NOTES-1.4.0.rdoc", "doc/RELEASE_NOTES-1.4.1.rdoc", "doc/RELEASE_NOTES-1.4.2.rdoc", "doc/Tutorial.rd", "doc/Tutorial.rd.html", "doc/Tutorial.rd.ja", "doc/Tutorial.rd.ja.html", "doc/bioruby.css", "etc/bioinformatics/seqdatabase.ini", "extconf.rb", "gemfiles/Gemfile.travis-jruby1.8", "gemfiles/Gemfile.travis-jruby1.9", "gemfiles/Gemfile.travis-ruby1.8", "gemfiles/Gemfile.travis-ruby1.9", "gemfiles/modify-Gemfile.rb", "gemfiles/prepare-gemspec.rb", "lib/bio.rb", "lib/bio/alignment.rb", "lib/bio/appl/bl2seq/report.rb", "lib/bio/appl/blast.rb", "lib/bio/appl/blast/ddbj.rb", "lib/bio/appl/blast/format0.rb", "lib/bio/appl/blast/format8.rb", "lib/bio/appl/blast/genomenet.rb", "lib/bio/appl/blast/ncbioptions.rb", "lib/bio/appl/blast/remote.rb", "lib/bio/appl/blast/report.rb", "lib/bio/appl/blast/rexml.rb", "lib/bio/appl/blast/rpsblast.rb", "lib/bio/appl/blast/wublast.rb", "lib/bio/appl/blast/xmlparser.rb", "lib/bio/appl/blat/report.rb", "lib/bio/appl/clustalw.rb", "lib/bio/appl/clustalw/report.rb", "lib/bio/appl/emboss.rb", "lib/bio/appl/fasta.rb", "lib/bio/appl/fasta/format10.rb", "lib/bio/appl/gcg/msf.rb", "lib/bio/appl/gcg/seq.rb", "lib/bio/appl/genscan/report.rb", "lib/bio/appl/hmmer.rb", "lib/bio/appl/hmmer/report.rb", "lib/bio/appl/iprscan/report.rb", "lib/bio/appl/mafft.rb", "lib/bio/appl/mafft/report.rb", "lib/bio/appl/meme/mast.rb", "lib/bio/appl/meme/mast/report.rb", "lib/bio/appl/meme/motif.rb", "lib/bio/appl/muscle.rb", "lib/bio/appl/paml/baseml.rb", 
"lib/bio/appl/paml/baseml/report.rb", "lib/bio/appl/paml/codeml.rb", "lib/bio/appl/paml/codeml/rates.rb", "lib/bio/appl/paml/codeml/report.rb", "lib/bio/appl/paml/common.rb", "lib/bio/appl/paml/common_report.rb", "lib/bio/appl/paml/yn00.rb", "lib/bio/appl/paml/yn00/report.rb", "lib/bio/appl/phylip/alignment.rb", "lib/bio/appl/phylip/distance_matrix.rb", "lib/bio/appl/probcons.rb", "lib/bio/appl/psort.rb", "lib/bio/appl/psort/report.rb", "lib/bio/appl/pts1.rb", "lib/bio/appl/sim4.rb", "lib/bio/appl/sim4/report.rb", "lib/bio/appl/sosui/report.rb", "lib/bio/appl/spidey/report.rb", "lib/bio/appl/targetp/report.rb", "lib/bio/appl/tcoffee.rb", "lib/bio/appl/tmhmm/report.rb", "lib/bio/command.rb", "lib/bio/compat/features.rb", "lib/bio/compat/references.rb", "lib/bio/data/aa.rb", "lib/bio/data/codontable.rb", "lib/bio/data/na.rb", "lib/bio/db.rb", "lib/bio/db/aaindex.rb", "lib/bio/db/biosql/biosql_to_biosequence.rb", "lib/bio/db/biosql/sequence.rb", "lib/bio/db/embl/common.rb", "lib/bio/db/embl/embl.rb", "lib/bio/db/embl/embl_to_biosequence.rb", "lib/bio/db/embl/format_embl.rb", "lib/bio/db/embl/sptr.rb", "lib/bio/db/embl/swissprot.rb", "lib/bio/db/embl/trembl.rb", "lib/bio/db/embl/uniprot.rb", "lib/bio/db/fantom.rb", "lib/bio/db/fasta.rb", "lib/bio/db/fasta/defline.rb", "lib/bio/db/fasta/fasta_to_biosequence.rb", "lib/bio/db/fasta/format_fasta.rb", "lib/bio/db/fasta/format_qual.rb", "lib/bio/db/fasta/qual.rb", "lib/bio/db/fasta/qual_to_biosequence.rb", "lib/bio/db/fastq.rb", "lib/bio/db/fastq/fastq_to_biosequence.rb", "lib/bio/db/fastq/format_fastq.rb", "lib/bio/db/genbank/common.rb", "lib/bio/db/genbank/ddbj.rb", "lib/bio/db/genbank/format_genbank.rb", "lib/bio/db/genbank/genbank.rb", "lib/bio/db/genbank/genbank_to_biosequence.rb", "lib/bio/db/genbank/genpept.rb", "lib/bio/db/genbank/refseq.rb", "lib/bio/db/gff.rb", "lib/bio/db/go.rb", "lib/bio/db/kegg/brite.rb", "lib/bio/db/kegg/common.rb", "lib/bio/db/kegg/compound.rb", "lib/bio/db/kegg/drug.rb", 
"lib/bio/db/kegg/enzyme.rb", "lib/bio/db/kegg/expression.rb", "lib/bio/db/kegg/genes.rb", "lib/bio/db/kegg/genome.rb", "lib/bio/db/kegg/glycan.rb", "lib/bio/db/kegg/keggtab.rb", "lib/bio/db/kegg/kgml.rb", "lib/bio/db/kegg/module.rb", "lib/bio/db/kegg/orthology.rb", "lib/bio/db/kegg/pathway.rb", "lib/bio/db/kegg/reaction.rb", "lib/bio/db/kegg/taxonomy.rb", "lib/bio/db/lasergene.rb", "lib/bio/db/litdb.rb", "lib/bio/db/medline.rb", "lib/bio/db/nbrf.rb", "lib/bio/db/newick.rb", "lib/bio/db/nexus.rb", "lib/bio/db/pdb.rb", "lib/bio/db/pdb/atom.rb", "lib/bio/db/pdb/chain.rb", "lib/bio/db/pdb/chemicalcomponent.rb", "lib/bio/db/pdb/model.rb", "lib/bio/db/pdb/pdb.rb", "lib/bio/db/pdb/residue.rb", "lib/bio/db/pdb/utils.rb", "lib/bio/db/phyloxml/phyloxml.xsd", "lib/bio/db/phyloxml/phyloxml_elements.rb", "lib/bio/db/phyloxml/phyloxml_parser.rb", "lib/bio/db/phyloxml/phyloxml_writer.rb", "lib/bio/db/prosite.rb", "lib/bio/db/rebase.rb", "lib/bio/db/sanger_chromatogram/abif.rb", "lib/bio/db/sanger_chromatogram/chromatogram.rb", "lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb", "lib/bio/db/sanger_chromatogram/scf.rb", "lib/bio/db/soft.rb", "lib/bio/db/transfac.rb", "lib/bio/feature.rb", "lib/bio/io/biosql/ar-biosql.rb", "lib/bio/io/biosql/biosql.rb", "lib/bio/io/biosql/config/database.yml", "lib/bio/io/das.rb", "lib/bio/io/dbget.rb", "lib/bio/io/ddbjrest.rb", "lib/bio/io/ddbjxml.rb", "lib/bio/io/ebisoap.rb", "lib/bio/io/ensembl.rb", "lib/bio/io/fastacmd.rb", "lib/bio/io/fetch.rb", "lib/bio/io/flatfile.rb", "lib/bio/io/flatfile/autodetection.rb", "lib/bio/io/flatfile/bdb.rb", "lib/bio/io/flatfile/buffer.rb", "lib/bio/io/flatfile/index.rb", "lib/bio/io/flatfile/indexer.rb", "lib/bio/io/flatfile/splitter.rb", "lib/bio/io/higet.rb", "lib/bio/io/hinv.rb", "lib/bio/io/keggapi.rb", "lib/bio/io/ncbirest.rb", "lib/bio/io/ncbisoap.rb", "lib/bio/io/pubmed.rb", "lib/bio/io/registry.rb", "lib/bio/io/soapwsdl.rb", "lib/bio/io/sql.rb", "lib/bio/io/togows.rb", "lib/bio/location.rb", 
"lib/bio/map.rb", "lib/bio/pathway.rb", "lib/bio/reference.rb", "lib/bio/sequence.rb", "lib/bio/sequence/aa.rb", "lib/bio/sequence/adapter.rb", "lib/bio/sequence/common.rb", "lib/bio/sequence/compat.rb", "lib/bio/sequence/dblink.rb", "lib/bio/sequence/format.rb", "lib/bio/sequence/format_raw.rb", "lib/bio/sequence/generic.rb", "lib/bio/sequence/na.rb", "lib/bio/sequence/quality_score.rb", "lib/bio/sequence/sequence_masker.rb", "lib/bio/shell.rb", "lib/bio/shell/core.rb", "lib/bio/shell/demo.rb", "lib/bio/shell/interface.rb", "lib/bio/shell/irb.rb", "lib/bio/shell/object.rb", "lib/bio/shell/plugin/blast.rb", "lib/bio/shell/plugin/codon.rb", "lib/bio/shell/plugin/das.rb", "lib/bio/shell/plugin/emboss.rb", "lib/bio/shell/plugin/entry.rb", "lib/bio/shell/plugin/flatfile.rb", "lib/bio/shell/plugin/keggapi.rb", "lib/bio/shell/plugin/midi.rb", "lib/bio/shell/plugin/ncbirest.rb", "lib/bio/shell/plugin/obda.rb", "lib/bio/shell/plugin/psort.rb", "lib/bio/shell/plugin/seq.rb", "lib/bio/shell/plugin/soap.rb", "lib/bio/shell/plugin/togows.rb", "lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/bioruby_generator.rb", "lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_classes.rhtml", "lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_log.rhtml", "lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_methods.rhtml", "lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_modules.rhtml", "lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_variables.rhtml", "lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-bg.gif", "lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-gem.png", "lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-link.gif", "lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css", 
"lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.rhtml", "lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_controller.rb", "lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_helper.rb", "lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/commands.rhtml", "lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/history.rhtml", "lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/index.rhtml", "lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/spinner.gif", "lib/bio/shell/script.rb", "lib/bio/shell/setup.rb", "lib/bio/shell/web.rb", "lib/bio/tree.rb", "lib/bio/tree/output.rb", "lib/bio/util/color_scheme.rb", "lib/bio/util/color_scheme/buried.rb", "lib/bio/util/color_scheme/helix.rb", "lib/bio/util/color_scheme/hydropathy.rb", "lib/bio/util/color_scheme/nucleotide.rb", "lib/bio/util/color_scheme/strand.rb", "lib/bio/util/color_scheme/taylor.rb", "lib/bio/util/color_scheme/turn.rb", "lib/bio/util/color_scheme/zappo.rb", "lib/bio/util/contingency_table.rb", "lib/bio/util/restriction_enzyme.rb", "lib/bio/util/restriction_enzyme/analysis.rb", "lib/bio/util/restriction_enzyme/analysis_basic.rb", "lib/bio/util/restriction_enzyme/cut_symbol.rb", "lib/bio/util/restriction_enzyme/dense_int_array.rb", "lib/bio/util/restriction_enzyme/double_stranded.rb", "lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb", "lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb", "lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb", "lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb", "lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb", "lib/bio/util/restriction_enzyme/enzymes.yaml", "lib/bio/util/restriction_enzyme/range/cut_range.rb", "lib/bio/util/restriction_enzyme/range/cut_ranges.rb", 
"lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb", "lib/bio/util/restriction_enzyme/range/sequence_range.rb", "lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb", "lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb", "lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb", "lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb", "lib/bio/util/restriction_enzyme/single_strand.rb", "lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb", "lib/bio/util/restriction_enzyme/single_strand_complement.rb", "lib/bio/util/restriction_enzyme/sorted_num_array.rb", "lib/bio/util/restriction_enzyme/string_formatting.rb", "lib/bio/util/sirna.rb", "lib/bio/version.rb", "sample/any2fasta.rb", "sample/biofetch.rb", "sample/color_scheme_na.rb", "sample/dbget", "sample/demo_aaindex.rb", "sample/demo_aminoacid.rb", "sample/demo_bl2seq_report.rb", "sample/demo_blast_report.rb", "sample/demo_codontable.rb", "sample/demo_das.rb", "sample/demo_ddbjxml.rb", "sample/demo_fasta_remote.rb", "sample/demo_fastaformat.rb", "sample/demo_genbank.rb", "sample/demo_genscan_report.rb", "sample/demo_gff1.rb", "sample/demo_go.rb", "sample/demo_hmmer_report.rb", "sample/demo_kegg_compound.rb", "sample/demo_kegg_drug.rb", "sample/demo_kegg_genome.rb", "sample/demo_kegg_glycan.rb", "sample/demo_kegg_orthology.rb", "sample/demo_kegg_reaction.rb", "sample/demo_kegg_taxonomy.rb", "sample/demo_keggapi.rb", "sample/demo_litdb.rb", "sample/demo_locations.rb", "sample/demo_ncbi_rest.rb", "sample/demo_nucleicacid.rb", "sample/demo_pathway.rb", "sample/demo_prosite.rb", "sample/demo_psort.rb", "sample/demo_psort_report.rb", "sample/demo_pubmed.rb", "sample/demo_sequence.rb", "sample/demo_sirna.rb", "sample/demo_sosui_report.rb", "sample/demo_targetp_report.rb", "sample/demo_tmhmm_report.rb", "sample/enzymes.rb", "sample/fasta2tab.rb", "sample/fastagrep.rb", "sample/fastasort.rb", "sample/fsplit.rb", "sample/gb2fasta.rb", 
"sample/gb2tab.rb", "sample/gbtab2mysql.rb", "sample/genes2nuc.rb", "sample/genes2pep.rb", "sample/genes2tab.rb", "sample/genome2rb.rb", "sample/genome2tab.rb", "sample/goslim.rb", "sample/gt2fasta.rb", "sample/na2aa.rb", "sample/pmfetch.rb", "sample/pmsearch.rb", "sample/psortplot_html.rb", "sample/seqdatabase.ini", "sample/ssearch2tab.rb", "sample/tdiary.rb", "sample/test_phyloxml_big.rb", "sample/test_restriction_enzyme_long.rb", "sample/tfastx2tab.rb", "sample/vs-genes.rb", "setup.rb", "test/bioruby_test_helper.rb", "test/data/HMMER/hmmpfam.out", "test/data/HMMER/hmmsearch.out", "test/data/KEGG/1.1.1.1.enzyme", "test/data/KEGG/C00025.compound", "test/data/KEGG/D00063.drug", "test/data/KEGG/G00024.glycan", "test/data/KEGG/G01366.glycan", "test/data/KEGG/K02338.orthology", "test/data/KEGG/M00118.module", "test/data/KEGG/R00006.reaction", "test/data/KEGG/T00005.genome", "test/data/KEGG/T00070.genome", "test/data/KEGG/b0529.gene", "test/data/KEGG/ec00072.pathway", "test/data/KEGG/hsa00790.pathway", "test/data/KEGG/ko00312.pathway", "test/data/KEGG/map00030.pathway", "test/data/KEGG/map00052.pathway", "test/data/KEGG/rn00250.pathway", "test/data/KEGG/test.kgml", "test/data/SOSUI/sample.report", "test/data/TMHMM/sample.report", "test/data/aaindex/DAYM780301", "test/data/aaindex/PRAM900102", "test/data/bl2seq/cd8a_cd8b_blastp.bl2seq", "test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq", "test/data/blast/2.2.15.blastp.m7", "test/data/blast/b0002.faa", "test/data/blast/b0002.faa.m0", "test/data/blast/b0002.faa.m7", "test/data/blast/b0002.faa.m8", "test/data/blast/blastp-multi.m7", "test/data/clustalw/example1.aln", "test/data/command/echoarg2.bat", "test/data/command/echoarg2.sh", "test/data/embl/AB090716.embl", "test/data/embl/AB090716.embl.rel89", "test/data/fasta/EFTU_BACSU.fasta", "test/data/fasta/example1.txt", "test/data/fasta/example2.txt", "test/data/fastq/README.txt", "test/data/fastq/error_diff_ids.fastq", "test/data/fastq/error_double_qual.fastq", 
"test/data/fastq/error_double_seq.fastq", "test/data/fastq/error_long_qual.fastq", "test/data/fastq/error_no_qual.fastq", "test/data/fastq/error_qual_del.fastq", "test/data/fastq/error_qual_escape.fastq", "test/data/fastq/error_qual_null.fastq", "test/data/fastq/error_qual_space.fastq", "test/data/fastq/error_qual_tab.fastq", "test/data/fastq/error_qual_unit_sep.fastq", "test/data/fastq/error_qual_vtab.fastq", "test/data/fastq/error_short_qual.fastq", "test/data/fastq/error_spaces.fastq", "test/data/fastq/error_tabs.fastq", "test/data/fastq/error_trunc_at_plus.fastq", "test/data/fastq/error_trunc_at_qual.fastq", "test/data/fastq/error_trunc_at_seq.fastq", "test/data/fastq/error_trunc_in_plus.fastq", "test/data/fastq/error_trunc_in_qual.fastq", "test/data/fastq/error_trunc_in_seq.fastq", "test/data/fastq/error_trunc_in_title.fastq", "test/data/fastq/illumina_full_range_as_illumina.fastq", "test/data/fastq/illumina_full_range_as_sanger.fastq", "test/data/fastq/illumina_full_range_as_solexa.fastq", "test/data/fastq/illumina_full_range_original_illumina.fastq", "test/data/fastq/longreads_as_illumina.fastq", "test/data/fastq/longreads_as_sanger.fastq", "test/data/fastq/longreads_as_solexa.fastq", "test/data/fastq/longreads_original_sanger.fastq", "test/data/fastq/misc_dna_as_illumina.fastq", "test/data/fastq/misc_dna_as_sanger.fastq", "test/data/fastq/misc_dna_as_solexa.fastq", "test/data/fastq/misc_dna_original_sanger.fastq", "test/data/fastq/misc_rna_as_illumina.fastq", "test/data/fastq/misc_rna_as_sanger.fastq", "test/data/fastq/misc_rna_as_solexa.fastq", "test/data/fastq/misc_rna_original_sanger.fastq", "test/data/fastq/sanger_full_range_as_illumina.fastq", "test/data/fastq/sanger_full_range_as_sanger.fastq", "test/data/fastq/sanger_full_range_as_solexa.fastq", "test/data/fastq/sanger_full_range_original_sanger.fastq", "test/data/fastq/solexa_full_range_as_illumina.fastq", "test/data/fastq/solexa_full_range_as_sanger.fastq", 
"test/data/fastq/solexa_full_range_as_solexa.fastq", "test/data/fastq/solexa_full_range_original_solexa.fastq", "test/data/fastq/wrapping_as_illumina.fastq", "test/data/fastq/wrapping_as_sanger.fastq", "test/data/fastq/wrapping_as_solexa.fastq", "test/data/fastq/wrapping_original_sanger.fastq", "test/data/gcg/pileup-aa.msf", "test/data/genbank/CAA35997.gp", "test/data/genbank/SCU49845.gb", "test/data/genscan/sample.report", "test/data/go/selected_component.ontology", "test/data/go/selected_gene_association.sgd", "test/data/go/selected_wikipedia2go", "test/data/iprscan/merged.raw", "test/data/iprscan/merged.txt", "test/data/litdb/1717226.litdb", "test/data/medline/20146148_modified.medline", "test/data/meme/db", "test/data/meme/mast", "test/data/meme/mast.out", "test/data/meme/meme.out", "test/data/paml/codeml/control_file.txt", "test/data/paml/codeml/models/aa.aln", "test/data/paml/codeml/models/aa.dnd", "test/data/paml/codeml/models/aa.ph", "test/data/paml/codeml/models/alignment.phy", "test/data/paml/codeml/models/results0-3.txt", "test/data/paml/codeml/models/results7-8.txt", "test/data/paml/codeml/output.txt", "test/data/paml/codeml/rates", "test/data/phyloxml/apaf.xml", "test/data/phyloxml/bcl_2.xml", "test/data/phyloxml/made_up.xml", "test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml", "test/data/phyloxml/phyloxml_examples.xml", "test/data/pir/CRAB_ANAPL.pir", "test/data/prosite/prosite.dat", "test/data/refseq/nm_126355.entret", "test/data/rpsblast/misc.rpsblast", "test/data/sanger_chromatogram/test_chromatogram_abif.ab1", "test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf", "test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf", "test/data/sim4/complement-A4.sim4", "test/data/sim4/simple-A4.sim4", "test/data/sim4/simple2-A4.sim4", "test/data/soft/GDS100_partial.soft", "test/data/soft/GSE3457_family_partial.soft", "test/data/uniprot/p53_human.uniprot", "test/functional/bio/sequence/test_output_embl.rb", "test/functional/bio/test_command.rb", 
"test/network/bio/appl/blast/test_remote.rb", "test/network/bio/appl/test_blast.rb", "test/network/bio/appl/test_pts1.rb", "test/network/bio/io/test_ddbjrest.rb", "test/network/bio/io/test_ensembl.rb", "test/network/bio/io/test_pubmed.rb", "test/network/bio/io/test_soapwsdl.rb", "test/network/bio/io/test_togows.rb", "test/network/bio/test_command.rb", "test/runner.rb", "test/unit/bio/appl/bl2seq/test_report.rb", "test/unit/bio/appl/blast/test_ncbioptions.rb", "test/unit/bio/appl/blast/test_report.rb", "test/unit/bio/appl/blast/test_rpsblast.rb", "test/unit/bio/appl/clustalw/test_report.rb", "test/unit/bio/appl/gcg/test_msf.rb", "test/unit/bio/appl/genscan/test_report.rb", "test/unit/bio/appl/hmmer/test_report.rb", "test/unit/bio/appl/iprscan/test_report.rb", "test/unit/bio/appl/mafft/test_report.rb", "test/unit/bio/appl/meme/mast/test_report.rb", "test/unit/bio/appl/meme/test_mast.rb", "test/unit/bio/appl/meme/test_motif.rb", "test/unit/bio/appl/paml/codeml/test_rates.rb", "test/unit/bio/appl/paml/codeml/test_report.rb", "test/unit/bio/appl/paml/codeml/test_report_single.rb", "test/unit/bio/appl/paml/test_codeml.rb", "test/unit/bio/appl/sim4/test_report.rb", "test/unit/bio/appl/sosui/test_report.rb", "test/unit/bio/appl/targetp/test_report.rb", "test/unit/bio/appl/test_blast.rb", "test/unit/bio/appl/test_fasta.rb", "test/unit/bio/appl/test_pts1.rb", "test/unit/bio/appl/tmhmm/test_report.rb", "test/unit/bio/data/test_aa.rb", "test/unit/bio/data/test_codontable.rb", "test/unit/bio/data/test_na.rb", "test/unit/bio/db/biosql/tc_biosql.rb", "test/unit/bio/db/biosql/ts_suite_biosql.rb", "test/unit/bio/db/embl/test_common.rb", "test/unit/bio/db/embl/test_embl.rb", "test/unit/bio/db/embl/test_embl_rel89.rb", "test/unit/bio/db/embl/test_embl_to_bioseq.rb", "test/unit/bio/db/embl/test_sptr.rb", "test/unit/bio/db/embl/test_uniprot.rb", "test/unit/bio/db/embl/test_uniprot_new_part.rb", "test/unit/bio/db/fasta/test_defline.rb", "test/unit/bio/db/fasta/test_defline_misc.rb", 
"test/unit/bio/db/fasta/test_format_qual.rb", "test/unit/bio/db/genbank/test_common.rb", "test/unit/bio/db/genbank/test_genbank.rb", "test/unit/bio/db/genbank/test_genpept.rb", "test/unit/bio/db/kegg/test_compound.rb", "test/unit/bio/db/kegg/test_drug.rb", "test/unit/bio/db/kegg/test_enzyme.rb", "test/unit/bio/db/kegg/test_genes.rb", "test/unit/bio/db/kegg/test_genome.rb", "test/unit/bio/db/kegg/test_glycan.rb", "test/unit/bio/db/kegg/test_kgml.rb", "test/unit/bio/db/kegg/test_module.rb", "test/unit/bio/db/kegg/test_orthology.rb", "test/unit/bio/db/kegg/test_pathway.rb", "test/unit/bio/db/kegg/test_reaction.rb", "test/unit/bio/db/pdb/test_pdb.rb", "test/unit/bio/db/sanger_chromatogram/test_abif.rb", "test/unit/bio/db/sanger_chromatogram/test_scf.rb", "test/unit/bio/db/test_aaindex.rb", "test/unit/bio/db/test_fasta.rb", "test/unit/bio/db/test_fastq.rb", "test/unit/bio/db/test_gff.rb", "test/unit/bio/db/test_go.rb", "test/unit/bio/db/test_lasergene.rb", "test/unit/bio/db/test_litdb.rb", "test/unit/bio/db/test_medline.rb", "test/unit/bio/db/test_nbrf.rb", "test/unit/bio/db/test_newick.rb", "test/unit/bio/db/test_nexus.rb", "test/unit/bio/db/test_phyloxml.rb", "test/unit/bio/db/test_phyloxml_writer.rb", "test/unit/bio/db/test_prosite.rb", "test/unit/bio/db/test_qual.rb", "test/unit/bio/db/test_rebase.rb", "test/unit/bio/db/test_soft.rb", "test/unit/bio/io/flatfile/test_autodetection.rb", "test/unit/bio/io/flatfile/test_buffer.rb", "test/unit/bio/io/flatfile/test_splitter.rb", "test/unit/bio/io/test_ddbjxml.rb", "test/unit/bio/io/test_ensembl.rb", "test/unit/bio/io/test_fastacmd.rb", "test/unit/bio/io/test_flatfile.rb", "test/unit/bio/io/test_soapwsdl.rb", "test/unit/bio/io/test_togows.rb", "test/unit/bio/sequence/test_aa.rb", "test/unit/bio/sequence/test_common.rb", "test/unit/bio/sequence/test_compat.rb", "test/unit/bio/sequence/test_dblink.rb", "test/unit/bio/sequence/test_na.rb", "test/unit/bio/sequence/test_quality_score.rb", 
"test/unit/bio/sequence/test_sequence_masker.rb", "test/unit/bio/shell/plugin/test_seq.rb", "test/unit/bio/test_alignment.rb", "test/unit/bio/test_command.rb", "test/unit/bio/test_db.rb", "test/unit/bio/test_feature.rb", "test/unit/bio/test_location.rb", "test/unit/bio/test_map.rb", "test/unit/bio/test_pathway.rb", "test/unit/bio/test_reference.rb", "test/unit/bio/test_sequence.rb", "test/unit/bio/test_shell.rb", "test/unit/bio/test_tree.rb", "test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb", "test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb", "test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb", "test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb", "test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb", "test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb", "test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb", "test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb", "test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb", "test/unit/bio/util/restriction_enzyme/test_analysis.rb", "test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb", "test/unit/bio/util/restriction_enzyme/test_dense_int_array.rb", "test/unit/bio/util/restriction_enzyme/test_double_stranded.rb", "test/unit/bio/util/restriction_enzyme/test_single_strand.rb", "test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb", "test/unit/bio/util/restriction_enzyme/test_sorted_num_array.rb", "test/unit/bio/util/restriction_enzyme/test_string_formatting.rb", "test/unit/bio/util/test_color_scheme.rb", "test/unit/bio/util/test_contingency_table.rb", "test/unit/bio/util/test_restriction_enzyme.rb", "test/unit/bio/util/test_sirna.rb" ] s.has_rdoc = true s.extra_rdoc_files = [ "KNOWN_ISSUES.rdoc", "README.rdoc", "README_DEV.rdoc", 
"RELEASE_NOTES.rdoc", "doc/Changes-1.3.rdoc", "doc/RELEASE_NOTES-1.4.0.rdoc", "doc/RELEASE_NOTES-1.4.1.rdoc", "doc/RELEASE_NOTES-1.4.2.rdoc" ] s.rdoc_options << '--main' << 'README.rdoc' s.rdoc_options << '--title' << 'BioRuby API documentation' s.rdoc_options << '--exclude' << '\.yaml\z' s.rdoc_options << '--line-numbers' << '--inline-source' s.require_path = 'lib' s.bindir = "bin" s.executables = [ "bioruby", "br_biofetch.rb", "br_bioflat.rb", "br_biogetseq.rb", "br_pmfetch.rb" ] s.default_executable = "bioruby" end bio-1.4.3.0001/KNOWN_ISSUES.rdoc0000644000004100000410000002024212200110570015523 0ustar www-datawww-data= KNOWN_ISSUES.rdoc - Known issues and bugs in BioRuby Copyright:: Copyright (C) 2009-2012 Naohisa Goto License:: The Ruby License = Known issues and bugs in BioRuby Below are known issues and bugs in BioRuby. Patches to fix them are welcome. We hope they will be fixed in the future. Items marked with (WONT_FIX) tags would not be fixed within BioRuby because they are not BioRuby's issues and/or it is very difficult to fix them. == 1. Ruby version specific issues === Ruby 1.9.1 or later Some classes/modules/methods still may not work or may return incorrect results in Ruby 1.9.X, especially those not covered by the unit tests. ==== String encodings Currently, BioRuby do not care string encodings. In some cases, Encoding::CompatibilityError or "ArgumentError: invalid byte sequence in (encoding name)" may be raised. === End-of-life Ruby versions ==== Ruby 1.9.0 (WONT_FIX) Ruby 1.9.0 is NOT supported because it isn't a stable release. Use Ruby 1.9.1 or later. ==== Ruby 1.8.5 or earlier (WONT_FIX) Problems observed only with Ruby 1.8.5 or earlier will not be fixed. Note that Ruby 1.8.5 or earlier is no longer supported, as described in README.rdoc. ==== Ruby 1.8.2 or earlier (WONT_FIX) In some cases, temporary files and directories may not be removed because of the lack of FileUtils.remove_entry_secure. 
=== Issues about SOAP/WSDL SOAP4R (SOAP and WSDL implementation) is no longer bundled with Ruby 1.9. In addition, because of the API changes in recent SOAP4R, some classes/modules using SOAP4R may not work. === Problem with REXML DoS vulnerability patch before 09-Nov-2008 (WONT_FIX) If you have applied a patch taken from http://www.ruby-lang.org/en/news/2008/08/23/dos-vulnerability-in-rexml/ before 09 Nov 2008 12:40 +0900, because of the bug in the patch, parsing of Blast XML results with the REXML parser may fail. The bug is already fixed and a new patch is available at the above URL. Note that some Linux distributions may have incorporated the patch in their own way, and may have the same problem. === RubyGems 0.8.11 or earlier (WONT_FIX) With very old versions of RubyGems, use 'require_gem' which was deprecated in RubyGems 0.9.0 and removed in RubyGems 1.0.1. #!/usr/bin/env ruby require 'rubygems' require_gem 'bio' === JRuby On JRuby, errors may be raised due to the following unfixed bugs in JRuby. * {JRUBY-6195}[http://jira.codehaus.org/browse/JRUBY-6195] Process.spawn (and related methods) ignore option hash * {JRUBY-6818}[http://jira.codehaus.org/browse/JRUBY-6818] Kernel.exec, Process.spawn (and IO.popen etc.) raise error when program is an array containing two strings (WONT_FIX) With older versions of JRuby, you may be bothered by the following bugs that have already been fixed in the head of JRuby.
* {JRUBY-6658}[http://jira.codehaus.org/browse/JRUBY-6658] Problem when setting up an autoload entry, defining a class via require, then redefining the autoload entry * {JRUBY-6666}[http://jira.codehaus.org/browse/JRUBY-6666] Open3.popen3 failing due to missing handling for [path, argv[0]] array * {JRUBY-6819}[http://jira.codehaus.org/browse/JRUBY-6819] java.lang.ArrayIndexOutOfBoundsException in String#each_line (WONT_FIX) Due to JRUBY-5678 (resolved issue) and the difference of behavior between CRuby and JRuby written in the comments of the issue tracking page, when running BioRuby on JRuby with sudo or root rights, TMPDIR environment variable should be set to a directory that is not world-writable. Currently, the workaround is needed for running BioRuby tests with JRuby on Travis-CI. * {JRUBY-5678}[http://jira.codehaus.org/browse/JRUBY-5678] tmpdir cannot be delete when jruby has sudo/root rights === Rubinius According to Travis-CI, unit tests have failed in 1.9 mode of Rubinius. (WONT_FIX) With older versions of Rubinius, you may be bothered by the following bugs that have already been fixed in the head of Rubinius. * {Rubinius Issue #1693}[https://github.com/rubinius/rubinius/issues/1693] String#split gives incorrect output when splitting by /^/ * {Rubinius Issue #1724}[https://github.com/rubinius/rubinius/issues/1724] Creating Struct class with length attribute == 2. OS and/or architecture-dependent issues === Microsoft Windows ==== Text mode issues The following 4 tests failed on mswin32 (and maybe on mingw32 and bccwin32) because of the conversion of line feed codes in the text mode. * test_ended_pos and test_start_pos in test/unit/bio/io/test_flatfile.rb * test_pos in test/unit/bio/io/flatfile/test_buffer.rb * test_entry_pos in test/unit/bio/appl/blast/test_rpsblast.rb This indicates that br_bioflat.rb and Bio::FlatFileIndex may create incorrect indexes on mswin32, mingw32, and bccwin32. In addition, Bio::FlatFile may return incorrect data.
==== String escaping of command-line arguments After BioRuby 1.4.1, in Ruby 1.9.X running on Windows, escaping of command-line arguments is processed by the Ruby interpreter. Before BioRuby 1.4.0, the escaping is executed in Bio::Command#escape_shell_windows, and the behavior is different from the Ruby interpreter's one. Currently, due to the change, test/functional/bio/test_command.rb may fail on Windows with Ruby 1.9.X. ==== Windows 95/98/98SE/ME (WONT_FIX) Some methods that call external programs may not work in Windows 95/98/98SE/ME because of the limitation of COMMAND.COM. === OpenVMS, BeOS, OS/2, djgpp, Windows CE (WONT_FIX) BioRuby may not work on these platforms. == 3. Known issues and bugs in BioRuby === DDBJ Web API related classes (Bio::DDBJ::*, Bio::BLAST::Remote::DDBJ) The DDBJ Web API has been stopped since their system replacement in March 2012. (See the announcement though it is written only in Japanese: http://www.ddbj.nig.ac.jp/replace/rp120601-j.html) Due to the stop of the DDBJ Web API, Bio::DDBJ::* and Bio::BLAST::Remote::DDBJ which are using the web API can not be used. === Bio::Ensembl Due to the renewal of Ensembl web site, Bio::Ensembl does not work for the latest Ensembl. For a workaround, use an archive server. For example, "jul2008.archive.ensembl.org" seems to be the last server before the renewal. human = Bio::Ensembl.new("Homo_sapiens", "jul2008.archive.ensembl.org") Alternatively, consider using Ruby Ensembl API. * http://github.com/jandot/ruby-ensembl-api === Bio::DBGET and sample/dbget (WONT_FIX) Because the DBGET service has not been publicly available for a long time, we give up support for Bio::DBGET and sample/dbget. Instead, using Bio::TogoWS or Bio::KEGG::API is recommended. === Bio::NCBI::SOAP It seems that Bio::NCBI::SOAP (in lib/bio/io/ncbisoap.rb) does not work correctly, even running on Ruby 1.8.x. Instead, use Bio::NCBI::REST. 
=== Bio::KEGG::Taxonomy and sample/demo_kegg_taxonomy.rb Bio::KEGG::Taxonomy fails to parse current KEGG taxonomy data file probably because of the growth of the data size. === Bio::SPTR Bio::SPTR should be updated to follow UniProtKB format changes described in http://www.uniprot.org/docs/sp_news.htm . === http://bioruby.org/cgi-bin/biofetch.rb and Bio::Fetch.query BioRuby's default BioFetch server http://bioruby.org/cgi-bin/biofetch.rb has been down for years. Please use another server, such as EBI Dbfetch (http://www.ebi.ac.uk/Tools/dbfetch/dbfetch). Due to the stop of the http://bioruby.org/cgi-bin/biofetch.rb service, Bio::Fetch.query always raises error. == 4. Compatibility issues with other libraries/extensions === ActiveRecord BioSQL support uses ActiveRecord, but the author of the document does not know which version is suitable. === Ruby on Rails BioRuby Shell on Web uses Ruby on Rails, but the author of the document does not know which version is suitable. ==== SOAP4R with Ruby 1.9 soap4r-ruby1.9 may raise "uninitialized constant XML::SaxParser" error with some combinations of XML parser libraries. It seems this is a bug of soap4r-ruby1.9. == 5. Historical descriptions === CVS For historical purposes: the anonymous CVS was provided at * http://cvs.bioruby.org/ and could be obtained by the following procedure. % cvs -d :pserver:cvs@code.open-bio.org:/home/repository/bioruby login CVS password: cvs (login with a password 'cvs' for the first time) % cvs -d :pserver:cvs@code.open-bio.org:/home/repository/bioruby co bioruby These may be closed without any prior notice. 
bio-1.4.3.0001/ChangeLog0000644000004100000410000015575312200110570014455 0ustar www-datawww-datacommit 960497899e75c0fe36ef662ec4a0159559c836b1 Author: Naohisa Goto Date: Sat May 25 03:41:51 2013 +0900 regenerate bioruby.gemspec with rake regemspec bioruby.gemspec | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) commit e8fa160e158970259d915abc6113cb425537b8d6 Author: Naohisa Goto Date: Sat May 25 03:38:17 2013 +0900 Bio::BIORUBY_EXTRA_VERSION set to ".0001" (Release version with patches) lib/bio/version.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 9d84d408479aa261b239e3f371f60262782bfb76 Author: Naohisa Goto Date: Sat May 25 03:37:55 2013 +0900 Added release notes for 1.4.3.0001 release RELEASE_NOTES.rdoc | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) commit 1d3530cd1424f2cc0123057424b5c892f45dc93b Author: Naohisa Goto Date: Sat May 25 02:46:32 2013 +0900 Ruby 2.0 support: not to add ChangeLog and LEGAL to rdoc_files * Ruby 2.0 support: not to add ChangeLog and LEGAL to rdoc_files. Because ChangeLog is not rdoc format, rdoc bundled with Ruby 2.0 raises error during parsing. bioruby.gemspec.erb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit be72602cb42e6d09d465b9636257335dccb278a4 Author: Naohisa Goto Date: Thu Jan 10 01:27:03 2013 +0900 Ruby 2.0 support: Set script encoding to US-ASCII for gff.rb. lib/bio/db/gff.rb | 1 + 1 file changed, 1 insertion(+) commit 091f6951d23c5ed2418981b2cf94733b1ee7a8b1 Author: Naohisa Goto Date: Mon Oct 1 21:11:14 2012 +0900 Bug fix: parse error when subject sequence contains spaces * Bug fix: parse error when subject sequence contains spaces. Thanks to Edward Rice who reports the bug. 
(Bug #3385) (https://redmine.open-bio.org/issues/3385) lib/bio/appl/blast/format0.rb | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) commit ad0d7a1712d8b02358763233d38e67a0fff54917 Author: Naohisa Goto Date: Wed Aug 22 00:18:14 2012 +0900 BioRuby 1.4.3 is re-released ChangeLog | 9 +++++++++ 1 file changed, 9 insertions(+) commit 51ab2dec144c99a14ca9009c7b589b500f1cad5f Author: Naohisa Goto Date: Wed Aug 22 00:12:47 2012 +0900 Preparation to re-release BioRuby 1.4.3 ChangeLog | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) commit 5ff159d12252393ff04afe52b59a315d15c63d18 Author: Naohisa Goto Date: Wed Aug 22 00:00:40 2012 +0900 Bug fix: bin/bioruby failed to save object * Bug fix: bin/bioruby: Failed to save object with error message "can't convert Symbol into String" on Ruby 1.9. RELEASE_NOTES.rdoc | 2 ++ lib/bio/shell/core.rb | 1 + 2 files changed, 3 insertions(+) commit 74c6ce09413e7ddde1431d74e10cc9c4cdbb95ba Author: Naohisa Goto Date: Tue Aug 21 22:35:18 2012 +0900 BioRuby 1.4.3 is released. 
ChangeLog | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) commit 61af85b6cfc7bb1f3668ed68232113eb0751e7ea Author: Naohisa Goto Date: Tue Aug 21 22:33:30 2012 +0900 preparation for BioRuby 1.4.3 release version bioruby.gemspec | 2 +- lib/bio/version.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 1ec68beac42a06e9ef0a9c953650ef4d599e4e65 Author: Naohisa Goto Date: Tue Aug 21 20:53:04 2012 +0900 ChangeLog modified; release candidate version 1.4.3-rc2 ChangeLog | 1353 ++++++++++++++++++++++++++++++++++++++++++++++++++++ bioruby.gemspec | 2 +- lib/bio/version.rb | 4 +- 3 files changed, 1356 insertions(+), 3 deletions(-) commit e0d570b237a8b96ae0c1e7b1ad72c7333be07c52 Author: Naohisa Goto Date: Mon Aug 20 20:35:58 2012 +0900 version changed to 1.4.3-rc1 bioruby.gemspec | 3 ++- lib/bio/version.rb | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) commit 511c81ba67f7b8dc9cff85cf68db654d2feaf52e Author: Naohisa Goto Date: Mon Aug 20 20:17:14 2012 +0900 document JRUBY-5678 (resolved) and related issue with the workaround. KNOWN_ISSUES.rdoc | 9 +++++++++ RELEASE_NOTES.rdoc | 9 +++++++++ 2 files changed, 18 insertions(+) commit 2fdd7a3b3555a33dead31181c9526af22f24916f Author: Naohisa Goto Date: Mon Aug 20 19:44:39 2012 +0900 update recommended Ruby versions and the year in copyright lines README.rdoc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) commit b156227749e5ada74330e837c9ce48a16e6a6a2f Author: Naohisa Goto Date: Mon Aug 20 19:16:25 2012 +0900 Bug fix: Bio::EMBL#os raises error, with incompatible change * Bug fix: Bio::EMBL#os raises error. The bug is reported by Marc P. Hoeppner in the BioRuby mailing list (https://redmine.open-bio.org/issues/3294). * Incompatible change: Bio::EMBL#os no longer splits the content with comma, and it no longer raises error even if the OS line is not in the "Genus species (name)" format. 
The changes may affect the parsing of old EMBL files which contain two or more species names in an OS line. * Unit tests are modified to catch up the above incompatible changes. RELEASE_NOTES.rdoc | 14 ++++++ lib/bio/db/embl/embl.rb | 74 ++++++++++++++++++++++++++++++ test/unit/bio/db/embl/test_embl.rb | 9 +--- test/unit/bio/db/embl/test_embl_rel89.rb | 9 +--- 4 files changed, 92 insertions(+), 14 deletions(-) commit 31c8b4cb6ce2364aacee8137ddec3aa5f7d2d0d8 Author: Naohisa Goto Date: Mon Aug 20 19:04:50 2012 +0900 Workaround for jruby-1.7.0.preview2 bugs JRUBY-6195, JRUBY-6818 * Workaroud for jruby-1.7.0.preview2 bugs JRUBY-6195 and JRUBY-6818. * Refactoring of call_command_popen: split _call_command_popen_ruby18 and _call_command_popen_ruby19, add _call_command_popen_jruby19. Note that _call_command_popen_jruby19 will be removed in the future after the bugs are fixed. lib/bio/command.rb | 98 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 88 insertions(+), 10 deletions(-) commit 05f51fa2e871e71c2b20559eb05e456768a4f7d6 Author: Naohisa Goto Date: Sat Aug 18 00:32:31 2012 +0900 New default etc/bioinformatics/seqdatabase.ini * New default etc/bioinformatics/seqdatabase.ini, with currently available services. 
etc/bioinformatics/seqdatabase.ini | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 etc/bioinformatics/seqdatabase.ini commit a4264cc3667b98289c09efc7ccba9c8e86f6d89c Author: Naohisa Goto Date: Sat Aug 18 00:31:10 2012 +0900 etc/bioinformatics/seqdatabase.ini is moved to sample/ etc/bioinformatics/seqdatabase.ini | 210 ------------------------------------ sample/seqdatabase.ini | 210 ++++++++++++++++++++++++++++++++++++ 2 files changed, 210 insertions(+), 210 deletions(-) delete mode 100644 etc/bioinformatics/seqdatabase.ini create mode 100644 sample/seqdatabase.ini commit 04b7a27b557576f5325b3ee420262922ab66ca3b Author: Naohisa Goto Date: Sat Aug 18 00:30:38 2012 +0900 known issue about http://bioruby.org/cgi-bin/biofetch.rb server down KNOWN_ISSUES.rdoc | 9 +++++++++ 1 file changed, 9 insertions(+) commit 4a8193f7b91ff703c8f3dc6e6a6ae0c981a404e6 Author: Naohisa Goto Date: Fri Aug 17 23:45:41 2012 +0900 Update descriptions about JRuby and Rubinius bugs KNOWN_ISSUES.rdoc | 14 ++++++++++---- RELEASE_NOTES.rdoc | 14 ++++++++++---- 2 files changed, 20 insertions(+), 8 deletions(-) commit a2d8dd8ccebde84e91f82c59e531cc08fbf0f3fe Author: Naohisa Goto Date: Fri Aug 17 17:19:22 2012 +0900 Remove the suffix .rb in require, to avoid potential multiple loading. test/unit/bio/db/fasta/test_defline.rb | 2 +- test/unit/bio/db/genbank/test_genpept.rb | 2 +- test/unit/bio/db/kegg/test_drug.rb | 2 +- test/unit/bio/db/kegg/test_genome.rb | 2 +- test/unit/bio/db/kegg/test_glycan.rb | 2 +- test/unit/bio/util/test_restriction_enzyme.rb | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) commit 1d2e8b02db3699c2cd4f4890abc078ffd2b503aa Author: Ben J. 
Woodcroft Date: Wed Aug 8 09:41:20 2012 +1000 fill in missing piece of documentation in FastaFormat lib/bio/db/fasta.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 83bf09d4d81803c8d06e0d45ca25e7c09016161c Author: Naohisa Goto Date: Wed Aug 8 00:08:26 2012 +0900 RELEASE_NOTE.rdoc modified to reflect recent changes RELEASE_NOTES.rdoc | 107 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 92 insertions(+), 15 deletions(-) commit c3afb1eb98cf8777ee021624c3d2eab92b3543f2 Author: Naohisa Goto Date: Wed Aug 8 00:06:09 2012 +0900 Descriptions about JRuby, Rubinius, DDBJ Web API, SOAP4R etc. KNOWN_ISSUES.rdoc | 45 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) commit 01da7401a011aa519c43a021f89f6e7f769b4649 Author: Naohisa Goto Date: Tue Aug 7 23:55:09 2012 +0900 regenerate bioruby.gemspec with rake regemspec bioruby.gemspec | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) commit 9f70c27d9b75408fddae8384a2a09715b959dcb5 Author: Naohisa Goto Date: Tue Aug 7 23:51:56 2012 +0900 improve documentation; version changed to 1.4.3-pre1 lib/bio/version.rb | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) commit c11f12c8aa56b8509cd082f3478e96374210e5d7 Author: Naohisa Goto Date: Tue Aug 7 23:31:41 2012 +0900 Remove autorequire which have been deprecated bioruby.gemspec.erb | 1 - 1 file changed, 1 deletion(-) commit 7792b092033d2c819f2bcad0e206f27608481db5 Author: Ben J Woodcroft Date: Mon Aug 6 09:40:55 2012 +1000 flesh out FastaFormat documentation lib/bio/db/fasta.rb | 102 ++++++++++++++++++++++++------------------- lib/bio/db/fasta/defline.rb | 2 +- 2 files changed, 58 insertions(+), 46 deletions(-) commit 9a2fe67c247cdc7c9ddc9f8b8de771515ba76ac1 Author: Naohisa Goto Date: Fri Aug 3 22:36:12 2012 +0900 .travis.yml: restructure matrix, add allow_failures lines * Add allow_failures lines * Restructure matrix: remove many exclude lines and add some include lines. 
* When running jruby, Set TMPDIR to avoid known issue about FileUtils#remove_entry_secure. .travis.yml | 52 ++++++++++++++++++---------------------------------- 1 file changed, 18 insertions(+), 34 deletions(-) commit 553fd102c533c42675f93895557e3e00d36fd3e7 Author: Naohisa Goto Date: Fri Aug 3 22:05:39 2012 +0900 Improve tests for BLAST "-m 8" tabular format parser test/unit/bio/appl/blast/test_report.rb | 119 +++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) commit 3e1c062dbc168bd558ca8408a6da115aa570f3a7 Author: Naohisa Goto Date: Fri Aug 3 22:05:07 2012 +0900 Improve test and suppress warning: assigned but unused variable test/unit/bio/io/flatfile/test_buffer.rb | 1 + 1 file changed, 1 insertion(+) commit 7e29ce1f050e9e5b23299372d8ddfae781447dc3 Author: Naohisa Goto Date: Fri Aug 3 22:02:21 2012 +0900 Improve test and suppress warning: assigned but unused variable test/unit/bio/db/test_newick.rb | 2 ++ 1 file changed, 2 insertions(+) commit 1053b62069df74f336934e4ed0f3f217e4ad3312 Author: Naohisa Goto Date: Fri Jul 27 13:56:53 2012 +0900 Suppress warnings: shadowing outer local variable * Suppress warnings: shadowing outer local variable. Thanks to Andrew Grimm: https://github.com/bioruby/bioruby/pull/64 lib/bio/db/gff.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) commit e55794f65b3fb45c99e61d45220fe42f718426a3 Author: Naohisa Goto Date: Wed Jul 25 23:29:17 2012 +0900 Suppress warnings in lib/bio/alignment.rb:2322 * A space is inserted to suppress warnings in lib/bio/alignment.rb:2322. 
* warning: :' after local variable is interpreted as binary operator * warning: even though it seems like symbol literal lib/bio/alignment.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 174a38ea8c4ecea70724bf6ec8e72b2e4259853b Author: Naohisa Goto Date: Wed Jul 25 23:12:51 2012 +0900 Modified to follow changes of GenomeNet BLAST site lib/bio/appl/blast/genomenet.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit 93e24935840dcdec76984313719700134d69daf2 Author: Naohisa Goto Date: Wed Jul 25 15:21:32 2012 +0900 suppress warnings: instance variable @comment not initialized lib/bio/db/gff.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) commit 0ad3818fedb707a26e849877bde1f8dab006b848 Author: Naohisa Goto Date: Wed Jul 25 00:54:02 2012 +0900 suppress warnings: URI.escape/URI.unescape is obsolete lib/bio/db/gff.rb | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) commit 1263938742e7eeedb4a877aff7314e304320eca9 Author: Naohisa Goto Date: Mon Jul 23 21:15:52 2012 +0900 Added link to blastall options reference * Added link to blastall options reference. Thanks to Gareth Rees who sent a pull request. (https://github.com/bioruby/bioruby/pull/49) lib/bio/appl/blast/genomenet.rb | 5 +++++ 1 file changed, 5 insertions(+) commit 2ec5f4fd5abd0db7ec79ab3a9fd4adde7c9384a8 Author: Naohisa Goto Date: Mon Jul 23 17:26:45 2012 +0900 Next bioruby release version will be 1.4.3. 
RELEASE_NOTES.rdoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 6cf1318507a5d82bb93acdfe33e96723a2e742fc Author: Naohisa Goto Date: Mon Jul 23 17:25:35 2012 +0900 fix typo README.rdoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 2fd71cac315affe6e4d90b03dadac782f11553a5 Author: Naohisa Goto Date: Mon Jul 23 17:21:57 2012 +0900 Bug fix: Genomenet remote blast: catch up changes of the server lib/bio/appl/blast/genomenet.rb | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) commit 69d9717da11b2fe81a8f840bbafcc5fbb0dbe688 Author: Naohisa Goto Date: Fri Jul 20 11:24:37 2012 +0900 regenerate bioruby.gemspec with rake regemspec bioruby.gemspec | 2 ++ 1 file changed, 2 insertions(+) commit 9683da186579dbfa5da1bb1a32edc49cfdc026b8 Author: Naohisa Goto Date: Wed Jul 18 23:19:33 2012 +0900 Incompatible changes in Bio::KEGG::KGML are documented. * Incompatible changes in Bio::KEGG::KGML are documented. * Next BioRuby release version will be 1.4.3. RELEASE_NOTES.rdoc | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) commit 6cab377ae760d1abfda06caafe4a04ecd549e21d Author: Naohisa Goto Date: Wed Jul 18 22:56:00 2012 +0900 Incompatible changes: Bio::KEGG::KGML::Reaction#substrates, products * Incompatible changes: Bio::KEGG::KGML::Reaction#substrates and Bio::KEGG::KGML::Reaction#products are changed to return an array containing Bio::KEGG::KGML::Substrate and Bio::KEGG::KGML::Product objects, respectively. The aim of these changes are to store ID of substrates and products that were thrown away in the previous versions. 
lib/bio/db/kegg/kgml.rb | 48 ++++++++++++++--- test/unit/bio/db/kegg/test_kgml.rb | 104 +++++++++++++++++++++++++++++++++++- 2 files changed, 144 insertions(+), 8 deletions(-) commit 3cb1e09709d3c6b934028e28f9cafed149c9c751 Author: Naohisa Goto Date: Wed Jul 18 22:16:46 2012 +0900 Bio::KEGG::KGML#parse_* :use new attribute names * In Bio::KEGG::KGML#parse_* (private methods) new attribute method names should be used instead of deprecated old names. lib/bio/db/kegg/kgml.rb | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) commit c5ef981db6add98dc6778cd9809aff38a7071593 Author: Naohisa Goto Date: Wed Jul 18 22:14:33 2012 +0900 modified documentation for Bio::KEGG::KGML lib/bio/db/kegg/kgml.rb | 73 +++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 31 deletions(-) commit 5416b84eaa37b5abf15f905586a5eee65c4026f0 Author: Naohisa Goto Date: Wed Jul 18 15:01:58 2012 +0900 New class Bio::KEGG::KGML::Graphics with tests for Bio::KEGG::KGML * New class Bio::KEGG::KGML::Graphics for storing a graphics element. This fixes https://github.com/bioruby/bioruby/issues/51. * Unit tests for Bio::KEGG::KGML are added with mock test data. * Improve rdoc documentation for Bio::KEGG::KGML. * New method Bio::KEGG::KGML::Reaction#id * Attribute methods that were different from the KGML attribute names are renamed to the names of the KGML attribute names. Old method names are deprecated and are changed to aliases and will be removed in the future. * Bio::KEGG::KGML::Entry#id (old name: entry_id) * Bio::KEGG::KGML::Entry#type (old name: category) * Bio::KEGG::KGML::Entry#entry1 (old name: node1) * Bio::KEGG::KGML::Entry#entry2 (old name: node2) * Bio::KEGG::KGML::Entry#type (old name: rel) * Bio::KEGG::KGML::Reaction#name (old name: entry_id) * Bio::KEGG::KGML::Reaction#type (old name: direction) * Following attribute methods are deprecated because two or more graphics elements may exist in an entry element. 
They will be removed in the future. * Bio::KEGG::KGML::Entry#label * Bio::KEGG::KGML::Entry#shape * Bio::KEGG::KGML::Entry#x * Bio::KEGG::KGML::Entry#y * Bio::KEGG::KGML::Entry#width * Bio::KEGG::KGML::Entry#height * Bio::KEGG::KGML::Entry#fgcolor * Bio::KEGG::KGML::Entry#bgcolor lib/bio/db/kegg/kgml.rb | 321 ++++++++++--- test/data/KEGG/test.kgml | 37 ++ test/unit/bio/db/kegg/test_kgml.rb | 922 ++++++++++++++++++++++++++++++++++++ 3 files changed, 1223 insertions(+), 57 deletions(-) create mode 100644 test/data/KEGG/test.kgml create mode 100644 test/unit/bio/db/kegg/test_kgml.rb commit e5478363ef6969ec14c4e09c2bd7c6d27c12cf5b Author: Naohisa Goto Date: Tue Jul 17 22:23:28 2012 +0900 rdoc documentation for Bio::KEGG::KGML lib/bio/db/kegg/kgml.rb | 166 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 157 insertions(+), 9 deletions(-) commit 4a97e7034cae835b3bbc8ef918b9c6c48910dec5 Author: Naohisa Goto Date: Wed Jul 11 15:16:49 2012 +0900 autoload should not be used for libraries outside bio lib/bio/db/kegg/kgml.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) commit 338d4cd9913d70041349c5201f80f7a65e7135a6 Author: Naohisa Goto Date: Fri Jul 6 00:50:01 2012 +0900 remove unnecessary require "bio/db" in lib/bio/db/pdb.rb lib/bio/db/pdb.rb | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) commit 87c806a480fcacb0fc610c9669de19e4cb661a9c Author: Naohisa Goto Date: Fri Jul 6 00:47:20 2012 +0900 workaround to avoid circular require about Bio::PDB lib/bio/db/pdb/atom.rb | 5 +++-- lib/bio/db/pdb/chain.rb | 5 ++--- lib/bio/db/pdb/chemicalcomponent.rb | 5 +++-- lib/bio/db/pdb/model.rb | 4 ++-- lib/bio/db/pdb/pdb.rb | 3 ++- lib/bio/db/pdb/residue.rb | 4 ++-- lib/bio/db/pdb/utils.rb | 11 +++++++---- 7 files changed, 21 insertions(+), 16 deletions(-) commit 874f35c3930506fa029b419aa84677d1fea6681a Author: Naohisa Goto Date: Fri Jul 6 00:24:24 2012 +0900 regenerate bioruby.gemspec with rake regemspec bioruby.gemspec | 1 + 1 file changed, 1 
insertion(+) commit 090d4edb5698135f87df450a963ef35a307349c4 Author: Naohisa Goto Date: Fri Jul 6 00:19:54 2012 +0900 Tree output (formatter) methods moved to lib/bio/tree/output.rb * To avoid circular require about bio/tree, phylogenetic tree output (formatter) methods are moved to lib/bio/tree/output.rb. lib/bio/db/newick.rb | 244 -------------------------------------------- lib/bio/tree.rb | 3 +- lib/bio/tree/output.rb | 264 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 265 insertions(+), 246 deletions(-) create mode 100644 lib/bio/tree/output.rb commit b3d12b63097a5141b029bbfb3690870cd1935a60 Author: Naohisa Goto Date: Fri Jul 6 00:18:44 2012 +0900 Workaround to avoid circular require for Bio::Blast lib/bio/appl/bl2seq/report.rb | 6 +++--- lib/bio/appl/blast/ddbj.rb | 3 --- lib/bio/appl/blast/format0.rb | 3 +++ lib/bio/appl/blast/genomenet.rb | 2 -- lib/bio/appl/blast/ncbioptions.rb | 11 ++++++++--- lib/bio/appl/blast/remote.rb | 11 ++++++----- lib/bio/appl/blast/report.rb | 16 ++++++++++------ lib/bio/appl/blast/rpsblast.rb | 5 +++-- lib/bio/appl/blast/wublast.rb | 6 +++--- 9 files changed, 36 insertions(+), 27 deletions(-) commit 8f6c906c7b0d65b93ebf0a1e1307259e6eab8465 Author: Naohisa Goto Date: Thu Jul 5 23:29:42 2012 +0900 remove old require lines that are commented out lib/bio/appl/blast/format0.rb | 5 ----- 1 file changed, 5 deletions(-) commit c632fbf2d0320860eadfacb196d51d80ed3a2b34 Author: Naohisa Goto Date: Thu Jul 5 23:16:49 2012 +0900 Remove old workaround of strscan.so for Ruby 1.7 or earlier lib/bio/appl/blast/format0.rb | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) commit c81dce87f53d3ea7c7d2335e077fa609f2737779 Author: Naohisa Goto Date: Thu Jul 5 23:03:40 2012 +0900 .travis.yml: include ruby 1.9.2 test .travis.yml | 2 ++ 1 file changed, 2 insertions(+) commit 34709d114089c722b5da796028ffb91021761fdd Author: Naohisa Goto Date: Thu Jul 5 23:00:37 2012 +0900 Remove old comment lines 
lib/bio/sequence/format.rb | 6 ------ 1 file changed, 6 deletions(-) commit e0d5ed61e0101e2e72ad024dccd58c8c90def2b9 Author: Naohisa Goto Date: Thu Jul 5 22:42:17 2012 +0900 Finalizer for Bio::Command::Tmpdir is changed to suppress test failure * New class Bio::Command::Tmpdir::Remover for removing temporary directory in finilizer. This class is BioRuby internal use only. Users should not use this class. * Finalizer for Bio::Command::Tmpdir is changed from a Proc object to an instance of the Remover class. * Test failure fix: In some environment, with Ruby 1.9.2, test_output_embl(Bio::FuncTestSequenceOutputEMBL) was failed with "<#" that was raised in the finalizer callback of Bio::Command::Tmpdir. This commit fixes the problem. lib/bio/command.rb | 56 ++++++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 26 deletions(-) commit cca98d1378ce66d6db84cc9c1beadd39ed0e0fee Author: Naohisa Goto Date: Thu Jul 5 22:21:34 2012 +0900 Workaround to avoid circular require and JRuby autoload bug * "require" lines are modified to avoid circular require. * In files that would be required directly from outside bio/sequence (aa.rb, adapter.rb, common.rb, compat.rb, dblink.rb, generic.rb, na.rb, quality_score.rb, sequence_masker.rb), because of avoiding potential mismatch of superclass and/or lack of some methods, bio/sequence.rb is required when Bio::Sequence is not defined. 
* workaround to avoid JRuby autoload bug lib/bio/sequence.rb | 10 ++++++---- lib/bio/sequence/aa.rb | 8 +++----- lib/bio/sequence/adapter.rb | 12 ++++++------ lib/bio/sequence/common.rb | 2 ++ lib/bio/sequence/compat.rb | 9 ++------- lib/bio/sequence/dblink.rb | 11 ++++++----- lib/bio/sequence/generic.rb | 7 +++---- lib/bio/sequence/na.rb | 10 ++++------ lib/bio/sequence/quality_score.rb | 2 ++ lib/bio/sequence/sequence_masker.rb | 3 +++ 10 files changed, 37 insertions(+), 37 deletions(-) commit d2915c33ae7f330837688195a58c1e60fe78402a Author: Naohisa Goto Date: Thu Jul 5 21:04:28 2012 +0900 workaround to avoid circular require in Bio::RestrictionEnzyme * Workaround to avoid circular require in Bio::RestrictionEnzyme * Special care was needed for Bio::RestrictionEnzyme::Analysis because its method definitions are divided into two files: analysis.rb, analysis_basic.rb. lib/bio/util/restriction_enzyme/analysis.rb | 13 ++++++++----- lib/bio/util/restriction_enzyme/analysis_basic.rb | 7 ++++--- lib/bio/util/restriction_enzyme/cut_symbol.rb | 5 +++-- lib/bio/util/restriction_enzyme/dense_int_array.rb | 3 +++ lib/bio/util/restriction_enzyme/double_stranded.rb | 7 +++---- .../restriction_enzyme/double_stranded/aligned_strands.rb | 7 +++---- .../double_stranded/cut_location_pair.rb | 7 +++---- .../cut_location_pair_in_enzyme_notation.rb | 7 +++---- .../restriction_enzyme/double_stranded/cut_locations.rb | 7 +++---- .../double_stranded/cut_locations_in_enzyme_notation.rb | 7 +++---- lib/bio/util/restriction_enzyme/range/cut_range.rb | 7 +++---- lib/bio/util/restriction_enzyme/range/cut_ranges.rb | 7 +++---- .../util/restriction_enzyme/range/horizontal_cut_range.rb | 7 +++---- lib/bio/util/restriction_enzyme/range/sequence_range.rb | 7 +++---- .../range/sequence_range/calculated_cuts.rb | 7 +++---- .../restriction_enzyme/range/sequence_range/fragment.rb | 7 +++---- .../restriction_enzyme/range/sequence_range/fragments.rb | 7 +++---- 
.../util/restriction_enzyme/range/vertical_cut_range.rb | 7 +++---- lib/bio/util/restriction_enzyme/single_strand.rb | 6 +++--- .../single_strand/cut_locations_in_enzyme_notation.rb | 7 +++---- .../util/restriction_enzyme/single_strand_complement.rb | 7 +++---- lib/bio/util/restriction_enzyme/sorted_num_array.rb | 3 +++ lib/bio/util/restriction_enzyme/string_formatting.rb | 7 +++---- 23 files changed, 75 insertions(+), 81 deletions(-) commit 7df4843288ffde6d7132a5651fe978301f8ebd2b Author: Naohisa Goto Date: Thu Jul 5 20:18:08 2012 +0900 workaround to avoid JRuby autoload bug lib/bio/util/restriction_enzyme.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) commit 97d95f2b400006d4229a7ce69d7d8a5cdce42764 Author: Naohisa Goto Date: Wed Jul 4 22:00:27 2012 +0900 changed require to autoload for the workaround of JRuby autoload bug lib/bio/feature.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) commit 530b82a45731c2a71a110826341be425de1271e0 Author: Naohisa Goto Date: Wed Jul 4 22:00:06 2012 +0900 workaround to avoid JRuby autoload bug lib/bio/sequence/common.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) commit 8614f31b36fb93d6e49d109268d646ff3032cd1a Author: Naohisa Goto Date: Wed Jul 4 21:28:52 2012 +0900 workaround to avoid JRuby autoload bug * Workaround to avoid JRuby autoload bug. * Changed to require bio/db.rb because it is always loaded. 
lib/bio/db/kegg/genes.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit ea500006ed56857139c858bdfeb98773e5ca541e Author: Naohisa Goto Date: Thu Jun 28 21:36:35 2012 +0900 Rakefile: use own mktmpdir Rakefile | 59 +++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 16 deletions(-) commit 452fadcab61083dcb9d01ee05d300eae5cb23fee Author: Naohisa Goto Date: Thu Jun 28 20:37:59 2012 +0900 .travis.yml: remove "rake regemspec" from after_install .travis.yml | 2 -- 1 file changed, 2 deletions(-) commit 3fad822af3d7e558a58b71fd8ec2a7061b49f9f2 Author: Naohisa Goto Date: Thu Jun 28 20:36:59 2012 +0900 regenerate bioruby.gemspec with rake regemspec bioruby.gemspec | 2 ++ 1 file changed, 2 insertions(+) commit ea6e96fc654c797664b118a6326a84e4f9b1a8a3 Author: Naohisa Goto Date: Thu Jun 28 20:35:49 2012 +0900 print message when doing Dir.chdir Rakefile | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) commit c2fcd5e8cc71da38dc3c6d1f8c8d0233e47398b3 Author: Naohisa Goto Date: Thu Jun 28 20:28:41 2012 +0900 In tar-install, removed dependency to regemspec Rakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 67a7e83d516aab5d60f8263525b359be8b0ffc0b Author: Naohisa Goto Date: Thu Jun 28 20:23:24 2012 +0900 Rakefile: give up using Dir.mktmpdir because of JRuby's behavior * Rakefile: give up using Dir.mktmpdir because of JRuby's behavior that may be related with http://jira.codehaus.org/browse/JRUBY-5678 Rakefile | 61 ++++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 15 deletions(-) commit cff098034a338bbe9579d6c7b4380c7132a38ef5 Author: Naohisa Goto Date: Thu Jun 28 19:23:57 2012 +0900 gem-integration-test, gem-install and gem-install-nodoc are removed * gem-integration-test, gem-install and gem-install-nodoc are removed because they are useless with Bundler Rakefile | 13 ------------- 1 file changed, 13 deletions(-) commit 
d5c054265af4f80318cbfa5a5bbdee6125219de2 Author: Naohisa Goto Date: Thu Jun 28 18:10:05 2012 +0900 .travis.yml: .gemspec is needed to install local gem .travis.yml | 1 + gemfiles/prepare-gemspec.rb | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 gemfiles/prepare-gemspec.rb commit 05b6172123f42a1d8d46668d8a3d5f698c371704 Author: Naohisa Goto Date: Thu Jun 28 17:51:43 2012 +0900 remove 1.9.2; add tar/gem integration tests * Remove ruby version 1.9.2 from matrix for reducing builds * Add tar/gem integration tests * Add a new helper script gemfiles/modify-Gemfile.rb, modifying gemfile when running gem integration test. * Remove jruby version comments .travis.yml | 26 +++++++++++++++++--------- gemfiles/modify-Gemfile.rb | 28 ++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 9 deletions(-) create mode 100644 gemfiles/modify-Gemfile.rb commit 6813f91893e7ddc3000047357c9ed2dafb32a722 Author: Naohisa Goto Date: Thu Jun 28 17:06:28 2012 +0900 descriptions are modified for danger operations Rakefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit a209688952c922d9ba45c227874990bccd3da7c0 Author: Naohisa Goto Date: Mon Jun 25 23:25:51 2012 +0900 regenerate bioruby.gemspec with rake regemspec bioruby.gemspec | 5 +++++ 1 file changed, 5 insertions(+) commit 8f6459497be0e9ca7dc3eb2eb9606e42d97ad60c Author: Naohisa Goto Date: Mon Jun 25 21:01:06 2012 +0900 rake tasks added and default task is changed * New tasks: * gem-install: build gem and install it * gem-install-nodoc: build gem and install it with --no-ri --no-rdoc. 
* gem-test: test installed bioruby gem installed with gem-install (or gem-install-nodoc) * gem-integration-test: build gem, install and run test (with --no-ri --no-rdoc) * tar-install: DANGER: build tar and install by using setup.rb * installed-test: test installed bioruby * tar-integration-test: DANGER: build tar, install and run test * see-env: see BIORUBY_RAKE_DEFAULT_TASK environment variable and invoke the specified task. If the variable does not exist, it invokes "test", which was previously the default task. It is added for selecting task on Travis-ci. It is not recommended to invoke the task explicitly by hand. * Default task is changed from "test" to "see-env". Rakefile | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 107 insertions(+), 3 deletions(-) commit 3b400042cd361e1ab6d0fb0d8c8cce14a6c2ae10 Author: Naohisa Goto Date: Mon Jun 25 20:58:13 2012 +0900 BIORUBY_TEST_LIB is always added on the top of $LOAD_PATH * When BIORUBY_TEST_LIB is specified, the specified directory name is always added on the top of $LOAD_PATH even if it is already included in the middle of $LOAD_PATH. test/bioruby_test_helper.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 848304b6f90310f8fa15c80ba06655ae5cae5053 Author: Naohisa Goto Date: Mon Jun 25 20:42:07 2012 +0900 New env BIORUBY_TEST_GEM and BIORUBY_TEST_LIB behavior changed * New environment variable BIORUBY_TEST_GEM for testing installed bio-X.X.X gem. Version number can be specified. Example with version number: % env BIORUBY_TEST_GEM=1.4.2.5000 ruby test/runner.rb Example without version number: % env BIORUBY_TEST_GEM="" ruby test/runner.rb * When BIORUBY_TEST_LIB is empty, it no longer adds an empty string to $LOAD_PATH. Moreover, when BIORUBY_TEST_GEM is set, the variable is ignored. 
test/bioruby_test_helper.rb | 49 ++++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 12 deletions(-) commit 9453a6773c24f866698370195fd8e767443a38b9 Author: Tomoaki NISHIYAMA Date: Fri Jun 1 18:06:40 2012 +0900 broader FASTQ file recognition * Because PacBio RS sequencer may produce kilobases long reads and read buffer size (default 31 lines) for file format detection may not be sufficient to find the second id line starting with "+", the regular expression for FASTQ is truncated only to check the first id line starting with "@". * Test code is added. lib/bio/io/flatfile/autodetection.rb | 2 +- test/unit/bio/io/flatfile/test_autodetection.rb | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) commit 120e780c023cba06b83899c2f8a17c8fc1de4faa Author: Naohisa Goto Date: Fri Jun 8 15:36:29 2012 +0900 Retry sequence randomize test up to 10 times when fails * To suppress rare failure of chi-square equiprobability tests for Bio::Sequence::Common#randomize, test code changed to retry up to 10 times if the chi-square test fails. The assertion fails if the chi-square test fails 10 consecutive times, and this strongly suggests bugs in codes or in the random number generator. * The chi-square equiprobability tests are separated into a new test class. test/unit/bio/sequence/test_common.rb | 40 +++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) commit 20dde52f7da784d4d9ac551957700cd96e842ef6 Author: Naohisa Goto Date: Sat May 19 18:14:19 2012 +0900 libxml-ruby is disabled because of build error on Travis-ci gemfiles/Gemfile.travis-jruby1.8 | 3 ++- gemfiles/Gemfile.travis-jruby1.9 | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) commit 3c5c1cc277d30737815c7e44a2abbb308f5324b0 Author: Clayton Wheeler Date: Mon May 14 21:48:41 2012 -0400 Use libxml-ruby instead of libxml-jruby to fix JRuby test failures. 
The travis-ci Gemfiles currently call for libxml-jruby; this appears not to support the same API as libxml-ruby, resulting in several tests in test/unit/bio/db/test_phyloxml.rb failing with "NameError: uninitialized constant LibXMLJRuby::XML::Parser::Options". Switching to the C libxml-ruby library allows these tests to pass under JRuby in 1.8 mode. JRuby in 1.9 mode still fails a few PhyloXML tests due to https://jira.codehaus.org/browse/JRUBY-6662. gemfiles/Gemfile.travis-jruby1.8 | 2 +- gemfiles/Gemfile.travis-jruby1.9 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 01a618242d67f0d00fe681dfd85e68bb393513fc Author: Clayton Wheeler Date: Thu May 10 23:13:56 2012 -0400 test_tree.rb: to use %f instead of %g to prevent odd behavior. test/unit/bio/test_tree.rb | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) commit 5e80e4394bf2a5e4ee472fe84ab76239b293e1b5 Author: Clayton Wheeler Date: Thu May 10 23:04:55 2012 -0400 Fixed spurious JRuby failures in test_tree.rb due to floating point differences. test/unit/bio/test_tree.rb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) commit 459d4da894e9a9b9db0d793e3711dc45bae2089b Author: Artem Tarasov Date: Thu May 10 16:23:13 2012 +0400 Test bug fix: order of hash keys are not guaranteed * Test bug fix: Bio::TestSOFT#test_dataset: order of hash keys are not guaranteed. 
test/unit/bio/db/test_soft.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 7e730691d6ec597a610dc0d4665db3598fcfde59 Author: Naohisa Goto Date: Thu May 10 00:06:19 2012 +0900 removed potential circular require about Bio::Sequence::Format lib/bio/db/embl/format_embl.rb | 4 ---- lib/bio/db/fasta/format_fasta.rb | 4 ---- lib/bio/db/fasta/format_qual.rb | 5 ----- lib/bio/db/fastq/format_fastq.rb | 1 - lib/bio/db/genbank/format_genbank.rb | 4 ---- lib/bio/sequence/format_raw.rb | 4 ---- 6 files changed, 22 deletions(-) commit f1c398fdc3488bd18bd13ac864920ce6db4dab9e Author: Naohisa Goto Date: Wed May 9 15:54:20 2012 +0900 .travis.yml: comment out apt-get lines * .travis.yml: comment out apt-get lines because libxml2-dev and libexpat1-dev are already installed. .travis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit bc5ef4959e51f4a199d9f740b07812e9b8216255 Author: Naohisa Goto Date: Wed May 9 15:47:11 2012 +0900 travis-ci: comment out soap4r-ruby1.9 in Gemfile because of error * travis-ci: soap4r-ruby1.9 gem in Gemfile.travis-ruby1.9 and Gemfile.travis-jruby1.9 is commented out because of an error "uninitialized constant XML::SaxParser". 
gemfiles/Gemfile.travis-jruby1.9 | 4 +++- gemfiles/Gemfile.travis-ruby1.9 | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) commit 7e8153c09660c31d6286c1924680b8c5073a10b6 Author: Naohisa Goto Date: Tue May 1 18:11:09 2012 +0900 config files for Travis CI continuous integration service .travis.yml | 73 ++++++++++++++++++++++++++++++++++++++ gemfiles/Gemfile.travis-jruby1.8 | 6 ++++ gemfiles/Gemfile.travis-jruby1.9 | 7 ++++ gemfiles/Gemfile.travis-ruby1.8 | 7 ++++ gemfiles/Gemfile.travis-ruby1.9 | 8 +++++ 5 files changed, 101 insertions(+) create mode 100644 .travis.yml create mode 100644 gemfiles/Gemfile.travis-jruby1.8 create mode 100644 gemfiles/Gemfile.travis-jruby1.9 create mode 100644 gemfiles/Gemfile.travis-ruby1.8 create mode 100644 gemfiles/Gemfile.travis-ruby1.9 commit f1ecae7763648cb735a885ddb6c46d71c59b0694 Author: Naohisa Goto Date: Fri Mar 23 01:36:59 2012 +0900 Test bug fix: tests affected by the bug of Bio::NucleicAcid.to_re("s") test/unit/bio/data/test_na.rb | 2 +- test/unit/bio/sequence/test_na.rb | 2 +- test/unit/bio/test_sequence.rb | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) commit 3fd9384b1b59140a929c81dcc4b07cb3c2e47525 Author: Trevor Wennblom Date: Sat Feb 25 15:26:27 2012 -0600 Bug fix: Bio::NucleicAcid.to_re("s") typo lib/bio/data/na.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit c552aa3a6773139b14ae95e79e0fb43a2f91c6fb Author: Naohisa Goto Date: Thu Jan 12 22:24:37 2012 +0900 Bug fix: GenomeNet BLAST server URI changed. * Bug fix: GenomeNet BLAST server URI changed. Reported by joaocardoso via GitHub. 
( https://github.com/bioruby/bioruby/issues/44 ) lib/bio/appl/blast/genomenet.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit f33abf9bbd90c3c1e320f06447fdb54ffd094c5d Author: peterjc Date: Fri Nov 25 11:20:08 2011 +0000 Mark echoarg2.bat and echoarg2.sh as world executable 0 files changed mode change 100644 => 100755 test/data/command/echoarg2.bat mode change 100644 => 100755 test/data/command/echoarg2.sh commit d2d66f833d0b20647e8d761d2a240b99b206eaa8 Author: Naohisa Goto Date: Thu Nov 24 13:32:37 2011 +0900 Bug fix: rake aborted without git bioruby.gemspec.erb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit c2139739988ef731d61bf1a8cdba2dc5c48393bd Author: Naohisa Goto Date: Thu Nov 24 13:07:10 2011 +0900 regenerate bioruby.gemspec with rake regemspec. bioruby.gemspec | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) commit 6213b45d28bfea2cc8c838813b524d48c369266b Author: Naohisa Goto Date: Thu Nov 24 13:05:07 2011 +0900 Added workaround for changes of a module name and file names to require. Rakefile | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) commit 39f847cf8d453476275361078b831da43d400816 Author: Naohisa Goto Date: Thu Nov 24 12:08:47 2011 +0900 Use binary mode to open files. Rakefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) commit 688779e71a27e861fb01e07f816384561b8cfe45 Author: Naohisa Goto Date: Thu Nov 24 11:49:30 2011 +0900 Rakefile: new tasks: test-all to run all tests, etc. * Rakefile: new tasks: test-all to run all tests, and test-network to run tests in test/network. Rakefile | 10 ++++++++++ 1 file changed, 10 insertions(+) commit 53719535defcb0fefb3cf8bebe3fad6716bf7de2 Author: Naohisa Goto Date: Thu Nov 24 11:28:38 2011 +0900 test/runner.rb: Run tests only in test/unit and test/functional. 
test/runner.rb | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) commit fb9ee403db6b447aee73ebb7f12ff5a5b73d6c52 Author: Naohisa Goto Date: Wed Nov 23 20:36:36 2011 +0900 A test class using network connection is moved under test/network/. test/functional/bio/test_command.rb | 16 ---------------- test/network/bio/test_command.rb | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 16 deletions(-) create mode 100644 test/network/bio/test_command.rb commit a6dda2215aa686a9ca4af7484aa190f726d51e69 Author: Naohisa Goto Date: Wed Nov 23 20:28:58 2011 +0900 Tests using network connections are moved to test/network/ * Tests using network connections are moved to test/network/. * renamed: test/functional/bio/appl -> test/network/bio/appl * renamed: test/functional/bio/io -> test/network/bio/io test/functional/bio/appl/blast/test_remote.rb | 93 --------- test/functional/bio/appl/test_blast.rb | 61 ------ test/functional/bio/appl/test_pts1.rb | 117 ----------- test/functional/bio/io/test_ddbjrest.rb | 47 ----- test/functional/bio/io/test_ensembl.rb | 230 --------------------- test/functional/bio/io/test_pubmed.rb | 135 ------------- test/functional/bio/io/test_soapwsdl.rb | 53 ----- test/functional/bio/io/test_togows.rb | 268 ------------------------- test/network/bio/appl/blast/test_remote.rb | 93 +++++++++ test/network/bio/appl/test_blast.rb | 61 ++++++ test/network/bio/appl/test_pts1.rb | 117 +++++++++++ test/network/bio/io/test_ddbjrest.rb | 47 +++++ test/network/bio/io/test_ensembl.rb | 230 +++++++++++++++++++++ test/network/bio/io/test_pubmed.rb | 135 +++++++++++++ test/network/bio/io/test_soapwsdl.rb | 53 +++++ test/network/bio/io/test_togows.rb | 268 +++++++++++++++++++++++++ 16 files changed, 1004 insertions(+), 1004 deletions(-) delete mode 100644 test/functional/bio/appl/blast/test_remote.rb delete mode 100644 test/functional/bio/appl/test_blast.rb delete mode 100644 test/functional/bio/appl/test_pts1.rb delete mode 
100644 test/functional/bio/io/test_ddbjrest.rb delete mode 100644 test/functional/bio/io/test_ensembl.rb delete mode 100644 test/functional/bio/io/test_pubmed.rb delete mode 100644 test/functional/bio/io/test_soapwsdl.rb delete mode 100644 test/functional/bio/io/test_togows.rb create mode 100644 test/network/bio/appl/blast/test_remote.rb create mode 100644 test/network/bio/appl/test_blast.rb create mode 100644 test/network/bio/appl/test_pts1.rb create mode 100644 test/network/bio/io/test_ddbjrest.rb create mode 100644 test/network/bio/io/test_ensembl.rb create mode 100644 test/network/bio/io/test_pubmed.rb create mode 100644 test/network/bio/io/test_soapwsdl.rb create mode 100644 test/network/bio/io/test_togows.rb commit ec747aa33d06e08a6469dfd330360161d1b0f8e2 Author: Naohisa Goto Date: Wed Nov 23 15:03:08 2011 +0900 Test bug fix: use binmode to disable CR/LF conversion (fail on Windows) test/unit/bio/appl/blast/test_rpsblast.rb | 1 + test/unit/bio/io/flatfile/test_buffer.rb | 1 + 2 files changed, 2 insertions(+) commit 07ce32da009baa2c4e81f6d96f45e3dac49da183 Author: Naohisa Goto Date: Wed Nov 23 14:47:33 2011 +0900 Test bug fix: Read Sanger chromatogram files with binary mode * Test bug fix: Read Sanger chromatogram files with binary mode. Fix error/failure on Windows due to default text mode reading. test/unit/bio/db/sanger_chromatogram/test_abif.rb | 3 ++- test/unit/bio/db/sanger_chromatogram/test_scf.rb | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) commit 20d9068643214e3482d18c36028e50b3c9109755 Author: Naohisa Goto Date: Wed Nov 23 14:17:25 2011 +0900 Incompatible change: Bio::FlatFile.open and auto use binary mode * Incompatible change: Bio::FlatFile.open and auto use binary mode (binmode) unless text mode option is explicitly given. 
RELEASE_NOTES.rdoc | 7 ++ lib/bio/io/flatfile/buffer.rb | 84 ++++++++++++++++++ test/unit/bio/io/flatfile/test_buffer.rb | 139 ++++++++++++++++++++++++++++++ 3 files changed, 230 insertions(+) commit 48bd150a6180d59879872bd85dd95c7ddf1a19c0 Author: Naohisa Goto Date: Tue Nov 22 17:32:23 2011 +0900 Test bug fix: fixed incomplete Windows platform detection. test/unit/bio/test_command.rb | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) commit d499bcee7956b1a0a4c04aeb106e50a0839167b0 Author: Naohisa Goto Date: Tue Nov 22 16:15:05 2011 +0900 FuncTestCommandCall is changed to test various command-lines. * New file test/data/command/echoarg2.sh shell script, which acts like echoarg2.bat for Windows. * FuncTestCommandCall is changed to test various command-lines. test/data/command/echoarg2.sh | 4 ++ test/functional/bio/test_command.rb | 70 +++++++++++++++++++++++++++++------ 2 files changed, 62 insertions(+), 12 deletions(-) create mode 100644 test/data/command/echoarg2.sh commit d45e311c09ad2f4116770dd903f81e652a63ca2a Author: Naohisa Goto Date: Tue Nov 22 14:21:34 2011 +0900 Test bug fix: Opened files should be closed. * Test bug fix: Opened files should be closed. When finalizing writer tests, temporary files are not properly closed after verify reading, and removing the temporary files raises an error on Windows. test/unit/bio/db/test_phyloxml_writer.rb | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) commit a9022c61b98746e98a83f1cfd902e0e6b11c7bbb Author: Naohisa Goto Date: Tue Nov 22 13:55:15 2011 +0900 New method Bio::PhyloXML::Parser#closed?, and Bio::PhyloXML::Parser.open with block. * New method Bio::PhyloXML::Parser#closed? to check if it is closed or not. * Bio::PhyloXML::Parser.open and open_uri now can get a block. When a block is given, a Bio::PhyloXML::Parser object is passed to the block as an argument. When the block terminates, the object is closed. * Added tests about the above changes. 
lib/bio/db/phyloxml/phyloxml_parser.rb | 57 +++++++++++++++++++++++++++++--- test/unit/bio/db/test_phyloxml.rb | 56 +++++++++++++++++++++++++++++-- 2 files changed, 106 insertions(+), 7 deletions(-) commit 893cbe6ca993eca08427074059c2ba03621ea889 Author: Naohisa Goto Date: Sat Nov 5 00:49:10 2011 +0900 Ruby 1.9 should be fully supported, and optional requirements are revised. README.rdoc | 48 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 15 deletions(-) commit 38b1715c2d6bad39560e0846781ca903b1c16eda Author: Naohisa Goto Date: Fri Nov 4 22:12:38 2011 +0900 Added REFERENCE. README.rdoc | 12 ++++++++++++ 1 file changed, 12 insertions(+) commit 9a766cd17236bbe1e28d6972001dd5e3ed596123 Author: Naohisa Goto Date: Fri Nov 4 21:39:20 2011 +0900 Removed "setup.rb test" and added about running tests. README.rdoc | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) commit 39737179b06366e1d5acf2e5ac930e41b3a4ee38 Author: Pjotr Prins Date: Fri Oct 14 08:58:01 2011 +0200 Tutorial: added info on biogems doc/Tutorial.rd | 16 ++++++++++++++++ doc/Tutorial.rd.html | 23 +++++++++++++++-------- 2 files changed, 31 insertions(+), 8 deletions(-) commit e84400c5e9e94d95d6a8d3c4b72388b94d204766 Author: Pjotr Prins Date: Fri Oct 14 08:49:41 2011 +0200 Tutorial: small updates doc/Tutorial.rd | 8 +++++--- doc/Tutorial.rd.html | 9 +++++---- 2 files changed, 10 insertions(+), 7 deletions(-) commit 9fe07345b3b7be890d5baad9a51f0752af5e0ac4 Author: Naohisa Goto Date: Tue Sep 13 23:05:39 2011 +0900 README_DEV.rdoc: added git tips and policies, etc. * Added Git tips about sending a patch or a pull request. * Added Git management policies for the blessed repository. * Added some coding styles. * Added descriptions about Ruby versions and OS. 
README_DEV.rdoc | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 93 insertions(+), 2 deletions(-) commit 3c952c4a782501b21f36ece5bcab672dab12fc6d Author: Naohisa Goto Date: Tue Sep 13 13:21:20 2011 +0900 README.rdoc: for release notes and changelog, about sample files. README.rdoc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) commit fba9a6c0f1f79dd567ca54ba085b6258ac8efb31 Author: Naohisa Goto Date: Tue Sep 13 13:20:05 2011 +0900 RELEASE_NOTES.rdoc: mentioned about removal of rdoc.zsh. RELEASE_NOTES.rdoc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) commit 685b6bb7b98083e1b50e73baf4e7fa71bc9a39fa Author: Naohisa Goto Date: Mon Sep 12 21:23:34 2011 +0900 bioruby.gemspec.erb: LEGAL is added to rdoc files * bioruby.gemspec.erb: LEGAL is added to rdoc files. * bioruby.gemspec is updated by "rake regemspec". bioruby.gemspec | 9 ++++++--- bioruby.gemspec.erb | 6 +++++- 2 files changed, 11 insertions(+), 4 deletions(-) commit 414a6331f40fc99f554042e9a031689ea6d76da4 Author: Naohisa Goto Date: Mon Sep 12 20:54:06 2011 +0900 deleted rdoc.zsh which is obsolete and unused * Deleted rdoc.zsh which is obsolete and unused. To generate rdoc html, "rake rdoc" or "rake rerdoc". See "rake -T" for more information. rdoc.zsh | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 rdoc.zsh commit 272d9106cec43b0f219edd92a6f7bd3f9875a761 Author: Naohisa Goto Date: Mon Sep 12 20:35:47 2011 +0900 Added new ChangeLog, showing changes after 1.4.2 release. * Added new ChangeLog, showing changes after 1.4.2 release. For the changes before 1.4.2, see doc/ChangeLog-before-1.4.2. For the changes before 1.3.1, see doc/ChangeLog-before-1.3.1. 
ChangeLog | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 ChangeLog commit 941493378f9884978c81d5f63ee4ed5c175d4bea Author: Naohisa Goto Date: Mon Sep 12 20:28:28 2011 +0900 Rakefile: add new task :rechangelog to update ChangeLog using git log. * Rakefile: add new task :rechangelog to update ChangeLog using git log. Note that the tag name (currently 1.4.2) is hardcoded in Rakefile. Rakefile | 9 +++++++++ 1 file changed, 9 insertions(+) commit 1c89e6546223c3c05ea79b8ade4b493580851efa Author: Naohisa Goto Date: Mon Sep 12 20:24:49 2011 +0900 renamed ChangeLog to doc/ChangeLog-before-1.4.2 ChangeLog | 5013 -------------------------------------------- doc/ChangeLog-before-1.4.2 | 5013 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 5013 insertions(+), 5013 deletions(-) delete mode 100644 ChangeLog create mode 100644 doc/ChangeLog-before-1.4.2 commit 2233fbada55034bd16fb5b9c642292b4b6ccca83 Author: Naohisa Goto Date: Mon Sep 12 20:22:49 2011 +0900 ChangeLog updated: add log about 1.4.2 release ChangeLog | 9 +++++++++ 1 file changed, 9 insertions(+) commit 1c02ab0488e4097a2cf5c16180c3179c78e3d572 Author: Naohisa Goto Date: Mon Sep 12 19:40:54 2011 +0900 New RELEASE_NOTES.rdoc for next release version. 
RELEASE_NOTES.rdoc | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 RELEASE_NOTES.rdoc commit 4e63e69e98c0c440ec476ef3407fcc8fd2411056 Author: Naohisa Goto Date: Mon Sep 12 19:32:48 2011 +0900 renamed RELEASE_NOTES.rdoc to doc/RELEASE_NOTES-1.4.2.rdoc RELEASE_NOTES.rdoc | 132 ------------------------------------------ doc/RELEASE_NOTES-1.4.2.rdoc | 132 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 132 deletions(-) delete mode 100644 RELEASE_NOTES.rdoc create mode 100644 doc/RELEASE_NOTES-1.4.2.rdoc commit 9c5c8cafc3ec372ef80aa20d01d13034f94d5af2 Author: Naohisa Goto Date: Fri Sep 2 12:02:41 2011 +0900 Bio::BIORUBY_EXTRA_VERSION set to ".5000" (unstable version). lib/bio/version.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) bio-1.4.3.0001/LGPL0000644000004100000410000006347612200110570013364 0ustar www-datawww-data GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999 Copyright (C) 1991, 1999 Free Software Foundation, Inc. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.] Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. 
You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. When we speak of free software, we are referring to freedom of use, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. To protect each distributor, we want to make it very clear that there is no warranty for the free library. Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. 
Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. 
In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. GNU LESSER GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. The "Library", below, refers to any such software library or work which has been distributed under these terms. 
A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. 
b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. 
In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". 
Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. 6. As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. 
You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. 
e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. 
However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. 
If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. 
If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. 14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Libraries If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that everyone can redistribute and change. You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the ordinary General Public License). To apply these terms, attach the following notices to the library. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Also add information on how to contact you by electronic and paper mail. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the library, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the library `Frob' (a library for tweaking knobs) written by James Random Hacker. , 1 April 1990 Ty Coon, President of Vice That's all there is to it! 
bio-1.4.3.0001/README_DEV.rdoc0000644000004100000410000003030312200110570015166 0ustar www-datawww-data= README.DEV Copyright:: Copyright (C) 2005, 2006 Toshiaki Katayama Copyright:: Copyright (C) 2006, 2008 Jan Aerts Copyright:: Copyright (C) 2011 Naohisa Goto = HOW TO CONTRIBUTE TO THE BIORUBY PROJECT? There are many possible ways to contribute to the BioRuby project, such as: * Join the discussion on the BioRuby mailing list * Send a bug report or write a bug fix patch * Add and correct documentation * Develop code for new features, etc. All of these are welcome! This document mainly focuses on the last option, how to contribute your code to the BioRuby distribution. This may also be helpful when you send large patches for existing codes. We would like to include your contribution as long as the scope of your module meets the field of bioinformatics. == Git Bioruby is now under git source control at http://github.com/bioruby/bioruby. There are two basic ways to contribute: with patches or pull requests. Both are explained on the bioruby wiki at http://bioruby.open-bio.org/wiki. === Preparation before sending patches or pull requests Before sending patches or pull requests, rewriting history and reordering or selecting patches are recommended. See "Creating the perfect patch series" in the Git User's Manual. http://www.kernel.org/pub/software/scm/git/docs/user-manual.html#patch-series === Sending your contribution ==== With patches You can send patches with git-format-patch. For a smaller change, unified diff (diff -u) without using git can also be accepted. ==== With pull requests We are happy if your commits can be pulled with fast-forward. For the purpose, using git-rebase before sending pull request is recommended. See "Keeping a patch series up to date using git rebase" in the Git User's Manual. 
http://www.kernel.org/pub/software/scm/git/docs/user-manual.html#using-git-rebase === Notes for the treatment of contributions in the blessed repository ==== Merging policy We do not always merge your commits as is. We may edit, rewrite, reorder, select, and/or mix your commits before and/or after merging to the blessed repository. ==== Git commit management policy We want to keep the commit history linear as far as possible, because it is easy to find problems and regressions in commits. See "Why bisecting merge commits can be harder than bisecting linear history" in the Git User's Manual. http://www.kernel.org/pub/software/scm/git/docs/user-manual.html#bisect-merges Note that the above policy is only for the main 'blessed' repository, and it does not aim to restrict each user's fork. = LICENSE If you would like your module to be included in the BioRuby distribution, you need to give us right to change the license of your module to make it compatible with other modules in BioRuby. BioRuby was previously distributed under the LGPL license, but now is distributed under the same terms as Ruby. = CODING STYLE You will need to follow the typical coding styles of the BioRuby modules: == Use the following naming conventions * CamelCase for module and class names * '_'-separated_lowercase for method names * '_'-separated_lowercase for variable names * all UPPERCASE for constants == Indentation must not include tabs * Use 2 spaces for indentation. * Don't replace spaces to tabs. == Parenthesis in the method definition line should be written * Good: def example(str, ary) * Discouraged: def example str, ary == Comments Don't use =begin and =end blocks for comments. If you need to add comments, include it in the RDoc documentation. == Documentation should be written in the RDoc format in the source code The RDoc format is becoming the popular standard for Ruby documentation. We are now in transition from the previously used RD format to the RDoc format in API documentation. 
Additional tutorial documentation and working examples are encouraged with your contribution. You may use the header part of the file for this purpose as demonstrated in the previous section. == Standard documentation === of files Each file should start with a header, which covers the following topics: * copyright * license * description of the file (_not_ the classes; see below) * any references, if appropriate The header should be formatted as follows: # # = bio/db/hoge.rb - Hoge database parser classes # # Copyright:: Copyright (C) 2001, 2003-2005 Bio R. Hacker , # Copyright:: Copyright (C) 2006 Chem R. Hacker # # License:: The Ruby License # # == Description # # This file contains classes that implement an interface to the Hoge database. # # == References # # * Hoge F. et al., The Hoge database, Nucleic. Acid. Res. 123:100--123 (2030) # * http://hoge.db/ # require 'foo' module Bio autoload :Bar, 'bio/bar' class Hoge : end # Hoge end # Bio === of classes and methods within those files Classes and methods should be documented in a standardized format, as in the following example (from lib/bio/sequence.rb): # == Description # # Bio::Sequence objects represent annotated sequences in bioruby. # A Bio::Sequence object is a wrapper around the actual sequence, # represented as either a Bio::Sequence::NA or a Bio::Sequence::AA object. # For most users, this encapsulation will be completely transparent. # Bio::Sequence responds to all methods defined for Bio::Sequence::NA/AA # objects using the same arguments and returning the same values (even though # these methods are not documented specifically for Bio::Sequence). 
# # == Usage # # require 'bio' # # # Create a nucleic or amino acid sequence # dna = Bio::Sequence.auto('atgcatgcATGCATGCAAAA') # rna = Bio::Sequence.auto('augcaugcaugcaugcaaaa') # aa = Bio::Sequence.auto('ACDEFGHIKLMNPQRSTVWYU') # # # Print in FASTA format # puts dna.output(:fasta) # # # Print all codons # dna.window_search(3,3) do |codon| # puts codon # end # class Sequence # Create a new Bio::Sequence object # # s = Bio::Sequence.new('atgc') # puts s # => 'atgc' # # Note that this method does not initialize the contained sequence # as any kind of bioruby object, only as a simple string # # puts s.seq.class # => String # # See Bio::Sequence#na, Bio::Sequence#aa, and Bio::Sequence#auto # for methods to transform the basic String of a just created # Bio::Sequence object to a proper bioruby object # --- # *Arguments*: # * (required) _str_: String or Bio::Sequence::NA/AA object # *Returns*:: Bio::Sequence object def initialize(str) @seq = str end # The sequence identifier. For example, for a sequence # of Genbank origin, this is the accession number. attr_accessor :entry_id # An Array of Bio::Feature objects attr_accessor :features end # Sequence Preceding the class definition (class Sequence), there is at least a description and a usage example. Please use the +Description+ and +Usage+ headings. If appropriate, refer to other classes that interact with or are related to the class. The code in the usage example should, if possible, be in a format that a user can copy-and-paste into a new script to run. It should illustrate the most important uses of the class. If possible and if it would not clutter up the example too much, try to provide any input data directly into the usage example, instead of referring to ARGV or ARGF for input.
dna = Bio::Sequence.auto('atgcatgcATGCATGCAAAA') Otherwise, describe the input briefly, for example: # input should be string consisting of nucleotides dna = Bio::Sequence.auto(ARGF.read) Methods should be preceded by a comment that describes what the method does, including any relevant usage examples. (In contrast to the documentation for the class itself, headings are not required.) In addition, any arguments should be listed, as well as the type of thing that is returned by the method. The format of this information is as follows: # --- # *Arguments*: # * (required) _str_: String or Bio::Sequence::NA # * (optional) _nr_: a number that means something # *Returns*:: true or false Attribute accessors can be preceded by a short description. # P-value (Float) attr_reader :pvalue For writing rdoc documentation, putting two or more attributes in a line (such as attr_reader :evalue, :pvalue) is strongly discouraged. Methods that look like attributes can also be preceded by a short description. # Scientific name (String) def scientific_name #... end # Scientific name (String) def scientific_name=(str) #... end == Exception handling Don't use $stderr.puts "WARNING" in your code. Instead, try to avoid printing error messages. For fatal errors, use +raise+ with an appropriate message. Kernel#warn should only be used to notify programmers of incompatible changes. Typically it may be used for deprecated or obsolete usage of a method. For example, warn "The Foo#bar method is obsoleted. Use Foo#baz instead." == Testing code should use 'test/unit' Unit tests should come with your modules by which you can assure what you meant to do with each method. The test code is useful to make maintenance easy and ensure stability. The use of if __FILE__ == $0 is deprecated. == Using autoload To quicken the initial load time we have replaced most 'require' calls with 'autoload' since BioRuby version 0.7.
During this change, we have found some tips: You should not separate the same namespace into several files. * For example, if you have separated definitions of the Bio::Foo class into two files (e.g. 'bio/foo.rb' and 'bio/bar.rb'), you need to resolve the dependencies (including the load order) yourself. * If you have a defined Bio::Foo in 'bio/foo.rb' and a defined Bio::Foo::Bar in 'bio/foo/bar.rb' add the following line in the 'bio/foo.rb' file: autoload :Bar, 'bio/foo/bar' You should not put several top level namespaces in one file. * For example, if you have Bio::A, Bio::B and Bio::C in the file 'bio/foo.rb', you need autoload :A, 'bio/foo' autoload :B, 'bio/foo' autoload :C, 'bio/foo' to load the module automatically (instead of require 'bio/foo'). In this case, you should put them under the new namespace like Bio::Foo::A, Bio::Foo::B and Bio::Foo::C in the file 'bio/foo', then use autoload :Foo, 'bio/foo' so autoload can be written in 1 line. = NAMESPACE Your module should be located under the top-level module Bio and put under the 'bioruby/lib/bio' directory. The class/module names and the file names should be short and descriptive. There are already several sub directories in 'bioruby/lib': bio/*.rb -- general and widely used basic classes bio/appl/ -- wrapper and parser for the external applications bio/data/ -- basic biological data bio/db/ -- flatfile database entry parsers bio/io/ -- I/O interfaces for files, RDB, web services etc. bio/util/ -- utilities and algorithms for bioinformatics If your module doesn't match any of the above, please propose an appropriate directory name when you contribute. Please let the staff discuss namespaces (class names) and API (method names) before committing a new module or making changes to existing modules. = MAINTENANCE Finally, please maintain the code you've contributed. Please let us know (on the bioruby list) before you commit, so that users can discuss the change.
= RUBY VERSION and IMPLEMENTATION We are mainly using Ruby MRI (Matz' Ruby Implementation, or Matz' Ruby Interpreter). Please confirm that your code is running on current stable release versions of Ruby MRI. We are very happy if your code can run on both Ruby 1.8.x and 1.9.x. Note that Ruby 1.9.0 should be ignored because it was discontinued. Ruby 1.8.5 or earlier versions can also be ignored. See README.rdoc and RELEASE_NOTES.rdoc for recommended Ruby versions. It is welcome to support JRuby, Rubinius, etc, in addition to Ruby MRI. Of course, it is strongly encouraged to write code that is not affected by differences between Ruby versions and/or implementations, as far as possible. = OS and ARCHITECTURE We hope BioRuby can be run on both UNIX (and UNIX-like OS) and Microsoft Windows. bio-1.4.3.0001/bioruby.gemspec.erb0000644000004100000410000001015612200110570016455 0ustar www-datawww-dataGem::Specification.new do |s| s.name = 'bio' s.version = "<% ###### Below is executed in ERB environment ###### # Version can be specified by the environment variable env_ver = ENV['BIORUBY_GEM_VERSION'] env_ver = nil if env_ver.to_s.strip.empty? # By default, determined from lib/bio/version.rb load "./lib/bio/version.rb" unless defined?(BIO_VERSION_RB_LOADED) case Bio::BIORUBY_EXTRA_VERSION when nil suffix = nil when /\A\.(\d+)\z/ suffix = $1 when /\-alpha(\d+)/ decrement = true suffix = 9000 + $1.to_i when /\-pre(\d+)/ decrement = true suffix = 9500 + $1.to_i when /\-rc(\d+)/ decrement = true suffix = 9900 + $1.to_i else suffix = "0000" end ver = Bio::BIORUBY_VERSION.reverse.collect do |i| if decrement then i -= 1 i < 0 ? 
(i += 10) : decrement = false end i end.reverse ver.push suffix if suffix %><%= (env_ver || ver.join('.')) ###### Above is executed in ERB environment ###### %>" s.author = "BioRuby project" s.email = "staff@bioruby.org" s.homepage = "http://bioruby.org/" s.rubyforge_project = "bioruby" s.summary = "Bioinformatics library" s.description = "BioRuby is a library for bioinformatics (biology + information science)." s.platform = Gem::Platform::RUBY s.files = [ <% ###### Below is executed in ERB environment ###### # Gets file list from the "git ls-files" command. files = (`git ls-files` rescue nil).to_s.split(/\r?\n/) files.delete_if { |x| x.empty? } # When git-ls-files isn't available, creates a list from current files. if !$? or !($?.success?) or files.size <= 0 then files = [ "README.rdoc", "README_DEV.rdoc", "ChangeLog", "KNOWN_ISSUES.rdoc", "Rakefile", "bioruby.gemspec.erb", "bioruby.gemspec", "setup.rb", "extconf.rb", "rdoc.zsh" ] + Dir.glob("{bin,doc,etc,lib,sample,test}/**/*").delete_if do |item| case item when /(\A|\/)CVS(\z|\/)/, /(\A|\/)rdoc(\z|\/)/, /\~\z/ true else false end end end %><%= files.sort.collect { |x| x.dump }.join(",\n ") ###### Above is executed in ERB environment ###### %> ] s.has_rdoc = true s.extra_rdoc_files = [ <%= ###### Below is executed in ERB environment ###### # Files whose suffix are .rdoc are selected. rdoc_files = files.find_all { |item| /\.rdoc\z/ =~ item } # Fail safe settings if rdoc_files.empty? 
then rdoc_files = [ 'README.rdoc', 'README_DEV.rdoc', 'RELEASE_NOTES.rdoc', 'doc/Changes-1.3.rdoc', ] end def rdoc_files.add_file(name) self.push(name) unless self.include?(name) end #rdoc_files.add_file "ChangeLog" #rdoc_files.add_file "LEGAL" rdoc_files.sort.collect { |x| x.dump }.join(",\n ") ###### Above is executed in ERB environment ###### %> ] s.rdoc_options << '--main' << 'README.rdoc' s.rdoc_options << '--title' << 'BioRuby API documentation' s.rdoc_options << '--exclude' << '\.yaml\z' s.rdoc_options << '--line-numbers' << '--inline-source' s.require_path = 'lib' s.bindir = "bin" s.executables = [ <%= ###### Below is executed in ERB environment ###### # Files in bin/ directory are selected. exec_files = files.find_all { |item| /\Abin\// =~ item } # Non-executable files are removed from the list. exec_files.delete_if { |item| !File.executable?(item) } # strip "bin/" exec_files.collect! { |item| item.sub(/\Abin\//, '') } # Fail safe settings if exec_files.empty? then exec_files = [ "bioruby", "br_biofetch.rb", "br_biogetseq.rb", "br_bioflat.rb", "br_pmfetch.rb" ] end exec_files.sort.collect { |x| x.dump }.join(",\n ") ###### Above is executed in ERB environment ###### %> ] s.default_executable = "bioruby" end bio-1.4.3.0001/COPYING.ja0000644000004100000410000000407712200110570014317 0ustar www-datawww-data$BK\%W%m%0%i%`$O%U%j!<%=%U%H%&%'%"$G$9!%(BGPL (the GNU General Public License)$B%P!<%8%g%s(B2$B$^$?$O0J2<$K<($9>r7o$GK\%W%m%0%i%`(B $B$r:FG[I[$G$-$^$9!%(BGPL$B$K$D$$$F$O(BGPL$B%U%!%$%k$r;2>H$7$F2<$5$$!%(B 1. $BJ#@=$O@)8B$J$/<+M3$G$9!%(B 2. 
# License:: The Ruby License
#
# $Id:$
#

require 'bio'

PROG_VER  = "Powered by BioRuby #{Bio::BIORUBY_VERSION_ID}"
PROG_NAME = File.basename($0)

require 'getoptlong'


### formatting

class String

  # Wraps the receiver into several lines and returns the result as a
  # new String.
  #
  # Each line holds at most (fill_column - prefix length) characters.
  # A line is cut just after the last occurrence of _separater_ found in
  # the current chunk; when the chunk contains no separator it is cut at
  # the maximum width.  The separator itself stays at the end of the line.
  # Continuation lines are joined with a newline followed by _prefix_;
  # the first line is NOT prefixed.
  # ---
  # *Arguments*:
  # * (optional) _fill_column_: total line width (Integer)
  # * (optional) _prefix_: String put before every continuation line,
  #   or an Integer meaning that many spaces
  # * (optional) _separater_: String at which a line may be broken
  # *Returns*:: String
  #
  # Raises RuntimeError when the prefix leaves no room for text.
  def fill(fill_column = 80, prefix = '', separater = ' ')
    prefix = ' ' * prefix if prefix.is_a?(Integer)
    maxlen = fill_column - prefix.length
    raise "prefix is longer than fill_column" if maxlen <= 0

    cursor = 0
    lines = []
    while cursor < self.length
      line = self[cursor, maxlen]
      # Look for a break point only when text remains beyond this chunk.
      # (Testing "line.length < maxlen" here would needlessly split a
      # final chunk that exactly fills the column.)
      pos = (cursor + maxlen >= self.length) ? nil : line.rindex(separater)
      if pos
        len = pos + separater.length
        lines << self[cursor, len]
        cursor += len
      else
        lines << line
        cursor += maxlen
      end
    end
    return lines.join("\n#{prefix}")
  end
end


module Bio
  class Reference

    # Formats the reference as a short multi-line text report.
    #
    # The first line starts with the first run of digits found in the
    # pages field, left-justified to 8 columns, followed by the title
    # wrapped with String#fill.  The following lines (indented by 8
    # spaces) carry the abbreviated author list and the journal
    # citation with the PubMed ID.
    #
    # Author list rules, as coded below:
    # * more than 10 authors: first author, "et al." and the total count
    # * 5 to 10 authors: first author, one comma per omitted or trailing
    #   author (num - 1 commas), then the last author
    # * otherwise: all authors joined with ' & '
    # ---
    # *Returns*:: String
    def report
      if (num = @authors.size) > 10
        authors = "#{@authors[0]} et al. (#{num} authors)"
      elsif num > 4
        sep = ',' * (num - 1)
        authors = "#{@authors[0]}#{sep} #{@authors[-1]}"
      else
        authors = authors_join(' & ')
      end
      journal = "#{@journal} #{@year} #{@volume}(#{@issue}):#{@pages}"

      indent = 8
      prefix = ' ' * indent
      [
        # to_s keeps the report usable when no pages are recorded (nil)
        "#{@pages.to_s[/\d+/]}".ljust(indent) + "#{@title}".fill(78, indent),
        authors,
        "#{journal} [PMID:#{@pubmed}]",
      ].join("\n#{prefix}")
    end
  end
end


# Command line PubMed client.  Creating an instance parses the command
# line, runs the search and prints the result (see #pmfetch).
class PMFetch

  # Raised internally to request the corresponding output and exit.
  class Examples < StandardError; end
  class Version < StandardError; end
  class Usage < StandardError; end

  ### default options

  # Sets the default options and immediately runs the command.
  def initialize
    @format = 'rd'
    @search_opts = {
      'retmax' => 20,
    }
    @query = nil
    @query_opts = []
    @pmid_list_only = false

    pmfetch
  end


  ### main

  # Parses the options, searches PubMed and prints either the list of
  # PubMed IDs or the formatted entries.  Nothing is printed when the
  # search returns an empty list.
  def pmfetch
    begin
      set_options
      parse_options
      check_query
    rescue PMFetch::Examples
      puts examples
      exit
    rescue PMFetch::Version
      puts version
      exit
    rescue PMFetch::Usage, GetoptLong::Error
      # GetoptLong::Error is the parent of every option parsing error
      # (missing argument, invalid option, ambiguous abbreviation,
      # needless argument), so none of them ends in a backtrace.
      puts usage
      exit
    end

    list = pm_esearch
    return if list.empty?

    if @pmid_list_only
      puts list
    else
      pm_efetch(list)
    end
  end


  ### help

  # Returns the help message (String).
  def usage
%Q[
Usage: #{PROG_NAME} [options...] "query string"
    or #{PROG_NAME} --query "query string" [other options...]

Options:
  -q  --query "genome AND virus"  Query string for PubMed search
  -t  --title "mobile elements"   Title of the article to search
  -j  --journal "genome res"      Journal title to search
  -v  --volume #                  Journal volume to search
  -i  --issue #                   Journal issue to search
  -p  --page #                    First page number of the article to search
  -a  --author "Altschul SF"      Author name to search
  -m  --mesh "SARS virus"         MeSH term to search
  -f  --format bibtex             Summary output format
      --pmidlist                  Output only a list of PubMed IDs
  -n  --retmax #                  Number of articles to retrieve at the maximum
  -N  --retstart #                Starting number of the articles to retrieve
  -s  --sort pub+date             Sort method for the summary output
      --reldate #                 Search articles published within recent # days
      --mindate YYYY/MM/DD        Search articles published after the date
      --maxdate YYYY/MM/DD        Search articles published before the date
      --help                      Output this help, then exit
      --examples                  Output examples, then exit
      --version                   Output version number, then exit

Formats:
  endnote, medline, bibitem, bibtex, report, rd,
  nature, science, genome_res, genome_biol, nar, current, trends, cell

Sort:
  author, journal, pub+date, page

See the following pages for the PubMed search options:
  http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html
  http://www.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html

#{version}

]
  end

  # Returns the version message (String).
  def version
    PROG_VER
  end

  # Returns the examples stored after __END__ in this file, with the
  # placeholder 'PMFetch' replaced by the actual program name.
  def examples
    DATA.read.gsub('PMFetch', PROG_NAME)
  end


  private


  ### options

  # Creates the GetoptLong parser and registers the accepted options.
  def set_options
    @parser = GetoptLong.new

    @parser.set_options(
      [ '--query',    '-q', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--title',    '-t', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--journal',  '-j', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--volume',   '-v', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--issue',    '-i', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--page',     '-p', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--author',   '-a', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--mesh',     '-m', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--format',   '-f', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--pmidlist',       GetoptLong::NO_ARGUMENT ],
      [ '--retmax',   '-n', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--retstart', '-N', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--sort',     '-s', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--reldate',        GetoptLong::REQUIRED_ARGUMENT ],
      [ '--mindate',        GetoptLong::REQUIRED_ARGUMENT ],
      [ '--maxdate',        GetoptLong::REQUIRED_ARGUMENT ],
      [ '--examples',       GetoptLong::NO_ARGUMENT ],
      [ '--help',           GetoptLong::NO_ARGUMENT ],
      [ '--version',        GetoptLong::NO_ARGUMENT ]
    )
  end

  # Reads the parsed options into instance variables.
  #
  # Field options (--title, --journal, ...) are stored as query terms
  # tagged with a PubMed field qualifier.  Every option not handled
  # explicitly (--retmax, --retstart, --reldate, --mindate, --maxdate)
  # is passed to the search as is, keyed by its name without dashes.
  def parse_options
    @parser.each_option do |optname, optarg|
      case optname
      when /--query/
        @query = optarg
      when /--title/
        @query_opts << "#{optarg}[ti]"
      when /--journal/
        @query_opts << "#{optarg}[ta]"
      when /--volume/
        @query_opts << "#{optarg}[vi]"
      when /--issue/
        @query_opts << "#{optarg}[ip]"
      when /--page/
        @query_opts << "#{optarg}[pg]"
      when /--author/
        @query_opts << "#{optarg}[au]"
      when /--mesh/
        @query_opts << "#{optarg}[mh]"
      when /--format/
        @format = optarg
      when /--pmidlist/
        @pmid_list_only = true
      when /--examples/
        raise PMFetch::Examples
      when /--help/
        raise PMFetch::Usage
      when /--version/
        raise PMFetch::Version
      when /--sort/
        @sort = optarg
        # 'page' sorting is done locally in sort_entries, so it is not
        # sent with the search options
        @search_opts["sort"] = @sort unless @sort == "page"
      else
        # Non-destructive delete: the yielded name belongs to the parser
        # and may be a frozen string, so it must not be modified in place.
        @search_opts[optname.delete('-')] = optarg
      end
    end
  end


  ### check query

  # Builds the final query string from --query (or, when that is not
  # given, the remaining command line arguments) and the field options,
  # joined with " AND ".  Raises PMFetch::Usage when nothing was given.
  def check_query
    p @query if $DEBUG
    @query ||= ARGV.join(" ") unless ARGV.empty?

    p @query if $DEBUG
    @query_str = [ @query, @query_opts ].flatten.compact.join(" AND ")

    p @query_str if $DEBUG
    if @query_str.empty?
      raise PMFetch::Usage
    end
  end


  ### search

  # Runs the search and returns what Bio::PubMed.esearch returns
  # (used as a list of PubMed IDs by the caller).
  def pm_esearch
    return Bio::PubMed.esearch(@query_str, @search_opts)
  end

  # Fetches the entries for _list_ and prints them in the selected format.
  def pm_efetch(list)
    entries = Bio::PubMed.efetch(list)

    if @format == 'medline'
      medline_format(entries)
    else
      entries = parse_entries(entries)
      if @sort == 'page'
        entries = sort_entries(entries)
      end
      if @format == 'report'
        report_format(entries)
      else
        other_format(entries)
      end
    end
  end


  ### output

  # Prints the fetched entries unchanged, each followed by a '//' line.
  def medline_format(entries)
    entries.each do |entry|
      puts entry
      puts '//'
    end
  end

  # Converts the fetched entries into Bio::MEDLINE objects.
  def parse_entries(entries)
    entries.map { |entry| Bio::MEDLINE.new(entry) }
  end

  # Sorts the entries by journal, volume, issue and first page.
  # Enumerable#sort_by is available on every Ruby version BioRuby
  # supports, so no fallback for older interpreters is needed.
  def sort_entries(entries)
    entries.sort_by { |x|
      [ x.journal, x.volume.to_i, x.issue.to_i, x.pages.to_i ]
    }
  end

  # Prints each entry with Bio::Reference#report, followed by a blank line.
  def report_format(entries)
    entries.each do |entry|
      puts entry.reference.report
      puts
    end
  end

  # Prints each entry in the format named by @format, followed by a
  # blank line.
  def other_format(entries)
    entries.each do |entry|
      puts entry.reference.format(@format)
      puts
    end
  end

end


PMFetch.new


__END__

= Examples : PubMed search

These four lines will do the same job.

  % PMFetch transcription factor
  % PMFetch "transcription factor"
  % PMFetch --query "transcription factor"
  % PMFetch -q "transcription factor"


Retrieve max 100 artiecles (20 is a NCBI's default) at a time, use --retmax as

  % PMFetch -q "transcription factor" --retmax 100

and, to retrieve next 100 articles, use --retstart as

  % PMFetch -q "transcription factor" --retmax 100 --retstart 100


You can narrow the search target for an issue of the journal.

  % PMFetch --journal development --volume 131 --issue 3 transcription factor

Short options are also available.

  % PMFetch -j development -v 131 -i 3 transcription factor


Search articles indexed in PubMed within these 90 days.
% PMFetch -q "transcription factor" --reldate 90 Search articles indexed in PubMed during the period of 2001/04/01 to 2001/08/31 % PMFetch -q "transcription factor" --mindate 2001/04/01 --maxdate 2001/08/31 Output format can be changed by --format option. % PMFetch -q "transcription factor" -j development -v 131 -i 3 -f report % PMFetch -q "transcription factor" -j development -v 131 -i 3 -f rd % PMFetch -q "transcription factor" -j development -v 131 -i 3 -f endnote % PMFetch -q "transcription factor" -j development -v 131 -i 3 -f medline % PMFetch -q "transcription factor" -j development -v 131 -i 3 -f bibitem % PMFetch -q "transcription factor" -j development -v 131 -i 3 -f bibtex % PMFetch -q "transcription factor" -j development -v 131 -i 3 -f nature % PMFetch -q "transcription factor" -j development -v 131 -i 3 -f science Generate title listings for the journal report meeting (don't forget to inclease the number of --retmax for fetching all titles). % PMFetch -f report -j development -v 131 -i 3 -n 100 Search by author name. % PMFetch -a "Karlin S" % PMFetch -a "Koonin EV" Search by MeSH term. % PMFetch -m "computational biology" % PMFetch -m "SARS virus" Search by PubMed ID (PMID). % PMFetch 12345 Output PMID only. 
% PMFetch --pmidlist tardigrada bio-1.4.3.0001/bin/br_biofetch.rb0000755000004100000410000000235012200110570016231 0ustar www-datawww-data#!/usr/bin/env ruby # # = biofetch - BioFetch client # # Copyright:: Copyright (C) 2002 # Toshiaki Katayama # License:: The Ruby License # # $Id: br_biofetch.rb,v 1.4 2007/04/05 23:35:39 trevor Exp $ # require 'bio/io/fetch' def usage default_url = 'http://bioruby.org/cgi-bin/biofetch.rb' another_url = 'http://www.ebi.ac.uk/cgi-bin/dbfetch' puts "#{$0} [-s[erver] #{another_url}] db id [style] [format]" puts " server : URL of the BioFetch CGI (default is #{default_url})" puts " db : database name (embl, genbank, etc.)" puts " id : entry id" puts " style : 'raw' or 'html' (default is 'raw')" puts " format : change the output format ('default', 'fasta', etc.)" end if ARGV.empty? or ARGV[0] =~ /^--?h/ usage exit 1 end case ARGV[0] when /^--?s/ # User specified server ARGV.shift serv = Bio::Fetch.new(ARGV.shift) puts serv.fetch(*ARGV) when /^--?e/ # EBI server ARGV.shift serv = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch') puts serv.fetch(*ARGV) when /^--?r/ # BioRuby server ARGV.shift serv = Bio::Fetch.new('http://bioruby.org/cgi-bin/biofetch.rb') puts serv.fetch(*ARGV) else # Default server puts Bio::Fetch.query(*ARGV) end bio-1.4.3.0001/bin/br_biogetseq.rb0000755000004100000410000000143712200110570016435 0ustar www-datawww-data#!/usr/bin/env ruby # # = biogetseq - OBDA sequence data retrieval (executable) # # Copyright:: Copyright (C) 2003 # Toshiaki Katayama # License:: The Ruby License # # $Id: br_biogetseq.rb,v 1.4 2007/04/05 23:35:39 trevor Exp $ # require 'bio' def usage print < [--namespace ] entry_id [entry_id] END exit 1 end if ARGV.size < 3 usage end while ARGV.first =~ /^-/ case ARGV.shift when /^\-\-format/ ARGV.shift raise NotImplementedError when /^\-\-dbname/ dbname = ARGV.shift when /^\-\-namespace/ namespace = ARGV.shift end end reg = Bio::Registry.new db = reg.get_database(dbname) if namespace 
db['namespace'] = namespace end ARGV.each do |entry| puts db.get_by_id(entry) end bio-1.4.3.0001/bin/bioruby0000755000004100000410000000172312200110570015037 0ustar www-datawww-data#!/usr/bin/env ruby # # = BioRuby shell - command line interface for the BioRuby library # # Copyright:: Copyright (C) 2005, 2006, 2007 # Toshiaki Katayama # License:: The Ruby License # # $Id:$ # begin require 'rubygems' gem 'bio', '>= 1.1.0' rescue LoadError require 'bio' end require 'bio/shell' # required to run commands (getseq, ls etc.) include Bio::Shell # setup command line options, working directory, and irb configurations Bio::Shell::Setup.new # loading workspace and command history Bio::Shell.load_session # sets default email address for Entrez eUtils. Bio::NCBI.default_email ||= 'staff@bioruby.org' # main loop if Bio::Shell.cache[:rails] Bio::Shell.cache[:rails].join else Signal.trap("SIGINT") do Bio::Shell.cache[:irb].signal_handle end catch(:IRB_EXIT) do Bio::Shell.cache[:irb].eval_input end end # saving workspace, command history and configuration before exit Bio::Shell.save_session bio-1.4.3.0001/bin/br_bioflat.rb0000755000004100000410000001626012200110570016073 0ustar www-datawww-data#!/usr/bin/env ruby # # = bioflat - OBDA flat file indexer (executable) # # Copyright:: Copyright (C) 2002 # Naohisa Goto # License:: The Ruby License # # $Id: br_bioflat.rb,v 1.17 2007/04/05 23:35:39 trevor Exp $ # require 'bio' def usage print <] [options...] [--files] FILES Update index: #{$0} --update --location DIR --dbname DBNAME [options...] [--files] FILES Create index options: --primary=UNIQUE set primary namespece to UNIQUE Default primary/secondary namespaces depend on each format of flatfiles. --secondary=KEY set secondary namespaces. You may use this option many times to specify more than one namespace. --add-secondary=KEY add secondary namespaces to default specification. You can use this option many times. 
Options only valid for --create (or --update) --type flat: --sort=/path/to/sort use external sort program (e.g. /usr/bin/sort) --sort=BUILTIN use builtin sort routine (default: /usr/bin/sort or BUILTIN) --env=/path/to/env use env program to run sort (default: /usr/bin/env) --env-arg=XXXXXX argument given to the env program (default: LC_ALL=C) (multiple --env-arg=XXXXXX can be specified) Options only valid for --update: --renew re-read all flatfiles and update whole index Backward compatibility: --makeindex DIR/DBNAME same as --create --type flat --location DIR --dbname DBNAME --makeindexBDB DIR/DBNAME same as --create --type bdb --location DIR --dbname DBNAME --format=CLASS instead of genbank|embl|fasta, specifing a class name is allowed Show namespaces: #{$0} --show-namespaces [--location DIR --dbname DBNAME] [DIR/DBNAME] or #{$0} --show-namespaces [--format=CLASS] or #{$0} --show-namespaces --files file EOM end def do_index(mode = :create) case ARGV[0] when /^\-\-?make/ dbpath = ARGV[1] args = ARGV[2..-1] is_bdb = nil when /^\-\-?make.*bdb/i dbname = ARGV[1] args = ARGV[2..-1] is_bdb = Bio::FlatFileIndex::MAGIC_BDB when /^\-\-create/, /^\-\-update/ args = ARGV[1..-1] else usage end options = {} while args.first =~ /^\-/ case x = args.shift # OBDA stuff when /^\-\-?format$/ args.shift format = nil # throw this f*ckin' mess for auto detect :) when /^\-\-?location/ location = args.shift.chomp('/') when /^\-\-?dbname/ dbname = args.shift when /^\-\-?(index)?type/ indextype = args.shift case indextype when /bdb/ is_bdb = Bio::FlatFileIndex::MAGIC_BDB when /flat/ is_bdb = nil else usage end # BioRuby extension when /^\-\-?files/i break when /^\-\-?format\=(.*)/i format = $1 when /^\-\-?sort\=(.*)/i options['sort_program'] = $1 options['onmemory'] = nil when /^\-\-?no\-?te?mp/i options['onmemory'] = true when /^\-\-?env\=(.*)/i options['env_program'] = $1 when /^\-\-?env-arg(?:ument)?\=(.*)/i options['env_program_arguments'] ||= [] options['env_program_arguments'].push 
$1 when /^\-\-?primary.*\=(.*)/i options['primary_namespace'] = $1 when /^\-\-?add-secondary.*\=(.*)/i unless options['additional_secondary_namespaces'] then options['additional_secondary_namespaces'] = [] end options['additional_secondary_namespaces'] << $1 if $1.length > 0 when /^\-\-?secondary.*\=(.*)/i unless options['secondary_namespaces'] then options['secondary_namespaces'] = [] end options['secondary_namespaces'] << $1 if $1.length > 0 when /^\-\-?renew/ options['renew'] = true else $stderr.print "Warning: ignoring invalid option #{x.inspect}\n" end end dbpath = File.join(location, dbname) unless dbpath if mode == :update then Bio::FlatFileIndex::update_index(dbpath, format, options, *args) else Bio::FlatFileIndex::makeindex(is_bdb, dbpath, format, options, *args) end end def do_search dbname = nil location = nil names = [] while x = ARGV.shift case x when /\A\-\-?search/i #do nothing when /\A\-\-?location/i location = ARGV.shift.to_s.chomp('/') when /\A\-\-?dbname/i dbname = ARGV.shift when /\A\-\-?name(?:space)?(?:\=(.+))?/i if $1 then names << $1 elsif x = ARGV.shift names << x end else ARGV.unshift x break end end dbname = ARGV.shift unless dbname dbname = File.join(location, dbname) unless location.to_s.empty? db = Bio::FlatFileIndex.open(dbname) ARGV.each do |key| $stderr.print "Searching for \'#{key}\'...\n" #r = db.search(key) #$stderr.print "OK, #{r.size} entry found\n" #if r.size > 0 then # print r #end begin if names.empty? 
then r = db.include?(key) else r = db.include_in_namespaces?(key, *names) end rescue RuntimeError $stderr.print "ERROR: #{$!}\n" next end r = [] unless r $stderr.print "OK, #{r.size} entry found\n" r.each do |i| print db.search_primary(i) end end db.close end def do_show_namespaces dbname = nil location = nil files = nil format = nil names = [] while x = ARGV.shift case x when /\A\-\-?(show\-)?name(space)?s/i #do nothing when /\A\-\-?location/i location = ARGV.shift.to_s.chomp('/') when /\A\-\-?dbname/i dbname = ARGV.shift when /\A\-\-?format(?:\=(.+))?/i if $1 then format = $1 elsif x = ARGV.shift format = x end when /\A\-\-?files/i files = ARGV break else ARGV.unshift x break end end if files then k = nil files.each do |x| k = Bio::FlatFile.autodetect_file(x) break if k end if k then $stderr.print "Format: #{k.to_s}\n" format = k else $stderr.print "ERROR: couldn't determine file format\n" return end end $stderr.print "Namespaces: (first line: primary namespace)\n" if format then parser = Bio::FlatFileIndex::Indexer::Parser.new(format) print parser.primary.name, "\n" puts parser.secondary.keys else dbname = ARGV.shift unless dbname dbname = File.join(location, dbname) unless location.to_s.empty? 
db = Bio::FlatFileIndex.open(dbname) puts db.namespaces db.close end end if ARGV.size > 1 case ARGV[0] when /--make/, /--create/ Bio::FlatFileIndex::DEBUG.out = true do_index when /--update/ Bio::FlatFileIndex::DEBUG.out = true do_index(:update) when /\A\-\-?(show\-)?name(space)?s/i do_show_namespaces when /--search/ do_search else #default is search do_search end else usage end bio-1.4.3.0001/metadata.yml0000644000004100000410000012071212200110570015171 0ustar www-datawww-data--- !ruby/object:Gem::Specification name: !binary |- Ymlv version: !ruby/object:Gem::Version version: !binary |- MS40LjMuMDAwMQ== prerelease: platform: ruby authors: - !binary |- QmlvUnVieSBwcm9qZWN0 autorequire: bindir: !binary |- Ymlu cert_chain: [] date: 2013-05-24 00:00:00.000000000 Z dependencies: [] description: !binary |- QmlvUnVieSBpcyBhIGxpYnJhcnkgZm9yIGJpb2luZm9ybWF0aWNzIChiaW9s b2d5ICsgaW5mb3JtYXRpb24gc2NpZW5jZSku email: !binary |- c3RhZmZAYmlvcnVieS5vcmc= executables: - !binary |- YmlvcnVieQ== - !binary |- YnJfYmlvZmV0Y2gucmI= - !binary |- YnJfYmlvZmxhdC5yYg== - !binary |- YnJfYmlvZ2V0c2VxLnJi - !binary |- YnJfcG1mZXRjaC5yYg== extensions: [] extra_rdoc_files: - !binary |- S05PV05fSVNTVUVTLnJkb2M= - !binary |- UkVBRE1FLnJkb2M= - !binary |- UkVBRE1FX0RFVi5yZG9j - !binary |- UkVMRUFTRV9OT1RFUy5yZG9j - !binary |- ZG9jL0NoYW5nZXMtMS4zLnJkb2M= - !binary |- ZG9jL1JFTEVBU0VfTk9URVMtMS40LjAucmRvYw== - !binary |- ZG9jL1JFTEVBU0VfTk9URVMtMS40LjEucmRvYw== - !binary |- ZG9jL1JFTEVBU0VfTk9URVMtMS40LjIucmRvYw== files: - !binary |- LnRyYXZpcy55bWw= - !binary |- Q09QWUlORw== - !binary |- Q09QWUlORy5qYQ== - !binary |- Q2hhbmdlTG9n - !binary |- R1BM - !binary |- S05PV05fSVNTVUVTLnJkb2M= - !binary |- TEVHQUw= - !binary |- TEdQTA== - !binary |- UkVBRE1FLnJkb2M= - !binary |- UkVBRE1FX0RFVi5yZG9j - !binary |- UkVMRUFTRV9OT1RFUy5yZG9j - !binary |- UmFrZWZpbGU= - !binary |- YmluL2Jpb3J1Ynk= - !binary |- YmluL2JyX2Jpb2ZldGNoLnJi - !binary |- YmluL2JyX2Jpb2ZsYXQucmI= - !binary |- 
YmluL2JyX2Jpb2dldHNlcS5yYg== - !binary |- YmluL2JyX3BtZmV0Y2gucmI= - !binary |- YmlvcnVieS5nZW1zcGVj - !binary |- YmlvcnVieS5nZW1zcGVjLmVyYg== - !binary |- ZG9jL0NoYW5nZUxvZy1iZWZvcmUtMS4zLjE= - !binary |- ZG9jL0NoYW5nZUxvZy1iZWZvcmUtMS40LjI= - !binary |- ZG9jL0NoYW5nZXMtMC43LnJk - !binary |- ZG9jL0NoYW5nZXMtMS4zLnJkb2M= - !binary |- ZG9jL0tFR0dfQVBJLnJk - !binary |- ZG9jL0tFR0dfQVBJLnJkLmph - !binary |- ZG9jL1JFTEVBU0VfTk9URVMtMS40LjAucmRvYw== - !binary |- ZG9jL1JFTEVBU0VfTk9URVMtMS40LjEucmRvYw== - !binary |- ZG9jL1JFTEVBU0VfTk9URVMtMS40LjIucmRvYw== - !binary |- ZG9jL1R1dG9yaWFsLnJk - !binary |- ZG9jL1R1dG9yaWFsLnJkLmh0bWw= - !binary |- ZG9jL1R1dG9yaWFsLnJkLmph - !binary |- ZG9jL1R1dG9yaWFsLnJkLmphLmh0bWw= - !binary |- ZG9jL2Jpb3J1YnkuY3Nz - !binary |- ZXRjL2Jpb2luZm9ybWF0aWNzL3NlcWRhdGFiYXNlLmluaQ== - !binary |- ZXh0Y29uZi5yYg== - !binary |- Z2VtZmlsZXMvR2VtZmlsZS50cmF2aXMtanJ1YnkxLjg= - !binary |- Z2VtZmlsZXMvR2VtZmlsZS50cmF2aXMtanJ1YnkxLjk= - !binary |- Z2VtZmlsZXMvR2VtZmlsZS50cmF2aXMtcnVieTEuOA== - !binary |- Z2VtZmlsZXMvR2VtZmlsZS50cmF2aXMtcnVieTEuOQ== - !binary |- Z2VtZmlsZXMvbW9kaWZ5LUdlbWZpbGUucmI= - !binary |- Z2VtZmlsZXMvcHJlcGFyZS1nZW1zcGVjLnJi - !binary |- bGliL2Jpby5yYg== - !binary |- bGliL2Jpby9hbGlnbm1lbnQucmI= - !binary |- bGliL2Jpby9hcHBsL2JsMnNlcS9yZXBvcnQucmI= - !binary |- bGliL2Jpby9hcHBsL2JsYXN0LnJi - !binary |- bGliL2Jpby9hcHBsL2JsYXN0L2RkYmoucmI= - !binary |- bGliL2Jpby9hcHBsL2JsYXN0L2Zvcm1hdDAucmI= - !binary |- bGliL2Jpby9hcHBsL2JsYXN0L2Zvcm1hdDgucmI= - !binary |- bGliL2Jpby9hcHBsL2JsYXN0L2dlbm9tZW5ldC5yYg== - !binary |- bGliL2Jpby9hcHBsL2JsYXN0L25jYmlvcHRpb25zLnJi - !binary |- bGliL2Jpby9hcHBsL2JsYXN0L3JlbW90ZS5yYg== - !binary |- bGliL2Jpby9hcHBsL2JsYXN0L3JlcG9ydC5yYg== - !binary |- bGliL2Jpby9hcHBsL2JsYXN0L3JleG1sLnJi - !binary |- bGliL2Jpby9hcHBsL2JsYXN0L3Jwc2JsYXN0LnJi - !binary |- bGliL2Jpby9hcHBsL2JsYXN0L3d1Ymxhc3QucmI= - !binary |- bGliL2Jpby9hcHBsL2JsYXN0L3htbHBhcnNlci5yYg== - !binary |- bGliL2Jpby9hcHBsL2JsYXQvcmVwb3J0LnJi - 
!binary |- bGliL2Jpby9hcHBsL2NsdXN0YWx3LnJi - !binary |- bGliL2Jpby9hcHBsL2NsdXN0YWx3L3JlcG9ydC5yYg== - !binary |- bGliL2Jpby9hcHBsL2VtYm9zcy5yYg== - !binary |- bGliL2Jpby9hcHBsL2Zhc3RhLnJi - !binary |- bGliL2Jpby9hcHBsL2Zhc3RhL2Zvcm1hdDEwLnJi - !binary |- bGliL2Jpby9hcHBsL2djZy9tc2YucmI= - !binary |- bGliL2Jpby9hcHBsL2djZy9zZXEucmI= - !binary |- bGliL2Jpby9hcHBsL2dlbnNjYW4vcmVwb3J0LnJi - !binary |- bGliL2Jpby9hcHBsL2htbWVyLnJi - !binary |- bGliL2Jpby9hcHBsL2htbWVyL3JlcG9ydC5yYg== - !binary |- bGliL2Jpby9hcHBsL2lwcnNjYW4vcmVwb3J0LnJi - !binary |- bGliL2Jpby9hcHBsL21hZmZ0LnJi - !binary |- bGliL2Jpby9hcHBsL21hZmZ0L3JlcG9ydC5yYg== - !binary |- bGliL2Jpby9hcHBsL21lbWUvbWFzdC5yYg== - !binary |- bGliL2Jpby9hcHBsL21lbWUvbWFzdC9yZXBvcnQucmI= - !binary |- bGliL2Jpby9hcHBsL21lbWUvbW90aWYucmI= - !binary |- bGliL2Jpby9hcHBsL211c2NsZS5yYg== - !binary |- bGliL2Jpby9hcHBsL3BhbWwvYmFzZW1sLnJi - !binary |- bGliL2Jpby9hcHBsL3BhbWwvYmFzZW1sL3JlcG9ydC5yYg== - !binary |- bGliL2Jpby9hcHBsL3BhbWwvY29kZW1sLnJi - !binary |- bGliL2Jpby9hcHBsL3BhbWwvY29kZW1sL3JhdGVzLnJi - !binary |- bGliL2Jpby9hcHBsL3BhbWwvY29kZW1sL3JlcG9ydC5yYg== - !binary |- bGliL2Jpby9hcHBsL3BhbWwvY29tbW9uLnJi - !binary |- bGliL2Jpby9hcHBsL3BhbWwvY29tbW9uX3JlcG9ydC5yYg== - !binary |- bGliL2Jpby9hcHBsL3BhbWwveW4wMC5yYg== - !binary |- bGliL2Jpby9hcHBsL3BhbWwveW4wMC9yZXBvcnQucmI= - !binary |- bGliL2Jpby9hcHBsL3BoeWxpcC9hbGlnbm1lbnQucmI= - !binary |- bGliL2Jpby9hcHBsL3BoeWxpcC9kaXN0YW5jZV9tYXRyaXgucmI= - !binary |- bGliL2Jpby9hcHBsL3Byb2Jjb25zLnJi - !binary |- bGliL2Jpby9hcHBsL3Bzb3J0LnJi - !binary |- bGliL2Jpby9hcHBsL3Bzb3J0L3JlcG9ydC5yYg== - !binary |- bGliL2Jpby9hcHBsL3B0czEucmI= - !binary |- bGliL2Jpby9hcHBsL3NpbTQucmI= - !binary |- bGliL2Jpby9hcHBsL3NpbTQvcmVwb3J0LnJi - !binary |- bGliL2Jpby9hcHBsL3Nvc3VpL3JlcG9ydC5yYg== - !binary |- bGliL2Jpby9hcHBsL3NwaWRleS9yZXBvcnQucmI= - !binary |- bGliL2Jpby9hcHBsL3RhcmdldHAvcmVwb3J0LnJi - !binary |- bGliL2Jpby9hcHBsL3Rjb2ZmZWUucmI= - !binary |- 
bGliL2Jpby9hcHBsL3RtaG1tL3JlcG9ydC5yYg== - !binary |- bGliL2Jpby9jb21tYW5kLnJi - !binary |- bGliL2Jpby9jb21wYXQvZmVhdHVyZXMucmI= - !binary |- bGliL2Jpby9jb21wYXQvcmVmZXJlbmNlcy5yYg== - !binary |- bGliL2Jpby9kYXRhL2FhLnJi - !binary |- bGliL2Jpby9kYXRhL2NvZG9udGFibGUucmI= - !binary |- bGliL2Jpby9kYXRhL25hLnJi - !binary |- bGliL2Jpby9kYi5yYg== - !binary |- bGliL2Jpby9kYi9hYWluZGV4LnJi - !binary |- bGliL2Jpby9kYi9iaW9zcWwvYmlvc3FsX3RvX2Jpb3NlcXVlbmNlLnJi - !binary |- bGliL2Jpby9kYi9iaW9zcWwvc2VxdWVuY2UucmI= - !binary |- bGliL2Jpby9kYi9lbWJsL2NvbW1vbi5yYg== - !binary |- bGliL2Jpby9kYi9lbWJsL2VtYmwucmI= - !binary |- bGliL2Jpby9kYi9lbWJsL2VtYmxfdG9fYmlvc2VxdWVuY2UucmI= - !binary |- bGliL2Jpby9kYi9lbWJsL2Zvcm1hdF9lbWJsLnJi - !binary |- bGliL2Jpby9kYi9lbWJsL3NwdHIucmI= - !binary |- bGliL2Jpby9kYi9lbWJsL3N3aXNzcHJvdC5yYg== - !binary |- bGliL2Jpby9kYi9lbWJsL3RyZW1ibC5yYg== - !binary |- bGliL2Jpby9kYi9lbWJsL3VuaXByb3QucmI= - !binary |- bGliL2Jpby9kYi9mYW50b20ucmI= - !binary |- bGliL2Jpby9kYi9mYXN0YS5yYg== - !binary |- bGliL2Jpby9kYi9mYXN0YS9kZWZsaW5lLnJi - !binary |- bGliL2Jpby9kYi9mYXN0YS9mYXN0YV90b19iaW9zZXF1ZW5jZS5yYg== - !binary |- bGliL2Jpby9kYi9mYXN0YS9mb3JtYXRfZmFzdGEucmI= - !binary |- bGliL2Jpby9kYi9mYXN0YS9mb3JtYXRfcXVhbC5yYg== - !binary |- bGliL2Jpby9kYi9mYXN0YS9xdWFsLnJi - !binary |- bGliL2Jpby9kYi9mYXN0YS9xdWFsX3RvX2Jpb3NlcXVlbmNlLnJi - !binary |- bGliL2Jpby9kYi9mYXN0cS5yYg== - !binary |- bGliL2Jpby9kYi9mYXN0cS9mYXN0cV90b19iaW9zZXF1ZW5jZS5yYg== - !binary |- bGliL2Jpby9kYi9mYXN0cS9mb3JtYXRfZmFzdHEucmI= - !binary |- bGliL2Jpby9kYi9nZW5iYW5rL2NvbW1vbi5yYg== - !binary |- bGliL2Jpby9kYi9nZW5iYW5rL2RkYmoucmI= - !binary |- bGliL2Jpby9kYi9nZW5iYW5rL2Zvcm1hdF9nZW5iYW5rLnJi - !binary |- bGliL2Jpby9kYi9nZW5iYW5rL2dlbmJhbmsucmI= - !binary |- bGliL2Jpby9kYi9nZW5iYW5rL2dlbmJhbmtfdG9fYmlvc2VxdWVuY2UucmI= - !binary |- bGliL2Jpby9kYi9nZW5iYW5rL2dlbnBlcHQucmI= - !binary |- bGliL2Jpby9kYi9nZW5iYW5rL3JlZnNlcS5yYg== - !binary |- bGliL2Jpby9kYi9nZmYucmI= - !binary |- 
bGliL2Jpby9kYi9nby5yYg== - !binary |- bGliL2Jpby9kYi9rZWdnL2JyaXRlLnJi - !binary |- bGliL2Jpby9kYi9rZWdnL2NvbW1vbi5yYg== - !binary |- bGliL2Jpby9kYi9rZWdnL2NvbXBvdW5kLnJi - !binary |- bGliL2Jpby9kYi9rZWdnL2RydWcucmI= - !binary |- bGliL2Jpby9kYi9rZWdnL2VuenltZS5yYg== - !binary |- bGliL2Jpby9kYi9rZWdnL2V4cHJlc3Npb24ucmI= - !binary |- bGliL2Jpby9kYi9rZWdnL2dlbmVzLnJi - !binary |- bGliL2Jpby9kYi9rZWdnL2dlbm9tZS5yYg== - !binary |- bGliL2Jpby9kYi9rZWdnL2dseWNhbi5yYg== - !binary |- bGliL2Jpby9kYi9rZWdnL2tlZ2d0YWIucmI= - !binary |- bGliL2Jpby9kYi9rZWdnL2tnbWwucmI= - !binary |- bGliL2Jpby9kYi9rZWdnL21vZHVsZS5yYg== - !binary |- bGliL2Jpby9kYi9rZWdnL29ydGhvbG9neS5yYg== - !binary |- bGliL2Jpby9kYi9rZWdnL3BhdGh3YXkucmI= - !binary |- bGliL2Jpby9kYi9rZWdnL3JlYWN0aW9uLnJi - !binary |- bGliL2Jpby9kYi9rZWdnL3RheG9ub215LnJi - !binary |- bGliL2Jpby9kYi9sYXNlcmdlbmUucmI= - !binary |- bGliL2Jpby9kYi9saXRkYi5yYg== - !binary |- bGliL2Jpby9kYi9tZWRsaW5lLnJi - !binary |- bGliL2Jpby9kYi9uYnJmLnJi - !binary |- bGliL2Jpby9kYi9uZXdpY2sucmI= - !binary |- bGliL2Jpby9kYi9uZXh1cy5yYg== - !binary |- bGliL2Jpby9kYi9wZGIucmI= - !binary |- bGliL2Jpby9kYi9wZGIvYXRvbS5yYg== - !binary |- bGliL2Jpby9kYi9wZGIvY2hhaW4ucmI= - !binary |- bGliL2Jpby9kYi9wZGIvY2hlbWljYWxjb21wb25lbnQucmI= - !binary |- bGliL2Jpby9kYi9wZGIvbW9kZWwucmI= - !binary |- bGliL2Jpby9kYi9wZGIvcGRiLnJi - !binary |- bGliL2Jpby9kYi9wZGIvcmVzaWR1ZS5yYg== - !binary |- bGliL2Jpby9kYi9wZGIvdXRpbHMucmI= - !binary |- bGliL2Jpby9kYi9waHlsb3htbC9waHlsb3htbC54c2Q= - !binary |- bGliL2Jpby9kYi9waHlsb3htbC9waHlsb3htbF9lbGVtZW50cy5yYg== - !binary |- bGliL2Jpby9kYi9waHlsb3htbC9waHlsb3htbF9wYXJzZXIucmI= - !binary |- bGliL2Jpby9kYi9waHlsb3htbC9waHlsb3htbF93cml0ZXIucmI= - !binary |- bGliL2Jpby9kYi9wcm9zaXRlLnJi - !binary |- bGliL2Jpby9kYi9yZWJhc2UucmI= - !binary |- bGliL2Jpby9kYi9zYW5nZXJfY2hyb21hdG9ncmFtL2FiaWYucmI= - !binary |- bGliL2Jpby9kYi9zYW5nZXJfY2hyb21hdG9ncmFtL2Nocm9tYXRvZ3JhbS5y Yg== - !binary |- 
bGliL2Jpby9kYi9zYW5nZXJfY2hyb21hdG9ncmFtL2Nocm9tYXRvZ3JhbV90 b19iaW9zZXF1ZW5jZS5yYg== - !binary |- bGliL2Jpby9kYi9zYW5nZXJfY2hyb21hdG9ncmFtL3NjZi5yYg== - !binary |- bGliL2Jpby9kYi9zb2Z0LnJi - !binary |- bGliL2Jpby9kYi90cmFuc2ZhYy5yYg== - !binary |- bGliL2Jpby9mZWF0dXJlLnJi - !binary |- bGliL2Jpby9pby9iaW9zcWwvYXItYmlvc3FsLnJi - !binary |- bGliL2Jpby9pby9iaW9zcWwvYmlvc3FsLnJi - !binary |- bGliL2Jpby9pby9iaW9zcWwvY29uZmlnL2RhdGFiYXNlLnltbA== - !binary |- bGliL2Jpby9pby9kYXMucmI= - !binary |- bGliL2Jpby9pby9kYmdldC5yYg== - !binary |- bGliL2Jpby9pby9kZGJqcmVzdC5yYg== - !binary |- bGliL2Jpby9pby9kZGJqeG1sLnJi - !binary |- bGliL2Jpby9pby9lYmlzb2FwLnJi - !binary |- bGliL2Jpby9pby9lbnNlbWJsLnJi - !binary |- bGliL2Jpby9pby9mYXN0YWNtZC5yYg== - !binary |- bGliL2Jpby9pby9mZXRjaC5yYg== - !binary |- bGliL2Jpby9pby9mbGF0ZmlsZS5yYg== - !binary |- bGliL2Jpby9pby9mbGF0ZmlsZS9hdXRvZGV0ZWN0aW9uLnJi - !binary |- bGliL2Jpby9pby9mbGF0ZmlsZS9iZGIucmI= - !binary |- bGliL2Jpby9pby9mbGF0ZmlsZS9idWZmZXIucmI= - !binary |- bGliL2Jpby9pby9mbGF0ZmlsZS9pbmRleC5yYg== - !binary |- bGliL2Jpby9pby9mbGF0ZmlsZS9pbmRleGVyLnJi - !binary |- bGliL2Jpby9pby9mbGF0ZmlsZS9zcGxpdHRlci5yYg== - !binary |- bGliL2Jpby9pby9oaWdldC5yYg== - !binary |- bGliL2Jpby9pby9oaW52LnJi - !binary |- bGliL2Jpby9pby9rZWdnYXBpLnJi - !binary |- bGliL2Jpby9pby9uY2JpcmVzdC5yYg== - !binary |- bGliL2Jpby9pby9uY2Jpc29hcC5yYg== - !binary |- bGliL2Jpby9pby9wdWJtZWQucmI= - !binary |- bGliL2Jpby9pby9yZWdpc3RyeS5yYg== - !binary |- bGliL2Jpby9pby9zb2Fwd3NkbC5yYg== - !binary |- bGliL2Jpby9pby9zcWwucmI= - !binary |- bGliL2Jpby9pby90b2dvd3MucmI= - !binary |- bGliL2Jpby9sb2NhdGlvbi5yYg== - !binary |- bGliL2Jpby9tYXAucmI= - !binary |- bGliL2Jpby9wYXRod2F5LnJi - !binary |- bGliL2Jpby9yZWZlcmVuY2UucmI= - !binary |- bGliL2Jpby9zZXF1ZW5jZS5yYg== - !binary |- bGliL2Jpby9zZXF1ZW5jZS9hYS5yYg== - !binary |- bGliL2Jpby9zZXF1ZW5jZS9hZGFwdGVyLnJi - !binary |- bGliL2Jpby9zZXF1ZW5jZS9jb21tb24ucmI= - !binary |- bGliL2Jpby9zZXF1ZW5jZS9jb21wYXQucmI= - !binary |- 
bGliL2Jpby9zZXF1ZW5jZS9kYmxpbmsucmI= - !binary |- bGliL2Jpby9zZXF1ZW5jZS9mb3JtYXQucmI= - !binary |- bGliL2Jpby9zZXF1ZW5jZS9mb3JtYXRfcmF3LnJi - !binary |- bGliL2Jpby9zZXF1ZW5jZS9nZW5lcmljLnJi - !binary |- bGliL2Jpby9zZXF1ZW5jZS9uYS5yYg== - !binary |- bGliL2Jpby9zZXF1ZW5jZS9xdWFsaXR5X3Njb3JlLnJi - !binary |- bGliL2Jpby9zZXF1ZW5jZS9zZXF1ZW5jZV9tYXNrZXIucmI= - !binary |- bGliL2Jpby9zaGVsbC5yYg== - !binary |- bGliL2Jpby9zaGVsbC9jb3JlLnJi - !binary |- bGliL2Jpby9zaGVsbC9kZW1vLnJi - !binary |- bGliL2Jpby9zaGVsbC9pbnRlcmZhY2UucmI= - !binary |- bGliL2Jpby9zaGVsbC9pcmIucmI= - !binary |- bGliL2Jpby9zaGVsbC9vYmplY3QucmI= - !binary |- bGliL2Jpby9zaGVsbC9wbHVnaW4vYmxhc3QucmI= - !binary |- bGliL2Jpby9zaGVsbC9wbHVnaW4vY29kb24ucmI= - !binary |- bGliL2Jpby9zaGVsbC9wbHVnaW4vZGFzLnJi - !binary |- bGliL2Jpby9zaGVsbC9wbHVnaW4vZW1ib3NzLnJi - !binary |- bGliL2Jpby9zaGVsbC9wbHVnaW4vZW50cnkucmI= - !binary |- bGliL2Jpby9zaGVsbC9wbHVnaW4vZmxhdGZpbGUucmI= - !binary |- bGliL2Jpby9zaGVsbC9wbHVnaW4va2VnZ2FwaS5yYg== - !binary |- bGliL2Jpby9zaGVsbC9wbHVnaW4vbWlkaS5yYg== - !binary |- bGliL2Jpby9zaGVsbC9wbHVnaW4vbmNiaXJlc3QucmI= - !binary |- bGliL2Jpby9zaGVsbC9wbHVnaW4vb2JkYS5yYg== - !binary |- bGliL2Jpby9zaGVsbC9wbHVnaW4vcHNvcnQucmI= - !binary |- bGliL2Jpby9zaGVsbC9wbHVnaW4vc2VxLnJi - !binary |- bGliL2Jpby9zaGVsbC9wbHVnaW4vc29hcC5yYg== - !binary |- bGliL2Jpby9zaGVsbC9wbHVnaW4vdG9nb3dzLnJi - !binary |- bGliL2Jpby9zaGVsbC9yYWlscy92ZW5kb3IvcGx1Z2lucy9iaW9ydWJ5L2dl bmVyYXRvcnMvYmlvcnVieS9iaW9ydWJ5X2dlbmVyYXRvci5yYg== - !binary |- bGliL2Jpby9zaGVsbC9yYWlscy92ZW5kb3IvcGx1Z2lucy9iaW9ydWJ5L2dl bmVyYXRvcnMvYmlvcnVieS90ZW1wbGF0ZXMvX2NsYXNzZXMucmh0bWw= - !binary |- bGliL2Jpby9zaGVsbC9yYWlscy92ZW5kb3IvcGx1Z2lucy9iaW9ydWJ5L2dl bmVyYXRvcnMvYmlvcnVieS90ZW1wbGF0ZXMvX2xvZy5yaHRtbA== - !binary |- bGliL2Jpby9zaGVsbC9yYWlscy92ZW5kb3IvcGx1Z2lucy9iaW9ydWJ5L2dl bmVyYXRvcnMvYmlvcnVieS90ZW1wbGF0ZXMvX21ldGhvZHMucmh0bWw= - !binary |- bGliL2Jpby9zaGVsbC9yYWlscy92ZW5kb3IvcGx1Z2lucy9iaW9ydWJ5L2dl 
bmVyYXRvcnMvYmlvcnVieS90ZW1wbGF0ZXMvX21vZHVsZXMucmh0bWw= - !binary |- bGliL2Jpby9zaGVsbC9yYWlscy92ZW5kb3IvcGx1Z2lucy9iaW9ydWJ5L2dl bmVyYXRvcnMvYmlvcnVieS90ZW1wbGF0ZXMvX3ZhcmlhYmxlcy5yaHRtbA== - !binary |- bGliL2Jpby9zaGVsbC9yYWlscy92ZW5kb3IvcGx1Z2lucy9iaW9ydWJ5L2dl bmVyYXRvcnMvYmlvcnVieS90ZW1wbGF0ZXMvYmlvcnVieS1iZy5naWY= - !binary |- bGliL2Jpby9zaGVsbC9yYWlscy92ZW5kb3IvcGx1Z2lucy9iaW9ydWJ5L2dl bmVyYXRvcnMvYmlvcnVieS90ZW1wbGF0ZXMvYmlvcnVieS1nZW0ucG5n - !binary |- bGliL2Jpby9zaGVsbC9yYWlscy92ZW5kb3IvcGx1Z2lucy9iaW9ydWJ5L2dl bmVyYXRvcnMvYmlvcnVieS90ZW1wbGF0ZXMvYmlvcnVieS1saW5rLmdpZg== - !binary |- bGliL2Jpby9zaGVsbC9yYWlscy92ZW5kb3IvcGx1Z2lucy9iaW9ydWJ5L2dl bmVyYXRvcnMvYmlvcnVieS90ZW1wbGF0ZXMvYmlvcnVieS5jc3M= - !binary |- bGliL2Jpby9zaGVsbC9yYWlscy92ZW5kb3IvcGx1Z2lucy9iaW9ydWJ5L2dl bmVyYXRvcnMvYmlvcnVieS90ZW1wbGF0ZXMvYmlvcnVieS5yaHRtbA== - !binary |- bGliL2Jpby9zaGVsbC9yYWlscy92ZW5kb3IvcGx1Z2lucy9iaW9ydWJ5L2dl bmVyYXRvcnMvYmlvcnVieS90ZW1wbGF0ZXMvYmlvcnVieV9jb250cm9sbGVy LnJi - !binary |- bGliL2Jpby9zaGVsbC9yYWlscy92ZW5kb3IvcGx1Z2lucy9iaW9ydWJ5L2dl bmVyYXRvcnMvYmlvcnVieS90ZW1wbGF0ZXMvYmlvcnVieV9oZWxwZXIucmI= - !binary |- bGliL2Jpby9zaGVsbC9yYWlscy92ZW5kb3IvcGx1Z2lucy9iaW9ydWJ5L2dl bmVyYXRvcnMvYmlvcnVieS90ZW1wbGF0ZXMvY29tbWFuZHMucmh0bWw= - !binary |- bGliL2Jpby9zaGVsbC9yYWlscy92ZW5kb3IvcGx1Z2lucy9iaW9ydWJ5L2dl bmVyYXRvcnMvYmlvcnVieS90ZW1wbGF0ZXMvaGlzdG9yeS5yaHRtbA== - !binary |- bGliL2Jpby9zaGVsbC9yYWlscy92ZW5kb3IvcGx1Z2lucy9iaW9ydWJ5L2dl bmVyYXRvcnMvYmlvcnVieS90ZW1wbGF0ZXMvaW5kZXgucmh0bWw= - !binary |- bGliL2Jpby9zaGVsbC9yYWlscy92ZW5kb3IvcGx1Z2lucy9iaW9ydWJ5L2dl bmVyYXRvcnMvYmlvcnVieS90ZW1wbGF0ZXMvc3Bpbm5lci5naWY= - !binary |- bGliL2Jpby9zaGVsbC9zY3JpcHQucmI= - !binary |- bGliL2Jpby9zaGVsbC9zZXR1cC5yYg== - !binary |- bGliL2Jpby9zaGVsbC93ZWIucmI= - !binary |- bGliL2Jpby90cmVlLnJi - !binary |- bGliL2Jpby90cmVlL291dHB1dC5yYg== - !binary |- bGliL2Jpby91dGlsL2NvbG9yX3NjaGVtZS5yYg== - !binary |- bGliL2Jpby91dGlsL2NvbG9yX3NjaGVtZS9idXJpZWQucmI= - !binary |- 
bGliL2Jpby91dGlsL2NvbG9yX3NjaGVtZS9oZWxpeC5yYg== - !binary |- bGliL2Jpby91dGlsL2NvbG9yX3NjaGVtZS9oeWRyb3BhdGh5LnJi - !binary |- bGliL2Jpby91dGlsL2NvbG9yX3NjaGVtZS9udWNsZW90aWRlLnJi - !binary |- bGliL2Jpby91dGlsL2NvbG9yX3NjaGVtZS9zdHJhbmQucmI= - !binary |- bGliL2Jpby91dGlsL2NvbG9yX3NjaGVtZS90YXlsb3IucmI= - !binary |- bGliL2Jpby91dGlsL2NvbG9yX3NjaGVtZS90dXJuLnJi - !binary |- bGliL2Jpby91dGlsL2NvbG9yX3NjaGVtZS96YXBwby5yYg== - !binary |- bGliL2Jpby91dGlsL2NvbnRpbmdlbmN5X3RhYmxlLnJi - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS5yYg== - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9hbmFseXNpcy5yYg== - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9hbmFseXNpc19iYXNp Yy5yYg== - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9jdXRfc3ltYm9sLnJi - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9kZW5zZV9pbnRfYXJy YXkucmI= - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9kb3VibGVfc3RyYW5k ZWQucmI= - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9kb3VibGVfc3RyYW5k ZWQvYWxpZ25lZF9zdHJhbmRzLnJi - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9kb3VibGVfc3RyYW5k ZWQvY3V0X2xvY2F0aW9uX3BhaXIucmI= - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9kb3VibGVfc3RyYW5k ZWQvY3V0X2xvY2F0aW9uX3BhaXJfaW5fZW56eW1lX25vdGF0aW9uLnJi - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9kb3VibGVfc3RyYW5k ZWQvY3V0X2xvY2F0aW9ucy5yYg== - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9kb3VibGVfc3RyYW5k ZWQvY3V0X2xvY2F0aW9uc19pbl9lbnp5bWVfbm90YXRpb24ucmI= - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9lbnp5bWVzLnlhbWw= - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9yYW5nZS9jdXRfcmFu Z2UucmI= - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9yYW5nZS9jdXRfcmFu Z2VzLnJi - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9yYW5nZS9ob3Jpem9u dGFsX2N1dF9yYW5nZS5yYg== - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9yYW5nZS9zZXF1ZW5j ZV9yYW5nZS5yYg== - !binary |- 
bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9yYW5nZS9zZXF1ZW5j ZV9yYW5nZS9jYWxjdWxhdGVkX2N1dHMucmI= - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9yYW5nZS9zZXF1ZW5j ZV9yYW5nZS9mcmFnbWVudC5yYg== - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9yYW5nZS9zZXF1ZW5j ZV9yYW5nZS9mcmFnbWVudHMucmI= - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9yYW5nZS92ZXJ0aWNh bF9jdXRfcmFuZ2UucmI= - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9zaW5nbGVfc3RyYW5k LnJi - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9zaW5nbGVfc3RyYW5k L2N1dF9sb2NhdGlvbnNfaW5fZW56eW1lX25vdGF0aW9uLnJi - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9zaW5nbGVfc3RyYW5k X2NvbXBsZW1lbnQucmI= - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9zb3J0ZWRfbnVtX2Fy cmF5LnJi - !binary |- bGliL2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9zdHJpbmdfZm9ybWF0 dGluZy5yYg== - !binary |- bGliL2Jpby91dGlsL3Npcm5hLnJi - !binary |- bGliL2Jpby92ZXJzaW9uLnJi - !binary |- c2FtcGxlL2FueTJmYXN0YS5yYg== - !binary |- c2FtcGxlL2Jpb2ZldGNoLnJi - !binary |- c2FtcGxlL2NvbG9yX3NjaGVtZV9uYS5yYg== - !binary |- c2FtcGxlL2RiZ2V0 - !binary |- c2FtcGxlL2RlbW9fYWFpbmRleC5yYg== - !binary |- c2FtcGxlL2RlbW9fYW1pbm9hY2lkLnJi - !binary |- c2FtcGxlL2RlbW9fYmwyc2VxX3JlcG9ydC5yYg== - !binary |- c2FtcGxlL2RlbW9fYmxhc3RfcmVwb3J0LnJi - !binary |- c2FtcGxlL2RlbW9fY29kb250YWJsZS5yYg== - !binary |- c2FtcGxlL2RlbW9fZGFzLnJi - !binary |- c2FtcGxlL2RlbW9fZGRianhtbC5yYg== - !binary |- c2FtcGxlL2RlbW9fZmFzdGFfcmVtb3RlLnJi - !binary |- c2FtcGxlL2RlbW9fZmFzdGFmb3JtYXQucmI= - !binary |- c2FtcGxlL2RlbW9fZ2VuYmFuay5yYg== - !binary |- c2FtcGxlL2RlbW9fZ2Vuc2Nhbl9yZXBvcnQucmI= - !binary |- c2FtcGxlL2RlbW9fZ2ZmMS5yYg== - !binary |- c2FtcGxlL2RlbW9fZ28ucmI= - !binary |- c2FtcGxlL2RlbW9faG1tZXJfcmVwb3J0LnJi - !binary |- c2FtcGxlL2RlbW9fa2VnZ19jb21wb3VuZC5yYg== - !binary |- c2FtcGxlL2RlbW9fa2VnZ19kcnVnLnJi - !binary |- c2FtcGxlL2RlbW9fa2VnZ19nZW5vbWUucmI= - !binary |- c2FtcGxlL2RlbW9fa2VnZ19nbHljYW4ucmI= - !binary |- 
c2FtcGxlL2RlbW9fa2VnZ19vcnRob2xvZ3kucmI= - !binary |- c2FtcGxlL2RlbW9fa2VnZ19yZWFjdGlvbi5yYg== - !binary |- c2FtcGxlL2RlbW9fa2VnZ190YXhvbm9teS5yYg== - !binary |- c2FtcGxlL2RlbW9fa2VnZ2FwaS5yYg== - !binary |- c2FtcGxlL2RlbW9fbGl0ZGIucmI= - !binary |- c2FtcGxlL2RlbW9fbG9jYXRpb25zLnJi - !binary |- c2FtcGxlL2RlbW9fbmNiaV9yZXN0LnJi - !binary |- c2FtcGxlL2RlbW9fbnVjbGVpY2FjaWQucmI= - !binary |- c2FtcGxlL2RlbW9fcGF0aHdheS5yYg== - !binary |- c2FtcGxlL2RlbW9fcHJvc2l0ZS5yYg== - !binary |- c2FtcGxlL2RlbW9fcHNvcnQucmI= - !binary |- c2FtcGxlL2RlbW9fcHNvcnRfcmVwb3J0LnJi - !binary |- c2FtcGxlL2RlbW9fcHVibWVkLnJi - !binary |- c2FtcGxlL2RlbW9fc2VxdWVuY2UucmI= - !binary |- c2FtcGxlL2RlbW9fc2lybmEucmI= - !binary |- c2FtcGxlL2RlbW9fc29zdWlfcmVwb3J0LnJi - !binary |- c2FtcGxlL2RlbW9fdGFyZ2V0cF9yZXBvcnQucmI= - !binary |- c2FtcGxlL2RlbW9fdG1obW1fcmVwb3J0LnJi - !binary |- c2FtcGxlL2VuenltZXMucmI= - !binary |- c2FtcGxlL2Zhc3RhMnRhYi5yYg== - !binary |- c2FtcGxlL2Zhc3RhZ3JlcC5yYg== - !binary |- c2FtcGxlL2Zhc3Rhc29ydC5yYg== - !binary |- c2FtcGxlL2ZzcGxpdC5yYg== - !binary |- c2FtcGxlL2diMmZhc3RhLnJi - !binary |- c2FtcGxlL2diMnRhYi5yYg== - !binary |- c2FtcGxlL2didGFiMm15c3FsLnJi - !binary |- c2FtcGxlL2dlbmVzMm51Yy5yYg== - !binary |- c2FtcGxlL2dlbmVzMnBlcC5yYg== - !binary |- c2FtcGxlL2dlbmVzMnRhYi5yYg== - !binary |- c2FtcGxlL2dlbm9tZTJyYi5yYg== - !binary |- c2FtcGxlL2dlbm9tZTJ0YWIucmI= - !binary |- c2FtcGxlL2dvc2xpbS5yYg== - !binary |- c2FtcGxlL2d0MmZhc3RhLnJi - !binary |- c2FtcGxlL25hMmFhLnJi - !binary |- c2FtcGxlL3BtZmV0Y2gucmI= - !binary |- c2FtcGxlL3Btc2VhcmNoLnJi - !binary |- c2FtcGxlL3Bzb3J0cGxvdF9odG1sLnJi - !binary |- c2FtcGxlL3NlcWRhdGFiYXNlLmluaQ== - !binary |- c2FtcGxlL3NzZWFyY2gydGFiLnJi - !binary |- c2FtcGxlL3RkaWFyeS5yYg== - !binary |- c2FtcGxlL3Rlc3RfcGh5bG94bWxfYmlnLnJi - !binary |- c2FtcGxlL3Rlc3RfcmVzdHJpY3Rpb25fZW56eW1lX2xvbmcucmI= - !binary |- c2FtcGxlL3RmYXN0eDJ0YWIucmI= - !binary |- c2FtcGxlL3ZzLWdlbmVzLnJi - !binary |- c2V0dXAucmI= - !binary |- 
dGVzdC9iaW9ydWJ5X3Rlc3RfaGVscGVyLnJi - !binary |- dGVzdC9kYXRhL0hNTUVSL2htbXBmYW0ub3V0 - !binary |- dGVzdC9kYXRhL0hNTUVSL2htbXNlYXJjaC5vdXQ= - !binary |- dGVzdC9kYXRhL0tFR0cvMS4xLjEuMS5lbnp5bWU= - !binary |- dGVzdC9kYXRhL0tFR0cvQzAwMDI1LmNvbXBvdW5k - !binary |- dGVzdC9kYXRhL0tFR0cvRDAwMDYzLmRydWc= - !binary |- dGVzdC9kYXRhL0tFR0cvRzAwMDI0LmdseWNhbg== - !binary |- dGVzdC9kYXRhL0tFR0cvRzAxMzY2LmdseWNhbg== - !binary |- dGVzdC9kYXRhL0tFR0cvSzAyMzM4Lm9ydGhvbG9neQ== - !binary |- dGVzdC9kYXRhL0tFR0cvTTAwMTE4Lm1vZHVsZQ== - !binary |- dGVzdC9kYXRhL0tFR0cvUjAwMDA2LnJlYWN0aW9u - !binary |- dGVzdC9kYXRhL0tFR0cvVDAwMDA1Lmdlbm9tZQ== - !binary |- dGVzdC9kYXRhL0tFR0cvVDAwMDcwLmdlbm9tZQ== - !binary |- dGVzdC9kYXRhL0tFR0cvYjA1MjkuZ2VuZQ== - !binary |- dGVzdC9kYXRhL0tFR0cvZWMwMDA3Mi5wYXRod2F5 - !binary |- dGVzdC9kYXRhL0tFR0cvaHNhMDA3OTAucGF0aHdheQ== - !binary |- dGVzdC9kYXRhL0tFR0cva28wMDMxMi5wYXRod2F5 - !binary |- dGVzdC9kYXRhL0tFR0cvbWFwMDAwMzAucGF0aHdheQ== - !binary |- dGVzdC9kYXRhL0tFR0cvbWFwMDAwNTIucGF0aHdheQ== - !binary |- dGVzdC9kYXRhL0tFR0cvcm4wMDI1MC5wYXRod2F5 - !binary |- dGVzdC9kYXRhL0tFR0cvdGVzdC5rZ21s - !binary |- dGVzdC9kYXRhL1NPU1VJL3NhbXBsZS5yZXBvcnQ= - !binary |- dGVzdC9kYXRhL1RNSE1NL3NhbXBsZS5yZXBvcnQ= - !binary |- dGVzdC9kYXRhL2FhaW5kZXgvREFZTTc4MDMwMQ== - !binary |- dGVzdC9kYXRhL2FhaW5kZXgvUFJBTTkwMDEwMg== - !binary |- dGVzdC9kYXRhL2JsMnNlcS9jZDhhX2NkOGJfYmxhc3RwLmJsMnNlcQ== - !binary |- dGVzdC9kYXRhL2JsMnNlcS9jZDhhX3A1M19lLTVibGFzdHAuYmwyc2Vx - !binary |- dGVzdC9kYXRhL2JsYXN0LzIuMi4xNS5ibGFzdHAubTc= - !binary |- dGVzdC9kYXRhL2JsYXN0L2IwMDAyLmZhYQ== - !binary |- dGVzdC9kYXRhL2JsYXN0L2IwMDAyLmZhYS5tMA== - !binary |- dGVzdC9kYXRhL2JsYXN0L2IwMDAyLmZhYS5tNw== - !binary |- dGVzdC9kYXRhL2JsYXN0L2IwMDAyLmZhYS5tOA== - !binary |- dGVzdC9kYXRhL2JsYXN0L2JsYXN0cC1tdWx0aS5tNw== - !binary |- dGVzdC9kYXRhL2NsdXN0YWx3L2V4YW1wbGUxLmFsbg== - !binary |- dGVzdC9kYXRhL2NvbW1hbmQvZWNob2FyZzIuYmF0 - !binary |- dGVzdC9kYXRhL2NvbW1hbmQvZWNob2FyZzIuc2g= - !binary |- 
dGVzdC9kYXRhL2VtYmwvQUIwOTA3MTYuZW1ibA== - !binary |- dGVzdC9kYXRhL2VtYmwvQUIwOTA3MTYuZW1ibC5yZWw4OQ== - !binary |- dGVzdC9kYXRhL2Zhc3RhL0VGVFVfQkFDU1UuZmFzdGE= - !binary |- dGVzdC9kYXRhL2Zhc3RhL2V4YW1wbGUxLnR4dA== - !binary |- dGVzdC9kYXRhL2Zhc3RhL2V4YW1wbGUyLnR4dA== - !binary |- dGVzdC9kYXRhL2Zhc3RxL1JFQURNRS50eHQ= - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX2RpZmZfaWRzLmZhc3Rx - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX2RvdWJsZV9xdWFsLmZhc3Rx - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX2RvdWJsZV9zZXEuZmFzdHE= - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX2xvbmdfcXVhbC5mYXN0cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX25vX3F1YWwuZmFzdHE= - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX3F1YWxfZGVsLmZhc3Rx - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX3F1YWxfZXNjYXBlLmZhc3Rx - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX3F1YWxfbnVsbC5mYXN0cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX3F1YWxfc3BhY2UuZmFzdHE= - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX3F1YWxfdGFiLmZhc3Rx - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX3F1YWxfdW5pdF9zZXAuZmFzdHE= - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX3F1YWxfdnRhYi5mYXN0cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX3Nob3J0X3F1YWwuZmFzdHE= - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX3NwYWNlcy5mYXN0cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX3RhYnMuZmFzdHE= - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX3RydW5jX2F0X3BsdXMuZmFzdHE= - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX3RydW5jX2F0X3F1YWwuZmFzdHE= - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX3RydW5jX2F0X3NlcS5mYXN0cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX3RydW5jX2luX3BsdXMuZmFzdHE= - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX3RydW5jX2luX3F1YWwuZmFzdHE= - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX3RydW5jX2luX3NlcS5mYXN0cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL2Vycm9yX3RydW5jX2luX3RpdGxlLmZhc3Rx - !binary |- dGVzdC9kYXRhL2Zhc3RxL2lsbHVtaW5hX2Z1bGxfcmFuZ2VfYXNfaWxsdW1p bmEuZmFzdHE= - !binary |- dGVzdC9kYXRhL2Zhc3RxL2lsbHVtaW5hX2Z1bGxfcmFuZ2VfYXNfc2FuZ2Vy LmZhc3Rx - !binary |- 
dGVzdC9kYXRhL2Zhc3RxL2lsbHVtaW5hX2Z1bGxfcmFuZ2VfYXNfc29sZXhh LmZhc3Rx - !binary |- dGVzdC9kYXRhL2Zhc3RxL2lsbHVtaW5hX2Z1bGxfcmFuZ2Vfb3JpZ2luYWxf aWxsdW1pbmEuZmFzdHE= - !binary |- dGVzdC9kYXRhL2Zhc3RxL2xvbmdyZWFkc19hc19pbGx1bWluYS5mYXN0cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL2xvbmdyZWFkc19hc19zYW5nZXIuZmFzdHE= - !binary |- dGVzdC9kYXRhL2Zhc3RxL2xvbmdyZWFkc19hc19zb2xleGEuZmFzdHE= - !binary |- dGVzdC9kYXRhL2Zhc3RxL2xvbmdyZWFkc19vcmlnaW5hbF9zYW5nZXIuZmFz dHE= - !binary |- dGVzdC9kYXRhL2Zhc3RxL21pc2NfZG5hX2FzX2lsbHVtaW5hLmZhc3Rx - !binary |- dGVzdC9kYXRhL2Zhc3RxL21pc2NfZG5hX2FzX3Nhbmdlci5mYXN0cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL21pc2NfZG5hX2FzX3NvbGV4YS5mYXN0cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL21pc2NfZG5hX29yaWdpbmFsX3Nhbmdlci5mYXN0 cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL21pc2Nfcm5hX2FzX2lsbHVtaW5hLmZhc3Rx - !binary |- dGVzdC9kYXRhL2Zhc3RxL21pc2Nfcm5hX2FzX3Nhbmdlci5mYXN0cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL21pc2Nfcm5hX2FzX3NvbGV4YS5mYXN0cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL21pc2Nfcm5hX29yaWdpbmFsX3Nhbmdlci5mYXN0 cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL3Nhbmdlcl9mdWxsX3JhbmdlX2FzX2lsbHVtaW5h LmZhc3Rx - !binary |- dGVzdC9kYXRhL2Zhc3RxL3Nhbmdlcl9mdWxsX3JhbmdlX2FzX3Nhbmdlci5m YXN0cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL3Nhbmdlcl9mdWxsX3JhbmdlX2FzX3NvbGV4YS5m YXN0cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL3Nhbmdlcl9mdWxsX3JhbmdlX29yaWdpbmFsX3Nh bmdlci5mYXN0cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL3NvbGV4YV9mdWxsX3JhbmdlX2FzX2lsbHVtaW5h LmZhc3Rx - !binary |- dGVzdC9kYXRhL2Zhc3RxL3NvbGV4YV9mdWxsX3JhbmdlX2FzX3Nhbmdlci5m YXN0cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL3NvbGV4YV9mdWxsX3JhbmdlX2FzX3NvbGV4YS5m YXN0cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL3NvbGV4YV9mdWxsX3JhbmdlX29yaWdpbmFsX3Nv bGV4YS5mYXN0cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL3dyYXBwaW5nX2FzX2lsbHVtaW5hLmZhc3Rx - !binary |- dGVzdC9kYXRhL2Zhc3RxL3dyYXBwaW5nX2FzX3Nhbmdlci5mYXN0cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL3dyYXBwaW5nX2FzX3NvbGV4YS5mYXN0cQ== - !binary |- dGVzdC9kYXRhL2Zhc3RxL3dyYXBwaW5nX29yaWdpbmFsX3Nhbmdlci5mYXN0 
cQ== - !binary |- dGVzdC9kYXRhL2djZy9waWxldXAtYWEubXNm - !binary |- dGVzdC9kYXRhL2dlbmJhbmsvQ0FBMzU5OTcuZ3A= - !binary |- dGVzdC9kYXRhL2dlbmJhbmsvU0NVNDk4NDUuZ2I= - !binary |- dGVzdC9kYXRhL2dlbnNjYW4vc2FtcGxlLnJlcG9ydA== - !binary |- dGVzdC9kYXRhL2dvL3NlbGVjdGVkX2NvbXBvbmVudC5vbnRvbG9neQ== - !binary |- dGVzdC9kYXRhL2dvL3NlbGVjdGVkX2dlbmVfYXNzb2NpYXRpb24uc2dk - !binary |- dGVzdC9kYXRhL2dvL3NlbGVjdGVkX3dpa2lwZWRpYTJnbw== - !binary |- dGVzdC9kYXRhL2lwcnNjYW4vbWVyZ2VkLnJhdw== - !binary |- dGVzdC9kYXRhL2lwcnNjYW4vbWVyZ2VkLnR4dA== - !binary |- dGVzdC9kYXRhL2xpdGRiLzE3MTcyMjYubGl0ZGI= - !binary |- dGVzdC9kYXRhL21lZGxpbmUvMjAxNDYxNDhfbW9kaWZpZWQubWVkbGluZQ== - !binary |- dGVzdC9kYXRhL21lbWUvZGI= - !binary |- dGVzdC9kYXRhL21lbWUvbWFzdA== - !binary |- dGVzdC9kYXRhL21lbWUvbWFzdC5vdXQ= - !binary |- dGVzdC9kYXRhL21lbWUvbWVtZS5vdXQ= - !binary |- dGVzdC9kYXRhL3BhbWwvY29kZW1sL2NvbnRyb2xfZmlsZS50eHQ= - !binary |- dGVzdC9kYXRhL3BhbWwvY29kZW1sL21vZGVscy9hYS5hbG4= - !binary |- dGVzdC9kYXRhL3BhbWwvY29kZW1sL21vZGVscy9hYS5kbmQ= - !binary |- dGVzdC9kYXRhL3BhbWwvY29kZW1sL21vZGVscy9hYS5waA== - !binary |- dGVzdC9kYXRhL3BhbWwvY29kZW1sL21vZGVscy9hbGlnbm1lbnQucGh5 - !binary |- dGVzdC9kYXRhL3BhbWwvY29kZW1sL21vZGVscy9yZXN1bHRzMC0zLnR4dA== - !binary |- dGVzdC9kYXRhL3BhbWwvY29kZW1sL21vZGVscy9yZXN1bHRzNy04LnR4dA== - !binary |- dGVzdC9kYXRhL3BhbWwvY29kZW1sL291dHB1dC50eHQ= - !binary |- dGVzdC9kYXRhL3BhbWwvY29kZW1sL3JhdGVz - !binary |- dGVzdC9kYXRhL3BoeWxveG1sL2FwYWYueG1s - !binary |- dGVzdC9kYXRhL3BoeWxveG1sL2JjbF8yLnhtbA== - !binary |- dGVzdC9kYXRhL3BoeWxveG1sL21hZGVfdXAueG1s - !binary |- dGVzdC9kYXRhL3BoeWxveG1sL25jYmlfdGF4b25vbXlfbW9sbHVzY2Ffc2hv cnQueG1s - !binary |- dGVzdC9kYXRhL3BoeWxveG1sL3BoeWxveG1sX2V4YW1wbGVzLnhtbA== - !binary |- dGVzdC9kYXRhL3Bpci9DUkFCX0FOQVBMLnBpcg== - !binary |- dGVzdC9kYXRhL3Byb3NpdGUvcHJvc2l0ZS5kYXQ= - !binary |- dGVzdC9kYXRhL3JlZnNlcS9ubV8xMjYzNTUuZW50cmV0 - !binary |- dGVzdC9kYXRhL3Jwc2JsYXN0L21pc2MucnBzYmxhc3Q= - !binary |- 
dGVzdC9kYXRhL3Nhbmdlcl9jaHJvbWF0b2dyYW0vdGVzdF9jaHJvbWF0b2dy YW1fYWJpZi5hYjE= - !binary |- dGVzdC9kYXRhL3Nhbmdlcl9jaHJvbWF0b2dyYW0vdGVzdF9jaHJvbWF0b2dy YW1fc2NmX3YyLnNjZg== - !binary |- dGVzdC9kYXRhL3Nhbmdlcl9jaHJvbWF0b2dyYW0vdGVzdF9jaHJvbWF0b2dy YW1fc2NmX3YzLnNjZg== - !binary |- dGVzdC9kYXRhL3NpbTQvY29tcGxlbWVudC1BNC5zaW00 - !binary |- dGVzdC9kYXRhL3NpbTQvc2ltcGxlLUE0LnNpbTQ= - !binary |- dGVzdC9kYXRhL3NpbTQvc2ltcGxlMi1BNC5zaW00 - !binary |- dGVzdC9kYXRhL3NvZnQvR0RTMTAwX3BhcnRpYWwuc29mdA== - !binary |- dGVzdC9kYXRhL3NvZnQvR1NFMzQ1N19mYW1pbHlfcGFydGlhbC5zb2Z0 - !binary |- dGVzdC9kYXRhL3VuaXByb3QvcDUzX2h1bWFuLnVuaXByb3Q= - !binary |- dGVzdC9mdW5jdGlvbmFsL2Jpby9zZXF1ZW5jZS90ZXN0X291dHB1dF9lbWJs LnJi - !binary |- dGVzdC9mdW5jdGlvbmFsL2Jpby90ZXN0X2NvbW1hbmQucmI= - !binary |- dGVzdC9uZXR3b3JrL2Jpby9hcHBsL2JsYXN0L3Rlc3RfcmVtb3RlLnJi - !binary |- dGVzdC9uZXR3b3JrL2Jpby9hcHBsL3Rlc3RfYmxhc3QucmI= - !binary |- dGVzdC9uZXR3b3JrL2Jpby9hcHBsL3Rlc3RfcHRzMS5yYg== - !binary |- dGVzdC9uZXR3b3JrL2Jpby9pby90ZXN0X2RkYmpyZXN0LnJi - !binary |- dGVzdC9uZXR3b3JrL2Jpby9pby90ZXN0X2Vuc2VtYmwucmI= - !binary |- dGVzdC9uZXR3b3JrL2Jpby9pby90ZXN0X3B1Ym1lZC5yYg== - !binary |- dGVzdC9uZXR3b3JrL2Jpby9pby90ZXN0X3NvYXB3c2RsLnJi - !binary |- dGVzdC9uZXR3b3JrL2Jpby9pby90ZXN0X3RvZ293cy5yYg== - !binary |- dGVzdC9uZXR3b3JrL2Jpby90ZXN0X2NvbW1hbmQucmI= - !binary |- dGVzdC9ydW5uZXIucmI= - !binary |- dGVzdC91bml0L2Jpby9hcHBsL2JsMnNlcS90ZXN0X3JlcG9ydC5yYg== - !binary |- dGVzdC91bml0L2Jpby9hcHBsL2JsYXN0L3Rlc3RfbmNiaW9wdGlvbnMucmI= - !binary |- dGVzdC91bml0L2Jpby9hcHBsL2JsYXN0L3Rlc3RfcmVwb3J0LnJi - !binary |- dGVzdC91bml0L2Jpby9hcHBsL2JsYXN0L3Rlc3RfcnBzYmxhc3QucmI= - !binary |- dGVzdC91bml0L2Jpby9hcHBsL2NsdXN0YWx3L3Rlc3RfcmVwb3J0LnJi - !binary |- dGVzdC91bml0L2Jpby9hcHBsL2djZy90ZXN0X21zZi5yYg== - !binary |- dGVzdC91bml0L2Jpby9hcHBsL2dlbnNjYW4vdGVzdF9yZXBvcnQucmI= - !binary |- dGVzdC91bml0L2Jpby9hcHBsL2htbWVyL3Rlc3RfcmVwb3J0LnJi - !binary |- dGVzdC91bml0L2Jpby9hcHBsL2lwcnNjYW4vdGVzdF9yZXBvcnQucmI= - !binary |- 
dGVzdC91bml0L2Jpby9hcHBsL21hZmZ0L3Rlc3RfcmVwb3J0LnJi - !binary |- dGVzdC91bml0L2Jpby9hcHBsL21lbWUvbWFzdC90ZXN0X3JlcG9ydC5yYg== - !binary |- dGVzdC91bml0L2Jpby9hcHBsL21lbWUvdGVzdF9tYXN0LnJi - !binary |- dGVzdC91bml0L2Jpby9hcHBsL21lbWUvdGVzdF9tb3RpZi5yYg== - !binary |- dGVzdC91bml0L2Jpby9hcHBsL3BhbWwvY29kZW1sL3Rlc3RfcmF0ZXMucmI= - !binary |- dGVzdC91bml0L2Jpby9hcHBsL3BhbWwvY29kZW1sL3Rlc3RfcmVwb3J0LnJi - !binary |- dGVzdC91bml0L2Jpby9hcHBsL3BhbWwvY29kZW1sL3Rlc3RfcmVwb3J0X3Np bmdsZS5yYg== - !binary |- dGVzdC91bml0L2Jpby9hcHBsL3BhbWwvdGVzdF9jb2RlbWwucmI= - !binary |- dGVzdC91bml0L2Jpby9hcHBsL3NpbTQvdGVzdF9yZXBvcnQucmI= - !binary |- dGVzdC91bml0L2Jpby9hcHBsL3Nvc3VpL3Rlc3RfcmVwb3J0LnJi - !binary |- dGVzdC91bml0L2Jpby9hcHBsL3RhcmdldHAvdGVzdF9yZXBvcnQucmI= - !binary |- dGVzdC91bml0L2Jpby9hcHBsL3Rlc3RfYmxhc3QucmI= - !binary |- dGVzdC91bml0L2Jpby9hcHBsL3Rlc3RfZmFzdGEucmI= - !binary |- dGVzdC91bml0L2Jpby9hcHBsL3Rlc3RfcHRzMS5yYg== - !binary |- dGVzdC91bml0L2Jpby9hcHBsL3RtaG1tL3Rlc3RfcmVwb3J0LnJi - !binary |- dGVzdC91bml0L2Jpby9kYXRhL3Rlc3RfYWEucmI= - !binary |- dGVzdC91bml0L2Jpby9kYXRhL3Rlc3RfY29kb250YWJsZS5yYg== - !binary |- dGVzdC91bml0L2Jpby9kYXRhL3Rlc3RfbmEucmI= - !binary |- dGVzdC91bml0L2Jpby9kYi9iaW9zcWwvdGNfYmlvc3FsLnJi - !binary |- dGVzdC91bml0L2Jpby9kYi9iaW9zcWwvdHNfc3VpdGVfYmlvc3FsLnJi - !binary |- dGVzdC91bml0L2Jpby9kYi9lbWJsL3Rlc3RfY29tbW9uLnJi - !binary |- dGVzdC91bml0L2Jpby9kYi9lbWJsL3Rlc3RfZW1ibC5yYg== - !binary |- dGVzdC91bml0L2Jpby9kYi9lbWJsL3Rlc3RfZW1ibF9yZWw4OS5yYg== - !binary |- dGVzdC91bml0L2Jpby9kYi9lbWJsL3Rlc3RfZW1ibF90b19iaW9zZXEucmI= - !binary |- dGVzdC91bml0L2Jpby9kYi9lbWJsL3Rlc3Rfc3B0ci5yYg== - !binary |- dGVzdC91bml0L2Jpby9kYi9lbWJsL3Rlc3RfdW5pcHJvdC5yYg== - !binary |- dGVzdC91bml0L2Jpby9kYi9lbWJsL3Rlc3RfdW5pcHJvdF9uZXdfcGFydC5y Yg== - !binary |- dGVzdC91bml0L2Jpby9kYi9mYXN0YS90ZXN0X2RlZmxpbmUucmI= - !binary |- dGVzdC91bml0L2Jpby9kYi9mYXN0YS90ZXN0X2RlZmxpbmVfbWlzYy5yYg== - !binary |- dGVzdC91bml0L2Jpby9kYi9mYXN0YS90ZXN0X2Zvcm1hdF9xdWFsLnJi - !binary 
|- dGVzdC91bml0L2Jpby9kYi9nZW5iYW5rL3Rlc3RfY29tbW9uLnJi - !binary |- dGVzdC91bml0L2Jpby9kYi9nZW5iYW5rL3Rlc3RfZ2VuYmFuay5yYg== - !binary |- dGVzdC91bml0L2Jpby9kYi9nZW5iYW5rL3Rlc3RfZ2VucGVwdC5yYg== - !binary |- dGVzdC91bml0L2Jpby9kYi9rZWdnL3Rlc3RfY29tcG91bmQucmI= - !binary |- dGVzdC91bml0L2Jpby9kYi9rZWdnL3Rlc3RfZHJ1Zy5yYg== - !binary |- dGVzdC91bml0L2Jpby9kYi9rZWdnL3Rlc3RfZW56eW1lLnJi - !binary |- dGVzdC91bml0L2Jpby9kYi9rZWdnL3Rlc3RfZ2VuZXMucmI= - !binary |- dGVzdC91bml0L2Jpby9kYi9rZWdnL3Rlc3RfZ2Vub21lLnJi - !binary |- dGVzdC91bml0L2Jpby9kYi9rZWdnL3Rlc3RfZ2x5Y2FuLnJi - !binary |- dGVzdC91bml0L2Jpby9kYi9rZWdnL3Rlc3Rfa2dtbC5yYg== - !binary |- dGVzdC91bml0L2Jpby9kYi9rZWdnL3Rlc3RfbW9kdWxlLnJi - !binary |- dGVzdC91bml0L2Jpby9kYi9rZWdnL3Rlc3Rfb3J0aG9sb2d5LnJi - !binary |- dGVzdC91bml0L2Jpby9kYi9rZWdnL3Rlc3RfcGF0aHdheS5yYg== - !binary |- dGVzdC91bml0L2Jpby9kYi9rZWdnL3Rlc3RfcmVhY3Rpb24ucmI= - !binary |- dGVzdC91bml0L2Jpby9kYi9wZGIvdGVzdF9wZGIucmI= - !binary |- dGVzdC91bml0L2Jpby9kYi9zYW5nZXJfY2hyb21hdG9ncmFtL3Rlc3RfYWJp Zi5yYg== - !binary |- dGVzdC91bml0L2Jpby9kYi9zYW5nZXJfY2hyb21hdG9ncmFtL3Rlc3Rfc2Nm LnJi - !binary |- dGVzdC91bml0L2Jpby9kYi90ZXN0X2FhaW5kZXgucmI= - !binary |- dGVzdC91bml0L2Jpby9kYi90ZXN0X2Zhc3RhLnJi - !binary |- dGVzdC91bml0L2Jpby9kYi90ZXN0X2Zhc3RxLnJi - !binary |- dGVzdC91bml0L2Jpby9kYi90ZXN0X2dmZi5yYg== - !binary |- dGVzdC91bml0L2Jpby9kYi90ZXN0X2dvLnJi - !binary |- dGVzdC91bml0L2Jpby9kYi90ZXN0X2xhc2VyZ2VuZS5yYg== - !binary |- dGVzdC91bml0L2Jpby9kYi90ZXN0X2xpdGRiLnJi - !binary |- dGVzdC91bml0L2Jpby9kYi90ZXN0X21lZGxpbmUucmI= - !binary |- dGVzdC91bml0L2Jpby9kYi90ZXN0X25icmYucmI= - !binary |- dGVzdC91bml0L2Jpby9kYi90ZXN0X25ld2ljay5yYg== - !binary |- dGVzdC91bml0L2Jpby9kYi90ZXN0X25leHVzLnJi - !binary |- dGVzdC91bml0L2Jpby9kYi90ZXN0X3BoeWxveG1sLnJi - !binary |- dGVzdC91bml0L2Jpby9kYi90ZXN0X3BoeWxveG1sX3dyaXRlci5yYg== - !binary |- dGVzdC91bml0L2Jpby9kYi90ZXN0X3Byb3NpdGUucmI= - !binary |- dGVzdC91bml0L2Jpby9kYi90ZXN0X3F1YWwucmI= - !binary |- 
dGVzdC91bml0L2Jpby9kYi90ZXN0X3JlYmFzZS5yYg== - !binary |- dGVzdC91bml0L2Jpby9kYi90ZXN0X3NvZnQucmI= - !binary |- dGVzdC91bml0L2Jpby9pby9mbGF0ZmlsZS90ZXN0X2F1dG9kZXRlY3Rpb24u cmI= - !binary |- dGVzdC91bml0L2Jpby9pby9mbGF0ZmlsZS90ZXN0X2J1ZmZlci5yYg== - !binary |- dGVzdC91bml0L2Jpby9pby9mbGF0ZmlsZS90ZXN0X3NwbGl0dGVyLnJi - !binary |- dGVzdC91bml0L2Jpby9pby90ZXN0X2RkYmp4bWwucmI= - !binary |- dGVzdC91bml0L2Jpby9pby90ZXN0X2Vuc2VtYmwucmI= - !binary |- dGVzdC91bml0L2Jpby9pby90ZXN0X2Zhc3RhY21kLnJi - !binary |- dGVzdC91bml0L2Jpby9pby90ZXN0X2ZsYXRmaWxlLnJi - !binary |- dGVzdC91bml0L2Jpby9pby90ZXN0X3NvYXB3c2RsLnJi - !binary |- dGVzdC91bml0L2Jpby9pby90ZXN0X3RvZ293cy5yYg== - !binary |- dGVzdC91bml0L2Jpby9zZXF1ZW5jZS90ZXN0X2FhLnJi - !binary |- dGVzdC91bml0L2Jpby9zZXF1ZW5jZS90ZXN0X2NvbW1vbi5yYg== - !binary |- dGVzdC91bml0L2Jpby9zZXF1ZW5jZS90ZXN0X2NvbXBhdC5yYg== - !binary |- dGVzdC91bml0L2Jpby9zZXF1ZW5jZS90ZXN0X2RibGluay5yYg== - !binary |- dGVzdC91bml0L2Jpby9zZXF1ZW5jZS90ZXN0X25hLnJi - !binary |- dGVzdC91bml0L2Jpby9zZXF1ZW5jZS90ZXN0X3F1YWxpdHlfc2NvcmUucmI= - !binary |- dGVzdC91bml0L2Jpby9zZXF1ZW5jZS90ZXN0X3NlcXVlbmNlX21hc2tlci5y Yg== - !binary |- dGVzdC91bml0L2Jpby9zaGVsbC9wbHVnaW4vdGVzdF9zZXEucmI= - !binary |- dGVzdC91bml0L2Jpby90ZXN0X2FsaWdubWVudC5yYg== - !binary |- dGVzdC91bml0L2Jpby90ZXN0X2NvbW1hbmQucmI= - !binary |- dGVzdC91bml0L2Jpby90ZXN0X2RiLnJi - !binary |- dGVzdC91bml0L2Jpby90ZXN0X2ZlYXR1cmUucmI= - !binary |- dGVzdC91bml0L2Jpby90ZXN0X2xvY2F0aW9uLnJi - !binary |- dGVzdC91bml0L2Jpby90ZXN0X21hcC5yYg== - !binary |- dGVzdC91bml0L2Jpby90ZXN0X3BhdGh3YXkucmI= - !binary |- dGVzdC91bml0L2Jpby90ZXN0X3JlZmVyZW5jZS5yYg== - !binary |- dGVzdC91bml0L2Jpby90ZXN0X3NlcXVlbmNlLnJi - !binary |- dGVzdC91bml0L2Jpby90ZXN0X3NoZWxsLnJi - !binary |- dGVzdC91bml0L2Jpby90ZXN0X3RyZWUucmI= - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9hbmFseXNp cy90ZXN0X2NhbGN1bGF0ZWRfY3V0cy5yYg== - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9hbmFseXNp cy90ZXN0X2N1dF9yYW5nZXMucmI= - 
!binary |- dGVzdC91bml0L2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9hbmFseXNp cy90ZXN0X3NlcXVlbmNlX3JhbmdlLnJi - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9kb3VibGVf c3RyYW5kZWQvdGVzdF9hbGlnbmVkX3N0cmFuZHMucmI= - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9kb3VibGVf c3RyYW5kZWQvdGVzdF9jdXRfbG9jYXRpb25fcGFpci5yYg== - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9kb3VibGVf c3RyYW5kZWQvdGVzdF9jdXRfbG9jYXRpb25fcGFpcl9pbl9lbnp5bWVfbm90 YXRpb24ucmI= - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9kb3VibGVf c3RyYW5kZWQvdGVzdF9jdXRfbG9jYXRpb25zLnJi - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9kb3VibGVf c3RyYW5kZWQvdGVzdF9jdXRfbG9jYXRpb25zX2luX2VuenltZV9ub3RhdGlv bi5yYg== - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS9zaW5nbGVf c3RyYW5kL3Rlc3RfY3V0X2xvY2F0aW9uc19pbl9lbnp5bWVfbm90YXRpb24u cmI= - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS90ZXN0X2Fu YWx5c2lzLnJi - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS90ZXN0X2N1 dF9zeW1ib2wucmI= - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS90ZXN0X2Rl bnNlX2ludF9hcnJheS5yYg== - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS90ZXN0X2Rv dWJsZV9zdHJhbmRlZC5yYg== - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS90ZXN0X3Np bmdsZV9zdHJhbmQucmI= - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS90ZXN0X3Np bmdsZV9zdHJhbmRfY29tcGxlbWVudC5yYg== - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS90ZXN0X3Nv cnRlZF9udW1fYXJyYXkucmI= - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Jlc3RyaWN0aW9uX2VuenltZS90ZXN0X3N0 cmluZ19mb3JtYXR0aW5nLnJi - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Rlc3RfY29sb3Jfc2NoZW1lLnJi - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Rlc3RfY29udGluZ2VuY3lfdGFibGUucmI= - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Rlc3RfcmVzdHJpY3Rpb25fZW56eW1lLnJi - !binary |- dGVzdC91bml0L2Jpby91dGlsL3Rlc3Rfc2lybmEucmI= homepage: !binary |- aHR0cDovL2Jpb3J1Ynkub3JnLw== 
licenses: [] post_install_message: rdoc_options: - !binary |- LS1tYWlu - !binary |- UkVBRE1FLnJkb2M= - !binary |- LS10aXRsZQ== - !binary |- QmlvUnVieSBBUEkgZG9jdW1lbnRhdGlvbg== - !binary |- LS1leGNsdWRl - !binary |- XC55YW1sXHo= - !binary |- LS1saW5lLW51bWJlcnM= - !binary |- LS1pbmxpbmUtc291cmNl require_paths: - !binary |- bGli required_ruby_version: !ruby/object:Gem::Requirement none: false requirements: - - ! '>=' - !ruby/object:Gem::Version version: '0' required_rubygems_version: !ruby/object:Gem::Requirement none: false requirements: - - ! '>=' - !ruby/object:Gem::Version version: '0' requirements: [] rubyforge_project: !binary |- YmlvcnVieQ== rubygems_version: 1.8.23 signing_key: specification_version: 3 summary: !binary |- QmlvaW5mb3JtYXRpY3MgbGlicmFyeQ== test_files: [] bio-1.4.3.0001/setup.rb0000644000004100000410000010751112200110570014355 0ustar www-datawww-data# # setup.rb # # Copyright (c) 2000-2006 Minero Aoki # # This program is free software. # You can distribute/modify this program under the terms of # the GNU LGPL, Lesser General Public License version 2.1. # unless Enumerable.method_defined?(:map) # Ruby 1.4.6 module Enumerable alias map collect end end unless File.respond_to?(:read) # Ruby 1.6 def File.read(fname) open(fname) {|f| return f.read } end end unless Errno.const_defined?(:ENOTEMPTY) # Windows? module Errno class ENOTEMPTY # We do not raise this exception, implementation is not needed. end end end def File.binread(fname) open(fname, 'rb') {|f| return f.read } end # for corrupted Windows' stat(2) def File.dir?(path) File.directory?((path[-1,1] == '/') ? path : path + '/') end class ConfigTable include Enumerable def initialize(rbconfig) @rbconfig = rbconfig @items = [] @table = {} # options @install_prefix = nil @config_opt = nil @verbose = true @no_harm = false end attr_accessor :install_prefix attr_accessor :config_opt attr_writer :verbose def verbose? @verbose end attr_writer :no_harm def no_harm? 
@no_harm end def [](key) lookup(key).resolve(self) end def []=(key, val) lookup(key).set val end def names @items.map {|i| i.name } end def each(&block) @items.each(&block) end def key?(name) @table.key?(name) end def lookup(name) @table[name] or setup_rb_error "no such config item: #{name}" end def add(item) @items.push item @table[item.name] = item end def remove(name) item = lookup(name) @items.delete_if {|i| i.name == name } @table.delete_if {|name, i| i.name == name } item end def load_script(path, inst = nil) if File.file?(path) MetaConfigEnvironment.new(self, inst).instance_eval File.read(path), path end end def savefile '.config' end def load_savefile begin File.foreach(savefile()) do |line| k, v = *line.split(/=/, 2) self[k] = v.strip end rescue Errno::ENOENT setup_rb_error $!.message + "\n#{File.basename($0)} config first" end end def save @items.each {|i| i.value } File.open(savefile(), 'w') {|f| @items.each do |i| f.printf "%s=%s\n", i.name, i.value if i.value? and i.value end } end def load_standard_entries standard_entries(@rbconfig).each do |ent| add ent end end def standard_entries(rbconfig) c = rbconfig rubypath = File.join(c['bindir'], c['ruby_install_name'] + c['EXEEXT']) major = c['MAJOR'].to_i minor = c['MINOR'].to_i teeny = c['TEENY'].to_i version = "#{major}.#{minor}" # ruby ver. >= 1.4.4? 
newpath_p = ((major >= 2) or ((major == 1) and ((minor >= 5) or ((minor == 4) and (teeny >= 4))))) if c['rubylibdir'] # V > 1.6.3 libruby = "#{c['prefix']}/lib/ruby" librubyver = c['rubylibdir'] librubyverarch = c['archdir'] siteruby = c['sitedir'] siterubyver = c['sitelibdir'] siterubyverarch = c['sitearchdir'] elsif newpath_p # 1.4.4 <= V <= 1.6.3 libruby = "#{c['prefix']}/lib/ruby" librubyver = "#{c['prefix']}/lib/ruby/#{version}" librubyverarch = "#{c['prefix']}/lib/ruby/#{version}/#{c['arch']}" siteruby = c['sitedir'] siterubyver = "$siteruby/#{version}" siterubyverarch = "$siterubyver/#{c['arch']}" else # V < 1.4.4 libruby = "#{c['prefix']}/lib/ruby" librubyver = "#{c['prefix']}/lib/ruby/#{version}" librubyverarch = "#{c['prefix']}/lib/ruby/#{version}/#{c['arch']}" siteruby = "#{c['prefix']}/lib/ruby/#{version}/site_ruby" siterubyver = siteruby siterubyverarch = "$siterubyver/#{c['arch']}" end parameterize = lambda {|path| path.sub(/\A#{Regexp.quote(c['prefix'])}/, '$prefix') } if arg = c['configure_args'].split.detect {|arg| /--with-make-prog=/ =~ arg } makeprog = arg.sub(/'/, '').split(/=/, 2)[1] else makeprog = 'make' end [ ExecItem.new('installdirs', 'std/site/home', 'std: install under libruby; site: install under site_ruby; home: install under $HOME')\ {|val, table| case val when 'std' table['rbdir'] = '$librubyver' table['sodir'] = '$librubyverarch' when 'site' table['rbdir'] = '$siterubyver' table['sodir'] = '$siterubyverarch' when 'home' setup_rb_error '$HOME was not set' unless ENV['HOME'] table['prefix'] = ENV['HOME'] table['rbdir'] = '$libdir/ruby' table['sodir'] = '$libdir/ruby' end }, PathItem.new('prefix', 'path', c['prefix'], 'path prefix of target environment'), PathItem.new('bindir', 'path', parameterize.call(c['bindir']), 'the directory for commands'), PathItem.new('libdir', 'path', parameterize.call(c['libdir']), 'the directory for libraries'), PathItem.new('datadir', 'path', parameterize.call(c['datadir']), 'the directory for shared 
data'), PathItem.new('mandir', 'path', parameterize.call(c['mandir']), 'the directory for man pages'), PathItem.new('sysconfdir', 'path', parameterize.call(c['sysconfdir']), 'the directory for system configuration files'), PathItem.new('localstatedir', 'path', parameterize.call(c['localstatedir']), 'the directory for local state data'), PathItem.new('libruby', 'path', libruby, 'the directory for ruby libraries'), PathItem.new('librubyver', 'path', librubyver, 'the directory for standard ruby libraries'), PathItem.new('librubyverarch', 'path', librubyverarch, 'the directory for standard ruby extensions'), PathItem.new('siteruby', 'path', siteruby, 'the directory for version-independent aux ruby libraries'), PathItem.new('siterubyver', 'path', siterubyver, 'the directory for aux ruby libraries'), PathItem.new('siterubyverarch', 'path', siterubyverarch, 'the directory for aux ruby binaries'), PathItem.new('rbdir', 'path', '$siterubyver', 'the directory for ruby scripts'), PathItem.new('sodir', 'path', '$siterubyverarch', 'the directory for ruby extentions'), PathItem.new('rubypath', 'path', rubypath, 'the path to set to #! line'), ProgramItem.new('rubyprog', 'name', rubypath, 'the ruby program using for installation'), ProgramItem.new('makeprog', 'name', makeprog, 'the make program to compile ruby extentions'), SelectItem.new('shebang', 'all/ruby/never', 'ruby', 'shebang line (#!) 
editing mode'), BoolItem.new('without-ext', 'yes/no', 'no', 'does not compile/install ruby extentions') ] end private :standard_entries def load_multipackage_entries multipackage_entries().each do |ent| add ent end end def multipackage_entries [ PackageSelectionItem.new('with', 'name,name...', '', 'ALL', 'package names that you want to install'), PackageSelectionItem.new('without', 'name,name...', '', 'NONE', 'package names that you do not want to install') ] end private :multipackage_entries ALIASES = { 'std-ruby' => 'librubyver', 'stdruby' => 'librubyver', 'rubylibdir' => 'librubyver', 'archdir' => 'librubyverarch', 'site-ruby-common' => 'siteruby', # For backward compatibility 'site-ruby' => 'siterubyver', # For backward compatibility 'bin-dir' => 'bindir', 'bin-dir' => 'bindir', 'rb-dir' => 'rbdir', 'so-dir' => 'sodir', 'data-dir' => 'datadir', 'ruby-path' => 'rubypath', 'ruby-prog' => 'rubyprog', 'ruby' => 'rubyprog', 'make-prog' => 'makeprog', 'make' => 'makeprog' } def fixup ALIASES.each do |ali, name| @table[ali] = @table[name] end end def options_re /\A--(#{@table.keys.join('|')})(?:=(.*))?\z/ end def parse_opt(opt) m = options_re().match(opt) or setup_rb_error "config: unknown option #{opt}" m.to_a[1,2] end def dllext @rbconfig['DLEXT'] end def value_config?(name) lookup(name).value? end class Item def initialize(name, template, default, desc) @name = name.freeze @template = template @value = default @default = default @description = desc end attr_reader :name attr_reader :description attr_accessor :default alias help_default default def help_opt "--#{@name}=#{@template}" end def value? 
true end def value @value end def resolve(table) @value.gsub(%r<\$([^/]+)>) { table[$1] } end def set(val) @value = check(val) end private def check(val) setup_rb_error "config: --#{name} requires argument" unless val val end end class BoolItem < Item def config_type 'bool' end def help_opt "--#{@name}" end private def check(val) return 'yes' unless val case val when /\Ay(es)?\z/i, /\At(rue)?\z/i then 'yes' when /\An(o)?\z/i, /\Af(alse)\z/i then 'no' else setup_rb_error "config: --#{@name} accepts only yes/no for argument" end end end class PathItem < Item def config_type 'path' end private def check(path) setup_rb_error "config: --#{@name} requires argument" unless path path[0,1] == '$' ? path : File.expand_path(path) end end class ProgramItem < Item def config_type 'program' end end class SelectItem < Item def initialize(name, selection, default, desc) super @ok = selection.split('/') end def config_type 'select' end private def check(val) unless @ok.include?(val.strip) setup_rb_error "config: use --#{@name}=#{@template} (#{val})" end val.strip end end class ExecItem < Item def initialize(name, selection, desc, &block) super name, selection, nil, desc @ok = selection.split('/') @action = block end def config_type 'exec' end def value? 
false end def resolve(table) setup_rb_error "$#{name()} wrongly used as option value" end undef set def evaluate(val, table) v = val.strip.downcase unless @ok.include?(v) setup_rb_error "invalid option --#{@name}=#{val} (use #{@template})" end @action.call v, table end end class PackageSelectionItem < Item def initialize(name, template, default, help_default, desc) super name, template, default, desc @help_default = help_default end attr_reader :help_default def config_type 'package' end private def check(val) unless File.dir?("packages/#{val}") setup_rb_error "config: no such package: #{val}" end val end end class MetaConfigEnvironment def initialize(config, installer) @config = config @installer = installer end def config_names @config.names end def config?(name) @config.key?(name) end def bool_config?(name) @config.lookup(name).config_type == 'bool' end def path_config?(name) @config.lookup(name).config_type == 'path' end def value_config?(name) @config.lookup(name).config_type != 'exec' end def add_config(item) @config.add item end def add_bool_config(name, default, desc) @config.add BoolItem.new(name, 'yes/no', default ? 'yes' : 'no', desc) end def add_path_config(name, default, desc) @config.add PathItem.new(name, 'path', default, desc) end def set_config_default(name, default) @config.lookup(name).default = default end def remove_config(name) @config.remove(name) end # For only multipackage def packages raise '[setup.rb fatal] multi-package metaconfig API packages() called for single-package; contact application package vendor' unless @installer @installer.packages end # For only multipackage def declare_packages(list) raise '[setup.rb fatal] multi-package metaconfig API declare_packages() called for single-package; contact application package vendor' unless @installer @installer.packages = list end end end # class ConfigTable # This module requires: #verbose?, #no_harm? 
module FileOperations

  # Creates +dirname+ and any missing parent directories.
  # With +prefix+, the expanded +dirname+ is appended to it first.
  # Logs the command when verbose; does nothing more in no-harm mode.
  def mkdir_p(dirname, prefix = nil)
    dirname = prefix + File.expand_path(dirname) if prefix
    $stderr.puts "mkdir -p #{dirname}" if verbose?
    return if no_harm?

    # Does not check '/', it's too abnormal.
    # The lookahead split keeps each "/" attached to the component after it.
    dirs = File.expand_path(dirname).split(%r<(?=/)>)
    if /\A[a-z]:\z/i =~ dirs[0]
      # Re-attach a leading drive letter ("c:") to the first component.
      disk = dirs.shift
      dirs[0] = disk + dirs[0]
    end
    dirs.each_index do |idx|
      path = dirs[0..idx].join('')
      Dir.mkdir path unless File.dir?(path)
    end
  end

  # Removes a single file, ignoring errors (see #force_remove_file).
  def rm_f(path)
    $stderr.puts "rm -f #{path}" if verbose?
    return if no_harm?
    force_remove_file path
  end

  # Removes a file or a whole directory tree.
  def rm_rf(path)
    $stderr.puts "rm -rf #{path}" if verbose?
    return if no_harm?
    remove_tree path
  end

  # Symlinks are unlinked rather than followed; directories are removed
  # recursively; anything else is removed with errors ignored.
  def remove_tree(path)
    if File.symlink?(path)
      remove_file path
    elsif File.dir?(path)
      remove_tree0 path
    else
      force_remove_file path
    end
  end

  # Recursive worker for #remove_tree on a real directory.
  def remove_tree0(path)
    Dir.foreach(path) do |ent|
      next if ent == '.'
      next if ent == '..'
      entpath = "#{path}/#{ent}"
      if File.symlink?(entpath)
        remove_file entpath
      elsif File.dir?(entpath)
        remove_tree0 entpath
      else
        force_remove_file entpath
      end
    end
    begin
      Dir.rmdir path
    rescue Errno::ENOTEMPTY
      # directory may not be empty
    end
  end

  # Moves +src+ to +dest+, replacing any existing +dest+.
  # If File.rename fails, falls back to copy + chmod + unlink.
  def move_file(src, dest)
    force_remove_file dest
    begin
      File.rename src, dest
    rescue
      File.open(dest, 'wb') {|f|
        f.write File.binread(src)
      }
      File.chmod File.stat(src).mode, dest
      File.unlink src
    end
  end

  # Best-effort removal: every StandardError from #remove_file is swallowed.
  def force_remove_file(path)
    begin
      remove_file path
    rescue
    end
  end

  # Makes the file writable (mode 0777) and then unlinks it.
  def remove_file(path)
    File.chmod 0777, path
    File.unlink path
  end

  # Installs file +from+ at +dest+ with permission +mode+.
  # With +prefix+, the expanded +dest+ is placed under it.
  def install(from, dest, mode, prefix = nil)
    $stderr.puts "install #{from} #{dest}" if verbose?
    return if no_harm?

    realdest = prefix ?
prefix + File.expand_path(dest) : dest
    # When the destination is a directory, install into it under the
    # source file's basename.
    realdest = File.join(realdest, File.basename(from)) if File.dir?(realdest)
    str = File.binread(from)
    # Only rewrite the target when its content would actually change.
    if diff?(str, realdest)
      verbose_off {
        rm_f realdest if File.exist?(realdest)
      }
      File.open(realdest, 'wb') {|f|
        f.write str
      }
      File.chmod mode, realdest

      # Record the installed path (with the prefix removed) in InstalledFiles.
      File.open("#{objdir_root()}/InstalledFiles", 'a') {|f|
        if prefix
          f.puts realdest.sub(prefix, '')
        else
          f.puts realdest
        end
      }
    end
  end

  # True when +path+ does not exist or its content differs from +new_content+.
  def diff?(new_content, path)
    return true unless File.exist?(path)
    new_content != File.binread(path)
  end

  # Runs an external command via Kernel#system.
  # Raises RuntimeError when system returns false or nil.
  def command(*args)
    $stderr.puts args.join(' ') if verbose?
    system(*args) or raise RuntimeError,
        "system(#{args.map{|a| a.inspect }.join(' ')}) failed"
  end

  # Runs the configured ruby interpreter ('rubyprog') with +args+.
  def ruby(*args)
    command config('rubyprog'), *args
  end

  # Runs the configured make program ('makeprog'), optionally with a task.
  def make(task = nil)
    command(*[config('makeprog'), task].compact)
  end

  # A directory counts as an extension directory when it contains
  # MANIFEST or extconf.rb.
  def extdir?(dir)
    File.exist?("#{dir}/MANIFEST") or File.exist?("#{dir}/extconf.rb")
  end

  # Names of the regular files directly inside +dir+.
  def files_of(dir)
    Dir.open(dir) {|d|
      return d.select {|ent| File.file?("#{dir}/#{ent}") }
    }
  end

  # Directory names that #directories_of never returns.
  DIR_REJECT = %w( . .. CVS SCCS RCS CVS.adm .svn )

  # Names of the sub-directories of +dir+, minus DIR_REJECT.
  def directories_of(dir)
    Dir.open(dir) {|d|
      return d.select {|ent| File.dir?("#{dir}/#{ent}") } - DIR_REJECT
    }
  end

end


# This module requires: #srcdir_root, #objdir_root, #relpath
module HookScriptAPI

  # Looks up a configuration value by key.
  def get_config(key)
    @config[key]
  end

  alias config get_config

  # obsolete: use metaconfig to change configuration
  def set_config(key, val)
    @config[key] = val
  end

  #
  # srcdir/objdir (works only in the package directory)
  #

  # Source directory currently being processed.
  def curr_srcdir
    "#{srcdir_root()}/#{relpath()}"
  end

  # Object (build) directory currently being processed.
  def curr_objdir
    "#{objdir_root()}/#{relpath()}"
  end

  # Path of +path+ inside the current source directory.
  def srcfile(path)
    "#{curr_srcdir()}/#{path}"
  end

  def srcexist?(path)
    File.exist?(srcfile(path))
  end

  def srcdirectory?(path)
    File.dir?(srcfile(path))
  end

  def srcfile?(path)
    File.file?(srcfile(path))
  end

  # All entries of +path+ (relative to the current source directory)
  # except "." and "..".
  def srcentries(path = '.')
    Dir.open("#{curr_srcdir()}/#{path}") {|d|
      return d.to_a - %w(. ..)
}
  end

  # Regular files among #srcentries(path).
  def srcfiles(path = '.')
    srcentries(path).select {|fname|
      File.file?(File.join(curr_srcdir(), path, fname))
    }
  end

  # Directories among #srcentries(path).
  def srcdirectories(path = '.')
    srcentries(path).select {|fname|
      File.dir?(File.join(curr_srcdir(), path, fname))
    }
  end

end


# Command-line driver for a single-package archive.
class ToplevelInstaller

  Version   = '3.4.1'
  Copyright = 'Copyright (c) 2000-2006 Minero Aoki'

  # [task name, description] pairs; used for the usage text and to build
  # the valid-task regexp.
  TASKS = [
    [ 'all', 'do config, setup, then install' ],
    [ 'config', 'saves your configurations' ],
    [ 'show', 'shows current configuration' ],
    [ 'setup', 'compiles ruby extentions and others' ],
    [ 'install', 'installs files' ],
    [ 'test', 'run all tests in test/' ],
    [ 'clean', "does `make clean' for each extention" ],
    [ 'distclean',"does `make distclean' for each extention" ]
  ]

  # Entry point: builds the config table, picks the single- or
  # multi-package installer class, and runs it from the script's directory.
  def ToplevelInstaller.invoke
    config = ConfigTable.new(load_rbconfig())
    config.load_standard_entries
    config.load_multipackage_entries if multipackage?
    config.fixup
    klass = (multipackage?() ? ToplevelInstallerMulti : ToplevelInstaller)
    klass.new(File.dirname($0), config).invoke
  end

  def ToplevelInstaller.multipackage?
File.dir?(File.dirname($0) + '/packages') end def ToplevelInstaller.load_rbconfig if arg = ARGV.detect {|arg| /\A--rbconfig=/ =~ arg } ARGV.delete(arg) load File.expand_path(arg.split(/=/, 2)[1]) $".push 'rbconfig.rb' else require 'rbconfig' end ::Config::CONFIG end def initialize(ardir_root, config) @ardir = File.expand_path(ardir_root) @config = config # cache @valid_task_re = nil end def config(key) @config[key] end def inspect "#<#{self.class} #{__id__()}>" end def invoke run_metaconfigs case task = parsearg_global() when nil, 'all' parsearg_config init_installers exec_config exec_setup exec_install else case task when 'config', 'test' ; when 'clean', 'distclean' @config.load_savefile if File.exist?(@config.savefile) else @config.load_savefile end __send__ "parsearg_#{task}" init_installers __send__ "exec_#{task}" end end def run_metaconfigs @config.load_script "#{@ardir}/metaconfig" end def init_installers @installer = Installer.new(@config, @ardir, File.expand_path('.')) end # # Hook Script API bases # def srcdir_root @ardir end def objdir_root '.' end def relpath '.' end # # Option Parsing # def parsearg_global while arg = ARGV.shift case arg when /\A\w+\z/ setup_rb_error "invalid task: #{arg}" unless valid_task?(arg) return arg when '-q', '--quiet' @config.verbose = false when '--verbose' @config.verbose = true when '--help' print_usage $stdout exit 0 when '--version' puts "#{File.basename($0)} version #{Version}" exit 0 when '--copyright' puts Copyright exit 0 else setup_rb_error "unknown global option '#{arg}'" end end nil end def valid_task?(t) valid_task_re() =~ t end def valid_task_re @valid_task_re ||= /\A(?:#{TASKS.map {|task,desc| task }.join('|')})\z/ end def parsearg_no_options unless ARGV.empty? 
task = caller(0).first.slice(%r<`parsearg_(\w+)'>, 1) setup_rb_error "#{task}: unknown options: #{ARGV.join(' ')}" end end alias parsearg_show parsearg_no_options alias parsearg_setup parsearg_no_options alias parsearg_test parsearg_no_options alias parsearg_clean parsearg_no_options alias parsearg_distclean parsearg_no_options def parsearg_config evalopt = [] set = [] @config.config_opt = [] while i = ARGV.shift if /\A--?\z/ =~ i @config.config_opt = ARGV.dup break end name, value = *@config.parse_opt(i) if @config.value_config?(name) @config[name] = value else evalopt.push [name, value] end set.push name end evalopt.each do |name, value| @config.lookup(name).evaluate value, @config end # Check if configuration is valid set.each do |n| @config[n] if @config.value_config?(n) end end def parsearg_install @config.no_harm = false @config.install_prefix = '' while a = ARGV.shift case a when '--no-harm' @config.no_harm = true when /\A--prefix=/ path = a.split(/=/, 2)[1] path = File.expand_path(path) unless path[0,1] == '/' @config.install_prefix = path else setup_rb_error "install: unknown option #{a}" end end end def print_usage(out) out.puts 'Typical Installation Procedure:' out.puts " $ ruby #{File.basename $0} config" out.puts " $ ruby #{File.basename $0} setup" out.puts " # ruby #{File.basename $0} install (may require root privilege)" out.puts out.puts 'Detailed Usage:' out.puts " ruby #{File.basename $0} " out.puts " ruby #{File.basename $0} [] []" fmt = " %-24s %s\n" out.puts out.puts 'Global options:' out.printf fmt, '-q,--quiet', 'suppress message outputs' out.printf fmt, ' --verbose', 'output messages verbosely' out.printf fmt, ' --help', 'print this message' out.printf fmt, ' --version', 'print version and quit' out.printf fmt, ' --copyright', 'print copyright and quit' out.puts out.puts 'Tasks:' TASKS.each do |name, desc| out.printf fmt, name, desc end fmt = " %-24s %s [%s]\n" out.puts out.puts 'Options for CONFIG or ALL:' @config.each do |item| out.printf 
fmt, item.help_opt, item.description, item.help_default end out.printf fmt, '--rbconfig=path', 'rbconfig.rb to load',"running ruby's" out.puts out.puts 'Options for INSTALL:' out.printf fmt, '--no-harm', 'only display what to do if given', 'off' out.printf fmt, '--prefix=path', 'install path prefix', '' out.puts end # # Task Handlers # def exec_config @installer.exec_config @config.save # must be final end def exec_setup @installer.exec_setup end def exec_install @installer.exec_install end def exec_test @installer.exec_test end def exec_show @config.each do |i| printf "%-20s %s\n", i.name, i.value if i.value? end end def exec_clean @installer.exec_clean end def exec_distclean @installer.exec_distclean end end # class ToplevelInstaller class ToplevelInstallerMulti < ToplevelInstaller include FileOperations def initialize(ardir_root, config) super @packages = directories_of("#{@ardir}/packages") raise 'no package exists' if @packages.empty? @root_installer = Installer.new(@config, @ardir, File.expand_path('.')) end def run_metaconfigs @config.load_script "#{@ardir}/metaconfig", self @packages.each do |name| @config.load_script "#{@ardir}/packages/#{name}/metaconfig" end end attr_reader :packages def packages=(list) raise 'package list is empty' if list.empty? list.each do |name| raise "directory packages/#{name} does not exist"\ unless File.dir?("#{@ardir}/packages/#{name}") end @packages = list end def init_installers @installers = {} @packages.each do |pack| @installers[pack] = Installer.new(@config, "#{@ardir}/packages/#{pack}", "packages/#{pack}") end with = extract_selection(config('with')) without = extract_selection(config('without')) @selected = @installers.keys.select {|name| (with.empty? 
or with.include?(name)) \ and not without.include?(name) } end def extract_selection(list) a = list.split(/,/) a.each do |name| setup_rb_error "no such package: #{name}" unless @installers.key?(name) end a end def print_usage(f) super f.puts 'Inluded packages:' f.puts ' ' + @packages.sort.join(' ') f.puts end # # Task Handlers # def exec_config run_hook 'pre-config' each_selected_installers {|inst| inst.exec_config } run_hook 'post-config' @config.save # must be final end def exec_setup run_hook 'pre-setup' each_selected_installers {|inst| inst.exec_setup } run_hook 'post-setup' end def exec_install run_hook 'pre-install' each_selected_installers {|inst| inst.exec_install } run_hook 'post-install' end def exec_test run_hook 'pre-test' each_selected_installers {|inst| inst.exec_test } run_hook 'post-test' end def exec_clean rm_f @config.savefile run_hook 'pre-clean' each_selected_installers {|inst| inst.exec_clean } run_hook 'post-clean' end def exec_distclean rm_f @config.savefile run_hook 'pre-distclean' each_selected_installers {|inst| inst.exec_distclean } run_hook 'post-distclean' end # # lib # def each_selected_installers Dir.mkdir 'packages' unless File.dir?('packages') @selected.each do |pack| $stderr.puts "Processing the package `#{pack}' ..." if verbose? Dir.mkdir "packages/#{pack}" unless File.dir?("packages/#{pack}") Dir.chdir "packages/#{pack}" yield @installers[pack] Dir.chdir '../..' end end def run_hook(id) @root_installer.run_hook id end # module FileOperations requires this def verbose? @config.verbose? end # module FileOperations requires this def no_harm? @config.no_harm? end end # class ToplevelInstallerMulti class Installer FILETYPES = %w( bin lib ext data conf man ) include FileOperations include HookScriptAPI def initialize(config, srcroot, objroot) @config = config @srcdir = File.expand_path(srcroot) @objdir = File.expand_path(objroot) @currdir = '.' 
end

  def inspect
    "#<#{self.class} #{File.basename(@srcdir)}>"
  end

  # Do-nothing handler; aliased below for directory types a task ignores.
  def noop(rel)
  end

  #
  # Hook Script API base methods
  #

  def srcdir_root
    @srcdir
  end

  def objdir_root
    @objdir
  end

  # Directory currently being traversed, relative to the package root.
  def relpath
    @currdir
  end

  #
  # Config Access
  #

  # module FileOperations requires this
  def verbose?
    @config.verbose?
  end

  # module FileOperations requires this
  def no_harm?
    @config.no_harm?
  end

  # Runs the block with verbose output switched off, restoring the
  # previous setting afterwards (also when the block raises).
  def verbose_off
    begin
      save, @config.verbose = @config.verbose?, false
      yield
    ensure
      @config.verbose = save
    end
  end

  #
  # TASK config
  #

  def exec_config
    exec_task_traverse 'config'
  end

  alias config_dir_bin noop
  alias config_dir_lib noop

  # Runs extconf.rb when the current source directory is an extension dir.
  def config_dir_ext(rel)
    extconf if extdir?(curr_srcdir())
  end

  alias config_dir_data noop
  alias config_dir_conf noop
  alias config_dir_man noop

  # Runs extconf.rb with the pass-through options saved by parsearg_config.
  def extconf
    ruby "#{curr_srcdir()}/extconf.rb", *@config.config_opt
  end

  #
  # TASK setup
  #

  def exec_setup
    exec_task_traverse 'setup'
  end

  # Rewrites the shebang line of every file in the current bin directory.
  def setup_dir_bin(rel)
    files_of(curr_srcdir()).each do |fname|
      update_shebang_line "#{curr_srcdir()}/#{fname}"
    end
  end

  alias setup_dir_lib noop

  # Runs make when the current source directory is an extension dir.
  def setup_dir_ext(rel)
    make if extdir?(curr_srcdir())
  end

  alias setup_dir_data noop
  alias setup_dir_conf noop
  alias setup_dir_man noop

  # Updates the "#!" line of +path+ according to config('shebang'):
  # 'never' leaves files alone; 'all' also adds a shebang to files that
  # have none; otherwise only an existing shebang is considered.
  def update_shebang_line(path)
    return if no_harm?
    return if config('shebang') == 'never'
    old = Shebang.load(path)
    if old
      $stderr.puts "warning: #{path}: Shebang line includes too many args. It is not portable and your program may not work." if old.args.size > 1
      new = new_shebang(old)
      # Nothing to do when the line would come out unchanged.
      return if new.to_s == old.to_s
    else
      return unless config('shebang') == 'all'
      new = Shebang.new(config('rubypath'))
    end
    $stderr.puts "updating shebang: #{File.basename(path)}" if verbose?
open_atomic_writer(path) {|output|
      File.open(path, 'rb') {|f|
        f.gets if old   # discard
        output.puts new.to_s
        output.print f.read
      }
    }
  end

  # Computes the replacement for shebang +old+.
  # "ruby..." and "env ruby" commands are pointed at config('rubypath'),
  # keeping the remaining arguments; any other command is kept as is
  # unless config('shebang') is 'all'.
  def new_shebang(old)
    if /\Aruby/ =~ File.basename(old.cmd)
      Shebang.new(config('rubypath'), old.args)
    elsif File.basename(old.cmd) == 'env' and old.args.first == 'ruby'
      Shebang.new(config('rubypath'), old.args[1..-1])
    else
      return old unless config('shebang') == 'all'
      Shebang.new(config('rubypath'))
    end
  end

  # Yields a writer for "<basename of path>.tmp" in the current directory,
  # then renames it to the basename of +path+, also in the current directory.
  # The temporary file is removed if it still exists afterwards.
  def open_atomic_writer(path, &block)
    tmpfile = File.basename(path) + '.tmp'
    begin
      File.open(tmpfile, 'wb', &block)
      File.rename tmpfile, File.basename(path)
    ensure
      File.unlink tmpfile if File.exist?(tmpfile)
    end
  end

  # A parsed "#!" line: the command plus its arguments.
  class Shebang
    # Reads the first line of +path+; returns nil unless it starts with "#!".
    def Shebang.load(path)
      line = nil
      File.open(path) {|f|
        line = f.gets
      }
      return nil unless /\A#!/ =~ line
      parse(line)
    end

    # Splits the line (without the leading "#!") on spaces into the
    # command and its arguments.
    def Shebang.parse(line)
      cmd, *args = *line.strip.sub(/\A\#!/, '').split(' ')
      new(cmd, args)
    end

    def initialize(cmd, args = [])
      @cmd = cmd
      @args = args
    end

    attr_reader :cmd
    attr_reader :args

    # Renders the line as "#! cmd arg ...".
    def to_s
      "#! #{@cmd}" + (@args.empty? ? '' : " #{@args.join(' ')}")
    end
  end

  #
  # TASK install
  #

  # Starts a fresh InstalledFiles list, then installs every directory type.
  def exec_install
    rm_f 'InstalledFiles'
    exec_task_traverse 'install'
  end

  def install_dir_bin(rel)
    install_files targetfiles(), "#{config('bindir')}/#{rel}", 0755, strip_ext?
  end

  def strip_ext?
/mswin|mingw/ !~ RUBY_PLATFORM end def install_dir_lib(rel) install_files libfiles(), "#{config('rbdir')}/#{rel}", 0644 end def install_dir_ext(rel) return unless extdir?(curr_srcdir()) install_files rubyextentions('.'), "#{config('sodir')}/#{File.dirname(rel)}", 0555 end def install_dir_data(rel) install_files targetfiles(), "#{config('datadir')}/#{rel}", 0644 end def install_dir_conf(rel) # FIXME: should not remove current config files # (rename previous file to .old/.org) install_files targetfiles(), "#{config('sysconfdir')}/#{rel}", 0644 end def install_dir_man(rel) install_files targetfiles(), "#{config('mandir')}/#{rel}", 0644 end def install_files(list, dest, mode, stripext = false) mkdir_p dest, @config.install_prefix list.each do |fname| if stripext install fname, "#{dest}/#{File.basename(fname, '.*')}", mode, @config.install_prefix else install fname, dest, mode, @config.install_prefix end end end def libfiles glob_reject(%w(*.y *.output), targetfiles()) end def rubyextentions(dir) ents = glob_select("*.#{@config.dllext}", targetfiles()) if ents.empty? setup_rb_error "no ruby extention exists: 'ruby #{$0} setup' first" end ents end def targetfiles mapdir(existfiles() - hookfiles()) end def mapdir(ents) ents.map {|ent| if File.exist?(ent) then ent # objdir else "#{curr_srcdir()}/#{ent}" # srcdir end } end # picked up many entries from cvs-1.11.1/src/ignore.c JUNK_FILES = %w( core RCSLOG tags TAGS .make.state .nse_depinfo #* .#* cvslog.* ,* .del-* *.olb *~ *.old *.bak *.BAK *.orig *.rej _$* *$ *.org *.in .* ) def existfiles glob_reject(JUNK_FILES, (files_of(curr_srcdir()) | files_of('.'))) end def hookfiles %w( pre-%s post-%s pre-%s.rb post-%s.rb ).map {|fmt| %w( config setup install clean distclean ).map {|t| sprintf(fmt, t) } }.flatten end def glob_select(pat, ents) re = globs2re([pat]) ents.select {|ent| re =~ ent } end def glob_reject(pats, ents) re = globs2re(pats) ents.reject {|ent| re =~ ent } end GLOB2REGEX = { '.' 
=> '\.', '$' => '\$', '#' => '\#', '*' => '.*' } def globs2re(pats) /\A(?:#{ pats.map {|pat| pat.gsub(/[\.\$\#\*]/) {|ch| GLOB2REGEX[ch] } }.join('|') })\z/ end # # TASK test # TESTDIR = 'test' def exec_test unless File.directory?('test') $stderr.puts 'no test in this package' if verbose? return end $stderr.puts 'Running tests...' if verbose? begin require 'test/unit' rescue LoadError setup_rb_error 'test/unit cannot loaded. You need Ruby 1.8 or later to invoke this task.' end ########## begin customization for BioRuby unless defined?(Test::Unit::AutoRunner) then setup_rb_error 'Sorry it does not work in Ruby 1.9. Run "ruby test/runner.rb" instead.' end ########## end customization for BioRuby runner = Test::Unit::AutoRunner.new(true) runner.to_run << TESTDIR runner.run end # # TASK clean # def exec_clean exec_task_traverse 'clean' rm_f @config.savefile rm_f 'InstalledFiles' end alias clean_dir_bin noop alias clean_dir_lib noop alias clean_dir_data noop alias clean_dir_conf noop alias clean_dir_man noop def clean_dir_ext(rel) return unless extdir?(curr_srcdir()) make 'clean' if File.file?('Makefile') end # # TASK distclean # def exec_distclean exec_task_traverse 'distclean' rm_f @config.savefile rm_f 'InstalledFiles' end alias distclean_dir_bin noop alias distclean_dir_lib noop def distclean_dir_ext(rel) return unless extdir?(curr_srcdir()) make 'distclean' if File.file?('Makefile') end alias distclean_dir_data noop alias distclean_dir_conf noop alias distclean_dir_man noop # # Traversing # def exec_task_traverse(task) run_hook "pre-#{task}" FILETYPES.each do |type| if type == 'ext' and config('without-ext') == 'yes' $stderr.puts 'skipping ext/* by user option' if verbose? 
next end traverse task, type, "#{task}_dir_#{type}" end run_hook "post-#{task}" end def traverse(task, rel, mid) dive_into(rel) { run_hook "pre-#{task}" __send__ mid, rel.sub(%r[\A.*?(?:/|\z)], '') directories_of(curr_srcdir()).each do |d| traverse task, "#{rel}/#{d}", mid end run_hook "post-#{task}" } end def dive_into(rel) return unless File.dir?("#{@srcdir}/#{rel}") dir = File.basename(rel) Dir.mkdir dir unless File.dir?(dir) prevdir = Dir.pwd Dir.chdir dir $stderr.puts '---> ' + rel if verbose? @currdir = rel yield Dir.chdir prevdir $stderr.puts '<--- ' + rel if verbose? @currdir = File.dirname(rel) end def run_hook(id) path = [ "#{curr_srcdir()}/#{id}", "#{curr_srcdir()}/#{id}.rb" ].detect {|cand| File.file?(cand) } return unless path $stderr.puts "invoking hook script #{path}" if verbose? begin instance_eval File.read(path), path, 1 rescue raise if $DEBUG setup_rb_error "hook #{path} failed:\n" + $!.message end end end # class Installer class SetupError < StandardError; end def setup_rb_error(msg) raise SetupError, msg end if $0 == __FILE__ begin ToplevelInstaller.invoke rescue SetupError raise if $DEBUG $stderr.puts $!.message $stderr.puts "Try 'ruby #{$0} --help' for detailed usage." exit 1 end end bio-1.4.3.0001/COPYING0000644000004100000410000000471012200110570013720 0ustar www-datawww-dataBioRuby is copyrighted free software by Toshiaki Katayama . You can redistribute it and/or modify it under either the terms of the GPL version 2 (see the file GPL), or the conditions below: 1. You may make and give away verbatim copies of the source form of the software without restriction, provided that you duplicate all of the original copyright notices and associated disclaimers. 2. 
You may modify your copy of the software in any way, provided that you do at least ONE of the following: a) place your modifications in the Public Domain or otherwise make them Freely Available, such as by posting said modifications to Usenet or an equivalent medium, or by allowing the author to include your modifications in the software. b) use the modified software only within your corporation or organization. c) give non-standard binaries non-standard names, with instructions on where to get the original software distribution. d) make other distribution arrangements with the author. 3. You may distribute the software in object code or binary form, provided that you do at least ONE of the following: a) distribute the binaries and library files of the software, together with instructions (in the manual page or equivalent) on where to get the original distribution. b) accompany the distribution with the machine-readable source of the software. c) give non-standard binaries non-standard names, with instructions on where to get the original software distribution. d) make other distribution arrangements with the author. 4. You may modify and include the part of the software into any other software (possibly commercial). But some files in the distribution are not written by the author, so that they are not under these terms. For the list of those files and their copying conditions, see the file LEGAL. 5. The scripts and library files supplied as input to or produced as output from the software do not automatically fall under the copyright of the software, but belong to whomever generated them, and may be sold commercially, and may be aggregated with this software. 6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
bio-1.4.3.0001/RELEASE_NOTES.rdoc0000644000004100000410000002173612200110570015575 0ustar www-datawww-data= BioRuby 1.4.3.0001 RELEASE NOTES Some bug fixes have been made to the BioRuby 1.4.3.0001 after the version 1.4.3 is released. == Bug fixes === Ruby 2.0 support * "gem install bio" failed with Ruby 2.0 or later versions. * lib/bio/db/gff.rb: Set script encoding to US-ASCII. === Other bug fixes * Bio::BLAST::Default::Report parse error when subject sequence contains spaces. (https://redmine.open-bio.org/issues/3385) = BioRuby 1.4.3 RELEASE NOTES A lot of changes have been made to the BioRuby 1.4.3 after the version 1.4.2 is released. This document describes important and/or incompatible changes since the BioRuby 1.4.2 release. For known problems, see KNOWN_ISSUES.rdoc. == New features === Bio::KEGG::KGML * New class Bio::KEGG::KGML::Graphics for storing a graphics element. In the instance of the class, "coords" attribute is now available. * New class Bio::KEGG::KGML::Substrate for storing a substrate element. * New class Bio::KEGG::KGML::Product for storing a product element. * New method Bio::KEGG::KGML::Reaction#id. * Improve RDoc documentation. * Unit tests are added. * There are incompatible changes. See Incompatible changes below. == Improvements === Portability running on JRuby and Rubinius Many failures and errors running on JRuby and Rubinius are resolved. Some of them are due to BioRuby bugs, and some of them are due to JRuby or Rubinius bugs. Artem Tarasov reported bugs in BioRuby and submitted bug reports to Rubinius. Clayton Wheeler and Naohisa Goto fixed bugs in BioRuby and submitted bug reports to JRuby. === Testing on Travis CI BioRuby is now using Travis CI (http://travis-ci.org/), a hosted continuous integration service for the open source community. 
== Bug fixes === Strange behavior related with "circular require" is fixed Fixed: In previous versions, some bioruby files may be required more than two times, and this sometimes causes strange behavior, depending on the order of files on the disk. In particular, unit tests running on JRuby sometimes crash with strange errors. In BioRuby 1.4.3, almost all require and autoload lines are revised and are changed to avoid circular require. This also fixes a crash on JRuby due to JRuby's autoload bug. === Other bug fixes * Fixed: Genomenet remote BLAST does not work. * Fixed: Bio::KEGG::KGML ignores "coords" field. * Fixed: Bio::NucleicAcid.to_re("s") typo * To suppress rare failure of chi-square equiprobability tests for Bio::Sequence::Common#randomize, test code changed to retry up to 10 times if the chi-square test fails. The assertion fails if the chi-square test fails 10 consecutive times, and this strongly suggests bugs in the code or in the random number generator. * Fixed: Bio::EMBL#os raises RuntimeError. The fix includes an incompatible change. See below "Incompatible changes". * Fixed: bin/bioruby: Failed to save object with error message "can't convert Symbol into String" on Ruby 1.9. == Incompatible changes and removed features === Bio::FlatFile use binmode (binary mode) when opening a file In Bio::FlatFile.open and Bio::FlatFile.auto, binmode (binary mode) is used by default when opening a file, unless text mode is explicitly specified with open mode string or with options. Due to the change, files using CR+LF line separator might not be read correctly. === Broader FASTQ file recognition Because PacBio RS sequencer may produce kilobases long reads and read buffer size (default 31 lines) for file format detection may not be sufficient to find the second id line starting with "+", the regular expression for FASTQ is truncated only to check the first id line starting with "@".
=== Bio::KEGG::KGML * Bio::KEGG::KGML::Reaction#substrates and Bio::KEGG::KGML::Reaction#products are changed to return an array containing Bio::KEGG::KGML::Substrate and Bio::KEGG::KGML::Product objects, respectively. The changes enable us to get the IDs of substrates and products that were thrown away in the previous versions. * Most attribute methods that were different from the KGML attribute names are renamed to the names compatible with the KGML attribute names. Old method names are changed to aliases of them and marked as deprecated. The old names will be removed in the future. * Bio::KEGG::KGML::Entry#id (old name: entry_id) * Bio::KEGG::KGML::Entry#type (old name: category) * Bio::KEGG::KGML::Entry#entry1 (old name: node1) * Bio::KEGG::KGML::Entry#entry2 (old name: node2) * Bio::KEGG::KGML::Entry#type (old name: rel) * Bio::KEGG::KGML::Reaction#name (old name: entry_id) * Bio::KEGG::KGML::Reaction#type (old name: direction) * The following attribute methods are deprecated because two or more graphics elements may exist in an entry element. They will be removed in the future. Instead, please use instance methods of Bio::KEGG::KGML::Graphics, which can be obtained from Bio::KEGG::KGML::Entry#graphics attribute. * Bio::KEGG::KGML::Entry#label * Bio::KEGG::KGML::Entry#shape * Bio::KEGG::KGML::Entry#x * Bio::KEGG::KGML::Entry#y * Bio::KEGG::KGML::Entry#width * Bio::KEGG::KGML::Entry#height * Bio::KEGG::KGML::Entry#fgcolor * Bio::KEGG::KGML::Entry#bgcolor === Bio::EMBL#os Bio::EMBL#os, which returns the parser result of the EMBL OS line, no longer splits the content with comma, and it no longer raises an error even if the OS line is not in the "Genus species (name)" format. The changes may affect the parsing of old EMBL files which contain two or more species names in an OS line. Note that Bio::EMBL#os returns an Array containing several Hash objects, and the argument is always ignored. The return value type and the meaning of the argument might be changed in the future.
=== Tests * Tests using network connections are moved under test/network/. To invoke these tests, run "rake test-network". * BIORUBY_TEST_LIB environment variable * The directory name specified with BIORUBY_TEST_LIB is always added on the top of $LOAD_PATH even if it is already included in the middle of $LOAD_PATH. * When BIORUBY_TEST_LIB is empty, it no longer adds an empty string to $LOAD_PATH. * BIORUBY_TEST_LIB is ignored when BIORUBY_TEST_GEM is set. * BIORUBY_TEST_GEM environment variable * New environment variable BIORUBY_TEST_GEM for testing installed bio-X.X.X gem. Version number can be specified. See the following examples with/without the version number: * % env BIORUBY_TEST_GEM=1.4.2.5000 ruby test/runner.rb * % env BIORUBY_TEST_GEM="" ruby test/runner.rb === Other removed features * rdoc.zsh is removed because it has not been used for a long time. == Known issues The following issues are added or updated. See KNOWN_ISSUES.rdoc for other already known issues. === JRuby On JRuby, errors may be raised due to the following unfixed bugs in JRuby. * {JRUBY-6195}[http://jira.codehaus.org/browse/JRUBY-6195] Process.spawn (and related methods) ignore option hash * {JRUBY-6818}[http://jira.codehaus.org/browse/JRUBY-6818] Kernel.exec, Process.spawn (and IO.popen etc.) raise error when program is an array containing two strings With older versions of JRuby, you may be bothered by the following bugs that have already been fixed in the head of JRuby.
* {JRUBY-6658}[http://jira.codehaus.org/browse/JRUBY-6658] Problem when setting up an autoload entry, defining a class via require, then redefining the autoload entry * {JRUBY-6666}[http://jira.codehaus.org/browse/JRUBY-6666] Open3.popen3 failing due to missing handling for [path, argv[0]] array * {JRUBY-6819}[http://jira.codehaus.org/browse/JRUBY-6819] java.lang.ArrayIndexOutOfBoundsException in String#each_line Due to JRUBY-5678 (resolved issue) and the difference of behavior between CRuby and JRuby written in the comments of the issue tracking page, when running BioRuby on JRuby with sudo or root rights, the TMPDIR environment variable should be set to a directory that is not world-writable. Currently, the workaround is needed for running BioRuby tests with JRuby on Travis-CI. * {JRUBY-5678}[http://jira.codehaus.org/browse/JRUBY-5678] tmpdir cannot be delete when jruby has sudo/root rights === Rubinius According to Travis-CI, unit tests have failed on 1.9 mode of Rubinius. With older versions of Rubinius, you may be bothered by the following bugs that have already been fixed in the head of Rubinius. * {Rubinius Issue #1693}[https://github.com/rubinius/rubinius/issues/1693] String#split gives incorrect output when splitting by /^/ * {Rubinius Issue #1724}[https://github.com/rubinius/rubinius/issues/1724] Creating Struct class with length attribute === DDBJ Web API related classes (Bio::DDBJ::*, Bio::BLAST::Remote::DDBJ) DDBJ Web API is stopping after their system replacement in March 2012. (See the announcement though it is written only in Japanese: http://www.ddbj.nig.ac.jp/replace/rp120601-j.html) Due to the stop of the DDBJ Web API, Bio::DDBJ::* and Bio::BLAST::Remote::DDBJ which are using the web API cannot be used. === SOAP4R with Ruby 1.9 soap4r-ruby1.9 may raise "uninitialized constant XML::SaxParser" error with some combinations of XML parser libraries. It seems this is a bug of soap4r-ruby1.9.
# tar header: bio-1.4.3.0001/lib/0000755000004100000410000000000012200110570013431 5ustar www-datawww-databio-1.4.3.0001/lib/bio.rb0000644000004100000410000002305012200110570014529 0ustar www-datawww-data
#
# = bio.rb - Loading all BioRuby modules
#
# Copyright::   Copyright (C) 2001-2007
#               Toshiaki Katayama
# License::     The Ruby License
#
# $Id:$
#

# Top-level BioRuby namespace. Every constant below is registered with
# +autoload+, so the named file is only loaded on first reference.
module Bio

  autoload :BIORUBY_VERSION,       'bio/version'
  autoload :BIORUBY_EXTRA_VERSION, 'bio/version'
  autoload :BIORUBY_VERSION_ID,    'bio/version'

  ### Basic data types

  ## Sequence
  autoload :Sequence, 'bio/sequence'
  ## below are described in bio/sequence.rb
  #class Sequence
  #  autoload :Common,  'bio/sequence/common'
  #  autoload :NA,      'bio/sequence/na'
  #  autoload :AA,      'bio/sequence/aa'
  #  autoload :Generic, 'bio/sequence/generic'
  #  autoload :Format,  'bio/sequence/format'
  #  autoload :Adapter, 'bio/sequence/adapter'
  #end

  ## Locations/Location
  autoload :Location,  'bio/location'
  autoload :Locations, 'bio/location'

  ## Features/Feature
  autoload :Feature,  'bio/feature'
  autoload :Features, 'bio/compat/features'

  ## References/Reference
  autoload :Reference,  'bio/reference'
  autoload :References, 'bio/compat/references'

  ## Pathway/Relation
  autoload :Pathway,  'bio/pathway'
  autoload :Relation, 'bio/pathway'

  ## Alignment
  autoload :Alignment, 'bio/alignment'

  ## Tree
  autoload :Tree, 'bio/tree'

  ## Map
  autoload :Map, 'bio/map'

  ### Constants
  autoload :NucleicAcid, 'bio/data/na'
  autoload :AminoAcid,   'bio/data/aa'
  autoload :CodonTable,  'bio/data/codontable'

  ### DB parsers
  autoload :DB,     'bio/db'
  autoload :NCBIDB, 'bio/db'
  autoload :KEGGDB, 'bio/db'
  autoload :EMBLDB, 'bio/db'

  ## GenBank/RefSeq/DDBJ
  autoload :GenBank, 'bio/db/genbank/genbank'
  autoload :GenPept, 'bio/db/genbank/genpept'
  autoload :RefSeq,  'bio/db/genbank/refseq'
  autoload :DDBJ,    'bio/db/genbank/ddbj'
  ## below are described in bio/db/genbank/ddbj.rb
  #class DDBJ
  #  autoload :XML,  'bio/io/ddbjxml'
  #  autoload :REST, 'bio/io/ddbjrest'
  #end

  ## EMBL/TrEMBL/Swiss-Prot/SPTR
  autoload :EMBL,      'bio/db/embl/embl'
  autoload :SPTR,      'bio/db/embl/sptr'
  autoload :TrEMBL,    'bio/db/embl/trembl'
  autoload :UniProt,   'bio/db/embl/uniprot'
  autoload :SwissProt, 'bio/db/embl/swissprot'

  ## KEGG
  class KEGG
    autoload :GENOME,     'bio/db/kegg/genome'
    autoload :GENES,      'bio/db/kegg/genes'
    autoload :ENZYME,     'bio/db/kegg/enzyme'
    autoload :COMPOUND,   'bio/db/kegg/compound'
    autoload :DRUG,       'bio/db/kegg/drug'
    autoload :GLYCAN,     'bio/db/kegg/glycan'
    autoload :REACTION,   'bio/db/kegg/reaction'
    autoload :BRITE,      'bio/db/kegg/brite'
    autoload :CELL,       'bio/db/kegg/cell'
    autoload :EXPRESSION, 'bio/db/kegg/expression'
    autoload :ORTHOLOGY,  'bio/db/kegg/orthology'
    autoload :KGML,       'bio/db/kegg/kgml'
    autoload :PATHWAY,    'bio/db/kegg/pathway'
    autoload :MODULE,     'bio/db/kegg/module'
    autoload :Taxonomy,   'bio/db/kegg/taxonomy'
  end

  ## other formats
  autoload :FastaFormat,        'bio/db/fasta'
  autoload :FastaNumericFormat, 'bio/db/fasta/qual'    # change to FastaFormat::Numeric ?
  autoload :FastaDefline,       'bio/db/fasta/defline' # change to FastaFormat::Defline ?
  autoload :Fastq,              'bio/db/fastq'
  autoload :GFF,                'bio/db/gff'
  autoload :AAindex,            'bio/db/aaindex'
  autoload :AAindex1,           'bio/db/aaindex' # change to AAindex::AAindex1 ?
  autoload :AAindex2,           'bio/db/aaindex' # change to AAindex::AAindex2 ?
  autoload :TRANSFAC,           'bio/db/transfac'
  autoload :PROSITE,            'bio/db/prosite'
  autoload :LITDB,              'bio/db/litdb'
  autoload :MEDLINE,            'bio/db/medline'
  autoload :FANTOM,             'bio/db/fantom'
  autoload :GO,                 'bio/db/go'
  autoload :PDB,                'bio/db/pdb'
  autoload :NBRF,               'bio/db/nbrf'
  autoload :REBASE,             'bio/db/rebase'
  autoload :SOFT,               'bio/db/soft'
  autoload :Lasergene,          'bio/db/lasergene'
  autoload :SangerChromatogram, 'bio/db/sanger_chromatogram/chromatogram'
  autoload :Scf,                'bio/db/sanger_chromatogram/scf'
  autoload :Abif,               'bio/db/sanger_chromatogram/abif'

  autoload :Newick,   'bio/db/newick'
  autoload :Nexus,    'bio/db/nexus'
  autoload :PhyloXML, 'bio/db/phyloxml/phyloxml_elements'
  # Bio::Taxonomy will be moved to other file
  autoload :Taxonomy, 'bio/db/phyloxml/phyloxml_elements'
  ## below are described in bio/db/phyloxml/phyloxml_elements.rb
  #module PhyloXML
  #  autoload :Parser, 'bio/db/phyloxml/phyloxml_parser'
  #  autoload :Writer, 'bio/db/phyloxml/phyloxml_writer'
  #end

  ### IO interface modules
  autoload :Registry,      'bio/io/registry'
  autoload :Fetch,         'bio/io/fetch'
  autoload :SQL,           'bio/io/sql'
  autoload :SOAPWSDL,      'bio/io/soapwsdl'
  autoload :FlatFile,      'bio/io/flatfile'
  autoload :FlatFileIndex, 'bio/io/flatfile/index' # change to FlatFile::Index ?
  ## below are described in bio/io/flatfile/index.rb
  #class FlatFileIndex
  #  autoload :Indexer,    'bio/io/flatfile/indexer'
  #  autoload :BDBdefault, 'bio/io/flatfile/bdb'
  #  autoload :BDBwrapper, 'bio/io/flatfile/bdb'
  #  autoload :BDB_1,      'bio/io/flatfile/bdb'
  #end

  autoload :PubMed,  'bio/io/pubmed'
  autoload :DAS,     'bio/io/das'
  autoload :DBGET,   'bio/io/dbget'

  autoload :Ensembl, 'bio/io/ensembl'
  autoload :Hinv,    'bio/io/hinv'

  ## below are described in bio/appl/blast.rb
  #class Blast
  #  autoload :Fastacmd, 'bio/io/fastacmd'
  #end

  class KEGG
    autoload :API, 'bio/io/keggapi'
  end

  ## below are described in bio/db/genbank/ddbj.rb
  #class DDBJ
  #  autoload :XML, 'bio/io/ddbjxml'
  #end

  class HGC
    autoload :HiGet, 'bio/io/higet'
  end

  class EBI
    autoload :SOAP, 'bio/io/ebisoap'
  end

  autoload :NCBI, 'bio/io/ncbirest'
  ## below are described in bio/io/ncbirest.rb
  #class NCBI
  #  autoload :SOAP, 'bio/io/ncbisoap'
  #  autoload :REST, 'bio/io/ncbirest'
  #end

  autoload :TogoWS, 'bio/io/togows'

  ### Applications
  autoload :Fasta, 'bio/appl/fasta'
  ## below are described in bio/appl/fasta.rb
  #class Fasta
  #  autoload :Report, 'bio/appl/fasta/format10'
  #end

  autoload :Blast, 'bio/appl/blast'
  ## below are described in bio/appl/blast.rb
  #class Blast
  #  autoload :Fastacmd,    'bio/io/fastacmd'
  #  autoload :Report,      'bio/appl/blast/report'
  #  autoload :Default,     'bio/appl/blast/format0'
  #  autoload :WU,          'bio/appl/blast/wublast'
  #  autoload :Bl2seq,      'bio/appl/bl2seq/report'
  #  autoload :RPSBlast,    'bio/appl/blast/rpsblast'
  #  autoload :NCBIOptions, 'bio/appl/blast/ncbioptions'
  #  autoload :Remote,      'bio/appl/blast/remote'
  #end

  autoload :HMMER, 'bio/appl/hmmer'
  ## below are described in bio/appl/hmmer.rb
  #class HMMER
  #  autoload :Report, 'bio/appl/hmmer/report'
  #end

  autoload :EMBOSS, 'bio/appl/emboss' # use bio/command, improve

  autoload :PSORT, 'bio/appl/psort'
  ## below are described in bio/appl/psort.rb
  #class PSORT
  #  class PSORT1
  #    autoload :Report, 'bio/appl/psort/report'
  #  end
  #  class PSORT2
  #    autoload :Report, 'bio/appl/psort/report'
  #  end
  #end

  autoload :TMHMM,   'bio/appl/tmhmm/report'
  autoload :TargetP, 'bio/appl/targetp/report'
  autoload :SOSUI,   'bio/appl/sosui/report'
  autoload :Genscan, 'bio/appl/genscan/report'

  autoload :ClustalW, 'bio/appl/clustalw'
  ## below are described in bio/appl/clustalw.rb
  #class ClustalW
  #  autoload :Report, 'bio/appl/clustalw/report'
  #end

  autoload :MAFFT, 'bio/appl/mafft'
  ## below are described in bio/appl/mafft.rb
  #class MAFFT
  #  autoload :Report, 'bio/appl/mafft/report'
  #end

  autoload :Tcoffee,  'bio/appl/tcoffee'
  autoload :Muscle,   'bio/appl/muscle'
  autoload :Probcons, 'bio/appl/probcons'

  autoload :Sim4, 'bio/appl/sim4'
  ## below are described in bio/appl/sim4.rb
  #class Sim4
  #  autoload :Report, 'bio/appl/sim4/report'
  #end

  autoload :Spidey, 'bio/appl/spidey/report'
  autoload :Blat,   'bio/appl/blat/report'

  module GCG
    autoload :Msf, 'bio/appl/gcg/msf'
    autoload :Seq, 'bio/appl/gcg/seq'
  end

  module Phylip
    autoload :PhylipFormat,   'bio/appl/phylip/alignment'
    autoload :DistanceMatrix, 'bio/appl/phylip/distance_matrix'
  end

  autoload :Iprscan, 'bio/appl/iprscan/report'

  autoload :PAML, 'bio/appl/paml/common'
  ## below are described in bio/appl/paml/common.rb
  # module PAML
  #   autoload :Codeml, 'bio/appl/paml/codeml'
  #   autoload :Baseml, 'bio/appl/paml/baseml'
  #   autoload :Yn00,   'bio/appl/paml/yn00'
  # end

  ### Utilities
  autoload :SiRNA,             'bio/util/sirna'
  autoload :ColorScheme,       'bio/util/color_scheme'
  autoload :ContingencyTable,  'bio/util/contingency_table'
  autoload :RestrictionEnzyme, 'bio/util/restriction_enzyme'

  ### Service libraries
  autoload :Command, 'bio/command'

  ### Provide BioRuby shell 'command' also as 'Bio.command' (like ChemRuby)
  #
  # On a call to an undefined Bio.xxx method, bio/shell is loaded, Bio is
  # extended with Bio::Shell, and the private instance methods of Bio::Shell
  # are made public on Bio. If Bio then responds to the name, the call is
  # re-dispatched with the original arguments and block.
  #
  # *Arguments*:
  # * method name followed by the arguments of the call (a block is forwarded)
  # *Returns*:: whatever the Bio::Shell method returns
  # *Raises*:: NoMethodError (a NameError subclass) when Bio still does not
  #            respond to the name after Bio::Shell has been mixed in
  def self.method_missing(*args, &block)
    require 'bio/shell'
    extend Bio::Shell
    public_class_method(*Bio::Shell.private_instance_methods)
    if Bio.respond_to?(args.first)
      # forward the block too; shell commands that yield need it
      Bio.send(*args, &block)
    else
      # let Ruby build a NoMethodError carrying the method name and receiver
      super
    end
  end

end
# tar header: bio-1.4.3.0001/lib/bio/0000755000004100000410000000000012200110570014202 5ustar
# tar header: www-datawww-databio-1.4.3.0001/lib/bio/compat/0000755000004100000410000000000012200110570015465 5ustar www-datawww-databio-1.4.3.0001/lib/bio/compat/features.rb0000644000004100000410000001077312200110570017640 0ustar www-datawww-data
#
# = bio/compat/features.rb - Obsoleted Features class
#
# Copyright::   Copyright (c) 2002, 2005  Toshiaki Katayama
#                             2006        Jan Aerts
#                             2008        Naohisa Goto
# License::     The Ruby License
#
# $Id: features.rb,v 1.1.2.2 2008/03/10 13:42:26 ngoto Exp $
#
# == Description
#
# The Bio::Features class was obsoleted after BioRuby 1.2.1.
# To keep compatibility, some wrapper methods are provided in this file.
# As the compatibility methods (and Bio::Features) will soon be removed,
# Please change your code not to use Bio::Features.
#
# Note that Bio::Feature is different from the Bio::Features.
# Bio::Feature still exists to store DDBJ/GenBank/EMBL feature information.

require 'bio/location'

module Bio

  # = DESCRIPTION
  #
  # This class is OBSOLETED, and will soon be removed.
  # Instead of this class, an array is to be used.
  #
  #
  # Container for a list of Feature objects.
  #
  # = USAGE
  #  # First, create some Bio::Feature objects
  #  feature1 = Bio::Feature.new('intron','3627..4059')
  #  feature2 = Bio::Feature.new('exon','4060..4236')
  #  feature3 = Bio::Feature.new('intron','4237..4426')
  #  feature4 = Bio::Feature.new('CDS','join(2538..3626,4060..4236)',
  #                   [ Bio::Feature::Qualifier.new('gene', 'CYP2D6'),
  #                     Bio::Feature::Qualifier.new('translation','MGXXTVMHLL...')
  #                   ])
  #
  #  # And create a container for them
  #  feature_container = Bio::Features.new([ feature1, feature2, feature3, feature4 ])
  #
  #  # Iterate over all features and print
  #  feature_container.each do |feature|
  #    puts feature.feature + "\t" + feature.position
  #    feature.each do |qualifier|
  #      puts "- " + qualifier.qualifier + ": " + qualifier.value
  #    end
  #  end
  #
  #  # Iterate only over CDS features and extract translated amino acid sequences
  #  features.each("CDS") do |feature|
  #    hash = feature.to_hash
  #    name = hash["gene"] || hash["product"] || hash["note"]
  #    aaseq = hash["translation"]
  #    pos = feature.position
  #    if name and aaseq
  #      puts ">#{name} #{pos}"
  #      puts aaseq
  #    end
  #  end
  class Features

    # module to keep backward compatibility with obsoleted Bio::Features
    module BackwardCompatibility #:nodoc:

      # Backward compatibility with Bio::Features#features.
      # Now, features are stored in an array, and
      # you should change your code not to use this method.
      def features
        warn 'Bio::Features is obsoleted. Now, features are stored in an array.'
        self
      end

      # Backward compatibility with Bio::Features#append.
      # Now, features are stored in an array, and
      # you should change your code not to use this method.
      def append(feature)
        warn 'Bio::Features is obsoleted. Now, features are stored in an array.'
        self.push(feature) if feature.is_a? Feature
        self
      end
    end #module BackwardCompatibility

    # This method should not be used.
    # Only for backward compatibility of existing code.
    #
    # Since Bio::Features is obsoleted,
    # Bio::Features.new does not return a Bio::Features object,
    # but modifies given _ary_ and returns the _ary_.
    #
    # *Arguments*:
    # * (optional) _ary_: Array of Bio::Feature objects
    # *Returns*:: the given array
    def self.new(ary = [])
      warn 'Bio::Features is obsoleted. Some methods are added to given array to keep backward compatibility.'
      ary.extend(BackwardCompatibility)
      ary
    end

    # Normally, users can not call this method.
    #
    # Create a new Bio::Features object.
    #
    # *Arguments*:
    # * (optional) _list of features_: list of Bio::Feature objects
    # *Returns*:: Bio::Features object
    def initialize(ary = [])
      @features = ary
    end

    # Returns an Array of Feature objects.
    attr_accessor :features

    # Appends a Feature object to Features.
    #
    # *Arguments*:
    # * (required) _feature_: Bio::Feature object
    # *Returns*:: Bio::Features object
    def append(a)
      @features.push(a) if a.is_a? Feature
      return self
    end

    # Iterates on each feature object.
    #
    # *Arguments*:
    # * (optional) _key_: if specified, only iterates over features with this key
    def each(arg = nil)
      @features.each do |x|
        next if arg and x.feature != arg
        yield x
      end
    end

    # Short cut for the Features#features[n]
    def [](*arg)
      @features[*arg]
    end

    # Short cut for the Features#features.first
    def first
      @features.first
    end

    # Short cut for the Features#features.last
    def last
      @features.last
    end

  end # Features

end # Bio
# tar header: bio-1.4.3.0001/lib/bio/compat/references.rb0000644000004100000410000000713412200110570020140 0ustar www-datawww-data
#
# = bio/compat/references.rb - Obsoleted References class
#
# Copyright::   Copyright (C) 2008
#               Toshiaki Katayama,
#               Ryan Raaum,
#               Jan Aerts,
#               Naohisa Goto
# License::     The Ruby License
#
# $Id: references.rb,v 1.1.2.1 2008/03/04 10:07:49 ngoto Exp $
#
# == Description
#
# The Bio::References class was obsoleted after BioRuby 1.2.1.
# To keep compatibility, some wrapper methods are provided in this file.
# As the compatibility methods (and Bio::References) will soon be removed,
# Please change your code not to use Bio::References.
#
# Note that Bio::Reference is different from Bio::References.
# Bio::Reference still exists for storing a reference information
# in sequence entries.

module Bio

  # = DESCRIPTION
  #
  # This class is OBSOLETED, and will soon be removed.
  # Instead of this class, an array is to be used.
  #
  #
  # A container class for Bio::Reference objects.
  #
  # = USAGE
  #
  # This class should NOT be used.
  #
  #   refs = Bio::References.new
  #   refs.append(Bio::Reference.new(hash))
  #   refs.each do |reference|
  #     ...
  #   end
  #
  class References

    # module to keep backward compatibility with obsoleted Bio::References
    module BackwardCompatibility #:nodoc:

      # Backward compatibility with Bio::References#references.
      # Now, references are stored in an array, and
      # you should change your code not to use this method.
      def references
        warn 'Bio::References is obsoleted. Now, references are stored in an array.'
        self
      end

      # Backward compatibility with Bio::References#append.
      # Now, references are stored in an array, and
      # you should change your code not to use this method.
      def append(reference)
        warn 'Bio::References is obsoleted. Now, references are stored in an array.'
        self.push(reference) if reference.is_a? Reference
        self
      end
    end #module BackwardCompatibility

    # This method should not be used.
    # Only for backward compatibility of existing code.
    #
    # Since Bio::References is obsoleted,
    # Bio::References.new does not return a Bio::References object,
    # but modifies given _ary_ and returns the _ary_.
    #
    # *Arguments*:
    # * (optional) _ary_: Array of Bio::Reference objects
    # *Returns*:: the given array
    def self.new(ary = [])
      warn 'Bio::References is obsoleted. Some methods are added to given array to keep backward compatibility.'
      ary.extend(BackwardCompatibility)
      ary
    end

    # Array of Bio::Reference objects
    attr_accessor :references

    # Normally, users can not call this method.
    #
    # Create a new Bio::References object
    #
    #   refs = Bio::References.new
    # ---
    # *Arguments*:
    # * (optional) _ary_: Array of Bio::Reference objects
    # *Returns*:: Bio::References object
    def initialize(ary = [])
      @references = ary
    end

    # Add a Bio::Reference object to the container.
    #
    #   refs.append(reference)
    # ---
    # *Arguments*:
    # * (required) _reference_: Bio::Reference object
    # *Returns*:: current Bio::References object
    def append(reference)
      @references.push(reference) if reference.is_a? Reference
      return self
    end

    # Iterate through Bio::Reference objects.
    #
    #   refs.each do |reference|
    #     ...
    #   end
    # ---
    # *Block*:: yields each Bio::Reference object
    def each
      @references.each do |reference|
        yield reference
      end
    end

  end #class References

end #module Bio
# tar header: bio-1.4.3.0001/lib/bio/util/0000755000004100000410000000000012200110570015157 5ustar www-datawww-databio-1.4.3.0001/lib/bio/util/color_scheme/0000755000004100000410000000000012200110570017621 5ustar www-datawww-databio-1.4.3.0001/lib/bio/util/color_scheme/buried.rb0000644000004100000410000000237112200110570021423 0ustar www-datawww-data
#
# bio/util/color_scheme/buried.rb - Color codings for buried amino acids
#
# Author::    Trevor Wennblom
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
# License::   The Ruby License
#
# $Id: buried.rb,v 1.4 2007/04/05 23:35:41 trevor Exp $
#

require 'bio/util/color_scheme'

module Bio::ColorScheme
  # Score-based scheme: each residue score is scaled between @min and @max
  # and rendered as a green-to-blue gradient (red is always 0).
  class Buried < Score #:nodoc:

    #########
    protected
    #########

    def self.score_to_rgb_hex(score, min, max)
      percent = score_to_percent(score, min, max)
      rgb_percent_to_hex(0.0, 1.0-percent, percent)
    end

    @colors = {}
    # NOTE(review): this table is value-for-value identical to Turn's @scores,
    # while @min/@max below (0.05 / 4.6) lie far outside its range
    # (0 .. 1.56) -- looks like a copy/paste; confirm against the source data.
    @scores = {
      'A' => 0.66, 'C' => 1.19, 'D' => 1.46, 'E' => 0.74,
      'F' => 0.6,  'G' => 1.56, 'H' => 0.95, 'I' => 0.47,
      'K' => 1.01, 'L' => 0.59, 'M' => 0.6,  'N' => 1.56,
      'P' => 1.52, 'Q' => 0.98, 'R' => 0.95, 'S' => 1.43,
      'T' => 0.96, 'U' => 0,    'V' => 0.5,  'W' => 0.96,
      'Y' => 1.14,

      'B' => 1.51, 'X' => 1.0,  'Z' => 0.86,
    }
    @min = 0.05
    @max = 4.6
    @scores.each { |k,s| @colors[k] = score_to_rgb_hex(s, @min, @max) }
    @colors.default = 'FFFFFF'  # return white by default

  end
end
# tar header: bio-1.4.3.0001/lib/bio/util/color_scheme/hydropathy.rb0000644000004100000410000000254112200110570022343 0ustar www-datawww-data
#
# bio/util/color_scheme/hydropathy.rb - Color codings for hydrophobicity
#
# Author::    Trevor Wennblom
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
# License::   The Ruby License
#
# $Id: hydropathy.rb,v 1.4 2007/04/05 23:35:41 trevor Exp $
#

require 'bio/util/color_scheme'

module Bio::ColorScheme

  # Hydropathy index
  # Kyte, J., and Doolittle, R.F., J. Mol. Biol.
  # 157, 105-132, 1982
  #
  # Rendered as a blue-to-red gradient (green is always 0).
  class Hydropathy < Score #:nodoc:

    #########
    protected
    #########

    def self.score_to_rgb_hex(score, min, max)
      percent = score_to_percent(score, min, max)
      rgb_percent_to_hex(percent, 0.0, 1.0-percent)
    end

    @colors = {}
    @scores = {
      'A' => 1.8,  'C' => 2.5,  'D' => -3.5, 'E' => -3.5,
      'F' => 2.8,  'G' => -0.4, 'H' => -3.2, 'I' => 4.5,
      'K' => -3.9, 'L' => 3.8,  'M' => 1.9,  'N' => -3.5,
      'P' => -1.6, 'Q' => -3.5, 'R' => -4.5, 'S' => -0.8,
      'T' => -0.7, 'U' => 0.0,  'V' => 4.2,  'W' => -0.9,
      'Y' => -1.3,

      'B' => -3.5, 'X' => -0.49, 'Z' => -3.5,
    }
    # NOTE(review): 'R' (-4.5) lies below @min; Score.score_to_percent clamps
    # it to 0.0, so R and K get the same color -- confirm this is intended.
    @min = -3.9
    @max = 4.5
    @scores.each { |k,s| @colors[k] = score_to_rgb_hex(s, @min, @max) }
    @colors.default = 'FFFFFF'  # return white by default

  end
end
# tar header: bio-1.4.3.0001/lib/bio/util/color_scheme/helix.rb0000644000004100000410000000237412200110570021265 0ustar www-datawww-data
#
# bio/util/color_scheme/helix.rb - Color codings for helix propensity
#
# Author::    Trevor Wennblom
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
# License::   The Ruby License
#
# $Id: helix.rb,v 1.4 2007/04/05 23:35:41 trevor Exp $
#

require 'bio/util/color_scheme'

module Bio::ColorScheme
  # Score-based scheme: red and blue rise with the score, green falls.
  class Helix < Score #:nodoc:

    #########
    protected
    #########

    def self.score_to_rgb_hex(score, min, max)
      percent = score_to_percent(score, min, max)
      rgb_percent_to_hex(percent, 1.0-percent, percent)
    end

    @colors = {}
    @scores = {
      'A' => 1.42, 'C' => 0.7,  'D' => 1.01, 'E' => 1.51,
      'F' => 1.13, 'G' => 0.57, 'H' => 1.0,  'I' => 1.08,
      'K' => 1.16, 'L' => 1.21, 'M' => 1.45, 'N' => 0.67,
      'P' => 0.57, 'Q' => 1.11, 'R' => 0.98, 'S' => 0.77,
      'T' => 0.83, 'U' => 0.0,  'V' => 1.06, 'W' => 1.08,
      'Y' => 0.69,

      'B' => 0.84, 'X' => 1.0,  'Z' => 1.31,
    }
    @min = 0.57
    @max = 1.51
    @scores.each { |k,s| @colors[k] = score_to_rgb_hex(s, @min, @max) }
    @colors.default = 'FFFFFF'  # return white by default

  end
end
# tar header: bio-1.4.3.0001/lib/bio/util/color_scheme/strand.rb0000644000004100000410000000237512200110570021450 0ustar www-datawww-data
#
# bio/util/color_scheme/strand.rb - Color codings for strand propensity
#
# Author::    Trevor Wennblom
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
# License::   The Ruby License
#
# $Id: strand.rb,v 1.5 2007/04/05 23:35:41 trevor Exp $
#

require 'bio/util/color_scheme'

module Bio::ColorScheme
  # Score-based scheme: red and green rise with the score, blue falls.
  class Strand < Score #:nodoc:

    #########
    protected
    #########

    def self.score_to_rgb_hex(score, min, max)
      percent = score_to_percent(score, min, max)
      rgb_percent_to_hex(percent, percent, 1.0-percent)
    end

    @colors = {}
    @scores = {
      'A' => 0.83, 'C' => 1.19, 'D' => 0.54, 'E' => 0.37,
      'F' => 1.38, 'G' => 0.75, 'H' => 0.87, 'I' => 1.6,
      'K' => 0.74, 'L' => 1.3,  'M' => 1.05, 'N' => 0.89,
      'P' => 0.55, 'Q' => 1.1,  'R' => 0.93, 'S' => 0.75,
      'T' => 1.19, 'U' => 0.0,  'V' => 1.7,  'W' => 1.37,
      'Y' => 1.47,

      'B' => 0.72, 'X' => 1.0,  'Z' => 0.74,
    }
    @min = 0.37
    @max = 1.7
    @scores.each { |k,s| @colors[k] = score_to_rgb_hex(s, @min, @max) }
    @colors.default = 'FFFFFF'  # return white by default

  end
end
# tar header: bio-1.4.3.0001/lib/bio/util/color_scheme/turn.rb0000644000004100000410000000237112200110570021141 0ustar www-datawww-data
#
# bio/util/color_scheme/turn.rb - Color codings for turn propensity
#
# Author::    Trevor Wennblom
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
# License::   The Ruby
# License
#
# $Id: turn.rb,v 1.4 2007/04/05 23:35:41 trevor Exp $
#

require 'bio/util/color_scheme'

module Bio::ColorScheme
  # Score-based scheme: red rises with the score, green and blue fall.
  class Turn < Score #:nodoc:

    #########
    protected
    #########

    def self.score_to_rgb_hex(score, min, max)
      percent = score_to_percent(score, min, max)
      rgb_percent_to_hex(percent, 1.0-percent, 1.0-percent)
    end

    @colors = {}
    @scores = {
      'A' => 0.66, 'C' => 1.19, 'D' => 1.46, 'E' => 0.74,
      'F' => 0.6,  'G' => 1.56, 'H' => 0.95, 'I' => 0.47,
      'K' => 1.01, 'L' => 0.59, 'M' => 0.6,  'N' => 1.56,
      'P' => 1.52, 'Q' => 0.98, 'R' => 0.95, 'S' => 1.43,
      'T' => 0.96, 'U' => 0,    'V' => 0.5,  'W' => 0.96,
      'Y' => 1.14,

      'B' => 1.51, 'X' => 1.0,  'Z' => 0.86,
    }
    @min = 0.47
    @max = 1.56
    @scores.each { |k,s| @colors[k] = score_to_rgb_hex(s, @min, @max) }
    @colors.default = 'FFFFFF'  # return white by default

  end
end
# tar header: bio-1.4.3.0001/lib/bio/util/color_scheme/taylor.rb0000644000004100000410000000211312200110570021455 0ustar www-datawww-data
#
# bio/util/color_scheme/taylor.rb - Taylor color codings for amino acids
#
# Author::    Trevor Wennblom
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
# License::   The Ruby License
#
# $Id: taylor.rb,v 1.4 2007/04/05 23:35:41 trevor Exp $
#

require 'bio/util/color_scheme'

module Bio::ColorScheme
  # Fixed lookup table: one RGB hex string per one-letter residue code.
  class Taylor < Simple #:nodoc:

    #########
    protected
    #########

    @colors = {
      'A' => 'CCFF00', 'C' => 'FFFF00', 'D' => 'FF0000', 'E' => 'FF0066',
      'F' => '00FF66', 'G' => 'FF9900', 'H' => '0066FF', 'I' => '66FF00',
      'K' => '6600FF', 'L' => '33FF00', 'M' => '00FF00', 'N' => 'CC00FF',
      'P' => 'FFCC00', 'Q' => 'FF00CC', 'R' => '0000FF', 'S' => 'FF3300',
      'T' => 'FF6600', 'U' => 'FFFFFF', 'V' => '99FF00', 'W' => '00CCFF',
      'Y' => '00FFCC',

      'B' => 'FFFFFF', 'X' => 'FFFFFF', 'Z' => 'FFFFFF',
    }
    @colors.default = 'FFFFFF'  # return white by default

  end
end
# tar header: bio-1.4.3.0001/lib/bio/util/color_scheme/nucleotide.rb0000644000004100000410000000126212200110570022302 0ustar www-datawww-data
#
# bio/util/color_scheme/nucleotide.rb - Color
# codings for nucleotides
#
# Author::    Trevor Wennblom
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
# License::   The Ruby License
#
# $Id: nucleotide.rb,v 1.4 2007/04/05 23:35:41 trevor Exp $
#

require 'bio/util/color_scheme'

module Bio::ColorScheme
  # Fixed lookup table: one RGB hex string per nucleotide letter.
  class Nucleotide < Simple #:nodoc:

    #########
    protected
    #########

    @colors = {
      'A' => '64F73F',
      'C' => 'FFB340',
      'G' => 'EB413C',
      'T' => '3C88EE',
      'U' => '3C88EE',
    }
    @colors.default = 'FFFFFF'  # return white by default

  end
  NA = Nuc = Nucleotide
end
# tar header: bio-1.4.3.0001/lib/bio/util/color_scheme/zappo.rb0000644000004100000410000000210712200110570021277 0ustar www-datawww-data
#
# bio/util/color_scheme/zappo.rb - Zappo color codings for amino acids
#
# Author::    Trevor Wennblom
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
# License::   The Ruby License
#
# $Id: zappo.rb,v 1.4 2007/04/05 23:35:41 trevor Exp $
#

require 'bio/util/color_scheme'

module Bio::ColorScheme
  # Fixed lookup table: one RGB hex string per one-letter residue code.
  class Zappo < Simple #:nodoc:

    #########
    protected
    #########

    @colors = {
      'A' => 'FFAFAF', 'C' => 'FFFF00', 'D' => 'FF0000', 'E' => 'FF0000',
      'F' => 'FFC800', 'G' => 'FF00FF', 'H' => 'FF0000', 'I' => 'FFAFAF',
      'K' => '6464FF', 'L' => 'FFAFAF', 'M' => 'FFAFAF', 'N' => '00FF00',
      'P' => 'FF00FF', 'Q' => '00FF00', 'R' => '6464FF', 'S' => '00FF00',
      'T' => '00FF00', 'U' => 'FFFFFF', 'V' => 'FFAFAF', 'W' => 'FFC800',
      'Y' => 'FFC800',

      'B' => 'FFFFFF', 'X' => 'FFFFFF', 'Z' => 'FFFFFF',
    }
    @colors.default = 'FFFFFF'  # return white by default

  end
end
# tar header: bio-1.4.3.0001/lib/bio/util/color_scheme.rb0000644000004100000410000001170412200110570020151 0ustar www-datawww-data
#
# bio/util/color_scheme.rb - Popular color codings for nucleic and amino acids
#
# Author::    Trevor Wennblom
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
# License::   The Ruby License
#
# $Id:$
#

module Bio

  #
  # bio/util/color_scheme.rb - Popular color codings for nucleic and amino acids
  #
  # Author::    Trevor Wennblom
  # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
  # License::   The Ruby License
  #
  #
  # = Description
  #
  # The Bio::ColorScheme module contains classes that return popular color codings
  # for nucleic and amino acids in RGB hex format suitable for HTML code.
  #
  # The current schemes supported are:
  # * Buried - Buried index
  # * Helix - Helix propensity
  # * Hydropathy - Hydrophobicity
  # * Nucleotide - Nucleotide color coding
  # * Strand - Strand propensity
  # * Taylor - Taylor color coding
  # * Turn - Turn propensity
  # * Zappo - Zappo color coding
  #
  # Planned color schemes include:
  # * BLOSUM62
  # * ClustalX
  # * Percentage Identity (PID)
  #
  # Color schemes BLOSUM62, ClustalX, and Percentage Identity are all dependent
  # on the alignment consensus.
  #
  # This data is currently referenced from the JalView alignment editor.
  # Clamp, M., Cuff, J., Searle, S. M. and Barton, G. J. (2004),
  # "The Jalview Java Alignment Editor," Bioinformatics, 12, 426-7
  # http://www.jalview.org
  #
  # Currently the score data for things such as hydropathy, helix, turn, etc. are contained
  # here but should be moved to bio/data/aa once a good reference is found for these
  # values.
  #
  #
  # = Usage
  #
  #   require 'bio'
  #
  #   seq = 'gattaca'
  #   scheme = Bio::ColorScheme::Zappo
  #   postfix = '</span>'
  #   html = ''
  #   seq.each_byte do |c|
  #     color = scheme[c.chr]
  #     prefix = %Q(<span style="background:\##{color};">)
  #     html += prefix + c.chr + postfix
  #   end
  #
  #   puts html
  #
  #
  # == Accessing colors
  #
  #   puts Bio::ColorScheme::Buried['A']  # 00DC22
  #   puts Bio::ColorScheme::Buried[:c]  # 00BF3F
  #   puts Bio::ColorScheme::Buried[nil]  # nil
  #   puts Bio::ColorScheme::Buried['-']  # FFFFFF
  #   puts Bio::ColorScheme::Buried[7]  # FFFFFF
  #   puts Bio::ColorScheme::Buried['junk']  # FFFFFF
  #   puts Bio::ColorScheme::Buried['t']  # 00CC32
  #
  module ColorScheme
    cs_location = File.join(File.dirname(File.expand_path(__FILE__)), 'color_scheme')

    # Score sub-classes
    autoload :Buried,     File.join(cs_location, 'buried')
    autoload :Helix,      File.join(cs_location, 'helix')
    autoload :Hydropathy, File.join(cs_location, 'hydropathy')
    autoload :Strand,     File.join(cs_location, 'strand')
    autoload :Turn,       File.join(cs_location, 'turn')

    # Simple sub-classes
    autoload :Nucleotide, File.join(cs_location, 'nucleotide')
    autoload :Taylor,     File.join(cs_location, 'taylor')
    autoload :Zappo,      File.join(cs_location, 'zappo')

    # Consensus sub-classes
    # NOTE todo
    # BLOSUM62
    # ClustalX
    # PID

    # A very basic class template for color code referencing.
    class Simple #:nodoc:
      # Looks up the color for _x_. Symbols and any letter case are accepted;
      # returns nil for nil and the table default for unknown keys.
      def self.[](x)
        return if x.nil?

        # accept symbols and any case
        @colors[x.to_s.upcase]
      end

      # The full key => RGB hex table of this scheme.
      def self.colors() @colors end

      #######
      private
      #######

      # Example
      @colors = {
        'A' => '64F73F',
      }
      @colors.default = 'FFFFFF'  # return white by default
    end

    # A class template for color code referencing of color schemes
    # that are score based.  This template is expected to change
    # when the scores are moved into bio/data/aa
    class Score #:nodoc:
      # Looks up the color for _x_. Symbols and any letter case are accepted;
      # returns nil for nil and the table default for unknown keys.
      def self.[](x)
        return if x.nil?

        # accept symbols and any case
        @colors[x.to_s.upcase]
      end

      # Lower bound used when scaling scores. The argument is ignored; it is
      # optional so that plain +min+ works while existing +min(x)+ calls keep
      # working.
      def self.min(x = nil) @min end

      # Upper bound used when scaling scores. The argument is ignored (see min).
      def self.max(x = nil) @max end

      # The raw key => score table of this scheme.
      def self.scores() @scores end

      # The computed key => RGB hex table of this scheme.
      def self.colors() @colors end

      #########
      protected
      #########

      # Converts a fraction between 0.0 and 1.0 (Numeric or numeric String)
      # into a two-digit uppercase hex byte.
      #
      # *Raises*:: RuntimeError when _percent_ is nil or outside 0.0..1.0
      def self.percent_to_hex(percent)
        percent = percent.to_f if percent.is_a?(String)
        # nil has to be rejected before the comparisons, otherwise it fails
        # with NoMethodError instead of the intended error below
        if percent.nil? or (percent > 1.0) or (percent < 0.0)
          raise 'Percentage must be between 0.0 and 1.0'
        end
        "%02X" % (percent * 255.0)
      end

      # Joins three fractions (red, green, blue) into a six-digit RGB hex string.
      def self.rgb_percent_to_hex(red, green, blue)
        percent_to_hex(red) + percent_to_hex(green) + percent_to_hex(blue)
      end

      # Scales _score_ linearly between _min_ and _max_, clamped to 0.0..1.0.
      def self.score_to_percent(score, min, max)
        # .to_f to ensure every operation is float-aware
        percent = (score.to_f - min) / (max.to_f - min)
        percent = 1.0 if percent > 1.0
        percent = 0.0 if percent < 0.0
        percent
      end

      #######
      private
      #######

      # Example
      def self.score_to_rgb_hex(score, min, max)
        percent = score_to_percent(score, min, max)
        rgb_percent_to_hex(percent, 0.0, 1.0-percent)
      end

      @colors = {}
      @scores = {
        'A' => 0.83,
      }
      @min = 0.37
      @max = 1.7
      @scores.each { |k,s| @colors[k] = score_to_rgb_hex(s, @min, @max) }
      @colors.default = 'FFFFFF'  # return white by default
    end

    # TODO
    class Consensus #:nodoc:
    end

  end # module ColorScheme
end # module Bio
# Start of the next archive member, kept verbatim on one line; its module body continues past this block:
# bio-1.4.3.0001/lib/bio/util/contingency_table.rb0000644000004100000410000002700312200110570021175 0ustar www-datawww-data# # bio/util/contingency_table.rb - Statistical contingency table analysis for aligned sequences # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # # $Id:$ # module Bio # # bio/util/contingency_table.rb - Statistical contingency table analysis for aligned sequences # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # # = Description # # The Bio::ContingencyTable class provides basic statistical contingency table # analysis for two positions within aligned sequences.
# # When ContingencyTable is instantiated the set of characters in the # aligned sequences may be passed to it as an array. This is # important since it uses these characters to create the table's rows # and columns. If this array is not passed it will use its default # of an amino acid and nucleotide alphabet in lowercase along with the # clustal spacer '-'. # # To get data from the table the most used functions will be # chi_square and contingency_coefficient: # # ctable = Bio::ContingencyTable.new() # ctable['a']['t'] += 1 # # .. put more values into the table # puts ctable.chi_square # puts ctable.contingency_coefficient # between 0.0 and 1.0 # # The contingency_coefficient represents the degree of correlation of # change between two sequence positions in a multiple-sequence # alignment. 0.0 indicates no correlation, 1.0 is the maximum # correlation. # # # = Further Reading # # * http://en.wikipedia.org/wiki/Contingency_table # * http://www.physics.csbsju.edu/stats/exact.details.html # * Numerical Recipes in C by Press, Flannery, Teukolsky, and Vetterling # # = Usage # # What follows is an example of ContingencyTable in typical usage # analyzing results from a clustal alignment. # # require 'bio' # # seqs = {} # max_length = 0 # Bio::ClustalW::Report.new( IO.read('sample.aln') ).to_a.each do |entry| # data = entry.data.strip # seqs[entry.definition] = data.downcase # max_length = data.size if max_length == 0 # raise "Aligned sequences must be the same length!"
unless data.size == max_length # end # # VERBOSE = true # puts "i\tj\tchi_square\tcontingency_coefficient" if VERBOSE # correlations = {} # # 0.upto(max_length - 1) do |i| # (i+1).upto(max_length - 1) do |j| # ctable = Bio::ContingencyTable.new() # seqs.each_value { |seq| ctable.table[ seq[i].chr ][ seq[j].chr ] += 1 } # # chi_square = ctable.chi_square # contingency_coefficient = ctable.contingency_coefficient # puts [(i+1), (j+1), chi_square, contingency_coefficient].join("\t") if VERBOSE # # correlations["#{i+1},#{j+1}"] = contingency_coefficient # correlations["#{j+1},#{i+1}"] = contingency_coefficient # Both ways are accurate # end # end # # require 'yaml' # File.open('results.yml', 'a+') { |f| f.puts correlations.to_yaml } # # # = Tutorial # # ContingencyTable returns the statistical significance of change # between two positions in an alignment. If you would like to see how # every possible combination of positions in your alignment compares # to one another you must set this up yourself. Hopefully the # provided examples will help you get started without too much # trouble.
# # def lite_example(sequences, max_length, characters) # # %w{i j chi_square contingency_coefficient}.each { |x| print x.ljust(12) } # puts # # 0.upto(max_length - 1) do |i| # (i+1).upto(max_length - 1) do |j| # ctable = Bio::ContingencyTable.new( characters ) # sequences.each do |seq| # i_char = seq[i].chr # j_char = seq[j].chr # ctable.table[i_char][j_char] += 1 # end # chi_square = ctable.chi_square # contingency_coefficient = ctable.contingency_coefficient # [(i+1), (j+1), chi_square, contingency_coefficient].each { |x| print x.to_s.ljust(12) } # puts # end # end # # end # # allowed_letters = Array.new # allowed_letters = 'abcdefghijk'.split('') # # seqs = Array.new # seqs << 'abcde' # seqs << 'abcde' # seqs << 'aacje' # seqs << 'aacae' # # length_of_every_sequence = seqs[0].size # 5 letters long # # lite_example(seqs, length_of_every_sequence, allowed_letters) # # # Producing the following results: # # i j chi_square contingency_coefficient # 1 2 0.0 0.0 # 1 3 0.0 0.0 # 1 4 0.0 0.0 # 1 5 0.0 0.0 # 2 3 0.0 0.0 # 2 4 4.0 0.707106781186548 # 2 5 0.0 0.0 # 3 4 0.0 0.0 # 3 5 0.0 0.0 # 4 5 0.0 0.0 # # The position i=2 and j=4 has a high contingency coefficient # indicating that the changes at these positions are related. Note # that i and j are arbitrary, this could be represented as i=4 and j=2 # since they both refer to position two and position four in the # alignment. 
Here are some more examples: # # seqs = Array.new # seqs << 'abcde' # seqs << 'abcde' # seqs << 'aacje' # seqs << 'aacae' # seqs << 'akcfe' # seqs << 'akcfe' # # length_of_every_sequence = seqs[0].size # 5 letters long # # lite_example(seqs, length_of_every_sequence, allowed_letters) # # # Results: # # i j chi_square contingency_coefficient # 1 2 0.0 0.0 # 1 3 0.0 0.0 # 1 4 0.0 0.0 # 1 5 0.0 0.0 # 2 3 0.0 0.0 # 2 4 12.0 0.816496580927726 # 2 5 0.0 0.0 # 3 4 0.0 0.0 # 3 5 0.0 0.0 # 4 5 0.0 0.0 # # Here we can see that the strength of the correlation of change has # increased when more data is added with correlated changes at the # same positions. # # seqs = Array.new # seqs << 'abcde' # seqs << 'abcde' # seqs << 'kacje' # changed first letter # seqs << 'aacae' # seqs << 'akcfa' # changed last letter # seqs << 'akcfe' # # length_of_every_sequence = seqs[0].size # 5 letters long # # lite_example(seqs, length_of_every_sequence, allowed_letters) # # # Results: # # i j chi_square contingency_coefficient # 1 2 2.4 0.534522483824849 # 1 3 0.0 0.0 # 1 4 6.0 0.707106781186548 # 1 5 0.24 0.196116135138184 # 2 3 0.0 0.0 # 2 4 12.0 0.816496580927726 # 2 5 2.4 0.534522483824849 # 3 4 0.0 0.0 # 3 5 0.0 0.0 # 4 5 2.4 0.534522483824849 # # With random changes it becomes more difficult to identify correlated # changes, yet positions two and four still have the highest # correlation as indicated by the contingency coefficient. The best # way to improve the accuracy of your results, as is often the case # with statistics, is to increase the sample size. # # # = A Note on Efficiency # # ContingencyTable is slow. It involves many calculations for even a # seemingly small five-string data set. Even worse, it's very # dependent on matrix traversal, and this is done with two dimensional # hashes which dashes any hope of decent speed. # # Finally, half of the matrix is redundant and positions could be # summed with their companion position to reduce calculations. 
module Bio

  # For example the positions (5,2) and (2,5) could both have their values
  # added together and just stored in (2,5) while (5,2) could be an
  # illegal position.  Also, positions (1,1), (2,2), (3,3), etc. will
  # never be used.
  #
  # The purpose of this package is flexibility and education.  The code
  # is short and to the point in aims of achieving that purpose.  If the
  # BioRuby project moves towards C extensions in the future a
  # professional caliber version will likely be created.
  #
  class ContingencyTable
    # Since we're making this math-notation friendly here is the layout of @table:
    # * @table[row][column]
    # * @table[i][j]
    # * @table[y][x]
    attr_accessor :table
    attr_reader :characters

    # Create a ContingencyTable that has characters_in_sequence.size rows and
    # characters_in_sequence.size columns for each row.  Every cell starts
    # at 0 and every row is its own Hash, so incrementing a cell in one row
    # never affects another row.
    #
    # ---
    # *Arguments*
    # * +characters_in_sequences+: (_optional_) The allowable characters that will be present in the aligned sequences.
    # *Returns*:: +ContingencyTable+ object to be filled with values and calculated upon
    def initialize(characters_in_sequences = nil)
      @characters = (characters_in_sequences || %w{a c d e f g h i k l m n p q r s t v w y - x u})
      @table = {}
      @characters.each do |row_key|
        row = {}
        @characters.each { |column_key| row[column_key] = 0 }
        @table[row_key] = row
      end
    end

    # Report the sum of all values in a given row
    #
    # ---
    # *Arguments*
    # * +i+: Row to sum
    # *Returns*:: +Integer+ sum of row
    def row_sum(i)
      total = 0
      @table[i].each_value { |count| total += count }
      total
    end

    # Report the sum of all values in a given column
    #
    # ---
    # *Arguments*
    # * +j+: Column to sum
    # *Returns*:: +Integer+ sum of column
    def column_sum(j)
      total = 0
      @table.each_value { |row| total += row[j] }
      total
    end

    # Report the sum of all values in all columns.
    #
    # * This is the same thing as asking for the sum of all values in the table.
    #
    # ---
    # *Arguments*
    # * _none_
    # *Returns*:: +Integer+ sum of all columns
    def column_sum_all
      total = 0
      @characters.each { |j| total += column_sum(j) }
      total
    end

    # Report the sum of all values in all rows.
    #
    # * This is the same thing as asking for the sum of all values in the table.
    #
    # ---
    # *Arguments*
    # * _none_
    # *Returns*:: +Integer+ sum of all rows
    def row_sum_all
      total = 0
      @characters.each { |i| total += row_sum(i) }
      total
    end
    alias table_sum_all row_sum_all

    # Calculate _e_, the _expected_ value.
    #
    # ---
    # *Arguments*
    # * +i+: row
    # * +j+: column
    # *Returns*:: +Float+ e(sub:ij) = (r(sub:i)/N) * (c(sub:j))
    def expected(i, j)
      (row_sum(i).to_f / table_sum_all) * column_sum(j)
    end

    # Report the chi square of the entire table
    #
    # The row sums, column sums and the table total are computed once per
    # call instead of once per cell.  The per-cell arithmetic is the same
    # as in +expected+ and +chi_square_element+, and cells are visited in
    # the same order (rows, then columns, in +characters+ order).
    #
    # A table with no counts at all yields NaN, because the expected
    # values are computed as 0.0 / 0.
    #
    # ---
    # *Arguments*
    # * _none_
    # *Returns*:: +Float+ chi square value
    def chi_square
      row_totals = {}
      column_totals = {}
      @characters.each do |key|
        row_totals[key] = row_sum(key)
        column_totals[key] = column_sum(key)
      end
      grand_total = table_sum_all

      total = 0
      @characters.each do |i|   # Loop through every row in the ContingencyTable
        row_fraction = row_totals[i].to_f / grand_total
        @characters.each do |j| # Loop through every column in the ContingencyTable
          eij = row_fraction * column_totals[j]
          next if eij == 0      # such a cell contributes nothing
          total += (@table[i][j] - eij)**2 / eij
        end
      end
      total
    end

    # Report the chi-square relation of two elements in the table
    #
    # ---
    # *Arguments*
    # * +i+: row
    # * +j+: column
    # *Returns*:: +Float+ chi-square of an intersection (0 when the expected value is 0)
    def chi_square_element(i, j)
      eij = expected(i, j)
      return 0 if eij == 0
      (@table[i][j] - eij)**2 / eij
    end

    # Report the contingency coefficient of the table
    #
    # Computed as sqrt(chi_square / (N + chi_square)) where N is the sum of
    # all values in the table.
    #
    # ---
    # *Arguments*
    # * _none_
    # *Returns*:: +Float+ contingency_coefficient of the table
    def contingency_coefficient
      c_s = chi_square
      Math.sqrt(c_s / (table_sum_all + c_s))
    end

  end # ContingencyTable
end # Bio

# bio-1.4.3.0001/lib/bio/util/sirna.rb0000644000004100000410000001556612200110570016635 0ustar www-datawww-data
#
# = bio/util/sirna.rb - Class for designing small inhibitory RNAs
#
# Copyright::   Copyright (C) 2004, 2005
#               Itoshi NIKAIDO
# License::     The Ruby License
#
# $Id:$
#
# == Bio::SiRNA - Designing siRNA.
#
# This class implements the selection rules described by Kumiko Ui-Tei
# et al.
# (2004) and Reynolds et al. (2004).
#
# == Example
#
#  seq = Bio::Sequence::NA.new(ARGF.read)
#
#  sirna = Bio::SiRNA.new(seq)
#  pairs = sirna.design
#
#  pairs.each do |pair|
#    puts pair.report
#    shrna = Bio::SiRNA::ShRNA.new(pair)
#    shrna.design
#    puts shrna.report
#
#    puts shrna.top_strand.dna
#    puts shrna.bottom_strand.dna
#  end
#
# == References
#
# * Kumiko Ui-Tei et al.  Guidelines for the selection of highly effective
#   siRNA sequences for mammalian and chick RNA interference.
#   Nucl. Acids. Res. 2004 32: 936-948.
#
# * Angela Reynolds et al.  Rational siRNA design for RNA interference.
#   Nature Biotech. 2004 22: 326-330.
#

require 'bio/sequence'

module Bio

  # = Bio::SiRNA
  # Designing siRNA.
  #
  # This class implements the selection rules described by Kumiko Ui-Tei
  # et al. (2004) and Reynolds et al. (2004).
  class SiRNA

    # A parameter of size of antisense.
    attr_accessor :antisense_size

    # A parameter of maximal %GC.
    attr_accessor :max_gc_percent

    # A parameter of minimum %GC.
    attr_accessor :min_gc_percent

    # Input is a Bio::Sequence::NA object (the target sequence).
    # Output is a list of Bio::SiRNA::Pair object.
    #
    # NOTE(review): +seq.rna!+ is a bang method, so it presumably converts
    # the caller's sequence object in place -- confirm against
    # Bio::Sequence::NA.
    def initialize(seq, antisense_size = 21, max_gc_percent = 60.0, min_gc_percent = 40.0)
      @seq = seq.rna!
      @pairs = Array.new
      @antisense_size = antisense_size
      @max_gc_percent = max_gc_percent
      @min_gc_percent = min_gc_percent
    end

    # Ui-Tei's rule.
    #
    # Returns true when +target+ passes all of the checks below and false
    # otherwise:
    # * the third base is G or C,
    # * the third base from the end is A or U,
    # * there is no run of nine consecutive G/C bases,
    # * the sub-sequence taken near the end of the target (see below)
    #   contains at least five A/U bases.
    #
    # The size used for the positional arithmetic is taken from +target+
    # itself, so this predicate no longer depends on +design+ having been
    # called first (previously <tt>@target_size</tt> was nil until then).
    # Inside +design+ the two values are identical because every window has
    # exactly <tt>@target_size</tt> bases.
    def uitei?(target)
      return false unless /^.{2}[GC]/i =~ target
      return false unless /[AU].{2}$/i =~ target
      return false if /[GC]{9}/i =~ target

      target_size = target.size
      one_third   = target_size * 1 / 3
      start_pos   = target_size - one_third - 1
      # NOTE(review): subseq is presumably 1-based and inclusive -- confirm
      # against Bio::Sequence.
      remain_seq  = target.subseq(start_pos, target_size - 2)
      au_number   = remain_seq.scan(/[AU]/i).size
      return false if au_number < 5
      return true
    end

    # Reynolds' rule.
    #
    # Returns false when +target+ contains a run of nine consecutive G/C
    # bases or does not match the positional pattern below, true otherwise.
    def reynolds?(target)
      return false if /[GC]{9}/i =~ target
      return false unless /^.{4}A.{6}U.{2}[AUC].{5}[AU].{2}$/i =~ target
      return true
    end

    # same as design('uitei').
    def uitei
      design('uitei')
    end

    # same as design('reynolds').
    def reynolds
      design('reynolds')
    end

    # rule can be one of 'uitei' (default) and 'reynolds'.
    #
    # Slides a window of <tt>antisense_size + 2</tt> bases over the
    # sequence, skips windows whose antisense %GC is outside
    # [min_gc_percent, max_gc_percent] or that fail the selected rule, and
    # returns the list of Bio::SiRNA::Pair objects collected so far.
    #
    # Raises NotImplementedError for an unknown rule, but only once a
    # window has passed the %GC filter.
    #
    # NOTE(review): @pairs is created in +initialize+ and never cleared, so
    # repeated calls (including via +uitei+ / +reynolds+) keep appending to
    # the same array -- confirm whether accumulation is intended.
    def design(rule = 'uitei')
      @target_size = @antisense_size + 2

      target_start = 0
      @seq.window_search(@target_size) do |target|
        antisense = target.subseq(1, @target_size - 2).complement.rna
        sense     = target.subseq(3, @target_size)

        # target_start is 1-based: it is incremented before use.
        target_start += 1
        # NOTE(review): start + size is one more than the 1-based position
        # of the window's last base -- confirm the intended convention.
        target_stop  = target_start + @target_size

        antisense_gc_percent = antisense.gc_percent
        next if antisense_gc_percent > @max_gc_percent
        next if antisense_gc_percent < @min_gc_percent

        case rule
        when 'uitei'
          next unless uitei?(target)
        when 'reynolds'
          next unless reynolds?(target)
        else
          raise NotImplementedError
        end

        pair = Bio::SiRNA::Pair.new(target, sense, antisense, target_start, target_stop, rule, antisense_gc_percent)
        @pairs.push(pair)
      end
      return @pairs
    end

    # = Bio::SiRNA::Pair
    #
    # Value holder for one designed siRNA: the target window, its sense and
    # antisense strands, the start/stop positions, the rule that selected
    # it and the antisense %GC.
    class Pair

      attr_accessor :target

      attr_accessor :sense

      attr_accessor :antisense

      attr_accessor :start

      attr_accessor :stop

      attr_accessor :rule

      attr_accessor :gc_percent

      def initialize(target, sense, antisense, start, stop, rule, gc_percent)
        @target     = target
        @sense      = sense
        @antisense  = antisense
        @start      = start
        @stop       = stop
        @rule       = rule
        @gc_percent = gc_percent
      end

      # human readable report
      def report
        report =  "### siRNA\n"
        report << 'Start: ' + @start.to_s + "\n"
        report << 'Stop:  ' + @stop.to_s  + "\n"
        report << 'Rule:  ' + @rule.to_s  + "\n"
        report << 'GC %:  ' + @gc_percent.to_s  + "\n"
        report << 'Target:    '        + @target.upcase + "\n"
        report << 'Sense:     ' + '  ' + @sense.upcase  + "\n"
        report << 'Antisense: '        + @antisense.reverse.upcase + "\n"
      end

      # computer parsable report
      #def to_s
      #  [ @antisense, @start, @stop ].join("\t")
      #end

    end # class Pair


    # = Bio::SiRNA::ShRNA
    # Designing shRNA.
    class ShRNA

      # Bio::Sequence::NA
      attr_accessor :top_strand

      # Bio::Sequence::NA
      attr_accessor :bottom_strand

      # Input is a Bio::SiRNA::Pair object (the target sequence).
      def initialize(pair)
        @pair = pair
      end

      # only the 'BLOCK-iT' rule is implemented for now.
      #
      # Raises NotImplementedError for any other method name.
      #
      # NOTE(review): this calls +block_it+ without an argument, so the
      # loop sequence used is +block_it+'s default ('piGENE') -- confirm
      # that this is intended.
      def design(method = 'BLOCK-iT')
        case method
        when 'BLOCK-iT'
          block_it
        else
          raise NotImplementedError
        end
      end

      # same as design('BLOCK-iT').
      # method can be one of 'piGENE' (default) and 'BLOCK-iT'.
      #
      # Builds +top_strand+ and +bottom_strand+ from the pair's sense
      # sequence, its complement, a loop sequence chosen by +method+ and
      # fixed 4-base overhangs.  When the sense sequence does not start
      # with G, a 'G' is inserted after the top overhang and a 'C' is
      # appended to the bottom strand.
      # Raises NotImplementedError for an unknown +method+.
      def block_it(method = 'piGENE')
        top = Bio::Sequence::NA.new('CACC') # top_strand_shrna_overhang
        bot = Bio::Sequence::NA.new('AAAA') # bottom_strand_shrna_overhang
        fwd = @pair.sense
        rev = @pair.sense.complement

        case method
        when 'BLOCK-iT'
          # From BLOCK-iT's manual
          loop_fwd = Bio::Sequence::NA.new('CGAA')
          loop_rev = loop_fwd.complement
        when 'piGENE'
          # From piGENE document
          loop_fwd = Bio::Sequence::NA.new('GTGTGCTGTCC')
          loop_rev = loop_fwd.complement
        else
          raise NotImplementedError
        end

        if /^G/i =~ fwd
          @top_strand    = top + fwd + loop_fwd + rev
          @bottom_strand = bot + fwd + loop_rev + rev
        else
          @top_strand    = top + 'G' + fwd + loop_fwd + rev
          @bottom_strand = bot + fwd + loop_rev + rev + 'C'
        end
      end

      # human readable report
      def report
        report = "### shRNA\n"
        report << "Top strand shRNA (#{@top_strand.length} nt):\n"
        report << "  5'-#{@top_strand.upcase}-3'\n"
        report << "Bottom strand shRNA (#{@bottom_strand.length} nt):\n"
        report << "      3'-#{@bottom_strand.reverse.upcase}-5'\n"
      end

    end # class ShRNA

  end # class SiRNA

end # module Bio

=begin

= ChangeLog

  2005/03/21 Itoshi NIKAIDO
  Bio::SiRNA#ShRNA_designer method was changed design method.

  2004/06/25
  Bio::ShRNA class was added.

  2004/06/17 Itoshi NIKAIDO
  We can use shRNA loop sequence from piGene document.

=end
=end bio-1.4.3.0001/lib/bio/util/restriction_enzyme.rb0000644000004100000410000002012212200110570021435 0ustar www-datawww-data# # bio/util/restriction_enzyme.rb - Digests DNA based on restriction enzyme cut patterns # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # module Bio autoload :REBASE, 'bio/db/rebase' unless const_defined?(:REBASE) # = Description # # Bio::RestrictionEnzyme allows you to fragment a DNA strand using one # or more restriction enzymes. Bio::RestrictionEnzyme is aware that # multiple enzymes may be competing for the same recognition site and # returns the various possible fragmentation patterns that result in # such circumstances. # # When using Bio::RestrictionEnzyme you may simply use the name of common # enzymes to cut your sequence or you may construct your own unique enzymes # to use. # # Visit the documentaion for individual classes for more information. # # An examination of the unit tests will also reveal several interesting uses # for the curious programmer. # # = Usage # # == Basic # # EcoRI cut pattern: # G|A A T T C # +-------+ # C T T A A|G # # This can also be written as: # G^AATTC # # Note that to use the method +cut_with_enzyme+ from a Bio::Sequence object # you currently must +require+ +bio/util/restriction_enzyme+ directly. If # instead you're going to directly call Bio::RestrictionEnzyme::Analysis # then only +bio+ needs to be +required+. 
# # require 'bio' # require 'bio/util/restriction_enzyme' # # seq = Bio::Sequence::NA.new('gaattc') # cuts = seq.cut_with_enzyme('EcoRI') # cuts.primary # => ["aattc", "g"] # cuts.complement # => ["cttaa", "g"] # cuts.inspect # => "[#, #]" # # seq = Bio::Sequence::NA.new('gaattc') # cuts = seq.cut_with_enzyme('g^aattc') # cuts.primary # => ["aattc", "g"] # cuts.complement # => ["cttaa", "g"] # # seq = Bio::Sequence::NA.new('gaattc') # cuts = seq.cut_with_enzyme('g^aattc', 'gaatt^c') # cuts.primary # => ["aattc", "c", "g", "gaatt"] # cuts.complement # => ["c", "cttaa", "g", "ttaag"] # # seq = Bio::Sequence::NA.new('gaattcgaattc') # cuts = seq.cut_with_enzyme('EcoRI') # cuts.primary # => ["aattc", "aattcg", "g"] # cuts.complement # => ["cttaa", "g", "gcttaa"] # # seq = Bio::Sequence::NA.new('gaattcgggaattc') # cuts = seq.cut_with_enzyme('EcoRI') # cuts.primary # => ["aattc", "aattcggg", "g"] # cuts.complement # => ["cttaa", "g", "gcccttaa"] # # cuts[0].inspect # => "#" # # cuts[0].primary # => "g " # cuts[0].complement # => "cttaa" # # cuts[1].primary # => "aattcggg " # cuts[1].complement # => " gcccttaa" # # cuts[2].primary # => "aattc" # cuts[2].complement # => " g" # # == Advanced # # require 'bio' # # enzyme_1 = Bio::RestrictionEnzyme.new('anna', [1,1], [3,3]) # enzyme_2 = Bio::RestrictionEnzyme.new('gg', [1,1]) # a = Bio::RestrictionEnzyme::Analysis.cut('agga', enzyme_1, enzyme_2) # a.primary # => ["a", "ag", "g", "ga"] # a.complement # => ["c", "ct", "t", "tc"] # # a[0].primary # => "ag" # a[0].complement # => "tc" # # a[1].primary # => "ga" # a[1].complement # => "ct" # # a[2].primary # => "a" # a[2].complement # => "t" # # a[3].primary # => "g" # a[3].complement # => "c" # # = Todo / under development # # * Circular DNA cutting # class RestrictionEnzyme #require 'bio/util/restriction_enzyme/cut_symbol' autoload :CutSymbol, 'bio/util/restriction_enzyme/cut_symbol' autoload :StringFormatting, 'bio/util/restriction_enzyme/string_formatting' autoload 
:SingleStrand, 'bio/util/restriction_enzyme/single_strand' autoload :SingleStrandComplement, 'bio/util/restriction_enzyme/single_strand_complement' autoload :DoubleStranded, 'bio/util/restriction_enzyme/double_stranded' autoload :Analysis, 'bio/util/restriction_enzyme/analysis' autoload :Range, 'bio/util/restriction_enzyme/range/sequence_range' autoload :SortedNumArray, 'bio/util/restriction_enzyme/sorted_num_array' autoload :DenseIntArray, 'bio/util/restriction_enzyme/dense_int_array' include CutSymbol extend CutSymbol # See Bio::RestrictionEnzyme::DoubleStranded.new for more information. # # --- # *Arguments* # * +users_enzyme_or_rebase_or_pattern+: One of three possible parameters: The name of an enzyme, a REBASE::EnzymeEntry object, or a nucleotide pattern with a cut mark. # * +cut_locations+: The cut locations in enzyme index notation. # *Returns*:: Bio::RestrictionEnzyme::DoubleStranded #-- # Factory for DoubleStranded #++ def self.new(users_enzyme_or_rebase_or_pattern, *cut_locations) DoubleStranded.new(users_enzyme_or_rebase_or_pattern, *cut_locations) end # REBASE enzyme data information # # Returns a Bio::REBASE object loaded with all of the enzyme data on file. # # --- # *Arguments* # * _none_ # *Returns*:: Bio::REBASE def self.rebase enzymes_yaml_file = File.join(File.dirname(File.expand_path(__FILE__)), 'restriction_enzyme', 'enzymes.yaml') @@rebase_enzymes ||= Bio::REBASE.load_yaml(enzymes_yaml_file) @@rebase_enzymes end # Check if supplied name is the name of an available enzyme # # See Bio::REBASE.enzyme_name? 
# # --- # *Arguments* # * +name+: Enzyme name # *Returns*:: +true+ _or_ +false+ def self.enzyme_name?( name ) self.rebase.enzyme_name?(name) end # See Bio::RestrictionEnzyme::Analysis.cut def self.cut( sequence, enzymes ) Bio::RestrictionEnzyme::Analysis.cut( sequence, enzymes ) end # A Bio::RestrictionEnzyme::Fragment is a DNA fragment composed of fused primary and # complementary strands that would be found floating in solution after a full # sequence is digested by one or more RestrictionEnzymes. # # You will notice that either the primary or complement strand will be # padded with spaces to make them line up according to the original DNA # configuration before they were cut. # # Example: # # Fragment 1: # primary = "attaca" # complement = " atga" # # Fragment 2: # primary = "g " # complement = "cta" # # View these with the +primary+ and +complement+ methods. # # Bio::RestrictionEnzyme::Fragment is a simple +Struct+ object. # # Note: unrelated to Bio::RestrictionEnzyme::Range::SequenceRange::Fragment Fragment = Struct.new(:primary, :complement, :p_left, :p_right, :c_left, :c_right) # Bio::RestrictionEnzyme::Fragments inherits from +Array+. # # Bio::RestrictionEnzyme::Fragments is a container for Fragment objects. It adds the # methods +primary+ and +complement+ which returns an +Array+ of all # respective strands from it's Fragment members in alphabetically sorted # order. Note that it will # not return duplicate items and does not return the spacing/padding # that you would # find by accessing the members directly. 
# # Example: # # primary = ['attaca', 'g'] # complement = ['atga', 'cta'] # # Note: unrelated to Bio::RestrictionEnzyme::Range::SequenceRange::Fragments class Fragments < Array def primary; strip_and_sort(:primary); end def complement; strip_and_sort(:complement); end protected def strip_and_sort( sym_strand ) self.map {|uf| uf.send( sym_strand ).tr(' ', '') }.sort end end end # RestrictionEnzyme end # Bio bio-1.4.3.0001/lib/bio/util/restriction_enzyme/0000755000004100000410000000000012200110570021113 5ustar www-datawww-databio-1.4.3.0001/lib/bio/util/restriction_enzyme/string_formatting.rb0000644000004100000410000000546712200110570025214 0ustar www-datawww-data# # bio/util/restriction_enzyme/string_formatting.rb - Useful functions for string manipulation # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # module Bio require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme) class RestrictionEnzyme module StringFormatting include CutSymbol extend CutSymbol # Return the sequence with spacing for alignment. Does not add whitespace # around cut symbols. # # Example: # pattern = 'n^ng^arraxt^n' # add_spacing( pattern ) # => "n^n g^a r r a x t^n" # # --- # *Arguments* # * +seq+: sequence with cut symbols # * +cs+: (_optional_) Cut symbol along the string. 
The reason this is # definable outside of CutSymbol is that this is a utility function used # to form vertical and horizontal cuts such as: # # a|t g c # +---+ # t a c|g # *Returns*:: +String+ sequence with single character distance between bases def add_spacing( seq, cs = cut_symbol ) str = '' flag = false seq.each_byte do |c| c = c.chr if c == cs str += c flag = false elsif flag str += ' ' + c else str += c flag = true end end str end # Remove extraneous nucleic acid wildcards ('n' padding) from the # left and right sides # # --- # *Arguments* # * +s+: sequence with extraneous 'n' padding # *Returns*:: +String+ sequence without 'n' padding on the sides def strip_padding( s ) if s[0].chr == 'n' s =~ %r{(n+)(.+)} s = $2 end if s[-1].chr == 'n' s =~ %r{(.+?)(n+)$} s = $1 end s end # Remove extraneous nucleic acid wildcards ('n' padding) from the # left and right sides and remove cut symbols # # --- # *Arguments* # * +s+: sequence with extraneous 'n' padding and cut symbols # *Returns*:: +String+ sequence without 'n' padding on the sides or cut symbols def strip_cuts_and_padding( s ) strip_padding( s.tr(cut_symbol, '') ) end # Return the 'n' padding on the left side of the strand # # --- # *Arguments* # * +s+: sequence with extraneous 'n' padding on the left side of the strand # *Returns*:: +String+ the 'n' padding from the left side def left_padding( s ) s =~ %r{^n+} ret = $& ret ? ret : '' # Don't pass nil values end # Return the 'n' padding on the right side of the strand # # --- # *Arguments* # * +s+: sequence with extraneous 'n' padding on the right side of the strand # *Returns*:: +String+ the 'n' padding from the right side def right_padding( s ) s =~ %r{n+$} ret = $& ret ? 
ret : '' # Don't pass nil values end end # StringFormatting end # RestrictionEnzyme end # Bio bio-1.4.3.0001/lib/bio/util/restriction_enzyme/enzymes.yaml0000644000004100000410000025705712200110570023511 0ustar www-datawww-data--- TspRI: :len: "5" :c1: "7" :c4: "0" :ncuts: "2" :pattern: CASTG :c2: "-3" :name: TspRI :blunt: "0" :c3: "0" MvnI: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CGCG :c2: "2" :name: MvnI :blunt: "1" :c3: "0" AclI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: AACGTT :c2: "4" :name: AclI :blunt: "0" :c3: "0" SfuI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: TTCGAA :c2: "4" :name: SfuI :blunt: "0" :c3: "0" ScrFI: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCNGG :c2: "3" :name: ScrFI :blunt: "0" :c3: "0" EcoO109I: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: RGGNCCY :c2: "5" :name: EcoO109I :blunt: "0" :c3: "0" TssI: :len: "9" :c1: "0" :c4: "0" :ncuts: "0" :pattern: gagnnnctc :c2: "0" :name: TssI :blunt: "0" :c3: "0" PpiI: :len: "12" :c1: "-8" :c4: "20" :ncuts: "4" :pattern: GAACNNNNNCTC :c2: "-13" :name: PpiI :blunt: "0" :c3: "25" Mph1103I: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: ATGCAT :c2: "1" :name: Mph1103I :blunt: "0" :c3: "0" Eco81I: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCTNAGG :c2: "5" :name: Eco81I :blunt: "0" :c3: "0" BspACI: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCGC :c2: "3" :name: BspACI :blunt: "0" :c3: "0" Eco105I: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: TACGTA :c2: "3" :name: Eco105I :blunt: "1" :c3: "0" Eco24I: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GRGCYC :c2: "1" :name: Eco24I :blunt: "0" :c3: "0" BseRI: :len: "6" :c1: "16" :c4: "0" :ncuts: "2" :pattern: GAGGAG :c2: "14" :name: BseRI :blunt: "0" :c3: "0" AxyI: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCTNAGG :c2: "5" :name: AxyI :blunt: "0" :c3: "0" SecI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ccnngg :c2: "5" :name: SecI :blunt: "0" :c3: "0" PmaCI: :len: "6" :c1: "3" :c4: "0" 
:ncuts: "2" :pattern: CACGTG :c2: "3" :name: PmaCI :blunt: "1" :c3: "0" HgiJII: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: grgcyc :c2: "1" :name: HgiJII :blunt: "0" :c3: "0" CauII: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: ccsgg :c2: "3" :name: CauII :blunt: "0" :c3: "0" BssKI: :len: "5" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: CCNGG :c2: "5" :name: BssKI :blunt: "0" :c3: "0" AarI: :len: "7" :c1: "11" :c4: "0" :ncuts: "2" :pattern: CACCTGC :c2: "15" :name: AarI :blunt: "0" :c3: "0" StsI: :len: "5" :c1: "15" :c4: "0" :ncuts: "2" :pattern: ggatg :c2: "19" :name: StsI :blunt: "0" :c3: "0" Rsr2I: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CGGWCCG :c2: "5" :name: Rsr2I :blunt: "0" :c3: "0" BbvI: :len: "5" :c1: "13" :c4: "0" :ncuts: "2" :pattern: GCAGC :c2: "17" :name: BbvI :blunt: "0" :c3: "0" MmeI: :len: "6" :c1: "26" :c4: "0" :ncuts: "2" :pattern: TCCRAC :c2: "24" :name: MmeI :blunt: "0" :c3: "0" FseI: :len: "8" :c1: "6" :c4: "0" :ncuts: "2" :pattern: GGCCGGCC :c2: "2" :name: FseI :blunt: "0" :c3: "0" SciI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: ctcgag :c2: "3" :name: SciI :blunt: "1" :c3: "0" PacI: :len: "8" :c1: "5" :c4: "0" :ncuts: "2" :pattern: TTAATTAA :c2: "3" :name: PacI :blunt: "0" :c3: "0" Bse21I: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCTNAGG :c2: "5" :name: Bse21I :blunt: "0" :c3: "0" AcvI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: CACGTG :c2: "3" :name: AcvI :blunt: "1" :c3: "0" DsaI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ccrygg :c2: "5" :name: DsaI :blunt: "0" :c3: "0" Bsp119I: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: TTCGAA :c2: "4" :name: Bsp119I :blunt: "0" :c3: "0" TliI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTCGAG :c2: "5" :name: TliI :blunt: "0" :c3: "0" PpsI: :len: "5" :c1: "9" :c4: "0" :ncuts: "2" :pattern: GAGTC :c2: "10" :name: PpsI :blunt: "0" :c3: "0" Ksp22I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TGATCA :c2: "5" :name: Ksp22I :blunt: "0" :c3: "0" 
BccI: :len: "5" :c1: "9" :c4: "0" :ncuts: "2" :pattern: CCATC :c2: "10" :name: BccI :blunt: "0" :c3: "0" BtrI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: CACGTC :c2: "3" :name: BtrI :blunt: "1" :c3: "0" BptI: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCWGG :c2: "3" :name: BptI :blunt: "0" :c3: "0" Bce83I: :len: "6" :c1: "22" :c4: "0" :ncuts: "2" :pattern: cttgag :c2: "20" :name: Bce83I :blunt: "0" :c3: "0" SmiI: :len: "8" :c1: "4" :c4: "0" :ncuts: "2" :pattern: ATTTAAAT :c2: "4" :name: SmiI :blunt: "1" :c3: "0" Sfr274I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTCGAG :c2: "5" :name: Sfr274I :blunt: "0" :c3: "0" PvuI: :len: "6" :c1: "4" :c4: "0" :ncuts: "2" :pattern: CGATCG :c2: "2" :name: PvuI :blunt: "0" :c3: "0" BslFI: :len: "5" :c1: "15" :c4: "0" :ncuts: "2" :pattern: GGGAC :c2: "19" :name: BslFI :blunt: "0" :c3: "0" AssI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: AGTACT :c2: "3" :name: AssI :blunt: "1" :c3: "0" VpaK11AI: :len: "5" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: ggwcc :c2: "5" :name: VpaK11AI :blunt: "0" :c3: "0" TspDTI: :len: "5" :c1: "16" :c4: "0" :ncuts: "2" :pattern: ATGAA :c2: "14" :name: TspDTI :blunt: "0" :c3: "0" MslI: :len: "10" :c1: "5" :c4: "0" :ncuts: "2" :pattern: CAYNNNNRTG :c2: "5" :name: MslI :blunt: "1" :c3: "0" HindIII: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: AAGCTT :c2: "5" :name: HindIII :blunt: "0" :c3: "0" AlwNI: :len: "9" :c1: "6" :c4: "0" :ncuts: "2" :pattern: CAGNNNCTG :c2: "3" :name: AlwNI :blunt: "0" :c3: "0" BstBI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: TTCGAA :c2: "4" :name: BstBI :blunt: "0" :c3: "0" BspDI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: ATCGAT :c2: "4" :name: BspDI :blunt: "0" :c3: "0" Csp6I: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GTAC :c2: "3" :name: Csp6I :blunt: "0" :c3: "0" Aor13HI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TCCGGA :c2: "5" :name: Aor13HI :blunt: "0" :c3: "0" UbaF14I: :len: "11" :c1: "0" :c4: "0" :ncuts: "0" 
:pattern: ccannnnntcg :c2: "0" :name: UbaF14I :blunt: "0" :c3: "0" TaaI: :len: "5" :c1: "3" :c4: "0" :ncuts: "2" :pattern: ACNGT :c2: "2" :name: TaaI :blunt: "0" :c3: "0" SatI: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GCNGC :c2: "3" :name: SatI :blunt: "0" :c3: "0" MjaIV: :len: "6" :c1: "0" :c4: "0" :ncuts: "0" :pattern: gtnnac :c2: "0" :name: MjaIV :blunt: "0" :c3: "0" LpnI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: rgcgcy :c2: "3" :name: LpnI :blunt: "1" :c3: "0" BanI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGYRCC :c2: "5" :name: BanI :blunt: "0" :c3: "0" FauNDI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CATATG :c2: "4" :name: FauNDI :blunt: "0" :c3: "0" AspA2I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCTAGG :c2: "5" :name: AspA2I :blunt: "0" :c3: "0" Eco130I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCWWGG :c2: "5" :name: Eco130I :blunt: "0" :c3: "0" PalAI: :len: "8" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GGCGCGCC :c2: "6" :name: PalAI :blunt: "0" :c3: "0" MwoI: :len: "11" :c1: "7" :c4: "0" :ncuts: "2" :pattern: GCNNNNNNNGC :c2: "4" :name: MwoI :blunt: "0" :c3: "0" BstEII: :len: "7" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGTNACC :c2: "6" :name: BstEII :blunt: "0" :c3: "0" Bsp120I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGGCCC :c2: "5" :name: Bsp120I :blunt: "0" :c3: "0" SspI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: AATATT :c2: "3" :name: SspI :blunt: "1" :c3: "0" PmlI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: CACGTG :c2: "3" :name: PmlI :blunt: "1" :c3: "0" MfeI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CAATTG :c2: "5" :name: MfeI :blunt: "0" :c3: "0" HpyCH4V: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: TGCA :c2: "2" :name: HpyCH4V :blunt: "1" :c3: "0" AvaIII: :len: "6" :c1: "0" :c4: "0" :ncuts: "0" :pattern: atgcat :c2: "0" :name: AvaIII :blunt: "0" :c3: "0" RcaI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TCATGA :c2: "5" :name: RcaI :blunt: "0" :c3: "0" 
PsiI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: TTATAA :c2: "3" :name: PsiI :blunt: "1" :c3: "0" Hsp92II: :len: "4" :c1: "4" :c4: "0" :ncuts: "2" :pattern: CATG :c2: "-1" :name: Hsp92II :blunt: "0" :c3: "0" Alw21I: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GWGCWC :c2: "1" :name: Alw21I :blunt: "0" :c3: "0" BstENI: :len: "11" :c1: "5" :c4: "0" :ncuts: "2" :pattern: CCTNNNNNAGG :c2: "6" :name: BstENI :blunt: "0" :c3: "0" BstAPI: :len: "11" :c1: "7" :c4: "0" :ncuts: "2" :pattern: GCANNNNNTGC :c2: "4" :name: BstAPI :blunt: "0" :c3: "0" SbfI: :len: "8" :c1: "6" :c4: "0" :ncuts: "2" :pattern: CCTGCAGG :c2: "2" :name: SbfI :blunt: "0" :c3: "0" MaeII: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ACGT :c2: "3" :name: MaeII :blunt: "0" :c3: "0" HapII: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCGG :c2: "3" :name: HapII :blunt: "0" :c3: "0" BpuAI: :len: "6" :c1: "8" :c4: "0" :ncuts: "2" :pattern: GAAGAC :c2: "12" :name: BpuAI :blunt: "0" :c3: "0" DdeI: :len: "5" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTNAG :c2: "4" :name: DdeI :blunt: "0" :c3: "0" Ama87I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CYCGRG :c2: "5" :name: Ama87I :blunt: "0" :c3: "0" AbsI: :len: "8" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCTCGAGG :c2: "6" :name: AbsI :blunt: "0" :c3: "0" SseBI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: AGGCCT :c2: "3" :name: SseBI :blunt: "1" :c3: "0" SlaI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTCGAG :c2: "5" :name: SlaI :blunt: "0" :c3: "0" SgrDI: :len: "8" :c1: "0" :c4: "0" :ncuts: "0" :pattern: cgtcgacg :c2: "0" :name: SgrDI :blunt: "0" :c3: "0" Hpy188I: :len: "5" :c1: "3" :c4: "0" :ncuts: "2" :pattern: TCNGA :c2: "2" :name: Hpy188I :blunt: "0" :c3: "0" Hin4I: :len: "11" :c1: "-9" :c4: "19" :ncuts: "4" :pattern: GAYNNNNNVTC :c2: "-14" :name: Hin4I :blunt: "0" :c3: "24" EcoT22I: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: ATGCAT :c2: "1" :name: EcoT22I :blunt: "0" :c3: "0" BseAI: :len: "6" :c1: "1" :c4: "0" 
:ncuts: "2" :pattern: TCCGGA :c2: "5" :name: BseAI :blunt: "0" :c3: "0" Alw26I: :len: "5" :c1: "6" :c4: "0" :ncuts: "2" :pattern: GTCTC :c2: "10" :name: Alw26I :blunt: "0" :c3: "0" BstAUI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TGTACA :c2: "5" :name: BstAUI :blunt: "0" :c3: "0" Bsp143II: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: RGCGCY :c2: "1" :name: Bsp143II :blunt: "0" :c3: "0" Bpu14I: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: TTCGAA :c2: "4" :name: Bpu14I :blunt: "0" :c3: "0" BmrI: :len: "6" :c1: "11" :c4: "0" :ncuts: "2" :pattern: ACTGGG :c2: "10" :name: BmrI :blunt: "0" :c3: "0" BspNCI: :len: "5" :c1: "0" :c4: "0" :ncuts: "0" :pattern: ccaga :c2: "0" :name: BspNCI :blunt: "0" :c3: "0" BamHI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGATCC :c2: "5" :name: BamHI :blunt: "0" :c3: "0" SfiI: :len: "13" :c1: "8" :c4: "0" :ncuts: "2" :pattern: GGCCNNNNNGGCC :c2: "5" :name: SfiI :blunt: "0" :c3: "0" Psp1406I: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: AACGTT :c2: "4" :name: Psp1406I :blunt: "0" :c3: "0" NdeII: :len: "4" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: GATC :c2: "4" :name: NdeII :blunt: "0" :c3: "0" BstX2I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: RGATCY :c2: "5" :name: BstX2I :blunt: "0" :c3: "0" XceI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: RCATGY :c2: "1" :name: XceI :blunt: "0" :c3: "0" PssI: :len: "7" :c1: "5" :c4: "0" :ncuts: "2" :pattern: rggnccy :c2: "2" :name: PssI :blunt: "0" :c3: "0" Fsp4HI: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GCNGC :c2: "3" :name: Fsp4HI :blunt: "0" :c3: "0" ApeKI: :len: "5" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GCWGC :c2: "4" :name: ApeKI :blunt: "0" :c3: "0" BscGI: :len: "5" :c1: "0" :c4: "0" :ncuts: "0" :pattern: cccgt :c2: "0" :name: BscGI :blunt: "0" :c3: "0" BsaHI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GRCGYC :c2: "4" :name: BsaHI :blunt: "0" :c3: "0" BbeI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GGCGCC :c2: "1" :name: BbeI 
:blunt: "0" :c3: "0" Sth132I: :len: "4" :c1: "8" :c4: "0" :ncuts: "2" :pattern: cccg :c2: "12" :name: Sth132I :blunt: "0" :c3: "0" PvuII: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: CAGCTG :c2: "3" :name: PvuII :blunt: "1" :c3: "0" Hpy99I: :len: "5" :c1: "5" :c4: "0" :ncuts: "2" :pattern: CGWCG :c2: "-1" :name: Hpy99I :blunt: "0" :c3: "0" Fnu4HI: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GCNGC :c2: "3" :name: Fnu4HI :blunt: "0" :c3: "0" BspXI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: ATCGAT :c2: "4" :name: BspXI :blunt: "0" :c3: "0" BsmBI: :len: "6" :c1: "7" :c4: "0" :ncuts: "2" :pattern: CGTCTC :c2: "11" :name: BsmBI :blunt: "0" :c3: "0" MaeIII: :len: "5" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: GTNAC :c2: "5" :name: MaeIII :blunt: "0" :c3: "0" HhaI: :len: "4" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GCGC :c2: "1" :name: HhaI :blunt: "0" :c3: "0" Cfr9I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCCGGG :c2: "5" :name: Cfr9I :blunt: "0" :c3: "0" TauI: :len: "5" :c1: "4" :c4: "0" :ncuts: "2" :pattern: GCSGC :c2: "1" :name: TauI :blunt: "0" :c3: "0" Cfr13I: :len: "5" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGNCC :c2: "4" :name: Cfr13I :blunt: "0" :c3: "0" BsaMI: :len: "6" :c1: "7" :c4: "0" :ncuts: "2" :pattern: GAATGC :c2: "5" :name: BsaMI :blunt: "0" :c3: "0" BpcI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTRYAG :c2: "5" :name: BpcI :blunt: "0" :c3: "0" NotI: :len: "8" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GCGGCCGC :c2: "6" :name: NotI :blunt: "0" :c3: "0" SacII: :len: "6" :c1: "4" :c4: "0" :ncuts: "2" :pattern: CCGCGG :c2: "2" :name: SacII :blunt: "0" :c3: "0" PdmI: :len: "10" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GAANNNNTTC :c2: "5" :name: PdmI :blunt: "1" :c3: "0" CstMI: :len: "6" :c1: "26" :c4: "0" :ncuts: "2" :pattern: aaggag :c2: "24" :name: CstMI :blunt: "0" :c3: "0" CjePI: :len: "12" :c1: "-8" :c4: "20" :ncuts: "4" :pattern: ccannnnnnntc :c2: "-14" :name: CjePI :blunt: "0" :c3: "26" DpnI: :len: "4" :c1: "2" :c4: "0" 
:ncuts: "2" :pattern: GATC :c2: "2" :name: DpnI :blunt: "1" :c3: "0" XapI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: RAATTY :c2: "5" :name: XapI :blunt: "0" :c3: "0" NheI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GCTAGC :c2: "5" :name: NheI :blunt: "0" :c3: "0" BsiHKCI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CYCGRG :c2: "5" :name: BsiHKCI :blunt: "0" :c3: "0" BsePI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GCGCGC :c2: "5" :name: BsePI :blunt: "0" :c3: "0" BveI: :len: "6" :c1: "10" :c4: "0" :ncuts: "2" :pattern: ACCTGC :c2: "14" :name: BveI :blunt: "0" :c3: "0" BfmI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTRYAG :c2: "5" :name: BfmI :blunt: "0" :c3: "0" DraII: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: RGGNCCY :c2: "5" :name: DraII :blunt: "0" :c3: "0" SacI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GAGCTC :c2: "1" :name: SacI :blunt: "0" :c3: "0" AclWI: :len: "5" :c1: "9" :c4: "0" :ncuts: "2" :pattern: GGATC :c2: "10" :name: AclWI :blunt: "0" :c3: "0" AcoI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: YGGCCR :c2: "5" :name: AcoI :blunt: "0" :c3: "0" Bso31I: :len: "6" :c1: "7" :c4: "0" :ncuts: "2" :pattern: GGTCTC :c2: "11" :name: Bso31I :blunt: "0" :c3: "0" KspI: :len: "6" :c1: "4" :c4: "0" :ncuts: "2" :pattern: CCGCGG :c2: "2" :name: KspI :blunt: "0" :c3: "0" BfrI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTTAAG :c2: "5" :name: BfrI :blunt: "0" :c3: "0" FalI: :len: "11" :c1: "-9" :c4: "19" :ncuts: "4" :pattern: AAGNNNNNCTT :c2: "-14" :name: FalI :blunt: "0" :c3: "24" BcefI: :len: "5" :c1: "17" :c4: "0" :ncuts: "2" :pattern: acggc :c2: "18" :name: BcefI :blunt: "0" :c3: "0" Mly113I: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GGCGCC :c2: "4" :name: Mly113I :blunt: "0" :c3: "0" HpyCH4IV: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ACGT :c2: "3" :name: HpyCH4IV :blunt: "0" :c3: "0" FspBI: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTAG :c2: "3" :name: FspBI :blunt: "0" 
:c3: "0" BspT104I: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: TTCGAA :c2: "4" :name: BspT104I :blunt: "0" :c3: "0" BssNI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GRCGYC :c2: "4" :name: BssNI :blunt: "0" :c3: "0" Bst6I: :len: "6" :c1: "7" :c4: "0" :ncuts: "2" :pattern: CTCTTC :c2: "10" :name: Bst6I :blunt: "0" :c3: "0" BsiSI: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCGG :c2: "3" :name: BsiSI :blunt: "0" :c3: "0" BsaWI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: WCCGGW :c2: "5" :name: BsaWI :blunt: "0" :c3: "0" BpmI: :len: "6" :c1: "22" :c4: "0" :ncuts: "2" :pattern: CTGGAG :c2: "20" :name: BpmI :blunt: "0" :c3: "0" BanIII: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: ATCGAT :c2: "4" :name: BanIII :blunt: "0" :c3: "0" AsuC2I: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCSGG :c2: "3" :name: AsuC2I :blunt: "0" :c3: "0" CspI: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CGGWCCG :c2: "5" :name: CspI :blunt: "0" :c3: "0" Bsa29I: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: ATCGAT :c2: "4" :name: Bsa29I :blunt: "0" :c3: "0" AccII: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CGCG :c2: "2" :name: AccII :blunt: "1" :c3: "0" Sth302II: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: ccgg :c2: "2" :name: Sth302II :blunt: "1" :c3: "0" Hpy188III: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: TCNNGA :c2: "4" :name: Hpy188III :blunt: "0" :c3: "0" FaqI: :len: "5" :c1: "15" :c4: "0" :ncuts: "2" :pattern: GGGAC :c2: "19" :name: FaqI :blunt: "0" :c3: "0" EcoT14I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCWWGG :c2: "5" :name: EcoT14I :blunt: "0" :c3: "0" Acc36I: :len: "6" :c1: "10" :c4: "0" :ncuts: "2" :pattern: ACCTGC :c2: "14" :name: Acc36I :blunt: "0" :c3: "0" MseI: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TTAA :c2: "3" :name: MseI :blunt: "0" :c3: "0" CviQI: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GTAC :c2: "3" :name: CviQI :blunt: "0" :c3: "0" BsuRI: :len: "4" :c1: "2" :c4: "0" :ncuts: 
"2" :pattern: GGCC :c2: "2" :name: BsuRI :blunt: "1" :c3: "0" BssT1I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCWWGG :c2: "5" :name: BssT1I :blunt: "0" :c3: "0" BssSI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CACGAG :c2: "5" :name: BssSI :blunt: "0" :c3: "0" ClaI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: ATCGAT :c2: "4" :name: ClaI :blunt: "0" :c3: "0" BanII: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GRGCYC :c2: "1" :name: BanII :blunt: "0" :c3: "0" PceI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: AGGCCT :c2: "3" :name: PceI :blunt: "1" :c3: "0" HspAI: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GCGC :c2: "3" :name: HspAI :blunt: "0" :c3: "0" Csp45I: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: TTCGAA :c2: "4" :name: Csp45I :blunt: "0" :c3: "0" AflIII: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ACRYGT :c2: "5" :name: AflIII :blunt: "0" :c3: "0" AcyI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GRCGYC :c2: "4" :name: AcyI :blunt: "0" :c3: "0" BceAI: :len: "5" :c1: "17" :c4: "0" :ncuts: "2" :pattern: ACGGC :c2: "19" :name: BceAI :blunt: "0" :c3: "0" Ple19I: :len: "6" :c1: "4" :c4: "0" :ncuts: "2" :pattern: CGATCG :c2: "2" :name: Ple19I :blunt: "0" :c3: "0" McrI: :len: "6" :c1: "4" :c4: "0" :ncuts: "2" :pattern: cgrycg :c2: "2" :name: McrI :blunt: "0" :c3: "0" BshFI: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GGCC :c2: "2" :name: BshFI :blunt: "1" :c3: "0" BglII: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: AGATCT :c2: "5" :name: BglII :blunt: "0" :c3: "0" EcoT38I: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GRGCYC :c2: "1" :name: EcoT38I :blunt: "0" :c3: "0" DraIII: :len: "9" :c1: "6" :c4: "0" :ncuts: "2" :pattern: CACNNNGTG :c2: "3" :name: DraIII :blunt: "0" :c3: "0" UbaF12I: :len: "10" :c1: "0" :c4: "0" :ncuts: "0" :pattern: ctacnnngtc :c2: "0" :name: UbaF12I :blunt: "0" :c3: "0" SmlI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTYRAG :c2: "5" :name: SmlI :blunt: "0" :c3: 
"0" SinI: :len: "5" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGWCC :c2: "4" :name: SinI :blunt: "0" :c3: "0" BalI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: TGGCCA :c2: "3" :name: BalI :blunt: "1" :c3: "0" AhdI: :len: "11" :c1: "6" :c4: "0" :ncuts: "2" :pattern: GACNNNNNGTC :c2: "5" :name: AhdI :blunt: "0" :c3: "0" AfeI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: AGCGCT :c2: "3" :name: AfeI :blunt: "1" :c3: "0" DinI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GGCGCC :c2: "3" :name: DinI :blunt: "1" :c3: "0" SsiI: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCGC :c2: "3" :name: SsiI :blunt: "0" :c3: "0" PmeI: :len: "8" :c1: "4" :c4: "0" :ncuts: "2" :pattern: GTTTAAAC :c2: "4" :name: PmeI :blunt: "1" :c3: "0" NaeI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GCCGGC :c2: "3" :name: NaeI :blunt: "1" :c3: "0" ItaI: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GCNGC :c2: "3" :name: ItaI :blunt: "0" :c3: "0" FmuI: :len: "5" :c1: "4" :c4: "0" :ncuts: "2" :pattern: ggncc :c2: "1" :name: FmuI :blunt: "0" :c3: "0" AccB7I: :len: "11" :c1: "7" :c4: "0" :ncuts: "2" :pattern: CCANNNNNTGG :c2: "4" :name: AccB7I :blunt: "0" :c3: "0" Vha464I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTTAAG :c2: "5" :name: Vha464I :blunt: "0" :c3: "0" MunI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CAATTG :c2: "5" :name: MunI :blunt: "0" :c3: "0" HpyCH4III: :len: "5" :c1: "3" :c4: "0" :ncuts: "2" :pattern: ACNGT :c2: "2" :name: HpyCH4III :blunt: "0" :c3: "0" GlaI: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GCGC :c2: "2" :name: GlaI :blunt: "1" :c3: "0" Bsh1236I: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CGCG :c2: "2" :name: Bsh1236I :blunt: "1" :c3: "0" BstMCI: :len: "6" :c1: "4" :c4: "0" :ncuts: "2" :pattern: CGRYCG :c2: "2" :name: BstMCI :blunt: "0" :c3: "0" BsrFI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: RCCGGY :c2: "5" :name: BsrFI :blunt: "0" :c3: "0" BspGI: :len: "6" :c1: "0" :c4: "0" :ncuts: "0" :pattern: ctggac 
:c2: "0" :name: BspGI :blunt: "0" :c3: "0" Tsp45I: :len: "5" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: GTSAC :c2: "5" :name: Tsp45I :blunt: "0" :c3: "0" KpnI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GGTACC :c2: "1" :name: KpnI :blunt: "0" :c3: "0" GsuI: :len: "6" :c1: "22" :c4: "0" :ncuts: "2" :pattern: CTGGAG :c2: "20" :name: GsuI :blunt: "0" :c3: "0" Bsp13I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TCCGGA :c2: "5" :name: Bsp13I :blunt: "0" :c3: "0" Esp3I: :len: "6" :c1: "7" :c4: "0" :ncuts: "2" :pattern: CGTCTC :c2: "11" :name: Esp3I :blunt: "0" :c3: "0" Pfl23II: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CGTACG :c2: "5" :name: Pfl23II :blunt: "0" :c3: "0" NciI: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCSGG :c2: "3" :name: NciI :blunt: "0" :c3: "0" MstI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: tgcgca :c2: "3" :name: MstI :blunt: "1" :c3: "0" HgiCI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ggyrcc :c2: "5" :name: HgiCI :blunt: "0" :c3: "0" BspLI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GGNNCC :c2: "3" :name: BspLI :blunt: "1" :c3: "0" DrdII: :len: "6" :c1: "0" :c4: "0" :ncuts: "0" :pattern: gaacca :c2: "0" :name: DrdII :blunt: "0" :c3: "0" Eco52I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CGGCCG :c2: "5" :name: Eco52I :blunt: "0" :c3: "0" Ksp632I: :len: "6" :c1: "7" :c4: "0" :ncuts: "2" :pattern: CTCTTC :c2: "10" :name: Ksp632I :blunt: "0" :c3: "0" BmcAI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: AGTACT :c2: "3" :name: BmcAI :blunt: "1" :c3: "0" BbvCI: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCTCAGC :c2: "5" :name: BbvCI :blunt: "0" :c3: "0" Tth111II: :len: "6" :c1: "17" :c4: "0" :ncuts: "2" :pattern: caarca :c2: "15" :name: Tth111II :blunt: "0" :c3: "0" TaiI: :len: "4" :c1: "4" :c4: "0" :ncuts: "2" :pattern: ACGT :c2: "-1" :name: TaiI :blunt: "0" :c3: "0" Sse8387I: :len: "8" :c1: "6" :c4: "0" :ncuts: "2" :pattern: CCTGCAGG :c2: "2" :name: Sse8387I :blunt: "0" :c3: "0" SgrBI: 
:len: "6" :c1: "4" :c4: "0" :ncuts: "2" :pattern: CCGCGG :c2: "2" :name: SgrBI :blunt: "0" :c3: "0" RsrII: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CGGWCCG :c2: "5" :name: RsrII :blunt: "0" :c3: "0" PctI: :len: "6" :c1: "7" :c4: "0" :ncuts: "2" :pattern: GAATGC :c2: "5" :name: PctI :blunt: "0" :c3: "0" PauI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GCGCGC :c2: "5" :name: PauI :blunt: "0" :c3: "0" BetI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: wccggw :c2: "5" :name: BetI :blunt: "0" :c3: "0" BcuI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ACTAGT :c2: "5" :name: BcuI :blunt: "0" :c3: "0" BsaAI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: YACGTR :c2: "3" :name: BsaAI :blunt: "1" :c3: "0" McaTI: :len: "6" :c1: "4" :c4: "0" :ncuts: "2" :pattern: gcgcgc :c2: "2" :name: McaTI :blunt: "0" :c3: "0" Eco57I: :len: "6" :c1: "22" :c4: "0" :ncuts: "2" :pattern: CTGAAG :c2: "20" :name: Eco57I :blunt: "0" :c3: "0" BstOI: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCWGG :c2: "3" :name: BstOI :blunt: "0" :c3: "0" BspQI: :len: "7" :c1: "8" :c4: "0" :ncuts: "2" :pattern: GCTCTTC :c2: "11" :name: BspQI :blunt: "0" :c3: "0" BsmI: :len: "6" :c1: "7" :c4: "0" :ncuts: "2" :pattern: GAATGC :c2: "5" :name: BsmI :blunt: "0" :c3: "0" DraI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: TTTAAA :c2: "3" :name: DraI :blunt: "1" :c3: "0" BstV1I: :len: "5" :c1: "13" :c4: "0" :ncuts: "2" :pattern: GCAGC :c2: "17" :name: BstV1I :blunt: "0" :c3: "0" BtgZI: :len: "6" :c1: "16" :c4: "0" :ncuts: "2" :pattern: GCGATG :c2: "20" :name: BtgZI :blunt: "0" :c3: "0" CspCI: :len: "12" :c1: "-12" :c4: "22" :ncuts: "4" :pattern: CAANNNNNGTGG :c2: "-14" :name: CspCI :blunt: "0" :c3: "24" MhlI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GDGCHC :c2: "1" :name: MhlI :blunt: "0" :c3: "0" MboI: :len: "4" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: GATC :c2: "4" :name: MboI :blunt: "0" :c3: "0" HinfI: :len: "5" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GANTC :c2: "4" 
:name: HinfI :blunt: "0" :c3: "0" Eam1104I: :len: "6" :c1: "7" :c4: "0" :ncuts: "2" :pattern: CTCTTC :c2: "10" :name: Eam1104I :blunt: "0" :c3: "0" BseDI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCNNGG :c2: "5" :name: BseDI :blunt: "0" :c3: "0" BmuI: :len: "6" :c1: "11" :c4: "0" :ncuts: "2" :pattern: ACTGGG :c2: "10" :name: BmuI :blunt: "0" :c3: "0" ApoI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: RAATTY :c2: "5" :name: ApoI :blunt: "0" :c3: "0" BfaI: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTAG :c2: "3" :name: BfaI :blunt: "0" :c3: "0" TseI: :len: "5" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GCWGC :c2: "4" :name: TseI :blunt: "0" :c3: "0" BsrI: :len: "5" :c1: "6" :c4: "0" :ncuts: "2" :pattern: ACTGG :c2: "4" :name: BsrI :blunt: "0" :c3: "0" VspI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: ATTAAT :c2: "4" :name: VspI :blunt: "0" :c3: "0" RsaI: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GTAC :c2: "2" :name: RsaI :blunt: "1" :c3: "0" PpuMI: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: RGGWCCY :c2: "5" :name: PpuMI :blunt: "0" :c3: "0" PfeI: :len: "5" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GAWTC :c2: "4" :name: PfeI :blunt: "0" :c3: "0" AccI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GTMKAC :c2: "4" :name: AccI :blunt: "0" :c3: "0" BmgT120I: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GGNCC :c2: "3" :name: BmgT120I :blunt: "0" :c3: "0" TasI: :len: "4" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: AATT :c2: "4" :name: TasI :blunt: "0" :c3: "0" SrfI: :len: "8" :c1: "4" :c4: "0" :ncuts: "2" :pattern: GCCCGGGC :c2: "4" :name: SrfI :blunt: "1" :c3: "0" LweI: :len: "5" :c1: "10" :c4: "0" :ncuts: "2" :pattern: GCATC :c2: "14" :name: LweI :blunt: "0" :c3: "0" BsuTUI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: ATCGAT :c2: "4" :name: BsuTUI :blunt: "0" :c3: "0" AsiSI: :len: "8" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GCGATCGC :c2: "3" :name: AsiSI :blunt: "0" :c3: "0" NspI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" 
:pattern: RCATGY :c2: "1" :name: NspI :blunt: "0" :c3: "0" BstMWI: :len: "11" :c1: "7" :c4: "0" :ncuts: "2" :pattern: GCNNNNNNNGC :c2: "4" :name: BstMWI :blunt: "0" :c3: "0" BstYI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: RGATCY :c2: "5" :name: BstYI :blunt: "0" :c3: "0" SplI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: cgtacg :c2: "5" :name: SplI :blunt: "0" :c3: "0" MabI: :len: "7" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ACCWGGT :c2: "6" :name: MabI :blunt: "0" :c3: "0" FaeI: :len: "4" :c1: "4" :c4: "0" :ncuts: "2" :pattern: CATG :c2: "-1" :name: FaeI :blunt: "0" :c3: "0" XcmI: :len: "15" :c1: "8" :c4: "0" :ncuts: "2" :pattern: CCANNNNNNNNNTGG :c2: "7" :name: XcmI :blunt: "0" :c3: "0" TsoI: :len: "6" :c1: "17" :c4: "0" :ncuts: "2" :pattern: TARCCA :c2: "15" :name: TsoI :blunt: "0" :c3: "0" NdeI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CATATG :c2: "4" :name: NdeI :blunt: "0" :c3: "0" BsiHKAI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GWGCWC :c2: "1" :name: BsiHKAI :blunt: "0" :c3: "0" BseNI: :len: "5" :c1: "6" :c4: "0" :ncuts: "2" :pattern: ACTGG :c2: "4" :name: BseNI :blunt: "0" :c3: "0" VneI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GTGCAC :c2: "5" :name: VneI :blunt: "0" :c3: "0" TspGWI: :len: "5" :c1: "16" :c4: "0" :ncuts: "2" :pattern: ACGGA :c2: "14" :name: TspGWI :blunt: "0" :c3: "0" HaeII: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: RGCGCY :c2: "1" :name: HaeII :blunt: "0" :c3: "0" EcoHI: :len: "5" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: ccsgg :c2: "5" :name: EcoHI :blunt: "0" :c3: "0" Bsh1285I: :len: "6" :c1: "4" :c4: "0" :ncuts: "2" :pattern: CGRYCG :c2: "2" :name: Bsh1285I :blunt: "0" :c3: "0" Tsp509I: :len: "4" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: AATT :c2: "4" :name: Tsp509I :blunt: "0" :c3: "0" PfoI: :len: "7" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TCCNGGA :c2: "6" :name: PfoI :blunt: "0" :c3: "0" AseI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: ATTAAT :c2: "4" :name: AseI :blunt: "0" :c3: 
"0" Bsp1286I: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GDGCHC :c2: "1" :name: Bsp1286I :blunt: "0" :c3: "0" Bsp24I: :len: "12" :c1: "-9" :c4: "19" :ncuts: "4" :pattern: gacnnnnnntgg :c2: "-14" :name: Bsp24I :blunt: "0" :c3: "24" TstI: :len: "12" :c1: "-9" :c4: "19" :ncuts: "4" :pattern: CACNNNNNNTCC :c2: "-14" :name: TstI :blunt: "0" :c3: "24" MlyI: :len: "5" :c1: "10" :c4: "0" :ncuts: "2" :pattern: GAGTC :c2: "10" :name: MlyI :blunt: "1" :c3: "0" BseSI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GKGCMC :c2: "1" :name: BseSI :blunt: "0" :c3: "0" CviJI: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: RGCY :c2: "2" :name: CviJI :blunt: "1" :c3: "0" Psp03I: :len: "5" :c1: "4" :c4: "0" :ncuts: "2" :pattern: ggwcc :c2: "1" :name: Psp03I :blunt: "0" :c3: "0" NlaIV: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GGNNCC :c2: "3" :name: NlaIV :blunt: "1" :c3: "0" AasI: :len: "12" :c1: "7" :c4: "0" :ncuts: "2" :pattern: GACNNNNNNGTC :c2: "5" :name: AasI :blunt: "0" :c3: "0" EcoO65I: :len: "7" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGTNACC :c2: "6" :name: EcoO65I :blunt: "0" :c3: "0" Sfr303I: :len: "6" :c1: "4" :c4: "0" :ncuts: "2" :pattern: CCGCGG :c2: "2" :name: Sfr303I :blunt: "0" :c3: "0" MalI: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GATC :c2: "2" :name: MalI :blunt: "1" :c3: "0" BfuI: :len: "6" :c1: "12" :c4: "0" :ncuts: "2" :pattern: GTATCC :c2: "11" :name: BfuI :blunt: "0" :c3: "0" Eco47III: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: AGCGCT :c2: "3" :name: Eco47III :blunt: "1" :c3: "0" Bse3DI: :len: "6" :c1: "8" :c4: "0" :ncuts: "2" :pattern: GCAATG :c2: "6" :name: Bse3DI :blunt: "0" :c3: "0" Psp124BI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GAGCTC :c2: "1" :name: Psp124BI :blunt: "0" :c3: "0" PaeR7I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTCGAG :c2: "5" :name: PaeR7I :blunt: "0" :c3: "0" MscI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: TGGCCA :c2: "3" :name: MscI :blunt: "1" :c3: "0" ChaI: :len: "4" 
:c1: "4" :c4: "0" :ncuts: "2" :pattern: gatc :c2: "-1" :name: ChaI :blunt: "0" :c3: "0" BstDSI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCRYGG :c2: "5" :name: BstDSI :blunt: "0" :c3: "0" Bse118I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: RCCGGY :c2: "5" :name: Bse118I :blunt: "0" :c3: "0" BseXI: :len: "5" :c1: "13" :c4: "0" :ncuts: "2" :pattern: GCAGC :c2: "17" :name: BseXI :blunt: "0" :c3: "0" BspT107I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGYRCC :c2: "5" :name: BspT107I :blunt: "0" :c3: "0" MspA1I: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: CMGCKG :c2: "3" :name: MspA1I :blunt: "1" :c3: "0" HindII: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GTYRAC :c2: "3" :name: HindII :blunt: "1" :c3: "0" EcoRI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GAATTC :c2: "5" :name: EcoRI :blunt: "0" :c3: "0" Asp718I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGTACC :c2: "5" :name: Asp718I :blunt: "0" :c3: "0" XhoII: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: RGATCY :c2: "5" :name: XhoII :blunt: "0" :c3: "0" Van91I: :len: "11" :c1: "7" :c4: "0" :ncuts: "2" :pattern: CCANNNNNTGG :c2: "4" :name: Van91I :blunt: "0" :c3: "0" StyI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCWWGG :c2: "5" :name: StyI :blunt: "0" :c3: "0" BspANI: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GGCC :c2: "2" :name: BspANI :blunt: "1" :c3: "0" BaeI: :len: "11" :c1: "-11" :c4: "18" :ncuts: "4" :pattern: ACNNNNGTAYC :c2: "-16" :name: BaeI :blunt: "0" :c3: "23" FatI: :len: "4" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: CATG :c2: "4" :name: FatI :blunt: "0" :c3: "0" PspXI: :len: "8" :c1: "2" :c4: "0" :ncuts: "2" :pattern: VCTCGAGB :c2: "6" :name: PspXI :blunt: "0" :c3: "0" Ppu10I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: atgcat :c2: "5" :name: Ppu10I :blunt: "0" :c3: "0" BtsI: :len: "6" :c1: "8" :c4: "0" :ncuts: "2" :pattern: GCAGTG :c2: "6" :name: BtsI :blunt: "0" :c3: "0" CjeI: :len: "11" :c1: "-9" :c4: "20" :ncuts: "4" 
:pattern: ccannnnnngt :c2: "-15" :name: CjeI :blunt: "0" :c3: "26" Sau96I: :len: "5" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGNCC :c2: "4" :name: Sau96I :blunt: "0" :c3: "0" SapI: :len: "7" :c1: "8" :c4: "0" :ncuts: "2" :pattern: GCTCTTC :c2: "11" :name: SapI :blunt: "0" :c3: "0" HgaI: :len: "5" :c1: "10" :c4: "0" :ncuts: "2" :pattern: GACGC :c2: "15" :name: HgaI :blunt: "0" :c3: "0" BtsCI: :len: "5" :c1: "7" :c4: "0" :ncuts: "2" :pattern: GGATG :c2: "5" :name: BtsCI :blunt: "0" :c3: "0" CviAII: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CATG :c2: "3" :name: CviAII :blunt: "0" :c3: "0" BstNSI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: RCATGY :c2: "1" :name: BstNSI :blunt: "0" :c3: "0" Tru1I: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TTAA :c2: "3" :name: Tru1I :blunt: "0" :c3: "0" NlaIII: :len: "4" :c1: "4" :c4: "0" :ncuts: "2" :pattern: CATG :c2: "-1" :name: NlaIII :blunt: "0" :c3: "0" BsaI: :len: "6" :c1: "7" :c4: "0" :ncuts: "2" :pattern: GGTCTC :c2: "11" :name: BsaI :blunt: "0" :c3: "0" Bsc4I: :len: "11" :c1: "7" :c4: "0" :ncuts: "2" :pattern: CCNNNNNNNGG :c2: "4" :name: Bsc4I :blunt: "0" :c3: "0" FbaI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TGATCA :c2: "5" :name: FbaI :blunt: "0" :c3: "0" VpaK11BI: :len: "5" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGWCC :c2: "4" :name: VpaK11BI :blunt: "0" :c3: "0" PinAI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ACCGGT :c2: "5" :name: PinAI :blunt: "0" :c3: "0" NspV: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: TTCGAA :c2: "4" :name: NspV :blunt: "0" :c3: "0" FspI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: TGCGCA :c2: "3" :name: FspI :blunt: "1" :c3: "0" BstMAI: :len: "5" :c1: "6" :c4: "0" :ncuts: "2" :pattern: GTCTC :c2: "10" :name: BstMAI :blunt: "0" :c3: "0" Eco57MI: :len: "6" :c1: "22" :c4: "0" :ncuts: "2" :pattern: CTGRAG :c2: "20" :name: Eco57MI :blunt: "0" :c3: "0" AccIII: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TCCGGA :c2: "5" :name: AccIII :blunt: "0" 
:c3: "0" BsrDI: :len: "6" :c1: "8" :c4: "0" :ncuts: "2" :pattern: GCAATG :c2: "6" :name: BsrDI :blunt: "0" :c3: "0" BspEI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TCCGGA :c2: "5" :name: BspEI :blunt: "0" :c3: "0" ZrmI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: AGTACT :c2: "3" :name: ZrmI :blunt: "1" :c3: "0" Sse9I: :len: "4" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: AATT :c2: "4" :name: Sse9I :blunt: "0" :c3: "0" SmoI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTYRAG :c2: "5" :name: SmoI :blunt: "0" :c3: "0" SauI: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: cctnagg :c2: "5" :name: SauI :blunt: "0" :c3: "0" AleI: :len: "10" :c1: "5" :c4: "0" :ncuts: "2" :pattern: CACNNNNGTG :c2: "5" :name: AleI :blunt: "1" :c3: "0" BcnI: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCSGG :c2: "3" :name: BcnI :blunt: "0" :c3: "0" SstII: :len: "6" :c1: "4" :c4: "0" :ncuts: "2" :pattern: CCGCGG :c2: "2" :name: SstII :blunt: "0" :c3: "0" HgiEII: :len: "12" :c1: "0" :c4: "0" :ncuts: "0" :pattern: accnnnnnnggt :c2: "0" :name: HgiEII :blunt: "0" :c3: "0" HgiAI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: gwgcwc :c2: "1" :name: HgiAI :blunt: "0" :c3: "0" HaeIV: :len: "11" :c1: "-8" :c4: "20" :ncuts: "4" :pattern: gaynnnnnrtc :c2: "-14" :name: HaeIV :blunt: "0" :c3: "25" Bsp1720I: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GCTNAGC :c2: "5" :name: Bsp1720I :blunt: "0" :c3: "0" Eco31I: :len: "6" :c1: "7" :c4: "0" :ncuts: "2" :pattern: GGTCTC :c2: "11" :name: Eco31I :blunt: "0" :c3: "0" BssNAI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GTATAC :c2: "3" :name: BssNAI :blunt: "1" :c3: "0" BshNI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGYRCC :c2: "5" :name: BshNI :blunt: "0" :c3: "0" Cac8I: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GCNNGC :c2: "3" :name: Cac8I :blunt: "1" :c3: "0" Bse8I: :len: "10" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GATNNNNATC :c2: "5" :name: Bse8I :blunt: "1" :c3: "0" BmiI: :len: "6" :c1: "3" :c4: "0" 
:ncuts: "2" :pattern: GGNNCC :c2: "3" :name: BmiI :blunt: "1" :c3: "0" BglI: :len: "11" :c1: "7" :c4: "0" :ncuts: "2" :pattern: GCCNNNNNGGC :c2: "4" :name: BglI :blunt: "0" :c3: "0" UnbI: :len: "5" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: ggncc :c2: "5" :name: UnbI :blunt: "0" :c3: "0" SspD5I: :len: "5" :c1: "13" :c4: "0" :ncuts: "2" :pattern: ggtga :c2: "13" :name: SspD5I :blunt: "1" :c3: "0" SdaI: :len: "8" :c1: "6" :c4: "0" :ncuts: "2" :pattern: CCTGCAGG :c2: "2" :name: SdaI :blunt: "0" :c3: "0" OliI: :len: "10" :c1: "5" :c4: "0" :ncuts: "2" :pattern: CACNNNNGTG :c2: "5" :name: OliI :blunt: "1" :c3: "0" Msp20I: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: TGGCCA :c2: "3" :name: Msp20I :blunt: "1" :c3: "0" BstSCI: :len: "5" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: CCNGG :c2: "5" :name: BstSCI :blunt: "0" :c3: "0" BspLU11I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ACATGT :c2: "5" :name: BspLU11I :blunt: "0" :c3: "0" Bme1580I: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GKGCMC :c2: "1" :name: Bme1580I :blunt: "0" :c3: "0" AspLEI: :len: "4" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GCGC :c2: "1" :name: AspLEI :blunt: "0" :c3: "0" Asp700I: :len: "10" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GAANNNNTTC :c2: "5" :name: Asp700I :blunt: "1" :c3: "0" EagI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CGGCCG :c2: "5" :name: EagI :blunt: "0" :c3: "0" Sse232I: :len: "8" :c1: "2" :c4: "0" :ncuts: "2" :pattern: cgccggcg :c2: "6" :name: Sse232I :blunt: "0" :c3: "0" PasI: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCCWGGG :c2: "5" :name: PasI :blunt: "0" :c3: "0" EclHKI: :len: "11" :c1: "6" :c4: "0" :ncuts: "2" :pattern: GACNNNNNGTC :c2: "5" :name: EclHKI :blunt: "0" :c3: "0" AhlI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ACTAGT :c2: "5" :name: AhlI :blunt: "0" :c3: "0" AsiGI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ACCGGT :c2: "5" :name: AsiGI :blunt: "0" :c3: "0" BssHII: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GCGCGC 
:c2: "5" :name: BssHII :blunt: "0" :c3: "0" Zsp2I: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: ATGCAT :c2: "1" :name: Zsp2I :blunt: "0" :c3: "0" SfeI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ctryag :c2: "5" :name: SfeI :blunt: "0" :c3: "0" Bpu10I: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCTNAGC :c2: "5" :name: Bpu10I :blunt: "0" :c3: "0" CspAI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ACCGGT :c2: "5" :name: CspAI :blunt: "0" :c3: "0" BmgBI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: CACGTC :c2: "3" :name: BmgBI :blunt: "1" :c3: "0" SnaI: :len: "6" :c1: "0" :c4: "0" :ncuts: "0" :pattern: gtatac :c2: "0" :name: SnaI :blunt: "0" :c3: "0" MluNI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: TGGCCA :c2: "3" :name: MluNI :blunt: "1" :c3: "0" AloI: :len: "13" :c1: "-8" :c4: "20" :ncuts: "4" :pattern: GAACNNNNNNTCC :c2: "-13" :name: AloI :blunt: "0" :c3: "25" BseBI: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCWGG :c2: "3" :name: BseBI :blunt: "0" :c3: "0" BstZ17I: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GTATAC :c2: "3" :name: BstZ17I :blunt: "1" :c3: "0" StyD4I: :len: "5" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: CCNGG :c2: "5" :name: StyD4I :blunt: "0" :c3: "0" SfaNI: :len: "5" :c1: "10" :c4: "0" :ncuts: "2" :pattern: GCATC :c2: "14" :name: SfaNI :blunt: "0" :c3: "0" PshAI: :len: "10" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GACNNNNGTC :c2: "5" :name: PshAI :blunt: "1" :c3: "0" NarI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GGCGCC :c2: "4" :name: NarI :blunt: "0" :c3: "0" KspAI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GTTAAC :c2: "3" :name: KspAI :blunt: "1" :c3: "0" BseX3I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CGGCCG :c2: "5" :name: BseX3I :blunt: "0" :c3: "0" EcoRV: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GATATC :c2: "3" :name: EcoRV :blunt: "1" :c3: "0" BsrSI: :len: "5" :c1: "6" :c4: "0" :ncuts: "2" :pattern: ACTGG :c2: "4" :name: BsrSI :blunt: "0" :c3: "0" BspTI: 
:len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTTAAG :c2: "5" :name: BspTI :blunt: "0" :c3: "0" NsiI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: ATGCAT :c2: "1" :name: NsiI :blunt: "0" :c3: "0" BciVI: :len: "6" :c1: "12" :c4: "0" :ncuts: "2" :pattern: GTATCC :c2: "11" :name: BciVI :blunt: "0" :c3: "0" AjuI: :len: "14" :c1: "-8" :c4: "20" :ncuts: "4" :pattern: GAANNNNNNNTTGG :c2: "-13" :name: AjuI :blunt: "0" :c3: "25" CelII: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GCTNAGC :c2: "5" :name: CelII :blunt: "0" :c3: "0" DrdI: :len: "12" :c1: "7" :c4: "0" :ncuts: "2" :pattern: GACNNNNNNGTC :c2: "5" :name: DrdI :blunt: "0" :c3: "0" XmaI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCCGGG :c2: "5" :name: XmaI :blunt: "0" :c3: "0" XagI: :len: "11" :c1: "5" :c4: "0" :ncuts: "2" :pattern: CCTNNNNNAGG :c2: "6" :name: XagI :blunt: "0" :c3: "0" TaqI: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TCGA :c2: "3" :name: TaqI :blunt: "0" :c3: "0" SpeI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ACTAGT :c2: "5" :name: SpeI :blunt: "0" :c3: "0" PstI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: CTGCAG :c2: "1" :name: PstI :blunt: "0" :c3: "0" MnlI: :len: "4" :c1: "11" :c4: "0" :ncuts: "2" :pattern: CCTC :c2: "10" :name: MnlI :blunt: "0" :c3: "0" BsiEI: :len: "6" :c1: "4" :c4: "0" :ncuts: "2" :pattern: CGRYCG :c2: "2" :name: BsiEI :blunt: "0" :c3: "0" BseGI: :len: "5" :c1: "7" :c4: "0" :ncuts: "2" :pattern: GGATG :c2: "5" :name: BseGI :blunt: "0" :c3: "0" Ppu21I: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: YACGTR :c2: "3" :name: Ppu21I :blunt: "1" :c3: "0" BsoBI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CYCGRG :c2: "5" :name: BsoBI :blunt: "0" :c3: "0" Bsp1407I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TGTACA :c2: "5" :name: Bsp1407I :blunt: "0" :c3: "0" SfoI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GGCGCC :c2: "3" :name: SfoI :blunt: "1" :c3: "0" PflMI: :len: "11" :c1: "7" :c4: "0" :ncuts: "2" :pattern: 
CCANNNNNTGG :c2: "4" :name: PflMI :blunt: "0" :c3: "0" PdiI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GCCGGC :c2: "3" :name: PdiI :blunt: "1" :c3: "0" Hpy178III: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: tcnnga :c2: "4" :name: Hpy178III :blunt: "0" :c3: "0" AspS9I: :len: "5" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGNCC :c2: "4" :name: AspS9I :blunt: "0" :c3: "0" DriI: :len: "11" :c1: "6" :c4: "0" :ncuts: "2" :pattern: GACNNNNNGTC :c2: "5" :name: DriI :blunt: "0" :c3: "0" AviII: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: TGCGCA :c2: "3" :name: AviII :blunt: "1" :c3: "0" PsyI: :len: "9" :c1: "4" :c4: "0" :ncuts: "2" :pattern: GACNNNGTC :c2: "5" :name: PsyI :blunt: "0" :c3: "0" PleI: :len: "5" :c1: "9" :c4: "0" :ncuts: "2" :pattern: GAGTC :c2: "10" :name: PleI :blunt: "0" :c3: "0" MroI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TCCGGA :c2: "5" :name: MroI :blunt: "0" :c3: "0" BlfI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TCCGGA :c2: "5" :name: BlfI :blunt: "0" :c3: "0" BfiI: :len: "6" :c1: "11" :c4: "0" :ncuts: "2" :pattern: ACTGGG :c2: "10" :name: BfiI :blunt: "0" :c3: "0" BmeT110I: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CYCGRG :c2: "4" :name: BmeT110I :blunt: "0" :c3: "0" BseLI: :len: "11" :c1: "7" :c4: "0" :ncuts: "2" :pattern: CCNNNNNNNGG :c2: "4" :name: BseLI :blunt: "0" :c3: "0" SnaBI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: TACGTA :c2: "3" :name: SnaBI :blunt: "1" :c3: "0" PspGI: :len: "5" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: CCWGG :c2: "5" :name: PspGI :blunt: "0" :c3: "0" BmeRI: :len: "11" :c1: "6" :c4: "0" :ncuts: "2" :pattern: GACNNNNNGTC :c2: "5" :name: BmeRI :blunt: "0" :c3: "0" BstPAI: :len: "10" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GACNNNNGTC :c2: "5" :name: BstPAI :blunt: "1" :c3: "0" SduI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GDGCHC :c2: "1" :name: SduI :blunt: "0" :c3: "0" MaeI: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTAG :c2: "3" :name: MaeI :blunt: "0" 
:c3: "0" LguI: :len: "7" :c1: "8" :c4: "0" :ncuts: "2" :pattern: GCTCTTC :c2: "11" :name: LguI :blunt: "0" :c3: "0" AscI: :len: "8" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GGCGCGCC :c2: "6" :name: AscI :blunt: "0" :c3: "0" AccBSI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: CCGCTC :c2: "3" :name: AccBSI :blunt: "1" :c3: "0" Bst2UI: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCWGG :c2: "3" :name: Bst2UI :blunt: "0" :c3: "0" CjuII: :len: "11" :c1: "0" :c4: "0" :ncuts: "0" :pattern: caynnnnnctc :c2: "0" :name: CjuII :blunt: "0" :c3: "0" BtgI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCRYGG :c2: "5" :name: BtgI :blunt: "0" :c3: "0" BpiI: :len: "6" :c1: "8" :c4: "0" :ncuts: "2" :pattern: GAAGAC :c2: "12" :name: BpiI :blunt: "0" :c3: "0" PspLI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CGTACG :c2: "5" :name: PspLI :blunt: "0" :c3: "0" MvrI: :len: "6" :c1: "4" :c4: "0" :ncuts: "2" :pattern: CGATCG :c2: "2" :name: MvrI :blunt: "0" :c3: "0" Aor51HI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: AGCGCT :c2: "3" :name: Aor51HI :blunt: "1" :c3: "0" StrI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTCGAG :c2: "5" :name: StrI :blunt: "0" :c3: "0" MspR9I: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCNGG :c2: "3" :name: MspR9I :blunt: "0" :c3: "0" HphI: :len: "5" :c1: "13" :c4: "0" :ncuts: "2" :pattern: GGTGA :c2: "12" :name: HphI :blunt: "0" :c3: "0" Hin1II: :len: "4" :c1: "4" :c4: "0" :ncuts: "2" :pattern: CATG :c2: "-1" :name: Hin1II :blunt: "0" :c3: "0" AhaIII: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: tttaaa :c2: "3" :name: AhaIII :blunt: "1" :c3: "0" BbuI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GCATGC :c2: "1" :name: BbuI :blunt: "0" :c3: "0" EheI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GGCGCC :c2: "3" :name: EheI :blunt: "1" :c3: "0" PspPPI: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: RGGWCCY :c2: "5" :name: PspPPI :blunt: "0" :c3: "0" NmuCI: :len: "5" :c1: "-1" :c4: "0" :ncuts: "2" 
:pattern: GTSAC :c2: "5" :name: NmuCI :blunt: "0" :c3: "0" BsaXI: :len: "11" :c1: "-10" :c4: "18" :ncuts: "4" :pattern: ACNNNNNCTCC :c2: "-13" :name: BsaXI :blunt: "0" :c3: "21" BlpI: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GCTNAGC :c2: "5" :name: BlpI :blunt: "0" :c3: "0" BspMAI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: CTGCAG :c2: "1" :name: BspMAI :blunt: "0" :c3: "0" Eco147I: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: AGGCCT :c2: "3" :name: Eco147I :blunt: "1" :c3: "0" AspEI: :len: "11" :c1: "6" :c4: "0" :ncuts: "2" :pattern: GACNNNNNGTC :c2: "5" :name: AspEI :blunt: "0" :c3: "0" Eco47I: :len: "5" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGWCC :c2: "4" :name: Eco47I :blunt: "0" :c3: "0" SgfI: :len: "8" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GCGATCGC :c2: "3" :name: SgfI :blunt: "0" :c3: "0" SchI: :len: "5" :c1: "10" :c4: "0" :ncuts: "2" :pattern: GAGTC :c2: "10" :name: SchI :blunt: "1" :c3: "0" PabI: :len: "4" :c1: "3" :c4: "0" :ncuts: "2" :pattern: gtac :c2: "1" :name: PabI :blunt: "0" :c3: "0" AcuI: :len: "6" :c1: "22" :c4: "0" :ncuts: "2" :pattern: CTGAAG :c2: "20" :name: AcuI :blunt: "0" :c3: "0" Bbv12I: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GWGCWC :c2: "1" :name: Bbv12I :blunt: "0" :c3: "0" ZraI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GACGTC :c2: "3" :name: ZraI :blunt: "1" :c3: "0" PciSI: :len: "7" :c1: "8" :c4: "0" :ncuts: "2" :pattern: GCTCTTC :c2: "11" :name: PciSI :blunt: "0" :c3: "0" FinI: :len: "5" :c1: "0" :c4: "0" :ncuts: "0" :pattern: gggac :c2: "0" :name: FinI :blunt: "0" :c3: "0" Bme1390I: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCNGG :c2: "3" :name: Bme1390I :blunt: "0" :c3: "0" Bsu36I: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCTNAGG :c2: "5" :name: Bsu36I :blunt: "0" :c3: "0" FokI: :len: "5" :c1: "14" :c4: "0" :ncuts: "2" :pattern: GGATG :c2: "18" :name: FokI :blunt: "0" :c3: "0" CviRI: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: tgca :c2: "2" :name: CviRI :blunt: "1" 
:c3: "0" BsiYI: :len: "11" :c1: "7" :c4: "0" :ncuts: "2" :pattern: CCNNNNNNNGG :c2: "4" :name: BsiYI :blunt: "0" :c3: "0" Bst1107I: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GTATAC :c2: "3" :name: Bst1107I :blunt: "1" :c3: "0" SelI: :len: "4" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: cgcg :c2: "4" :name: SelI :blunt: "0" :c3: "0" PagI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TCATGA :c2: "5" :name: PagI :blunt: "0" :c3: "0" Bbr7I: :len: "6" :c1: "13" :c4: "0" :ncuts: "2" :pattern: gaagac :c2: "17" :name: Bbr7I :blunt: "0" :c3: "0" BfuAI: :len: "6" :c1: "10" :c4: "0" :ncuts: "2" :pattern: ACCTGC :c2: "14" :name: BfuAI :blunt: "0" :c3: "0" AfaI: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GTAC :c2: "2" :name: AfaI :blunt: "1" :c3: "0" Bse1I: :len: "5" :c1: "6" :c4: "0" :ncuts: "2" :pattern: ACTGG :c2: "4" :name: Bse1I :blunt: "0" :c3: "0" BcgI: :len: "12" :c1: "-11" :c4: "22" :ncuts: "4" :pattern: CGANNNNNNTGC :c2: "-13" :name: BcgI :blunt: "0" :c3: "24" BsrBI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: CCGCTC :c2: "3" :name: BsrBI :blunt: "1" :c3: "0" UbaF13I: :len: "13" :c1: "0" :c4: "0" :ncuts: "0" :pattern: gagnnnnnnctgg :c2: "0" :name: UbaF13I :blunt: "0" :c3: "0" ApaI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GGGCCC :c2: "1" :name: ApaI :blunt: "0" :c3: "0" BclI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TGATCA :c2: "5" :name: BclI :blunt: "0" :c3: "0" FriOI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GRGCYC :c2: "1" :name: FriOI :blunt: "0" :c3: "0" PscI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ACATGT :c2: "5" :name: PscI :blunt: "0" :c3: "0" MspI: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCGG :c2: "3" :name: MspI :blunt: "0" :c3: "0" HinP1I: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GCGC :c2: "3" :name: HinP1I :blunt: "0" :c3: "0" CjeNII: :len: "10" :c1: "0" :c4: "0" :ncuts: "0" :pattern: gagnnnnngt :c2: "0" :name: CjeNII :blunt: "0" :c3: "0" CfoI: :len: "4" :c1: "3" :c4: "0" 
:ncuts: "2" :pattern: GCGC :c2: "1" :name: CfoI :blunt: "0" :c3: "0" BmgI: :len: "6" :c1: "0" :c4: "0" :ncuts: "0" :pattern: gkgccc :c2: "0" :name: BmgI :blunt: "0" :c3: "0" Sau3AI: :len: "4" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: GATC :c2: "4" :name: Sau3AI :blunt: "0" :c3: "0" NruI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: TCGCGA :c2: "3" :name: NruI :blunt: "1" :c3: "0" Bme18I: :len: "5" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGWCC :c2: "4" :name: Bme18I :blunt: "0" :c3: "0" BsrGI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TGTACA :c2: "5" :name: BsrGI :blunt: "0" :c3: "0" BspHI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TCATGA :c2: "5" :name: BspHI :blunt: "0" :c3: "0" EaeI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: YGGCCR :c2: "5" :name: EaeI :blunt: "0" :c3: "0" ApaBI: :len: "11" :c1: "8" :c4: "0" :ncuts: "2" :pattern: gcannnnntgc :c2: "3" :name: ApaBI :blunt: "0" :c3: "0" AjiI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: CACGTC :c2: "3" :name: AjiI :blunt: "1" :c3: "0" XhoI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTCGAG :c2: "5" :name: XhoI :blunt: "0" :c3: "0" Tru9I: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TTAA :c2: "3" :name: Tru9I :blunt: "0" :c3: "0" PspOMI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGGCCC :c2: "5" :name: PspOMI :blunt: "0" :c3: "0" Kzo9I: :len: "4" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: GATC :c2: "4" :name: Kzo9I :blunt: "0" :c3: "0" GdiII: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: cggccr :c2: "5" :name: GdiII :blunt: "0" :c3: "0" BseMII: :len: "5" :c1: "15" :c4: "0" :ncuts: "2" :pattern: CTCAG :c2: "13" :name: BseMII :blunt: "0" :c3: "0" Bpu1102I: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GCTNAGC :c2: "5" :name: Bpu1102I :blunt: "0" :c3: "0" BspMI: :len: "6" :c1: "10" :c4: "0" :ncuts: "2" :pattern: ACCTGC :c2: "14" :name: BspMI :blunt: "0" :c3: "0" BstF5I: :len: "5" :c1: "7" :c4: "0" :ncuts: "2" :pattern: GGATG :c2: "5" :name: BstF5I :blunt: "0" 
:c3: "0" BsiI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: cacgag :c2: "5" :name: BsiI :blunt: "0" :c3: "0" BinI: :len: "5" :c1: "9" :c4: "0" :ncuts: "2" :pattern: ggatc :c2: "10" :name: BinI :blunt: "0" :c3: "0" Eco72I: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: CACGTG :c2: "3" :name: Eco72I :blunt: "1" :c3: "0" SfcI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTRYAG :c2: "5" :name: SfcI :blunt: "0" :c3: "0" Psp6I: :len: "5" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: CCWGG :c2: "5" :name: Psp6I :blunt: "0" :c3: "0" NsbI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: TGCGCA :c2: "3" :name: NsbI :blunt: "1" :c3: "0" Kpn2I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TCCGGA :c2: "5" :name: Kpn2I :blunt: "0" :c3: "0" BstSFI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTRYAG :c2: "5" :name: BstSFI :blunt: "0" :c3: "0" CpoI: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CGGWCCG :c2: "5" :name: CpoI :blunt: "0" :c3: "0" EciI: :len: "6" :c1: "17" :c4: "0" :ncuts: "2" :pattern: GGCGGA :c2: "15" :name: EciI :blunt: "0" :c3: "0" Eco91I: :len: "7" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGTNACC :c2: "6" :name: Eco91I :blunt: "0" :c3: "0" Bsp143I: :len: "4" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: GATC :c2: "4" :name: Bsp143I :blunt: "0" :c3: "0" SstI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GAGCTC :c2: "1" :name: SstI :blunt: "0" :c3: "0" Hin4II: :len: "5" :c1: "11" :c4: "0" :ncuts: "2" :pattern: ccttc :c2: "10" :name: Hin4II :blunt: "0" :c3: "0" Bst4CI: :len: "5" :c1: "3" :c4: "0" :ncuts: "2" :pattern: ACNGT :c2: "2" :name: Bst4CI :blunt: "0" :c3: "0" BscAI: :len: "5" :c1: "9" :c4: "0" :ncuts: "2" :pattern: gcatc :c2: "11" :name: BscAI :blunt: "0" :c3: "0" BsaBI: :len: "10" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GATNNNNATC :c2: "5" :name: BsaBI :blunt: "1" :c3: "0" BisI: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GCNGC :c2: "3" :name: BisI :blunt: "0" :c3: "0" AjnI: :len: "5" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: 
CCWGG :c2: "5" :name: AjnI :blunt: "0" :c3: "0" Bsp19I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCATGG :c2: "5" :name: Bsp19I :blunt: "0" :c3: "0" TspEI: :len: "4" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: AATT :c2: "4" :name: TspEI :blunt: "0" :c3: "0" NcoI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCATGG :c2: "5" :name: NcoI :blunt: "0" :c3: "0" MvaI: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCWGG :c2: "3" :name: MvaI :blunt: "0" :c3: "0" BthCI: :len: "5" :c1: "4" :c4: "0" :ncuts: "2" :pattern: gcngc :c2: "1" :name: BthCI :blunt: "0" :c3: "0" BshVI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: ATCGAT :c2: "4" :name: BshVI :blunt: "0" :c3: "0" BsnI: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GGCC :c2: "2" :name: BsnI :blunt: "1" :c3: "0" Alw44I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GTGCAC :c2: "5" :name: Alw44I :blunt: "0" :c3: "0" BstPI: :len: "7" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGTNACC :c2: "6" :name: BstPI :blunt: "0" :c3: "0" PflFI: :len: "9" :c1: "4" :c4: "0" :ncuts: "2" :pattern: GACNNNGTC :c2: "5" :name: PflFI :blunt: "0" :c3: "0" MroNI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GCCGGC :c2: "5" :name: MroNI :blunt: "0" :c3: "0" HincII: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GTYRAC :c2: "3" :name: HincII :blunt: "1" :c3: "0" BbvII: :len: "6" :c1: "8" :c4: "0" :ncuts: "2" :pattern: gaagac :c2: "12" :name: BbvII :blunt: "0" :c3: "0" BpuEI: :len: "6" :c1: "22" :c4: "0" :ncuts: "2" :pattern: CTTGAG :c2: "20" :name: BpuEI :blunt: "0" :c3: "0" BstV2I: :len: "6" :c1: "8" :c4: "0" :ncuts: "2" :pattern: GAAGAC :c2: "12" :name: BstV2I :blunt: "0" :c3: "0" PsrI: :len: "13" :c1: "-8" :c4: "20" :ncuts: "4" :pattern: GAACNNNNNNTAC :c2: "-13" :name: PsrI :blunt: "0" :c3: "25" CaiI: :len: "9" :c1: "6" :c4: "0" :ncuts: "2" :pattern: CAGNNNCTG :c2: "3" :name: CaiI :blunt: "0" :c3: "0" Eam1105I: :len: "11" :c1: "6" :c4: "0" :ncuts: "2" :pattern: GACNNNNNGTC :c2: "5" :name: Eam1105I :blunt: "0" :c3: 
"0" ApaLI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GTGCAC :c2: "5" :name: ApaLI :blunt: "0" :c3: "0" XmaJI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCTAGG :c2: "5" :name: XmaJI :blunt: "0" :c3: "0" SexAI: :len: "7" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ACCWGGT :c2: "6" :name: SexAI :blunt: "0" :c3: "0" RigI: :len: "8" :c1: "6" :c4: "0" :ncuts: "2" :pattern: GGCCGGCC :c2: "2" :name: RigI :blunt: "0" :c3: "0" AsuII: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: TTCGAA :c2: "4" :name: AsuII :blunt: "0" :c3: "0" BstKTI: :len: "4" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GATC :c2: "1" :name: BstKTI :blunt: "0" :c3: "0" BstUI: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CGCG :c2: "2" :name: BstUI :blunt: "1" :c3: "0" BstBAI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: YACGTR :c2: "3" :name: BstBAI :blunt: "1" :c3: "0" EclXI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CGGCCG :c2: "5" :name: EclXI :blunt: "0" :c3: "0" BsmAI: :len: "5" :c1: "6" :c4: "0" :ncuts: "2" :pattern: GTCTC :c2: "10" :name: BsmAI :blunt: "0" :c3: "0" Sse8647I: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: aggwcct :c2: "5" :name: Sse8647I :blunt: "0" :c3: "0" SphI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GCATGC :c2: "1" :name: SphI :blunt: "0" :c3: "0" HpyF10VI: :len: "11" :c1: "7" :c4: "0" :ncuts: "2" :pattern: GCNNNNNNNGC :c2: "4" :name: HpyF10VI :blunt: "0" :c3: "0" BbrPI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: CACGTG :c2: "3" :name: BbrPI :blunt: "1" :c3: "0" AlwI: :len: "5" :c1: "9" :c4: "0" :ncuts: "2" :pattern: GGATC :c2: "10" :name: AlwI :blunt: "0" :c3: "0" TatI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: WGTACW :c2: "5" :name: TatI :blunt: "0" :c3: "0" Hpy8I: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GTNNAC :c2: "3" :name: Hpy8I :blunt: "1" :c3: "0" BsmFI: :len: "5" :c1: "15" :c4: "0" :ncuts: "2" :pattern: GGGAC :c2: "19" :name: BsmFI :blunt: "0" :c3: "0" BseJI: :len: "10" :c1: "5" :c4: "0" :ncuts: "2" 
:pattern: GATNNNNATC :c2: "5" :name: BseJI :blunt: "1" :c3: "0" PspEI: :len: "7" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGTNACC :c2: "6" :name: PspEI :blunt: "0" :c3: "0" MroXI: :len: "10" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GAANNNNTTC :c2: "5" :name: MroXI :blunt: "1" :c3: "0" KasI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGCGCC :c2: "5" :name: KasI :blunt: "0" :c3: "0" HpaII: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCGG :c2: "3" :name: HpaII :blunt: "0" :c3: "0" BstZI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CGGCCG :c2: "5" :name: BstZI :blunt: "0" :c3: "0" BstDEI: :len: "5" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTNAG :c2: "4" :name: BstDEI :blunt: "0" :c3: "0" DseDI: :len: "12" :c1: "7" :c4: "0" :ncuts: "2" :pattern: GACNNNNNNGTC :c2: "5" :name: DseDI :blunt: "0" :c3: "0" CseI: :len: "5" :c1: "10" :c4: "0" :ncuts: "2" :pattern: GACGC :c2: "15" :name: CseI :blunt: "0" :c3: "0" HpaI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GTTAAC :c2: "3" :name: HpaI :blunt: "1" :c3: "0" HaeIII: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GGCC :c2: "2" :name: HaeIII :blunt: "1" :c3: "0" CviKI-1: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: RGCY :c2: "2" :name: CviKI-1 :blunt: "1" :c3: "0" AciI: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCGC :c2: "3" :name: AciI :blunt: "0" :c3: "0" XmiI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GTMKAC :c2: "4" :name: XmiI :blunt: "0" :c3: "0" MluI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ACGCGT :c2: "5" :name: MluI :blunt: "0" :c3: "0" EspI: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: gctnagc :c2: "5" :name: EspI :blunt: "0" :c3: "0" Bst98I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTTAAG :c2: "5" :name: Bst98I :blunt: "0" :c3: "0" AatII: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GACGTC :c2: "1" :name: AatII :blunt: "0" :c3: "0" TaqII: :len: "6" :c1: "17" :c4: "0" :ncuts: "2" :pattern: CACCCA :c2: "15" :name: TaqII :blunt: "0" :c3: "0" ScaI: 
:len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: AGTACT :c2: "3" :name: ScaI :blunt: "1" :c3: "0" AflII: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTTAAG :c2: "5" :name: AflII :blunt: "0" :c3: "0" BstHHI: :len: "4" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GCGC :c2: "1" :name: BstHHI :blunt: "0" :c3: "0" FnuDII: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: cgcg :c2: "2" :name: FnuDII :blunt: "1" :c3: "0" BspTNI: :len: "6" :c1: "7" :c4: "0" :ncuts: "2" :pattern: GGTCTC :c2: "11" :name: BspTNI :blunt: "0" :c3: "0" XmaIII: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: cggccg :c2: "5" :name: XmaIII :blunt: "0" :c3: "0" PhoI: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GGCC :c2: "2" :name: PhoI :blunt: "1" :c3: "0" BbsI: :len: "6" :c1: "8" :c4: "0" :ncuts: "2" :pattern: GAAGAC :c2: "12" :name: BbsI :blunt: "0" :c3: "0" XmnI: :len: "10" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GAANNNNTTC :c2: "5" :name: XmnI :blunt: "1" :c3: "0" TsuI: :len: "5" :c1: "0" :c4: "0" :ncuts: "0" :pattern: gcgac :c2: "0" :name: TsuI :blunt: "0" :c3: "0" FspAI: :len: "8" :c1: "4" :c4: "0" :ncuts: "2" :pattern: RTGCGCAY :c2: "4" :name: FspAI :blunt: "1" :c3: "0" BstFNI: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CGCG :c2: "2" :name: BstFNI :blunt: "1" :c3: "0" BssMI: :len: "4" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: GATC :c2: "4" :name: BssMI :blunt: "0" :c3: "0" BstC8I: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GCNNGC :c2: "3" :name: BstC8I :blunt: "1" :c3: "0" BplI: :len: "11" :c1: "-9" :c4: "19" :ncuts: "4" :pattern: GAGNNNNNCTC :c2: "-14" :name: BplI :blunt: "0" :c3: "24" BlnI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCTAGG :c2: "5" :name: BlnI :blunt: "0" :c3: "0" EcoNI: :len: "11" :c1: "5" :c4: "0" :ncuts: "2" :pattern: CCTNNNNNAGG :c2: "6" :name: EcoNI :blunt: "0" :c3: "0" Ecl136II: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GAGCTC :c2: "3" :name: Ecl136II :blunt: "1" :c3: "0" AcsI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: 
RAATTY :c2: "5" :name: AcsI :blunt: "0" :c3: "0" AspCNI: :len: "5" :c1: "0" :c4: "0" :ncuts: "0" :pattern: gccgc :c2: "0" :name: AspCNI :blunt: "0" :c3: "0" AatI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: AGGCCT :c2: "3" :name: AatI :blunt: "1" :c3: "0" EsaBC3I: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: tcga :c2: "2" :name: EsaBC3I :blunt: "1" :c3: "0" XbaI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TCTAGA :c2: "5" :name: XbaI :blunt: "0" :c3: "0" TfiI: :len: "5" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GAWTC :c2: "4" :name: TfiI :blunt: "0" :c3: "0" StuI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: AGGCCT :c2: "3" :name: StuI :blunt: "1" :c3: "0" SmaI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: CCCGGG :c2: "3" :name: SmaI :blunt: "1" :c3: "0" Psp5II: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: RGGWCCY :c2: "5" :name: Psp5II :blunt: "0" :c3: "0" MboII: :len: "5" :c1: "13" :c4: "0" :ncuts: "2" :pattern: GAAGA :c2: "12" :name: MboII :blunt: "0" :c3: "0" MamI: :len: "10" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GATNNNNATC :c2: "5" :name: MamI :blunt: "1" :c3: "0" Bsp68I: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: TCGCGA :c2: "3" :name: Bsp68I :blunt: "1" :c3: "0" Acc16I: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: TGCGCA :c2: "3" :name: Acc16I :blunt: "1" :c3: "0" XspI: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTAG :c2: "3" :name: XspI :blunt: "0" :c3: "0" BsiWI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CGTACG :c2: "5" :name: BsiWI :blunt: "0" :c3: "0" BseYI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCCAGC :c2: "5" :name: BseYI :blunt: "0" :c3: "0" Eco88I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CYCGRG :c2: "5" :name: Eco88I :blunt: "0" :c3: "0" Bsu15I: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: ATCGAT :c2: "4" :name: Bsu15I :blunt: "0" :c3: "0" AlwFI: :len: "13" :c1: "0" :c4: "0" :ncuts: "0" :pattern: gaaaynnnnnrtg :c2: "0" :name: AlwFI :blunt: "0" :c3: "0" SalI: :len: 
"6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GTCGAC :c2: "5" :name: SalI :blunt: "0" :c3: "0" RleAI: :len: "6" :c1: "18" :c4: "0" :ncuts: "2" :pattern: cccaca :c2: "15" :name: RleAI :blunt: "0" :c3: "0" PaeI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GCATGC :c2: "1" :name: PaeI :blunt: "0" :c3: "0" Hsp92I: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GRCGYC :c2: "4" :name: Hsp92I :blunt: "0" :c3: "0" SwaI: :len: "8" :c1: "4" :c4: "0" :ncuts: "2" :pattern: ATTTAAAT :c2: "4" :name: SwaI :blunt: "1" :c3: "0" SspBI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: TGTACA :c2: "5" :name: SspBI :blunt: "0" :c3: "0" BspCNI: :len: "5" :c1: "14" :c4: "0" :ncuts: "2" :pattern: CTCAG :c2: "12" :name: BspCNI :blunt: "0" :c3: "0" ErhI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCWWGG :c2: "5" :name: ErhI :blunt: "0" :c3: "0" FauI: :len: "5" :c1: "9" :c4: "0" :ncuts: "2" :pattern: CCCGC :c2: "11" :name: FauI :blunt: "0" :c3: "0" AceIII: :len: "6" :c1: "13" :c4: "0" :ncuts: "2" :pattern: cagctc :c2: "17" :name: AceIII :blunt: "0" :c3: "0" AsuNHI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GCTAGC :c2: "5" :name: AsuNHI :blunt: "0" :c3: "0" AccB1I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGYRCC :c2: "5" :name: AccB1I :blunt: "0" :c3: "0" AspI: :len: "9" :c1: "4" :c4: "0" :ncuts: "2" :pattern: GACNNNGTC :c2: "5" :name: AspI :blunt: "0" :c3: "0" HaeI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: wggccw :c2: "3" :name: HaeI :blunt: "1" :c3: "0" EcoICRI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GAGCTC :c2: "3" :name: EcoICRI :blunt: "1" :c3: "0" BstACI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GRCGYC :c2: "4" :name: BstACI :blunt: "0" :c3: "0" BspMII: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: tccgga :c2: "5" :name: BspMII :blunt: "0" :c3: "0" CdiI: :len: "5" :c1: "4" :c4: "0" :ncuts: "2" :pattern: catcg :c2: "4" :name: CdiI :blunt: "1" :c3: "0" UbaF11I: :len: "5" :c1: "0" :c4: "0" :ncuts: "0" :pattern: tcgta :c2: "0" 
:name: UbaF11I :blunt: "0" :c3: "0" SimI: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: gggtc :c2: "5" :name: SimI :blunt: "0" :c3: "0" PciI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ACATGT :c2: "5" :name: PciI :blunt: "0" :c3: "0" MspCI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTTAAG :c2: "5" :name: MspCI :blunt: "0" :c3: "0" HpyF3I: :len: "5" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CTNAG :c2: "4" :name: HpyF3I :blunt: "0" :c3: "0" AsuHPI: :len: "5" :c1: "13" :c4: "0" :ncuts: "2" :pattern: GGTGA :c2: "12" :name: AsuHPI :blunt: "0" :c3: "0" BmrFI: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCNGG :c2: "3" :name: BmrFI :blunt: "0" :c3: "0" AdeI: :len: "9" :c1: "6" :c4: "0" :ncuts: "2" :pattern: CACNNNGTG :c2: "3" :name: AdeI :blunt: "0" :c3: "0" BsbI: :len: "6" :c1: "0" :c4: "0" :ncuts: "0" :pattern: caacac :c2: "0" :name: BsbI :blunt: "0" :c3: "0" AsuI: :len: "5" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ggncc :c2: "4" :name: AsuI :blunt: "0" :c3: "0" Cfr42I: :len: "6" :c1: "4" :c4: "0" :ncuts: "2" :pattern: CCGCGG :c2: "2" :name: Cfr42I :blunt: "0" :c3: "0" NspBII: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: cmgckg :c2: "3" :name: NspBII :blunt: "1" :c3: "0" Mva1269I: :len: "6" :c1: "7" :c4: "0" :ncuts: "2" :pattern: GAATGC :c2: "5" :name: Mva1269I :blunt: "0" :c3: "0" BstMBI: :len: "4" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: GATC :c2: "4" :name: BstMBI :blunt: "0" :c3: "0" SgsI: :len: "8" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GGCGCGCC :c2: "6" :name: SgsI :blunt: "0" :c3: "0" SetI: :len: "4" :c1: "4" :c4: "0" :ncuts: "2" :pattern: ASST :c2: "-1" :name: SetI :blunt: "0" :c3: "0" AvaI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CYCGRG :c2: "5" :name: AvaI :blunt: "0" :c3: "0" AlfI: :len: "12" :c1: "-11" :c4: "22" :ncuts: "4" :pattern: GCANNNNNNTGC :c2: "-13" :name: AlfI :blunt: "0" :c3: "24" AfiI: :len: "11" :c1: "7" :c4: "0" :ncuts: "2" :pattern: CCNNNNNNNGG :c2: "4" :name: AfiI :blunt: "0" :c3: "0" AvrII: :len: "6" :c1: 
"1" :c4: "0" :ncuts: "2" :pattern: CCTAGG :c2: "5" :name: AvrII :blunt: "0" :c3: "0" SanDI: :len: "7" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GGGWCCC :c2: "5" :name: SanDI :blunt: "0" :c3: "0" PspN4I: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GGNNCC :c2: "3" :name: PspN4I :blunt: "1" :c3: "0" Pfl1108I: :len: "6" :c1: "0" :c4: "0" :ncuts: "0" :pattern: tcgtag :c2: "0" :name: Pfl1108I :blunt: "0" :c3: "0" MssI: :len: "8" :c1: "4" :c4: "0" :ncuts: "2" :pattern: GTTTAAAC :c2: "4" :name: MssI :blunt: "1" :c3: "0" BsgI: :len: "6" :c1: "22" :c4: "0" :ncuts: "2" :pattern: GTGCAG :c2: "20" :name: BsgI :blunt: "0" :c3: "0" CfrI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: YGGCCR :c2: "5" :name: CfrI :blunt: "0" :c3: "0" Eco32I: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GATATC :c2: "3" :name: Eco32I :blunt: "1" :c3: "0" BstH2I: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: RGCGCY :c2: "1" :name: BstH2I :blunt: "0" :c3: "0" BpvUI: :len: "6" :c1: "4" :c4: "0" :ncuts: "2" :pattern: CGATCG :c2: "2" :name: BpvUI :blunt: "0" :c3: "0" BtuMI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: TCGCGA :c2: "3" :name: BtuMI :blunt: "1" :c3: "0" SmuI: :len: "5" :c1: "9" :c4: "0" :ncuts: "2" :pattern: CCCGC :c2: "11" :name: SmuI :blunt: "0" :c3: "0" SgrAI: :len: "8" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CRCCGGYG :c2: "6" :name: SgrAI :blunt: "0" :c3: "0" MbiI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: CCGCTC :c2: "3" :name: MbiI :blunt: "1" :c3: "0" Hin1I: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GRCGYC :c2: "4" :name: Hin1I :blunt: "0" :c3: "0" FblI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GTMKAC :c2: "4" :name: FblI :blunt: "0" :c3: "0" EgeI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: GGCGCC :c2: "3" :name: EgeI :blunt: "1" :c3: "0" Bst2BI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CACGAG :c2: "5" :name: Bst2BI :blunt: "0" :c3: "0" BauI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CACGAG :c2: "5" :name: BauI 
:blunt: "0" :c3: "0" XmaCI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCCGGG :c2: "5" :name: XmaCI :blunt: "0" :c3: "0" UbaF9I: :len: "12" :c1: "0" :c4: "0" :ncuts: "0" :pattern: tacnnnnnrtgt :c2: "0" :name: UbaF9I :blunt: "0" :c3: "0" RgaI: :len: "8" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GCGATCGC :c2: "3" :name: RgaI :blunt: "0" :c3: "0" BstNI: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCWGG :c2: "3" :name: BstNI :blunt: "0" :c3: "0" BspPI: :len: "5" :c1: "9" :c4: "0" :ncuts: "2" :pattern: GGATC :c2: "10" :name: BspPI :blunt: "0" :c3: "0" BshTI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ACCGGT :c2: "5" :name: BshTI :blunt: "0" :c3: "0" BslI: :len: "11" :c1: "7" :c4: "0" :ncuts: "2" :pattern: CCNNNNNNNGG :c2: "4" :name: BslI :blunt: "0" :c3: "0" CjuI: :len: "11" :c1: "0" :c4: "0" :ncuts: "0" :pattern: caynnnnnrtg :c2: "0" :name: CjuI :blunt: "0" :c3: "0" UbaPI: :len: "6" :c1: "0" :c4: "0" :ncuts: "0" :pattern: cgaacg :c2: "0" :name: UbaPI :blunt: "0" :c3: "0" MflI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: RGATCY :c2: "5" :name: MflI :blunt: "0" :c3: "0" Hin6I: :len: "4" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GCGC :c2: "3" :name: Hin6I :blunt: "0" :c3: "0" BseCI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: ATCGAT :c2: "4" :name: BseCI :blunt: "0" :c3: "0" BdaI: :len: "12" :c1: "-11" :c4: "22" :ncuts: "4" :pattern: TGANNNNNNTCA :c2: "-13" :name: BdaI :blunt: "0" :c3: "24" Acc65I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GGTACC :c2: "5" :name: Acc65I :blunt: "0" :c3: "0" PshBI: :len: "6" :c1: "2" :c4: "0" :ncuts: "2" :pattern: ATTAAT :c2: "4" :name: PshBI :blunt: "0" :c3: "0" BmtI: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GCTAGC :c2: "1" :name: BmtI :blunt: "0" :c3: "0" EarI: :len: "6" :c1: "7" :c4: "0" :ncuts: "2" :pattern: CTCTTC :c2: "10" :name: EarI :blunt: "0" :c3: "0" BstSNI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: TACGTA :c2: "3" :name: BstSNI :blunt: "1" :c3: "0" AvaII: :len: "5" :c1: "1" 
:c4: "0" :ncuts: "2" :pattern: GGWCC :c2: "4" :name: AvaII :blunt: "0" :c3: "0" AluI: :len: "4" :c1: "2" :c4: "0" :ncuts: "2" :pattern: AGCT :c2: "2" :name: AluI :blunt: "1" :c3: "0" Tth111I: :len: "9" :c1: "4" :c4: "0" :ncuts: "2" :pattern: GACNNNGTC :c2: "5" :name: Tth111I :blunt: "0" :c3: "0" Tsp4CI: :len: "5" :c1: "3" :c4: "0" :ncuts: "2" :pattern: acngt :c2: "2" :name: Tsp4CI :blunt: "0" :c3: "0" PsuI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: RGATCY :c2: "5" :name: PsuI :blunt: "0" :c3: "0" DpnII: :len: "4" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: GATC :c2: "4" :name: DpnII :blunt: "0" :c3: "0" Cfr10I: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: RCCGGY :c2: "5" :name: Cfr10I :blunt: "0" :c3: "0" BoxI: :len: "10" :c1: "5" :c4: "0" :ncuts: "2" :pattern: GACNNNNGTC :c2: "5" :name: BoxI :blunt: "1" :c3: "0" BsaJI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCNNGG :c2: "5" :name: BsaJI :blunt: "0" :c3: "0" TspMI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCCGGG :c2: "5" :name: TspMI :blunt: "0" :c3: "0" SmiMI: :len: "10" :c1: "5" :c4: "0" :ncuts: "2" :pattern: CAYNNNNRTG :c2: "5" :name: SmiMI :blunt: "1" :c3: "0" PspCI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: CACGTG :c2: "3" :name: PspCI :blunt: "1" :c3: "0" Nli3877I: :len: "6" :c1: "5" :c4: "0" :ncuts: "2" :pattern: cycgrg :c2: "1" :name: Nli3877I :blunt: "0" :c3: "0" BstXI: :len: "12" :c1: "8" :c4: "0" :ncuts: "2" :pattern: CCANNNNNNTGG :c2: "4" :name: BstXI :blunt: "0" :c3: "0" BssAI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: RCCGGY :c2: "5" :name: BssAI :blunt: "0" :c3: "0" RseI: :len: "10" :c1: "5" :c4: "0" :ncuts: "2" :pattern: CAYNNNNRTG :c2: "5" :name: RseI :blunt: "1" :c3: "0" NgoMIV: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: GCCGGC :c2: "5" :name: NgoMIV :blunt: "0" :c3: "0" BpuMI: :len: "5" :c1: "2" :c4: "0" :ncuts: "2" :pattern: CCSGG :c2: "3" :name: BpuMI :blunt: "0" :c3: "0" AgeI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: ACCGGT :c2: "5" 
:name: AgeI :blunt: "0" :c3: "0" MlsI: :len: "6" :c1: "3" :c4: "0" :ncuts: "2" :pattern: TGGCCA :c2: "3" :name: MlsI :blunt: "1" :c3: "0" BssECI: :len: "6" :c1: "1" :c4: "0" :ncuts: "2" :pattern: CCNNGG :c2: "5" :name: BssECI :blunt: "0" :c3: "0" CciNI: :len: "8" :c1: "2" :c4: "0" :ncuts: "2" :pattern: GCGGCCGC :c2: "6" :name: CciNI :blunt: "0" :c3: "0" BseMI: :len: "6" :c1: "8" :c4: "0" :ncuts: "2" :pattern: GCAATG :c2: "6" :name: BseMI :blunt: "0" :c3: "0" EcoRII: :len: "5" :c1: "-1" :c4: "0" :ncuts: "2" :pattern: CCWGG :c2: "5" :name: EcoRII :blunt: "0" :c3: "0" bio-1.4.3.0001/lib/bio/util/restriction_enzyme/range/0000755000004100000410000000000012200110570022207 5ustar www-datawww-databio-1.4.3.0001/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb0000644000004100000410000000475112200110570026403 0ustar www-datawww-data# # bio/util/restriction_enzyme/range/vertical_cut_range.rb - # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # module Bio require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme) class RestrictionEnzyme class Range # FIXME docs are kind of out of date. Change this to VerticalAndHorizontalCutRange class VerticalCutRange < CutRange attr_reader :p_cut_left, :p_cut_right attr_reader :c_cut_left, :c_cut_right attr_reader :min, :max attr_reader :range # VerticalCutRange provides an extremely raw, yet precise, method of # defining the location of cuts on primary and complementary sequences. # # Many VerticalCutRange objects are used with HorizontalCutRange objects # to be contained in CutRanges to define the cut pattern that a # specific enzyme may make. # # VerticalCutRange takes up to four possible cuts, two on the primary # strand and two on the complementary strand. In typical usage # you will want to make a single cut on the primary strand and a single # cut on the complementary strand. 
# # However, you can construct it with whatever cuts you desire to accomadate # the most eccentric of imaginary restriction enzymes. # # --- # *Arguments* # * +p_cut_left+: (_optional_) Left-most cut on the primary strand. +nil+ to skip # * +p_cut_right+: (_optional_) Right-most cut on the primary strand. +nil+ to skip # * +c_cut_left+: (_optional_) Left-most cut on the complementary strand. +nil+ to skip # * +c_cut_right+: (_optional_) Right-most cut on the complementary strand. +nil+ to skip # *Returns*:: nothing def initialize( p_cut_left=nil, p_cut_right=nil, c_cut_left=nil, c_cut_right=nil ) @p_cut_left = p_cut_left @p_cut_right = p_cut_right @c_cut_left = c_cut_left @c_cut_right = c_cut_right a = [@p_cut_left, @c_cut_left, @p_cut_right, @c_cut_right] a.delete(nil) a.sort! @min = a.first @max = a.last @range = nil @range = (@min..@max) unless @min == nil or @max == nil return end # Check if a location falls within the minimum or maximum values of this # range. # # --- # *Arguments* # * +i+: Location to check if it is included in the range # *Returns*:: +true+ _or_ +false+ def include?(i) return false if @range == nil @range.include?(i) end end # VerticalCutRange end # Range end # RestrictionEnzyme end # Bio bio-1.4.3.0001/lib/bio/util/restriction_enzyme/range/cut_ranges.rb0000644000004100000410000000231112200110570024663 0ustar www-datawww-data# # bio/util/restriction_enzyme/range/cut_ranges.rb - Container for many CutRange objects or CutRange child objects. # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # module Bio require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme) class RestrictionEnzyme class Range # Container for many CutRange objects or CutRange child objects. Inherits from array. 
# class CutRanges < Array def min; self.collect{|a| a.min}.flatten.sort.first; end def max; self.collect{|a| a.max}.flatten.sort.last; end def include?(i); self.collect{|a| a.include?(i)}.include?(true); end def min_vertical vertical_min_max_helper( :min ) end def max_vertical vertical_min_max_helper( :max ) end protected def vertical_min_max_helper( sym_which ) tmp = [] self.each do |a| next unless a.class == Bio::RestrictionEnzyme::Range::VerticalCutRange tmp << a.send( sym_which ) end z = (sym_which == :max) ? :last : :first tmp.flatten.sort.send(z) end end # CutRanges end # Range end # RestrictionEnzyme end # Bio bio-1.4.3.0001/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb0000644000004100000410000000341712200110570026761 0ustar www-datawww-data# # bio/util/restriction_enzyme/range/horizontal_cut_range.rb - # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # module Bio require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme) class RestrictionEnzyme class Range class HorizontalCutRange < CutRange attr_reader :p_cut_left, :p_cut_right attr_reader :c_cut_left, :c_cut_right attr_reader :min, :max attr_reader :hcuts def initialize( left, right=left ) raise "left > right" if left > right # The 'range' here is actually off by one on the left # side in relation to a normal CutRange, so using the normal # variables from CutRange would result in bad behavior. # # See below - the first horizontal cut is the primary cut plus one. 
# # 1 2 3 4 5 6 7 # G A|T T A C A # +-----+ # C T A A T|G T # 1 2 3 4 5 6 7 # # Primary cut = 2 # Complement cut = 5 # Horizontal cuts = 3, 4, 5 @p_cut_left = nil @p_cut_right = nil @c_cut_left = nil @c_cut_right = nil @min = left # NOTE this used to be 'nil', make sure all tests work @max = right # NOTE this used to be 'nil', make sure all tests work @range = (@min..@max) unless @min == nil or @max == nil # NOTE this used to be 'nil', make sure all tests work @hcuts = (left..right) end # Check if a location falls within the minimum or maximum values of this # range. # # --- # *Arguments* # * +i+: Location to check if it is included in the range # *Returns*:: +true+ _or_ +false+ def include?(i) @range.include?(i) end end # HorizontalCutRange end # Range end # RestrictionEnzyme end # Bio bio-1.4.3.0001/lib/bio/util/restriction_enzyme/range/sequence_range.rb0000644000004100000410000002254512200110570025530 0ustar www-datawww-data# # bio/util/restriction_enzyme/range/sequence_range.rb - A defined range over a nucleotide sequence # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # module Bio require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme) class RestrictionEnzyme class Range autoload :CutRange, 'bio/util/restriction_enzyme/range/cut_range' autoload :CutRanges, 'bio/util/restriction_enzyme/range/cut_ranges' autoload :HorizontalCutRange, 'bio/util/restriction_enzyme/range/horizontal_cut_range' autoload :VerticalCutRange, 'bio/util/restriction_enzyme/range/vertical_cut_range' # A defined range over a nucleotide sequence. # # This class accomadates having cuts defined on a sequence and returning the # fragments made by those cuts. 
class SequenceRange autoload :Fragment, 'bio/util/restriction_enzyme/range/sequence_range/fragment' autoload :Fragments, 'bio/util/restriction_enzyme/range/sequence_range/fragments' autoload :CalculatedCuts, 'bio/util/restriction_enzyme/range/sequence_range/calculated_cuts' # Left-most index of primary strand attr_reader :p_left # Right-most index of primary strand attr_reader :p_right # Left-most index of complementary strand attr_reader :c_left # Right-most index of complementary strand attr_reader :c_right # Left-most index of DNA sequence attr_reader :left # Right-most index of DNA sequence attr_reader :right # Size of DNA sequence attr_reader :size # CutRanges in this SequenceRange attr_reader :cut_ranges def initialize( p_left = nil, p_right = nil, c_left = nil, c_right = nil ) raise ArgumentError if p_left == nil and c_left == nil raise ArgumentError if p_right == nil and c_right == nil (raise ArgumentError unless p_left <= p_right) unless p_left == nil or p_right == nil (raise ArgumentError unless c_left <= c_right) unless c_left == nil or c_right == nil @p_left, @p_right, @c_left, @c_right = p_left, p_right, c_left, c_right @left = [p_left, c_left].compact.sort.first @right = [p_right, c_right].compact.sort.last @size = (@right - @left) + 1 unless @left == nil or @right == nil @cut_ranges = CutRanges.new @__fragments_current = false end # If the first object is HorizontalCutRange or VerticalCutRange, that is # added to the SequenceRange. Otherwise this method # builds a VerticalCutRange object and adds it to the SequenceRange. # # Note: # Cut occurs immediately after the index supplied. # For example, a cut at '0' would mean a cut occurs between bases 0 and 1. # # --- # *Arguments* # * +p_cut_left+: (_optional_) Left-most cut on the primary strand *or* a CutRange object. +nil+ to skip # * +p_cut_right+: (_optional_) Right-most cut on the primary strand. +nil+ to skip # * +c_cut_left+: (_optional_) Left-most cut on the complementary strand. 
#   +nil+ to skip
# * +c_cut_right+: (_optional_) Right-most cut on the complementary strand. +nil+ to skip
# *Returns*:: nothing
def add_cut_range(p_cut_left = nil, p_cut_right = nil, c_cut_left = nil, c_cut_right = nil)
  # Mark any previously calculated fragments as stale before touching the cuts.
  @__fragments_current = false

  # Shortcut: a ready-made CutRange object is stored as-is.
  return @cut_ranges << p_cut_left if p_cut_left.kind_of?(CutRange)

  # Every index that was actually supplied must lie within @left..@right.
  positions = [p_cut_left, p_cut_right, c_cut_left, c_cut_right]
  positions.compact.each do |position|
    raise IndexError unless position >= @left && position <= @right
  end

  @cut_ranges << VerticalCutRange.new(p_cut_left, p_cut_right, c_cut_left, c_cut_right)
end

# Add a series of CutRange objects (HorizontalCutRange or VerticalCutRange).
# Nested arrays in the argument list are flattened before processing.
#
# ---
# *Arguments*
# * +cut_ranges+: A series of CutRange objects
# *Returns*:: nothing
# *Raises*:: TypeError as soon as an element that is not a CutRange is met;
#            elements before it have already been added at that point.
def add_cut_ranges(*cut_ranges)
  cut_ranges.flatten.each do |candidate|
    unless candidate.kind_of?(CutRange)
      raise TypeError, "Not of type CutRange"
    end
    add_cut_range(candidate)
  end
end

# Builds a HorizontalCutRange object and adds it to the SequenceRange.
#
# ---
# *Arguments*
# * +left+: Left-most cut
# * +right+: (_optional_) Right side - by default this equals the left side, default is recommended.
# *Returns*:: nothing
def add_horizontal_cut_range(left, right = left)
  # The stale flag is set first, so it is reset even if construction raises.
  @__fragments_current = false
  horizontal = HorizontalCutRange.new(left, right)
  @cut_ranges << horizontal
end

# A Bio::RestrictionEnzyme::Range::SequenceRange::Bin holds an +Array+ of
# indexes for the primary and complement strands (+p+ and +c+ accessors).
#
# Example hash with Bin values:
#   {0=>#,
#    2=>#,
#    3=>#,
#    4=>#}
#
# Note that the bin cannot be easily stored as a range since there may be
# nucleotides excised in the middle of a range.
#
# TODO: Perhaps store the bins as one-or-many ranges since missing
# nucleotides due to enzyme cutting is a special case.
Bin = Struct.new(:c, :p)

# Calculates the fragments over this sequence range as defined after using
# the methods add_cut_range, add_cut_ranges, and/or add_horizontal_cut_range.
#
# The result is memoized: while @__fragments_current is +true+ the previously
# built object is returned without recalculation.
#
# ---
# *Arguments*
# * _none_
# *Returns*:: Bio::RestrictionEnzyme::Range::SequenceRange::Fragments
def fragments
  return @__fragments if @__fragments_current == true

  # The flag is raised before the work starts, exactly as the cache check expects.
  @__fragments_current = true

  # Placeholder text of @size characters ("0123456789" repeated and trimmed);
  # the same string is handed over for both strands.
  digits = '0123456789'
  ruler = (digits * (@size.div(digits.size) + 1))[0..@size - 1]
  result = Fragments.new(ruler, ruler)

  calculated = Bio::RestrictionEnzyme::Range::SequenceRange::CalculatedCuts.new(@size)
  calculated.add_cuts_from_cut_ranges(@cut_ranges)
  calculated.remove_incomplete_cuts

  # Bins are visited in ascending key order; each one becomes a Fragment.
  create_bins(calculated).sort.each do |_bin_id, bin|
    result << Fragment.new(bin.p, bin.c)
  end

  @__fragments = result
end

#########
protected
#########

# Example:
#   cc = Bio::RestrictionEnzyme::Range::SequenceRange::CalculatedCuts.new(@size)
#   cc.add_cuts_from_cut_ranges(@cut_ranges)
#   cc.remove_incomplete_cuts
#   bins = create_bins(cc)
#
# Example return value:
#   {0=>#,
#    2=>#,
#    3=>#,
#    4=>#}
#
# ---
# *Arguments*
# * +cc+: Bio::RestrictionEnzyme::Range::SequenceRange::CalculatedCuts
# *Returns*:: +Hash+ Keys are unique, values are Bio::RestrictionEnzyme::Range::SequenceRange::Bin objects filled with indexes of the sequence locations they represent.
# Walks the positions -1..@size-1 once and groups them into bins, opening a
# new bin on a strand after every vertical cut on that strand.
# The cut collections obtained from +cc+ are only read, never modified.
def create_bins(cc)
  p_cut = cc.vc_primary_as_original_class
  c_cut = cc.vc_complement_as_original_class
  h_cut = cc.hc_between_strands_as_original_class

  # NOTE
  # if it's circular we should start at the beginning of a cut for orientation
  # scan for it, hack off the first set of hcuts and move them to the back
  #
  # For a linear sequence position -1 acts as an implicit cut on both
  # strands. It is handled below through +start_cut+ instead of being
  # unshifted into p_cut / c_cut, which would alter the caller's data.
  linear = !@circular
  unique_id = linear ? -1 : 0

  p_bin_id = c_bin_id = unique_id
  bins = {}
  setup_new_bin(bins, unique_id)

  -1.upto(@size-1) do |idx| # NOTE - circular, for the future - should '-1' be replace with 'unique_id'?

    # if bin_ids are out of sync but the strands are attached
    if (p_bin_id != c_bin_id) && !h_cut.include?(idx)
      min_id, max_id = [p_bin_id, c_bin_id].sort
      bins.delete(max_id)
      p_bin_id = c_bin_id = min_id
    end

    bins[ p_bin_id ].p << idx
    bins[ c_bin_id ].c << idx

    start_cut = linear && idx == -1

    # a vertical cut after idx starts a fresh bin for that strand
    if start_cut || p_cut.include?(idx)
      p_bin_id = (unique_id += 1)
      setup_new_bin(bins, p_bin_id)
    end

    if start_cut || c_cut.include?(idx)
      c_bin_id = (unique_id += 1)
      setup_new_bin(bins, c_bin_id)
    end
  end

  # Bin "-1" is an easy way to indicate the start of a strand just in case
  # there is a horizontal cut at position 0
  bins.delete(-1) unless @circular
  bins
end

# Modifies bins in place by creating a new element with key bin_id and
# initializing the bin.
def setup_new_bin(bins, bin_id) bins[ bin_id ] = Bin.new bins[ bin_id ].p = DenseIntArray[] #could be replaced by SortedNumArray[] bins[ bin_id ].c = DenseIntArray[] #could be replaced by SortedNumArray[] end end # SequenceRange end # Range end # RestrictionEnzyme end # Bio bio-1.4.3.0001/lib/bio/util/restriction_enzyme/range/cut_range.rb0000644000004100000410000000110312200110570024476 0ustar www-datawww-data# # bio/util/restriction_enzyme/range/cut_range.rb - Abstract base class for HorizontalCutRange and VerticalCutRange # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # module Bio require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme) class RestrictionEnzyme class Range # Abstract base class for HorizontalCutRange and VerticalCutRange # class CutRange end # CutRange end # Range end # RestrictionEnzyme end # Bio bio-1.4.3.0001/lib/bio/util/restriction_enzyme/range/sequence_range/0000755000004100000410000000000012200110570025173 5ustar www-datawww-databio-1.4.3.0001/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb0000644000004100000410000000164712200110570027516 0ustar www-datawww-data# # bio/util/restriction_enzyme/analysis/fragments.rb - # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # module Bio require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme) class RestrictionEnzyme class Range class SequenceRange class Fragments < Array attr_accessor :primary attr_accessor :complement def initialize(primary, complement) @primary = primary @complement = complement end DisplayFragment = Struct.new(:primary, :complement) def for_display(p_str=nil, c_str=nil) p_str ||= @primary c_str ||= @complement pretty_fragments = [] self.each { |fragment| pretty_fragments << fragment.for_display(p_str, c_str) } 
pretty_fragments end end # Fragments end # SequenceRange end # Range end # RestrictionEnzyme end # Bio bio-1.4.3.0001/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb0000644000004100000410000002203112200110570030655 0ustar www-datawww-data# # bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb - # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # module Bio require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme) class RestrictionEnzyme class Range class SequenceRange # cc = CalculatedCuts.new(@size) # cc.add_cuts_from_cut_ranges(@cut_ranges) # cc.remove_incomplete_cuts # # 1 2 3 4 5 6 7 # G A|T T A C A # +-----+ # C T A A T|G T # 1 2 3 4 5 6 7 # # Primary cut = 2 # Complement cut = 5 # Horizontal cuts = 3, 4, 5 # class CalculatedCuts include CutSymbol include StringFormatting # +Array+ of vertical cuts on the primary strand in 0-based index notation def vc_primary #$stderr.puts caller[0].inspect ###DEBUG @vc_primary.to_a end # Returns the same contents as vc_primary, but returns original data # structure used in the class. def vc_primary_as_original_class @vc_primary end # +Array+ of vertical cuts on the complementary strand in 0-based index notation def vc_complement #$stderr.puts caller[0].inspect ###DEBUG @vc_complement.to_a end # Returns the same contents as vc_complement, but returns original data # structure used in the class. def vc_complement_as_original_class @vc_complement end # +Array+ of horizontal cuts between strands in 0-based index notation def hc_between_strands #$stderr.puts caller[0].inspect ###DEBUG @hc_between_strands.to_a end # Returns the same contents as hc_between_strands, but returns original data # structure used in the class. 
def hc_between_strands_as_original_class
  @hc_between_strands
end

# Set to +true+ if the fragment CalculatedCuts is working on is circular
attr_accessor :circular

#--
## An +Array+ with the primary strand with vertical cuts, the horizontal cuts, and the complementary strand with vertical cuts.
#attr_reader :strands_for_display
#++

# If +false+ the strands_for_display method needs to be called to update the contents
# of @strands_for_display. Becomes out of date whenever add_cuts_from_cut_ranges is called.
attr_reader :strands_for_display_current

# Size of the sequence being digested.
attr_reader :size

# Creates an empty set of calculated cuts.
#
# ---
# *Arguments*
# * +size+: (_optional_) Size of the sequence being digested
# * +circular+: (_optional_) +true+ if the sequence is circular
def initialize(size=nil, circular=false)
  @size, @circular = size, circular
  @vc_primary         = SortedNumArray[]
  @vc_complement      = SortedNumArray[]
  @hc_between_strands = SortedNumArray[]
end

# Accepts an +Array+ of CutRange type objects and applies them to
# @vc_complement, @vc_primary, and @hc_between_strands.
#
# ---
# *Arguments*
# * +cut_ranges+: An +Array+ of HorizontalCutRange or VerticalCutRange objects
# *Returns*:: nothing
def add_cuts_from_cut_ranges(cut_ranges)
  @strands_for_display_current = false

  # the vertical cut collections are replaced by copies before being extended
  @vc_primary    = @vc_primary.dup
  @vc_complement = @vc_complement.dup

  cut_ranges.each do |range|
    @vc_primary.concat [range.p_cut_left, range.p_cut_right]
    @vc_complement.concat [range.c_cut_left, range.c_cut_right]

    # Add horizontal cut ranges.  This may happen from cuts made inbetween a
    # VerticalCutRange or may be specifically defined by a HorizontalCutRange.
    if range.class == VerticalCutRange
      if range.min < range.max
        ( range.min + 1 ).upto( range.max ) { |i| @hc_between_strands << i }
      end
    elsif range.class == HorizontalCutRange
      ( range.hcuts.first ).upto( range.hcuts.last ) { |i| @hc_between_strands << i }
    end
  end

  # drop nils and duplicates, restore ordering
  clean_all
end

# There may be incomplete cuts made, this method removes the cuts that don't
# create sub-sequences for easier processing.
#
# For example, stray horizontal cuts that do not end with a left
# and right separation:
#
#   G A T T A C A
#      +--  ---
#   C T|A A T G T
#
# Or stray vertical cuts:
#
#   G A T T A C A
#      +--   +
#   C T|A A T|G T
#
# However note that for non-circular sequences this would be a successful
# cut which would result in a floating 'GT' sub-sequence:
#
#   G A T T A C A
#            +---
#   C T A A T|G T
#
# Blunt cuts are also complete cuts.
# ---
# *Arguments*
# * +size+: (_optional_) Size of the sequence being digested.  Defined here or during initalization of CalculatedCuts.
# *Returns*:: nothing
# *Raises*:: IndexError if no Integer size is known for a non-circular
#            sequence, or if a horizontal cut lies outside -1..size-1.
def remove_incomplete_cuts(size=nil)
  @strands_for_display_current = false
  @size = size if size

  # Integer rather than Fixnum: the Fixnum constant was deprecated in Ruby 2.4
  # and removed in Ruby 3.2, where referencing it raises NameError.
  unless @size.kind_of?(Integer) || @circular
    raise IndexError, "Size of the strand must be provided here or during initalization."
  end

  vcuts = @vc_primary + @vc_complement
  hcuts = @hc_between_strands
  last_index = @size - 1
  good_hcuts = SortedNumArray[]
  potential_hcuts = []

  if @circular
    # NOTE
    # if it's circular we should start at the beginning of a cut for orientation,
    # scan for it, hack off the first set of hcuts and move them to the back
  else
    # both ends of a linear strand count as vertical cuts
    vcuts.unshift(-1) unless vcuts.include?(-1)
    vcuts.push(last_index) unless vcuts.include?(last_index)
  end

  hcuts.each do |hcut|
    raise IndexError if hcut < -1 or hcut > last_index

    # skipped a nucleotide: the pending run of horizontal cuts was never
    # closed by a vertical cut, so it is discarded
    potential_hcuts.clear if !potential_hcuts.empty? and (hcut - potential_hcuts.last).abs > 1

    if potential_hcuts.empty?
      if vcuts.include?( hcut ) and vcuts.include?( hcut - 1 )
        # single-position run bounded by vertical cuts on both sides
        good_hcuts << hcut
      elsif vcuts.include?( hcut - 1 )
        # run opened by a vertical cut on its left; wait for the right one
        potential_hcuts << hcut
      end
    else
      if vcuts.include?( hcut )
        # run closed on the right: everything pending is a complete cut
        good_hcuts.concat(potential_hcuts)
        good_hcuts << hcut
        potential_hcuts.clear
      else
        potential_hcuts << hcut
      end
    end
  end

  check_vc = lambda do |vertical_cuts, opposing_vcuts|
    # opposing_vcuts is here only to check for blunt cuts, so there shouldn't
    # be any out-of-order problems with this
    good_vc = SortedNumArray[]
    vertical_cuts.each { |vc| good_vc << vc if good_hcuts.include?( vc ) or good_hcuts.include?( vc + 1 ) or opposing_vcuts.include?( vc ) }
    good_vc
  end

  @vc_primary = check_vc.call(@vc_primary, @vc_complement)
  @vc_complement = check_vc.call(@vc_complement, @vc_primary)
  @hc_between_strands = good_hcuts

  clean_all
end

# Sets @strands_for_display_current to +true+ and populates @strands_for_display.
#
# ---
# *Arguments*
# * +str1+: (_optional_) For displaying a primary strand.  If +nil+ a numbered sequence will be used in place.
# * +str2+: (_optional_) For displaying a complementary strand.  If +nil+ a numbered sequence will be used in place.
# * +vcp+: (_optional_) An array of vertical cut locations on the primary strand.  If +nil+ the contents of @vc_primary is used.
# * +vcc+: (_optional_) An array of vertical cut locations on the complementary strand.  If +nil+ the contents of @vc_complement is used.
# * +hc+: (_optional_) An array of horizontal cut locations between strands.  If +nil+ the contents of @hc_between_strands is used.
# *Returns*:: +Array+ An array with the primary strand with vertical cuts, the horizontal cuts, and the complementary strand with vertical cuts.
# def strands_for_display(str1 = nil, str2 = nil, vcp=nil, vcc=nil, hc=nil) return @strands_for_display if @strands_for_display_current vcs = '|' # Vertical cut symbol hcs = '-' # Horizontal cut symbol vhcs = '+' # Intersection of vertical and horizontal cut symbol num_txt_repeat = lambda { num_txt = '0123456789'; (num_txt * (@size.div(num_txt.size) + 1))[0..@size-1] } (str1 == nil) ? a = num_txt_repeat.call : a = str1.dup (str2 == nil) ? b = num_txt_repeat.call : b = str2.dup if vcp and !vcp.is_a?(SortedNumArray) then vcp = SortedNumArray.new.concat(vcp) end if vcc and !vcc.is_a?(SortedNumArray) then vcc = SortedNumArray.new.concat(vcc) end if hc and !hc.is_a?(SortedNumArray) then hc = SortedNumArray.new.concat(hc) end vcp = @vc_primary if vcp==nil vcc = @vc_complement if vcc==nil hc = @hc_between_strands if hc==nil vcp.reverse_each { |c| a.insert(c+1, vcs) } vcc.reverse_each { |c| b.insert(c+1, vcs) } between = ' ' * @size hc.each {|hcut| between[hcut,1] = hcs } s_a = add_spacing(a, vcs) s_b = add_spacing(b, vcs) s_bet = add_spacing(between) # NOTE watch this for circular i = 0 0.upto( s_a.size-1 ) do if (s_a[i,1] == vcs) or (s_b[i,1] == vcs) s_bet[i] = vhcs elsif i != 0 and s_bet[i-1,1] == hcs and s_bet[i+1,1] == hcs s_bet[i] = hcs end i+=1 end @strands_for_display_current = true @strands_for_display = [s_a, s_bet, s_b] end ######### protected ######### # remove nil values, remove duplicate values, and # sort @vc_primary, @vc_complement, and @hc_between_strands def clean_all [@vc_primary, @vc_complement, @hc_between_strands].collect { |a| a.delete(nil); a.uniq!; a.sort! 
} end end # CalculatedCuts end # SequenceRange end # Range end # RestrictionEnzyme end # Bio bio-1.4.3.0001/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb0000644000004100000410000000237412200110570027331 0ustar www-datawww-data# # bio/util/restriction_enzyme/range/sequence_range/fragment.rb - # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # module Bio require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme) class RestrictionEnzyme class Range class SequenceRange class Fragment attr_reader :size def initialize( primary_bin, complement_bin ) @primary_bin = primary_bin @complement_bin = complement_bin end DisplayFragment = Struct.new(:primary, :complement, :p_left, :p_right, :c_left, :c_right) def for_display(p_str=nil, c_str=nil) df = DisplayFragment.new df.primary = '' df.complement = '' both_bins = @primary_bin + @complement_bin both_bins.each do |item| @primary_bin.include?(item) ? df.primary << p_str[item] : df.primary << ' ' @complement_bin.include?(item) ? 
df.complement << c_str[item] : df.complement << ' ' end df.p_left = @primary_bin.first df.p_right = @primary_bin.last df.c_left = @complement_bin.first df.c_right = @complement_bin.last df end end # Fragment end # SequenceRange end # Range end # RestrictionEnzyme end # Bio bio-1.4.3.0001/lib/bio/util/restriction_enzyme/single_strand.rb0000644000004100000410000001523312200110570024300 0ustar www-datawww-data# # bio/util/restriction_enzyme/single_strand.rb - Single strand of a restriction enzyme sequence # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # require 'bio/sequence' module Bio require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme) class RestrictionEnzyme # A single strand of restriction enzyme sequence pattern with a 5' to 3' # orientation. # # DoubleStranded puts the SingleStrand and SingleStrandComplement together to # create the sequence pattern with cuts on both strands. # class SingleStrand < Bio::Sequence::NA autoload :CutLocationsInEnzymeNotation, 'bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation' include CutSymbol include StringFormatting # The cut locations in enzyme notation. Contains a # CutLocationsInEnzymeNotation object set when the SingleStrand # object is initialized. attr_reader :cut_locations_in_enzyme_notation # The cut locations transformed from enzyme index notation to 0-based # array index notation. Contains an Array. attr_reader :cut_locations # Orientation of the strand, 5' to 3' def orientation; [5,3]; end # Constructor for a Bio::RestrictionEnzyme::StingleStrand object. # # A single strand of restriction enzyme sequence pattern with a 5' to 3' orientation. # # --- # *Arguments* # * +sequence+: (_required_) The enzyme sequence. # * +c+: (_optional_) Cut locations in enzyme notation. 
#          See Bio::RestrictionEnzyme::SingleStrand::CutLocationsInEnzymeNotation
#
# *Constraints*
# * +sequence+ cannot contain immediately adjacent cut symbols (ex. atg^^c).
# * +c+ is in enzyme index notation and therefore cannot contain a 0.
# * If +c+ is omitted, +sequence+ must contain a cut symbol.
# * You cannot provide both a sequence with cut symbols and provide cut locations - ambiguous.
#
# +sequence+ must be a kind of:
# * String
# * Bio::Sequence::NA
# * Bio::RestrictionEnzyme::SingleStrand
#
# +c+ must be a kind of:
# * Bio::RestrictionEnzyme::SingleStrand::CutLocationsInEnzymeNotation
# * Integer, one or more
# * Array
#
# *Returns*:: nothing
def initialize( sequence, *c )
  c.flatten! # if an array was supplied as an argument

  # NOTE t| 2009-09-19 commented out for library efficiency
  # validate_args(sequence, c)
  sequence = sequence.downcase

  # cut symbols embedded in the sequence take precedence over +c+
  cut_source = (sequence =~ re_cut_symbol) ? strip_padding(sequence) : c
  @cut_locations_in_enzyme_notation = CutLocationsInEnzymeNotation.new( cut_source )

  @stripped = Bio::Sequence::NA.new( strip_cuts_and_padding( sequence ) )
  # +pattern+ reads @stripped and @cut_locations_in_enzyme_notation, so both
  # must be assigned before the superclass is initialized with it
  super( pattern )
  @cut_locations = @cut_locations_in_enzyme_notation.to_array_index
  nil
end

# Returns true if this enzyme is palindromic with its reverse complement.
# Does not report if the +cut_locations+ are palindromic or not.
#
# Examples:
# * This would be palindromic:
#     5' - ATGCAT - 3'
#          TACGTA
#
# * This would not be palindromic:
#     5' - ATGCGTA - 3'
#          TACGCAT
#
# ---
# *Arguments*
# * _none_
# *Returns*:: +true+ _or_ +false+
def palindromic?
  reverse_complemented = @stripped.reverse_complement
  reverse_complemented == @stripped
end

# Sequence pattern with no cut symbols and no 'n' padding.
# * SingleStrand.new('garraxt', [-2, 1, 7]).stripped # => "garraxt"
attr_reader :stripped

# The sequence with 'n' padding and cut symbols.
# * SingleStrand.new('garraxt', [-2, 1, 7]).with_cut_symbols # => "n^ng^arraxt^n" # # --- # *Arguments* # * _none_ # *Returns*:: The sequence with 'n' padding and cut symbols. def with_cut_symbols s = pattern @cut_locations_in_enzyme_notation.to_array_index.sort.reverse.each { |c| s.insert(c+1, cut_symbol) } s end # The sequence with 'n' padding on the left and right for cuts larger than the sequence. # * SingleStrand.new('garraxt', [-2, 1, 7]).pattern # => "nngarraxtn" # # --- # *Arguments* # * _none_ # *Returns*:: The sequence with 'n' padding on the left and right for cuts larger than the sequence. def pattern return stripped if @cut_locations_in_enzyme_notation.min == nil left = (@cut_locations_in_enzyme_notation.min < 0 ? 'n' * @cut_locations_in_enzyme_notation.min.abs : '') # Add one more 'n' if a cut is at the last position right = ( (@cut_locations_in_enzyme_notation.max >= @stripped.length) ? ('n' * (@cut_locations_in_enzyme_notation.max - @stripped.length + 1)) : '') [left, stripped, right].join('') end # The sequence with 'n' pads, cut symbols, and spacing for alignment. # * SingleStrand.new('garraxt', [-2, 1, 7]).with_spaces # => "n^n g^a r r a x t^n" # # --- # *Arguments* # * _none_ # *Returns*:: The sequence with 'n' pads, cut symbols, and spacing for alignment. def with_spaces add_spacing( with_cut_symbols ) end ######### protected ######### def validate_args( input_pattern, input_cut_locations ) unless input_pattern.kind_of?(String) err = "input_pattern is not a String, Bio::Sequence::NA, or Bio::RestrictionEnzyme::SingleStrand object\n" err += "pattern: #{input_pattern}\n" err += "class: #{input_pattern.class}" raise ArgumentError, err end if ( input_pattern =~ re_cut_symbol ) and !input_cut_locations.empty? err = "Cut symbol found in sequence, but cut locations were also supplied. 
Ambiguous.\n" err += "pattern: #{input_pattern}\n" err += "symbol: #{cut_symbol}\n" err += "locations: #{input_cut_locations.inspect}" raise ArgumentError, err end input_pattern.each_byte do |c| c = c.chr.downcase unless Bio::NucleicAcid::NAMES.has_key?(c) or c == 'x' or c == 'X' or c == cut_symbol err = "Invalid character in pattern.\n" err += "Not a nucleotide or representation of possible nucleotides. See Bio::NucleicAcid::NAMES for more information.\n" err += "char: #{c}\n" err += "input_pattern: #{input_pattern}" raise ArgumentError, err end end end # Tadayoshi Funaba's method as discussed in Programming Ruby 2ed, p390 def self.once(*ids) for id in ids module_eval <<-"end;" alias_method :__#{id.__id__}__, :#{id.to_s} private :__#{id.__id__}__ def #{id.to_s}(*args, &block) (@__#{id.__id__}__ ||= [__#{id.__id__}__(*args, &block)])[0] end end; end end private_class_method :once once :pattern, :with_cut_symbols, :with_spaces, :to_re end # SingleStrand end # RestrictionEnzyme end # Bio bio-1.4.3.0001/lib/bio/util/restriction_enzyme/single_strand/0000755000004100000410000000000012200110570023747 5ustar www-datawww-databio-1.4.3.0001/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb0000644000004100000410000000767112200110570033145 0ustar www-datawww-data# # bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb - The cut locations, in enzyme notation # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # module Bio require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme) class RestrictionEnzyme class SingleStrand # Stores the cut location in thier enzyme index notation # # May be initialized with a series of cuts or an enzyme pattern marked # with cut symbols. 
# # Enzyme index notation:: 1.._n_, value before 1 is -1 # # example:: [-3][-2][-1][1][2][3][4][5] # # Negative values are used to indicate when a cut may occur at a specified # distance before the sequence begins. This would be padded with 'n' # nucleotides to represent wildcards. # # Notes: # * 0 is invalid as it does not refer to any index # * +nil+ is not allowed here as it has no meaning # * +nil+ values are kept track of in DoubleStranded::CutLocations as they # need a reference point on the correlating strand. In # DoubleStranded::CutLocations +nil+ represents no cut or a partial # digestion. # class CutLocationsInEnzymeNotation < Array include CutSymbol extend CutSymbol # First cut, in enzyme-index notation attr_reader :min # Last cut, in enzyme-index notation attr_reader :max # Constructor for CutLocationsInEnzymeNotation # # --- # *Arguments* # * +a+: Locations of cuts represented as a string with cuts or an array of values # Examples: # * n^ng^arraxt^n # * 2 # * -1, 5 # * [-1, 5] # *Returns*:: nothing def initialize(*a) a.flatten! # in case an array was passed as an argument if a.size == 1 and a[0].kind_of? String and a[0] =~ re_cut_symbol # Initialize with a cut symbol pattern such as 'n^ng^arraxt^n' s = a[0] a = [] i = -( s.tr(cut_symbol, '') =~ %r{[^n]} ) # First character that's not 'n' s.each_byte { |c| (a << i; next) if c.chr == cut_symbol; i += 1 } a.collect! { |n| n <= 0 ? n-1 : n } # 0 is not a valid enzyme index, decrement from 0 and all negative else a.collect! { |n| n.to_i } # Cut locations are always integers end validate_cut_locations( a ) super(a) self.sort! @min = self.first @max = self.last self.freeze end # Transform the cut locations from enzyme index notation to 0-based index # notation. 
# # input -> output # [ 1, 2, 3 ] -> [ 0, 1, 2 ] # [ 1, 3, 5 ] -> [ 0, 2, 4 ] # [ -1, 1, 2 ] -> [ 0, 1, 2 ] # [ -2, 1, 3 ] -> [ 0, 2, 4 ] # # --- # *Arguments* # * _none_ # *Returns*:: +Array+ of cuts in 0-based index notation def to_array_index return [] if @min == nil if @min < 0 calc = lambda do |n| n -= 1 unless n < 0 n + @min.abs end else calc = lambda { |n| n - 1 } end self.collect(&calc) end ######### protected ######### def validate_cut_locations( input_cut_locations ) unless input_cut_locations == input_cut_locations.uniq err = "The cut locations supplied contain duplicate values. Redundant / undefined meaning.\n" err += "cuts: #{input_cut_locations.inspect}\n" err += "unique: #{input_cut_locations.uniq.inspect}" raise ArgumentError, err end if input_cut_locations.include?(nil) err = "The cut locations supplied contained a nil. nil has no index for enzyme notation, alternative meaning is 'no cut'.\n" err += "cuts: #{input_cut_locations.inspect}" raise ArgumentError, err end if input_cut_locations.include?(0) err = "The cut locations supplied contained a '0'. 
'0' has no index for enzyme notation, alternative meaning is 'no cut'.\n" err += "cuts: #{input_cut_locations.inspect}" raise ArgumentError, err end end end # CutLocationsInEnzymeNotation end # SingleStrand end # RestrictionEnzyme end # Bio bio-1.4.3.0001/lib/bio/util/restriction_enzyme/double_stranded/0000755000004100000410000000000012200110570024251 5ustar www-datawww-data././@LongLink0000000000000000000000000000014700000000000011567 Lustar rootrootbio-1.4.3.0001/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rbbio-1.4.3.0001/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.0000644000004100000410000000175712200110570033752 0ustar www-datawww-data# # bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb - Inherits from DoubleStranded::CutLocationPair # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # module Bio require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme) class RestrictionEnzyme class DoubleStranded # Inherits from DoubleStranded::CutLocationPair , stores the cut location pair in # enzyme notation instead of 0-based. # class CutLocationPairInEnzymeNotation < CutLocationPair ######### protected ######### def validate_2( a, b ) if (a == 0) or (b == 0) raise ArgumentError, "Enzyme index notation only. 0 values are illegal." end if a == nil and b == nil raise ArgumentError, "Neither strand has a cut. Ambiguous." 
end end end # CutLocationPair end # DoubleStranded end # RestrictionEnzyme end # Bio bio-1.4.3.0001/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb0000644000004100000410000000337512200110570027454 0ustar www-datawww-data# # bio/util/restriction_enzyme/double_stranded/cut_locations.rb - Contains an Array of CutLocationPair objects # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # module Bio require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme) class RestrictionEnzyme class DoubleStranded # Contains an +Array+ of CutLocationPair objects. # class CutLocations < Array # CutLocations constructor. # # Contains an +Array+ of CutLocationPair objects. # # Example: # clp1 = CutLocationPair.new(3,2) # clp2 = CutLocationPair.new(7,9) # pairs = CutLocations.new(clp1, clp2) # # --- # *Arguments* # * +args+: Any number of +CutLocationPair+ objects # *Returns*:: nothing def initialize(*args) validate_args(args) super(args) end # Returns an +Array+ of locations of cuts on the primary strand # # --- # *Arguments* # * _none_ # *Returns*:: +Array+ of locations of cuts on the primary strand def primary self.collect {|a| a[0]} end # Returns an +Array+ of locations of cuts on the complementary strand # # --- # *Arguments* # * _none_ # *Returns*:: +Array+ of locations of cuts on the complementary strand def complement self.collect {|a| a[1]} end ######### protected ######### def validate_args(args) args.each do |a| unless a.class == Bio::RestrictionEnzyme::DoubleStranded::CutLocationPair err = "Not a CutLocationPair\n" err += "class: #{a.class}\n" err += "inspect: #{a.inspect}" raise ArgumentError, err end end end end # CutLocations end # DoubleStranded end # RestrictionEnzyme end # Bio bio-1.4.3.0001/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb0000644000004100000410000000621412200110570033437 0ustar 
www-datawww-data# # bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb - Inherits from DoubleStrand::CutLocations # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # module Bio require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme) class RestrictionEnzyme class DoubleStranded # Inherits from DoubleStranded::CutLocations. Contains CutLocationPairInEnzymeNotation objects. # Adds helper methods to convert from enzyme index notation to 0-based array index notation. # class CutLocationsInEnzymeNotation < CutLocations # Returns +Array+ of locations of cuts on the primary # strand in 0-based array index notation. # # --- # *Arguments* # * _none_ # *Returns*:: +Array+ of locations of cuts on the primary strand in 0-based array index notation. def primary_to_array_index helper_for_to_array_index(self.primary) end # Returns +Array+ of locations of cuts on the complementary # strand in 0-based array index notation. # # --- # *Arguments* # * _none_ # *Returns*:: +Array+ of locations of cuts on the complementary strand in 0-based array index notation. def complement_to_array_index helper_for_to_array_index(self.complement) end # Returns the contents of the present CutLocationsInEnzymeNotation object as # a CutLocations object with the contents converted from enzyme notation # to 0-based array index notation. 
# # --- # *Arguments* # * _none_ # *Returns*:: +CutLocations+ def to_array_index unless self.primary_to_array_index.size == self.complement_to_array_index.size err = "Primary and complement strand cut locations are not available in equal numbers.\n" err += "primary: #{self.primary_to_array_index.inspect}\n" err += "primary.size: #{self.primary_to_array_index.size}\n" err += "complement: #{self.complement_to_array_index.inspect}\n" err += "complement.size: #{self.complement_to_array_index.size}" raise IndexError, err end a = self.primary_to_array_index.zip(self.complement_to_array_index) CutLocations.new( *a.collect {|cl| CutLocationPair.new(cl)} ) end ######### protected ######### def helper_for_to_array_index(a) minimum = (self.primary + self.complement).flatten minimum.delete(nil) minimum = minimum.sort.first return [] if minimum == nil # no elements if minimum < 0 calc = lambda do |n| unless n == nil n -= 1 unless n < 0 n += minimum.abs end n end else calc = lambda do |n| n -= 1 unless n == nil n end end a.collect(&calc) end def validate_args(args) args.each do |a| unless a.class == Bio::RestrictionEnzyme::DoubleStranded::CutLocationPairInEnzymeNotation err = "Not a CutLocationPairInEnzymeNotation\n" err += "class: #{a.class}\n" err += "inspect: #{a.inspect}" raise TypeError, err end end end end # CutLocationsInEnzymeNotation end # DoubleStranded end # RestrictionEnzyme end # Bio bio-1.4.3.0001/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb0000644000004100000410000000560712200110570030304 0ustar www-datawww-data# # bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb - Stores a cut location pair in 0-based index notation # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # module Bio require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme) class RestrictionEnzyme class DoubleStranded # Stores a single cut 
# Stores a single cut location pair in 0-based index notation for use with
# DoubleStranded enzyme sequences.
#
class CutLocationPair < Array
  # Location of the cut on the primary strand.
  # Corresponds - or 'pairs' - to the complement cut.
  # A value of +nil+ is an explicit representation of 'no cut'.
  attr_reader :primary

  # Location of the cut on the complementary strand.
  # Corresponds - or 'pairs' - to the primary cut.
  # A value of +nil+ is an explicit representation of 'no cut'.
  attr_reader :complement

  # CutLocationPair constructor.
  #
  # Stores a single cut location pair in 0-based index notation for use with
  # DoubleStranded enzyme sequences.
  #
  # Example:
  #   clp = CutLocationPair.new(3,2)
  #   clp.primary                    # 3
  #   clp.complement                 # 2
  #
  # ---
  # *Arguments*
  # * +pair+: May be two values represented as an Array, a Range, or a
  #   combination of Integer and nil values.  The first value
  #   represents a cut on the primary strand, the second represents
  #   a cut on the complement strand.
  # *Returns*:: nothing
  # *Raises*:: +ArgumentError+ for an unsupported argument class, a wrong
  #            number of elements, a negative value, or two +nil+ values
  def initialize( *pair )
    first = pair[0]

    if first.kind_of?(Array)
      a, b = init_with_array( first )
    # The class-name comparison is kept from the original, which noted that
    # kind_of?(Range) alone failed under test/runner.rb.
    elsif first.kind_of?(Range) || first.class.to_s == 'Range'
      a, b = init_with_array( [first.first, first.last] )
    elsif first.kind_of?(Integer) || first.kind_of?(NilClass)
      a, b = init_with_array( [first, pair[1]] )
    else
      raise ArgumentError, "#{first.class} is an invalid class type to initalize CutLocationPair."
    end

    super( [a, b] )
    @primary = a
    @complement = b
    return
  end

  #########
  protected
  #########

  # Validates +ary+ and returns its one or two values as +[a, b]+.
  # A missing second value becomes +nil+.  +ary+ itself is not modified
  # (the previous implementation emptied the caller's array with +shift+).
  def init_with_array( ary )
    validate_1(ary)
    a, b = ary
    validate_2(a, b)
    [a, b]
  end

  # Raises +ArgumentError+ unless +ary+ has one or two elements.
  def validate_1( ary )
    unless ary.size == 1 or ary.size == 2
      raise ArgumentError, "Must be one or two elements."
    end
  end

  # Raises +ArgumentError+ for negative values or when both values are +nil+.
  def validate_2( a, b )
    if (a != nil and a < 0) or (b != nil and b < 0)
      raise ArgumentError, "0-based index notation only. Negative values are illegal."
    end

    if a == nil and b == nil
      raise ArgumentError, "Neither strand has a cut. Ambiguous."
    end
  end
end # CutLocationPair
#
# Example:
#   AlignedStrands.align('nngattacannnnn', 'nnnnnctaatgtnn')
#   # => Result with both strings padded to the maximum padding of each side
#
# ---
# *Arguments*
# * +a+: Primary strand
# * +b+: Complementary strand
# *Returns*:: +Result+ object with equal padding on both strings
# *Raises*:: +ArgumentError+ when the stripped sequences differ in length
def self.align(a, b)
  a = a.to_s
  b = b.to_s
  a_core = strip_padding(a)
  b_core = strip_padding(b)
  validate_input( a_core, b_core )

  # sort.last picks the longer padding, assuming padding consists only of
  # 'n' characters -- NOTE(review): relies on left_padding/right_padding,
  # which are defined outside this section.
  left = [left_padding(a), left_padding(b)].sort.last
  right = [right_padding(a), right_padding(b)].sort.last

  Result.new(left + a_core + right, left + b_core + right)
end

# Pad and align two String objects with cut symbols.
#
# Example:
#   AlignedStrands.align_with_cuts('nngattacannnnn', 'nnnnnctaatgtnn', [0, 10, 12], [0, 2, 12])
#
# Notes:
# * To make room for the cut symbols each nucleotide is spaced out.
# * This is meant to be able to handle multiple cuts and completely
#   unrelated cutsites on the two strands, therefore no biological
#   algorithm assumptions (shortcuts) are made.
#
# The sequences stripped of left and right 'n' padding must be of equal
# length.
#
# ---
# *Arguments*
# * +a+: Primary sequence
# * +b+: Complementary sequence
# * +a_cuts+: Primary strand cut locations in 0-based index notation
# * +b_cuts+: Complementary strand cut locations in 0-based index notation
# *Returns*:: +Result+ object with equal padding on both strings and spacing between bases
# *Raises*:: +ArgumentError+ when the stripped sequences differ in length
def self.align_with_cuts(a,b,a_cuts,b_cuts)
  a = a.to_s
  b = b.to_s
  a_core = strip_padding(a)
  b_core = strip_padding(b)
  validate_input( a_core, b_core )

  a_left, a_right = left_padding(a), right_padding(a)
  b_left, b_right = left_padding(b), right_padding(b)

  left_diff = a_left.length - b_left.length
  right_diff = a_right.length - b_right.length

  # Extend the shorter right padding to match the longer one.
  if right_diff > 0
    b_right += 'n' * right_diff
  else
    a_right += 'n' * right_diff.abs
  end

  # Extend the shorter left padding; cut indexes on that strand must then be
  # moved right by the amount of padding that was added.
  a_adjust = b_adjust = 0
  if left_diff > 0
    b_left += 'n' * left_diff
    b_adjust = left_diff
  else
    a_left += 'n' * left_diff.abs
    a_adjust = left_diff.abs
  end

  a = a_left + a_core + a_right
  b = b_left + b_core + b_right

  # Insert from the highest index down so earlier insertions do not shift
  # the positions still to be processed.
  a_cuts.sort.reverse_each { |c| a.insert(c+1+a_adjust, cut_symbol) }
  b_cuts.sort.reverse_each { |c| b.insert(c+1+b_adjust, cut_symbol) }

  Result.new( add_spacing(a), add_spacing(b) )
end

# Raises +ArgumentError+ unless +a+ and +b+ have the same size.
#
# NOTE: the original source places this after a bare +protected+, which does
# not change the visibility of methods defined with <tt>def self.</tt>.
def self.validate_input(a,b)
  unless a.size == b.size
    err = "Result sequences are not the same size. Does not align sequences with differing lengths after strip_padding.\n"
    err += "#{a.size}, #{a.inspect}\n"
    err += "#{b.size}, #{b.inspect}"
    raise ArgumentError, err
  end
end
# Stores integers as a list of contiguous [first, last] runs, which is
# compact when the data contains many consecutive numbers.
#
# Bio::RestrictionEnzyme internal use only.
# Please do not create the instance outside Bio::RestrictionEnzyme.
class DenseIntArray
  # One contiguous run of integers, +first+ through +last+ inclusive.
  MutableRange = Struct.new(:first, :last)

  include Enumerable

  # Same usage as Array.[]
  def self.[](*args)
    a = self.new
    args.each { |elem| a.push elem }
    a
  end

  # creates a new object
  def initialize
    @data = []
  end

  # initialize copy (deep-copies the runs so the copy can be extended
  # without touching the original)
  def initialize_copy(other)
    super(other)
    @data = @data.collect { |elem| elem.dup }
  end

  # sets internal data object
  def internal_data=(a)
    @data = a
    self
  end
  protected :internal_data=

  # gets internal data object
  def internal_data
    @data
  end
  protected :internal_data

  # Same usage as Array#[]
  def [](*arg)
    to_a[*arg]
  end

  # Not implemented
  def []=(*arg)
    raise NotImplementedError, 'DenseIntArray#[]= is not implemented.'
  end

  # Same usage as Array#each.  Returns an Enumerator when no block is given.
  def each
    return enum_for(:each) unless block_given?
    @data.each do |elem|
      elem.first.upto(elem.last) { |num| yield num }
    end
    self
  end

  # Same usage as Array#reverse_each.  Returns an Enumerator when no block
  # is given.
  def reverse_each
    return enum_for(:reverse_each) unless block_given?
    @data.reverse_each do |elem|
      elem.last.downto(elem.first) { |num| yield num }
    end
    self
  end

  # Same usage as Array#+, but accepts only the same classes instance.
  #
  # Runs that overlap or are directly adjacent are merged, so the result has
  # the same internal form as pushing the same numbers one by one (which
  # matters because #== compares the internal runs).
  def +(other)
    unless other.is_a?(self.class) then
      raise TypeError, 'unsupported data type'
    end
    runs = (@data + other.internal_data).sort { |a, b| a.first <=> b.first }
    result = self.class.new
    return result if runs.empty?
    newdata = result.internal_data
    newdata.push runs[0].dup
    runs.drop(1).each do |run|
      tail = newdata[-1]
      if tail.last + 1 >= run.first then
        tail.last = run.last if run.last > tail.last
      else
        newdata.push run.dup
      end
    end
    result
  end

  # Same usage as Array#==
  def ==(other)
    return true if super(other)
    other.is_a?(self.class) ? (other.internal_data == @data) : false
  end

  # Same usage as Array#concat
  def concat(ary)
    ary.each { |elem| self.<<(elem) }
    self
  end

  # Same usage as Array#push
  def push(*args)
    args.each { |elem| self.<<(elem) }
    self
  end

  # Same usage as Array#unshift
  def unshift(*arg)
    raise NotImplementedError, 'DenseIntArray#unshift is not implemented.'
  end

  # Same usage as Array#<<
  #
  # Extends the last run when +elem+ directly follows it, otherwise starts a
  # new run.
  def <<(elem)
    if !@data.empty? and @data[-1].last + 1 == elem then
      @data[-1].last = elem
    else
      @data << MutableRange.new(elem, elem)
    end
    self
  end

  # Same usage as Array#include?
  def include?(elem)
    return false if @data.empty? or elem < self.first or self.last < elem
    @data.any? { |range| range.first <= elem && elem <= range.last }
  end

  # Same usage as Array#first
  def first
    elem = @data.first
    elem ? elem.first : nil
  end

  # Same usage as Array#last
  def last
    elem = @data.last
    elem ? elem.last : nil
  end

  # Same usage as Array#size
  def size
    sum = 0
    @data.each { |range| sum += (range.last - range.first + 1) }
    sum
  end
  alias length size

  # Same usage as Array#delete
  def delete(elem)
    raise NotImplementedError, 'DenseIntArray#delete is not implemented.'
  end

  # Does nothing
  def sort!(&block)
    self
  end

  # Does nothing
  def uniq!
    self
  end
end #class DenseIntArray
#
# Example:
#
# FIXME add output
#
#   Bio::RestrictionEnzyme::Analysis.cut('gaattc', 'EcoRI')
#
# _same as:_
#
#   Bio::RestrictionEnzyme::Analysis.cut('gaattc', 'g^aattc')
# ---
# *Arguments*
# * +sequence+: +String+ kind of object that will be used as a nucleic acid sequence.
# * +args+: Series of enzyme names, enzymes sequences with cut marks, or RestrictionEnzyme objects.
#   A +Hash+ argument may carry the key +:view_ranges+ (true or false).
# *Returns*:: Bio::RestrictionEnzyme::Fragments object populated with Bio::RestrictionEnzyme::Fragment objects.  (Note: unrelated to Bio::RestrictionEnzyme::Range::SequenceRange::Fragments) or a +Symbol+ containing an error code
# *Raises*:: +ArgumentError+ when +:view_ranges+ is neither +true+ nor +false+
def cut( sequence, *args )
  view_ranges = false

  # Pick the :view_ranges option out of any Hash arguments.  The hashes stay
  # in args; cut_and_return_by_permutations receives them as well.
  args.each do |arg|
    next unless arg.class == Hash
    arg.each do |key, value|
      next unless key == :view_ranges
      unless value.kind_of?(TrueClass) || value.kind_of?(FalseClass)
        raise ArgumentError, "view_ranges must be set to true or false, currently #{value.inspect}."
      end
      view_ranges = value
    end
  end

  res = cut_and_return_by_permutations( sequence, *args )
  # A Symbol is an error code and is handed back to the caller unchanged.
  return res if res.class == Symbol

  # Format the fragments for the user
  fragments_for_display( res, view_ranges )
end

# See cut instance method
#
# ---
# *Arguments*
# * +sequence+: +String+ kind of object that will be used as a nucleic acid sequence.
# * +args+: Series of enzyme names, enzymes sequences with cut marks, or RestrictionEnzyme objects.
#   May also supply a +Hash+ with the key ":max_permutations" to specify how many permutations are allowed - a value of 0 indicates no permutations are allowed.
# *Returns*:: +Hash+ Keys are a permutation ID, values are SequenceRange objects that have cuts applied.
# _also_ may return the +Symbol+ ':sequence_empty', ':no_cuts_found', or ':too_many_permutations'
def cut_and_return_by_permutations( sequence, *args )
  my_hash = {}
  maximum_permutations = nil

  # Option hashes are separated from the enzyme arguments; after this only
  # the enzymes remain in args.
  hashes_in_args = args.select { |i| i.class == Hash }
  args.delete_if { |i| i.class == Hash }
  hashes_in_args.each do |hsh|
    hsh.each do |key, value|
      case key
      when :max_permutations, 'max_permutations', :maximum_permutations, 'maximum_permutations'
        maximum_permutations = value.to_i unless value == nil
      when :view_ranges
        # accepted here, but only used by the caller (see cut)
      else
        raise ArgumentError, "Received key #{key.inspect} in argument - I only know the key ':max_permutations' and ':view_ranges' currently. Hash passed: #{hsh.inspect}"
      end
    end
  end

  if !sequence.kind_of?(String) or sequence.empty?
    logger.warn "The supplied sequence is empty." if defined?(logger)
    return :sequence_empty
  end
  sequence = Bio::Sequence::NA.new( sequence )

  # enzyme_actions compete with each other; initial_cuts never do.
  enzyme_actions, initial_cuts = create_enzyme_actions( sequence, *args )

  if enzyme_actions.empty? and initial_cuts.empty?
    logger.warn "This enzyme does not make any cuts on this sequence." if defined?(logger)
    return :no_cuts_found
  end

  # * When enzyme_actions.size is equal to '1' that means there are no permutations.
  # * If enzyme_actions.size is equal to '2' there is one
  #   permutation ("[0, 1]")
  # * If enzyme_actions.size is equal to '3' there are two
  #   permutations ("[0, 1, 2]")
  # * and so on..
  if maximum_permutations and enzyme_actions.size > 1
    if (enzyme_actions.size - 1) > maximum_permutations.to_i
      logger.warn "More permutations than maximum, skipping. Found: #{enzyme_actions.size-1} Max: #{maximum_permutations.to_i}" if defined?(logger)
      return :too_many_permutations
    end
  end

  if enzyme_actions.size > 1
    permutations = permute(enzyme_actions.size)

    permutations.each do |permutation|
      previous_cut_ranges = []
      # Primary and complement strands are both measured from '0' to 'sequence.size-1' here
      sequence_range = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )

      # Add the cuts to the sequence_range from each enzyme_action contained
      # in initial_cuts.  These are the cuts that have no competition so are
      # not subject to permutations.
      initial_cuts.each do |enzyme_action|
        enzyme_action.cut_ranges.each do |cut_range|
          sequence_range.add_cut_range(cut_range)
        end
      end

      permutation.each do |id|
        enzyme_action = enzyme_actions[id]

        # conflict is false if the current enzyme action may cut in its range.
        # conflict is true if it cannot due to a previous enzyme action making
        # a cut where this enzyme action needs a whole recognition site.
        conflict = false

        # If current size of enzyme_action overlaps with previous cut_range, don't cut
        # note that the enzyme action may fall in the middle of a previous enzyme action
        # so all cut locations must be checked that would fall underneath.
        previous_cut_ranges.each do |cut_range|
          next unless cut_range.class == Bio::RestrictionEnzyme::Range::VerticalCutRange # we aren't concerned with horizontal cuts
          previous_cut_left = cut_range.range.first
          previous_cut_right = cut_range.range.last

          # Keep in mind:
          # * The cut location is to the immediate right of the base located at the index.
          #   ex: at^gc -- the cut location is at index 1
          # * The enzyme action location is located at the base of the index.
          #   ex: atgc -- 0 => 'a', 1 => 't', 2 => 'g', 3 => 'c'
          # method create_enzyme_actions has similar commentary if interested
          if (enzyme_action.right <= previous_cut_left) or (enzyme_action.left > previous_cut_right) or (enzyme_action.left > previous_cut_left and enzyme_action.right <= previous_cut_right) # in between cuts
            # no conflict
          else
            conflict = true
          end
        end

        next if conflict == true
        enzyme_action.cut_ranges.each { |cut_range| sequence_range.add_cut_range(cut_range) }
        previous_cut_ranges += enzyme_action.cut_ranges
      end # permutation.each

      # Fill in the source sequence for sequence_range so it knows what bases
      # to use
      sequence_range.fragments.primary = sequence
      sequence_range.fragments.complement = sequence.forward_complement
      my_hash[permutation] = sequence_range
    end # permutations.each

  else # if enzyme_actions.size == 1
    # no permutations, just do it
    # (this branch is taken whenever enzyme_actions.size is 0 or 1; only the
    # uncontested initial_cuts are applied)
    sequence_range = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
    initial_cuts.each { |enzyme_action| enzyme_action.cut_ranges.each { |cut_range| sequence_range.add_cut_range(cut_range) } }
    sequence_range.fragments.primary = sequence
    sequence_range.fragments.complement = sequence.forward_complement
    my_hash[0] = sequence_range
  end

  my_hash
end

# Returns permutation orders for a given number of elements.
#
# Examples:
#   permute(0) # => [[0]]
#   permute(1) # => [[0]]
#   permute(2) # => [[1, 0], [0, 1]]
#   permute(3)
#     # => [[2, 1, 0], [2, 0, 1], [1, 2, 0], [0, 2, 1], [1, 0, 2], [0, 1, 2]]
#   permute(4)
#     # => [[3, 2, 1, 0],
#           [3, 2, 0, 1],
#           [3, 1, 2, 0],
#           [3, 0, 2, 1],
#           [3, 1, 0, 2],
#           [3, 0, 1, 2],
#           [2, 3, 1, 0],
#           [2, 3, 0, 1],
#           [1, 3, 2, 0],
#           [0, 3, 2, 1],
#           [1, 3, 0, 2],
#           [0, 3, 1, 2],
#           [2, 1, 3, 0],
#           [2, 0, 3, 1],
#           [1, 2, 3, 0],
#           [0, 2, 3, 1],
#           [1, 0, 3, 2],
#           [0, 1, 3, 2],
#           [2, 1, 0, 3],
#           [2, 0, 1, 3],
#           [1, 2, 0, 3],
#           [0, 2, 1, 3],
#           [1, 0, 2, 3],
#           [0, 1, 2, 3]]
#
# ---
# *Arguments*
# * +count+: +Number+ of different elements to be permuted
# * +permutations+: ignore - for the recursive algorithm
# *Returns*:: +Array+ of +Array+ objects with different possible permutation orders.  See examples.
def permute(count, permutations = [[0]])
  return permutations if count <= 1

  # Each recursion step introduces the next element (its value equals the
  # current permutation length) at every possible position of every
  # existing permutation.  Position is the outer loop, which gives the
  # ordering shown in the examples.
  new_element = permutations[0].size
  expanded = (0..new_element).flat_map do |position|
    permutations.map { |order| order.dup.insert(position, new_element) }
  end

  permute(count - 1, expanded)
end
# See main documentation for Bio::RestrictionEnzyme
#
# Bio::RestrictionEnzyme.cut is preferred over this!
#
# USE AT YOUR OWN RISK
#
# This is a simpler version of method +cut+.  +cut+ takes into account
# permutations of cut variations based on competitiveness of enzymes for an
# enzyme cutsite or enzyme bindsite on a sequence.  This does not take into
# account those possibilities and is therefore faster, but less likely to be
# accurate.
#
# This code is mainly included as an academic example
# without having to wade through the extra layer of complexity added by the
# permutations.
#
# Example:
#
# FIXME add output
#
#   Bio::RestrictionEnzyme::Analysis.cut_without_permutations('gaattc', 'EcoRI')
#
# _same as:_
#
#   Bio::RestrictionEnzyme::Analysis.cut_without_permutations('gaattc', 'g^aattc')
# ---
# *Arguments*
# * +sequence+: +String+ kind of object that will be used as a nucleic acid sequence.
# * +args+: Series of enzyme names, enzymes sequences with cut marks, or RestrictionEnzyme objects.
# *Returns*:: Bio::RestrictionEnzyme::Fragments object populated with Bio::RestrictionEnzyme::Fragment objects.   (Note: unrelated to Bio::RestrictionEnzyme::Range::SequenceRange::Fragments)
def cut_without_permutations( sequence, *args )
  # A non-String or empty sequence yields an empty Fragments object.
  return fragments_for_display( {} ) if !sequence.kind_of?(String) or sequence.empty?
  sequence = Bio::Sequence::NA.new( sequence )

  # create_enzyme_actions returns two separate array elements, they're not
  # needed separated here so we put them into one array
  enzyme_actions = create_enzyme_actions( sequence, *args ).flatten
  return fragments_for_display( {} ) if enzyme_actions.empty?

  # Primary and complement strands are both measured from '0' to 'sequence.size-1' here
  last_index = sequence.size-1
  sequence_range = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, last_index, last_index )

  # Add the cuts to the sequence_range from each enzyme_action
  enzyme_actions.each do |enzyme_action|
    enzyme_action.cut_ranges.each { |cut_range| sequence_range.add_cut_range(cut_range) }
  end

  # Fill in the source sequence for sequence_range so it knows what bases
  # to use
  sequence_range.fragments.primary = sequence
  sequence_range.fragments.complement = sequence.forward_complement

  # Format the fragments for the user
  fragments_for_display( {0 => sequence_range} )
end

# Take the fragments from SequenceRange objects generated from add_cut_range
# and return unique results as a Bio::RestrictionEnzyme::Analysis::Fragment object.
# (Declared under +protected+ in the enclosing class.)
#
# ---
# *Arguments*
# * +hsh+: +Hash+ Keys are a permutation ID, if any.  Values are SequenceRange objects that have cuts applied.
# * +view_ranges+: when true each Fragment also receives the p_left, p_right,
#   c_left and c_right values of the source fragment, and duplicates are kept
# *Returns*:: Bio::RestrictionEnzyme::Analysis::Fragments object populated with Bio::RestrictionEnzyme::Analysis::Fragment objects.
def fragments_for_display( hsh, view_ranges=false )
  ary = Fragments.new
  return ary unless hsh

  hsh.each do |permutation_id, sequence_range|
    sequence_range.fragments.for_display.each do |fragment|
      fields = [fragment.primary, fragment.complement]
      if view_ranges
        fields += [fragment.p_left, fragment.p_right, fragment.c_left, fragment.c_right]
      end
      ary << Bio::RestrictionEnzyme::Fragment.new(*fields)
    end
  end

  ary.uniq! unless view_ranges
  ary
end
# The second is an array of EnzymeAction objects that +always_cut+ and are not subject to competition.
def create_enzyme_actions( sequence, *args )
  all_enzyme_actions = []

  args.each do |enzyme|
    enzyme = Bio::RestrictionEnzyme.new(enzyme) unless enzyme.class == Bio::RestrictionEnzyme::DoubleStranded

    # make sure pattern is the proper size
    # for more info see the internal documentation of
    # Bio::RestrictionEnzyme::DoubleStranded.create_action_at
    pattern = Bio::Sequence::NA.new(
      Bio::RestrictionEnzyme::DoubleStranded::AlignedStrands.align(
        enzyme.primary, enzyme.complement
      ).primary
    ).to_re

    find_match_locations( sequence, pattern ).each do |offset|
      all_enzyme_actions << enzyme.create_action_at( offset )
    end
  end

  # FIXME VerticalCutRange should really be called VerticalAndHorizontalCutRange

  # * all_enzyme_actions is now full of EnzymeActions at specific locations across
  #   the sequence.
  # * all_enzyme_actions will now be examined to see if any EnzymeActions may
  #   conflict with one another, and if they do they'll be made note of in
  #   competition_indexes.  They will then be removed FIXME
  # * a conflict occurs if another enzyme's bind site is compromised due
  #   to another enzyme's cut.  Enzyme's bind sites may overlap and not be
  #   competitive, however neither bind site may be part of the other
  #   enzyme's cut or else they do become competitive.
  #
  # Take current EnzymeAction's entire bind site and compare it to all other
  # EnzymeAction's cut ranges.  Only look for vertical cuts as boundaries
  # since trailing horizontal cuts would have no influence on the bind site.
  #
  # If example Enzyme A makes this cut pattern (cut range 2..5):
  #
  # 0 1 2|3 4 5 6 7
  #      +-----+
  # 0 1 2 3 4 5|6 7
  #
  # Then the bind site (and EnzymeAction range) for Enzyme B would need its
  # right side to be at index 2 or less, or its left side to be 6 or greater.

  competition_indexes = Set.new

  all_enzyme_actions[0..-2].each_with_index do |current_enzyme_action, i|
    next if competition_indexes.include? i
    next if current_enzyme_action.cut_ranges.empty?  # no cuts, some enzymes are like this (ex. CjuI)

    all_enzyme_actions[i+1..-1].each_with_index do |comparison_enzyme_action, j|
      j += (i + 1) # translate the slice-relative index back to an absolute one
      next if competition_indexes.include? j
      next if comparison_enzyme_action.cut_ranges.empty?  # no cuts

      if (current_enzyme_action.right <= comparison_enzyme_action.cut_ranges.min_vertical) or
         (current_enzyme_action.left > comparison_enzyme_action.cut_ranges.max_vertical)
        # no conflict
      else
        competition_indexes += [i, j] # merge both indexes into the flat set
      end
    end
  end

  sometimes_cut = all_enzyme_actions.values_at( *competition_indexes )
  # always_cut aliases all_enzyme_actions; the competing actions are removed in place.
  always_cut = all_enzyme_actions
  always_cut.delete_if {|x| sometimes_cut.include? x }

  [sometimes_cut, always_cut]
end

# Returns an +Array+ of the match indices of a +RegExp+ to a string.
#
# Example:
#
#   find_match_locations('abccdefeg', /[ce]/) # => [2,3,5,7]
#
# ---
# *Arguments*
# * +string+: The string to scan
# * +re+: A RegExp to use
# *Returns*:: +Array+ with indices of match locations
def find_match_locations( string, re )
  md = string.match( re )
  locations = []
  counter = 0
  while md
    # save the match index relative to the original string
    locations << (counter += md.begin(0))
    # find the next match in the remainder of the string; the slice is nil
    # once the start index is past the end (possible after a zero-width match
    # at the end of the string), which ends the scan
    remainder = string[ (counter += 1)..-1 ]
    md = remainder ? remainder.match( re ) : nil
  end
  locations
end
# Stores numbers as the keys of a Hash and presents them in sorted order.
# Duplicates collapse because each number is a hash key.
#
# Bio::RestrictionEnzyme internal use only.
# Please do not create the instance outside Bio::RestrictionEnzyme.
class SortedNumArray

  # Same usage as Array.[]
  def self.[](*args)
    a = self.new
    args.each { |elem| a.push elem }
    a
  end

  # Creates a new object
  def initialize
    @hash = {}
  end

  # initialize copy
  def initialize_copy(other)
    super(other)
    @hash = @hash.dup
  end

  # sets internal hash object
  def internal_data_hash=(h)
    @hash = h
    self
  end
  protected :internal_data_hash=

  # gets internal hash object
  def internal_data_hash
    @hash
  end
  protected :internal_data_hash

  # sorted keys (a new Array on every call; nothing is cached)
  def sorted_keys
    @hash.keys.sort
  end
  private :sorted_keys

  # adds a new element
  def push_element(n)
    @hash.store(n, true)
    nil
  end
  private :push_element

  # adds a new element in the beginning of the array
  # (the position is irrelevant because elements are always read back sorted)
  def unshift_element(n)
    @hash.store(n, true)
    nil
  end
  private :unshift_element

  # Same usage as Array#[]
  def [](*arg)
    sorted_keys[*arg]
  end

  # Not implemented
  def []=(*arg)
    raise NotImplementedError, 'SortedNumArray#[]= is not implemented.'
  end

  # Same usage as Array#each
  def each(&block)
    sorted_keys.each(&block)
  end

  # Same usage as Array#reverse_each
  def reverse_each(&block)
    sorted_keys.reverse_each(&block)
  end

  # Same usage as Array#+, but accepts only the same classes instance.
  def +(other)
    unless other.is_a?(self.class) then
      raise TypeError, 'unsupported data type'
    end
    result = self.class.new
    result.internal_data_hash = @hash.merge(other.internal_data_hash)
    result
  end

  # Same usage as Array#==
  def ==(other)
    return true if super(other)
    other.is_a?(self.class) ? (other.internal_data_hash == @hash) : false
  end

  # Same usage as Array#concat
  def concat(ary)
    ary.each { |elem| push_element(elem) }
    self
  end

  # Same usage as Array#push
  def push(*args)
    args.each { |elem| push_element(elem) }
    self
  end

  # Same usage as Array#unshift
  def unshift(*arg)
    arg.reverse_each { |elem| unshift_element(elem) }
    self
  end

  # Same usage as Array#<<
  def <<(elem)
    push_element(elem)
    self
  end

  # Same usage as Array#include?
  def include?(elem)
    @hash.has_key?(elem)
  end

  # Same usage as Array#first (smallest element, nil when empty)
  def first
    @hash.keys.min
  end

  # Same usage as Array#last (largest element, nil when empty)
  def last
    @hash.keys.max
  end

  # Same usage as Array#size
  def size
    @hash.size
  end
  alias length size

  # Same usage as Array#delete
  def delete(elem)
    @hash.delete(elem) ? elem : nil
  end

  # Does nothing
  def sort!(&block)
    self
  end

  # Does nothing
  def uniq!
    self
  end

  # Converts to an array
  def to_a
    sorted_keys
  end
end #class SortedNumArray
#
# ---
# *Arguments*
# * _none_
# *Returns*:: +RegExp+
def re_cut_symbol_adjacent
  # two escaped cut symbols in a row
  %r"#{escaped_cut_symbol}{2}"
end

# A Regexp of the cut_symbol.
#
# ---
# *Arguments*
# * _none_
# *Returns*:: +RegExp+
def re_cut_symbol
  %r"#{escaped_cut_symbol}"
end

#########
#protected # NOTE this is a Module, can't hide CutSymbol__
#########

require 'singleton'

# Class to keep state
#
# The current cut symbol is held in a class-level instance variable and is
# read and written through the class methods below.
class CutSymbol__
  include Singleton

  @cut_symbol = '^'

  # Returns the current cut symbol.
  def self.cut_symbol; @cut_symbol; end

  # Sets the cut symbol.
  #
  # *Raises*:: +ArgumentError+ unless +glyph+ has a size of exactly 1
  def self.cut_symbol=(glyph)
    unless glyph.size == 1
      raise ArgumentError, "cut symbol must be a single character, got #{glyph.inspect}"
    end
    @cut_symbol = glyph
  end

  # Returns the cut symbol with a back-slash in front of it.
  def self.escaped_cut_symbol; "\\" + self.cut_symbol; end
end
#
class SingleStrandComplement < SingleStrand
  # Orientation of the strand, 3' to 5'
  #
  # *Returns*:: +Array+ <tt>[3, 5]</tt>
  def orientation
    [3, 5]
  end
end # SingleStrandComplement
# [+erp+] One of three possible parameters:  The name of an enzyme, a REBASE::EnzymeEntry object, or a nucleotide pattern with a cut mark.
# [+raw_cut_pairs+] The cut locations in enzyme index notation.
#
# Enzyme index notation:: 1.._n_, value before 1 is -1
#
# Examples of the allowable cut locations for +raw_cut_pairs+ follows.  'p' and
# 'c' refer to a cut location on the 'p'rimary and 'c'omplement strands.
#
#   1, [3,2], [20,22], 57
#   p, [p,c], [p, c],  p
#
# Which is the same as:
#
#   1, (3..2), (20..22), 57
#   p, (p..c), (p..c),   p
#
# Examples of partial cuts:
#   1, [nil,2], [20,nil], 57
#   p, [p,  c], [p, c],   p
#
# Raises +ArgumentError+ for nil, for an unsupported class, or when cut
# locations accompany a REBASE::EnzymeEntry; raises +IndexError+ when a
# String that looks like an enzyme name has no REBASE entry.
def initialize(erp, *raw_cut_pairs)
  # 'erp' : 'E'nzyme / 'R'ebase / 'P'attern
  k = erp.class

  if k == Bio::REBASE::EnzymeEntry
    # Passed a Bio::REBASE::EnzymeEntry object

    unless raw_cut_pairs.empty?
      err = "A Bio::REBASE::EnzymeEntry object was passed, however the cut locations contained values. Ambiguous or redundant.\n"
      err += "inspect = #{raw_cut_pairs.inspect}"
      raise ArgumentError, err
    end
    initialize_with_rebase( erp )

  elsif erp.kind_of? String
    # Passed something that could be an enzyme pattern or an enzyme name

    # Decide if this String is an enzyme name or a pattern
    if Bio::RestrictionEnzyme.enzyme_name?( erp )
      # FIXME we added this to rebase...
      # Check if it's a known name
      known_enzyme = false
      known_enzyme = true if Bio::RestrictionEnzyme.rebase[ erp ]

      # Try harder to find the enzyme: case-insensitive comparison of the
      # whole name.  The name is escaped so that characters such as '.' are
      # matched literally, and \A/\z anchor the entire string.
      unless known_enzyme
        re = /\A#{Regexp.escape(erp)}\z/i
        Bio::RestrictionEnzyme.rebase.each { |name, v| (known_enzyme = true; erp = name; break) if name =~ re }
      end

      if known_enzyme
        initialize_with_rebase( Bio::RestrictionEnzyme.rebase[erp] )
      else
        raise IndexError, "No entry found for enzyme named '#{erp}'"
      end

    else
      # Not an enzyme name, so a pattern is assumed
      if erp =~ re_cut_symbol
        initialize_with_pattern_and_cut_symbols( erp )
      else
        initialize_with_pattern_and_cut_locations( erp, raw_cut_pairs )
      end
    end

  elsif k == NilClass
    err = "Passed a nil value.  Perhaps you tried to pass a Bio::REBASE::EnzymeEntry that does not exist?\n"
    err += "inspect = #{erp.inspect}"
    raise ArgumentError, err
  else
    err = "I don't know what to do with class #{k} for erp.\n"
    err += "inspect = #{erp.inspect}"
    raise ArgumentError, err
  end
end

# See AlignedStrands.align
def aligned_strands
  AlignedStrands.align(@primary.pattern, @complement.pattern)
end

# See AlignedStrands.align_with_cuts
def aligned_strands_with_cuts
  AlignedStrands.align_with_cuts(@primary.pattern, @complement.pattern, @primary.cut_locations, @complement.cut_locations)
end

# Returns +true+ if the cut pattern creates blunt fragments.
# (opposite of sticky)
def blunt?
  as = aligned_strands_with_cuts
  # convert the cut sections of each strand to their lengths; the cut is
  # blunt when both strands are divided into sections of the same lengths
  primary_lengths, complement_lengths = [as.primary, as.complement].collect do |seq|
    seq.split( cut_symbol ).collect { |section| section.length }
  end
  primary_lengths == complement_lengths
end

# Returns +true+ if the cut pattern creates sticky fragments.
# (opposite of blunt)
def sticky?
  !blunt?
end

# Builds the EnzymeAction that describes this enzyme acting on a sequence
# at the given position.
#
# +offset+:: Numerical offset of where the enzyme action occurs on the sequence
#
# Returns the EnzymeAction, with one cut range added per cut location pair.
def create_action_at( offset )
  # The width used below is the size of the fully aligned sequence with
  # the maximum padding needed to make a match on the primary and
  # complement strand.
  #
  # For example -
  # Note how EcoRII needs extra padding on the beginning and ending of the
  # sequence 'ccagg' to make the match since the cut must occur between
  # two nucleotides and can not occur on the very end of the sequence.
  #
  #   EcoRII:
  #     :blunt: "0"
  #     :c2: "5"
  #     :c4: "0"
  #     :c1: "-1"
  #     :pattern: CCWGG
  #     :len: "5"
  #     :name: EcoRII
  #     :c3: "0"
  #     :ncuts: "2"
  #
  #       -1 1 2 3 4 5
  #   5' - n^c c w g g n - 3'
  #   3' - n g g w c c^n - 5'
  #
  #   (w == [at])

  width = aligned_strands.primary.size
  last = offset + width - 1
  enzyme_action = EnzymeAction.new( offset, last, offset, last )

  @cut_locations.each do |pair|
    # pair is a DoubleStranded::CutLocationPair
    primary_cut = pair.primary
    complement_cut = pair.complement
    if complement_cut >= primary_cut
      enzyme_action.add_cut_range(offset + primary_cut, nil, nil, offset + complement_cut)
    else
      enzyme_action.add_cut_range(nil, offset + primary_cut, offset + complement_cut, nil)
    end
  end

  enzyme_action
end

# An EnzymeAction is a way of representing a potential effect that a
# RestrictionEnzyme may have on a nucleotide sequence, an 'action'.
#
# Multiple cuts in multiple locations on a sequence may occur in one
# 'action' if it is done by a single enzyme.
#
# An EnzymeAction is a series of locations that represents where the restriction
# enzyme will bind on the sequence, as well as what ranges are cut on the
# sequence itself. The complexity is due to the fact that our virtual
# restriction enzyme may create multiple segments from its cutting action,
# on which another restriction enzyme may operate upon.
# # For example, the DNA sequence: # # 5' - G A A T A A A C G A - 3' # 3' - C T T A T T T G C T - 5' # # When mixed with the restriction enzyme with the following cut pattern: # # 5' - A|A T A A A C|G - 3' # +-+ + # 3' - T T|A T T T G|C - 5' # # And also mixed with the restriction enzyme of the following cut pattern: # # 5' - A A|A C - 3' # +-+ # 3' - T|T T G - 5' # # Would result in a DNA sequence with these cuts: # # 5' - G A|A T A A|A C|G A - 3' # +-+ +-+ + # 3' - C T T|A T|T T G|C T - 5' # # Or these separate "free-floating" sequences: # # 5' - G A - 3' # 3' - C T T - 5' # # 5' - A T A A - 3' # 3' - A T - 5' # # 5' - A C - 3' # 3' - T T G - 5' # # 5' - G A - 3' # 3' - C T - 5' # # This would be represented by two EnzymeActions - one for each # RestrictionEnzyme. # # This is, however, subject to competition. If the second enzyme reaches # the target first, the the first enzyme will not be able to find the # appropriate bind site. # # FIXME complete these docs # # To initialize an EnzymeAction you must first instantiate it with the # beginning and ending locations of where it will operate on a nucleotide # sequence. # # Next the ranges of cu # # An EnzymeAction is # Defines a single enzyme action, in this case being a range that correlates # to the DNA sequence that may contain it's own internal cuts. class EnzymeAction < Bio::RestrictionEnzyme::Range::SequenceRange end ######### protected ######### def initialize_with_pattern_and_cut_symbols( s ) p_cl = SingleStrand::CutLocationsInEnzymeNotation.new( strip_padding(s) ) s = Bio::Sequence::NA.new( strip_cuts_and_padding(s) ) # * Reflect cuts that are in enzyme notation # * 0 is not a valid enzyme index, decrement 0 and all negative c_cl = p_cl.collect {|n| (n >= s.length or n < 1) ? 
((s.length - n) - 1) : (s.length - n)} create_cut_locations( p_cl.zip(c_cl) ) create_primary_and_complement( s, p_cl, c_cl ) end def initialize_with_pattern_and_cut_locations( s, raw_cl ) create_cut_locations(raw_cl) create_primary_and_complement( Bio::Sequence::NA.new(s), @cut_locations_in_enzyme_notation.primary, @cut_locations_in_enzyme_notation.complement ) end def create_primary_and_complement(primary_seq, p_cuts, c_cuts) @primary = SingleStrand.new( primary_seq, p_cuts ) @complement = SingleStrandComplement.new( primary_seq.forward_complement, c_cuts ) end def create_cut_locations(raw_cl) @cut_locations_in_enzyme_notation = CutLocationsInEnzymeNotation.new( *raw_cl.collect {|cl| CutLocationPairInEnzymeNotation.new(cl)} ) @cut_locations = @cut_locations_in_enzyme_notation.to_array_index end def initialize_with_rebase( e ) p_cl = [e.primary_strand_cut1, e.primary_strand_cut2] c_cl = [e.complementary_strand_cut1, e.complementary_strand_cut2] # If there's no cut in REBASE it's represented as a 0. # 0 is an invalid index, it just means no cut. p_cl.delete(0) c_cl.delete(0) raise IndexError unless p_cl.size == c_cl.size initialize_with_pattern_and_cut_locations( e.pattern, p_cl.zip(c_cl) ) end end # DoubleStranded end # RestrictionEnzyme end # Bio bio-1.4.3.0001/lib/bio/command.rb0000644000004100000410000006435712200110570016164 0ustar www-datawww-data# # = bio/command.rb - general methods for external command execution # # Copyright:: Copyright (C) 2003-2010 # Naohisa Goto , # Toshiaki Katayama # License:: The Ruby License # # $Id:$ # require 'open3' require 'uri' require 'open-uri' require 'cgi' require 'net/http' require 'tmpdir' require 'fileutils' module Bio # = Bio::Command # # Bio::Command is a collection of useful methods for execution # of external commands or web applications. # Any wrapper class for applications shall use this class. # # Library internal use only. Users should not directly use it. 
module Command UNSAFE_CHARS_UNIX = /[^A-Za-z0-9\_\-\.\:\,\/\@\x1b\x80-\xfe]/n QUOTE_CHARS_WINDOWS = /[^A-Za-z0-9\_\-\.\:\,\/\@\\]/n UNESCAPABLE_CHARS = /[\x00-\x08\x10-\x1a\x1c-\x1f\x7f\xff]/n module_function # *CAUTION* Bio::Command INTERNAL USE ONLY. # Users must NOT use the method. # The method will be removed when it is not needed. # # Checks if the program is running on Microsoft Windows. # If Windows, returns true. Otherwise, returns false. # Note that Cygwin is not treated as Windows. # # Known issues: # * It might make a mistake in minor platforms/architectures/interpreters. # * When running JRuby on Cygwin, the result is unknown. # --- # *Returns*:: true or false def windows_platform? case RUBY_PLATFORM when /(?:mswin|bccwin|mingw)(?:32|64)/i true when /java/i # Reference: Redmine's platform.rb # http://www.redmine.org/projects/redmine/repository/revisions/1753/entry/trunk/lib/redmine/platform.rb if /windows/i =~ (ENV['OS'] || ENV['os']).to_s then true else false end else false end end private_class_method :windows_platform? # *CAUTION* Bio::Command INTERNAL USE ONLY. # Users must NOT use the method. # The method will be removed when it is not needed. # # Checks if the OS does not support fork(2) system call. # When not supported, it returns true. # When supported or unknown, it returns false or nil. # # Known issues: # * It might make a mistake in minor platforms/architectures/interpreters. # --- # *Returns*:: true, false or nil. def no_fork? if (defined?(@@no_fork) && @@no_fork) or windows_platform? or /java/i =~ RUBY_PLATFORM then true else false end end private_class_method :no_fork? # Escape special characters in command line string for cmd.exe on Windows. 
# ---
# *Arguments*:
# * (required) _str_: String
# *Returns*:: String object (an empty string is returned as <tt>""</tt>)
def escape_shell_windows(str)
  str = str.to_s
  raise 'cannot escape control characters' if UNESCAPABLE_CHARS =~ str
  # An empty argument must be quoted explicitly; otherwise it would
  # silently vanish from the generated command line.
  if str.empty? || QUOTE_CHARS_WINDOWS =~ str then
    '"' + str.gsub(/\"/, '""') + '"'
  else
    String.new(str)
  end
end

# Escape special characters in command line string for UNIX shells.
# Every unsafe character is prefixed with a backslash. An empty string
# is returned as <tt>''</tt> so that it survives as an (empty) argument.
# ---
# *Arguments*:
# * (required) _str_: String
# *Returns*:: String object
def escape_shell_unix(str)
  str = str.to_s
  raise 'cannot escape control characters' if UNESCAPABLE_CHARS =~ str
  # The shell drops an unquoted empty word, so quote it explicitly.
  return "''" if str.empty?
  str.gsub(UNSAFE_CHARS_UNIX) { |x| "\\#{x}" }
end

# Escape special characters in command line string.
# Uses the Windows rules on Windows and the UNIX rules elsewhere.
# ---
# *Arguments*:
# * (required) _str_: String
# *Returns*:: String object
def escape_shell(str)
  if windows_platform? then
    escape_shell_windows(str)
  else
    escape_shell_unix(str)
  end
end

# Generate command line string with special characters escaped.
# Uses the Windows rules on Windows and the UNIX rules elsewhere.
# ---
# *Arguments*:
# * (required) _ary_: Array containing String objects
# *Returns*:: String object
def make_command_line(ary)
  if windows_platform? then
    make_command_line_windows(ary)
  else
    make_command_line_unix(ary)
  end
end

# Generate command line string with special characters escaped
# for cmd.exe on Windows.
# ---
# *Arguments*:
# * (required) _ary_: Array containing String objects
# *Returns*:: String object
def make_command_line_windows(ary)
  ary.map { |str| escape_shell_windows(str) }.join(" ")
end

# Generate command line string with special characters escaped
# for UNIX shells.
# ---
# *Arguments*:
# * (required) _ary_: Array containing String objects
# *Returns*:: String object
def make_command_line_unix(ary)
  ary.map { |str| escape_shell_unix(str) }.join(" ")
end

# Returns an Array of command-line command and arguments
# that can be safely passed to Kernel.exec etc.
# If the given array is already safe (or empty), returns the given array.
# ---
# *Arguments*:
# * (required) _ary_: Array
# *Returns*:: Array
def safe_command_line_array(ary)
  ary = ary.to_ary
  # Two or more elements are never taken as a shell command line, and
  # an empty array has nothing to protect.
  return ary unless ary.size == 1
  arg0 = ary[0]
  # A single string would be passed to the shell; wrapping it as
  # [ command, argv0 ] forces direct execution. An element that is
  # already array-like is used as given. The capability is checked
  # explicitly so that errors raised inside to_ary are not masked.
  arg0 = arg0.respond_to?(:to_ary) ? arg0.to_ary : [ arg0, arg0 ]
  [ arg0 ]
end

# Executes the program. Automatically select popen for Ruby 1.9 or
# Windows environment and fork for the others.
# A block must be given. An IO object is passed to the block.
#
# Available options:
#   :chdir => "path" : changes working directory to the specified path.
#
# ---
# *Arguments*:
# * (required) _cmd_: Array containing String objects
# * (optional) _options_: Hash
# *Returns*:: (undefined)
def call_command(cmd, options = {}, &block) #:yields: io
  if RUBY_VERSION >= "1.9.0" || no_fork? then
    call_command_popen(cmd, options, &block)
  else
    begin
      call_command_fork(cmd, options, &block)
    rescue NotImplementedError
      # fork(2) not implemented: remember that and fall back to popen
      @@no_fork = true
      call_command_popen(cmd, options, &block)
    end
  end
end

# This method is internally called from the call_command method.
# In normal case, use call_command, and do not call this method directly.
#
# Executes the program via IO.popen for OS which doesn't support fork.
# A block must be given. An IO object is passed to the block.
#
# See the document of call_command for available options.
#
# Note for Ruby 1.8:
#   In Ruby 1.8, shell unsafe characters are escaped, but
#   if inescapable characters exist, it raises RuntimeError.
#   So, call_command_fork is normally recommended.
#
# Note for Ruby 1.9:
#   In Ruby 1.9, call_command_popen is safe and robust enough, and is the
#   recommended way, because IO.popen is improved to get a command-line
#   as an array without calling shell.
#
# ---
# *Arguments*:
# * (required) _cmd_: Array containing String objects
# * (optional) _options_: Hash
# *Returns*:: (undefined)
def call_command_popen(cmd, options = {}, &block)
  # Ruby 1.8 has only the command-line-string variant.
  return _call_command_popen_ruby18(cmd, options, &block) if RUBY_VERSION < "1.9.0"

  if RUBY_ENGINE == 'jruby' then
    _call_command_popen_jruby19(cmd, options, &block)
  else
    _call_command_popen_ruby19(cmd, options, &block)
  end
end

# This method is internally called from the call_command method.
# In normal case, use call_command, and do not call this method directly.
#
# Runs the program through IO.popen using a single escaped command line
# string, and passes the resulting IO object to the given block.
# A block must be given.
#
# See the document of call_command for available options.
#
# The method is written for Ruby 1.8.
#
# Shell unsafe characters are escaped; if inescapable characters exist,
# RuntimeError is raised.
#
# ---
# *Arguments*:
# * (required) _cmd_: Array containing String objects
# * (optional) _options_: Hash
# *Returns*:: (undefined)
def _call_command_popen_ruby18(cmd, options = {})
  # For Ruby 1.8, using command line string.
  command_line = make_command_line(cmd)

  # :chdir is emulated by prepending a "cd" command.
  dir = options[:chdir]
  if dir then
    cd_args =
      if windows_platform?
        # Unix-like dir separator is changed to Windows dir separator
        # by using String#gsub.
        [ 'cd', '/D', dir.gsub(/\//, "\\") ]
      else
        # UNIX shell
        [ 'cd', dir ]
      end
    command_line = make_command_line(cd_args) + ' && ' + command_line
  end

  # call command by using IO.popen
  IO.popen(command_line, "w+") do |pipe|
    pipe.sync = true
    yield pipe
  end
end
private :_call_command_popen_ruby18

# This method is internally called from the call_command method.
# In normal case, use call_command, and do not call this method directly.
#
# Executes the program via IO.popen.
# A block must be given. An IO object is passed to the block.
#
# See the document of call_command for available options.
#
# The method can be run only on Ruby (MRI) 1.9 or later versions.
#
# ---
# *Arguments*:
# * (required) _cmd_: Array containing String objects
# * (optional) _options_: Hash
# *Returns*:: (undefined)
def _call_command_popen_ruby19(cmd, options = {})
  # For Ruby 1.9 or later, using command line array with options.
  dir = options[:chdir]
  argv = safe_command_line_array(cmd)
  # IO.popen takes spawn options as a trailing Hash in the array.
  argv += [ { :chdir => dir } ] if dir
  IO.popen(argv, "r+") { |pipe| yield pipe }
end
private :_call_command_popen_ruby19

# This method is internally called from the call_command method.
# In normal case, use call_command, and do not call this method directly.
#
# Executes the program via IO.popen, choosing between the command line
# string variant and the command line array variant.
# A block must be given. An IO object is passed to the block.
#
# See the document of call_command for available options.
#
# The method is written for the workaround of the JRuby bugs:
# * {JRUBY-6195}[http://jira.codehaus.org/browse/JRUBY-6195] Process.spawn
#   (and related methods) ignore option hash
# * {JRUBY-6818}[http://jira.codehaus.org/browse/JRUBY-6818] Kernel.exec,
#   Process.spawn (and IO.popen etc.) raise error when program is an array
#   containing two strings
# This method may be removed after the bugs are resolved.
#
# ---
# *Arguments*:
# * (required) _cmd_: Array containing String objects
# * (optional) _options_: Hash
# *Returns*:: (undefined)
def _call_command_popen_jruby19(cmd, options = {}, &block)
  # Any option, or a lone command without arguments, goes through the
  # command line string variant.
  needs_string_variant = !options.empty? || cmd.size == 1
  if needs_string_variant then
    _call_command_popen_ruby18(cmd, options, &block)
  else
    _call_command_popen_ruby19(cmd, options, &block)
  end
end
private :_call_command_popen_jruby19

# This method is internally called from the call_command method.
# In normal case, use call_command, and do not call this method directly.
#
# Executes the program via fork (by using IO.popen("-")) and exec.
# A block must be given. An IO object is passed to the block.
#
# See the document of call_command for available options.
# # Note for Ruby 1.8: # In Ruby 1.8, from the view point of security, this method is recommended # rather than call_command_popen. However, this method might have problems # with multi-threads. # # Note for Ruby 1.9: # In Ruby 1.9, this method can not be used, because Thread.critical is # removed. In Ruby 1.9, call_command_popen is safe and robust enough, and # is the recommended way, because IO.popen is improved to get a # command-line as an array without calling shell. # # --- # *Arguments*: # * (required) _cmd_: Array containing String objects # * (optional) _options_: Hash # *Returns*:: (undefined) def call_command_fork(cmd, options = {}) dir = options[:chdir] cmd = safe_command_line_array(cmd) begin tc, Thread.critical, flag0, flag1 = Thread.critical, true, true, true IO.popen("-", "r+") do |io| if io then # parent flag0, Thread.critical, flag1 = false, tc, false yield io else # child Thread.critical = true # for safety, though already true GC.disable # chdir to options[:chdir] if available begin Dir.chdir(dir) if dir rescue Exception Process.exit!(1) end # executing the command begin Kernel.exec(*cmd) rescue Errno::ENOENT, Errno::EACCES Process.exit!(127) rescue Exception end Process.exit!(1) end end ensure # When IO.popen("-") raises error, Thread.critical will be set here. Thread.critical = tc if flag0 or flag1 #warn 'Thread.critical might have wrong value.' if flag0 != flag1 end end # Executes the program via Open3.popen3 # A block must be given. IO objects are passed to the block. # # You would use this method only when you really need to get stderr. 
#
# ---
# *Arguments*:
# * (required) _cmd_: Array containing String objects
# *Returns*:: (undefined)
def call_command_open3(cmd)
  cmd = safe_command_line_array(cmd)
  Open3.popen3(*cmd) do |pin, pout, perr|
    yield pin, pout, perr
  end
end

# Executes the program with the query (String) given to the standard input,
# waits the program termination, and returns the output data printed to the
# standard output as a string.
#
# Automatically select popen for Ruby 1.9 or Windows environment and
# fork for the others. When fork turns out not to be implemented, that
# is remembered and popen is used instead.
#
# Available options:
#   :chdir => "path" : changes working directory to the specified path.
#
# ---
# *Arguments*:
# * (required) _cmd_: Array containing String objects
# * (optional) _query_: String
# * (optional) _options_: Hash
# *Returns*:: String or nil
def query_command(cmd, query = nil, options = {})
  if RUBY_VERSION >= "1.9.0" || no_fork? then
    query_command_popen(cmd, query, options)
  else
    begin
      query_command_fork(cmd, query, options)
    rescue NotImplementedError
      # fork(2) not implemented: remember that and fall back to popen
      # (retrying the fork variant would only fail again)
      @@no_fork = true
      query_command_popen(cmd, query, options)
    end
  end
end

# This method is internally called from the query_command method.
# In normal case, use query_command, and do not call this method directly.
#
# Executes the program with the query (String) given to the standard input,
# waits the program termination, and returns the output data printed to the
# standard output as a string.
#
# See the document of query_command for available options.
#
# See the document of call_command_popen for the security and Ruby
# version specific issues.
#
# ---
# *Arguments*:
# * (required) _cmd_: Array containing String objects
# * (optional) _query_: String
# * (optional) _options_: Hash
# *Returns*:: String or nil
def query_command_popen(cmd, query = nil, options = {})
  output = nil
  call_command_popen(cmd, options) do |pipe|
    pipe.sync = true
    pipe.print(query) if query
    # closing the write side lets the program see the end of its input
    pipe.close_write
    output = pipe.read
  end
  output
end

# This method is internally called from the query_command method.
# In normal case, use query_command, and do not call this method directly.
#
# Sends the query (String), if any, to the standard input of the program,
# reads everything the program prints to its standard output, and returns
# that as a string.
#
# Fork (by using IO.popen("-")) and exec is used to execute the program.
#
# See the document of query_command for available options.
#
# See the document of call_command_fork for the security and Ruby
# version specific issues.
#
# ---
# *Arguments*:
# * (required) _cmd_: Array containing String objects
# * (optional) _query_: String
# * (optional) _options_: Hash
# *Returns*:: String or nil
def query_command_fork(cmd, query = nil, options = {})
  output = nil
  call_command_fork(cmd, options) do |pipe|
    pipe.sync = true
    pipe.print(query) if query
    # closing the write side lets the program see the end of its input
    pipe.close_write
    output = pipe.read
  end
  output
end

# Executes the program via Open3.popen3 with the query (String) given
# to the stdin, waits the program termination, and
# returns the data from stdout and stderr as an array of the strings.
#
# You would use this method only when you really need to get stderr.
#
# ---
# *Arguments*:
# * (required) _cmd_: Array containing String objects
# * (optional) _query_: String
# *Returns*:: Array containing 2 objects: output string (or nil) and stderr string (or nil)
def query_command_open3(cmd, query = nil)
  errorlog = nil
  cmd = safe_command_line_array(cmd)
  Open3.popen3(*cmd) do |pin, pout, perr|
    perr.sync = true
    # stderr is read in a separate thread while stdout is being read
    t = Thread.start { errorlog = perr.read }
    begin
      pin.print query if query
      pin.close
      output = pout.read
    ensure
      t.join
    end
    [ output, errorlog ]
  end
end

# Same as FileUtils.remove_entry_secure after Ruby 1.8.3.
# In Ruby 1.8.2 or previous version, it only shows warning message
# and does nothing.
#
# It is strongly recommended using Ruby 1.8.5 or later.
# ---
# *Arguments*:
# * (required) _path_: String
# * (optional) _force_: boolean
def remove_entry_secure(path, force = false)
  # The capability is checked up front instead of rescuing NoMethodError,
  # so that a NoMethodError raised during the removal is not mistaken
  # for a missing FileUtils.remove_entry_secure.
  if FileUtils.respond_to?(:remove_entry_secure) then
    FileUtils.remove_entry_secure(path, force)
  else
    warn "The temporary file or directory is not removed because of the lack of FileUtils.remove_entry_secure. Use Ruby 1.8.3 or later (1.8.5 or later is strongly recommended): #{path}"
    nil
  end
end

# Backport of Dir.mktmpdir in Ruby 1.9.
#
# Same as Dir.mktmpdir(prefix_suffix) in Ruby 1.9.
# Dir.mktmpdir itself is used whenever it is available.
#
# With a block, the directory path is passed to the block and the
# directory is removed afterwards. Without a block, the path of the
# created directory is returned.
#
# ---
# *Arguments*:
# * (optional) prefix_suffix: String (or Array, etc.)
# * (optional) tmpdir: String: temporary directory's path
#
def mktmpdir(prefix_suffix = nil, tmpdir = nil, &block)
  # The capability is checked up front instead of rescuing NoMethodError:
  # a NoMethodError raised inside the caller's block must propagate and
  # must not cause the block to be run a second time by the backport.
  if Dir.respond_to?(:mktmpdir) then
    return Dir.mktmpdir(prefix_suffix, tmpdir, &block)
  end

  # backported from Ruby 1.9.2-preview1.
  # ***** Below is excerpted from Ruby 1.9.2-preview1's lib/tmpdir.rb ****
  # ***** Be careful about copyright. ****
  case prefix_suffix
  when nil
    prefix = "d"
    suffix = ""
  when String
    prefix = prefix_suffix
    suffix = ""
  when Array
    prefix = prefix_suffix[0]
    suffix = prefix_suffix[1]
  else
    raise ArgumentError, "unexpected prefix_suffix: #{prefix_suffix.inspect}"
  end
  tmpdir ||= Dir.tmpdir
  t = Time.now.strftime("%Y%m%d")
  n = nil
  begin
    path = "#{tmpdir}/#{prefix}#{t}-#{$$}-#{rand(0x100000000).to_s(36)}"
    path << "-#{n}" if n
    path << suffix
    Dir.mkdir(path, 0700)
  rescue Errno::EEXIST
    n ||= 0
    n += 1
    retry
  end

  if block_given?
    begin
      yield path
    ensure
      remove_entry_secure path
    end
  else
    path
  end
  # ***** Above is excerpted from Ruby 1.9.2-preview1's lib/tmpdir.rb ****
end

# Bio::Command::Tmpdir is a wrapper class to handle temporary directory
# like Tempfile class. A temporary directory is created when the object
# of the class is created, and automatically removed when the object
# is destroyed by GC.
#
# BioRuby library internal use only.
class Tmpdir

  # Internal use only. Users should not use this class directly.
  #
  # Bio::Command::Tmpdir::Remover is a class to remove temporary
  # directory.
  #
  # Acknowledgement: The essence of the code is taken from tempfile.rb
  # in Ruby trunk (svn 34413) and in Ruby 1.8.7.
  class Remover
    # Internal use only. Users should not call this method.
    def initialize(data)
      @pid = $$
      @data = data
    end

    # Internal use only. Users should not call this method.
    def call(*args)
      # do nothing in a process other than the one that created the object
      return if @pid != $$

      path, = *@data

      $stderr.print "removing ", path, "..." if $DEBUG
      if path and !path.empty? and
          File.directory?(path) and
          !File.symlink?(path) then
        Bio::Command.remove_entry_secure(path)
        $stderr.print "done\n" if $DEBUG
      else
        $stderr.print "skipped\n" if $DEBUG
      end
    end
  end #class Remover

  # Creates a new Tmpdir object.
  # The arguments are the same as Bio::Command.mktmpdir.
  #
  # ---
  # *Arguments*:
  # * (optional) prefix_suffix: String (or Array)
  # * (optional) tmpdir: String: temporary directory's path
  # *Returns*:: Tmpdir object
  def initialize(prefix_suffix = nil, tmpdir = nil)
    # The finalizer is registered before the directory exists; it shares
    # @data, which receives the path once the directory has been created.
    @data = []
    @clean_proc = Remover.new(@data)
    ObjectSpace.define_finalizer(self, @clean_proc)
    @data.push(@path = Bio::Command.mktmpdir(prefix_suffix, tmpdir).freeze)
  end

  # Path to the temporary directory
  #
  # Raises IOError after the directory has been removed by close!.
  #
  # *Returns*:: String
  def path
    @path || raise(IOError, 'removed temporary directory')
  end

  # Removes the temporary directory.
  #
  # *Returns*:: nil
  def close!
    # raise error if path is nil
    self.path
    # finalizer object is called to remove the directory
    @clean_proc.call
    # unregister finalizer
    ObjectSpace.undefine_finalizer(self)
    # @data and @path is removed
    @data = @path = nil
  end
end #class Tmpdir

# Same as OpenURI.open_uri(uri).read
# and
# it uses proxy if an environment variable (same as OpenURI.open_uri)
# is set.
# The opened stream is closed before returning.
#
# ---
# *Arguments*:
# * (required) _uri_: URI object or String
# *Returns*:: String
def read_uri(uri)
  # The block form closes the stream (and its temporary file, if any).
  OpenURI.open_uri(uri) { |io| io.read }
end

# Same as:
#   Net::HTTP.start(address, port)
# and
# it uses proxy if an environment variable (same as OpenURI.open_uri)
# is set.
#
# Raises RuntimeError ('Non-HTTP proxy') when the proxy found is not
# an HTTP proxy.
#
# ---
# *Arguments*:
# * (required) _address_: String containing host name or IP address
# * (optional) _port_: port (same as Net::HTTP::start)
# *Returns*:: (same as Net::HTTP::start except for proxy support)
def start_http(address, port = 80, &block)
  uri = URI.parse("http://#{address}:#{port}")
  # Note: URI#find_proxy is an unofficial method defined in open-uri.rb.
  # If the spec of open-uri.rb would be changed, we should change below.
  if proxyuri = uri.find_proxy then
    raise 'Non-HTTP proxy' if proxyuri.class != URI::HTTP
    http = Net::HTTP.Proxy(proxyuri.host, proxyuri.port)
  else
    http = Net::HTTP
  end
  http.start(address, port, &block)
end

# Same as:
#   Net::HTTP.new(address, port)
# and
# it uses proxy if an environment variable (same as OpenURI.open_uri)
# is set.
#
# Raises RuntimeError ('Non-HTTP proxy') when the proxy found is not
# an HTTP proxy.
#
# ---
# *Arguments*:
# * (required) _address_: String containing host name or IP address
# * (optional) _port_: port (same as Net::HTTP::start)
# *Returns*:: (same as Net::HTTP.new except for proxy support)
def new_http(address, port = 80)
  # Note: URI#find_proxy is an unofficial method defined in open-uri.rb.
  # If the spec of open-uri.rb would be changed, we should change below.
  proxy = URI.parse("http://#{address}:#{port}").find_proxy
  return Net::HTTP.new(address, port) unless proxy

  raise 'Non-HTTP proxy' unless proxy.class == URI::HTTP
  Net::HTTP.new(address, port, proxy.host, proxy.port)
end

# Posts +params+ as an application/x-www-form-urlencoded body to +path+
# through the given +http+ object, i.e. roughly the same as:
#   http = Net::HTTP.new(...); http.post_form(path, params)
# In addition, +header+ can be set.
# (Note that Content-Type and Content-Length are automatically
# set by default, and that entries of +header+ take precedence.)
# +params+ is encoded by make_cgi_params, and +header+ must be a hash.
#
# ---
# *Arguments*:
# * (required) _http_: Net::HTTP object or compatible object
# * (required) _path_: String
# * (optional) _params_: Hash containing parameters
# * (optional) _header_: Hash containing header strings
# *Returns*:: (same as Net::HTTP::post_form)
def http_post_form(http, path, params = nil, header = {})
  body = make_cgi_params(params)
  defaults = {
    'Content-Type'   => 'application/x-www-form-urlencoded',
    'Content-Length' => body.length.to_s
  }
  http.post(path, body, defaults.merge(header))
end

# Same as:
#   Net::HTTP.post_form(uri, params)
# and
# it uses proxy if an environment variable (same as OpenURI.open_uri)
# is set.
# In addition, +header+ can be set.
# (Note that Content-Type and Content-Length are automatically
# set by default.)
# +uri+ must be a URI object, +params+ must be a hash, and
# +header+ must be a hash.
#
# ---
# *Arguments*:
# * (required) _uri_: URI object or String
# * (optional) _params_: Hash containing parameters
# * (optional) _header_: Hash containing header strings
# *Returns*:: (same as Net::HTTP::post_form)
def post_form(uri, params = nil, header = {})
  unless uri.is_a?(URI)
    uri = URI.parse(uri)
  end

  data = make_cgi_params(params)

  hash = {
    'Content-Type'   => 'application/x-www-form-urlencoded',
    'Content-Length' => data.length.to_s
  }
  # entries given by the caller override the defaults above
  hash.update(header)

  start_http(uri.host, uri.port) do |http|
    http.post(uri.path, data, hash)
  end
end

# Builds parameter string from parameters for
# application/x-www-form-urlencoded.
#
# Accepted forms of +params+:
# * Hash: each key/value pair is encoded.
# * Array of Hash, Array of [key, value] pairs, or Array of
#   "key=value" Strings: each entry is encoded.
# * String: stripped and URI-escaped as a whole.
# * anything else (including nil): an empty string is returned.
#
# ---
# *Arguments*:
# * (required) _params_: Hash containing parameters
# *Returns*:: String
def make_cgi_params(params)
  data = ""
  case params
  when Hash
    data = params.map do |key, val|
      make_cgi_params_key_value(key, val)
    end.join('&')
  when Array
    case params.first
    when Hash
      data = params.map do |hash|
        hash.map do |key, val|
          make_cgi_params_key_value(key, val)
        end
      end.join('&')
    when Array
      data = params.map do |key, val|
        make_cgi_params_key_value(key, val)
      end.join('&')
    when String
      data = params.map do |str|
        key, val = str.split(/\=/, 2)
        if val then
          make_cgi_params_key_value(key, val)
        else
          CGI.escape(str)
        end
      end.join('&')
    end
  when String
    # URI.escape was removed in Ruby 3.0. The RFC 2396 parser object
    # provides the same escaping; plain URI.escape is kept only for
    # old Rubies where no parser object is defined.
    escaper =
      if defined?(URI::RFC2396_PARSER) then
        URI::RFC2396_PARSER
      elsif defined?(URI::DEFAULT_PARSER) then
        URI::DEFAULT_PARSER
      else
        URI
      end
    data = escaper.escape(params.strip)
  end
  return data
end

# Builds parameter string from a key string and a value (or values)
# for application/x-www-form-urlencoded.
# # --- # *Arguments*: # * (required) _key_: String # * (required) _value_: String or Array containing String # *Returns*:: String def make_cgi_params_key_value(key, value) result = [] case value when Array value.each do |val| result << [key, val].map {|x| CGI.escape(x.to_s) }.join('=') end else result << [key, value].map {|x| CGI.escape(x.to_s) }.join('=') end return result end end # module Command end # module Bio bio-1.4.3.0001/lib/bio/version.rb0000644000004100000410000000205112200110570016212 0ustar www-datawww-data# # = bio/version.rb - BioRuby version information # # Copyright:: Copyright (C) 2001-2012 # Toshiaki Katayama , # Naohisa Goto # License:: The Ruby License # module Bio # BioRuby version (Array containing Integer) BIORUBY_VERSION = [1, 4, 3].extend(Comparable).freeze # Extra version specifier (String or nil). # Existance of the value indicates pre-release version or modified version. # # nil :: Release version. # ".0000"..".4999" :: Release version with patches. # ".5000" :: Development unstable version. # ".5001"..".8999" :: Pre-alpha version. # "-alphaN" (N=0..99) :: Alpha version. # "-preN" (N=0..99) :: Pre-release test version. # "-rcN" (N=0..99) :: Release candidate version. # BIORUBY_EXTRA_VERSION = ".0001" # Version identifier, including extra version string (String) # Unlike BIORUBY_VERSION, it is not comparable. 
BIORUBY_VERSION_ID = (BIORUBY_VERSION.join('.') + BIORUBY_EXTRA_VERSION.to_s).freeze end #module Bio bio-1.4.3.0001/lib/bio/sequence/0000755000004100000410000000000012200110570016012 5ustar www-datawww-databio-1.4.3.0001/lib/bio/sequence/format_raw.rb0000644000004100000410000000066212200110570020504 0ustar www-datawww-data# # = bio/sequence/format_raw.rb - Raw sequence formatter # # Copyright:: Copyright (C) 2008 Naohisa Goto # License:: The Ruby License # module Bio::Sequence::Format::Formatter # Raw sequence output formatter class class Raw < Bio::Sequence::Format::FormatterBase # output raw sequence data def output "#{@sequence.seq}" end end #class Raw end #module Bio::Sequence::Format::Formatter bio-1.4.3.0001/lib/bio/sequence/na.rb0000644000004100000410000003412212200110570016737 0ustar www-datawww-data# # = bio/sequence/na.rb - nucleic acid sequence class # # Copyright:: Copyright (C) 2006 # Toshiaki Katayama , # Ryan Raaum # License:: The Ruby License # module Bio autoload :NucleicAcid, 'bio/data/na' unless const_defined?(:NucleicAcid) autoload :CodonTable, 'bio/data/codontable' unless const_defined?(:CodonTable) require 'bio/sequence' unless const_defined?(:Sequence) class Sequence # = DESCRIPTION # Bio::Sequence::NA represents a bare Nucleic Acid sequence in bioruby. # # = USAGE # # Create a Nucleic Acid sequence. # dna = Bio::Sequence.auto('atgcatgcATGCATGCAAAA') # rna = Bio::Sequence.auto('augcaugcaugcaugcaaaa') # # # What are the names of all the bases? # puts dna.names # puts rna.names # # # What is the GC percentage? # puts dna.gc_percent # puts rna.gc_percent # # # What is the molecular weight? # puts dna.molecular_weight # puts rna.molecular_weight # # # What is the reverse complement? # puts dna.reverse_complement # puts dna.complement # # # Is this sequence DNA or RNA? # puts dna.rna? 
# # # Translate my sequence (see method docs for many options) # puts dna.translate # puts rna.translate class NA < String include Bio::Sequence::Common # Generate an nucleic acid sequence object from a string. # # s = Bio::Sequence::NA.new("aagcttggaccgttgaagt") # # or maybe (if you have an nucleic acid sequence in a file) # # s = Bio::Sequence:NA.new(File.open('dna.txt').read) # # Nucleic Acid sequences are *always* all lowercase in bioruby # # s = Bio::Sequence::NA.new("AAGcTtGG") # puts s #=> "aagcttgg" # # Whitespace is stripped from the sequence # # seq = Bio::Sequence::NA.new("atg\nggg\ttt\r gc") # puts s #=> "atggggttgc" # --- # *Arguments*: # * (required) _str_: String # *Returns*:: Bio::Sequence::NA object def initialize(str) super self.downcase! self.tr!(" \t\n\r",'') end # Alias of Bio::Sequence::Common splice method, documented there. def splicing(position) #:nodoc: mRNA = super if mRNA.rna? mRNA.tr!('t', 'u') else mRNA.tr!('u', 't') end mRNA end # Returns a new complementary sequence object (without reversing). # The original sequence object is not modified. # # s = Bio::Sequence::NA.new('atgc') # puts s.forward_complement #=> 'tacg' # puts s #=> 'atgc' # --- # *Returns*:: new Bio::Sequence::NA object def forward_complement s = self.class.new(self) s.forward_complement! s end # Converts the current sequence into its complement (without reversing). # The original sequence object is modified. # # seq = Bio::Sequence::NA.new('atgc') # puts s.forward_complement! #=> 'tacg' # puts s #=> 'tacg' # --- # *Returns*:: current Bio::Sequence::NA object (modified) def forward_complement! if self.rna? self.tr!('augcrymkdhvbswn', 'uacgyrkmhdbvswn') else self.tr!('atgcrymkdhvbswn', 'tacgyrkmhdbvswn') end self end # Returns a new sequence object with the reverse complement # sequence to the original. The original sequence is not modified. 
#
#   s = Bio::Sequence::NA.new('atgc')
#   puts s.reverse_complement  #=> 'gcat'
#   puts s                     #=> 'atgc'
# ---
# *Returns*:: new Bio::Sequence::NA object
def reverse_complement
  s = self.class.new(self)
  s.reverse_complement!
  s
end

# Converts the original sequence into its reverse complement.
# The original sequence is modified.
#
#   s = Bio::Sequence::NA.new('atgc')
#   puts s.reverse_complement!  #=> 'gcat'
#   puts s                      #=> 'gcat'
# ---
# *Returns*:: current Bio::Sequence::NA object (modified)
def reverse_complement!
  self.reverse!
  self.forward_complement!
end

# Alias for Bio::Sequence::NA#reverse_complement
alias complement reverse_complement

# Alias for Bio::Sequence::NA#reverse_complement!
alias complement! reverse_complement!

# Translate into an amino acid sequence.
#
#   s = Bio::Sequence::NA.new('atggcgtga')
#   puts s.translate                        #=> "MA*"
#
# By default, translate starts in reading frame position 1, but you
# can start in either 2 or 3 as well,
#
#   puts s.translate(2)                     #=> "WR"
#   puts s.translate(3)                     #=> "GV"
#
# You may also translate the reverse complement in one step by using frame
# values of -1, -2, and -3 (or 4, 5, and 6)
#
#   puts s.translate(-1)                    #=> "SRH"
#   puts s.translate(4)                     #=> "SRH"
#   puts s.reverse_complement.translate(1)  #=> "SRH"
#
# The default codon table in the translate function is the Standard
# Eukaryotic codon table.  The translate function takes either a
# number or a Bio::CodonTable object for its table argument.
# The available tables are
# (NCBI[http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t]):
#
#   1. "Standard (Eukaryote)"
#   2. "Vertebrate Mitochondrial"
#   3. "Yeast Mitochondrial"
#   4. "Mold, Protozoan, Coelenterate Mitochondrial and Mycoplasma/Spiroplasma"
#   5. "Invertebrate Mitochondrial"
#   6. "Ciliate Macronuclear and Dasycladacean"
#   9. "Echinoderm Mitochondrial"
#   10. "Euplotid Nuclear"
#   11. "Bacteria"
#   12. "Alternative Yeast Nuclear"
#   13. "Ascidian Mitochondrial"
#   14. "Flatworm Mitochondrial"
#   15. "Blepharisma Macronuclear"
#   16. "Chlorophycean Mitochondrial"
#   21. "Trematode Mitochondrial"
#   22. "Scenedesmus obliquus mitochondrial"
#   23. "Thraustochytrium Mitochondrial"
#
# If you are using anything other than the default table, you must specify
# frame in the translate method call,
#
#   puts s.translate                        #=> "MA*"  (using defaults)
#   puts s.translate(1,1)                   #=> "MA*"  (same as above, but explicit)
#   puts s.translate(1,2)                   #=> "MAW"  (different codon table)
#
# and using a Bio::CodonTable instance in the translate method call,
#
#   mt_table = Bio::CodonTable[2]
#   puts s.translate(1, mt_table)           #=> "MAW"
#
# By default, any invalid or unknown codons (as could happen if the
# sequence contains ambiguities) will be represented by 'X' in the
# translated sequence.
# You may change this to any character of your choice.
#
#   s = Bio::Sequence::NA.new('atgcNNtga')
#   puts s.translate                        #=> "MX*"
#   puts s.translate(1,1,'9')               #=> "M9*"
#
# The translate method considers gaps to be unknown characters and treats
# them as such (i.e. does not collapse sequences prior to translation), so
#
#   s = Bio::Sequence::NA.new('atgc--tga')
#   puts s.translate                        #=> "MX*"
#
# A sequence too short to contain one complete codon in the requested
# frame translates to an empty amino acid sequence.
# ---
# *Arguments*:
# * (optional) _frame_: one of 1,2,3,4,5,6,-1,-2,-3 (default 1);
#   any other value is treated as frame 1
# * (optional) _table_: Fixnum in range 1,23 or Bio::CodonTable object
#   (default 1)
# * (optional) _unknown_: Character (default 'X')
# *Returns*:: Bio::Sequence::AA object
def translate(frame = 1, table = 1, unknown = 'X')
  if table.is_a?(Bio::CodonTable)
    ct = table
  else
    ct = Bio::CodonTable[table]
  end
  # Work on a private copy. Re-wrapping keeps the sequence class even on
  # Ruby versions where String#tr returns a plain String for subclasses,
  # so that complement! below is still available.
  naseq = self.class.new(self.dna)
  case frame
  when 1, 2, 3
    from = frame - 1
  when 4, 5, 6
    from = frame - 4
    naseq.complement!
  when -1, -2, -3
    from = -1 - frame
    naseq.complement!
  else
    from = 0
  end
  # Length of the translatable region, trimmed to whole codons.
  nalen = naseq.length - from
  nalen -= nalen % 3
  # A non-positive length means no complete codon fits in this frame;
  # slicing with a negative length would return nil.
  return Bio::Sequence::AA.new('') if nalen <= 0
  aaseq = naseq[from, nalen].gsub(/.{3}/) { |codon| ct[codon] || unknown }
  return Bio::Sequence::AA.new(aaseq)
end

# Returns counts of each codon in the sequence in a hash.
#
#   s = Bio::Sequence::NA.new('atggcgtga')
#   puts s.codon_usage #=> {"gcg"=>1, "tga"=>1, "atg"=>1}
#
# This method does not validate codons! Any three letter group is a 'codon'. So,
#
#   s = Bio::Sequence::NA.new('atggNNtga')
#   puts s.codon_usage #=> {"tga"=>1, "gnn"=>1, "atg"=>1}
#
#   s = Bio::Sequence::NA.new('atgg--tga')
#   puts s.codon_usage #=> {"tga"=>1, "g--"=>1, "atg"=>1}
#
# Also, there is no option to work in any frame other than the first.
# ---
# *Returns*:: Hash object
def codon_usage
  hash = Hash.new(0)
  # Non-overlapping windows of three bases, starting at the first base.
  self.window_search(3, 3) do |codon|
    hash[codon] += 1
  end
  return hash
end

# Calculate the ratio of GC / ATGC bases as a whole-number percentage.
# The value is truncated by integer division (not rounded).
# U is regarded as T.
#
#   s = Bio::Sequence::NA.new('atggcgtga')
#   puts s.gc_percent                       #=> 55
# ---
# *Returns*:: Integer (0 when the sequence has no a/t/u/g/c)
def gc_percent
  count = self.composition
  at = count['a'] + count['t'] + count['u']
  gc = count['g'] + count['c']
  return 0 if at + gc == 0
  gc = 100 * gc / (at + gc)
  return gc
end

# Calculate the ratio of GC / ATGC bases. U is regarded as T.
#
#   s = Bio::Sequence::NA.new('atggcgtga')
#   puts s.gc_content.to_f                  #=> 0.555555555555556
# ---
# *Returns*:: Numeric. 0.0 when the sequence has no a/t/u/g/c; otherwise
# the result of Integer#quo (a Rational on Ruby 1.9 and later).
def gc_content
  count = self.composition
  at = count['a'] + count['t'] + count['u']
  gc = count['g'] + count['c']
  return 0.0 if at + gc == 0
  return gc.quo(at + gc)
end

# Calculate the ratio of AT / ATGC bases. U is regarded as T.
#
#   s = Bio::Sequence::NA.new('atggcgtga')
#   puts s.at_content.to_f                  #=> 0.444444444444444
# ---
# *Returns*:: Numeric. 0.0 when the sequence has no a/t/u/g/c; otherwise
# the result of Integer#quo (a Rational on Ruby 1.9 and later).
def at_content
  count = self.composition
  at = count['a'] + count['t'] + count['u']
  gc = count['g'] + count['c']
  return 0.0 if at + gc == 0
  return at.quo(at + gc)
end

# Calculate the ratio of (G - C) / (G + C) bases.
#
#   s = Bio::Sequence::NA.new('atggcgtga')
#   puts s.gc_skew.to_f                     #=> 0.6
# ---
# *Returns*:: Numeric. 0.0 when the sequence has no g/c; otherwise
# the result of Integer#quo (a Rational on Ruby 1.9 and later).
def gc_skew
  count = self.composition
  g = count['g']
  c = count['c']
  return 0.0 if g + c == 0
  return (g - c).quo(g + c)
end

# Calculate the ratio of (A - T) / (A + T) bases. U is regarded as T.
#
#   s = Bio::Sequence::NA.new('atgttgttgttc')
#   puts s.at_skew.to_f                     #=> -0.75
# ---
# *Returns*:: Numeric. 0.0 when the sequence has no a/t/u; otherwise
# the result of Integer#quo (a Rational on Ruby 1.9 and later).
def at_skew
  count = self.composition
  a = count['a']
  t = count['t'] + count['u']
  return 0.0 if a + t == 0
  return (a - t).quo(a + t)
end

# Returns an alphabetically sorted array of any non-standard bases
# (other than 'atgcu').
#
#   s = Bio::Sequence::NA.new('atgStgQccR')
#   puts s.illegal_bases                    #=> ["q", "r", "s"]
# ---
# *Returns*:: Array object
def illegal_bases
  # De-duplicate first so that only the distinct symbols are sorted.
  self.scan(/[^atgcu]/).uniq.sort
end

# Estimate molecular weight (using the values from BioPerl's
# SeqStats.pm[http://doc.bioperl.org/releases/bioperl-1.0.1/Bio/Tools/SeqStats.html] module).
#
#   s = Bio::Sequence::NA.new('atggcgtga')
#   puts s.molecular_weight                 #=> 2841.00708
#
# RNA and DNA do not have the same molecular weights,
#
#   s = Bio::Sequence::NA.new('auggcguga')
#   puts s.molecular_weight                 #=> 2956.94708
# ---
# *Returns*:: Float object
def molecular_weight
  if self.rna?
    Bio::NucleicAcid.weight(self, true)
  else
    Bio::NucleicAcid.weight(self)
  end
end

# Create a ruby regular expression instance
# (Regexp)[http://corelib.rubyonrails.org/classes/Regexp.html]
#
#   s = Bio::Sequence::NA.new('atggcgtga')
#   puts s.to_re                            #=> /atggcgtga/
# ---
# *Returns*:: Regexp object
def to_re
  if self.rna?
    # The pattern is built from the DNA form of the sequence; the second
    # argument tells Bio::NucleicAcid.to_re that the source was RNA.
    Bio::NucleicAcid.to_re(self.dna, true)
  else
    Bio::NucleicAcid.to_re(self)
  end
end

# Generate the list of the names of each nucleotide along with the
# sequence (full name).  Names are looked up in the table returned by
# Bio::NucleicAcid.names, using the upper-cased one-letter symbol.
#
#   s = Bio::Sequence::NA.new('atg')
#   puts s.names                            #=> ["Adenine", "Thymine", "Guanine"]
# ---
# *Returns*:: Array object
def names
  array = []
  self.each_byte do |x|
    array.push(Bio::NucleicAcid.names[x.chr.upcase])
  end
  return array
end

# Returns a new sequence object with any 'u' bases changed to 't'.
# The original sequence is not modified.
# # s = Bio::Sequence::NA.new('augc') # puts s.dna #=> 'atgc' # puts s #=> 'augc' # --- # *Returns*:: new Bio::Sequence::NA object def dna self.tr('u', 't') end # Changes any 'u' bases in the original sequence to 't'. # The original sequence is modified. # # s = Bio::Sequence::NA.new('augc') # puts s.dna! #=> 'atgc' # puts s #=> 'atgc' # --- # *Returns*:: current Bio::Sequence::NA object (modified) def dna! self.tr!('u', 't') end # Returns a new sequence object with any 't' bases changed to 'u'. # The original sequence is not modified. # # s = Bio::Sequence::NA.new('atgc') # puts s.dna #=> 'augc' # puts s #=> 'atgc' # --- # *Returns*:: new Bio::Sequence::NA object def rna self.tr('t', 'u') end # Changes any 't' bases in the original sequence to 'u'. # The original sequence is modified. # # s = Bio::Sequence::NA.new('atgc') # puts s.dna! #=> 'augc' # puts s #=> 'augc' # --- # *Returns*:: current Bio::Sequence::NA object (modified) def rna! self.tr!('t', 'u') end def rna? self.index('u') end protected :rna? # Example: # # seq = Bio::Sequence::NA.new('gaattc') # cuts = seq.cut_with_enzyme('EcoRI') # # _or_ # # seq = Bio::Sequence::NA.new('gaattc') # cuts = seq.cut_with_enzyme('g^aattc') # --- # See Bio::RestrictionEnzyme::Analysis.cut def cut_with_enzyme(*args) Bio::RestrictionEnzyme::Analysis.cut(self, *args) end alias cut_with_enzymes cut_with_enzyme end # NA end # Sequence end # Bio bio-1.4.3.0001/lib/bio/sequence/dblink.rb0000644000004100000410000000242012200110570017600 0ustar www-datawww-data# # = bio/sequence/dblink.rb - sequence ID with database name # # Copyright:: Copyright (C) 2008 # Naohisa Goto # License:: The Ruby License # module Bio require 'bio/sequence' unless const_defined?(:Sequence) # Bio::Sequence::DBLink stores IDs with the database name. # Its main purpose is to store database cross-reference information # for a sequence entry. 
class Sequence::DBLink # creates a new DBLink object def initialize(database, primary_id, *secondary_ids) @database = database @id = primary_id @secondary_ids = secondary_ids end # Database name, or namespace identifier (String). attr_reader :database # Primary identifier (String) attr_reader :id # Secondary identifiers (Array of String) attr_reader :secondary_ids #-- # class methods #++ # Parses DR line in EMBL entry, and returns a DBLink object. def self.parse_embl_DR_line(str) str = str.sub(/\.\s*\z/, '') str.sub!(/\ADR /, '') self.new(*(str.split(/\s*\;\s*/, 3))) end # Parses DR line in UniProt entry, and returns a DBLink object. def self.parse_uniprot_DR_line(str) str = str.sub(/\.\s*\z/, '') str.sub!(/\ADR /, '') self.new(*(str.split(/\s*\;\s*/))) end end #class Sequence::DBLink end #module Bio bio-1.4.3.0001/lib/bio/sequence/common.rb0000644000004100000410000002206012200110570017627 0ustar www-datawww-data# # = bio/sequence/common.rb - common methods for biological sequence # # Copyright:: Copyright (C) 2006 # Toshiaki Katayama , # Ryan Raaum # License:: The Ruby License # module Bio autoload :Locations, 'bio/location' unless const_defined?(:Locations) require 'bio/sequence' unless const_defined?(:Sequence) class Sequence # = DESCRIPTION # Bio::Sequence::Common is a # Mixin[http://www.rubycentral.com/book/tut_modules.html] # implementing methods common to # Bio::Sequence::AA and Bio::Sequence::NA. All of these methods # are available to either Amino Acid or Nucleic Acid sequences, and # by encapsulation are also available to Bio::Sequence objects. # # = USAGE # # # Create a sequence # dna = Bio::Sequence.auto('atgcatgcatgc') # # # Splice out a subsequence using a Genbank-style location string # puts dna.splice('complement(1..4)') # # # What is the base composition? 
# puts dna.composition # # # Create a random sequence with the composition of a current sequence # puts dna.randomize module Common # Return sequence as # String[http://corelib.rubyonrails.org/classes/String.html]. # The original sequence is unchanged. # # seq = Bio::Sequence::NA.new('atgc') # puts s.to_s #=> 'atgc' # puts s.to_s.class #=> String # puts s #=> 'atgc' # puts s.class #=> Bio::Sequence::NA # --- # *Returns*:: String object def to_s String.new(self) end alias to_str to_s # Create a new sequence based on the current sequence. # The original sequence is unchanged. # # s = Bio::Sequence::NA.new('atgc') # s2 = s.seq # puts s2 #=> 'atgc' # --- # *Returns*:: new Bio::Sequence::NA/AA object def seq self.class.new(self) end # Normalize the current sequence, removing all whitespace and # transforming all positions to uppercase if the sequence is AA or # transforming all positions to lowercase if the sequence is NA. # The original sequence is modified. # # s = Bio::Sequence::NA.new('atgc') # s.normalize! # --- # *Returns*:: current Bio::Sequence::NA/AA object (modified) def normalize! initialize(self) self end alias seq! normalize! # Add new data to the end of the current sequence. # The original sequence is modified. # # s = Bio::Sequence::NA.new('atgc') # s << 'atgc' # puts s #=> "atgcatgc" # s << s # puts s #=> "atgcatgcatgcatgc" # --- # *Returns*:: current Bio::Sequence::NA/AA object (modified) def concat(*arg) super(self.class.new(*arg)) end def <<(*arg) concat(*arg) end # Create a new sequence by adding to an existing sequence. # The existing sequence is not modified. 
#
#   s = Bio::Sequence::NA.new('atgc')
#   s2 = s + 'atgc'
#   puts s2                                 #=> "atgcatgc"
#   puts s                                  #=> "atgc"
#
# The new sequence is of the same class as the existing sequence if
# the new data was added to an existing sequence,
#
#   puts s2.class == s.class                #=> true
#
# but if an existing sequence is added to a String, the result is a String
#
#   s3 = 'atgc' + s
#   puts s3.class                           #=> String
# ---
# *Returns*:: new Bio::Sequence::NA/AA *or* String object
def +(*arg)
  self.class.new(super(*arg))
end

# Returns a new sequence containing the subsequence identified by the
# start and end numbers given as parameters.  *Important:* Biological
# sequence numbering conventions (one-based) rather than ruby's
# (zero-based) numbering conventions are used.
#
#   s = Bio::Sequence::NA.new('atggaatga')
#   puts s.subseq(1,3)                      #=> "atg"
#
# Start defaults to 1 and end defaults to the entire existing string, so
# subseq called without any parameters simply returns a new sequence
# identical to the existing sequence.
#
#   puts s.subseq                           #=> "atggaatga"
# ---
# *Arguments*:
# * (optional) _s_(start): Integer (default 1)
# * (optional) _e_(end): Integer (default current sequence length)
# *Returns*:: new Bio::Sequence::NA/AA object, or nil when the start
# position lies beyond the end of the sequence
# *Raises*:: RuntimeError unless both positions are positive
def subseq(s = 1, e = self.length)
  raise "Error: start/end position must be a positive integer" unless s > 0 and e > 0
  # Convert the one-based inclusive positions to a zero-based range.
  s -= 1
  e -= 1
  part = self[s..e]
  # On Ruby versions where String#[] returns a plain String for subclass
  # receivers, re-wrap so that callers still get a sequence object.
  if part and not part.is_a?(self.class)
    part = self.class.new(part)
  end
  part
end

# This method steps through a sequence in steps of 'step_size' by
# subsequences of 'window_size'.  Typically used with a block.
# Any remaining sequence at the terminal end will be returned.
#
# Prints average GC% on each 100bp
#
#   s.window_search(100) do |subseq|
#     puts subseq.gc_percent
#   end
#
# Prints every translated peptide (length 5aa) in the same frame
#
#   s.window_search(15, 3) do |subseq|
#     puts subseq.translate
#   end
#
# Split genome sequence by 10000bp with 1000bp overlap in fasta format
#
#   i = 1
#   remainder = s.window_search(10000, 9000) do |subseq|
#     puts subseq.to_fasta("segment #{i}", 60)
#     i += 1
#   end
#   puts remainder.to_fasta("segment #{i}", 60)
# ---
# *Arguments*:
# * (required) _window_size_: Fixnum
# * (optional) _step_size_: Fixnum (default 1)
# *Returns*:: new Bio::Sequence::NA/AA object holding whatever follows the
# last yielded window; nil when _window_size_ exceeds the sequence length
def window_search(window_size, step_size = 1)
  last_step = 0
  0.step(self.length - window_size, step_size) do |i|
    window = self[i, window_size]
    # Keep the sequence class on Ruby versions where String#[] returns a
    # plain String for subclass receivers.
    window = self.class.new(window) unless window.is_a?(self.class)
    yield window
    last_step = i
  end
  remainder = self[last_step + window_size .. -1]
  if remainder and not remainder.is_a?(self.class)
    remainder = self.class.new(remainder)
  end
  return remainder
end

# Returns a float total value for the sequence given a hash of
# base or residue values,
#
#   values = {'a' => 0.1, 't' => 0.2, 'g' => 0.3, 'c' => 0.4}
#   s = Bio::Sequence::NA.new('atgc')
#   puts s.total(values)                    #=> 1.0
#
# Symbols missing from the hash count as the hash's default value, or
# as 0.0 when it has none.  The given hash is not modified.
# ---
# *Arguments*:
# * (required) _hash_: Hash object
# *Returns*:: Float object
def total(hash)
  # Resolve the fallback once instead of installing it as the caller's
  # hash default.
  fallback = hash.default || 0.0
  sum = 0.0
  self.each_byte do |x|
    key = x.chr
    sum += hash.key?(key) ? hash[key] : fallback
  end
  return sum
end

# Returns a hash of the occurrence counts for each residue or base.
#
#   s = Bio::Sequence::NA.new('atgc')
#   puts s.composition              #=> {"a"=>1, "c"=>1, "g"=>1, "t"=>1}
# ---
# *Returns*:: Hash object (missing keys read as 0)
def composition
  count = Hash.new(0)
  self.scan(/./) do |x|
    count[x] += 1
  end
  return count
end

# Returns a randomized sequence.  The default is to retain the same
# base/residue composition as the original.  If a hash of base/residue
# counts is given, the new sequence will be based on that hash
# composition.  If a block is given, each new randomly selected
# position will be passed into the block.  In all cases, the
# original sequence is not modified.
# # s = Bio::Sequence::NA.new('atgc') # puts s.randomize #=> "tcag" (for example) # # new_composition = {'a' => 2, 't' => 2} # puts s.randomize(new_composition) #=> "ttaa" (for example) # # count = 0 # s.randomize { |x| count += 1 } # puts count #=> 4 # --- # *Arguments*: # * (optional) _hash_: Hash object # *Returns*:: new Bio::Sequence::NA/AA object def randomize(hash = nil) if hash tmp = '' hash.each {|k, v| tmp += k * v.to_i } else tmp = self end seq = self.class.new(tmp) # Reference: http://en.wikipedia.org/wiki/Fisher-Yates_shuffle seq.length.downto(2) do |n| k = rand(n) c = seq[n - 1] seq[n - 1] = seq[k] seq[k] = c end if block_given? then (0...seq.length).each do |i| yield seq[i, 1] end return self.class.new('') else return seq end end # Return a new sequence extracted from the original using a GenBank style # position string. See also documentation for the Bio::Location class. # # s = Bio::Sequence::NA.new('atgcatgcatgcatgc') # puts s.splice('1..3') #=> "atg" # puts s.splice('join(1..3,8..10)') #=> "atgcat" # puts s.splice('complement(1..3)') #=> "cat" # puts s.splice('complement(join(1..3,8..10))') #=> "atgcat" # # Note that 'complement'ed Genbank position strings will have no # effect on Bio::Sequence::AA objects. # --- # *Arguments*: # * (required) _position_: String *or* Bio::Location object # *Returns*:: Bio::Sequence::NA/AA object def splice(position) unless position.is_a?(Locations) then position = Locations.new(position) end s = '' position.each do |location| if location.sequence s << location.sequence else exon = self.subseq(location.from, location.to) begin exon.complement! 
if location.strand < 0 rescue NameError end s << exon end end return self.class.new(s) end alias splicing splice end # Common end # Sequence end # Bio bio-1.4.3.0001/lib/bio/sequence/aa.rb0000644000004100000410000000612212200110570016721 0ustar www-datawww-data# # = bio/sequence/aa.rb - amino acid sequence class # # Copyright:: Copyright (C) 2006 # Toshiaki Katayama , # Ryan Raaum # License:: The Ruby License # module Bio autoload :AminoAcid, 'bio/data/aa' unless const_defined?(:AminoAcid) require 'bio/sequence' unless const_defined?(:Sequence) class Sequence # = DESCRIPTION # Bio::Sequence::AA represents a bare Amino Acid sequence in bioruby. # # = USAGE # # Create an Amino Acid sequence. # aa = Bio::Sequence::AA.new('ACDEFGHIKLMNPQRSTVWYU') # # # What are the three-letter codes for all the residues? # puts aa.codes # # # What are the names of all the residues? # puts aa.names # # # What is the molecular weight of this peptide? # puts aa.molecular_weight class AA < String include Bio::Sequence::Common # Generate an amino acid sequence object from a string. # # s = Bio::Sequence::AA.new("RRLEHTFVFLRNFSLMLLRY") # # or maybe (if you have an amino acid sequence in a file) # # s = Bio::Sequence:AA.new(File.open('aa.txt').read) # # Amino Acid sequences are *always* all uppercase in bioruby # # s = Bio::Sequence::AA.new("rrLeHtfV") # puts s #=> "RRLEHTFVF" # # Whitespace is stripped from the sequence # # s = Bio::Sequence::AA.new("RRL\nELA\tRG\r RL") # puts s #=> "RRLELARGRL" # --- # *Arguments*: # * (required) _str_: String # *Returns*:: Bio::Sequence::AA object def initialize(str) super self.upcase! 
self.tr!(" \t\n\r",'') end # Estimate molecular weight based on # Fasman1976[http://www.genome.ad.jp/dbget-bin/www_bget?aaindex+FASG760101] # # s = Bio::Sequence::AA.new("RRLE") # puts s.molecular_weight #=> 572.655 # --- # *Returns*:: Float object def molecular_weight Bio::AminoAcid.weight(self) end # Create a ruby regular expression instance # (Regexp)[http://corelib.rubyonrails.org/classes/Regexp.html] # # s = Bio::Sequence::AA.new("RRLE") # puts s.to_re #=> /RRLE/ # --- # *Returns*:: Regexp object def to_re Bio::AminoAcid.to_re(self) end # Generate the list of the names of each residue along with the # sequence (3 letters code). Codes used in bioruby are found in the # Bio::AminoAcid::NAMES hash. # # s = Bio::Sequence::AA.new("RRLE") # puts s.codes #=> ["Arg", "Arg", "Leu", "Glu"] # --- # *Returns*:: Array object def codes array = [] self.each_byte do |x| array.push(Bio::AminoAcid.names[x.chr]) end return array end # Generate the list of the names of each residue along with the # sequence (full name). Names used in bioruby are found in the # Bio::AminoAcid::NAMES hash. # # s = Bio::Sequence::AA.new("RRLE") # puts s.names # #=> ["arginine", "arginine", "leucine", "glutamic acid"] # --- # *Returns*:: Array object def names self.codes.map do |x| Bio::AminoAcid.names[x] end end end # AA end # Sequence end # Bio bio-1.4.3.0001/lib/bio/sequence/quality_score.rb0000644000004100000410000001353112200110570021225 0ustar www-datawww-data# # = bio/sequence/quality_score.rb - Sequence quality score manipulation modules # # Copyright:: Copyright (C) 2009 # Naohisa Goto # License:: The Ruby License # # == Description # # Sequence quality score manipulation modules, mainly used by Bio::Fastq # and related classes. # # == References # # * FASTQ format specification # http://maq.sourceforge.net/fastq.shtml # module Bio require 'bio/sequence' unless const_defined?(:Sequence) class Sequence # Bio::Sequence::QualityScore is a name space for quality score modules. 
# BioRuby internal use only (mainly from Bio::Fastq). module QualityScore # Converter methods between PHRED and Solexa quality scores. module Converter # Converts PHRED scores to Solexa scores. # # The values may be truncated or incorrect if overflows/underflows # occurred during the calculation. # --- # *Arguments*: # * (required) _scores_: (Array containing Integer) quality scores # *Returns*:: (Array containing Integer) quality scores def convert_scores_from_phred_to_solexa(scores) sc = scores.collect do |q| t = 10 ** (q / 10.0) - 1 t = Float::MIN if t < Float::MIN r = 10 * Math.log10(t) r.finite? ? r.round : r end sc end # Converts Solexa scores to PHRED scores. # # The values may be truncated if overflows/underflows occurred # during the calculation. # --- # *Arguments*: # * (required) _scores_: (Array containing Integer) quality scores # *Returns*:: (Array containing Integer) quality scores def convert_scores_from_solexa_to_phred(scores) sc = scores.collect do |q| r = 10 * Math.log10(10 ** (q / 10.0) + 1) r.finite? ? r.round : r end sc end # Does nothing and simply returns the given argument. # # --- # *Arguments*: # * (required) _scores_: (Array containing Integer) quality scores # *Returns*:: (Array containing Integer) quality scores def convert_nothing(scores) scores end end #module Converter # Bio::Sequence::QualityScore::Phred is a module having quality calculation # methods for the PHRED quality score. # # BioRuby internal use only (mainly from Bio::Fastq). module Phred include Converter # Type of quality scores. # --- # *Returns*:: (Symbol) the type of quality score. def quality_score_type :phred end # PHRED score to probability conversion. 
# --- # *Arguments*: # * (required) _scores_: (Array containing Integer) scores # *Returns*:: (Array containing Float) probabilities (0<=p<=1) def phred_q2p(scores) scores.collect do |q| r = 10 ** (- q / 10.0) if r > 1.0 then r = 1.0 #elsif r < 0.0 then # r = 0.0 end r end end alias q2p phred_q2p module_function :q2p public :q2p # Probability to PHRED score conversion. # # The values may be truncated or incorrect if overflows/underflows # occurred during the calculation. # --- # *Arguments*: # * (required) _probabilities_: (Array containing Float) probabilities # *Returns*:: (Array containing Float) scores def phred_p2q(probabilities) probabilities.collect do |p| p = Float::MIN if p < Float::MIN q = -10 * Math.log10(p) q.finite? ? q.round : q end end alias p2q phred_p2q module_function :p2q public :p2q alias convert_scores_from_phred convert_nothing alias convert_scores_to_phred convert_nothing alias convert_scores_from_solexa convert_scores_from_solexa_to_phred alias convert_scores_to_solexa convert_scores_from_phred_to_solexa module_function :convert_scores_to_solexa public :convert_scores_to_solexa end #module Phred # Bio::Sequence::QualityScore::Solexa is a module having quality # calculation methods for the Solexa quality score. # # BioRuby internal use only (mainly from Bio::Fastq). module Solexa include Converter # Type of quality scores. # --- # *Returns*:: (Symbol) the type of quality score. def quality_score_type :solexa end # Solexa score to probability conversion. # --- # *Arguments*: # * (required) _scores_: (Array containing Integer) scores # *Returns*:: (Array containing Float) probabilities def solexa_q2p(scores) scores.collect do |q| t = 10 ** (- q / 10.0) t /= (1.0 + t) if t > 1.0 then t = 1.0 #elsif t < 0.0 then # t = 0.0 end t end end alias q2p solexa_q2p module_function :q2p public :q2p # Probability to Solexa score conversion. 
# --- # *Arguments*: # * (required) _probabilities_: (Array containing Float) probabilities # *Returns*:: (Array containing Float) scores def solexa_p2q(probabilities) probabilities.collect do |p| t = p / (1.0 - p) t = Float::MIN if t < Float::MIN q = -10 * Math.log10(t) q.finite? ? q.round : q end end alias p2q solexa_p2q module_function :p2q public :p2q alias convert_scores_from_solexa convert_nothing alias convert_scores_to_solexa convert_nothing alias convert_scores_from_phred convert_scores_from_phred_to_solexa alias convert_scores_to_phred convert_scores_from_solexa_to_phred module_function :convert_scores_to_phred public :convert_scores_to_phred end #module Solexa end #module QualityScore end #class Sequence end #module Bio bio-1.4.3.0001/lib/bio/sequence/adapter.rb0000644000004100000410000000731412200110570017764 0ustar www-datawww-data# # = bio/sequence/adapter.rb - Bio::Sequence adapter helper module # # Copyright:: Copyright (C) 2008 # Naohisa Goto , # License:: The Ruby License # module Bio require 'bio/sequence' unless const_defined?(:Sequence) # Internal use only. Normal users should not use this module. # # Helper methods for defining adapters used when converting data classes to # Bio::Sequence class, with pseudo lazy evaluation and pseudo memoization. # # This module is used by using "extend", not "include". # module Sequence::Adapter autoload :GenBank, 'bio/db/genbank/genbank_to_biosequence' autoload :EMBL, 'bio/db/embl/embl_to_biosequence' autoload :FastaFormat, 'bio/db/fasta/fasta_to_biosequence' autoload :FastaNumericFormat, 'bio/db/fasta/qual_to_biosequence' autoload :BioSQL, 'bio/db/biosql/biosql_to_biosequence' autoload :SangerChromatogram, 'bio/db/sanger_chromatogram/chromatogram_to_biosequence' autoload :Fastq, 'bio/db/fastq/fastq_to_biosequence' private # Defines a reader attribute method with psudo lazy evaluation/memoization. 
#
# It defines a method <i>name</i> like attr_reader, but at the first time
# when the method <i>name</i> is called, it acts as follows:
# When instance variable @<i>name</i> is not defined,
# calls __get__<i>name</i>(@source_data) and stores the returned
# value to @<i>name</i>, and changes its behavior to the same as
# attr_reader :<i>name</i>.
# When instance variable @<i>name</i> is already defined,
# its behavior is changed to the same as
# attr_reader :<i>name</i>.
# When the object is frozen, storing to the instance variable and
# changing methods behavior do not occur, and the value of
# __get__<i>name</i>(@source_data) is returned.
#
# Note that it assumes that the source data object is stored in
# @source_data instance variable.
def attr_reader_lazy(name)
  #$stderr.puts "attr_reader_lazy :#{name}"
  varname = "@#{name}".intern
  methodname = "__get__#{name}".intern

  # module to reset method's behavior to normal attr_reader
  reset = "Attr_#{name}".intern
  const_set(reset, Module.new { attr_reader name })
  reset_module_name = "#{self}::#{reset}"

  # define attr method
  # NOTE: the debug lines inside the generated code are written with an
  # escaped "#" so that they stay literal comments instead of having
  # $stderr interpolated into the source text.
  module_eval <<__END_OF_DEF__
    def #{name}
      unless defined? #{varname} then
        \#$stderr.puts "LAZY #{name}: calling #{methodname}"
        val = #{methodname}(@source_data)
        #{varname} = val unless frozen?
      else
        val = #{varname}
      end
      unless frozen? then
        \#$stderr.puts "LAZY #{name}: finalize: attr_reader :#{name}"
        self.extend(#{reset_module_name})
      end
      val
    end
__END_OF_DEF__
end

# Defines a Bio::Sequence to Bio::* adapter method with
# pseudo lazy evaluation and pseudo memoization.
#
# Without block, defines a private method __get__<i>name</i>(orig)
# which calls <i>source_method</i> for @source_data.
#
# def_biosequence_adapter(name, source_method) is the same as:
#   def __get__name(orig); orig.source_method; end
#   attr_reader_lazy name
#
# If block is given, __get__<i>name</i>(orig) is defined
# with the block. The @source_data is given as an argument of the block,
# i.e. the block must get an argument.
# def def_biosequence_adapter(name, source_method = name, &block) methodname = "__get__#{name}".intern if block then define_method(methodname, block) else module_eval <<__END_OF_DEF__ def #{methodname}(orig) orig.#{source_method} end __END_OF_DEF__ end private methodname attr_reader_lazy name true end end #module Sequence::Adapter end #module Bio bio-1.4.3.0001/lib/bio/sequence/compat.rb0000644000004100000410000000713112200110570017624 0ustar www-datawww-data# # = bio/sequence/compat.rb - methods for backward compatibility # # Copyright:: Copyright (C) 2006 # Toshiaki Katayama , # Ryan Raaum # License:: The Ruby License # module Bio require 'bio/sequence' unless const_defined?(:Sequence) class Sequence # Return sequence as # String[http://corelib.rubyonrails.org/classes/String.html]. # The original sequence is unchanged. # # seq = Bio::Sequence.new('atgc') # puts s.to_s #=> 'atgc' # puts s.to_s.class #=> String # puts s #=> 'atgc' # puts s.class #=> Bio::Sequence # --- # *Returns*:: String object def to_s String.new(self.seq) end alias to_str to_s module Common # Bio::Sequence#to_fasta is DEPRECIATED # Do not use Bio::Sequence#to_fasta ! Use Bio::Sequence#output instead. # Note that Bio::Sequence::NA#to_fasta, Bio::Sequence::AA#to_fasata, # and Bio::Sequence::Generic#to_fasta can still be used, # because there are no alternative methods. # # Output the FASTA format string of the sequence. The 1st argument is # used as the comment string. If the 2nd option is given, the output # sequence will be folded. # --- # *Arguments*: # * (optional) _header_: String object # * (optional) _width_: Fixnum object (default nil) # *Returns*:: String def to_fasta(header = '', width = nil) warn "Bio::Sequence#to_fasta is obsolete. Use Bio::Sequence#output(:fasta) instead" if $DEBUG ">#{header}\n" + if width self.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n") else self.to_s + "\n" end end end # Common class NA # Generate a new random sequence with the given frequency of bases. 
# The sequence length is determined by their cumulative sum. # (See also Bio::Sequence::Common#randomize which creates a new # randomized sequence object using the base composition of an existing # sequence instance). # # counts = {'a'=>1,'c'=>2,'g'=>3,'t'=>4} # puts Bio::Sequence::NA.randomize(counts) #=> "ggcttgttac" (for example) # # You may also feed the output of randomize into a block # # actual_counts = {'a'=>0, 'c'=>0, 'g'=>0, 't'=>0} # Bio::Sequence::NA.randomize(counts) {|x| actual_counts[x] += 1} # actual_counts #=> {"a"=>1, "c"=>2, "g"=>3, "t"=>4} # --- # *Arguments*: # * (optional) _hash_: Hash object # *Returns*:: Bio::Sequence::NA object def self.randomize(*arg, &block) self.new('').randomize(*arg, &block) end def pikachu #:nodoc: self.dna.tr("atgc", "pika") # joke, of course :-) end end # NA class AA # Generate a new random sequence with the given frequency of bases. # The sequence length is determined by their cumulative sum. # (See also Bio::Sequence::Common#randomize which creates a new # randomized sequence object using the base composition of an existing # sequence instance). 
# # counts = {'R'=>1,'L'=>2,'E'=>3,'A'=>4} # puts Bio::Sequence::AA.randomize(counts) #=> "AAEAELALRE" (for example) # # You may also feed the output of randomize into a block # # actual_counts = {'R'=>0,'L'=>0,'E'=>0,'A'=>0} # Bio::Sequence::AA.randomize(counts) {|x| actual_counts[x] += 1} # actual_counts #=> {"A"=>4, "L"=>2, "E"=>3, "R"=>1} # --- # *Arguments*: # * (optional) _hash_: Hash object # *Returns*:: Bio::Sequence::AA object def self.randomize(*arg, &block) self.new('').randomize(*arg, &block) end end # AA end # Sequence end # Bio bio-1.4.3.0001/lib/bio/sequence/generic.rb0000644000004100000410000000061412200110570017754 0ustar www-datawww-data# # = bio/sequence/generic.rb - generic sequence class to store an intact string # # Copyright:: Copyright (C) 2006 # Toshiaki Katayama # License:: The Ruby License # module Bio require 'bio/sequence' unless const_defined?(:Sequence) class Sequence class Generic < String #:nodoc: include Bio::Sequence::Common end # Generic end # Sequence end # Bio bio-1.4.3.0001/lib/bio/sequence/format.rb0000644000004100000410000002463512200110570017641 0ustar www-datawww-data# # = bio/sequence/format.rb - various output format of the biological sequence # # Copyright:: Copyright (C) 2006-2008 # Toshiaki Katayama , # Naohisa Goto , # Ryan Raaum , # Jan Aerts # License:: The Ruby License # require 'erb' module Bio class Sequence # = DESCRIPTION # A Mixin[http://www.rubycentral.com/book/tut_modules.html] # of methods used by Bio::Sequence#output to output sequences in # common bioinformatic formats. These are not called in isolation. 
# # = USAGE # # Given a Bio::Sequence object, # puts s.output(:fasta) # puts s.output(:genbank) # puts s.output(:embl) module Format # Repository of generic (or both nucleotide and protein) sequence # formatter classes module Formatter # Raw format generatar autoload :Raw, 'bio/sequence/format_raw' # Fasta format generater autoload :Fasta, 'bio/db/fasta/format_fasta' # NCBI-style Fasta format generatar # (resemble to EMBOSS "ncbi" format) autoload :Fasta_ncbi, 'bio/db/fasta/format_fasta' # FASTQ "fastq-sanger" format generator autoload :Fastq, 'bio/db/fastq/format_fastq' # FASTQ "fastq-sanger" format generator autoload :Fastq_sanger, 'bio/db/fastq/format_fastq' # FASTQ "fastq-solexa" format generator autoload :Fastq_solexa, 'bio/db/fastq/format_fastq' # FASTQ "fastq-illumina" format generator autoload :Fastq_illumina, 'bio/db/fastq/format_fastq' # FastaNumericFormat format generator autoload :Fasta_numeric, 'bio/db/fasta/format_qual' # Qual format generator. # Its format is the same as Fasta_numeric, but it would perform # to convert quality score or generates scores from error probability. autoload :Qual, 'bio/db/fasta/format_qual' end #module Formatter # Repository of nucleotide sequence formatter classes module NucFormatter # GenBank format generater # Note that the name is 'Genbank' and NOT 'GenBank' autoload :Genbank, 'bio/db/genbank/format_genbank' # EMBL format generater # Note that the name is 'Embl' and NOT 'EMBL' autoload :Embl, 'bio/db/embl/format_embl' end #module NucFormatter # Repository of protein sequence formatter classes module AminoFormatter # currently no formats available end #module AminoFormatter # Formatter base class. # Any formatter class should inherit this class. 
class FormatterBase

  # Returns a formatted string of the given sequence.
  # Builds a formatter for the sequence and returns its #output.
  # ---
  # *Arguments*:
  # * (required) _sequence_: Bio::Sequence object
  # * (optional) _options_: a Hash object
  # *Returns*:: String object
  def self.output(sequence, options = {})
    new(sequence, options).output
  end

  # Registers a new ERB template: compiles _str_ and installs it as the
  # +output+ instance method of the class it is called on.
  # Intended to be called from the body of a subclass (it is private).
  # ---
  # *Arguments*:
  # * (required) _str_: ERB template source (String)
  # *Returns*:: true
  def self.erb_template(str)
    ERB.new(str).def_method(self, 'output')
    true
  end
  private_class_method :erb_template

  # Generates output data. Subclasses must override this method
  # (directly or through erb_template).
  # ---
  # *Returns*:: String object
  # *Raises*:: NotImplementedError when not overridden
  def output
    raise NotImplementedError, 'should be implemented in subclass'
  end

  # Creates a new formatter object for output.
  # ---
  # *Arguments*:
  # * (required) _sequence_: object to be formatted; unknown methods
  #   called on the formatter are forwarded to it
  # * (optional) _options_: a Hash object
  def initialize(sequence, options = {})
    @sequence = sequence
    @options = options
  end

  private

  # Any unknown methods are delegated to the sequence object.
  #
  # When the sequence object itself does not know the method, the
  # NoMethodError is rewritten so that it names the formatter rather
  # than the wrapped sequence. The rewrite only happens when the first
  # two backtrace entries match this very delegation call; "__LINE__ - 2"
  # must therefore keep pointing at the __send__ line below -- do not
  # insert lines between the __send__ call and the lineno assignment.
  def method_missing(sym, *args, &block) #:nodoc:
    begin
      @sequence.__send__(sym, *args, &block)
    rescue NoMethodError => evar
      lineno = __LINE__ - 2
      file = __FILE__
      bt_here = [ "#{file}:#{lineno}:in \`__send__\'",
                  "#{file}:#{lineno}:in \`method_missing\'"
                ]
      if bt_here == evar.backtrace[0, 2] then
        bt = evar.backtrace[2..-1]
        evar = evar.class.new("undefined method \`#{sym.to_s}\' for #{self.inspect}")
        evar.set_backtrace(bt)
      end
      raise(evar)
    end
  end

  # Keeps respond_to? in step with the delegation done by
  # method_missing: the formatter responds to whatever the wrapped
  # sequence object responds to.
  def respond_to_missing?(sym, include_private = false) #:nodoc:
    @sequence.respond_to?(sym, include_private) || super
  end

end #class FormatterBase

# Using Bio::Sequence::Format, return a String with the Bio::Sequence
# object formatted in the given style.
#
# The format name is capitalized and looked up as a formatter class in
# the repositories that apply to the molecule type of the sequence.
# Use list_output_formats to see which names are available
# (for example 'fasta', 'genbank', and 'embl').
#
#  s = Bio::Sequence.new('atgc')
#  puts s.output(:fasta)                   #=> "> \natgc\n"
#
# The style argument is given as a Ruby
# Symbol(http://www.ruby-doc.org/core/classes/Symbol.html)
# ---
# *Arguments*:
# * (required) _format_: :fasta, :genbank, :embl, or any other name
#   returned by list_output_formats
# * (optional) _options_: a Hash passed through to the formatter
# *Returns*:: String object
# *Raises*:: RuntimeError ("unknown format name ...") when no
#            repository provides the format
def output(format = :fasta, options = {})
  formatter_const = format.to_s.capitalize.intern
  const_name = formatter_const.to_s

  formatter_class = nil
  get_formatter_repositories.each do |mod|
    # Only accept names the repository itself provides. A bare
    # const_get on a Module also searches Object, so a format such as
    # :string would otherwise resolve to ::String and fail later with
    # a confusing NoMethodError instead of "unknown format name".
    next unless mod.constants.any? { |c| c.to_s == const_name }
    begin
      formatter_class = mod.const_get(formatter_const)
    rescue NameError
      # the constant is registered but could not be resolved;
      # try the next repository
    end
    break if formatter_class
  end
  raise "unknown format name #{format.inspect}" unless formatter_class

  formatter_class.output(self, options)
end

# Returns a list of available output formats for the sequence.
# The names are the downcased constant names of the formatter
# repositories that apply to the molecule type of the sequence.
# ---
# *Arguments*:
# *Returns*:: Array of Symbols
def list_output_formats
  names = get_formatter_repositories.collect { |mod| mod.constants }
  names.flatten!
  names.collect! { |x| x.to_s.downcase.intern }
  names
end

# The same as output(:fasta, :header=>definition, :width=>width)
# This method is intended to replace Bio::Sequence#to_fasta.
#
#  s = Bio::Sequence.new('atgc')
#  puts s.output_fasta                   #=> "> \natgc\n"
# ---
# *Arguments*:
# * (optional) _definition_: (String) definition line
# * (optional) _width_: (Integer) width (default 70)
# *Returns*:: String object
def output_fasta(definition = nil, width = 70)
  output(:fasta, :header => definition, :width => width)
end

private

# Returns formatter repository modules, most specific first:
# nucleotide formatters for Bio::Sequence::NA, protein formatters for
# Bio::Sequence::AA, and both when the molecule type is anything else.
# The generic Formatter repository is always searched last.
def get_formatter_repositories
  if self.moltype == Bio::Sequence::NA then
    [ NucFormatter, Formatter ]
  elsif self.moltype == Bio::Sequence::AA then
    [ AminoFormatter, Formatter ]
  else
    [ NucFormatter, AminoFormatter, Formatter ]
  end
end

#---
# Not yet implemented :)
# Remove the nodoc command after implementation!
# --- # *Returns*:: String object #def format_gff #:nodoc: # raise NotImplementedError #end #+++ # Formatting helper methods for INSD (NCBI, EMBL, DDBJ) feature table module INSDFeatureHelper private # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any # case, it would be difficult to successfully call this method outside # its expected context). # # Output the Genbank feature format string of the sequence. # Used in Bio::Sequence#output. # --- # *Returns*:: String object def format_features_genbank(features) prefix = ' ' * 5 indent = prefix + ' ' * 16 fwidth = 79 - indent.length format_features(features, prefix, indent, fwidth) end # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any # case, it would be difficult to successfully call this method outside # its expected context). # # Output the EMBL feature format string of the sequence. # Used in Bio::Sequence#output. # --- # *Returns*:: String object def format_features_embl(features) prefix = 'FT ' indent = prefix + ' ' * 16 fwidth = 80 - indent.length format_features(features, prefix, indent, fwidth) end # format INSD featurs def format_features(features, prefix, indent, width) result = [] features.each do |feature| result.push format_feature(feature, prefix, indent, width) end return result.join('') end # format an INSD feature def format_feature(feature, prefix, indent, width) result = prefix + sprintf("%-16s", feature.feature) position = feature.position #position = feature.locations.to_s result << wrap_and_split_lines(position, width).join("\n" + indent) result << "\n" result << format_qualifiers(feature.qualifiers, indent, width) return result end # format qualifiers def format_qualifiers(qualifiers, indent, width) qualifiers.collect do |qualifier| q = qualifier.qualifier v = qualifier.value.to_s if v == true lines = wrap_with_newline('/' + q, width) elsif q == 'translation' lines = fold("/#{q}=\"#{v}\"", width) else if v[/\D/] or q == 'chromosome' #v.delete!("\x00-\x1f\x7f-\xff") 
v.gsub!(/"/, '""') v = '"' + v + '"' end lines = wrap_with_newline('/' + q + '=' + v, width) end lines.gsub!(/^/, indent) lines end.join end def fold(str, width) str.gsub(Regexp.new("(.{1,#{width}})"), "\\1\n") end def fold_and_split_lines(str, width) str.scan(Regexp.new(".{1,#{width}}")) end def wrap_and_split_lines(str, width) result = [] lefts = str.chomp.split(/(?:\r\n|\r|\n)/) lefts.each do |left| left.rstrip! while left and left.length > width line = nil width.downto(1) do |i| if left[i..i] == ' ' or /[\,\;]/ =~ left[(i-1)..(i-1)] then line = left[0..(i-1)].sub(/ +\z/, '') left = left[i..-1].sub(/\A +/, '') break end end if line.nil? then line = left[0..(width-1)] left = left[width..-1] end result << line left = nil if left.to_s.empty? end result << left if left end return result end def wrap_with_newline(str, width) result = wrap_and_split_lines(str, width) result_string = result.join("\n") result_string << "\n" unless result_string.empty? return result_string end def wrap(str, width = 80, prefix = '') actual_width = width - prefix.length result = wrap_and_split_lines(str, actual_width) result_string = result.join("\n#{prefix}") result_string = prefix + result_string unless result_string.empty? return result_string end #-- # internal use only MonthStr = [ nil, 'JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC' ].collect { |x| x.freeze }.freeze #++ # formats a date from Date, DateTime, or Time object, or String. 
def format_date(d) begin yy = d.year mm = d.month dd = d.day rescue NoMethodError, NameError, ArgumentError, TypeError return sprintf("%-11s", d) end sprintf("%02d-%-3s-%04d", dd, MonthStr[mm], yy) end # null date def null_date Date.new(0, 1, 1) end end #module INSDFeatureHelper end #module Format end #class Sequence end #module Bio bio-1.4.3.0001/lib/bio/sequence/sequence_masker.rb0000644000004100000410000000600112200110570021506 0ustar www-datawww-data# # = bio/sequence/sequence_masker.rb - Sequence masking helper methods # # Copyright:: Copyright (C) 2010 # Naohisa Goto # License:: The Ruby License # # == Description # # Bio::Sequence::SequenceMasker is a mix-in module to provide helpful # methods for masking a sequence. # # For details, see documentation of Bio::Sequence::SequenceMasker. # module Bio require 'bio/sequence' unless const_defined?(:Sequence) class Sequence # Bio::Sequence::SequenceMasker is a mix-in module to provide helpful # methods for masking a sequence. # # It is only expected to be included in Bio::Sequence. # In the future, methods in this module might be moved to # Bio::Sequence or other module and this module might be removed. # Please do not depend on this module. # module SequenceMasker # Masks the sequence with each value in the enum. # The enum should be an array or enumerator. # A block must be given. # When the block returns true, the sequence is masked with # mask_char. # --- # *Arguments*: # * (required) enum : Enumerator # * (required) mask_char : (String) character used for masking # *Returns*:: Bio::Sequence object def mask_with_enumerator(enum, mask_char) offset = 0 unit = mask_char.length - 1 s = self.seq.class.new(self.seq) j = 0 enum.each_with_index do |item, index| if yield item then j = index + offset if j < s.length then s[j, 1] = mask_char offset += unit end end end newseq = self.dup newseq.seq = s newseq end # Masks low quality sequence regions. 
# For each sequence position, if the quality score is smaller than # the threshold, the sequence in the position is replaced with # mask_char. # # Note: This method does not care quality_score_type. # --- # *Arguments*: # * (required) threshold : (Numeric) threshold # * (required) mask_char : (String) character used for masking # *Returns*:: Bio::Sequence object def mask_with_quality_score(threshold, mask_char) scores = self.quality_scores || [] mask_with_enumerator(scores, mask_char) do |item| item < threshold end end # Masks high error-probability sequence regions. # For each sequence position, if the error probability is larger than # the threshold, the sequence in the position is replaced with # mask_char. # # --- # *Arguments*: # * (required) threshold : (Numeric) threshold # * (required) mask_char : (String) character used for masking # *Returns*:: Bio::Sequence object def mask_with_error_probability(threshold, mask_char) values = self.error_probabilities || [] mask_with_enumerator(values, mask_char) do |item| item > threshold end end end #module SequenceMasker end #class Sequence end #module Bio bio-1.4.3.0001/lib/bio/feature.rb0000644000004100000410000000765512200110570016177 0ustar www-datawww-data# # = bio/feature.rb - Features/Feature class (GenBank Feature table) # # Copyright:: Copyright (c) 2002, 2005 Toshiaki Katayama # 2006 Jan Aerts # License:: The Ruby License # module Bio autoload :Locations, 'bio/location' unless const_defined?(:Locations) # = DESCRIPTION # Container for the sequence annotation. # # = USAGE # # Create a Bio::Feature object. 
# # For example: the GenBank-formatted entry in genbank for accession M33388 # # contains the following feature: # # exon 1532..1799 # # /gene="CYP2D6" # # /note="cytochrome P450 IID6; GOO-132-127" # # /number="1" # feature = Bio::Feature.new('exon','1532..1799') # feature.append(Bio::Feature::Qualifier.new('gene', 'CYP2D6')) # feature.append(Bio::Feature::Qualifier.new('note', 'cytochrome P450 IID6')) # feature.append(Bio::Feature::Qualifier.new('number', '1')) # # # or all in one go: # feature2 = Bio::Feature.new('exon','1532..1799', # [ Bio::Feature::Qualifier.new('gene', 'CYP2D6'), # Bio::Feature::Qualifier.new('note', 'cytochrome P450 IID6; GOO-132-127'), # Bio::Feature::Qualifier.new('number', '1') # ]) # # # Print the feature # puts feature.feature + "\t" + feature.position # feature.each do |qualifier| # puts "- " + qualifier.qualifier + ": " + qualifier.value # end # # = REFERENCES # INSD feature table definition:: http://www.ddbj.nig.ac.jp/FT/full_index.html class Feature # Create a new Bio::Feature object. # *Arguments*: # * (required) _feature_: type of feature (e.g. "exon") # * (required) _position_: position of feature (e.g. "complement(1532..1799)") # * (opt) _qualifiers_: list of Bio::Feature::Qualifier objects (default: []) # *Returns*:: Bio::Feature object def initialize(feature = '', position = '', qualifiers = []) @feature, @position, @qualifiers = feature, position, qualifiers end # Returns type of feature in String (e.g 'CDS', 'gene') attr_accessor :feature # Returns position of the feature in String (e.g. 'complement(123..146)') attr_accessor :position # Returns an Array of Qualifier objects. attr_accessor :qualifiers # Returns a Bio::Locations object translated from the position string. def locations Locations.new(@position) end # Appends a Qualifier object to the Feature. # # *Arguments*: # * (required) _qualifier_: Bio::Feature::Qualifier object # *Returns*:: Bio::Feature object def append(a) @qualifiers.push(a) if a.is_a? 
Qualifier return self end # Iterates on each qualifier object. # # *Arguments*: # * (optional) _key_: if specified, only iterates over qualifiers with this key def each(arg = nil) @qualifiers.each do |x| next if arg and x.qualifier != arg yield x end end # Returns a Hash constructed from qualifier objects. def assoc STDERR.puts "Bio::Feature#assoc is deprecated, use Bio::Feature#to_hash instead" if $DEBUG hash = Hash.new @qualifiers.each do |x| hash[x.qualifier] = x.value end return hash end # Returns a Hash constructed from qualifier objects. def to_hash hash = Hash.new @qualifiers.each do |x| hash[x.qualifier] ||= [] hash[x.qualifier] << x.value end return hash end # Short cut for the Bio::Feature#to_hash[key] def [](key) self.to_hash[key] end # Container for qualifier-value pairs for sequence features. class Qualifier # Creates a new Bio::Feature::Qualifier object # # *Arguments*: # * (required) _key_: key of the qualifier (e.g. "gene") # * (required) _value_: value of the qualifier (e.g. "CYP2D6") # *Returns*:: Bio::Feature::Qualifier object def initialize(key, value) @qualifier, @value = key, value end # Qualifier name in String attr_reader :qualifier # Qualifier value in String attr_reader :value end #Qualifier end #Feature end # Bio bio-1.4.3.0001/lib/bio/tree.rb0000644000004100000410000006367512200110570015507 0ustar www-datawww-data# # = bio/tree.rb - phylogenetic tree data structure class # # Copyright:: Copyright (C) 2006 # Naohisa Goto # License:: The Ruby License # # require 'matrix' require 'bio/pathway' module Bio # This is the class for phylogenetic tree. # It stores a phylogenetic tree. # # Internally, it is based on Bio::Pathway class. # However, users cannot handle Bio::Pathway object directly. # # This is alpha version. Incompatible changes may be made frequently. class Tree # Error when there are no path between specified nodes class NoPathError < RuntimeError; end # Edge object of each node. 
# By default, the object doesn't contain any node information.
class Edge

  # Creates a new edge.
  # A Numeric _distance_ is stored as the distance value; any other
  # non-nil value is treated as the string representation of the
  # distance (see distance_string=).
  def initialize(distance = nil)
    if distance.kind_of?(Numeric)
      self.distance = distance
    elsif distance
      self.distance_string = distance
    end
  end

  # evolutionary distance
  attr_reader :distance

  # evolutionary distance represented as a string
  attr_reader :distance_string

  # Sets evolutionary distance value.
  # distance_string is updated to the string form of _num_
  # (or to nil/false when _num_ is nil/false).
  def distance=(num)
    @distance = num
    @distance_string = (num ? num.to_s : num)
  end

  # Sets evolutionary distance value from a string.
  # A blank (or nil) string clears the distance; otherwise the distance
  # is the result of String#to_f. The given object is kept as
  # distance_string in both cases.
  def distance_string=(str)
    if str.to_s.strip.empty?
      @distance = nil
      @distance_string = str
    else
      @distance = str.to_f
      @distance_string = str
    end
  end

  # Visualization of this object.
  # (This used to return an empty string, which made edges invisible in
  # inspect output of trees and collections.)
  def inspect
    "<Edge distance=#{@distance.inspect}>"
  end

  # string representation of this object
  def to_s
    @distance_string.to_s
  end

  #---
  # methods for NHX (New Hampshire eXtended) and/or PhyloXML
  #+++

  # log likelihood value (:L in NHX)
  attr_accessor :log_likelihood

  # width of the edge
  # (width element of PhyloXML, or :W="w" in NHX)
  attr_accessor :width

  # Other NHX parameters. Returns a Hash.
  # Note that :L and :W
  # are not stored here but stored in the proper attributes in this class.
  # However, if you force to set these parameters in this hash,
  # the parameters in this hash are preferred when generating NHX.
  # In addition, If the same parameters are defined at Node object,
  # the parameters in the node are preferred.
  def nhx_parameters
    @nhx_parameters ||= {}
    @nhx_parameters
  end

end #class Edge

# Gets distance value from the given edge.
# Returns float or any other numeric value or nil.
# An object without a distance method is itself taken as the distance.
def get_edge_distance(edge)
  begin
    dist = edge.distance
  rescue NoMethodError
    dist = edge
  end
  dist
end

# Gets distance string from the given edge.
# Returns a string or nil.
# For an object without a distance_string method, its to_s is used
# (nil for nil/false).
def get_edge_distance_string(edge)
  begin
    dist = edge.distance_string
  rescue NoMethodError
    dist = (edge ? edge.to_s : nil)
  end
  dist
end

# Returns edge1 + edge2: a new Edge whose distance is the sum of both
# distances. A missing distance on one side is ignored; when both are
# missing, an Edge without distance is returned.
def get_edge_merged(edge1, edge2)
  dist1 = get_edge_distance(edge1)
  dist2 = get_edge_distance(edge2)
  if dist1 and dist2 then
    Edge.new(dist1 + dist2)
  elsif dist1 then
    Edge.new(dist1)
  elsif dist2 then
    Edge.new(dist2)
  else
    Edge.new
  end
end

# Node object.
class Node

  # Creates a new node.
  def initialize(name = nil)
    @name = name if name
  end

  # name of the node
  attr_accessor :name

  # bootstrap value
  attr_reader :bootstrap

  # bootstrap value as a string
  attr_reader :bootstrap_string

  # Sets a bootstrap value.
  # bootstrap_string is updated to the string form of _num_
  # (or to nil/false when _num_ is nil/false).
  def bootstrap=(num)
    @bootstrap_string = (num ? num.to_s : num)
    @bootstrap = num
  end

  # Sets a bootstrap value from a string.
  # A blank (or nil) string clears the value. Otherwise the value is an
  # Integer when the string denotes a whole number and a Float if not.
  def bootstrap_string=(str)
    if str.to_s.strip.empty?
      @bootstrap = nil
      @bootstrap_string = str
    else
      i = str.to_i
      f = str.to_f
      @bootstrap = (i == f ? i : f)
      @bootstrap_string = str
    end
  end

  # Visualization of this object.
  # Unnamed nodes are shown with a number derived from the object id.
  def inspect
    if @name and !@name.empty? then
      str = "(Node:#{@name.inspect}"
    else
      str = sprintf('(Node:%x', (self.__id__ << 1) & 0xffffffff)
    end
    if defined?(@bootstrap) and @bootstrap then
      str += " bootstrap=#{@bootstrap.inspect}"
    end
    str += ")"
    str
  end

  # string representation of this object
  def to_s
    @name.to_s
  end

  # the order of the node
  # (lower value, high priority)
  attr_accessor :order_number

  #---
  # methods for NHX (New Hampshire eXtended) and/or PhyloXML
  #+++

  # Phylogenetic events.
  # Returns an Array of one (or more?) of the following symbols
  #   :gene_duplication
  #   :speciation
  def events
    @events ||= []
    @events
  end

  # EC number (EC_number in PhyloXML, or :E in NHX)
  attr_accessor :ec_number

  # scientific name (scientific_name in PhyloXML, or :S in NHX)
  attr_accessor :scientific_name

  # taxonomy identifier (taxonomy_identifier in PhyloXML, or :T in NHX)
  attr_accessor :taxonomy_id

  # Other NHX parameters. Returns a Hash.
  # Note that :D, :E, :S, and :T
  # are not stored here but stored in the proper attributes in this class.
  # However, if you force to set these parameters in this hash,
  # the parameters in this hash are preferred when generating NHX.
  def nhx_parameters
    @nhx_parameters ||= {}
    @nhx_parameters
  end

end #class Node

# Gets node name.
# For an object without a name method, its to_s is used.
def get_node_name(node)
  begin
    node.name
  rescue NoMethodError
    node.to_s
  end
end

# Gets the bootstrap value of the node,
# or nil for an object without a bootstrap method.
def get_node_bootstrap(node)
  begin
    node.bootstrap
  rescue NoMethodError
    nil
  end
end

# Gets the bootstrap string of the node,
# or nil for an object without a bootstrap_string method.
def get_node_bootstrap_string(node)
  begin
    node.bootstrap_string
  rescue NoMethodError
    nil
  end
end

# Creates a new phylogenetic tree.
# When no arguments are given, it creates a new empty tree.
# When a Tree object is given, it copies the tree.
# Note that the new tree shares Node and Edge objects
# with the given tree.
def initialize(tree = nil)
  # creates an undirected adjacency list graph
  @pathway = Bio::Pathway.new([], true)
  @root = nil
  @options = {}
  _init_cache
  self.concat(tree) if tree
end

# (private) creates the internal cache
def _init_cache
  @cache_parent = {}
end
private :_init_cache

# (private) clear internal cache
def _clear_cache
  @cache_parent.clear
end
private :_clear_cache

# root node of this tree
# (even if unrooted tree, it is used by some methods)
attr_accessor :root

# tree options; mainly used for tree output
attr_accessor :options

# Clears all nodes and edges.
# Returns self.
# Note that options and root are also cleared.
def clear
  # re-running initialize replaces the graph, root, options and cache
  initialize
  self
end

# Returns all nodes as an array.
def nodes
  @pathway.graph.keys
end

# Number of nodes.
def number_of_nodes
  @pathway.nodes
end

# Iterates over each node of this tree.
# Returns self.
def each_node(&x) #:yields: node
  @pathway.graph.each_key(&x)
  self
end

# Iterates over each edges of this tree.
# Returns self.
def each_edge #:yields: source, target, edge
  @pathway.relations.each do |rel|
    yield rel.node[0], rel.node[1], rel.relation
  end
  self
end

# Returns all edges an array of [ node0, node1, edge ]
def edges
  @pathway.relations.collect do |rel|
    [ rel.node[0], rel.node[1], rel.relation ]
  end
end

# Returns number of edges in the tree.
def number_of_edges
  @pathway.relations.size
end

# Lists the nodes directly connected to _node_.
# Returns an array; it is empty when the node is unknown.
def adjacent_nodes(node)
  neighbours = @pathway.graph[node]
  return [] unless neighbours
  neighbours.keys
end

# Lists every edge attached to _source_ together with its far end,
# as an array of [ source, target, edge ] triples
# (empty when the node is unknown).
#
# The name "out_edges" is borrowed from the Boost Graph Library.
def out_edges(source)
  neighbours = @pathway.graph[source]
  return [] unless neighbours
  neighbours.collect { |target, edge| [ source, target, edge ] }
end

# Yields source, target and edge for every edge attached to _source_.
# Nothing is yielded for an unknown node. Returns self.
#
# The name "each_out_edge" is borrowed from the Boost Graph Library.
def each_out_edge(source) #:yields: source, target, edge
  neighbours = @pathway.graph[source]
  if neighbours then
    neighbours.each do |target, edge|
      yield source, target, edge
    end
  end
  self
end

# Counts the edges attached to _source_ (0 for an unknown node).
#
# The name "out_degree" is borrowed from the Boost Graph Library.
def out_degree(source)
  neighbours = @pathway.graph[source]
  return 0 unless neighbours
  neighbours.size
end

# Looks up the edge that joins _source_ and _target_.
# Returns nil when _source_ is unknown or the two nodes are not adjacent.
def get_edge(source, target)
  neighbours = @pathway.graph[source]
  return nil unless neighbours
  neighbours[target]
end

# Connects _source_ and _target_ with _edge_ (a fresh Edge by default),
# replacing any edge already present between them.
# Returns the edge that was added.
def add_edge(source, target, edge = Edge.new)
  _clear_cache
  relation = Bio::Relation.new(source, target, edge)
  @pathway.append(relation)
  edge
end

# Searches the tree for a node whose name equals _str_.
# Returns the node, or nil when there is none. When several nodes
# carry the same name, which one is returned is unspecified.
def get_node_by_name(str)
  self.each_node do |candidate|
    return candidate if get_node_name(candidate) == str
  end
  nil
end

# Registers _node_ in the tree; an already known node is left alone.
# Returns self.
def add_node(node)
  _clear_cache
  @pathway.graph[node] ||= {}
  self
end

# If the node exists, returns true.
# Otherwise, returns false.
def include?(node)
  !!@pathway.graph[node]
end

# Detaches _node_ from all of its neighbours; the node itself stays in
# the tree. Returns self.
# Raises IndexError when the node is not part of the tree.
def clear_node(node)
  raise IndexError, 'the node does not exist' unless self.include?(node)
  _clear_cache
  @pathway.relations.delete_if { |rel| rel.node.include?(node) }
  neighbours = @pathway.graph[node]
  neighbours.each_key do |other|
    @pathway.graph[other].delete(node)
  end
  neighbours.clear
  self
end

# Deletes _node_ and every edge attached to it. Returns self.
# Raises IndexError when the node is not part of the tree.
def remove_node(node)
  # the cache is invalidated inside clear_node
  self.clear_node(node)
  @pathway.graph.delete(node)
  self
end

# Deletes every node for which the block returns a true value,
# together with the edges attached to it. Returns self.
def remove_node_if
  # the cache is invalidated inside clear_node
  self.nodes.each do |node|
    next unless yield(node)
    self.clear_node(node)
    @pathway.graph.delete(node)
  end
  self
end

# Deletes the edge joining _source_ and _target_. Returns self.
# Raises IndexError when there is no such edge.
#---
# Should several edges join the two nodes, every one of them is deleted.
#+++
def remove_edge(source, target)
  raise IndexError, 'edge not found' unless self.get_edge(source, target)
  _clear_cache
  both_directions = [ [ source, target ], [ target, source ] ]
  @pathway.relations.delete_if do |rel|
    both_directions.any? { |pair| rel.node == pair }
  end
  both_directions.each do |from, to|
    neighbours = @pathway.graph[from]
    neighbours.delete(to) if neighbours
  end
  self
end

# Removes each edge if the block returns not nil.
# Returns self.
def remove_edge_if #:yields: source, target, edge
  _clear_cache
  dropped = []
  @pathway.relations.delete_if do |rel|
    doomed = yield(rel.node[0], rel.node[1], rel.edge)
    dropped << rel if doomed
    doomed
  end
  # bring the adjacency list in line with the relations just removed
  dropped.each do |rel|
    from, to = rel.node[0], rel.node[1]
    [ [ from, to ], [ to, from ] ].each do |a, b|
      neighbours = @pathway.graph[a]
      neighbours.delete(b) if neighbours
    end
  end
  self
end

# Substitutes every node with the value the block returns for it.
# Returns self.
def collect_node! #:yields: node
  _clear_cache
  mapping = {}
  self.each_node { |old| mapping[old] = yield(old) }
  # swap the nodes stored in the relations
  @pathway.relations.each do |rel|
    rel.node.collect! { |old| mapping[old] }
  end
  # rebuild the adjacency list from the relations
  @pathway.to_list
  # nodes without any edge are not covered by the relations
  mapping.each_value { |fresh| @pathway.graph[fresh] ||= {} }
  self
end

# Substitutes every edge with the value the block returns for it.
# Returns self.
def collect_edge! #:yields: source, target, edge
  _clear_cache
  @pathway.relations.each do |rel|
    rel.edge = yield(rel.node[0], rel.node[1], rel.relation)
    # second argument false: refresh the adjacency list only
    @pathway.append(rel, false)
  end
  self
end

# Builds the sub-tree made of the given nodes and the edges that join
# two of them.
# _nodes_ must be an array of nodes; nodes unknown to this tree are
# skipped. Returns a new tree of the same class, which shares its Node
# and Edge objects with this tree.
def subtree(nodes)
  known = nodes.find_all { |x| @pathway.graph[x] }
  result = self.class.new
  return result if known.empty?
  known.each { |x| result.add_node(x) }
  self.each_edge do |node1, node2, edge|
    next unless result.include?(node1) and result.include?(node2)
    result.add_edge(node1, node2, edge)
  end
  result
end

# Gets the sub-tree consisted of given nodes and
# all internal nodes connected between given nodes.
# _nodes_ must be an array of nodes.
# Nodes that do not exist in the original tree are ignored.
# Returns a Tree object.
# The result is unspecified for cyclic trees.
# Note that the sub-tree shares Node and Edge objects
# with the original tree.
def subtree_with_all_paths(nodes)
  wanted = {}
  nodes.each { |x| wanted[x] = true }
  # collect the nodes on the path between every pair of given nodes
  nodes.each_index do |i|
    node1 = nodes[i]
    i.times do |j|
      node2 = nodes[j]
      next if node1 == node2
      between = begin
                  self.path(node1, node2)
                rescue IndexError, NoPathError
                  []
                end
      between.each { |x| wanted[x] = true }
    end
  end
  self.subtree(wanted.keys)
end

# Merges the nodes and edges of _other_ into this tree.
# Where both trees have the same edge, the one from _other_ wins.
# Returns self.
# Behaviour is unspecified when _other_ is not a Tree object.
# The Node and Edge objects of _other_ are shared with this tree
# afterwards, not copied.
def concat(other)
  #raise TypeError unless other.kind_of?(self.class)
  _clear_cache
  other.each_node { |node| self.add_node(node) }
  other.each_edge do |node1, node2, edge|
    self.add_edge(node1, node2, edge)
  end
  self
end

# Finds the path leading from node1 to node2.
# Returns an array of nodes that starts with node1 and ends with node2.
# Raises IndexError when either node is unknown to the tree and
# NoPathError when the two nodes are not connected.
# The result is unspecified for cyclic trees.
def path(node1, node2)
  raise IndexError, 'node1 not found' unless @pathway.graph[node1]
  raise IndexError, 'node2 not found' unless @pathway.graph[node2]
  if node1 == node2 then
    [ node1 ]
  elsif @pathway.graph[node1][node2] then
    # adjacent nodes need no search
    [ node1, node2 ]
  else
    _step, route = @pathway.bfs_shortest_path(node1, node2)
    unless route[0] == node1 and route[-1] == node2 then
      raise NoPathError, 'node1 and node2 are not connected'
    end
    route
  end
end

# Walks along the path from node1 to node2 and yields
# source, target and edge for every step. Returns self.
# The result is unspecified for cyclic trees.
def each_edge_in_path(node1, node2)
  self.path(node1, node2).each_cons(2) do |source, target|
    edge = self.get_edge(source, target)
    yield source, target, edge
  end
  self
end

# Returns distance between node1 and node2.
# It would raise error if the edges didn't contain distance values.
# The result is unspecified for cyclic trees. def distance(node1, node2) distance = 0 self.each_edge_in_path(node1, node2) do |source, target, edge| distance += get_edge_distance(edge) end distance end # (private) get parent only by using cache def _get_cached_parent(node, root) @cache_parent[root] ||= Hash.new cache = @cache_parent[root] if node == root then unless cache.has_key?(root) then self.adjacent_nodes(root).each do |n| cache[n] ||= root if n != root end cache[root] = nil end parent = nil else unless parent = cache[node] then parent = self.adjacent_nodes(node).find { |n| (m = cache[n]) && (m != node) } _cache_parent(node, parent, root) if parent end parent end end private :_get_cached_parent # (private) set parent cache def _cache_parent(node, parent, root) return unless parent cache = @cache_parent[root] cache[node] = parent self.adjacent_nodes(node).each do |n| cache[n] ||= node if n != parent end end private :_cache_parent # Gets the parent node of the _node_. # If _root_ isn't specified or _root_ is nil, @root is used. # Returns an Node object or nil. # The result is unspecified for cyclic trees. def parent(node, root = nil) root ||= @root raise IndexError, 'can not get parent for unrooted tree' unless root unless ret = _get_cached_parent(node, root) then ret = self.path(root, node)[-2] _cache_parent(node, ret, root) end ret end # Gets the adjacent children nodes of the _node_. # If _root_ isn't specified or _root_ is nil, @root is used. # Returns an array of Nodes. # The result is unspecified for cyclic trees. def children(node, root = nil) root ||= @root c = self.adjacent_nodes(node) c.delete(self.parent(node, root)) c end # Gets all descendent nodes of the _node_. # If _root_ isn't specified or _root_ is nil, @root is used. # Returns an array of Nodes. # The result is unspecified for cyclic trees. 
def descendents(node, root = nil)
  root ||= @root
  depth_of, came_from = @pathway.breadth_first_search(root)
  base_depth = depth_of[node]
  found = []
  depth_of.each do |candidate, depth|
    # only nodes deeper than _node_ can be below it
    next unless depth > base_depth
    # climb towards the root until we hit _node_ or leave its level
    cursor = candidate
    while cursor = came_from[cursor]
      if cursor == node then
        found << candidate
        break
      end
      break if depth_of[cursor] <= base_depth
    end
  end
  found
end

# Without _node_ (nil), returns every leaf of the tree
# (nodes connected with one edge).
# With _node_, returns the leaves among the descendents of _node_.
# If _root_ isn't specified or _root_ is nil, @root is used.
# Returns an array of Nodes.
# The result is unspecified for cyclic trees.
def leaves(node = nil, root = nil)
  if node then
    root ||= @root
    self.descendents(node, root).find_all do |x|
      self.adjacent_nodes(x).size == 1
    end
  else
    tips = []
    self.each_node { |x| tips << x if self.out_degree(x) == 1 }
    tips
  end
end

# Gets all ancestral nodes of the _node_, nearest ancestor first.
# If _root_ isn't specified or _root_ is nil, @root is used.
# Returns an array of Nodes.
# The result is unspecified for cyclic trees.
def ancestors(node, root = nil)
  root ||= @root
  trail = self.path(root, node)
  (trail - [ node ]).reverse
end

# Gets the lowest common ancestor of the two nodes.
# If _root_ isn't specified or _root_ is nil, @root is used.
# Returns a Node object or nil.
# The result is unspecified for cyclic trees.
def lowest_common_ancestor(node1, node2, root = nil)
  root ||= @root
  _depth_of, came_from = @pathway.breadth_first_search(root)
  # chain of nodes from a start node up towards the root (start included)
  chain_from = lambda do |start|
    chain = []
    cursor = start
    loop do
      chain << cursor
      cursor = came_from[cursor]
      break unless cursor
    end
    chain
  end
  # the first shared node, in node1's bottom-up order, is the LCA
  (chain_from.call(node1) & chain_from.call(node2)).first
end

# Returns total distance of all edges.
# It would raise error if some edges didn't contain distance values.
def total_distance
  sum = 0
  self.each_edge do |_source, _target, edge|
    sum += get_edge_distance(edge)
  end
  sum
end

# Calculates distance matrix of given nodes.
# If _nodes_ is nil, or is omitted, it acts the same as
# tree.distance_matrix(tree.leaves).
# Returns a matrix object.
# The result is unspecified for cyclic trees.
# Note 1: The diagonal values of the matrix are 0.
# Note 2: If the distance cannot be calculated, nil will be set.
def distance_matrix(nodes = nil)
  nodes ||= self.leaves
  rows = []
  nodes.each_index do |i|
    row = nodes.each_index.collect do |j|
      # value already computed for the mirrored cell (j, i), if any
      mirrored = rows[j] && rows[j][i]
      if i == j then
        0
      elsif mirrored then
        mirrored
      else
        begin
          self.distance(nodes[i], nodes[j])
        rescue StandardError
          # distance not computable for this pair
          nil
        end
      end
    end
    rows << row
  end
  Matrix.rows(rows, false)
end

# Builds the adjacency matrix of the tree, restricted to _nodes_.
# If _nodes_ is nil or is omitted,
# it acts the same as tree.adjacency_matrix(tree.nodes).
# Cells without an edge hold _default_value_ and the diagonal holds
# _diagonal_value_.
# If a block is given, for each edge,
# it yields _source_, _target_, and _edge_, and
# stores the returned value of the block.
# Without blocks, the edge object itself is stored.
# Returns a matrix object.
def adjacency_matrix(nodes = nil, default_value = nil, diagonal_value = nil) #:yields: source, target, edge
  nodes ||= self.nodes
  size = nodes.size
  index_of = {}
  nodes.each_with_index { |x, i| index_of[x] = i }
  # prepares a size x size grid with independent rows
  grid = Array.new(size) { Array.new(size, default_value) }
  size.times { |i| grid[i][i] = diagonal_value }
  # fills the grid symmetrically from each edge
  self.each_edge do |source, target, edge|
    row = index_of[source]
    col = index_of[target]
    next unless row && col
    val = if block_given? then
            yield(source, target, edge)
          else
            edge
          end
    grid[row][col] = val
    grid[col][row] = val
  end
  Matrix.rows(grid, false)
end

# Removes all nodes that are not branches nor leaves.
# That is, removes nodes connected with exactly two edges.
# For each removed node, two adjacent edges are merged and
# a new edge are created.
# Returns removed nodes.
# Note that orphan nodes are still kept unchanged.
def remove_nonsense_nodes
  _clear_cache
  # collect the nodes that have exactly two neighbours
  hash = {}
  self.each_node do |node|
    hash[node] = true if @pathway.graph[node].size == 2
  end
  hash.each_key do |node|
    # the neighbour list is re-read here for every node, so it reflects
    # the merges already done for earlier nodes in this loop
    adjs = @pathway.graph[node].keys
    edges = @pathway.graph[node].values
    new_edge = get_edge_merged(edges[0], edges[1])
    # detach the node from both neighbours, drop it from the graph and
    # connect the two neighbours directly with the merged edge
    @pathway.graph[adjs[0]].delete(node)
    @pathway.graph[adjs[1]].delete(node)
    @pathway.graph.delete(node)
    @pathway.append(Bio::Relation.new(adjs[0], adjs[1], new_edge))
  end
  #@pathway.to_relations
  # drop every relation that still mentions a removed node
  @pathway.relations.reject! do |rel|
    hash[rel.node[0]] or hash[rel.node[1]]
  end
  return hash.keys
end

# Insert a new node between adjacent nodes node1 and node2.
# The old edge between node1 and node2 is changed to the edge
# between new_node and node2.
# The edge between node1 and new_node is newly created.
#
# If new_distance is specified, the distance between
# node1 and new_node is set to new_distance, and
# distance between new_node and node2 is set to
# tree.get_edge(node1, node2).distance - new_distance.
#
# Returns self.
# If node1 and node2 are not adjacent, raises IndexError.
#
# If new_node already exists in the tree, the tree would become
# circular. In addition, if the edge between new_node and
# node1 (or node2) already exists, it will be erased.
def insert_node(node1, node2, new_node, new_distance = nil)
  # the assignment in the condition is intentional: _edge_ is reused below
  unless edge = self.get_edge(node1, node2) then
    raise IndexError, 'nodes not found or two nodes are not adjacent'
  end
  _clear_cache
  new_edge = Edge.new(new_distance)
  self.remove_edge(node1, node2)
  self.add_edge(node1, new_node, new_edge)
  if new_distance and old_distance = get_edge_distance(edge) then
    # the old edge keeps whatever length is left after the new edge
    old_distance -= new_distance
    begin
      edge.distance = old_distance
    rescue NoMethodError
      # the edge object has no distance= writer, so the bare distance
      # value is used as the edge instead
      edge = old_distance
    end
  end
  self.add_edge(new_node, node2, edge)
  self
end
end #class Tree
end #module Bio

#---
# temporary added
#+++
require 'bio/tree/output'
bio-1.4.3.0001/lib/bio/location.rb0000644000004100000410000006471412200110570016353 0ustar www-datawww-data#
# = bio/location.rb - Locations/Location class (GenBank location format)
#
# Copyright:: Copyright (C) 2001, 2005 Toshiaki Katayama
#             2006 Jan Aerts
#             2008 Naohisa Goto
# License:: The Ruby License
#
# $Id:$
#

module Bio

# == Description
#
# The Bio::Location class describes the position of a genomic locus.
# Typically, Bio::Location objects are created automatically when the
# user creates a Bio::Locations object, instead of initialized directly.
#
# == Usage
#
#   location = Bio::Location.new('500..550')
#   puts "start=" + location.from.to_s + ";end=" + location.to.to_s
#
#   #, or better: through Bio::Locations
#   locations = Bio::Locations.new('500..550')
#   locations.each do |location|
#     puts "start=" + location.from.to_s + ";end=" + location.to.to_s
#   end
#
class Location

  include Comparable

  # Parses a 'location' segment, which can be 'ID:' + ('n' or 'n..m' or 'n^m'
  # or "seq") with '<' or '>', and returns a Bio::Location object.
#
#   location = Bio::Location.new('500..550')
#
# ---
# *Arguments*:
# * (required) _str_: GenBank style position string (see Bio::Locations
#   documentation)
# *Returns*:: the Bio::Location object
# *Raises*:: RuntimeError if the string is in none of the known formats
def initialize(location = nil)
  xref_id = lt = gt = carat = sequence = nil
  s = e = nil

  if location
    if location =~ /:/ # (G) ID:location
      # split the external entry ID from the position part
      xref_id, location = location.split(':')
    end
    lt = true if location =~ /</ # (I) open start, e.g. <1..10
    gt = true if location =~ />/ # (I) open end, e.g. 1..>10
  end

  # s : start base, e : end base => from, to
  case location
  when /^[<>]?(\d+)$/ # (A, I) n
    s = e = $1.to_i
  when /^[<>]?(\d+)\.\.[<>]?(\d+)$/ # (B, I) n..m
    s = $1.to_i
    e = $2.to_i
    if e - s < 0
      # raise "Error: invalid range : #{location}"
      $stderr.puts "[Warning] invalid range : #{location}" if $DEBUG
    end
  when /^[<>]?(\d+)\^[<>]?(\d+)$/ # (C, I) n^m
    s = $1.to_i
    e = $2.to_i
    carat = true
    # A site between bases must be n^n+1, or n^1 (wrapping around the
    # origin). Warn only when NEITHER form applies.
    if e - s != 1 and e != 1
      # raise "Error: invalid range : #{location}"
      $stderr.puts "[Warning] invalid range : #{location}" if $DEBUG
    end
  when /^"?([ATGCatgc]+)"?$/ # (H) literal sequence
    sequence = $1.downcase
    s = e = nil
  when nil
    # no location given: every attribute except strand stays nil
  else
    raise "Error: unknown location format : #{location}"
  end

  @from = s             # start position of the location
  @to = e               # end position of the location
  @strand = 1           # strand direction of the location
                        #   forward => 1 or complement => -1
  @sequence = sequence  # literal sequence of the location
  @lt = lt              # true if the position contains '<'
  @gt = gt              # true if the position contains '>'
  @xref_id = xref_id    # link to the external entry as GenBank ID
  @carat = carat        # true if the location indicates the site
                        # between two adjoining nucleotides
end

# (Integer) start position of the location
attr_accessor :from

# (Integer) end position of the location
attr_accessor :to

# (Integer) strand direction of the location
# (forward => 1 or complement => -1)
attr_accessor :strand

# (String) literal sequence of the location
attr_accessor :sequence

# (true, false or nil) true if the position contains '<'
attr_accessor :lt

# (true, false or nil) true if the position contains '>'
attr_accessor :gt
# (String) link to the external entry as GenBank ID
attr_accessor :xref_id

# (true, false or nil) true if the location indicates the site
# between two adjoining nucleotides
attr_accessor :carat

# Flips the strand of this location (1 <-> -1).
# This is a destructive method (it modifies the receiver),
# but the "sequence" attribute is left untouched.
# ---
# *Returns*:: the Bio::Location object
def complement
  @strand = @strand * -1
  self # return Location object
end

# Sets the literal sequence of the location (stored in lower case).
# ---
# *Arguments*:
# * (required) _sequence_: sequence to be used to replace the sequence
#   at the location
# *Returns*:: the Bio::Location object
def replace(sequence)
  lowered = sequence.downcase
  @sequence = lowered
  self # return Location object
end

# Returns the range (from..to) of the location as a Range object.
def range
  Range.new(@from, @to)
end

# Orders two Bio::Location objects (mainly to facilitate the use of
# Comparable). Locations are ordered by start position first; when
# the start positions are equal, the end positions decide.
# Positions are compared after conversion with to_f.
# ---
# *Arguments*:
# * (required) _other location_: a Bio::Location object
# *Returns*::
# * -1 if self < other location
# * 1 if self > other location
# * 0 if both location are the same
# * nil if the argument is not a Bio::Location object
def <=>(other)
  return nil unless other.kind_of?(Bio::Location)
  [ [ @from, other.from ], [ @to, other.to ] ].each do |mine, theirs|
    a = mine.to_f
    b = theirs.to_f
    return -1 if a < b
    return 1 if a > b
  end
  0
end

# If _other_ is equal with the self, returns true.
# Otherwise, returns false.
# --- # *Arguments*: # * (required) _other_: any object # *Returns*:: true or false def ==(other) return true if super(other) return false unless other.instance_of?(self.class) flag = false [ :from, :to, :strand, :sequence, :lt, :gt, :xref_id, :carat ].each do |m| begin flag = (self.__send__(m) == other.__send__(m)) rescue NoMethodError, ArgumentError, NameError flag = false end break unless flag end flag end end # Location # == Description # # The Bio::Locations class is a container for Bio::Location objects: # creating a Bio::Locations object (based on a GenBank style position string) # will spawn an array of Bio::Location objects. # # == Usage # # locations = Bio::Locations.new('join(complement(500..550), 600..625)') # locations.each do |loc| # puts "class = " + loc.class.to_s # puts "range = #{loc.from}..#{loc.to} (strand = #{loc.strand})" # end # # Output would be: # # class = Bio::Location # # range = 500..550 (strand = -1) # # class = Bio::Location # # range = 600..625 (strand = 1) # # # For the following three location strings, print the span and range # ['one-of(898,900)..983', # 'one-of(5971..6308,5971..6309)', # '8050..one-of(10731,10758,10905,11242)'].each do |loc| # location = Bio::Locations.new(loc) # puts location.span # puts location.range # end # # === GenBank location descriptor classification # # ==== Definition of the position notation of the GenBank location format # # According to the GenBank manual 'gbrel.txt', position notations were # classified into 10 patterns - (A) to (J). # # 3.4.12.2 Feature Location # # The second column of the feature descriptor line designates the # location of the feature in the sequence. The location descriptor # begins at position 22. Several conventions are used to indicate # sequence location. # # Base numbers in location descriptors refer to numbering in the entry, # which is not necessarily the same as the numbering scheme used in the # published report. 
The first base in the presented sequence is numbered # base 1. Sequences are presented in the 5 to 3 direction. # # Location descriptors can be one of the following: # # (A) 1. A single base; # # (B) 2. A contiguous span of bases; # # (C) 3. A site between two bases; # # (D) 4. A single base chosen from a range of bases; # # (E) 5. A single base chosen from among two or more specified bases; # # (F) 6. A joining of sequence spans; # # (G) 7. A reference to an entry other than the one to which the feature # belongs (i.e., a remote entry), followed by a location descriptor # referring to the remote sequence; # # (H) 8. A literal sequence (a string of bases enclosed in quotation marks). # # ==== Description commented with pattern IDs. # # (C) A site between two residues, such as an endonuclease cleavage site, is # indicated by listing the two bases separated by a carat (e.g., 23^24). # # (D) A single residue chosen from a range of residues is indicated by the # number of the first and last bases in the range separated by a single # period (e.g., 23.79). The symbols < and > indicate that the end point # (I) of the range is beyond the specified base number. # # (B) A contiguous span of bases is indicated by the number of the first and # last bases in the range separated by two periods (e.g., 23..79). The # (I) symbols < and > indicate that the end point of the range is beyond the # specified base number. Starting and ending positions can be indicated # by base number or by one of the operators described below. # # Operators are prefixes that specify what must be done to the indicated # sequence to locate the feature. The following are the operators # available, along with their most common format and a description. # # (J) complement (location): The feature is complementary to the location # indicated. Complementary strands are read 5 to 3. # # (F) join (location, location, .. 
#   location): The indicated elements should
#   be placed end to end to form one contiguous sequence.
#
#   (F) order (location, location, .. location): The elements are found in the
#   specified order in the 5 to 3 direction, but nothing is implied about
#   the rationality of joining them.
#
#   (F) group (location, location, .. location): The elements are related and
#   should be grouped together, but no order is implied.
#
#   (E) one-of (location, location, .. location): The element can be any one,
#   but only one, of the items listed.
#
# === Reduction strategy of the position notations
#
# * (A) Location n
# * (B) Location n..m
# * (C) Location n^m
# * (D) (n.m)                => Location n
# * (E)
#   * one-of(n,m,..)         => Location n
#   * one-of(n..m,..)        => Location n..m
# * (F)
#   * order(loc,loc,..)      => join(loc, loc,..)
#   * group(loc,loc,..)      => join(loc, loc,..)
#   * join(loc,loc,..)       => Sequence
# * (G) ID:loc               => Location with ID
# * (H) "atgc"               => Location only with Sequence
# * (I)
#   * <n                     => Location n with lt flag
#   * >n                     => Location n with gt flag
#   * <n..m                  => Location n..m with lt flag
#   * n..>m                  => Location n..m with gt flag
#   * <n..>m                 => Location n..m with lt, gt flag
# * (J) complement(loc)      => Sequence
# * (K) replace(loc, str)    => Location with replacement Sequence
#
class Locations
  include Enumerable

  # Parses a GenBank style position string and returns a Bio::Locations
  # object, which contains a list of Bio::Location objects.
  #
  #   locations = Bio::Locations.new('join(complement(500..550), 600..625)')
  #
  # An Array may be given instead of a String; it is then stored as the
  # list of locations as-is, without any parsing.
  #
  # ---
  # *Arguments*:
  # * (required) _position_: GenBank style position string, or an Array
  # *Returns*:: Bio::Locations object
  def initialize(position)
    # stays nil for join(); set to :order / :group while parsing
    @operator = nil
    if position.is_a? Array
      @locations = position
    else
      position = gbl_cleanup(position) # preprocessing
      @locations = gbl_pos2loc(position)
      # create an Array of Bio::Location objects
    end
  end

  # (Array) An Array of Bio::Location objects
  attr_accessor :locations

  # (Symbol or nil) Operator.
  # nil (means :join), :order, or :group (obsolete).
attr_accessor :operator

# Evaluate equality of Bio::Locations object, ignoring the order of
# the locations (both lists are sorted before being compared).
# Returns nil when _other_ is not a Bio::Locations object.
def equals?(other)
  return nil unless other.kind_of?(Bio::Locations)
  (self.sort == other.sort) ? true : false
end

# If _other_ is equal with the self, returns true.
# Otherwise, returns false.
# Both the list of locations and the operator must match.
# ---
# *Arguments*:
# * (required) _other_: any object
# *Returns*:: true or false
def ==(other)
  return true if super(other)
  return false unless other.instance_of?(self.class)
  same_locations = (self.locations == other.locations)
  (same_locations && self.operator == other.operator) ? true : false
end

# Iterates on each Bio::Location object.
def each
  @locations.each { |loc| yield(loc) }
end

# Returns nth Bio::Location object.
def [](n)
  @locations[n]
end

# Returns first Bio::Location object.
def first
  @locations.first
end

# Returns last Bio::Location object.
def last
  @locations.last
end

# Returns an Array containing overall min and max position [min, max]
# of this Bio::Locations object.
def span
  leftmost = @locations.min { |x, y| x.from <=> y.from }
  rightmost = @locations.max { |x, y| x.to <=> y.to }
  [ leftmost.from, rightmost.to ]
end

# Similar to span, but returns a Range object min..max
def range
  lower, upper = span
  Range.new(lower, upper)
end

# Returns a length of the spliced RNA.
# A location with a literal sequence counts as the size of that
# sequence; any other location counts as to - from + 1.
def length
  @locations.inject(0) do |total, loc|
    total + (loc.sequence ? loc.sequence.size : (loc.to - loc.from + 1))
  end
end
alias size length

# Converts absolute position in the whole of the DNA sequence to relative
# position in the locus.
#
# This method can for example be used to relate positions in a DNA-sequence
# with those in RNA. In this use, the optional ':aa'-flag returns the
# position of the associated amino-acid rather than the nucleotide.
#
#   loc = Bio::Locations.new('complement(12838..13533)')
#   puts loc.relative(13524)        # => 10
#   puts loc.relative(13506, :aa)   # => 10
#
# ---
# *Arguments*:
# * (required) _position_: nucleotide position within whole of the sequence
# * _:aa_: flag that lets method return position in aminoacid coordinates
#   (with _:location_ the method does nothing and returns nil)
# *Returns*:: position within the location, or nil if out of range
def relative(n, type = nil)
  case type
  when :location
    ;
  when :aa
    # nucleotide position -> 1-based codon (amino acid) position
    if n = abs2rel(n)
      (n - 1) / 3 + 1
    else
      nil
    end
  else
    abs2rel(n)
  end
end

# Converts relative position in the locus to position in the whole of the
# DNA sequence.
#
# This method can for example be used to relate positions in a DNA-sequence
# with those in RNA. In this use, the optional ':aa'-flag treats the given
# position as an amino-acid position rather than a nucleotide position.
#
#   loc = Bio::Locations.new('complement(12838..13533)')
#   puts loc.absolute(10)          # => 13524
#   puts loc.absolute(10, :aa)     # => 13506
#
# ---
# *Arguments*:
# * (required) _position_: nucleotide position within locus
# * _:aa_: flag to be used if _position_ is a aminoacid position rather than
#   a nucleotide position
#   (with _:location_ the method does nothing and returns nil)
# *Returns*:: position within the whole of the sequence, or nil if out of range
def absolute(n, type = nil)
  case type
  when :location
    ;
  when :aa
    # first nucleotide of the n-th codon
    n = (n - 1) * 3 + 1
    rel2abs(n)
  else
    rel2abs(n)
  end
end

# String representation.
#
# Note: In some cases, it fails to detect whether
# "complement(join(...))" or "join(complement(..))", and whether
# "complement(order(...))" or "order(complement(..))".
#
# ---
# *Returns*:: String
def to_s
  return '' if @locations.empty?
  complement_join = false
  locs = @locations
  # Use the "complement(join(...))" form only when there are two or more
  # locations, all of them are on the complement strand without an
  # external ID, and their start positions never increase.
  if locs.size >= 2 and locs.inject(true) do |flag, loc|
      # check if each location is complement
      (flag && (loc.strand == -1) && !loc.xref_id)
    end and locs.inject(locs[0].from) do |pos, loc|
      if pos then
        (pos >= loc.from) ? loc.from : false
      else
        false
      end
    end then
    locs = locs.reverse
    complement_join = true
  end
  locs = locs.collect do |loc|
    lt = loc.lt ? '<' : ''
    gt = loc.gt ? '>' : ''
    str = if loc.from == loc.to then
            "#{lt}#{gt}#{loc.from.to_i}"
          elsif loc.carat then
            "#{lt}#{loc.from.to_i}^#{gt}#{loc.to.to_i}"
          else
            "#{lt}#{loc.from.to_i}..#{gt}#{loc.to.to_i}"
          end
    if loc.xref_id and !loc.xref_id.empty? then
      str = "#{loc.xref_id}:#{str}"
    end
    # per-location complement() only when no outer complement() is used
    if loc.strand == -1 and !complement_join then
      str = "complement(#{str})"
    end
    if loc.sequence then
      str = "replace(#{str},\"#{loc.sequence}\")"
    end
    str
  end
  if locs.size >= 2 then
    # a nil operator means join
    op = (self.operator || 'join').to_s
    result = "#{op}(#{locs.join(',')})"
  else
    result = locs[0]
  end
  if complement_join then
    result = "complement(#{result})"
  end
  result
end

private

# Preprocessing to clean up the position notation.
# Removes white space and reduces the (D) "n.m" and (E) "one-of(...)"
# notations to a single position. Returns the cleaned-up String;
# the argument itself is not modified.
def gbl_cleanup(position)
  # sometimes position contains white spaces...
  position = position.gsub(/\s+/, '')

  # select one base                                   # (D) n.m
  #   $1 = optional leading "..", $2 = n, $3 = m,
  #   not followed by ":" (so that entry IDs are left alone)
  position.gsub!(/(\.{2})?\(?([<>\d]+)\.([<>\d]+)(?!:)\)?/) do |match|
    if $1
      $1 + $3 # ..(n.m) => ..m
    else
      $2 # (?n.m)? => n
    end
  end

  # select the 1st location                           # (E) one-of()
  #   $1 = optional leading "..", $2 = first item, $3 = remaining items
  position.gsub!(/(\.{2})?one-of\(([^,]+),([^)]+)\)/) do |match|
    if $1
      $1 + $3.gsub(/.*,(.*)/, '\1') # ..one-of(n,m) => ..m
    else
      $2 # one-of(n,m) => n
    end
  end

  ## substitute order(), group() by join()            # (F) group(), order()
  #position.gsub!(/(order|group)/, 'join')

  return position
end

# Parse position notation and create Location objects.
def gbl_pos2loc(position) ary = [] case position when /^(join|order|group)\((.*)\)$/ # (F) join() if $1 != "join" then @operator = $1.intern end position = $2 join_list = [] # sub positions to join bracket = [] # position with bracket s_count = 0 # stack counter position.split(',').each do |sub_pos| case sub_pos when /\(.*\)/ join_list << sub_pos when /\(/ s_count += 1 bracket << sub_pos when /\)/ s_count -= 1 bracket << sub_pos if s_count == 0 join_list << bracket.join(',') end else if s_count == 0 join_list << sub_pos else bracket << sub_pos end end end join_list.each do |pos| ary << gbl_pos2loc(pos) end when /^complement\((.*)\)$/ # (J) complement() position = $1 gbl_pos2loc(position).reverse_each do |location| ary << location.complement end when /^replace\(([^,]+),"?([^"]*)"?\)/ # (K) replace() position = $1 sequence = $2 ary << gbl_pos2loc(position).first.replace(sequence) else # (A, B, C, G, H, I) ary << Location.new(position) end return ary.flatten end # Convert the relative position to the absolute position def rel2abs(n) return nil unless n > 0 # out of range cursor = 0 @locations.each do |x| if x.sequence len = x.sequence.size else len = x.to - x.from + 1 end if n > cursor + len cursor += len else if x.strand < 0 return x.to - (n - cursor - 1) else return x.from + (n - cursor - 1) end end end return nil # out of range end # Convert the absolute position to the relative position def abs2rel(n) return nil unless n > 0 # out of range cursor = 0 @locations.each do |x| if x.sequence len = x.sequence.size else len = x.to - x.from + 1 end if n < x.from or n > x.to then cursor += len else if x.strand < 0 then return x.to - (n - cursor - 1) else return n + cursor + 1 - x.from end end end return nil # out of range end end # Locations end # Bio # === GenBank location examples # # (C) n^m # # * [AB015179] 754^755 # * [AF179299] complement(53^54) # * [CELXOL1ES] replace(4480^4481,"") # * [ECOUW87] replace(4792^4793,"a") # * [APLPCII] 
replace(1905^1906,"acaaagacaccgccctacgcc") # # (D) (n.m) # # * [HACSODA] 157..(800.806) # * [HALSODB] (67.68)..(699.703) # * [AP001918] (45934.45974)..46135 # * [BACSPOJ] <180..(731.761) # * [BBU17998] (88.89)..>1122 # * [ECHTGA] complement((1700.1708)..(1715.1721)) # * [ECPAP17] complement(<22..(255.275)) # * [LPATOVGNS] complement((64.74)..1525) # * [PIP404CG] join((8298.8300)..10206,1..855) # * [BOVMHDQBY4] join(M30006.1:(392.467)..575,M30005.1:415..681,M30004.1:129..410,M30004.1:907..1017,521..534) # * [HUMMIC2A] replace((651.655)..(651.655),"") # * [HUMSOD102] order(L44135.1:(454.445)..>538,<1..181) # # (E) one-of # # * [ECU17136] one-of(898,900)..983 # * [CELCYT1A] one-of(5971..6308,5971..6309) # * [DMU17742] 8050..one-of(10731,10758,10905,11242) # * [PFU27807] one-of(623,627,632)..one-of(628,633,637) # * [BTBAINH1] one-of(845,953,963,1078,1104)..1354 # * [ATU39449] join(one-of(969..1094,970..1094,995..1094,1018..1094),1518..1587,1726..2119,2220..2833,2945..3215) # # (F) join, order, group # # * [AB037374S2] join(AB037374.1:1..177,1..807) # * [AP000001] join(complement(1..61),complement(AP000007.1:252907..253505)) # * [ASNOS11] join(AF130124.1:<2563..2964,AF130125.1:21..157,AF130126.1:12..174,AF130127.1:21..112,AF130128.1:21..162,AF130128.1:281..595,AF130128.1:661..842,AF130128.1:916..1030,AF130129.1:21..115,AF130130.1:21..165,AF130131.1:21..125,AF130132.1:21..428,AF130132.1:492..746,AF130133.1:21..168,AF130133.1:232..401,AF130133.1:475..906,AF130133.1:970..1107,AF130133.1:1176..1367,21..>128) # # * [AARPOB2] order(AF194507.1:<1..510,1..>871) # * [AF006691] order(912..1918,20410..21416) # * [AF024666] order(complement(18919..19224),complement(13965..14892)) # * [AF264948] order(27066..27076,27089..27099,27283..27314,27330..27352) # * [D63363] order(3..26,complement(964..987)) # * [ECOCURLI2] order(complement(1009..>1260),complement(AF081827.1:<1..177)) # * [S72388S2] order(join(S72388.1:757..911,S72388.1:609..1542),1..>139) # * [HEYRRE07] 
order(complement(1..38),complement(M82666.1:1..140),complement(M82665.1:1..176),complement(M82664.1:1..215),complement(M82663.1:1..185),complement(M82662.1:1..49),complement(M82661.1:1..133)) # * [COL11A1G34] order(AF101079.1:558..1307,AF101080.1:1..749,AF101081.1:1..898,AF101082.1:1..486,AF101083.1:1..942,AF101084.1:1..1734,AF101085.1:1..2385,AF101086.1:1..1813,AF101087.1:1..2287,AF101088.1:1..1073,AF101089.1:1..989,AF101090.1:1..5017,AF101091.1:1..3401,AF101092.1:1..1225,AF101093.1:1..1072,AF101094.1:1..989,AF101095.1:1..1669,AF101096.1:1..918,AF101097.1:1..1114,AF101098.1:1..1074,AF101099.1:1..1709,AF101100.1:1..986,AF101101.1:1..1934,AF101102.1:1..1699,AF101103.1:1..940,AF101104.1:1..2330,AF101105.1:1..4467,AF101106.1:1..1876,AF101107.1:1..2465,AF101108.1:1..1150,AF101109.1:1..1170,AF101110.1:1..1158,AF101111.1:1..1193,1..611) # # group() are found in the COMMENT field only (in GenBank 122.0) # # gbpat2.seq: FT repeat_region group(598..606,611..619) # gbpat2.seq: FT repeat_region group(8..16,1457..1464). # gbpat2.seq: FT variation group(t1,t2) # gbpat2.seq: FT variation group(t1,t3) # gbpat2.seq: FT variation group(t1,t2,t3) # gbpat2.seq: FT repeat_region group(11..202,203..394) # gbpri9.seq:COMMENT Residues reported = 'group(1..2145);'. 
# # (G) ID:location # # * [AARPOB2] order(AF194507.1:<1..510,1..>871) # * [AF178221S4] join(AF178221.1:<1..60,AF178222.1:1..63,AF178223.1:1..42,1..>90) # * [BOVMHDQBY4] join(M30006.1:(392.467)..575,M30005.1:415..681,M30004.1:129..410,M30004.1:907..1017,521..534) # * [HUMSOD102] order(L44135.1:(454.445)..>538,<1..181) # * [SL16SRRN1] order(<1..>267,X67092.1:<1..>249,X67093.1:<1..>233) # # (I) <, > # # * [A5U48871] <1..>318 # * [AA23SRRNP] <1..388 # * [AA23SRRNP] 503..>1010 # * [AAM5961] complement(<1..229) # * [AAM5961] complement(5231..>5598) # * [AF043934] join(<1,60..99,161..241,302..370,436..594,676..887,993..1141,1209..1329,1387..1559,1626..1646,1708..>1843) # * [BACSPOJ] <180..(731.761) # * [BBU17998] (88.89)..>1122 # * [AARPOB2] order(AF194507.1:<1..510,1..>871) # * [SL16SRRN1] order(<1..>267,X67092.1:<1..>249,X67093.1:<1..>233) # # (J) complement # # * [AF179299] complement(53^54) <= hoge insertion site etc. # * [AP000001] join(complement(1..61),complement(AP000007.1:252907..253505)) # * [AF209868S2] order(complement(1..>308),complement(AF209868.1:75..336)) # * [AP000001] join(complement(1..61),complement(AP000007.1:252907..253505)) # * [CPPLCG] complement(<1..(1093.1098)) # * [D63363] order(3..26,complement(964..987)) # * [ECHTGA] complement((1700.1708)..(1715.1721)) # * [ECOUXW] order(complement(1658..1663),complement(1636..1641)) # * [LPATOVGNS] complement((64.74)..1525) # * [AF129075] complement(join(71606..71829,75327..75446,76039..76203,76282..76353,76914..77029,77114..77201,77276..77342,78138..78316,79755..79892,81501..81562,81676..81856,82341..82490,84208..84287,85032..85122,88316..88403)) # * [ZFDYST2] join(AF137145.1:<1..18,complement(<1..99)) # # (K) replace # # * [CSU27710] replace(64,"A") # * [CELXOL1ES] replace(5256,"t") # * [ANICPC] replace(1..468,"") # * [CSU27710] replace(67..68,"GC") # * [CELXOL1ES] replace(4480^4481,"") <= ? 
only one case in GenBank 122.0 # * [ECOUW87] replace(4792^4793,"a") # * [CEU34893] replace(1..22,"ggttttaacccagttactcaag") # * [APLPCII] replace(1905^1906,"acaaagacaccgccctacgcc") # * [MBDR3S1] replace(1400..>9281,"") # * [HUMMHDPB1F] replace(complement(36..37),"ttc") # * [HUMMIC2A] replace((651.655)..(651.655),"") # * [LEIMDRPGP] replace(1..1554,"L01572") # * [TRBND3] replace(376..395,"atttgtgtgtggtaatta") # * [TRBND3] replace(376..395,"atttgtgtgggtaatttta") # * [TRBND3] replace(376..395,"attttgttgttgttttgttttgaatta") # * [TRBND3] replace(376..395,"atgtgtggtgaatta") # * [TRBND3] replace(376..395,"atgtgtgtggtaatta") # * [TRBND3] replace(376..395,"gatttgttgtggtaatttta") # * [MSU09460] replace(193, <= replace(193, "t") # * [HUMMAGE12X] replace(3002..3003, <= replace(3002..3003, "GC") # * [ADR40FIB] replace(510..520, <= replace(510..520, "taatcctaccg") # * [RATDYIIAAB] replace(1306..1443,"aagaacatccacggagtcagaactgggctcttcacgccggatttggcgttcgaggccattgtgaaaaagcaggcaatgcaccagcaagctcagttcctacccctgcgtggacctggttatccaggagctaatcagtacagttaggtggtcaagctgaaagagccctgtctgaaa") # bio-1.4.3.0001/lib/bio/alignment.rb0000644000004100000410000022276112200110570016517 0ustar www-datawww-data# # = bio/alignment.rb - multiple alignment of sequences # # Copyright:: Copyright (C) 2003, 2005, 2006 # GOTO Naohisa # # License:: The Ruby License # # $Id: alignment.rb,v 1.24 2007/12/26 14:08:02 ngoto Exp $ # # = About Bio::Alignment # # Please refer document of Bio::Alignment module. # # = References # # * Bio::Align::AlignI class of the BioPerl. # http://doc.bioperl.org/releases/bioperl-1.4/Bio/Align/AlignI.html # # * Bio::SimpleAlign class of the BioPerl. # http://doc.bioperl.org/releases/bioperl-1.4/Bio/SimpleAlign.html # require 'tempfile' require 'bio/command' require 'bio/sequence' #--- # (depends on autoload) #require 'bio/appl/gcg/seq' #+++ module Bio # # = About Bio::Alignment # # Bio::Alignment is a namespace of classes/modules for multiple sequence # alignment. 
# # = Multiple alignment container classes # # == Bio::Alignment::OriginalAlignment # # == Bio::Alignment::SequenceArray # # == Bio::Alignment::SequenceHash # # = Bio::Alignment::Site # # = Modules # # == Bio::Alignment::EnumerableExtension # # Mix-in for classes included Enumerable. # # == Bio::Alignment::ArrayExtension # # Mix-in for Array or Array-like classes. # # == Bio::Alignment::HashExtension # # Mix-in for Hash or Hash-like classes. # # == Bio::Alignment::SiteMethods # # == Bio::Alignment::PropertyMethods # # = Bio::Alignment::GAP # # = Compatibility from older BioRuby # module Alignment autoload :MultiFastaFormat, 'bio/appl/mafft/report' # Bio::Alignment::PropertyMethods is a set of methods to treat # the gap character and so on. module PropertyMethods # regular expression for detecting gaps. GAP_REGEXP = /[^a-zA-Z]/ # gap character GAP_CHAR = '-'.freeze # missing character MISSING_CHAR = '?'.freeze # If given character is a gap, returns true. # Otherwise, return false. # Note that s must be a String which contain a single character. def is_gap?(s) (gap_regexp =~ s) ? true : false end # Returns regular expression for checking gap. def gap_regexp ((defined? @gap_regexp) ? @gap_regexp : nil) or GAP_REGEXP end # regular expression for checking gap attr_writer :gap_regexp # Gap character. def gap_char ((defined? @gap_char) ? @gap_char : nil) or GAP_CHAR end # gap character attr_writer :gap_char # Character if the site is missing or unknown. def missing_char ((defined? @missing_char) ? @missing_char : nil) or MISSING_CHAR end # Character if the site is missing or unknown. attr_writer :missing_char # Returns class of the sequence. # If instance variable @seqclass (which can be # set by 'seqclass=' method) is set, simply returns the value. # Otherwise, returns the first sequence's class. # If no sequences are found, returns nil. def seqclass ((defined? @seqclass) ? @seqclass : nil) or String end # The class of the sequence. 
# The value must be String or its derivatives. attr_writer :seqclass # Returns properties defined in the object as an hash. def get_all_property ret = {} if defined? @gap_regexp ret[:gap_regexp] = @gap_regexp end if defined? @gap_char ret[:gap_char] = @gap_char end if defined? @missing_char ret[:missing_char] = @missing_char end if defined? @seqclass ret[:seqclass] = @seqclass end ret end # Sets properties from given hash. # hash would be a return value of get_character method. def set_all_property(hash) @gap_regexp = hash[:gap_regexp] if hash.has_key?(:gap_regexp) @gap_char = hash[:gap_char] if hash.has_key?(:gap_char) @missing_char = hash[:missing_char] if hash.has_key?(:missing_char) @seqclass = hash[:seqclass] if hash.has_key?(:seqclass) self end end #module PropertyMethods # Bio::Alignment::SiteMethods is a set of methods for # Bio::Alignment::Site. # It can also be used for extending an array of single-letter strings. module SiteMethods include PropertyMethods # If there are gaps, returns true. Otherwise, returns false. def has_gap? (find { |x| is_gap?(x) }) ? true : false end # Removes gaps in the site. (destructive method) def remove_gaps! flag = nil self.collect! do |x| if is_gap?(x) then flag = self; nil; else x; end end self.compact! flag end # Returns consensus character of the site. # If consensus is found, eturns a single-letter string. # If not, returns nil. def consensus_string(threshold = 1.0) return nil if self.size <= 0 return self[0] if self.sort.uniq.size == 1 h = Hash.new(0) self.each { |x| h[x] += 1 } total = self.size b = h.to_a.sort do |x,y| z = (y[1] <=> x[1]) z = (self.index(x[0]) <=> self.index(y[0])) if z == 0 z end if total * threshold <= b[0][1] then b[0][0] else nil end end # IUPAC nucleotide groups. Internal use only. 
IUPAC_NUC = [ %w( t u ), %w( m a c ), %w( r a g ), %w( w a t u ), %w( s c g ), %w( y c t u ), %w( k g t u ), %w( v a c g m r s ), %w( h a c t u m w y ), %w( d a g t u r w k ), %w( b c g t u s y k ), %w( n a c g t u m r w s y k v h d b ) ] # Returns an IUPAC consensus base for the site. # If consensus is found, eturns a single-letter string. # If not, returns nil. def consensus_iupac a = self.collect { |x| x.downcase }.sort.uniq if a.size == 1 then case a[0] when 'a', 'c', 'g', 't' a[0] when 'u' 't' else IUPAC_NUC.find { |x| a[0] == x[0] } ? a[0] : nil end elsif r = IUPAC_NUC.find { |x| (a - x).size <= 0 } then r[0] else nil end end # Table of strongly conserved amino-acid groups. # # The value of the tables are taken from BioPerl # (Bio/SimpleAlign.pm in BioPerl 1.0), # and the BioPerl's document says that # it is taken from Clustalw documentation and # These are all the positively scoring groups that occur in the # Gonnet Pam250 matrix. The strong and weak groups are # defined as strong score >0.5 and weak score =<0.5 respectively. # StrongConservationGroups = %w(STA NEQK NHQK NDEQ QHRK MILV MILF HY FYW).collect { |x| x.split('').sort } # Table of weakly conserved amino-acid groups. # # Please refer StrongConservationGroups document # for the origin of the table. WeakConservationGroups = %w(CSA ATV SAG STNK STPA SGND SNDEQK NDEQHK NEQHRK FVLIM HFY).collect { |x| x.split('').sort } # Returns the match-line character for the site. # This is amino-acid version. def match_line_amino(opt = {}) # opt[:match_line_char] ==> 100% equal default: '*' # opt[:strong_match_char] ==> strong match default: ':' # opt[:weak_match_char] ==> weak match default: '.' # opt[:mismatch_char] ==> mismatch default: ' ' mlc = (opt[:match_line_char] or '*') smc = (opt[:strong_match_char] or ':') wmc = (opt[:weak_match_char] or '.') mmc = (opt[:mismatch_char] or ' ') a = self.collect { |c| c.upcase }.sort.uniq a.extend(SiteMethods) if a.has_gap? 
then mmc elsif a.size == 1 then mlc elsif StrongConservationGroups.find { |x| (a - x).empty? } then smc elsif WeakConservationGroups.find { |x| (a - x).empty? } then wmc else mmc end end # Returns the match-line character for the site. # This is nucleic-acid version. def match_line_nuc(opt = {}) # opt[:match_line_char] ==> 100% equal default: '*' # opt[:mismatch_char] ==> mismatch default: ' ' mlc = (opt[:match_line_char] or '*') mmc = (opt[:mismatch_char] or ' ') a = self.collect { |c| c.upcase }.sort.uniq a.extend(SiteMethods) if a.has_gap? then mmc elsif a.size == 1 then mlc else mmc end end end #module SiteMethods # Bio::Alignment::Site stores bases or amino-acids in a # site of the alignment. # It would store multiple String objects of length 1. # Please refer to the document of Array and SiteMethods for methods. class Site < Array include SiteMethods end #module Site # The module Bio::Alignment::EnumerableExtension is a set of useful # methods for multiple sequence alignment. # It can be included by any classes or can be extended to any objects. # The classes or objects must have methods defined in Enumerable, # and must have the each method # which iterates over each sequence (or string) and yields # a sequence (or string) object. # # Optionally, if each_seq method is defined, # which iterates over each sequence (or string) and yields # each sequence (or string) object, it is used instead of each. # # Note that the each or each_seq method would be # called multiple times. # This means that the module is not suitable for IO objects. # In addition, break would be used in the given block and # destructive methods would be used to the sequences. # # For Array or Hash objects, you'd better using # ArrayExtension or HashExtension modules, respectively. # They would have built-in each_seq method and/or # some methods would be redefined. # module EnumerableExtension include PropertyMethods # Iterates over each sequences. # Yields a sequence. 
# It acts the same as Enumerable#each.
#
# You would redefine the method suitable for the class/object.
def each_seq(&block) #:yields: seq
  each(&block)
end

# Returns class of the sequence.
# If instance variable @seqclass (which can be
# set by 'seqclass=' method) is set, simply returns the value.
# Otherwise, returns the class of the first non-nil/non-false
# sequence. If no such sequence is found, returns String.
def seqclass
  return @seqclass if (defined? @seqclass) && @seqclass
  each_seq do |s|
    return s.class if s
  end
  String
end

# Returns the alignment length.
# Returns the longest length of the sequence in the alignment.
def alignment_length
  longest = 0
  each_seq do |s|
    n = s.length
    longest = n if n > longest
  end
  longest
end
alias seq_length alignment_length

# Gets a site of the position.
# Returns a Bio::Alignment::Site object.
#
# If the position is out of range, it returns the site
# of which all are gaps.
#
# It is a private method.
# Only difference from public alignment_site method is
# it does not do set_all_property(get_all_property).
def _alignment_site(position)
  column = Site.new
  each_seq do |s|
    residue = s[position, 1]
    # nil or empty means the sequence is too short here: pad with a gap
    column << (residue.to_s.empty? ? seqclass.new(gap_char) : residue)
  end
  column
end
private :_alignment_site

# Gets a site of the position.
# Returns a Bio::Alignment::Site object carrying the same
# properties (gap character etc.) as the alignment.
#
# If the position is out of range, it returns the site
# of which all are gaps.
def alignment_site(position)
  column = _alignment_site(position)
  column.set_all_property(get_all_property)
  column
end

# Iterates over each site of the alignment.
# It yields a Bio::Alignment::Site object (which inherits Array).
# It returns self.
def each_site
  props = get_all_property
  alignment_length.times do |pos|
    column = _alignment_site(pos)
    column.set_all_property(props)
    yield(column)
  end
  self
end

# Iterates over each site of the alignment, with specifying
# start, stop positions and step.
# It yields Bio::Alignment::Site object (which inherits Array).
# It returns self.
# It is same as
# <tt>start.step(stop, step) { |i| yield alignment_site(i) }</tt>.
def each_site_step(start, stop, step = 1)
  props = get_all_property
  start.step(stop, step) do |pos|
    column = _alignment_site(pos)
    column.set_all_property(props)
    yield(column)
  end
  self
end

# Iterates over each sequence and results running blocks
# are collected and returns a new alignment as a
# Bio::Alignment::SequenceArray object.
#
# Note that it would be redefined if you want to change
# return value's class.
#
def alignment_collect
  result = SequenceArray.new
  result.set_all_property(get_all_property)
  each_seq { |str| result << yield(str) }
  result
end

# Returns specified range of the alignment.
# For each sequence, the '[]' method (it may be String#[])
# is executed, and returns a new alignment
# as a Bio::Alignment::SequenceArray object.
#
# Unlike alignment_slice method, the result alignment are
# guaranteed to contain String object if the range specified
# is out of range.
#
# If you want to change return value's class, you should redefine
# alignment_collect method.
#
def alignment_window(*arg)
  alignment_collect do |s|
    s[*arg] || seqclass.new('')
  end
end
alias window alignment_window

# Iterates over each sliding window of the alignment.
# window_size is the size of sliding window.
# step is the step of each sliding.
# It yields a Bio::Alignment::SequenceArray object which contains
# each sliding window.
# It returns a Bio::Alignment::SequenceArray object which contains
# remainder alignment at the terminal end.
# If window_size is smaller than 0, it returns nil.
def each_window(window_size, step_size = 1)
  # Yields alignment_window(start, window_size) for each window start
  # and returns the part of the alignment not reached by the walk.
  # A non-negative step walks from the head and returns the tail after
  # the last window; a negative step walks from the tail and returns
  # the head before the last window. When no window fits (alignment
  # shorter than window_size) nothing is yielded and the whole
  # alignment is returned as the remainder.
  return nil if window_size < 0
  last_start = nil
  if step_size >= 0 then
    0.step(alignment_length - window_size, step_size) do |i|
      yield alignment_window(i, window_size)
      last_start = i
    end
    # last_start stays nil when the loop body never ran
    tail_from = (last_start ? last_start + window_size : 0)
    alignment_window(tail_from..-1)
  else
    i = alignment_length - window_size
    while i >= 0
      yield alignment_window(i, window_size)
      last_start = i
      i += step_size
    end
    if last_start then
      alignment_window(0...last_start)
    else
      alignment_window(0..-1)
    end
  end
end

# Iterates over each site of the alignment and results running the
# block are collected and returns an array.
# It yields a Bio::Alignment::Site object.
def collect_each_site
  ary = []
  each_site do |site|
    ary << yield(site)
  end
  ary
end

# Helper method for calculating consensus sequence.
# It iterates over each site of the alignment.
# In each site, gaps will be removed if specified with opt.
# It yields a Bio::Alignment::Site object.
# Results running the block (String objects are expected)
# are joined to a string and it returns the string.
# A nil/false block result is replaced by the missing character
# (opt[:missing_char], or the alignment's missing_char).
#
#  opt[:gap_mode] ==> 0 -- gaps are regarded as normal characters
#                     1 -- a site within gaps is regarded as a gap
#                    -1 -- gaps are eliminated from consensus calculation
#      default: 0
#
# Raises RuntimeError for any other :gap_mode value.
#
def consensus_each_site(opt = {})
  mchar = (opt[:missing_char] or self.missing_char)
  gap_mode = opt[:gap_mode]
  case gap_mode
  when 0, nil
    collect_each_site do |a|
      yield(a) or mchar
    end.join('')
  when 1
    collect_each_site do |a|
      a.has_gap? ? gap_char : (yield(a) or mchar)
    end.join('')
  when -1
    collect_each_site do |a|
      a.remove_gaps!
      # a site made only of gaps stays a gap in the result
      a.empty? ? gap_char : (yield(a) or mchar)
    end.join('')
  else
    raise ':gap_mode must be 0, 1 or -1'
  end
end

# Returns the consensus string of the alignment.
# 0.0 <= threshold <= 1.0 is expected.
#
# It resembles the BioPerl's AlignI::consensus_string method.
#
# Please refer to the consensus_each_site method for opt.
#
def consensus_string(threshold = 1.0, opt = {})
  consensus_each_site(opt) do |a|
    a.consensus_string(threshold)
  end
end

# Returns the IUPAC consensus string of the alignment
# of nucleic-acid sequences.
#
# It resembles the BioPerl's AlignI::consensus_iupac method.
#
# Please refer to the consensus_each_site method for opt.
#
def consensus_iupac(opt = {})
  consensus_each_site(opt) do |a|
    a.consensus_iupac
  end
end

# Returns the match line string of the alignment
# of amino-acid sequences.
#
# It resembles the BioPerl's AlignI::match_line method.
#
#   opt[:match_line_char]   ==> 100% equal    default: '*'
#   opt[:strong_match_char] ==> strong match  default: ':'
#   opt[:weak_match_char]   ==> weak match    default: '.'
#   opt[:mismatch_char]     ==> mismatch      default: ' '
#
# opt is passed unchanged to Site#match_line_amino for every site.
#
def match_line_amino(opt = {})
  collect_each_site do |a|
    a.match_line_amino(opt)
  end.join('')
end

# Returns the match line string of the alignment
# of nucleic-acid sequences.
#
# It resembles the BioPerl's AlignI::match_line method.
#
#   opt[:match_line_char]   ==> 100% equal    default: '*'
#   opt[:mismatch_char]     ==> mismatch      default: ' '
#
# opt is passed unchanged to Site#match_line_nuc for every site.
#
def match_line_nuc(opt = {})
  collect_each_site do |a|
    a.match_line_nuc(opt)
  end.join('')
end

# Returns the match line string of the alignment
# of nucleic- or amino-acid sequences.
# The type of the sequence is automatically determined
# or you can specify with opt[:type].
#
# It resembles the BioPerl's AlignI::match_line method.
#
#   opt[:type] ==> :na or :aa (or determined by sequence class)
#   opt[:match_line_char]   ==> 100% equal    default: '*'
#   opt[:strong_match_char] ==> strong match  default: ':'
#   opt[:weak_match_char]   ==> weak match    default: '.'
#   opt[:mismatch_char]     ==> mismatch      default: ' '
#     :strong_ and :weak_match_char are used only in amino mode (:aa)
#
# More opt can be accepted.
# Please refer to the consensus_each_site method for opt.
#
def match_line(opt = {})
  amino =
    case opt[:type]
    when :aa
      true
    when :na, :dna, :rna
      false
    else
      klass = seqclass
      if klass == Bio::Sequence::AA
        true
      elsif klass == Bio::Sequence::NA
        false
      else
        # unknown class: any residue letter that is not a nucleotide
        # code marks the alignment as amino-acid
        guess = nil
        each_seq do |x|
          next unless /[EFILPQ]/i =~ x
          guess = true
          break
        end
        guess
      end
    end
  amino ? match_line_amino(opt) : match_line_nuc(opt)
end

# This is the BioPerl's AlignI::match like method.
#
# Changes second to last sequences' sites to match_char(default: '.')
# when a site is equal to the first sequence's corresponding site
# and that site is not a gap.
#
# Note that it is a destructive method.
#
# For Hash, please use it carefully because
# the order of the sequences is inconstant.
#
def convert_match(match_char = '.')
  #(BioPerl) AlignI::match like method
  width = alignment_length
  reference = nil
  each_seq do |s|
    if reference
      width.times do |i|
        next unless s[i]
        next unless reference[i] == s[i]
        next if is_gap?(reference[i..i])
        s[i..i] = match_char
      end
    else
      reference = s
    end
  end
  self
end

# This is the BioPerl's AlignI::unmatch like method.
#
# Changes second to last sequences' sites match_char(default: '.')
# to original sites' characters.
#
# Note that it is a destructive method.
#
# For Hash, please use it carefully because
# the order of the sequences is inconstant.
#
def convert_unmatch(match_char = '.')
  #(BioPerl) AlignI::unmatch like method
  width = alignment_length
  reference = nil
  each_seq do |s|
    if reference
      width.times do |i|
        next unless s[i..i] == match_char
        s[i..i] = (reference[i..i] || match_char)
      end
    else
      reference = s
    end
  end
  self
end

# Fills gaps to the tail of each sequence if the length of
# the sequence is shorter than the alignment length.
#
# Note that it is a destructive method.
def alignment_normalize!
  #(original)
  width = alignment_length
  each_seq do |s|
    next unless s.length < width
    s << (gap_char * (width - s.length))
  end
  self
end
alias normalize! alignment_normalize!
# Removes excess gaps in the tail of the sequences.
# If removes nothing, returns nil.
# Otherwise, returns self.
#
# Note that it is a destructive method.
def alignment_rstrip!
  #(String-like)
  full = alignment_length
  keep = full
  # walk the columns from the right until one holds a non-gap
  each_site_step(full - 1, 0, -1) do |col|
    col.remove_gaps!
    break unless col.empty?
    keep -= 1
  end
  return nil if keep >= full
  each_seq do |s|
    s[keep..-1] = '' if s.length > keep
  end
  self
end
alias rstrip! alignment_rstrip!

# Removes excess gaps in the head of the sequences.
# If removes nothing, returns nil.
# Otherwise, returns self.
#
# Note that it is a destructive method.
def alignment_lstrip!
  #(String-like)
  drop = 0
  # count the leading columns that consist only of gaps
  each_site do |col|
    col.remove_gaps!
    break unless col.empty?
    drop += 1
  end
  return nil if drop <= 0
  each_seq { |s| s[0, drop] = '' }
  self
end
alias lstrip! alignment_lstrip!

# Removes excess gaps in the sequences.
# If removes nothing, returns nil.
# Otherwise, returns self.
#
# Note that it is a destructive method.
def alignment_strip!
  #(String-like)
  tail = alignment_rstrip!
  head = alignment_lstrip!
  tail || head
end
alias strip! alignment_strip!

# Completely removes ALL gaps in the sequences.
# If removes nothing, returns nil.
# Otherwise, returns self.
#
# Note that it is a destructive method.
def remove_all_gaps!
  changed = false
  each_seq do |s|
    # gsub! returns nil when the sequence had no gap
    changed = true if s.gsub!(gap_regexp, '')
  end
  changed ? self : nil
end

# Returns the specified range of the alignment.
# For each sequence, the 'slice' method (it may be String#slice,
# which is the same as String#[]) is executed, and
# returns a new alignment as a Bio::Alignment::SequenceArray object.
#
# Unlike alignment_window method, the result alignment
# might contain nil.
#
# If you want to change return value's class, you should redefine
# alignment_collect method.
# def alignment_slice(*arg) #(String-like) #(BioPerl) AlignI::slice like method alignment_collect do |s| s.slice(*arg) end end alias slice alignment_slice # For each sequence, the 'subseq' method (Bio::Seqeunce::Common#subseq is # expected) is executed, and returns a new alignment as # a Bio::Alignment::SequenceArray object. # # All sequences in the alignment are expected to be kind of # Bio::Sequence::NA or Bio::Sequence::AA objects. # # Unlike alignment_window method, the result alignment # might contain nil. # # If you want to change return value's class, you should redefine # alignment_collect method. # def alignment_subseq(*arg) #(original) alignment_collect do |s| s.subseq(*arg) end end alias subseq alignment_subseq # Concatenates the given alignment. # align must have each_seq # or each method. # # Returns self. # # Note that it is a destructive method. # # For Hash, please use it carefully because # the order of the sequences is inconstant and # key information is completely ignored. # def alignment_concat(align) flag = nil a = [] each_seq { |s| a << s } i = 0 begin align.each_seq do |seq| flag = true a[i].concat(seq) if a[i] and seq i += 1 end return self rescue NoMethodError, ArgumentError => evar raise evar if flag end align.each do |seq| a[i].concat(seq) if a[i] and seq i += 1 end self end end #module EnumerableExtension module Output def output(format, *arg) case format when :clustal output_clustal(*arg) when :fasta output_fasta(*arg) when :phylip output_phylip(*arg) when :phylipnon output_phylipnon(*arg) when :msf output_msf(*arg) when :molphy output_molphy(*arg) else raise "Unknown format: #{format.inspect}" end end # Check whether there are same names for ClustalW format. 
#
# array:: names of the sequences (array of string)
# len:: length to check (default:30)
#
# Names are compared after truncating them at the first whitespace
# or NUL, cutting to len characters, and replacing each of the
# characters ":;,()" with "_".
# Returns a sorted array of the indexes whose names collide,
# or false when all names are distinct.
def __clustal_have_same_name?(array, len = 30)
  by_name = Hash.new { |h, k| h[k] = [] }
  array.each_with_index do |k, i|
    # character class: any ONE of the characters is replaced
    short = k.to_s.split(/[\x00\s]/)[0].to_s[0, len].gsub(/[\:\;\,\(\)]/, '_').to_s
    by_name[short] << i
  end
  dupidx = by_name.values.select { |idx| idx.size > 1 }.flatten.sort
  dupidx.empty? ? false : dupidx
end
private :__clustal_have_same_name?

# Changes sequence names if there are conflicted names
# for ClustalW format.
#
# array:: names of the sequences (array of string)
# len:: length to check (default:30)
#
# Returns a new array of names in which CR, LF and NUL are replaced
# by a space. Conflicting names first get the whitespace in their
# first len characters replaced by "_"; if conflicts remain, every
# name is prefixed with its index ("0_name", "1_name", ...).
def __clustal_avoid_same_name(array, len = 30)
  na = array.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') }
  if dupidx = __clustal_have_same_name?(na, len)
    procs = [
      Proc.new { |s, i|
        s[0, len].to_s.gsub(/\s/, '_') + s[len..-1].to_s
      },
      # Proc.new { |s, i|
      #   "#{i}_#{s}"
      # },
    ]
    procs.each do |pr|
      dupidx.each do |i|
        # rename the sanitized name, so that CR/LF/NUL removed above
        # are not brought back from the raw input
        na[i] = pr.call(na[i].to_s, i)
      end
      dupidx = __clustal_have_same_name?(na, len)
      break unless dupidx
    end
    if dupidx then
      na.each_with_index do |s, i|
        na[i] = "#{i}_#{s}"
      end
    end
  end
  na
end
private :__clustal_avoid_same_name

# Generates ClustalW-formatted text
# seqs:: sequences (must be an alignment object)
# names:: names of the sequences
# options:: options
#
# Options read by this method:
#   :replace_space   replace whitespace in names with "_"
#   :escape          replace ":;,()" in names with "_" (default: on)
#   :split           cut names at the first whitespace (default: on)
#   :avoid_same_name rename conflicting names (default: on)
#   :gap_char        gap character in the output (default: '-')
#   :type            sequence type hint for the match line
#   :match_line      match line string to use instead of computing it
#   :case            "lower" or "upper" to change the sequence case
#   :seqnos          append the residue count to each sequence line
def __clustal_formatter(seqs, names, options = {})
  #(original)
  aln = [ "CLUSTAL (0.00) multiple sequence alignment\n\n" ]
  len = seqs.seq_length
  sn = names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
  if options[:replace_space]
    sn.collect! { |x| x.gsub(/\s/, '_') }
  end
  if !options.has_key?(:escape) or options[:escape]
    sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') }
  end
  if !options.has_key?(:split) or options[:split]
    sn.collect! { |x| x.split(/\s/)[0].to_s }
  end
  if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
    sn = __clustal_avoid_same_name(sn)
  end

  # long names switch to a wider name column and a narrower sequence column
  if sn.find { |x| x.length > 10 } then
    seqwidth = 50
    namewidth = 30
    sep = ' ' * 6
  else
    seqwidth = 60
    namewidth = 10
    sep = ' ' * 6
  end
  seqregexp = Regexp.new("(.{1,#{seqwidth}})")
  gchar = (options[:gap_char] or '-')

  case options[:type].to_s
  when /protein/i, /aa/i
    mopt = { :type => :aa }
  when /na/i
    mopt = { :type => :na }
  else
    mopt = {}
  end
  mline = (options[:match_line] or seqs.match_line(mopt))

  aseqs = []
  seqs.each_seq do |s|
    aseqs << s.to_s.gsub(seqs.gap_regexp, gchar)
  end
  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  when /upper/i
    aseqs.each { |s| s.upcase! }
  end

  # the match line is formatted in place below; work on a copy so
  # that a string given with options[:match_line] is not modified
  aseqs << mline.dup
  aseqs.collect! do |s|
    # sn has no entry for the match line, so snx is nil there
    snx = sn.shift
    head = sprintf("%*s", -namewidth, snx.to_s)[0, namewidth] + sep
    s << (gchar * (len - s.length))
    s.gsub!(seqregexp, "\\1\n")
    a = s.split(/^/)
    if options[:seqnos] and snx then
      i = 0
      a.each do |x|
        x.chomp!
        l = x.tr(gchar, '').length
        i += l
        x.concat(l > 0 ? " #{i}\n" : "\n")
      end
    end
    a.collect { |x| head + x }
  end
  lines = (len + seqwidth - 1).div(seqwidth)
  lines.times do
    aln << "\n"
    aseqs.each { |a| aln << a.shift }
  end
  aln.join('')
end
private :__clustal_formatter

# Generates ClustalW-formatted text
# options:: options (see __clustal_formatter)
def output_clustal(options = {})
  __clustal_formatter(self, self.sequence_names, options)
end

# to_clustal is deprecated. Instead, please use output_clustal.
#---
#alias to_clustal output_clustal
#+++
def to_clustal(*arg)
  warn "to_clustal is deprecated. Please use output_clustal."
  output_clustal(*arg)
end

# Generates fasta format text and returns a string.
def output_fasta(options={}) #(original) width = (options[:width] or 70) if options[:avoid_same_name] then na = __clustal_avoid_same_name(self.sequence_names, 30) else na = self.sequence_names.collect do |k| k.to_s.gsub(/[\r\n\x00]/, ' ') end end if width and width > 0 then w_reg = Regexp.new(".{1,#{width}}") self.collect do |s| ">#{na.shift}\n" + s.to_s.gsub(w_reg, "\\0\n") end.join('') else self.collect do |s| ">#{na.shift}\n" + s.to_s + "\n" end.join('') end end # generates phylip interleaved alignment format as a string def output_phylip(options = {}) aln, aseqs, lines = __output_phylip_common(options) lines.times do aseqs.each { |a| aln << a.shift } aln << "\n" end aln.pop if aln[-1] == "\n" aln.join('') end # generates Phylip3.2 (old) non-interleaved format as a string def output_phylipnon(options = {}) aln, aseqs, lines = __output_phylip_common(options) aln.first + aseqs.join('') end # common routine for interleaved/non-interleaved phylip format def __output_phylip_common(options = {}) len = self.alignment_length aln = [ " #{self.number_of_sequences} #{len}\n" ] sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') } if options[:replace_space] sn.collect! { |x| x.gsub(/\s/, '_') } end if !options.has_key?(:escape) or options[:escape] sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') } end if !options.has_key?(:split) or options[:split] sn.collect! { |x| x.split(/\s/)[0].to_s } end if !options.has_key?(:avoid_same_name) or options[:avoid_same_name] sn = __clustal_avoid_same_name(sn, 10) end namewidth = 10 seqwidth = (options[:width] or 60) seqwidth = seqwidth.div(10) * 10 seqregexp = Regexp.new("(.{1,#{seqwidth.div(10) * 11}})") gchar = (options[:gap_char] or '-') aseqs = Array.new(self.number_of_sequences).clear self.each_seq do |s| aseqs << s.to_s.gsub(self.gap_regexp, gchar) end case options[:case].to_s when /lower/i aseqs.each { |s| s.downcase! } when /upper/i aseqs.each { |s| s.upcase! } end aseqs.collect! 
do |s| snx = sn.shift head = sprintf("%*s", -namewidth, snx.to_s)[0, namewidth] head2 = ' ' * namewidth s << (gchar * (len - s.length)) s.gsub!(/(.{1,10})/n, " \\1") s.gsub!(seqregexp, "\\1\n") a = s.split(/^/) head += a.shift ret = a.collect { |x| head2 + x } ret.unshift(head) ret end lines = (len + seqwidth - 1).div(seqwidth) [ aln, aseqs, lines ] end # Generates Molphy alignment format text as a string def output_molphy(options = {}) len = self.alignment_length header = "#{self.number_of_sequences} #{len}\n" sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') } if options[:replace_space] sn.collect! { |x| x.gsub(/\s/, '_') } end if !options.has_key?(:escape) or options[:escape] sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') } end if !options.has_key?(:split) or options[:split] sn.collect! { |x| x.split(/\s/)[0].to_s } end if !options.has_key?(:avoid_same_name) or options[:avoid_same_name] sn = __clustal_avoid_same_name(sn, 30) end seqwidth = (options[:width] or 60) seqregexp = Regexp.new("(.{1,#{seqwidth}})") gchar = (options[:gap_char] or '-') aseqs = Array.new(len).clear self.each_seq do |s| aseqs << s.to_s.gsub(self.gap_regexp, gchar) end case options[:case].to_s when /lower/i aseqs.each { |s| s.downcase! } when /upper/i aseqs.each { |s| s.upcase! } end aseqs.collect! do |s| s << (gchar * (len - s.length)) s.gsub!(seqregexp, "\\1\n") sn.shift + "\n" + s end aseqs.unshift(header) aseqs.join('') end # Generates msf formatted text as a string def output_msf(options = {}) len = self.seq_length if !options.has_key?(:avoid_same_name) or options[:avoid_same_name] sn = __clustal_avoid_same_name(self.sequence_names) else sn = self.sequence_names.collect do |x| x.to_s.gsub(/[\r\n\x00]/, ' ') end end if !options.has_key?(:replace_space) or options[:replace_space] sn.collect! { |x| x.gsub(/\s/, '_') } end if !options.has_key?(:escape) or options[:escape] sn.collect! 
{ |x| x.gsub(/[\:\;\,\(\)]/, '_') } end if !options.has_key?(:split) or options[:split] sn.collect! { |x| x.split(/\s/)[0].to_s } end seqwidth = 50 namewidth = [31, sn.collect { |x| x.length }.max ].min sep = ' ' * 2 seqregexp = Regexp.new("(.{1,#{seqwidth}})") gchar = (options[:gap_char] or '.') pchar = (options[:padding_char] or '~') aseqs = Array.new(self.number_of_sequences).clear self.each_seq do |s| aseqs << s.to_s.gsub(self.gap_regexp, gchar) end aseqs.each do |s| s.sub!(/\A#{Regexp.escape(gchar)}+/) { |x| pchar * x.length } s.sub!(/#{Regexp.escape(gchar)}+\z/, '') s << (pchar * (len - s.length)) end case options[:case].to_s when /lower/i aseqs.each { |s| s.downcase! } when /upper/i aseqs.each { |s| s.upcase! } else #default upcase aseqs.each { |s| s.upcase! } end case options[:type].to_s when /protein/i, /aa/i amino = true when /na/i amino = false else if seqclass == Bio::Sequence::AA then amino = true elsif seqclass == Bio::Sequence::NA then amino = false else # if we can't determine, we asuume as protein. amino = aseqs.size aseqs.each { |x| amino -= 1 if /\A[acgt]\z/i =~ x } amino = false if amino <= 0 end end seq_type = (amino ? 'P' : 'N') fn = (options[:entry_id] or self.__id__.abs.to_s + '.msf') dt = (options[:time] or Time.now).strftime('%B %d, %Y %H:%M') sums = aseqs.collect { |s| GCG::Seq.calc_checksum(s) } #sums = aseqs.collect { |s| 0 } sum = 0; sums.each { |x| sum += x }; sum %= 10000 msf = [ "#{seq_type == 'N' ? 'N' : 'A' }A_MULTIPLE_ALIGNMENT 1.0\n", "\n", "\n", " #{fn} MSF: #{len} Type: #{seq_type} #{dt} Check: #{sum} ..\n", "\n" ] sn.each do |snx| msf << ' Name: ' + sprintf('%*s', -namewidth, snx.to_s)[0, namewidth] + " Len: #{len} Check: #{sums.shift} Weight: 1.00\n" end msf << "\n//\n" aseqs.collect! 
do |s| snx = sn.shift head = sprintf("%*s", namewidth, snx.to_s)[0, namewidth] + sep s.gsub!(seqregexp, "\\1\n") a = s.split(/^/) a.collect { |x| head + x } end lines = (len + seqwidth - 1).div(seqwidth) i = 1 lines.times do msf << "\n" n_l = i n_r = [ i + seqwidth - 1, len ].min if n_l != n_r then w = [ n_r - n_l + 1 - n_l.to_s.length - n_r.to_s.length, 1 ].max msf << (' ' * namewidth + sep + n_l.to_s + ' ' * w + n_r.to_s + "\n") else msf << (' ' * namewidth + sep + n_l.to_s + "\n") end aseqs.each { |a| msf << a.shift } i += seqwidth end msf << "\n" msf.join('') end end #module Output module EnumerableExtension include Output # Returns number of sequences in this alignment. def number_of_sequences i = 0 self.each_seq { |s| i += 1 } i end # Returns an array of sequence names. # The order of the names must be the same as # the order of each_seq. def sequence_names (0...(self.number_of_sequences)).to_a end end #module EnumerableExtension # Bio::Alignment::ArrayExtension is a set of useful methods for # multiple sequence alignment. # It is designed to be extended to array objects or # included in your own classes which inherit Array. # (It can also be included in Array, though not recommended.) # # It possesses all methods defined in EnumerableExtension. # For usage of methods, please refer to EnumerableExtension. module ArrayExtension include EnumerableExtension # Iterates over each sequences. # Yields a sequence. # # It works the same as Array#each. def each_seq(&block) #:yields: seq each(&block) end # Returns number of sequences in this alignment. def number_of_sequences self.size end end #module ArrayExtension # Bio::Alignment::HashExtension is a set of useful methods for # multiple sequence alignment. # It is designed to be extended to hash objects or # included in your own classes which inherit Hash. # (It can also be included in Hash, though not recommended.) # # It possesses all methods defined in EnumerableExtension. 
# For usage of methods, please refer to EnumerableExtension. # # Because SequenceHash#alignment_collect is redefined, # some methods' return value's class are changed to # SequenceHash instead of SequenceArray. # # Because the order of the objects in a hash is inconstant, # some methods strictly affected with the order of objects # might not work correctly, # e.g. EnumerableExtension#convert_match and #convert_unmatch. module HashExtension include EnumerableExtension # Iterates over each sequences. # Yields a sequence. # # It works the same as Hash#each_value. def each_seq #:yields: seq #each_value(&block) each_key { |k| yield self[k] } end # Iterates over each sequence and each results running block # are collected and returns a new alignment as a # Bio::Alignment::SequenceHash object. # # Note that it would be redefined if you want to change # return value's class. # def alignment_collect a = SequenceHash.new a.set_all_property(get_all_property) each_pair do |key, str| a.store(key, yield(str)) end a end # Concatenates the given alignment. # If align is a Hash (or SequenceHash), # sequences of same keys are concatenated. # Otherwise, align must have each_seq # or each method and # works same as EnumerableExtension#alignment_concat. # # Returns self. # # Note that it is a destructive method. # def alignment_concat(align) flag = nil begin align.each_pair do |key, seq| flag = true if origseq = self[key] origseq.concat(seq) end end return self rescue NoMethodError, ArgumentError =>evar raise evar if flag end a = values i = 0 begin align.each_seq do |seq| flag = true a[i].concat(seq) if a[i] and seq i += 1 end return self rescue NoMethodError, ArgumentError => evar raise evar if flag end align.each do |seq| a[i].concat(seq) if a[i] and seq i += 1 end self end # Returns number of sequences in this alignment. def number_of_sequences self.size end # Returns an array of sequence names. # The order of the names must be the same as # the order of each_seq. 
def sequence_names self.keys end end #module HashExtension # Bio::Alignment::SequenceArray is a container class of # multiple sequence alignment. # Since it inherits Array, it acts completely same as Array. # In addition, methods defined in ArrayExtension and EnumerableExtension # can be used. class SequenceArray < Array include ArrayExtension end #class SequenceArray # Bio::Alignment::SequenceHash is a container class of # multiple sequence alignment. # Since it inherits Hash, it acts completely same as Hash. # In addition, methods defined in HashExtension and EnumerableExtension # can be used. class SequenceHash < Hash include HashExtension end #class SequenceHash # Bio::Alignment::OriginalPrivate is a set of private methods # for Bio::Alignment::OriginalAlignment. module OriginalPrivate # Gets the sequence from given object. def extract_seq(obj) seq = nil if obj.is_a?(Bio::Sequence::NA) or obj.is_a?(Bio::Sequence::AA) then seq = obj else for m in [ :seq, :naseq, :aaseq ] begin seq = obj.send(m) rescue NameError, ArgumentError seq = nil end break if seq end seq = obj unless seq end seq end module_function :extract_seq # Gets the name or the definition of the sequence from given object. def extract_key(obj) sn = nil for m in [ :definition, :entry_id ] begin sn = obj.send(m) rescue NameError, ArgumentError sn = nil end break if sn end sn end module_function :extract_key end #module OriginalPrivate # Bio::Alignment::OriginalAlignment is # the BioRuby original multiple sequence alignment container class. # It includes HashExtension. # # It is recommended only to use methods defined in EnumerableExtension # (and the each_seq method). # The method only defined in this class might be obsoleted in the future. # class OriginalAlignment include Enumerable include HashExtension include OriginalPrivate # Read files and creates a new alignment object. # # It will be obsoleted. 
def self.readfiles(*files)
  require 'bio/io/flatfile'
  aln = self.new
  files.each do |fn|
    Bio::FlatFile.open(nil, fn) do |ff|
      aln.add_sequences(ff)
    end
  end
  aln
end

# Creates a new alignment object from given arguments.
# All arguments are collected into one array and passed to new.
#
# It will be obsoleted.
def self.new2(*arg)
  self.new(arg)
end

# Creates a new alignment object.
# seqs may be one of follows:
# an array of sequences (or strings),
# an array of sequence database objects,
# an alignment object.
def initialize(seqs = [])
  @seqs = {}   # key => sequence
  @keys = []   # keys in alignment order
  self.add_sequences(seqs)
end

# If x is an object of the same class holding the same
# key => sequence hash, returns true.
# Otherwise, returns false.
def ==(x)
  #(original)
  if x.is_a?(self.class)
    self.to_hash == x.to_hash
  else
    false
  end
end

# convert to hash
# (returns the internal Hash itself, not a copy)
def to_hash
  #(Hash-like)
  @seqs
end

# Adds sequences to the alignment.
# seqs may be one of follows:
# an array of sequences (or strings),
# an array of sequence database objects,
# an alignment object.
#
# If a block is given, each element of seqs is yielded and the block
# must return [ sequence, key ].
#
# Returns self.
def add_sequences(seqs)
  if block_given? then
    seqs.each do |x|
      s, key = yield x
      self.store(key, s)
    end
  else
    if seqs.is_a?(self.class) then
      # same class: sequences are stored as they are
      seqs.each_pair do |k, s|
        self.store(k, s)
      end
    elsif seqs.respond_to?(:each_pair)
      # hash-like: keys are given, sequences are extracted
      seqs.each_pair do |k, x|
        s = extract_seq(x)
        self.store(k, s)
      end
    else
      # array-like: both key and sequence are extracted from the object
      seqs.each do |x|
        s = extract_seq(x)
        k = extract_key(x)
        self.store(k, s)
      end
    end
  end
  self
end

# identifiers (or definitions or names) of the sequences
attr_reader :keys

# stores a sequences with the name
# key:: name of the sequence
# seq:: sequence
#
# No duplicate check is done here. Returns seq.
def __store__(key, seq)
  #(Hash-like)
  # the key object kept in @keys is the one actually held by the Hash
  h = { key => seq }
  @keys << h.keys[0]
  @seqs.update(h)
  seq
end

# stores a sequence with key
# (name or definition of the sequence).
# Unlike __store__ method, the method doesn't allow
# same keys.
# If the key is nil (or false) or is already used, the given key is
# discarded and an unused serial number (Integer) is used as the key.
# Returns the key under which the sequence was actually stored.
def store(key, seq)
  #(Hash-like) returns key instead of seq
  if @seqs.has_key?(key) then
    # don't allow same key
    # New key is discarded, while existing key is preserved.
    key = nil
  end
  unless key then
    unless defined?(@serial)
      @serial = 0
    end
    @serial = @seqs.size if @seqs.size > @serial
    while @seqs.has_key?(@serial)
      @serial += 1
    end
    key = @serial
  end
  self.__store__(key, seq)
  key
end

# Reconstructs internal data structure.
# (Like Hash#rehash)
#
# Keys no longer found in the hash are dropped from the key list, and
# keys found only in the hash are appended to it.
def rehash
  @seqs.rehash
  tmpkeys = @seqs.keys
  # Array#delete returns nil for keys missing in the hash
  @keys.collect! do |k|
    tmpkeys.delete(k)
  end
  @keys.compact!
  @keys.concat(tmpkeys)
  self
end

# Prepends seq (with key) to the front of the alignment.
# (Like Array#unshift)
# Returns the key moved to the front.
def unshift(key, seq)
  #(Array-like)
  self.store(key, seq)
  # store appended the key to the tail; move it to the head
  k = @keys.pop
  @keys.unshift(k)
  k
end

# Removes the first sequence in the alignment and
# returns [ key, seq ].
# Returns nil when there is no sequence.
def shift
  k = @keys.shift
  if k then
    s = @seqs.delete(k)
    [ k, s ]
  else
    nil
  end
end

# Gets the n-th sequence.
# If not found, returns nil.
def order(n)
  #(original)
  @seqs[@keys[n]]
end

# Removes the sequence whose key is key.
# Returns the removed sequence.
# If not found, returns nil.
def delete(key)
  #(Hash-like)
  @keys.delete(key)
  @seqs.delete(key)
end

# Returns sequences in alignment order. (Like Hash#values)
def values
  #(Hash-like)
  @keys.collect { |k| @seqs[k] }
end

# Adds a sequence without key.
# The key is automatically determined.
# Returns self.
def <<(seq)
  #(Array-like)
  self.store(nil, seq)
  self
end

# Gets a sequence. (Like Hash#[])
def [](*arg)
  #(Hash-like)
  @seqs[*arg]
end

# Number of sequences in the alignment.
def size
  #(Hash&Array-like)
  @seqs.size
end
alias number_of_sequences size

# If the key exists, returns true. Otherwise, returns false.
# (Like Hash#has_key?)
def has_key?(key)
  #(Hash-like)
  @seqs.has_key?(key)
end

# Iterates over each sequence.
# (Like Array#each)
# A block is required.
def each
  #(Array-like)
  @keys.each do |k|
    yield @seqs[k]
  end
end
alias each_seq each

# Iterates over each key and sequence.
# (Like Hash#each_pair)
# A block is required.
def each_pair
  #(Hash-like)
  @keys.each do |k|
    yield k, @seqs[k]
  end
end

# Iterates over each sequence, replacing the sequence with the
# value returned by the block.
def collect!
  #(Array-like)
  @keys.each do |k|
    @seqs[k] = yield @seqs[k]
  end
end

###--
### note that 'collect' and 'to_a' is defined in Enumerable
###
### instance-variable-related methods
###++

# Creates new alignment. Internal use only.
# Properties of self are copied to the new alignment.
def new(*arg)
  na = self.class.new(*arg)
  na.set_all_property(get_all_property)
  na
end
protected :new

# Duplicates the alignment
# (the sequence objects themselves are shared, not copied)
def dup
  #(Hash-like)
  self.new(self)
end

#--
# methods below should not access instance variables
#++

# Merges given alignment and returns a new alignment.
def merge(*other)
  #(Hash-like)
  na = self.new(self)
  na.merge!(*other)
  na
end

# Merge given alignment.
# Note that it is destructive method.
#
# When a key already exists: with a block, the block is called with
# (key, existing sequence, new sequence) and its result replaces the
# sequence in place; without a block, the existing entry is deleted
# and the new sequence is appended.
# Returns self.
def merge!(*other)
  #(Hash-like)
  if block_given? then
    other.each do |aln|
      aln.each_pair do |k, s|
        if self.has_key?(k) then
          s = yield k, self[k], s
          self.to_hash.store(k, s)
        else
          self.store(k, s)
        end
      end
    end
  else
    other.each do |aln|
      aln.each_pair do |k, s|
        self.delete(k) if self.has_key?(k)
        self.store(k, s)
      end
    end
  end
  self
end

# Returns the key for a given sequence. If not found, returns nil.
#
# Sequences of the same class are compared with ==,
# otherwise their to_s representations are compared.
def index(seq)
  #(Hash-like)
  self.each_pair do |k, s|
    matched = if s.class == seq.class then
                (s == seq)
              else
                (s.to_s == seq.to_s)
              end
    return k if matched
  end
  # no sequence matched (previously the last key was returned here)
  nil
end

# Sequences in the alignment are duplicated.
# If keys are given to the argument, sequences of given keys are
# duplicated. Keys not found in the alignment are ignored.
#
# Returns self.
#
# It will be obsoleted.
def isolate(*arg)
  #(original)
  if arg.size == 0 then
    self.collect! do |s|
      seqclass.new(s)
    end
  else
    arg.each do |k|
      if self.has_key?(k) then
        # replace in place so that the order of sequences is kept
        self.to_hash.store(k, seqclass.new(self[k]))
      end
    end
  end
  self
end

# Iterates over each sequence and each results running block
# are collected and returns a new alignment.
#
# The method name 'collect_align' will be obsoleted.
# Please use 'alignment_collect' instead.
def alignment_collect
  #(original)
  collected = self.class.new
  collected.set_all_property(get_all_property)
  each_pair { |name, str| collected.store(name, yield(str)) }
  collected
end
alias collect_align alignment_collect

# Removes empty sequences or nil in the alignment.
# (Like Array#compact!)
# Returns an array of the removed keys, or nil if nothing was removed.
def compact!
  #(Array-like)
  removed = []
  each_pair do |name, str|
    removed << name if !str || str.empty?
  end
  # deletion is done after the iteration has finished
  removed.each { |name| delete(name) }
  removed.empty? ? nil : removed
end

# Removes empty sequences or nil and returns new alignment.
# (Like Array#compact)
def compact
  #(Array-like)
  copy = dup
  copy.compact!
  copy
end

# Adds a sequence to the alignment.
# When seq is neither Bio::Sequence::NA nor Bio::Sequence::AA,
# the sequence (and, unless key is given, the key) is extracted
# from seq by extract_seq and extract_key.
# Returns the return value of store.
#
# It resembles BioPerl's AlignI::add_seq method.
def add_seq(seq, key = nil)
  #(BioPerl) AlignI::add_seq like method
  unless seq.is_a?(Bio::Sequence::NA) || seq.is_a?(Bio::Sequence::AA)
    extracted = extract_seq(seq)
    key ||= extract_key(seq)
    seq = extracted
  end
  store(key, seq)
end

# Removes given sequence from the alignment.
# The sequence is looked up with index.
# Returns removed sequence. If nothing removed, returns nil.
#
# It resembles BioPerl's AlignI::remove_seq.
def remove_seq(seq)
  #(BioPerl) AlignI::remove_seq like method
  found = index(seq)
  return nil unless found
  delete(found)
end

# Removes sequences from the alignment by given keys.
# Returns an alignment object consists of removed sequences.
#
# It resembles BioPerl's AlignI::purge method.
def purge(*arg)
  #(BioPerl) AlignI::purge like method
  purged = self.new
  arg.each do |name|
    next unless self[name]
    purged.store(name, delete(name))
  end
  purged
end

# If block is given, it acts like Array#select (Enumerable#select).
# Returns a new alignment containing all sequences of the alignment
# for which return value of given block is not false nor nil.
#
# If no block is given, it acts like the BioPerl's AlignI::select.
# Returns a new alignment containing sequences of given keys.
#
# The BioPerl's AlignI::select-like action will be obsoleted.
def select(*arg)
  #(original)
  na = self.new
  if block_given? then
    # 'arg' is ignored
    # nearly same action as Array#select (Enumerable#select)
    #
    # The block must be passed to each_pair directly: each_pair always
    # yields and does not return an enumerator, so calling it without
    # a block raises LocalJumpError.
    self.each_pair do |k, s|
      na.store(k, s) if yield(s)
    end
  else
    # BioPerl's AlignI::select like function
    arg.each do |k|
      if s = self[k] then
        na.store(k, s)
      end
    end
  end
  na
end

# The method name slice will be obsoleted.
# Please use alignment_slice instead.
alias slice alignment_slice

# The method name subseq will be obsoleted.
# Please use alignment_subseq instead.
alias subseq alignment_subseq

# Not-destructive version of alignment_normalize!.
# Returns a new alignment.
def normalize
  #(original)
  na = self.dup
  na.alignment_normalize!
  na
end

# Not-destructive version of alignment_rstrip!.
# Returns a new alignment.
def rstrip
  #(String-like)
  na = self.dup
  # dup shares the sequence objects; isolate copies them before
  # the destructive method is applied
  na.isolate
  na.alignment_rstrip!
  na
end

# Not-destructive version of alignment_lstrip!.
# Returns a new alignment.
def lstrip
  #(String-like)
  na = self.dup
  na.isolate
  na.alignment_lstrip!
  na
end

# Not-destructive version of alignment_strip!.
# Returns a new alignment.
def strip
  #(String-like)
  na = self.dup
  na.isolate
  na.alignment_strip!
  na
end

# Not-destructive version of remove_all_gaps!.
# Returns a new alignment.
#
# The method name 'remove_gap' will be obsoleted.
# Please use 'remove_all_gaps' instead.
def remove_all_gaps
  #(original)
  na = self.dup
  na.isolate
  na.remove_all_gaps!
  na
end

# Concatenates a string or an alignment.
# Returns self.
#
# Note that the method will be obsoleted.
# Please use each_seq { |s| s << str } for concatenating
# a string and
# alignment_concat(aln) for concatenating an alignment.
def concat(aln)
  #(String-like)
  if aln.respond_to?(:to_str) then #aln.is_a?(String)
    self.each do |s|
      s << aln
    end
    self
  else
    alignment_concat(aln)
  end
end

# Replace the specified region of the alignment to aln.
# aln:: String or Bio::Alignment object
# arg:: same format as String#slice
#
# It will be obsoleted.
def replace_slice(aln, *arg) #(original) if aln.respond_to?(:to_str) then #aln.is_a?(String) self.each do |s| s[*arg] = aln end elsif aln.is_a?(self.class) then aln.each_pair do |k, s| self[k][*arg] = s end else i = 0 aln.each do |s| self.order(i)[*arg] = s i += 1 end end self end # Performs multiple alignment by using external program. def do_align(factory) a0 = self.class.new (0...self.size).each { |i| a0.store(i, self.order(i)) } r = factory.query(a0) a1 = r.alignment a0.keys.each do |k| unless a1[k.to_s] then raise 'alignment result is inconsistent with input data' end end a2 = self.new a0.keys.each do |k| a2.store(self.keys[k], a1[k.to_s]) end a2 end # Convert to fasta format and returns an array of strings. # # It will be obsoleted. def to_fasta_array(*arg) #(original) width = nil if arg[0].is_a?(Integer) then width = arg.shift end options = (arg.shift or {}) width = options[:width] unless width if options[:avoid_same_name] then na = __clustal_avoid_same_name(self.keys, 30) else na = self.keys.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') } end a = self.collect do |s| ">#{na.shift}\n" + if width then s.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n") else s.to_s + "\n" end end a end # Convets to fasta format and returns an array of FastaFormat objects. # # It will be obsoleted. def to_fastaformat_array(*arg) #(original) require 'bio/db/fasta' a = self.to_fasta_array(*arg) a.collect! do |x| Bio::FastaFormat.new(x) end a end # Converts to fasta format and returns a string. # # The specification of the argument will be changed. # # Note: to_fasta is deprecated. # Please use output_fasta instead. def to_fasta(*arg) #(original) warn "to_fasta is deprecated. Please use output_fasta." self.to_fasta_array(*arg).join('') end # The method name consensus will be obsoleted. # Please use consensus_string instead. alias consensus consensus_string end #class OriginalAlignment # Bio::Alignment::GAP is a set of class methods for # gap-related position translation. 
module GAP # position with gaps are translated into the position without gaps. #seq:: sequence #pos:: position with gaps #gap_regexp:: regular expression to specify gaps def ungapped_pos(seq, pos, gap_regexp) p = seq[0..pos].gsub(gap_regexp, '').length p -= 1 if p > 0 p end module_function :ungapped_pos # position without gaps are translated into the position with gaps. #seq:: sequence #pos:: position with gaps #gap_regexp:: regular expression to specify gaps def gapped_pos(seq, pos, gap_regexp) olen = seq.gsub(gap_regexp, '').length pos = olen if pos >= olen pos = olen + pos if pos < 0 i = 0 l = pos + 1 while l > 0 and i < seq.length x = seq[i, l].gsub(gap_regexp, '').length i += l l -= x end i -= 1 if i > 0 i end module_function :gapped_pos end # module GAP # creates a new Bio::Alignment::OriginalAlignment object. # Please refer document of OriginalAlignment.new. def self.new(*arg) OriginalAlignment.new(*arg) end # creates a new Bio::Alignment::OriginalAlignment object. # Please refer document of OriginalAlignment.new2. def self.new2(*arg) OriginalAlignment.new2(*arg) end # creates a new Bio::Alignment::OriginalAlignment object. # Please refer document of OriginalAlignment.readfiles. def self.readfiles(*files) OriginalAlignment.readfiles(*files) end #--- # Service classes for multiple alignment applications #+++ #--- # Templates of alignment application factory #+++ # Namespace for templates for alignment application factory module FactoryTemplate # Template class for alignment application factory. # The program acts: # input: stdin or file, format = fasta format # output: stdout (parser should be specified by DEFAULT_PARSER) class Simple # Creates a new alignment factory def initialize(program = self.class::DEFAULT_PROGRAM, options = []) @program = program @options = options @command = nil @output = nil @report = nil @exit_status = nil @data_stdout = nil end # program name attr_accessor :program # options attr_accessor :options # Last command-line string. 
Returns nil or an array of String. # Note that filenames described in the command-line may already # be removed because these files may be temporary files. attr_reader :command # Last raw result of the program. # Return a string (or nil). attr_reader :output # Last result object performed by the factory. attr_reader :report # Last exit status attr_reader :exit_status # Last output to the stdout. attr_accessor :data_stdout # Clear the internal data and status, except program and options. def reset @command = nil @output = nil @report = nil @exit_status = nil @data_stdout = nil end # Executes the program. # If +seqs+ is not nil, perform alignment for seqs. # If +seqs+ is nil, simply executes the program. # # Compatibility note: When seqs is nil, # returns true if the program exits normally, and # returns false if the program exits abnormally. def query(seqs) if seqs then query_alignment(seqs) else exec_local(@options) @exit_status.exitstatus == 0 ? true : false end end # Performs alignment for seqs. # +seqs+ should be Bio::Alignment or Array of sequences or nil. def query_alignment(seqs) unless seqs.respond_to?(:output_fasta) then seqs = Bio::Alignment.new(seqs) end query_string(seqs.output_fasta(:width => 70)) end # alias of query_alignment. # # Compatibility Note: query_align will renamed to query_alignment. def query_align(seqs) #warn 'query_align is renamed to query_alignment.' query_alignment(seqs) end # Performs alignment for +str+. # The +str+ should be a string that can be recognized by the program. def query_string(str) _query_string(str, @options) @report end # Performs alignment of sequences in the file named +fn+. def query_by_filename(filename_in) _query_local(filename_in, @options) @report end private # Executes a program in the local machine. 
def exec_local(opt, data_stdin = nil) @exit_status = nil @command = [ @program, *opt ] #STDERR.print "DEBUG: ", @command.join(" "), "\n" @data_stdout = Bio::Command.query_command(@command, data_stdin) @exit_status = $? end # prepare temporary file def _prepare_tempfile(str = nil) tf_in = Tempfile.open(str ? 'alignment_i' : 'alignment_o') tf_in.print str if str tf_in.close(false) tf_in end # generates options specifying input/output filename. # nil for filename means stdin or stdout. # +options+ must not contain specify filenames. # returns an array of string. def _generate_options(infile, outfile, options) options + (infile ? _option_input_file(infile) : _option_input_stdin) + (outfile ? _option_output_file(outfile) : _option_output_stdout) end # generates options specifying input filename. # returns an array of string def _option_input_file(fn) [ fn ] end # generates options specifying output filename. # returns an array of string def _option_output_file(fn) raise 'can not specify output file: always stdout' end # generates options specifying that input is taken from stdin. # returns an array of string def _option_input_stdin [] end # generates options specifying output to stdout. # returns an array of string def _option_output_stdout [] end end #class Simple # mix-in module module WrapInputStdin private # Performs alignment for +str+. # The +str+ should be a string that can be recognized by the program. def _query_string(str, opt) _query_local(nil, opt, str) end end #module WrapInputStdin # mix-in module module WrapInputTempfile private # Performs alignment for +str+. # The +str+ should be a string that can be recognized by the program. 
def _query_string(str, opt) begin tf_in = _prepare_tempfile(str) ret = _query_local(tf_in.path, opt, nil) ensure tf_in.close(true) if tf_in end ret end end #module WrapInputTempfile # mix-in module module WrapOutputStdout private # Performs alignment by specified filenames def _query_local(fn_in, opt, data_stdin = nil) opt = _generate_options(fn_in, nil, opt) exec_local(opt, data_stdin) @output = @data_stdout @report = self.class::DEFAULT_PARSER.new(@output) @report end end #module WrapOutputStdout # mix-in module module WrapOutputTempfile private # Performs alignment def _query_local(fn_in, opt, data_stdin = nil) begin tf_out = _prepare_tempfile() opt = _generate_options(fn_in, tf_out.path, opt) exec_local(opt, data_stdin) tf_out.open @output = tf_out.read ensure tf_out.close(true) if tf_out end @report = self.class::DEFAULT_PARSER.new(@output) @report end end #module WrapOutputTempfile # Template class for alignment application factory. # The program needs: # input: file (cannot accept stdin), format = fasta format # output: stdout (parser should be specified by DEFAULT_PARSER) class FileInStdoutOut < Simple include Bio::Alignment::FactoryTemplate::WrapInputTempfile include Bio::Alignment::FactoryTemplate::WrapOutputStdout private # generates options specifying that input is taken from stdin. # returns an array of string def _option_input_stdin raise 'input is always a file' end end #class FileInStdoutOut # Template class for alignment application factory. # The program needs: # input: stdin or file, format = fasta format # output: file (parser should be specified by DEFAULT_PARSER) class StdinInFileOut < Simple include Bio::Alignment::FactoryTemplate::WrapInputStdin include Bio::Alignment::FactoryTemplate::WrapOutputTempfile private # generates options specifying output to stdout. # returns an array of string def _option_output_stdout raise 'output is always a file' end end #class StdinInFileOut # Template class for alignment application factory. 
# The program needs: # input: file (cannot accept stdin), format = fasta format # output: file (parser should be specified by DEFAULT_PARSER) class FileInFileOut < Simple include Bio::Alignment::FactoryTemplate::WrapInputTempfile include Bio::Alignment::FactoryTemplate::WrapOutputTempfile private # generates options specifying that input is taken from stdin. # returns an array of string def _option_input_stdin raise 'input is always a file' end # generates options specifying output to stdout. # returns an array of string def _option_output_stdout raise 'output is always a file' end end #class FileInFileOut # Template class for alignment application factory. # The program needs: # input: file (cannot accept stdin), format = fasta format # output: file (parser should be specified by DEFAULT_PARSER) # Tree (*.dnd) output is also supported. class FileInFileOutWithTree < FileInFileOut # alignment guide tree generated by the program (*.dnd file) attr_reader :output_dnd def reset @output_dnd = nil super end private # Performs alignment def _query_local(fn_in, opt, data_stdin = nil) begin tf_dnd = _prepare_tempfile() opt = opt + _option_output_dndfile(tf_dnd.path) ret = super(fn_in, opt, data_stdin) tf_dnd.open @output_dnd = tf_dnd.read ensure tf_dnd.close(true) if tf_dnd end ret end # generates options specifying output tree file (*.dnd). 
# returns an array of string def _option_output_dndfile raise NotImplementedError end end #class FileInFileOutWithTree end #module FactoryTemplate end #module Alignment end #module Bio bio-1.4.3.0001/lib/bio/db.rb0000644000004100000410000002021612200110570015115 0ustar www-datawww-data# # = bio/db.rb - common API for database parsers # # Copyright:: Copyright (C) 2001, 2002, 2005 # Toshiaki Katayama # License:: The Ruby License # # $Id: db.rb,v 0.38 2007/05/08 17:02:13 nakao Exp $ # # == On-demand parsing and cache # # The flatfile parsers (sub classes of the Bio::DB) split the original entry # into a Hash and store the hash in the @orig instance variable. To parse # in detail is delayed until the method is called which requires a further # parsing of a content of the @orig hash. Fully parsed data is cached in the # another hash, @data, separately. # # == Guide lines for the developers to create an new database class # # --- Bio::DB.new(entry) # # The 'new' method should accept the entire entry in one String and # return the parsed database object. # # --- Bio::DB#entry_id # # Database classes should implement the following methods if appropriate: # # * entry_id # * definition # # Every sub class should define the following constants if appropriate: # # * DELIMITER (RS) # * entry separator of the flatfile of the database. # * RS (= record separator) is an alias for the DELIMITER in short. # # * TAGSIZE # * length of the tag field in the FORTRAN-like format. 
# # |<- tag ->||<- data ---->| # ENTRY_ID A12345 # DEFINITION Hoge gene of the Pokemonia pikachuae # # === Template of the sub class # # module Bio # class Hoge < DB # # DELIMITER = RS = "\n//\n" # TAGSIZE = 12 # You can omit this line if not needed # # def initialize(entry) # end # # def entry_id # end # # end # class Hoge # end # module Bio # # === Recommended method names for sub classes # # In general, the method name should be in the singular form when returns # a Object (including the case when the Object is a String), and should be # the plural form when returns same Objects in Array. It depends on the # database classes that which form of the method name can be use. # # For example, GenBank has several REFERENCE fields in one entry, so define # Bio::GenBank#references and this method should return an Array of the # Reference objects. On the other hand, MEDLINE has one REFERENCE information # per one entry, so define Bio::MEDLINE#reference method and this should # return a Reference object. # # The method names used in the sub classes should be taken from the following # list if appropriate: # # --- entry_id #=> String # # The entry identifier. # # --- definition #=> String # # The description of the entry. # # --- reference #=> Bio::Reference # --- references #=> Array of Bio::Reference # # The reference field(s) of the entry. # # --- dblink #=> String # --- dblinks #=> Array of String # # The link(s) to the other database entry. # # --- naseq #=> Bio::Sequence::NA # # The DNA/RNA sequence of the entry. # # --- nalen #=> Integer # # The length of the DNA/RNA sequence of the entry. # # --- aaseq #=> Bio::Sequence::AA # # The amino acid sequence of the entry. # # --- aalen #=> Integer # # The length of the amino acid sequence of the entry. # # --- seq #=> Bio::Sequence::NA or Bio::Sequence::AA # # Returns an appropriate sequence object. # # --- position #=> String # # The position of the sequence in the entry or in the genome (depends on # the database). 
# # --- locations #=> Bio::Locations # # Returns Bio::Locations.new(position). # # --- division #=> String # # The sub division name of the database. # # * Example: # * EST, VRL etc. for GenBank # * PATTERN, RULE etc. for PROSITE # # --- date #=> String # # The date of the entry. # Should we use Date (by ParseDate) instead of String? # # --- gene #=> String # --- genes #=> Array of String # # The name(s) of the gene. # # --- organism #=> String # # The name of the organism. # require 'bio/sequence' require 'bio/reference' require 'bio/feature' module Bio class DB def self.open(filename, *mode, &block) Bio::FlatFile.open(self, filename, *mode, &block) end # Returns an entry identifier as a String. This method must be # implemented in every database classes by overriding this method. def entry_id raise NotImplementedError end # Returns a list of the top level tags of the entry as an Array of String. def tags @orig.keys end # Returns true or false - wether the entry contains the field of the # given tag name. def exists?(tag) @orig.include?(tag) end # Returns an intact field of the tag as a String. def get(tag) @orig[tag] end # Similar to the get method, however, fetch returns the content of the # field without its tag and any extra white spaces stripped. def fetch(tag, skip = 0) field = @orig[tag].split(/\n/, skip + 1).last.to_s truncate(field.gsub(/^.{0,#{@tagsize}}/,'')) end private # Returns a String with successive white spaces are replaced by one # space and stripeed. def truncate(str) str ||= "" return str.gsub(/\s+/, ' ').strip end # Returns a tag name of the field as a String. def tag_get(str) str ||= "" return str[0,@tagsize].strip end # Returns a String of the field without a tag name. def tag_cut(str) str ||= "" str[0,@tagsize] = '' return str end # Returns the content of the field as a String like the fetch method. # Furthermore, field_fetch stores the result in the @data hash. 
def field_fetch(tag, skip = 0) unless @data[tag] @data[tag] = fetch(tag, skip) end return @data[tag] end # Returns an Array containing each line of the field without a tag. # lines_fetch also stores the result in the @data hash. def lines_fetch(tag) unless @data[tag] list = [] lines = get(tag).split(/\n/) lines.each do |line| data = tag_cut(line) if data[/^\S/] # next sub field list << data else # continued sub field data.strip! if list.last[/\-$/] # folded list[-1] += data else list[-1] += " #{data}" # rest of list end end end @data[tag] = list end @data[tag] end end # class DB # Stores a NCBI style (GenBank, KEGG etc.) entry. class NCBIDB < DB autoload :Common, 'bio/db/genbank/common' # The entire entry is passed as a String. The length of the tag field is # passed as an Integer. Parses the entry roughly by the entry2hash method # and returns a database object. def initialize(entry, tagsize) @tagsize = tagsize @orig = entry2hash(entry.strip) # Hash of the original entry @data = {} # Hash of the parsed entry end private # Splits an entry into an Array of Strings at the level of top tags. def toptag2array(str) sep = "\001" str.gsub(/\n([A-Za-z\/\*])/, "\n#{sep}\\1").split(sep) end # Splits a field into an Array of Strings at the level of sub tags. def subtag2array(str) sep = "\001" str.gsub(/\n(\s{1,#{@tagsize-1}}\S)/, "\n#{sep}\\1").split(sep) end # Returns the contents of the entry as a Hash with the top level tags as # its keys. def entry2hash(entry) hash = Hash.new('') fields = toptag2array(entry) fields.each do |field| tag = tag_get(field) hash[tag] += field end return hash end end # class NCBIDB # Class for KEGG databases. Inherits a NCBIDB class. class KEGGDB < NCBIDB end # Stores an EMBL style (EMBL, TrEMBL, Swiss-Prot etc.) entry. class EMBLDB < DB autoload :Common, 'bio/db/embl/common' # The entire entry is passed as a String. The length of the tag field is # passed as an Integer. 
Parses the entry roughly by the entry2hash method # and returns a database object. def initialize(entry, tagsize) @tagsize = tagsize @orig = entry2hash(entry.strip) # Hash of the original entry @data = {} # Hash of the parsed entry end private # Returns the contents of the entry as a Hash keyed by the tag of each # line (as returned by tag_get). Lines tagged 'XX' are skipped and every # line whose tag matches /^R./ is stored under the single key 'R'. # Lines sharing a key are concatenated in order of appearance. # Note that looking up a missing tag in the returned Hash stores and # returns a new empty String for that tag. def entry2hash(entry) hash = Hash.new { |h,k| h[k] = '' } entry.each_line do |line| tag = tag_get(line) next if tag == 'XX' tag = 'R' if tag =~ /^R./ # Reference lines hash[tag].concat line end return hash end end # class EMBLDB end # module Bio bio-1.4.3.0001/lib/bio/sequence.rb0000644000004100000410000003760112200110570016346 0ustar www-datawww-data# # = bio/sequence.rb - biological sequence class # # Copyright:: Copyright (C) 2000-2006 # Toshiaki Katayama , # Yoshinori K. Okuji , # Naohisa Goto , # Ryan Raaum , # Jan Aerts # License:: The Ruby License # module Bio # = DESCRIPTION # Bio::Sequence objects represent annotated sequences in bioruby. # A Bio::Sequence object is a wrapper around the actual sequence, # represented as either a Bio::Sequence::NA or a Bio::Sequence::AA object. # For most users, this encapsulation will be completely transparent. # Bio::Sequence responds to all methods defined for Bio::Sequence::NA/AA # objects using the same arguments and returning the same values (even though # these methods are not documented specifically for Bio::Sequence).
# # = USAGE # # Create a nucleic or amino acid sequence # dna = Bio::Sequence.auto('atgcatgcATGCATGCAAAA') # rna = Bio::Sequence.auto('augcaugcaugcaugcaaaa') # aa = Bio::Sequence.auto('ACDEFGHIKLMNPQRSTVWYU') # # # Print it out # puts dna.to_s # puts aa.to_s # # # Get a subsequence, bioinformatics style (first nucleotide is '1') # puts dna.subseq(2,6) # # # Get a subsequence, informatics style (first nucleotide is '0') # puts dna[2,6] # # # Print in FASTA format # puts dna.output(:fasta) # # # Print all codons # dna.window_search(3,3) do |codon| # puts codon # end # # # Splice or otherwise mangle your sequence # puts dna.splicing("complement(join(1..5,16..20))") # puts rna.splicing("complement(join(1..5,16..20))") # # # Convert a sequence containing ambiguity codes into a # # regular expression you can use for subsequent searching # puts aa.to_re # # # These should speak for themselves # puts dna.complement # puts dna.composition # puts dna.molecular_weight # puts dna.translate # puts dna.gc_percent class Sequence autoload :Common, 'bio/sequence/common' autoload :NA, 'bio/sequence/na' autoload :AA, 'bio/sequence/aa' autoload :Generic, 'bio/sequence/generic' autoload :Format, 'bio/sequence/format' autoload :Adapter, 'bio/sequence/adapter' autoload :QualityScore, 'bio/sequence/quality_score' autoload :SequenceMasker, 'bio/sequence/sequence_masker' #-- # require "bio/sequence/compat.rb" here to avoid circular require and # possible superclass mismatch of AA class #++ require 'bio/sequence/compat' include Format include SequenceMasker # Create a new Bio::Sequence object # # s = Bio::Sequence.new('atgc') # puts s #=> 'atgc' # # Note that this method does not initialize the contained sequence # as any kind of bioruby object, only as a simple string # # puts s.seq.class #=> String # # See Bio::Sequence#na, Bio::Sequence#aa, and Bio::Sequence#auto # for methods to transform the basic String of a just created # Bio::Sequence object to a proper bioruby object # --- #
# *Arguments*:
    # * (required) _str_: String or Bio::Sequence::NA/AA object
    # *Returns*:: Bio::Sequence object
    def initialize(str)
      @seq = str
    end

    # Pass any unknown method calls to the wrapped sequence object.  see
    # http://www.rubycentral.com/book/ref_c_object.html#Object.method_missing
    #
    # When the NoMethodError was raised by the forwarding call itself (its
    # first two backtrace frames are the __send__ line below), it is replaced
    # by an error of the same class that names this object and has those two
    # frames removed.  Any other NoMethodError is re-raised unchanged.
    #
    # The line number of the __send__ call is computed as __LINE__ - 2, so the
    # "seq.__send__", "rescue" and "lineno =" lines must stay on three
    # consecutive lines.
    def method_missing(sym, *args, &block) #:nodoc:
      begin
        seq.__send__(sym, *args, &block)
      rescue NoMethodError => evar
        lineno = __LINE__ - 2
        file = __FILE__
        bt_here = [ "#{file}:#{lineno}:in \`__send__\'",
                    "#{file}:#{lineno}:in \`method_missing\'"
                  ]
        if bt_here == evar.backtrace[0, 2] then
          bt = evar.backtrace[2..-1]
          evar = evar.class.new("undefined method \`#{sym.to_s}\' for #{self.inspect}")
          evar.set_backtrace(bt)
        end
        #p lineno
        #p file
        #p bt_here
        #p evar.backtrace
        raise(evar)
      end
    end

    # The sequence identifier (String). For example, for a sequence
    # of Genbank origin, this is the locus name.
    # For a sequence of EMBL origin, this is the primary accession number.
    attr_accessor :entry_id

    # A String with a description of the sequence (String)
    attr_accessor :definition

    # Features (An Array of Bio::Feature objects)
    attr_accessor :features

    # References (An Array of Bio::Reference objects)
    attr_accessor :references

    # Comments (String or an Array of String)
    attr_accessor :comments

    # Keywords (An Array of String)
    attr_accessor :keywords

    # Links to other database entries.
    # (An Array of Bio::Sequence::DBLink objects)
    attr_accessor :dblinks

    # Bio::Sequence::NA/AA
    attr_accessor :moltype

    # The sequence object, usually Bio::Sequence::NA/AA,
    # but could be a simple String
    attr_accessor :seq

    # Quality scores of the bases/residues in the sequence.
    # (Array containing Integer, or nil)
    attr_accessor :quality_scores

    # The meaning (calculation method) of the quality scores stored in
    # the quality_scores attribute.
    # Maybe one of :phred, :solexa, or nil.
    #
    # Note that if it is nil, and error_probabilities is empty,
    # some methods implicitly assume that it is :phred (PHRED score).
attr_accessor :quality_score_type

    # Error probabilities of the bases/residues in the sequence.
    # (Array containing Float, or nil)
    attr_accessor :error_probabilities

    #---
    # Attributes below have been added during BioHackathon2008
    #+++

    # Version number of the sequence (String or Integer).
    # Unlike entry_version, sequence_version will be changed
    # when the submitter of the sequence updates the entry.
    # Normally, the same entry taken from different databases (EMBL, GenBank,
    # and DDBJ) may have the same sequence_version.
    attr_accessor :sequence_version

    # Topology (String). "circular", "linear", or nil.
    attr_accessor :topology

    # Strandedness (String). "single" (single-stranded),
    # "double" (double-stranded), "mixed" (mixed-stranded), or nil.
    attr_accessor :strandedness

    # molecular type (String). "DNA" or "RNA" for nucleotide sequence.
    attr_accessor :molecule_type

    # Data Class defined by EMBL (String)
    # See http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#3_1
    attr_accessor :data_class

    # Taxonomic Division defined by EMBL/GenBank/DDBJ (String)
    # See http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#3_2
    attr_accessor :division

    # Primary accession number (String)
    attr_accessor :primary_accession

    # Secondary accession numbers (Array of String)
    attr_accessor :secondary_accessions

    # Created date of the sequence entry (Date, DateTime, Time, or String)
    attr_accessor :date_created

    # Last modified date of the sequence entry (Date, DateTime, Time, or String)
    attr_accessor :date_modified

    # Release information when created (String)
    attr_accessor :release_created

    # Release information when last-modified (String)
    attr_accessor :release_modified

    # Version of the entry (String or Integer).
    # Unlike sequence_version, entry_version is a database
    # maintainer's internal version number.
    # The version number will be changed when the database maintainer
    # modifies the entry.
    # The same entry in EMBL, GenBank, and DDBJ may have different
    # entry_version.
attr_accessor :entry_version

    # Organism species (String). For example, "Escherichia coli".
    attr_accessor :species

    # Organism classification, taxonomic classification of the source organism.
    # (Array of String)
    attr_accessor :classification
    alias taxonomy classification

    # (not well supported) Organelle information (String).
    attr_accessor :organelle

    # Namespace of the sequence IDs described in entry_id, primary_accession,
    # and secondary_accessions methods (String).
    # For example, 'EMBL', 'GenBank', 'DDBJ', 'RefSeq'.
    attr_accessor :id_namespace

    # Sequence identifiers which are not described in entry_id,
    # primary_accession, and secondary_accessions methods
    # (Array of Bio::Sequence::DBLink objects).
    # For example, NCBI GI number can be stored.
    # Note that only identifiers of the entry itself should be stored.
    # For database cross references, dblinks should be used.
    attr_accessor :other_seqids

    # Guess the type of sequence, Amino Acid or Nucleic Acid, and create a
    # new sequence object (Bio::Sequence::AA or Bio::Sequence::NA) on the basis
    # of this guess.  This method will change the current Bio::Sequence object:
    # both the moltype and the wrapped sequence are replaced.
    #
    #   s = Bio::Sequence.new('atgc')
    #   puts s.seq.class                        #=> String
    #   s.auto
    #   puts s.seq.class                        #=> Bio::Sequence::NA
    # ---
    # *Returns*:: Bio::Sequence::NA/AA object
    def auto
      @moltype = guess
      if @moltype == NA
        @seq = NA.new(seq)
      else
        @seq = AA.new(seq)
      end
    end

    # Given a sequence String, guess its type, Amino Acid or Nucleic Acid, and
    # return a new Bio::Sequence object wrapping a sequence of the guessed type
    # (either Bio::Sequence::AA or Bio::Sequence::NA)
    #
    #   s = Bio::Sequence.auto('atgc')
    #   puts s.seq.class                        #=> Bio::Sequence::NA
    # ---
    # *Arguments*:
    # * (required) _str_: String *or* Bio::Sequence::NA/AA object
    # *Returns*:: Bio::Sequence object
    def self.auto(str)
      seq = self.new(str)
      seq.auto
      return seq
    end

    # Guess the class of the current sequence.  Returns the class
    # (Bio::Sequence::AA or Bio::Sequence::NA) guessed.
In general, used by # developers only, but if you know what you are doing, feel free. # # s = Bio::Sequence.new('atgc') # puts s.guess #=> Bio::Sequence::NA # # There are three parameters: `threshold`, `length`, and `index`. # # The `threshold` value (defaults to 0.9) is the frequency of # nucleic acid bases [AGCTUagctu] required in the sequence for this method # to produce a Bio::Sequence::NA "guess". In the default case, if less # than 90% of the bases (after excluding [Nn]) are in the set [AGCTUagctu], # then the guess is Bio::Sequence::AA. # # s = Bio::Sequence.new('atgcatgcqq') # puts s.guess #=> Bio::Sequence::AA # puts s.guess(0.8) #=> Bio::Sequence::AA # puts s.guess(0.7) #=> Bio::Sequence::NA # # The `length` value is how much of the total sequence to use in the # guess (default 10000). If your sequence is very long, you may # want to use a smaller amount to reduce the computational burden. # # s = Bio::Sequence.new(A VERY LONG SEQUENCE) # puts s.guess(0.9, 1000) # limit the guess to the first 1000 positions # # The `index` value is where to start the guess. Perhaps you know there # are a lot of gaps at the start... # # s = Bio::Sequence.new('-----atgcc') # puts s.guess #=> Bio::Sequence::AA # puts s.guess(0.9,10000,5) #=> Bio::Sequence::NA # --- # *Arguments*: # * (optional) _threshold_: Float in range 0,1 (default 0.9) # * (optional) _length_: Fixnum (default 10000) # * (optional) _index_: Fixnum (default 1) # *Returns*:: Bio::Sequence::NA/AA def guess(threshold = 0.9, length = 10000, index = 0) str = seq.to_s[index,length].to_s.extend Bio::Sequence::Common cmp = str.composition bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] + cmp['U'] + cmp['a'] + cmp['t'] + cmp['g'] + cmp['c'] + cmp['u'] total = str.length - cmp['N'] - cmp['n'] if bases.to_f / total > threshold return NA else return AA end end # Guess the class of a given sequence. Returns the class # (Bio::Sequence::AA or Bio::Sequence::NA) guessed. 
# In general, used by
    # developers only, but if you know what you are doing, feel free.
    #
    #   puts Bio::Sequence.guess('atgc')        #=> Bio::Sequence::NA
    #
    # There are three optional parameters: `threshold`, `length`, and `index`.
    #
    # The `threshold` value (defaults to 0.9) is the frequency of
    # nucleic acid bases [AGCTUagctu] required in the sequence for this method
    # to produce a Bio::Sequence::NA "guess".  In the default case, if less
    # than 90% of the bases (after excluding [Nn]) are in the set [AGCTUagctu],
    # then the guess is Bio::Sequence::AA.
    #
    #   puts Bio::Sequence.guess('atgcatgcqq')      #=> Bio::Sequence::AA
    #   puts Bio::Sequence.guess('atgcatgcqq', 0.8) #=> Bio::Sequence::AA
    #   puts Bio::Sequence.guess('atgcatgcqq', 0.7) #=> Bio::Sequence::NA
    #
    # The `length` value is how much of the total sequence to use in the
    # guess (default 10000).  If your sequence is very long, you may
    # want to use a smaller amount to reduce the computational burden.
    #
    #   # limit the guess to the first 1000 positions
    #   puts Bio::Sequence.guess('A VERY LONG SEQUENCE', 0.9, 1000)
    #
    # The `index` value is the zero-based position where the guess starts.
    # Perhaps you know there are a lot of gaps at the start...
    #
    #   puts Bio::Sequence.guess('-----atgcc')             #=> Bio::Sequence::AA
    #   puts Bio::Sequence.guess('-----atgcc',0.9,10000,5) #=> Bio::Sequence::NA
    # ---
    # *Arguments*:
    # * (required) _str_: String *or* Bio::Sequence::NA/AA object
    # * (optional) _threshold_: Float in range 0,1 (default 0.9)
    # * (optional) _length_: Integer (default 10000)
    # * (optional) _index_: Integer (default 0)
    # *Returns*:: Bio::Sequence::NA/AA
    def self.guess(str, *args)
      # Wrap the input and forward the optional arguments unchanged to the
      # instance-level guess.
      self.new(str).guess(*args)
    end

    # Transform the sequence wrapped in the current Bio::Sequence object
    # into a Bio::Sequence::NA object.  This method will change the current
    # object.  This method does not validate your choice, so be careful!
    #
    #   s = Bio::Sequence.new('RRLE')
    #   puts s.seq.class                        #=> String
    #   s.na
    #   puts s.seq.class                        #=> Bio::Sequence::NA !!!
#
    # However, if you know your sequence type, this method may be
    # constructively used after initialization,
    #
    #   s = Bio::Sequence.new('atgc')
    #   s.na
    # ---
    # *Returns*:: the Bio::Sequence::NA class (the new value of moltype)
    def na
      @seq = NA.new(seq)
      @moltype = NA
    end

    # Transform the sequence wrapped in the current Bio::Sequence object
    # into a Bio::Sequence::AA object.  This method will change the current
    # object.  This method does not validate your choice, so be careful!
    #
    #   s = Bio::Sequence.new('atgc')
    #   puts s.seq.class                        #=> String
    #   s.aa
    #   puts s.seq.class                        #=> Bio::Sequence::AA !!!
    #
    # However, if you know your sequence type, this method may be
    # constructively used after initialization,
    #
    #   s = Bio::Sequence.new('RRLE')
    #   s.aa
    # ---
    # *Returns*:: the Bio::Sequence::AA class (the new value of moltype)
    def aa
      @seq = AA.new(seq)
      @moltype = AA
    end

    # Create a new Bio::Sequence object from a formatted string
    # (GenBank, EMBL, fasta format, etc.)
    #
    #   s = Bio::Sequence.input(str)
    # ---
    # *Arguments*:
    # * (required) _str_: string
    # * (optional) _format_: format specification (class or nil);
    #   when nil, the class is auto-detected with Bio::FlatFile::AutoDetect
    # *Returns*:: Bio::Sequence object
    def self.input(str, format = nil)
      klass = format || Bio::FlatFile::AutoDetect.default.autodetect(str)
      obj = klass.new(str)
      obj.to_biosequence
    end

    # alias of Bio::Sequence.input
    def self.read(str, format = nil)
      input(str, format)
    end

    # accession numbers of the sequence: the primary accession followed by
    # the secondary accessions, with nil values removed.
    #
    # *Returns*:: Array of String
    def accessions
      [ primary_accession, secondary_accessions ].flatten.compact
    end

    # Normally, users should not call this method directly.
    # Use Bio::*#to_biosequence (e.g. Bio::GenBank#to_biosequence).
    #
    # Creates a new Bio::Sequence object from database data with an
    # adapter module.
def self.adapter(source_data, adapter_module) biosequence = self.new(nil) biosequence.instance_eval { remove_instance_variable(:@seq) @source_data = source_data } biosequence.extend(adapter_module) biosequence end end # Sequence end # Bio bio-1.4.3.0001/lib/bio/io/0000755000004100000410000000000012200110570014611 5ustar www-datawww-databio-1.4.3.0001/lib/bio/io/keggapi.rb0000644000004100000410000002405712200110570016555 0ustar www-datawww-data# # = bio/io/keggapi.rb - KEGG API access class # # Copyright:: Copyright (C) 2003, 2004 Toshiaki Katayama # License:: The Ruby License # # $Id:$ # require 'bio/io/soapwsdl' require 'uri' require 'net/http' require 'bio/command' module Bio class KEGG # == Description # # KEGG API is a web service to use KEGG system via SOAP/WSDL. # # == References # # For more informations on KEGG API, see the following site and read the # reference manual. # # * http://www.genome.jp/kegg/soap/ # * http://www.genome.jp/kegg/soap/doc/keggapi_manual.html # # == List of methods # # As of KEGG API v5.0 # # * list_databases # * list_organisms # * list_pathways(org) # * binfo(string) # * bget(string) # * bfind(string) # * btit(string) # * get_linkdb_by_entry(entry_id, db, start, max_results) # * get_best_best_neighbors_by_gene(genes_id, start, max_results) # * get_best_neighbors_by_gene(genes_id, start, max_results) # * get_reverse_best_neighbors_by_gene(genes_id, start, max_results) # * get_paralogs_by_gene(genes_id, start, max_results) # * get_similarity_between_genes(genes_id1, genes_id2) # * get_motifs_by_gene(genes_id, db) # * get_genes_by_motifs(motif_id_list, start, max_results) # * get_ko_by_gene(genes_id) # * get_ko_members(ko_id) # * get_oc_members_by_gene(genes_id, start, max_results) # * get_pc_members_by_gene(genes_id, start, max_results) # * mark_pathway_by_objects(pathway_id, object_id_list) # * color_pathway_by_objects(pathway_id, object_id_list, fg_color_list, bg_color_list) # * get_genes_by_pathway(pathway_id) # * 
get_enzymes_by_pathway(pathway_id) # * get_compounds_by_pathway(pathway_id) # * get_reactions_by_pathway(pathway_id) # * get_pathways_by_genes(genes_id_list) # * get_pathways_by_enzymes(enzyme_id_list) # * get_pathways_by_compounds(compound_id_list) # * get_pathways_by_reactions(reaction_id_list) # * get_linked_pathways(pathway_id) # * get_genes_by_enzyme(enzyme_id, org) # * get_enzymes_by_gene(genes_id) # * get_enzymes_by_compound(compound_id) # * get_enzymes_by_reaction(reaction_id) # * get_compounds_by_enzyme(enzyme_id) # * get_compounds_by_reaction(reaction_id) # * get_reactions_by_enzyme(enzyme_id) # * get_reactions_by_compound(compound_id) # * get_genes_by_organism(org, start, max_results) # * get_number_of_genes_by_organism(org) # # == KEGG API methods implemented only in BioRuby # # In BioRuby, returned values are added filter method to pick up # values in a complex data type as an array. # # #!/usr/bin/env ruby # # require 'bio' # # serv = Bio::KEGG::API.new # results = serv.get_best_neighbors_by_gene("eco:b0002", "bsu") # # # case 0 : without filter # results.each do |hit| # print hit.genes_id1, "\t", hit.genes_id2, "\t", hit.sw_score, "\n" # end # # # case 1 : select gene names and SW score only # fields = [:genes_id1, :genes_id2, :sw_score] # results.each do |hit| # puts hit.filter(fields).join("\t") # end # # # case 2 : also uses aligned position in each amino acid sequence etc. # fields1 = [:genes_id1, :start_position1, :end_position1, :best_flag_1to2] # fields2 = [:genes_id2, :start_position2, :end_position2, :best_flag_2to1] # results.each do |hit| # print "> score: ", hit.sw_score, ", identity: ", hit.identity, "\n" # print "1:\t", hit.filter(fields1).join("\t"), "\n" # print "2:\t", hit.filter(fields2).join("\t"), "\n" # end # # Using filter method will make it easy to change fields to select and # keep the script clean. 
# # * Bio::KEGG::API#get_all_neighbors_by_gene(genes_id, org) # * Bio::KEGG::API#get_all_best_best_neighbors_by_gene(genes_id) # * Bio::KEGG::API#get_all_best_neighbors_by_gene(genes_id) # * Bio::KEGG::API#get_all_reverse_best_neighbors_by_gene(genes_id) # * Bio::KEGG::API#get_all_paralogs_by_gene(genes_id) # * Bio::KEGG::API#get_all_genes_by_motifs(motif_id_list) # * Bio::KEGG::API#get_all_oc_members_by_gene(genes_id) # * Bio::KEGG::API#get_all_pc_members_by_gene(genes_id) # * Bio::KEGG::API#get_all_genes_by_organism(org) # # These methods are wrapper for the methods without _all_ in its name # and internally iterate to retrive all the results using start/max_results # value pairs described above. For example, # # #!/usr/bin/env ruby # # require 'soap/wsdlDriver' # # wsdl = "http://soap.genome.jp/KEGG.wsdl" # serv = SOAP::WSDLDriverFactory.new(wsdl).create_driver # serv.generate_explicit_type = true # # start = 1 # max_results = 100 # # loop do # results = serv.get_best_neighbors_by_gene('eco:b0002', start, max_results) # break unless results # when no more results returned # results.each do |hit| # print hit.genes_id1, "\t", hit.genes_id2, "\t", hit.sw_score, "\n" # end # start += max_results # end # # can be witten as # # #!/usr/bin/env ruby # # require 'bio' # # serv = Bio::KEGG::API.new # # results = serv.get_all_best_neighbors_by_gene('eco:b0002') # results.each do |hit| # print hit.genes_id1, "\t", hit.genes_id2, "\t", hit.sw_score, "\n" # end # # # * Bio::KEGG::API#save_image(url, filename = nil) # # Some methods of the KEGG API will return a URL of the generated image. # This method save an image specified by the URL. The filename can be # specified by its second argument, otherwise basename of the URL will # be used. 
# # #!/usr/bin/env ruby # # require 'bio' # # serv = Bio::KEGG::API.new("http://soap.genome.jp/v3.0/KEGG.wsdl") # # list = ["eco:b1002", "eco:b2388"] # url = serv.mark_pathway_by_objects("path:eco00010", list) # # # Save with the original filename (eco00010.gif in this case) # serv.save_image(url) # # # or save as "save_image.gif" # serv.save_image(url, "save_image.gif") # # * Bio::KEGG::API#get_entries(entry_id_list) # * Bio::KEGG::API#get_aaseqs(entry_id_list) # * Bio::KEGG::API#get_naseqs(entry_id_list) # * Bio::KEGG::API#get_definitions(entry_id_list) # # These methods are for the shortcut and backward compatibility # (these methods existed in the older version of the KEGG API). # class API < Bio::SOAPWSDL SERVER_URI = "http://soap.genome.jp/KEGG.wsdl" # Connect to the KEGG API's SOAP server. A WSDL file will be automatically # downloaded and parsed to generate the SOAP client driver. The default URL # for the WSDL is http://soap.genome.jp/KEGG.wsdl but it can be changed by # the argument or by wsdl= method. def initialize(wsdl = nil) @wsdl = wsdl || SERVER_URI @log = nil @start = 1 @max_results = 100 create_driver end # Returns current value for the 'start' count for the methods having # start/max_results argument pairs or changes the default value for # the 'start' count. attr_accessor :start # Returns current value for the 'max_results' number for the methods having # start/max_results argument pairs or changes the default value for the # 'max_results' count. If your request timeouts, try smaller value for # the max_results. 
attr_accessor :max_results def method_missing(*arg) begin results = @driver.send(*arg) rescue Timeout::Error retry end results = add_filter(results) return results end # def get_all_neighbors_by_gene(genes_id, org) # get_all(:get_neighbors_by_gene, genes_id, org) # end def get_all_best_best_neighbors_by_gene(genes_id) get_all(:get_best_best_neighbors_by_gene, genes_id) end def get_all_best_neighbors_by_gene(genes_id) get_all(:get_best_neighbors_by_gene, genes_id) end def get_all_reverse_best_neighbors_by_gene(genes_id) get_all(:get_reverse_best_neighbors_by_gene, genes_id) end def get_all_paralogs_by_gene(genes_id) get_all(:get_paralogs_by_gene, genes_id) end def get_all_genes_by_motifs(motif_id_list) get_all(:get_genes_by_motifs, motif_id_list) end def get_all_oc_members_by_gene(genes_id) get_all(:get_oc_members_by_gene, genes_id) end def get_all_pc_members_by_gene(genes_id) get_all(:get_pc_members_by_gene, genes_id) end def get_all_genes_by_organism(org) get_all(:get_genes_by_organism, org) end def get_all_linkdb_by_entry(entry_id, db) get_all(:get_linkdb_by_entry, entry_id, db) end def save_image(url, filename = nil) schema, user, host, port, reg, path, = URI.split(url) filename ||= File.basename(path) http = Bio::Command.new_http(host, port) response = http.get(path) File.open(filename, "w+") do |f| f.print response.body end return filename end def get_entries(ary = []) result = '' step = [@max_results, 50].min 0.step(ary.length, step) do |i| str = ary[i, step].join(" ") if entry = @driver.send(:bget, str) result << entry.to_s end end return result end def get_aaseqs(ary = []) result = '' step = [@max_results, 50].min 0.step(ary.length, step) do |i| str = "-f -n a " + ary[i, step].join(" ") if entry = @driver.send(:bget, str) result << entry.to_s end end return result end def get_naseqs(ary = []) result = '' step = [@max_results, 50].min 0.step(ary.length, step) do |i| str = "-f -n n " + ary[i, step].join(" ") if entry = @driver.send(:bget, str) result << 
entry.to_s end end return result end def get_definitions(ary = []) result = '' step = [@max_results, 50].min 0.step(ary.length, step) do |i| str = ary[i, step].join(" ") if entry = @driver.send(:btit, str) result << entry.to_s end end return result end private def add_filter(results) if results.is_a?(Array) results.each do |result| next if result.is_a?(Fixnum) def result.filter(fields) fields.collect { |field| self.send(field) } end end end return results end def get_all(method, *args) args << @start args << @max_results ary = [] loop do results = @driver.send(method, *args) break unless results break if results.empty? results = add_filter(results) ary << results args[-2] += @max_results # next start count end return ary.flatten end end # API end # KEGG end # Bio bio-1.4.3.0001/lib/bio/io/pubmed.rb0000644000004100000410000001422512200110570016416 0ustar www-datawww-data# # = bio/io/pubmed.rb - NCBI Entrez/PubMed client module # # Copyright:: Copyright (C) 2001, 2007, 2008 Toshiaki Katayama # Copyright:: Copyright (C) 2006 Jan Aerts # License:: The Ruby License # # $Id:$ # require 'bio/io/ncbirest' require 'bio/command' require 'cgi' module Bio # == Description # # The Bio::PubMed class provides several ways to retrieve bibliographic # information from the PubMed database at # http://www.ncbi.nlm.nih.gov/sites/entrez?db=PubMed # # Basically, two types of queries are possible: # # * searching for PubMed IDs given a query string: # * Bio::PubMed#esearch (recommended) # * Bio::PubMed#search (only retrieves top 20 hits) # # * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...) # given a PubMed ID # * Bio::PubMed#efetch (recommended) # * Bio::PubMed#query (unstable for the change of the HTML design) # * Bio::PubMed#pmfetch (still working but could be obsoleted by NCBI) # # The different methods within the same group are interchangeable and should # return the same result. 
# # Additional information about the MEDLINE format and PubMed programmable # APIs can be found on the following websites: # # * PubMed Overview: # http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html # * PubMed help: # http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html # * Entrez utilities index: # http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html # * How to link: # http://www.ncbi.nlm.nih.gov/books/bv.fcgi?rid=helplinks.chapter.linkshelp # # == Usage # # require 'bio' # # # If you don't know the pubmed ID: # Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics").each do |x| # p x # end # # Bio::PubMed.search("(genome AND analysis) OR bioinformatics").each do |x| # p x # end # # # To retrieve the MEDLINE entry for a given PubMed ID: # puts Bio::PubMed.efetch("10592173", "14693808") # puts Bio::PubMed.query("10592173") # puts Bio::PubMed.pmfetch("10592173") # # # This can be converted into a Bio::MEDLINE object: # manuscript = Bio::PubMed.query("10592173") # medline = Bio::MEDLINE.new(manuscript) # class PubMed < Bio::NCBI::REST # Search the PubMed database by given keywords using E-Utils and returns # an array of PubMed IDs. # # For information on the possible arguments, see # http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html#PubMed # --- # *Arguments*: # * _str_: query string (required) # * _hash_: hash of E-Utils options # * _retmode_: "xml", "html", ... # * _rettype_: "medline", ... # * _retmax_: integer (default 100) # * _retstart_: integer # * _field_ # * _reldate_ # * _mindate_ # * _maxdate_ # * _datetype_ # *Returns*:: array of PubMed IDs or a number of results def esearch(str, hash = {}) opts = { "db" => "pubmed" } opts.update(hash) super(str, opts) end # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using # entrez efetch. 
Multiple PubMed IDs can be provided: # Bio::PubMed.efetch(123) # Bio::PubMed.efetch([123,456,789]) # --- # *Arguments*: # * _ids_: list of PubMed IDs (required) # * _hash_: hash of E-Utils options # * _retmode_: "xml", "html", ... # * _rettype_: "medline", ... # * _retmax_: integer (default 100) # * _retstart_: integer # * _field_ # * _reldate_ # * _mindate_ # * _maxdate_ # * _datetype_ # *Returns*:: Array of MEDLINE formatted String def efetch(ids, hash = {}) opts = { "db" => "pubmed", "rettype" => "medline" } opts.update(hash) result = super(ids, opts) if !opts["retmode"] or opts["retmode"] == "text" result = result.split(/\n\n+/) end result end # Search the PubMed database by given keywords using entrez query and returns # an array of PubMed IDs. Caution: this method returns the first 20 hits only. # Instead, use of the 'esearch' method is strongly recomended. # --- # *Arguments*: # * _id_: query string (required) # *Returns*:: array of PubMed IDs def search(str) host = "www.ncbi.nlm.nih.gov" path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term=" ncbi_access_wait http = Bio::Command.new_http(host) response = http.get(path + CGI.escape(str)) result = response.body result = result.scan(/value="(\d+)" id="UidCheckBox"/m).flatten return result end # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using # entrez query. # --- # *Arguments*: # * _id_: PubMed ID (required) # *Returns*:: MEDLINE formatted String def query(*ids) host = "www.ncbi.nlm.nih.gov" path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid=" list = ids.collect { |x| CGI.escape(x.to_s) }.join(",") ncbi_access_wait http = Bio::Command.new_http(host) response = http.get(path + list) result = response.body result = result.scan(/
    \s*(.*?)<\/pre>/m).flatten
    
        if result =~ /id:.*Error occurred/
          # id: xxxxx Error occurred: Article does not exist
          raise( result )
        else
          if ids.size > 1
            return result
          else
            return result.first
          end
        end
      end
    
      # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
      # entrez pmfetch.
      # ---
      # *Arguments*:
      # * _id_: PubMed ID (required)
      # *Returns*:: MEDLINE formatted String
      def pmfetch(id)
        host = "www.ncbi.nlm.nih.gov"
        path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="

        # Respect the NCBI request interval before touching the network.
        ncbi_access_wait

        http = Bio::Command.new_http(host)
        response = http.get(path + CGI.escape(id.to_s))
        result = response.body

        # A body containing "<id> Error" is treated as a failure and raised
        # as-is.  The id is escaped so that it is matched literally: an id
        # containing Regexp metacharacters must neither raise RegexpError nor
        # match unrelated text.
        raise(result) if result =~ /#{Regexp.escape(id.to_s)}\s+Error/

        # Normalize CR to LF, collapse runs of newlines, and strip the
        # surrounding <pre> tags.
        result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
      end
    
      # Class-level shortcut for #esearch: creates a temporary instance and
      # forwards all arguments to it.
      def self.esearch(*args)
        client = new
        client.esearch(*args)
      end
    
      # Class-level shortcut for #efetch: creates a temporary instance and
      # forwards all arguments to it.
      def self.efetch(*args)
        client = new
        client.efetch(*args)
      end
    
      # Class-level shortcut for #search: creates a temporary instance and
      # forwards all arguments to it.
      def self.search(*args)
        client = new
        client.search(*args)
      end
    
      # Class-level shortcut for #query: creates a temporary instance and
      # forwards all arguments to it.
      def self.query(*args)
        client = new
        client.query(*args)
      end
    
      # Class-level shortcut for #pmfetch: creates a temporary instance and
      # forwards all arguments to it.
      def self.pmfetch(*args)
        client = new
        client.pmfetch(*args)
      end
    
    end # PubMed
    
    end # Bio
    
    bio-1.4.3.0001/lib/bio/io/higet.rb0000644000004100000410000000246012200110570016240 0ustar  www-datawww-data#
    # = bio/io/higet.rb - SOAP interface for HGC HiGet
    #
    # Copyright::  Copyright (C) 2005 Toshiaki Katayama 
    #
    # $Id: higet.rb,v 1.3 2006/09/19 05:46:22 k Exp $
    #
    
    require 'bio/io/soapwsdl'
    
    module Bio
    class HGC
    
    # == Description
    #
    # Interface for the HiGet service provided by Human Genome Center (HGC), Japan.
    # HiGet performs full-text search against various biological databases.
    #
    # == References
    #
    # * http://higet.hgc.jp/
    #
    class HiGet < Bio::SOAPWSDL

      # WSDL location used when no URL is given to the constructor.
      SERVER_URI = "http://higet.hgc.jp/soap/higet.wsdl"

      # Creates a HiGet client.
      # ---
      # *Arguments*:
      # * (optional) _wsdl_: URL of the WSDL file (default SERVER_URI)
      def initialize(wsdl = nil)
        wsdl ||= SERVER_URI
        super(wsdl)
      end

      # Calls the remote higet method with the "-d fasta" option string.
      # ---
      # *Arguments*:
      # * (required) _db_: database name, passed through to higet
      # * (required) _entries_: entry identifier(s), passed through to higet
      def higet_in_fasta(db, entries)
        higet(db, entries, "-d fasta")
      end

      # Calls the remote higet method with the "-d xml" option string.
      # ---
      # *Arguments*:
      # * (required) _db_: database name, passed through to higet
      # * (required) _entries_: entry identifier(s), passed through to higet
      def higet_in_xml(db, entries)
        higet(db, entries, "-d xml")
      end

    end
    
    end # HGC
    end # Bio
    
    
    if __FILE__ == $PROGRAM_NAME

      # Use pp in place of p when the library is available; otherwise keep
      # the plain p.
      begin
        require 'pp'
        alias p pp
      rescue LoadError
      end

      # Database and entry shared by all demonstration calls below.
      database = "genbank"
      entry_id = "S40289"

      puts ">>> Bio::HGC::HiGet"
      serv = Bio::HGC::HiGet.new
      serv.log = STDERR   # dump the SOAP traffic to standard error

      puts "### HiFind"
      puts serv.hifind(database, "human kinase", "-l 10")

      puts "### HiGet"
      puts serv.higet(database, entry_id, "")

      puts "### HiGet (FASTA)"
      puts serv.higet(database, entry_id, "-d fasta")

      puts "### HiGet higet_in_fasta"
      puts serv.higet_in_fasta(database, entry_id)

      puts "### HiGet higet_in_xml"
      puts serv.higet_in_xml(database, entry_id)

    end
    
    bio-1.4.3.0001/lib/bio/io/ensembl.rb0000644000004100000410000001330412200110570016564 0ustar  www-datawww-data#
    # = bio/io/ensembl.rb - An Ensembl Genome Browser client.
    #
    # Copyright::   Copyright (C) 2006
    #               Mitsuteru C. Nakao 
    # License::     The Ruby License
    #
    # $Id:$
    #
    # == Description
    #
    # Client classes for Ensembl Genome Browser.
    #
    # == Examples
    #
    #  human = Bio::Ensembl.new('Homo_sapiens')
    #  seq = human.exportview(1, 1000, 100000)
    #  gff = human.exportview(1, 1000, 100000, ['gene', 'variation', 'genscan'])
    #
    #  human = Bio::Ensembl.human
    #  seq = human.exportview(1, 1000, 100000)
    #  gff = human.exportview(1, 1000, 100000, ['gene'])
    #
    #  seq = Bio::Ensembl.human.exportview(1, 1000, 100000)
    #  gff = Bio::Ensembl.human.exportview(1, 1000, 100000, ['gene', 'variation', 'genscan'])
    #
    #  
    # == References
    #
    # * Ensembl
    #   http://www.ensembl.org/
    #
    
    require 'bio/command'
    
    module Bio
    
    # == Description
    #
    # An Ensembl Genome Browser client class.
    #
    # == Examples
    #
    #  human = Bio::Ensembl.new('Homo_sapiens')
    #  seq = human.exportview(1, 1000, 100000)
    #  gff = human.exportview(1, 1000, 100000, ['gene'])
    #
    #  mouse = Bio::Ensembl.new('Mus_musculus')
    #  seq = mouse.exportview(1, 1000, 100000)
    #  gff = mouse.exportview(1, 1000, 100000, ['gene', 'variation', 'genscan'])
    #
    #  rice = Bio::Ensembl.new('Oryza_sativa', 'http://www.gramene.org')
    #  seq = rice.exportview(1, 1000, 100000)
    #
    # == References
    #
    # * Ensembl
    #   http://www.ensembl.org/
    #
    # * GRAMENE
    #   http://www.gramene.org/
    #
    class Ensembl

      ENSEMBL_URL = 'http://www.ensembl.org'

      # Server URL (ex. 'http://www.ensembl.org')
      attr_reader :server

      # Organism name. (ex. 'Homo_sapiens').
      attr_reader :organism

      # Creates a client for one organism on one server.
      # ---
      # *Arguments*:
      # * (required) _organism_: organism name used as the URL path
      #   component (ex. 'Homo_sapiens')
      # * (optional) _server_: server URL (default ENSEMBL_URL); a trailing
      #   '/' is removed when building request URLs
      def initialize(organism, server = nil)
        @server = server || ENSEMBL_URL
        @organism = organism
        @uri = [ @server.chomp('/'), @organism ].join('/')
      end

      # Shortcut for Bio::Ensembl.new("Homo_sapiens").
      def self.human
        self.new("Homo_sapiens")
      end

      # Shortcut for Bio::Ensembl.new("Mus_musculus").
      def self.mouse
        self.new("Mus_musculus")
      end

      # Ensembl ExportView Client.
      #
      # Retrieve genomic sequence/features from Ensembl ExportView in plain text.
      # Ensembl ExportView exports genomic data (sequence and features) in
      # several file formats including fasta, GFF and tab.
      #
      # * ExportView (http://www.ensembl.org/Homo_sapiens/exportview).
      #
      # == Examples
      #
      #   human = Bio::Ensembl.new('Homo_sapiens')
      #     or
      #   human = Bio::Ensembl.human
      #
      #   # Genomic sequence in Fasta format
      #   human.exportview(:seq_region_name => 1,
      #                    :anchor1 => 1149206, :anchor2 => 1149229)
      #   human.exportview(1, 1149206, 1149229)
      #
      #   # Feature in GFF
      #   human.exportview(:seq_region_name => 1,
      #                    :anchor1 => 1149206, :anchor2 => 1150000,
      #                    :options => ['similarity', 'repeat',
      #                                 'genscan', 'variation', 'gene'])
      #   human.exportview(1, 1149206, 1150000, ['variation', 'gene'])
      #
      #   # Feature in TAB
      #   human.exportview(:seq_region_name => 1,
      #                    :anchor1 => 1149206, :anchor2 => 1150000,
      #                    :options => ['similarity', 'repeat',
      #                                 'genscan', 'variation', 'gene'],
      #                    :format => 'tab')
      #
      # == Arguments
      #
      # Bio::Ensembl#exportview method allows both ordered arguments and
      # named arguments. (Note: mandatory arguments are marked by '*').
      # A hash given as named arguments is not modified.
      #
      # === Ordered Arguments
      #
      # 1. seq_region_name - Chromosome number (*)
      # 2. anchor1         - From coordination (*)
      # 3. anchor2         - To coordination (*)
      # 4. options         - Features to export (in :format => 'gff' or 'tab')
      #                      ['similarity', 'repeat', 'genscan', 'variation',
      #                       'gene']
      #
      # === Named Arguments
      #
      # * :seq_region_name - Chromosome number (*)
      # * :anchor1         - From coordination (*)
      # * :anchor2         - To coordination (*)
      # * :type1           - From coordination type ['bp', ]
      # * :type2           - To coordination type ['bp', ]
      # * :upstream        - Bp upstream
      # * :downstream      - Bp downstream
      # * :format          - File format ['fasta', 'gff', 'tab']
      # * :options         - Features to export (for :format => 'gff' or 'tab')
      #                      ['similarity', 'repeat', 'genscan', 'variation',
      #                       'gene']
      #
      # == Returns
      #
      # Body of the response returned by Bio::Command.post_form.
      #
      def exportview(*args)
        defaults = {
          :type1 => 'bp',
          :type2 => 'bp',
          :downstream => '',
          :upstream => '',
          :format => 'fasta',
          :options => [],
          :action => 'export',
          :_format => 'Text',
          :output => 'txt',
          :submit => 'Continue >>'
        }

        if args.first.is_a?(Hash)
          # Work on a copy: the :format default applied below must not leak
          # into the hash owned by the caller.
          options = args.first.dup
          # Features were requested without an explicit feature format:
          # fall back to GFF.
          if options[:options] && options[:format] != 'fasta' && options[:format] != 'tab'
            options.update({:format => 'gff'})
          end
        else
          options = {
            :seq_region_name => args[0],
            :anchor1 => args[1],
            :anchor2 => args[2],
          }

          # The 4th argument is either a feature list (implies GFF) or a
          # hash of additional named parameters.
          case args[3]
          when Array
            options.update({:format => 'gff', :options => args[3]})
          when Hash
            options.update(args[3])
          end

          # An optional 5th argument may carry further named parameters.
          if args[4].is_a?(Hash)
            options.update(args[4])
          end
        end

        params = defaults.update(options)

        result = Bio::Command.post_form("#{@uri}/exportview", params)

        result.body
      end

    end # class Ensembl
    
    end # module Bio
    
    
    
    # Codes for backward-compatibility.
    #
    class Bio::Ensembl
      # Old name of the default server URL.
      EBIServerURI = ENSEMBL_URL

      # Gets or sets a class-level server URI.
      # With an argument, stores and returns it; without, returns the stored
      # value or EBIServerURI when none has been set.
      def self.server_uri(uri = nil)
        if uri
          @uri = uri
        else
          @uri || EBIServerURI
        end
      end

      # Base class for the organism-specific shortcut classes below.
      # Subclasses must define the constant Organism.
      class Base
        # Runs Bio::Ensembl#exportview for the organism of the receiving
        # subclass.
        def self.exportview(*args)
          # A bare +Organism+ would be resolved lexically from Base, where
          # the constant does not exist, and raise NameError.  Resolve it
          # through the receiver so that Human/Mouse supply their own value.
          Bio::Ensembl.new(self::Organism).exportview(*args)
        end
      end

      # Shortcut class for Homo sapiens.
      class Human < Base
        Organism = Bio::Ensembl.human.organism
      end

      # Shortcut class for Mus musculus.
      class Mouse < Base
        Organism = Bio::Ensembl.mouse.organism
      end
    end # class Bio::Ensembl
    
    
    
    
    bio-1.4.3.0001/lib/bio/io/soapwsdl.rb0000644000004100000410000000445712200110570017004 0ustar  www-datawww-data#
    # = bio/io/soapwsdl.rb - SOAP/WSDL interface class
    #
    # Copyright::   Copyright (C) 2004 
    #               Toshiaki Katayama 
    # License::     The Ruby License
    #
    # $Id: soapwsdl.rb,v 1.7 2007/04/05 23:35:41 trevor Exp $
    #
    begin
      require 'soap/wsdlDriver'
    rescue LoadError
    end
    
    module Bio
    
    # == Examples
    # 
    # class API < Bio::SOAPWSDL
    #   def initialize
    #     @wsdl = 'http://example.com/example.wsdl'
    #     @log = File.new("soap_log", 'w')
    #     create_driver
    #   end
    # end
    #
    # == Use HTTP proxy
    #
    # You need to set following two environmental variables
    # (case might be insensitive) as required by SOAP4R.
    #
    # --- soap_use_proxy
    #
    # Set the value of this variable to 'on'.
    #
    # --- http_proxy
    #
    # Set the URL of your proxy server (http://myproxy.com:8080 etc.).
    #
    # === Example to use HTTP proxy
    # 
    # % export soap_use_proxy=on
    # % export http_proxy=http://localhost:8080
    #
    class SOAPWSDL

      # Returns URL of the current WSDL file.
      attr_reader :wsdl

      # Returns current logging IO.
      attr_reader :log


      # Creates a client for the given WSDL file and builds the SOAP driver
      # immediately.
      # ---
      # *Arguments*:
      # * (optional) _wsdl_: URL of the WSDL file
      def initialize(wsdl = nil)
        @wsdl = wsdl
        @log = nil
        create_driver
      end


      # (Private) Builds @driver from the current @wsdl.
      # When a logging IO has been set in @log, it is applied to the new
      # driver as well, so that logging survives a change of the WSDL URL
      # and works for subclasses that set @log before calling this method.
      def create_driver
        factory = SOAP::WSDLDriverFactory.new(@wsdl)
        if RUBY_VERSION > "1.8.2"
          @driver = factory.create_rpc_driver
        else
          @driver = factory.create_driver
        end
        @driver.generate_explicit_type = true   # Ruby obj <-> SOAP obj
        @driver.wiredump_dev = @log if @log
      end
      private :create_driver


      # Change the URL for WSDL file
      #
      #   serv = Bio::SOAPWSDL.new("http://soap.genome.jp/KEGG.wsdl")
      #
      # or
      #
      #   serv = Bio::SOAPWSDL.new
      #   serv.wsdl = "http://soap.genome.jp/KEGG.wsdl"
      #
      # Note that you can't read two or more different WSDL files at once.
      # In that case, create Bio::SOAPWSDL object for each.
      #
      def wsdl=(url)
        @wsdl = url
        create_driver
      end


      # Change the IO for logging.  The argument is passed to wiredump_dev method
      # of the SOAP4R, thus
      #
      #   serv = Bio::SOAPWSDL.new
      #   serv.log = STDERR
      #
      # will print all the SOAP transactions in standard error.
      # This feature is especially useful for debug.
      #
      def log=(io)
        @log = io
        @driver.wiredump_dev = @log
      end


      # List of methods defined by WSDL
      def list_methods
        @driver.methods(false)
      end


      # (Private) Forwards every unknown method call, with its arguments,
      # to the SOAP driver.
      def method_missing(*arg)
        @driver.send(*arg)
      end
      private :method_missing

    end # SOAPWSDL
    
    end # Bio
    
    bio-1.4.3.0001/lib/bio/io/ncbirest.rb0000644000004100000410000006665012200110570016764 0ustar  www-datawww-data#
    # = bio/io/ncbirest.rb - NCBI Entrez client module
    #
    # Copyright::  Copyright (C) 2008 Toshiaki Katayama 
    # License::    The Ruby License
    #
    # $Id:$
    #
    
    require 'thread'
    require 'bio/command'
    require 'bio/version'
    
    module Bio
    
    class NCBI
    
      autoload :SOAP,       'bio/io/ncbisoap'
    
      # (Hash) Default parameters for Entrez (eUtils).
      # They may also be used for other NCBI services.
      # NOTE: this hash is intentionally left unfrozen; the default_email=,
      # default_tool= and reset_entrez_default_parameters methods modify it
      # in place.
      ENTREZ_DEFAULT_PARAMETERS = {
        # Tool name: the running script name plus the BioRuby version ID.
        'tool' => "#{$0} (bioruby/#{Bio::BIORUBY_VERSION_ID})",
        # No email address by default; set one with Bio::NCBI.default_email=.
        'email' => nil,
      }
    
      # Resets Entrez (eUtils) default parameters.
      # ---
      # *Returns*:: (Hash) default parameters
      def self.reset_entrez_default_parameters
        fresh = {
          'tool' => "#{$0} (bioruby/#{Bio::BIORUBY_VERSION_ID})",
          'email' => nil,
        }
        # Empty and refill the existing hash instead of assigning a new one,
        # so that every holder of the constant sees the reset values.
        ENTREZ_DEFAULT_PARAMETERS.clear.merge!(fresh)
      end
    
      # Gets default email address for Entrez (eUtils).
      # ---
      # *Returns*:: String or nil
      def self.default_email
        defaults = ENTREZ_DEFAULT_PARAMETERS
        defaults['email']
      end
    
      # Sets default email address used for Entrez (eUtils).
      # It may also be used for other NCBI services.
      # ---
      # *Arguments*:
      # * (required) _str_: (String) email address
      # *Returns*:: same as given argument
      def self.default_email=(str)
        # Stored in the shared defaults hash, so it applies to later queries.
        ENTREZ_DEFAULT_PARAMETERS.store('email', str)
      end
    
      # Gets default tool name for Entrez (eUtils).
      # ---
      # *Returns*:: String or nil
      def self.default_tool
        defaults = ENTREZ_DEFAULT_PARAMETERS
        defaults['tool']
      end
    
      # Sets default tool name for Entrez (eUtils).
      # It may also be used for other NCBI services.
      # ---
      # *Arguments*:
      # * (required) _str_: (String) tool name
      # *Returns*:: same as given argument
      def self.default_tool=(str)
        # Stored in the shared defaults hash, so it applies to later queries.
        ENTREZ_DEFAULT_PARAMETERS.store('tool', str)
      end
    
    # == Description
    #
    # The Bio::NCBI::REST class provides REST client for the NCBI E-Utilities
    #
    # Entrez utilities index:
    #
    # * http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
    #
    class REST
    
      # Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
      # weekdays for any series of more than 100 requests.
      # -> Not implemented yet in BioRuby
      #
      # Wait for 1/3 seconds.
      # NCBI's restriction is: "Make no more than 3 requests every 1 second.".
      # Minimum number of seconds between two accesses (default argument of
      # ncbi_access_wait).
      NCBI_INTERVAL = 1.0 / 3.0
      # Time of the most recent access recorded by ncbi_access_wait;
      # nil until the first call.
      @@last_access = nil
      # Mutex guarding @@last_access; created on first use by
      # ncbi_access_wait.
      @@last_access_mutex = nil
    
      private
    
      # (Private) Sleeps until allowed to access.
      # ---
      # *Arguments*:
      # * (required) _wait_: wait unit time
      # *Returns*:: (undefined)
      def ncbi_access_wait(wait = NCBI_INTERVAL)
        @@last_access_mutex ||= Mutex.new
        @@last_access_mutex.synchronize do
          # Only the very first access has no previous timestamp to honor.
          if @@last_access
            elapsed = Time.now - @@last_access
            sleep(wait - elapsed) if wait > elapsed
          end
          # Record this access for the next caller.
          @@last_access = Time.now
        end
        nil
      end
    
      # (Private) default parameters
      # ---
      # *Returns*:: Hash
      def default_parameters
        # The shared hash itself is returned, not a copy.
        ncbi = Bio::NCBI
        ncbi::ENTREZ_DEFAULT_PARAMETERS
      end
    
      # (Private) Sends query to NCBI.
      # ---
      # *Arguments*:
      # * (required) _serv_: (String) server URI string
      # * (required) _opts_: (Hash) parameters
      # *Returns*:: response object returned by Bio::Command.post_form
      def ncbi_post_form(serv, opts)
        # Validate first, so a rejected query neither waits nor goes out.
        ncbi_check_parameters(opts)
        ncbi_access_wait
        # $stderr.puts opts.inspect   # uncomment to trace outgoing parameters
        Bio::Command.post_form(serv, opts)
      end
    
      # (Private) Checks parameters as NCBI requires.
      # If no email or tool parameter, raises an error.
      #
      # NCBI announces that "Effective on
      # June 1, 2010, all E-utility requests, either using standard URLs or
      # SOAP, must contain non-null values for both the &tool and &email
      # parameters. Any E-utility request made after June 1, 2010 that does
      # not contain values for both parameters will return an error explaining
      # that these parameters must be included in E-utility requests."
      # ---
      # *Arguments*:
      # * (required) _opts_: Hash containing parameters
      # *Returns*:: (undefined)
      def ncbi_check_parameters(opts)
        # A missing key and an empty value are treated alike: both become ""
        # after to_s.  The email check comes first, so it wins when both
        # parameters are absent.
        raise 'Set email parameter for the query, or set Bio::NCBI.default_email = "(your email address)"' if opts['email'].to_s.empty?
        raise 'Set tool parameter for the query, or set Bio::NCBI.default_tool = "(your tool name)"' if opts['tool'].to_s.empty?
        nil
      end
    
      public
    
      # Lists the NCBI database names using the E-Utils (einfo) service.
      # 
      # * http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi
      #
      #  pubmed protein nucleotide nuccore nucgss nucest structure genome
      #  books cancerchromosomes cdd gap domains gene genomeprj gensat geo
      #  gds homologene journals mesh ncbisearch nlmcatalog omia omim pmc
      #  popset probe proteinclusters pcassay pccompound pcsubstance snp
      #  taxonomy toolkit unigene unists
      #
      # == Usage
      #
      #  ncbi = Bio::NCBI::REST.new
      #  ncbi.einfo
      #
      #  Bio::NCBI::REST.einfo
      #
      # ---
      # *Returns*:: array of string (database names)
      def einfo
        serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi"
        # merge returns a new Hash, so the shared defaults are not modified.
        opts = default_parameters.merge({})
        response = ncbi_post_form(serv, opts)
        # Capture only the text between <DbName> and </DbName>.  Without the
        # opening tag as an anchor, the lazy group would also swallow the
        # markup and whitespace preceding each name.
        response.body.scan(/<DbName>(.*?)<\/DbName>/m).flatten
      end
    
    
      # Search the NCBI database by given keywords using E-Utils (esearch) service
      # and returns an array of entry IDs.
      # 
      # For information on the possible arguments, see
      #
      # * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html
      # * http://www.ncbi.nlm.nih.gov/books/bv.fcgi?rid=helppubmed.section.pubmedhelp.Search_Field_Descrip
      #
      # == Usage
      #
      #  ncbi = Bio::NCBI::REST.new
      #  ncbi.esearch("tardigrada", {"db"=>"nucleotide", "rettype"=>"count"})
      #  ncbi.esearch("tardigrada", {"db"=>"nucleotide", "rettype"=>"gb"})
      #  ncbi.esearch("yeast kinase", {"db"=>"nuccore", "rettype"=>"gb", "retmax"=>5})
      #
      #  Bio::NCBI::REST.esearch("tardigrada", {"db"=>"nucleotide", "rettype"=>"count"})
      #  Bio::NCBI::REST.esearch("tardigrada", {"db"=>"nucleotide", "rettype"=>"gb"})
      #  Bio::NCBI::REST.esearch("yeast kinase", {"db"=>"nuccore", "rettype"=>"gb", "retmax"=>5})
      #
      # ---
      # *Arguments*:
      # * _str_: query string (required)
      # * _hash_: hash of E-Utils option {"db" => "nuccore", "rettype" => "gb"}
      #   * _db_: "sequences", "nucleotide", "protein", "pubmed", "taxonomy", ...
      #   * _retmode_: "text", "xml", "html", ...
      #   * _rettype_: "gb", "medline", "count", ...
      #   * _retmax_: integer (default 100)
      #   * _retstart_: integer
      #   * _field_:
      #     * "titl": Title [TI]
      #     * "tiab": Title/Abstract [TIAB]
      #     * "word": Text words [TW]
      #     * "auth": Author [AU]
      #     * "affl": Affiliation [AD]
      #     * "jour": Journal [TA]
      #     * "vol":  Volume [VI]
      #     * "iss":  Issue [IP]
      #     * "page": First page [PG]
      #     * "pdat": Publication date [DP]
      #     * "ptyp": Publication type [PT]
      #     * "lang": Language [LA]
      #     * "mesh": MeSH term [MH]
      #     * "majr": MeSH major topic [MAJR]
      #     * "subh": Mesh sub headings [SH]
      #     * "mhda": MeSH date [MHDA]
      #     * "ecno": EC/RN Number [rn]
      #     * "si":   Secondary source ID [SI]
      #     * "uid":  PubMed ID (PMID) [UI]
      #     * "fltr": Filter [FILTER] [SB]
      #     * "subs": Subset [SB]
      #   * _reldate_: 365
      #   * _mindate_: 2001
      #   * _maxdate_: 2002/01/01
      #   * _datetype_: "edat"
      # * _limit_: maximum number of entries to be returned (0 for unlimited; nil for the "retmax" value in the hash or the internal default value (=100))
      # * _step_: maximum number of entries retrieved at a time
      # *Returns*:: array of entry IDs or a number of results
      def esearch(str, hash = {}, limit = nil, step = 10000)
        serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
        opts = default_parameters.merge({ "term" => str })
        opts.update(hash)

        # A "count" request is delegated entirely to esearch_count and
        # returns a number instead of a list of IDs.
        return esearch_count(str, opts) if opts["rettype"] == "count"

        retstart = hash["retstart"] ? hash["retstart"].to_i : 0

        # An explicit limit argument wins over hash["retmax"]; the fallback
        # is 100.  A limit of 0 means "everything": ask the server how many
        # hits there are.
        limit ||= hash["retmax"].to_i if hash["retmax"]
        limit ||= 100
        limit = esearch_count(str, opts) if limit == 0

        list = []
        0.step(limit, step) do |i|
          retmax = [step, limit - i].min
          # 0.step includes the end point, so when limit is a multiple of
          # step (or is 0) the final iteration would request zero entries.
          # Skip that request: it cannot contribute any IDs.
          break if retmax <= 0
          opts.update("retmax" => retmax, "retstart" => i + retstart)
          response = ncbi_post_form(serv, opts)
          # Anchor on the opening <Id> tag so that only the ID text itself
          # is captured.
          list.concat(response.body.scan(/<Id>(.*?)<\/Id>/m).flatten)
        end
        list
      end
    
      # *Arguments*:: same as esearch method
      # *Returns*:: number of results (Integer)
      def esearch_count(str, hash = {})
        serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
        opts = default_parameters.merge({ "term" => str })
        opts.update(hash)
        # Force a count-only query regardless of the rettype given by the caller.
        opts.update("rettype" => "count")
        response = ncbi_post_form(serv, opts)
        # Anchor on the opening <Count> tag; otherwise the first capture
        # starts at the beginning of the document and to_i yields 0.
        # When no <Count> element is present, nil.to_i also yields 0.
        response.body.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
      end
    
    
      # Retrieve database entries by given IDs and using E-Utils (efetch) service.
      #
      # For information on the possible arguments, see
      #
      # * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetch_help.html
      #
      # == Usage
      #
      #  ncbi = Bio::NCBI::REST.new
      #  ncbi.efetch("185041", {"db"=>"nucleotide", "rettype"=>"gb", "retmode" => "xml"})
      #  ncbi.efetch("J00231", {"db"=>"nuccore", "rettype"=>"gb", "retmode"=>"xml"})
      #  ncbi.efetch("AAA52805", {"db"=>"protein", "rettype"=>"gb"})
      #
      #  Bio::NCBI::REST.efetch("185041", {"db"=>"nucleotide", "rettype"=>"gb", "retmode" => "xml"})
      #  Bio::NCBI::REST.efetch("J00231", {"db"=>"nuccore", "rettype"=>"gb"})
      #  Bio::NCBI::REST.efetch("AAA52805", {"db"=>"protein", "rettype"=>"gb"})
      #
      # ---
      # *Arguments*:
      # * _ids_: list of NCBI entry IDs (required)
      # * _hash_: hash of E-Utils option {"db" => "nuccore", "rettype" => "gb"}
      #   * _db_: "sequences", "nucleotide", "protein", "pubmed", "omim", ...
      #   * _retmode_: "text", "xml", "html", ...
      #   * _rettype_: "gb", "gbc", "medline", "count",...
      # * _step_: maximum number of entries retrieved at a time
      # *Returns*:: String
      def efetch(ids, hash = {}, step = 100)
        serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
        opts = default_parameters.merge({ "retmode"  => "text" })
        opts.update(hash)

        # Arrays are used as they are; anything else is treated as a
        # comma-separated string of IDs.
        list = ids.is_a?(Array) ? ids : ids.to_s.split(/\s*,\s*/)

        result = ""
        # Fetch the entries in batches of at most +step+ IDs per request.
        0.step(list.size, step) do |offset|
          id_param = list[offset, step].join(',')
          # The last slice is empty when list.size is a multiple of step.
          next if id_param.empty?
          opts["id"] = id_param
          result << ncbi_post_form(serv, opts).body
        end
        result.strip
      end
    
      def self.einfo
        # Class-level shortcut: run einfo on a throwaway instance.
        new.einfo
      end
    
      def self.esearch(*args)
        # Class-level shortcut: forward all arguments to a throwaway instance.
        new.esearch(*args)
      end
    
      def self.esearch_count(*args)
        # Class-level shortcut: forward all arguments to a throwaway instance.
        new.esearch_count(*args)
      end
    
      def self.efetch(*args)
        # Class-level shortcut: forward all arguments to a throwaway instance.
        new.efetch(*args)
      end
    
    
      # Shortcut methods for the ESearch service
      class ESearch
    
        # Search database entries by given keywords using E-Utils (esearch).
        #
        # * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html
        #
        #  sequences = gene + genome + nucleotide + protein + popset + snp
        #  nucleotide = nuccore + nucest + nucgss
        #
        # * http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi
        #
        #  pubmed protein nucleotide nuccore nucgss nucest structure genome
        #  books cancerchromosomes cdd gap domains gene genomeprj gensat geo
        #  gds homologene journals mesh ncbisearch nlmcatalog omia omim pmc
        #  popset probe proteinclusters pcassay pccompound pcsubstance snp
        #  taxonomy toolkit unigene unists
        #
        # == Usage
        #
        #  Bio::NCBI::REST::ESearch.search("nucleotide", "tardigrada")
        #  Bio::NCBI::REST::ESearch.count("nucleotide", "tardigrada")
        #
        #  Bio::NCBI::REST::ESearch.nucleotide("tardigrada")
        #  Bio::NCBI::REST::ESearch.popset("aldh2")
        #  Bio::NCBI::REST::ESearch.taxonomy("tardigrada")
        #  Bio::NCBI::REST::ESearch.pubmed("tardigrada", "reldate" => 365)
        #  Bio::NCBI::REST::ESearch.pubmed("mammoth mitochondrial genome")
        #  Bio::NCBI::REST::ESearch.pmc("Indonesian coelacanth genome Latimeria menadoensis")
        #  Bio::NCBI::REST::ESearch.journal("bmc bioinformatics")
        #
        #  ncbi = Bio::NCBI::REST::ESearch.new
        #  ncbi.search("nucleotide", "tardigrada")
        #  ncbi.count("nucleotide", "tardigrada")
        #
        #  ncbi.nucleotide("tardigrada")
        #  ncbi.popset("aldh2")
        #  ncbi.taxonomy("tardigrada")
        #  ncbi.pubmed("tardigrada", "reldate" => 365)
        #  ncbi.pubmed("mammoth mitochondrial genome")
        #  ncbi.pmc("Indonesian coelacanth genome Latimeria menadoensis")
        #  ncbi.journal("bmc bioinformatics")
        #
        # ---
        #
        # *Arguments*:
        # * _term_: search keywords (required)
        # * _limit_: maximum number of entries to be returned (0 for unlimited)
        # * _hash_: hash of E-Utils option
        # *Returns*:: array of entry IDs or a number of results
        module Methods
    
          # search("nucleotide", "tardigrada")
          # search("nucleotide", "tardigrada", 0)                  # unlimited
          # search("pubmed", "tardigrada")
          # search("pubmed", "tardigrada", 5)                      # first five
          # search("pubmed", "tardigrada", "reldate" => 365)       # within a year
          # search("pubmed", "tardigrada", 5, "reldate" => 365)    # combination
          # search("pubmed", "tardigrada", {"reldate" => 365}, 5)  # combination 2
          # search("journals", "bmc", 10)
          def search(db, term, *args)
            # Extra arguments may come in any order: Hashes are E-Utils
            # options, anything else is a limit.
            option_hashes, limits = args.partition { |arg| Hash === arg }
            # The last limit given wins; 100 is used when none was given.
            limit = limits.map { |arg| arg.to_i }.last || 100
            # Later hashes override earlier ones, and may override "db" too.
            opts = option_hashes.inject({ "db" => db }) { |acc, h| acc.update(h) }
            Bio::NCBI::REST.esearch(term, opts, limit)
          end
    
          # count("nucleotide", "tardigrada")
          # count("pubmed", "tardigrada")
          # count("journals", "bmc")
          def count(db, term, hash = {})
            # Entries in +hash+ take precedence over the "db" default.
            Bio::NCBI::REST.esearch_count(term, { "db" => db }.merge(hash))
          end
    
          # nucleotide("tardigrada")
          # nucleotide("tardigrada", 0)
          # pubmed("tardigrada")
          # pubmed("tardigrada", 5)
          # pubmed("tardigrada", "reldate" => 365)
          # pubmed("tardigrada", 5, "reldate" => 365)
          # pubmed("tardigrada", {"reldate" => 365}, 5)
          def method_missing(*args)
            # args[0] is the name of the missing method, which is used as the
            # database name; args[1] must be the search term.
            #
            # Without a term this cannot be a database shortcut.  Fall back to
            # the default behavior (NoMethodError) so that implicit conversion
            # probes such as to_ary or to_str do not end in an ArgumentError
            # raised from search.
            return super if args.size < 2
            search(*args)
          end
    
          # alias for journals
          def journal(*args)
            # Singular spelling for the "journals" database.
            search("journals", *args)
          end
    
          # alias for "nucest"
          def est(*args)
            # Short name for the "nucest" database.
            search("nucest", *args)
          end
    
          # alias for "nucgss"
          def gss(*args)
            # Short name for the "nucgss" database.
            search("nucgss", *args)
          end
    
        end # Methods
    
        include Methods
        extend Methods
    
      end # ESearch
    
    
      # Shortcut methods for the EFetch service
      class EFetch
    
        module Methods
    
          # Retrieve sequence entries by given IDs using E-Utils (efetch).
          #
          # * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchseq_help.html
          #
          #  sequences = gene + genome + nucleotide + protein + popset + snp
          #  nucleotide = nuccore + nucest + nucgss
          #
          # format (rettype):
          # * native       all but Gene    ASN Default format for viewing sequences
          # * fasta        all sequence    FASTA view of a sequence
          # * gb           NA sequence     GenBank view for sequences
          # * gbc          NA sequence     INSDSeq structured flat file
          # * gbwithparts  NA sequence     GenBank CON division with sequences
          # * est          dbEST sequence  EST Report
          # * gss          dbGSS sequence  GSS Report
          # * gp           AA sequence     GenPept view
          # * gpc          AA sequence     INSDSeq structured flat file
          # * seqid        all sequence    Convert GIs into seqids
          # * acc          all sequence    Convert GIs into accessions
          # * chr          dbSNP only      SNP Chromosome Report
          # * flt          dbSNP only      SNP Flat File report
          # * rsr          dbSNP only      SNP RS Cluster report
          # * brief        dbSNP only      SNP ID list
          # * docset       dbSNP only      SNP RS summary
          #
          # == Usage
          #
          #  Bio::NCBI::REST::EFetch.sequence("123,U12345,U12345.1,gb|U12345|")
          #
          #  list = [123, "U12345.1", "gb|U12345|"]
          #  Bio::NCBI::REST::EFetch.sequence(list)
          #  Bio::NCBI::REST::EFetch.sequence(list, "fasta")
          #  Bio::NCBI::REST::EFetch.sequence(list, "acc")
          #  Bio::NCBI::REST::EFetch.sequence(list, "xml")
          #
          #  Bio::NCBI::REST::EFetch.sequence("AE009950")
          #  Bio::NCBI::REST::EFetch.sequence("AE009950", "gbwithparts")
          #
          #  ncbi = Bio::NCBI::REST::EFetch.new
          #  ncbi.sequence("123,U12345,U12345.1,gb|U12345|")
          #  ncbi.sequence(list)
          #  ncbi.sequence(list, "fasta")
          #  ncbi.sequence(list, "acc")
          #  ncbi.sequence(list, "xml")
          #  ncbi.sequence("AE009950")
          #  ncbi.sequence("AE009950", "gbwithparts")
          #
          # ---
          #
          # *Arguments*:
          # * _ids_: list of NCBI entry IDs (required)
          # * _format_: "gb", "gbc", "fasta", "acc", "xml" etc.
          # *Returns*:: String
          def sequence(ids, format = "gb", hash = {})
            # "xml" is accepted as a shorthand for the "gbc" rettype.
            rettype = ("xml" == format) ? "gbc" : format
            # Entries in +hash+ override the defaults built here.
            params = { "db" => "sequences", "rettype" => rettype }.merge(hash)
            Bio::NCBI::REST.efetch(ids, params)
          end
    
          # Retrieve nucleotide sequence entries by given IDs using E-Utils
          # (efetch).
          #
          # * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchseq_help.html
          #  nucleotide = nuccore + nucest + nucgss
          #
          # format (rettype):
          # * native       all but Gene    ASN Default format for viewing sequences
          # * fasta        all sequence    FASTA view of a sequence
          # * gb           NA sequence     GenBank view for sequences
          # * gbc          NA sequence     INSDSeq structured flat file
          # * gbwithparts  NA sequence     GenBank CON division with sequences
          # * est          dbEST sequence  EST Report
          # * gss          dbGSS sequence  GSS Report
          # * gp           AA sequence     GenPept view
          # * gpc          AA sequence     INSDSeq structured flat file
          # * seqid        all sequence    Convert GIs into seqids
          # * acc          all sequence    Convert GIs into accessions
          # * chr          dbSNP only      SNP Chromosome Report
          # * flt          dbSNP only      SNP Flat File report
          # * rsr          dbSNP only      SNP RS Cluster report
          # * brief        dbSNP only      SNP ID list
          # * docset       dbSNP only      SNP RS summary
          #
          # == Usage
          #
          #  Bio::NCBI::REST::EFetch.nucleotide("123,U12345,U12345.1,gb|U12345|")
          #
          #  list = [123, "U12345.1", "gb|U12345|"]
          #  Bio::NCBI::REST::EFetch.nucleotide(list)
          #  Bio::NCBI::REST::EFetch.nucleotide(list, "fasta")
          #  Bio::NCBI::REST::EFetch.nucleotide(list, "acc")
          #  Bio::NCBI::REST::EFetch.nucleotide(list, "xml")
          #
          #  Bio::NCBI::REST::EFetch.nucleotide("AE009950")
          #  Bio::NCBI::REST::EFetch.nucleotide("AE009950", "gbwithparts")
          #
          #  ncbi = Bio::NCBI::REST::EFetch.new
          #  ncbi.nucleotide("123,U12345,U12345.1,gb|U12345|")
          #  ncbi.nucleotide(list)
          #  ncbi.nucleotide(list, "fasta")
          #  ncbi.nucleotide(list, "acc")
          #  ncbi.nucleotide(list, "xml")
          #  ncbi.nucleotide("AE009950")
          #  ncbi.nucleotide("AE009950", "gbwithparts")
          #
          # ---
          #
          # *Arguments*:
          # * _ids_: list of NCBI entry IDs (required)
          # * _format_: "gb", "gbc", "fasta", "acc", "xml" etc.
          # *Returns*:: String
          def nucleotide(ids, format = "gb", hash = {})
            # "xml" is accepted as a shorthand for the "gbc" rettype.
            rettype = ("xml" == format) ? "gbc" : format
            # Entries in +hash+ override the defaults built here.
            params = { "db" => "nucleotide", "rettype" => rettype }.merge(hash)
            Bio::NCBI::REST.efetch(ids, params)
          end
    
          # Retrieve protein sequence entries by given IDs using E-Utils
          # (efetch).
          #
          # * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchseq_help.html
          #  protein
          #
          # format (rettype):
          # * native       all but Gene    ASN Default format for viewing sequences
          # * fasta        all sequence    FASTA view of a sequence
          # * gb           NA sequence     GenBank view for sequences
          # * gbc          NA sequence     INSDSeq structured flat file
          # * gbwithparts  NA sequence     GenBank CON division with sequences
          # * est          dbEST sequence  EST Report
          # * gss          dbGSS sequence  GSS Report
          # * gp           AA sequence     GenPept view
          # * gpc          AA sequence     INSDSeq structured flat file
          # * seqid        all sequence    Convert GIs into seqids
          # * acc          all sequence    Convert GIs into accessions
          # * chr          dbSNP only      SNP Chromosome Report
          # * flt          dbSNP only      SNP Flat File report
          # * rsr          dbSNP only      SNP RS Cluster report
          # * brief        dbSNP only      SNP ID list
          # * docset       dbSNP only      SNP RS summary
          #
          # == Usage
          #
          #  Bio::NCBI::REST::EFetch.protein("7527480,AAF63163.1,AAF63163")
          #
          #  list = [ 7527480, "AAF63163.1", "AAF63163"]
          #  Bio::NCBI::REST::EFetch.protein(list)
          #  Bio::NCBI::REST::EFetch.protein(list, "fasta")
          #  Bio::NCBI::REST::EFetch.protein(list, "acc")
          #  Bio::NCBI::REST::EFetch.protein(list, "xml")
          #
          #  ncbi = Bio::NCBI::REST::EFetch.new
          #  ncbi.protein("7527480,AAF63163.1,AAF63163")
          #  ncbi.protein(list)
          #  ncbi.protein(list, "fasta")
          #  ncbi.protein(list, "acc")
          #  ncbi.protein(list, "xml")
          #
          # ---
          #
          # *Arguments*:
          # * _ids_: list of NCBI entry IDs (required)
          # * _format_: "gp", "gpc", "fasta", "acc", "xml" etc.
          # *Returns*:: String
          def protein(ids, format = "gp", hash = {})
            # "xml" is accepted as a shorthand for the "gpc" rettype.
            rettype = ("xml" == format) ? "gpc" : format
            # Entries in +hash+ override the defaults built here.
            params = { "db" => "protein", "rettype" => rettype }.merge(hash)
            Bio::NCBI::REST.efetch(ids, params)
          end
    
          # Retrieve PubMed entries by given IDs using E-Utils (efetch).
          #
          # * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchlit_help.html
          #
          # == Usage
          #
          #  Bio::NCBI::REST::EFetch.pubmed(15496913)
          #  Bio::NCBI::REST::EFetch.pubmed("15496913,11181995")
          #
          #  list = [15496913, 11181995]
          #  Bio::NCBI::REST::EFetch.pubmed(list)
          #  Bio::NCBI::REST::EFetch.pubmed(list, "abstract")
          #  Bio::NCBI::REST::EFetch.pubmed(list, "citation")
          #  Bio::NCBI::REST::EFetch.pubmed(list, "medline")
          #  Bio::NCBI::REST::EFetch.pubmed(list, "xml")
          #
          #  ncbi = Bio::NCBI::REST::EFetch.new
          #  ncbi.pubmed(list)
          #  ncbi.pubmed(list, "abstract")
          #  ncbi.pubmed(list, "citation")
          #  ncbi.pubmed(list, "medline")
          #  ncbi.pubmed(list, "xml")
          #
          # ---
          #
          # *Arguments*:
          # * _ids_: list of PubMed entry IDs (required)
          # * _format_: "abstract", "citation", "medline", "xml"
          # *Returns*:: String
          def pubmed(ids, format = "medline", hash = {})
            # "xml" selects the medline rettype in XML mode; every other
            # format is passed through as rettype in text mode.
            rettype, retmode = ("xml" == format) ? ["medline", "xml"] : [format, "text"]
            # Entries in +hash+ override the defaults built here.
            params = { "db" => "pubmed", "rettype" => rettype, "retmode" => retmode }.merge(hash)
            Bio::NCBI::REST.efetch(ids, params)
          end
    
          # Retrieve PubMed Central entries by given IDs using E-Utils (efetch).
          #
          # * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchlit_help.html
          #
          # == Usage
          #
          #  Bio::NCBI::REST::EFetch.pmc(1360101)
          #  Bio::NCBI::REST::EFetch.pmc("1360101,534663")
          #
          #  list = [1360101, 534663]
          #  Bio::NCBI::REST::EFetch.pmc(list)
          #  Bio::NCBI::REST::EFetch.pmc(list, "xml")
          #
          #  ncbi = Bio::NCBI::REST::EFetch.new
          #  ncbi.pmc(list)
          #  ncbi.pmc(list, "xml")
          #
          # ---
          #
          # *Arguments*:
          # * _ids_: list of PubMed Central entry IDs (required)
          # * _format_: "docsum", "xml"
          # *Returns*:: String
          def pmc(ids, format = "docsum", hash = {})
            # "xml" selects the medline rettype in XML mode; every other
            # format is passed through as rettype in text mode.
            case format
            when "xml"
              format = "medline"
              mode = "xml"
            else
              mode = "text"
            end
            opts = { "db" => "pmc", "rettype" => format, "retmode" => mode }
            # Apply the caller's extra E-Utils options, as the other EFetch
            # shortcuts do.  The +hash+ argument used to be silently ignored.
            opts.update(hash)
            Bio::NCBI::REST.efetch(ids, opts)
          end
    
          # Retrieve journal entries by given IDs using E-Utils (efetch).
          #
          # * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchlit_help.html
          #
          # == Usage
          #
          #  Bio::NCBI::REST::EFetch.journal(21854)
          #
          #  list = [21854, 21855]
          #  Bio::NCBI::REST::EFetch.journal(list)
          #  Bio::NCBI::REST::EFetch.journal(list, "xml")
          #
          #  ncbi = Bio::NCBI::REST::EFetch.new
          #  ncbi.journal(list)
          #  ncbi.journal(list, "xml")
          #
          # ---
          #
          # *Arguments*:
          # * _ids_: list of journal entry IDs (required)
          # * _format_: "full", "xml"
          # *Returns*:: String
          def journal(ids, format = "full", hash = {})
            # "xml" selects the full rettype in XML mode; every other format
            # is passed through as rettype in text mode.
            rettype, retmode = ("xml" == format) ? ["full", "xml"] : [format, "text"]
            # Entries in +hash+ override the defaults built here.
            params = { "db" => "journals", "rettype" => rettype, "retmode" => retmode }.merge(hash)
            Bio::NCBI::REST.efetch(ids, params)
          end
    
          # Retrieve OMIM entries by given IDs using E-Utils (efetch).
          #
          # * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchlit_help.html
          #
          # == Usage
          #
          #  Bio::NCBI::REST::EFetch.omim(143100)
          #
          #  list = [143100, 602260]
          #  Bio::NCBI::REST::EFetch.omim(list)
          #  Bio::NCBI::REST::EFetch.omim(list, "xml")
          #
          #  ncbi = Bio::NCBI::REST::EFetch.new
          #  ncbi.omim(list)
          #  ncbi.omim(list, "xml")
          #
          # ---
          #
          # *Arguments*:
          # * _ids_: list of OMIM entry IDs (required)
          # * _format_: "docsum", "synopsis", "variants", "detailed", "linkout", "xml"
          # *Returns*:: String
          def omim(ids, format = "detailed", hash = {})
            # Translate the two shorthand formats; any other value is passed
            # through unchanged as rettype in text mode.
            rettype, retmode =
              if "xml" == format
                ["full", "xml"]
              elsif "linkout" == format
                ["ExternalLink", "text"]
              else
                [format, "text"]
              end
            # Entries in +hash+ override the defaults built here.
            params = { "db" => "omim", "rettype" => rettype, "retmode" => retmode }.merge(hash)
            Bio::NCBI::REST.efetch(ids, params)
          end
    
          # Retrieve taxonomy entries by given IDs using E-Utils (efetch).
          #
          # * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchtax_help.html
          #
          # == Usage
          #
          #  Bio::NCBI::REST::EFetch.taxonomy(42241)
          #
          #  list = [232323, 290179, 286681]
          #  Bio::NCBI::REST::EFetch.taxonomy(list)
          #  Bio::NCBI::REST::EFetch.taxonomy(list, "xml")
          #
          #  ncbi = Bio::NCBI::REST::EFetch.new
          #  ncbi.taxonomy(list)
          #  ncbi.taxonomy(list, "xml")
          #
          # ---
          #
          # *Arguments*:
          # * _ids_: list of Taxonomy entry IDs (required)
          # * _format_: "brief", "docsum", "xml"
          # *Returns*:: String
          def taxonomy(ids, format = "docsum", hash = {})
            # "xml" selects the full rettype in XML mode; every other format
            # is passed through as rettype in text mode.
            case format
            when "xml"
              format = "full"
              mode = "xml"
            else
              mode = "text"
            end
            opts = { "db" => "taxonomy", "rettype" => format, "retmode" => mode }
            # Apply the caller's extra E-Utils options, as the other EFetch
            # shortcuts do.  The +hash+ argument used to be silently ignored.
            opts.update(hash)
            Bio::NCBI::REST.efetch(ids, opts)
          end
    
        end # Methods
    
        include Methods
        extend Methods
    
      end # EFetch
    
    
    end # REST
    end # NCBI
    end # Bio
    
    bio-1.4.3.0001/lib/bio/io/ncbisoap.rb0000644000004100000410000000754312200110570016745 0ustar  www-datawww-data#
    # = bio/io/ncbisoap.rb - SOAP interface for NCBI Entrez Utilities
    #
    # Copyright::   Copyright (C) 2004, 2006
    #               Toshiaki Katayama 
    # License::     The Ruby License
    #
    # $Id:$
    #
    
    require 'bio/io/ncbirest'
    require 'bio/io/soapwsdl'
    
    module Bio
    class NCBI
    
    # == References
    #
    # * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esoap_help.html
    #
    # == Methods
    #
    # All methods accept a hash as their argument, and most of the keys can be
    # omitted (values are strings).
    #
    # Note: Methods which name ends with _MS are designed for use with
    # Microsoft Visual Studio and SOAP Toolkit 3.0
    #
    # * http://www.ncbi.nlm.nih.gov/entrez/query/static/esoap_ms_help.html
    #
    # * run_eFetch(_MS)
    #   * "db", "id", "WebEnv", "query_key", "tool", "email", "retstart",
    #     "retmax", "rettype", "strand", "seq_start", "seq_stop", "complexity",
    #     "report" 
    #
    # * run_eGquery(_MS)
    #   * "term", "tool", "email"
    #
    # * run_eInfo(_MS)
    #   * "db", "tool", "email"
    #
    # * run_eSpell(_MS)
    #   * "db", "term", "tool", "email"
    #
    # * run_eLink(_MS)
    #   * "db", "id", "reldate", "mindate", "maxdate", "datetype", "term"
    #     "dbfrom", "WebEnv", "query_key", "cmd", "tool", "email"
    #
    # * run_eSearch(_MS)
    #   * "db", "term", "WebEnv", "QueryKey", "usehistory", "tool", "email",
    #     "field", "reldate", "mindate", "maxdate", "datetype", "RetStart",
    #     "RetMax", "rettype", "sort"
    #
    # * run_eSummary(_MS)
    #   * "db", "id", "WebEnv", "query_key", "retstart", "retmax", "tool", "email"
    #
    # == Complex data types
    #
    # * http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/egquery.xsd
    # * http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/einfo.xsd
    # * http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/esearch.xsd
    # * http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/esummary.xsd
    # * http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/elink.xsd
    # * http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/efetch.xsd
    # * http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/espell.xsd
    #
    # SOAP client for the NCBI Entrez Utilities, built on Bio::SOAPWSDL.
    # Each nested subclass only overrides SERVER_URI to select a different
    # WSDL file under BASE_URI.
    class SOAP < Bio::SOAPWSDL

      # Common prefix of all WSDL locations used below.
      BASE_URI = "http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/"

      # set default to EUtils
      SERVER_URI = BASE_URI + "eutils.wsdl"

      # ---
      # *Arguments*:
      # * (optional) _wsdl_: WSDL location; when nil, the SERVER_URI constant
      #   of the receiver's own class is used, so subclasses pick up their
      #   own WSDL file.
      def initialize(wsdl = nil)
        super(wsdl || self.class::SERVER_URI)
      end

      # Forwards every method not defined on this object to @driver, after
      # an unconditional 3-second pause.
      # NOTE(review): @driver is not assigned in this class; presumably it is
      # set up by Bio::SOAPWSDL -- confirm.
      def method_missing(*arg)
        sleep 3			# make sure to rest for 3 seconds per request
        @driver.send(*arg)
      end

      # Client using eutils.wsdl (same WSDL as the default).
      class EUtils < Bio::NCBI::SOAP
        SERVER_URI = BASE_URI + "eutils.wsdl"
      end

      # Client using eutils_lite.wsdl.
      class EUtilsLite < Bio::NCBI::SOAP
        SERVER_URI = BASE_URI + "eutils_lite.wsdl"
      end

      # Client using efetch.wsdl.
      class EFetch < Bio::NCBI::SOAP
        SERVER_URI = BASE_URI + "efetch.wsdl"
      end

      # Client using efetch_lit.wsdl.
      class EFetchLite < Bio::NCBI::SOAP
        SERVER_URI = BASE_URI + "efetch_lit.wsdl"
      end

    end # SOAP
    end # NCBI
    end # Bio
    
    
    # Demo, executed only when this file is run directly as a script.
    # It creates an EFetch and an EUtils client and prints the raw result of
    # one call per service.  Every call goes through
    # Bio::NCBI::SOAP#method_missing, which pauses for 3 seconds first.
    if __FILE__ == $0

      puts ">>> Bio::NCBI::SOAP::EFetch"
      efetch = Bio::NCBI::SOAP::EFetch.new

      puts "### run_eFetch in EFetch"
      hash = {"db" => "protein", "id" => "37776955"}
      result = efetch.run_eFetch(hash)
      p result

      puts ">>> Bio::NCBI::SOAP::EUtils"
      eutils = Bio::NCBI::SOAP::EUtils.new

      puts "### run_eFetch in EUtils"
      hash = {"db" => "pubmed", "id" => "12345"}
      result = eutils.run_eFetch(hash)
      p result

      puts "### run_eGquery - Entrez meta search to count hits in each DB"
      hash = {"term" => "kinase"}
      result = eutils.run_eGquery(hash)      # working?
      p result

      puts "### run_eInfo - listing of the databases"
      hash = {"db" => "protein"}
      result = eutils.run_eInfo(hash)
      p result

      # The term is deliberately misspelled ("kinas") to exercise eSpell.
      puts "### run_eSpell"
      hash = {"db" => "pubmed", "term" => "kinas"}
      result = eutils.run_eSpell(hash)
      p result
      p result["CorrectedQuery"]
      
      puts "### run_eLink"
      hash = {"db" => "protein", "id" => "37776955"}
      result = eutils.run_eLink(hash)        #  working?
      p result

      puts "### run_eSearch"
      hash = {"db" => "pubmed", "term" => "kinase"}
      result = eutils.run_eSearch(hash)
      p result

      puts "### run_eSummary"
      hash = {"db" => "protein", "id" => "37776955"}
      result = eutils.run_eSummary(hash)
      p result

    end
    
    
    
    bio-1.4.3.0001/lib/bio/io/sql.rb0000644000004100000410000000516312200110570015742 0ustar  www-datawww-data#module Bio
     # class SQL
      #  #no check is made
       # def self.establish_connection(configurations, env)
        #  #configurations is an hash similar what YAML returns.
         # #{:database=>"biorails_development", :adapter=>"postgresql", :username=>"rails", :password=>nil}
         # configurations.assert_valid_keys('development', 'production','test')
         # configurations[env].assert_valid_keys('hostname','database','adapter','username','password')
         # DummyBase.configurations = configurations
         # DummyBase.establish_connection "#{env}"
        #end
    
    
    #require 'rubygems'
    #require 'composite_primary_keys'
    #require 'erb'
    # BiosqlPlug
    
    =begin
    Hilmar gave me some clarification:
    1) The "EMBL/GenBank/SwissProt" name in the term table is only a convention, assuming the data was loaded from GenBank, EMBL and SwissProt formats.
       If your features come from other sources (for example BLAST or an alignment), the user has to take care of the source.
    
    
    =end
    =begin
    TODO:
    1) source_term_id => source_term, and check beforehand whether the source term is present or not, and its level; the root should always be something like "EMBL/GenBank/SwissProt" or contextualized.
    2) Into DummyBase class delete connection there and use Bio::ArSQL.establish_connection which reads info from a yml file.
    3) Chk Locations in Biofeatures ArSQL
    =end
    module Bio
      # Convenience class-level helpers for looking up, listing and deleting
      # BioSQL records through the Bio::SQL::Bioentry and
      # Bio::SQL::Biodatabase model classes.
      class SQL

        require 'bio/io/biosql/biosql'
        autoload :Sequence, 'bio/db/biosql/sequence'

        class << self

          # Looks up a bioentry by its primary key.
          def fetch_id(id)
            Bio::SQL::Bioentry.find(id)
          end

          # Wraps the bioentry found for the upper-cased accession in a
          # Bio::SQL::Sequence.  No existence check is made here; the
          # result of find_by_accession is passed on as-is.
          def fetch_accession(accession)
            bioentry = Bio::SQL::Bioentry.find_by_accession(accession.upcase)
            Bio::SQL::Sequence.new(:entry => bioentry)
          end

          # True when a bioentry exists for the upper-cased accession.
          def exists_accession(accession)
            bioentry = Bio::SQL::Bioentry.find_by_accession(accession.upcase)
            !bioentry.nil?
          end

          # True when a biodatabase with the given name exists.
          def exists_database(name)
            biodatabase = Bio::SQL::Biodatabase.first(:name => name)
            !biodatabase.nil?
          end

          # Array of { :id, :accession } hashes, one per bioentry.
          def list_entries
            Bio::SQL::Bioentry.all.map do |row|
              { :id => row.bioentry_id, :accession => row.accession }
            end
          end

          # Array of { :id, :name } hashes, one per biodatabase.
          def list_databases
            Bio::SQL::Biodatabase.all.map do |row|
              { :id => row.biodatabase_id, :name => row.name }
            end
          end

          # Deletes the bioentry with the given primary key.
          def delete_entry_id(id)
            Bio::SQL::Bioentry.delete(id)
          end

          # Destroys the bioentry found for the upper-cased accession.
          def delete_entry_accession(accession)
            Bio::SQL::Bioentry.find_by_accession(accession.upcase).destroy!
          end

        end # class << self

      end #biosql

    end #Bio
    bio-1.4.3.0001/lib/bio/io/fetch.rb0000644000004100000410000001502212200110570016227 0ustar  www-datawww-data#
    # = bio/io/fetch.rb - BioFetch access module
    #
    # Copyright::	Copyright (C) 2002, 2005 Toshiaki Katayama ,
    #               Copyright (C) 2006 Jan Aerts 
    # License::     The Ruby License
    #
    #  $Id:$
    #
    # == DESCRIPTION
    #
    # Using BioRuby BioFetch server
    #
    #   br_server = Bio::Fetch.new()
    #   puts br_server.databases
    #   puts br_server.formats('embl')
    #   puts br_server.maxids
    #
    # Using EBI BioFetch server
    #
    #   ebi_server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
    #   puts ebi_server.fetch('embl', 'J00231', 'raw')
    #   puts ebi_server.fetch('embl', 'J00231', 'html')
    #   puts Bio::Fetch.query('genbank', 'J00231')
    #   puts Bio::Fetch.query('genbank', 'J00231', 'raw', 'fasta')
    # 
    
    require 'uri'
    require 'cgi'
    require 'bio/command'
    
    module Bio
      # = DESCRIPTION
      # The Bio::Fetch class provides an interface to dbfetch servers. Given
      # a database name and an accession number, these servers return the associated
      # record. For example, for the embl database on the EBI, that would be a
      # nucleic or amino acid sequence.
      #
      # Possible dbfetch servers include:
      # * http://bioruby.org/cgi-bin/biofetch.rb (default)
      # * http://www.ebi.ac.uk/cgi-bin/dbfetch
      #
      # If you're behind a proxy server, be sure to set your HTTP_PROXY
      # environment variable accordingly.
      #
      # = USAGE
      #  require 'bio'
      #
      #  # Retrieve the sequence of accession number M33388 from the EMBL
      #  # database.
      #  server = Bio::Fetch.new()  #uses default server
      #  puts server.fetch('embl','M33388')
      #
      #  # Do the same thing without creating a Bio::Fetch object. This method always
      #  # uses the default dbfetch server: http://bioruby.org/cgi-bin/biofetch.rb
      #  puts Bio::Fetch.query('embl','M33388')
      #
      #  # To know what databases are available on the bioruby dbfetch server:
      #  server = Bio::Fetch.new()
      #  puts server.databases
      #
      #  # Some databases provide their data in different formats (e.g. 'fasta',
      #  # 'genbank' or 'embl'). To check which formats are supported by a given
      #  # database:
      #  puts server.formats('embl')
      #
      class Fetch

        # Create a new Bio::Fetch server object that can subsequently be queried
        # using the Bio::Fetch#fetch method
        # ---
        # *Arguments*:
        # * _url_: URL of dbfetch server (default = 'http://bioruby.org/cgi-bin/biofetch.rb')
        # *Returns*:: Bio::Fetch object
        def initialize(url = 'http://bioruby.org/cgi-bin/biofetch.rb')
          @url = url
          @database = nil
          # Only host, port and path are kept; the other URI components
          # returned by URI.split are intentionally discarded.
          _scheme, _userinfo, @host, @port, _registry, @path, = URI.split(@url)
        end

        # The default database to query
        #--
        # This will be used by the get_by_id method
        #++
        attr_accessor :database

        # Get raw database entry by id. This method lets the Bio::Registry class
        # use Bio::Fetch objects.
        #
        # Raises ArgumentError when no default database has been set
        # (see Bio::Fetch#database=).
        def get_by_id(id)
          fetch(@database, id)
        end

        # Fetch a database entry as specified by database (db), entry id (id),
        # 'raw' text or 'html' (style), and format.  When using BioRuby's
        # BioFetch server, value for the format should not be set.
        # Examples:
        #   server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
        #   puts server.fetch('embl','M33388','raw','fasta')
        #   puts server.fetch('refseq','NM_12345','html','embl')
        # ---
        # *Arguments*:
        # * _database_: name of database to query (see Bio::Fetch#databases to get list of supported databases)
        # * _id_: single ID or ID list separated by commas or white space.
        #   Non-String values (e.g. an Integer ID) are converted with to_s.
        # * _style_: [raw|html] (default = 'raw')
        # * _format_: name of output format (see Bio::Fetch#formats)
        # *Raises*:: ArgumentError if db, id or style is nil
        def fetch(db, id, style = 'raw', format = nil)
          query = [ [ 'db',    db ],
                    [ 'id',    id ],
                    [ 'style', style ] ]
          query.push([ 'format', format ]) if format

          _get(query)
        end

        # Shortcut for using BioRuby's BioFetch server. You can fetch an entry
        # without creating an instance of BioFetch server. This method uses the
        # default dbfetch server, which is http://bioruby.org/cgi-bin/biofetch.rb
        #
        # Example:
        #   puts Bio::Fetch.query('refseq','NM_12345')
        #
        # ---
        # *Arguments*:
        # * _database_: name of database to query (see Bio::Fetch#databases to get list of supported databases)
        # * _id_: single ID or ID list separated by commas or white space
        # * _style_: [raw|html] (default = 'raw')
        # * _format_: name of output format (see Bio::Fetch#formats)
        def self.query(*args)
          self.new.fetch(*args)
        end

        # Using this method, the user can ask a dbfetch server what databases
        # it supports. This would normally be the first step you'd take when
        # you use a dbfetch server for the first time.
        # Example:
        #  server = Bio::Fetch.new()
        #  puts server.databases # returns "aa aax bl cpd dgenes dr ec eg emb ..."
        #
        # This method only works for the bioruby dbfetch server. For a list
        # of databases available from the EBI, see the EBI website at
        # http://www.ebi.ac.uk/cgi-bin/dbfetch/
        # ---
        # *Returns*:: array of database names
        def databases
          _get_single('info', 'dbs').strip.split(/\s+/)
        end

        # Lists the formats that are available for a given database. Like the
        # Bio::Fetch#databases method, this method is only available on
        # the bioruby dbfetch server.
        # Example:
        #  server = Bio::Fetch.new()
        #  puts server.formats('embl') # returns "default fasta"
        # ---
        # *Arguments*:
        # * _database_:: name of database you want the supported formats for
        #   (defaults to the value of Bio::Fetch#database)
        # *Returns*:: array of formats, or nil when no database is given
        def formats(database = @database)
          if database
            query = [ [ 'info', 'formats' ],
                      [ 'db',   database  ] ]
            _get(query).strip.split(/\s+/)
          end
        end

        # A dbfetch server will only return entries up to a given maximum number.
        # This method retrieves that number from the server. As for the databases
        # and formats methods, the maxids method only works for the bioruby
        # dbfetch server.
        # ---
        # *Arguments*: none
        # *Returns*:: number
        def maxids
          _get_single('info', 'maxids').to_i
        end

        private
        # (private) query to the server.
        # ary must be nested array, e.g. [ [ key0, val0 ], [ key1, val1 ], ... ]
        #
        # Keys and values are converted with to_s before being CGI-escaped,
        # so Integers and Symbols are accepted.  A nil value is rejected with
        # ArgumentError instead of failing inside CGI.escape.
        def _get(ary)
          query = ary.collect do |key, value|
            if value.nil?
              raise ArgumentError,
                    "no value given for query parameter '#{key}'"
            end
            "#{CGI.escape(key.to_s)}=#{CGI.escape(value.to_s)}"
          end.join('&')
          Bio::Command.read_uri(@url + '?' + query)
        end

        # (private) query with single parameter
        def _get_single(key, val)
          _get([ [ key, val ] ])
        end

      end

    end # module Bio
    
    bio-1.4.3.0001/lib/bio/io/togows.rb0000644000004100000410000003321412200110570016463 0ustar  www-datawww-data#
    # = bio/io/togows.rb - REST interface for TogoWS
    #
    # Copyright::  Copyright (C) 2009 Naohisa Goto 
    # License::    The Ruby License
    #
    # $Id:$
    #
    # Bio::TogoWS is a set of clients for the TogoWS web services
    # (http://togows.dbcls.jp/).
    #
    # * Bio::TogoWS::REST is a REST client for the TogoWS.
    # * Bio::TogoWS::SOAP will be implemented in the future.
    #
    
    require 'uri'
    require 'cgi'
    require 'bio/version'
    require 'bio/command'
    
    module Bio

      # Bio::TogoWS is a namespace for the TogoWS web services.
      module TogoWS

        # Internal Use Only.
        #
        # Bio::TogoWS::AccessWait is a module to implement a
        # private method for access.
        module AccessWait

          # common default access wait for TogoWS services
          TOGOWS_ACCESS_WAIT = 1

          # Maximum waiting time to avoid dead lock.
          # When exceeding this value, (max/2) + rand(max) is used,
          # to randomize access.
          # This means real maximum waiting time is (max * 1.5).
          TOGOWS_ACCESS_WAIT_MAX = 60

          # Sleeping if needed.
          # It sleeps about TOGOWS_ACCESS_WAIT * (number of waiting processes).
          #
          # The expected end time of the current sleep is stored in a class
          # variable, so the elapsed time seen by the next caller can be
          # negative and its wait correspondingly longer.
          #
          # ---
          # *Returns*:: (Numeric) time slept, in seconds
          def togows_access_wait
            w_min = TOGOWS_ACCESS_WAIT
            debug = defined?(@debug) && @debug

            # initializing class variable
            @@togows_last_access ||= nil

            # determines waiting time
            wait = 0
            if last = @@togows_last_access then
              elapsed = Time.now - last
              if elapsed < w_min then
                wait = w_min - elapsed
              end
            end

            # If wait is too long, truncated to TOGOWS_ACCESS_WAIT_MAX.
            if wait > TOGOWS_ACCESS_WAIT_MAX then
              orig_wait = wait
              wait = TOGOWS_ACCESS_WAIT_MAX
              wait = wait / 2 + rand(wait)
              if debug then
                $stderr.puts "TogoWS: sleeping time #{orig_wait} is too long and set to #{wait} to avoid dead lock."
              end
              newlast = Time.now + TOGOWS_ACCESS_WAIT_MAX
            else
              newlast = Time.now + wait
            end

            # put expected end time of sleeping
            if !@@togows_last_access or @@togows_last_access < newlast then
              @@togows_last_access = newlast
            end

            # sleeping if needed
            if wait > 0 then
              $stderr.puts "TogoWS: sleeping #{wait} second" if debug
              sleep(wait)
            end
            # returns waited time
            wait
          end
          private :togows_access_wait

          # (private) resets last access.
          # Should be used only for debug purpose.
          def reset_togows_access_wait
            @@togows_last_access = nil
          end
          private :reset_togows_access_wait

        end #module AccessWait

        # == Description
        #
        # Bio::TogoWS::REST is a REST client for the TogoWS web service.
        #
        # Details of the service are described in the following URI.
        #
        # * http://togows.dbcls.jp/site/en/rest.html
        #
        # == Examples
        #
        # For light users, class methods can be used.
        #
        #   print Bio::TogoWS::REST.entry('genbank', 'AF237819')
        #   print Bio::TogoWS::REST.search('uniprot', 'lung cancer')
        #
        # For heavy users, an instance of the REST class can be created, and
        # using the instance is more efficient than using class methods.
        #
        #   t = Bio::TogoWS::REST.new
        #   print t.entry('genbank', 'AF237819')
        #   print t.search('uniprot', 'lung cancer')
        #
        # == References
        #
        # * http://togows.dbcls.jp/site/en/rest.html
        #
        class REST

          include AccessWait

          # URI of the TogoWS REST service
          BASE_URI = 'http://togows.dbcls.jp/'.freeze

          # preset default databases used by the retrieve method.
          #
          DEFAULT_RETRIEVAL_DATABASES =
            %w( genbank uniprot embl ddbj dad )

          # Creates a new object.
          # ---
          # *Arguments*:
          # * (optional) _uri_: String or URI object
          # *Returns*:: new object
          def initialize(uri = BASE_URI)
            uri = URI.parse(uri) unless uri.kind_of?(URI)
            # The base path always starts and ends with "/", so that
            # make_path can simply append the escaped path components.
            @pathbase = uri.path
            @pathbase = '/' + @pathbase unless /\A\// =~ @pathbase
            @pathbase = @pathbase + '/' unless /\/\z/ =~ @pathbase
            @http = Bio::Command.new_http(uri.host, uri.port)
            @header = {
              'User-Agent' => "BioRuby/#{Bio::BIORUBY_VERSION_ID}"
            }
            @debug = false
          end

          # If true, shows debug information to $stderr.
          attr_accessor :debug

          # Debug purpose only.
          # Returns Net::HTTP object used inside the object.
          # The method will be changed in the future if the implementation
          # of this class is changed.
          def internal_http
            @http
          end

          # Intelligent version of the entry method.
          # If two or more databases are specified, sequentially tries
          # them until valid entry is obtained.
          #
          # If database is not specified, preset default databases are used.
          # See DEFAULT_RETRIEVAL_DATABASES for details.
          #
          # When multiple IDs and multiple databases are specified, sequentially
          # tries each IDs. Note that results with no hits found or with server
          # errors are regarded as void strings. Also note that data format of
          # the result entries can be different from entries to entries.
          #
          # ---
          # *Arguments*:
          # * (required) _ids_: (String) an entry ID, or
          #   (Array containing String) IDs. Note that strings containing ","
          #   are regarded as multiple IDs.
          # * (optional) _hash_: (Hash) options below can be passed as a hash.
          #   * (optional) :database: (String) database name, or
          #     (Array containing String) database names.
          #   * (optional) :format: (String) format
          #   * (optional) :field: (String) gets only the specified field
          # *Returns*:: String or nil
          def retrieve(ids, hash = {})
            ids = join_ids(ids).split(',')

            dbs = hash[:database] || DEFAULT_RETRIEVAL_DATABASES
            unless dbs.respond_to?(:to_ary)
              dbs = dbs.to_s.empty? ? [] : [ dbs.to_s ]
            end
            return nil if dbs.empty? or ids.empty?

            # With a single database, all IDs can be sent in one request.
            if dbs.size == 1 then
              return entry(dbs[0], ids, hash[:format], hash[:field])
            end

            # Otherwise, for each ID, take the first database that returns
            # a non-blank result.
            results = []
            ids.each do |idstr|
              dbs.each do |dbstr|
                r = entry(dbstr, idstr, hash[:format], hash[:field])
                if r and !r.strip.empty? then
                  results.push r
                  break
                end
              end #dbs.each
            end #ids.each

            results.join('')
          end #def retrieve

          # Retrieves entries corresponding to the specified IDs.
          #
          # Example:
          #   t = Bio::TogoWS::REST.new
          #   kuma = t.entry('genbank', 'AF237819')
          #   # multiple IDs at a time
          #   misc = t.entry('genbank', [ 'AF237819', 'AF237820' ])
          #   # with format change
          #   p53 = t.entry('uniprot', 'P53_HUMAN', 'fasta')
          #
          # ---
          # *Arguments*:
          # * (required) _database_: (String) database name
          # * (required) _ids_: (String) an entry ID, or
          #   (Array containing String) IDs. Note that strings containing ","
          #   are regarded as multiple IDs.
          # * (optional) _format_: (String) format. nil means the default format
          #   (differs depending on the database).
          # * (optional) _field_: (String) gets only the specified field if not nil
          # *Returns*:: String or nil
          def entry(database, ids, format = nil, field = nil)
            arg = [ 'entry', database, join_ids(ids) ]
            arg.push field if field
            # The format is given as an extension of the last path component.
            arg[-1] = "#{arg[-1]}.#{format}" if format
            response = get(*arg)

            prepare_return_value(response)
          end

          # Database search.
          # Format of the search term string follows the Common Query Language.
          # * http://en.wikipedia.org/wiki/Common_Query_Language
          #
          # Example:
          #   t = Bio::TogoWS::REST.new
          #   print t.search('uniprot', 'lung cancer')
          #   # only get the 10th and 11th hit ID
          #   print t.search('uniprot', 'lung cancer', 10, 2)
          #   # with json format
          #   print t.search('uniprot', 'lung cancer', 10, 2, 'json')
          #
          # ---
          # *Arguments*:
          # * (required) _database_: (String) database name
          # * (required) _query_: (String) query string
          # * (optional) _offset_: (Integer) offset in search results.
          # * (optional) _limit_: (Integer) max. number of returned results.
          #   If offset is not nil and the limit is nil, it is set to 1.
          #   The limit is ignored when offset is nil.
          # * (optional) _format_: (String) format. nil means the default format.
          # *Returns*:: String or nil
          def search(database, query, offset = nil, limit = nil, format = nil)
            arg = [ 'search', database, query ]
            if offset then
              limit ||= 1
              arg.push "#{offset},#{limit}"
            end
            arg[-1] = "#{arg[-1]}.#{format}" if format
            response = get(*arg)

            prepare_return_value(response)
          end

          # Data format conversion.
          #
          # Example:
          #   t = Bio::TogoWS::REST.new
          #   blast_string = File.read('test.blastn')
          #   t.convert(blast_string, 'blast', 'gff')
          #
          # ---
          # *Arguments*:
          # * (required) _data_: (String) input data
          # * (required) _inputformat_: (String) data source format
          # * (required) _format_: (String) output format
          # *Returns*:: String or nil
          def convert(data, inputformat, format)
            response = post_data(data, 'convert', "#{inputformat}.#{format}")

            prepare_return_value(response)
          end

          # Returns list of available databases in the entry service.
          # ---
          # *Returns*:: Array containing String
          def entry_database_list
            database_list('entry')
          end

          # Returns list of available databases in the search service.
          # ---
          # *Returns*:: Array containing String
          def search_database_list
            database_list('search')
          end

          #--
          # class methods
          #++

          # The same as Bio::TogoWS::REST#entry.
          def self.entry(*arg)
            self.new.entry(*arg)
          end

          # The same as Bio::TogoWS::REST#search.
          def self.search(*arg)
            self.new.search(*arg)
          end

          # The same as Bio::TogoWS::REST#convert.
          def self.convert(*arg)
            self.new.convert(*arg)
          end

          # The same as Bio::TogoWS::REST#retrieve.
          def self.retrieve(*arg)
            self.new.retrieve(*arg)
          end

          # The same as Bio::TogoWS::REST#entry_database_list
          def self.entry_database_list(*arg)
            self.new.entry_database_list(*arg)
          end

          # The same as Bio::TogoWS::REST#search_database_list
          def self.search_database_list(*arg)
            self.new.search_database_list(*arg)
          end

          private

          # Access to the TogoWS by using GET method.
          #
          # Example 1:
          #   get('entry', 'genbank', 'AF209156')
          # Example 2:
          #   get('search', 'uniprot', 'lung cancer')
          #
          # ---
          # *Arguments*:
          # * (optional) _path_: String
          # *Returns*:: Net::HTTPResponse object
          def get(*paths)
            http_get(make_path(paths))
          end

          # Access to the TogoWS by using GET method.
          # Always adds '/' at the end of the path.
          #
          # Example 1:
          #   get_dir('entry')
          #
          # ---
          # *Arguments*:
          # * (optional) _path_: String
          # *Returns*:: Net::HTTPResponse object
          def get_dir(*paths)
            path = make_path(paths)
            path += '/' unless /\/\z/ =~ path
            http_get(path)
          end

          # Sends a GET request for an already-built path, after the
          # access wait.  Shared by get and get_dir.
          # ---
          # *Arguments*:
          # * (required) _path_: String
          # *Returns*:: the value returned by the HTTP object's get method
          def http_get(path)
            if @debug then
              $stderr.puts "TogoWS: HTTP#get(#{path.inspect}, #{@header.inspect})"
            end
            togows_access_wait
            @http.get(path, @header)
          end

          # Access to the TogoWS by using POST method.
          # The data is stored to the form key 'data'.
          # Mime type is 'application/x-www-form-urlencoded'.
          # ---
          # *Arguments*:
          # * (required) _data_: String
          # * (optional) _path_: String
          # *Returns*:: Net::HTTPResponse object
          def post_data(data, *paths)
            path = make_path(paths)
            if @debug then
              $stderr.puts "TogoWS: Bio::Command.http_post_form(#{path.inspect}, { \"data\" => (#{data.size} bytes) }, #{@header.inspect})"
            end
            togows_access_wait
            Bio::Command.http_post_form(@http, path, { 'data' => data }, @header)
          end

          # Generates path string from the given paths.
          # Each component is converted with to_s and CGI-escaped.
          # ---
          # *Arguments*:
          # * (required) _paths_: Array containing String objects
          # *Returns*:: String
          def make_path(paths)
            @pathbase + paths.collect { |x| CGI.escape(x.to_s) }.join('/')
          end

          # Converts the ids argument of entry/retrieve into a single
          # comma-separated String.  Array-like objects (those responding
          # to to_ary) are joined with ","; anything else is converted
          # with to_s.
          # ---
          # *Arguments*:
          # * (required) _ids_: String, or Array containing String
          # *Returns*:: String
          def join_ids(ids)
            list = ids.respond_to?(:to_ary) ? ids.to_ary : nil
            list ? list.join(',') : ids.to_s
          end

          # If response.code == "200", returns body as a String.
          # Otherwise, returns nil.
          def prepare_return_value(response)
            if @debug then
              $stderr.puts "TogoWS: #{response.inspect}"
            end
            if response.code == "200" then
              response.body
            else
              nil
            end
          end

          # Returns list of available databases
          # ---
          # *Arguments*:
          # * (required) _service_: String
          # *Returns*:: Array containing String
          # *Raises*:: RuntimeError when the server response is not "200"
          def database_list(service)
            response = get_dir(service)
            str = prepare_return_value(response)
            if str then
              str.chomp.split(/\r?\n/)
            else
              raise 'Unexpected server response'
            end
          end

        end #class REST

      end #module TogoWS

    end #module Bio
    bio-1.4.3.0001/lib/bio/io/registry.rb0000644000004100000410000001660712200110570017020 0ustar  www-datawww-data#
    # = bio/io/registry.rb - OBDA BioRegistry module
    #
    # Copyright::   Copyright (C) 2002, 2003, 2004, 2005
    #               Toshiaki Katayama 
    # License::     The Ruby License
    #
    # $Id:$
    #
    # == Description
    #
    # BioRegistry read the OBDA (Open Bio Database Access) configuration file
    # (seqdatabase.ini) and create a registry object.  OBDA is created during
    # the BioHackathon held in Tucson and South Africa in 2002 as a project
    # independent set of protocols to access biological databases.  The spec
    # is refined in the BioHackathon 2003 held in Singapore.
    #
    # By using the OBDA, user can access to the database by get_database method
    # without knowing where and how the database is stored, and each database
    # has the get_by_id method to obtain a sequence entry.
    #
    # Sample configuration file is distributed with BioRuby package which
    # consists of stanza format entries as following:
    #
    #   VERSION=1.00
    #
    #   [myembl]
    #   protocol=biofetch
    #   location=http://www.ebi.ac.uk/cgi-bin/dbfetch
    #   dbname=embl
    #
    #   [mysp]
    #   protocol=biosql
    #   location=db.bioruby.org
    #   dbname=biosql
    #   driver=mysql
    #   user=root
    #   pass= 
    #   biodbname=swissprot
    #
    # The first line means that this configuration file is version 1.00.
    #
    # The [myembl] line defines a user defined database name 'myembl' and
    # following block indicates how the database can be accessed.
    # In this example, the 'myembl' database is accessed via the OBDA's
    # BioFetch protocol to the dbfetch server at EBI, where the EMBL
    # database is accessed by the name 'embl' on the server side.
    #
    # The [mysp] line defines another database 'mysp' which accesses the
    # RDB (Relational Database) at the db.bioruby.org via the OBDA's
    # BioSQL protocol.  This BioSQL server is running MySQL database as
    # its backend and stores the SwissProt database by the name 'swissprot'
    # and which can be accessed by 'root' user without password.
    # Note that the db.bioruby.org server is a dummy for the explanation.
    #
    # The configuration file is searched by the following order.
    #
    # 1. Local file name given to the Bio::Registry.new(filename).
    #
    # 2. Remote or local file list given by the environmental variable
    #    'OBDA_SEARCH_PATH', which is a '+' separated string of the
    #    remote (HTTP) and/or local files.
    #
    #      e.g. OBDA_SEARCH_PATH="http://example.org/obda.ini+$HOME/lib/myobda.ini"
    #
    # 3. Local file "$HOME/.bioinformatics/seqdatabase.ini" in the user's
    #    home directory.
    #
    # 4. Local file "/etc/bioinformatics/seqdatabase.ini" in the system
    #    configuration directory.
    #
    # All these configuration files are loaded.  If there are database
    # definitions having the same name, the first one is used.
    #
    # If none of these files can be found, Bio::Registry.new will try
    # to use http://www.open-bio.org/registry/seqdatabase.ini file.
    #
    # == References
    # 
    # * http://obda.open-bio.org/
    # * http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/?cvsroot=obf-common
    # * http://www.open-bio.org/registry/seqdatabase.ini
    #
    
    require 'uri'
    require 'net/http'
    require 'bio/command'
    
    
    module Bio
    
    autoload :Fetch,          'bio/io/fetch'
    autoload :SQL,            'bio/io/sql'
    autoload :FlatFile,       'bio/io/flatfile'
    autoload :FlatFileIndex,  'bio/io/flatfile/index'
    
    class Registry
    
      # Builds the registry by loading configuration files.
      #
      # The optional +file+ is read first.  Then, if the environment
      # variable OBDA_SEARCH_PATH is set and non-empty, the sources listed
      # in it are read.  Otherwise the per-user and system-wide
      # seqdatabase.ini files are read, and the remote open-bio.org
      # registry is fetched only if no database has been defined so far.
      def initialize(file = nil)
        @spec_version = nil
        @databases = []
        read_local(file) if file

        search_path = ENV['OBDA_SEARCH_PATH']
        if search_path.nil? || search_path.empty?
          read_local("#{ENV['HOME']}/.bioinformatics/seqdatabase.ini")
          read_local("/etc/bioinformatics/seqdatabase.ini")
          read_remote("http://www.open-bio.org/registry/seqdatabase.ini") if @databases.empty?
        else
          read_env(search_path)
        end
      end
    
      # Version string of the first configuration file
      attr_reader :spec_version
    
      # List of databases (Array of Bio::Registry::DB)
      attr_reader :databases
    
      # Returns a database handle (Bio::SQL, Bio::Fetch etc.) for the given
      # name, or nil if no handle could be created.  The given name is
      # downcased before being compared with each registered name.
      # The handles should have get_by_id method.
      #
      # Entries whose name matches but whose protocol is not recognised are
      # passed over and the search continues.  Raises NotImplementedError
      # for the CORBA and XEMBL protocols.
      def get_database(dbname)
        @databases.each do |entry|
          next unless entry.database == dbname.downcase

          case entry.protocol
          when 'biofetch'
            return serv_biofetch(entry)
          when 'biosql'
            return serv_biosql(entry)
          when 'flat', 'index-flat', 'index-berkeleydb'
            return serv_flat(entry)
          when 'bsane-corba', 'biocorba', 'xembl'
            raise NotImplementedError
          end
        end
        nil
      end
      alias db get_database
    
      # Returns a Registry::DB object corresponding to the first dbname
      # entry in the registry records, or nil if there is no such entry.
      # The given name is downcased before being compared.
      def query(dbname)
        # find returns nil when nothing matches; the previous each-based
        # loop fell through and returned the whole @databases array.
        @databases.find { |db| db.database == dbname.downcase }
      end
    
      private
    
      # Reads every source listed in a '+' separated search path.
      # An element containing ':' is read as a remote source, any other
      # element as a local file.
      def read_env(path)
        path.split('+').each do |source|
          source =~ /:/ ? read_remote(source) : read_local(source)
        end
      end
    
      def read_local(file)
        if File.readable?(file)
          stanza = File.read(file)
          parse_stanza(stanza)
        end
      end
    
      def read_remote(url)
        schema, user, host, port, reg, path, = URI.split(url)
        Bio::Command.start_http(host, port) do |http|
          response = http.get(path)
          parse_stanza(response.body)
        end
      end
    
      def parse_stanza(stanza)
        return unless stanza
        if stanza[/.*/] =~ /VERSION\s*=\s*(\S+)/
          @spec_version ||= $1	# for internal use (may differ on each file)
          stanza[/.*/] = ''	        # remove VERSION line
        end
        stanza.each_line do |line|
          case line
          when /^\[(.*)\]/
            dbname = $1.downcase
            db = Bio::Registry::DB.new($1)
            @databases.push(db)
          when /=/
            tag, value = line.chomp.split(/\s*=\s*/)
            @databases.last[tag] = value
          end
        end
      end
    
      def serv_biofetch(db)
        serv = Bio::Fetch.new(db.location)
        serv.database = db.dbname
        return serv
      end
    
      def serv_biosql(db)
        location, port = db.location.split(':')
        port = db.port unless port
    
        case db.driver
        when /mysql/i
          driver = 'Mysql'
        when /pg|postgres/i
          driver = 'Pg'
        when /oracle/
        when /sybase/
        when /sqlserver/
        when /access/
        when /csv/
        when /informix/
        when /odbc/
        when /rdb/
        end
    
        dbi = [ "dbi", driver, db.dbname, location ].compact.join(':')
        dbi += ';port=' + port if port
        serv = Bio::SQL.new(dbi, db.user, db.pass)
    
        # We can not manage biodbname (for name space) in BioSQL yet.
        # use db.biodbname here!!
    
        return serv
      end
    
      def serv_flat(db)
        path = db.location
        path = File.join(path, db.dbname) if db.dbname
        serv = Bio::FlatFileIndex.open(path)
        return serv
      end
    
    
      class DB
    
        def initialize(dbname)
          @database = dbname
          @property = Hash.new
        end
        attr_reader :database
    
        def method_missing(meth_id)
          @property[meth_id.id2name]
        end
    
        def []=(tag, value)
          @property[tag] = value
        end
    
      end
    
    end # class Registry
    
    end # module Bio
    
    
    
    # Demonstration, executed only when this file is run as a script.
    # It needs a registry containing the databases named below and
    # access to the servers they point to.
    if __FILE__ == $0
      begin
        require 'pp'
        alias p pp
      rescue LoadError
        # pp is optional: fall back to the plain Kernel#p.
        # (LoadError is not a StandardError, so a bare rescue misses it.)
      end

      # Usually, you don't need to pass ARGV.
      reg = Bio::Registry.new(ARGV[0])

      p reg
      p reg.query('genbank_biosql')

      serv = reg.get_database('genbank_biofetch')
      puts serv.get_by_id('AA2CG')

      serv = reg.get_database('genbank_biosql')
      puts serv.get_by_id('AA2CG')

      serv = reg.get_database('swissprot_biofetch')
      puts serv.get_by_id('CYC_BOVIN')

      serv = reg.get_database('swissprot_biosql')
      puts serv.get_by_id('CYC_BOVIN')
    end
    
    
    bio-1.4.3.0001/lib/bio/io/ddbjrest.rb0000644000004100000410000003127112200110570016743 0ustar  www-datawww-data#
    # = bio/io/ddbjrest.rb - DDBJ Web API for Biology (WABI) access class via REST
    #
    # Copyright::	Copyright (C) 2011
    #		Naohisa Goto 
    # License::	The Ruby License
    #
    # == Description
    # 
    # This file contains Bio::DDBJ::REST, DDBJ Web API for Biology (WABI) access
    # classes via REST (Representational State Transfer) protocol.
    #
    # == References
    #
    # * http://xml.nig.ac.jp/
    #
    
    require 'bio/command'
    require 'bio/db/genbank/ddbj'
    
    module Bio
    class DDBJ
    
      # == Description
      #
      # The module Bio::DDBJ::REST is the namespace for the DDBJ Web API for
      # Biology (WABI) via REST protocol. Under the Bio::DDBJ::REST,
      # following classes are available.
      #
      # * Bio::DDBJ::REST::DDBJ
      # * Bio::DDBJ::REST::Blast
      # * Bio::DDBJ::REST::ClustalW
      # * Bio::DDBJ::REST::Mafft
      # * Bio::DDBJ::REST::RequestManager
      #
      # Following classes are NOT available, but will be written in the future.
      #
      # * Bio::DDBJ::REST::GetEntry
      # * Bio::DDBJ::REST::ARSA
      # * Bio::DDBJ::REST::VecScreen
      # * Bio::DDBJ::REST::PhylogeneticTree
      # * Bio::DDBJ::REST::Gib
      # * Bio::DDBJ::REST::Gtop
      # * Bio::DDBJ::REST::GTPS
      # * Bio::DDBJ::REST::GIBV
      # * Bio::DDBJ::REST::GIBIS
      # * Bio::DDBJ::REST::SPS
      # * Bio::DDBJ::REST::TxSearch
      # * Bio::DDBJ::REST::Ensembl
      # * Bio::DDBJ::REST::NCBIGenomeAnnotation
      #
      # Read the document of each class for details.
      #
      # In addition, there is a private class Bio::DDBJ::REST::WABItemplate,
      # basic class for the above classes. Normal users should not use the
      # WABItemplate class directly.
      #
      module REST
    
        # Bio::DDBJ::REST::WABItemplate is a private class to provide common
        # methods to access DDBJ Web API for Biology (WABI) services by using
        # REST protocol.
        #
        # Normal users should not use the class directly.
        #
        class WABItemplate

          # hostname for the WABI service
          WABI_HOST = 'xml.nig.ac.jp'

          # path for the WABI service
          WABI_PATH = '/rest/Invoke'

          private

          # Creates a new object.
          # The service name sent to the server is the last component of
          # the class name (e.g. "Blast" for Bio::DDBJ::REST::Blast).
          def initialize
            @http = Bio::Command.new_http(WABI_HOST)
            @service = self.class.to_s.split(/\:\:/)[-1]
          end

          # (private) query to the service by using POST method
          # ---
          # *Arguments*:
          # * (required) _method_name_: (String) WABI method name
          # * (required) _param_: (Hash) parameters of the method
          # *Returns*:: (String) body of the HTTP response
          def _wabi_post(method_name, param)
            parameters = {
              'service' => @service,
              'method' => method_name
            }
            parameters.update(param)
            r = Bio::Command.http_post_form(@http, WABI_PATH, parameters)
            r.body
          end

          # (private class method) Defines an instance method that posts
          # its arguments to the WABI method array[0], and a class method
          # that creates a temporary object and calls the instance method
          # named public_method_name.
          # ---
          # *Arguments*:
          # * (required) _array_: (Array of String) WABI method name
          #   followed by the names of its parameters
          # * (optional) _ruby_method_name_: name of the instance method
          # * (optional) _public_method_name_: name of the class method
          # *Returns*:: self
          def self.define_wabi_method(array,
                                      ruby_method_name = nil,
                                      public_method_name = nil)
            wabi_method_name = array[0]
            ruby_method_name ||= wabi_method_name
            public_method_name ||= wabi_method_name
            arg = array[1..-1]
            arguments = arg.join(', ')
            # Source text of a Hash literal: "name" => name, ...
            parameters = "{" +
              arg.collect { |x| "#{x.dump} => #{x}" }.join(", ") + "}"
            module_eval <<-EOS
              def #{ruby_method_name}(#{arguments})
                param = #{parameters}
                _wabi_post(#{wabi_method_name.dump}, param)
              end
              def self.#{public_method_name}(#{arguments})
                self.new.#{public_method_name}(#{arguments})
              end
            EOS
            self
          end
          private_class_method :define_wabi_method

          # (private class method) Defines a public WABI method.
          def self.def_wabi(array)
            define_wabi_method(array)
          end
          private_class_method :def_wabi

          # (private class method) Defines the WABI method as a private
          # instance method whose name is prefixed with "_", so that a
          # hand-written public method can wrap it.
          def self.def_wabi_custom(array)
            ruby_method_name = '_' + array[0]
            define_wabi_method(array, ruby_method_name)
            private(ruby_method_name.to_sym)
            self
          end
          private_class_method :def_wabi_custom

          # (private class method) Defines an asynchronous WABI method.
          # The generated public method returns the request ID found in
          # the server message "Your requestId is : ...", without
          # surrounding whitespace, and raises RuntimeError when the
          # message is not found.
          def self.def_wabi_async(array)
            m = array[0]
            def_wabi_custom(array)
            # The backslashes are doubled because this text is an
            # interpolated string: a single "\s" would be turned into
            # a plain space before the regexp is compiled.
            module_eval <<-EOS
              def #{m}(*arg)
                ret = _#{m}(*arg)
                if /Your +requestId +is\\s*:\\s*(.+)/i =~ ret.to_s then
                  return $1.strip
                else
                  raise "unknown return value: \#{ret.inspect}"
                end
              end
            EOS
            self
          end
          private_class_method :def_wabi_async
        end #class WABItemplate
    
        # === Description
        #
        # DDBJ (DNA DataBank of Japan) entry retrieval functions.
        #
        # * http://xml.nig.ac.jp/wabi/Method?serviceName=DDBJ&mode=methodList&lang=en
        #
        # === Examples
        #
        # see http://xml.nig.ac.jp/wabi/Method?serviceName=DDBJ&mode=methodList&lang=en
        #
        class DDBJ < WABItemplate

          # Counts each base (A, T, G, C, ...) of an entry and reports
          # the ratios.
          #
          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=DDBJ&methodName=countBasePair&mode=methodDetail
          # ---
          # *Arguments*:
          # * (required) _accession_: (String) accession
          # *Returns*:: (String) tab-delimited text
          def countBasePair(accession); end if false #dummy
          def_wabi(%w[ countBasePair accession ])

          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=DDBJ&methodName=get&mode=methodDetail
          def get(accessionList, paramList); end if false #dummy
          def_wabi(%w[ get accessionList paramList ])

          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=DDBJ&methodName=getAllFeatures&mode=methodDetail
          def getAllFeatures(accession); end if false #dummy
          def_wabi(%w[ getAllFeatures accession ])

          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=DDBJ&methodName=getFFEntry&mode=methodDetail
          def getFFEntry(accession); end if false #dummy
          def_wabi(%w[ getFFEntry accession ])

          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=DDBJ&methodName=getRelatedFeatures&mode=methodDetail
          def getRelatedFeatures(accession, start, stop); end if false #dummy
          def_wabi(%w[ getRelatedFeatures accession start stop ])

          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=DDBJ&methodName=getRelatedFeaturesSeq&mode=methodDetail
          def getRelatedFeaturesSeq(accession, start, stop); end if false #dummy
          def_wabi(%w[ getRelatedFeaturesSeq accession start stop ])
        end #class DDBJ
    
        # === Description
        #
        # DDBJ (DNA DataBank of Japan) BLAST web service.
        # See below for details and examples.
        #
        # Users normally would want to use searchParamAsync or
        # searchParallelAsync with RequestManager.
        #
        # * http://xml.nig.ac.jp/wabi/Method?serviceName=Blast&mode=methodList&lang=en
        class Blast < WABItemplate

          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=Blast&methodName=extractPosition&mode=methodDetail
          def extractPosition(result); end if false #dummy
          def_wabi(%w[ extractPosition result ])

          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=Blast&methodName=getSupportDatabaseList&mode=methodDetail
          def getSupportDatabaseList(); end if false #dummy
          def_wabi(%w[ getSupportDatabaseList ])

          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=Blast&methodName=searchParallel&mode=methodDetail
          def searchParallel(program, database, query, param); end if false #dummy
          def_wabi(%w[ searchParallel program database query param ])

          # Asynchronous variant: the generated method returns a request ID.
          #
          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=Blast&methodName=searchParallelAsync&mode=methodDetail
          def searchParallelAsync(program, database,
                                  query, param); end if false #dummy
          def_wabi_async(%w[ searchParallelAsync program database query param ])

          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=Blast&methodName=searchParam&mode=methodDetail
          def searchParam(program, database, query, param); end if false #dummy
          def_wabi(%w[ searchParam program database query param ])

          # Asynchronous variant: the generated method returns a request ID.
          #
          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=Blast&methodName=searchParamAsync&mode=methodDetail
          def searchParamAsync(program, database,
                               query, param); end if false #dummy
          def_wabi_async(%w[ searchParamAsync program database query param ])

          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=Blast&methodName=searchSimple&mode=methodDetail
          def searchSimple(program, database, query); end if false #dummy
          def_wabi(%w[ searchSimple program database query ])

          # Asynchronous variant: the generated method returns a request ID.
          #
          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=Blast&methodName=searchSimpleAsync&mode=methodDetail
          def searchSimpleAsync(program, database, query); end if false #dummy
          def_wabi_async(%w[ searchSimpleAsync program database query ])

        end #class Blast
    
        # === Description
        #
        # DDBJ (DNA DataBank of Japan) web service of ClustalW multiple sequence
        # alignment software.
        # See below for details and examples.
        #
        # * http://xml.nig.ac.jp/wabi/Method?serviceName=ClustalW&mode=methodList&lang=en
        class ClustalW < WABItemplate
          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=ClustalW&methodName=analyzeParam&mode=methodDetail
          def analyzeParam(query, param); end if false #dummy
          def_wabi(%w[ analyzeParam query param ])

          # Asynchronous variant: the generated method returns a request ID.
          #
          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=ClustalW&methodName=analyzeParamAsync&mode=methodDetail
          def analyzeParamAsync(query, param); end if false #dummy
          def_wabi_async(%w[ analyzeParamAsync query param ])

          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=ClustalW&methodName=analyzeSimple&mode=methodDetail
          def analyzeSimple(query); end if false #dummy
          def_wabi(%w[ analyzeSimple query ])

          # Asynchronous variant: the generated method returns a request ID.
          #
          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=ClustalW&methodName=analyzeSimpleAsync&mode=methodDetail
          def analyzeSimpleAsync(query); end if false #dummy
          def_wabi_async(%w[ analyzeSimpleAsync query ])
        end #class ClustalW
    
        # === Description
        #
        # DDBJ (DNA DataBank of Japan) web service of MAFFT multiple sequence
        # alignment software.
        # See below for details and examples.
        #
        # * http://xml.nig.ac.jp/wabi/Method?serviceName=Mafft&mode=methodList&lang=en
        class Mafft < WABItemplate
          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=Mafft&methodName=analyzeParam&mode=methodDetail
          def analyzeParam(query, param); end if false #dummy
          def_wabi(%w[ analyzeParam query param ])

          # Asynchronous variant: the generated method returns a request ID.
          #
          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=Mafft&methodName=analyzeParamAsync&mode=methodDetail
          def analyzeParamAsync(query, param); end if false #dummy
          def_wabi_async(%w[ analyzeParamAsync query param ])

          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=Mafft&methodName=analyzeSimple&mode=methodDetail
          def analyzeSimple(query); end if false #dummy
          def_wabi(%w[ analyzeSimple query ])

          # Asynchronous variant: the generated method returns a request ID.
          #
          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=Mafft&methodName=analyzeSimpleAsync&mode=methodDetail
          def analyzeSimpleAsync(query); end if false #dummy
          def_wabi_async(%w[ analyzeSimpleAsync query ])
        end #class Mafft
    
    
        # === Description
        #
        # DDBJ (DNA DataBank of Japan) special web service to get result of
        # asynchronous web service.
        # See below for details and examples.
        #
        # * http://xml.nig.ac.jp/wabi/Method?serviceName=RequestManager&mode=methodList&lang=en
        class RequestManager < WABItemplate

          # see http://xml.nig.ac.jp/wabi/Method?&lang=en&serviceName=RequestManager&methodName=getAsyncResult&mode=methodDetail
          def getAsyncResult(requestId); end if false #dummy
          def_wabi(%w[ getAsyncResult requestId ])

          # Polls getAsyncResult until the job is finished, then returns
          # its result.  Between two polls the method sleeps for a random
          # time of 2 to 5 seconds.
          #
          # This is BioRuby original method.
          # ---
          # *Arguments*:
          # * (required) _requestID_: (String) requestId
          # *Returns*:: (String) result
          #
          # Raises RuntimeError when the server reports that it is busy
          # or answers with a message beginning with "ERROR:".
          def wait_getAsyncResult(requestId)
            sleeptime = 2
            while true
              result = getAsyncResult(requestId)
              message = result.to_s
              if /The search and analysis service by WWW is very busy now/ =~ message
                raise message.strip + '(Alternatively, wrong options may be given.)'
              elsif /\AERROR:/ =~ message
                raise message.strip
              elsif /\AYour job has not (?:been )?completed yet/ !~ message
                # anything else is the finished result
                return result
              end

              # the job is still running: wait and ask again
              sleeptime = 2 + rand(4)
              $stderr.puts "DDBJ REST: requestId: #{requestId} -- waitng #{sleeptime} sec." if $VERBOSE
              sleep(sleeptime)
            end
            # will never be reached here
            raise "Bug?"
          end

          # the same as Bio::DDBJ::REST::RequestManager#wait_getAsyncResult
          def self.wait_getAsyncResult(requestId)
            manager = self.new
            manager.wait_getAsyncResult(requestId)
          end

        end #class RequestManager
    
      end #module REST
    end #class DDBJ
    end #module Bio
    
    
    bio-1.4.3.0001/lib/bio/io/ebisoap.rb0000644000004100000410000000571612200110570016571 0ustar  www-datawww-data#
    # = bio/io/ebisoap.rb - EBI SOAP server access class
    #
    # Copyright::  Copyright (C) 2006
    #              Toshiaki Katayama 
    # License::    The Ruby License
    #
    # $Id: ebisoap.rb,v 1.3 2007/04/05 23:35:41 trevor Exp $
    #
    
    require 'bio/io/soapwsdl'
    
    module Bio
    class EBI
    
    class SOAP < Bio::SOAPWSDL

      # Common prefix of the WSDL locations of the EBI tools
      BASE_URI = "http://www.ebi.ac.uk/Tools/webservices/wsdl/"

      # WSDL used when none is given: Dbfetch
      SERVER_URI = "#{BASE_URI}WSDbfetch.wsdl"

      # Creates a client for the given WSDL location.  Without an
      # argument, the SERVER_URI constant of the receiver's own class
      # is used, so every subclass connects to its own service.
      def initialize(wsdl = nil)
        wsdl ||= self.class::SERVER_URI
        super(wsdl)
      end

      # Methods listed for this service:
      # * fetchData
      # * getSupportedDBs
      # * getSupportedFormats
      # * getSupportedStyles
      class Dbfetch < Bio::EBI::SOAP
        SERVER_URI = "#{BASE_URI}WSDbfetch.wsdl"
      end

      # Methods listed for this service:
      # * checkStatus
      # * doIprscan
      # * getResults
      # * poll
      # * polljob
      # * runInterProScan
      # * test
      class InterProScan < Bio::EBI::SOAP
        SERVER_URI = "#{BASE_URI}WSInterProScan.wsdl"
      end

      # Methods listed for this service:
      # * checkStatus
      # * getInfo
      # * getResults
      # * getTools
      # * poll
      # * run
      # * test
      class Emboss < Bio::EBI::SOAP
        SERVER_URI = "#{BASE_URI}WSEmboss.wsdl"
      end

      # Methods listed for this service:
      # * checkStatus
      # * getResults
      # * poll
      # * runClustalW
      # * test
      class ClustalW < Bio::EBI::SOAP
        SERVER_URI = "#{BASE_URI}WSClustalW.wsdl"
      end

      # Methods listed for this service:
      # * checkStatus
      # * getResults
      # * poll
      # * runTCoffee
      class TCoffee < Bio::EBI::SOAP
        SERVER_URI = "#{BASE_URI}WSTCoffee.wsdl"
      end

      # Methods listed for this service:
      # * checkStatus
      # * getResults
      # * poll
      # * runMuscle
      # * test
      class Muscle < Bio::EBI::SOAP
        SERVER_URI = "#{BASE_URI}WSMuscle.wsdl"
      end

      # Methods listed for this service:
      # * checkStatus
      # * doFasta
      # * getResults
      # * poll
      # * polljob
      # * runFasta
      class Fasta < Bio::EBI::SOAP
        SERVER_URI = "#{BASE_URI}WSFasta.wsdl"
      end

      # Methods listed for this service:
      # * checkStatus
      # * doWUBlast
      # * getIds
      # * getResults
      # * poll
      # * polljob
      # * runWUBlast
      # * test
      class WUBlast < Bio::EBI::SOAP
        SERVER_URI = "#{BASE_URI}WSWUBlast.wsdl"
      end

      # Methods listed for this service:
      # * checkStatus
      # * getResults
      # * poll
      # * runMPsrch
      # * test
      class MPsrch < Bio::EBI::SOAP
        SERVER_URI = "#{BASE_URI}WSMPsrch.wsdl"
      end

      # Methods listed for this service:
      # * checkStatus
      # * getResults
      # * poll
      # * runScanPS
      # * test
      class ScanPS < Bio::EBI::SOAP
        SERVER_URI = "#{BASE_URI}WSScanPS.wsdl"
      end

      # The following services are not located under BASE_URI.

      class MSD < Bio::EBI::SOAP
        SERVER_URI = "http://www.ebi.ac.uk/msd-srv/docs/api/msd_soap_service.wsdl"
      end

      class Ontology < Bio::EBI::SOAP
        SERVER_URI = "http://www.ebi.ac.uk/ontology-lookup/OntologyQuery.wsdl"
      end

      class Citation < Bio::EBI::SOAP
        SERVER_URI = "http://www.ebi.ac.uk/citations/webservices/wsdl"
      end

    end # SOAP
    
    end # EBI
    end # Bio
    
    
    
    # Demonstration, executed only when this file is run as a script.
    # It contacts the EBI servers.
    if __FILE__ == $0
      dbfetch = Bio::EBI::SOAP::Dbfetch.new
      p dbfetch.getSupportedDBs

      require 'base64'

      # Submit an EMBOSS "water" job, then fetch and decode its output.
      emboss = Bio::EBI::SOAP::Emboss.new
      params = {
        "tool" => "water",
        "asequence" => "uniprot:alk1_human",
        "bsequence" => "uniprot:alk1_mouse",
        "email" => "ebisoap@example.org"
      }
      job = emboss.run(params, [])
      puts job
      encoded = emboss.poll(job, "tooloutput")
      puts Base64.decode64(encoded)
    end
    
    bio-1.4.3.0001/lib/bio/io/flatfile/0000755000004100000410000000000012200110570016377 5ustar  www-datawww-databio-1.4.3.0001/lib/bio/io/flatfile/bdb.rb0000644000004100000410000001306312200110570017456 0ustar  www-datawww-data# 
    # bio/io/flatfile/bdb.rb - OBDA flatfile index by Berkeley DB 
    # 
    # Copyright:: Copyright (C) 2002 GOTO Naohisa  
    # License::   The Ruby License
    # 
    #  $Id: bdb.rb,v 1.10 2007/04/05 23:35:41 trevor Exp $ 
    # 
     
    begin 
      require 'bdb' 
    rescue LoadError,NotImplementedError
    end 
    
    require 'bio/io/flatfile/index'
    require 'bio/io/flatfile/indexer'
    
    module Bio
      class FlatFileIndex
    
        # Default permission and open flags used by the Berkeley DB
        # index classes.  Every method is a module function.
        module BDBdefault
          module_function

          # File permission for new files: 0666 restricted by the umask.
          def permission
            0666 & ~File.umask
          end

          # Flag for opening a database read-only.
          def flag_read
            BDB::RDONLY
          end

          # Flags for creating a database, discarding existing content.
          def flag_write
            BDB::CREATE | BDB::TRUNCATE
          end

          # Flag for updating an existing database.
          def flag_append
            'r+'
          end
        end #module BDBdefault
    
        # Thin wrapper around one BDB::Btree file stored inside the
        # directory of a database.  The file is not opened by new;
        # call open explicitly.
        class BDBwrapper
          # name is the database directory, filename the file in it.
          # Extra arguments are accepted but not used.
          def initialize(name, filename, *arg)
            @dbname = name
            @filename = filename
            @file = nil
          end

          # Path of the wrapped file.
          def filename
            File.join(@dbname, @filename)
          end

          # Opens the file unless it is already open.  Always returns true.
          def open(flag = BDBdefault.flag_read,
                   permission = BDBdefault.permission)
            return true if @file
            DEBUG.print "BDBwrapper: open #{filename}\n"
            @file = BDB::Btree.open(filename, nil, flag, permission)
            true
          end

          # Closes the file if it is open.  Always returns nil.
          def close
            return nil unless @file
            DEBUG.print "BDBwrapper: close #{filename}\n"
            @file.close
            @file = nil
          end

          # Value stored under arg, or nil when the file is not open.
          def [](arg)
            @file[arg] if @file
          end

          # Stores val under key; both are converted to strings.
          # The file must be open.
          def []=(key, val)
            @file[key.to_s] = val.to_s
          end

          # Re-opens the file with the given arguments and stores every
          # element of array under the key "#{prefix}#{index}".
          def writeback_array(prefix, array, *arg)
            self.close
            self.open(*arg)
            array.each_with_index do |element, index|
              @file["#{prefix}#{index}"] = element.to_s
            end
          end

          # Keys of the file, or an empty array when it is not open.
          def keys
            @file ? @file.keys : []
          end
        end #class BDBwrapper
    
        module BDB_1
          # Mapping from a key to a list of values, stored in a
          # BDB::Btree file.  The values of a key are kept as a single
          # tab separated string.  The file is opened lazily by the
          # first method that needs it.
          class BDBMappingFile
            # Same as new.
            def self.open(*arg)
              self.new(*arg)
            end

            # Remembers the file name and the open parameters; the file
            # itself is not opened here.
            def initialize(filename, flag = BDBdefault.flag_read,
                           permission = BDBdefault.permission)
              @filename = filename
              @flag = flag
              @permission = permission
              @bdb = nil
            end
            attr_reader :filename
            attr_accessor :flag, :permission

            # Opens the file.  Returns true when it has been opened by
            # this call, nil when it was already open.
            def open
              unless @bdb then
                DEBUG.print "BDBMappingFile: open #{@filename}\n"
                @bdb = BDB::Btree.open(@filename, nil, @flag, @permission)
                true
              else
                nil
              end
            end

            # Closes the file if it is open.  Always returns nil.
            def close
              if @bdb then
                DEBUG.print "BDBMappingFile: close #{@filename}\n"
                @bdb.close
                @bdb = nil
              end
              nil
            end

            # Number of keys.  Opens the file when needed, like the
            # other accessors do.
            def records
              open
              @bdb.size
            end
            alias size records

            # methods for writing

            # Appends val to the values already stored under key.
            # Returns the stored string.
            def add(key, val)
              open
              val = join_values(val)
              s = @bdb[key]
              val = "#{s}\t#{val}" if s
              @bdb[key] = val
              #DEBUG.print "add: key=#{key.inspect}, val=#{val.inspect}\n"
              val
            end

            # Stores val under key; raises RuntimeError when the key
            # already exists.
            def add_exclusive(key, val)
              open
              val = join_values(val)
              if @bdb[key] then
                raise RuntimeError, "keys must be unique, but key #{key.inspect} already exists"
              end
              @bdb[key] = val
              #DEBUG.print "add_exclusive: key=#{key.inspect}, val=#{val.inspect}\n"
              val
            end

            # Stores val under key, replacing (with a debug message)
            # what was stored before.
            def add_overwrite(key, val)
              open
              val = join_values(val)
              if @bdb[key] then
                DEBUG.print "Warining: overwrote unique id #{key.inspect}\n"
              end
              @bdb[key] = val
              #DEBUG.print "add_overwrite: key=#{key.inspect}, val=#{val.inspect}\n"
              val
            end

            # Merges val into the values stored under key, keeping the
            # result sorted and free of duplicates.
            def add_nr(key, val)
              open
              s = @bdb[key]
              a = s ? s.split("\t") : []
              str = (a + Array(val)).sort.uniq.join("\t")
              @bdb[key] = str
              #DEBUG.print "add_nr: key=#{key.inspect}, val=#{str.inspect}\n"
              str
            end

            # methods for searching

            # Returns the values stored under key as an array
            # (empty when the key does not exist).
            def search(key)
              open
              s = @bdb[key]
              s ? s.split("\t") : []
            end

            private

            # Tab separated form of a list of values.  Kernel#Array is
            # used so that a single String is accepted as well
            # (String#to_a does not exist in Ruby 1.9 and later).
            def join_values(val)
              Array(val).join("\t")
            end
          end #class BDBMappingFile
    
          # Primary name space, stored in the file "key_<name>".
          class PrimaryNameSpace < Template::NameSpace
            # Mapping file object for the given file name.
            def mapping(filename)
              BDBMappingFile.new(filename)
            end

            # Path of the index file inside the database directory.
            def filename
              File.join(dbname, "key_#{name}")
            end

            # Wraps a non-empty result of the inherited search in an
            # array; an empty result is returned as it is.
            def search(key)
              found = super(key)
              found.empty? ? found : [ found ]
            end
          end #class PrimaryNameSpace
    
          # Secondary name space, stored in the file "id_<name>".
          class SecondaryNameSpace < Template::NameSpace
            # Mapping file object for the given file name.
            def mapping(filename)
              BDBMappingFile.new(filename)
            end

            # Path of the index file inside the database directory.
            def filename
              File.join(dbname, "id_#{name}")
            end

            # Runs the inherited search, then closes the file.
            def search(key)
              found = super(key)
              file.close
              found
            end
          end #class SecondaryNameSpace
        end #module BDB_1
    
      end #class FlatFileIndex
    end #module Bio
    
    =begin
    
      * Classes/modules in this file are internal use only.
    
    =end
    bio-1.4.3.0001/lib/bio/io/flatfile/indexer.rb0000644000004100000410000006545112200110570020375 0ustar  www-datawww-data# 
    # = bio/io/flatfile/indexer.rb - OBDA flatfile indexer
    # 
    # Copyright:: Copyright (C) 2002 GOTO Naohisa  
    # License::   The Ruby License
    # 
    #  $Id: indexer.rb,v 1.26 2007/12/11 15:13:32 ngoto Exp $ 
    # 
    
    require 'bio/io/flatfile/index'
    
    module Bio
      class FlatFileIndex
    
        module Indexer
    
          # A name space of the index: a name and the procedure stored
          # with it.
          class NameSpace
            # Name of the name space
            attr_reader :name

            # Procedure given to new
            attr_reader :proc

            def initialize(name, method)
              @name, @proc = name, method
            end
          end #class NameSpace
    
          # Collection of name spaces, a Hash keyed by their names.
          # Unlike Hash#each, each yields only the name space objects.
          class NameSpaces < Hash
            # Stores every given object under its name.
            def initialize(*arg)
              super()
              arg.each { |ns| store(ns.name, ns) }
            end

            # Names of the stored name spaces.
            def names
              keys
            end

            # Stores x under its name and returns x.
            def add(x)
              store(x.name, x)
            end
            alias_method :<<, :add

            #alias each_orig each
            alias_method :each, :each_value
          end
    
          module Parser
            # Creates the parser object for the given format.
            #
            # format is a format name or a database class (compared by
            # its to_s); the remaining arguments are passed to the
            # parser.  Raises RuntimeError for an unknown format.
            def self.new(format, *arg)
              format_name = format.to_s
              case format_name
              when 'embl', 'Bio::EMBL' then
                EMBLParser.new(*arg)
              when 'swiss', 'Bio::SPTR', 'Bio::TrEMBL', 'Bio::SwissProt' then
                SPTRParser.new(*arg)
              when 'genbank', 'Bio::GenBank', 'Bio::RefSeq', 'Bio::DDBJ' then
                GenBankParser.new(*arg)
              when 'Bio::GenPept' then
                GenPeptParser.new(*arg)
              when 'fasta', 'Bio::FastaFormat' then
                FastaFormatParser.new(*arg)
              when 'Bio::FANTOM::MaXML::Sequence' then
                MaXMLSequenceParser.new(*arg)
              when 'Bio::FANTOM::MaXML::Cluster' then
                MaXMLClusterParser.new(*arg)
              # The BLAST parsers share one class and receive the
              # report class as their first argument.
              when 'Bio::Blast::Default::Report' then
                BlastDefaultParser.new(Bio::Blast::Default::Report, *arg)
              when 'Bio::Blast::Default::Report_TBlast' then
                BlastDefaultParser.new(Bio::Blast::Default::Report_TBlast, *arg)
              when 'Bio::Blast::WU::Report' then
                BlastDefaultParser.new(Bio::Blast::WU::Report, *arg)
              when 'Bio::Blast::WU::Report_TBlast' then
                BlastDefaultParser.new(Bio::Blast::WU::Report_TBlast, *arg)
              when 'Bio::PDB::ChemicalComponent' then
                PDBChemicalComponentParser.new(Bio::PDB::ChemicalComponent, *arg)
              else
                raise 'unknown or unsupported format'
              end #case format_name
            end
    
            class TemplateParser
              NAMESTYLE = NameSpaces.new
              # Starts with an empty error log and no secondary name
              # spaces.  The known name spaces are taken from the
              # NAMESTYLE constant of the receiver's own class.
              def initialize
                @errorlog = []
                @secondary = NameSpaces.new
                @namestyle = self.class::NAMESTYLE
              end
              attr_reader :primary, :secondary, :format, :dbclass
              attr_reader :errorlog
    
              # Selects the primary name space.  name is either a
              # NameSpace object or a key of the known name spaces.
              # Returns the selected name space; raises RuntimeError
              # when the key is unknown.
              def set_primary_namespace(name)
                DEBUG.print "set_primary_namespace: #{name.inspect}\n"
                @primary = name.is_a?(NameSpace) ? name : @namestyle[name]
                raise 'unknown primary namespace' unless @primary
                @primary
              end
            
              # Adds secondary name spaces.  Each argument is either a
              # NameSpace object, which is added as it is, or a key of
              # the known name spaces.
              # Returns true; raises RuntimeError when a key is unknown
              # (name spaces given before it have already been added).
              def add_secondary_namespaces(*names)
                DEBUG.print "add_secondary_namespaces: #{names.inspect}\n"
                names.each do |x|
                  # NameSpace objects are accepted directly, as in
                  # set_primary_namespace; they used to be dropped.
                  y = x.is_a?(NameSpace) ? x : @namestyle[x]
                  raise 'unknown secondary namespace' unless y
                  @secondary << y
                end
                true
              end
    
              # administration of a single flatfile
              def open_flatfile(fileid, file)
                @fileid = fileid
                @flatfilename = file
                DEBUG.print "fileid=#{fileid} file=#{@flatfilename.inspect}\n"
                @flatfile = Bio::FlatFile.open(@dbclass, file, 'rb')
                @flatfile.raw = nil
                @flatfile.entry_pos_flag = true
                @entry = nil
              end
              attr_reader :fileid
    
              def each
                @flatfile.each do |x|
                  @entry = x
                  pos = @flatfile.entry_start_pos
                  len = @flatfile.entry_ended_pos - @flatfile.entry_start_pos
                  begin
                    yield pos, len
                  rescue RuntimeError, NameError => evar
                    DEBUG.print "Caught error: #{evar.inspect}\n"
                    DEBUG.print "in #{@flatfilename.inspect} position #{pos}\n"
                    DEBUG.print "===begin===\n"
                    DEBUG.print @flatfile.entry_raw.to_s.chomp
                    DEBUG.print "\n===end===\n"
                    @errorlog << [ evar, @flatfilename, pos ]
                    if @fatal then
                      DEBUG.print "Fatal error occurred, stop creating index...\n"
                      raise evar
                    else
                      DEBUG.print "This entry shall be incorrectly indexed.\n"
                    end
                  end #rescue
                end
              end
    
              def parse_primary
                r = self.primary.proc.call(@entry)
                unless r.is_a?(String) and r.length > 0
                  #@fatal = true
                  raise 'primary id must be a non-void string (skipped this entry)'
                end
                r
              end
    
              def parse_secondary
                self.secondary.each do |x|
                  p = x.proc.call(@entry)
                  p.each do |y|
                    yield x.name, y if y.length > 0
                  end
                end
              end
    
              def close_flatfile
                DEBUG.print "close flatfile #{@flatfilename.inspect}\n"
                @flatfile.close
              end
    
              protected
              attr_writer :format, :dbclass
            end #class TemplateParser
    
            # Index parser for entries of the Bio::GenBank class.
            #
            # The primary namespace defaults to 'VERSION'. Unless secondary
            # namespaces are given explicitly, every other namespace of the
            # NAMESTYLE table is used as a secondary namespace.
            class GenBankParser < TemplateParser
              NAMESTYLE = NameSpaces.new(
                NameSpace.new('VERSION',   Proc.new { |ent| ent.acc_version }),
                NameSpace.new('LOCUS',     Proc.new { |ent| ent.entry_id }),
                NameSpace.new('ACCESSION', Proc.new { |ent| ent.accessions }),
                # the leading "GI:" is stripped from the gi value
                NameSpace.new('GI',
                              Proc.new { |ent| ent.gi.to_s.gsub(/\AGI\:/, '') })
              )
              PRIMARY = 'VERSION'

              # pri_name::  name of the primary namespace (default: PRIMARY)
              # sec_names:: names of the secondary namespaces
              #             (default: all namespaces except the primary one)
              def initialize(pri_name = nil, sec_names = nil)
                super()
                self.format = 'genbank'
                self.dbclass = Bio::GenBank
                set_primary_namespace(pri_name || PRIMARY)
                if !sec_names then
                  primary_name = self.primary.name
                  sec_names = []
                  @namestyle.each_value do |ns|
                    sec_names << ns.name unless ns.name == primary_name
                  end
                end
                add_secondary_namespaces(*sec_names)
              end
            end #class GenBankParser
    
            # Index parser for entries of the Bio::GenPept class.
            # It is set up exactly like GenBankParser; only the entry class
            # differs.
            class GenPeptParser < GenBankParser
              def initialize(*arg)
                super # forwards all the given arguments unchanged
                self.dbclass = Bio::GenPept
              end
            end #class GenPeptParser
    
            # Index parser for entries of the Bio::EMBL class.
            #
            # The primary namespace defaults to 'ID'; the secondary
            # namespaces default to the SECONDARY constant of the concrete
            # class, so subclasses can change the default by redefining it.
            class EMBLParser < TemplateParser
              NAMESTYLE = NameSpaces.new(
                NameSpace.new('ID', Proc.new { |ent| ent.entry_id }),
                NameSpace.new('AC', Proc.new { |ent| ent.accessions }),
                NameSpace.new('SV', Proc.new { |ent| ent.sv }),
                NameSpace.new('DR', Proc.new { |ent|
                                # all values of the dr table, flattened,
                                # keeping only those longer than 1
                                refs = []
                                ent.dr.each_value { |val| refs << val }
                                refs.flatten!
                                refs.find_all { |ref| ref.length > 1 }
                              })
              )
              PRIMARY = 'ID'
              SECONDARY = [ 'AC', 'SV' ]

              # pri_name::  name of the primary namespace (default: PRIMARY)
              # sec_names:: names of the secondary namespaces
              #             (default: SECONDARY of the actual class)
              def initialize(pri_name = nil, sec_names = nil)
                super()
                self.format = 'embl'
                self.dbclass = Bio::EMBL
                set_primary_namespace(pri_name || PRIMARY)
                sec_names ||= self.class::SECONDARY
                add_secondary_namespaces(*sec_names)
              end
            end #class EMBLParser
    
            # Index parser for entries of the Bio::SPTR class.
            # It reuses the namespaces of EMBLParser, with 'AC' as the only
            # default secondary namespace and 'swiss' as the format name.
            class SPTRParser < EMBLParser
              SECONDARY = [ 'AC' ]

              def initialize(*arg)
                super # forwards all the given arguments unchanged
                self.format = 'swiss'
                self.dbclass = Bio::SPTR
              end
            end #class SPTRParser
    
            # Index parser for entries of the Bio::FastaFormat class.
            #
            # The default primary namespace 'UNIQUE' has no proc; in that
            # case the primary key is generated from the base name of the
            # flatfile and a counter that restarts at 1 for every file.
            class FastaFormatParser < TemplateParser
              NAMESTYLE = NameSpaces.new(
                 NameSpace.new( 'UNIQUE', nil ),
                 NameSpace.new( 'entry_id', Proc.new { |x| x.entry_id } ),
                 NameSpace.new( 'accession', Proc.new { |x| x.accessions } ),
                 NameSpace.new( 'id_string', Proc.new { |x| 
                                 x.identifiers.id_strings
                               }),
                 NameSpace.new( 'word', Proc.new { |x|
                                 x.identifiers.words
                               })
                                         )
              PRIMARY = 'UNIQUE'
              SECONDARY = [ 'entry_id', 'accession', 'id_string', 'word' ]

              # Generates a key of the form "<file base name>:<counter>" and
              # increments the counter.
              #
              # The base name prepared in open_flatfile is used (it used to
              # be computed but never used), so the key does not depend on
              # the directory part of the path given for the flatfile.
              def unique_primary_key
                r = "#{@flatfilename_base}:#{@count}"
                @count += 1
                r
              end
              private :unique_primary_key

              # Returns the primary key of the current entry.
              # When the primary namespace has a proc, its result must be a
              # non-empty String (RuntimeError otherwise); when it has none,
              # a generated unique key is returned.
              def parse_primary
                if p = self.primary.proc then
                  r = p.call(@entry)
                  unless r.is_a?(String) and r.length > 0
                    #@fatal = true
                    raise 'primary id must be a non-void string (skipped this entry)'
                  end
                  r
                else
                  unique_primary_key
                end
              end
                                         
              # pri_name::  name of the primary namespace (default: PRIMARY)
              # sec_names:: names of the secondary namespaces
              #             (default: SECONDARY of the actual class)
              def initialize(pri_name = nil, sec_names = nil)
                super()
                self.format = 'fasta'
                self.dbclass = Bio::FastaFormat
                self.set_primary_namespace((pri_name or PRIMARY))
                unless sec_names then
                  sec_names = self.class::SECONDARY
                end
                self.add_secondary_namespaces(*sec_names)
              end

              # Opens the flatfile, resets the unique key counter, and moves
              # the read position to the first line beginning with ">"
              # (or to the last position read if there is no such line).
              def open_flatfile(fileid, file)
                super
                @count = 1
                @flatfilename_base = File.basename(@flatfilename)
                @flatfile.pos = 0
                begin
                  pos = @flatfile.pos
                  line = @flatfile.gets
                end until (!line or line =~ /^\>/)
                @flatfile.pos = pos
              end
            end #class FastaFormatParser
    
            # Index parser for entries of the Bio::FANTOM::MaXML::Sequence
            # class. The primary namespace defaults to 'id'.
            class MaXMLSequenceParser < TemplateParser
              NAMESTYLE = NameSpaces.new(
                NameSpace.new('id', Proc.new { |seq| seq.entry_id }),
                NameSpace.new('altid', Proc.new { |seq| seq.id_strings }),
                NameSpace.new('gene_ontology', Proc.new { |seq|
                                go = seq.annotations.get_all_by_qualifier('gene_ontology')
                                go.collect { |ann| ann.anntext }
                              }),
                NameSpace.new('datasrc', Proc.new { |seq|
                                # Each datasrc string is indexed twice: by the
                                # part after the first '|' (the whole string
                                # when it has no '|') and by the whole string.
                                keys = []
                                seq.annotations.each do |ann|
                                  ann.datasrc.each do |src|
                                    keys.push(src.split('|', 2)[-1], src)
                                  end
                                end
                                keys.sort!
                                keys.uniq!
                                keys
                              })
              )
              PRIMARY = 'id'
              SECONDARY = [ 'altid', 'gene_ontology', 'datasrc' ]

              # pri_name::  name of the primary namespace (default: PRIMARY)
              # sec_names:: names of the secondary namespaces
              #             (default: SECONDARY of the actual class)
              def initialize(pri_name = nil, sec_names = nil)
                super()
                self.format = 'raw'
                self.dbclass = Bio::FANTOM::MaXML::Sequence
                set_primary_namespace(pri_name || PRIMARY)
                sec_names ||= self.class::SECONDARY
                add_secondary_namespaces(*sec_names)
              end
            end #class MaXMLSequenceParser
    
            # Index parser for entries of the Bio::FANTOM::MaXML::Cluster
            # class. The 'datasrc' and 'gene_ontology' keys of a cluster are
            # collected from its sequences with the corresponding procs of
            # MaXMLSequenceParser.
            class MaXMLClusterParser < TemplateParser
              NAMESTYLE = NameSpaces.new(
                NameSpace.new('id', Proc.new { |cl| cl.entry_id }),
                NameSpace.new('altid', Proc.new { |cl| cl.sequences.id_strings }),
                NameSpace.new('datasrc', Proc.new { |cl|
                                keys = cl.sequences.collect do |seq|
                                  MaXMLSequenceParser::NAMESTYLE['datasrc'].proc.call(seq)
                                end
                                keys.flatten!
                                keys.sort!
                                keys.uniq!
                                keys
                              }),
                NameSpace.new('gene_ontology', Proc.new { |cl|
                                keys = cl.sequences.collect do |seq|
                                  MaXMLSequenceParser::NAMESTYLE['gene_ontology'].proc.call(seq)
                                end
                                keys.flatten!
                                keys.sort!
                                keys.uniq!
                                keys
                              })
              )
              PRIMARY = 'id'
              SECONDARY = [ 'altid', 'gene_ontology', 'datasrc' ]

              # pri_name::  name of the primary namespace (default: PRIMARY)
              # sec_names:: names of the secondary namespaces
              #             (default: SECONDARY of the actual class)
              def initialize(pri_name = nil, sec_names = nil)
                super()
                self.format = 'raw'
                self.dbclass = Bio::FANTOM::MaXML::Cluster
                set_primary_namespace(pri_name || PRIMARY)
                sec_names ||= self.class::SECONDARY
                add_secondary_namespaces(*sec_names)
              end
            end #class MaXMLClusterParser
    
            # Index parser for BLAST report classes. The entry class is
            # given to the constructor.
            #
            # Unless secondary namespaces are given explicitly, every
            # namespace of the NAMESTYLE table other than the primary one is
            # used (the SECONDARY constant is not consulted by this class).
            class BlastDefaultParser < TemplateParser
              NAMESTYLE = NameSpaces.new(
                NameSpace.new('QUERY', Proc.new { |rep| rep.query_def }),
                NameSpace.new('query_id', Proc.new { |rep|
                                # ID strings of the query definition line,
                                # plus its first whitespace-separated word
                                ids = Bio::FastaDefline.new(rep.query_def.to_s).id_strings
                                ids << rep.query_def.to_s.split(/\s+/,2)[0]
                                ids
                              }),
                NameSpace.new('hit', Proc.new { |rep|
                                # for every hit: the ID strings of its
                                # definition, the definition itself, and the
                                # first word of the definition
                                keys = rep.hits.collect do |hit|
                                  ids = Bio::FastaDefline.new(hit.definition.to_s).id_strings
                                  ids << hit.definition
                                  ids << hit.definition.to_s.split(/\s+/,2)[0]
                                  ids
                                end
                                keys.flatten!
                                keys
                              })
              )
              PRIMARY = 'QUERY'
              SECONDARY = [ 'query_id', 'hit' ]

              # klass::     entry class of the reports
              # pri_name::  name of the primary namespace (default: PRIMARY)
              # sec_names:: names of the secondary namespaces
              #             (default: all namespaces except the primary one)
              def initialize(klass, pri_name = nil, sec_names = nil)
                super()
                self.format = 'raw'
                self.dbclass = klass
                set_primary_namespace(pri_name || PRIMARY)
                if !sec_names then
                  primary_name = self.primary.name
                  sec_names = []
                  @namestyle.each_value do |ns|
                    sec_names << ns.name unless ns.name == primary_name
                  end
                end
                add_secondary_namespaces(*sec_names)
              end

              # Opens the flatfile, lets it autodetect its entry class
              # (falling back to the parser's dbclass when nothing is
              # detected), and moves the read position to the first line
              # matching /^T?BLAST/ (or to the last position read if there
              # is no such line).
              def open_flatfile(fileid, file)
                super
                @flatfile.rewind
                @flatfile.dbclass = nil
                @flatfile.autodetect
                @flatfile.dbclass = dbclass unless @flatfile.dbclass
                @flatfile.rewind
                pos = nil
                loop do
                  pos = @flatfile.pos
                  line = @flatfile.gets
                  break if !line || line =~ /^T?BLAST/
                end
                @flatfile.pos = pos
              end
            end #class BlastDefaultParser
    
            # Index parser for entries of the Bio::PDB::ChemicalComponent
            # class. The only namespace is 'UNIQUE' (the entry_id), which is
            # also the default primary namespace.
            class PDBChemicalComponentParser < TemplateParser
              NAMESTYLE = NameSpaces.new(
                NameSpace.new('UNIQUE', Proc.new { |ent| ent.entry_id })
              )
              PRIMARY = 'UNIQUE'

              # klass::     accepted but not used; the entry class is always
              #             Bio::PDB::ChemicalComponent
              # pri_name::  name of the primary namespace (default: PRIMARY)
              # sec_names:: names of the secondary namespaces
              #             (default: all namespaces except the primary one)
              def initialize(klass, pri_name = nil, sec_names = nil)
                super()
                self.format = 'raw'
                self.dbclass = Bio::PDB::ChemicalComponent
                set_primary_namespace(pri_name || PRIMARY)
                if !sec_names then
                  primary_name = self.primary.name
                  sec_names = []
                  @namestyle.each_value do |ns|
                    sec_names << ns.name unless ns.name == primary_name
                  end
                end
                add_secondary_namespaces(*sec_names)
              end

              # Opens the flatfile and moves the read position to the first
              # line beginning with "RESIDUE " (or to the last position read
              # if there is no such line).
              def open_flatfile(fileid, file)
                super
                @flatfile.pos = 0
                pos = nil
                loop do
                  pos = @flatfile.pos
                  line = @flatfile.gets
                  break if !line || line =~ /^RESIDUE /
                end
                @flatfile.pos = pos
              end
            end #class PDBChemicalComponentParser
    
          end #module Parser
    
          # Creates a new databank of the MAGIC_BDB type named +name+ from
          # +files+, using the format and namespaces of +parser+.
          #
          # +options+ is not examined here; it is only handed over to
          # addindex_bdb. Raises RuntimeError when the BDB constant is not
          # defined. Returns true.
          def self.makeindexBDB(name, parser, options, *files)
            raise RuntimeError, "Berkeley DB support not found" unless defined?(BDB)

            DEBUG.print "makeing BDB DataBank...\n"
            db = DataBank.new(name, MAGIC_BDB)
            db.format = parser.format
            db.fileids.add(*files)
            db.fileids.recalc

            db.primary = parser.primary.name
            db.secondary = parser.secondary.names

            DEBUG.print "writing config.dat, config, fileids ...\n"
            db.write('wb', BDBdefault::flag_write)

            DEBUG.print "reading files...\n"

            # every file is new, so all file ids have to be indexed
            all_fileids = 0...files.size
            addindex_bdb(db, BDBdefault::flag_write, all_fileids,
                         parser, options)
            db.close
            true
          end #def
    
          # Indexes the flatfiles whose ids are listed in +need_update+ into
          # the index files of +db+.
          #
          # The primary index file and every secondary index file get +flag+
          # as their flag before any entry is added. +options+ is not used
          # in this method. Returns true.
          def self.addindex_bdb(db, flag, need_update, parser, options)
            DEBUG.print "reading files...\n"

            primary_index = db.primary
            primary_index.file.close
            primary_index.file.flag = flag

            db.secondary.each_files do |sec_index|
              sec_index.file.close
              sec_index.file.flag = flag
              sec_index.file.open
              sec_index.file.close
            end

            need_update.each do |fileid|
              parser.open_flatfile(fileid, db.fileids[fileid].filename)
              parser.each do |pos, len|
                key = parser.parse_primary
                # an existing record with the same primary key is overwritten
                #primary_index.file.add_exclusive(key, [ fileid, pos, len ])
                primary_index.file.add_overwrite(key, [ fileid, pos, len ])
                #DEBUG.print "#{key} #{fileid} #{pos} #{len}\n"
                parser.parse_secondary do |ns_name, sec_key|
                  db.secondary[ns_name].file.add_nr(sec_key, key)
                  #DEBUG.print "#{sec_key} #{key}\n"
                end
              end
              parser.close_flatfile
            end
            true
          end #def
    
          # Creates a new databank named +name+ from +files+ (DataBank.new is
          # called with nil as its second argument), using the format and
          # namespaces of +parser+. The index itself is built by
          # addindex_flat in :new mode, to which +options+ is handed over.
          # Returns true.
          def self.makeindexFlat(name, parser, options, *files)
            DEBUG.print "makeing flat/1 DataBank using temporary files...\n"

            db = DataBank.new(name, nil)
            db.format = parser.format
            db.fileids.add(*files)
            db.primary = parser.primary.name
            db.secondary = parser.secondary.names
            db.fileids.recalc
            DEBUG.print "writing DabaBank...\n"
            db.write('wb')

            # every file is new, so all file ids have to be indexed
            all_fileids = 0...files.size
            addindex_flat(db, :new, all_fileids, parser, options)
            db.close
            true
          end #def
    
          # Indexes the flatfiles whose ids are listed in +need_update+ into
          # the index files of +db+.
          #
          # The keys are first written to temporary tab-separated files (one
          # for the primary namespace and one for each secondary namespace)
          # and then imported with import_tsv_files, using the sort procedure
          # selected by chose_sort_proc.
          #
          # db::          the databank to be updated
          # mode::        passed to chose_sort_proc and import_tsv_files
          #               (callers in this file give :new or :add)
          # need_update:: file ids to be indexed (anything responding to
          #               to_a and each)
          # parser::      parser used to read the flatfiles
          # options::     'sort_program', 'env_program' and
          #               'env_program_arguments' are read from it
          #
          # Returns false when there is nothing to update, true otherwise.
          # The temporary files are removed even when an exception is raised.
          def self.addindex_flat(db, mode, need_update, parser, options)
            require 'tempfile'
            prog = options['sort_program']
            env = options['env_program']
            env_args = options['env_program_arguments']

            return false if need_update.to_a.size == 0

            DEBUG.print "prepare temporary files...\n"
            tempbase = "bioflat#{rand(10000)}-"
            pfile = nil
            sfiles = {}
            begin
              pfile = Tempfile.open(tempbase + 'primary-')
              DEBUG.print "open temporary file #{pfile.path.inspect}\n"
              parser.secondary.names.each do |x|
                sfiles[x] =  Tempfile.open(tempbase + 'secondary-')
                DEBUG.print "open temporary file #{sfiles[x].path.inspect}\n"
              end

              DEBUG.print "reading files...\n"
              need_update.each do |fileid|
                filename = db.fileids[fileid].filename
                parser.open_flatfile(fileid, filename)
                parser.each do |pos, len|
                  p = parser.parse_primary
                  pfile << "#{p}\t#{fileid}\t#{pos}\t#{len}\n"
                  #DEBUG.print "#{p} #{fileid} #{pos} #{len}\n"
                  parser.parse_secondary do |sn, sp|
                    sfiles[sn] << "#{sp}\t#{p}\n"
                    #DEBUG.print "#{sp} #{p}\n"
                  end
                end
                parser.close_flatfile
              end

              sort_proc = chose_sort_proc(prog, mode, env, env_args)
              # close without unlinking: the file is read back by path
              pfile.close(false)
              DEBUG.print "sorting primary (#{parser.primary.name})...\n"
              db.primary.file.import_tsv_files(true, mode, sort_proc, pfile.path)

              parser.secondary.names.each do |x|
                DEBUG.print "sorting secondary (#{x})...\n"
                sfiles[x].close(false)
                db.secondary[x].file.import_tsv_files(false, mode, sort_proc,
                                                      sfiles[x].path)
              end
            ensure
              # close and unlink all temporary files, also on failure
              pfile.close(true) if pfile
              sfiles.each_value { |f| f.close(true) }
            end
            true
          end #def
    
          # Default external sort program.
          # chose_sort_proc uses it, when it is executable, if no sort
          # program is specified (nil or an empty string).
          DEFAULT_SORT = '/usr/bin/sort'

          # Default env program (runs a program in a modified environment).
          # chose_sort_proc prefixes the sort command with it, when it is
          # executable, if no env program is specified (nil).
          DEFAULT_ENV = '/usr/bin/env'

          # Default arguments for the env program, used by chose_sort_proc
          # when no arguments are given.
          DEFAULT_ENV_ARGS = [ 'LC_ALL=C' ]
    
          # Selects the sort procedure used to build flat index files.
          #
          # prog::     'builtin', 'hs' or 'lm' (case-insensitive) selects the
          #            internal sort routine; nil or '' selects DEFAULT_SORT
          #            when it is executable and the internal routine
          #            otherwise; anything else is taken as the external
          #            sort program.
          # mode::     with an external program, :new selects the external
          #            sort procedure and any other value the external
          #            merge sort procedure
          # env::      env program put in front of the sort command;
          #            '' or false disables it, nil means DEFAULT_ENV when
          #            that is executable
          # env_args:: arguments for the env program
          #            (default: DEFAULT_ENV_ARGS)
          #
          # Returns the object returned by the selected factory method of
          # Flat_1::FlatMappingFile.
          def self.chose_sort_proc(prog, mode = :new,
                                   env = nil, env_args = nil)
            builtin =
              case prog
              when /^builtin$/i, /^hs$/i, /^lm$/i
                true
              when nil, ''
                if FileTest.executable?(DEFAULT_SORT)
                  return chose_sort_proc(DEFAULT_SORT, mode, env, env_args)
                end
                true
              else
                false
              end

            if builtin then
              DEBUG.print "sort: internal sort routine\n"
              return Flat_1::FlatMappingFile::internal_sort_proc
            end

            env_args ||= DEFAULT_ENV_ARGS
            wrapper =
              if env == '' || env == false then
                nil # inhibit to use env program
              elsif env then
                env # uses given env program
              elsif FileTest.executable?(DEFAULT_ENV) then
                DEFAULT_ENV # env == nil; uses default env program if possible
              end
            prefixes = wrapper ? [ wrapper ] + env_args + [ prog ] : [ prog ]

            DEBUG.print "sort: #{prefixes.join(' ')}\n"
            if mode == :new then
              Flat_1::FlatMappingFile::external_sort_proc(prefixes)
            else
              Flat_1::FlatMappingFile::external_merge_sort_proc(prefixes)
            end
          end
    
          # Updates the existing databank +name+ with +files+.
          #
          # name::    name of the databank, given to DataBank.open
          # parser::  parser to use, or nil to choose one from the format
          #           recorded in the databank ('swiss', 'embl' and
          #           'genbank' are supported in that case)
          # options:: when options['renew'] is true, the databank is rebuilt
          #           from its still existing files plus +files+; the hash is
          #           also handed over to the index building methods
          # files::   flatfiles to be added
          #
          # Raises RuntimeError when the file format does not match the
          # databank, cannot be determined, or is unsupported, and when the
          # index type is unsupported. Returns true.
          def self.update_index(name, parser, options, *files)
            db = DataBank.open(name)

            if parser then
              raise 'file format mismatch' if db.format != parser.format
            else

              # Entry class of the first file already in the databank and of
              # the first new file; either stays nil when autodetection
              # raises TypeError or Errno::ENOENT.
              begin
                dbclass_orig =
                  Bio::FlatFile.autodetect_file(db.fileids[0].filename)
              rescue TypeError, Errno::ENOENT
              end
              begin
                dbclass_new =
                  Bio::FlatFile.autodetect_file(files[0])
              rescue TypeError, Errno::ENOENT
              end

              case db.format
              when 'swiss', 'embl'
                parser = Parser.new(db.format)
                if dbclass_new and dbclass_new != parser.dbclass
                  raise 'file format mismatch'
                end
              when 'genbank'
                # Falls back to the class of the new file when the class of
                # the original file is unknown. ("a = b or c" is parsed as
                # "(a = b) or c", so "||" is required here.)
                dbclass = dbclass_orig || dbclass_new
                if dbclass == Bio::GenBank or dbclass == Bio::GenPept
                  parser = Parser.new(dbclass)
                elsif !dbclass then
                  raise 'cannnot determine format. please specify manually.'
                else
                  raise 'file format mismatch'
                end
                if dbclass_new and dbclass_new != parser.dbclass
                  raise 'file format mismatch'
                end
              else
                raise 'unsupported format'
              end
            end

            parser.set_primary_namespace(db.primary.name)
            parser.add_secondary_namespaces(*db.secondary.names)

            if options['renew'] then
              newfiles = db.fileids.filenames.find_all do |x|
                FileTest.exist?(x)
              end
              newfiles.concat(files)
              # remove duplicated file names, keeping the first occurrence
              # of each name in its original position
              newfiles2 = newfiles.sort
              newfiles2.uniq!
              newfiles3 = []
              newfiles.each do |x|
                newfiles3 << x if newfiles2.delete(x)
              end
              t = db.index_type
              db.close
              case t
              when MAGIC_BDB
                Indexer::makeindexBDB(name, parser, options, *newfiles3)
              when MAGIC_FLAT
                Indexer::makeindexFlat(name, parser, options, *newfiles3)
              else
                raise 'Unsupported index type'
              end
              return true
            end

            # file ids whose check fails have to be indexed again;
            # files already registered in the databank are not "new"
            need_update = []
            newfiles = files.dup
            db.fileids.cache_all
            db.fileids.each_with_index do |f, i|
              need_update << i unless f.check
              newfiles.delete(f.filename)
            end

            b = db.fileids.size
            begin
              db.fileids.recalc
            rescue Errno::ENOENT => evar
              # a registered file has disappeared: rebuild the databank
              DEBUG.print "Error: #{evar}\n"
              DEBUG.print "assumed --renew option\n"
              db.close
              options = options.dup
              options['renew'] = true
              update_index(name, parser, options, *files)
              return true
            end
            # add new files
            db.fileids.add(*newfiles)
            db.fileids.recalc

            # the new files get the file ids following the existing ones
            need_update.concat((b...(b + newfiles.size)).to_a)

            DEBUG.print "writing DabaBank...\n"
            db.write('wb', BDBdefault::flag_append)

            case db.index_type
            when MAGIC_BDB
              addindex_bdb(db, BDBdefault::flag_append,
                           need_update, parser, options)
            when MAGIC_FLAT
              addindex_flat(db, :add, need_update, parser, options)
            else
              raise 'Unsupported index type'
            end

            db.close
            true
          end #def
        end #module Indexer
    
        ##############################################################
        # Returns the entry class for a format name.
        #
        # format_string:: format name; it is matched case-insensitively,
        #                 anywhere in the string, against "genbank",
        #                 "genpept", "embl", "sptr" and "fasta", in this
        #                 order
        #
        # Raises RuntimeError, with the given name in the message, when
        # nothing matches.
        def self.formatstring2class(format_string)
          case format_string
          when /genbank/i
            Bio::GenBank
          when /genpept/i
            Bio::GenPept
          when /embl/i
            Bio::EMBL
          when /sptr/i
            Bio::SPTR
          when /fasta/i
            Bio::FastaFormat
          else
            # The message must interpolate the argument; a bare "format"
            # would call Kernel#format without arguments and raise
            # ArgumentError instead.
            raise "Unsupported format : #{format_string}"
          end
        end
    
        # Creates a databank named +dbname+ from +files+.
        #
        # is_bdb::  true value: Indexer::makeindexBDB is used;
        #           otherwise Indexer::makeindexFlat
        # format::  format name for formatstring2class, or nil/false to
        #           autodetect the format from the first file
        # options:: hash (nil is treated as an empty hash);
        #           'primary_namespace', 'secondary_namespaces' and
        #           'additional_secondary_namespaces' are read here, and
        #           the hash is handed over to the index building method
        #
        # Raises RuntimeError ("Cannot determine format") when
        # autodetection returns nothing. Returns the result of the index
        # building method.
        def self.makeindex(is_bdb, dbname, format, options, *files)
          dbclass =
            if format then
              formatstring2class(format)
            else
              detected = Bio::FlatFile.autodetect_file(files[0])
              raise "Cannot determine format" unless detected
              DEBUG.print "file format is #{detected}\n"
              detected
            end

          options ||= {}
          parser = Indexer::Parser.new(dbclass,
                                       options['primary_namespace'],
                                       options['secondary_namespaces'])

          #if /(EMBL|SPTR)/ =~ dbclass.to_s then
            #a = [ 'DR' ]
            #parser.add_secondary_namespaces(*a)
          #end
          extra = options['additional_secondary_namespaces']
          parser.add_secondary_namespaces(*extra) if extra

          builder = is_bdb ? :makeindexBDB : :makeindexFlat
          Indexer.__send__(builder, dbname, parser, options, *files)
        end #def makeindex
    
        # Adds +files+ to the existing databank +dbname+.
        #
        # format::  format name for formatstring2class; when nil or false,
        #           no parser is created here and Indexer::update_index
        #           chooses one itself
        # options:: handed over to Indexer::update_index
        #
        # Returns the result of Indexer::update_index.
        def self.update_index(dbname, format, options, *files)
          if format then
            # The entry class has to be derived from the format name
            # (this used to refer to an undefined local variable).
            dbclass = formatstring2class(format)
            parser = Indexer::Parser.new(dbclass)
          else
            parser = nil
          end
          Indexer::update_index(dbname, parser, options, *files)
        end #def update_index
    
      end #class FlatFileIndex
    end #module Bio
    
    =begin
    
    = Bio::FlatFileIndex

    --- Bio::FlatFileIndex.makeindex(is_bdb, dbname, format, options, *files)

          Create index files (called a databank) for the given files.

    --- Bio::FlatFileIndex.update_index(dbname, format, options, *files)

          Add entries to an existing databank.
    
    =end
    bio-1.4.3.0001/lib/bio/io/flatfile/splitter.rb0000644000004100000410000002036412200110570020577 0ustar  www-datawww-data#
    # = bio/io/flatfile/splitter.rb - input data splitter for FlatFile
    #
    #   Copyright (C) 2001-2008 Naohisa Goto 
    #
    # License:: The Ruby License
    #
    #  $Id:$
    #
    #
    # See documents for Bio::FlatFile::Splitter and Bio::FlatFile.
    #
    
    require 'bio/io/flatfile'
    
    module Bio
    
      class FlatFile
    
        # The Bio::FlatFile::Splitter is a namespace for flatfile splitters.
        # Each splitter is a class to get entries from a buffered input stream.
        #
        # It is internally called in Bio::FlatFile.
        # Normally, users do not need to use it directly.
        module Splitter
    
          # Template (abstract base class) of splitters.
          #
          # It keeps the entry class and the input stream, and provides the
          # attributes shared by all splitters. Concrete splitters have to
          # implement skip_leader and get_entry.
          class Template
            # Creates a new splitter.
            # klass::   entry data class
            # bstream:: input stream
            def initialize(klass, bstream)
              @dbclass = klass
              @stream = bstream
              @entry_pos_flag = nil
            end

            # Skips the leader of the entry.
            # Not implemented in the template.
            def skip_leader
              raise NotImplementedError
            end

            # Rewinds the stream.
            def rewind
              @stream.rewind
            end

            # Gets an entry as a string. (String)
            # Not implemented in the template.
            def get_entry
              raise NotImplementedError
            end

            # Gets an entry with get_entry and returns it as an object of
            # the entry data class. When get_entry returns nil or false,
            # that value is stored and returned as it is.
            def get_parsed_entry
              raw = get_entry
              self.parsed_entry = raw ? dbclass.new(raw) : raw
              parsed_entry
            end

            # entry::           the last entry string read from the stream
            #                   (String)
            # parsed_entry::    the last parsed entry read from the stream
            #                   (entry data class). Note that it is valid
            #                   only after get_parsed_entry is called, and
            #                   get_entry may not affect it.
            # entry_start_pos:: start position of the entry
            # entry_ended_pos:: (end position of the entry) + 1
            attr_reader :entry, :parsed_entry, :entry_start_pos, :entry_ended_pos

            # a flag to write down entry start and end positions
            attr_accessor :entry_pos_flag

            #--
            # The attributes below are private. They are declared with an
            # explicit "private :name" to prevent the warning message
            # "warning: private attribute?".
            #++

            # dbclass:: entry data class
            # stream::  input stream
            attr_reader :dbclass, :stream
            private     :dbclass, :stream

            # writers for the public read-only attributes above
            attr_writer :entry, :parsed_entry, :entry_start_pos, :entry_ended_pos
            private     :entry=, :parsed_entry=, :entry_start_pos=, :entry_ended_pos=

            # Does stream.pos if entry_pos_flag is neither nil nor false.
            # Otherwise, returns nil.
            def stream_pos
              stream.pos if entry_pos_flag
            end
            private :stream_pos
          end #class Template
    
          # Default splitter.
          # It sees following constants in the given class.
          # DELIMITER:: (String) delimiter indicates the end of a entry.
          # FLATFILE_HEADER:: (String) start of a entry, located on head of a line.
          # DELIMITER_OVERRUN:: (Integer) excess read size included in DELIMITER.
          #
          # A constant that cannot be read from the class is treated as nil.
          class Default < Template
            # Creates a new splitter.
            # klass:: database class
            # bstream:: input stream. It must be a BufferedInputStream object.
            def initialize(klass, bstream)
              super(klass, bstream)

              @delimiter = klass::DELIMITER rescue nil
              @header = klass::FLATFILE_HEADER rescue nil
              # for specific classes' benefit
              unless header
                if (defined?(Bio::GenBank) and klass == Bio::GenBank) or
                    (defined?(Bio::GenPept) and klass == Bio::GenPept)
                  @header = 'LOCUS '
                end
              end
              @delimiter_overrun = klass::DELIMITER_OVERRUN rescue nil
            end

            # (String) delimiter indicates the end of a entry.
            attr_accessor :delimiter

            # (String) start of a entry, located on head of a line.
            attr_accessor :header

            # (Integer) excess read data size included in delimiter.
            attr_accessor :delimiter_overrun
            
            # Skips leader of the entry.
            #
            # If @header is not nil, it reads till the contents of @header
            # comes at the head of a line.
            # If correct FLATFILE_HEADER is found, returns true.
            # Otherwise, returns nil.
            def skip_leader
              if @header then
                data = ''
                while s = stream.gets(@header)
                  data << s
                  # the header is found when the last line read so far
                  # consists of the header only
                  if data.split(/[\r\n]+/)[-1] == @header then
                    stream.ungets(@header)
                    return true
                  end
                end
                # @header was not found. For safety,
                # pushes back data with removing white spaces in the head.
                # (sub! is needed: the result of a non-destructive sub
                # would be thrown away and the white spaces would stay.)
                data.sub!(/\A\s+/, '')
                stream.ungets(data)
                return nil
              else
                stream.skip_spaces
                return nil
              end
            end

            # Gets an entry: reads the stream up to and including the
            # delimiter and returns the data read (or nil when the stream
            # returns nil).
            #
            # When delimiter_overrun is set and the data ends with the
            # delimiter, the last delimiter_overrun characters are pushed
            # back to the stream so that they are read again as the
            # beginning of the next entry.
            #
            # The start and end positions of the entry are recorded when
            # entry_pos_flag is set.
            def get_entry
              p0 = stream_pos()
              e  = stream.gets(@delimiter)
              if e and @delimiter_overrun then
                if e[-@delimiter.size, @delimiter.size ] == @delimiter then
                  overrun = e[-@delimiter_overrun, @delimiter_overrun]
                  e[-@delimiter_overrun, @delimiter_overrun] = ''
                  stream.ungets(overrun)
                end
              end
              p1 = stream_pos()
              self.entry_start_pos = p0
              self.entry = e
              self.entry_ended_pos = p1
              return entry
            end
          end #class Default
    
    
          # A splitter for line oriented text data.
          #
          # The given class's object must have following methods.
          #   Klass#add_header_line(line)
          #   Klass#add_line(line)
          # where 'line' is a string. They normally returns self.
          # If the line is not suitable to add to the current entry,
          # nil or false should be returned.
          # Then, the line is treated as (for add_header_line) the entry data
          # or (for add_line) the next entry's data.
          #
          class LineOriented < Template
            # Builds a line oriented splitter.
            # klass:: database class
            # bstream:: input stream. It must be a BufferedInputStream object.
            def initialize(klass, bstream)
              super(klass, bstream)
              # header lines are fetched once, before the first entry
              self.flag_to_fetch_header = true
            end

            # There is no leader to skip in line oriented data.
            # Always returns nil.
            def skip_leader
              nil
            end

            # Reads the next entry and returns its text as a string.
            # When no entry could be read, the (false or nil) result of
            # get_parsed_entry is returned as is.
            def get_entry
              parsed = get_parsed_entry
              parsed ? entry : parsed
            end

            # Reads the next entry and returns it as a data class object.
            #
            # Lines are offered one by one to a fresh data class object,
            # first through add_header_line (only while the header has not
            # been fetched yet) and then through add_line. The first line
            # refused by either method is pushed back to the stream.
            # Returns nil when no line was accepted.
            def get_parsed_entry
              start_pos = stream_pos()
              obj = @dbclass.new()
              accepted = []

              if flag_to_fetch_header then
                refused = nil
                while (text = stream.gets("\n"))
                  if obj.add_header_line(text) then
                    accepted.push text
                  else
                    refused = text
                    break
                  end
                end
                stream.ungets(refused) if refused
                self.flag_to_fetch_header = false
              end

              refused = nil
              while (text = stream.gets("\n"))
                if obj.add_line(text) then
                  accepted.push text
                else
                  refused = text
                  break
                end
              end
              stream.ungets(refused) if refused
              end_pos = stream_pos()

              return nil if accepted.empty?

              self.entry_start_pos = start_pos
              self.entry = accepted.join('')
              self.parsed_entry = obj
              self.entry_ended_pos = end_pos

              obj
            end

            # Rewinds the stream; the header will be fetched again
            # on the next read.
            def rewind
              result = super
              self.flag_to_fetch_header = true
              result
            end

            #--
            #private methods / attributes
            #++

            # true while the header lines still have to be fetched
            attr_accessor :flag_to_fetch_header
            private       :flag_to_fetch_header
            private       :flag_to_fetch_header=

          end #class LineOriented
    
        end #module Splitter
    
      end #class FlatFile
    end #module Bio
    
    
    bio-1.4.3.0001/lib/bio/io/flatfile/buffer.rb0000644000004100000410000002042012200110570020173 0ustar  www-datawww-data#
    # = bio/io/flatfile/buffer.rb - Input stream buffer for FlatFile
    #
    #   Copyright (C) 2001-2006 Naohisa Goto 
    #
    # License:: The Ruby License
    #
    #  $Id:$
    #
    #
    # See documents for Bio::FlatFile::BufferedInputStream and Bio::FlatFile.
    #
    
    require 'bio/io/flatfile'
    
    module Bio
    
      class FlatFile
    
        # Wrapper for a IO (or IO-like) object.
        # It can input with a buffer.
        class BufferedInputStream
          # Creates a new input stream wrapper
          def initialize(io, path)
            @io, @path = io, path
            # nothing has been prefetched or pushed back yet
            @buffer = ''
          end
    
          # Creates a new input stream wrapper from the given IO object.
          def self.for_io(io)
            # Not every IO-like object has #path; a NameError (which includes
            # NoMethodError) raised by the call simply means "no path".
            path = begin
                     io.path
                   rescue NameError
                     nil
                   end
            self.new(io, path)
          end
    
          # Creates a new input stream wrapper to open file _filename_
          # by using File.open.
          # *arg is passed to File.open.
          #
          # Like File.open, a block can be accepted.
          #
          # Unlike File.open, the default is binary mode, unless text mode
          # is explicitly specified in mode.
          def self.open_file(filename, *arg)
            params = _parse_file_open_arg(*arg)
            # Binary mode is the default. Text mode has to be requested,
            # either with the :textmode option or with a "t" in the mode string.
            textmode =
              (params[:textmode] or /t/ =~ params[:fmode_string].to_s) ? true : false

            unless block_given? then
              fobj = File.open(filename, *arg)
              fobj.binmode unless textmode
              return self.new(fobj, filename)
            end

            File.open(filename, *arg) do |fobj|
              fobj.binmode unless textmode
              yield self.new(fobj, filename)
            end
          end
    
          # Parses file open mode parameter.
          # mode must be an Integer or a String.
          def self._parse_file_open_mode(mode)
            # An Integer-like mode takes precedence over a String-like one.
            as_int = begin
                       mode.to_int
                     rescue NoMethodError
                       nil
                     end
            return { :fmode_integer => as_int } if as_int

            as_str = begin
                       mode.to_str
                     rescue NoMethodError
                       nil
                     end
            # neither Integer-like nor String-like
            return nil unless as_str

            # "fmode:external_encoding:internal_encoding", encodings optional
            fmode, ext_enc, int_enc = as_str.split(/\:/)
            parsed = { :fmode_string => fmode }
            parsed[:external_encoding] = ext_enc if ext_enc
            parsed[:internal_encoding] = int_enc if int_enc
            parsed
          end
          private_class_method :_parse_file_open_mode
              
          # Parses file open arguments
          def self._parse_file_open_arg(*arg)
            # Accepts the arguments File.open takes after the file name:
            #   [mode [, perm]] [, options_hash]
            # and returns a Hash which may contain the keys :fmode_integer,
            # :fmode_string, :perm, :external_encoding, :internal_encoding,
            # :textmode, :binmode and :autoclose.
            fmode_hash = nil
            perm = nil

            elem = arg.shift
            if elem then
              fmode_hash = _parse_file_open_mode(elem)
              if fmode_hash then
                # a mode was given; the next argument may be the permission
                elem = arg.shift
                if elem then
                  begin
                    perm = elem.to_int
                  rescue NoMethodError
                  end
                end
                elem = arg.shift if perm
              end
            end
            # whatever is left in elem is the options hash, if it is a Hash
            if elem.kind_of?(Hash) then
              opt = elem.dup
            else
              opt = {}
            end
            if elem = opt[:mode] then
              fmode_hash = _parse_file_open_mode(elem)
            end
            fmode_hash ||= {}
            fmode_hash[:perm] = perm if perm
            # :encoding is "external[:internal]".
            # (The string itself must be kept in enc; assigning the result of
            # empty? would leave a boolean there and break the split below.)
            enc = opt[:encoding].to_s
            unless enc.empty? then
              ext_enc, int_enc = enc.split(/\:/)
              fmode_hash[:external_encoding] = ext_enc if ext_enc
              fmode_hash[:internal_encoding] = int_enc if int_enc
            end

            # explicitly given options override values derived above
            [ :external_encoding, :internal_encoding,
              :textmode, :binmode, :autoclose, :perm ].each do |key|
              val = opt[key]
              fmode_hash[key] = val if val
            end
            fmode_hash
          end
          private_class_method :_parse_file_open_arg
    
          # Creates a new input stream wrapper from URI specified as _uri_.
          # by using OpenURI.open_uri or URI#open.
          # _uri_ must be a String or URI object.
          # *arg is passed to OpenURI.open_uri or URI#open.
          #
          # Like OpenURI.open_uri, it can accept a block.
          def self.open_uri(uri, *arg)
            # URI objects are opened with URI#open and recorded as a string;
            # anything else is handed to OpenURI.open_uri and recorded as is.
            from_uri_object = uri.kind_of?(URI)

            unless block_given?
              fobj = from_uri_object ? uri.open(*arg) : OpenURI.open_uri(uri, *arg)
              return self.new(fobj, from_uri_object ? uri.to_s : uri)
            end

            if from_uri_object
              uri.open(*arg) { |fobj| yield self.new(fobj, uri.to_s) }
            else
              OpenURI.open_uri(uri, *arg) { |fobj| yield self.new(fobj, uri) }
            end
          end
    
          # Pathname, filename or URI to open the object.
          # Like File#path, returned value isn't normalized.
          attr_reader :path
    
          # Converts to IO object if possible
          def to_io
            # Delegates to the wrapped object; no check is made here that
            # the wrapped object actually responds to to_io.
            @io.to_io
          end
    
          # Closes the IO object if possible
          def close
            # Delegates to the wrapped object and returns its result.
            # The internal buffer is left untouched.
            @io.close
          end
    
          # Rewinds the IO object if possible
          # Internal buffer in this wrapper is cleared.
          def rewind
            result = @io.rewind
            # buffered data belongs to the old position; drop it
            @buffer = ''
            result
          end
    
          # Returns current file position
          def pos
            # The wrapped object has already been read past the data that is
            # still waiting in the buffer, so that amount is subtracted.
            # IO#pos counts bytes, hence the buffer is measured in bytes too:
            # String#size counts characters and would give a wrong position
            # when the buffer holds multibyte text.
            # (String#bytesize is not available on very old Ruby versions;
            # there String#size already counts bytes.)
            pending = @buffer.respond_to?(:bytesize) ? @buffer.bytesize : @buffer.size
            @io.pos - pending
          end
    
          # Sets current file position if possible
          # Internal buffer in this wrapper is cleared.
          def pos=(p)
            @io.pos = p
            # buffered data belongs to the old position; drop it
            @buffer = ''
            p
          end
    
          # Returns true if end-of-file. Otherwise, returns false.
          #
          # Note that it returns false if the internal buffer in this wrapper
          # is not empty.
          def eof?
            # data waiting in the buffer still counts as unread input
            return false if @buffer.size > 0
            @io.eof?
          end
    
          # Same as IO#gets.
          #
          # Compatibility note: the behavior of paragraph mode (io_rs = '')
          # may differ from that of IO#gets('').
          def gets(io_rs = $/)
            # Nothing buffered: the wrapped object can answer directly.
            return @io.gets(io_rs) unless @buffer.size > 0

            # No separator: the buffer plus everything that is left.
            if io_rs == nil then
              whole = @buffer + @io.gets(nil).to_s
              @buffer = ''
              return whole
            end

            # In paragraph mode the separator is a run of two or more line
            # ends; the capture group makes split return the run itself.
            if io_rs == '' then # io_rs.empty?
              separator = /((?:\r?\n){2,})/n
            else
              separator = io_rs
            end

            pieces = @buffer.split(separator, 2)
            unless pieces.size > 1 then
              # The buffer holds no complete record; read on and try again.
              @buffer << @io.gets(io_rs).to_s
              pieces = @buffer.split(separator, 2)
            end

            if pieces.size > 1 then
              record = pieces.shift
              record += (io_rs.empty? ? pieces.shift : io_rs)
              @buffer = pieces.shift.to_s
            else
              # still no separator: hand out everything there is
              record = @buffer
              @buffer = ''
            end
            record
          end
    
          # Pushes back given str to the internal buffer.
          # Returns nil.
          # str must be read previously with the wrapper object.
          #
          # Note that in current implementation, the str can be everything,
          # but please don't depend on it.
          #
          def ungets(str)
            # Prepended, so that str is the first thing read next.
            @buffer = str + @buffer
            nil
          end
    
          # Same as IO#getc.
          def getc
            # The buffer is served first; only when it is empty is the
            # wrapped object asked.
            return @io.getc unless @buffer.size > 0
            ch = @buffer[0]
            @buffer = @buffer[1..-1]
            ch
          end
          
          # Pushes back one character into the internal buffer.
          # Unlike IO#ungetc, it can be called more than one time.
          def ungetc(c)
            # c is formatted with "%c" and prepended to the buffer,
            # so that it is the first character read next.
            # Returns nil.
            @buffer = sprintf("%c", c) + @buffer
            nil
          end
    
          # Gets current prefetch buffer
          def prefetch_buffer
            # The buffer object itself is returned, not a copy.
            @buffer
          end
    
          # It does @io.gets, and adds the returned string
          # to the internal buffer, and returns the string.
          def prefetch_gets(*arg)
            fetched = @io.gets(*arg)
            # nothing was read: the buffer stays as it is
            return fetched unless fetched
            @buffer << fetched
            fetched
          end
          
          # It does @io.readpartial, and adds the returned string
          # to the internal buffer, and returns the string.
          def prefetch_readpartial(*arg)
            fetched = @io.readpartial(*arg)
            # nothing was read: the buffer stays as it is
            return fetched unless fetched
            @buffer << fetched
            fetched
          end
    
          # Skips space characters in the stream.
          # returns nil.
          def skip_spaces
            # Characters are consumed one by one; the first one that is not a
            # space, newline, carriage return or tab is pushed back.
            while (ch = self.getc)
              next if ch == ?\s or ch == ?\n or ch == ?\r or ch == ?\t
              self.ungetc(ch)
              break
            end
            nil
          end
        end #class BufferedInputStream
    
      end #class FlatFile
    end #module Bio
    bio-1.4.3.0001/lib/bio/io/flatfile/autodetection.rb0000644000004100000410000004054512200110570021603 0ustar  www-datawww-data#
    # = bio/io/flatfile/autodetection.rb - file format auto-detection
    #
    #   Copyright (C) 2001-2006 Naohisa Goto 
    #
    # License:: The Ruby License
    #
    #  $Id:$
    #
    #
    #  See documents for Bio::FlatFile::AutoDetect and Bio::FlatFile.
    #
    
    require 'tsort'
    require 'bio/io/flatfile'
    
    module Bio
    
      class FlatFile
    
        # AutoDetect automatically determines database class of given data.
        class AutoDetect
    
          include TSort
    
          # Array to store autodetection rules.
          # This is defined only for inspect.
          class RulesArray < Array
            # Shows only the names of the rules, separated by spaces.
            def inspect
              names = self.map { |rule| rule.name.inspect }
              "[" + names.join(' ') + "]"
            end
          end #class RulesArray
    
          # Template of a single rule of autodetection
          class RuleTemplate
            # Shorthand for new: Rule[a, b] is Rule.new(a, b).
            def self.[](*arg)
              self.new(*arg)
            end
            
            # Creates a new element without a name and without any
            # priority relations.
            def initialize
              @higher_priority_elements = RulesArray.new
              @lower_priority_elements  = RulesArray.new
              @name = nil
            end

            # higher priority elements
            attr_reader :higher_priority_elements
            # lower priority elements
            attr_reader :lower_priority_elements

            # database classes
            attr_reader :dbclasses

            # unique name of the element
            attr_accessor :name

            # Declares that self is prior to the _elem_.
            # The relation is recorded on both elements.
            # Returns true, or nil when _elem_ is self.
            def is_prior_to(elem)
              return nil if self == elem
              elem.higher_priority_elements << self
              self.lower_priority_elements << elem
              true
            end

            # If given text (and/or meta information) is known, returns
            # the database class.
            # Otherwise, returns nil or false.
            #
            # _text_ will be a String.
            # _meta_ will be a Hash.
            # _meta_ may contain following keys.
            # :path => pathname, filename or uri.
            #
            # This template knows nothing and always returns nil.
            def guess(text, meta)
              nil
            end

            private
            # Gets constant from constant name given as a string.
            # The name is resolved part by part, starting from Object.
            def str2const(str)
              str.split(/\:\:/).inject(Object) do |scope, part|
                scope.const_get(part)
              end
            end

            # Gets database class from given object.
            # A String is regarded as the name of a constant;
            # any other object is returned as is.
            def get_dbclass(obj)
              return obj unless obj.kind_of?(String)
              str2const(obj)
            end
          end #class RuleTemplate
    
          # RuleDebug is a class for debugging autodetect classes/methods
          class RuleDebug < RuleTemplate
            # Creates a new instance with the given name.
            def initialize(name)
              super()
              @name = name
            end

            # Writes the name of the rule and the inspected arguments
            # to $stderr. Never detects anything: always returns nil.
            def guess(text, meta)
              $stderr.puts @name, text.inspect, meta.inspect
              nil
            end
          end #class RuleDebug
    
          # Special element that is always top or bottom priority.
          class RuleSpecial < RuleTemplate
            # Only the name is stored. The initializer of the superclass
            # is not called, so no priority arrays are kept in the object.
            def initialize(name)
              #super()
              @name = name
            end

            # modification of @name is inhibited.
            def name=(x)
              raise 'cannot modify name'
            end

            # always returns a new empty array;
            # anything appended to it is lost
            def higher_priority_elements
              Array.new
            end

            # always returns a new empty array;
            # anything appended to it is lost
            def lower_priority_elements
              Array.new
            end
          end #class RuleSpecial
    
          # Special element that is always top priority.
          TopRule = RuleSpecial.new('top')
          # Special element that is always bottom priority.
          BottomRule = RuleSpecial.new('bottom')
    
          # A autodetection rule to use a regular expression
          class RuleRegexp < RuleTemplate
            # Creates a new instance.
            # _dbclass_ is a database class or the name of one;
            # its string form becomes the name of the rule.
            def initialize(dbclass, re)
              super()
              @re = re
              @name = dbclass.to_s
              @dbclass = nil
              @dbclass_lazy = dbclass
            end

            # database class (lazy evaluation: resolved on first use)
            def dbclass
              @dbclass ||= get_dbclass(@dbclass_lazy)
            end
            private :dbclass

            # returns database classes
            def dbclasses
              [ dbclass ]
            end

            # If given text matches the regexp, returns the database class.
            # Otherwise, returns nil.
            # _meta_ is ignored.
            def guess(text, meta)
              return nil unless @re =~ text
              dbclass
            end
          end #class RuleRegexp
    
          # An autodetection rule that uses several regular expressions.
          # If the given string matches one of the regular expressions,
          # returns the database class.
          class RuleRegexp2 < RuleRegexp
            # Creates a new instance.
            # The single regexp slot of the superclass is left nil;
            # all given regexps are kept in @regexps.
            def initialize(dbclass, *regexps)
              super(dbclass, nil)
              @regexps = regexps
            end

            # If given text matches one of the regexps, returns the
            # database class. Otherwise, returns nil.
            # The regexps are tried in the given order.
            # _meta_ is ignored.
            def guess(text, meta)
              matched = @regexps.any? { |re| re =~ text }
              matched ? dbclass : nil
            end
          end #class RuleRegexp2
    
          # A autodetection rule that passes data to the proc object.
          class RuleProc < RuleTemplate
            # Creates a new instance.
            # _dbclasses_ are database classes or names of them;
            # the name of the rule is built by joining them with '|'.
            def initialize(*dbclasses, &block)
              super()
              @proc = block
              @dbclasses = nil
              @dbclasses_lazy = dbclasses
              @name = dbclasses.map { |x| x.to_s }.join('|')
            end

            # database classes (lazy evaluation: resolved on first use)
            def dbclasses
              @dbclasses ||= @dbclasses_lazy.map { |x| get_dbclass(x) }
            end

            # Calls the proc object with _text_ and returns its result,
            # which is expected to be the database class if the text is
            # known, or nil or false otherwise.
            #
            # _meta_ is not passed to the proc object.
            def guess(text, meta)
              @proc.call(text)
            end
          end #class RuleProc
          
          # Creates a new Autodetect object
          def initialize
            # autodetection rules, keyed by the name of the rule
            @rules = {}
            # cache of the sorted elements; nil means "not computed yet"
            @elements = nil
            # the two special rules are always present
            add(TopRule)
            add(BottomRule)
          end
    
          # Adds a new element.
          # Returns _elem_.
          def add(elem)
            key = elem.name
            raise 'element name conflicts' if @rules[key]
            # the cached order is no longer valid
            @elements = nil
            @rules[key] = elem
            elem
          end
    
          # (required by TSort.)
          # For all elements, yields each element.
          def tsort_each_node(&x)
            # The given block is handed to Hash#each_value of the rule table.
            @rules.each_value(&x)
          end
    
          # (required by TSort.)
          # For a given element, yields each child
          # (= lower priority elements) of the element.
          def tsort_each_child(elem)
            if elem == TopRule then
              # Every rule is a child of TopRule, except TopRule itself and
              # rules that were explicitly declared prior to TopRule.
              @rules.each_value do |e|
                next if e == TopRule
                next if e.lower_priority_elements.index(TopRule)
                yield e
              end
            elsif elem == BottomRule then
              # Only rules explicitly declared lower than BottomRule.
              @rules.each_value do |e|
                yield e if e.higher_priority_elements.index(BottomRule)
              end
            else
              elem.lower_priority_elements.each do |e|
                yield e if e != BottomRule
              end
              # BottomRule is an implicit child of every ordinary rule that
              # was not declared lower than BottomRule.
              yield BottomRule unless elem.higher_priority_elements.index(BottomRule)
            end
          end
    
          # Returns current elements as an array
          # whose order fulfills all elements' priorities.
          def elements
            # tsort lists children (lower priority) first, so the result is
            # reversed to get the highest priority first. The order is cached
            # until @elements is reset to nil.
            @elements ||= tsort.reverse
          end
    
          # rebuilds the object and clears internal cache.
          def rehash
            # Rebuilds the rule table with Hash#rehash and
            # drops the cached element order.
            # Returns nil.
            @rules.rehash
            @elements = nil
          end
    
          # visualizes the object (mainly for debug)
          def inspect
            # class name followed by the names of all elements,
            # in priority order
            names = self.elements.map { |e| e.name.inspect }
            "<#{self.class.to_s} #{names.join(' ')}>"
          end
    
          # Iterates over each element.
          def each_rule(&x) #:yields: elem
            # Iteration follows the order given by the elements method.
            elements.each(&x)
          end
    
          # Autodetect from the text.
          # Returns a database class if succeeded.
          # Returns nil if failed.
          def autodetect(text, meta = {})
            # The rules are asked in priority order; the first true answer
            # wins. When no rule answers true, the answer of the last rule
            # asked is returned, which may be false instead of nil.
            answer = nil
            elements.each do |rule|
              #$stderr.puts rule.name
              break if (answer = rule.guess(text, meta))
            end
            answer
          end
    
          # autodetect from the FlatFile object.
          # Returns a database class if succeeded.
          # Returns nil if failed.
          def autodetect_flatfile(ff, lines = 31)
            meta = {}
            stream = ff.instance_eval { @stream }
            # streams without a path method simply have no path
            path = begin
                     stream.path
                   rescue NameError
                     nil
                   end
            if path then
              meta[:path] = path
              # call autodetect once with meta and without any read action
              found = self.autodetect(stream.prefetch_buffer, meta)
              return found if found
            end
            # Read the stream line by line, at most _lines_ lines. After every
            # non-blank line, everything prefetched so far is examined.
            1.upto(lines) do
              line = stream.prefetch_gets
              break unless line
              next unless line.strip.size > 0
              found = self.autodetect(stream.prefetch_buffer, meta)
              return found if found
            end
            nil
          end
    
          # default autodetect object for class method
          @default = nil
    
          # returns the default autodetect object
          def self.default
            # built on first use, then reused
            @default ||= self.make_default
          end
    
          # sets the default autodetect object.
          def self.default=(ad)
            # Stored as is; no check is made on the given object.
            @default = ad
          end
    
          # make a new autodetect object
          def self.[](*arg)
            # The given rules are added to a fresh object in the given order.
            arg.inject(self.new) do |detector, rule|
              detector.add(rule)
              detector
            end
          end
    
          # make a default of default autodetect object
          def self.make_default
            # Builds an autodetect object holding the rules listed below, and
            # then declares the priorities among them. Each rule is also kept
            # in a local variable so that the priority declarations at the
            # end of this method can refer to it.
            # Database classes are given as strings, so they are looked up
            # only when a rule needs them.
            a = self[
              genbank  = RuleRegexp[ 'Bio::GenBank',
                /^LOCUS       .+ bp .*[a-z]*[DR]?NA/ ],
              genpept  = RuleRegexp[ 'Bio::GenPept',
                /^LOCUS       .+ aa .+/ ],
              medline  = RuleRegexp[ 'Bio::MEDLINE',
                /^PMID\- [0-9]+$/ ],
              embl     = RuleRegexp[ 'Bio::EMBL',
                /^ID   .+\; .*(DNA|RNA|XXX)\;/ ],
              sptr     = RuleRegexp2[ 'Bio::SPTR',
                /^ID   .+\; *PRT\;/,
                /^ID   [-A-Za-z0-9_\.]+ .+\; *[0-9]+ *AA\./ ],
              prosite  = RuleRegexp[ 'Bio::PROSITE',
                /^ID   [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
              transfac = RuleRegexp[ 'Bio::TRANSFAC',
                /^AC  [-A-Za-z0-9_\.]+$/ ],

              # AAindex1 and AAindex2 share the "H" line; further lines
              # decide which of the two it is.
              aaindex  = RuleProc.new('Bio::AAindex1', 'Bio::AAindex2') do |text|
                if /^H [-A-Z0-9_\.]+$/ =~ text then
                  if text =~ /^M [rc]/ then
                    Bio::AAindex2
                  elsif text =~ /^I    A\/L/ then
                    Bio::AAindex1
                  else
                    false #fail to determine
                  end
                else
                  nil
                end
              end,

              litdb    = RuleRegexp[ 'Bio::LITDB',
                /^CODE        [0-9]+$/ ],
              pathway_module = RuleRegexp[ 'Bio::KEGG::MODULE',
                /^ENTRY       .+ Pathway\s+Module\s*/ ],
              pathway  = RuleRegexp[ 'Bio::KEGG::PATHWAY',
                /^ENTRY       .+ Pathway\s*/ ],
              brite    = RuleRegexp[ 'Bio::KEGG::BRITE',
                /^Entry           [A-Z0-9]+/ ],
              orthology = RuleRegexp[ 'Bio::KEGG::ORTHOLOGY',
                /^ENTRY       .+ KO\s*/ ],
              drug     = RuleRegexp[ 'Bio::KEGG::DRUG',
                /^ENTRY       .+ Drug\s*/ ],
              glycan   = RuleRegexp[ 'Bio::KEGG::GLYCAN',
                /^ENTRY       .+ Glycan\s*/ ],
              enzyme   = RuleRegexp2[ 'Bio::KEGG::ENZYME',
                /^ENTRY       EC [0-9\.]+$/,
                /^ENTRY       .+ Enzyme\s*/
              ],
              compound = RuleRegexp2[ 'Bio::KEGG::COMPOUND',
                /^ENTRY       C[A-Za-z0-9\._]+$/,
                /^ENTRY       .+ Compound\s*/
              ],
              reaction = RuleRegexp2[ 'Bio::KEGG::REACTION',
                /^ENTRY       R[A-Za-z0-9\._]+$/,
                /^ENTRY       .+ Reaction\s*/
              ],
              genes    = RuleRegexp[ 'Bio::KEGG::GENES',
                /^ENTRY       .+ (CDS|gene|.*RNA|Contig) / ],
              genome   = RuleRegexp[ 'Bio::KEGG::GENOME',
                /^ENTRY       [a-z]+$/ ],

              # The DOCTYPE tells clusters and sequences apart.
              fantom = RuleProc.new('Bio::FANTOM::MaXML::Cluster',
                                    'Bio::FANTOM::MaXML::Sequence') do |text|
                if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
                  case $1
                  when 'clusters'
                    Bio::FANTOM::MaXML::Cluster
                  when 'sequences'
                    Bio::FANTOM::MaXML::Sequence
                  else
                    nil #unknown
                  end
                else
                  nil
                end
              end,

              pdb = RuleRegexp[ 'Bio::PDB',
                /^HEADER    .{40}\d\d\-[A-Z]{3}\-\d\d   [0-9A-Z]{4}/ ],
              het = RuleRegexp[ 'Bio::PDB::ChemicalComponent',
                /^RESIDUE +.+ +\d+\s*$/ ],

              clustal = RuleRegexp2[ 'Bio::ClustalW::Report',
              /^CLUSTAL .*\(.*\).*sequence +alignment/,
              /^CLUSTAL FORMAT for T-COFFEE/ ],

              gcg_msf = RuleRegexp[ 'Bio::GCG::Msf',
              /^!!(N|A)A_MULTIPLE_ALIGNMENT .+/ ],

              gcg_seq = RuleRegexp[ 'Bio::GCG::Seq',
              /^!!(N|A)A_SEQUENCE .+/ ],

              blastxml = RuleRegexp[ 'Bio::Blast::Report',
                /\<\!DOCTYPE BlastOutput PUBLIC / ],
              wublast  = RuleRegexp[ 'Bio::Blast::WU::Report',
                /^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
              wutblast = RuleRegexp[ 'Bio::Blast::WU::Report_TBlast',
                /^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
              blast    = RuleRegexp[ 'Bio::Blast::Default::Report',
                /^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
              tblast   = RuleRegexp[ 'Bio::Blast::Default::Report_TBlast',
                /^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
              rpsblast   = RuleRegexp[ 'Bio::Blast::RPSBlast::Report',
                /^RPS\-BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],

              blat   = RuleRegexp[ 'Bio::Blat::Report',
                /^psLayout version \d+/ ],
              spidey = RuleRegexp[ 'Bio::Spidey::Report',
                /^\-\-SPIDEY version .+\-\-$/ ],
              hmmer  = RuleRegexp[ 'Bio::HMMER::Report',
                /^HMMER +\d+\./ ],
              sim4   = RuleRegexp[ 'Bio::Sim4::Report',
                /^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],

              fastq  = RuleRegexp[ 'Bio::Fastq',
                /^\@.+(?:\r|\r?\n)(?:[^\@\+].*(?:\r|\r?\n))+/ ],

              # All three formats start with a ">" line; the lines after it
              # decide which one it is.
              fastaformat = RuleProc.new('Bio::FastaFormat',
                                         'Bio::NBRF',
                                         'Bio::FastaNumericFormat') do |text|
                if /^>.+$/ =~ text
                  case text
                  when /^>([PF]1|[DR][LC]|N[13]|XX)\;.+/
                    Bio::NBRF
                  when /^>.+$\s+(^\#.*$\s*)*^\s*\d*\s*[-a-zA-Z_\.\[\]\(\)\*\+\$]+/
                      Bio::FastaFormat
                  when /^>.+$\s+^\s*\d+(\s+\d+)*\s*$/
                    Bio::FastaNumericFormat
                  else
                    false
                  end
                else
                  nil
                end
              end
            ]

            # dependencies
            # ("x.is_prior_to y" means that x is tried before y.)
            # NCBI
            genbank.is_prior_to genpept
            # EMBL/UniProt
            embl.is_prior_to sptr
            sptr.is_prior_to prosite
            prosite.is_prior_to transfac
            # KEGG
            #aaindex.is_prior_to litdb
            #litdb.is_prior_to brite
            pathway_module.is_prior_to pathway
            pathway.is_prior_to brite
            brite.is_prior_to orthology
            orthology.is_prior_to drug
            drug.is_prior_to glycan
            glycan.is_prior_to enzyme
            enzyme.is_prior_to compound
            compound.is_prior_to reaction
            reaction.is_prior_to genes
            genes.is_prior_to genome
            # PDB
            pdb.is_prior_to het
            # BLAST
            wublast.is_prior_to wutblast
            wutblast.is_prior_to blast
            blast.is_prior_to tblast
            # Fastq
            # (placed after BottomRule, i.e. after all ordinary rules)
            BottomRule.is_prior_to(fastq)
            fastq.is_prior_to(fastaformat)
            # FastaFormat
            BottomRule.is_prior_to(fastaformat)

            # for debug
            #debug_first = RuleDebug.new('debug_first')
            #a.add(debug_first)
            #debug_first.is_prior_to(TopRule)

            ## for debug
            #debug_last = RuleDebug.new('debug_last')
            #a.add(debug_last)
            #BottomRule.is_prior_to(debug_last)
            #fastaformat.is_prior_to(debug_last)

            # drop any cached element order before handing the object out
            a.rehash
            return a
          end
          
        end #class AutoDetect
      end #class FlatFile
    end #module Bio
    
    bio-1.4.3.0001/lib/bio/io/flatfile/index.rb0000644000004100000410000010302412200110570020033 0ustar  www-datawww-data# 
    # = bio/io/flatfile/index.rb - OBDA flatfile index 
    # 
    # Copyright:: Copyright (C) 2002
    #             GOTO Naohisa  
    # License:: The Ruby License
    #
    #  $Id: index.rb,v 1.19 2007/04/05 23:35:41 trevor Exp $ 
    #
    # = About Bio::FlatFileIndex
    #
    # Please refer documents of following classes.
    # Classes/modules marked '#' are internal use only.
    #
    # == Classes/modules in index.rb
    # * class  Bio::FlatFileIndex
    # * class  Bio::FlatFileIndex::Results
    # * module Bio::FlatFileIndex::DEBUG
    # * #module Bio::FlatFileIndex::Template
    # * #class  Bio::FlatFileIndex::Template::NameSpace
    # * #class  Bio::FlatFileIndex::FileID
    # * #class  Bio::FlatFileIndex::FileIDs
    # * #module Bio::FlatFileIndex::Flat_1
    # * #class  Bio::FlatFileIndex::Flat_1::Record
    # * #class  Bio::FlatFileIndex::Flat_1::FlatMappingFile
    # * #class  Bio::FlatFileIndex::Flat_1::PrimaryNameSpace
    # * #class  Bio::FlatFileIndex::Flat_1::SecondaryNameSpace
    # * #class  Bio::FlatFileIndex::NameSpaces
    # * #class  Bio::FlatFileIndex::DataBank
    #
    # == Classes/modules in indexer.rb
    # * module Bio::FlatFileIndex::Indexer
    # * #class  Bio::FlatFileIndex::Indexer::NameSpace
    # * #class  Bio::FlatFileIndex::Indexer::NameSpaces
    # * #module Bio::FlatFileIndex::Indexer::Parser
    # * #class  Bio::FlatFileIndex::Indexer::Parser::TemplateParser
    # * #class  Bio::FlatFileIndex::Indexer::Parser::GenBankParser
    # * #class  Bio::FlatFileIndex::Indexer::Parser::GenPeptParser
    # * #class  Bio::FlatFileIndex::Indexer::Parser::EMBLParser
    # * #class  Bio::FlatFileIndex::Indexer::Parser::SPTRParser
    # * #class  Bio::FlatFileIndex::Indexer::Parser::FastaFormatParser
    # * #class  Bio::FlatFileIndex::Indexer::Parser::MaXMLSequenceParser
    # * #class  Bio::FlatFileIndex::Indexer::Parser::MaXMLClusterParser
    # * #class  Bio::FlatFileIndex::Indexer::Parser::BlastDefaultParser
    # * #class  Bio::FlatFileIndex::Indexer::Parser::PDBChemicalComponentParser
    #
    # == Classes/modules in bdb.rb
    # * #module Bio::FlatFileIndex::BDBDefault
    # * #class  Bio::FlatFileIndex::BDBWrapper
    # * #module Bio::FlatFileIndex::BDB_1
    # * #class  Bio::FlatFileIndex::BDB_1::BDBMappingFile
    # * #class  Bio::FlatFileIndex::BDB_1::PrimaryNameSpace
    # * #class  Bio::FlatFileIndex::BDB_1::SecondaryNameSpace
    #
    # = References
    # * (())
    # * (())
    #
    
    require 'bio/io/flatfile/indexer'
    
    module Bio
    
    
      # Bio::FlatFileIndex is a class for OBDA flatfile index.
      class FlatFileIndex
    
        autoload :Indexer,    'bio/io/flatfile/indexer'
        autoload :BDBdefault, 'bio/io/flatfile/bdb'
        autoload :BDBwrapper, 'bio/io/flatfile/bdb'
        autoload :BDB_1,      'bio/io/flatfile/bdb'
    
        # magic string for flat/1 index
        MAGIC_FLAT = 'flat/1'
    
        # magic string for BerkeleyDB/1 index
        MAGIC_BDB  = 'BerkeleyDB/1'
    
        #########################################################
    
        # Opens existing databank. Databank is a directory which contains
        # indexed files and configuration files. The type of the databank
        # (flat or BerkeleyDB) are determined automatically.
        #
        # If block is given, the databank object is passed to the block.
        # The databank will be automatically closed when the block terminates.
        #
        def self.open(name)
          # Without a block this is the same as FlatFileIndex.new(name).
          return self.new(name) unless block_given?

          databank = nil
          begin
            databank = self.new(name)
            # The value of the block becomes the return value.
            yield databank
          ensure
            # Close the databank if it was successfully created.
            # An IOError raised while closing (for example because the
            # block already closed it) is deliberately ignored.
            if databank
              begin
                databank.close
              rescue IOError
              end
            end
          end
        end
    
        # Opens existing databank. Databank is a directory which contains
        # indexed files and configuration files. The type of the databank
        # (flat or BerkeleyDB) are determined automatically.
        #
        # Unlike +FlatFileIndex.open+, block is not allowed.
        #
        def initialize(name)
          # Opening is delegated to DataBank.open; the returned object is
          # kept in @db, which the other methods of this class operate on.
          @db = DataBank.open(name)
        end
    
        # common interface defined in registry.rb
        # Searching databank and returns entry (or entries) as a string.
        # Multiple entries (concatenated into one string) may be returned.
        # Returns empty string if not found.
        #
        def get_by_id(key)
          # Run the normal search, then flatten the result to a string.
          results = search(key)
          results.to_s
        end
    
        #--
        # original methods
        #++
    
        # Closes the databank.
        # Returns nil.
        def close
          # Raises IOError when the databank has already been closed.
          check_closed?
          @db.close
          # Dropping the reference is what marks this object as closed
          # (#closed? and #check_closed? both test @db).
          @db = nil
        end
    
        # Returns true if already closed. Otherwise, returns false.
        def closed?
          # @db is reset to nil by #close.
          @db ? false : true
        end
    
        # Set default namespaces.
        # default_namespaces = nil
        # means all namespaces in the databank.
        #
        # default_namespaces= [ str1, str2, ... ]
        # means set default namespaces to str1, str2, ...
        #
        # Default namespaces specified in this method only affect 
        # #get_by_id, #search, and #include? methods.
        #
        # Default of default namespaces is nil (that is, all namespaces
        # are search destinations by default).
        #
        def default_namespaces=(names)
          # nil (or false) means "search every namespace".
          unless names
            @names = nil
            return
          end
          # Keep private copies so that later changes to the caller's
          # strings do not affect the stored defaults.
          @names = []
          names.each { |ns_name| @names << ns_name.dup }
        end
    
        # Returns default namespaces.
        # Returns an array of strings or nil.
        # nil means all namespaces.
        def default_namespaces
          # Returns the stored object itself, not a copy.
          @names
        end
    
        # Searching databank and returns a Bio::FlatFileIndex::Results object.
        def search(key)
          check_closed?
          # No default namespaces configured: look everywhere.
          return @db.search_all(key) unless @names
          @db.search_namespaces(key, *@names)
        end
    
        # Searching only specified namespaces.
        # Returns a Bio::FlatFileIndex::Results object.
        #
        def search_namespaces(key, *names)
          # Raises IOError when the databank has already been closed.
          check_closed?
          # The given names are passed straight to the databank; the
          # default namespaces (@names) are not consulted here.
          @db.search_namespaces(key, *names)
        end
    
        # Searching only primary namespace.
        # Returns a Bio::FlatFileIndex::Results object.
        #
        def search_primary(key)
          # Raises IOError when the databank has already been closed.
          check_closed?
          # Pure delegation to the databank object.
          @db.search_primary(key)
        end
    
        # Searching databank.
        # If some entries are found, returns an array of
        # unique IDs (primary identifiers).
        # If not found anything, returns nil.
        #
        # This method is useful when search result is very large and
        # #search method is very slow.
        #
        def include?(key)
          check_closed?
          # Honour the default namespaces when they are set.
          ids = if @names
                  @db.search_namespaces_get_unique_id(key, *@names)
                else
                  @db.search_all_get_unique_id(key)
                end
          # An empty result is reported as nil rather than [].
          ids.empty? ? nil : ids
        end
    
        # Same as #include?, but searching only specified namespaces.
        #
        def include_in_namespaces?(key, *names)
          check_closed?
          ids = @db.search_namespaces_get_unique_id(key, *names)
          # An empty result is reported as nil rather than [].
          ids.empty? ? nil : ids
        end
    
        # Same as #include?, but searching only primary namespace.
        #
        def include_in_primary?(key)
          check_closed?
          ids = @db.search_primary_get_unique_id(key)
          # An empty result is reported as nil rather than [].
          ids.empty? ? nil : ids
        end
    
        # Returns names of namespaces defined in the databank.
        # (example: [ 'LOCUS', 'ACCESSION', 'VERSION' ] )
        #
        def namespaces
          check_closed?
          # The primary namespace always comes first, followed by the
          # secondary ones.
          secondary_namespaces.unshift(primary_namespace)
        end
    
        # Returns name of primary namespace as a string.
        def primary_namespace
          # Raises IOError when the databank has already been closed.
          check_closed?
          # The name is read from the databank's primary namespace object.
          @db.primary.name
        end
    
        # Returns names of secondary namespaces as an array of strings.
        def secondary_namespaces
          # Raises IOError when the databank has already been closed.
          check_closed?
          # The names come from the databank's secondary namespaces object.
          @db.secondary.names
        end
    
        # Check consistency between the databank(index) and original flat files.
        #
        # If the original flat files are changed after creating
        # the databank, raises RuntimeError.
        #
        # Note that this check only compares file sizes as
        # described in the OBDA specification.
        #
        def check_consistency
          # Raises IOError when the databank has already been closed.
          check_closed?
          # The actual comparison is implemented by the databank object.
          @db.check_consistency
        end
    
        # If true is given, consistency checks will be performed every time
        # accessing flatfiles. If nil/false, no checks are performed.
        #
        # By default, always_check_consistency is true.
        #
        def always_check_consistency=(bool)
          # Like every other accessor of this class, fail with IOError
          # ('closed databank') instead of NoMethodError on nil when the
          # databank has already been closed.
          check_closed?
          @db.always_check=(bool)
        end
    
        # If true, consistency checks will be performed every time
        # accessing flatfiles. If nil/false, no checks are performed.
        #
        # By default, always_check_consistency is true.
        #
        def always_check_consistency(bool = nil)
          # The argument is unused. It used to be mandatory, which made
          # the plain getter call (no argument) raise ArgumentError; it is
          # kept as an optional parameter so existing callers still work.
          #
          # Raises IOError when the databank has already been closed.
          check_closed?
          @db.always_check
        end
    
        #--
        # private methods
        #++
        
        # If the databank is closed, raises IOError.
        def check_closed?
          # Returns the databank object when it is still open.
          @db || raise(IOError, 'closed databank')
        end
        private :check_closed?
    
        #--
        #########################################################
        #++
    
        # Results stores search results created by
        # Bio::FlatFileIndex methods.
        #
        # Currently, this class inherits Hash, but internal
        # structure of this class may be changed anytime.
        # Using only the methods described below is strongly recommended.
        #
        class Results < Hash

          # Returns the union of two result sets.
          # "a + b" means "a OR b".
          # * Example
          #    # I want to search 'ADH_IRON_1' OR 'ADH_IRON_2'
          #    db = Bio::FlatFileIndex.new(location)
          #    a1 = db.search('ADH_IRON_1')
          #    a2 = db.search('ADH_IRON_2')
          #    # a1 and a2 are Bio::FlatFileIndex::Results objects.
          #    print a1 + a2
          #
          # Raises RuntimeError unless the argument is a Results object.
          def +(a)
            unless a.is_a?(self.class)
              raise 'argument must be Results class'
            end
            merged = self.dup
            merged.update(a)
            merged
          end

          # Returns the intersection of two result sets.
          # "a * b" means "a AND b".
          # * Example
          #    # I want to search 'HIS_KIN' AND 'human'
          #    db = Bio::FlatFileIndex.new(location)
          #    hk = db.search('HIS_KIN')
          #    hu = db.search('human')
          #    # hk and hu are Bio::FlatFileIndex::Results objects.
          #    print hk * hu
          #
          # Raises RuntimeError unless the argument is a Results object.
          def *(a)
            unless a.is_a?(self.class)
              raise 'argument must be Results class'
            end
            common = self.class.new
            # Keep the entries of +a+ whose key is also present here.
            a.each_key do |id|
              common.store(id, a[id]) if self[id]
            end
            common
          end

          # Returns all results joined into a single string.
          # Same as to_a.join('').
          #
          def to_s
            self.values.join('')
          end

          #--
          #alias each_orig each
          #++

          # each iterates over the stored values only.
          alias each each_value

          # Iterates over each result (string).
          # Same as to_a.each.
          def each(&x) #:yields: str
            each_value(&x)
          end if false #dummy for RDoc

          #--
          #alias to_a_orig to_a
          #++

          # to_a returns the stored values only.
          alias to_a values

          # Returns an array of strings.
          # If there are no search results, returns an empty array.
          #
          def to_a; values; end if false #dummy for RDoc

          # Returns number of results.
          # Same as to_a.size.
          def size; end if false #dummy for RDoc

        end #class Results
    
        #########################################################
    
        # Module for output debug messages.
        # Default setting: If $DEBUG or $VERBOSE is true, output debug
        # messages to $stderr; Otherwise, don't output messages.
        #
        module DEBUG
          @@out = $stderr
          @@flag = nil

          # Sets the destination of debug messages.
          # If true is given, messages go to $stderr.
          # If nil (or false) is given, nothing is printed.
          # Any other object is used as the destination itself.
          # This method affects ALL of FlatFileIndex related objects/methods.
          #
          def self.out=(io)
            unless io
              @@out = nil
              @@flag = nil
              return @@out
            end
            @@out = (io == true) ? $stderr : io
            @@flag = true
            @@out
          end

          # Returns the current destination of debug messages.
          def self.out
            @@out
          end

          # Prints debug messages. Output is switched on permanently as
          # soon as this is called while $DEBUG or $VERBOSE is true.
          def self.print(*arg)
            @@flag = true if $DEBUG || $VERBOSE
            return unless @@out && @@flag
            @@out.print(*arg)
          end
        end #module DEBUG
    
        #########################################################
    
        # Templates
        #
        # Internal use only.
        module Template

          # Template of a namespace.
          # Child classes must redefine #filename and #mapping.
          #
          # Internal use only.
          class NameSpace
            attr_reader :dbname, :name, :file

            # Stores the databank name and a frozen copy of the namespace
            # name, then builds the mapping object for #filename.
            def initialize(dbname, name)
              @dbname = dbname
              @name = name.dup.freeze
              @file = mapping(filename)
            end

            # Name of the index file. Must be redefined in child class.
            def filename
              raise NotImplementedError, "should be redefined in child class"
            end

            # Creates the mapping object for the given file name.
            # Must be redefined in child class.
            def mapping(filename)
              raise NotImplementedError, "should be redefined in child class"
            end

            # Opens the mapping object and searches it for the key.
            def search(key)
              @file.open
              @file.search(key)
            end

            # Closes the mapping object.
            def close
              @file.close
            end

            # Returns the key itself when it is found, nil otherwise.
            def include?(key)
              search(key).empty? ? nil : key
            end
          end #class NameSpace
        end #module Template
    
        # FileID class.
        #
        # Internal use only.
        class FileID
          # Creates a FileID from a "filename<TAB>filesize" string.
          # The size part is optional.
          def self.new_from_string(str)
            filename, filesize = str.split("\t", 2)
            filesize = filesize.to_i if filesize
            self.new(filename, filesize)
          end

          # filename:: path of the flat file
          # filesize:: expected size of the file in bytes (may be nil)
          def initialize(filename, filesize = nil)
            @filename = filename
            @filesize = filesize
            @io = nil
          end
          attr_reader :filename, :filesize

          # Compares the current size of the file with the stored size.
          # Returns true when they are equal, false when they differ and
          # nil when the file does not exist.
          def check
            begin
              fsize = File.size(@filename)
              r = (fsize == @filesize)
            rescue Errno::ENOENT
              fsize = -1
              r = nil
            end
            DEBUG.print "FileID: File.size(#{@filename.inspect}) = ",
              fsize, (r ? ' == ' : ' != ') , @filesize,
              (r ? '' : ' bad!'), "\n"
            r
          end

          # Replaces the stored size with the current size of the file.
          def recalc
            @filesize = File.size(@filename)
          end

          # Returns "filename<TAB>filesize". When an index +i+ is given,
          # the string is prefixed with "fileid_<i><TAB>".
          def to_s(i = nil)
            prefix = i ? "fileid_#{i}\t" : ''
            "#{prefix}#{@filename}\t#{@filesize}"
          end

          # Opens the file for binary reading.
          # Returns true when the file was opened by this call and nil
          # when it was already open.
          def open
            return nil if @io
            DEBUG.print "FileID: open #{@filename}\n"
            @io = File.open(@filename, 'rb')
            true
          end

          # Closes the file.
          # Returns nil when the file was closed by this call and true
          # when it was not open.
          def close
            return true unless @io
            DEBUG.print "FileID: close #{@filename}\n"
            @io.close
            @io = nil
            nil
          end

          # Moves the read position. The file must be open.
          def seek(*arg)
            @io.seek(*arg)
          end

          # Reads +size+ bytes from the current position.
          # The file must be open.
          def read(size)
            @io.read(size)
          end

          # Reads +length+ bytes starting at byte offset +pos+.
          # The file is always closed afterwards, even when seeking or
          # reading raises an exception, so no file handle is leaked.
          def get(pos, length)
            open
            begin
              seek(pos, IO::SEEK_SET)
              read(length)
            ensure
              close
            end
          end
        end #class FileID
    
        # FileIDs class.
        #
        # Internal use only.
        class FileIDs < Array
          # prefix:: key prefix of file entries inside +hash+
          # hash::   object answering [] and keys that maps
          #          "#{prefix}#{n}" to "filename<TAB>filesize" strings
          def initialize(prefix, hash)
            @hash = hash
            @prefix = prefix
          end

          # Returns the n-th FileID. When it is not cached yet, the
          # entry is looked up in the hash and cached.
          def [](n)
            cached = super(n)
            return cached if cached
            data = @hash["#{@prefix}#{n}"]
            self[n] = data if data
            super(n)
          end

          # Stores a FileID at position n. A non-FileID true value is
          # converted with FileID.new_from_string.
          def []=(n, data)
            entry = if data.is_a?(FileID)
                      data
                    elsif data
                      FileID.new_from_string(data)
                    else
                      # data is nil
                      nil
                    end
            super(n, entry)
            self[n]
          end

          # Appends new FileID objects for the given file names.
          def add(*arg)
            arg.each { |filename| self << FileID.new(filename) }
          end

          # Loads every entry found in the hash into the array.
          # Returns the array of indexes found.
          def cache_all
            pattern = /\A#{Regexp.escape(@prefix)}(\d+)/
            indexes = []
            @hash.keys.each do |k|
              indexes << $1.to_i if k =~ pattern
            end
            indexes.each { |i| self[i] }
            indexes
          end

          # Yields every existing FileID (empty slots are skipped).
          def each
            self.size.times do |i|
              fid = self[i]
              yield(fid) if fid
            end
            self
          end

          # Yields every existing FileID together with its index.
          def each_with_index
            self.size.times do |i|
              fid = self[i]
              yield(fid, i) if fid
            end
            self
          end

          # Returns the indexes of all existing FileIDs.
          def keys
            self.cache_all
            (0...self.size).select { |i| self[i] }
          end

          # Returns the file names of all existing FileIDs.
          def filenames
            self.cache_all
            names = []
            self.each { |fid| names << fid.filename }
            names
          end

          # Calls #check on each FileID and stops at the first failure.
          # Returns the result of the last check performed
          # (true when there is no FileID at all).
          def check_all
            self.cache_all
            result = true
            self.each do |fid|
              result = fid.check
              break unless result
            end
            result
          end
          alias check check_all

          # Closes every cached FileID. Returns nil.
          def close_all
            self.each { |fid| fid.close }
            nil
          end
          alias close close_all

          # Calls #recalc on every FileID. Returns true.
          def recalc_all
            self.cache_all
            self.each { |fid| fid.recalc }
            true
          end
          alias recalc recalc_all

        end #class FileIDs
    
        # module for flat/1 databank
        #
        # Internal use only.
        module Flat_1
    
          # Record class.
          #
          # Internal use only.
          class Record
            # str::  tab separated line "key<TAB>val1<TAB>val2..."
            #        (may be padded with trailing spaces or NULs)
            # size:: width of the record; defaults to the length of +str+
            def initialize(str, size = nil)
              # Trailing padding is removed from every field.
              fields = str.split("\t").collect do |field|
                field.sub(/[\000 ]+\z/, '')
              end
              @key = fields.shift.to_s
              @val = fields
              @size = (size or str.length)
            end
            attr_reader :key, :val, :size

            # Returns the record as a string padded to its size.
            def to_s
              self.class.to_string(@size, @key, @val)
            end

            # Builds "key<TAB>val1<TAB>..." padded with spaces to +size+.
            def self.to_string(size, key, val)
              line = key + "\t" + val.join("\t")
              sprintf("%-*s", size, line)
            end

            # Creates a Record from its components.
            def self.create(size, key, val)
              self.new(self.to_string(size, key, val))
            end

            # Two records are equal when their string forms are equal.
            def ==(x)
              self.to_s == x.to_s
            end
          end #class Record
    
          # FlatMappingFile class.
          #
          # Internal use only.
          class FlatMappingFile
            @@recsize_width = 4
            @@recsize_regex = /\A\d{4}\z/
    
            def self.open(*arg)
              # Equivalent to .new: all arguments are forwarded unchanged.
              self.new(*arg)
            end
    
            def initialize(filename, mode = 'rb')
              # The file is not opened here; #open does that later.
              @file = nil
              @filename = filename
              @mode = mode
              # Both values are determined on demand by #record_size
              # and #records.
              @record_size = nil
              @records = nil
            end
            attr_accessor :mode
            attr_reader :filename
            
            def open
              # Already open: nothing to do.
              return nil if @file
              DEBUG.print "FlatMappingFile: open #{@filename}\n"
              @file = File.open(@filename, @mode)
              true
            end
    
            def close
              # Closing an already closed mapping file is a no-op.
              return nil unless @file
              DEBUG.print "FlatMappingFile: close #{@filename}\n"
              @file.close
              @file = nil
              nil
            end
    
            def record_size
              # The value is read from the file header only once.
              return @record_size if @record_size
              open
              @file.seek(0, IO::SEEK_SET)
              header = @file.read(@@recsize_width)
              raise 'strange record size' unless header =~ @@recsize_regex
              @record_size = header.to_i
              DEBUG.print "FlatMappingFile: record_size: #{@record_size}\n"
              @record_size
            end
    
            def get_record(i)
              # record_size is called first because it opens the file
              # when necessary.
              width = record_size
              seek(i)
              @file.read(width)
            end
    
            def seek(i)
              # Records follow a header of @@recsize_width bytes and all
              # have the same width.
              width = record_size
              offset = @@recsize_width + width * i
              @file.seek(offset)
            end
    
            def records
              # The count is derived from the file size only once.
              return @records if @records
              width = record_size
              @records = (@file.stat.size - @@recsize_width) / width
              DEBUG.print "FlatMappingFile: records: #{@records}\n"
              @records
            end
            alias size records
    
            # methods for writing file
            def write_record(str)
              # Writes +str+ at the current file position as one record:
              # padded with spaces, or truncated, to exactly record_size
              # characters.
              #
              # (The previous slice "[0..rs]" kept rs + 1 characters, so
              # an over-long string wrote one character too many and
              # shifted every record that follows.)
              rs = record_size
              rec = sprintf("%-*s", rs, str)[0, rs]
              @file.write(rec)
            end
    
            def add_record(str)
              # Called for their side effects: both make sure that the
              # file is open and that @records / @record_size are set.
              records
              record_size
              # Append at the end of the file and count the new record.
              @file.seek(0, IO::SEEK_END)
              write_record(str)
              @records += 1
            end
    
            def put_record(i, str)
              count = records
              width = record_size
              if i < count
                # Overwrite an existing record in place.
                seek(i)
              else
                # Writing at or beyond the end: fill the gap with blank
                # records, then append.
                @file.seek(0, IO::SEEK_END)
                @file.write(sprintf("%-*s", width, '') * (i - count))
                @records = i + 1
              end
              write_record(str)
            end
    
            def init(rs)
              # The record size must fit into the decimal header field.
              in_range = (0 < rs && rs < 10 ** @@recsize_width)
              raise 'record size out of range' unless in_range
              open
              @record_size = rs
              header = sprintf("%0*d", @@recsize_width, rs)
              # Start over with an empty file that holds only the header.
              @file.truncate(0)
              @file.seek(0, IO::SEEK_SET)
              @file.write(header)
              @records = 0
            end
    
            # export/import/edit data
            def each
              count = records
              seek(0)
              # Yields one Record object per stored record, in file order.
              count.times do |i|
                yield Record.new(get_record(i))
              end
              self
            end
    
            def export_tsv(stream)
              # One line per record, appended with <<.
              self.each do |rec|
                line = "#{rec.to_s}\n"
                stream << line
              end
              stream
            end
    
            # Re-initializes this mapping file from a sorted TSV file.
            #
            # filename::     path of the sorted, tab separated input file
            # flag_primary:: when true, keys must be unique: for lines
            #                with the same key only the last one is kept
            #                and a debug message is printed. Otherwise
            #                only identical consecutive lines are dropped.
            #
            # Returns self. The input file is opened with the block form
            # of File.open, so it is closed even when an exception is
            # raised while reading or writing.
            def init_with_sorted_tsv_file(filename, flag_primary = false)
              File.open(filename) do |f|
                # First pass: the longest line gives the record size.
                rec_size = 1
                f.each do |line|
                  len = line.chomp.length
                  rec_size = len if rec_size < len
                end
                self.init(rec_size)

                # Second pass: store the records.
                prev = nil
                f.rewind
                if flag_primary then
                  f.each do |line|
                    x = Record.new(line.chomp, rec_size)
                    if prev then
                      if x.key == prev.key
                        DEBUG.print "Warining: overwrote unique id #{x.key.inspect}\n"
                      else
                        self.add_record(prev.to_s)
                      end
                    end
                    prev = x
                  end
                  # The last record is still pending.
                  self.add_record(prev.to_s) if prev
                else
                  f.each do |line|
                    x = Record.new(line.chomp, rec_size)
                    self.add_record(x.to_s) if x != prev
                    prev = x
                  end
                end
              end
              self
            end
    
            # Returns a Proc that sorts in1 and files into out by running
            # the given external sort program (passed as an argument
            # array to Kernel#system, so no shell is involved).
            def self.external_sort_proc(sort_program = [ '/usr/bin/env', 
                                                         'LC_ALL=C',
                                                         '/usr/bin/sort' ])
              Proc.new do |out, in1, *files|
                arguments = [ '-o', out, in1 ] + files
                system(*(sort_program + arguments))
              end
            end
    
            # Returns a Proc that sorts each of +files+ into a temporary
            # file with the external sort program and then merges them
            # (together with in1) into out using "sort -m".
            #
            # The Proc loads 'tempfile' itself, so it also works when it
            # is called outside of #import_tsv_files, and the temporary
            # files are removed even when sorting raises an exception.
            # It returns the array of (already removed) Tempfile objects.
            def self.external_merge_sort_proc(sort_program = [ '/usr/bin/env', 
                                                               'LC_ALL=C',
                                                               '/usr/bin/sort' ])
              Proc.new do |out, in1, *files|
                require 'tempfile'
                # (in1 may be sorted)
                tf_all = []
                begin
                  files.each do |fn|
                    tf = Tempfile.open('sort')
                    tf_all << tf
                    # Keep the file on disk; only the handle is closed.
                    tf.close(false)
                    cmd = sort_program + [ '-o', tf.path, fn ]
                    system(*cmd)
                  end
                  tfn_all = tf_all.collect { |tf| tf.path }
                  cmd_fin = sort_program + [ '-m', '-o', out, in1, *tfn_all ]
                  system(*cmd_fin)
                ensure
                  tf_all.each do |tf|
                    tf.close(true)
                  end
                end
                tf_all
              end
            end
    
            # Returns a Proc that merges the already sorted files in1 and
            # files into out by running the external sort program with
            # the -m option.
            def self.external_merge_proc(sort_program =  [ '/usr/bin/env', 
                                                           'LC_ALL=C',
                                                           '/usr/bin/sort' ])
              Proc.new do |out, in1, *files|
                # files (and in1) must be sorted
                arguments = [ '-m', '-o', out, in1 ] + files
                system(*(sort_program + arguments))
              end
            end
    
            # Returns a Proc that sorts the lines of in1 and files in
            # memory (plain String comparison) and writes them to out.
            #
            # File.readlines / File.foreach are used instead of the IO
            # variants so that a file name starting with "|" is never
            # interpreted as a command, and the output file is written
            # with the block form of File.open so it is closed even when
            # writing fails. The Proc returns nil.
            def self.internal_sort_proc
              Proc.new do |out, in1, *files|
                lines = File.readlines(in1)
                files.each do |fn|
                  File.foreach(fn) do |x|
                    lines << x
                  end
                end
                lines.sort!
                File.open(out, 'w') do |of|
                  lines.each { |x| of << x }
                end
                nil
              end
            end
    
            # Rebuilds this mapping file from tab separated input files.
            #
            # flag_primary:: passed on to #init_with_sorted_tsv_file
            # mode::         when :new the current contents are discarded;
            #                any other value merges them with the input
            # sort_proc::    object called as
            #                sort_proc.call(out_path, in_path, *files)
            #                that must write the sorted data to out_path
            # files::        paths of the input files
            #
            # Returns self. The mapping file is closed afterwards.
            def import_tsv_files(flag_primary, mode, sort_proc, *files)
              require 'tempfile'

              # tmpfile1: current contents of this mapping file
              # (left empty when mode is :new).
              tmpfile1 = Tempfile.open('flat')
              self.export_tsv(tmpfile1) unless mode == :new
              tmpfile1.close(false)

              # tmpfile0: destination of the sorted data.
              tmpfile0 = Tempfile.open('sorted')
              tmpfile0.close(false)

              sort_proc.call(tmpfile0.path, tmpfile1.path, *files)

              # Build the new mapping file next to the real one, under a
              # name that contains the process id.
              tmpmap = self.class.new(self.filename + ".#{$$}.tmp~", 'wb+')
              tmpmap.init_with_sorted_tsv_file(tmpfile0.path, flag_primary)
              tmpmap.close
              self.close

              # Swap the files: the old one becomes a backup (it may not
              # exist yet), the new one takes its place, and finally the
              # backup is deleted.
              begin
                File.rename(self.filename, self.filename + ".#{$$}.bak~")
              rescue Errno::ENOENT
              end
              File.rename(tmpmap.filename, self.filename)
              begin
                File.delete(self.filename + ".#{$$}.bak~")
              rescue Errno::ENOENT
              end

              # close(true) also removes the temporary files.
              tmpfile0.close(true)
              tmpfile1.close(true)
              self
            end
    
    
            # methods for searching
            # Searches the (sorted) records for +key+.
            # Returns an array with the value lists of all records having
            # that key, in file order, or an empty array.
            def search(key)
              upper = records
              return [] if upper <= 0
              pos = upper / 2
              DEBUG.print "binary search starts...\n"
              loop do
                rec = Record.new(get_record(pos))
                probed = pos
                if key < rec.key then
                  # The key, if present, lies before this record.
                  upper = pos
                  pos = pos / 2
                elsif key > rec.key then
                  pos = (pos + upper) / 2
                else # key == rec.key
                  hits = [ rec.val ]
                  # Records with the same key are adjacent: collect the
                  # ones before the hit ...
                  (pos - 1).downto(0) do |j|
                    other = Record.new(get_record(j))
                    break unless other.key == key
                    hits.unshift(other.val)
                  end
                  # ... and the ones after it.
                  (pos + 1).upto(upper - 1) do |j|
                    other = Record.new(get_record(j))
                    break unless other.key == key
                    hits.push(other.val)
                  end
                  DEBUG.print "#{hits.size} hits found!!\n"
                  return hits
                end
                # The position did not move any more: the key is absent.
                break if probed == pos
              end
              DEBUG.print "no hits found\n"
              []
            end
          end #class FlatMappingFile
    
          # primary name space
          #
          # Internal use only.
          class PrimaryNameSpace < Template::NameSpace
            # Path of the index file of the primary namespace:
            # "key_<name>.key" inside the databank directory.
            def filename
              File.join(dbname, "key_#{name}.key")
            end

            # The index is accessed through a FlatMappingFile.
            def mapping(filename)
              FlatMappingFile.new(filename)
            end
          end #class PrimaryNameSpace
    
          # secondary name space
          #
          # Internal use only.
          class SecondaryNameSpace < Template::NameSpace
            # Wraps +filename+ in a FlatMappingFile object.
            def mapping(filename)
              FlatMappingFile.new(filename)
            end
            # Path of the index file for this namespace: "id_<name>.index"
            # joined onto dbname.
            def filename
              File.join(dbname, "id_#{name}.index")
            end
            # Runs the inherited search, closes the mapping file, and
            # returns the hits flattened into a single-level Array.
            def search(key)
              r = super(key)
              file.close
              # flatten! returns nil when nothing changed, so return r itself
              r.flatten!
              r
            end
          end #class SecondaryNameSpace
        end #module Flat_1
    
        # namespaces
        #
        # Internal use only.
        class NameSpaces < Hash
          # Creates one namespace object per name and stores it under
          # that name.
          #
          # dbname::  handed to every namespace object
          # nsclass:: class instantiated as nsclass.new(dbname, name)
          # arg::     namespace names: an Array, or one tab-separated String
          def initialize(dbname, nsclass, arg)
            @dbname = dbname
            @nsclass = nsclass
            ns_names = arg.is_a?(String) ? arg.split("\t") : arg
            ns_names.each do |ns_name|
              self[ns_name] = @nsclass.new(@dbname, ns_name)
            end
            self
          end

          # Yields every namespace name.
          def each_names
            self.names.each { |ns_name| yield ns_name }
          end

          # Yields every namespace object.
          def each_files
            self.values.each { |ns| yield ns }
          end

          # Namespace names (the hash keys).
          def names
            keys
          end

          # Closes the file of every namespace object.
          def close_all
            values.each do |ns|
              ns.file.close
            end
          end
          alias close close_all

          # Searches +key+ in all namespaces; the merged hits are returned
          # sorted and without duplicates.
          def search(key)
            hits = values.inject([]) { |acc, ns| acc.concat(ns.search(key)) }
            hits.sort!
            hits.uniq!
            hits
          end

          # Searches +key+ only in the namespaces listed in +names+.
          # Hits are concatenated in the given order (not sorted, not
          # de-duplicated). Raises RuntimeError for an unknown name.
          def search_names(key, *names)
            names.inject([]) do |acc, ns_name|
              ns = self[ns_name]
              raise "undefined namespace #{ns_name.inspect}" unless ns
              acc.concat(ns.search(key))
            end
          end

          # Namespace names joined with tabs.
          def to_s
            names.join("\t")
          end
        end #class NameSpaces
    
        # databank
        #
        # Internal use only.
        class DataBank
          # Reads "key<TAB>value" lines from +fileobj+ into a Hash.
          # Each line is chomped in place and split at the first tab only,
          # so a value may itself contain tabs; a line without a tab maps
          # its text to nil.
          def self.file2hash(fileobj)
            table = {}
            fileobj.each do |line|
              line.chomp!
              key, value = line.split("\t", 2)
              table[key] = value
            end
            table
          end
          private_class_method :file2hash
    
          # Path of the configuration file ('config.dat') inside the
          # database directory +dbname+.
          def self.filename(dbname)
            File.join(dbname, 'config.dat')
          end
    
          # Loads the configuration file of database +name+ and returns a
          # new DataBank built from it.
          #
          # name::   database directory
          # mode::   mode used to open the configuration file
          # bdbarg:: extra arguments handed to #bdb_open
          #
          # The file is opened in block form so the handle is closed even
          # if parsing raises.
          def self.read(name, mode = 'rb', *bdbarg)
            hash = File.open(filename(name), mode) { |f| file2hash(f) }
            db = self.new(name, nil, hash)
            db.bdb_open(*bdbarg)
            db
          end
    
          # Same as DataBank.read; all arguments are passed through.
          def self.open(*arg)
            self.read(*arg)
          end
    
          # name::     database directory (stored as a frozen copy)
          # idx_type:: index type used when +hash+ has no 'index' entry
          # hash::     configuration key/value pairs
          #
          # Depending on the resulting index type, the configuration store
          # and the namespace classes are taken from the BDB or the flat
          # file implementation.
          def initialize(name, idx_type = nil, hash = {})
            @dbname = name.dup.freeze
            @bdb = nil
            @always_check = true

            # an 'index' entry in the configuration wins over idx_type
            self.index_type = hash['index'] || idx_type

            if @bdb
              @config = BDBwrapper.new(@dbname, 'config')
              @bdb_fileids = BDBwrapper.new(@dbname, 'fileids')
              @nsclass_pri = BDB_1::PrimaryNameSpace
              @nsclass_sec = BDB_1::SecondaryNameSpace
            else
              @config = hash
              @nsclass_pri = Flat_1::PrimaryNameSpace
              @nsclass_sec = Flat_1::SecondaryNameSpace
            end
            true
          end
    
          attr_reader :dbname, :index_type
    
          # Selects the index implementation.
          #
          # MAGIC_BDB selects Berkeley DB; MAGIC_FLAT, '', nil and false
          # select the flat file index. Anything else raises RuntimeError.
          #
          # Berkeley DB availability is checked before any state is
          # changed, so a failed assignment leaves the object as it was.
          def index_type=(str)
            case str
            when MAGIC_BDB
              unless defined?(BDB)
                raise RuntimeError, "Berkeley DB support not found"
              end
              @index_type = MAGIC_BDB
              @bdb = true
            when MAGIC_FLAT, '', nil, false
              @index_type = MAGIC_FLAT
              @bdb = false
            else
              raise 'unknown or unsupported index type'
            end
          end
    
          # Text form of the configuration, as written to the config file.
          #
          # For a BDB index only the "index" line is produced. For a flat
          # index the format, every file id, the primary namespace and the
          # secondary namespaces follow, one tab-separated entry per line.
          #
          # Values are read through the accessors (format, fileids,
          # primary, secondary) so that lazily initialised values are
          # available even if nothing has touched them yet.
          def to_s
            lines = [ "index\t#{@index_type}" ]
            unless @bdb then
              lines << "format\t#{format}"
              fileids.each_with_index do |x, i|
                lines << "#{x.to_s(i)}"
              end
              lines << "primary_namespace\t#{primary.name}"
              lines << "secondary_namespaces\t#{secondary.names.join("\t")}"
            end
            lines.join("\n") << "\n"
          end
    
          # Re-opens the BDB-backed stores (config first, then file ids),
          # closing each one before opening it with +bdbarg+.
          # Returns true for a BDB index and nil otherwise.
          def bdb_open(*bdbarg)
            return nil unless @bdb
            [ @config, @bdb_fileids ].each do |store|
              store.close
              store.open(*bdbarg)
            end
            true
          end
    
          # Writes the configuration to disk, creating the database
          # directory if it does not exist yet.
          #
          # mode::   mode used to open the configuration file
          # bdbarg:: extra arguments for the BDB stores
          #
          # The configuration file is written in block form so the handle
          # is closed even if writing raises. For a BDB index the format,
          # namespaces and file ids are also stored in the BDB stores;
          # they are read through the accessors so lazily initialised
          # values are available.
          # Always returns true.
          def write(mode = 'wb', *bdbarg)
            Dir.mkdir(@dbname) unless FileTest.directory?(@dbname)
            File.open(self.class.filename(@dbname), mode) do |f|
              f.write self.to_s
            end

            if @bdb then
              bdb_open(*bdbarg)
              @config['format'] = format
              @config['primary_namespace'] = primary.name
              @config['secondary_namespaces'] = secondary.names.join("\t")
              @bdb_fileids.writeback_array('', fileids, *bdbarg)
            end
            true
          end
    
          # Closes the primary namespace, the secondary namespaces and the
          # file ids (in that order) and, for a BDB index, the two BDB
          # stores. Returns nil.
          def close
            DEBUG.print "DataBank: close #{@dbname}\n"
            [ :primary, :secondary, :fileids ].each do |reader|
              __send__(reader).close
            end
            if @bdb
              [ @config, @bdb_fileids ].each { |store| store.close }
            end
            nil
          end
    
          ##parameters
          # Primary namespace object; created on first use from the
          # 'primary_namespace' configuration entry.
          def primary
            self.primary = @config['primary_namespace'] unless @primary
            @primary
          end
    
          # Sets the primary namespace by name; a missing or empty name
          # falls back to 'UNIQUE'.
          def primary=(pri_name)
            ns_name = (!pri_name || pri_name.empty?) ? 'UNIQUE' : pri_name
            @primary = @nsclass_pri.new(@dbname, ns_name)
            @primary
          end
    
          # Secondary namespaces; created on first use from the
          # 'secondary_namespaces' configuration entry.
          def secondary
            self.secondary = @config['secondary_namespaces'] unless @secondary
            @secondary
          end
    
          # Sets the secondary namespaces from +sec_names+ (whatever
          # NameSpaces.new accepts); nil or false means no namespaces.
          def secondary=(sec_names)
            @secondary = NameSpaces.new(@dbname, @nsclass_sec, sec_names || [])
            @secondary
          end
    
          # Stores a private String copy of +str+ (converted with to_s, so
          # nil becomes "").
          def format=(str)
            @format = str.to_s.dup
          end
    
          # Data format name; taken from the 'format' configuration entry
          # on first use.
          def format
            self.format = @config['format'] unless @format
            @format
          end
    
          # File id table; built by init_fileids on first use.
          def fileids
            init_fileids unless @fileids
            @fileids
          end
    
          # (Re)creates the file id table: backed by the BDB file id store
          # with an empty prefix for a BDB index, otherwise by the
          # configuration with the 'fileid_' prefix.
          def init_fileids
            @fileids = @bdb ? FileIDs.new('', @bdb_fileids) : FileIDs.new('fileid_', @config)
            @fileids
          end
    
          # high level methods
          # Normalises +bool+ to true/false and stores it as the
          # always_check flag.
          def always_check=(bool)
            @always_check = bool ? true : false
          end
          attr_reader :always_check
    
          # Reads +length+ bytes at +pos+ from flat file number +f+ (all
          # three are converted with to_i). When always_check is on, the
          # file's check must pass first; otherwise RuntimeError is raised.
          def get_flatfile_data(f, pos, length)
            fi = fileids[f.to_i]
            if @always_check && !fi.check
              raise "flatfile #{fi.filename.inspect} may be modified"
            end
            fi.get(pos.to_i, length.to_i)
          end
    
          # Collects the hits for +key+ from all secondary namespaces plus
          # the primary namespace's answer (when truthy); the list is
          # returned sorted and without duplicates.
          def search_all_get_unique_id(key)
            ids = secondary.search(key)
            pri_hit = primary.include?(key)
            ids.push(pri_hit) if pri_hit
            ids.sort!
            ids.uniq!
            ids
          end
    
          # Looks every given key up in the primary namespace and stores
          # the flat file data of the first hit under that key in a new
          # Results object. Keys without a hit are skipped.
          def search_primary(*arg)
            results = Results.new
            arg.each do |key|
              hits = primary.search(key)
              # primary keys are unique, so there is at most one hit
              next if hits.empty?
              results.store(key, get_flatfile_data(*hits[0]))
            end
            results
          end
    
          # Searches +key+ in every namespace and returns the entries for
          # the unique ids found.
          def search_all(key)
            search_primary(*search_all_get_unique_id(key))
          end
    
          # Returns a one-element Array holding the primary namespace's
          # answer for +key+, or [] when that answer is nil/false.
          def search_primary_get_unique_id(key)
            hit = primary.include?(key)
            hit ? [ hit ] : []
          end
    
          # Searches +key+ in the namespaces listed in +names+ only.
          # The primary namespace's name, if listed, is answered by the
          # primary namespace itself; all other names go to the secondary
          # namespaces. The ids are returned sorted and without duplicates.
          def search_namespaces_get_unique_id(key, *names)
            pri_hit = nil
            sec_names = names
            if names.include?(primary.name)
              sec_names = names.reject { |ns_name| ns_name == primary.name }
              pri_hit = primary.include?(key)
            end
            ids = secondary.search_names(key, *sec_names)
            ids.push(pri_hit) if pri_hit
            ids.sort!
            ids.uniq!
            ids
          end
    
          # Searches +key+ in the listed namespaces and returns the entries
          # for the unique ids found.
          def search_namespaces(key, *names)
            search_primary(*search_namespaces_get_unique_id(key, *names))
          end
    
          # Delegates to check_all on the file id table and returns its
          # result.
          def check_consistency
            fileids.check_all
          end
        end #class DataBank
    
      end #class FlatFileIndex
    end #module Bio
    
    bio-1.4.3.0001/lib/bio/io/flatfile.rb0000644000004100000410000003247512200110570016737 0ustar  www-datawww-data#
    # = bio/io/flatfile.rb - flatfile access wrapper class
    #
    #   Copyright (C) 2001-2006 Naohisa Goto 
    #
    # License:: The Ruby License
    #
    #  $Id:$
    #
    #
    # Bio::FlatFile is a helper and wrapper class to read a biological data file.
    # It acts like a IO object.
    # It can automatically detect data format, and users do not need to tell
    # the class what the data is.
    #
    
    module Bio
    
      # Bio::FlatFile is a helper and wrapper class to read a biological data file.
      # It acts like a IO object.
      # It can automatically detect data format, and users do not need to tell
      # the class what the data is.
      class FlatFile
    
        autoload :AutoDetect,          'bio/io/flatfile/autodetection'
        autoload :Splitter,            'bio/io/flatfile/splitter'
        autoload :BufferedInputStream, 'bio/io/flatfile/buffer'
    
        include Enumerable
    
        #
        #   Bio::FlatFile.open(file, *arg)
        #   Bio::FlatFile.open(dbclass, file, *arg)
        #
        # Creates a new Bio::FlatFile object to read a file or a stream
        # which contains _dbclass_ data.
        #
        # _dbclass_ should be a class (or module) or nil.
        # e.g. Bio::GenBank, Bio::FastaFormat.
        #
        # If _file_ is a filename (which doesn't have gets method),
        # the method opens a local file named _file_
        # with File.open(filename, *arg).
        #
        # When _dbclass_ is omitted or nil is given to _dbclass_,
        # the method tries to determine database class
        # (file format) automatically.
        # When it fails to determine, dbclass is set to nil
        # and FlatFile#next_entry would fail.
        # You can still set dbclass using FlatFile#dbclass= method.
        #
        # * Example 1
        #     Bio::FlatFile.open(Bio::GenBank, "genbank/gbest40.seq")
        # * Example 2
        #     Bio::FlatFile.open(nil, "embl/est_hum17.dat")
        # * Example 3
        #     Bio::FlatFile.open("genbank/gbest40.seq")
        #
        # * Example 4
        #     Bio::FlatFile.open(Bio::GenBank, $stdin)
        #
        # If it is called with a block, the block will be executed with
        # a new Bio::FlatFile object. If filename is given,
        # the file is automatically closed when leaving the block.
        #
        # * Example 5
        #     Bio::FlatFile.open(nil, 'test4.fst') do |ff|
        #         ff.each { |e| print e.definition, "\n" }
        #     end
        #
        # * Example 6
        #     Bio::FlatFile.open('test4.fst') do |ff|
        #         ff.each { |e| print e.definition, "\n" }
        #     end
        #
        # Compatibility Note:
        # *arg is completely passed to the File.open
        # and you cannot specify ":raw => true" or ":raw => false".
        #
        def self.open(*arg, &block)
          # Accepted call shapes:
          #   FlatFile.open(dbclass, file, mode, perm)
          #   FlatFile.open(file, mode, perm)
          raise ArgumentError, 'wrong number of arguments (0 for 1)' if arg.empty?

          first = arg.shift
          if first.is_a?(Module) || first.nil?
            # an explicit dbclass (or nil, meaning "autodetect") was given
            dbclass = first
          else
            # the first argument is already the file name or stream
            dbclass = nil
            arg.unshift(first)
          end
          raise ArgumentError, 'wrong number of arguments (1 for 2)' if arg.empty?

          file = arg.shift
          if file.respond_to?(:gets)
            # 'file' is an IO-like object: wrap it directly
            ff = self.new(dbclass, file)
            block_given? ? (yield ff) : ff
          else
            # 'file' is a file name: open it (and close it after the block)
            _open_file(dbclass, file, *arg, &block)
          end
        end
    
        # Same as Bio::FlatFile.open(nil, filename_or_stream, mode, perm, options).
        #
        # * Example 1
        #    Bio::FlatFile.auto(ARGF)
        # * Example 2
        #    Bio::FlatFile.auto("embl/est_hum17.dat")
        # * Example 3
        #    Bio::FlatFile.auto(IO.popen("gzip -dc nc1101.flat.gz"))
        #
        def self.auto(*arg, &block)
          # nil as dbclass asks FlatFile.open to autodetect the format
          self.open(nil, *arg, &block)
        end
    
        # Same as FlatFile.auto(filename_or_stream, *arg).to_a
        #
        # (This method might be OBSOLETED in the future.)
        def self.to_a(*arg)
          self.auto(*arg) do |flatfile|
            # without a detected format there is nothing to parse with
            if flatfile.dbclass
              flatfile.to_a
            else
              raise 'cannot determine file format'
            end
          end
        end
    
        # Same as FlatFile.auto(filename, *arg),
        # except that it only accepts a filename and doesn't accept an IO object.
        # File format is automatically determined.
        #
        # It can accept a block.
        # If a block is given, it returns the block's return value.
        # Otherwise, it returns a new FlatFile object.
        #
        def self.open_file(filename, *arg, &block)
          # Forward the block as well: without it the documented block
          # form would be silently ignored and the file left open.
          _open_file(nil, filename, *arg, &block)
        end
    
        # Same as FlatFile.open(dbclass, filename, *arg),
        # except that it only accepts a filename and doesn't accept an IO object.
        #
        # It can accept a block.
        # If a block is given, it returns the block's return value.
        # Otherwise, it returns a new FlatFile object.
        #
        def self._open_file(dbclass, filename, *arg)
          unless block_given?
            # no block: hand back a FlatFile on a stream the caller owns
            return self.new(dbclass, BufferedInputStream.open_file(filename, *arg))
          end
          # block form: the stream lives only for the duration of the block
          BufferedInputStream.open_file(filename, *arg) do |stream|
            yield self.new(dbclass, stream)
          end
        end
        private_class_method :_open_file
    
        # Opens URI specified as _uri_.
        # _uri_ must be a String or URI object.
        # *arg is passed to OpenURI.open_uri or URI#open.
        #
        # Like FlatFile#open, it can accept a block.
        #
        # Note that you MUST explicitly require 'open-uri'.
        # Because open-uri.rb modifies existing class,
        # it isn't required by default.
        # 
        def self.open_uri(uri, *arg)
          unless block_given?
            # no block: hand back a FlatFile on a stream the caller owns
            return self.new(nil, BufferedInputStream.open_uri(uri, *arg))
          end
          # block form: the stream is only handed out inside the block
          BufferedInputStream.open_uri(uri, *arg) do |stream|
            yield self.new(nil, stream)
          end
        end
    
        # Executes the block for every entry in the stream.
        # Same as FlatFile.open(*arg) { |ff| ff.each { |entry| ... }}.
        # 
        # * Example
        #     Bio::FlatFile.foreach('test.fst') { |e| puts e.definition }
        #
        def self.foreach(*arg)
          # open in block form, then hand every entry on to the caller
          self.open(*arg) do |ff|
            ff.each { |entry| yield entry }
          end
        end
    
        # Same as FlatFile.open, except that 'stream' should be a opened
        # stream object (IO, File, ..., who have the 'gets' method).
        #
        # * Example 1
        #    Bio::FlatFile.new(Bio::GenBank, ARGF)
        # * Example 2
        #    Bio::FlatFile.new(Bio::GenBank, IO.popen("gzip -dc nc1101.flat.gz"))
        #
        # Compatibility Note:
        # Now, you cannot specify ":raw => true" or ":raw => false".
        # Below styles are DEPRECATED.
        #
        # * Example 3 (deprecated)
        #    # Bio::FlatFile.new(nil, $stdin, :raw=>true) # => ERROR
        #    # Please rewrite as below.
        #    ff = Bio::FlatFile.new(nil, $stdin)
        #    ff.raw = true
        # * Example 3 in old style (deprecated)
        #    # Bio::FlatFile.new(nil, $stdin, true) # => ERROR
        #    # Please rewrite as below.
        #    ff = Bio::FlatFile.new(nil, $stdin)
        #    ff.raw = true
        #
        def initialize(dbclass, stream)
          # 2nd arg: wrap plain IO objects; reuse an existing buffered stream
          @stream = if stream.kind_of?(BufferedInputStream)
                      stream
                    else
                      BufferedInputStream.for_io(stream)
                    end
          # 1st arg: explicit database class, or autodetect the format
          if dbclass
            self.dbclass = dbclass
          else
            autodetect
          end
          # leader is skipped once, before the first entry
          @skip_leader_mode = :firsttime
          @firsttime_flag = true
          # entries are returned parsed unless raw mode is switched on
          self.raw = false
        end
    
        # The mode how to skip leader of the data.
        # :firsttime :: (DEFAULT) only head of file (= first time to read)
        # :everytime :: everytime to read entry
        # nil :: never skip
        attr_accessor :skip_leader_mode
    
        # (DEPRECATED) IO object in the flatfile object.
        #
        # Compatibility Note: Bio::FlatFile#io is deprecated.
        # Please use Bio::FlatFile#to_io instead.
        def io
          # emits a deprecation warning on every call, then behaves like to_io
          warn "Bio::FlatFile#io is deprecated."
          @stream.to_io
        end
    
        # IO object in the flatfile object.
        #
        # Compatibility Note: Bio::FlatFile#io is deprecated.
        def to_io
          # delegated to the wrapped input stream
          @stream.to_io
        end
    
        # Pathname, filename or URI (or nil).
        def path
          # delegated to the wrapped input stream
          @stream.path
        end
    
        # Exception class to be raised when data format hasn't been specified.
        class UnknownDataFormatError < IOError; end
    
        # Get next entry.
        def next_entry
          unless @dbclass
            raise UnknownDataFormatError, 'file format auto-detection failed?'
          end

          # decide whether the leader has to be skipped before this read
          skip = case @skip_leader_mode
                 when :firsttime then @firsttime_flag
                 when :everytime then true
                 else false
                 end
          @splitter.skip_leader if skip

          r = raw ? @splitter.get_entry : @splitter.get_parsed_entry
          @firsttime_flag = false
          return nil unless r

          # only parsed entries are remembered as the "last entry"
          @entry = r unless raw
          r
        end
        attr_reader :entry
    
        # Returns the last raw entry as a string.
        def entry_raw
          # the splitter keeps the text of the entry it read last
          @splitter.entry
        end
    
        # a flag to write down entry start and end positions
        def entry_pos_flag
          # delegated to the splitter
          @splitter.entry_pos_flag
        end
    
        # Sets flag to write down entry start and end positions
        def entry_pos_flag=(x)
          # delegated to the splitter
          @splitter.entry_pos_flag = x
        end
    
        # start position of the last entry
        def entry_start_pos
          # delegated to the splitter
          @splitter.entry_start_pos
        end
    
        # (end position of the last entry) + 1
        def entry_ended_pos
          # delegated to the splitter
          @splitter.entry_ended_pos
        end
    
        # Iterates over each entry in the flatfile.
        #
        # * Example
        #    include Bio
        #    ff = FlatFile.open(GenBank, "genbank/gbhtg14.seq")
        #    ff.each_entry do |x|
        #      puts x.definition
        #    end
        def each_entry
          # next_entry returns nil once the input is exhausted
          while (entry = self.next_entry)
            yield entry
          end
        end
        alias :each :each_entry
    
        # Resets file pointer to the start of the flatfile.
        # (similar to IO#rewind)
        def rewind
          # rewind through the splitter when there is one, otherwise
          # rewind the stream directly
          target = @splitter || @stream
          result = target.rewind
          # the next read counts as the first one again
          @firsttime_flag = true
          result
        end
    
        # Closes input stream.
        # (similar to IO#close)
        def close
          # delegated to the wrapped input stream
          @stream.close
        end
    
        # Returns current position of input stream.
        # If the input stream is not a normal file,
        # the result is not guaranteed.
        # It is similar to IO#pos.
        # Note that it will not be equal to io.pos,
        # because FlatFile has its own internal buffer.
        def pos
          # position as reported by the buffered stream, not by the raw IO
          @stream.pos
        end
    
        # (Not recommended to use it.)
        # Sets position of input stream.
        # If the input stream is not a normal file,
        # the result is not guaranteed.
        # It is similar to IO#pos=.
        # Note that it will not be equal to io.pos=,
        # because FlatFile has its own internal buffer.
        def pos=(p)
          # delegated to the buffered stream
          @stream.pos=(p)
        end
    
        # Returns true if input stream is end-of-file.
        # Otherwise, returns false.
        # (Similar to IO#eof?, but may not be equal to io.eof?,
        # because FlatFile has its own internal buffer.)
        def eof?
          # delegated to the buffered stream
          @stream.eof?
        end
    
        # If true is given, the next_entry method returns
        # an entry as text, whereas if false is given, it returns a parsed object.
        def raw=(bool)
          # store a strict true/false, whatever truthy/falsy value came in
          if bool
            @raw = true
          else
            @raw = false
          end
        end
    
        # If true, raw mode.
        attr_reader :raw
    
        # Similar to IO#gets.
        # Internal use only. Users should not call it directly.
        def gets(*arg)
          # delegated to the buffered stream with all arguments
          @stream.gets(*arg)
        end
    
        # Sets database class. Please use only if autodetect fails.
        def dbclass=(klass)
          if klass
            @dbclass = klass
            @splitter = begin
              # preferred: the class builds its own splitter
              @dbclass.flatfile_splitter(@dbclass, @stream)
            rescue NameError, NoMethodError
              # otherwise use the class's FLATFILE_SPLITTER constant,
              # falling back to the default splitter
              splitter_class = begin
                @dbclass::FLATFILE_SPLITTER
              rescue NameError
                Splitter::Default
              end
              splitter_class.new(klass, @stream)
            end
          else
            @dbclass = nil
            @splitter = nil
          end
        end
    
        # Returns database class which is automatically detected or
        # given in FlatFile#initialize.
        attr_reader :dbclass
    
        # Performs determination of database class (file format).
        # Pre-reads +lines+ lines for format determination (default 31 lines).
        # If fails, returns nil or false. Otherwise, returns database class.
        #
        # The method can be called anytime if you want (but not recommended).
        # This might be useful if the input file is a mixture of data in multiple formats.
        def autodetect(lines = 31, ad = AutoDetect.default)
          detected = ad.autodetect_flatfile(self, lines)
          if detected
            self.dbclass = detected
          elsif !self.dbclass
            # nothing detected and nothing set before: reset explicitly
            self.dbclass = nil
          end
          detected
        end
    
        # Detects database class (== file format) of given file.
        # If fails to determine, returns nil.
        def self.autodetect_file(filename)
          # Use the block form so the file opened just for detection is
          # released again instead of staying open.
          _open_file(nil, filename) { |ff| ff.dbclass }
        end
    
        # Detects database class (== file format) of given input stream.
        # If fails to determine, returns nil.
        # Caution: the method reads some data from the input stream,
        # and the data will be lost.
        def self.autodetect_io(io)
          # constructing with a nil dbclass runs the autodetection
          self.new(nil, io).dbclass
        end
    
        # This is OBSOLETED. Please use autodetect_io(io) instead.
        def self.autodetect_stream(io)
          # Only warn in verbose mode; the message ends with a newline so
          # it does not run into whatever is written to stderr next.
          $stderr.print "Bio::FlatFile.autodetect_stream will be deprecated.\n" if $VERBOSE
          self.autodetect_io(io)
        end
    
        # Detects database class (== file format) of given string.
        # If fails to determine, returns false or nil.
        def self.autodetect(text)
          # uses the default AutoDetect rule set
          AutoDetect.default.autodetect(text)
        end
    
      end #class FlatFile
    
    end #module Bio
    
    bio-1.4.3.0001/lib/bio/io/hinv.rb0000644000004100000410000002677712200110570016125 0ustar  www-datawww-data#
    # = bio/io/hinv.rb - H-invDB web service (REST) client module
    #
    # Copyright::  Copyright (C) 2008 Toshiaki Katayama 
    # License::    The Ruby License
    #
    # $Id:$
    #
    
    require 'bio/command'
    require 'rexml/document'
    
    module Bio
    
      # = Bio::Hinv
      #
      # Accessing the H-invDB web services.
      #
      # * http://www.h-invitational.jp/
      # * http://www.jbirc.aist.go.jp/hinv/hws/doc/index.html
      #
      class Hinv
    
        BASE_URI = "http://www.jbirc.aist.go.jp/hinv/hws/"
    
        module Common
          # Posts +options+ as a form to the service URL (@url), keeps the
          # response body in @result and its parsed REXML document in
          # @xml. Returns the parsed document.
          def query(options = nil)
            @result = Bio::Command.post_form(@url, options).body
            @xml = REXML::Document.new(@result)
          end
        end
    
    
        # Bio::Hinv.acc2hit("BC053657")  # => "HIT000053961"
        def self.acc2hit(acc)
          # one-shot helper: build the service, query it, return its result
          serv = Acc2hit.new
          serv.query("acc" => acc)
          serv.result
        end
    
        # Bio::Hinv.hit2acc("HIT000022181")  # => "AK097327"
        def self.hit2acc(hit)
          # one-shot helper: build the service, query it, return its result
          serv = Hit2acc.new
          serv.query("hit" => hit)
          serv.result
        end
    
        # Bio::Hinv.hit_cnt  # => 187156
        def self.hit_cnt
          # this service takes no query parameters
          serv = HitCnt.new
          serv.query
          serv.result
        end
    
        # Bio::Hinv.hit_definition("HIT000000001")  # => "Rho guanine ..."
        def self.hit_definition(hit)
          # returns only the definition text; use HitDefinition directly
          # to reach the other fields of the response
          serv = HitDefinition.new
          serv.query("hit" => hit)
          serv.result
        end
    
        # Bio::Hinv.hit_pubmedid("HIT000053961")  # => [7624364, 11279095, ... ]
        def self.hit_pubmedid(hit)
          # one-shot helper: build the service, query it, return its result
          serv = HitPubmedId.new
          serv.query("hit" => hit)
          serv.result
        end
    
        # Bio::Hinv.hit_xml("HIT000000001")  # prints the XML document of the transcript
        #
        # Note that the document is written to standard output with puts;
        # the method itself returns nil.
        def self.hit_xml(hit)
          serv = HitXML.new
          serv.query("hit" => hit)
          puts serv.result
        end
    
        # Bio::Hinv.hix2hit("HIX0000004")  # => ["HIT000012846", ... ]
        def self.hix2hit(hix)
          # one-shot helper: build the service, query it, return its result
          serv = Bio::Hinv::Hix2hit.new
          serv.query("hix" => hix)
          serv.result
        end
    
        # Bio::Hinv.hix_cnt  # => 36073
        def self.hix_cnt
          # this service takes no query parameters
          serv = HixCnt.new
          serv.query
          serv.result
        end
    
        # Bio::Hinv.hix_represent("HIX0000001")  # => "HIT000022181"
        def self.hix_represent(hix)
          # one-shot helper: build the service, query it, return its result
          serv = HixRepresent.new
          serv.query("hix" => hix)
          serv.result
        end
    
        # Bio::Hinv.id_search("HIT00002218*")  # => ["HIT000022181", ... ]
        def self.id_search(query)
          # one-shot helper: build the service, query it, return its result
          serv = IdSearch.new
          serv.query("query" => query)
          serv.result
        end
    
        # Bio::Hinv.keyword_search("HIT00002218*")  # => ["HIT000022181", ... ]
        def self.keyword_search(query)
          # one-shot helper: build the service, query it, return its result
          serv = KeywordSearch.new
          serv.query("query" => query)
          serv.result
        end
    
    
        # serv = Bio::Hinv::Acc2hit.new
        # serv.query("acc" => "BC053657")
        # puts serv.result
        class Acc2hit
          include Common

          # Points the service at acc2hit.php below BASE_URI.
          def initialize
            @url = BASE_URI + "acc2hit.php"
          end

          # Text of the H-INVITATIONAL-ID element of the last response,
          # e.g. "HIT000053961".
          # Call #query first; raises NoMethodError when the response has
          # no such element.
          def result
            @xml.elements['//H-INVITATIONAL-ID'].text
          end
        end
    
        # serv = Bio::Hinv::Hit2acc.new
        # serv.query("hit" => "HIT000022181")
        # puts serv.result
        class Hit2acc
          include Common

          # Points the service at hit2acc.php below BASE_URI.
          def initialize
            @url = BASE_URI + "hit2acc.php"
          end

          # Text of the ACCESSION-NO element of the last response,
          # e.g. "AK097327".
          # Call #query first; raises NoMethodError when the response has
          # no such element.
          def result
            @xml.elements['//ACCESSION-NO'].text
          end
        end
    
        # serv = Bio::Hinv::HitCnt.new
        # serv.query
        # puts serv.result
        class HitCnt
          include Common

          # Points the service at hit_cnt.php below BASE_URI.
          def initialize
            @url = BASE_URI + "hit_cnt.php"
          end

          # Integer value of the TRANSCRIPT_CNT element of the last
          # response, e.g. 187156.
          # Call #query first; raises NoMethodError when the response has
          # no such element.
          def result
            @xml.elements['//TRANSCRIPT_CNT'].text.to_i
          end
        end
    
        # serv = Bio::Hinv::HitDefinition.new
        # serv.query("hit" => "HIT000000001")
        # puts serv.result
        # puts serv.data_source_definition
        # puts serv.cdna_rep_h_invitational
        # puts serv.cdna_splicing_isoform_curation
        # puts serv.data_source_db_reference_protein_motif_id
        # puts serv.data_source_identity
        # puts serv.data_source_coverage
        # puts serv.data_source_homologous_species
        # puts serv.data_source_similarity_category
        class HitDefinition
          include Common

          # Points the service at hit_definition.php below BASE_URI.
          def initialize
            @url = BASE_URI + "hit_definition.php"
          end

          # Each reader below returns the text of one element of the last
          # response. Sample values for HIT000000001 (the pairing of
          # values and elements is presumed from their order -- verify
          # against a live response):
          #
          #   DATA-SOURCE_DEFINITION                    Rho guanine nucleotide exchange factor 10.
          #   CDNA_REP-H-INVITATIONAL                   Representative transcript
          #   DATA-SOURCE_DB-REFERENCE_PROTEIN-MOTIF-ID NP_055444
          #   DATA-SOURCE_IDENTITY                      100.0
          #   DATA-SOURCE_COVERAGE                      100.0
          #   DATA-SOURCE_HOMOLOGOUS_SPECIES            Homo sapiens
          #   DATA-SOURCE_SIMILARITY-CATEGORY           Identical to known human protein(Category I).
          #
          # All readers need a prior #query and raise NoMethodError when
          # the element is missing from the response.

          # Text of the DATA-SOURCE_DEFINITION element.
          def result
            @xml.elements['//DATA-SOURCE_DEFINITION'].text
          end
          alias :data_source_definition :result

          # Text of the CDNA_REP-H-INVITATIONAL element.
          def cdna_rep_h_invitational
            @xml.elements['//CDNA_REP-H-INVITATIONAL'].text
          end
          # Text of the CDNA_SPLICING-ISOFORM_CURATION element.
          def cdna_splicing_isoform_curation
            @xml.elements['//CDNA_SPLICING-ISOFORM_CURATION'].text
          end
          # Text of the DATA-SOURCE_DB-REFERENCE_PROTEIN-MOTIF-ID element.
          def data_source_db_reference_protein_motif_id
            @xml.elements['//DATA-SOURCE_DB-REFERENCE_PROTEIN-MOTIF-ID'].text
          end
          # DATA-SOURCE_IDENTITY element converted to Float.
          def data_source_identity
            @xml.elements['//DATA-SOURCE_IDENTITY'].text.to_f
          end
          # DATA-SOURCE_COVERAGE element converted to Float.
          def data_source_coverage
            @xml.elements['//DATA-SOURCE_COVERAGE'].text.to_f
          end
          # Text of the DATA-SOURCE_HOMOLOGOUS_SPECIES element.
          def data_source_homologous_species
            @xml.elements['//DATA-SOURCE_HOMOLOGOUS_SPECIES'].text
          end
          # Text of the DATA-SOURCE_SIMILARITY-CATEGORY element.
          def data_source_similarity_category
            @xml.elements['//DATA-SOURCE_SIMILARITY-CATEGORY'].text
          end
        end
    
        # serv = Bio::Hinv::HitPubmedId.new
        # serv.query("hit" => "HIT000053961")
        # puts serv.result
        class HitPubmedId
          include Common

          # Points the service at hit_pubmedid.php below BASE_URI.
          def initialize
            @url = BASE_URI + "hit_pubmedid.php"
          end

          # Integer values of all CDNA_DB-REFERENCE_PUBMED elements of the
          # last response, e.g. [7624364, 11279095, 15489334].
          # Returns [] when the response has no such element.
          def result
            list = []
            @xml.elements.each('//CDNA_DB-REFERENCE_PUBMED') do |e|
              list << e.text.to_i
            end
            return list
          end
        end
    
        # serv = Bio::Hinv::HitXML.new
        # serv.query("hit" => "HIT000000001")
        # puts serv.result
        class HitXML
          include Common

          # Points the service at hit_xml.php below BASE_URI.
          def initialize
            @url = BASE_URI + "hit_xml.php"
          end

          # The unparsed response body of the last #query (the XML
          # document as a String), or nil before the first query.
          def result
            @result
          end
        end
    
        # serv = Bio::Hinv::Hix2hit.new
        # serv.query("hix" => "HIX0000004")
        # puts serv.result
        class Hix2hit
          include Common

          # Targets the hix2hit.php endpoint below BASE_URI.
          def initialize
            @url = BASE_URI + "hix2hit.php"
          end

          # Returns an Array with the text of every H-INVITATIONAL-ID element
          # in @xml, e.g. ["HIT000012846", "HIT000022124", ...].
          def result
            hit_ids = []
            @xml.elements.each('//H-INVITATIONAL-ID') { |node| hit_ids.push(node.text) }
            hit_ids
          end
        end
    
        # serv = Bio::Hinv::HixCnt.new
        # serv.query
        # puts serv.result
        class HixCnt
          include Common

          # Targets the hix_cnt.php endpoint below BASE_URI.
          def initialize
            @url = BASE_URI + "hix_cnt.php"
          end

          # Text of the //LOCUS_CNT element in @xml converted with to_i
          # (e.g. "36073" => 36073).
          def result
            count_text = @xml.elements['//LOCUS_CNT'].text
            count_text.to_i
          end
        end
    
        # serv = Bio::Hinv::HixRepresent.new
        # serv.query("hix" => "HIX0000001")
        # puts serv.result
        # puts serv.rep_h_invitational_id
        # puts serv.rep_accession_no
        class HixRepresent
          include Common

          # Targets the hix_represent.php endpoint below BASE_URI.
          def initialize
            @url = BASE_URI + "hix_represent.php"
          end

          # Text of the //REP-H-INVITATIONAL-ID element in @xml
          # (e.g. "HIT000022181"). Also callable as #rep_h_invitational_id.
          def result
            node = @xml.elements['//REP-H-INVITATIONAL-ID']
            node.text
          end
          alias :rep_h_invitational_id :result

          # Text of the //REP-ACCESSION-NO element in @xml (e.g. "AK097327").
          def rep_accession_no
            node = @xml.elements['//REP-ACCESSION-NO']
            node.text
          end
        end
    
        # example at "http://www.jbirc.aist.go.jp/hinv/hws/doc/index_jp.html"
        # is for hit_xml.php (not for hix_xml.php)
        class HixXML
          # Empty placeholder: this class currently defines no methods.
        end
    
        # serv = Bio::Hinv::KeywordSearch.new
        # serv.query("query" => "HIT00002218*", "start" => 1, "end" => 100)
        # puts serv.result
        # puts serv.size
        # puts serv.start
        # puts serv.end
        class KeywordSearch
          include Common

          # Targets the keyword_search.php endpoint below BASE_URI.
          def initialize
            @url = BASE_URI + "keyword_search.php"
          end

          # Passes +hash+ on to the inherited #query after filling in
          # "start" => 1 and "end" => 100 for keys the caller left out.
          def query(hash = {})
            defaults = { "start" => 1, "end" => 100 }
            super(defaults.merge(hash))
          end

          # Returns an Array with the text of every H-INVITATIONAL-ID element
          # in @xml, e.g. ["HIT000022180", "HIT000022181", ...].
          def result
            hit_ids = []
            @xml.elements.each('//H-INVITATIONAL-ID') { |node| hit_ids.push(node.text) }
            hit_ids
          end

          # Text of the //SIZE element converted with to_i.
          def size
            text = @xml.elements['//SIZE'].text
            text.to_i
          end

          # Text of the //START element converted with to_i.
          def start
            text = @xml.elements['//START'].text
            text.to_i
          end

          # Text of the //END element converted with to_i.
          def end
            text = @xml.elements['//END'].text
            text.to_i
          end
        end
    
        # serv = Bio::Hinv::IdSearch.new
        # serv.query("query" => "HIT00002218*", "id_type" => "H-INVITATIONAL-ID", "start" => 1, "end" => 100)
        # puts serv.result
        # puts serv.size
        # puts serv.start
        # puts serv.end
        class IdSearch < KeywordSearch
          # Targets the id_search.php endpoint below BASE_URI.
          def initialize
            @url = BASE_URI + "id_search.php"
          end

          # Passes +hash+ on to KeywordSearch#query after filling in
          # "id_type" => "H-INVITATIONAL-ID", "start" => 1 and "end" => 100
          # for keys the caller left out.
          def query(hash = {})
            defaults = { "id_type" => "H-INVITATIONAL-ID", "start" => 1, "end" => 100 }
            super(defaults.merge(hash))
          end
        end
    
      end
    end
    
    bio-1.4.3.0001/lib/bio/io/das.rb0000644000004100000410000002727512200110570015722 0ustar  www-datawww-data#
    # = bio/io/das.rb - BioDAS access module
    #
    # Copyright::	Copyright (C) 2003, 2004, 2007
    #		Shuichi Kawashima ,
    #		Toshiaki Katayama 
    # License::	The Ruby License
    #
    # $Id:$
    #
    #--
    # == TODO
    #
    #  link, stylesheet
    #
    #++
    #
    
    begin
      require 'rexml/document'
    rescue LoadError
    end
    require 'bio/command'
    require 'bio/sequence'
    
    
    module Bio
    
    class DAS
    
      # Specify DAS server to connect
      # url:: base URL of the DAS server; a single trailing '/' is removed
      #       before it is stored in @server.
      def initialize(url = 'http://www.wormbase.org:80/db/')
        base = url.chomp('/')
        @server = base
      end
    
      # Convenience wrapper around #get_dna: builds a single SEGMENT for
      # entry_point:start,stop and returns the +sequence+ of the first
      # Bio::DAS::DNA record that #get_dna yields.
      def dna(dsn, entry_point, start, stop)
        region = Bio::DAS::SEGMENT.region(entry_point, start, stop)
        records = get_dna(dsn, region)
        records.first.sequence
      end
    
      # Convenience wrapper around #get_features: builds a single SEGMENT
      # for entry_point:start,stop and returns the result of #get_features
      # for it.
      def features(dsn, entry_point, start, stop)
        region = Bio::DAS::SEGMENT.region(entry_point, start, stop)
        get_features(dsn, region)
      end
    
    
      # Returns an Array of Bio::DAS::DSN
      # Posts to "<server>/das/dsn" and builds one Bio::DAS::DSN per DSN
      # element of the response, filled from its SOURCE, MAPMASTER and
      # DESCRIPTION children. Other child elements are ignored.
      def get_dsn
        response = Bio::Command.post_form("#{@server}/das/dsn")
        xml = REXML::Document.new(response.body)
        sources = []
        xml.elements.each('/descendant::DSN') do |dsn_node|
          dsn = DSN.new
          dsn_node.elements.each do |child|
            case child.name
            when 'SOURCE'
              dsn.source = child.text
              dsn.source_id = child.attributes['id']
              dsn.source_version = child.attributes['version']
            when 'MAPMASTER'
              dsn.mapmaster = child.text
            when 'DESCRIPTION'
              dsn.description = child.text
              dsn.description_href = child.attributes['href']
            end
          end
          sources << dsn
        end
        sources
      end
    
      # Returns Bio::DAS::ENTRY_POINT.
      # The 'dsn' can be a String or a Bio::DAS::DSN object.
      # Posts to "<server>/das/<dsn>/entry_points" and returns an ENTRY_POINT
      # whose +segments+ hold one SEGMENT per child of each ENTRY_POINTS
      # element. A segment's +stop+ falls back to the 'size' attribute when
      # 'stop' is absent.
      def get_entry_points(dsn)
        src = dsn.instance_of?(Bio::DAS::DSN) ? dsn.source : dsn
        entry_point = ENTRY_POINT.new
        response = Bio::Command.post_form("#{@server}/das/#{src}/entry_points")
        xml = REXML::Document.new(response.body)
        xml.elements.each('/descendant::ENTRY_POINTS') do |points|
          entry_point.href = points.attributes['href']
          entry_point.version = points.attributes['version']
          points.elements.each do |seg_node|
            attrs = seg_node.attributes
            segment = SEGMENT.new
            segment.entry_id = attrs['id']
            segment.start = attrs['start']
            segment.stop = attrs['stop'] || attrs['size']
            segment.orientation = attrs['orientation']
            segment.subparts = attrs['subparts']
            segment.description = seg_node.text
            entry_point.segments << segment
          end
        end
        entry_point
      end
    
      # Returns an Array of Bio::DAS::DNA.
      # The 'dsn' can be a String or a Bio::DAS::DSN object.
      # The 'segments' can be a Bio::DAS::SEGMENT object or an Array of
      # Bio::DAS::SEGMENT
      # Posts one "segment=id:start,stop" option per segment to
      # "<server>/das/<dsn>/dna" and returns one Bio::DAS::DNA per SEQUENCE
      # element of the response. Each child element of a SEQUENCE supplies
      # the nucleotide text (wrapped in Bio::Sequence::NA) and its 'length'.
      def get_dna(dsn, segments)
        dsn = dsn.source if dsn.instance_of?(DSN)
        segments = [segments] if segments.instance_of?(SEGMENT)

        params = []
        segments.each { |seg| params.push("segment=#{seg.entry_id}:#{seg.start},#{seg.stop}") }

        response = Bio::Command.post_form("#{@server}/das/#{dsn}/dna", params)
        xml = REXML::Document.new(response.body)
        records = []
        xml.elements.each('/descendant::SEQUENCE') do |seq_node|
          attrs = seq_node.attributes
          record = DNA.new
          record.entry_id = attrs['id']
          record.start = attrs['start']
          record.stop = attrs['stop']
          record.version = attrs['version']
          seq_node.elements.each do |dna_node|
            record.sequence = Bio::Sequence::NA.new(dna_node.text)
            record.length = dna_node.attributes['length'].to_i
          end
          records << record
        end
        records
      end
    
      # Returns an Array of Bio::DAS::SEQUENCE.
      # The 'dsn' can be a String or a Bio::DAS::DSN object.
      # The 'segments' can be a Bio::DAS::SEGMENT object or an Array of
      # Bio::DAS::SEGMENT
      # Posts one "segment=id:start,stop" option per segment to
      # "<server>/das/<dsn>/sequence" and returns one Bio::DAS::SEQUENCE per
      # SEQUENCE element of the response. The element text is wrapped
      # according to the 'moltype' attribute; unrecognized or missing
      # moltypes keep the plain text.
      def get_sequence(dsn, segments)
        dsn = dsn.source if dsn.instance_of?(DSN)
        segments = [segments] if segments.instance_of?(SEGMENT)

        params = []
        segments.each { |seg| params.push("segment=#{seg.entry_id}:#{seg.start},#{seg.stop}") }

        response = Bio::Command.post_form("#{@server}/das/#{dsn}/sequence", params)
        xml = REXML::Document.new(response.body)
        records = []
        xml.elements.each('/descendant::SEQUENCE') do |seq_node|
          attrs = seq_node.attributes
          record = SEQUENCE.new
          record.entry_id = attrs['id']
          record.start = attrs['start']
          record.stop = attrs['stop']
          record.moltype = attrs['moltype']
          record.version = attrs['version']
          record.sequence =
            case record.moltype
            when /dna|rna/i      # 'DNA', 'ssRNA', 'dsRNA'
              Bio::Sequence::NA.new(seq_node.text)
            when /protein/i      # 'Protein'
              Bio::Sequence::AA.new(seq_node.text)
            else
              seq_node.text
            end
          records << record
        end
        records
      end
    
      # Returns a Bio::DAS::TYPES object.
      # The 'dsn' can be a String or a Bio::DAS::DSN object.
      # The 'segments' is optional and can be a Bio::DAS::SEGMENT object or
      # an Array of Bio::DAS::SEGMENT
      # Posts one "segment=id:start,stop" option per segment to
      # "<server>/das/<dsn>/types" and returns a TYPES object. Each child of
      # a GFF element becomes a SEGMENT, and each of that child's own
      # children becomes a TYPE whose +count+ is the element text (to_i).
      def get_types(dsn, segments = [])	# argument 'type' is deprecated
        dsn = dsn.source if dsn.instance_of?(DSN)
        segments = [segments] if segments.instance_of?(SEGMENT)

        params = []
        segments.each { |seg| params.push("segment=#{seg.entry_id}:#{seg.start},#{seg.stop}") }

        types = TYPES.new
        response = Bio::Command.post_form("#{@server}/das/#{dsn}/types", params)
        xml = REXML::Document.new(response.body)
        xml.elements.each('/descendant::GFF') do |gff_node|
          types.version = gff_node.attributes['version']
          types.href = gff_node.attributes['href']
          gff_node.elements.each do |seg_node|
            seg_attrs = seg_node.attributes
            segment = SEGMENT.new
            segment.entry_id = seg_attrs['id']
            segment.start = seg_attrs['start']
            segment.stop = seg_attrs['stop']
            segment.version = seg_attrs['version']
            segment.label = seg_attrs['label']
            seg_node.elements.each do |type_node|
              type_attrs = type_node.attributes
              t = TYPE.new
              t.entry_id = type_attrs['id']
              t.method = type_attrs['method']
              t.category = type_attrs['category']
              t.count = type_node.text.to_i
              segment.types << t
            end
            types.segments << segment
          end
        end
        types
      end
    
      # Returns a Bio::DAS::GFF object.
      # The 'dsn' can be a String or a Bio::DAS::DSN object.
      # The 'segments' is optional and can be a Bio::DAS::SEGMENT object or
      # an Array of Bio::DAS::SEGMENT
      # Posts to "<server>/das/<dsn>/features" and parses the GFF response.
      #
      # dsn::         String or Bio::DAS::DSN (its +source+ is used)
      # segments::    a Bio::DAS::SEGMENT or an Array of them; each adds a
      #               "segment=id:start,stop" option
      # categorize::  when true, adds "categorize=yes"
      # feature_ids:: each adds a "feature_id=..." option
      # group_ids::   each adds a "group_id=..." option
      #
      # Returns a Bio::DAS::GFF whose +segments+ contain one SEGMENT per
      # SEGMENT element, each holding one FEATURE per child element.
      def get_features(dsn, segments = [], categorize = false, feature_ids = [], group_ids = [])
        # arguments 'type' and 'category' are deprecated
        gff = GFF.new

        dsn = dsn.source if dsn.instance_of?(DSN)
        segments = [segments] if segments.instance_of?(SEGMENT)

        opts = []
        segments.each do |s|
          opts << "segment=#{s.entry_id}:#{s.start},#{s.stop}"
        end
        if categorize
          opts << "categorize=yes"	# default is 'no'
        end
        feature_ids.each do |fid|
          opts << "feature_id=#{fid}"
        end
        group_ids.each do |gid|
          opts << "group_id=#{gid}"
        end

        result = Bio::Command.post_form("#{@server}/das/#{dsn}/features", opts)
        doc = REXML::Document.new(result.body)
        doc.elements.each('/descendant::GFF') do |gff_node|
          gff.version = gff_node.attributes['version']
          gff.href = gff_node.attributes['href']
          gff_node.elements.each('SEGMENT') do |seg_node|
            segment = SEGMENT.new
            segment.entry_id = seg_node.attributes['id']
            segment.start = seg_node.attributes['start']
            segment.stop = seg_node.attributes['stop']
            segment.version = seg_node.attributes['version']
            segment.label = seg_node.attributes['label']
            seg_node.elements.each do |feature_node|
              feature = FEATURE.new
              feature.entry_id = feature_node.attributes['id']
              feature.label = feature_node.attributes['label']
              feature_node.elements.each do |item|
                case item.name
                when 'TYPE'
                  type = TYPE.new
                  type.entry_id = item.attributes['id']
                  type.category = item.attributes['category']
                  # The attribute is spelled 'reference'; the previous
                  # 'referrence' lookup never matched, leaving this nil.
                  type.reference = item.attributes['reference']
                  type.label = item.text
                  feature.types << type
                when 'METHOD'
                  feature.method_id = item.attributes['id']
                  feature.method = item.text
                when 'START'
                  feature.start = item.text
                when 'STOP', 'END'
                  feature.stop = item.text
                when 'SCORE'
                  feature.score = item.text
                when 'ORIENTATION'
                  feature.orientation = item.text
                when 'PHASE'
                  feature.phase = item.text
                when 'NOTE'
                  feature.notes << item.text
                when 'LINK'
                  link = LINK.new
                  link.href = item.attributes['href']
                  link.text = item.text
                  feature.links << link
                when 'TARGET'
                  target = TARGET.new
                  target.entry_id = item.attributes['id']
                  target.start = item.attributes['start']
                  target.stop = item.attributes['stop']
                  target.name = item.text
                  feature.targets << target
                when 'GROUP'
                  group = GROUP.new
                  group.entry_id = item.attributes['id']
                  group.label = item.attributes['label']
                  group.type = item.attributes['type']
                  item.elements.each do |group_item|
                    case group_item.name
                    when 'NOTE'		# in GROUP
                      group.notes << group_item.text
                    when 'LINK'		# in GROUP
                      link = LINK.new
                      link.href = group_item.attributes['href']
                      link.text = group_item.text
                      group.links << link
                    when 'TARGET'		# in GROUP
                      target = TARGET.new
                      target.entry_id = group_item.attributes['id']
                      target.start = group_item.attributes['start']
                      target.stop = group_item.attributes['stop']
                      target.name = group_item.text
                      group.targets << target
                    end
                  end
                  feature.groups << group
                end
              end
              segment.features << feature
            end
            gff.segments << segment
          end
        end
        gff
      end
    
    
      # Plain value holder for one data source entry as filled in by
      # Bio::DAS#get_dsn.
      class DSN
        attr_accessor :source, :source_id, :source_version
        attr_accessor :mapmaster
        attr_accessor :description, :description_href
      end
    
      # Container for the segments of an entry_points response, plus the
      # +href+ and +version+ attributes.
      class ENTRY_POINT
        attr_accessor :href, :version
        attr_reader :segments

        # Starts with an empty segment list.
        def initialize
          @segments = []
        end

        # Yields every stored segment in insertion order.
        def each
          @segments.each { |segment| yield segment }
        end
      end
    
      # A region of a reference sequence. Also reused as the per-segment
      # container for FEATURE and TYPE objects in parsed responses.
      class SEGMENT
        attr_accessor :entry_id, :start, :stop, :orientation, :description
        attr_accessor :subparts                     # optional
        attr_accessor :features, :version, :label   # for FEATURE
        attr_accessor :types                        # for TYPE

        # Builds a segment with only entry_id, start and stop set.
        def self.region(entry_id, start, stop)
          seg = new
          seg.entry_id, seg.start, seg.stop = entry_id, start, stop
          seg
        end

        # Starts with empty +features+ and +types+ lists.
        def initialize
          @features = []
          @types = []
        end
      end
    
      # Plain value holder for one record returned by Bio::DAS#get_dna.
      class DNA
        attr_accessor :entry_id, :start, :stop, :version
        attr_accessor :sequence, :length
      end
    
      # Plain value holder for one record returned by Bio::DAS#get_sequence.
      class SEQUENCE
        attr_accessor :entry_id, :start, :stop
        attr_accessor :moltype, :version, :sequence
      end
    
      # Result container of Bio::DAS#get_types; adds nothing to ENTRY_POINT.
      class TYPES < ENTRY_POINT
      end
    
      # Plain value holder for a TYPE element.
      # Note that the +method+ accessor replaces Object#method on instances.
      class TYPE
        attr_accessor :entry_id, :method, :category, :count
        attr_accessor :reference, :label   # for FEATURE
      end
    
      # Result container of Bio::DAS#get_features.
      class GFF
        attr_accessor :version, :href
        attr_reader :segments

        # Starts with an empty segment list.
        def initialize
          @segments = []
        end
      end
    
      # One feature of a segment. Scalar fields are writable; the list
      # fields (notes, links, types, targets, groups) are appended to.
      # Note that the +method+ accessor replaces Object#method on instances.
      class FEATURE
        attr_reader :notes, :links, :types, :targets, :groups
        attr_accessor :entry_id, :label
        attr_accessor :method_id, :method, :start, :stop, :score, :orientation, :phase

        # Starts with five separate empty lists.
        def initialize
          @notes = []
          @links = []
          @types = []
          @targets = []
          @groups = []
        end
      end
    
      # Plain value holder for a LINK element: its href attribute and text.
      class LINK
        attr_accessor :href
        attr_accessor :text
      end
    
      # Plain value holder for a TARGET element.
      class TARGET
        attr_accessor :entry_id, :start, :stop
        attr_accessor :name
      end
    
      # A GROUP element of a feature, with its own notes, links and targets.
      class GROUP
        attr_reader :notes, :links, :targets
        attr_accessor :entry_id, :label, :type

        # Starts with three separate empty lists.
        def initialize
          @notes = []
          @links = []
          @targets = []
        end
      end
    
    end
    
    end # module Bio
    
    bio-1.4.3.0001/lib/bio/io/ddbjxml.rb0000644000004100000410000002475412200110570016576 0ustar  www-datawww-data#
    # = bio/io/ddbjxml.rb - DDBJ SOAP server access class
    #
    # Copyright::	Copyright (C) 2003, 2004
    #		Toshiaki Katayama 
    # License::	The Ruby License
    #
    # $Id:$
    #
    
    require 'bio/io/soapwsdl'
    require 'bio/db/genbank/ddbj'
    
    
    module Bio
    class DDBJ
    
    
    # = Bio::DDBJ::XML
    #
    # Accessing the DDBJ web services.
    #
    # * http://xml.nig.ac.jp/
    # * http://xml.nig.ac.jp/wsdl/index.jsp
    #
    class XML < Bio::SOAPWSDL
    
      # Common prefix of every WSDL location used by the service classes.
      BASE_URI = 'http://xml.nig.ac.jp/wsdl/'

      # set default to GetEntry
      SERVER_URI = "#{BASE_URI}GetEntry.wsdl"
    
      # wsdl:: WSDL location handed to the superclass constructor; when it is
      #        nil or false the SERVER_URI constant of the receiver's own
      #        class is used instead.
      def initialize(wsdl = nil)
        wsdl ||= self.class::SERVER_URI
        super(wsdl)
      end
    
      # === Description
      #
      # DDBJ XML BLAST Database Search 
      #
      # * http://xml.nig.ac.jp/doc/Blast.txt
      #
      # === Examples
      #
      #   serv = Bio::DDBJ::XML::Blast.new
      #   program = 'blastp'
      #   database = 'SWISS'
      #   query = "MSSRIARALALVVTLLHLTRLALSTCPAACHCPLEAPKCAPGVGLVRDGCGCCKVCAKQL"
      #   
      #   report = serv.searchSimple(program, database, query)
      #   Bio::Blast::Default::Report.new(report).each_hit do |hit|
      #     hit.hsps.find_all {|x| x.evalue < 0.1 }.each do |hsp|
      #       p [hsp.evalue, hsp.identity, hsp.definition]
      #     end
      #   end
      #  
      #   program = 'tblastn'
      #   database = 'ddbjvrl'
      #   param = '-m 8 -e 0.001'
      #   puts serv.searchParam(program, database, query, param)
      # 
      # === WSDL Methods
      # 
      # * searchSimple(program, database, query)
      #
      # Returns a blast report in the default format.
      #
      # * searchParam(program, database, query, param)
      #
      # Blasts with param and returns a blast report.
      #
      # === References
      #
      # * http://xml.nig.ac.jp/doc/Blast.txt
      #
      class Blast < XML
        # WSDL location for this service, built from XML::BASE_URI.
        SERVER_URI = "#{BASE_URI}Blast.wsdl"
      end
    
    
      # === ClustalW
      # 
      # Multiple sequence alignment using ClustalW.
      #
      # * http://xml.nig.ac.jp/doc/ClustalW.txt
      #
      # === Examples
      #
      #   serv = Bio::DDBJ::XML::ClustalW.new
      #
      #   query = <<END
      #   > RABSTOUT   rabbit Guinness receptor
      #   LKMHLMGHLKMGLKMGLKGMHLMHLKHMHLMTYTYTTYRRWPLWMWLPDFGHAS
      #   ADSCVCAHGFAVCACFAHFDVCFGAVCFHAVCFAHVCFAAAVCFAVCAC
      #   > MUSNOSE   mouse nose drying factor
      #   mhkmmhkgmkhmhgmhmhglhmkmhlkmgkhmgkmkytytytryrwtqtqwtwyt
      #   fdgfdsgafdagfdgfsagdfavdfdvgavfsvfgvdfsvdgvagvfdv
      #   > HSHEAVEN    human Guinness receptor repeat
      #   mhkmmhkgmkhmhgmhmhg   lhmkmhlkmgkhmgkmk  ytytytryrwtqtqwtwyt
      #   fdgfdsgafdagfdgfsag   dfavdfdvgavfsvfgv  dfsvdgvagvfdv
      #   mhkmmhkgmkhmhgmhmhg   lhmkmhlkmgkhmgkmk  ytytytryrwtqtqwtwyt
      #   fdgfdsgafdagfdgfsag   dfavdfdvgavfsvfgv  dfsvdgvagvfdv
      #   END
      #
      #   puts serv.analyzeSimple(query)
      #   puts serv.analyzeParam(query, '-align -matrix=blosum')
      #
      # === WSDL Methods
      #
      # * analyzeSimple(query)
      # * analyzeParam(query, param)
      #
      # === References
      #
      # * http://xml.nig.ac.jp/doc/ClustalW.txt
      #
      class ClustalW < XML
        # WSDL location for this service, built from XML::BASE_URI.
        SERVER_URI = "#{BASE_URI}ClustalW.wsdl"
      end
    
    
      # == DDBJ
      #
      # Retrieves a sequence entry from the DDBJ DNA Data Bank Japan.
      #
      # * http://xml.nig.ac.jp/doc/DDBJ.txt
      #
      # === Examples
      #
      #   serv = Bio::DDBJ::XML::DDBJ.new
      #   puts serv.getFFEntry('AB000050')
      #   puts serv.getXMLEntry('AB000050')
      #   puts serv.getFeatureInfo('AB000050', 'cds')
      #   puts serv.getAllFeatures('AB000050')
      #   puts serv.getRelatedFeatures('AL121903', '59000', '64000')
      #   puts serv.getRelatedFeaturesSeq('AL121903', '59000', '64000')
      #
      # === WSDL Methods 
      #
      # * getFFEntry(accession)
      # * getXMLEntry(accession)
      # * getFeatureInfo(accession, feature)
      # * getAllFeatures(accession)
      # * getRelatedFeatures(accession, start, stop)
      # * getRelatedFeaturesSeq(accession, start, stop)
      #
      # === References
      #
      # * http://xml.nig.ac.jp/doc/DDBJ.txt
      #
      class DDBJ < XML
        # WSDL location for this service, built from XML::BASE_URI.
        SERVER_URI = "#{BASE_URI}DDBJ.wsdl"
      end
    
    
      # == Fasta
      # 
      # Searching database using the Fasta package.
      #
      # * http://xml.nig.ac.jp/doc/Fasta.txt
      # 
      # === Examples
      #
      #   serv = Bio::DDBJ::XML::Fasta.new
      #   query = ">Test\nMSDGAVQPDG GQPAVRNERA TGSGNGSGGG GGGGSGGVGI"
      #    
      #   puts serv.searchSimple('fasta34', 'PDB', query)
      #   query = ">Test\nAGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
      #   puts serv.searchParam('fastx34_t', 'PDB', query, '-n')
      #
      # === WSDL Methods
      #
      # * searchSimple(program, database, query)
      # * searchParam(program, database, query, param)
      #
      # === References
      #
      # * http://xml.nig.ac.jp/doc/Fasta.txt
      #
      class Fasta < XML
        # WSDL location for this service, built from XML::BASE_URI.
        SERVER_URI = "#{BASE_URI}Fasta.wsdl"
      end
    
    
      # == GetEntry
      #
      # Retrieves database entries.
      #
      # * http://xml.nig.ac.jp/doc/GetEntry.txt
      #
      # === Examples
      #
      #  serv = Bio::DDBJ::XML::GetEntry.new
      #  puts serv.getDDBJEntry('AB000050')
      #  puts serv. getPDBEntry('1AAR')
      #
      # === WSDL Methods
      #
      # * getEntry(database, var, param1, param2)
      # * getEntry(database, var)
      # * getDDBJEntry(accession)
      # * getDDBJCONEntry(accession)
      # * getDDBJVerEntry(accession)
      # * getLocus_DDBJEntry(locus)
      # * getGene_DDBJEntry(gene)
      # * getProd_DDBJEntry(products)
      # * getPID_DDBJEntry(pid)
      # * getClone_DDBJEntry(clone)
      # * getXML_DDBJEntry(accession)
      # * getEMBLEntry(accession)
      # * getSWISSEntry(accession)
      # * getPIREntry(accession)
      # * getPRFEntry(accession)
      # * getPDBEntry(accession)
      # * getQVEntry(accession)
      # * getDADEntry(accession)
      # * getPID_DADEntry(pid)
      # * getFASTA_DDBJEntry(accession)
      # * getFASTA_DDBJCONEntry(accession)
      # * getFASTA_DDBJVerEntry(accession)
      # * getFASTA_DDBJSeqEntry(accession, start, end)
      # * getFASTA_DADEntry(accession)
      # * getFASTA_PIREntry(accession)
      # * getFASTA_SWISSEntry(accession)
      # * getFASTA_PDBEntry(accession)
      # * getFASTA_PRFEntry(accession)
      # * getFASTA_CDSEntry(accession)
      #
      # === References
      #
      # * http://xml.nig.ac.jp/doc/GetEntry.txt
      #
      class GetEntry < XML
        # WSDL location for this service, built from XML::BASE_URI.
        SERVER_URI = "#{BASE_URI}GetEntry.wsdl"
      end
    
    
      # === Gib
      # 
      # Genome Information broker
      #
      # * http://xml.nig.ac.jp/doc/Gib.txt
      #
      # === Examples
      #
      #   serv = Bio::DDBJ::XML::Gib.new
      #   puts serv.getOrganismList
      #   puts serv.getChIDList
      #   puts serv.getOrganismNameFromChid('Sent_CT18:')
      #   puts serv.getChIDFromOrganismName('Aquifex aeolicus VF5')
      #   puts serv.getAccession('Ecol_K12_MG1655:')
      #   puts serv.getPieceNumber('Mgen_G37:')
      #   puts serv.getDivision('Mgen_G37:')
      #   puts serv.getType('Mgen_G37:')
      #   puts serv.getCDS('Aaeo_VF5:ece1')
      #   puts serv.getFlatFile('Nost_PCC7120:pCC7120zeta')
      #   puts serv.getFastaFile('Nost_PCC7120:pCC7120zeta', 'cdsaa')
      #
      # === WSDL Methods
      #
      # * getOrganismList
      # * getChIDList
      # * getOrganismNameFromChid(chid)
      # * getChIDFromOrganismName(orgName)
      # * getAccession(chid)
      # * getPieceNumber(chid)
      # * getDivision(chid)
      # * getType(chid)
      # * getFlatFile(chid)
      # * getFastaFile(chid, type)
      # * getCDS(chid)
      #
      # === References
      #
      # * http://xml.nig.ac.jp/doc/Gib.txt
      #
      class Gib < XML
        # WSDL location for this service, built from XML::BASE_URI.
        SERVER_URI = "#{BASE_URI}Gib.wsdl"
      end
    
      
      # === Gtop
      #
      # GTOP: Gene to protein.
      #
      # * http://xml.nig.ac.jp/doc/Gtop.txt
      #
      # === Examples
      #
      #   serv = Bio::DDBJ::XML::Gtop.new
      #   puts serv.getOrganismList
      #   puts serv.getMasterInfo('thrA', 'ecol0')
      #
      # === WSDL Methods
      #
      # * getOrganismList
      # * getMasterInfo(orfID, organism)
      #
      # === References
      #
      # * http://xml.nig.ac.jp/doc/Gtop.txt
      #
      class Gtop < XML
        # WSDL location for this service, built from XML::BASE_URI.
        SERVER_URI = "#{BASE_URI}Gtop.wsdl"
      end
    
    
      # === PML
      #
      # Variation database
      #
      # * http://xml.nig.ac.jp/doc/PML.txt
      # 
      # === Examples
      #
      #   serv = Bio::DDBJ::XML::PML.new
      #   puts serv.getVariation('1')
      #
      # === WSDL Methods
      #
      # * searchVariation(field, query, order)
      # * searchVariationSimple(field, query)
      # * searchFrequency(field, query, order)
      # * searchFrequencySimple(field, query)
      # * getVariation(variation_id)
      # * getFrequency(variation_id, population_id)
      #
      # === References
      #
      # * http://xml.nig.ac.jp/doc/PML.txt
      #
      class PML < XML
        # WSDL location for this service, built from XML::BASE_URI.
        SERVER_URI = "#{BASE_URI}PML.wsdl"
      end
    
      # === RequestManager
      #
      # Sequence Retrieving System
      # 
      # * http://xml.nig.ac.jp/doc/RequestManager.txt
      # 
      # === Examples
      #
      #   serv = Bio::DDBJ::XML::RequestManager.new
      #   puts serv.getAsyncResult('20070420102828140')
      #
      # === WSDL Methods
      #
      # * getAsyncResult( requestId )
      # * getAsyncResultMime( requestId )
      #
      # === References
      #
      # * http://xml.nig.ac.jp/doc/RequestManager.txt
      #
      class RequestManager < XML
        # WSDL location for this service, built from XML::BASE_URI.
        SERVER_URI = BASE_URI + "RequestManager.wsdl"

        # RequestManager using DDBJ REST interface.
        # Returned by RequestManager.new when the SOAP driver cannot be
        # created (see below).
        class REST
          require 'bio/command'

          # Endpoint the form is posted to, and the value sent as the
          # 'service' form field.
          Uri = 'http://xml.nig.ac.jp/rest/Invoke'
          Service = 'RequestManager'

          # Posts a form with 'service', 'method' and 'requestId' (the
          # argument converted with to_s) to Uri and returns the body of
          # the response.
          def getAsyncResult(requestId)
            params = {
              'service'   => Service,
              'method'    => 'getAsyncResult',
              'requestId' => requestId.to_s
            }
            r = Bio::Command.post_form(Uri, params)
            r.body
          end
        end #class REST

        # Keep the inherited constructor reachable as the private class
        # method new_orig. The guard skips the alias when new_orig already
        # exists -- presumably so that loading this file twice does not
        # alias the overriding new (defined below) onto new_orig.
        unless defined? new_orig then
          class << RequestManager
            alias new_orig new
            private :new_orig
          end
        end

        # creates a new driver
        #
        # Tries the original constructor first; if that raises a
        # RuntimeError, a REST instance is returned instead.
        def self.new(wsdl = nil)
          begin
            new_orig(wsdl)
          rescue RuntimeError
            REST.new
          end
        end
      end #class RequestManager
    
      # === SRS
      #
      # Sequence Retrieving System
      # 
      # * http://xml.nig.ac.jp/doc/SRS.txt
      # 
      # === Examples
      #
      #   serv = Bio::DDBJ::XML::SRS.new
      #   puts serv.searchSimple('[pathway-des:sugar]')
      #   puts serv.searchParam('[swissprot-des:cohesin]', '-f seq -sf fasta')
      #
      # === WSDL Methods
      #
      # * searchSimple(query)
      # * searchParam(query, param)
      #
      # === References
      #
      # * http://xml.nig.ac.jp/doc/SRS.txt
      #
      class SRS < XML
        # WSDL location for this service, built from XML::BASE_URI.
        SERVER_URI = "#{BASE_URI}SRS.wsdl"
      end
      
    
      # === TxSearch
      #
      # Searching taxonomy information.
      # 
      # * http://xml.nig.ac.jp/doc/TxSearch.txt
      #
      # === Examples
      #
      #   serv = Bio::DDBJ::XML::TxSearch.new
      #   puts serv.searchSimple('*coli')
      #   puts serv.searchSimple('*tardigrada*')
      #   puts serv.getTxId('Escherichia coli')
      #   puts serv.getTxName('562')
      #
      #   query = ["Campylobacter coli", "Escherichia coli"].join("\n")
      #   rank = ["family", "genus"].join("\n")
      #   puts serv.searchLineage(query, rank, 'Bacteria')
      #
      # === WSDL Methods
      #
      # * searchSimple(tx_Name)
      # * searchParam(tx_Name, tx_Clas, tx_Rank, tx_Rmax, tx_Dcls)
      # * getTxId(tx_Name)
      # * getTxName(tx_Id)
      # * searchLineage(query, ranks, superkingdom)
      # 
      # === References
      #
      # * http://xml.nig.ac.jp/doc/TxSearch.txt
      #
      class TxSearch < XML
        # WSDL location for this service, built from XML::BASE_URI.
        SERVER_URI = "#{BASE_URI}TxSearch.wsdl"
      end
    
    end # XML
    
    end # DDBJ
    end # Bio
    
    bio-1.4.3.0001/lib/bio/io/biosql/0000755000004100000410000000000012200110570016102 5ustar  www-datawww-databio-1.4.3.0001/lib/bio/io/biosql/config/0000755000004100000410000000000012200110570017347 5ustar  www-datawww-databio-1.4.3.0001/lib/bio/io/biosql/config/database.yml0000644000004100000410000000057312200110570021643 0ustar  www-datawww-data#This is the database configuration specific for BioSQL
    #Users can configure their database here
    
    development:
      adapter: jdbcmysql
      database: bioseq
      username: febo
      password:
      hostname: localhost
    
    test:
      adapter: postgresql
      database: biorails_test
      username: rails
      password: 
      
    production:
      adapter: postgresql
      database: biorails_production
      username: rails
      password: bio-1.4.3.0001/lib/bio/io/biosql/biosql.rb0000644000004100000410000000355712200110570017732 0ustar  www-datawww-data#require 'dm-ar-finders'
    #require 'dm-core'
    require 'erb'
    require 'composite_primary_keys'
    
    module Bio
     class SQL
        # Abstract ActiveRecord base class for the BioSQL models. It only
        # carries shared table-naming settings; no connection is opened here.
        class DummyBase < ActiveRecord::Base
          #NOTE: When using PostgreSQL and no sequence name is set, the system discovers the name by default.
          #NOTE: This class does not establish the connection automatically.
          self.abstract_class = true
          self.pluralize_table_names = false
          #Prefix the usual id with the table name, to avoid specifying the primary key for every table.
          self.primary_key_prefix_type = :table_name_with_underscore
          #Disabled: automatic connection using the bundled config/database.yml.
          #biosql_configurations=YAML::load(ERB.new(IO.read(File.join(File.dirname(__FILE__),'./config', 'database.yml'))).result)
          #self.configurations=biosql_configurations
          #self.establish_connection "development"
        end #DummyBase
    
        require 'bio/io/biosql/ar-biosql'
        
      #  #no check is made
      # Connects DummyBase (and therefore every BioSQL model) to a database
      # and makes sure the basic ontologies and terms exist.
      #
      # ---
      # *Arguments*:
      # * _configurations_: hash of connection settings keyed by environment
      #   name, shaped like what YAML returns for config/database.yml.
      #   No validation of the keys is performed.
      # * _env_: name of the environment to connect to; converted to a String.
      # *Returns*:: whatever DummyBase.establish_connection returns
      def self.establish_connection(configurations, env)
        DummyBase.configurations = configurations
        handle = DummyBase.establish_connection(env.to_s)

        # Init of basis ontologies: create each one only when it is missing.
        ['Annotation Tags', 'SeqFeature Keys', 'SeqFeature Sources'].each do |ontology_name|
          unless Ontology.first(:conditions => ["name = ?", ontology_name])
            Ontology.create({:name => ontology_name})
          end
        end

        # Init of the basis term, attached to the 'SeqFeature Sources' ontology.
        unless Term.first(:conditions => ["name = ?", 'EMBLGenBankSwit'])
          sources = Ontology.first(:conditions => ["name = ?", 'SeqFeature Sources'])
          Term.create({:name => 'EMBLGenBankSwit', :ontology => sources})
        end

        handle
      end #establish_connection
      
      end #SQL
    end #Bio
    bio-1.4.3.0001/lib/bio/io/biosql/ar-biosql.rb0000644000004100000410000003060112200110570020320 0ustar  www-datawww-datamodule Bio
      class SQL
        # ActiveRecord model for a BioSQL biodatabase record.
        class Biodatabase < DummyBase
          # Bioentry records that point at this record through biodatabase_id.
          has_many :bioentries, :class_name =>"Bioentry", :foreign_key => "biodatabase_id"
          # Validation: no two biodatabase records may share a name.
          validates_uniqueness_of :name
        end
        # Join model that links a Bioentry to a Dbxref.
        class BioentryDbxref < DummyBase
          #delete: set_sequence_name nil
          # No single-column primary key is declared; the original note names
          # the identifying columns.
          set_primary_key nil #bioentry_id,dbxref_id
          belongs_to :bioentry, :class_name => "Bioentry"
          belongs_to :dbxref, :class_name => "Dbxref"
        end
    
        # Model relating two bioentries (object and subject) through a Term.
        class BioentryPath < DummyBase
          # No single-column primary key is declared for this model.
          set_primary_key nil
          #delete: set_sequence_name nil
          belongs_to :term, :class_name => "Term"
          # TODO (translated from Italian): to be fixed in order to proceed.
          belongs_to :object_bioentry, :class_name=>"Bioentry"
          belongs_to :subject_bioentry, :class_name=>"Bioentry"
        end #BioentryPath
    
        # Qualifier value attached to a Bioentry and described by a Term.
        class BioentryQualifierValue < DummyBase
          #NOTE: added rank to primary_keys, now it's finished.
          # Composite primary key: (bioentry_id, term_id, rank).
          set_primary_keys :bioentry_id, :term_id, :rank
          belongs_to :bioentry, :class_name => "Bioentry"
          belongs_to :term, :class_name => "Term"
        end #BioentryQualifierValue
      
        # ActiveRecord model for a BioSQL bioentry record and its associations.
        class Bioentry < DummyBase
          belongs_to :biodatabase, :class_name => "Biodatabase"
          belongs_to :taxon, :class_name => "Taxon"
          has_one :biosequence
          #, :class_name => "Biosequence", :foreign_key => "bioentry_id"
          # Comments and sequence features are returned ordered by rank.
          has_many :comments, :class_name =>"Comment", :order =>'rank'
          has_many :seqfeatures, :class_name => "Seqfeature", :order=>'rank'
          has_many :bioentry_references, :class_name=>"BioentryReference" #, :foreign_key => "bioentry_id"
          has_many :bioentry_dbxrefs, :class_name => "BioentryDbxref"
          # Relationships and paths in which this entry is the object or the
          # subject. The original author's notes (translated from Italian)
          # doubt that these four associations work correctly.
          has_many :object_bioentry_relationships, :class_name=>"BioentryRelationship", :foreign_key=>"object_bioentry_id" #not convinced; I believe this does not work correctly
          has_many :subject_bioentry_relationships, :class_name=>"BioentryRelationship", :foreign_key=>"subject_bioentry_id" #not convinced; I believe this does not work correctly
          has_many :object_bioentry_paths, :class_name=>"BioentryPath", :foreign_key=>"object_bioentry_id" #not convinced; I believe this does not work correctly
          has_many :subject_bioentry_paths, :class_name=>"BioentryPath", :foreign_key=>"subject_bioentry_id" #not convinced; I believe this does not work correctly
    
          # Sequence features restricted by the condition term.name='CDS',
          # with the type_term association eagerly included.
          has_many :cdsfeatures, :class_name=>"Seqfeature", :foreign_key =>"bioentry_id", :conditions=>["term.name='CDS'"], :include=>"type_term"
          has_many :references, :through=>:bioentry_references, :class_name => "Reference"
          # Goes through :bioentry_qualifier_values, which is declared just below.
          has_many :terms, :through=>:bioentry_qualifier_values, :class_name => "Term"
          #NOTE: added order_by for multiple hit and manage ranks correctly
          has_many :bioentry_qualifier_values, :order=>"bioentry_id,term_id,rank", :class_name => "BioentryQualifierValue"
            
          # Required for creation (translated from Italian):
          #name, accession, version
          # Disabled uniqueness validations:
          #  validates_uniqueness_of :accession, :scope=>[:biodatabase_id]
          #  validates_uniqueness_of :name, :scope=>[:biodatabase_id]
          #  validates_uniqueness_of :identifier, :scope=>[:biodatabase_id]

        end
        # Join model that links a Bioentry to a Reference.
        class BioentryReference < DummyBase
          # Composite primary key: (bioentry_id, reference_id, rank).
          set_primary_keys :bioentry_id, :reference_id, :rank
          belongs_to :bioentry, :class_name => "Bioentry"
          belongs_to :reference , :class_name => "Reference"
        end
        # Relationship between two bioentries (object and subject), typed by a Term.
        class BioentryRelationship < DummyBase
          #delete: set_primary_key "bioentry_relationship_id"
          # Name of the database sequence used to generate primary keys.
          # It follows the "<table>_pk_seq" pattern used by the other models in
          # this file; the previous value was misspelled ("bieontry_...") and
          # therefore named a sequence that does not match that pattern.
          set_sequence_name "bioentry_relationship_pk_seq"
          belongs_to :object_bioentry, :class_name => "Bioentry"
          belongs_to :subject_bioentry, :class_name => "Bioentry"
          belongs_to :term
        end
        # Sequence data that belongs to a Bioentry.
        class Biosequence < DummyBase
          # Composite primary key: (bioentry_id, version).
          set_primary_keys :bioentry_id, :version
          #delete: set_sequence_name "biosequence_pk_seq"
          belongs_to :bioentry, :foreign_key=>"bioentry_id"
          #has_one :bioentry
          #, :class_name => "Bioentry"
        end
        # Comment record that belongs to a Bioentry.
        class Comment < DummyBase
          belongs_to :bioentry, :class_name => "Bioentry"
        end
        # Qualifier value attached to a Dbxref and described by a Term.
        class DbxrefQualifierValue < DummyBase
          #think to use composite primary key
          # No primary key is declared; the original note lists candidate columns.
          set_primary_key nil #dbxref_id, term_id, rank
          #delete: set_sequence_name nil
          belongs_to :dbxref, :class_name => "Dbxref"
          belongs_to :term, :class_name => "Term"
        end
        # Database cross-reference record and the records that refer to it.
        class Dbxref < DummyBase
          #set_sequence_name "dbxref_pk_seq"
          has_many :dbxref_qualifier_values, :class_name => "DbxrefQualifierValue"
          has_many :locations, :class_name => "Location"
          has_many :references, :class_name=>"Reference"
          has_many :term_dbxrefs, :class_name => "TermDbxref"
          has_many :bioentry_dbxrefs, :class_name => "BioentryDbxref"
          #TODO: check whether the relation with bioentry is a has_and_belongs_to_many relationship, as specified in the schema overview.
        end
        # Qualifier value attached to a Location and described by a Term.
        class LocationQualifierValue <  DummyBase
          # No primary key is declared; the original note lists the identifying columns.
          set_primary_key nil #location_id, term_id
          #delete: set_sequence_name nil
          belongs_to :location, :class_name => "Location"
          belongs_to :term, :class_name => "Term"
        end
        # Position of a Seqfeature, described by start_pos, end_pos and strand.
        class Location < DummyBase
          #set_sequence_name "location_pk_seq"
          belongs_to :seqfeature, :class_name => "Seqfeature"
          belongs_to :dbxref, :class_name => "Dbxref"
          belongs_to :term, :class_name => "Term"
          has_many :location_qualifier_values, :class_name => "LocationQualifierValue"

          # Textual form of the location: "start..end", wrapped in
          # "complement(...)" when strand is -1.
          def to_s
            span = "#{start_pos}..#{end_pos}"
            strand == -1 ? "complement(#{span})" : span
          end

          # Subsequence of the owning bioentry covered by this location.
          #
          # Returns an empty String when the bioentry's biosequence has no
          # sequence stored; otherwise a Bio::Sequence::NA holding the
          # residues from start_pos to end_pos (1-based, inclusive),
          # reverse-complemented when strand is -1.
          def sequence
            residues = self.seqfeature.bioentry.biosequence.seq
            return "" if residues.nil?

            fragment = Bio::Sequence::NA.new(residues[start_pos-1..end_pos-1])
            fragment.reverse_complement! if strand == -1
            fragment
          end
        end
        # Ontology record; owns terms, term paths and term relationships.
        class Ontology < DummyBase
          has_many :terms, :class_name => "Term"
          has_many :term_paths, :class_name => "TermPath"
          has_many :term_relationships, :class_name => "TermRelationship"
        end
        # Reference record, linked to bioentries via BioentryReference.
        class Reference < DummyBase
          belongs_to :dbxref, :class_name => "Dbxref"
          has_many :bioentry_references, :class_name=>"BioentryReference"
          # Bioentries reached through the bioentry_references join records.
          has_many :bioentries, :through=>:bioentry_references
        end
        # Join model that links a Seqfeature to a Dbxref.
        class SeqfeatureDbxref < DummyBase
          # Composite primary key: (seqfeature_id, dbxref_id).
          set_primary_keys :seqfeature_id, :dbxref_id
          #delete: set_sequence_name nil
          belongs_to :seqfeature, :class_name => "Seqfeature", :foreign_key => "seqfeature_id"
          belongs_to :dbxref, :class_name => "Dbxref", :foreign_key => "dbxref_id"
        end
        # Model relating two seqfeatures (object and subject) through a Term.
        class SeqfeaturePath < DummyBase
          # Composite primary key: (object_seqfeature_id, subject_seqfeature_id, term_id).
          set_primary_keys :object_seqfeature_id, :subject_seqfeature_id, :term_id
          # No database sequence is associated with this model.
          set_sequence_name nil
          belongs_to :object_seqfeature, :class_name => "Seqfeature", :foreign_key => "object_seqfeature_id"
          belongs_to :subject_seqfeature, :class_name => "Seqfeature", :foreign_key => "subject_seqfeature_id"
          belongs_to :term, :class_name => "Term"
        end
        # Qualifier value attached to a Seqfeature and described by a Term.
        class SeqfeatureQualifierValue < DummyBase
          # Composite primary key: (seqfeature_id, term_id, rank).
          set_primary_keys  :seqfeature_id, :term_id, :rank
          # No database sequence is associated with this model.
          set_sequence_name nil
          belongs_to :seqfeature
          belongs_to :term, :class_name => "Term"
        end		
        class Seqfeature  "Bioentry"
          belongs_to :type_term, :class_name => "Term", :foreign_key => "type_term_id"
          belongs_to :source_term, :class_name => "Term", :foreign_key =>"source_term_id"
          has_many :seqfeature_dbxrefs, :class_name => "SeqfeatureDbxref", :foreign_key => "seqfeature_id"
          has_many :seqfeature_qualifier_values, :order=>'rank', :foreign_key => "seqfeature_id"
          #, :class_name => "SeqfeatureQualifierValue"
          has_many :locations, :class_name => "Location", :order=>'rank'
          has_many :object_seqfeature_paths, :class_name => "SeqfeaturePath", :foreign_key => "object_seqfeature_id"
          has_many :subject_seqfeature_paths, :class_name => "SeqfeaturePath", :foreign_key => "subject_seqfeature_id"
          has_many :object_seqfeature_relationships, :class_name => "SeqfeatureRelationship", :foreign_key => "object_seqfeature_id"
          has_many :subject_seqfeature_relationships, :class_name => "SeqfeatureRelationship", :foreign_key => "subject_seqfeature_id"
    
          #get the subsequence described by the locations objects
          def sequence
            return self.locations.inject(Bio::Sequence::NA.new("")){|seq, location| seq< "Term"
          belongs_to :object_seqfeature, :class_name => "Seqfeature"
          belongs_to :subject_seqfeature, :class_name => "Seqfeature"
        end
        # Name (of a given name_class) assigned to a Taxon.
        class TaxonName < DummyBase
          # Composite primary key: (taxon_id, name, name_class).
          set_primary_keys :taxon_id, :name, :name_class
          belongs_to :taxon, :class_name => "Taxon"
        end
        # Taxon record together with its names.
        class Taxon < DummyBase
          # Database sequence used to generate primary keys.
          set_sequence_name "taxon_pk_seq"
          has_many :taxon_names, :class_name => "TaxonName"
          # Convenience associations that pick a single TaxonName by name_class.
          has_one :taxon_scientific_name, :class_name => "TaxonName", :conditions=>"name_class = 'scientific name'"
          has_one :taxon_genbank_common_name, :class_name => "TaxonName", :conditions=>"name_class = 'genbank common name'"
          has_one :bioentry, :class_name => "Bioentry"
        end
        # Join model that links a Term to a Dbxref.
        class TermDbxref < DummyBase
          # No primary key is declared; the original note lists the identifying columns.
          set_primary_key nil #term_id, dbxref_id
          #delete: set_sequence_name nil
          belongs_to :term, :class_name => "Term"
          belongs_to :dbxref, :class_name => "Dbxref"
        end
        # Path between a subject term and an object term via a predicate term,
        # recorded within an Ontology.
        class TermPath < DummyBase
          # Database sequence used to generate primary keys.
          set_sequence_name "term_path_pk_seq"
          belongs_to :ontology, :class_name => "Ontology"
          belongs_to :subject_term, :class_name => "Term"
          belongs_to :object_term, :class_name => "Term"
          belongs_to :predicate_term, :class_name => "Term"
        end
        # Ontology term, referenced by qualifier values, locations,
        # seqfeatures, term paths and term relationships.
        class Term < DummyBase
          belongs_to :ontology, :class_name => "Ontology"
          has_many :seqfeature_qualifier_values, :class_name => "SeqfeatureQualifierValue"
          has_many :dbxref_qualifier_values, :class_name => "DbxrefQualifierValue"
          # Correctly spelled association; it is the one the :through
          # association for :bioentries below refers to. Without it,
          # has_many :bioentries pointed at an association that did not exist.
          has_many :bioentry_qualifier_values, :class_name => "BioentryQualifierValue"
          # Historical misspelling ("qualifer"), kept so that existing callers
          # of Term#bioentry_qualifer_values keep working.
          has_many :bioentry_qualifer_values, :class_name => "BioentryQualifierValue"
          has_many :bioentries, :through=>:bioentry_qualifier_values
          has_many :locations, :class_name => "Location"
          has_many :seqfeature_relationships, :class_name => "SeqfeatureRelationship"
          has_many :term_dbxrefs, :class_name => "TermDbxref"
          has_many :term_relationship_terms, :class_name => "TermRelationshipTerm"
          has_many :term_synonyms, :class_name => "TermSynonym"
          has_many :location_qualifier_values, :class_name => "LocationQualifierValue"
          # Seqfeatures that use this term as their type / as their source.
          has_many :seqfeature_types, :class_name => "Seqfeature", :foreign_key => "type_term_id"
          has_many :seqfeature_sources, :class_name => "Seqfeature", :foreign_key => "source_term_id"
          # Term paths in which this term is the subject / predicate / object.
          has_many :term_path_subjects, :class_name => "TermPath", :foreign_key => "subject_term_id"
          has_many :term_path_predicates, :class_name => "TermPath", :foreign_key => "predicate_term_id"
          has_many :term_path_objects, :class_name => "TermPath", :foreign_key => "object_term_id"
          # Term relationships in which this term is the subject / predicate / object.
          has_many :term_relationship_subjects, :class_name => "TermRelationship", :foreign_key =>"subject_term_id"
          has_many :term_relationship_predicates, :class_name => "TermRelationship", :foreign_key =>"predicate_term_id"
          has_many :term_relationship_objects, :class_name => "TermRelationship", :foreign_key =>"object_term_id"
          has_many :seqfeature_paths, :class_name => "SeqfeaturePath"
        end
        # Relationship (subject, predicate, object) between terms, recorded
        # within an Ontology.
        class TermRelationship < DummyBase
          # Database sequence used to generate primary keys.
          set_sequence_name "term_relationship_pk_seq"
          belongs_to :ontology, :class_name => "Ontology"
          belongs_to :subject_term, :class_name => "Term"
          belongs_to :predicate_term, :class_name => "Term"
          belongs_to :object_term, :class_name => "Term"
          has_one :term_relationship_term, :class_name => "TermRelationshipTerm"
        end
        # Links a TermRelationship to a Term.
        class TermRelationshipTerm < DummyBase
          #delete: set_sequence_name nil
          # The primary key column is term_relationship_id.
          set_primary_key :term_relationship_id
          belongs_to :term_relationship, :class_name => "TermRelationship"
          belongs_to :term, :class_name => "Term"
        end
        # Synonym record that belongs to a Term.
        class TermSynonym < DummyBase
          #delete: set_sequence_name nil
          # No primary key is declared for this model.
          set_primary_key nil
          belongs_to :term, :class_name => "Term"
        end
      end #SQL
    end #Bio
    bio-1.4.3.0001/lib/bio/io/dbget.rb0000644000004100000410000001230212200110570016221 0ustar  www-datawww-data#
    # = bio/io/dbget.rb - GenomeNet/DBGET client module
    #
    # Copyright::	Copyright (C) 2000, 2001
    #		Mitsuteru C. Nakao ,
    #		Toshiaki Katayama 
    # License::	The Ruby License
    #
    # $Id: dbget.rb,v 1.13 2007/04/05 23:35:41 trevor Exp $
    #
    # == DBGET
    #
    # Accessing the GenomeNet/DBGET data retrieval system
    # http://www.genome.jp/dbget/ within the intranet.
    #
    
    require 'socket'
    
    module Bio
    
    class DBGET

      # default DBGET server address
      # (formerly: SERV = "dbgetserv.genome.jp")
      SERV = "dbget.genome.jp"
      # default DBGET port number
      PORT = "3266"

      # Main class method to access DBGET server.  Optionally, this method
      # can be called with the alternative DBGET server address and the
      # TCP/IP port number.
      #
      # 'com' should be one of the following DBGET commands:
      #
      # * alink, bfind, bget, binfo, blink, bman, bref, btab, btit
      #
      # These methods are shortcut for the dbget commands.  Actually,
      # Bio::DBGET.((|com|))(arg) internally calls Bio::DBGET.dbget(com, arg).
      # Most of these methods accept the argument "-h" for help.
      #
      # 'arg' should be one of the following formats :
      #
      # * [options] db
      #   * specify the database name only for binfo, bman etc.
      # * [options] db:entry
      #   * specify the database name and the entry name to retrieve.
      # * [options] db entry1 entry2 ...
      #   * specify the database name and the list of entries to retrieve.
      #
      # Note that options in the above example can be omitted.  If 'arg' is
      # empty (or nil), the help message with a list of options for 'com'
      # will be shown by default.  Supported database names will be found at
      # the GenomeNet DBGET web page http://www.genome.jp/dbget/.
      #
      # When neither 'serv' nor 'port' is given and the environment variable
      # DBGET contains a "host:port" pair, that pair is used; anything still
      # missing falls back to SERV and PORT.
      #
      # Returns the text sent back by the server after its three header
      # lines.  The socket is always closed, even when the exchange raises.
      def self.dbget(com, arg, serv = nil, port = nil)

        unless serv or port               # if both of serv and port are nil
          if ENV["DBGET"] =~ /:/          # and ENV["DBGET"] holds "host:port"
            serv, port = ENV["DBGET"].split(':')
          end
        end
        serv ||= SERV
        port ||= PORT

        # An absent or empty argument asks the server for the help message.
        arg = "-h" if arg.nil? or arg.empty?

        query = "#{com} #{arg}\n"         # DBGET query string

        sock = TCPSocket.open("#{serv}", "#{port}")
        begin
          sock.write(query)               # submit query
          sock.flush                      # buffer flush

          sock.gets                       # skip "+Helo DBgetServ ..."
          sock.gets                       # skip "#If you see this message, ..."
          sock.gets                       # skip "*Request-IDent"

          sock.read                       # DBGET result
        ensure
          # Release the connection whether or not the exchange succeeded.
          sock.close
        end
      end

      # Show the version information of the DBGET server.
      def self.version
        dbget("bget", "-V")
      end


      #--
      # bacc("db entry")	- not supported : get accession(s)
      # bent("db entry")	- not supported : get entry name
      # lmarge("db entry")	- not supported
      #++

      # alink("db entry") method returns relations
      def self.alink(arg)
        dbget("alink", arg)
      end

      # bfind("db keyword") method searches entries by keyword
      def self.bfind(arg)
        dbget("bfind", arg)
      end

      # bget("db entry") method retrieves entries specified by the entry names
      def self.bget(arg)
        dbget("bget", arg)
      end

      # seq("db entry") method retrieves the first sequence of the entry
      #
      # Shortcut to retrieve the sequence of the entry in FASTA format.
      # This method is equivalent to Bio::DBGET.bget("-f -n 1 #{arg}") and
      # 'arg' should be the "db:entry" or "db entry1 entry2 ..." format.
      def self.seq(arg)
        dbget("bget", "-f -n 1 #{arg}")
      end

      # seq2("db entry") method retrieves the second sequence of the entry if any
      #
      # Shortcut to retrieve the second sequence of the entry in FASTA format.
      # This method is equivalent to Bio::DBGET.bget("-f -n 2 #{arg}").
      # Only useful when treating the KEGG GENES database entries which have
      # both AASEQ and NTSEQ fields. This method is obsolete and it is
      # recommended to use 'naseq' and 'aaseq' instead.
      def self.seq2(arg)
        dbget("bget", "-f -n 2 #{arg}")
      end

      # naseq("db entry") method retrieves the nucleic acid sequence of the
      # entry if any.
      def self.naseq(arg)
        dbget("bget", "-f -n n #{arg}")
      end

      # aaseq("db entry") method retrieves the amino acid sequence of the
      # entry if any.
      def self.aaseq(arg)
        dbget("bget", "-f -n a #{arg}")
      end

      # binfo("db") method retrieves the database information
      def self.binfo(arg)
        dbget("binfo", arg)
      end

      # blink("db entry") method retrieves the link information
      def self.blink(arg)
        dbget("blink", arg)
      end

      # bman("db entry") method shows the manual page
      def self.bman(arg)
        dbget("bman", arg)
      end

      # bref("db entry") method retrieves the references and authors
      def self.bref(arg)
        dbget("bref", arg)
      end

      # btab("db entry") method retrieves (and generates) the database alias table
      def self.btab(arg)
        dbget("btab", arg)
      end

      # btit("db entry ..") method retrieves the entry definition
      def self.btit(arg)
        dbget("btit", arg)
      end

    end
    
    end # module Bio
    
    
    # Demo, run only when this file is executed directly: queries the
    # default DBGET server and prints each result under a heading.
    if $PROGRAM_NAME == __FILE__
      keywords = 'sce tyrosin kinase'
      entry    = 'sce:YDL028C'

      puts "### DBGET version"
      p Bio::DBGET.version

      puts "### DBGET.dbget('bfind', '#{keywords}')"
      puts Bio::DBGET.dbget('bfind', keywords)

      puts "### DBGET.bfind('#{keywords}')"
      puts Bio::DBGET.bfind(keywords)

      puts "### DBGET.bget('#{entry}')"
      puts Bio::DBGET.bget(entry)

      puts "### DBGET.binfo('dbget')"
      puts Bio::DBGET.binfo('dbget')
    end
    
    
    bio-1.4.3.0001/lib/bio/io/fastacmd.rb0000644000004100000410000001015012200110570016715 0ustar  www-datawww-data#
    # = bio/io/fastacmd.rb - NCBI fastacmd wrapper class
    #
    # Copyright::  Copyright (C) 2005, 2006
    #              Shuji SHIGENOBU ,
    #              Toshiaki Katayama ,
    #              Mitsuteru C. Nakao ,
    #              Jan Aerts 
    # License::    The Ruby License
    #
    # $Id: fastacmd.rb,v 1.16 2007/04/05 23:35:41 trevor Exp $
    #
    
    require 'bio/db/fasta'
    require 'bio/io/flatfile'
    require 'bio/command'
    
    module Bio
    class Blast
    
    # = DESCRIPTION
    #
    # Retrieves FASTA formatted sequences from a blast database using 
    # NCBI fastacmd command.
    # 
    # This class requires 'fastacmd' command and a blast database  
    # (formatted using the '-o' option of 'formatdb').
    #
    # = USAGE
    #  require 'bio'
    #  
    #  fastacmd = Bio::Blast::Fastacmd.new("/db/myblastdb")
    #
    #  entry = fastacmd.get_by_id("sp:128U_DROME")
    #  fastacmd.fetch("sp:128U_DROME")
    #  fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"])
    #
    #  fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"]).each do |fasta|
    #    puts fasta
    #  end
    #
    # = REFERENCES
    #
    # * NCBI tool
    #   ftp://ftp.ncbi.nih.gov/blast/executables/LATEST/ncbi.tar.gz
    #
    # * fastacmd.html
    #   http://biowulf.nih.gov/apps/blast/doc/fastacmd.html
    #
    class Fastacmd

      include Enumerable

      # Database file path.
      attr_accessor :database

      # fastacmd command file path.
      attr_accessor :fastacmd

      # This method provides a handle to a BLASTable database, which you can then
      # use to retrieve sequences.
      # 
      # Prerequisites:
      # * You have created a BLASTable database with the '-o T' option.
      # * You have the NCBI fastacmd tool installed.
      #
      # For example, suppose the original input file looks like:
      #  >my_seq_1
      #  ACCGACCTCCGGAACGGATAGCCCGACCTACG
      #  >my_seq_2
      #  TCCGACCTTTCCTACCGCACACCTACGCCATCAC
      #  ...
      # and you've created a BLASTable database from that with the command
      #  cd /my_dir/
      #  formatdb -i my_input_file -t Test -n Test -o T
      # then you can get a handle to this database with the command
      #  fastacmd = Bio::Blast::Fastacmd.new("/my_dir/Test")
      # ---
      # *Arguments*:
      # * _database_:: path and name of BLASTable database
      def initialize(blast_database_file_path)
        @database = blast_database_file_path
        @fastacmd = 'fastacmd'
      end


      # Get the sequence of a specific entry in the BLASTable database.
      # For example:
      #  entry = fastacmd.get_by_id("sp:128U_DROME")
      # ---
      # *Arguments*:
      # * _id_: id of an entry in the BLAST database
      # *Returns*:: a Bio::FastaFormat object
      def get_by_id(entry_id)
        fetch(entry_id).shift
      end

      # Get the sequence for a _list_ of IDs in the database.
      #
      # For example:
      #  p fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"])
      #
      # This method always returns an array of Bio::FastaFormat objects, even when 
      # the result is a single entry.
      # ---
      # *Arguments*:
      # * _ids_: list of IDs to retrieve from the database
      #   (anything responding to +join+, or a single ID)
      # *Returns*:: array of Bio::FastaFormat objects
      def fetch(list)
        # A list of IDs is passed to fastacmd as one comma-separated string.
        if list.respond_to?(:join)
          entry_id = list.join(",")
        else
          entry_id = list
        end

        cmd = [ @fastacmd, '-d', @database, '-s', entry_id ]
        Bio::Command.call_command(cmd) do |io|
          io.close_write
          Bio::FlatFile.new(Bio::FastaFormat, io).to_a
        end
      end

      # Iterates over _all_ sequences in the database.
      #
      #  fastacmd.each_entry do |fasta|
      #    p [ fasta.definition[0..30], fasta.seq.size ]
      #  end
      #
      # When called without a block, an Enumerator is returned and the
      # fastacmd command is not run until the Enumerator is consumed.
      # ---
      # *Returns*:: a Bio::FastaFormat object for each iteration;
      #             the method itself returns self (or an Enumerator
      #             when no block is given)
      def each_entry
        # Without this guard the command would be spawned first and the
        # missing block only noticed at the first yield (LocalJumpError).
        return to_enum(:each_entry) unless block_given?

        cmd = [ @fastacmd, '-d', @database, '-D', '1' ]
        Bio::Command.call_command(cmd) do |io|
          io.close_write
          Bio::FlatFile.open(Bio::FastaFormat, io) do |f|
            f.each_entry do |entry|
              yield entry
            end
          end
        end
        self
      end
      alias each each_entry

    end # class Fastacmd
    
    end # class Blast
    end # module Bio
    
    # Demo, run only when this file is executed directly. The database
    # path and entry IDs are placeholders to be replaced with real values.
    if __FILE__ == $PROGRAM_NAME
      demo_db = Bio::Blast::Fastacmd.new("/path_to_my_db/db_name")

      # Single entry lookup.
      first_entry = demo_db.get_by_id('id_of_entry1')
      puts first_entry.class
      puts first_entry

      # Lookup of several entries at once.
      demo_db.fetch(['id_of_entry1','id_of_entry2']).each { |entry| puts entry }

      # Walk over every sequence in the database.
      demo_db.each_entry do |fasta|
        puts fasta.seq.size.to_s + "\t" + fasta.definition
      end
    end
    bio-1.4.3.0001/lib/bio/map.rb0000644000004100000410000003145612200110570015315 0ustar  www-datawww-data#
    # = bio/map.rb - biological mapping class
    #
    # Copyright::   Copyright (C) 2006 Jan Aerts 
    # License::     The Ruby License
    #
    # $Id: map.rb,v 1.11 2007/04/12 12:19:16 aerts Exp $
    
    require 'bio/location'
    
    module Bio
    
      # == Description
      #
      # The Bio::Map contains classes that describe mapping information
      # and can be used to contain linkage maps, radiation-hybrid maps,
      # etc.  As the same marker can be mapped to more than one map, and a
      # single map typically contains more than one marker, the link
      # between the markers and maps is handled by Bio::Map::Mapping
      # objects. Therefore, to link a map to a marker, a Bio::Map::Mapping
      # object is added to that Bio::Map. See usage below.
      #
      # Not only maps in the strict sense have map-like features (and
      # similarly not only markers in the strict sense have marker-like
      # features). For example, a microsatellite is something that can be
      # mapped on a linkage map (and hence becomes a 'marker'), but a
      # clone can also be mapped to a cytogenetic map. In that case, the
      # clone acts as a marker and has marker-like properties.  That same
      # clone can also be considered a 'map' when BAC-end sequences are
      # mapped to it. To reflect this flexibility, the modules
      # Bio::Map::ActsLikeMap and Bio::Map::ActsLikeMarker define methods
      # that are typical for maps and markers.
      # 
      #--
      # In a certain sense, a biological sequence also has map- and
      # marker-like properties: things can be mapped to it at certain
      # locations, and the sequence itself can be mapped to something else
      # (e.g. the BAC-end sequence example above, or a BLAST-result).
      #++
      # 
      # == Usage
      #
      #  my_marker1 = Bio::Map::Marker.new('marker1')
      #  my_marker2 = Bio::Map::Marker.new('marker2')
      #  my_marker3 = Bio::Map::Marker.new('marker3')
      #  
      #  my_map1 = Bio::Map::SimpleMap.new('RH_map_ABC (2006)', 'RH', 'cR')
      #  my_map2 = Bio::Map::SimpleMap.new('consensus', 'linkage', 'cM')
      #  
      #  my_map1.add_mapping_as_map(my_marker1, '17')
      #  my_map1.add_mapping_as_map(Bio::Map::Marker.new('marker2'), '5')
      #  my_marker3.add_mapping_as_marker(my_map1, '9')
      #  
      #  print "Does my_map1 contain marker3? => "
      #  puts my_map1.contains_marker?(my_marker3).to_s
      #  print "Does my_map2 contain marker3? => "
      #  puts my_map2.contains_marker?(my_marker3).to_s
      #  
      #  my_map1.mappings_as_map.sort.each do |mapping|
      #    puts [ mapping.map.name,
      #           mapping.marker.name,
      #           mapping.location.from.to_s,
      #           mapping.location.to.to_s ].join("\t")
      #  end
      #  puts my_map1.mappings_as_map.min.marker.name
      #
      #  my_map2.mappings_as_map.each do |mapping|
      #    puts [ mapping.map.name,
      #           mapping.marker.name,
      #           mapping.location.from.to_s,
      #           mapping.location.to.to_s ].join("\t")
      #  end
      #
      module Map
    
        # == Description
        #
        # The Bio::Map::ActsLikeMap module contains methods that are typical for
        # map-like things:
        #
        # * add markers with their locations (through Bio::Map::Mappings)
        # * check if a given marker is mapped to it,
        #   and can be mixed into other classes (e.g. Bio::Map::SimpleMap)
        # 
        # Classes that include this mixin should provide an array property
        # called mappings_as_map.
        #
        # For example:
        #
        #   class MyMapThing
        #     include Bio::Map::ActsLikeMap
        #     
        #     def initialize (name)
        #       @name = name
        #       @mappings_as_map = Array.new
        #     end
        #     attr_accessor :name, :mappings_as_map
        #    end
        #
        module ActsLikeMap

          # == Description
          #
          # Creates a Bio::Map::Mapping between this map and the given marker
          # and records it on both sides (this map's mappings_as_map and the
          # marker's mappings_as_marker). Nothing is recorded when the marker
          # already has an equal location on this map.
          # 
          # == Usage
          #
          #   # suppose we have a Bio::Map::SimpleMap object called my_map
          #   my_map.add_mapping_as_map(Bio::Map::Marker.new('marker_a'), '5')
          #
          # ---
          # *Arguments*:
          # * _marker_ (required): object whose class includes
          #   Bio::Map::ActsLikeMarker; otherwise a RuntimeError is raised
          # * _location_: location of mapping. Should be a _string_, not a _number_.
          # *Returns*:: itself
          def add_mapping_as_map(marker, location = nil)
            raise "[Error] marker is not object that implements Bio::Map::ActsLikeMarker" unless marker.class.include?(Bio::Map::ActsLikeMarker)

            position = location.nil? ? nil : Bio::Locations.new(location)
            new_mapping = Bio::Map::Mapping.new(self, marker, position)

            # Compare against every position the marker already has on this map.
            duplicate = false
            if marker.mapped_to?(self)
              matches = marker.positions_on(self).map do |known|
                known.equals?(Bio::Locations.new(location))
              end
              duplicate = matches.any?
            end

            unless duplicate
              self.mappings_as_map.push(new_mapping)
              marker.mappings_as_marker.push(new_mapping)
            end

            self
          end

          # Tells whether the given marker appears in any of this map's
          # mappings.
          #
          # ---
          # *Arguments*:
          # * _marker_: object whose class includes Bio::Map::ActsLikeMarker;
          #   otherwise a RuntimeError is raised
          # *Returns*:: true or false
          def contains_marker?(marker)
            raise "[Error] marker is not object that implements Bio::Map::ActsLikeMarker" unless marker.class.include?(Bio::Map::ActsLikeMarker)

            self.mappings_as_map.any? { |mapping| mapping.marker == marker }
          end

        end # ActsLikeMap
    
        # == Description
        #
        # The Bio::Map::ActsLikeMarker module contains methods that are
        # typical for marker-like things:
        #
        # * map it to one or more maps
        # * check if it's mapped to a given map
        #   and can be mixed into other classes (e.g. Bio::Map::Marker)
        # 
        # Classes that include this mixin should provide an array property
        # called mappings_as_marker.
        #
        # For example:
        #
        #   class MyMarkerThing
        #     include Bio::Map::ActsLikeMarker
        #     
        #     def initialize (name)
        #       @name = name
        #       @mappings_as_marker = Array.new
        #     end
        #     attr_accessor :name, :mappings_as_marker
        #    end
        #
        module ActsLikeMarker

          # == Description
          #
          # Adds a Bio::Map::Mapping object to its array of mappings.
          # 
          # == Usage
          #
          #   # suppose we have a Bio::Map::Marker object called marker_a
          #   marker_a.add_mapping_as_marker(Bio::Map::SimpleMap.new('my_map'), '5')
          #
          # ---
          # *Arguments*:
          # * _map_ (required): Bio::Map::SimpleMap object
          # * _location_: location of mapping. Should be a _string_, not a _number_.
          #   When omitted, the mapping is stored with a nil location.
          # *Returns*:: itself
          #
          # Nothing is added when this marker already has a mapping on _map_
          # at an equal location (two nil locations count as equal).
          # Raises RuntimeError if the class of _map_ does not include
          # Bio::Map::ActsLikeMap.
          def add_mapping_as_marker(map, location = nil)
            unless map.class.include?(Bio::Map::ActsLikeMap)
              raise "[Error] map is not object that implements Bio::Map::ActsLikeMap"
            end

            # A nil location is kept as nil instead of being passed to
            # Bio::Locations.new.
            new_location = location.nil? ? nil : Bio::Locations.new(location)

            already_mapped = false
            if self.mapped_to?(map)
              already_mapped = self.positions_on(map).any? do |loc|
                if loc.nil? || new_location.nil?
                  # never call equals? on, or with, a missing location
                  loc.nil? && new_location.nil?
                else
                  loc.equals?(new_location)
                end
              end
            end

            unless already_mapped
              # The same Mapping object is registered on both sides.
              my_mapping = Bio::Map::Mapping.new(map, self, new_location)
              self.mappings_as_marker.push(my_mapping)
              map.mappings_as_map.push(my_mapping)
            end

            return self
          end
          
          # Check whether this marker is mapped to a given Bio::Map::SimpleMap.
          # ---
          # *Arguments*:
          # * _map_: a Bio::Map::SimpleMap object
          # *Returns*:: true or false
          def mapped_to?(map)
            unless map.class.include?(Bio::Map::ActsLikeMap)
              raise "[Error] map is not object that implements Bio::Map::ActsLikeMap"
            end

            self.mappings_as_marker.any? { |mapping| mapping.map == map }
          end

          # Return all positions of this marker on a given map.
          # ---
          # *Arguments*:
          # * _map_: an object that mixes in Bio::Map::ActsLikeMap
          # *Returns*:: array with the location of every mapping of this marker
          # on _map_ (empty when there is none)
          def positions_on(map)
            unless map.class.include?(Bio::Map::ActsLikeMap)
              raise "[Error] map is not object that implements Bio::Map::ActsLikeMap"
            end

            self.mappings_on(map).map { |mapping| mapping.location }
          end

          # Return all mappings of this marker on a given map.
          # ---
          # *Arguments*:
          # * _map_: an object that mixes in Bio::Map::ActsLikeMap
          # *Returns*:: array of Bio::Map::Mapping objects
          def mappings_on(map)
            unless map.class.include?(Bio::Map::ActsLikeMap)
              raise "[Error] map is not object that implements Bio::Map::ActsLikeMap"
            end

            self.mappings_as_marker.select { |mapping| mapping.map == map }
          end

        end # ActsLikeMarker
    	  
        # == Description
        #
        # Creates a new Bio::Map::Mapping object, which links Bio::Map::ActsLikeMap-
        # and Bio::Map::ActsLikeMarker-like objects. This class is typically not
        # accessed directly, but through map- or marker-like objects.
        class Mapping

          include Comparable

          # The map, the marker and the location linked by this mapping.
          attr_accessor :map, :marker, :location

          # Creates a new Bio::Map::Mapping object
          # ---
          # *Arguments*:
          # * _map_: a Bio::Map::SimpleMap object
          # * _marker_: a Bio::Map::Marker object
          # * _location_: a Bio::Locations object
          def initialize(map, marker, location = nil)
            @map      = map
            @marker   = marker
            @location = location
          end

          # Compares the location of this mapping to another mapping.
          # ---
          # *Arguments*:
          # * other: Bio::Map::Mapping object on the very same map object
          # *Returns*:: the result of comparing the first element of this
          # mapping's location with the first element of the other mapping's
          # location using <=>
          #
          # Raises RuntimeError when _other_ is not a Bio::Map::Mapping, or when
          # the two mappings do not refer to the same map object.
          def <=>(other)
            raise "[Error] markers are not comparable" unless other.kind_of?(Bio::Map::Mapping)
            raise "[Error] maps have to be the same" unless @map.equal?(other.map)

            mine   = self.location[0]
            theirs = other.location[0]
            mine <=> theirs
          end
        end # Mapping
        
        # == Description
        #
        # This class handles the essential storage of name, type and units
        # of a map.  It includes Bio::Map::ActsLikeMap, and therefore
        # supports the methods of that module.
        # 
        # == Usage
        #
        #   my_map1 = Bio::Map::SimpleMap.new('RH_map_ABC (2006)', 'RH', nil, 'cR')
        #   my_map1.add_mapping_as_map(Bio::Map::Marker.new('marker_a'), '17')
        #   my_map1.add_mapping_as_map(Bio::Map::Marker.new('marker_b'), '5')
        #
        class SimpleMap

          include Bio::Map::ActsLikeMap

          # Name of the map
          attr_accessor :name

          # Type of the map
          attr_accessor :type

          # Length of the map
          attr_accessor :length

          # Units of the map
          attr_accessor :units

          # Mappings (array filled by the Bio::Map::ActsLikeMap methods)
          attr_accessor :mappings_as_map

          # Builds a new Bio::Map::SimpleMap object
          # ---
          # *Arguments*:
          # * name: name of the map
          # * type: type of the map (e.g. linkage, radiation_hybrid, cytogenetic, ...)
          # * length: length of the map
          # * units: unit of the map (e.g. cM, cR, ...)
          # *Returns*:: new Bio::Map::SimpleMap object
          def initialize(name = nil, type = nil, length = nil, units = nil)
            @name   = name
            @type   = type
            @length = length
            @units  = units
            @mappings_as_map = Array.new
          end

        end # SimpleMap
        
        # == Description
        #
        # This class handles markers that are anchored to a Bio::Map::SimpleMap.
        # It includes Bio::Map::ActsLikeMarker, and therefore supports the
        # methods of that module.
        # 
        # == Usage
        #
        #   marker_a = Bio::Map::Marker.new('marker_a')
        #   marker_b = Bio::Map::Marker.new('marker_b')
        #
        class Marker

          include Bio::Map::ActsLikeMarker

          # Name of the marker
          attr_accessor :name

          # Mappings (array filled by the Bio::Map::ActsLikeMarker methods)
          attr_accessor :mappings_as_marker

          # Builds a new Bio::Map::Marker object; it starts without mappings.
          # ---
          # *Arguments*:
          # * name: name of the marker
          # *Returns*:: new Bio::Map::Marker object
          def initialize(name)
            @mappings_as_marker = Array.new
            @name = name
          end

        end # Marker
    
      end # Map
    
    end # Bio
    bio-1.4.3.0001/lib/bio/db/0000755000004100000410000000000012200110570014567 5ustar  www-datawww-databio-1.4.3.0001/lib/bio/db/rebase.rb0000644000004100000410000003346412200110570016367 0ustar  www-datawww-data#
    # bio/db/rebase.rb - Interface for EMBOSS formatted REBASE files
    #
    # Author::    Trevor Wennblom  
    # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
    # License::   The Ruby License
    #
    #  $Id:$
    #
    
    require 'yaml'
    require 'bio/reference'
    
    module Bio
    
    #
    # bio/db/rebase.rb - Interface for EMBOSS formatted REBASE files
    #
    # Author::    Trevor Wennblom  
    # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
    # License::   The Ruby License
    #
    #
    # = Description
    # 
    # Bio::REBASE provides utilities for interacting with REBASE data in EMBOSS
    # format.  REBASE is the Restriction Enzyme Database, more information
    # can be found here:
    # 
    # * http://rebase.neb.com
    # 
    # EMBOSS formatted files located at:
    # 
    # * http://rebase.neb.com/rebase/rebase.f37.html
    # 
    # These files are the same as the "emboss_?.???" files located at:
    # 
    # * ftp://ftp.neb.com/pub/rebase/
    # 
    # To easily get started with the data you can simply type this command
    # at your shell prompt:
    # 
    #   % wget "ftp://ftp.neb.com/pub/rebase/emboss_*"
    # 
    # 
    # = Usage
    # 
    #   require 'bio'
    #   require 'pp'
    # 
    #   enz = File.read('emboss_e')
    #   ref = File.read('emboss_r')
    #   sup = File.read('emboss_s')
    # 
    #   # When creating a new instance of Bio::REBASE
    #   # the contents of the enzyme file must be passed.
    #   # The references and suppliers file contents
    #   # may also be passed.
    #   rebase = Bio::REBASE.new( enz )
    #   rebase = Bio::REBASE.new( enz, ref )
    #   rebase = Bio::REBASE.new( enz, ref, sup )
    # 
    #   # The 'read' class method allows you to read in files
    #   # that are REBASE EMBOSS formatted
    #   rebase = Bio::REBASE.read( 'emboss_e' )
    #   rebase = Bio::REBASE.read( 'emboss_e', 'emboss_r' )
    #   rebase = Bio::REBASE.read( 'emboss_e', 'emboss_r', 'emboss_s' )
    # 
    #   # The data loaded may be saved in YAML format
    #   rebase.save_yaml( 'enz.yaml' )
    #   rebase.save_yaml( 'enz.yaml', 'ref.yaml' )
    #   rebase.save_yaml( 'enz.yaml', 'ref.yaml', 'sup.yaml' )
    # 
    #   # YAML formatted files can also be read with the
    #   # class method 'load_yaml'
    #   rebase = Bio::REBASE.load_yaml( 'enz.yaml' )
    #   rebase = Bio::REBASE.load_yaml( 'enz.yaml', 'ref.yaml' )
    #   rebase = Bio::REBASE.load_yaml( 'enz.yaml', 'ref.yaml', 'sup.yaml' )
    # 
    #   pp rebase.enzymes[0..4]                     # ["AarI", "AasI", "AatI", "AatII", "Acc16I"]
    #   pp rebase.enzyme_name?('aasi')              # true
    #   pp rebase['AarI'].pattern                   # "CACCTGC"
    #   pp rebase['AarI'].blunt?                    # false
    #   pp rebase['AarI'].organism                  # "Arthrobacter aurescens SS2-322"
    #   pp rebase['AarI'].source                    # "A. Janulaitis"
    #   pp rebase['AarI'].primary_strand_cut1       # 11
    #   pp rebase['AarI'].primary_strand_cut2       # 0
    #   pp rebase['AarI'].complementary_strand_cut1 # 15
    #   pp rebase['AarI'].complementary_strand_cut2 # 0
    #   pp rebase['AarI'].suppliers                 # ["F"]
    #   pp rebase['AarI'].supplier_names            # ["Fermentas International Inc."]
    # 
    #   pp rebase['AarI'].isoschizomers             # Currently none stored in the references file
    #   pp rebase['AarI'].methylation               # ""
    # 
    #   pp rebase['EcoRII'].methylation             # "2(5)"
    #   pp rebase['EcoRII'].suppliers               # ["F", "J", "M", "O", "S"]
    #   pp rebase['EcoRII'].supplier_names  # ["Fermentas International Inc.", "Nippon Gene Co., Ltd.",
    #                                       # "Roche Applied Science", "Toyobo Biochemicals",
    #                                       # "Sigma Chemical Corporation"]
    # 
    #   # Number of enzymes in the database
    #   pp rebase.size                              # 673
    #   pp rebase.enzymes.size                      # 673
    # 
    #   rebase.each do |name, info|
    #     pp "#{name}:  #{info.methylation}" unless info.methylation.empty?
    #   end
    #
    
    class REBASE
    
      class DynamicMethod_Hash < Hash #:nodoc:
        # Define a writer or reader
        # * Allows hash[:key]= to be accessed like hash.key=
        # * Allows hash[:key] to be accessed like hash.key
        #
        # The accessor is defined on the receiver's class the first time it is
        # needed, and is then invoked on the receiver.
        def method_missing(method_id, *args)
          klass = self.class
          method_name = method_id.to_s
          writer = (method_name[-1].chr == '=')

          klass.class_eval do
            if writer
              define_method(method_id) { |s| self[ method_name[0..-2].to_sym ] = s }
            else
              define_method(method_id) { self[method_id] }
            end
          end

          accessor = klass.instance_method(method_id).bind(self)
          writer ? accessor.call(args[0]) : accessor.call
        end
      end
    
      class EnzymeEntry < DynamicMethod_Hash #:nodoc:
        # Table indexed by supplier code.  It is a class variable, so one
        # table is shared by all EnzymeEntry objects.
        @@supplier_data = {}

        # Replaces the shared supplier table.
        def self.supplier_data=(d)
          @@supplier_data = d
        end

        # Looks up every code of this entry's suppliers in the shared table
        # and returns the results in the same order.
        def supplier_names
          self.suppliers.map { |code| @@supplier_data[code] }
        end
      end
    
      # Calls _block_ once for each entry of the @data hash, passing the
      # [name, entry] pair as a parameter.
      #
      # ---
      # *Arguments*
      # * Accepts a block
      # *Returns*:: whatever iterating over @data returns
      def each
        @data.each_pair { |name, entry| yield [name, entry] }
      end
    
      # Make the instantiated class act like a Hash on @data
      # Does the equivalent and more of this:
      #  def []( key ); @data[ key ]; end
      #  def size; @data.size; end
      #
      # Every message that reaches method_missing is forwarded, with its
      # arguments and block, to the Hash instance method of the same name
      # bound to @data.  Raises NameError when Hash has no such instance
      # method.
      #
      # The call is forwarded each time instead of being cached with
      # define_method: a method built from a one-parameter block accepts
      # exactly one argument, so a second call to a zero-argument method such
      # as size would raise ArgumentError.
      def method_missing(method_id, *args, &block) #:nodoc:
        Hash.instance_method(method_id).bind(@data).call(*args, &block)
      end
    
      # Constructor
      #
      # ---
      # *Arguments*
      # * +enzyme_lines+: (_required_) contents of EMBOSS formatted enzymes file 
      # * +reference_lines+: (_optional_) contents of EMBOSS formatted references file 
      # * +supplier_lines+: (_optional_) contents of EMBOSS formatted suppliers files 
      # * +yaml+: (_optional_, _default_ +false+) enzyme_lines, reference_lines, and supplier_lines are read as YAML if set to true 
      # *Returns*:: Bio::REBASE
      def initialize( enzyme_lines, reference_lines = nil, supplier_lines = nil, yaml = false )
        # All your REBASE are belong to us.

        if yaml
          # The arguments are already parsed data.  An omitted optional
          # argument becomes an empty hash, which is also what the parsers in
          # the other branch return for nil, so later supplier look-ups never
          # index nil.
          @enzyme_data = enzyme_lines
          @reference_data = reference_lines || {}
          @supplier_data = supplier_lines || {}
        else
          @enzyme_data = parse_enzymes(enzyme_lines)
          @reference_data = parse_references(reference_lines)
          @supplier_data = parse_suppliers(supplier_lines)
        end

        # The supplier table is handed to EnzymeEntry before the entries are
        # built.
        EnzymeEntry.supplier_data = @supplier_data
        setup_enzyme_data
      end
    
      # List the enzymes available
      #
      # The list is the @enzyme_names array prepared by setup_enzyme_data
      # (the sorted keys of @data).  The array itself is returned, not a copy.
      #
      # ---
      # *Arguments*
      # * _none_
      # *Returns*:: +Array+ sorted enzyme names
      def enzymes
        @enzyme_names
      end
      
      # Check if supplied name is the name of an available enzyme
      #
      # ---
      # *Arguments*
      # * +name+: Enzyme name
      # *Returns*:: +true/false+
      def enzyme_name?(name)
        # The comparison ignores case: both sides are downcased.
        wanted = name.downcase
        @enzyme_names_downcased.any? { |known| known == wanted }
      end
    
      # Save the current data
      #  rebase.save_yaml( 'enz.yaml' )
      #  rebase.save_yaml( 'enz.yaml', 'ref.yaml' )
      #  rebase.save_yaml( 'enz.yaml', 'ref.yaml', 'sup.yaml' )
      #
      # ---
      # *Arguments*
      # * +f_enzyme+: (_required_) Filename to save YAML formatted output of enzyme data
      # * +f_reference+: (_optional_) Filename to save YAML formatted output of reference data
      # * +f_supplier+: (_optional_) Filename to save YAML formatted output of supplier data  
      # *Returns*:: nothing
      def save_yaml( f_enzyme, f_reference=nil, f_supplier=nil )
        # The enzyme data is always written; reference and supplier data are
        # written only when a filename was given for them.
        outputs = [ [f_enzyme, @enzyme_data] ]
        outputs << [f_reference, @reference_data] if f_reference
        outputs << [f_supplier, @supplier_data] if f_supplier

        outputs.each do |filename, data|
          File.open(filename, 'w') { |out| out.puts YAML.dump(data) }
        end
        return
      end
    
      # Read REBASE EMBOSS-formatted files
      #  rebase = Bio::REBASE.read( 'emboss_e' )
      #  rebase = Bio::REBASE.read( 'emboss_e', 'emboss_r' )
      #  rebase = Bio::REBASE.read( 'emboss_e', 'emboss_r', 'emboss_s' )
      #
      # ---
      # *Arguments*
      # * +f_enzyme+: (_required_) Filename to read enzyme data
      # * +f_reference+: (_optional_) Filename to read reference data
      # * +f_supplier+: (_optional_) Filename to read supplier data  
      # *Returns*:: Bio::REBASE object
      def self.read( f_enzyme, f_reference=nil, f_supplier=nil )
        # The parsers iterate over their input with each_line, so every file
        # is read as a single String.  (An Array of lines, as returned by
        # IO.readlines, does not respond to each_line.)
        e = File.read(f_enzyme)
        r = f_reference ? File.read(f_reference) : nil
        s = f_supplier ? File.read(f_supplier) : nil
        self.new(e,r,s)
      end
    
      # Read YAML formatted files
      #  rebase = Bio::REBASE.load_yaml( 'enz.yaml' )
      #  rebase = Bio::REBASE.load_yaml( 'enz.yaml', 'ref.yaml' )
      #  rebase = Bio::REBASE.load_yaml( 'enz.yaml', 'ref.yaml', 'sup.yaml' )
      #
      # ---
      # *Arguments*
      # * +f_enzyme+: (_required_) Filename to read YAML-formatted enzyme data
      # * +f_reference+: (_optional_) Filename to read YAML-formatted reference data
      # * +f_supplier+: (_optional_) Filename to read YAML-formatted supplier data  
      # *Returns*:: Bio::REBASE object
      def self.load_yaml( f_enzyme, f_reference=nil, f_supplier=nil )
        # The hashes written by save_yaml use Symbol keys.  YAML libraries
        # that offer safe_load_file may reject Symbols in plain load_file, so
        # Symbols are permitted explicitly there (aliases too, since a dump
        # can contain them).  Libraries without safe_load_file keep using
        # load_file.
        loader = lambda do |filename|
          if YAML.respond_to?(:safe_load_file)
            YAML.safe_load_file(filename, :permitted_classes => [Symbol], :aliases => true)
          else
            YAML.load_file(filename)
          end
        end

        e = loader.call(f_enzyme)
        r = f_reference ? loader.call(f_reference) : nil
        s = f_supplier ? loader.call(f_supplier) : nil
        self.new(e,r,s,true)
      end
    
      #########
      protected
      #########
    
      def setup_enzyme_data
        @data = {}

        @enzyme_data.each do |name, hash|
          entry = EnzymeEntry.new
          @data[name] = entry

          entry.pattern                   = hash[:pattern]
          # "entry.blunt? = ..." would be a syntax error, so the key is set
          # through []=
          entry[:blunt?]                  = (hash[:blunt].to_i == 1)
          entry.primary_strand_cut1       = hash[:c1].to_i
          entry.complementary_strand_cut1 = hash[:c2].to_i
          entry.primary_strand_cut2       = hash[:c3].to_i
          entry.complementary_strand_cut2 = hash[:c4].to_i

          # Defaults, in case no reference data is supplied
          [:organism, :isoschizomers, :methylation, :source].each do |key|
            entry[key] = ''
          end
          entry.suppliers = []
          entry.references = []
        end

        @enzyme_names = @data.keys.sort
        @enzyme_names_downcased = @enzyme_names.map { |enzyme_name| enzyme_name.downcase }
        setup_enzyme_and_reference_association
      end
    
      # Copies the reference data (organism, isoschizomers, methylation,
      # source, suppliers and references) into the matching entries of @data.
      # Does nothing when there is no reference data.
      def setup_enzyme_and_reference_association
        return unless @reference_data
        @reference_data.each do |name, hash|
          d = @data[name]
          # A reference entry for an enzyme that is not in @data has nothing
          # to be attached to.
          next unless d

          [:organism, :isoschizomers, 
          :methylation, :source].each { |k| d[k] = hash[k] }
          # One supplier code per character; a missing value gives no codes.
          d.suppliers = hash[:suppliers].to_s.split('')
          # A missing reference list is treated as empty.
          d.references = (hash[:references] || []).map { |k| raw_to_reference(k) }
        end
      end
    
      # data is a hash indexed by the :name of each entry which is also a hash
      # * data[enzyme_name] has the following keys:
      #   :name, :pattern, :len, :ncuts, :blunt, :c1, :c2, :c3, :c4
      #   :c1 => First 5' cut
      #   :c2 => First 3' cut
      #   :c3 => Second 5' cut
      #   :c4 => Second 3' cut
      # Every value is the String found in the corresponding column.
      # Lines starting with '#' and lines containing only whitespace are
      # skipped.  Returns an empty hash when lines is nil.
      def parse_enzymes( lines )
        data = {}
        return data if lines == nil
        lines.each_line do |line|
          next if line[0].chr == '#'

          # Splitting on ' ' breaks the line on runs of whitespace and drops
          # the line ending.
          a = line.split(' ')
          # A blank line has no columns; storing it would add a nil key that
          # cannot be sorted together with the enzyme names.
          next if a.empty?

          name, pattern, len, ncuts, blunt, c1, c2, c3, c4 = a
          data[ name ] = {
            :name => name,
            :pattern => pattern,
            :len => len,
            :ncuts => ncuts,
            :blunt => blunt,
            :c1 => c1,
            :c2 => c2,
            :c3 => c3,
            :c4 => c4
          }
        end  # lines.each_line
        data
      end
    
      # data is a hash indexed by the :name of each entry which is also a hash
      # * data[enzyme_name] has the following keys:
      #   :organism, :isoschizomers, :references, :source, :methylation, :suppliers, :name, :number_of_references
      #
      # Each entry consists of seven single-line fields (name, organism,
      # isoschizomers, methylation, source, suppliers, number of references)
      # followed by that many reference lines.  Lines starting with '#' or
      # '//' are skipped.  :references is an array of the reference lines,
      # empty for an entry that announces no references.
      # Returns an empty hash when lines is nil.
      def parse_references( lines )
        data = {}
        return data if lines == nil
        index = 1
        h = {}
        references_left = 0

        lines.each_line do |line|
          next if line[0].chr == '#'  # Comment
          next if line[0..1] == '//'  # End of entry marker
          line.chomp!

          if index <= 7
            h[index] = line
            index += 1
            next if index <= 7
            # Field 7 was just read: it says how many reference lines follow.
            references_left = h[7].to_i
          else
            h[8] ||= []
            h[8] << line
            references_left -= 1
          end

          # Keep reading until every announced reference has been seen.  An
          # entry with no references is complete right after field 7.
          next if references_left > 0

          data[ h[1] ] = {
            :name => h[1],
            :organism => h[2],
            :isoschizomers => h[3],
            :methylation => h[4],
            :source => h[5],
            :suppliers => h[6],
            :number_of_references => h[7],
            :references => h[8] || []
          }
          index = 1
          h = {}
        end  # lines.each_line
        data
      end
    
      # data is a hash indexed by the supplier code
      #   data[supplier_code]
      #   returns the suppliers name
      def parse_suppliers( lines )
        suppliers = {}
        return suppliers if lines == nil

        lines.each_line do |row|
          # lines starting with '#' are comments
          next if row[0].chr == '#'

          # shortest possible code, one whitespace character, then the name
          found = %r{(.+?)\s(.+)}.match(row)
          suppliers[ found[1] ] = found[2] if found
        end
        suppliers
      end
    
      # Takes a string in one of the three formats listed below and returns a
      # Bio::Reference object
      # * Possible input styles:
      #   a = 'Inagaki, K., Hikita, T., Yanagidani, S., Nomura, Y., Kishimoto, N., Tano, T., Tanaka, H., (1993) Biosci. Biotechnol. Biochem., vol. 57, pp. 1716-1721.'
      #   b = 'Nekrasiene, D., Lapcinskaja, S., Kiuduliene, L., Vitkute, J., Janulaitis, A., Unpublished observations.'
      #   c = "Grigaite, R., Maneliene, Z., Janulaitis, A., (2002) Nucleic Acids Res., vol. 30."
      def raw_to_reference( line )
        fields = line.split(', ')

        if fields[-1] == 'Unpublished observations.'
          # the closing remark (without its final character) is the title
          title = fields.pop.chop
          pages = volume = year = journal = ''
        else
          title = ''

          # the last field is either the pages or, without pages, the volume
          tail = fields.pop.chop
          if tail =~ %r{pp\.\s}
            pages = tail.gsub('pp. ', '')
            volume = fields.pop
          else
            pages = ''
            volume = tail
          end
          volume = volume.gsub('vol. ', '')

          # "(year) journal"
          published = %r{\((\d+)\)\s(.+)}.match(fields.pop)
          year = published && published[1]
          journal = published && published[2]
        end

        # what is left alternates between last name and initials
        authors = []
        fields.each_slice(2) do |last_name, initials|
          authors << "#{last_name}, #{initials}" if initials
        end

        Bio::Reference.new(
          'title' => title,
          'pages' => pages,
          'volume' => volume,
          'year' => year,
          'journal' => journal,
          'authors' => authors
        )
      end
    
    end # REBASE
    end # Bio
    bio-1.4.3.0001/lib/bio/db/pdb/0000755000004100000410000000000012200110570015334 5ustar  www-datawww-databio-1.4.3.0001/lib/bio/db/pdb/model.rb0000644000004100000410000000707112200110570016766 0ustar  www-datawww-data#
    # = bio/db/pdb/model.rb - model class for PDB
    #
    # Copyright:: Copyright (C) 2004, 2006
    #             Alex Gutteridge 
    #             Naohisa Goto 
    # License::   The Ruby License
    #
    #
    # = Bio::PDB::Model
    #
    # Please refer to Bio::PDB::Model.
    #
    
    
    module Bio
    
      require 'bio/db/pdb' unless const_defined?(:PDB)
    
      class PDB
    
        # Bio::PDB::Model is a class to store a model.
        #
        # The object would contain some chains (Bio::PDB::Chain objects).
        class Model

          include Utils
          include AtomFinder
          include ResidueFinder
          include ChainFinder

          include HetatmFinder
          include HeterogenFinder

          include Enumerable
          include Comparable

          # chains in this model
          attr_reader :chains

          # (OBSOLETE) solvents (water, HOH) in this model
          attr_reader :solvents

          # serial number of this model. (Integer or nil)
          attr_accessor :serial

          # for backward compatibility
          alias model_serial serial

          # (reserved for future extension)
          attr_reader :structure

          # Creates a new Model object.  The model starts without chains and
          # with an empty-named Chain as its solvents container.
          def initialize(serial = nil, structure = nil)
            @serial      = serial
            @structure   = structure
            @chains      = []
            @chains_hash = {}
            @solvents    = Chain.new('', self)
          end

          # Adds a chain to this model.
          # The chain is always appended to the list of chains; it becomes
          # reachable by its chain_id only when that id is not in use yet
          # (otherwise a warning is printed when $VERBOSE is set).
          def addChain(chain)
            unless chain.is_a?(Bio::PDB::Chain)
              raise "Expecting a Bio::PDB::Chain"
            end

            @chains.push(chain)
            if @chains_hash[chain.chain_id]
              $stderr.puts "Warning: chain_id #{chain.chain_id.inspect} is already used" if $VERBOSE
            else
              @chains_hash[chain.chain_id] = chain
            end
            self
          end

          # rehash chains hash: all chains are added again from scratch.
          # If that raises a RuntimeError, the previous state is restored
          # before the error is re-raised.
          def rehash
            chains_bak = @chains
            chains_hash_bak = @chains_hash
            @chains = []
            @chains_hash = {}
            chains_bak.each { |chain| self.addChain(chain) }
            self
          rescue RuntimeError
            @chains = chains_bak
            @chains_hash = chains_hash_bak
            raise
          end

          # (OBSOLETE) Adds a solvent molecule to this model
          def addSolvent(solvent)
            unless solvent.is_a?(Bio::PDB::Residue)
              raise "Expecting a Bio::PDB::Residue"
            end
            @solvents.addResidue(solvent)
          end

          # (OBSOLETE) not recommended to use this method
          def removeSolvent
            @solvents = nil
          end

          # Iterates over each chain
          def each(&block) #:yields: chain
            @chains.each(&block)
          end
          # Alias to override ChainFinder#each_chain
          alias each_chain each

          # Operator aimed to sort models based on serial number
          def <=>(other)
            @serial <=> other.model_serial
          end

          # Keyed access to chains (by chain_id)
          def [](key)
            @chains_hash[key]
          end

          # stringifies to chains.
          # When the model has a serial, the chains are wrapped in a MODEL
          # line and an ENDMDL line.
          def to_s
            text = model_serial ? "MODEL     #{model_serial}\n" : ""
            @chains.each { |chain| text << chain.to_s }
            text << "ENDMDL\n" if model_serial
            text
          end

          # returns a string containing human-readable representation
          # of this object.
          def inspect
            "#<#{self.class.to_s} serial=#{serial.inspect} chains.size=#{chains.size}>"
          end

        end #class Model
    
      end #class PDB
    
    end #module Bio
    bio-1.4.3.0001/lib/bio/db/pdb/residue.rb0000644000004100000410000001020212200110570017314 0ustar  www-datawww-data#
    # = bio/db/pdb/residue.rb - residue class for PDB
    #
    # Copyright::  Copyright (C) 2004, 2006
    #              Alex Gutteridge 
    #              Naohisa Goto 
    # License::    The Ruby License
    #
    #
    # = Bio::PDB::Residue
    #
    # = Bio::PDB::Heterogen
    #
    
    
    module Bio
    
      require 'bio/db/pdb' unless const_defined?(:PDB)
    
      class PDB
    
        # Bio::PDB::Residue is a class to store a residue.
        # The object would contain some atoms (Bio::PDB::Record::ATOM objects).
        #
        class Residue
          
          include Utils
          include AtomFinder
    
          include Enumerable
          include Comparable
    
          # Creates residue id from an ATOM (or HETATM) object.
          def self.get_residue_id_from_atom(atom)
            "#{atom.resSeq}#{atom.iCode.strip}".strip
          end
    
          # Creates a new Residue object.
          def initialize(resName = nil, resSeq = nil, iCode = nil, 
                         chain = nil)
            
            @resName = resName
            @resSeq  = resSeq
            @iCode   = iCode
            
            @chain   = chain
            @atoms   = []
    
            update_residue_id
          end
    
          # atoms in this residue. (Array)
          attr_reader :atoms
    
          # the chain to which this residue belongs
          attr_accessor :chain
    
          # resName (residue name)
          attr_accessor :resName
    
          # residue id (String or nil).
          # The id is a composite of resSeq and iCode.
          attr_reader   :residue_id
    
          # Now, Residue#id is an alias of residue_id.
          alias id residue_id
    
          #Keyed access to atoms based on atom name e.g. ["CA"]
          def [](key)
            @atoms.find{ |atom| key == atom.name }
          end
    
          # Updates residue id. This is a private method.
          # Need to call this method to make sure id is correctly updated.
          def update_residue_id
            if !@resSeq and !@iCode
              @residue_id = nil
            else
              @residue_id = "#{@resSeq}#{@iCode.to_s.strip}".strip
            end
          end
          private :update_residue_id
    
          # resSeq
          attr_reader :resSeq
    
          # resSeq=()
          def resSeq=(resSeq)
            @resSeq = resSeq.to_i
            update_residue_id
            @resSeq
          end
    
          # iCode
          attr_reader :iCode
    
          # iCode=()
          def iCode=(iCode)
            @iCode = iCode
            update_residue_id
            @iCode
          end
          
          # Adds an atom to this residue
          def addAtom(atom)
            # Only Bio::PDB::Record::ATOM instances (including instances of
            # its subclasses) are accepted; anything else raises RuntimeError.
            unless atom.is_a?(Bio::PDB::Record::ATOM)
              raise "Expecting ATOM or HETATM"
            end

            @atoms << atom
            # Returns self, which allows chained calls.
            self
          end
          
          # Iterator over the atoms
          def each
            # Yields the atoms one at a time, in insertion order.
            @atoms.each do |member|
              yield member
            end
          end
          # Alias to override AtomFinder#each_atom
          alias each_atom each
          
          # Sorts based on resSeq and iCode if need be
          def <=>(other)
            # Primary key: resSeq. The insertion code is compared only
            # when both residues share the same resSeq.
            return @resSeq <=> other.resSeq if @resSeq != other.resSeq

            @iCode <=> other.iCode
          end
          
          # Stringifies each atom
          def to_s
            # Joins the atoms with an empty separator, in list order.
            @atoms.join('')
          end
    
          # returns a string containing human-readable representation
          # of this object.
          def inspect
            "#<#{self.class.to_s} resName=#{resName.inspect} id=#{residue_id.inspect} chain.id=#{(chain ? chain.id : nil).inspect} resSeq=#{resSeq.inspect} iCode=#{iCode.inspect} atoms.size=#{atoms.size}>"
          end
    
          # Always returns false for a Residue.
          #
          # The Bio::PDB::Heterogen subclass overrides this method
          # to return true.
          def hetatm
            # Constant result for this class.
            false
          end
        end #class Residue
    
        # Bio::PDB::Heterogen is a class to store a heterogen.
        # It inherits Bio::PDB::Residue and most of the methods are the same.
        #
        # The object would contain some HETATMs
        # (Bio::PDB::Record::HETATM objects).
        class Heterogen < Residue

          # Mixes in the HETATM finder methods
          # (find_hetatm, each_hetatm, hetatms).
          include HetatmFinder

          # Always returns true for a Heterogen
          # (this overrides the inherited Residue#hetatm).
          def hetatm
            true
          end

          # Alias to override HetatmFinder#each_hetatm:
          # iterating HETATMs of a heterogen is the same as Residue#each.
          alias each_hetatm each

          # Alias needed for HeterogenFinder:
          # the stored atoms of a heterogen are its HETATMs.
          alias hetatms atoms

          # Alias to avoid confusion: heterogen_id is the same as residue_id.
          alias heterogen_id residue_id
        end #class Heterogen
    
      end #class PDB
    
    end #module Bio
    bio-1.4.3.0001/lib/bio/db/pdb/utils.rb0000644000004100000410000002416112200110570017025 0ustar  www-datawww-data#
    # = bio/db/pdb/utils.rb - Utility modules for PDB
    #
    # Copyright::  Copyright (C) 2004, 2006
    #              Alex Gutteridge 
    #              Naohisa Goto 
    # License::    The Ruby License
    #
    #
    # = Bio::PDB::Utils
    #
    # Bio::PDB::Utils
    #
    # = Bio::PDB::ModelFinder
    #
    # Bio::PDB::ModelFinder
    #
    # = Bio::PDB::ChainFinder
    #
    # Bio::PDB::ChainFinder
    #
    # = Bio::PDB::ResidueFinder
    #
    # Bio::PDB::ResidueFinder
    #
    # = Bio::PDB::AtomFinder
    #
    # Bio::PDB::AtomFinder
    #
    # = Bio::PDB::HeterogenFinder
    #
    # Bio::PDB::HeterogenFinder
    #
    # = Bio::PDB::HetatmFinder
    #
    # Bio::PDB::HetatmFinder
    #
    
    require 'matrix'
    
    module Bio
    
    require 'bio/db/pdb' unless const_defined?(:PDB)
    
    class PDB
    
      # Utility methods for PDB data.
      # The methods in this mixin should be applicable to all PDB objects.
      #
      # Bio::PDB::Utils is included by Bio::PDB, Bio::PDB::Model,
      # Bio::PDB::Chain, Bio::PDB::Residue, and Bio::PDB::Heterogen classes.
      module Utils
        
        # Returns the coordinates of the geometric centre (average co-ord)
        # of any AtomFinder (or .atoms) implementing object
        #
        # If you want to get the geometric centre of hetatms,
        # call geometricCentre(:each_hetatm).
        def geometricCentre(method = :each_atom)
          # Averages the x, y and z values of every atom yielded by the
          # given iterator method (default :each_atom) and returns the
          # result as a Coordinate.
          #
          # Raises ZeroDivisionError when the iterator yields no atoms.
          x = y = z = 0
          count = 0

          self.__send__(method) do |atom|
            x += atom.x
            y += atom.y
            z += atom.z
            count += 1
          end

          if count == 0
            raise ZeroDivisionError,
                  'cannot calculate geometric centre: no atoms found'
          end

          # Divide by a Float so that Integer coordinates are not
          # truncated by integer division.
          n = count.to_f
          Coordinate[x / n, y / n, z / n]
        end
    
        #Returns the coords of the centre of gravity for any
        #AtomFinder implementing object
        #Blleurgh! - working out what element it is from the atom name is
        #tricky - this'll work in most cases but not metals etc...
        #a proper element field is included in some PDB files but not all.
        # Lookup table used by centreOfGravity: one-letter element symbol
        # => integer mass. Only these six elements are listed.
        ElementMass = {
          'H' => 1,
          'C' => 12,
          'N' => 14,
          'O' => 16,
          'S' => 32,
          'P' => 31
        }
    
        # Calculates the centre of gravity.
        def centreOfGravity()
          # Returns the mass-weighted average position of all atoms
          # yielded by each_atom, as a Coordinate. The mass of an atom is
          # looked up in ElementMass by the first character of its element
          # symbol.
          #
          # Raises TypeError when an element has no entry in ElementMass,
          # and ZeroDivisionError when the total mass is zero (no atoms).
          x = y = z = total = 0

          self.each_atom do |atom|
            # The element symbol may be padded with blanks (e.g. " C"),
            # so trim it before taking the first character.
            element = atom.element.strip[0, 1]
            mass    = ElementMass[element]
            unless mass
              raise TypeError,
                    "cannot calculate centre of gravity: " \
                    "no mass known for element #{atom.element.inspect}"
            end
            total += mass
            x += atom.x * mass
            y += atom.y * mass
            z += atom.z * mass
          end

          if total == 0
            raise ZeroDivisionError,
                  'cannot calculate centre of gravity: total mass is zero'
          end

          # Divide by a Float so that Integer coordinates are not
          # truncated by integer division.
          total = total.to_f
          Coordinate[x / total, y / total, z / total]
        end
    
        #--
        #Perhaps distance and dihedral would be better off as class methods?
        #(rather) than instance methods
        #++
    
        # Calculates distance between _coord1_ and _coord2_.
        def distance(coord1, coord2)
          # Both arguments go through convert_to_xyz first; the distance
          # is the magnitude (#r) of the difference of the two results.
          from = convert_to_xyz(coord1)
          to   = convert_to_xyz(coord2)
          difference = from - to
          difference.r
        end
        module_function :distance
    
        # Calculates dihedral angle.
        def dihedral_angle(coord1, coord2, coord3, coord4)
          # Plane coefficients through points 1-3 (with offset d) and
          # through points 2-4 (offset not needed).
          a1, b1, c1, d = calculatePlane(coord1, coord2, coord3)
          a2, b2, c2    = calculatePlane(coord2, coord3, coord4)

          # Angle between the two plane normals, in radians.
          dot_product = a1*a2 + b1*b2 + c1*c2
          length1     = Math.sqrt(a1**2 + b1**2 + c1**2)
          length2     = Math.sqrt(a2**2 + b2**2 + c2**2)
          torsion     = acos(dot_product / (length1 * length2))

          # The sign is chosen by the side of the first plane
          # on which the fourth point lies.
          side = a1*coord4.x + b1*coord4.y + c1*coord4.z + d
          side < 0 ? -torsion : torsion
        end
        module_function :dihedral_angle
          
        # Implicit conversion into Vector or Bio::PDB::Coordinate
        def convert_to_xyz(obj)
          # Vectors (and subclasses) are returned untouched.
          return obj if obj.is_a?(Vector)

          begin
            # Prefer the object's own xyz method ...
            obj.xyz
          rescue NameError
            # ... and fall back to building a Vector from obj.to_a
            # when calling xyz raises NameError (e.g. no such method).
            Vector.elements(obj.to_a)
          end
        end
        module_function :convert_to_xyz
    
        # (Deprecated) alias of convert_to_xyz(obj)
        def self.to_xyz(obj)
          # Pure delegation; kept only so that the old method name still works.
          convert_to_xyz(obj)
        end
    
        #--
        #Methods required for the dihedral angle calculations
        #perhaps these should go in some separate Math module
        #++
    
        # radian to degree
        def rad2deg(r)
          # degrees = radians / pi * 180
          half_turns = r / Math::PI
          half_turns * 180
        end
        module_function :rad2deg
    
        # acos
        def acos(x)
          # Arc cosine of x in radians, computed as atan2(sqrt(1 - x^2), x).
          #
          # Floating-point rounding can leave a cosine marginally outside
          # [-1, 1] (for instance 1.0000000000000002 for two parallel
          # planes), which makes 1 - x**2 slightly negative. Such values are
          # treated as exactly +1 or -1. Values further outside the range
          # are passed to Math.sqrt unchanged and fail there as before.
          radicand = 1 - x**2
          radicand = 0.0 if radicand < 0 and radicand > -1.0e-9
          Math.atan2(Math.sqrt(radicand), x)
        end
        module_function :acos
    
        # calculates plane
        def calculatePlane(coord1, coord2, coord3)
          # Returns [a, b, c, d], the coefficients of the plane
          # a*x + b*y + c*z + d = 0 through the three given points.
          x1, y1, z1 = coord1.x, coord1.y, coord1.z
          x2, y2, z2 = coord2.x, coord2.y, coord2.z
          x3, y3, z3 = coord3.x, coord3.y, coord3.z

          a = y1 * (z2 - z3) + y2 * (z3 - z1) + y3 * (z1 - z2)
          b = z1 * (x2 - x3) + z2 * (x3 - x1) + z3 * (x1 - x2)
          c = x1 * (y2 - y3) + x2 * (y3 - y1) + x3 * (y1 - y2)

          offset = (x1 * (y2 * z3 - y3 * z2)) +
                   (x2 * (y3 * z1 - y1 * z3)) +
                   (x3 * (y1 * z2 - y2 * z1))
          d = -1 * (offset)

          [a, b, c, d]
        end
        module_function :calculatePlane
    
        # Every class in the hierarchy implements finder. It takes
        # a class which determines which type of object to find; the
        # associated block is then run in classic .find style.
        #
        # The method might be deprecated.
        # You had better use find_XXX directly.
        def finder(findtype, &block) #:yields: obj
          # Dispatches to the find_XXX method that matches findtype and
          # returns its result:
          #
          #   Bio::PDB::Record::ATOM  -> find_atom
          #   Bio::PDB::Residue       -> find_residue
          #   Bio::PDB::Chain         -> find_chain
          #   Bio::PDB::Model         -> find_model
          #
          # Raises TypeError for any other findtype.
          #
          # The legacy constant Bio::PDB::Atom is also accepted, but only
          # when it is actually defined; referring to it unconditionally
          # would raise NameError where the class is not available.
          if findtype == Bio::PDB::Record::ATOM or
              (defined?(Bio::PDB::Atom) and findtype == Bio::PDB::Atom)
            return self.find_atom(&block)
          elsif findtype == Bio::PDB::Residue
            return self.find_residue(&block)
          elsif findtype == Bio::PDB::Chain
            return self.find_chain(&block)
          elsif findtype == Bio::PDB::Model
            return self.find_model(&block)
          else
            raise TypeError, "You can't find a #{findtype}"
          end
        end
      end #module Utils
    
      #--
      #The *Finder modules implement a find_* method which returns
      #an array of anything for which the block evals true
      #(suppose Enumerable#find_all method).
      #The each_* style methods act as classic iterators.
      #++
    
      # methods to access models
      #
      # XXX#each_model must be defined.
      #
      # Bio::PDB::ModelFinder is included by Bio::PDB::PDB.
      #
      module ModelFinder
        # Returns an Array of all models for which the given block returns
        # a true value (similar to Enumerable#find_all).
        # Relies on each_model of the including object.
        def find_model
          matched = []
          self.each_model { |model| matched << model if yield(model) }
          matched
        end
      end #module ModelFinder
      
      #--
      #The hierarchical nature of the objects allows us to re-use the
      #methods from the previous level - e.g. A PDB object can use the .models
      #method defined in ModelFinder to iterate through the models to find the
      #chains
      #++
    
      # methods to access chains
      #
      # XXX#each_model must be defined.
      #
      # Bio::PDB::ChainFinder is included by Bio::PDB::PDB and Bio::PDB::Model.
      #
      module ChainFinder

        # Returns an Array of all chains for which the given block returns
        # a true value (similar to Enumerable#find_all).
        def find_chain
          matched = []
          self.each_chain { |chain| matched << chain if yield(chain) }
          matched
        end

        # Iterates over each chain by calling #each on every model
        # yielded by each_model.
        def each_chain(&blk) #:yields: chain
          self.each_model do |model|
            model.each(&blk)
          end
        end

        # Returns one Array holding the chains of every model,
        # in each_model order.
        def chains
          collected = []
          self.each_model do |model|
            collected.concat(model.chains)
          end
          collected
        end
      end #module ChainFinder
      
      # methods to access residues
      #
      # XXX#each_chain must be defined.
      #
      # Bio::PDB::ResidueFinder is included by Bio::PDB::PDB, Bio::PDB::Model,
      # and Bio::PDB::Chain.
      #
      module ResidueFinder

        # Returns an Array of all residues for which the given block
        # returns a true value (similar to Enumerable#find_all).
        def find_residue
          matched = []
          self.each_residue { |residue| matched << residue if yield(residue) }
          matched
        end

        # Iterates over each residue by calling #each on every chain
        # yielded by each_chain.
        def each_residue(&blk) #:yields: residue
          self.each_chain do |chain|
            chain.each(&blk)
          end
        end

        # Returns one Array holding the residues of every chain,
        # in each_chain order.
        def residues
          collected = []
          self.each_chain do |chain|
            collected.concat(chain.residues)
          end
          collected
        end
      end #module ResidueFinder
      
      # methods to access atoms
      #
      # XXX#each_residue must be defined.
      module AtomFinder
        # Returns an Array of all atoms for which the given block returns
        # a true value (similar to Enumerable#find_all).
        def find_atom
          matched = []
          self.each_atom { |atom| matched << atom if yield(atom) }
          matched
        end

        # Iterates over each atom by calling #each on every residue
        # yielded by each_residue.
        def each_atom(&blk) #:yields: atom
          self.each_residue do |residue|
            residue.each(&blk)
          end
        end

        # Returns one Array holding the atoms of every residue,
        # in each_residue order.
        def atoms
          collected = []
          self.each_residue do |residue|
            collected.concat(residue.atoms)
          end
          collected
        end
      end #module AtomFinder
    
      # methods to access HETATMs
      #
      # XXX#each_heterogen must be defined.
      #
      # Bio::PDB::HetatmFinder is included by Bio::PDB::PDB, Bio::PDB::Model,
      # Bio::PDB::Chain, and Bio::PDB::Heterogen.
      #
      module HetatmFinder
        # Returns an Array of all HETATMs for which the given block
        # returns a true value (similar to Enumerable#find_all).
        def find_hetatm
          matched = []
          self.each_hetatm { |hetatm| matched << hetatm if yield(hetatm) }
          matched
        end

        # Iterates over each HETATM by calling #each on every heterogen
        # yielded by each_heterogen.
        def each_hetatm(&blk) #:yields: hetatm
          self.each_heterogen do |heterogen|
            heterogen.each(&blk)
          end
        end

        # Returns one Array holding the HETATMs of every heterogen,
        # in each_heterogen order.
        def hetatms
          collected = []
          self.each_heterogen do |heterogen|
            collected.concat(heterogen.hetatms)
          end
          collected
        end
      end #module HetatmFinder
    
      # methods to access heterogens (compounds or ligands)
      #
      # XXX#each_chain must be defined.
      #
      # Bio::PDB::HeterogenFinder is included by Bio::PDB::PDB, Bio::PDB::Model,
      # and Bio::PDB::Chain.
      #
      module HeterogenFinder
        # Returns an Array of all heterogens for which the given block
        # returns a true value (similar to Enumerable#find_all).
        def find_heterogen
          matched = []
          self.each_heterogen do |heterogen|
            matched << heterogen if yield(heterogen)
          end
          matched
        end

        # Iterates over each heterogen by calling #each_heterogen on
        # every chain yielded by each_chain.
        def each_heterogen(&blk) #:yields: heterogen
          self.each_chain do |chain|
            chain.each_heterogen(&blk)
          end
        end

        # Returns one Array holding the heterogens of every chain,
        # in each_chain order.
        def heterogens
          collected = []
          self.each_chain do |chain|
            collected.concat(chain.heterogens)
          end
          collected
        end
      end #module HeterogenFinder
    
    end #class PDB
    end #module Bio
    
    bio-1.4.3.0001/lib/bio/db/pdb/atom.rb0000644000004100000410000000327212200110570016625 0ustar  www-datawww-data#
    # = bio/db/pdb/atom.rb - Coordinate class for PDB
    #
    # Copyright::  Copyright (C) 2004, 2006
    #              Alex Gutteridge 
    #              Naohisa Goto 
    # License::    The Ruby License
    #
    #
    # = Bio::PDB::Coordinate
    #
    # Coordinate class for PDB.
    #
    # = Compatibility Note
    #
    # From bioruby 0.7.0, the Bio::PDB::Atom class is no longer available.
    # Please use Bio::PDB::Record::ATOM and Bio::PDB::Record::HETATM instead.
    #
    
    require 'matrix'
    
    module Bio
    
      require 'bio/db/pdb' unless const_defined?(:PDB)
    
      class PDB
    
        # Bio::PDB::Coordinate is a class to store a 3D coordinate.
        # It inherits Vector (in bundled library in Ruby).
        #
        class Coordinate < Vector
          # Same as Vector.[], but requires exactly three values (x, y, z).
          def self.[](x,y,z)
            super
          end

          # Same as Vector.elements, but raises RuntimeError unless the
          # given array has exactly three elements.
          def self.elements(array, *a)
            raise 'Size of given array must be 3' if array.size != 3
            super
          end

          # x (element 0)
          def x; self[0]; end
          # y (element 1)
          def y; self[1]; end
          # z (element 2)
          def z; self[2]; end
          # x=(n)
          def x=(n); self[0]=n; end
          # y=(n)
          def y=(n); self[1]=n; end
          # z=(n)
          def z=(n); self[2]=n; end

          # Implicit conversion to an array.
          #
          # Note that this method would be deprecated in the future.
          #
          #--
          # Definition of 'to_ary' means objects of the class is
          # implicitly regarded as an array.
          #++
          def to_ary; self.to_a; end

          # returns self.
          def xyz; self; end

          # Distance between this coordinate and object2.
          #
          # object2 is first converted with Utils::convert_to_xyz, and the
          # converted value is the one used in the subtraction, so anything
          # that conversion accepts can be passed, not only a Vector.
          def distance(object2)
            other = Utils::convert_to_xyz(object2)
            (self - other).r
          end
        end #class Coordinate
    
      end #class PDB
    end #class Bio
    
    bio-1.4.3.0001/lib/bio/db/pdb/pdb.rb0000644000004100000410000017511312200110570016436 0ustar  www-datawww-data#
    # = bio/db/pdb/pdb.rb - PDB database class for PDB file format
    #
    # Copyright:: Copyright (C) 2003-2006
    #             GOTO Naohisa 
    #             Alex Gutteridge 
    # License::   The Ruby License
    #
    #  $Id:$
    #
    # = About Bio::PDB
    #
    # Please refer document of Bio::PDB class.
    #
    # = References
    #
    # * (())
    # * PDB File Format Contents Guide Version 2.2 (20 December 1996)
    #   (())
    #
    # = *** CAUTION ***
    # This is beta version. Specs shall be changed frequently.
    #
    
    require 'bio/data/aa'
    
    module Bio
    
      require 'bio/db/pdb' unless const_defined?(:PDB)
    
      # This is the main PDB class which takes care of parsing, annotations
      # and is the entry way to the co-ordinate data held in models.
      #
      # There are many related classes.
      #
      # Bio::PDB::Model
      # Bio::PDB::Chain
      # Bio::PDB::Residue
      # Bio::PDB::Heterogen
      # Bio::PDB::Record::ATOM
      # Bio::PDB::Record::HETATM
      # Bio::PDB::Record::*
      # Bio::PDB::Coordinate
      # 
      class PDB
    
        # Geometry helpers plus the find_XXX / each_XXX / collection
        # methods for atoms, residues, chains and models.
        include Utils
        include AtomFinder
        include ResidueFinder
        include ChainFinder
        include ModelFinder

        # The same kind of access for HETATMs and heterogens.
        include HetatmFinder
        include HeterogenFinder

        include Enumerable

        # delimiter for reading via Bio::FlatFile
        # (both DELIMITER and RS are set to nil)
        DELIMITER = RS = nil # 1 file 1 entry
    
        # Modules required by the field definitions
        module DataType

          # Marker used as the field type of continuation columns.
          # It is nil, which is how parse_field_definitions recognizes it.
          Pdb_Continuation = nil

          # Each type below is a module with a "new" module method that
          # converts the raw column text of a field into a Ruby value.

          # Integer field: converted with String#to_i.
          module Pdb_Integer
            def self.new(str)
              str.to_i
            end
          end

          # Semicolon-separated list: returns an Array of Strings.
          module Pdb_SList
            def self.new(str)
              str.to_s.strip.split(/\;\s*/)
            end
          end

          # Comma-separated list: returns an Array of Strings.
          module Pdb_List
            def self.new(str)
              str.to_s.strip.split(/\,\s*/)
            end
          end

          # Semicolon-separated list of "token: value" items:
          # returns an Array of Arrays, each item being split at the
          # first colon only (at most two elements per item).
          module Pdb_Specification_list
            def self.new(str)
              a = str.to_s.strip.split(/\;\s*/)
              a.collect! { |x| x.split(/\:\s*/, 2) }
              a
            end
          end

          # String field with trailing whitespace removed.
          module Pdb_String
            def self.new(str)
              str.to_s.gsub(/\s+\z/, '')
            end

            #Creates a new module with a string left justified to the
            #length given in nn
            #(trailing whitespace is removed first, then the string is
            #padded or cut to exactly nn characters).
            def self.[](nn)
              m = Module.new
              # nn is kept in a class variable of the new module
              # so that the generated "new" can read it.
              m.module_eval %Q{
                @@nn = nn
                def self.new(str)
                  str.to_s.gsub(/\s+\z/, '').ljust(@@nn)[0, @@nn]
                end
              }
              m
            end
          end #module Pdb_String

          # String field kept as is (no stripping).
          module Pdb_LString
            # Creates a new module whose "new" pads or cuts the string
            # to exactly nn characters without stripping anything.
            def self.[](nn)
              m = Module.new
              m.module_eval %Q{
                @@nn = nn
                def self.new(str)
                  str.to_s.ljust(@@nn)[0, @@nn]
                end
              }
              m
            end
            # Returns a new String object with the same content.
            def self.new(str)
              String.new(str.to_s)
            end
          end

          # Real number field: converted with String#to_f.
          module Pdb_Real
            # Creates a new module for the given format.
            # The format is stored in @@format, but the generated "new"
            # does not use it: conversion is always String#to_f.
            def self.[](fmt)
              m = Module.new
              m.module_eval %Q{
                @@format = fmt
                def self.new(str)
                  str.to_f
                end
              }
              m
            end
            def self.new(str)
              str.to_f
            end
          end

          # String field with leading whitespace removed.
          module Pdb_StringRJ
            def self.new(str)
              str.to_s.gsub(/\A\s+/, '')
            end
          end

          # The following types currently share the behavior of
          # Pdb_String (or Pdb_LString for Pdb_Character).
          Pdb_Date         = Pdb_String
          Pdb_IDcode       = Pdb_String
          Pdb_Residue_name = Pdb_String
          Pdb_SymOP        = Pdb_String
          Pdb_Atom         = Pdb_String
          Pdb_AChar        = Pdb_String
          Pdb_Character    = Pdb_LString

          # Methods that allow writing Pdb_String(nn) in place of
          # Pdb_String[nn] (and likewise for Pdb_LString and Pdb_Real).
          module ConstLikeMethod
            def Pdb_LString(nn)
              Pdb_LString[nn]
            end

            def Pdb_String(nn)
              Pdb_String[nn]
            end

            def Pdb_Real(fmt)
              Pdb_Real[fmt]
            end
          end #module ConstLikeMethod
        end #module DataType
    
        # The ancestor of every single PDB record class.
        # It inherits Struct class.
        # Basically, each line of a PDB file corresponds to
        # an instance of each corresponding child class.
        # If continuation exists, multiple lines may correspond to
        # single instance.
        #
        class Record < Struct
          include DataType
          extend DataType::ConstLikeMethod
    
          # Internal use only.
          #
          # Parses field definitions.
          def self.parse_field_definitions(ary)
            # Takes an array of field definitions of the form
            #   [ first column, last column, type, symbol ]
            # (columns are 1-based and inclusive) and returns
            #   [ hash, order, continuation ]
            # where hash maps each symbol to [ type, [ range, ... ] ],
            # order lists the symbols in order of first appearance, and
            # continuation is the 0-based range of the continuation
            # columns, or false when the record has none.
            fields_by_name = {}
            name_order     = []
            continuation   = false

            ary.each do |definition|
              # convert 1-based inclusive columns into a 0-based range
              columns = (definition[0] - 1)..(definition[1] - 1)
              type    = definition[2]

              # A nil type (Pdb_Continuation) marks the continuation columns.
              unless type
                continuation = columns
                next
              end

              # Definitions whose name is not a Symbol are ignored.
              name = definition[3]
              next unless name.is_a?(Symbol)

              if fields_by_name.has_key?(name)
                # A repeated name gets an additional range;
                # the type of its first definition is kept.
                fields_by_name[name][1] << columns
              else
                fields_by_name[name] = [ type, [ columns ] ]
                name_order << name
              end
            end

            [ fields_by_name, name_order, continuation ]
          end
          private_class_method :parse_field_definitions
    
          # Creates new class by given field definition
          # The difference from new_direct() is the class
          # created by the method does lazy evaluation.
          #
          # Internal use only.
          def self.def_rec(*ary)
            symbolhash, symbolary, cont = parse_field_definitions(ary)

            # The new class inherits a Struct-style class generated from
            # the field symbols, so each field gets a Struct accessor.
            klass = Class.new(self.new(*symbolary))
            # Keep the definition data as class-level instance variables.
            klass.module_eval {
              @definition = ary
              @symbols = symbolhash
              @cont = cont
            }
            # Lazy evaluation: every field reader first calls do_parse
            # and then returns the value from the inherited accessor.
            klass.module_eval {
              symbolary.each do |x|
                define_method(x) { do_parse; super() }
              end
            }
            klass
          end #def self.def_rec
    
          # creates new class which inherits given class.
          def self.new_inherit(klass)
            # Class-level instance variables are not inherited by
            # subclasses, so the definition data of klass is copied
            # to the new subclass explicitly.
            child = Class.new(klass)
            [ :@definition, :@symbols, :@cont ].each do |ivar|
              child.instance_variable_set(ivar,
                                          klass.instance_variable_get(ivar))
            end
            child
          end
    
          # Creates new class by given field definition.
          #
          # Internal use only.
          def self.new_direct(*ary)
            symbolhash, symbolary, cont = parse_field_definitions(ary)
            # Records created here are parsed immediately on
            # initialization, so continuation lines cannot be supported.
            if cont
              raise 'continuation not allowed. please use def_rec instead'
            end

            # The new class inherits a Struct-style class generated from
            # the field symbols.
            klass = Class.new(self.new(*symbolary))
            # Keep the definition data as class-level instance variables.
            klass.module_eval {
              @definition = ary
              @symbols = symbolhash
              @cont = cont
            }
            # Unlike def_rec, fields are parsed right away:
            # initialize_from_string calls do_parse after the inherited
            # initialization and returns the inherited result.
            klass.module_eval {
              define_method(:initialize_from_string) { |str|
                r = super(str)
                do_parse
                r
              }
            }
            klass
          end #def self.new_direct
    
          # symbols
          def self.symbols
            # Class-level instance variable set by def_rec, new_direct
            # or new_inherit: symbol => [ type, [ range, ... ] ].
            @symbols
          end
    
          # Returns true if this record has a field type which allows 
          # continuations.
          def self.continue?
            # The value is the class-level @cont as stored by def_rec,
            # new_direct or new_inherit: the range of the continuation
            # columns (a true value), or false/nil when there are none.
            @cont
          end
    
          # Returns true if this record has a field type which allows 
          # continuations.
          def continue?
            # Same value as the class method of the same name.
            self.class.continue?
          end
    
          # yields the symbol(k), type(x[0]) and array of ranges
          # of each symbol.
          def each_symbol
            # Every entry of the class-level symbol table is of the form
            #   symbol => [ type, array of ranges ]
            self.class.symbols.each do |name, spec|
              type   = spec[0]
              ranges = spec[1]
              yield name, type, ranges
            end
          end
    
          # Return original string (except that "\n" are truncated) 
          # for this record (usually just @str, but
          # sometimes add on the continuation data from other lines.
          # Returns an array of string.
          #
          def original_data
            # @cont_data exists only after at least one continuation line
            # has been added; without it the record is the single line @str.
            lines = [ @str ]
            lines.concat(@cont_data.to_a) if defined?(@cont_data)
            lines
          end
    
          # initialize this record from the given string.
          # str must be a line (in PDB format).
          #
          # You can add continuation lines later using
          # add_continuation method.
          def initialize_from_string(str)
            # Only the raw line and the record name are stored here;
            # the fields themselves are parsed later by do_parse.
            @str = str
            @record_name = fetch_record_name(str)
            @parsed = false
            self
          end
    
          #--
          # Called when we need to access the data, takes the string
          # and the array of FieldDefs and parses it out.
          #++
    
          # In order to speeding up processing of PDB file format,
          # fields have not been parsed before calling this method.
          #
          # Normally, it is automatically called and you don't explicitly
          # need to call it .
          #
          def do_parse
            # Nothing to do when already parsed or when no source line
            # has been set. Returns self in every case.
            return self if @parsed or !@str
            str0 = @str
            each_symbol do |key, klass, ranges|
              #If we only have one range then pull that out
              #and store it in the hash
              if ranges.size <= 1 then
                self[key] = klass.new(str0[ranges.first])
              else
                #Go through each range and add the string to an array
                #set the hash key to point to that array
                #(ranges whose text is blank are skipped)
                ary = []
                ranges.each do |r|
                  ary << klass.new(str0[r]) unless str0[r].to_s.strip.empty?
                end
                self[key] = ary
              end
            end #each_symbol
            #If we have continuations then for each line of extra data...
            if defined?(@cont_data) then
              @cont_data.each do |str|
                #Get the symbol, type and range array 
                each_symbol do |key, klass, ranges|
                  #If there's one range then grab that range
                  if ranges.size <= 1 then
                    r1 = ranges.first
                    unless str[r1].to_s.strip.empty?
                      #and concatenate the new data onto the old
                      #(skipped when the new value equals the current one)
                      v = klass.new(str[r1])
                      self[key].concat(v) if self[key] != v
                    end
                  else
                    #If there's more than one range then add to the array
                    ary = self[key]
                    ranges.each do |r|
                      ary << klass.new(str[r]) unless str[r].to_s.strip.empty?
                    end
                  end
                end
              end
            end
            # Mark as parsed so that later calls return immediately.
            @parsed = true
            self
          end
    
          # fetches record name
          def fetch_record_name(str)
            # The record name is the first six characters of the line,
            # with surrounding whitespace removed.
            leading = str[0..5]
            leading.strip
          end
          private :fetch_record_name
    
          # fetches record name
          def self.fetch_record_name(str)
            # Class-level variant: the first six characters of the line,
            # with surrounding whitespace removed.
            leading = str[0..5]
            leading.strip
          end
          private_class_method :fetch_record_name
    
          # If given str can be the continuation of the current record,
          # then return the order number of the continuation associated with
          # the Pdb_Continuation field definition.
          # Otherwise, returns -1.
          def fetch_cont(str)
            # continue? gives the range of the continuation columns,
            # or a false value when the record cannot be continued.
            columns = continue?
            return -1 unless columns

            # Blank continuation columns convert to 0.
            str[columns].to_i
          end
          private :fetch_cont
    
          # Record name of this record, e.g. "HEADER", "ATOM".
          def record_name
            # Prefer the name stored by initialize_from_string.
            return @record_name if @record_name

            # Otherwise derive it from the last component of the class
            # name, in upper case.
            self.class.to_s.split(/\:\:/).last.to_s.upcase
          end
          # keeping compatibility with old version
          alias record_type record_name
    
          # Internal use only.
          #
          # Adds continuation data to the record from str if str is
          # really the continuation of current record.
          # Returns self (= not nil) if str is the continuation.
          # Otherwise, returns false.
          #
          def add_continuation(str)
            # str is accepted only when this record type allows
            # continuation, str has the same record name and record class
            # as this record, and its continuation number is 2 or more.
            # The checks are made in this order and stop at the first failure.
            acceptable = self.continue? &&
                         fetch_record_name(str) == @record_name &&
                         self.class.get_record_class(str) == self.class &&
                         fetch_cont(str) >= 2
            return false unless acceptable

            # The list of continuation lines is created on first use.
            @cont_data = [] unless defined?(@cont_data)
            @cont_data << str

            # Returns self (= not nil) if succeeded.
            self
          end
    
          # creates definition hash from current classes constants
          def self.create_definition_hash
            # Builds a Hash from constant name (Symbol) to constant value
            # for every constant whose name consists only of upper-case
            # letters and digits (at least two characters, starting with
            # a letter).
            table = {}
            constants.each do |const_name|
              key = const_name.intern # keep compatibility both Ruby 1.8 and 1.9
              next unless /\A[A-Z][A-Z0-9]+\z/ =~ key.to_s
              table[key] = const_get(key)
            end

            # The constant Default, when it has a true value, becomes
            # the default value of the Hash.
            fallback = const_get(:Default)
            table.default = fallback if fallback
            table
          end
    
          # same as Struct#inspect.
          #
          # Note that do_parse is automatically called
          # before inspect.
          #
          # (Warning: The do_parse might sweep hidden bugs in PDB classes.)
          def inspect
            # Parse first so that the inherited inspect shows the
            # field values.
            do_parse
            super
          end
    
          #--
          #
          # definitions
          # contains all the rules for parsing each field
          # based on format V 2.2, 16-DEC-1996
          #
          # http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html
          # http://www.rcsb.org/pdb/docs/format/pdbguide2.2/Contents_Guide_21.html
          #
          # Details of following data are taken from these documents.
    
          # [ 1..6,  :Record_name, nil ],
    
          # XXXXXX =
          #   new([ start, end, type of data, symbol to access ], ...)
          #
          #++
    
          # HEADER record class
          HEADER = 
            def_rec([ 11, 50, Pdb_String, :classification ], #Pdb_String(40)
                    [ 51, 59, Pdb_Date,   :depDate ],
                    [ 63, 66, Pdb_IDcode, :idCode ]
                    )
    
          # OBSLTE record class
          #
          # Columns 9-10 are typed Pdb_Continuation and have nil in the
          # accessor slot.  The :rIdCode symbol is listed for eight column
          # ranges (32-35 through 67-70); how def_rec combines a repeated
          # symbol is defined outside this section.
          OBSLTE =
            def_rec([  9, 10, Pdb_Continuation, nil ],
                    [ 12, 20, Pdb_Date,   :repDate ],
                    [ 22, 25, Pdb_IDcode, :idCode ],
                    [ 32, 35, Pdb_IDcode, :rIdCode ],
                    [ 37, 40, Pdb_IDcode, :rIdCode ],
                    [ 42, 45, Pdb_IDcode, :rIdCode ],
                    [ 47, 50, Pdb_IDcode, :rIdCode ],
                    [ 52, 55, Pdb_IDcode, :rIdCode ],
                    [ 57, 60, Pdb_IDcode, :rIdCode ],
                    [ 62, 65, Pdb_IDcode, :rIdCode ],
                    [ 67, 70, Pdb_IDcode, :rIdCode ]
                    )
    
          # TITLE record class
          #
          # Continuation field in columns 9-10 (no accessor symbol) and the
          # title text in columns 11-70.
          TITLE =
            def_rec([  9, 10, Pdb_Continuation, nil ],
                    [ 11, 70, Pdb_String, :title ]
                    )
            
          # CAVEAT record class
          #
          # Fields: continuation (columns 9-10, no accessor symbol),
          # idcode (12-15), comment (20-70).
          # Note the accessor symbol here is :idcode (lower-case "c"), unlike
          # the :idCode symbol used by other record classes in this file.
          CAVEAT =
            def_rec([  9, 10, Pdb_Continuation, nil ],
                    [ 12, 15, Pdb_IDcode, :idcode ],
                    [ 20, 70, Pdb_String, :comment ]
                    )
    
          # COMPND record class
          #
          # Continuation field in columns 9-10 (no accessor symbol) and a
          # Pdb_Specification_list in columns 11-70, accessed as :compound.
          COMPND =
            def_rec([  9, 10, Pdb_Continuation, nil ],
                    [ 11, 70, Pdb_Specification_list, :compound ]
                    )
    
          # SOURCE record class
          #
          # Continuation field in columns 9-10 (no accessor symbol) and a
          # Pdb_Specification_list in columns 11-70, accessed as :srcName.
          SOURCE =
            def_rec([  9, 10, Pdb_Continuation, nil ],
                    [ 11, 70, Pdb_Specification_list, :srcName ]
                    )
    
          # KEYWDS record class
          #
          # Continuation field in columns 9-10 (no accessor symbol) and a
          # Pdb_List in columns 11-70, accessed as :keywds.
          KEYWDS =
            def_rec([  9, 10, Pdb_Continuation, nil ],
                    [ 11, 70, Pdb_List, :keywds ]
                    )
    
          # EXPDTA record class
          #
          # Continuation field in columns 9-10 (no accessor symbol) and a
          # Pdb_SList in columns 11-70, accessed as :technique.
          EXPDTA =
            def_rec([  9, 10, Pdb_Continuation, nil ],
                    [ 11, 70, Pdb_SList, :technique ]
                    )
    
          # AUTHOR record class
          #
          # Continuation field in columns 9-10 (no accessor symbol) and a
          # Pdb_List in columns 11-70, accessed as :authorList.
          AUTHOR =
            def_rec([  9, 10, Pdb_Continuation, nil ],
                    [ 11, 70, Pdb_List, :authorList ]
                    )
    
          # REVDAT record class
          #
          # Unlike most records above, the continuation field sits in
          # columns 11-12, after modNum (8-10).  The :record symbol is
          # listed for four 6-column ranges (40-45, 47-52, 54-59, 61-66).
          REVDAT =
            def_rec([  8, 10, Pdb_Integer,      :modNum  ],
                    [ 11, 12, Pdb_Continuation, nil      ],
                    [ 14, 22, Pdb_Date,         :modDate ],
                    [ 24, 28, Pdb_String,       :modId   ], # Pdb_String(5)
                    [ 32, 32, Pdb_Integer,      :modType ],
                    [ 40, 45, Pdb_LString(6),   :record  ],
                    [ 47, 52, Pdb_LString(6),   :record  ],
                    [ 54, 59, Pdb_LString(6),   :record  ],
                    [ 61, 66, Pdb_LString(6),   :record  ]
                    )
    
          # SPRSDE record class
          #
          # Same column layout as OBSLTE: continuation (9-10), a date
          # (12-20), idCode (22-25), then the :sIdCode symbol listed for
          # eight column ranges (32-35 through 67-70).
          SPRSDE =
            def_rec([  9, 10, Pdb_Continuation, nil ],
                    [ 12, 20, Pdb_Date,   :sprsdeDate ],
                    [ 22, 25, Pdb_IDcode, :idCode ],
                    [ 32, 35, Pdb_IDcode, :sIdCode ],
                    [ 37, 40, Pdb_IDcode, :sIdCode ],
                    [ 42, 45, Pdb_IDcode, :sIdCode ],
                    [ 47, 50, Pdb_IDcode, :sIdCode ],
                    [ 52, 55, Pdb_IDcode, :sIdCode ],
                    [ 57, 60, Pdb_IDcode, :sIdCode ],
                    [ 62, 65, Pdb_IDcode, :sIdCode ],
                    [ 67, 70, Pdb_IDcode, :sIdCode ]
                    )
    
          # 'JRNL' is defined below
          # (the constant itself is only a nil placeholder here; the JRNL
          # sub-record classes live in a separate nested class further down)
          JRNL = nil

          # 'REMARK' is defined below
          # (likewise a nil placeholder at this point)
          REMARK = nil
    
          # DBREF record class
          #
          # No continuation field.  The three database-related text fields
          # (columns 27-32, 34-41, 43-54) are typed Pdb_String; the trailing
          # "#Pdb_LString" notes record an alternative type that is not in use.
          DBREF =
            def_rec([  8, 11, Pdb_IDcode,    :idCode      ],
                    [ 13, 13, Pdb_Character, :chainID     ],
                    [ 15, 18, Pdb_Integer,   :seqBegin    ],
                    [ 19, 19, Pdb_AChar,     :insertBegin ],
                    [ 21, 24, Pdb_Integer,   :seqEnd      ],
                    [ 25, 25, Pdb_AChar,     :insertEnd   ],
                    [ 27, 32, Pdb_String,    :database    ], #Pdb_LString
                    [ 34, 41, Pdb_String,    :dbAccession ], #Pdb_LString
                    [ 43, 54, Pdb_String,    :dbIdCode    ], #Pdb_LString
                    [ 56, 60, Pdb_Integer,   :dbseqBegin  ],
                    [ 61, 61, Pdb_AChar,     :idbnsBeg    ],
                    [ 63, 67, Pdb_Integer,   :dbseqEnd    ],
                    [ 68, 68, Pdb_AChar,     :dbinsEnd    ]
                    )
            
          # SEQADV record class
          #
          # Ten fields spanning columns 8-70.  :database and :dbIdCode are
          # typed Pdb_String (the "#Pdb_LString" notes record an alternative
          # that is not in use); :conflict uses Pdb_LString with no length
          # argument.
          SEQADV =
            def_rec([  8, 11, Pdb_IDcode,       :idCode   ],
                    [ 13, 15, Pdb_Residue_name, :resName  ],
                    [ 17, 17, Pdb_Character,    :chainID  ],
                    [ 19, 22, Pdb_Integer,      :seqNum   ],
                    [ 23, 23, Pdb_AChar,        :iCode    ],
                    [ 25, 28, Pdb_String,       :database ], #Pdb_LString
                    [ 30, 38, Pdb_String,       :dbIdCode ], #Pdb_LString
                    [ 40, 42, Pdb_Residue_name, :dbRes    ],
                    [ 44, 48, Pdb_Integer,      :dbSeq    ],
                    [ 50, 70, Pdb_LString,      :conflict ]
                    )
    
          # SEQRES record class
          #
          # Columns 9-10 are declared as a Pdb_Continuation field with no
          # accessor; the commented-out first entry shows the alternative of
          # exposing them as an integer :serNum.  The :resName symbol is
          # listed for thirteen 3-column ranges (20-22 through 68-70).
          SEQRES =
            def_rec(#[  9, 10, Pdb_Integer,      :serNum ],
                    [  9, 10, Pdb_Continuation, nil      ],
                    [ 12, 12, Pdb_Character,    :chainID ],
                    [ 14, 17, Pdb_Integer,      :numRes  ],
                    [ 20, 22, Pdb_Residue_name, :resName ],
                    [ 24, 26, Pdb_Residue_name, :resName ],
                    [ 28, 30, Pdb_Residue_name, :resName ],
                    [ 32, 34, Pdb_Residue_name, :resName ],
                    [ 36, 38, Pdb_Residue_name, :resName ],
                    [ 40, 42, Pdb_Residue_name, :resName ],
                    [ 44, 46, Pdb_Residue_name, :resName ],
                    [ 48, 50, Pdb_Residue_name, :resName ],
                    [ 52, 54, Pdb_Residue_name, :resName ],
                    [ 56, 58, Pdb_Residue_name, :resName ],
                    [ 60, 62, Pdb_Residue_name, :resName ],
                    [ 64, 66, Pdb_Residue_name, :resName ],
                    [ 68, 70, Pdb_Residue_name, :resName ]
                    )
          
          # MODRES record class
          #
          # Fields: idCode (columns 8-11), resName (13-15), chainID (17),
          # seqNum (19-22), iCode (23), stdRes (25-27), comment (30-70).
          MODRES =
            def_rec([  8, 11, Pdb_IDcode,       :idCode ],
                    [ 13, 15, Pdb_Residue_name, :resName ],
                    [ 17, 17, Pdb_Character,    :chainID ],
                    [ 19, 22, Pdb_Integer,      :seqNum ],
                    [ 23, 23, Pdb_AChar,        :iCode ],
                    [ 25, 27, Pdb_Residue_name, :stdRes ],
                    [ 30, 70, Pdb_String,       :comment ]
                    )
          
          # HET record class
          #
          # Fields: hetID (columns 8-10), ChainID (13), seqNum (14-17),
          # iCode (18), numHetAtoms (21-25), text (31-70).
          # Note the accessor symbol for column 13 is :ChainID with a
          # capital "C", unlike the :chainID symbol used by other records.
          HET =
            def_rec([  8, 10, Pdb_LString(3), :hetID ],
                    [ 13, 13, Pdb_Character,  :ChainID ],
                    [ 14, 17, Pdb_Integer,    :seqNum ],
                    [ 18, 18, Pdb_AChar,      :iCode ],
                    [ 21, 25, Pdb_Integer,    :numHetAtoms ],
                    [ 31, 70, Pdb_String,     :text ]
                    )
          
          # HETNAM record class
          #
          # Fields: continuation (columns 9-10, no accessor symbol),
          # hetID (12-14), text (16-70).
          HETNAM =
            def_rec([ 9, 10,  Pdb_Continuation, nil ],
                    [ 12, 14, Pdb_LString(3),   :hetID ],
                    [ 16, 70, Pdb_String,       :text ]
                    )
            
          # HETSYN record class
          #
          # Fields: continuation (columns 9-10, no accessor symbol),
          # hetID (12-14), hetSynonyms (16-70, typed Pdb_SList).
          HETSYN =
            def_rec([  9, 10, Pdb_Continuation, nil ],
                    [ 12, 14, Pdb_LString(3),   :hetID ],
                    [ 16, 70, Pdb_SList,        :hetSynonyms ]
                    )
          
          # FORMUL record class
          #
          # Here the continuation number (columns 17-18) is an ordinary
          # Pdb_Integer field with its own accessor (:continuation), not a
          # Pdb_Continuation field as in the records above.
          FORMUL =
            def_rec([  9, 10, Pdb_Integer,    :compNum ],
                    [ 13, 15, Pdb_LString(3), :hetID ],
                    [ 17, 18, Pdb_Integer,    :continuation ],
                    [ 19, 19, Pdb_Character,  :asterisk ],
                    [ 20, 70, Pdb_String,     :text ]
                    )
          
          # HELIX record class
          #
          # helixID (columns 12-14) is typed Pdb_StringRJ; the alternative
          # Pdb_LString(3) definition is left commented out.  The last field,
          # :length, lies in columns 72-76, beyond column 70 where the other
          # fields of this record end.
          HELIX =
            def_rec([  8, 10, Pdb_Integer,      :serNum ],
                    #[ 12, 14, Pdb_LString(3),   :helixID ],
                    [ 12, 14, Pdb_StringRJ,     :helixID ],
                    [ 16, 18, Pdb_Residue_name, :initResName ],
                    [ 20, 20, Pdb_Character,    :initChainID ],
                    [ 22, 25, Pdb_Integer,      :initSeqNum ],
                    [ 26, 26, Pdb_AChar,        :initICode ],
                    [ 28, 30, Pdb_Residue_name, :endResName ],
                    [ 32, 32, Pdb_Character,    :endChainID ],
                    [ 34, 37, Pdb_Integer,      :endSeqNum ],
                    [ 38, 38, Pdb_AChar,        :endICode ],
                    [ 39, 40, Pdb_Integer,      :helixClass ],
                    [ 41, 70, Pdb_String,       :comment ],
                    [ 72, 76, Pdb_Integer,      :length ]
                    )
    
          # SHEET record class
          #
          # sheetID (columns 12-14) is typed Pdb_StringRJ; the alternative
          # Pdb_LString(3) definition is left commented out.  After the
          # init*/end* residue fields and :sense come two parallel groups of
          # five fields each, cur* (columns 42-55) and prev* (columns 57-70).
          # Those groups spell their chain accessors :curChainId /
          # :prevChainId (lower-case "d"), unlike :initChainID / :endChainID.
          SHEET =
            def_rec([  8, 10, Pdb_Integer,      :strand ],
                    #[ 12, 14, Pdb_LString(3),   :sheetID ],
                    [ 12, 14, Pdb_StringRJ,     :sheetID ],
                    [ 15, 16, Pdb_Integer,      :numStrands ],
                    [ 18, 20, Pdb_Residue_name, :initResName ],
                    [ 22, 22, Pdb_Character,    :initChainID ],
                    [ 23, 26, Pdb_Integer,      :initSeqNum ],
                    [ 27, 27, Pdb_AChar,        :initICode ],
                    [ 29, 31, Pdb_Residue_name, :endResName ],
                    [ 33, 33, Pdb_Character,    :endChainID ],
                    [ 34, 37, Pdb_Integer,      :endSeqNum ],
                    [ 38, 38, Pdb_AChar,        :endICode ],
                    [ 39, 40, Pdb_Integer,      :sense ],
                    [ 42, 45, Pdb_Atom,         :curAtom ],
                    [ 46, 48, Pdb_Residue_name, :curResName ],
                    [ 50, 50, Pdb_Character,    :curChainId ],
                    [ 51, 54, Pdb_Integer,      :curResSeq ],
                    [ 55, 55, Pdb_AChar,        :curICode ],
                    [ 57, 60, Pdb_Atom,         :prevAtom ],
                    [ 61, 63, Pdb_Residue_name, :prevResName ],
                    [ 65, 65, Pdb_Character,    :prevChainId ],
                    [ 66, 69, Pdb_Integer,      :prevResSeq ],
                    [ 70, 70, Pdb_AChar,        :prevICode ]
                    )
          
          # TURN record class
          #
          # turnId (columns 12-14) is typed Pdb_StringRJ; the alternative
          # Pdb_LString(3) definition is left commented out.  Chain accessors
          # are spelled :initChainId / :endChainId (lower-case "d") here.
          TURN =
            def_rec([  8, 10, Pdb_Integer,      :seq ],
                    #[ 12, 14, Pdb_LString(3),   :turnId ],
                    [ 12, 14, Pdb_StringRJ,     :turnId ],
                    [ 16, 18, Pdb_Residue_name, :initResName ],
                    [ 20, 20, Pdb_Character,    :initChainId ],
                    [ 21, 24, Pdb_Integer,      :initSeqNum ],
                    [ 25, 25, Pdb_AChar,        :initICode ],
                    [ 27, 29, Pdb_Residue_name, :endResName ],
                    [ 31, 31, Pdb_Character,    :endChainId ],
                    [ 32, 35, Pdb_Integer,      :endSeqNum ],
                    [ 36, 36, Pdb_AChar,        :endICode ],
                    [ 41, 70, Pdb_String,       :comment ]
                    )
            
          # SSBOND record class
          #
          # Two residue descriptions (suffix 1: columns 12-22, suffix 2:
          # columns 26-36) followed by two Pdb_SymOP fields (60-65, 67-72).
          # The inline notes mark :pep1 / :pep2 as holding "CYS".
          SSBOND =
            def_rec([  8, 10, Pdb_Integer,    :serNum   ],
                    [ 12, 14, Pdb_LString(3), :pep1     ], # "CYS"
                    [ 16, 16, Pdb_Character,  :chainID1 ],
                    [ 18, 21, Pdb_Integer,    :seqNum1  ],
                    [ 22, 22, Pdb_AChar,      :icode1   ],
                    [ 26, 28, Pdb_LString(3), :pep2     ], # "CYS"
                    [ 30, 30, Pdb_Character,  :chainID2 ],
                    [ 32, 35, Pdb_Integer,    :seqNum2  ],
                    [ 36, 36, Pdb_AChar,      :icode2   ],
                    [ 60, 65, Pdb_SymOP,      :sym1     ],
                    [ 67, 72, Pdb_SymOP,      :sym2     ]
                    )
    
          # LINK record class
          #
          # Two atom descriptions of six fields each (suffix 1: columns
          # 13-27, suffix 2: columns 43-57) followed by two Pdb_SymOP fields
          # (60-65, 67-72).
          LINK =
            def_rec([ 13, 16, Pdb_Atom,         :name1 ],
                    [ 17, 17, Pdb_Character,    :altLoc1 ],
                    [ 18, 20, Pdb_Residue_name, :resName1 ],
                    [ 22, 22, Pdb_Character,    :chainID1 ],
                    [ 23, 26, Pdb_Integer,      :resSeq1 ],
                    [ 27, 27, Pdb_AChar,        :iCode1 ],
                    [ 43, 46, Pdb_Atom,         :name2 ],
                    [ 47, 47, Pdb_Character,    :altLoc2 ],
                    [ 48, 50, Pdb_Residue_name, :resName2 ],
                    [ 52, 52, Pdb_Character,    :chainID2 ],
                    [ 53, 56, Pdb_Integer,      :resSeq2 ],
                    [ 57, 57, Pdb_AChar,        :iCode2 ],
                    [ 60, 65, Pdb_SymOP,        :sym1 ],
                    [ 67, 72, Pdb_SymOP,        :sym2 ]
                    )
            
          # HYDBND record class
          #
          # Three atom descriptions: suffix 1 (columns 13-28), suffix H
          # (30-42, which has no residue-name field) and suffix 2 (44-59),
          # followed by two Pdb_SymOP fields (60-65, 67-72).
          # Accessor spelling is not uniform within this record: :Chain1,
          # :ICode1 and :ChainH start with a capital letter, whereas the
          # second atom uses :chainID2 and :iCode2.
          HYDBND =
            def_rec([ 13, 16, Pdb_Atom,         :name1 ],
                    [ 17, 17, Pdb_Character,    :altLoc1 ],
                    [ 18, 20, Pdb_Residue_name, :resName1 ],
                    [ 22, 22, Pdb_Character,    :Chain1 ],
                    [ 23, 27, Pdb_Integer,      :resSeq1 ],
                    [ 28, 28, Pdb_AChar,        :ICode1 ],
                    [ 30, 33, Pdb_Atom,         :nameH ],
                    [ 34, 34, Pdb_Character,    :altLocH ],
                    [ 36, 36, Pdb_Character,    :ChainH ],
                    [ 37, 41, Pdb_Integer,      :resSeqH ],
                    [ 42, 42, Pdb_AChar,        :iCodeH ],
                    [ 44, 47, Pdb_Atom,         :name2 ],
                    [ 48, 48, Pdb_Character,    :altLoc2 ],
                    [ 49, 51, Pdb_Residue_name, :resName2 ],
                    [ 53, 53, Pdb_Character,    :chainID2 ],
                    [ 54, 58, Pdb_Integer,      :resSeq2 ],
                    [ 59, 59, Pdb_AChar,        :iCode2 ],
                    [ 60, 65, Pdb_SymOP,        :sym1 ],
                    [ 67, 72, Pdb_SymOP,        :sym2 ]
                    )
            
          # SLTBRG record class
          #
          # Same column layout as LINK; the only difference in the table is
          # that the atom-name accessors are :atom1 / :atom2 instead of
          # :name1 / :name2.
          SLTBRG =
            def_rec([ 13, 16, Pdb_Atom,          :atom1 ],
                    [ 17, 17, Pdb_Character,     :altLoc1 ],
                    [ 18, 20, Pdb_Residue_name,  :resName1 ],
                    [ 22, 22, Pdb_Character,     :chainID1 ],
                    [ 23, 26, Pdb_Integer,       :resSeq1 ],
                    [ 27, 27, Pdb_AChar,         :iCode1 ],
                    [ 43, 46, Pdb_Atom,          :atom2 ],
                    [ 47, 47, Pdb_Character,     :altLoc2 ],
                    [ 48, 50, Pdb_Residue_name,  :resName2 ],
                    [ 52, 52, Pdb_Character,     :chainID2 ],
                    [ 53, 56, Pdb_Integer,       :resSeq2 ],
                    [ 57, 57, Pdb_AChar,         :iCode2 ],
                    [ 60, 65, Pdb_SymOP,         :sym1 ],
                    [ 67, 72, Pdb_SymOP,         :sym2 ]
                    )
          
          # CISPEP record class
          #
          # Columns 8-36 follow the same layout as SSBOND (serNum plus two
          # residue descriptions); then modNum (44-46) and measure (54-59,
          # a real in '6.2' format).
          CISPEP =
            def_rec([  8, 10, Pdb_Integer,     :serNum ],
                    [ 12, 14, Pdb_LString(3),  :pep1 ],
                    [ 16, 16, Pdb_Character,   :chainID1 ],
                    [ 18, 21, Pdb_Integer,     :seqNum1 ],
                    [ 22, 22, Pdb_AChar,       :icode1 ],
                    [ 26, 28, Pdb_LString(3),  :pep2 ],
                    [ 30, 30, Pdb_Character,   :chainID2 ],
                    [ 32, 35, Pdb_Integer,     :seqNum2 ],
                    [ 36, 36, Pdb_AChar,       :icode2 ],
                    [ 44, 46, Pdb_Integer,     :modNum ],
                    [ 54, 59, Pdb_Real('6.2'), :measure ]
                    )
          
          # SITE record class
          #
          # seqNum, siteID and numRes, followed by four residue slots
          # (suffixes 1-4) of four fields each: resNameN, chainIDN, seqN and
          # iCodeN.  Each slot starts 11 columns after the previous one
          # (19, 30, 41, 52).
          SITE =
            def_rec([  8, 10, Pdb_Integer,      :seqNum    ],
                    [ 12, 14, Pdb_LString(3),   :siteID    ],
                    [ 16, 17, Pdb_Integer,      :numRes    ],
                    [ 19, 21, Pdb_Residue_name, :resName1  ],
                    [ 23, 23, Pdb_Character,    :chainID1  ],
                    [ 24, 27, Pdb_Integer,      :seq1      ],
                    [ 28, 28, Pdb_AChar,        :iCode1    ],
                    [ 30, 32, Pdb_Residue_name, :resName2  ],
                    [ 34, 34, Pdb_Character,    :chainID2  ],
                    [ 35, 38, Pdb_Integer,      :seq2      ],
                    [ 39, 39, Pdb_AChar,        :iCode2    ],
                    [ 41, 43, Pdb_Residue_name, :resName3  ],
                    [ 45, 45, Pdb_Character,    :chainID3  ],
                    [ 46, 49, Pdb_Integer,      :seq3      ],
                    [ 50, 50, Pdb_AChar,        :iCode3    ],
                    [ 52, 54, Pdb_Residue_name, :resName4  ],
                    [ 56, 56, Pdb_Character,    :chainID4  ],
                    [ 57, 60, Pdb_Integer,      :seq4      ],
                    [ 61, 61, Pdb_AChar,        :iCode4    ]
                    )
          
          # CRYST1 record class
          #
          # Three reals in '9.3' format (a, b, c: columns 7-33), three reals
          # in '7.2' format (alpha, beta, gamma: columns 34-54), then sGroup
          # (56-66) and the integer z (67-70).
          CRYST1 =
            def_rec([  7, 15, Pdb_Real('9.3'), :a ],
                    [ 16, 24, Pdb_Real('9.3'), :b ],
                    [ 25, 33, Pdb_Real('9.3'), :c ],
                    [ 34, 40, Pdb_Real('7.2'), :alpha ],
                    [ 41, 47, Pdb_Real('7.2'), :beta ],
                    [ 48, 54, Pdb_Real('7.2'), :gamma ],
                    [ 56, 66, Pdb_LString,     :sGroup ],
                    [ 67, 70, Pdb_Integer,     :z ]
                    )
          
          # ORIGX1 record class
          #
          # ORIGXn n=1, 2, or 3
          #
          # Three reals in '10.6' format (columns 11-40) and one real in
          # '10.5' format (columns 46-55).  The accessor symbols start with
          # a capital letter (:On1, :On2, :On3, :Tn).
          ORIGX1 =
            def_rec([ 11, 20, Pdb_Real('10.6'), :On1 ],
                    [ 21, 30, Pdb_Real('10.6'), :On2 ],
                    [ 31, 40, Pdb_Real('10.6'), :On3 ],
                    [ 46, 55, Pdb_Real('10.5'), :Tn ]
                    )
          
          # ORIGX2 record class (derived from ORIGX1 with new_inherit)
          ORIGX2 = new_inherit(ORIGX1)
          # ORIGX3 record class (derived from ORIGX1 with new_inherit)
          ORIGX3 = new_inherit(ORIGX1)
    
          # SCALE1 record class
          #
          # SCALEn n=1, 2, or 3
          #
          # Same column layout as ORIGX1, with accessor symbols :Sn1, :Sn2,
          # :Sn3 and :Un.
          SCALE1 =
            def_rec([ 11, 20, Pdb_Real('10.6'), :Sn1 ],
                    [ 21, 30, Pdb_Real('10.6'), :Sn2 ],
                    [ 31, 40, Pdb_Real('10.6'), :Sn3 ],
                    [ 46, 55, Pdb_Real('10.5'), :Un ]
                    )
          
          # SCALE2 record class (derived from SCALE1 with new_inherit)
          SCALE2 = new_inherit(SCALE1)
          # SCALE3 record class (derived from SCALE1 with new_inherit)
          SCALE3 = new_inherit(SCALE1)
          
          # MTRIX1 record class
          #
          # MTRIXn n=1,2, or 3
          #
          # Like ORIGX1 / SCALE1 for columns 11-55, with an additional
          # integer :serial in columns 8-10 and a one-column integer :iGiven
          # in column 60.
          MTRIX1 =
            def_rec([  8, 10, Pdb_Integer,      :serial ],
                    [ 11, 20, Pdb_Real('10.6'), :Mn1 ],
                    [ 21, 30, Pdb_Real('10.6'), :Mn2 ],
                    [ 31, 40, Pdb_Real('10.6'), :Mn3 ],
                    [ 46, 55, Pdb_Real('10.5'), :Vn ],
                    [ 60, 60, Pdb_Integer,      :iGiven ]
                    )
          
          # MTRIX2 record class (derived from MTRIX1 with new_inherit)
          MTRIX2 = new_inherit(MTRIX1)
          # MTRIX3 record class (derived from MTRIX1 with new_inherit)
          MTRIX3 = new_inherit(MTRIX1)
    
          # TVECT record class
          #
          # Fields: serial (columns 8-10), three reals in '10.5' format
          # (t1, t2, t3: columns 11-40) and free text (41-70).
          TVECT =
            def_rec([  8, 10, Pdb_Integer,      :serial ],
                    [ 11, 20, Pdb_Real('10.5'), :t1 ],
                    [ 21, 30, Pdb_Real('10.5'), :t2 ],
                    [ 31, 40, Pdb_Real('10.5'), :t3 ],
                    [ 41, 70, Pdb_String,       :text ]
                    )
    
          # MODEL record class
          #
          # Single integer field :serial in columns 11-14.
          MODEL =
            def_rec([ 11, 14, Pdb_Integer, :serial ]
                    )
            # ChangeLog: the accessor model_serial was renamed to serial
          
          # ATOM record class
          #
          # Built with new_direct rather than def_rec (the difference is
          # defined outside this section).  The column table matches the
          # zero-based slices used by ATOM#do_parse below, e.g. columns 7-11
          # here correspond to @str[6..10] there.
          ATOM =
            new_direct([  7, 11, Pdb_Integer,      :serial ],
                       [ 13, 16, Pdb_Atom,         :name ],
                       [ 17, 17, Pdb_Character,    :altLoc ],
                       [ 18, 20, Pdb_Residue_name, :resName ],
                       [ 22, 22, Pdb_Character,    :chainID ],
                       [ 23, 26, Pdb_Integer,      :resSeq ],
                       [ 27, 27, Pdb_AChar,        :iCode ],
                       [ 31, 38, Pdb_Real('8.3'),  :x ],
                       [ 39, 46, Pdb_Real('8.3'),  :y ],
                       [ 47, 54, Pdb_Real('8.3'),  :z ],
                       [ 55, 60, Pdb_Real('6.2'),  :occupancy ],
                       [ 61, 66, Pdb_Real('6.2'),  :tempFactor ],
                       [ 73, 76, Pdb_LString(4),   :segID ],
                       [ 77, 78, Pdb_LString(2),   :element ],
                       [ 79, 80, Pdb_LString(2),   :charge ]
                       )
    
          # ATOM record class
          class ATOM
    
            include Utils
            include Comparable
    
            # for backward compatibility
            alias occ  occupancy
            # for backward compatibility
            alias bfac tempFactor
    
            # residue the atom belongs to.
            attr_accessor :residue
    
            # SIGATM record
            attr_accessor :sigatm
    
            # ANISOU record
            attr_accessor :anisou
    
            # TER record
            attr_accessor :ter
    
            #Returns a Coordinate class instance of the xyz positions
            def xyz
              # Coordinate[] is given the three position accessors in x, y, z order.
              Coordinate[ x, y, z ]
            end
    
            #Returns an array of the xyz positions
            def to_a
              # Plain three-element Array in x, y, z order.
              [ x, y, z ]
            end
          
            #Sorts based on serial numbers
            def <=>(other)
              # Ordering looks only at the serial numbers of the two atoms;
              # +other+ just needs to respond to #serial.
              serial <=> other.serial
            end
    
            def do_parse
              # Fills every field from the raw line held in @str and returns
              # self.  Nothing happens when the line has already been parsed
              # or when there is no raw line.
              return self if @parsed || !@str
              line = @str
              # Each slice is (zero-based offset, width).
              self.serial     = line[6, 5].to_i
              self.name       = line[12, 4].strip
              self.altLoc     = line[16, 1]
              self.resName    = line[17, 3].strip
              self.chainID    = line[21, 1]
              self.resSeq     = line[22, 4].to_i
              self.iCode      = line[26, 1].strip
              self.x          = line[30, 8].to_f
              self.y          = line[38, 8].to_f
              self.z          = line[46, 8].to_f
              self.occupancy  = line[54, 6].to_f
              self.tempFactor = line[60, 6].to_f
              # The last three slices may fall beyond the end of a short
              # line; to_s turns the resulting nil into an empty string.
              self.segID      = line[72, 4].to_s.rstrip
              self.element    = line[76, 2].to_s.lstrip
              self.charge     = line[78, 2].to_s.strip
              @parsed = true
              self
            end
    
            def justify_atomname
              # Lays the atom name out for the 4-character name field written
              # by #to_s.  Returns a String: the first four characters of
              # names that are four or more characters long, otherwise the
              # name padded to four characters, either flush left ("%-4s")
              # or shifted right by one character (" %-3s").
              atomname = self.name.to_s
              size = atomname.length
              return atomname[0, 4] if size >= 4
              return '    ' if size == 0
              return ' ' + atomname + '  ' if size == 1

              # Only two- and three-character names reach this point.
              flush   = sprintf('%-4s', atomname)
              shifted = sprintf(' %-3s', atomname)

              # A leading digit means flush left; a two-character name that
              # ends in a digit is shifted.
              return flush if /\A[0-9]/ =~ atomname
              return shifted if size == 2 && /[0-9]\z/ =~ atomname

              # Ambiguous two- or three-letter name: use the element symbol
              # when it occurs inside the name.  Only a one-letter element at
              # the very start of the name gives the shifted form.
              elem = self.element.to_s.strip
              pos = elem.empty? ? nil : atomname.index(elem)
              if pos
                return shifted if pos == 0 && elem.size == 1
                return flush
              end

              # No usable element field: decide from the leading letters,
              # with a different letter pattern for HETATM records than for
              # ATOM records.
              leading =
                if self.kind_of?(HETATM)
                  /\A(B[^AEHIKR]|C[^ADEFLMORSU]|F[^EMR]|H[^EFGOS]|I[^NR]|K[^R]|N[^ABDEIOP]|O[^S]|P[^ABDMORTU]|S[^BCEGIMNR]|V|W|Y[^B])/
                else # ATOM
                  /\A[CHONSP]/
                end
              leading =~ atomname ? shifted : flush
            end
            private :justify_atomname
    
            def to_s
              # Formats the record as one fixed-width line ending in a
              # newline.  The minimum field widths in the layout add up to 80
              # characters; the atom name is justified first by
              # justify_atomname.
              atomname = justify_atomname
              layout = "%-6s%5d %-4s%-1s%3s %-1s%4d%-1s   %8.3f%8.3f%8.3f%6.2f%6.2f      %-4s%2s%-2s\n"
              values = [
                self.record_name, self.serial, atomname, self.altLoc,
                self.resName, self.chainID, self.resSeq, self.iCode,
                self.x, self.y, self.z,
                self.occupancy, self.tempFactor,
                self.segID, self.element, self.charge
              ]
              sprintf(layout, *values)
            end
          end #class ATOM
    
          # SIGATM record class
          #
          # Same column layout and field types as ATOM; the five real-valued
          # fields are named sigX, sigY, sigZ, sigOcc and sigTemp here.
          SIGATM =
            def_rec([  7, 11, Pdb_Integer,      :serial ],
                    [ 13, 16, Pdb_Atom,         :name ],
                    [ 17, 17, Pdb_Character,    :altLoc ],
                    [ 18, 20, Pdb_Residue_name, :resName ],
                    [ 22, 22, Pdb_Character,    :chainID ],
                    [ 23, 26, Pdb_Integer,      :resSeq ],
                    [ 27, 27, Pdb_AChar,        :iCode ],
                    [ 31, 38, Pdb_Real('8.3'),  :sigX ],
                    [ 39, 46, Pdb_Real('8.3'),  :sigY ],
                    [ 47, 54, Pdb_Real('8.3'),  :sigZ ],
                    [ 55, 60, Pdb_Real('6.2'),  :sigOcc ],
                    [ 61, 66, Pdb_Real('6.2'),  :sigTemp ],
                    [ 73, 76, Pdb_LString(4),   :segID ],
                    [ 77, 78, Pdb_LString(2),   :element ],
                    [ 79, 80, Pdb_LString(2),   :charge ]
                    )
    
          # ANISOU record class
          #
          # Shares the atom-identifying fields (columns 7-27) and the
          # trailing segID / element / charge fields (73-80) with ATOM.
          # Columns 29-70 hold six 7-column integer fields, in the order
          # U11, U22, U33, U12, U13, U23.
          ANISOU =
            def_rec([  7, 11, Pdb_Integer,      :serial ],
                    [ 13, 16, Pdb_Atom,         :name ],
                    [ 17, 17, Pdb_Character,    :altLoc ],
                    [ 18, 20, Pdb_Residue_name, :resName ],
                    [ 22, 22, Pdb_Character,    :chainID ],
                    [ 23, 26, Pdb_Integer,      :resSeq ],
                    [ 27, 27, Pdb_AChar,        :iCode ],
                    [ 29, 35, Pdb_Integer,      :U11 ],
                    [ 36, 42, Pdb_Integer,      :U22 ],
                    [ 43, 49, Pdb_Integer,      :U33 ],
                    [ 50, 56, Pdb_Integer,      :U12 ],
                    [ 57, 63, Pdb_Integer,      :U13 ],
                    [ 64, 70, Pdb_Integer,      :U23 ],
                    [ 73, 76, Pdb_LString(4),   :segID ],
                    [ 77, 78, Pdb_LString(2),   :element ],
                    [ 79, 80, Pdb_LString(2),   :charge ]
                    )

          # ANISOU record class
          # (reopened to add an attribute that is not part of the column table)
          class ANISOU
            # SIGUIJ record
            attr_accessor :siguij
          end #class ANISOU
    
          # SIGUIJ record class
          #
          # Same column layout as ANISOU; the six integer fields in columns
          # 29-70 are named SigmaU11, SigmaU22, SigmaU33, SigmaU12, SigmaU13
          # and SigmaU23.
          SIGUIJ =
            def_rec([  7, 11, Pdb_Integer,      :serial ],
                    [ 13, 16, Pdb_Atom,         :name ],
                    [ 17, 17, Pdb_Character,    :altLoc ],
                    [ 18, 20, Pdb_Residue_name, :resName ],
                    [ 22, 22, Pdb_Character,    :chainID ],
                    [ 23, 26, Pdb_Integer,      :resSeq ],
                    [ 27, 27, Pdb_AChar,        :iCode ],
                    [ 29, 35, Pdb_Integer,      :SigmaU11 ],
                    [ 36, 42, Pdb_Integer,      :SigmaU22 ],
                    [ 43, 49, Pdb_Integer,      :SigmaU33 ],
                    [ 50, 56, Pdb_Integer,      :SigmaU12 ],
                    [ 57, 63, Pdb_Integer,      :SigmaU13 ],
                    [ 64, 70, Pdb_Integer,      :SigmaU23 ],
                    [ 73, 76, Pdb_LString(4),   :segID ],
                    [ 77, 78, Pdb_LString(2),   :element ],
                    [ 79, 80, Pdb_LString(2),   :charge ]
                    )
    
          # TER record class
          #
          # Uses the same columns as ATOM for serial (7-11), resName (18-20),
          # chainID (22), resSeq (23-26) and iCode (27); no atom name or
          # coordinate fields are declared.
          TER =
            def_rec([  7, 11, Pdb_Integer,      :serial ],
                    [ 18, 20, Pdb_Residue_name, :resName ],
                    [ 22, 22, Pdb_Character,    :chainID ],
                    [ 23, 26, Pdb_Integer,      :resSeq ],
                    [ 27, 27, Pdb_AChar,        :iCode ]
                    )
          
          #HETATM =
          #  new_direct([  7, 11, Pdb_Integer,      :serial ],
          #             [ 13, 16, Pdb_Atom,         :name ],
          #             [ 17, 17, Pdb_Character,    :altLoc ],
          #             [ 18, 20, Pdb_Residue_name, :resName ],
          #             [ 22, 22, Pdb_Character,    :chainID ],
          #             [ 23, 26, Pdb_Integer,      :resSeq ],
          #             [ 27, 27, Pdb_AChar,        :iCode ],
          #             [ 31, 38, Pdb_Real('8.3'),  :x ],
          #             [ 39, 46, Pdb_Real('8.3'),  :y ],
          #             [ 47, 54, Pdb_Real('8.3'),  :z ],
          #             [ 55, 60, Pdb_Real('6.2'),  :occupancy ],
          #             [ 61, 66, Pdb_Real('6.2'),  :tempFactor ],
          #             [ 73, 76, Pdb_LString(4),   :segID ],
          #             [ 77, 78, Pdb_LString(2),   :element ],
          #             [ 79, 80, Pdb_LString(2),   :charge ]
          #             )
    
          # HETATM record class
          #
          # Derived from ATOM with new_inherit instead of repeating the
          # column table (the stand-alone table is kept above, commented out).
          HETATM = new_inherit(ATOM)

          # HETATM record class.
          # It inherits ATOM class.
          # (the empty reopening below adds no methods of its own)
          class HETATM; end
    
          # ENDMDL record class
          #
          # The only field is a placeholder: its first column (2) is greater
          # than its last column (1).
          ENDMDL =
            def_rec([  2,  1, Pdb_Integer, :serial ] # dummy field (always 0)
                    )
    
          # CONECT record class
          #
          # Eleven consecutive 5-column integer fields (columns 7-61), all
          # declared with the same :serial symbol; how def_rec combines a
          # repeated symbol is defined outside this section.
          CONECT =
            def_rec([  7, 11, Pdb_Integer, :serial ],
                    [ 12, 16, Pdb_Integer, :serial ],
                    [ 17, 21, Pdb_Integer, :serial ],
                    [ 22, 26, Pdb_Integer, :serial ],
                    [ 27, 31, Pdb_Integer, :serial ],
                    [ 32, 36, Pdb_Integer, :serial ],
                    [ 37, 41, Pdb_Integer, :serial ],
                    [ 42, 46, Pdb_Integer, :serial ],
                    [ 47, 51, Pdb_Integer, :serial ],
                    [ 52, 56, Pdb_Integer, :serial ],
                    [ 57, 61, Pdb_Integer, :serial ]
                    )
    
          # MASTER record class
          #
          # Twelve consecutive 5-column integer fields (columns 11-70).
          # The second entry has the String "0" in the accessor slot instead
          # of a Symbol -- presumably a fixed-value column without a named
          # accessor; confirm against def_rec.
          MASTER =
            def_rec([ 11, 15, Pdb_Integer, :numRemark ],
                    [ 16, 20, Pdb_Integer, "0" ],
                    [ 21, 25, Pdb_Integer, :numHet ],
                    [ 26, 30, Pdb_Integer, :numHelix ],
                    [ 31, 35, Pdb_Integer, :numSheet ],
                    [ 36, 40, Pdb_Integer, :numTurn ],
                    [ 41, 45, Pdb_Integer, :numSite ],
                    [ 46, 50, Pdb_Integer, :numXform ],
                    [ 51, 55, Pdb_Integer, :numCoord ],
                    [ 56, 60, Pdb_Integer, :numTer ],
                    [ 61, 65, Pdb_Integer, :numConect ],
                    [ 66, 70, Pdb_Integer, :numSeq ]
                    )
    
          # JRNL record classes
          #
          # Subclass of the enclosing record class that groups one record
          # class per JRNL sub-record.  Every table starts with the
          # sub-record name in columns 13-16 (:sub_record).  The constants
          # are collected into Definition at the end of the class body.
          class Jrnl < self
            # subrecord of JRNL
            # 13, 16
            # JRNL AUTH record class
            #
            # Continuation in columns 17-18, author list in columns 20-70.
            AUTH =
              def_rec([ 13, 16, Pdb_String,       :sub_record ], # "AUTH"
                      [ 17, 18, Pdb_Continuation, nil ],
                      [ 20, 70, Pdb_List,         :authorList ]
                      )

            # JRNL TITL record class
            #
            # Continuation in columns 17-18, title text in columns 20-70.
            TITL =
              def_rec([ 13, 16, Pdb_String,       :sub_record ], # "TITL"
                      [ 17, 18, Pdb_Continuation, nil ],
                      [ 20, 70, Pdb_LString,      :title ]
                      )

            # JRNL EDIT record class
            #
            # Continuation in columns 17-18, editor list in columns 20-70.
            EDIT =
              def_rec([ 13, 16, Pdb_String,       :sub_record ], # "EDIT"
                      [ 17, 18, Pdb_Continuation, nil ],
                      [ 20, 70, Pdb_List,         :editorList ]
                      )

            # JRNL REF record class
            #
            # The entry for columns 50-51 has the String "V." in the
            # accessor slot instead of a Symbol -- presumably a fixed
            # literal column without a named accessor; confirm against
            # def_rec.
            REF =
              def_rec([ 13, 16, Pdb_String,       :sub_record ], # "REF"
                      [ 17, 18, Pdb_Continuation, nil ],
                      [ 20, 47, Pdb_LString,      :pubName ],
                      [ 50, 51, Pdb_LString(2),   "V." ],
                      [ 52, 55, Pdb_String,       :volume ],
                      [ 57, 61, Pdb_String,       :page ],
                      [ 63, 66, Pdb_Integer,      :year ]
                      )

            # JRNL PUBL record class
            #
            # Continuation in columns 17-18, publisher text in columns 20-70.
            PUBL =
              def_rec([ 13, 16, Pdb_String,       :sub_record ], # "PUBL"
                      [ 17, 18, Pdb_Continuation, nil ],
                      [ 20, 70, Pdb_LString,      :pub ]
                      )

            # JRNL REFN record class
            #
            # No continuation field.  The entry for columns 20-23 has the
            # String "ASTM" in the accessor slot (see the note on REF).
            REFN =
              def_rec([ 13, 16, Pdb_String,     :sub_record ], # "REFN"
                      [ 20, 23, Pdb_LString(4), "ASTM" ], 
                      [ 25, 30, Pdb_LString(6), :astm ],
                      [ 33, 34, Pdb_LString(2), :country ],
                      [ 36, 39, Pdb_LString(4), :BorS ], # "ISBN" or "ISSN"
                      [ 41, 65, Pdb_LString,    :isbn ],
                      [ 67, 70, Pdb_LString(4), :coden ] # "0353" for unpublished
                      )

            # default or unknown record
            #
            # Declares only the sub-record name field.  The mixed-case name
            # does not match the all-capitals pattern used by
            # create_definition_hash, which instead installs this constant
            # as the hash default.
            Default =
              def_rec([ 13, 16, Pdb_String, :sub_record ]) # ""
            
            # definitions (hash)
            # built by create_definition_hash from the constants above
            Definition = create_definition_hash
          end #class JRNL
    
          # REMARK record classes for REMARK 1
          class Remark1 < self
            # Sub-record layouts of "REMARK   1" lines.
            #
            # Record.get_record_class selects one of these classes by taking
            # columns 13-16 of the line, stripping and interning them, and
            # using the result as the key into Remark1::Definition.
            #
            # Field specs appear to be [ first column, last column, field type,
            # accessor name ] with 1-based inclusive columns (def_rec is
            # defined outside this section -- confirm there).

            # REMARK 1 REFERENCE record class.
            # The keyword "REFERENCE" occupies columns 12-20, so the lookup
            # key taken from columns 13-16 is "EFER"; that is why the
            # constant has this odd name.
            EFER =
              def_rec([  8, 10, Pdb_Integer,    :remarkNum ],  # "1"
                      [ 12, 20, Pdb_String,     :sub_record ], # "REFERENCE"
                      [ 22, 70, Pdb_Integer,    :refNum ]
                      )
            
            # REMARK 1 AUTH record class
            AUTH =
              def_rec([  8, 10, Pdb_Integer,      :remarkNum ],  # "1"
                      [ 13, 16, Pdb_String,       :sub_record ], # "AUTH"
                      [ 17, 18, Pdb_Continuation, nil ],
                      [ 20, 70, Pdb_List,         :authorList ]
                      )
            
            # REMARK 1 TITL record class
            TITL =
              def_rec([  8, 10, Pdb_Integer,      :remarkNum ],  # "1"
                      [ 13, 16, Pdb_String,       :sub_record ], # "TITL"
                      [ 17, 18, Pdb_Continuation, nil ],
                      [ 20, 70, Pdb_LString,      :title ]
                      )
            
            # REMARK 1 EDIT record class
            # NOTE(review): editorList is declared as Pdb_LString here but as
            # Pdb_List in Jrnl::EDIT -- confirm which one is intended.
            EDIT =
              def_rec([  8, 10, Pdb_Integer,      :remarkNum ],  # "1"
                      [ 13, 16, Pdb_String,       :sub_record ], # "EDIT"
                      [ 17, 18, Pdb_Continuation, nil ],
                      [ 20, 70, Pdb_LString,      :editorList ]
                      )
            
            # REMARK 1 REF record class
            REF =
              def_rec([  8, 10, Pdb_Integer,      :remarkNum ],  # "1"
                      [ 13, 16, Pdb_LString(3),   :sub_record ], # "REF"
                      [ 17, 18, Pdb_Continuation, nil ],
                      [ 20, 47, Pdb_LString,      :pubName ],
                      [ 50, 51, Pdb_LString(2),   "V." ],
                      [ 52, 55, Pdb_String,       :volume ],
                      [ 57, 61, Pdb_String,       :page ],
                      [ 63, 66, Pdb_Integer,      :year ]
                      )
            
            # REMARK 1 PUBL record class
            PUBL =
              def_rec([  8, 10, Pdb_Integer,      :remarkNum ],  # "1"
                      [ 13, 16, Pdb_String,       :sub_record ], # "PUBL"
                      [ 17, 18, Pdb_Continuation, nil ],
                      [ 20, 70, Pdb_LString,      :pub ]
                      )
            
            # REMARK 1 REFN record class
            # NOTE(review): coden is declared as columns 68-70 with
            # Pdb_LString(4) here, while Jrnl::REFN uses columns 67-70 --
            # check against the PDB format description before changing.
            REFN =
              def_rec([  8, 10, Pdb_Integer,    :remarkNum ],  # "1"
                      [ 13, 16, Pdb_String,     :sub_record ], # "REFN"
                      [ 20, 23, Pdb_LString(4), "ASTM" ],
                      [ 25, 30, Pdb_LString,    :astm ],
                      [ 33, 34, Pdb_LString,    :country ],
                      [ 36, 39, Pdb_LString(4), :BorS ],
                      [ 41, 65, Pdb_LString,    :isbn ],
                      [ 68, 70, Pdb_LString(4), :coden ]
                      )
            
            # default (or unknown) record class for REMARK 1
            Default =
              def_rec([  8, 10, Pdb_Integer,    :remarkNum ],  # "1"
                      [ 13, 16, Pdb_String,     :sub_record ]  # ""
                      )

            # Lookup table used by Record.get_record_class for REMARK 1 lines.
            # It is built by create_definition_hash, which is defined outside
            # this section; presumably it collects the constants above.
            Definition = create_definition_hash
          end #class Remark1
    
          # REMARK record classes for REMARK 2
          class Remark2 < self
            # Layouts of "REMARK   2" (resolution) lines.
            #
            # There is no Definition hash here: Record.get_record_class picks
            # one of the three classes below by comparing fixed columns of
            # the line with 'ANGSTROMS.' and ' NOT APPLICABLE.'.

            # Chosen when columns 29-38 == 'ANGSTROMS.'
            ANGSTROMS = 
              def_rec([  8, 10, Pdb_Integer,     :remarkNum ], # "2"
                      [ 12, 22, Pdb_LString(11), :sub_record ], # "RESOLUTION."
                      [ 23, 27, Pdb_Real('5.2'), :resolution ],
                      [ 29, 38, Pdb_LString(10), "ANGSTROMS." ]
                      )
            
            # Chosen when columns 23-38 == ' NOT APPLICABLE.'
            # (here :resolution holds that text instead of a number)
            NOT_APPLICABLE = 
              def_rec([  8, 10, Pdb_Integer,     :remarkNum ], # "2"
                      [ 12, 22, Pdb_LString(11), :sub_record ], # "RESOLUTION."
                      [ 23, 38, Pdb_LString(16), :resolution ], # " NOT APPLICABLE."
                      [ 41, 70, Pdb_String,      :comment ]
                      )
            
            # Any other REMARK 2 line
            Default = 
              def_rec([  8, 10, Pdb_Integer,     :remarkNum ], # "2"
                      [ 12, 22, Pdb_LString(11), :sub_record ], # "RESOLUTION."
                      [ 24, 70, Pdb_String,      :comment ]
                      )
          end #class Remark2
          
          # REMARK record class for REMARK n (n>=3)
          RemarkN =
            # Used by get_record_class for every REMARK whose number is
            # neither 1 nor 2: the remark number plus free text.
            def_rec([  8, 10, Pdb_Integer, :remarkNum ],
                    [ 12, 70, Pdb_LString, :text ]
                    )

          # default (or unknown) record class: the rest of the line as text
          Default = def_rec([ 8, 70, Pdb_LString, :text ])

          # Lookup table from record name (Symbol) to record class, used by
          # get_record_class.  create_definition_hash is defined outside this
          # section; presumably it collects the record constants of this class.
          Definition = create_definition_hash

          # END record class.
          #
          # Because END is a reserved word of Ruby, the class is stored in
          # the constant End and registered in the hash by hand below.
          End = 
            def_rec([  2,  1, Pdb_Integer, :serial ]) # dummy field (always 0)

          # Register End under the key :END
          Definition['END'.intern] = End
    
          # Basically just look up the class in Definition hash
          # do some munging for JRNL and REMARK
          def self.get_record_class(str)
            # Record name as a Symbol; an empty name is kept as an empty String.
            name = fetch_record_name(str)
            name = name.intern unless name.empty?

            # Ordinary record types are registered directly in Definition.
            direct = Definition[name]
            return direct if direct

            # JRNL and REMARK 1 lines are subdivided by the keyword held in
            # columns 13-16 (interned unless it is empty).
            sub_record_key = lambda do
              sub = str[12..15].to_s.strip
              sub.empty? ? sub : sub.intern
            end

            if name == :JRNL
              Jrnl::Definition[sub_record_key.call]
            elsif name == :REMARK
              # The remark number lives in columns 8-10.
              remark_num = str[7..9].to_i
              if remark_num == 1
                Remark1::Definition[sub_record_key.call]
              elsif remark_num == 2
                # REMARK 2 is told apart by fixed text in the line.
                if str[28..37] == 'ANGSTROMS.'
                  Remark2::ANGSTROMS
                elsif str[22..37] == ' NOT APPLICABLE.'
                  Remark2::NOT_APPLICABLE
                else
                  Remark2::Default
                end
              else
                RemarkN
              end
            else
              # unknown field
              Default
            end
          end
        end #class Record
    
        # Record names that carry coordinate data.  Every name is registered
        # twice, once as a String and once as a Symbol, each mapped to true.
        Coordinate_fileds =
          %w( MODEL ENDMDL ATOM HETATM SIGATM SIGUIJ ANISOU TER ).inject({}) { |table, name|
            table[name] = true
            table[name.intern] = true
            table
          }
    
        # Creates a new Bio::PDB object from given str.
        def initialize(str)
          # Parses a whole PDB entry given as a single string.
          #
          # The text is split into lines and each line is replaced, in
          # @data, by the record object built from it.  The same records
          # are indexed by record name in @hash, and ATOM/HETATM records are
          # additionally arranged into models, chains, residues and
          # heterogens (collected in @models).  @id starts out as nil.

          @data = str.split(/[\r\n]+/)
          @hash = {}
          @models = []
          @id = nil

          # Holds the record that may still accept continuation lines
          # (false/nil when there is none).
          cont = false

          # "Current" objects while walking through the coordinate section.
          cModel    = Model.new
          cChain    = nil #Chain.new
          cResidue  = nil #Residue.new
          cLigand   = nil #Heterogen.new
          c_atom    = nil

          # Go through each line and replace it with a PDB::Record.
          @data.collect! do |line|
            # If the previous record accepts this line as a continuation
            # (add_continuation returns a true value), the line has been
            # merged into that record: skip it.  The slot in @data becomes
            # nil and is removed by compact! at the end.
            next if cont and cont = cont.add_continuation(line)

            # Make the new record
            f = Record.get_record_class(line).new.initialize_from_string(line)
            #p f
            # Remember the record if it may be continued on following lines
            cont = f if f.continue?
            # Index the record by its name: append to the existing array
            # or start a new one
            key = f.record_name
            if a = @hash[key] then
              a << f
            else
              @hash[key] = [ f ]
            end

            # For ATOM and HETATM, find (or create) the chain of the atom
            if key == 'ATOM' or key == 'HETATM' then
              if cChain and f.chainID == cChain.id
                chain = cChain
              else
                if chain = cModel[f.chainID]
                  # NOTE(review): cChain is only set here when it is nil, so
                  # after switching back to an already existing chain cChain
                  # keeps the previous chain; the local `chain` is what is
                  # used below.  Confirm this is intended.
                  cChain = chain unless cChain
                else
                  # If we don't have chain, add a new chain
                  newChain = Chain.new(f.chainID, cModel)
                  cModel.addChain(newChain)
                  cChain = newChain
                  chain = newChain
                end
                # chain might be changed, clearing cResidue and cLigand
                cResidue = nil
                cLigand = nil
              end
            end

            case key
            when 'ATOM'
              c_atom = f
              residueID = Residue.get_residue_id_from_atom(f)

              # Reuse the current residue, look it up in the chain, or
              # create it.
              if cResidue and residueID == cResidue.id
                residue = cResidue
              else
                if residue = chain.get_residue_by_id(residueID)
                  cResidue = residue unless cResidue
                else
                  # add a new residue
                  newResidue = Residue.new(f.resName, f.resSeq, f.iCode, chain)
                  chain.addResidue(newResidue)
                  cResidue = newResidue
                  residue = newResidue
                end
              end
              
              # Link atom and residue in both directions
              f.residue = residue
              residue.addAtom(f)

            when 'HETATM'
              c_atom = f
              residueID = Heterogen.get_residue_id_from_atom(f)

              # Same scheme as for ATOM, but with heterogens (ligands)
              if cLigand and residueID == cLigand.id
                ligand = cLigand
              else
                if ligand = chain.get_heterogen_by_id(residueID)
                  cLigand = ligand unless cLigand
                else
                  # add a new heterogen
                  newLigand = Heterogen.new(f.resName, f.resSeq, f.iCode, chain)
                  chain.addLigand(newLigand)
                  cLigand = newLigand
                  ligand = newLigand
                  #Each model has a special solvent chain. (for compatibility)
                  if f.resName == 'HOH'
                    cModel.addSolvent(newLigand)
                  end
                end
              end

              f.residue = ligand
              ligand.addAtom(f)

            when 'MODEL'
              # A new model starts: forget the current atom/chain/residue,
              # store the model built so far (unless it is the untouched
              # initial one) and open a new model with the given serial.
              c_atom = nil
              cChain = nil
              cResidue = nil
              cLigand = nil
              if cModel.model_serial or cModel.chains.size > 0 then
                self.addModel(cModel)
              end
              cModel = Model.new(f.serial)

            # TER, SIGATM, ANISOU and SIGUIJ are attached to the most recent
            # atom; stray ones (no current atom) are silently ignored.
            when 'TER'
              if c_atom
                c_atom.ter = f
              else
                #$stderr.puts "Warning: stray TER?"
              end
            when 'SIGATM'
              if c_atom
                #$stderr.puts "Warning: duplicated SIGATM?" if c_atom.sigatm
                c_atom.sigatm = f
              else
                #$stderr.puts "Warning: stray SIGATM?"
              end
            when 'ANISOU'
              if c_atom
                #$stderr.puts "Warning: duplicated ANISOU?" if c_atom.anisou
                c_atom.anisou = f
              else
                #$stderr.puts "Warning: stray ANISOU?"
              end
            when 'SIGUIJ'
              # SIGUIJ is attached to the ANISOU record of the current atom
              if c_atom and c_atom.anisou
                #$stderr.puts "Warning: duplicated SIGUIJ?" if c_atom.anisou.siguij
                c_atom.anisou.siguij = f
              else
                #$stderr.puts "Warning: stray SIGUIJ?"
              end

            else
              # Any other record ends the association with the current atom
              c_atom = nil

            end
            # The record replaces the line in @data
            f
          end #each
          #At the end we need to add the final model
          self.addModel(cModel)
          # Drop the nil slots left by lines merged as continuations
          @data.compact!
        end #def initialize
    
        # all records in this entry as an array.
        attr_reader :data

        # all records in this entry as a hash accessed by record names
        # (each value is an array of records).
        # Note that this reader replaces Object#hash for Bio::PDB objects.
        attr_reader :hash

        # models in this entry (array).
        attr_reader :models
    
        # Adds a Bio::PDB::Model object to the current structure.
        # Raises RuntimeError unless the argument is a Bio::PDB::Model.
        # Returns self.
        def addModel(model)
          # Refuse anything that is not a model before touching @models.
          unless model.is_a?(Bio::PDB::Model)
            raise "Expecting a Bio::PDB::Model"
          end
          @models << model
          self
        end
        
        # Iterates over each model.
        # Iterates over each of the models in the structure.
        # Returns self.
        def each
          # Hand every model to the caller's block, in stored order.
          @models.each do |m|
            yield m
          end
          self
        end
        # Alias needed for Bio::PDB::ModelFinder
        alias each_model each
        
        # Provides keyed access to the models based on serial number
        # returns nil if it's not there 
        def [](key)
          # Linear scan; the first model whose serial equals key is returned.
          @models.each do |candidate|
            return candidate if key == candidate.model_serial
          end
          nil
        end
        #--
        # (should it raise an exception?)
        #++
    
        #--
        #Stringifies to a list of atom records - we could add the annotation
        #as well if needed
        #++
    
        # Returns a string of Bio::PDB::Models. This propagates down the hierarchy
        # until it reaches Bio::PDB::Record::ATOM, which is output in PDB format.
        def to_s
          # Concatenate the text of every model, then close the entry with END.
          body = @models.inject("") { |text, model| text << model.to_s }
          body << "END\n"
        end
        
        #Groups an array of PDB::Records into a hash.
        #Each record is sent the method named by the given symbol (via
        #__send__) and the result is used as the hash key; each value is the
        #array of records sharing that key, in their original order.
        def make_hash(ary, meth)
          # Key of a record = result of calling meth on it; records with the
          # same key are collected in one array, in input order.
          grouped = {}
          ary.each { |rec| (grouped[rec.__send__(meth)] ||= []) << rec }
          grouped
        end
        private :make_hash
    
        #Takes an array of PDB::Records and returns an array of arrays, in which
        #consecutive records with the same (non-nil) meth value are grouped together
        def make_grouping(ary, meth)
          groups = []
          previous = nil
          ary.each do |rec|
            current = rec.__send__(meth)
            # Open a new group unless this record continues the previous run
            # (a nil/false key never continues a run).
            groups << [] unless previous && previous == current
            groups.last << rec
            previous = current
          end
          groups
        end
        private :make_grouping
    
        # Gets all records whose record type is _name_.
        # Returns an array of Bio::PDB::Record::* objects.
        #
        # if _name_ is nil, returns hash storing all record data.
        #
        # Example:
        # p pdb.record('HETATM')
        # p pdb.record['HETATM']
        #
        def record(name = nil)
          # Without a name the caller gets the whole lookup table itself.
          return @hash unless name
          @hash[name] || []
        end
    
        #--
        # PDB original methods
        #Returns a hash of the REMARK records based on the remarkNum
        #++
    
        # Gets REMARK records.
        # If no arguments, it returns all REMARK records as a hash.
        # If remark number is specified, returns only corresponding REMARK records.
        # If number == 1 or 2 ("REMARK   1" or "REMARK   2"), returns an array
        # of Bio::PDB::Record instances. Otherwise, returns an array of strings.
        #
        def remark(nn = nil)
          unless defined?(@remark)
            # Built once: REMARK records grouped by their remark number.
            by_number = make_hash(self.record('REMARK'), :remarkNum)
            by_number.each do |num, recs|
              recs.shift # remove first record (= space only)
              # REMARK 1 and 2 stay as record objects; all the others are
              # reduced to their text without trailing whitespace.
              next if num == 1 || num == 2
              recs.collect! { |f| f.text.gsub(/\s+\z/, '') }
            end
            @remark = by_number
          end
          return @remark unless nn
          @remark[nn]
        end
    
        # Gets JRNL records.
        # If no arguments, it returns all JRNL records as a hash.
        # If sub record name is specified, it returns only corresponding records
        # as an array of Bio::PDB::Record instances.
        #
        def jrnl(sub_record = nil)
          # The index of JRNL records by sub-record name is built only once.
          @jrnl = make_hash(self.record('JRNL'), :sub_record) unless defined?(@jrnl)
          return @jrnl unless sub_record
          @jrnl[sub_record]
        end
    
        #--
        #Finding methods - just grabs the record with the appropriate id
        #or returns an array of all of them
        #++
    
        # Gets HELIX records.
        # If no arguments are given, it returns all HELIX records.
        # (Returns an array of Bio::PDB::Record::HELIX instances.)
        # If helixID is given, it only returns records
        # corresponding to given helixID.
        # (Returns a Bio::PDB::Record::HELIX instance, or nil if none matches.)
        #
        def helix(helixID = nil)
          helices = self.record('HELIX')
          # No ID requested: hand back the complete list.
          return helices unless helixID
          helices.find { |f| f.helixID == helixID }
        end
    
        # Gets TURN records.
        # If no arguments are given, it returns all TURN records. 
        # (Returns an array of Bio::PDB::Record::TURN instances.)
        # If turnId is given, it only returns a record
        # corresponding to given turnId.
        # (Returns a Bio::PDB::Record::TURN instance, or nil if none matches.)
        #
        def turn(turnId = nil)
          turns = self.record('TURN')
          # No ID requested: hand back the complete list.
          return turns unless turnId
          turns.find { |f| f.turnId == turnId }
        end
    
        # Gets SHEET records.
        # If no arguments are given, it returns all SHEET records
        # as an array of arrays of Bio::PDB::Record::SHEET instances
        # (consecutive records sharing a sheetID form one inner array).
        # If sheetID is given, it returns only the inner arrays whose
        # sheetID matches, still wrapped in an outer array.
        def sheet(sheetID = nil)
          # Group the SHEET records only once and reuse the result.
          @sheet = make_grouping(self.record('SHEET'), :sheetID) unless defined?(@sheet)
          return @sheet unless sheetID
          @sheet.select { |strands| strands.first.sheetID == sheetID }
        end
    
        # Gets SSBOND records.
        def ssbond
          # Plain pass-through: every SSBOND record, in file order.
          bonds = self.record('SSBOND')
          bonds
        end
    
        #--
        # Get seqres - we get this to return a nice Bio::Seq object
        #++
        
        # Amino acid or nucleic acid sequence of backbone residues in "SEQRES".
        # If chainID is given, it returns the corresponding sequence
        # as a Bio::Sequence::AA or Bio::Sequence::NA object
        # (nil if there is no such chain).
        # Otherwise, returns a hash of all sequences keyed by chainID.
        #
        def seqres(chainID = nil)
          unless defined?(@seqres)
            sequences = {}
            make_hash(self.record('SEQRES'), :chainID).each do |chain, recs|
              # Residue names of all SEQRES lines of this chain, in order.
              names = recs.collect { |f| f.resName }.flatten

              # determine nuc or aa? Count, among the first 13 names, how
              # many are 3 characters long and how many 1 (after stripping).
              lengths = Hash.new(0)
              names[0, 13].each { |x| lengths[x.to_s.strip.size] += 1 }

              sequences[chain] =
                if lengths[3] >= lengths[1]
                  # amino acid sequence: the three letter code (i.e. ALA)
                  # must be looked up as Ala; unknown codes become 'X'
                  letters = names.collect do |aa|
                    code = aa.capitalize
                    one = begin
                            Bio::AminoAcid.three2one(code)
                          rescue ArgumentError
                            nil
                          end
                    one || 'X'
                  end
                  Bio::Sequence::AA.new(letters.join(''))
                else
                  # nucleic acid sequence: keep single letters, anything
                  # else becomes 'n'
                  letters = names.collect do |na|
                    base = na.delete('^a-zA-Z')
                    base.size == 1 ? base : 'n'
                  end
                  Bio::Sequence::NA.new(letters.join(''))
                end
            end
            @seqres = sequences
          end
          chainID ? @seqres[chainID] : @seqres
        end
    
        # Gets DBREF records.
        # Returns an array of Bio::PDB::Record::DBREF objects.
        #
        # If chainID is given, it returns corresponding DBREF records.
        def dbref(chainID = nil)
          refs = self.record('DBREF')
          # No chain requested: hand back the complete list.
          return refs unless chainID
          refs.select { |f| f.chainID == chainID }
        end
    
        # Keywords in "KEYWDS".
        # Returns an array of string.
        def keywords
          # Gather the keyword list of every KEYWDS record, then merge them
          # into one flat array.
          lists = []
          self.record('KEYWDS').each { |f| lists << f.keywds }
          lists.flatten
        end
    
        # Classification in "HEADER".
        def classification
          # Only the first HEADER record is consulted; nil without a HEADER.
          header = self.record('HEADER').first
          return nil unless header
          header.classification
        end
    
        # Get authors in "AUTHOR".
        def authors
          # Gather the author list of every AUTHOR record, then merge them
          # into one flat array.
          lists = []
          self.record('AUTHOR').each { |f| lists << f.authorList }
          lists.flatten
        end
    
        #--
        # Bio::DB methods
        #++
    
        # PDB identifier written in "HEADER". (e.g. 1A00)
        def entry_id
          # Cached after the first successful lookup.
          return @id if @id
          header = self.record('HEADER').first
          @id = header ? header.idCode : nil
        end
    
        # Same as Bio::PDB#entry_id.
        def accession
          # The accession is simply the entry ID.
          entry_id
        end
    
        # Title of this entry in "TITLE".
        def definition
          # Only the first TITLE record is consulted; nil without a TITLE.
          first_title = self.record('TITLE').first
          return nil unless first_title
          first_title.title
        end
    
        # Current modification number in "REVDAT".
        def version
          # Only the first REVDAT record is consulted; nil without a REVDAT.
          revdat = self.record('REVDAT').first
          return nil unless revdat
          revdat.modNum
        end
    
        # returns a string containing human-readable representation
        # of this object.
        def inspect
          # Class name plus the inspected entry ID, e.g. entry_id="1A00".
          class_name = self.class.to_s
          id_text = entry_id.inspect
          "#<#{class_name} entry_id=#{id_text}>"
        end
    
      end #class PDB
    
    end #module Bio
    
    bio-1.4.3.0001/lib/bio/db/pdb/chain.rb0000644000004100000410000001306712200110570016752 0ustar  www-datawww-data#
    # = bio/db/pdb/chain.rb - chain class for PDB
    #
    # Copyright:: Copyright (C) 2004, 2006
    #             Alex Gutteridge 
    #             Naohisa Goto 
    # License::   The Ruby License
    # 
    #
    # = Bio::PDB::Chain
    # 
    # Please refer Bio::PDB::Chain.
    #
    
    module Bio
    
      require 'bio/db/pdb' unless const_defined?(:PDB)
    
      class PDB
    
        # Bio::PDB::Chain is a class to store a chain.
        #
        # The object would contain some residues (Bio::PDB::Residue objects)
        # and some heterogens (Bio::PDB::Heterogen objects).
        # 
        class Chain

          include Utils
          include AtomFinder
          include ResidueFinder

          include HetatmFinder
          include HeterogenFinder

          include Enumerable
          include Comparable

          # Identifier of this chain
          attr_accessor :chain_id
          # alias
          alias id chain_id

          # the model to which this chain belongs.
          attr_reader :model

          # residues in this chain
          attr_reader :residues

          # heterogens in this chain
          attr_reader :heterogens

          # Creates a new, empty chain object with the given identifier,
          # belonging to the given model.
          def initialize(id = nil, model = nil)
            @chain_id = id
            @model    = model

            # Residues and heterogens are each kept twice: as an ordered
            # list and as a hash keyed by residue_id for direct lookup.
            @residues,   @residues_hash   = [], {}
            @heterogens, @heterogens_hash = [], {}
          end

          # get the residue by id (nil if there is no such residue)
          def get_residue_by_id(key)
            @residues_hash[key]
          end

          # get the residue by id.
          #
          # Compatibility Note: Now, you cannot find HETATMS in this method.
          # To add "LIGAND" to the id is no longer available.
          # To get heterogens, you must use get_heterogen_by_id.
          def [](key)
            get_residue_by_id(key)
          end

          # get the heterogen (ligand) by id (nil if there is none)
          def get_heterogen_by_id(key)
            @heterogens_hash[key]
          end

          # Adds a residue to this chain and returns self.
          # Raises RuntimeError unless the argument is a Bio::PDB::Residue.
          # When the residue_id is already taken, the residue is still
          # appended to the list but the hash keeps the earlier residue
          # (a warning is printed in verbose mode).
          def addResidue(residue)
            unless residue.is_a?(Bio::PDB::Residue)
              raise "Expecting a Bio::PDB::Residue"
            end
            @residues << residue
            key = residue.residue_id
            if @residues_hash[key]
              $stderr.puts "Warning: residue_id #{key.inspect} is already used" if $VERBOSE
            else
              @residues_hash[key] = residue
            end
            self
          end

          # Adds a heterogen (ligand) to this chain and returns self.
          # Raises RuntimeError unless the argument is a Bio::PDB::Residue.
          # Duplicate ids are handled in the same way as in addResidue.
          def addLigand(ligand)
            unless ligand.is_a?(Bio::PDB::Residue)
              raise "Expecting a Bio::PDB::Residue"
            end
            @heterogens << ligand
            key = ligand.residue_id
            if @heterogens_hash[key]
              $stderr.puts "Warning: heterogen_id (residue_id) #{key.inspect} is already used" if $VERBOSE
            else
              @heterogens_hash[key] = ligand
            end
            self
          end

          # Rebuilds the residue list and hash by adding every residue
          # again.  Returns self.
          def rehash_residues
            saved_list  = @residues
            saved_index = @residues_hash
            @residues = []
            @residues_hash = {}
            saved_list.each { |residue| addResidue(residue) }
            self
          rescue RuntimeError
            # An element was rejected: restore the previous state and
            # pass the error on.
            @residues = saved_list
            @residues_hash = saved_index
            raise
          end

          # Rebuilds the heterogen list and hash by adding every heterogen
          # again.  Returns self.
          def rehash_heterogens
            saved_list  = @heterogens
            saved_index = @heterogens_hash
            @heterogens = []
            @heterogens_hash = {}
            saved_list.each { |heterogen| addLigand(heterogen) }
            self
          rescue RuntimeError
            # An element was rejected: restore the previous state and
            # pass the error on.
            @heterogens = saved_list
            @heterogens_hash = saved_index
            raise
          end

          # rehash residues hash and heterogens hash
          def rehash
            rehash_residues
            rehash_heterogens
          end

          # Iterates over each residue
          def each(&block) #:yields: residue
            @residues.each(&block)
          end
          #Alias to override ResidueFinder#each_residue
          alias each_residue each

          # Iterates over each hetero-compound
          def each_heterogen(&block) #:yields: heterogen
            @heterogens.each(&block)
          end

          # Chains are ordered by their chain id
          def <=>(other)
            @chain_id <=> other.chain_id
          end

          # Stringifies the residues, a TER line, then the heterogens
          def to_s
            residue_text   = @residues.join('')
            heterogen_text = @heterogens.join('')
            residue_text + "TER\n" + heterogen_text
          end

          # returns a string containing human-readable representation
          # of this object.
          def inspect
            serial = model ? model.serial : nil
            "#<#{self.class.to_s} id=#{chain_id.inspect} " \
              "model.serial=#{serial.inspect} " \
              "residues.size=#{residues.size} " \
              "heterogens.size=#{heterogens.size} " \
              "aaseq=#{aaseq.inspect}>"
          end

          # gets an amino acid sequence of this chain from ATOM records.
          # The result is computed once and cached.
          def aaseq
            return @aaseq if defined?(@aaseq)
            letters = ""
            last_residue_num = nil
            @residues.each do |residue|
              # NOTE(review): last_residue_num is never assigned inside this
              # loop, so this gap-padding branch never runs.  It looks as if
              # gaps in residue numbering were meant to be filled with 'X';
              # decide whether to enable or remove it (enabling changes the
              # sequences callers currently receive).
              if last_residue_num
                gap = (residue.resSeq.to_i - last_residue_num).abs
                gap.times { letters << 'X' } if gap > 1
              end
              three_letter = residue.resName.capitalize
              one_letter = begin
                             Bio::AminoAcid.three2one(three_letter)
                           rescue ArgumentError
                             nil
                           end
              # residues without a one-letter code are written as 'X'
              letters << (one_letter || 'X')
            end
            @aaseq = Bio::Sequence::AA.new(letters)
          end
          # for backward compatibility
          alias atom_seq aaseq

        end #class Chain
    
      end #class PDB
    
    end #module Bio
    bio-1.4.3.0001/lib/bio/db/pdb/chemicalcomponent.rb0000644000004100000410000001521312200110570021353 0ustar  www-datawww-data#
    # = bio/db/pdb/chemicalcomponent.rb - PDB Chemical Component Dictionary parser
    #
    # Copyright:: Copyright (C) 2006
    #             GOTO Naohisa 
    # License::   The Ruby License
    #
    #
    # = About Bio::PDB::ChemicalComponent
    #
    # Please refer Bio::PDB::ChemicalComponent.
    #
    # = References
    #
    # * (())
    # * http://deposit.pdb.org/het_dictionary.txt
    #
    
    
    module Bio
    
      require 'bio/db/pdb' unless const_defined?(:PDB)
    
      class PDB
    
        # Bio::PDB::ChemicalComponent is a parser for an entry of
        # the PDB Chemical Component Dictionary.
        # 
        # The PDB Chemical Component Dictionary is available in
        # http://deposit.pdb.org/het_dictionary.txt
        class ChemicalComponent
    
          # delimiter for reading via Bio::FlatFile
          DELIMITER = RS = "\n\n"
    
          # Single field (normally a single line) of an entry.
          #
          # Subclass of Bio::PDB::Record holding the record definitions that
          # are used when parsing Chemical Component Dictionary entries.
          class Record < Bio::PDB::Record
    
            # Fetches the record name from a line: the first seven
            # characters (str[0..6]) with surrounding whitespace stripped.
            def fetch_record_name(str)
              str[0..6].strip
            end
            private :fetch_record_name
    
            # Class-level counterpart of the instance method
            # fetch_record_name; it is used by get_record_class below.
            def self.fetch_record_name(str)
              str[0..6].strip
            end
            private_class_method :fetch_record_name
    
            # RESIDUE field.
            # This definition may be wrong, because the format description
            # in the documents seems ambiguous.
            # NOTE(review): each def_rec entry is presumably
            # [ start column, end column, data type, accessor name ];
            # confirm against Bio::PDB::Record.def_rec.
            RESIDUE =
              def_rec([ 11, 13, Pdb_LString[3], :hetID ],
                      [ 16, 20, Pdb_Integer,    :numHetAtoms ]
                      )
    
            # CONECT field.
            # This definition may be wrong, because the format description
            # in the documents seems ambiguous.
            # The :other_atoms name is repeated for every bonded-atom column.
            CONECT =
              def_rec([ 12, 15, Pdb_Atom,         :name ],
                      [ 19, 20, Pdb_Integer,      :num ],
                      [ 21, 24, Pdb_Atom,         :other_atoms ],
                      [ 26, 29, Pdb_Atom,         :other_atoms ],
                      [ 31, 34, Pdb_Atom,         :other_atoms ],
                      [ 36, 39, Pdb_Atom,         :other_atoms ],
                      [ 41, 44, Pdb_Atom,         :other_atoms ],
                      [ 46, 49, Pdb_Atom,         :other_atoms ],
                      [ 51, 54, Pdb_Atom,         :other_atoms ],
                      [ 56, 59, Pdb_Atom,         :other_atoms ],
                      [ 61, 64, Pdb_Atom,         :other_atoms ],
                      [ 66, 69, Pdb_Atom,         :other_atoms ],
                      [ 71, 74, Pdb_Atom,         :other_atoms ],
                      [ 76, 79, Pdb_Atom,         :other_atoms ]
                      )
    
            # HET field.
            # It is the same as Bio::PDB::Record::HET.
            HET    = Bio::PDB::Record::HET
    
            #--
            #HETSYN = Bio::PDB::Record::HETSYN
            #++
    
            # HETSYN field.
            # It is very similar to Bio::PDB::Record::HETSYN, but is defined
            # here with its own column layout.
            HETSYN = 
                def_rec([  9, 10, Pdb_Continuation, nil ],
                        [ 12, 14, Pdb_LString(3),   :hetID ],
                        [ 16, 70, Pdb_String,       :hetSynonyms ]
                        )
    
            # HETNAM field.
            # It is the same as Bio::PDB::Record::HETNAM.
            HETNAM = Bio::PDB::Record::HETNAM
    
            # FORMUL field.
            # It is the same as Bio::PDB::Record::FORMUL.
            FORMUL = Bio::PDB::Record::FORMUL
    
            # Default definition for unknown fields.
            Default = Bio::PDB::Record::Default
    
            # Hash to store allowed definitions.
            # NOTE(review): create_definition_hash is inherited; it presumably
            # collects the record constants defined above, so it must stay
            # after them -- confirm against Bio::PDB::Record.
            Definition = create_definition_hash
    
            # END record class.
            #
            # Because END is a reserved word of Ruby, it is separately
            # added to the hash.
            End    = Bio::PDB::Record::End
            Definition['END'] = End
    
            # Looks up the record class for the given line in the
            # Definition hash, using the record name found at the
            # beginning of the line.
            def self.get_record_class(str)
              t = fetch_record_name(str)
              return Definition[t]
            end
          end #class Record
    
          # Creates a new object from the text of one entry.
          #
          # Every line of _str_ is turned into a Record object. A line that
          # is accepted as a continuation by the preceding record is merged
          # into that record and produces no record of its own.
          def initialize(str)
            @hash = {}
            # record that may still accept continuation lines (false if none)
            pending = false

            @data = str.split(/[\r\n]+/)
            @data.collect! do |line|
              if pending
                # add_continuation adds the line itself when it returns true
                pending = pending.add_continuation(line)
                next if pending
              end

              rec = Record.get_record_class(line).new.initialize_from_string(line)
              pending = rec if rec.continue?
              # index the record by its name; records sharing a name are
              # kept together in one array
              (@hash[rec.record_name] ||= []) << rec
              rec
            end
            # remove the nil placeholders left by merged continuation lines
            @data.compact!
          end
    
          # all records in this entry as an array.
          attr_reader :data
    
          # all records in this entry as an hash accessed by record names.
          attr_reader :hash
    
          # Identifier (hetID) taken from the first record of the entry,
          # which is the "RESIDUE" record. (e.g. CMP)
          def entry_id
            first_record = @data.first
            first_record.hetID
          end
    
          # Synonyms for the chemical component. Returns an array of strings.
          #
          # The text of the first HETSYN record is split at semicolons.
          # An empty array is returned when the entry has no HETSYN record.
          def hetsyn
            return @hetsyn if defined? @hetsyn

            records = @hash["HETSYN"]
            # not memoized: a later call looks the record up again
            return [] unless records

            @hetsyn = records[0].hetSynonyms.to_s.split(/\;\s*/)
          end
          
          # The name of the chemical component.
          # Returns a string, or nil if the entry has no HETNAM record.
          def hetnam
            records = @hash["HETNAM"]
            first = records && records[0]
            # Return nil, as documented, instead of raising NoMethodError
            # when the record is missing.
            first ? first.text : nil
          end
    
          # The chemical formula of the chemical component.
          # Returns a string, or nil if the entry has no FORMUL record.
          def formul
            records = @hash["FORMUL"]
            first = records && records[0]
            # Return nil, as documented, instead of raising NoMethodError
            # when the record is missing.
            first ? first.text : nil
          end
    
          # Returns a hash of bindings of atoms: each key is an atom name
          # and each value is the array of atom names listed in the same
          # CONECT record.
          # Note that white spaces are stripped from all atom symbols.
          # Records whose atom name is blank are ignored.
          # An empty hash is returned when the entry has no CONECT record.
          # The result is computed once and cached.
          def conect
            unless defined? @conect
              bonds = {}
              # A missing CONECT record yields an empty hash instead of
              # raising NoMethodError on nil.
              (@hash["CONECT"] || []).each do |rec|
                atom = rec.name.to_s.strip
                next if atom.empty?
                # a later record with the same atom name replaces the earlier one
                bonds[atom] = rec.other_atoms.collect { |x| x.strip }
              end
              @conect = bonds
            end
            @conect
          end
    
          # Gets all records whose record type is _name_.
          # Returns an array of record objects, or nil if there is no
          # record of that type.
          #
          # If _name_ is nil (or omitted), returns the hash storing all
          # records, keyed by record name.
          #
          # Example:
          # p pdb.record('CONECT')
          # p pdb.record['CONECT']
          #
          def record(name = nil)
            return @hash unless name
            @hash[name]
          end
    
        end #class ChemicalComponent
      end #class PDB
    end #module Bio
    
    bio-1.4.3.0001/lib/bio/db/nbrf.rb0000644000004100000410000001232012200110570016041 0ustar  www-datawww-data#
    # = bio/db/nbrf.rb - NBRF/PIR format sequence data class
    #
    # Copyright:: Copyright (C) 2001-2003,2006 Naohisa Goto 
    #             Copyright (C) 2001-2002 Toshiaki Katayama 
    # License::   The Ruby License
    #
    #  $Id: nbrf.rb,v 1.10 2007/04/05 23:35:40 trevor Exp $
    #
    # Sequence data class for NBRF/PIR flatfile format.
    #
    # = References
    #
    # * http://pir.georgetown.edu/pirwww/otherinfo/doc/techbulletin.html
    # * http://www.sander.embl-ebi.ac.uk/Services/webin/help/webin-align/align_format_help.html#pir
    # * http://www.cmbi.kun.nl/bioinf/tools/crab_pir.html
    #
    
    require 'bio/db'
    require 'bio/sequence'
    
    module Bio
    
      # Sequence data class for NBRF/PIR flatfile format.
      class NBRF < DB
        #--
        # based on Bio::FastaFormat class
        #++
    
        # Delimiter of each entry. Bio::FlatFile uses it.
        DELIMITER	= RS = "\n>"
    
        # (Integer) excess read size included in DELIMITER.
        DELIMITER_OVERRUN = 1 # '>'
    
        #--
        # Note: DELIMITER is changed due to the change of Bio::FlatFile.
        # DELIMITER	= RS = "*\n"
        #++
    
        # Creates a new NBRF object. It stores the comment and sequence
        # information from one entry of the NBRF/PIR format string.
        # If the argument contains more than one
        # entry, only the first entry is used.
        #
        # The first line supplies the sequence type and the entry ID, the
        # second line the definition, and everything after that is the
        # sequence data.
        def initialize(str)
          str = str.sub(/\A[\r\n]+/, '') # remove leading empty lines
          # split into first line, second line and the remainder
          line1, line2, rest = str.split(/^/, 3)
    
          rest = rest.to_s
          rest.sub!(/^>.*/m, '') # remove trailing entries for sure
          # $& is the text matched by the sub! just above, i.e. the removed
          # part of the following entries (nil when nothing was removed).
          @entry_overrun = $&
          rest.sub!(/\*\s*\z/, '') # remove last '*' and "\n"
          @data = rest
    
          @definition = line2.to_s.chomp
          # first line: optional '>', two-character type code, ';', entry ID.
          # @seq_type and @entry_id are left unset when the line does not match.
          if /^>?([A-Za-z0-9]{2})\;(.*)/ =~ line1.to_s then
            @seq_type = $1
            @entry_id = $2
          end
        end
    
        # Returns sequence type described in the entry.
        #  P1 (protein), F1 (protein fragment)
        #  DL (DNA linear), DC (DNA circular)
        #  RL (RNA linear), RC (RNA circular)
        #  N3 (tRNA), N1 (other functional RNA)
        attr_accessor :seq_type
    
        # Returns ID described in the entry.
        attr_accessor :entry_id
        alias accession entry_id
    
        # Returns the description line of the NBRF/PIR formatted data.
        attr_accessor :definition
    
        # sequence data of the entry (???)
        attr_accessor :data
    
        # piece of next entry. Bio::FlatFile uses it.
        attr_reader :entry_overrun
    
    
        # Returns the stored entry as NBRF/PIR formatted text (same as to_s).
        # 'XX' is used as the sequence type when none is set.
        # The generated text is also kept in @entry.
        def entry
          type = @seq_type || 'XX'
          header = ">#{type};#{@entry_id}\n"
          @entry = header + "#{definition}\n" + "#{@data}*\n"
        end
        alias to_s entry
    
        # Returns the class used for the sequence data, chosen by the
        # sequence type: Bio::Sequence::AA for P1/F1, Bio::Sequence::NA for
        # DL/DC/RL/RC/N1/N3, and Bio::Sequence for anything else
        # (including a missing type).
        def seq_class
          type = @seq_type
          # Regexp#=== is used (as in a case/when) so that a nil or
          # non-string type simply does not match
          return Sequence::AA if /[PF]1/ === type
          return Sequence::NA if /[DR][LC]/ === type || /N[13]/ === type
          Sequence
        end
    
        # Returns sequence data as an instance of the class given by
        # seq_class (Bio::Sequence::NA, Bio::Sequence::AA or Bio::Sequence).
        # Spaces, tabs, line breaks and digits are removed from the raw
        # data. The object is built on first use and then reused.
        def seq
          return @seq if defined?(@seq)

          cleaned = @data.tr(" \t\r\n0-9", '') # lazy clean up
          @seq = seq_class.new(cleaned)
        end
    
        # Returns the length of the sequence returned by seq.
        def length
          sequence = seq
          sequence.length
        end
    
        # Returns the nucleic acid sequence.
        # If you call naseq for a protein sequence, a RuntimeError is raised.
        # A sequence of undetermined type is converted to Bio::Sequence::NA.
        # Use the method if you know whether the sequence is NA or AA.
        def naseq
          sequence = seq
          case sequence
          when Bio::Sequence::AA
            raise 'not nucleic but protein sequence'
          when Bio::Sequence::NA
            sequence
          else
            Bio::Sequence::NA.new(sequence)
          end
        end
          
        # Returns the length of the nucleic acid sequence.
        # If you call nalen for a protein sequence, a RuntimeError is raised
        # (by naseq).
        # Use the method if you know whether the sequence is NA or AA.
        def nalen
          nucleic = naseq
          nucleic.length
        end
    
        # Returns the protein (amino acids) sequence.
        # If you call aaseq for a nucleic acid sequence,
        # a RuntimeError is raised.
        # A sequence of undetermined type is converted to Bio::Sequence::AA.
        # Use the method if you know whether the sequence is NA or AA.
        def aaseq
          if seq.is_a?(Bio::Sequence::NA) then
            # The message used to be copied from naseq and stated the
            # opposite of the actual problem.
            raise 'not protein but nucleic sequence'
          elsif seq.is_a?(Bio::Sequence::AA) then
            seq
          else
            Bio::Sequence::AA.new(seq)
          end
        end
    
        # Returns the length of the protein (amino acids) sequence.
        # If you call aalen for a nucleic acid sequence,
        # a RuntimeError is raised (by aaseq).
        # Use the method if you know whether the sequence is NA or AA.
        def aalen
          protein = aaseq
          protein.length
        end
    
        #--
        #class method
        #++
    
        # Creates a NBRF/PIR formatted text from a hash of parameters.
        # Every key can be omitted.
        #
        # * :seq_type   => sequence type code; when missing it is guessed
        #   from the class of :seq ('P1' for Bio::Sequence::AA, 'RL' or 'DL'
        #   for Bio::Sequence::NA depending on whether it contains "u",
        #   otherwise 'XX')
        # * :seq        => sequence
        # * :entry_id   => entry ID
        # * :definition => definition line
        # * :width      => line width for wrapping the sequence
        #   (default 70); nil or false disables wrapping
        def self.to_nbrf(hash)
          seq_type = hash[:seq_type]
          seq = hash[:seq]
          seq_type ||=
            case seq
            when Bio::Sequence::AA then 'P1'
            when Bio::Sequence::NA then (/u/i =~ seq ? 'RL' : 'DL')
            else 'XX'
            end

          width = hash.fetch(:width, 70)
          body = seq.to_s + "*"
          body =
            if width
              # the terminating '*' is wrapped together with the sequence
              body.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
            else
              body + "\n"
            end

          ">#{seq_type};#{hash[:entry_id]}\n#{hash[:definition]}\n#{body}"
        end
    
      end #class NBRF
    end #module Bio
    
    bio-1.4.3.0001/lib/bio/db/fastq/0000755000004100000410000000000012200110570015705 5ustar  www-datawww-databio-1.4.3.0001/lib/bio/db/fastq/fastq_to_biosequence.rb0000644000004100000410000000160412200110570022435 0ustar  www-datawww-data#
    # = bio/db/fastq/fastq_to_biosequence.rb - Bio::Fastq to Bio::Sequence adapter module
    #
    # Copyright::   Copyright (C) 2009
    #               Naohisa Goto 
    # License::     The Ruby License
    #
    
    require 'bio/sequence'
    require 'bio/sequence/adapter'
    
    # Internal use only. Normal users should not use this module.
    #
    # Bio::Fastq to Bio::Sequence adapter module.
    # It is internally used in Bio::Fastq#to_biosequence.
    #
    # NOTE(review): def_biosequence_adapter comes from
    # Bio::Sequence::Adapter (extended below). Without a block it
    # presumably forwards the named attribute to the original object, and
    # with a block it uses the block's value -- confirm against
    # bio/sequence/adapter.rb.
    module Bio::Sequence::Adapter::Fastq
    
      extend Bio::Sequence::Adapter
    
      private
    
      # sequence
      def_biosequence_adapter :seq
    
      # entry ID
      def_biosequence_adapter :entry_id
    
      # primary accession: the entry ID of the original object is used
      def_biosequence_adapter :primary_accession do |orig|
        orig.entry_id
      end
    
      # definition
      def_biosequence_adapter :definition
    
      # quality scores
      def_biosequence_adapter :quality_scores
    
      # type of the quality scores
      def_biosequence_adapter :quality_score_type
    
      # error probabilities
      def_biosequence_adapter :error_probabilities
    
    end #module Bio::Sequence::Adapter::Fastq
    bio-1.4.3.0001/lib/bio/db/fastq/format_fastq.rb0000644000004100000410000001254112200110570020723 0ustar  www-datawww-data#
    # = bio/db/fastq/format_fastq.rb - FASTQ format generator
    #
    # Copyright::   Copyright (C) 2009
    #               Naohisa Goto 
    # License::    The Ruby License
    #
    
    require 'bio/db/fastq'
    
    module Bio::Sequence::Format::Formatter
    
      # INTERNAL USE ONLY, YOU SHOULD NOT USE THIS CLASS.
      #
      # FASTQ format output class for Bio::Sequence.
      #
      # The default FASTQ format is fastq-sanger.
      class Fastq < Bio::Sequence::Format::FormatterBase
    
        # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
        #
        # Creates a new FASTQ format generator object from the sequence.
        #
        # ---
        # *Arguments*:
        # * _sequence_: Bio::Sequence object
        # * (optional) :repeat_title => (true or false) if true, repeating title in the "+" line; if not true, "+" only (default false)
        # * (optional) :width => _width_: (Fixnum) width to wrap sequence and quality lines;  nil to prevent wrapping (default nil)
        # * (optional) :title => _title_: (String) completely replaces title line with the _title_ (default nil)
        # * (optional) :default_score => _score_: (Integer) default score for bases that have no valid quality scores or error probabilities; false or nil means the lowest score, true means the highest score (default nil)
        #
        # The definition below is guarded by "if false", so it is never
        # executed; it exists only to carry this documentation.
        def initialize; end if false # dummy for RDoc
    
        # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
        #
        # Output the FASTQ format string of the sequence.
        #
        # The title line is the :title option when given; otherwise it is
        # the definition of the sequence, prefixed with the entry ID unless
        # the definition already starts with the entry ID followed by a
        # white space.
        #
        # Currently, this method is used in Bio::Sequence#output like so,
        #
        #   s = Bio::Sequence.new('atgc')
        #   puts s.output(:fastq_sanger)
        # ---
        # *Returns*:: String object
        def output
          width = @options.fetch(:width, nil)
          seq = @sequence.seq.to_s
          entry_id = @sequence.entry_id ||
            "#{@sequence.primary_accession}.#{@sequence.sequence_version}"
          definition = @sequence.definition

          title = @options[:title]
          unless title
            title = definition.to_s
            id_len = entry_id.length
            starts_with_id = (title[0, id_len] == entry_id) &&
              (/\s/ =~ title[id_len, 1].to_s)
            title = "#{entry_id} #{title}" unless starts_with_id
          end
          title2 = @options[:repeat_title] ? title : ''
          qstr = fastq_quality_string(seq, @options[:default_score])

          # wraps the text at _width_ characters, or just terminates the
          # line when no width is given
          wrap = lambda do |text|
            if width
              text.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
            else
              text + "\n"
            end
          end

          "@#{title}\n" + wrap.call(seq) + "+#{title2}\n" + wrap.call(qstr)
        end
    
        private
        # Format parameters used by this formatter: the instance of
        # Bio::Fastq::FormatData::FASTQ_SANGER.
        def fastq_format_data
          format_class = Bio::Fastq::FormatData::FASTQ_SANGER
          format_class.instance
        end
    
        # Builds the quality string for _seq_.
        #
        # The scores come from fastq_quality_scores. When there are fewer
        # scores than sequence letters, the rest is filled with
        # _default_score_: true means the highest score of the format,
        # false or nil the lowest score, and any other value is used as is.
        def fastq_quality_string(seq, default_score)
          scores = fastq_quality_scores(seq)
          missing = seq.length - scores.size
          if missing > 0
            filler =
              if default_score == true
                fastq_format_data.score_range.end
              else
                default_score || fastq_format_data.score_range.begin
              end
            scores = scores + Array.new(missing, filler)
          end
          fastq_format_data.scores2str(scores)
        end
    
        # Collects the quality scores for _seq_, expressed in the score
        # type of the output format.
        #
        # The sources are tried in this order:
        # 1. quality scores that already have the score type of the format
        #    and cover the whole sequence,
        # 2. error probabilities covering the whole sequence (converted
        #    with p2q),
        # 3. quality scores of the other type covering the whole sequence
        #    (converted between PHRED and Solexa),
        # 4. partial data: whichever of quality scores and error
        #    probabilities covers more of the sequence.
        #
        # The result never has more elements than the sequence has letters:
        # surplus scores are cut off, because they would make the quality
        # line longer than the sequence line.
        # ---
        # *Arguments*:
        # * _seq_: (String) sequence
        # *Returns*:: Array of scores; may be shorter than _seq_, and is
        #             empty when no usable information exists
        def fastq_quality_scores(seq)
          return [] if seq.length <= 0
          fmt = fastq_format_data
          # checks quality_scores
          qsc = @sequence.quality_scores
          qsc_type = @sequence.quality_score_type
          if qsc and qsc_type and
              qsc_type == fmt.quality_score_type and
              qsc.size >= seq.length then
            # truncated, like the other full-coverage branches below
            return qsc[0, seq.length]
          end
          
          # checks error_probabilities
          ep = @sequence.error_probabilities
          if ep and ep.size >= seq.length then
            return fmt.p2q(ep[0, seq.length])
          end

          # If quality score type of the sequence is nil, regarded as :phred.
          qsc_type ||= :phred

          # checks if scores can be converted
          if qsc and qsc.size >= seq.length then
            case [ qsc_type, fmt.quality_score_type ]
            when [ :phred, :solexa ]
              return fmt.convert_scores_from_phred_to_solexa(qsc[0, seq.length])
            when [ :solexa, :phred ]
              return fmt.convert_scores_from_solexa_to_phred(qsc[0, seq.length])
            end
          end

          # checks quality scores type
          case qsc_type
          when :phred, :solexa
            #does nothing
          else
            # scores of an unknown type cannot be used
            qsc_type = nil
            qsc = nil
          end

          # collects piece of information
          qsc_cov = qsc ? qsc.size.quo(seq.length) : 0
          ep_cov = ep ? ep.size.quo(seq.length) : 0
          if qsc_cov > ep_cov then
            case [ qsc_type, fmt.quality_score_type ]
            when [ :phred, :phred ], [ :solexa, :solexa ]
              # Scores whose type was nil (regarded as :phred) can reach
              # this point with more elements than the sequence has
              # letters, so they are truncated here as well.
              return qsc[0, seq.length]
            when [ :phred, :solexa ]
              return fmt.convert_scores_from_phred_to_solexa(qsc)
            when [ :solexa, :phred ]
              return fmt.convert_scores_from_solexa_to_phred(qsc)
            end
          elsif ep_cov > qsc_cov then
            return fmt.p2q(ep)
          end

          # if no information, returns empty array
          return []
        end
      end #class Fastq
    
      # class Fastq_sanger is the same as the Fastq class.
      Fastq_sanger = Fastq
    
      # Formatter for the fastq-solexa variant. It is the same as the Fastq
      # class except that it uses the FASTQ_SOLEXA format data.
      class Fastq_solexa < Fastq
        # Format parameters used by this formatter.
        def fastq_format_data
          Bio::Fastq::FormatData::FASTQ_SOLEXA.instance
        end
        private :fastq_format_data
      end #class Fastq_solexa
    
      # Formatter for the fastq-illumina variant. It is the same as the
      # Fastq class except that it uses the FASTQ_ILLUMINA format data.
      class Fastq_illumina < Fastq
        # Format parameters used by this formatter.
        def fastq_format_data
          Bio::Fastq::FormatData::FASTQ_ILLUMINA.instance
        end
        private :fastq_format_data
      end #class Fastq_illumina
    
    end #module Bio::Sequence::Format::Formatter
    
    
    bio-1.4.3.0001/lib/bio/db/prosite.rb0000644000004100000410000002574112200110570016612 0ustar  www-datawww-data#
    # = bio/db/prosite.rb - PROSITE database class
    #
    # Copyright::  Copyright (C) 2001 Toshiaki Katayama 
    # License::    The Ruby License
    #
    # $Id:$
    #
    
    require 'bio/db'
    
    module Bio
    
    class PROSITE < EMBLDB
    
      # Delimiter
      DELIMITER = "\n//\n"
    
      # Delimiter
      RS = DELIMITER
    
      # Bio::DB API
      TAGSIZE = 5
    
    
      # Creates a new object from the text of one PROSITE entry.
      # The entry is handed to the superclass together with TAGSIZE.
      def initialize(entry)
        super(entry, TAGSIZE)
      end
    
    
      # ID  Identification                     (Begins each entry; 1 per entry)
      #
      #  ID   ENTRY_NAME; ENTRY_TYPE.  (ENTRY_TYPE : PATTERN, MATRIX, RULE)
      #
      # Returns the entry name. The ID line is parsed on first use, and
      # the entry type found on the same line is stored for #division.
      def name
        return @data['ID'] if @data['ID']

        entry_name, entry_type = fetch('ID').chomp('.').split('; ')
        @data['ID'] = entry_name
        @data['TYPE'] = entry_type
        entry_name
      end
    
      # Returns the entry type given on the ID line (PATTERN, MATRIX or
      # RULE). The ID line is parsed through #name when necessary.
      def division
        name unless @data['TYPE']
        @data['TYPE']
      end
    
    
      # AC  Accession number                   (1 per entry)
      #
      #  AC   PSnnnnn;
      #
      # Returns the accession number without the trailing ';'.
      def ac
        @data['AC'] ||= fetch('AC').chomp(';')
      end
    
      alias entry_id ac
    
    
      # DT  Date                               (1 per entry)
      #
      #  DT   MMM-YYYY (CREATED); MMM-YYYY (DATA UPDATE); MMM-YYYY (INFO UPDATE).
      #
      # Returns the result of field_fetch('DT').
      # NOTE(review): field_fetch is defined in a superclass; presumably it
      # returns the content of the DT line as a string -- confirm.
      def dt
        field_fetch('DT')
      end
    
      alias date dt
    
    
      # DE  Short description                  (1 per entry)
      #
      #  DE   Description.
      #
      # Returns the result of field_fetch('DE').
      # NOTE(review): presumably the description as a string -- confirm
      # against field_fetch in the superclass.
      def de
        field_fetch('DE')
      end
    
      alias definition de
    
    
      # PA  Pattern                            (>=0 per entry)
      #
      #  see - pa2re method
      #
      # Returns the pattern with all white spaces removed, or nil when
      # there is no pattern. The cleaned string is kept in @data['PA'].
      def pa
        field_fetch('PA')
        @data['PA'] ||= fetch('PA')
        pattern = @data['PA']
        # the stored string itself is cleaned, so later calls get it as is
        pattern.gsub!(/\s+/, '') if pattern
        pattern
      end
    
      alias pattern pa
    
    
      # MA  Matrix/profile                     (>=0 per entry)
      #
      #  see - ma2re method
      #
      # Returns the result of field_fetch('MA').
      # NOTE(review): presumably the content of the MA lines as a string
      # -- confirm against field_fetch in the superclass.
      def ma
        field_fetch('MA')
      end
    
      alias profile ma
    
    
      # RU  Rule                               (>=0 per entry)
      #
      #  RU   Rule_Description.
      #
      #  The rule is described in ordinary English and is free-format.
      #
      # Returns the result of field_fetch('RU').
      # NOTE(review): presumably the rule description as a string --
      # confirm against field_fetch in the superclass.
      def ru
        field_fetch('RU')
      end
    
      alias rule ru
    
    
      # NR  Numerical results                  (>=0 per entry)
      #
      #   - SWISS-PROT scan statistics of true and false positives/negatives
      #
      # /RELEASE     SWISS-PROT release  number and  total  number  of  sequence
      #              entries in that release.
      # /TOTAL       Total number of hits in SWISS-PROT.
      # /POSITIVE    Number of  hits on proteins that are known to belong to the
      #              set in consideration.
      # /UNKNOWN     Number of  hits on  proteins that  could possibly belong to
      #              the set in consideration.
      # /FALSE_POS   Number of false hits (on unrelated proteins).
      # /FALSE_NEG   Number of known missed hits.
      # /PARTIAL     Number of  partial sequences  which belong  to the  set  in
      #              consideration, but  which  are  not  hit  by the pattern or
      #              profile because they are partial (fragment) sequences.
      #
      # Returns a hash keyed by qualifier name (without the leading '/').
      # The values are
      # * [hits, sequences] (two integers) for values written "n(m)",
      # * [release, sequences] (string, integer) for values written "r,n",
      # * an integer for anything else.
      def nr
        return @data['NR'] if @data['NR']

        stats = {}
        fetch('NR').scan(%r{/(\S+)=([^;]+);}) do |key, raw|
          stats[key] =
            if (m = /^(\d+)\((\d+)\)$/.match(raw))
              # number of hits, number of sequences
              [m[1].to_i, m[2].to_i]
            elsif (m = /([\d\.]+),(\d+)/.match(raw))
              # SWISS-PROT release, number of SWISS-PROT sequences
              [m[1], m[2].to_i]
            else
              raw.to_i
            end
        end
        @data['NR'] = stats
      end
    
      alias statistics nr
    
      # Returns the /RELEASE entry of the NR statistics. #nr stores it as
      # [release (String), number of sequences (Integer)] when the value is
      # written as "release,sequences"; nil when the qualifier is absent.
      def release
        statistics['RELEASE']
      end
    
      # Returns the first element of #release (the SWISS-PROT release).
      def swissprot_release_number
        release.first
      end
    
      # Returns the last element of #release (the number of SWISS-PROT
      # sequences in that release).
      def swissprot_release_sequences
        release.last
      end
    
      # Returns the /TOTAL entry of the NR statistics. #nr stores it as
      # [hits, sequences] when the value is written as "hits(sequences)".
      def total
        statistics['TOTAL']
      end
    
      # Returns the first element of #total (the number of hits).
      def total_hits
        total.first
      end
    
      # Returns the last element of #total (the number of sequences).
      def total_sequences
        total.last
      end
    
      # Returns the /POSITIVE entry of the NR statistics. #nr stores it as
      # [hits, sequences] when the value is written as "hits(sequences)".
      def positive
        statistics['POSITIVE']
      end
    
      # Returns the first element of #positive (the number of hits).
      def positive_hits
        positive.first
      end
    
      # Returns the last element of #positive (the number of sequences).
      def positive_sequences
        positive.last
      end
    
      # Returns the /UNKNOWN entry of the NR statistics. #nr stores it as
      # [hits, sequences] when the value is written as "hits(sequences)".
      def unknown
        statistics['UNKNOWN']
      end
    
      # Returns the first element of #unknown (the number of hits).
      def unknown_hits
        unknown.first
      end
    
      # Returns the last element of #unknown (the number of sequences).
      def unknown_sequences
        unknown.last
      end
    
      # Returns the /FALSE_POS entry of the NR statistics. #nr stores it as
      # [hits, sequences] when the value is written as "hits(sequences)".
      def false_pos
        statistics['FALSE_POS']
      end
    
      # Returns the first element of #false_pos (the number of hits).
      def false_positive_hits
        false_pos.first
      end
    
      # Returns the last element of #false_pos (the number of sequences).
      def false_positive_sequences
        false_pos.last
      end
    
      # Returns the /FALSE_NEG entry of the NR statistics. #nr stores a
      # plain number as an Integer.
      def false_neg
        statistics['FALSE_NEG']
      end
      alias false_negative_hits false_neg
    
      # Returns the /PARTIAL entry of the NR statistics. #nr stores a
      # plain number as an Integer.
      def partial
        statistics['PARTIAL']
      end
    
    
      # CC  Comments                           (>=0 per entry)
      #
      #  CC   /QUALIFIER=data; /QUALIFIER=data; .......
      #
      # /TAXO-RANGE  Taxonomic range.
      # /MAX-REPEAT  Maximum known  number of  repetitions of  the pattern  in a
      #              single protein.
      # /SITE        Indication of an `interesting' site in the pattern.
      # /SKIP-FLAG   Indication of  an entry that can be, in some cases, ignored
      #              by a program (because it is too unspecific).
      #
      # Returns a hash that maps each qualifier name (without the leading
      # '/') to its data string.
      def cc
        return @data['CC'] if @data['CC']

        qualifiers = {}
        fetch('CC').scan(%r{/(\S+)=([^;]+);}) do |key, value|
          qualifiers[key] = value
        end
        @data['CC'] = qualifiers
      end
    
      alias comment cc
    
      # Returns the taxonomic range (/TAXO-RANGE) of the CC line, or nil
      # if it is not given.
      #
      # Without _expand_ the raw code string is returned. With a true
      # _expand_ an array of names is returned, one for each known code
      # letter (A, B, E, P, V) in the order of appearance; other
      # characters are skipped.
      def taxon_range(expand = nil)
        codes = comment['TAXO-RANGE']
        return codes unless codes && expand

        names = {
          'A' => 'archaebacteria',
          'B' => 'bacteriophages',
          'E' => 'eukaryotes',
          'P' => 'prokaryotes',
          'V' => 'eukaryotic viruses'
        }
        codes.scan(/./).map { |code| names[code] }.compact
      end
    
      # Returns the /MAX-REPEAT value of the CC line as an integer
      # (0 when the qualifier is absent).
      def max_repeat
        raw = comment['MAX-REPEAT']
        raw.to_i
      end
    
      # Returns the /SITE value of the CC line as [number, description],
      # where the number is an integer. [0, nil] is returned when the
      # qualifier is absent.
      def site
        num, desc = (comment['SITE'] || '').split(',')
        [num.to_i, desc]
      end
    
      # Returns true if the /SKIP-FLAG value of the CC line is 'TRUE',
      # and nil otherwise.
      def skip_flag
        true if comment['SKIP-FLAG'] == 'TRUE'
      end
    
    
      # DR  Cross-references to SWISS-PROT     (>=0 per entry)
      #
      #  DR   AC_NB, ENTRY_NAME, C; AC_NB, ENTRY_NAME, C; AC_NB, ENTRY_NAME, C;
      #
      # - `AC_NB' is the SWISS-PROT primary accession number of the entry to
      #   which reference is being made.
      # - `ENTRY_NAME' is the SWISS-PROT entry name.
      # - `C' is a one character flag that can be one of the following:
      #
      # T For a true positive.
      # N For a false negative; a sequence which belongs to the set under
      #   consideration, but which has not been picked up by the pattern or
      #   profile.
      # P For a `potential' hit; a sequence that belongs to the set under
      #   consideration, but which was not picked up because the region(s) that
      #   are used as a 'fingerprint' (pattern or profile) is not yet available
      #   in the data bank (partial sequence).
      # ? For an unknown; a sequence which possibly could belong to the set under
      #   consideration.
      # F For a false positive; a sequence which does not belong to the set in
      #   consideration.
      #
      # Returns a hash that maps each SWISS-PROT accession number to
      # [entry name, flag]. The hash is empty when there is no DR data.
      def dr
        return @data['DR'] if @data['DR']

        xrefs = {}
        if fetch('DR')
          fetch('DR').scan(/(\w+)\s*, (\w+)\s*, (.);/) do |acc, entry_name, flag|
            xrefs[acc] = [entry_name, flag]
          end
        end
        @data['DR'] = xrefs
      end
    
      alias sp_xref dr
    
      # Returns an array of the SWISS-PROT cross-references whose flag
      # equals _flag_ (see #dr for the flags). The array holds accession
      # numbers, or entry names if _by_name_ is true.
      def list_xref(flag, by_name = nil)
        picked = []
        sp_xref.each do |sp_acc, (sp_name, sp_flag)|
          next unless sp_flag == flag
          picked.push(by_name ? sp_name : sp_acc)
        end
        picked
      end
    
      # The following methods list the SWISS-PROT cross-references of the
      # DR lines that carry a particular flag. Each returns an array of
      # accession numbers, or of entry names if _by_name_ is true.

      # Returns the true positives (flag 'T').
      def list_truepositive(by_name = nil)
        list_xref('T', by_name)
      end

      # Returns the false negatives (flag 'N').
      # The flag used to be 'F', which is the false positive flag.
      def list_falsenegative(by_name = nil)
        list_xref('N', by_name)
      end

      # Returns the false positives (flag 'F').
      # The flag used to be 'P', which is the potential hit flag.
      def list_falsepositive(by_name = nil)
        list_xref('F', by_name)
      end

      # Returns the potential hits (flag 'P').
      def list_potentialhit(by_name = nil)
        list_xref('P', by_name)
      end

      # Returns the unknowns (flag '?').
      def list_unknown(by_name = nil)
        list_xref('?', by_name)
      end
    
    
      # 3D  Cross-references to PDB            (>=0 per entry)
      #
      #  3D   name; [name2;...]
      #
      # Returns an array of the names listed on the 3D lines.
      def pdb_xref
        @data['3D'] ||= fetch('3D').split(/; */)
      end
    
    
      # DO  Pointer to the documentation file  (1 per entry)
      #
      #  DO   PDOCnnnnn;
      #
      # Returns the documentation ID without the trailing ';'.
      # The value is fetched again, and stored, on every call.
      def pdoc_xref
        doc_id = fetch('DO').chomp(';')
        @data['DO'] = doc_id
      end
    
    
      ### prosite pattern to regular expression
      #
      # prosite/prosuser.txt:
      #
      # The PA (PAttern) lines contains the definition of a PROSITE pattern. The
      # patterns are described using the following conventions:
      #
      # 0) The standard IUPAC one-letter codes for the amino acids are used.
      # 0) Ambiguities are indicated by listing the acceptable amino acids for a
      #   given position, between square parentheses `[ ]'. For example: [ALT]
      #   stands for Ala or Leu or Thr.
      # 1) A period ends the pattern.
      # 2) When a pattern is restricted to either the N- or C-terminal of a
      #   sequence, that pattern either starts with a `<' symbol or respectively
      #   ends with a `>' symbol.
      # 3) Ambiguities are also indicated by listing between a pair of curly
      #   brackets `{ }' the amino acids that are not accepted at a given
      #   position. For example: {AM} stands for any amino acid except Ala and
      #   Met.
      # 4) Repetition of an element of the pattern can be indicated by following
      #   that element with a numerical value or a numerical range between
      #   parenthesis. Examples: x(3) corresponds to x-x-x, x(2,4) corresponds to
      #   x-x or x-x-x or x-x-x-x.
      # 5) The symbol `x' is used for a position where any amino acid is accepted.
      # 6) Each element in a pattern is separated from its neighbor by a `-'.
      #
      # Examples:
      #
      # PA  [AC]-x-V-x(4)-{ED}.
      #
      # This pattern is translated as: [Ala or Cys]-any-Val-any-any-any-any-{any
      # but Glu or Asp}
      #
      # PA  <A-x-[ST](2)-x(0,1)-V.
      #
      # This pattern, which must be in the N-terminal of the sequence (`<'), is
      # translated as: Ala-any-[Ser or Thr]-[Ser or Thr]-(any or none)-Val
      #
      # NOTE(review): the example above, the method header and the first
      # conversion steps had been lost from this file (the text between a
      # `<' and a `>' was stripped). They are reconstructed from the
      # conventions listed above and from the remaining steps; confirm
      # against the upstream source.
      #
      # Converts a PROSITE pattern string into a case-insensitive Regexp.
      # The given string is not modified: the conversion works on a copy,
      # so the same pattern can be converted repeatedly.
      #
      # Returns a Regexp.
      def self.pa2re(pattern)
        converted = pattern.gsub(/\s/, '')		# remove white spaces (copy)
        converted = converted.sub(/\.$/, '')	# (1) remove trailing '.'
        converted = converted.sub(/^</, '^')	# (2) restricted to the N-terminal : `<'
        converted = converted.sub(/>$/, '$')	# (2) restricted to the C-terminal : `>'
        converted = converted.gsub(/\{(\w+)\}/) { |m|
          '[^' + $1 + ']'		# (3) not accepted at a given position : '{}'
        }
        converted = converted.gsub(/\(([\d,]+)\)/) { |m|
          '{' + $1 + '}'		# (4) repetition of an element : (n), (n,m)
        }
        converted = converted.tr('x', '.')	# (5) any amino acid is accepted : 'x'
        converted = converted.tr('-', '')	# (6) each element is separated by a '-'
        Regexp.new(converted, Regexp::IGNORECASE)
      end
    
      # Instance-level shortcut that hands the pattern to the class method
      # of the same name and returns its result.
      def pa2re(pattern)
        klass = self.class
        klass.pa2re(pattern)
      end
    
      # Converts this entry's own pattern (self.pa) with the class-level
      # pa2re and returns the result.
      def re
        own_pattern = self.pa
        self.class.pa2re(own_pattern)
      end
    
    
      ### prosite profile to regular expression
      #
      # prosite/profile.txt:
      #
      # Not implemented: calling this method always raises NotImplementedError.
      def ma2re(matrix)
        # placeholder: the conversion has not been written
        raise NotImplementedError.new
      end
    
    end # PROSITE
    
    end # Bio
    
    bio-1.4.3.0001/lib/bio/db/fastq.rb0000644000004100000410000004412712200110570016242 0ustar  www-datawww-data#
    # = bio/db/fastq.rb - FASTQ format parser class
    #
    # Copyright::  Copyright (C) 2009
    #              Naohisa Goto 
    # License::    The Ruby License
    #
    # == Description
    # 
    # FASTQ format parser class.
    #
    # Be careful that it is for the fastQ format, not for the fastA format.
    #
    # == Examples
    #
    # See documents of Bio::Fastq class.
    #
    # == References
    #
    # * FASTQ format specification
    #   http://maq.sourceforge.net/fastq.shtml
    #
    
    require "strscan"
    require "singleton"
    
    require 'bio/sequence'
    require 'bio/io/flatfile'
    
    module Bio
    
    # Bio::Fastq is a parser for FASTQ format.
    #
    class Fastq
    
      # Bio::Fastq::FormatData is a data class to store Fastq format parameters
      # and quality calculation methods.
      # Bio::Fastq internal use only. 
      class FormatData
    
        # Format name. Should be redefined in subclass.
        NAME = nil
    
        # Offset. Should be redefined in subclass.
        OFFSET = nil
    
        # Range of score. Should be redefined in subclass.
        # The range must not exclude end value, i.e. it must be X..Y,
        # and must not be X...Y.
        SCORE_RANGE = nil
    
        # Copies the NAME, OFFSET and SCORE_RANGE constants of the concrete
        # class into instance variables and derives the symbol form of the
        # name ("-" replaced by "_").
        def initialize
          klass = self.class
          @name = klass::NAME
          @symbol = @name.tr('-', '_').to_sym
          @offset = klass::OFFSET
          @score_range = klass::SCORE_RANGE
        end
    
        # Format name
        attr_reader :name
    
        # Format name symbol.
        # Note that "-" in the format name is substituted to "_" because
        # "-" in a symbol is relatively difficult to handle.
        attr_reader :symbol
    
        # Offset when converting a score to a character
        attr_reader :offset
    
        # Allowed range of a score value
        attr_reader :score_range
    
        # Type of quality scores. Maybe one of :phred or :solexa.
        # The "if false" guard means this attr_reader is never executed; the
        # line exists only so that RDoc lists the attribute.
        # NOTE(review): the real method presumably comes from the
        # QualityScore module included by each subclass -- confirm.
        attr_reader :quality_score_type if false # for RDoc
    
        # Converts quality string to scores.
        # No overflow/underflow checks will be performed.
        # ---
        # *Arguments*:
        # * (required) _str_: (String) quality string
        # *Returns*:: (Array containing Integer) score values
        def str2scores(str)
          # each byte of the quality string is a score shifted by @offset
          str.unpack('C*').map { |byte| byte - @offset }
        end
    
        # Converts scores to a string.
        # Overflow/underflow checks will be performed. 
        # If a block is given, when overflow/underflow detected,
        # the score value is passed to the block, and uses returned value
        # as the score. If no blocks, silently truncated.
        #
        # ---
        # *Arguments*:
        # * (required) _a_: (Array containing Integer) score values
        # *Returns*:: (String) quality string
        def scores2str(a)
          codes =
            if block_given?
              # Out-of-range scores are replaced by whatever the caller's
              # block returns for them.
              a.map do |score|
                in_range = @score_range.include?(score)
                (in_range ? score : yield(score)) + @offset
              end
            else
              # No block: silently clamp to the limits of the score range.
              lowest  = @score_range.begin
              highest = @score_range.end
              a.map do |score|
                clamped = score < lowest ? lowest : (score > highest ? highest : score)
                clamped + @offset
              end
            end
          codes.pack('C*')
        end
    
        # Format information for "fastq-sanger".
        # Bio::Fastq internal use only.
        class FASTQ_SANGER < FormatData
          # one shared object per format; Bio::Fastq#format= obtains it
          # through .instance
          include Singleton

          # NOTE(review): presumably supplies the PHRED-specific methods
          # (e.g. q2p, quality_score_type) that Bio::Fastq calls on the
          # format object -- confirm against Bio::Sequence::QualityScore.
          include Bio::Sequence::QualityScore::Phred

          # format name
          NAME = 'fastq-sanger'.freeze
          # offset: a score s is written as the byte s + 33
          OFFSET = 33
          # score range; with the offset this maps to bytes 33..126 ("!".."~")
          SCORE_RANGE = 0..93

        end #class FASTQ_SANGER
    
        # Format information for "fastq-solexa"
        # Bio::Fastq internal use only.
        class FASTQ_SOLEXA < FormatData
          # one shared object per format; Bio::Fastq#format= obtains it
          # through .instance
          include Singleton

          # NOTE(review): presumably supplies the Solexa-specific methods
          # (e.g. q2p, quality_score_type) that Bio::Fastq calls on the
          # format object -- confirm against Bio::Sequence::QualityScore.
          include Bio::Sequence::QualityScore::Solexa

          # format name
          NAME = 'fastq-solexa'.freeze
          # offset: a score s is written as the byte s + 64
          OFFSET = 64
          # score range; with the offset this maps to bytes 59..126 (";".."~")
          SCORE_RANGE = (-5)..62

        end #class FASTQ_SOLEXA
    
        # Format information for "fastq-illumina"
        # Bio::Fastq internal use only.
        class FASTQ_ILLUMINA < FormatData
          # one shared object per format; Bio::Fastq#format= obtains it
          # through .instance
          include Singleton

          # NOTE(review): presumably supplies the PHRED-specific methods
          # (e.g. q2p, quality_score_type) that Bio::Fastq calls on the
          # format object -- confirm against Bio::Sequence::QualityScore.
          include Bio::Sequence::QualityScore::Phred

          # format name
          NAME = 'fastq-illumina'.freeze
          # offset: a score s is written as the byte s + 64
          OFFSET = 64
          # score range; with the offset this maps to bytes 64..126 ("@".."~")
          SCORE_RANGE = 0..62

        end #class FASTQ_ILLUMINA
    
      end #class FormatData
    
    
      # Available format names.
      # Maps the format name (String) to its FormatData class; format= looks
      # a given name up here first and then calls .instance on the class.
      FormatNames = {
        "fastq-sanger"   => FormatData::FASTQ_SANGER,
        "fastq-solexa"   => FormatData::FASTQ_SOLEXA,
        "fastq-illumina" => FormatData::FASTQ_ILLUMINA
      }.freeze

      # Available format name symbols.
      # Same classes as FormatNames, keyed by Symbol ("-" written as "_");
      # consulted by format= when the name is not found in FormatNames.
      Formats = {
        :fastq_sanger   => FormatData::FASTQ_SANGER,
        :fastq_solexa   => FormatData::FASTQ_SOLEXA,
        :fastq_illumina => FormatData::FASTQ_ILLUMINA
      }.freeze

      # Default format name
      # (used by the quality accessors when no format has been set yet)
      DefaultFormatName = 'fastq-sanger'.freeze

      # Splitter for Bio::FlatFile
      FLATFILE_SPLITTER = Bio::FlatFile::Splitter::LineOriented
    
    
      # Base exception class of all Bio::Fastq::Error::XXXX classes.
      # Bio::Fastq internal use only.
      class Error < RuntimeError

        # Creates a new exception object.
        # When no message is given, or when an Integer is given, the message
        # is built by default_message. An Integer is treated as a position
        # inside the sequence or the quality string and is handed to
        # default_message, which may embed it in the text.
        # ---
        # *Arguments*:
        # * (optional) error_message: message String, position Integer, or nil
        def initialize(error_message = nil)
          use_default = !error_message || error_message.kind_of?(Integer)
          super(use_default ? default_message(error_message) : error_message)
        end

        private

        # Fallback message of the base class.
        def default_message(position)
          "FASTQ error #{position}"
        end

        # Error::No_atmark  -- the first identifier does not begin with "@"
        class No_atmark < Error
          private

          # fixed message; the position is ignored
          def default_message(_position)
            'the first identifier does not begin with "@"'
          end
        end

        # Error::No_ids     -- sequence identifier not found
        class No_ids < Error
          private

          # fixed message; the position is ignored
          def default_message(_position)
            'sequence identifier not found'
          end
        end

        # Error::Diff_ids   -- the identifier in the two lines are different
        class Diff_ids < Error
          private

          # fixed message; the position is ignored
          def default_message(_position)
            'the identifier in the two lines are different'
          end
        end

        # Error::Long_qual  -- length of quality is longer than the sequence
        class Long_qual < Error
          private

          # fixed message; the position is ignored
          def default_message(_position)
            'length of quality is longer than the sequence'
          end
        end

        # Error::Short_qual -- length of quality is shorter than the sequence
        class Short_qual < Error
          private

          # fixed message; the position is ignored
          def default_message(_position)
            'length of quality is shorter than the sequence'
          end
        end

        # Error::No_qual    -- no quality characters found
        class No_qual < Error
          private

          # fixed message; the position is ignored
          def default_message(_position)
            'no quality characters found'
          end
        end

        # Error::No_seq     -- no sequence found
        class No_seq < Error
          private

          # fixed message; the position is ignored
          def default_message(_position)
            'no sequence found'
          end
        end

        # Error::Qual_char  -- invalid character in the quality
        class Qual_char < Error
          private

          # message with " at [position]" appended when a position is known
          def default_message(position)
            base = 'invalid character in the quality'
            position ? "#{base} at [#{position}]" : base
          end
        end

        # Error::Seq_char   -- invalid character in the sequence
        class Seq_char < Error
          private

          # message with " at [position]" appended when a position is known
          def default_message(position)
            base = 'invalid character in the sequence'
            position ? "#{base} at [#{position}]" : base
          end
        end

        # Error::Qual_range -- quality score value out of range
        class Qual_range < Error
          private

          # message with " at [position]" appended when a position is known
          def default_message(position)
            base = 'quality score value out of range'
            position ? "#{base} at [#{position}]" : base
          end
        end

        # Error::Skipped_unformatted_lines -- the parser skipped unformatted
        # lines that could not be recognized as FASTQ format
        class Skipped_unformatted_lines < Error
          private

          # fixed message; the position is ignored
          def default_message(_position)
            "the parser skipped unformatted lines that could not be recognized as FASTQ format"
          end
        end
      end #class Error
    
      # Adds a header line if the header data is not yet given and
      # the given line is suitable for header.
      # Returns self if adding header line is succeeded.
      # Otherwise, returns false (the line is not added).
      def add_header_line(line)
        @header ||= ""
        # a line starting with "@" begins the entry itself: refuse it
        return false if line[0, 1] == "@"
        @header.concat line
        self
      end
    
      # misc lines before the entry (String or nil)
      attr_reader :header
    
      # Adds a line to the entry if the given line is regarded as
      # a part of the current entry.
      def add_line(line)
        text = line.chomp
        marker = text[0, 1]

        # 1) the very first line is the identifier line
        unless defined?(@definition)
          if marker == "@"
            @definition = text[1..-1]
          else
            # keep the line as it is, but remember that "@" was missing
            @definition = text
            @parse_errors ||= []
            @parse_errors.push Error::No_atmark.new
          end
          return self
        end

        # 2) until the "+" line is seen, lines belong to the sequence
        unless defined?(@definition2)
          @sequence_string ||= ''
          if marker == '+'
            @definition2 = text[1..-1]
          else
            @sequence_string.concat text
          end
          return self
        end

        # 3) after the "+" line, lines belong to the quality string.
        # "@" is also a valid quality character, so a leading "@" only ends
        # the entry once the quality is at least as long as the sequence.
        @quality_string ||= ''
        if marker == "@" and @quality_string.size >= @sequence_string.size
          return false
        end
        @quality_string.concat text
        self
      end
    
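      # Text remaining in the input after the entry was parsed (String).
      # Only set when a string is given to Bio::Fastq.new.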
      attr_reader :entry_overrun
    
      # Creates a new Fastq object from formatted text string.
      #
      # The format of quality scores should be specified later
      # by using format= method.
      #
      # ---
      # *Arguments*:
      # * _str_: Formatted string (String)
      def initialize(str = nil)
        return unless str
        # One physical line including its terminator ("\r\n", "\n" or "\r").
        # "[^\r\n]*" is used instead of ".*" because "." also matches "\r",
        # which kept CR-terminated lines from ever being split.
        line_re = /[^\r\n]*(?:\r\n|\n|\r)?/
        sc = StringScanner.new(str)

        # Lines in front of the first "@" line are collected as header.
        until sc.eos?
          line = sc.scan(line_re)
          break unless line
          unless add_header_line(line)
            sc.unscan   # leave the "@" line for the entry loop below
            break
          end
        end

        # Feed lines to the entry until add_line refuses one, i.e. until the
        # next entry begins.
        until sc.eos?
          line = sc.scan(line_re)
          break unless line
          unless add_line(line)
            sc.unscan   # the refused line belongs to the overrun
            break
          end
        end

        # whatever was not consumed (normally the following entries)
        @entry_overrun = sc.rest
      end
    
      # definition; ID line (begins with @)
      attr_reader :definition
    
      # quality as a string
      attr_reader :quality_string
    
      # raw sequence data as a String object
      attr_reader :sequence_string
    
      # Returns Fastq formatted string constructed from instance variables.
      # The string always consists of four lines without wrapping of
      # the sequence and quality strings, and the third line always
      # contains only "+". This may be different from the initial entry.
      #
      # Note that use of the method may be inefficient and may lose performance
      # because new string object is created every time it is called.
      # For showing an entry as-is, consider using Bio::FlatFile#entry_raw.
      # For output with various options, use Bio::Sequence#output(:fastq).
      #
      def to_s
        "@#{@definition}\n#{@sequence_string}\n+\n#{@quality_string}\n"
      end
    
      # returns Bio::Sequence::NA
      def naseq
        # built once from the raw sequence string, then reused
        return @naseq if defined?(@naseq)
        @naseq = Bio::Sequence::NA.new(@sequence_string)
      end
    
      # length of naseq
      def nalen
        nucleotides = naseq
        nucleotides.length
      end
    
      # returns Bio::Sequence::Generic
      def seq
        # built once from the raw sequence string, then reused
        return @seq if defined?(@seq)
        @seq = Bio::Sequence::Generic.new(@sequence_string)
      end
    
      # Identifier of the entry. Normally, the first word of the ID line.
      def entry_id
        return @entry_id if defined?(@entry_id)
        first_word = @definition.strip.split(/\s+/).first
        # a blank definition has no first word: fall back to the definition
        @entry_id = first_word || @definition
      end
    
      # (private) reset internal state
      def reset_state
        # drop the cached values derived from the quality string
        remove_instance_variable(:@quality_scores) if defined?(@quality_scores)
        remove_instance_variable(:@error_probabilities) if defined?(@error_probabilities)
      end
      private :reset_state
    
      # Specify the format. If the format is not found, raises RuntimeError.
      #
      # Available formats are:
      #   "fastq-sanger" or :fastq_sanger
      #   "fastq-solexa" or :fastq_solexa
      #   "fastq-illumina" or :fastq_illumina
      # 
      # ---
      # *Arguments*:
      # * (required) _name_: format name (String or Symbol).
      # *Returns*:: (String) format name
      def format=(name)
        # a false/nil name only clears the cached scores
        unless name
          reset_state
          return nil
        end

        # accept both the String and the Symbol spelling of a format name
        format_class = FormatNames[name] || Formats[name]
        raise "unknown format" unless format_class

        reset_state
        @format = format_class.instance
        self.format
      end
    
      # Format name.
      # One of "fastq-sanger", "fastq-solexa", "fastq-illumina",
      # or nil (when not specified).
      # ---
      # *Returns*:: (String or nil) format name
      def format
        return nil unless @format
        @format.name
      end
    
    
      # The meaning of the quality scores.
      # It may be one of :phred, :solexa, or nil.
      def quality_score_type
        # fall back to the default format when none has been chosen yet
        self.format = self.class::DefaultFormatName unless self.format
        @format.quality_score_type
      end
    
      # Quality score for each base.
      # For "fastq-sanger" or "fastq-illumina", it is PHRED score.
      # For "fastq-solexa", it is Solexa score.
      #
      # ---
      # *Returns*:: (Array containing Integer) quality score values
      def quality_scores
        return @quality_scores if defined?(@quality_scores)
        # fall back to the default format when none has been chosen yet
        self.format = self.class::DefaultFormatName unless self.format
        @quality_scores = @format.str2scores(@quality_string)
      end
    
      alias qualities quality_scores
    
      # Estimated probability of error for each base.
      # ---
      # *Returns*:: (Array containing Float) error probability values
      def error_probabilities
        return @error_probabilities if defined?(@error_probabilities)
        # fall back to the default format when none has been chosen yet
        self.format = self.class::DefaultFormatName unless self.format
        scores = self.quality_scores
        @error_probabilities = @format.q2p(scores)
      end
    
      # Format validation.
      #
      # If an array is given as the argument, when errors are found,
      # error objects are pushed to the array.
      # Currently, following errors may be added to the array.
      # (All errors are under the Bio::Fastq namespace, for example,
      # Bio::Fastq::Error::Diff_ids).
      #
      # Error::Diff_ids   -- the identifier in the two lines are different
      # Error::Long_qual  -- length of quality is longer than the sequence
      # Error::Short_qual -- length of quality is shorter than the sequence
      # Error::No_qual    -- no quality characters found
      # Error::No_seq     -- no sequence found
      # Error::Qual_char  -- invalid character in the quality
      # Error::Seq_char   -- invalid character in the sequence
      # Error::Qual_range -- quality score value out of range
      # Error::No_ids     -- sequence identifier not found
      # Error::No_atmark  -- the first identifier does not begin with "@"
      # Error::Skipped_unformatted_lines -- the parser skipped unformatted lines that could not be recognized as FASTQ format
      #
      # ---
      # *Arguments*:
      # * (optional) _errors_: (Array or nil) an array for pushing error messages. The array should be empty.
      # *Returns*:: true:no error, false: containing error.
      def validate_format(errors = nil)
        found = []

        # Pushes one error of error_class for every match of pattern in
        # text, giving it the offset of the offending character.
        report_chars = lambda do |text, pattern, error_class|
          scanner = StringScanner.new(text)
          while scanner.scan_until(pattern)
            found.push error_class.new(scanner.pos - scanner.matched_size)
          end
        end

        # non-blank header text means lines in front of the entry were skipped
        header_text = defined?(@header) ? @header : nil
        if header_text and !header_text.strip.empty?
          found.push Error::Skipped_unformatted_lines.new
        end

        # errors already recorded while the entry was being parsed
        recorded = defined?(@parse_errors) ? @parse_errors : nil
        found.concat recorded if recorded

        # the identifier must exist; a non-empty "+" line must repeat it
        definition = defined?(@definition) ? @definition : nil
        if !definition
          found.push Error::No_ids.new
        elsif defined?(@definition2) and
            !@definition2.to_s.empty? and
            definition != @definition2
          found.push Error::Diff_ids.new
        end

        # presence of sequence and quality
        sequence = defined?(@sequence_string) ? @sequence_string : nil
        quality  = defined?(@quality_string) ? @quality_string : nil
        found.push Error::No_seq.new unless sequence
        found.push Error::No_qual.new unless quality

        # sequence and quality must have the same length
        if sequence and quality
          case sequence.length <=> quality.length
          when 1  then found.push Error::Short_qual.new
          when -1 then found.push Error::Long_qual.new
          end
        end

        # sequence character check
        if sequence
          report_chars.call(sequence, /[ \x00-\x1f\x7f-\xff]/n, Error::Seq_char)
        end

        # quality character check; the allowed characters depend on the format
        if quality
          format_name = (defined?(@format) and @format) ? @format.name : nil
          invalid = case format_name
                    when 'fastq-sanger'   then /[^\x21-\x7e]/n
                    when 'fastq-solexa'   then /[^\x3b-\x7e]/n
                    when 'fastq-illumina' then /[^\x40-\x7e]/n
                    else /[ \x00-\x1f\x7f-\xff]/n
                    end
          report_chars.call(quality, invalid, Error::Qual_char)
        end

        # hand the errors to the caller's array when one was given
        errors.concat found if errors
        # true when no error was found, false otherwise
        found.empty?
      end
    
      # Returns sequence as a Bio::Sequence object.
      #
      # Note: If you modify the returned Bio::Sequence object,
      # the sequence or definition in this Fastq object
      # might also be changed (but not always be changed)
      # because of efficiency.
      # 
      def to_biosequence
        adapter_module = Bio::Sequence::Adapter::Fastq
        Bio::Sequence.adapter(self, adapter_module)
      end
    
      # Masks low quality sequence regions.
      # For each sequence position, if the quality score is smaller than
      # the threshold, the sequence in the position is replaced with
      # mask_char.
      #
      # Note: This method does not care quality_score_type.
      # ---
      # *Arguments*:
      # * (required) threshold : (Numeric) threshold
      # * (optional) mask_char : (String) character used for masking
      # *Returns*:: Bio::Sequence object
      def mask(threshold, mask_char = 'n')
        # the masking itself is done by the Bio::Sequence view of this entry
        biosequence = to_biosequence
        biosequence.mask_with_quality_score(threshold, mask_char)
      end
    
    end #class Fastq
    
    end #module Bio
    bio-1.4.3.0001/lib/bio/db/medline.rb0000644000004100000410000001670612200110570016543 0ustar  www-datawww-data#
    # = bio/db/medline.rb - NCBI PubMed/MEDLINE database class
    #
    # Copyright::  Copyright (C) 2001, 2005
    #              Toshiaki Katayama 
    # License::    The Ruby License
    #
    # $Id: medline.rb,v 1.17 2007/12/21 05:12:41 k Exp $
    #
    
    require 'bio/db'
    
    module Bio
    
    # == Description
    #
    # NCBI PubMed/MEDLINE database class.
    #
    # == Examples
    #
    #   medline = Bio::MEDLINE.new(txt)
    #   medline.reference
    #   medline.pmid == medline.entry_id
    #   medline.mesh
    #
    class MEDLINE < NCBIDB
    
      # Parses a MEDLINE formatted entry into @pubmed, a Hash from tag
      # (e.g. 'PMID') to the concatenated text of all lines with that tag.
      # Unknown tags read as ''.
      def initialize(entry)
        @pubmed = Hash.new('')
        current_tag = ''

        entry.each_line do |line|
          if line =~ /^\w/
            # a new field: the tag is in the first four columns
            current_tag = line[0, 4].strip
          else
            # continuation line: join it to the previous line with a space
            joined = @pubmed[current_tag].sub(/(?:\r|\r\n|\n)\z/, ' ')
            @pubmed[current_tag] = joined
          end

          # the field text starts at the seventh column
          field_text = line[6..-1]
          next unless field_text
          @pubmed[current_tag] += field_text
        end
      end
      attr_reader :pubmed
    
    
      # returns a Reference object.
      def reference
        fields = {
          'authors'      => authors,
          'title'        => title,
          'journal'      => journal,
          'volume'       => volume,
          'issue'        => issue,
          'pages'        => pages,
          'year'         => year,
          'pubmed'       => pmid,
          'medline'      => ui,
          'abstract'     => abstract,
          'mesh'         => mesh,
          'doi'          => doi,
          'affiliations' => affiliations
        }

        # only fields that actually carry a value are passed on
        fields.delete_if { |_key, value| value.nil? or value.empty? }

        Reference.new(fields)
      end
    
    
      ### Common MEDLINE tags
    
      # PMID - PubMed Unique Identifier
      #   Unique number assigned to each PubMed citation.
      def pmid
        raw_value = @pubmed['PMID']
        raw_value.strip
      end
      alias entry_id pmid
    
      # UI   - MEDLINE Unique Identifier
      #   Unique number assigned to each MEDLINE citation.
      def ui
        raw_value = @pubmed['UI']
        raw_value.strip
      end
    
      # TA   - Journal Title Abbreviation
      #   Standard journal title abbreviation.
      def ta
        # collapse every run of whitespace (including line breaks) first
        single_spaced = @pubmed['TA'].gsub(/\s+/, ' ')
        single_spaced.strip
      end
      alias journal ta
    
      # VI   - Volume
      #   Journal volume.
      def vi
        raw_value = @pubmed['VI']
        raw_value.strip
      end
      alias volume vi
    
      # IP   - Issue
      #   The number of the issue, part, or supplement of the journal in which
      #   the article was published.
      def ip
        raw_value = @pubmed['IP']
        raw_value.strip
      end
      alias issue ip
    
      # PG   - Page Number
      #   The full pagination of the article.
      def pg
        raw_value = @pubmed['PG']
        raw_value.strip
      end
    
      # Returns the pagination (see pg) with an abbreviated end page
      # expanded, e.g. "1234-45" becomes "1234-1245". Values without a
      # complete "from-to" pair are returned unchanged.
      def pages
        pages = pg
        # Split at the first '-' only, so that text following a second '-'
        # (e.g. "1-3; discussion 4-5") stays part of the result.
        from, to = pages.split('-', 2)
        # nothing to expand when either side is missing ("123", "123-", "-5")
        return pages if from.nil? or from.empty? or to.nil? or to.empty?

        # the end page omits the leading digits it shares with the start page
        missing = from.length - to.length
        to = from[0, missing] + to if missing > 0
        "#{from}-#{to}"
      end
    
      # DP   - Publication Date
      #   The date the article was published.
      def dp
        raw_value = @pubmed['DP']
        raw_value.strip
      end
      alias date dp
    
      # First four characters of the publication date (see dp).
      def year
        publication_date = dp
        publication_date.slice(0, 4)
      end
    
      # TI   - Title Words
      #   The title of the article.
      def ti
        # collapse every run of whitespace (including line breaks) first
        single_spaced = @pubmed['TI'].gsub(/\s+/, ' ')
        single_spaced.strip
      end
      alias title ti
    
      # AB   - Abstract
      #   Abstract.
      def ab
        # collapse every run of whitespace (including line breaks) first
        single_spaced = @pubmed['AB'].gsub(/\s+/, ' ')
        single_spaced.strip
      end
      alias abstract ab
    
      # AU   - Author Name
      #   Authors' names.
      def au
        raw_value = @pubmed['AU']
        raw_value.strip
      end
    
      # Returns the author names (one per line of au) as an Array, each
      # reformatted from "Last FM [Suffix]" to "Last, F. M. [Suffix]".
      # Names without a space are passed through unchanged.
      def authors
        au.split(/\n/).map do |entry|
          next entry unless entry =~ / /

          words = entry.split(/\s+/)
          suffix = nil
          # when the second to last word looks like the initials, the last
          # word is taken as a suffix
          if words.length > 2 && words[-2] =~ /^[A-Z]+$/
            suffix = words.pop
          end
          initials = words.pop.split(//).join('. ')
          formatted = "#{words.join(' ')}, #{initials}."
          formatted << " " + suffix if suffix
          formatted
        end
      end
    
      # SO   - Source
      #   Composite field containing bibliographic information.
      def so
        raw_value = @pubmed['SO']
        raw_value.strip
      end
      alias source so
    
      # MH   - MeSH Terms
      #   NLM's controlled vocabulary.
      def mh
        # one term per line
        terms = @pubmed['MH'].strip
        terms.split(/\n/)
      end
      alias mesh mh
    
      # AD   - Affiliation
      #   Institutional affiliation and address of the first author, and grant
      #   numbers.
      def ad
        # one affiliation per line
        affiliation_text = @pubmed['AD'].strip
        affiliation_text.split(/\n/)
      end
      alias affiliations ad
    
      # AID  - Article Identifier
      #   Article ID values may include the pii (controlled publisher identifier)
      #   or doi (Digital Object Identifier).
      def doi
        # the identifier is the word in front of the "[doi]" marker;
        # nil when there is none
        @pubmed['AID'].slice(/(\S+) \[doi\]/, 1)
      end
    
      def pii
        # the identifier is the word in front of the "[pii]" marker;
        # nil when there is none
        @pubmed['AID'].slice(/(\S+) \[pii\]/, 1)
      end
    
      ### Other MEDLINE tags
    
      # CI   - Copyright Information
      #   Copyright statement.
    
      # CIN  - Comment In
      #   Reference containing a comment about the article.
    
      # CN   - Collective Name
      #   Corporate author or group names with authorship responsibility.
    
      # CON  - Comment On
      #   Reference upon which the article comments.
    
      # CY   - Country
      #   The place of publication of the journal.
    
      # DA   - Date Created
      #   Used for internal processing at NLM.
    
      # DCOM - Date Completed
      #   Used for internal processing at NLM.
    
      # DEP  - Date of Electronic Publication
      #   Electronic publication date.
    
      # EDAT - Entrez Date
      #   The date the citation was added to PubMed.
    
      # EIN  - Erratum In
      #   Reference containing a published erratum to the article.
    
      # GS   - Gene Symbol
      #   Abbreviated gene names (used 1991 through 1996).
    
      # ID   - Identification Number 
      #   Research grant numbers, contract numbers, or both that designate
      #   financial support by any agency of the US PHS (Public Health Service).
    
      # IS   - ISSN
      #   International Standard Serial Number of the journal.
    
      # JC   - Journal Title Code
      #   MEDLINE unique three-character code for the journal.
    
      # JID  - NLM Unique ID
      #   Unique journal ID in NLM's catalog of books, journals, and audiovisuals.
    
      # LA   - Language
      #   The language in which the article was published.
    
      # LR   - Last Revision Date
      #   The date a change was made to the record during a maintenance procedure.
    
      # MHDA - MeSH Date
      #   The date MeSH terms were added to the citation. The MeSH date is the
      #   same as the Entrez date until MeSH are added.
    
      # PHST - Publication History Status Date
      #   History status date.
    
      # PS   - Personal Name as Subject
      #   Individual is the subject of the article.
    
      # PST  - Publication Status
      #   Publication status.
    
      # PT   - Publication Type
      #   The type of material the article represents.
      def pt
        # one publication type per line
        types = @pubmed['PT'].strip
        types.split(/\n/)
      end
      alias publication_type pt
    
      # RF   - Number of References
      #   Number of bibliographic references for Review articles.
    
      # RIN  - Retraction In
      #   Retraction of the article
    
      # RN   - EC/RN Number
      #   Number assigned by the Enzyme Commission to designate a particular
      #   enzyme or by the Chemical Abstracts Service for Registry Numbers.
    
      # ROF  - Retraction Of
      #   Article being retracted.
    
      # RPF  - Republished From
      #   Original article.
    
      # SB   - Journal Subset
      #   Code for a specific set of journals.
    
      # SI   - Secondary Source Identifier
      #   Identifies a secondary source that supplies information, e.g., other
      #   data sources, databanks and accession numbers of molecular sequences
      #   discussed in articles.
    
      # TT   - Transliterated / Vernacular Title 
      #   Non-Roman alphabet language titles are transliterated.
    
      # UIN  - Update In
      #   Update to the article.
    
      # UOF  - Update Of
      #   The article being updated.
    
      # URLF - URL Full-Text
      #   Link to the full-text of article at provider's website. Links are
      #   incomplete. Use PmLink for the complete set of available links.
      #   [PmLink] http://www.ncbi.nlm.nih.gov/entrez/utils/pmlink_help.html
    
      # URLS - URL Summary
      #   Link to the article summary at provider's website. Links are
      #   incomplete. Use PmLink for the complete set of available links.
      #   [PmLink] http://www.ncbi.nlm.nih.gov/entrez/utils/pmlink_help.html
    
    end # MEDLINE
    
    end # Bio
    
    
    
    bio-1.4.3.0001/lib/bio/db/embl/0000755000004100000410000000000012200110570015506 5ustar  www-datawww-databio-1.4.3.0001/lib/bio/db/embl/trembl.rb0000644000004100000410000000176212200110570017326 0ustar  www-datawww-data#
    # = bio/db/embl/trembl.rb - TrEMBL database class
    # 
    # Copyright::  Copyright (C) 2001, 2002 Toshiaki Katayama 
    # License::    The Ruby License
    #
    # $Id: trembl.rb,v 1.7 2007/04/05 23:35:40 trevor Exp $
    #
    
    require 'bio/db/embl/sptr'
    
    module Bio
    
    # == Description
    #
    # Parser class for TrEMBL database entry. See also Bio::SPTR class.
    # This class holds name space for TrEMBL specific methods.
    #
    # TrEMBL specific methods are defined in this class.
    # Shared methods for UniProtKB/SwissProt and TrEMBL classes are 
    # defined in Bio::SPTR class.
    #
    # == Examples
    #
    #   str = File.read("Q2UNG2_ASPOR.trembl")
    #   obj = Bio::TrEMBL.new(str)
    #   obj.entry_id #=> "Q2UNG2_ASPOR"
    #
    # == References
    #
    # * TrEMBL Computer-annotated supplement to Swiss-Prot	
    #   http://au.expasy.org/sprot/
    #
    # * TrEMBL Computer-annotated supplement to Swiss-Prot User Manual
    #   http://au.expasy.org/sprot/userman.html
    # 
    class TrEMBL < SPTR
      # Nothing to do (TrEMBL format is abstracted in SPTR)
      # This subclass defines no methods of its own; everything is
      # inherited from SPTR.
    end
    
    end
    bio-1.4.3.0001/lib/bio/db/embl/uniprot.rb0000644000004100000410000000166712200110570017545 0ustar  www-datawww-data#
    # = bio/db/embl/uniprot.rb - UniProt database class
    # 
    # Copyright::  Copyright (C) 2005 Toshiaki Katayama 
    # License::    The Ruby License
    #
    #  $Id: uniprot.rb,v 1.5 2007/04/05 23:35:40 trevor Exp $
    #
    
    require 'bio/db/embl/sptr'
    
    module Bio
    
    # == Description
    # 
    # Parser class for SwissProt database entry. See also Bio::SPTR class.
    # This class holds name space for UniProtKB/SwissProt specific methods.
    #
    # UniProtKB/SwissProt specific methods are defined in this class. 
    # Shared methods for UniProtKB/SwissProt and TrEMBL classes are 
    # defined in Bio::SPTR class.
    #
    # == Examples
    #
    #   str = File.read("p53_human.swiss")
    #   obj = Bio::UniProt.new(str)
    #   obj.entry_id #=> "P53_HUMAN"
    #
    # == References
    #
    # * UniProt
    #   http://uniprot.org/
    #
    # * The UniProtKB/SwissProt/TrEMBL User Manual
    #   http://www.expasy.org/sprot/userman.html
    #
    class UniProt < SPTR
      # Nothing to do (UniProt format is abstracted in SPTR)
      # This subclass defines no methods of its own; everything is
      # inherited from SPTR.
    end
    
    end
    
    bio-1.4.3.0001/lib/bio/db/embl/sptr.rb0000644000004100000410000012055412200110570017032 0ustar  www-datawww-data#
    # = bio/db/embl/sptr.rb - UniProt/SwissProt and TrEMBL database class
    # 
    # Copyright::   Copyright (C) 2001-2006  Mitsuteru C. Nakao 
    # License::     The Ruby License
    #
    # $Id:$
    #
    # == Description
    # 
    # Shared methods for UniProtKB/SwissProt and TrEMBL classes.
    #
    # See the SWISS-PROT document file SPECLIST.TXT or UniProtKB/SwissProt 
    # user manual.
    # 
    # == Examples
    #
    #   str = File.read("p53_human.swiss")
    #   obj = Bio::SPTR.new(str)
    #   obj.entry_id #=> "P53_HUMAN"
    # 
    # == References
    # 
    # * Swiss-Prot Protein knowledgebase. TrEMBL Computer-annotated supplement 
    #   to Swiss-Prot	
    #   http://au.expasy.org/sprot/
    #
    # * UniProt
    #   http://uniprot.org/
    #
    # * The UniProtKB/SwissProt/TrEMBL User Manual
    #   http://www.expasy.org/sprot/userman.html
    #
    
    
    require 'bio/db'
    require 'bio/db/embl/common'
    
    module Bio
    
    # Parser class for UniProtKB/SwissProt and TrEMBL database entry.
    class SPTR < EMBLDB
      include Bio::EMBLDB::Common
        
      # Pattern of an entry name: 1-4 upper-case alphanumerics, an underscore,
      # then 1-5 upper-case alphanumerics (e.g. "P53_HUMAN").
      @@entry_regrexp = /[A-Z0-9]{1,4}_[A-Z0-9]{1,5}/
      # NOTE(review): presumably the DATA_CLASS values of the older ID line
      # format -- confirm; not referenced in the visible part of the class.
      @@data_class = ["STANDARD", "PRELIMINARY"]
    
      # returns a Hash of the ID line.
      #
      # returns a content (Int or String) of the ID line by a given key.
      # Hash keys: ['ENTRY_NAME', 'DATA_CLASS', 'MOLECULE_TYPE', 'SEQUENCE_LENGTH']
      #
      # === ID Line (since UniProtKB release 9.0 of 31-Oct-2006)
      #   ID   P53_HUMAN               Reviewed;         393 AA.
      #   #"ID  #{ENTRY_NAME} #{DATA_CLASS}; #{SEQUENCE_LENGTH}."
      #
      # === Examples
      #   obj.id_line  #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"Reviewed", 
      #                     "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>nil}
      #
      #   obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
      #
      # 
      # === ID Line (older style)
      #   ID   P53_HUMAN      STANDARD;      PRT;   393 AA.
      #   #"ID  #{ENTRY_NAME} #{DATA_CLASS}; #{MOLECULE_TYPE}; #{SEQUENCE_LENGTH}."
      #
      # === Examples
      #   obj.id_line  #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"STANDARD", 
      #                     "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>"PRT"}
      #
      #   obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
      #
      def id_line(key = nil)
        # With a key, look the value up in the (memoized) full hash.
        return id_line[key] if key

        @data['ID'] ||= begin
          fields = @orig['ID'].split(/ +/)
          # After UniProtKB release 9.0 of 31-Oct-2006
          # (http://www.uniprot.org/docs/sp_news.htm) the molecule type
          # column is gone, so the "AA." unit is the fifth field.
          new_style = (fields[4].to_s.chomp == 'AA.')
          mol_type  = new_style ? nil : fields[3].sub(/;/, '')
          seq_len   = (new_style ? fields[3] : fields[4]).to_i
          {
            'ENTRY_NAME'      => fields[1],
            'DATA_CLASS'      => fields[2].sub(/;/, ''),
            'MOLECULE_TYPE'   => mol_type,
            'SEQUENCE_LENGTH' => seq_len
          }
        end
      end
    
    
      # returns a ENTRY_NAME in the ID line. 
      #
      def entry_id
        # Delegates to #id_line, which parses and caches the ID line.
        id_line('ENTRY_NAME')
      end
      # #entry_name and #entry are alternative names for #entry_id.
      alias entry_name entry_id
      alias entry entry_id
    
    
      # returns a MOLECULE_TYPE in the ID line.
      #
      # A short-cut for Bio::SPTR#id_line('MOLECULE_TYPE').
      def molecule
        # nil for the newer ID line format, where #id_line stores no
        # molecule type.
        id_line('MOLECULE_TYPE')
      end
      # #molecule_type is an alternative name for #molecule.
      alias molecule_type molecule
    
    
      # returns a SEQUENCE_LENGTH in the ID line.
      # 
      # A short-cut for Bio::SPTR#id_line('SEQUENCE_LENGTH').
      def sequence_length
        # The value is stored as an Integer by #id_line (via to_i).
        id_line('SEQUENCE_LENGTH')
      end
      # #aalen is an alternative name for #sequence_length.
      alias aalen sequence_length
    
    
      # Bio::EMBLDB::Common#ac  -> ary
      #                  #accessions  -> ary
      #                  #accession  -> String (accessions.first)
      # Pattern of an accession number: O, P or Q, a digit, three upper-case
      # alphanumerics and a final digit (e.g. "P04637").
      @@ac_regrexp = /[OPQ][0-9][A-Z0-9]{3}[0-9]/ 
    
    
    
      # returns a Hash of information in the DT lines.
      #  hash keys: 
      #    ['created', 'sequence', 'annotation']
      #--
      #  also Symbols acceptable (ASAP):
      #    [:created, :sequence, :annotation]
      #++
      #
      # Since UniProtKB release 7.0 of 07-Feb-2006, the DT line format is
      # changed, and the word "annotation" is no longer used in DT lines.
      # Despite the change, the word "annotation" is still used for keeping
      # compatibility.
      #
      # returns a String of information in the DT lines by a given key.
      #
      # === DT Line; date (3/entry)
      #   DT DD-MMM-YYY (integrated into UniProtKB/XXXXX.)
      #   DT DD-MMM-YYY (sequence version NN)
      #   DT DD-MMM-YYY (entry version NN)
      #
      # The format have been changed in UniProtKB release 7.0 of 07-Feb-2006.
      # Below is the older format.
      #
      # === Old format of DT Line; date (3/entry)
      #   DT DD-MMM-YYY (rel. NN, Created)
      #   DT DD-MMM-YYY (rel. NN, Last sequence update)
      #   DT DD-MMM-YYY (rel. NN, Last annotation update)
      def dt(key = nil)
        # With a key, look the value up in the (memoized) full hash.
        return dt[key] if key

        @data['DT'] ||= begin
          lines = self.get('DT').split(/\n/)
          # Drop the two-letter line code and its padding from each of the
          # three DT lines.
          stamps = (0..2).map { |i| lines[i].sub(/\w{2}   /, '').strip }
          {
            'created'    => stamps[0],
            'sequence'   => stamps[1],
            'annotation' => stamps[2]
          }
        end
      end
    
    
      # (private) parses DE line (description lines)
      # since UniProtKB release 14.0 of 22-Jul-2008
      #
      # Return array containing array.
      #
      # http://www.uniprot.org/docs/sp_news.htm
      def parse_DE_line_rel14(str)
        # Parses the raw DE lines (each still carrying its "DE   " prefix).
        #
        # Returns nil for the old format, otherwise an Array of entries:
        #   [ 'RecName' | 'AltName' | 'SubName' | '', [subcat, desc], ... ]
        #   [ 'Includes' | 'Contains' ]
        #   [ 'Flags', [flag, ...] ]
        #
        # Returns if it is not the new format since Rel.14
        return nil unless /^DE   (RecName|AltName|SubName)\: / =~ str
        ret = []
        # cur is the entry that "Subcategory=Description" items on
        # continuation lines are appended to; nil when there is none.
        cur = nil
        str.each_line do |line|
          case line
          when /^DE   (Includes|Contains)\: *$/
            # Section marker: stored as a one-element entry of its own.
            cur = [ $1 ]
            ret.push cur
            cur = nil
            #subcat_and_desc = nil
            next
          when /^DE   *(RecName|AltName|SubName)\: +(.*)/
            # Starts a new name entry; the rest of the line is parsed below.
            category = $1
            subcat_and_desc = $2
            cur = [ category ]
            ret.push cur
          when /^DE   *(Flags)\: +(.*)/
            # Flags are a ";"-separated list kept as a single Array.
            category = $1
            desc = $2
            flags = desc.strip.split(/\s*\;\s*/) || []
            cur = [ category, flags ]
            ret.push cur
            cur = nil
            #subcat_and_desc = nil
            next
          when /^DE   *(.*)/
            # Continuation line: belongs to the current entry (cur).
            subcat_and_desc = $1
          else
            warn "Warning: skipped DE line in unknown format: #{line.inspect}"
            #subcat_and_desc = nil
            next
          end
          case subcat_and_desc
          when nil
            # does nothing
          when /\A([^\=]+)\=(.*)/
            subcat = $1
            desc = $2
            # Remove the terminating ";" (and trailing whitespace).
            desc.sub!(/\;\s*\z/, '')
            unless cur
              # A continuation line with no open entry (e.g. directly after
              # a Flags or Includes/Contains line): keep the data under an
              # empty category name.
              warn "Warning: unknown category in DE line: #{line.inspect}"
              cur = [ '' ]
              ret.push cur
            end
            cur.push [ subcat, desc ]
          else
            warn "Warning: skipped DE line description in unknown format: #{line.inspect}"
          end
        end
        ret
      end
      private :parse_DE_line_rel14
    
      # returns the proposed official name of the protein.
      # Returns a String.
      #
      # Since UniProtKB release 14.0 of 22-Jul-2008, the DE line format have
      # been changed. The method returns the full name which is taken from
      # "RecName: Full=" or "SubName: Full=" line normally in the beginning of
      # the DE lines. 
      # Unlike parser for old format, no special treatments for fragment or
      # precursor.
      #
      # For old format, the method parses the DE lines and returns the protein
      # name as a String.
      # 
      # === DE Line; description (>=1)
      #  "DE #{OFFICIAL_NAME} (#{SYNONYM})"
      #  "DE #{OFFICIAL_NAME} (#{SYNONYM}) [CONTAINS: #1; #2]."
      #  OFFICIAL_NAME  1/entry
      #  SYNONYM        >=0
      #  CONTAINS       >=0
      def protein_name
        @data['DE'] ||= parse_DE_line_rel14(get('DE'))
        entries = @data['DE']

        unless entries
          # old format (before Rel. 13.x)
          de_line = fetch('DE')
          return "" unless de_line
          # everything preceding the first [ (the "contains" part)
          head = de_line[/^[^\[]*/]
          # the official name ends where the first synonym "(" begins
          official = head[/^[^(]*/].strip
          official << ' (Fragment)' if head =~ /fragment/i
          return official
        end

        # since UniProtKB release 14.0 of 22-Jul-2008:
        # the first "Full=" item of a RecName or SubName entry wins.
        entries.each do |entry|
          category = entry[0]
          next unless category == 'RecName' || category == 'SubName'
          full = entry[1..-1].find { |pair| pair[0] == 'Full' }
          return full[1].to_s if full
        end
        ""
      end
    
    
      # returns synonyms (unofficial and/or alternative names).
      # Returns an Array containing String objects.
      #
      # Since UniProtKB release 14.0 of 22-Jul-2008, the DE line format have
      # been changed. The method returns the full or short names which are
      # taken from "RecName: Short=", "RecName: EC=", and AltName lines,
      # except after "Contains:" or "Includes:".
      # For keeping compatibility with old format parser, "RecName: EC=N.N.N.N"
      # is reported as "EC N.N.N.N".
      # In addition, to prevent confusion, "Allergen=" and "CD_antigen=" 
      # prefixes are added for the corresponding fields.
      #
      # For old format, the method parses the DE lines and returns synonyms.
      # synonyms are each placed in () following the official name on the DE line.
      def synonyms
        @data['DE'] ||= parse_DE_line_rel14(get('DE'))
        entries = @data['DE']
        found = []

        unless entries
          # old format (before Rel. 13.x)
          de_line = fetch('DE')
          return found unless de_line
          # ignore stuff between [ and ].  That's the "contains" part
          de_line.sub(/\[.*\]/, '').scan(/\([^)]+/) do |chunk|
            next if chunk =~ /fragment/i
            found << chunk[1..-1].strip # index to remove the leading (
          end
          return found
        end

        # since UniProtKB release 14.0 of 22-Jul-2008
        entries.each do |entry|
          category = entry[0]
          # names after "Includes:" / "Contains:" are not reported
          break if category == 'Includes' || category == 'Contains'
          next unless category == 'RecName' || category == 'SubName' ||
                      category == 'AltName'

          entry[1..-1].each do |pair|
            value = pair[1]
            next unless value
            # the official name itself is not a synonym
            next if value == self.protein_name
            found << case pair[0]
                     when 'EC'
                       "EC " + value
                     when 'Allergen', 'CD_antigen'
                       pair[0] + '=' + value
                     else
                       value
                     end
          end
        end
        found
      end
    
    
      # returns gene names in the GN line.
      #
      # New UniProt/SwissProt format:
      # * Bio::SPTR#gn -> [ * ]
      # where  is:
      #                    { :name => '...', 
      #                      :synonyms => [ 's1', 's2', ... ],
      #                      :loci   => [ 'l1', 'l2', ... ],
      #                      :orfs     => [ 'o1', 'o2', ... ] 
      #                    }
      #
      # Old format:
      # * Bio::SPTR#gn -> Array      # AND 
      # * Bio::SPTR#gn[0] -> Array   # OR
      #
      # === GN Line: Gene name(s) (>=0, optional)
      def gn
        return @data['GN'] if @data['GN']

        # The structured (UniProt) GN line is recognised by any of its
        # "Token=" markers; anything else goes to the old-style parser.
        line = fetch('GN')
        markers = [/Name=/, /ORFNames=/, /OrderedLocusNames=/, /Synonyms=/]
        @data['GN'] = if markers.any? { |marker| marker === line }
                        gn_uniprot_parser
                      else
                        gn_old_parser
                      end
        @data['GN']
      end
    
    
      # returns contents in the old style GN line.
      # === GN Line: Gene name(s) (>=0, optional)
      #  GN   HNS OR DRDX OR OSMZ OR BGLY.
      #  GN   CECA1 AND CECA2.
      #  GN   CECA1 AND (HOGE OR FUGA).
      #
      #  GN NAME1 [(AND|OR) NAME]+.
      #
      # Bio::SPTR#gn -> Array      # AND 
      #          #gn[0] -> Array   # OR
      #          #gene_names -> Array
      def gn_old_parser
        # Outer Array: groups joined by " AND "; inner Arrays: names joined
        # by " OR ".  Parentheses are dropped and the final "." removed.
        @data['GN'] =
          if get('GN').size > 0
            fetch('GN').sub(/\.$/, '').split(/ AND /).map do |group|
              group.delete('()').split(/ OR /).map { |name| name.strip }
            end
          else
            Array.new
          end
      end
      private :gn_old_parser
    
      # returns contents in the structured GN line.
      # The new format of the GN line is:
      #  GN   Name=; Synonyms=[, ...]; OrderedLocusNames=[, ...];
      #  GN   ORFNames=[, ...];
      #
      # * Bio::SPTR#gn -> [ * ]
      # where  is:
      #                    { :name => '...', 
      #                      :synonyms => [ 's1', 's2', ... ],
      #                      :loci   => [ 'l1', 'l2', ... ],
      #                      :orfs     => [ 'o1', 'o2', ... ] 
      #                    }
      def gn_uniprot_parser
        # Parses the structured GN line into an Array with one Hash per gene
        # (keys :name, :synonyms, :loci, :orfs), stores it in @data['GN']
        # and returns it.
        @data['GN'] = Array.new
        gn_line = fetch('GN').strip
        # Genes are separated by a stand-alone "and" that follows the ";" of
        # the previous gene.  Splitting on a bare /and/ would also cut gene
        # names that merely contain "and" (e.g. "hand2").
        records = gn_line.split(/;\s+and\s+/)
        records.each do |record|
          gene_hash = {:name => '', :synonyms => [], :loci => [], :orfs => []}
          record.split(/;/).each do |element|
            # $' is the text after the matched "Token=", i.e. the value.
            case element
            when /Name=/ then
              gene_hash[:name] = $'.strip
            when /Synonyms=/ then
              gene_hash[:synonyms] = $'.strip.split(/\s*,\s*/)
            when /OrderedLocusNames=/ then
              gene_hash[:loci] = $'.strip.split(/\s*,\s*/)
            when /ORFNames=/ then
              gene_hash[:orfs] = $'.strip.split(/\s*,\s*/)
            end
          end
          @data['GN'] << gene_hash
        end
        return @data['GN']
      end
      private :gn_uniprot_parser
    
    
      # returns a Array of gene names in the GN line.
      def gene_names
        gn # set @data['GN'] if it hasn't been already done
        records = @data['GN']
        # Old-style data is an Array of Arrays: its first group is returned.
        return records.first unless records.first.class == Hash
        # Structured data is an Array of Hashes: collect every :name.
        records.collect { |gene| gene[:name] }
      end
    
    
      # returns a String of the first gene name in the GN line.
      def gene_name
        # gene_names is nil for an entry without gene names (no GN line);
        # return nil in that case instead of raising NoMethodError.
        names = gene_names
        names ? names.first : nil
      end
    
    
      # returns an Array of Hashes, or a String for one organism when an
      # index is given.
      # * Bio::SPTR#os  -> Array
      #  [{'name' => '(Human)', 'os' => 'Homo sapiens'}, 
      #   {'name' => '(Rat)', 'os' => 'Rattus norvegicus'}]
      # * Bio::SPTR#os[0] -> Hash 
      #  {'name' => "(Human)", 'os' => 'Homo sapiens'}
      # * Bio::SPTR#os[0]['name'] -> "(Human)"
      # * Bio::SPTR#os(0) -> "Homo sapiens (Human)"
      # 
      # === OS Line; organism species (>=1)
      #  OS   Genus species (name).
      #  OS   Genus species (name0) (name1).
      #  OS   Genus species (name0) (name1).
      #  OS   Genus species (name0), G s0 (name0), and G s (name0) (name1).
      #  OS   Homo sapiens (Human), and Rarrus norveticus (Rat)
      #  OS   Hippotis sp. Clark and Watts 825.
      #  OS   unknown cyperaceous sp.
      def os(num = nil)
        @data['OS'] ||= fetch('OS').split(/, and|, /).map do |chunk|
          # Scientific name: the leading run of name characters, which
          # stops before the parenthesised common name.
          organism = chunk[/(\w+ *[\w\d \:\'\+\-\.]+[\w\d\.])/, 1]
          unless organism
            raise "Error: OS Line. #{$!}\n#{fetch('OS')}\n"
          end
          # Everything from the first "(" to the last ")" of the chunk,
          # or nil when there is no parenthesised part.
          {'name' => chunk[/(\(.+\))/, 1], 'os' => organism}
        end

        return @data['OS'] unless num

        # EX. "Trifolium repens (white clover)"
        selected = @data['OS'][num]
        "#{selected['os']} #{selected['name']}"
      end
      
    
      # Bio::EMBLDB::Common#og -> Array
      # OG Line; organella (0 or 1/entry)
      # ["MITOCHONDRION", "CHLOROPLAST", "Cyanelle", "Plasmid"]
      #  or a plasmid name (e.g. "Plasmid pBR322").  
    
    
      # Bio::EMBLDB::Common#oc -> Array
      # OC Line; organism classification (>=1)
      # "OC   Eukaryota; Alveolata; Apicomplexa; Piroplasmida; Theileriidae;"
      # "OC   Theileria."
    
    
    
      # returns a Hash of organism taxonomy cross-references.
      # * Bio::SPTR#ox -> Hash
      #    {'NCBI_TaxID' => ['1234','2345','3456','4567'], ...}
      #
      # === OX Line; organism taxonomy cross-reference (>=1 per entry)
      #  OX   NCBI_TaxID=1234;
      #  OX   NCBI_TaxID=1234, 2345, 3456, 4567;
      def ox
        # Each ";"-terminated item is "Database=id[, id ...]"; the ids are
        # kept as an Array of Strings under the database name.
        @data['OX'] ||= fetch('OX').sub(/\.$/, '').split(/;/).inject(Hash.new) do |xrefs, item|
          db, refs = item.strip.split(/=/)
          xrefs[db] = refs.split(/, */)
          xrefs
        end
      end
    
      # === The OH Line;  
      #
      # OH   NCBI_TaxID=TaxID; HostName.
      # http://br.expasy.org/sprot/userman.html#OH_line
      def oh
        # One Hash per host: {'NCBI_TaxID' => String, 'HostName' => String or nil}.
        @data['OH'] ||= fetch('OH').split("\. ").map do |record|
          unless record =~ /NCBI_TaxID=(\d+);/
            raise ArgumentError, ["Error: Invalid OH line format (#{self.entry_id}):",
                                  $!, "\n", get('OH'), "\n"].join
          end
          taxid = $1

          # The host name is optional; drop the final "." of the last record.
          host_name = record[/NCBI_TaxID=\d+; (.+)/, 1]
          host_name.sub!(/\.$/, '') if host_name

          {'NCBI_TaxID' => taxid, 'HostName' => host_name}
        end
      end
    
    
      
      # Bio::EMBLDB::Common#ref -> Array
      # R Lines
      # RN RC RP RX RA RT RL
    
      # returns contents in the R lines.
      # * Bio::EMBLDB::Common#ref -> [ * ]
      # where  is:
      #  {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '', 
      #   'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
      # 
      # R Lines
      # * RN RC RP RX RA RT RL RG
      def ref
        @data['R'] ||= get('R').split(/\nRN   /).map do |block|
          # Splitting consumed the "RN   " tag of every block but the first.
          block = 'RN   ' + block unless /^RN   / =~ block

          fields = {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
                    'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}

          # Concatenate the text of wrapped lines per tag, space-separated.
          block.split("\n").each do |line|
            unless /^(R[NPXARLCTG])   (.+)/ =~ line
              raise "Invalid format in R lines, \n[#{line}]\n"
            end
            fields[$1] += $2 + ' '
          end

          # Post-process every field with its set_XX helper.
          %w(RN RC RP RX RA RT RL RG).each do |tag|
            fields[tag] = send("set_#{tag}", fields[tag])
          end

          fields
        end
      end
    
      def set_RN(data)
        # Post-processes the RN text collected by #ref: removes surrounding
        # whitespace (#ref appends a space after each line's text).
        data.strip
      end
    
      def set_RC(data)
        # Parses the RC text ("TOKEN=Text; TOKEN=Text; ...") into an Array of
        # {'Token' => str, 'Text' => str} Hashes.  A value that lists several
        # texts ("a, b, and c") yields one Hash per text.
        #
        # The value is matched up to the next ";" only, so that several
        # tokens on one line are not merged into a single item.
        data.scan(/([STP]\w+)=([^;]+);/).map { |token, texts|
          texts.split(/, and |, /).map { |text|
            {'Token' => token, 'Text' => text}
          }
        }.flatten
      end
      private :set_RC
    
      def set_RP(data)
        # Strips the text and its final ".", then splits the listed
        # positions on ", " / ", AND " (case-insensitive).
        body = data.strip.sub(/\.$/, '')
        body.split(/, AND |, /i).map { |item| item.strip.gsub('  ', ' ') }
      end
      private :set_RP
    
      def set_RX(data)
        # Extracts the "Database=identifier;" cross-references; databases
        # that do not occur keep the value nil.
        rx = {'MEDLINE' => nil, 'PubMed' => nil, 'DOI' => nil}
        rx.keys.each do |db|
          rx[db] = $1 if data =~ /#{db}=(.+?);/
        end
        rx
      end
      private :set_RX
    
      def set_RA(data)
        # Removes the terminating ";" (and any spaces after it).
        data.sub(/; *$/, '')
      end
      private :set_RA
    
      def set_RT(data)
        # Removes the terminating ";" and then the double quotes that
        # enclose the title.
        data.sub(/; *$/, '').gsub(/(^"|"$)/, '')
      end
      private :set_RT
    
      def set_RL(data)
        # Only surrounding whitespace is removed from the RL text.
        data.strip
      end
      private :set_RL
    
      def set_RG(data)
        # Returns the group names, which are separated by "; ", as an Array.
        data.split('; ')
      end
      private :set_RG
    
    
    
      # returns Bio::Reference object from Bio::EMBLDB::Common#ref.
      # * Bio::EMBLDB::Common#ref -> Bio::References
      def references
        return @data['references'] if @data['references']

        entries = self.ref.map do |entry|
          # Missing attributes read as '' from this Hash.
          attrs = Hash.new('')
          entry.each do |tag, value|
            if tag == 'RA'
              attrs['authors'] = value.split(/, /)
            elsif tag == 'RT'
              attrs['title'] = value
            elsif tag == 'RL'
              # "Journal VOL (ISSUE), FIRST-LAST (YEAR)"; anything else is
              # kept verbatim as the journal.
              if value =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
                attrs['journal'] = $1
                attrs['volume']  = $2
                attrs['issue']   = $3
                attrs['pages']   = $4
                attrs['year']    = $5
              else
                attrs['journal'] = value
              end
            elsif tag == 'RX' # PUBMED, MEDLINE, DOI
              value.each { |db, xref| attrs[db.downcase] = xref }
            end
          end
          Reference.new(attrs)
        end

        @data['references'] = References.new(entries)
      end
    
    
    
    
    
    
      # === The HI line
      # Bio::SPTR#hi #=> hash
      def hi
        return @data['HI'] if @data['HI']

        parsed = @data['HI'] = []
        # Items look like "Category: Keyword; Keyword; ...; Keyword" and are
        # separated by ". ".
        fetch('HI').split(/\. /).each do |item|
          category, keywords = item.split(': ')
          keywords = keywords.split('; ')
          # The last keyword is stored separately, without a final ".".
          keyword = keywords.pop
          keyword.sub!(/\.$/, '')
          parsed << {'Category' => category, 'Keywords' => keywords, 'Keyword' => keyword}
        end
        parsed
      end
    
    
      # List of CC topic names.
      # NOTE(review): not referenced in the visible part of this class, and
      # 'DATABASE' (handled by #cc) is not listed -- confirm the intended use.
      @@cc_topics = ['PHARMACEUTICAL',
                     'BIOTECHNOLOGY',
                     'TOXIC DOSE', 
                     'ALLERGEN',   
                     'RNA EDITING',
                     'POLYMORPHISM',
                     'BIOPHYSICOCHEMICAL PROPERTIES',
                     'MASS SPECTROMETRY',
                     'WEB RESOURCE', 
                     'ENZYME REGULATION',
                     'DISEASE',
                     'INTERACTION',
                     'DEVELOPMENTAL STAGE',
                     'INDUCTION',
                     'CAUTION',
                     'ALTERNATIVE PRODUCTS',
                     'DOMAIN',
                     'PTM',
                     'MISCELLANEOUS',
                     'TISSUE SPECIFICITY',
                     'COFACTOR',
                     'PATHWAY',
                     'SUBUNIT',
                     'CATALYTIC ACTIVITY',
                     'SUBCELLULAR LOCATION',
                     'FUNCTION',
                     'SIMILARITY']
      # returns contents in the CC lines.
      # * Bio::SPTR#cc -> Hash
      #
      # returns an object of contents in the TOPIC.
      # * Bio::SPTR#cc(TOPIC) -> Array w/in Hash, Hash
      #
      # returns contents of the "ALTERNATIVE PRODUCTS".
      # * Bio::SPTR#cc('ALTERNATIVE PRODUCTS') -> Hash
      #    {'Event' => str, 
      #     'Named isoforms' => int,  
      #     'Comment' => str,
      #     'Variants'=>[{'Name' => str, 'Synonyms' => str, 'IsoId' => str, 'Sequence' => []}]}
      # 
      #    CC   -!- ALTERNATIVE PRODUCTS:
      #    CC       Event=Alternative splicing; Named isoforms=15;
      #    ...
      #    CC         placentae isoforms. All tissues differentially splice exon 13;
      #    CC       Name=A; Synonyms=no del;
      #    CC         IsoId=P15529-1; Sequence=Displayed;
      #
      # returns contents of the "DATABASE".
      # * Bio::SPTR#cc('DATABASE') -> Array
      #    [{'NAME'=>str,'NOTE'=>str, 'WWW'=>URI,'FTP'=>URI}, ...]
      #
      #    CC   -!- DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
      #
      # returns contents of the "MASS SPECTROMETRY".
      # * Bio::SPTR#cc('MASS SPECTROMETRY') -> Array
      #    [{'MW'=>float,'MW_ERR'=>float, 'METHOD'=>str,'RANGE'=>str}, ...]
      #
      #    CC   -!- MASS SPECTROMETRY: MW=XXX[; MW_ERR=XX][; METHOD=XX][;RANGE=XX-XX].
      #
      # === CC lines (>=0, optional)
      #   CC   -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT
      #   CC       IN LIVER, KIDNEY, LUNG AND BRAIN.
      # 
      #   CC   -!- TOPIC: FIRST LINE OF A COMMENT BLOCK;
      #   CC       SECOND AND SUBSEQUENT LINES OF A COMMENT BLOCK.
      #
      # See also http://www.expasy.org/sprot/userman.html#CC_line
      #
      def cc(topic = nil)
        # First part: parse the CC lines once into a Hash that maps each
        # topic name to an Array of comment bodies, cached in @data['CC'].
        # Second part: return the whole Hash (topic nil) or the value of one
        # topic, post-processed by a topic specific helper where one exists.
        unless @data['CC']
          cc  = Hash.new
          # A String of 74 hyphens.
          comment_border= '-' * (77 - 4 + 1)
          # Every comment block starts with this marker.
          dlm = /-!- /

          # 12KD_MYCSM has no CC lines.
          # (The empty Hash is returned as is, whatever the topic, and is
          # not cached.)
          return cc if get('CC').size == 0
          
          cc_raw = fetch('CC')

          # Removing the copyright statement.
          cc_raw.sub!(/ *---.+---/m, '')

          # Not any CC Lines without the copyright statement.
          return cc if cc_raw == ''

          begin
            # Keep only the text before a hyphen border.  The right-hand
            # side is a single String, so `copyright` is always nil.
            cc_raw, copyright = cc_raw.split(/#{comment_border}/)[0]
            # Drop the first marker so that split does not yield an empty
            # leading piece.
            cc_raw = cc_raw.sub(dlm,'')
            cc_raw.split(dlm).each do |tmp|
              tmp = tmp.strip

              # "TOPIC: body"; the topic consists of capitals and spaces.
              if /(^[A-Z ]+[A-Z]): (.+)/ =~ tmp
                key  = $1
                body = $2
                # Removes the space after "- " unless "AND" follows;
                # presumably rejoins words hyphenated at a line wrap.
                body.gsub!(/- (?!AND)/,'-')
                body.strip!
                unless cc[key]
                  cc[key] = [body]
                else
                  cc[key].push(body)
                end
              else
                raise ["Error: [#{entry_id}]: CC Lines", '"', tmp, '"',
                       '', get('CC'),''].join("\n")
              end
            end
          rescue NameError
            if fetch('CC') == ''
              return {}
            else
              raise ["Error: Invalid CC Lines: [#{entry_id}]: ",
                     "\n'#{self.get('CC')}'\n", "(#{$!})"].join
            end
          # NOTE(review): NoMethodError is a subclass of NameError, so it is
          # already handled by the clause above and this one is never used.
          rescue NoMethodError
          end
          
          @data['CC'] = cc
        end


        # NOTE(review): @data['CC'][topic] is nil for a topic the entry does
        # not have, so the branches below that call join('') raise
        # NoMethodError in that case.
        case topic
        when 'ALLERGEN'
          return @data['CC'][topic]
        when 'ALTERNATIVE PRODUCTS'
          return cc_alternative_products(@data['CC'][topic])
        when 'BIOPHYSICOCHEMICAL PROPERTIES'
          return cc_biophysiochemical_properties(@data['CC'][topic])
        when 'BIOTECHNOLOGY'
          return @data['CC'][topic]
        # NOTE(review): spelled 'CATALITIC' here but 'CATALYTIC ACTIVITY' in
        # @@cc_topics; a request for 'CATALYTIC ACTIVITY' therefore ends up
        # in the final else branch.  Confirm that cc_catalytic_activity
        # exists before correcting the spelling.
        when 'CATALITIC ACTIVITY'
          return cc_catalytic_activity(@data['CC'][topic])
        when 'CAUTION'
          return cc_caution(@data['CC'][topic])
        when 'COFACTOR'
          return @data['CC'][topic]
        when 'DEVELOPMENTAL STAGE'
          return @data['CC'][topic].join('')
        when 'DISEASE'
          return @data['CC'][topic].join('')
        when 'DOMAIN'
          return @data['CC'][topic]
        when 'ENZYME REGULATION'
          return @data['CC'][topic].join('')
        when 'FUNCTION'
          return @data['CC'][topic].join('')
        when 'INDUCTION'
          return @data['CC'][topic].join('')
        when 'INTERACTION'
          return cc_interaction(@data['CC'][topic])
        when 'MASS SPECTROMETRY'
          return cc_mass_spectrometry(@data['CC'][topic])
        when 'MISCELLANEOUS'
          return @data['CC'][topic]
        when 'PATHWAY'
          return cc_pathway(@data['CC'][topic])
        when 'PHARMACEUTICAL'
          return @data['CC'][topic]
        when 'POLYMORPHISM'
          return @data['CC'][topic]
        when 'PTM'
          return @data['CC'][topic]
        when 'RNA EDITING'
          return cc_rna_editing(@data['CC'][topic])
        when 'SIMILARITY'
          return @data['CC'][topic]
        when 'SUBCELLULAR LOCATION'
          return cc_subcellular_location(@data['CC'][topic])
        when 'SUBUNIT'
          return @data['CC'][topic]
        when 'TISSUE SPECIFICITY'
          return @data['CC'][topic]
        when 'TOXIC DOSE'
          return @data['CC'][topic]
        when 'WEB RESOURCE'
          return cc_web_resource(@data['CC'][topic])
        when 'DATABASE'
          # DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
          tmp = Array.new
          db = @data['CC']['DATABASE']
          # nil when the entry has no DATABASE comment.
          return db unless db

          db.each do |e|
            # One Hash per DATABASE comment; `db` is reused for it.
            db = {'NAME' => nil, 'NOTE' => nil, 'WWW' => nil, 'FTP' => nil}
            # Drop the last character (the final ".") and split the fields.
            e.sub(/.$/,'').split(/;/).each do |line|
              case line
              when /NAME=(.+)/
                db['NAME'] = $1
              when /NOTE=(.+)/
                db['NOTE'] = $1
              when /WWW="(.+)"/
                db['WWW'] = $1
              when /FTP="(.+)"/
                db['FTP'] = $1
              end 
            end
            tmp.push(db)
          end
          return tmp
        when nil
          return @data['CC']
        else
          return @data['CC'][topic]
        end
      end
    
    
      def cc_alternative_products(data)
        # Parses the bodies of the "ALTERNATIVE PRODUCTS" comment (an Array
        # of Strings) into
        #   {'Event' => [str, ...], 'Named isoforms' => str,
        #    'Comment' => str, 'Variants' => [variant, ...]}
        # Returns nil when data is nil, i.e. the entry has no such comment.
        # (The guard must come before the join, which cannot handle nil.)
        return nil unless data
        ap = data.join('')

        # Event, Named isoforms, Comment, [Name, Synonyms, IsoId, Sequence]+
        tmp = {'Event' => "", 'Named isoforms' => "", 'Comment' => "", 
               'Variants'  => []}
        if /Event=(.+?);/ =~ ap
          tmp['Event'] = $1.sub(/;/,'').split(/, /)
        end
        if /Named isoforms=(\S+?);/ =~ ap
          tmp['Named isoforms'] = $1
        end
        if /Comment=(.+?);/m =~ ap
          tmp['Comment'] = $1
        end
        # Each isoform description runs from "Name=" up to the ";" that
        # ends its "Sequence=" field.
        ap.scan(/Name=.+?Sequence=.+?;/).each do |ent|
          tmp['Variants'] << cc_alternative_products_variants(ent)
        end
        return tmp
      end
      private :cc_alternative_products
    
      def cc_alternative_products_variants(data)
        # Parses one isoform description ("Name=..; Synonyms=..; IsoId=..;
        # Sequence=..;") into a Hash.  Sequence, Synonyms and IsoId values
        # are ", "-separated lists and are stored as Arrays.
        variant = {'Name' => '', 'Synonyms' => [], 'IsoId' => [], 'Sequence' => []}
        data.split(/; /).each do |field|
          key, value = field.split(/=/)
          if key == 'Sequence' || key == 'Synonyms' || key == 'IsoId'
            value = value.sub(/;/, '').split(/, /)
          end
          variant[key] = value
        end
        variant
      end
      private :cc_alternative_products_variants
    
    
      # Parses the CC BIOPHYSICOCHEMICAL PROPERTIES topic.
      #
      # data:: Array whose first element holds the topic text; only data[0]
      #        is read.
      #
      # Returns a Hash with the keys 'Absorption' and 'Kinetic parameters'
      # (Hashes, left empty when the section is not found) and
      # 'pH dependence', 'Redox potential', 'Temperature dependence'
      # (Strings, "" when not found).
      def cc_biophysiochemical_properties(data)
        data = data[0]

        hash = {'Absorption' => {},
                'Kinetic parameters' => {},
                'pH dependence' => "",
                'Redox potential' => "",
                'Temperature dependence' => ""}
        if data =~ /Absorption: Abs\(max\)=(.+?);/
          hash['Absorption']['Abs(max)'] = $1
        end
        # The Note has to follow the Abs(max) field directly. [^;]+ keeps the
        # match inside the Absorption section; a greedy ".+" here would run on
        # and pick up the Note of the Kinetic parameters section instead.
        if data =~ /Absorption: Abs\(max\)=[^;]+; Note=(.+?);/
          hash['Absorption']['Note'] = $1
        end
        if data =~ /Kinetic parameters: KM=(.+?); Vmax=(.+?);/
          hash['Kinetic parameters']['KM'] = $1
          hash['Kinetic parameters']['Vmax'] = $2
        end
        if data =~ /Kinetic parameters: KM=.+; Vmax=.+; Note=(.+?);/
          hash['Kinetic parameters']['Note'] = $1
        end
        if data =~ /pH dependence: (.+?);/
          hash['pH dependence'] = $1
        end
        if data =~ /Redox potential: (.+?);/
          hash['Redox potential'] = $1
        end
        if data =~ /Temperature dependence: (.+?);/
          hash['Temperature dependence'] = $1
        end
        hash
      end
      private :cc_biophysiochemical_properties
    
    
      # Returns the CC CAUTION text: the fragments in +data+ (an Array of
      # String) concatenated without any separator.
      def cc_caution(data)
        separator = ''
        data.join(separator)
      end
      private :cc_caution
    
    
      # returns the contents of each line of the CC INTERACTION section.
      #
      #   CC       P46527:CDKN1B; NbExp=1; IntAct=EBI-359815, EBI-519280;
      def cc_interaction(data)
        # Every "<partner>; NbExp=<n>; IntAct=<ids>;" triple found in the
        # joined text becomes one Hash of the returned Array.
        records = data.join('').scan(/(.+?); NbExp=(.+?); IntAct=(.+?);/)
        records.map do |fields|
          partner, nbexp, intact = fields.map {|x| x.strip }

          spac = spid = optid = nil
          if partner =~ /^(.+):(.+)/
            # "accession:identifier[ optional_identifier]"
            spac = $1
            spid = $2.split(' ')[0]
          elsif partner =~ /Self/
            # Self interaction: both fields refer to this entry.
            spac = self.entry_id
            spid = self.entry_id
          end
          optid = $1 if partner =~ /^.+:.+ (.+)/

          {'SP_Ac' => spac,
           'identifier' => spid,
           'NbExp' => nbexp,
           'IntAct' => intact.split(', '),
           'optional_identifier' => optid}
        end
      end
      private :cc_interaction
    
    
      # Parses CC MASS SPECTROMETRY entries.
      #
      #   MASS SPECTROMETRY: MW=XXX[; MW_ERR=XX][; METHOD=XX][;RANGE=XX-XX].
      #
      # data:: Array of String (one per entry), or nil.
      #
      # Returns nil/false unchanged; otherwise an Array with one Hash per
      # entry holding the keys 'MW', 'MW_ERR', 'METHOD', 'RANGE' and 'NOTE'
      # (nil for fields that are not present).
      def cc_mass_spectrometry(data)
        return data unless data

        data.map do |entry|
          mass = {'MW' => nil, 'MW_ERR' => nil, 'METHOD' => nil, 'RANGE' => nil,
                  'NOTE' => nil}
          # The last character (the terminating '.') is removed before the
          # entry is split into its ';'-separated fields.
          entry.sub(/.$/, '').split(/;/).each do |field|
            if field =~ /MW=(.+)/
              mass['MW'] = $1
            elsif field =~ /MW_ERR=(.+)/
              mass['MW_ERR'] = $1
            elsif field =~ /METHOD=(.+)/
              mass['METHOD'] = $1
            elsif field =~ /RANGE=(\d+-\d+)/
              mass['RANGE'] = $1          # RANGE class ?
            elsif field =~ /NOTE=(.+)/
              mass['NOTE'] = $1
            end
          end
          mass
        end
      end
      private :cc_mass_spectrometry
    
    
      # Parses the CC PATHWAY topic.
      #
      # data:: Array of String. Every element is processed, but only the
      #        result for the first element is returned.
      #
      # Returns an Array of String obtained by removing a trailing '.' and
      # splitting on "; ", " and " or ": " (nil when +data+ is empty).
      def cc_pathway(data)
        parsed = data.map do |line|
          line.sub(/\.$/, '').split(/; | and |: /)
        end
        parsed[0]
      end
      private :cc_pathway
    
    
      # Parses the CC RNA EDITING topic.
      #
      # data:: Array of String fragments; they are concatenated before parsing.
      #
      # Returns a Hash with 'Modified_positions' (Array of String) and 'Note'
      # (String, "" when no Note field is present).
      # Raises ArgumentError when no Modified_positions field is found.
      def cc_rna_editing(data)
        text = data.join('')
        unless text =~ /Modified_positions=(.+?)(\.|;)/
          raise ArgumentError, "Invarid CC RNA Editing lines (#{self.entry_id}):#{$!}\n#{get('CC')}"
        end
        positions = $1.sub(/\.$/, '').split(', ')

        note = (text =~ /Note=(.+)/) ? $1 : ""
        {'Modified_positions' => positions, 'Note' => note}
      end
      private :cc_rna_editing
    
    
      # Parses the CC SUBCELLULAR LOCATION topic.
      #
      # data:: Array of String. Every element is processed, but only the
      #        result for the first element is returned.
      #
      # Returns an Array of Arrays: the text is split into sentences on ". ",
      # each sentence into parts on "; ", and a trailing '.' is removed from
      # every part (nil when +data+ is empty).
      def cc_subcellular_location(data)
        parsed = data.map do |text|
          text.split('. ').map do |location|
            location.split('; ').map {|part| part.sub(/\.$/, '') }
          end
        end
        parsed.first
      end
      private :cc_subcellular_location
    
    
      #--
      # Since UniProtKB release 12.2 of 11-Sep-2007:
      # CC   -!- WEB RESOURCE: Name=ResourceName[; Note=FreeText][; URL=WWWAddress].  # Old format:
      # CC   -!- WEB RESOURCE: NAME=ResourceName[; NOTE=FreeText][; URL=WWWAddress].
      #++
    
      def cc_web_resource(data)
        # One Hash ('Name', 'Note', 'URL'; nil when missing) per element of
        # +data+. Both the current mixed-case keys and the old upper-case
        # keys are accepted; old keys are stored under the mixed-case name.
        data.map do |line|
          entry = {'Name' => nil, 'Note' => nil, 'URL' => nil}
          line.split(';').each do |field|
            if field =~ /(Name|Note)\=(.+)/
              key, val = $1, $2.strip
              entry[key] = val
            elsif field =~ /(NAME|NOTE)\=(.+)/
              key, val = $1.downcase.capitalize, $2.strip
              entry[key] = val
            elsif field =~ /URL\=\"(.+)\"/
              entry['URL'] = $1.strip
            end
          end
          entry
        end
      end
      private :cc_web_resource
    
      # returns databases cross-references in the DR lines.
      # * Bio::SPTR#dr  -> Hash w/in Array
      #
      # === DR Line; database cross-reference (>=0)
      #    DR  database_identifier; primary_identifier; secondary_identifier.
      #  one cross-reference per line
      # Known database identifiers that may appear in DR lines.
      # 'MGD' and 'MIM' are separate entries: without the comma between them
      # Ruby concatenates the adjacent literals into the single string 'MGDMIM'.
      # NOTE(review): no reader of this class variable is visible in this part
      # of the file -- confirm whether it is still used.
      @@dr_database_identifier = ['EMBL','CARBBANK','DICTYDB','ECO2DBASE',
        'ECOGENE',
        'FLYBASE','GCRDB','HIV','HSC-2DPAGE','HSSP','INTERPRO','MAIZEDB',
        'MAIZE-2DPAGE','MENDEL','MGD','MIM','PDB','PFAM','PIR','PRINTS',
        'PROSITE','REBASE','AARHUS/GHENT-2DPAGE','SGD','STYGENE','SUBTILIST',
        'SWISS-2DPAGE','TIGR','TRANSFAC','TUBERCULIST','WORMPEP','YEPD','ZFIN']
    
      # Backup Bio::EMBLDB#dr as embl_dr
      alias :embl_dr :dr 
    
      # Bio::SPTR#dr
      def dr(key = nil)
        # Without a key the result of the aliased original (embl_dr) is
        # returned untouched.
        return embl_dr unless key

        # With a key, each cross-reference row for that database is turned
        # into a Hash; an unknown key yields an empty Array.
        rows = embl_dr[key] || []
        rows.map do |x|
          {'Accession' => x[0],
           'Version' => x[1],
           ' ' => x[2],
           'Molecular Type' => x[3]}
        end
      end
    
    
      # Bio::EMBLDB::Common#kw - Array
      #                    #keywords  -> Array
      #
      # KW Line; keyword (>=1)
      # KW   [Keyword;]+
    
    
      # returns contents in the feature table.
      #
      # == Examples
      #
      #  sp = Bio::SPTR.new(entry)
      #  ft = sp.ft
      #  ft.class #=> Hash
      #  ft.keys.each do |feature_key|
      #    ft[feature_key].each do |feature|
      #      feature['From'] #=> '1'
      #      feature['To']   #=> '21'
      #      feature['Description'] #=> ''
      #      feature['FTId'] #=> ''
      #      feature['diff'] #=> []
      #      feature['original'] #=> [feature_key, '1', '21', '', '']
      #    end
      #  end
      #
      # * Bio::SPTR#ft -> Hash
      #    {FEATURE_KEY => [{'From' => int, 'To' => int, 
      #                      'Description' => aStr, 'FTId' => aStr,
      #                      'diff' => [original_residues, changed_residues],
      #                      'original' => aAry }],...}
      #
      # returns an Array of the information about the feature_name in the feature table.
      # * Bio::SPTR#ft(feature_name) -> Array of Hash
      #    [{'From' => str, 'To' => str, 'Description' => str, 'FTId' => str},...]
      #
      # == FT Line; feature table data (>=0, optional)
      #
      #   Col     Data item
      #   -----   -----------------
      #    1- 2   FT
      #    6-13   Feature name 
      #   15-20   `FROM' endpoint
      #   22-27   `TO' endpoint
      #   35-75   Description (>=0 per key)
      #   -----   -----------------
      #
      # Note: 'FROM' and 'TO' endpoints are allowed to use non-numerical characters
      # including '<', '>' or '?'. (c.f. '<1', '?42')
      #
      # See also http://www.expasy.org/sprot/userman.html#FT_line
      #
      def ft(feature_key = nil)
        # With a key, look it up in the complete (memoized) table built below.
        return ft[feature_key] if feature_key
        return @data['FT'] if @data['FT']

        table = []
        begin
          # Step 1: cut every FT line into its fixed-width columns. A line
          # that does not start a new feature is appended to the previous
          # feature as a continuation.
          get('FT').split("\n").each do |line|
            if line =~ /^FT   \w/
              feature = line.chomp.ljust(74)
              table << [feature[ 5..12].strip,   # Feature Name
                        feature[14..19].strip,   # From
                        feature[21..26].strip,   # To
                        feature[34..74].strip ]  # Description
            else
              table.last << line.chomp.sub!(/^FT +/, '')
            end
          end

          # Step 2: join the description lines. A trailing "/FTId=..." element
          # is taken off first and re-appended as the fifth column ('' when
          # the feature has no FTId).
          table = table.map { |feature|
            ftid = feature.pop if feature.last =~ /FTId=/
            if feature.size > 4
              feature = [feature[0],
                         feature[1],
                         feature[2],
                         feature[3, feature.size - 3].join(" ")]
            end
            feature << (ftid || '')
          }

          # Step 3: group the rows by feature key.
          hash = {}
          table.each do |feature|
            hash[feature[0]] = [] unless hash[feature[0]]
            hash[feature[0]] << {
              # Removing '<', '>' or '?' in FROM/TO endpoint.
              'From' => feature[1].sub(/\D/, '').to_i,
              'To'   => feature[2].sub(/\D/, '').to_i,
              'Description' => feature[3],
              'FTId' => feature[4].to_s.sub(/\/FTId=/, '').sub(/\.$/, ''),
              'diff' => [],
              'original' => feature
            }

            case feature[0]
            when 'VARSPLIC', 'VARIANT', 'VAR_SEQ', 'CONFLICT'
              entry = hash[feature[0]].last
              # Both stay nil when the description matches neither pattern.
              original_res = changed_res = nil
              case entry['Description']
              when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
                # Copy both captures before gsub resets the match data.
                original_res, changed_res = $1, $2
                original_res = original_res.gsub(/ /,'').strip
                changed_res = changed_res.gsub(/ /,'').strip
              when /Missing/i
                # A deletion: the original residues are read from the
                # sequence, the replacement is the empty string.
                original_res = seq.subseq(entry['From'], entry['To'])
                changed_res = ''
              end
              entry['diff'] = [original_res, changed_res]
            end
          end
        rescue
          raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n'#{self.get('FT')}'\n"
        end

        @data['FT'] = hash
      end
    
    
    
      # returns a Hash of the contents of the SQ line.
      # * Bio::SPTR#sq  -> hsh
      #
      # returns the value for the given key in the SQ line.
      # * Bio::SPTR#sq(key)  -> int or str
      # * Keys: ['MW', 'mw', 'molecular', 'weight', 'aalen', 'len', 'length', 
      #          'CRC64']
      #
      # === SQ Line; sequence header (1/entry)
      #    SQ   SEQUENCE   233 AA;  25630 MW;  146A1B48A1475C86 CRC64;
      #    SQ   SEQUENCE  \d+ AA; \d+ MW;  [0-9A-Z]+ CRC64;
      #
      # MW, Dalton unit.
      # CRC64 (64-bit Cyclic Redundancy Check, ISO 3309).
      def sq(key = nil)
        # Parse the SQ header once and cache the result in @data['SQ'].
        @data['SQ'] ||=
          if fetch('SQ') =~ /(\d+) AA\; (\d+) MW; (.+) CRC64;/
            { 'aalen' => $1.to_i, 'MW' => $2.to_i, 'CRC64' => $3 }
          else
            raise "Invalid SQ Line: \n'#{fetch('SQ')}'"
          end

        return @data['SQ'] unless key

        # Keys are matched loosely by pattern; anything else is used as a
        # literal Hash key.
        case key
        when /mw/, /molecular/, /weight/ then @data['SQ']['MW']
        when /len/, /length/, /AA/       then @data['SQ']['aalen']
        else                                  @data['SQ'][key]
        end
      end
    
    
      # returns a Bio::Sequence::AA of the amino acid sequence.
      # * Bio::SPTR#seq -> Bio::Sequence::AA
      #
      # blank Line; sequence data (>=1)
      def seq
        # Built on first use from the blank-tag field (spaces and digit runs
        # removed) and cached under the '' key of @data.
        @data[''] ||= Sequence::AA.new( fetch('').gsub(/ |\d+/,'') )
        @data['']
      end
      alias aaseq seq
    
    end # class SPTR
    
    end # module Bio
    
    
    
    =begin
    
    = Bio::SPTR < Bio::DB
    
    Class for a entry in the SWISS-PROT/TrEMBL database.
    
      * (())
      * (())
      * (())
      
    
    --- Bio::SPTR.new(a_sp_entry)
    
    === ID line (Identification)
    
    --- Bio::SPTR#id_line -> {'ENTRY_NAME' => str, 'DATA_CLASS' => str,
                              'MOLECULE_TYPE' => str, 'SEQUENCE_LENGTH' => int }  
    --- Bio::SPTR#id_line(key) -> str
    
           key = (ENTRY_NAME|MOLECULE_TYPE|DATA_CLASS|SEQUENCE_LENGTH)
    
    --- Bio::SPTR#entry_id -> str
    --- Bio::SPTR#molecule -> str
    --- Bio::SPTR#sequence_length -> int
        
    
    === AC lines (Accession number)
    
    --- Bio::SPTR#ac -> ary
    --- Bio::SPTR#accessions -> ary
    --- Bio::SPTR#accession -> accessions.first
    
     
    === GN line (Gene name(s))
    
    --- Bio::SPTR#gn -> [ary, ...] or [{:name => str, :synonyms => [], :loci => [], :orfs => []}]
    --- Bio::SPTR#gene_name -> str
    --- Bio::SPTR#gene_names -> [str] or [str]
    
    
    === DT lines (Date) 
    
    --- Bio::SPTR#dt -> {'created' => str, 'sequence' => str, 'annotation' => str}
    --- Bio::SPTR#dt(key) -> str
    
          key := (created|annotation|sequence)
    
    
    === DE lines (Description)
    
    --- Bio::SPTR#de -> str
                 #definition -> str
    
    --- Bio::SPTR#protein_name
    
          Returns the proposed official name of the protein
    
    
    --- Bio::SPTR#synonyms
    
          Returns an array of synonyms (unofficial names)
    
    === KW lines (Keyword)
    
    --- Bio::SPTR#kw -> ary
    
    === OS lines (Organism species)
    
    --- Bio::SPTR#os -> [{'name' => str, 'os' => str}, ...]
    
    === OC lines (organism classification)
    
    --- Bio::SPTR#oc -> ary
    
    === OG line (Organella)
    
    --- Bio::SPTR#og -> ary
    
    === OX line (Organism taxonomy cross-reference)
    
    --- Bio::SPTR#ox -> {'NCBI_TaxID' => [], ...}
    
    === RN RC RP RX RA RT RL RG lines (Reference)  
    
    --- Bio::SPTR#ref -> [{'RN' => int, 'RP' => str, 'RC' => str, 'RX' => str, 'RT' => str, 'RL' => str, 'RA' => str, 'RC' => str, 'RG' => str},...]
    
    === DR lines (Database cross-reference)
    
    --- Bio::SPTR#dr -> {'EMBL' => ary, ...}
    
    === FT lines (Feature table data)
    
    --- Bio::SPTR#ft -> hsh
    
    === SQ lines (Sequence header and data)
    
    --- Bio::SPTR#sq -> {'CRC64' => str, 'MW' => int, 'aalen' => int}
    --- Bio::SPTR#sq(key) -> int or str
    
              key := (aalen|MW|CRC64)
    
    --- Bio::SPTR#seq -> Bio::Sequence::AA
                 #aaseq -> Bio::Sequence::AA
    
    =end
    
      #      Content                      Occurrence in an entry
      # ---- ---------------------------  --------------------------------
      # ID - identification               (begins each entry; 1 per entry)
      # AC - accession number(s)          (>=1 per entry)
      # DT - date                         (3 per entry)
      # DE - description                  (>=1 per entry)
      # GN - gene name(s)                 (>=0 per entry; optional)
      # OS - organism species             (>=1 per entry)
      # OG - organelle                    (0 or 1 per entry; optional)
      # OC - organism classification      (>=1 per entry)
      # OX - organism taxonomy x-ref      (>=1 per entry)
      # OH - Organism Host
      # RN - reference number             (>=1 per entry)
      # RP - reference positions          (>=1 per entry)
      # RC - reference comment(s)         (>=0 per entry; optional)
      # RX - reference cross-reference(s) (>=0 per entry; optional)
      # RA - reference author(s)          (>=1 per entry)
      # RT - reference title              (>=0 per entry; optional)
      # RL - reference location           (>=1 per entry)
      # RG - reference group(s)
      # CC - comments or notes            (>=0 per entry; optional)
      # DR - database cross-references    (>=0 per entry; optional)
      # KW - keywords                     (>=1 per entry)
      # FT - feature table data           (>=0 per entry; optional)
      # SQ - sequence header              (1 per entry)
      #    - (blanks) The sequence data   (>=1 per entry)
      # // - termination line             (ends each entry; 1 per entry)
      # ---- ---------------------------  --------------------------------
    
    
    bio-1.4.3.0001/lib/bio/db/embl/format_embl.rb0000644000004100000410000001325012200110570020323 0ustar  www-datawww-data#
    # = bio/db/embl/format_embl.rb - EMBL format generater
    #
    # Copyright::  Copyright (C) 2008
    #              Jan Aerts ,
    #              Naohisa Goto 
    # License::    The Ruby License
    #
    
    module Bio::Sequence::Format::NucFormatter
    
      # INTERNAL USE ONLY, YOU SHOULD NOT USE THIS CLASS.
      # Embl format output class for Bio::Sequence.
      class Embl < Bio::Sequence::Format::FormatterBase
    
        # helper methods
        include Bio::Sequence::Format::INSDFeatureHelper
        
        private
    
        # wrapping with EMBL style
        def embl_wrap(prefix, str)
          # +str+ is converted with to_s and handed to wrap together with the
          # line width of 80 and the line prefix.
          text = str.to_s
          wrap(text, 80, prefix)
        end
    
        # Given words (an Array of String) are wrapping with EMBL style.
        # Each word is never splitted inside the word.
        def embl_wrap_words(prefix, array)
          # Words are appended to the current line, separated by one space,
          # for as long as the line stays within 80 characters; otherwise a
          # new line beginning with +prefix+ is started. Returns the lines
          # joined with "\n".
          width = 80
          lines = []
          current = nil
          array.each do |word|
            if current and current.length + 1 + word.length <= width
              current.concat ' '
              current.concat word
            else
              current = prefix + word
              lines.push current
            end
          end
          lines.join("\n")
        end
    
        # format reference
        # ref:: Bio::Reference object
        # hash:: (optional) a hash for RN (reference number) administration
        def reference_format_embl(ref, hash = nil)
          # Produces the RN/RC/RP/RX/RA/RT/RL lines for one reference,
          # terminated by an "XX" spacer line, as a single String.
          #
          # When +hash+ is given it is updated in place: every reference
          # number written to an RN line is stored as a key, so that later
          # calls sharing the same hash never emit the same number twice.
          lines = []
          if ref.embl_gb_record_number or hash then
            refno = ref.embl_gb_record_number.to_i
            hash ||= {}
            if refno <= 0 or hash[refno] then
              # No usable number, or the number is already taken: use the one
              # following the highest number handed out so far.
              refno = hash.keys.max.to_i + 1
            end
            # Explicit numbers must be recorded as well; otherwise a later
            # auto-assigned number could duplicate one of them.
            hash[refno] = true
            lines << embl_wrap("RN   ", "[#{refno}]")
          end
          if ref.comments then
            ref.comments.each do |cmnt|
              lines << embl_wrap("RC   ", cmnt)
            end
          end
          unless ref.sequence_position.to_s.empty? then
            lines << embl_wrap("RP   ",   "#{ref.sequence_position}")
          end
          unless ref.doi.to_s.empty? then
            lines << embl_wrap("RX   ",   "DOI; #{ref.doi}.")
          end
          unless ref.pubmed.to_s.empty? then
            lines << embl_wrap("RX   ",   "PUBMED; #{ref.pubmed}.")
          end
          unless ref.authors.empty? then
            # "Family, G. N." becomes "Family G.N."; names are separated by
            # ',' and the last one is terminated by ';'.
            auth = ref.authors.collect do |x|
              y = x.to_s.strip.split(/\, *([^\,]+)\z/)
              y[1].gsub!(/\. +/, '.') if y[1]
              y.join(' ')
            end
            lastauth = auth.pop
            auth.each { |x| x.concat ',' }
            auth.push(lastauth.to_s + ';')
            lines << embl_wrap_words('RA   ', auth)
          end
          # The RT line is always written; an empty title gives just ";".
          lines << embl_wrap('RT   ',
                             (ref.title.to_s.empty? ? '' :
                              "\"#{ref.title}\"") + ';')
          unless ref.journal.to_s.empty? then
            volissue = "#{ref.volume.to_s}"
            volissue = "#{volissue}(#{ref.issue})" unless ref.issue.to_s.empty?
            rl = "#{ref.journal}"
            rl += " #{volissue}" unless volissue.empty?
            rl += ":#{ref.pages}" unless ref.pages.to_s.empty?
            rl += "(#{ref.year})" unless ref.year.to_s.empty?
            rl += '.'
            lines << embl_wrap('RL   ', rl)
          end
          lines << "XX"
          return lines.join("\n")
        end
    
        # Formats sequence data as EMBL sequence lines: up to 60 residues per
        # line in groups of 10, followed by the running residue count
        # right-aligned in 9 columns. The result has no trailing newline.
        def seq_format_embl(seq)
          position = 0
          formatted = seq.gsub(/.{1,60}/) do |chunk|
            position += chunk.length
            grouped = chunk.gsub(/.{10}/) { |ten| ten + ' ' }
            "     %-66s%9d\n" % [grouped, position]
          end
          formatted.chomp!
          formatted
        end
    
        # Counts the bases of +seq+ case-insensitively.
        # Returns a Hash with the keys :a, :c, :g, :t (u/U are counted as t)
        # and :other (every character that is none of a, c, g, t, u).
        def seq_composition(seq)
          composition = {}
          composition[:a] = seq.count('aA')
          composition[:c] = seq.count('cC')
          composition[:g] = seq.count('gG')
          composition[:t] = seq.count('tTuU')
          composition[:other] = seq.count('^aAcCgGtTuU')
          composition
        end
    
        # molecule type
        def mol_type_embl
          # 1. An explicit molecule_type wins.
          mt = molecule_type
          return mt if mt

          # 2. Otherwise use the 'mol_type' qualifier of the first 'source'
          #    feature, if there is one.
          source = (features or []).find { |feat| feat.feature == 'source' }
          qual = source && source.qualifiers.find { |x| x.qualifier == 'mol_type' }

          # 3. Fall back to 'NA'.
          qual ? qual.value : 'NA'
        end
    
        # CC line. Comments.
        def comments_format_embl(cmnts)
          # nil/false or an empty value produces no CC block at all.
          return '' if !cmnts or cmnts.empty?

          list = cmnts.kind_of?(Array) ? cmnts : [ cmnts ]
          out = list.map { |str| embl_wrap('CC   ', str) }
          unless out.empty? then
            # Spacer line, then an empty element so that the joined string
            # ends with "\n".
            out.push("XX   ", '')
          end
          out.join("\n")
        end
    
    
        # Erb template of EMBL format for Bio::Sequence
        erb_template <<'__END_OF_TEMPLATE__'
    ID   <%= primary_accession || entry_id %>; SV <%= sequence_version %>; <%= topology %>; <%= mol_type_embl %>; <%= data_class %>; <%= division %>; <%= seq.length %> BP.
    XX   
    <%= embl_wrap('AC   ', accessions.reject{|a| a.nil?}.join('; ') + ';') %>
    XX   
    DT   <%= format_date(date_created || null_date) %> (Rel. <%= release_created || 0 %>, Created)
    DT   <%= format_date(date_modified || null_date) %> (Rel. <%= release_modified || 0 %>, Last updated, Version <%= entry_version || 0 %>)
    XX   
    <%= embl_wrap('DE   ', definition) %>
    XX   
    <%= embl_wrap('KW   ', (keywords || []).join('; ') + '.') %>
    XX   
    OS   <%= species %>
    <%= embl_wrap('OC   ', (classification || []).join('; ') + '.') %>
    XX   
    <% hash = {}; (references || []).each do |ref| %><%= reference_format_embl(ref, hash) %>
    <% end %><% (dblinks || []).each do |r|
    %>DR   <%= r.database %>; <%= r.id %><% unless r.secondary_ids.empty? %>; <%= r.secondary_ids[0] %><% end %>.
    <% end %><% if dblinks and !dblinks.empty? then
     %>XX   
    <% end %><%= comments_format_embl(comments)
    %>FH   Key             Location/Qualifiers
    FH   
    <%= format_features_embl(features || []) %>XX   
    SQ   Sequence <%= seq.length %> BP; <% c = seq_composition(seq) %><%= c[:a] %> A; <%= c[:c] %> C; <%= c[:g] %> G; <%= c[:t] %> T; <%= c[:other] %> other;
    <%= seq_format_embl(seq) %>
    //
    __END_OF_TEMPLATE__
    
      end #class Embl
    
    end #module Bio::Sequence::Format::NucFormatter
    
    bio-1.4.3.0001/lib/bio/db/embl/embl.rb0000644000004100000410000003215712200110570016762 0ustar  www-datawww-data#
    # = bio/db/embl/embl.rb - EMBL database class
    #
    # 
    # Copyright::   Copyright (C) 2001-2007
    #               Mitsuteru C. Nakao 
    #               Jan Aerts 
    # License::     The Ruby License
    #
    # $Id: embl.rb,v 1.29.2.7 2008/06/17 16:04:36 ngoto Exp $
    #
    # == Description
    #
    # Parser class for EMBL database entry.
    #
    # == Examples
    # 
    #   emb = Bio::EMBL.new($<.read)
    #   emb.entry_id
    #   emb.each_cds do |cds|
    #     cds # A CDS in feature table.
    #   end
    #   emb.seq #=> "ACGT..."
    #
    # == References
    #
    # * The EMBL Nucleotide Sequence Database
    #   http://www.ebi.ac.uk/embl/
    #
    # * The EMBL Nucleotide Sequence Database: Users Manual
    #   http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html
    #
    
    require 'date'
    require 'bio/db'
    require 'bio/db/embl/common'
    require 'bio/compat/features'
    require 'bio/compat/references'
    require 'bio/sequence'
    require 'bio/sequence/dblink'
    
    module Bio
    class EMBL < EMBLDB
      include Bio::EMBLDB::Common
    
      # returns contents in the ID line.
      # * Bio::EMBL#id_line -> 
      # where  is:
      #  {'ENTRY_NAME' => String, 'MOLECULE_TYPE' => String, 'DIVISION' => String,
      #   'SEQUENCE_LENGTH' => Int, 'SEQUENCE_VERSION' => Int}
      #
      # ID Line
      #  "ID  ENTRY_NAME DATA_CLASS; MOLECULE_TYPE; DIVISION; SEQUENCE_LENGTH BP."
      #
      # DATA_CLASS = ['standard']
      #
      # MOLECULE_TYPE: DNA RNA XXX
      #
      # Code ( DIVISION )
      #  EST (ESTs)
      #  PHG (Bacteriophage)
      #  FUN (Fungi)
      #  GSS (Genome survey)
      #  HTC (High Throughput cDNAs) 
      #  HTG (HTGs)
      #  HUM (Human)
      #  INV (Invertebrates)
      #  ORG (Organelles)
      #  MAM (Other Mammals)
      #  VRT (Other Vertebrates)
      #  PLN (Plants)
      #  PRO (Prokaryotes)
      #  ROD (Rodents)
      #  SYN (Synthetic)
      #  STS (STSs)
      #  UNC (Unclassified)
      #  VRL (Viruses)
      #
      # Rel 89-
      # ID   CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP.
      # ID <1>; SV <2>; <3>; <4>; <5>; <6>; <7> BP.
      # 1. Primary accession number
      # 2. Sequence version number
      # 3. Topology: 'circular' or 'linear'
      # 4. Molecule type (see note 1 below)
      # 5. Data class (see section 3.1)
      # 6. Taxonomic division (see section 3.2)
      # 7. Sequence length (see note 2 below)
      def id_line(key=nil)
        # The ID line is parsed on first use and cached in @data['ID'].
        @data['ID'] ||= begin
          fields = fetch('ID').split(/; +/)
          parsed = Hash.new
          parsed['ENTRY_NAME'], parsed['DATA_CLASS'] = fields.shift.split(/ +/)
          if fields.first =~ /^SV/
            # Layout with an "SV n" field: version, topology, molecule type
            # and data class follow the entry name.
            parsed['SEQUENCE_VERSION'] = fields.shift.split(' ').last
            parsed['TOPOLOGY'] = fields.shift
            parsed['MOLECULE_TYPE'] = fields.shift
            parsed['DATA_CLASS'] = fields.shift
          else
            # Layout without "SV": the data class was already read from the
            # first field, so only the molecule type comes next.
            parsed['MOLECULE_TYPE'] = fields.shift
          end
          parsed['DIVISION'] = fields.shift
          parsed['SEQUENCE_LENGTH'] = fields.shift.strip.split(' ').first.to_i
          parsed
        end

        key ? @data['ID'][key] : @data['ID']
      end
    
      # returns ENTRY_NAME in the ID line.
      # * Bio::EMBL#entry -> String
      def entry
        # Value stored under 'ENTRY_NAME' in the parsed ID line.
        id_line['ENTRY_NAME']
      end
      alias entry_name entry
      alias entry_id entry
    
      # returns MOLECULE_TYPE in the ID line.
      # * Bio::EMBL#molecule -> String
      def molecule
        # Value stored under 'MOLECULE_TYPE' in the parsed ID line.
        id_line['MOLECULE_TYPE']
      end
      alias molecule_type molecule
    
      # returns DATA_CLASS in the ID line.
      # * Bio::EMBL#data_class -> String
      def data_class
        id_line['DATA_CLASS']
      end
      
      # returns TOPOLOGY in the ID line (nil when the parsed ID line has no
      # such field).
      # * Bio::EMBL#topology -> String or nil
      def topology
        id_line['TOPOLOGY']
      end
      
      # returns DIVISION in the ID line.
      # * Bio::EMBL#division -> String
      def division
        # Value stored under 'DIVISION' in the parsed ID line.
        id_line['DIVISION']
      end
    
      # returns SEQUENCE_LENGTH in the ID line.
      # * Bio::EMBL#sequence_length -> Integer
      def sequence_length
        # id_line stores this value already converted with to_i.
        id_line['SEQUENCE_LENGTH']
      end
      alias seqlen sequence_length
      
    
      # AC Line
      # "AC   A12345; B23456;"
    
    
      # returns the version information in the sequence version (SV) line.
      # * Bio::EMBL#sv -> Accession.Version in String
      # * Bio::EMBL#version -> version number in Int
      #
      # SV Line; sequence version (1/entry)
      #  SV    Accession.Version
      def sv
        # Prefer the SV field (with its first ';' removed). When it is empty,
        # "ENTRY_NAME.SEQUENCE_VERSION" is assembled from the ID line.
        sv_field = field_fetch('SV').sub(/;/,'')
        return sv_field unless sv_field == ""

        [id_line['ENTRY_NAME'], id_line['SEQUENCE_VERSION']].join('.')
      end
      # Returns the sequence version as an Integer: the part of sv after the
      # first '.', or SEQUENCE_VERSION from the ID line when sv has no such
      # part.
      def version
        suffix = sv.split(".")[1]
        suffix ||= id_line['SEQUENCE_VERSION']
        suffix.to_i
      end
    
      
      # returns contents in the date (DT) line.
      # * Bio::EMBL#dt  -> 
    # where
    is: # {} # * Bio::EMBL#dt(key) -> String # keys: 'created' and 'updated' # # DT Line; date (2/entry) def dt(key=nil) unless @data['DT'] tmp = Hash.new dt_line = self.get('DT').split(/\n/) tmp['created'] = dt_line[0].sub(/\w{2} /,'').strip tmp['updated'] = dt_line[1].sub(/\w{2} /,'').strip @data['DT'] = tmp end if key @data['DT'][key] else @data['DT'] end end #-- ## # DE Line; description (>=1) # #++ #-- ## # KW Line; keyword (>=1) # KW [Keyword;]+ # # Bio::EMBLDB#kw -> Array # #keywords -> Array #++ #-- ## # OS Line; organism species (>=1) # OS Genus species (name) # "OS Trifolium repens (white clover)" # # Bio::EMBLDB#os -> Array #++ # returns contents in the OS line. # * Bio::EMBL#os -> Array of # where is: # [{'name'=>'Human', 'os'=>'Homo sapiens'}, # {'name'=>'Rat', 'os'=>'Rattus norveticus'}] # * Bio::EMBL#os[0]['name'] => "Human" # * Bio::EMBL#os[0] => {'name'=>"Human", 'os'=>'Homo sapiens'} #-- # * Bio::EMBL#os(0) => "Homo sapiens (Human)" #++ # # OS Line; organism species (>=1) # OS Trifolium repens (white clover) # # Typically, OS line shows "Genus species (name)" style: # OS Genus species (name) # # Other examples: # OS uncultured bacterium # OS xxxxxx metagenome # OS Cloning vector xxxxxxxx # Complicated examples: # OS Poeciliopsis gracilis (Poeciliopsis gracilis (Heckel, 1848)) # OS Etmopterus sp. B Last & Stevens, 1994 (bristled lanternshark) # OS Galaxias sp. D (Allibone et al., 1996) (Pool Burn galaxias) # OS Sicydiinae sp. 'Keith et al., 2010' # OS Acanthopagrus sp. 'Jean & Lee, 2008' # OS Gaussia princeps (T. Scott, 1894) # OS Rana sp. 8 Hillis & Wilcox, 2005 # OS Contracaecum rudolphii C D'Amelio et al., 2007 # OS Partula sp. 'Mt. Marau, Tahiti' # OS Leptocephalus sp. 'type II larva' (Smith, 1989) # OS Tayloria grandis (D.G.Long) Goffinet & A.J.Shaw, 2002 # OS Non-A, non-B hepatitis virus # OS Canidae (dog, coyote, wolf, fox) # OS Salmonella enterica subsp. 
enterica serovar 4,[5],12:i:- # OS Yersinia enterocolitica (type O:5,27) # OS Influenza A virus (A/green-winged teal/OH/72/99(H6N1,4)) # OS Influenza A virus (A/Beijing/352/1989,(highgrowth reassortant NIB26)(H3N2)) # OS Recombinant Hepatitis C virus H77(5'UTR-NS2)/JFH1_V787A,Q1247L # def os(num = nil) unless @data['OS'] os = Array.new tmp = fetch('OS') if /([A-Z][a-z]* *[\w\d \:\'\+\-]+[\w\d]) *\(([\w\d ]+)\)\s*\z/ =~ tmp org = $1 os.push({'name' => $2, 'os' => $1}) else os.push({'name' => nil, 'os' => tmp}) end @data['OS'] = os end if num # EX. "Trifolium repens (white clover)" "#{@data['OS'][num]['os']} {#data['OS'][num]['name']" end @data['OS'] end #-- ## # OC Line; organism classification (>=1) # # Bio::EMBLDB#oc -> Array #++ #-- ## # OG Line; organella (0 or 1/entry) # ["Mitochondrion", "Chloroplast","Kinetoplast", "Cyanelle", "Plastid"] # or a plasmid name (e.g. "Plasmid pBR322"). # # Bio::EMBLDB#og -> String #++ #-- ## # R Lines # RN RC RP RX RA RT RL # # Bio::EMBLDB#ref #++ #-- ## # DR Line; defabases cross-regerence (>=0) # "DR database_identifier; primary_identifier; secondary_identifier." # # Bio::EMBLDB#dr #++ # returns feature table header (String) in the feature header (FH) line. # # FH Line; feature table header (0 or 2) def fh fetch('FH') end # returns contents in the feature table (FT) lines. # * Bio::EMBL#ft -> Bio::Features # * Bio::EMBL#ft {} -> {|Bio::Feature| } # # same as features method in bio/db/genbank.rb # # FT Line; feature table data (>=0) def ft unless @data['FT'] ary = Array.new in_quote = false @orig['FT'].each_line do |line| next if line =~ /^FEATURES/ head = line[0,20].strip # feature key (source, CDS, ...) body = line[20,60].chomp # feature value (position, /qualifier=) if line =~ /^FT {3}(\S+)/ ary.push([ $1, body ]) # [ feature, position, /q="data", ... 
] elsif body =~ /^ \// and not in_quote ary.last.push(body) # /q="data..., /q=data, /q if body =~ /=" / and body !~ /"$/ in_quote = true end else ary.last.last << body # ...data..., ...data..." if body =~ /"$/ in_quote = false end end end ary.map! do |subary| parse_qualifiers(subary) end @data['FT'] = ary.extend(Bio::Features::BackwardCompatibility) end if block_given? @data['FT'].each do |feature| yield feature end else @data['FT'] end end alias features ft # iterates on CDS features in the FT lines. def each_cds ft.each do |cds_feature| if cds_feature.feature == 'CDS' yield cds_feature end end end # iterates on gene features in the FT lines. def each_gene ft.each do |gene_feature| if gene_feature.feature == 'gene' yield gene_feature end end end # returns comment text in the comments (CC) line. # # CC Line; comments of notes (>=0) def cc get('CC').to_s.gsub(/^CC /, '') end alias comment cc ## # XX Line; spacer line (many) # def nxx # end # returns sequence header information in the sequence header (SQ) line. # * Bio::EMBL#sq -> # where is: # {'ntlen' => Int, 'other' => Int, # 'a' => Int, 'c' => Int, 'g' => Int, 't' => Int} # * Bio::EMBL#sq(base) -> # * Bio::EMBL#sq[base] -> # # SQ Line; sequence header (1/entry) # SQ Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other; def sq(base = nil) unless @data['SQ'] fetch('SQ') =~ \ /(\d+) BP\; (\d+) A; (\d+) C; (\d+) G; (\d+) T; (\d+) other;/ @data['SQ'] = {'ntlen' => $1.to_i, 'other' => $6.to_i, 'a' => $2.to_i, 'c' => $3.to_i , 'g' => $4.to_i, 't' => $5.to_i} else @data['SQ'] end if base @data['SQ'][base.downcase] else @data['SQ'] end end # returns the nucleotie sequence in this entry. # * Bio::EMBL#seq -> Bio::Sequence::NA # # @orig[''] as sequence # bb Line; (blanks) sequence data (>=1) def seq Bio::Sequence::NA.new( fetch('').gsub(/ /,'').gsub(/\d+/,'') ) end alias naseq seq alias ntseq seq #-- # // Line; termination line (end; 1/entry) #++ # modified date. Returns Date object, String or nil. 
def date_modified parse_date(self.dt['updated']) end # created date. Returns Date object, String or nil. def date_created parse_date(self.dt['created']) end # release number when last updated def release_modified parse_release_version(self.dt['updated'])[0] end # release number when created def release_created parse_release_version(self.dt['created'])[0] end # entry version number numbered by EMBL def entry_version parse_release_version(self.dt['updated'])[1] end # parse date string. Returns Date object. def parse_date(str) begin Date.parse(str) rescue ArgumentError, TypeError, NoMethodError, NameError str end end private :parse_date # extracts release and version numbers from DT line def parse_release_version(str) return [ nil, nil ] unless str a = str.split(/[\(\,\)]/) dstr = a.shift rel = nil ver = nil a.each do |x| case x when /Rel\.\s*(.+)/ rel = $1.strip when /Version\s*(.+)/ ver = $1.strip end end [ rel, ver ] end private :parse_release_version # database references (DR). # Returns an array of Bio::Sequence::DBLink objects. def dblinks get('DR').split(/\n/).collect { |x| Bio::Sequence::DBLink.parse_embl_DR_line(x) } end # species def species self.fetch('OS') end # taxonomy classfication alias classification oc # converts the entry to Bio::Sequence object # --- # *Arguments*:: # *Returns*:: Bio::Sequence object def to_biosequence Bio::Sequence.adapter(self, Bio::Sequence::Adapter::EMBL) end ### private methods private ## # same as Bio::GenBank#parse_qualifiers(feature) def parse_qualifiers(ary) feature = Feature.new feature.feature = ary.shift feature.position = ary.shift.gsub(/\s/, '') ary.each do |f| if f =~ %r{/([^=]+)=?"?([^"]*)"?} qualifier, value = $1, $2 if value.empty? 
value = true end case qualifier when 'translation' value = Sequence::AA.new(value.gsub(/\s/, '')) when 'codon_start' value = value.to_i end feature.append(Feature::Qualifier.new(qualifier, value)) end end return feature end end # class EMBL end # module Bio bio-1.4.3.0001/lib/bio/db/embl/common.rb0000644000004100000410000002306212200110570017326 0ustar www-datawww-data# # = bio/db/embl.rb - Common methods for EMBL style database classes # # Copyright:: Copyright (C) 2001-2006 # Mitsuteru C. Nakao # License:: The Ruby License # # $Id: common.rb,v 1.12.2.5 2008/05/07 12:22:10 ngoto Exp $ # # == Description # # EMBL style databases class # # This module defines a common framework among EMBL, UniProtKB, SWISS-PROT, # TrEMBL. For more details, see the documentations in each embl/*.rb # libraries. # # EMBL style format: # ID - identification (begins each entry; 1 per entry) # AC - accession number (>=1 per entry) # SV - sequence version (1 per entry) # DT - date (2 per entry) # DE - description (>=1 per entry) # KW - keyword (>=1 per entry) # OS - organism species (>=1 per entry) # OC - organism classification (>=1 per entry) # OG - organelle (0 or 1 per entry) # RN - reference number (>=1 per entry) # RC - reference comment (>=0 per entry) # RP - reference positions (>=1 per entry) # RX - reference cross-reference (>=0 per entry) # RA - reference author(s) (>=1 per entry) # RG - reference group (>=0 per entry) # RT - reference title (>=1 per entry) # RL - reference location (>=1 per entry) # DR - database cross-reference (>=0 per entry) # FH - feature table header (0 or 2 per entry) # FT - feature table data (>=0 per entry) # CC - comments or notes (>=0 per entry) # XX - spacer line (many per entry) # SQ - sequence header (1 per entry) # bb - (blanks) sequence data (>=1 per entry) # // - termination line (ends each entry; 1 per entry) # # == Examples # # # Make a new parser class for EMBL style database entry. 
# require 'bio/db/embl/common' # module Bio # class NEWDB < EMBLDB # include Bio::EMBLDB::Common # end # end # # == References # # * The EMBL Nucleotide Sequence Database # http://www.ebi.ac.uk/embl/ # # * The EMBL Nucleotide Sequence Database: Users Manual # http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html # # * Swiss-Prot Protein knowledgebase. TrEMBL Computer-annotated supplement # to Swiss-Prot # http://au.expasy.org/sprot/ # # * UniProt # http://uniprot.org/ # # * The UniProtKB/SwissProt/TrEMBL User Manual # http://www.expasy.org/sprot/userman.html # require 'bio/db' require 'bio/reference' require 'bio/compat/references' module Bio class EMBLDB module Common DELIMITER = "\n//\n" RS = DELIMITER TAGSIZE = 5 def initialize(entry) super(entry, TAGSIZE) end # returns a Array of accession numbers in the AC lines. # # AC Line # "AC A12345; B23456;" # AC [AC1;]+ # # Accession numbers format: # 1 2 3 4 5 6 # [O,P,Q] [0-9] [A-Z, 0-9] [A-Z, 0-9] [A-Z, 0-9] [0-9] def ac unless @data['AC'] tmp = Array.new field_fetch('AC').split(/ /).each do |e| tmp.push(e.sub(/;/,'')) end @data['AC'] = tmp end @data['AC'] end alias accessions ac # returns the first accession number in the AC lines def accession ac[0] end # returns a String int the DE line. # # DE Line def de unless @data['DE'] @data['DE'] = fetch('DE') end @data['DE'] end alias description de alias definition de # API # returns contents in the OS line. # * Bio::EMBLDB#os -> Array of # where is: # [{'name'=>'Human', 'os'=>'Homo sapiens'}, # {'name'=>'Rat', 'os'=>'Rattus norveticus'}] # * Bio::SPTR#os[0]['name'] => "Human" # * Bio::SPTR#os[0] => {'name'=>"Human", 'os'=>'Homo sapiens'} # * Bio::STPR#os(0) => "Homo sapiens (Human)" # # OS Line; organism species (>=1) # "OS Trifolium repens (white clover)" # # OS Genus species (name). # OS Genus species (name0) (name1). # OS Genus species (name0) (name1). # OS Genus species (name0), G s0 (name0), and G s (name1). 
def os(num = nil)
  # Parsed once and cached.  Each organism becomes a Hash whose 'os' is the
  # species text and whose 'name' is the parenthesised part *including* the
  # parentheses (or nil when there is none).
  unless @data['OS']
    os = Array.new
    fetch('OS').split(/, and|, /).each do |tmp|
      if tmp =~ /([A-Z][a-z]* *[\w\d \:\'\+\-]+[\w\d])/
        org = $1
        tmp =~ /(\(.+\))/
        os.push({'name' => $1, 'os' => org})
      else
        raise "Error: OS Line. #{$!}\n#{fetch('OS')}\n"
      end
    end
    @data['OS'] = os
  end
  return @data['OS'] unless num

  # EX. "Trifolium repens (white clover)"
  hit = @data['OS'][num]
  return nil unless hit
  hit['name'] ? "#{hit['os']} #{hit['name']}" : hit['os']
end

# returns contents in the OG line.
# * Bio::EMBLDB::Common#og -> [ String * ]
#
# OG Line; organella (0 or 1/entry)
# OG Plastid; Chloroplast.
# OG Mitochondrion.
# OG Plasmid sym pNGR234a.
# OG Plastid; Cyanelle.
# OG Plasmid pSymA (megaplasmid 1).
# OG Plasmid pNRC100, Plasmid pNRC200, and Plasmid pHH1.
def og
  unless @data['OG']
    og = Array.new
    if get('OG').size > 0
      # Work on copies: drop the final period and the " and" of the last
      # list item, then treat every ';' like ','.
      ogstr = fetch('OG').sub(/\.$/, '').sub(/ and/, '').gsub(/;/, ',')
      ogstr.split(',').each do |tmp|
        og.push(tmp.strip)
      end
    end
    @data['OG'] = og
  end
  @data['OG']
end

# returns contents in the OC line.
# * Bio::EMBLDB::Common#oc -> [ String * ]
# OC Line; organism classification (>=1)
# OC Eukaryota; Alveolata; Apicomplexa; Piroplasmida; Theileriidae;
# OC Theileria.
def oc
  unless @data['OC']
    begin
      # remove the final character (the closing period) before splitting
      @data['OC'] = fetch('OC').sub(/.$/,'').split(/;/).map {|e|
        e.strip
      }
    rescue NameError
      nil
    end
  end
  @data['OC']
end

# returns keywords in the KW line.
# * Bio::EMBLDB::Common#kw -> [ String * ]
# KW Line; keyword (>=1)
# KW [Keyword;]+
def kw
  unless @data['KW']
    if get('KW').size > 0
      tmp = fetch('KW').sub(/.$/,'')
      @data['KW'] = tmp.split(/;/).map {|e| e.strip }
    else
      @data['KW'] = []
    end
  end
  @data['KW']
end
alias keywords kw

# returns contents in the R lines.
# * Bio::EMBLDB::Common#ref -> [ Hash * ]
#   where each Hash is:
#     {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
#      'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
#
# R Lines
# * RN RC RP RX RA RT RL RG
#
# Raises RuntimeError when a line inside the R block does not start with
# one of the tags above followed by a value.
def ref
  unless @data['R']
    ary = Array.new
    # One chunk per reference; every chunk but the first loses its "RN"
    # tag in the split, so it is put back below.
    get('R').split(/\nRN +/).each do |str|
      raw = {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
             'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
      str = 'RN   ' + str unless /^RN / =~ str
      str.split("\n").each do |line|
        # The tag is followed by padding blanks; skip all of them so that
        # continuation lines are joined by a single blank.
        if /^(R[NPXARLCTG]) +(.+)/ =~ line
          raw[$1] += $2 + ' '
        else
          raise "Invalid format in R lines, \n[#{line}]\n"
        end
      end
      # Trim each value and drop the enclosing quotes / trailing semicolon.
      raw.keys.each do |tag|
        raw[tag] = raw[tag].strip.sub(/^"/,'').sub(/;$/,'').sub(/"$/,'')
      end
      ary.push(raw)
    end
    @data['R'] = ary
  end
  @data['R']
end

# returns Bio::Reference objects built from Bio::EMBLDB::Common#ref.
# * Bio::EMBLDB::Common#references -> Bio::References
def references
  unless @data['references']
    ary = ref.map {|ent|
      hash = Hash.new
      ent.each {|key, value|
        case key
        when 'RN'
          if /\[(\d+)\]/ =~ value.to_s
            hash['embl_gb_record_number'] = $1.to_i
          end
        when 'RC'
          unless value.to_s.strip.empty?
            hash['comments'] ||= []
            hash['comments'].push value
          end
        when 'RP'
          hash['sequence_position'] = value
        when 'RA'
          # "Surname I." -> "Surname, I."
          a = value.split(/\, /)
          a.each do |x|
            x.sub!(/( [^ ]+)\z/, ",\\1")
          end
          hash['authors'] = a
        when 'RT'
          hash['title'] = value
        when 'RL'
          # journal volume(issue):first-last(year).
          if /(.*) (\d+) *(\(([^\)]+)\))?(\, |\:)([a-zA-Z\d]+\-[a-zA-Z\d]+) *\((\d+)\)\.?\z/ =~ value.to_s
            hash['journal'] = $1.rstrip
            hash['volume'] = $2
            hash['issue'] = $4
            hash['pages'] = $6
            hash['year'] = $7
          else
            hash['journal'] = value
          end
        when 'RX' # PUBMED, DOI, (AGRICOLA)
          value.split(/\. /).each {|item|
            tag, xref = item.split(/\; /).map {|i| i.strip.sub(/\.\z/, '') }
            hash[ tag.downcase ] = xref
          }
        end
      }
      Reference.new(hash)
    }
    @data['references'] = ary.extend(Bio::References::BackwardCompatibility)
  end
  @data['references']
end

# returns contents in the DR line.
# * Bio::EMBLDB::Common#dr -> [ * ] # where is: # * Bio::EMBLDB::Common#dr {|k,v| } # # DR Line; defabases cross-reference (>=0) # a cross_ref pre one line # "DR database_identifier; primary_identifier; secondary_identifier." def dr unless @data['DR'] tmp = Hash.new self.get('DR').split(/\n/).each do |db| a = db.sub(/^DR /,'').sub(/.$/,'').strip.split(/;[ ]/) dbname = a.shift tmp[dbname] = Array.new unless tmp[dbname] tmp[dbname].push(a) end @data['DR'] = tmp end if block_given? @data['DR'].each do |k,v| yield(k, v) end else @data['DR'] end end end # module Common end # class EMBLDB end # module Bio bio-1.4.3.0001/lib/bio/db/embl/embl_to_biosequence.rb0000644000004100000410000000331312200110570022036 0ustar www-datawww-data# # = bio/db/embl/embl_to_biosequence.rb - Bio::EMBL to Bio::Sequence adapter module # # Copyright:: Copyright (C) 2008 # Naohisa Goto , # License:: The Ruby License # # $Id:$ # require 'bio/sequence' require 'bio/sequence/adapter' # Internal use only. Normal users should not use this module. # # Bio::EMBL to Bio::Sequence adapter module. # It is internally used in Bio::EMBL#to_biosequence. 
# module Bio::Sequence::Adapter::EMBL extend Bio::Sequence::Adapter private def_biosequence_adapter :seq def_biosequence_adapter :id_namespace do |orig| 'EMBL' end def_biosequence_adapter :entry_id def_biosequence_adapter :primary_accession do |orig| orig.accessions[0] end def_biosequence_adapter :secondary_accessions do |orig| orig.accessions[1..-1] || [] end def_biosequence_adapter :molecule_type def_biosequence_adapter :data_class def_biosequence_adapter :definition, :description def_biosequence_adapter :topology def_biosequence_adapter :date_created def_biosequence_adapter :date_modified def_biosequence_adapter :release_created def_biosequence_adapter :release_modified def_biosequence_adapter :entry_version def_biosequence_adapter :division def_biosequence_adapter :sequence_version, :version def_biosequence_adapter :keywords def_biosequence_adapter :species def_biosequence_adapter :classification #-- # unsupported yet # def_biosequence_adapter :organelle do |orig| # orig.fetch('OG') # end #++ def_biosequence_adapter :references def_biosequence_adapter :features def_biosequence_adapter :comments, :cc def_biosequence_adapter :dblinks end #module Bio::Sequence::Adapter::EMBL bio-1.4.3.0001/lib/bio/db/embl/swissprot.rb0000644000004100000410000000176612200110570020122 0ustar www-datawww-data# # = bio/db/embl/swissprot.rb - SwissProt database class # # Copyright:: Copyright (C) 2001, 2002 Toshiaki Katayama # License:: The Ruby License # # $Id: swissprot.rb,v 1.7 2007/04/05 23:35:40 trevor Exp $ # require 'bio/db/embl/sptr' module Bio # == Description # # Parser class for SwissProt database entry. See also Bio::SPTR class. # This class holds name space for SwissProt specific methods. # # SwissProt (before UniProtKB/SwissProt) specific methods are defined in # this class. Shared methods for UniProtKB/SwissProt and TrEMBL classes # are defined in Bio::SPTR class. 
# # == Examples # # str = File.read("p53_human.swiss") # obj = Bio::SwissProt.new(str) # obj.entry_id #=> "P53_HUMAN" # # == Referencees # # * Swiss-Prot Protein knowledgebase # http://au.expasy.org/sprot/ # # * Swiss-Prot Protein Knowledgebase User Manual # http://au.expasy.org/sprot/userman.html # class SwissProt < SPTR # Nothing to do (SwissProt format is abstracted in SPTR) end end bio-1.4.3.0001/lib/bio/db/soft.rb0000644000004100000410000003365112200110570016077 0ustar www-datawww-data# # bio/db/soft.rb - Interface for SOFT formatted files # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # # $Id:$ # module Bio # # bio/db/soft.rb - Interface for SOFT formatted files # # Author:: Trevor Wennblom # Copyright:: Copyright (c) 2007 Midwinter Laboratories, LLC (http://midwinterlabs.com) # License:: The Ruby License # # # = Description # # "SOFT (Simple Omnibus in Text Format) is a compact, simple, line-based, # ASCII text format that incorporates experimental data and metadata." # -- GEO, National Center for Biotechnology Information # # The Bio::SOFT module reads SOFT Series or Platform formatted files that # contain information # describing one database, one series, one platform, and many samples (GEO # accessions). The data from the file can then be viewed with Ruby methods. # # Bio::SOFT also supports the reading of SOFT DataSet files which contain # one database, one dataset, and many subsets. # # Format specification is located here: # * http://www.ncbi.nlm.nih.gov/projects/geo/info/soft2.html#SOFTformat # # SOFT data files may be directly downloaded here: # * ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SOFT # # NCBI's Gene Expression Omnibus (GEO) is here: # * http://www.ncbi.nlm.nih.gov/geo # # = Usage # # If an attribute has more than one value then the values are stored in an # Array of String objects. Otherwise the attribute is stored as a String. 
# # The platform and each sample may contain a table of data. A dataset from a # DataSet file may also contain a table. # # Attributes are dynamically created based on the data in the file. # Predefined keys have not been created in advance due to the variability of # SOFT files in-the-wild. # # Keys are generally stored as Symbols. In the case of keys for samples and # table headings may alternatively be accessed with Strings. # The names of samples (geo accessions) are case sensitive. Table headers # are case insensitive. # # require 'bio' # # lines = IO.readlines('GSE3457_family.soft') # soft = Bio::SOFT.new(lines) # # soft.platform[:geo_accession] # => "GPL2092" # soft.platform[:organism] # => "Populus" # soft.platform[:contributor] # => ["Jingyi,,Li", "Olga,,Shevchenko", "Steve,H,Strauss", "Amy,M,Brunner"] # soft.platform[:data_row_count] # => "240" # soft.platform.keys.sort {|a,b| a.to_s <=> b.to_s}[0..2] # => [:contact_address, :contact_city, :contact_country] # soft.platform[:"contact_zip/postal_code"] # => "97331" # soft.platform[:table].header # => ["ID", "GB_ACC", "SPOT_ID", "Function/Family", "ORGANISM", "SEQUENCE"] # soft.platform[:table].header_description # => {"ORGANISM"=>"sequence sources", "SEQUENCE"=>"oligo sequence used", "Function/Family"=>"gene functions and family", "ID"=>"", "SPOT_ID"=>"", "GB_ACC"=>"Gene bank accession number"} # soft.platform[:table].rows.size # => 240 # soft.platform[:table].rows[5] # => ["A039P68U", "AI163321", "", "TF, flowering protein CONSTANS", "P. tremula x P. tremuloides", "AGAAAATTCGATATACTGTCCGTAAAGAGGTAGCACTTAGAATGCAACGGAATAAAGGGCAGTTCACCTC"] # soft.platform[:table].rows[5][4] # => "P. tremula x P. tremuloides" # soft.platform[:table].rows[5][:organism] # => "P. tremula x P. tremuloides" # soft.platform[:table].rows[5]['ORGANISM'] # => "P. tremula x P. 
tremuloides" # # soft.series[:geo_accession] # => "GSE3457" # soft.series[:contributor] # => ["Jingyi,,Li", "Olga,,Shevchenko", "Ove,,Nilsson", "Steve,H,Strauss", "Amy,M,Brunner"] # soft.series[:platform_id] # => "GPL2092" # soft.series[:sample_id].size # => 74 # soft.series[:sample_id][0..4] # => ["GSM77557", "GSM77558", "GSM77559", "GSM77560", "GSM77561"] # # soft.database[:name] # => "Gene Expression Omnibus (GEO)" # soft.database[:ref] # => "Nucleic Acids Res. 2005 Jan 1;33 Database Issue:D562-6" # soft.database[:institute] # => "NCBI NLM NIH" # # soft.samples.size # => 74 # soft.samples[:GSM77600][:series_id] # => "GSE3457" # soft.samples['GSM77600'][:series_id] # => "GSE3457" # soft.samples[:GSM77600][:platform_id] # => "GPL2092" # soft.samples[:GSM77600][:type] # => "RNA" # soft.samples[:GSM77600][:title] # => "jst2b2" # soft.samples[:GSM77600][:table].header # => ["ID_REF", "VALUE"] # soft.samples[:GSM77600][:table].header_description # => {"ID_REF"=>"", "VALUE"=>"normalized signal intensities"} # soft.samples[:GSM77600][:table].rows.size # => 217 # soft.samples[:GSM77600][:table].rows[5] # => ["A039P68U", "8.19"] # soft.samples[:GSM77600][:table].rows[5][0] # => "A039P68U" # soft.samples[:GSM77600][:table].rows[5][:id_ref] # => "A039P68U" # soft.samples[:GSM77600][:table].rows[5]['ID_REF'] # => "A039P68U" # # # lines = IO.readlines('GDS100.soft') # soft = Bio::SOFT.new(lines) # # soft.database[:name] # => "Gene Expression Omnibus (GEO)" # soft.database[:ref] # => "Nucleic Acids Res. 
2005 Jan 1;33 Database Issue:D562-6" # soft.database[:institute] # => "NCBI NLM NIH" # # soft.subsets.size # => 8 # soft.subsets.keys # => ["GDS100_1", "GDS100_2", "GDS100_3", "GDS100_4", "GDS100_5", "GDS100_6", "GDS100_7", "GDS100_8"] # soft.subsets[:GDS100_7] # => {:dataset_id=>"GDS100", :type=>"time", :sample_id=>"GSM548,GSM543", :description=>"60 minute"} # soft.subsets['GDS100_7'][:sample_id] # => "GSM548,GSM543" # soft.subsets[:GDS100_7][:sample_id] # => "GSM548,GSM543" # soft.subsets[:GDS100_7][:dataset_id] # => "GDS100" # # soft.dataset[:order] # => "none" # soft.dataset[:sample_organism] # => "Escherichia coli" # soft.dataset[:table].header # => ["ID_REF", "IDENTIFIER", "GSM549", "GSM542", "GSM543", "GSM547", "GSM544", "GSM545", "GSM546", "GSM548"] # soft.dataset[:table].rows.size # => 5764 # soft.dataset[:table].rows[5] # => ["6", "EMPTY", "0.097", "0.217", "0.242", "0.067", "0.104", "0.162", "0.104", "0.154"] # soft.dataset[:table].rows[5][4] # => "0.242" # soft.dataset[:table].rows[5][:gsm549] # => "0.097" # soft.dataset[:table].rows[5][:GSM549] # => "0.097" # soft.dataset[:table].rows[5]['GSM549'] # => "0.097" # class SOFT attr_accessor :database attr_accessor :series, :platform, :samples attr_accessor :dataset, :subsets LINE_TYPE_ENTITY_INDICATOR = '^' LINE_TYPE_ENTITY_ATTRIBUTE = '!' 
LINE_TYPE_TABLE_HEADER = '#' # data table row defined by absence of line type character TABLE_COLUMN_DELIMITER = "\t" # Constructor # # --- # *Arguments* # * +lines+: (_required_) contents of SOFT formatted file # *Returns*:: Bio::SOFT def initialize(lines=nil) @database = Database.new @series = Series.new @platform = Platform.new @samples = Samples.new @dataset = Dataset.new @subsets = Subsets.new process(lines) end # Classes for Platform and Series files class Samples < Hash #:nodoc: def [](x) x = x.to_s if x.kind_of?( Symbol ) super(x) end end class Entity < Hash #:nodoc: end class Sample < Entity #:nodoc: end class Platform < Entity #:nodoc: end class Series < Entity #:nodoc: end # Classes for DataSet files class Subsets < Samples #:nodoc: end class Subset < Entity #:nodoc: end class Dataset < Entity #:nodoc: end # Classes important for all types class Database < Entity #:nodoc: end class Table #:nodoc: attr_accessor :header attr_accessor :header_description attr_accessor :rows class Header < Array #:nodoc: # @column_index contains column name => numerical index of column attr_accessor :column_index def initialize @column_index = {} end end class Row < Array #:nodoc: attr_accessor :header_object def initialize( n, header_object=nil ) @header_object = header_object super(n) end def [](x) if x.kind_of?( Fixnum ) super(x) else begin x = x.to_s.downcase.to_sym z = @header_object.column_index[x] unless z.kind_of?( Fixnum ) raise IndexError, "#{x.inspect} is not a valid index. Contents of @header_object.column_index: #{@header_object.column_index.inspect}" end self[ z ] rescue NoMethodError unless @header_object $stderr.puts "Table::Row @header_object undefined!" end raise end end end end def initialize() @header_description = {} @header = Header.new @rows = [] end def add_header( line ) raise "Can only define one header" unless @header.empty? 
@header = @header.concat( parse_row( line ) ) # beware of clobbering this into an Array @header.each_with_index do |key, i| @header.column_index[key.downcase.to_sym] = i end end def add_row( line ) @rows << Row.new( parse_row( line ), @header ) end def add_header_or_row( line ) @header.empty? ? add_header( line ) : add_row( line ) end protected def parse_row( line ) line.split( TABLE_COLUMN_DELIMITER ) end end ######### protected ######### def process(lines) current_indicator = nil current_class_accessor = nil in_table = false lines.each_with_index do |line, line_number| line.strip! next if line.nil? or line.empty? case line[0].chr when LINE_TYPE_ENTITY_INDICATOR current_indicator, value = split_label_value_in( line[1..-1] ) case current_indicator when 'DATABASE' current_class_accessor = @database when 'DATASET' current_class_accessor = @dataset when 'PLATFORM' current_class_accessor = @platform when 'SERIES' current_class_accessor = @series when 'SAMPLE' @samples[value] = Sample.new current_class_accessor = @samples[value] when 'SUBSET' @subsets[value] = Subset.new current_class_accessor = @subsets[value] else custom_raise( line_number, error_msg(40, line) ) end when LINE_TYPE_ENTITY_ATTRIBUTE if( current_indicator == nil ) custom_raise( line_number, error_msg(30) ) end # Handle lines such as '!platform_table_begin' and '!platform_table_end' if in_table if line =~ %r{table_begin} next elsif line =~ %r{table_end} in_table = false next end end key, value = split_label_value_in( line, true ) key_s = key.to_sym if current_class_accessor.include?( key_s ) if current_class_accessor[ key_s ].class != Array current_class_accessor[ key_s ] = [ current_class_accessor[ key_s ] ] end current_class_accessor[key.to_sym] << value else current_class_accessor[key.to_sym] = value end when LINE_TYPE_TABLE_HEADER if( (current_indicator != 'SAMPLE') and (current_indicator != 'PLATFORM') and (current_indicator != 'DATASET') ) custom_raise( line_number, error_msg(20, 
current_indicator.inspect) ) end in_table = true # may be redundant, computationally not worth checking # We only expect one table per platform or sample current_class_accessor[:table] ||= Table.new key, value = split_label_value_in( line ) # key[1..-1] -- Remove first character which is the LINE_TYPE_TABLE_HEADER current_class_accessor[:table].header_description[ key[1..-1] ] = value else # Type: No line type - should be a row in a table. if( (current_indicator == nil) or (in_table == false) ) custom_raise( line_number, error_msg(10) ) end current_class_accessor[:table].add_header_or_row( line ) end end end def error_msg( i, extra_info=nil ) case i when 10 x = ["Lines without line-type characters are rows in a table, but", "a line containing an entity indicator such as", "\"#{LINE_TYPE_ENTITY_INDICATOR}SAMPLE\",", "\"#{LINE_TYPE_ENTITY_INDICATOR}PLATFORM\",", "or \"#{LINE_TYPE_ENTITY_INDICATOR}DATASET\" has not been", "previously encountered or it does not appear that this line is", "in a table."] when 20 # tables are allowed inside samples and platforms x = ["Tables are only allowed inside SAMPLE and PLATFORM.", "Current table information found inside #{extra_info}."] when 30 x = ["Entity attribute line (\"#{LINE_TYPE_ENTITY_ATTRIBUTE}\")", "found before entity indicator line (\"#{LINE_TYPE_ENTITY_INDICATOR}\")"] when 40 x = ["Unkown entity indicator. Must be DATABASE, SAMPLE, PLATFORM,", "SERIES, DATASET, or SUBSET."] else raise IndexError, "Unknown error message requested." 
end x.join(" ") end def custom_raise( line_number_with_0_based_indexing, msg ) raise ["Error processing input line: #{line_number_with_0_based_indexing+1}", msg].join("\t") end def split_label_value_in( line, shift_key=false ) line =~ %r{\s*=\s*} key, value = $`, $' if shift_key key =~ %r{_} key = $' end if( (key == nil) or (value == nil) ) puts line.inspect raise end [key, value] end end # SOFT end # Bio bio-1.4.3.0001/lib/bio/db/fantom.rb0000644000004100000410000003612412200110570016406 0ustar www-datawww-data# # bio/db/fantom.rb - RIKEN FANTOM2 database classes # # Copyright:: Copyright (C) 2003 GOTO Naohisa # License:: The Ruby License # # $Id:$ # require 'rexml/document' require 'cgi' require 'uri' require 'net/http' require 'bio/db' require 'bio/command' #require 'bio/sequence' module Bio module FANTOM def query(idstr, http_proxy = nil) xml = get_by_id(idstr, http_proxy) seqs = MaXML::Sequences.new(xml.to_s) seqs[0] end module_function :query def get_by_id(idstr, http_proxy = nil) addr = 'fantom.gsc.riken.go.jp' port = 80 path = "/db/maxml/maxmlseq.cgi?masterid=#{CGI.escape(idstr.to_s)}&style=xml" xml = '' if http_proxy then proxy = URI.parse(http_proxy.to_s) Net::HTTP.start(addr, port, proxy.host, proxy.port) do |http| response = http.get(path) xml = response.body end else Bio::Command.start_http(addr, port) do |http| response = http.get(path) xml = response.body end end xml end module_function :get_by_id class MaXML < DB # DTD of MaXML(Mouse annotation XML) # http://fantom.gsc.riken.go.jp/maxml/maxml.dtd DELIMITER = RS = "\n--EOF--\n" # This class is for {allseq|repseq|allclust}.sep.xml, # not for {allseq|repseq|allclust}.xml. 
Data_XPath = '' def initialize(x) if x.is_a?(REXML::Element) then @elem = x else if x.is_a?(String) then x = x.sub(/#{Regexp.escape(DELIMITER)}\z/om, "\n") end doc = REXML::Document.new(x) @elem = doc.elements[self.class::Data_XPath] #raise 'element is null' unless @elem @elem = REXML::Document.new('') unless @elem end end attr_reader :elem def to_s @elem.to_s end def gsub_entities(str) # workaround for bug? if str then str.gsub(/\&\#(\d{1,3})\;/) { sprintf("%c", $1.to_i) } else str end end def entry_id unless defined?(@entry_id) @entry_id = @elem.attributes['id'] end @entry_id end def self.define_element_text_method(array) array.each do |tagstr| module_eval(" def #{tagstr} unless defined?(@#{tagstr}) @#{tagstr} = gsub_entities(@elem.text('#{tagstr}')) end @#{tagstr} end ") end end private_class_method :define_element_text_method class Cluster < MaXML # (MaXML cluster) # ftp://fantom2.gsc.riken.go.jp/fantom/2.1/allclust.sep.xml.gz Data_XPath = 'maxml-clusters/cluster' def representative_seqid unless defined?(@representative_seqid) @representative_seqid = gsub_entities(@elem.text('representative-seqid')) end @representative_seqid end def sequences unless defined?(@sequences) @sequences = MaXML::Sequences.new(@elem) end @sequences end def sequence(idstr = nil) idstr ? sequences[idstr] : representative_sequence end def representative_sequence unless defined?(@representative_sequence) rid = representative_seqid @representative_sequence = rid ? sequences[representative_seqid] : nil end @representative_sequence end alias representative_clone representative_sequence def representative_annotations e = representative_sequence e ? e.annotations : nil end def representative_cloneid e = representative_sequence e ? 
e.cloneid : nil end define_element_text_method(%w(fantomid)) end #class MaXML::Cluster class Sequences < MaXML Data_XPath = 'maxml-sequences' include Enumerable def each to_a.each { |x| yield x } end def to_a unless defined?(@sequences) @sequences = @elem.get_elements('sequence') @sequences.collect! { |e| MaXML::Sequence.new(e) } end @sequences end def get(idstr) unless defined?(@hash) @hash = {} end unless @hash.member?(idstr) then @hash[idstr] = self.find do |x| x.altid.values.index(idstr) end end @hash[idstr] end def [](*arg) if arg[0].is_a?(String) and arg.size == 1 then get(arg[0]) else to_a[*arg] end end def cloneids unless defined?(@cloneids) @cloneids = to_a.collect { |x| x.cloneid } end @cloneids end def id_strings unless defined?(@id_strings) @id_strings = to_a.collect { |x| x.id_strings } @id_strings.flatten! @id_strings.sort! @id_strings.uniq! end @id_strings end end #class MaXML::Sequences class Sequence < MaXML # (MaXML sequence) # ftp://fantom2.gsc.riken.go.jp/fantom/2.1/allseq.sep.xml.gz # ftp://fantom2.gsc.riken.go.jp/fantom/2.1/repseq.sep.xml.gz Data_XPath = 'maxml-sequences/sequence' def altid(t = nil) unless defined?(@altid) @altid = {} @elem.each_element('altid') do |e| @altid[e.attributes['type']] = gsub_entities(e.text) end end if t then @altid[t] else @altid end end def id_strings altid.values.sort.uniq end def library_id entry_id[0,2] end def annotations unless defined?(@annotations) @annotations = MaXML::Annotations.new(@elem.elements['annotations']) end @annotations end define_element_text_method(%w(annotator version modified_time comment)) def self.define_id_method(array) array.each do |tagstr| module_eval(" def #{tagstr} unless defined?(@#{tagstr}) @#{tagstr} = gsub_entities(@elem.text('#{tagstr}')) @#{tagstr} = altid('#{tagstr}') unless @#{tagstr} end @#{tagstr} end ") end end private_class_method :define_id_method define_id_method(%w(seqid fantomid cloneid rearrayid accession)) end #class MaXML::Sequence class Annotations < MaXML 
Data_XPath = nil include Enumerable def each to_a.each { |x| yield x } end def to_a unless defined?(@a) @a = @elem.get_elements('annotation') @a.collect! { |e| MaXML::Annotation.new(e) } end @a end def get_all_by_qualifier(qstr) unless defined?(@hash) @hash = {} end unless @hash.member?(qstr) then @hash[qstr] = self.find_all do |x| x.qualifier == qstr end end @hash[qstr] end def get_by_qualifier(qstr) a = get_all_by_qualifier(qstr) a ? a[0] : nil end def [](*arg) if arg[0].is_a?(String) and arg.size == 1 then get_by_qualifier(arg[0]) else to_a[*arg] end end def cds_start unless defined?(@cds_start) e = get_by_qualifier('cds_start') @cds_start = e ? e.anntext.to_i : nil end @cds_start end def cds_stop unless defined?(@cds_stop) e = get_by_qualifier('cds_stop') @cds_stop = e ? e.anntext.to_i : nil end @cds_stop end def gene_name unless defined?(@gene_name) e = get_by_qualifier('gene_name') @gene_name = e ? e.anntext : nil end @gene_name end def data_source unless defined?(@data_source) e = get_by_qualifier('gene_name') @data_source = e ? e.datasrc[0] : nil end @data_source end def evidence unless defined?(@evidence) e = get_by_qualifier('gene_name') @evidence = e ? e.evidence : nil end @evidence end end #class MaXML::Annotations class Annotation < MaXML def entry_id nil end class DataSrc < String def initialize(text, href) super(text) @href = href end attr_reader :href end def datasrc unless defined?(@datasrc) @datasrc = [] @elem.each_element('datasrc') do |e| text = e.text href = e.attributes['href'] @datasrc << DataSrc.new(gsub_entities(text), gsub_entities(href)) end end @datasrc end define_element_text_method(%w(qualifier srckey anntext evidence)) end #class MaXML::Annotation end #class MaXML end #module FANTOM end #module Bio =begin Bio::FANTOM are database classes (and modules) treating RIKEN FANTOM2 data. FANTOM2 is available at (()). = Bio::FANTOM This module contains useful methods to access databases. 
--- Bio::FANTOM.query(idstr, http_proxy=nil) Get MaXML sequence data corresponding to given ID through the internet from (()). Not that this class is not suitable for 'allclust.xml'. --- Bio::FANTOM::MaXML::Cluster.new(str) --- Bio::FANTOM::MaXML::Cluster#entry_id --- Bio::FANTOM::MaXML::Cluster#fantomid --- Bio::FANTOM::MaXML::Cluster#representative_seqid --- Bio::FANTOM::MaXML::Cluster#sequences Lists sequences in this cluster. Returns Bio::FANTOM::MaXML::Sequences object. --- Bio::FANTOM::MaXML::Cluster#sequence(id_str) Shows a sequence information of given id. Returns Bio::FANTOM::MaXML::Sequence object or nil. --- Bio::FANTOM::MaXML::Cluster#representataive_sequence --- Bio::FANTOM::MaXML::Cluster#representataive_clone Shows a sequence of repesentative_seqid. Returns Bio::FANTOM::MaXML::Sequence object (or nil). -- Bio::FANTOM::MaXML::Cluster#representative_annotations Shows annotations of repesentative sequence. Returns Bio::FANTOM::MaXML::Annotations object (or nil). -- Bio::FANTOM::MaXML::Cluster#representative_cloneid Shows cloneid of repesentative sequence. Returns String (or nil). = Bio::FANTOM::MaXML::Sequences The instances of this class are automatically created by Bio::FANTOM::MaXML::Cluster class. This class can also be used for 'allseq.sep.xml' and 'repseq.sep.xml', but you'd better using Bio::FANTOM::MaXML::Sequence class. In addition, this class can be used for 'allseq.xml' and 'repseq.xml', but you'd better not to use them, becase of the speed is very slow. --- Bio::FANTOM::MaXML::Sequences#to_a Returns an Array of Bio::FANTOM::MaXML::Sequence objects. --- Bio::FANTOM::MaXML::Sequences#each --- Bio::FANTOM::MaXML::Sequences#[](x) Same as to_a[x] when x is a integer. Same as get[x] when x is a string. --- Bio::FANTOM::MaXML::Sequences#get(id_str) Shows a sequence information of given id. Returns Bio::FANTOM::MaXML::Sequence object or nil. --- Bio::FANTOM::MaXML::Sequences#cloneids Shows clone ID list. Returns an array of strings. 
--- Bio::FANTOM::MaXML::Sequences#id_strings Shows ID list. Returns an array of strings. = Bio::FANTOM::MaXML::Sequence This class is for 'allseq.sep.xml' and 'repseq.sep.xml' found at (()) and (()). Not that this class is not suitable for 'allseq.xml' and 'repseq.xml'. In addition, the instances of this class are automatically created by Bio::FANTOM::MaXML::Sequences class. --- Bio::FANTOM::MaXML::Sequence.new(str) --- Bio::FANTOM::MaXML::Sequence#entry_id --- Bio::FANTOM::MaXML::Sequence#altid(type_str = nil) Returns hash of altid if no arguments are given. Returns ID as a string if a type of ID (string) is given. --- Bio::FANTOM::MaXML::Sequence#annotations Gets lists of annotation data. Returns a Bio::FANTOM::MaXML::Annotations object. --- Bio::FANTOM::MaXML::Sequence#id_strings Gets lists of ID. (same as altid.values) Returns an array of strings. --- Bio::FANTOM::MaXML::Sequence#library_id Shows library ID. (same as cloneid[0,2]) Library IDs are listed at: (()) =end bio-1.4.3.0001/lib/bio/db/genbank/0000755000004100000410000000000012200110570016174 5ustar www-datawww-databio-1.4.3.0001/lib/bio/db/genbank/format_genbank.rb0000644000004100000410000001242112200110570021476 0ustar www-datawww-data# # = bio/db/genbank/format_genbank.rb - GenBank format generater # # Copyright:: Copyright (C) 2008 Naohisa Goto # License:: The Ruby License # module Bio::Sequence::Format::NucFormatter # INTERNAL USE ONLY, YOU SHOULD NOT USE THIS CLASS. # GenBank format output class for Bio::Sequence. class Genbank < Bio::Sequence::Format::FormatterBase # helper methods include Bio::Sequence::Format::INSDFeatureHelper private # string wrapper for GenBank format def genbank_wrap(str) wrap(str.to_s, 67).gsub(/\n/, "\n" + " " * 12) end # string wrap with adding a dot at the end of the string def genbank_wrap_dot(str) str = str.to_s str = str + '.' unless /\.\z/ =~ str genbank_wrap(str) end # Given words (an Array of String) are wrapping with EMBL style. 
# Each word is never splitted inside the word. def genbank_wrap_words(array) width = 67 result = [] str = nil array.each do |x| if str then if str.length + 1 + x.length > width then str = nil else str.concat ' ' str.concat x end end unless str then str = "#{x}" result.push str end end result.join("\n" + " " * 12) end # formats references def reference_format_genbank(ref, num) pos = ref.sequence_position.to_s.gsub(/\s/, '') pos.gsub!(/(\d+)\-(\d+)/, "\\1 to \\2") pos.gsub!(/\s*\,\s*/, '; ') if pos.empty? pos = '' else pos = " (bases #{pos})" end volissue = "#{ref.volume.to_s}" volissue += " (#{ref.issue})" unless ref.issue.to_s.empty? journal = "#{ref.journal.to_s}" journal += " #{volissue}" unless volissue.empty? journal += ", #{ref.pages}" unless ref.pages.to_s.empty? journal += " (#{ref.year})" unless ref.year.to_s.empty? alist = ref.authors.collect do |x| y = x.to_s.strip.split(/\, *([^\,]+)\z/) y[1].gsub!(/\. +/, '.') if y[1] y.join(',') end lastauthor = alist.pop last2author = alist.pop alist.each { |x| x.concat ',' } alist.push last2author if last2author alist.push "and" unless alist.empty? alist.push lastauthor.to_s result = <<__END_OF_REFERENCE__ REFERENCE #{ genbank_wrap(sprintf('%-2d%s', num, pos))} AUTHORS #{ genbank_wrap_words(alist) } TITLE #{ genbank_wrap(ref.title.to_s) } JOURNAL #{ genbank_wrap(journal) } __END_OF_REFERENCE__ unless ref.pubmed.to_s.empty? then result.concat " PUBMED #{ genbank_wrap(ref.pubmed) }\n" end if ref.comments and !(ref.comments.empty?) then ref.comments.each do |c| result.concat " REMARK #{ genbank_wrap(c) }\n" end end result end # formats comments lines as GenBank def comments_format_genbank(cmnts) return '' if !cmnts or cmnts.empty? 
cmnts = [ cmnts ] unless cmnts.kind_of?(Array) a = [] cmnts.each do |str| a.push "COMMENT #{ genbank_wrap(str) }\n" end a.join('') end # formats sequence lines as GenBank def seq_format_genbank(str) i = 1 result = str.gsub(/.{1,60}/) do |s| s = s.gsub(/.{1,10}/, ' \0') y = sprintf("%9d%s\n", i, s) i += 60 y end result end # formats date def date_format_genbank format_date(date_modified || date_created || null_date) end # moleculue type def mol_type_genbank if /(DNA|(t|r|m|u|sn|sno)?RNA)/i =~ molecule_type.to_s then $1.sub(/[DR]NA/) { |x| x.upcase } else 'NA' end end # NCBI GI number def ncbi_gi_number ids = other_seqids if ids and r = ids.find { |x| x.database == 'GI' } then r.id else nil end end # strandedness def strandedness_genbank return nil unless strandedness case strandedness when 'single'; 'ss-'; when 'double'; 'ds-'; when 'mixed'; 'ms-'; else; nil end end # Erb template of GenBank format for Bio::Sequence erb_template <<'__END_OF_TEMPLATE__' LOCUS <%= sprintf("%-16s", entry_id) %> <%= sprintf("%11d", length) %> bp <%= sprintf("%3s", strandedness_genbank) %><%= sprintf("%-6s", mol_type_genbank) %> <%= sprintf("%-8s", topology) %><%= sprintf("%4s", division) %> <%= date_format_genbank %> DEFINITION <%= genbank_wrap_dot(definition.to_s) %> ACCESSION <%= genbank_wrap(([ primary_accession ] + (secondary_accessions or [])).join(" ")) %> VERSION <%= primary_accession %>.<%= sequence_version %><% if gi = ncbi_gi_number then %> GI:<%= gi %><% end %> KEYWORDS <%= genbank_wrap_dot((keywords or []).join('; ')) %> SOURCE <%= genbank_wrap(species) %> ORGANISM <%= genbank_wrap(species) %> <%= genbank_wrap_dot((classification or []).join('; ')) %> <% n = 0 (references or []).each do |ref| n += 1 %><%= reference_format_genbank(ref, n) %><% end %><%= comments_format_genbank(comments) %>FEATURES Location/Qualifiers <%= format_features_genbank(features || []) %>ORIGIN <%= seq_format_genbank(seq) %>// __END_OF_TEMPLATE__ end #class Genbank end #module 
Bio::Sequence::Format::NucFormatter bio-1.4.3.0001/lib/bio/db/genbank/genpept.rb0000644000004100000410000000233712200110570020170 0ustar www-datawww-data# # = bio/db/genbank/genpept.rb - GenPept database class # # Copyright:: Copyright (C) 2002-2004 Toshiaki Katayama # License:: The Ruby License # # $Id: genpept.rb,v 1.12 2007/04/05 23:35:40 trevor Exp $ # require 'bio/db/genbank/common' require 'bio/db/genbank/genbank' module Bio class GenPept < NCBIDB include Bio::NCBIDB::Common # LOCUS class Locus def initialize(locus_line) @entry_id = locus_line[12..27].strip @length = locus_line[29..39].to_i @circular = locus_line[55..62].strip # always linear @division = locus_line[63..66].strip @date = locus_line[68..78].strip end attr_accessor :entry_id, :length, :circular, :division, :date end def locus @data['LOCUS'] ||= Locus.new(get('LOCUS')) end def entry_id; locus.entry_id; end def length; locus.length; end def circular; locus.circular; end def division; locus.division; end def date; locus.date; end # ORIGIN def seq unless @data['SEQUENCE'] origin end Bio::Sequence::AA.new(@data['SEQUENCE']) end alias aaseq seq alias aalen length def seq_len seq.length end # DBSOURCE def dbsource get('DBSOURCE') end end # GenPept end # Bio bio-1.4.3.0001/lib/bio/db/genbank/common.rb0000644000004100000410000001677212200110570020026 0ustar www-datawww-data# # = bio/db/genbank/common.rb - Common methods for GenBank style database classes # # Copyright:: Copyright (C) 2004 Toshiaki Katayama # License:: The Ruby License # # $Id: common.rb,v 1.11.2.5 2008/06/17 15:53:21 ngoto Exp $ # require 'bio/db' module Bio class NCBIDB # == Description # # This module defines a common framework among GenBank, GenPept, RefSeq, and # DDBJ. For more details, see the documentations in each genbank/*.rb files. 
# # == References # # * ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt # * http://www.ncbi.nlm.nih.gov/collab/FT/index.html # module Common DELIMITER = RS = "\n//\n" TAGSIZE = 12 def initialize(entry) super(entry, TAGSIZE) end # LOCUS -- Locus class must be defined in child classes. def locus # must be overrided in each subclass end # DEFINITION -- Returns contents of the DEFINITION record as a String. def definition field_fetch('DEFINITION') end # ACCESSION -- Returns contents of the ACCESSION record as an Array. def accessions field_fetch('ACCESSION').strip.split(/\s+/) end # VERSION -- Returns contents of the VERSION record as an Array of Strings. def versions @data['VERSION'] ||= fetch('VERSION').split(/\s+/) end # Returns the first part of the VERSION record as "ACCESSION.VERSION" String. def acc_version versions.first.to_s end # Returns the ACCESSION part of the acc_version. def accession acc_version.split(/\./).first.to_s end # Returns the VERSION part of the acc_version as a Fixnum def version acc_version.split(/\./).last.to_i end # Returns the second part of the VERSION record as a "GI:#######" String. def gi versions.last end # NID -- Returns contents of the NID record as a String. def nid field_fetch('NID') end # KEYWORDS -- Returns contents of the KEYWORDS record as an Array of Strings. def keywords @data['KEYWORDS'] ||= fetch('KEYWORDS').chomp('.').split(/; /) end # SEGMENT -- Returns contents of the SEGMENT record as a "m/n" form String. def segment @data['SEGMENT'] ||= fetch('SEGMENT').scan(/\d+/).join("/") end # SOURCE -- Returns contents of the SOURCE record as a Hash. 
def source
  unless @data['SOURCE']
    # the text before the ORGANISM subtag is the common name
    name, org = get('SOURCE').split('ORGANISM')
    org ||= ""
    # the taxonomy starts at the first token that ends with ';'
    # (or with '.' when there is no ';'); text before it is the organism
    if org[/\S+;/]
      organism = $`
      taxonomy = $& + $'
    elsif org[/\S+\./]  # rs:NC_001741
      organism = $`
      taxonomy = $& + $'
    else
      organism = org
      taxonomy = ''
    end
    @data['SOURCE'] = {
      'common_name' => truncate(tag_cut(name)),
      'organism'    => truncate(organism),
      'taxonomy'    => truncate(taxonomy),
    }
    # unknown keys give '' instead of nil
    @data['SOURCE'].default = ''
  end
  @data['SOURCE']
end

# Returns the 'common_name' value of the SOURCE record.
def common_name
  source['common_name']
end
alias vernacular_name common_name

# Returns the 'organism' value of the SOURCE record.
def organism
  source['organism']
end

# Returns the 'taxonomy' value of the SOURCE record.
def taxonomy
  source['taxonomy']
end

# REFERENCE -- Returns contents of the REFERENCE records as an Array of
# Bio::Reference objects. When a block is given, yields each reference
# instead of returning the Array.
def references
  unless @data['REFERENCE']
    ary = []
    toptag2array(get('REFERENCE')).each do |ref|
      hash = Hash.new
      subtag2array(ref).each do |field|
        case tag_get(field)
        when /REFERENCE/
          if /(\d+)(\s*\((.+)\))?/m =~ tag_cut(field) then
            hash['embl_gb_record_number'] = $1.to_i
            if $3 and $3 != 'sites' then
              # "bases 1 to 10; 20 to 30" -> "1-10, 20-30"
              seqpos = $3
              seqpos.sub!(/\A\s*bases\s+/, '')
              seqpos.gsub!(/(\d+)\s+to\s+(\d+)/, "\\1-\\2")
              seqpos.gsub!(/\s*\;\s*/, ', ')
              hash['sequence_position'] = seqpos
            end
          end
        when /AUTHORS/
          authors = truncate(tag_cut(field))
          authors = authors.split(/, /)
          # the last two authors are joined by " and "
          authors[-1] = authors[-1].split(/\s+and\s+/) if authors[-1]
          authors = authors.flatten.map { |a| a.sub(/,/, ', ') }
          hash['authors'] = authors
        when /TITLE/
          hash['title'] = truncate(tag_cut(field))
          # CHECK Actually GenBank is not demanding for dot at the end of TITLE
          #+ '.'
        when /JOURNAL/
          journal = truncate(tag_cut(field))
          # "Name VOLUME (ISSUE), PAGES (YEAR)"
          if journal =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
            hash['journal'] = $1
            hash['volume']  = $2
            hash['issue']   = $3
            hash['pages']   = $4
            hash['year']    = $5
          else
            hash['journal'] = journal
          end
        when /MEDLINE/
          hash['medline'] = truncate(tag_cut(field))
        when /PUBMED/
          hash['pubmed'] = truncate(tag_cut(field))
        when /REMARK/
          hash['comments'] ||= []
          hash['comments'].push truncate(tag_cut(field))
        end
      end
      ary.push(Reference.new(hash))
    end
    @data['REFERENCE'] = ary.extend(Bio::References::BackwardCompatibility)
  end
  if block_given?
    @data['REFERENCE'].each do |r|
      yield r
    end
  else
    @data['REFERENCE']
  end
end

# COMMENT -- Returns contents of the COMMENT record as a String.
#
# The tag and its padding (up to the 12-column tag width) are removed
# from the first line and the 12-space indent from every following line.
def comment
  str = get('COMMENT').to_s.sub(/\ACOMMENT {1,5}/, '')
  str.gsub!(/^ {12}/, '')
  str.chomp!
  str
end

# FEATURES -- Returns contents of the FEATURES record as an array of
# Bio::Feature objects. When a block is given, yields each feature
# instead of returning the Array.
def features
  unless @data['FEATURES']
    ary = []
    in_quote = false
    get('FEATURES').each_line do |line|
      next if line =~ /^FEATURES/

      # feature type (source, CDS, ...)
      head = line[0,20].to_s.strip

      # feature value (position or /qualifier=)
      body = line[20,60].to_s.chomp

      # sub-array [ feature type, position, /q="data", ... ]
      if line =~ /^ {5}\S/
        ary.push([ head, body ])

      # qualifier or continuation line before any feature key:
      # there is nothing to attach it to, so it is skipped
      elsif ary.empty?
        next

      # feature qualifier start (/q="data..., /q="data...", /q=data, /q)
      elsif body =~ /^ \// and not in_quote		# gb:IRO125195
        ary.last.push(body)

        # flag for open quote (/q="data...)
        if body =~ /="/ and body !~ /"$/
          in_quote = true
        end

      # feature qualifier continued (...data..., ...data...")
      else
        ary.last.last << body

        # flag for closing quote (/q="data... lines  ...")
        if body =~ /"$/
          in_quote = false
        end
      end
    end

    ary.collect! do |subary|
      parse_qualifiers(subary)
    end

    @data['FEATURES'] = ary.extend(Bio::Features::BackwardCompatibility)
  end
  if block_given?
    @data['FEATURES'].each do |f|
      yield f
    end
  else
    @data['FEATURES']
  end
end

# ORIGIN -- Returns contents of the ORIGIN record as a String.
def origin unless @data['ORIGIN'] ori, seqstr = get('ORIGIN').split("\n", 2) seqstr ||= "" @data['ORIGIN'] = truncate(tag_cut(ori)) @data['SEQUENCE'] = seqstr.tr("0-9 \t\n\r\/", '') end @data['ORIGIN'] end ### private methods private def parse_qualifiers(ary) feature = Feature.new feature.feature = ary.shift feature.position = ary.shift.gsub(/\s/, '') ary.each do |f| if f =~ %r{/([^=]+)=?"?([^"]*)"?} qualifier, value = $1, $2 case qualifier when 'translation' value = Sequence::AA.new(value) when 'codon_start' value = value.to_i else value = true if value.empty? end feature.append(Feature::Qualifier.new(qualifier, value)) end end return feature end end # Common end # GenBank end # Bio bio-1.4.3.0001/lib/bio/db/genbank/ddbj.rb0000644000004100000410000000063312200110570017426 0ustar www-datawww-data# # = bio/db/genbank/ddbj.rb - DDBJ database class # # Copyright:: Copyright (C) 2000-2004 Toshiaki Katayama # License:: The Ruby License # require 'bio/db/genbank/genbank' module Bio class DDBJ < GenBank autoload :XML, 'bio/io/ddbjxml' autoload :REST, 'bio/io/ddbjrest' # Nothing to do (DDBJ database format is completely same as GenBank) end # DDBJ end # Bio bio-1.4.3.0001/lib/bio/db/genbank/genbank.rb0000644000004100000410000001013212200110570020123 0ustar www-datawww-data# # = bio/db/genbank/genbank.rb - GenBank database class # # Copyright:: Copyright (C) 2000-2005 Toshiaki Katayama # License:: The Ruby License # # $Id:$ # require 'date' require 'bio/db' require 'bio/db/genbank/common' require 'bio/sequence' require 'bio/sequence/dblink' module Bio # == Description # # Parses a GenBank formatted database entry # # == Example # # # entry is a string containing only one entry contents # gb = Bio::GenBank.new(entry) # class GenBank < NCBIDB include Bio::NCBIDB::Common # Parses the LOCUS line and returns contents of the LOCUS record # as a Bio::GenBank::Locus object. Locus object is created automatically # when Bio::GenBank#locus, entry_id etc. methods are called. 
class Locus def initialize(locus_line) if locus_line.empty? # do nothing (just for empty or incomplete entry string) elsif locus_line.length > 75 # after Rel 126.0 @entry_id = locus_line[12..27].strip @length = locus_line[29..39].to_i @strand = locus_line[44..46].strip @natype = locus_line[47..52].strip @circular = locus_line[55..62].strip @division = locus_line[63..66].strip @date = locus_line[68..78].strip else @entry_id = locus_line[12..21].strip @length = locus_line[22..29].to_i @strand = locus_line[33..35].strip @natype = locus_line[36..39].strip @circular = locus_line[42..51].strip @division = locus_line[52..54].strip @date = locus_line[62..72].strip end end attr_accessor :entry_id, :length, :strand, :natype, :circular, :division, :date end # Accessor methods for the contents of the LOCUS record. def locus @data['LOCUS'] ||= Locus.new(get('LOCUS')) end def entry_id; locus.entry_id; end def length; locus.length; end def circular; locus.circular; end def division; locus.division; end def date; locus.date; end def strand; locus.strand; end def natype; locus.natype; end # FEATURES -- Iterate only for the 'CDS' portion of the Bio::Features. def each_cds features.each do |feature| if feature.feature == 'CDS' yield(feature) end end end # FEATURES -- Iterate only for the 'gene' portion of the Bio::Features. def each_gene features.each do |feature| if feature.feature == 'gene' yield(feature) end end end # BASE COUNT (this field is obsoleted after GenBank release 138.0) -- # Returns the BASE COUNT as a Hash. When the base is specified, returns # count of the base as a Fixnum. The base can be one of 'a', 't', 'g', # 'c', and 'o' (others). def basecount(base = nil) unless @data['BASE COUNT'] hash = Hash.new(0) get('BASE COUNT').scan(/(\d+) (\w)/).each do |c, b| hash[b] = c.to_i end @data['BASE COUNT'] = hash end if base base.downcase! 
@data['BASE COUNT'][base] else @data['BASE COUNT'] end end # ORIGIN -- Returns DNA sequence in the ORIGIN record as a # Bio::Sequence::NA object. def seq unless @data['SEQUENCE'] origin end Bio::Sequence::NA.new(@data['SEQUENCE']) end alias naseq seq alias nalen length # (obsolete???) length of the sequence def seq_len seq.length end # modified date. Returns Date object, String or nil. def date_modified begin Date.parse(self.date) rescue ArgumentError, TypeError, NoMethodError, NameError self.date end end # Taxonomy classfication. Returns an array of strings. def classification self.taxonomy.to_s.sub(/\.\z/, '').split(/\s*\;\s*/) end # Strandedness. Returns one of 'single', 'double', 'mixed', or nil. def strandedness case self.strand.to_s.downcase when 'ss-'; 'single' when 'ds-'; 'double' when 'ms-'; 'mixed' else nil; end end # converts Bio::GenBank to Bio::Sequence # --- # *Arguments*: # *Returns*:: Bio::Sequence object def to_biosequence Bio::Sequence.adapter(self, Bio::Sequence::Adapter::GenBank) end end # GenBank end # Bio bio-1.4.3.0001/lib/bio/db/genbank/refseq.rb0000644000004100000410000000057412200110570020014 0ustar www-datawww-data# # = bio/db/genbank/refseq.rb - RefSeq database class # # Copyright:: Copyright (C) 2000-2004 Toshiaki Katayama # License:: The Ruby License # # $Id: refseq.rb,v 1.8 2007/04/05 23:35:40 trevor Exp $ # require 'bio/db/genbank/genbank' module Bio class RefSeq < GenBank # Nothing to do (RefSeq database format is completely same as GenBank) end end # Bio bio-1.4.3.0001/lib/bio/db/genbank/genbank_to_biosequence.rb0000644000004100000410000000340512200110570023214 0ustar www-datawww-data# # = bio/db/genbank/genbank_to_biosequence.rb - Bio::GenBank to Bio::Sequence adapter module # # Copyright:: Copyright (C) 2008 # Naohisa Goto , # License:: The Ruby License # # $Id:$ # require 'bio/sequence' require 'bio/sequence/adapter' # Internal use only. Normal users should not use this module. # # Bio::GenBank to Bio::Sequence adapter module. 
# It is internally used in Bio::GenBank#to_biosequence. # module Bio::Sequence::Adapter::GenBank extend Bio::Sequence::Adapter private def_biosequence_adapter :seq def_biosequence_adapter :id_namespace do |orig| if /\_/ =~ orig.accession.to_s then 'RefSeq' else 'GenBank' end end def_biosequence_adapter :entry_id def_biosequence_adapter :primary_accession, :accession def_biosequence_adapter :secondary_accessions do |orig| orig.accessions - [ orig.accession ] end def_biosequence_adapter :other_seqids do |orig| if /GI\:(.+)/ =~ orig.gi.to_s then [ Bio::Sequence::DBLink.new('GI', $1) ] else nil end end def_biosequence_adapter :molecule_type, :natype def_biosequence_adapter :division def_biosequence_adapter :topology, :circular def_biosequence_adapter :strandedness def_biosequence_adapter :sequence_version, :version #-- #sequence.date_created = nil #???? #++ def_biosequence_adapter :date_modified def_biosequence_adapter :definition def_biosequence_adapter :keywords def_biosequence_adapter :species, :organism def_biosequence_adapter :classification #-- #sequence.organelle = nil # yet unsupported #++ def_biosequence_adapter :comments, :comment def_biosequence_adapter :references def_biosequence_adapter :features end #module Bio::Sequence::Adapter::GenBank bio-1.4.3.0001/lib/bio/db/pdb.rb0000644000004100000410000000100512200110570015655 0ustar www-datawww-data# # = bio/db/pdb.rb - PDB database classes # # Copyright:: Copyright (C) 2004 # GOTO Naohisa # License:: The Ruby License # # # definition of the PDB class module Bio class PDB autoload :ChemicalComponent, 'bio/db/pdb/chemicalcomponent' end #class PDB end #module Bio # require other files under pdb directory require 'bio/db/pdb/utils' require 'bio/db/pdb/atom' require 'bio/db/pdb/residue' require 'bio/db/pdb/chain' require 'bio/db/pdb/model' require 'bio/db/pdb/pdb' bio-1.4.3.0001/lib/bio/db/newick.rb0000644000004100000410000002762212200110570016405 0ustar www-datawww-data# # = bio/db/newick.rb - Newick Standard 
phylogenetic tree parser / formatter # # Copyright:: Copyright (C) 2004-2006 # Naohisa Goto # Daniel Amelang # License:: The Ruby License # # # == Description # # This file contains parser and formatter of Newick and NHX. # # == References # # * http://evolution.genetics.washington.edu/phylip/newick_doc.html # * http://www.phylosoft.org/forester/NHX.html # require 'strscan' require 'bio/tree' module Bio #--- # newick parser #+++ # Newick standard phylogenetic tree parser class. # # This is an alpha version. Incompatible changes may be made frequently. class Newick # delimiter of the entry DELIMITER = RS = ";" # parse error class class ParseError < RuntimeError; end # same as Bio::Tree::Edge Edge = Bio::Tree::Edge # same as Bio::Tree::Node Node = Bio::Tree::Node # Creates a new Newick object. # _options_ for parsing can be set. # # Available options: # :bootstrap_style:: # :traditional for traditional bootstrap style, # :molphy for molphy style, # :disabled to ignore bootstrap strings. # For details of default actions, please read the notes below. # :parser:: # :naive for using naive parser, compatible with # BioRuby 1.1.0, which ignores quoted strings and # does not convert underscores to spaces. # # Notes for bootstrap style: # Molphy-style bootstrap values may always be parsed, even if # the options[:bootstrap_style] is set to # :traditional or :disabled. # # Note for default or traditional bootstrap style: # By default, if all of the internal node's names are numeric # and there are no NHX and no molphy-style bootstrap values, # the names of internal nodes are regarded as bootstrap values. # Set options[:bootstrap_style] to :disabled or :molphy # to disable the feature (it is also disabled when at least one NHX tag exists).
def initialize(str, options = nil) str = str.sub(/\;(.*)/m, ';') @original_string = str @entry_overrun = $1 @options = (options or {}) end # parser options # (in some cases, options can be automatically set by the parser) attr_reader :options # original string before parsing attr_reader :original_string # string after this entry attr_reader :entry_overrun # Gets the tree. # Returns a Bio::Tree object. def tree if !defined?(@tree) @tree = __parse_newick(@original_string, @options) else @tree end end # Re-parses the tree from the original string. # Returns self. # This method is useful after changing parser options. def reparse if defined?(@tree) remove_instance_variable(:@tree) end self.tree self end private # gets a option def __get_option(key, options) options[key] or (@options ? @options[key] : nil) end # Parses newick formatted leaf (or internal node) name. def __parse_newick_leaf(leaf_tokens, node, edge, options) t = leaf_tokens.shift if !t.kind_of?(Symbol) then node.name = t t = leaf_tokens.shift end if t == :':' then t = leaf_tokens.shift if !t.kind_of?(Symbol) then edge.distance_string = t if t and !(t.strip.empty?) t = leaf_tokens.shift end end if t == :'[' then btokens = leaf_tokens case __get_option(:original_format, options) when :nhx # regarded as NHX string which might be broken __parse_nhx(btokens, node, edge) when :traditional # simply ignored else case btokens[0].to_s.strip when '' # not automatically determined when /\A\&\&NHX/ # NHX string # force to set NHX mode @options[:original_format] = :nhx __parse_nhx(btokens, node, edge) else # Molphy-style boostrap values # let molphy mode if nothing determined @options[:original_format] ||= :molphy bstr = '' while t = btokens.shift and t != :']' bstr.concat t.to_s end node.bootstrap_string = bstr end #case btokens[0] end end if !btokens and !leaf_tokens.empty? then # syntax error? 
end node.name ||= '' # compatibility for older BioRuby # returns true true end # Parses NHX (New Hampshire eXtended) string def __parse_nhx(btokens, node, edge) btokens.shift if btokens[0] == '&&NHX' btokens.each do |str| break if str == :']' next if str.kind_of?(Symbol) tag, val = str.split(/\=/, 2) case tag when 'B' node.bootstrap_string = val when 'D' case val when 'Y' node.events.push :gene_duplication when 'N' node.events.push :speciation end when 'E' node.ec_number = val when 'L' edge.log_likelihood = val.to_f when 'S' node.scientific_name = val when 'T' node.taxonomy_id = val when 'W' edge.width = val.to_i when 'XB' edge.nhx_parameters[:XB] = val when 'O', 'SO' node.nhx_parameters[tag.to_sym] = val.to_i else # :Co, :SN, :Sw, :XN, and others node.nhx_parameters[tag.to_sym] = val end end #each true end # splits string to tokens def __parse_newick_tokenize(str, options) str = str.chop if str[-1..-1] == ';' # http://evolution.genetics.washington.edu/phylip/newick_doc.html # quoted_label ==> ' string_of_printing_characters ' # single quote in quoted_label is '' (two single quotes) # if __get_option(:parser, options) == :naive then ary = str.split(/([\(\)\,\:\[\]])/) ary.collect! { |x| x.strip!; x.empty? ? nil : x } ary.compact! ary.collect! do |x| if /\A([\(\)\,\:\[\]])\z/ =~ x then x.intern else x end end return ary end tokens = [] ss = StringScanner.new(str) while !(ss.eos?) if ss.scan(/\s+/) then # do nothing elsif ss.scan(/[\(\)\,\:\[\]]/) then # '(' or ')' or ',' or ':' or '[' or ']' t = ss.matched tokens.push t.intern elsif ss.scan(/\'/) then # quoted_label t = '' while true if ss.scan(/([^\']*)\'/) then t.concat ss[1] if ss.scan(/\'/) then # single quote in quoted_label t.concat ss.matched else break end else # incomplete quoted_label? break end end #while true unless ss.match?(/\s*[\(\)\,\:\[\]]/) or ss.match?(/\s*\z/) then # label continues? 
(illegal, but try to rescue) if ss.scan(/[^\(\)\,\:\[\]]+/) then t.concat ss.matched.lstrip end end tokens.push t elsif ss.scan(/[^\(\)\,\:\[\]]+/) then # unquoted_label t = ss.matched.strip t.gsub!(/[\r\n]/, '') # unquoted underscore should be converted to blank t.gsub!(/\_/, ' ') tokens.push t unless t.empty? else # unquoted_label in end of string t = ss.rest.strip t.gsub!(/[\r\n]/, '') # unquoted underscore should be converted to blank t.gsub!(/\_/, ' ') tokens.push t unless t.empty? ss.terminate end end #while !(ss.eos?) tokens end # get tokens for a leaf def __parse_newick_get_tokens_for_leaf(ary) r = [] while t = ary[0] and t != :',' and t != :')' and t != :'(' r.push ary.shift end r end # Parses newick formatted string. def __parse_newick(str, options = {}) # initializing root = Node.new cur_node = root edges = [] nodes = [ root ] internal_nodes = [] node_stack = [] # preparation of tokens ary = __parse_newick_tokenize(str, options) previous_token = nil # main loop while token = ary.shift #p token case token when :',' if previous_token == :',' or previous_token == :'(' then # there is a leaf whose name is empty. ary.unshift(token) ary.unshift('') token = nil end when :'(' node = Node.new nodes << node internal_nodes << node node_stack.push(cur_node) cur_node = node when :')' if previous_token == :',' or previous_token == :'(' then # there is a leaf whose name is empty. 
ary.unshift(token) ary.unshift('') token = nil else edge = Edge.new leaf_tokens = __parse_newick_get_tokens_for_leaf(ary) token = nil if leaf_tokens.size > 0 then __parse_newick_leaf(leaf_tokens, cur_node, edge, options) end parent = node_stack.pop raise ParseError, 'unmatched parentheses' unless parent edges << Bio::Relation.new(parent, cur_node, edge) cur_node = parent end else leaf = Node.new edge = Edge.new ary.unshift(token) leaf_tokens = __parse_newick_get_tokens_for_leaf(ary) token = nil __parse_newick_leaf(leaf_tokens, leaf, edge, options) nodes << leaf edges << Bio::Relation.new(cur_node, leaf, edge) end #case previous_token = token end #while raise ParseError, 'unmatched parentheses' unless node_stack.empty? bsopt = __get_option(:bootstrap_style, options) ofmt = __get_option(:original_format, options) unless bsopt == :disabled or bsopt == :molphy or ofmt == :nhx or ofmt == :molphy then # If all of the internal node's names are numeric, # the names are regarded as bootstrap values. flag = false internal_nodes.each do |inode| if inode.name and !inode.name.to_s.strip.empty? then if /\A[\+\-]?\d*\.?\d*\z/ =~ inode.name flag = true else flag = false break end end end if flag then @options[:bootstrap_style] = :traditional @options[:original_format] = :traditional internal_nodes.each do |inode| if inode.name then inode.bootstrap_string = inode.name inode.name = nil end end end end # Sets nodes order numbers nodes.each_with_index do |xnode, i| xnode.order_number = i end # If the root implicitly prepared by the program is a leaf and # there are no additional information for the edge from the root to # the first internal node, the root is removed. 
if rel = edges[-1] and rel.node == [ root, internal_nodes[0] ] and rel.relation.instance_eval { !defined?(@distance) and !defined?(@log_likelihood) and !defined?(@width) and !defined?(@nhx_parameters) } and edges.find_all { |x| x.node.include?(root) }.size == 1 nodes.shift edges.pop end # Let the tree into instance variables tree = Bio::Tree.new tree.instance_eval { @pathway.relations.concat(edges) @pathway.to_list } tree.root = nodes[0] tree.options.update(@options) tree end end #class Newick end #module Bio bio-1.4.3.0001/lib/bio/db/fasta/0000755000004100000410000000000012200110570015665 5ustar www-datawww-databio-1.4.3.0001/lib/bio/db/fasta/fasta_to_biosequence.rb0000644000004100000410000000301412200110570022372 0ustar www-datawww-data# # = bio/db/fasta/fasta_to_biosequence.rb - Bio::FastaFormat to Bio::Sequence adapter module # # Copyright:: Copyright (C) 2008 # Naohisa Goto , # License:: The Ruby License # # $Id:$ # require 'bio/sequence' require 'bio/sequence/adapter' # Internal use only. Normal users should not use this module. # # Bio::FastaFormat to Bio::Sequence adapter module. # It is internally used in Bio::FastaFormat#to_biosequence. # module Bio::Sequence::Adapter::FastaFormat extend Bio::Sequence::Adapter private def_biosequence_adapter :seq # primary accession def_biosequence_adapter :primary_accession do |orig| orig.identifiers.accessions.first or orig.identifiers.entry_id end # secondary accessions def_biosequence_adapter :secondary_accessions do |orig| orig.identifiers.accessions[1..-1] end # entry_id def_biosequence_adapter :entry_id do |orig| orig.identifiers.locus or orig.identifiers.accessions.first or orig.identifiers.entry_id end # NCBI GI is stored on other_seqids def_biosequence_adapter :other_seqids do |orig| other = [] if orig.identifiers.gi then other.push Bio::Sequence::DBLink.new('GI', orig.identifiers.gi) end other.empty? ? 
module Bio

  # Treats a FASTA formatted numerical entry, such as:
  #
  #   >id and/or some comments                    <== comment line
  #   24 15 23 29 20 13 20 21 21 23 22 25 13      <== numerical data
  #   22 17 15 25 27 32 26 32 29 29 25
  #
  # The precedent '>' can be omitted and the trailing '>' will be removed
  # automatically.
  #
  # --- Bio::FastaNumericFormat.new(entry)
  #
  # Stores the comment and the list of the numerical data.
  #
  # --- Bio::FastaNumericFormat#definition
  #
  # The comment line of the FASTA formatted data.
  #
  # * FASTA format (Wikipedia)
  #   http://en.wikipedia.org/wiki/FASTA_format
  #
  # * Phred quality score (WikiPedia)
  #   http://en.wikipedia.org/wiki/Phred_quality_score
  #
  class FastaNumericFormat < FastaFormat

    # Returns the list of the numerical data (typically the quality score
    # of its corresponding sequence) as an Array.
    # The list is built from @data on the first call and cached in @list.
    # ---
    # *Returns*:: (Array containing Integer) numbers
    def data
      @list = @data.strip.split(/\s+/).map { |field| field.to_i } unless defined?(@list)
      @list
    end

    # Returns the number of elements in the numerical data,
    # which will be the same of its corresponding sequence length.
    # ---
    # *Returns*:: (Integer) the number of elements
    def length
      data.size
    end

    # Yields on each elements of the numerical data.
    # ---
    # *Yields*:: (Integer) a numerical data element
    # *Returns*:: (undefined)
    def each
      data.each { |value| yield value }
    end

    # Returns the n-th element. If out of range, returns nil.
    # ---
    # *Arguments*:
    # * (required) _n_: (Integer) position
    # *Returns*:: (Integer or nil) the value
    def [](n)
      data[n]
    end

    # Returns the data as a Bio::Sequence object.
    # In the returned sequence object, the length of the sequence is zero,
    # and the numeric data is stored to the Bio::Sequence#quality_scores
    # attribute.
    #
    # Because the meaning of the numeric data is unclear,
    # Bio::Sequence#quality_score_type is not set by default.
    #
    # Note: If you modify the returned Bio::Sequence object,
    # the sequence or definition in this FastaNumericFormat object
    # might also be changed (but not always be changed)
    # because of efficiency.
    #
    # ---
    # *Arguments*:
    # *Returns*:: (Bio::Sequence) sequence object
    def to_biosequence
      bioseq = Bio::Sequence.adapter(self,
                                     Bio::Sequence::Adapter::FastaNumericFormat)
      # the entry carries no residues, so an empty generic sequence is set
      bioseq.seq = Bio::Sequence::Generic.new('')
      bioseq
    end
    alias to_seq to_biosequence

    # sequence-oriented methods inherited from FastaFormat are removed
    undef query, blast, fasta, seq, naseq, nalen, aaseq, aalen

  end #class FastaNumericFormat

end #module Bio
# Internal use only. Normal users should not use this module.
#
# Bio::FastaNumericFormat to Bio::Sequence adapter module.
# It is internally used in Bio::FastaNumericFormat#to_biosequence.
#
# The adapters declared for Bio::FastaFormat are pulled in through
# include; this module only adds the quality_scores attribute.
module Bio::Sequence::Adapter::FastaNumericFormat

  extend Bio::Sequence::Adapter

  include Bio::Sequence::Adapter::FastaFormat

  private

  # NOTE(review): presumably quality_scores of the Bio::Sequence object is
  # taken from the "data" method of the original entry; confirm against
  # Bio::Sequence::Adapter.
  def_biosequence_adapter(:quality_scores, :data)

end #module Bio::Sequence::Adapter::FastaNumericFormat
module Bio

  #--
  # split from fasta.rb revision 1.28
  #++

  # Parses a FASTA definition line (defline) and extracts IDs and other
  # information.
  # IDs are NSIDs (NCBI standard FASTA sequence identifiers)
  # or ":"-separated IDs.
  #
  # Several deflines concatenated with "\x01" are parsed one by one.
  #
  # === Examples
  #
  #   rub = Bio::FastaDefline.new('>gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]')
  #   rub.entry_id       ==> 'gi|671595'
  #   rub.get('emb')     ==> 'CAA85678.1'
  #   rub.emb            ==> 'CAA85678.1'
  #   rub.gi             ==> '671595'
  #   rub.accession      ==> 'CAA85678'
  #   rub.accessions     ==> [ 'CAA85678' ]
  #   rub.acc_version    ==> 'CAA85678.1'
  #   rub.locus          ==> nil
  #   rub.list_ids       ==> [["gi", "671595"],
  #                           ["emb", "CAA85678.1", nil],
  #                           ["Perovskia abrotanoides"]]
  #
  # === References
  #
  # * Fasta format description (NCBI)
  #   http://www.ncbi.nlm.nih.gov/BLAST/fasta.shtml
  #
  # * Formatdb README
  #   ftp://ftp.ncbi.nih.gov/blast/documents/formatdb.html
  #
  class FastaDefline

    # Known database tags and the names of the fields that follow each tag.
    NSIDs = {
      # NCBI and WU-BLAST
      'gi'  => [ 'gi' ],                      # NCBI GI
      'gb'  => [ 'acc_version', 'locus' ],    # GenBank
      'emb' => [ 'acc_version', 'locus' ],    # EMBL
      'dbj' => [ 'acc_version', 'locus' ],    # DDBJ
      'sp'  => [ 'accession', 'entry_id' ],   # SWISS-PROT
      'tr'  => [ 'accession', 'entry_id' ],   # TREMBL
      'pdb' => [ 'entry_id', 'chain' ],       # PDB
      'bbs' => [ 'number' ],                  # GenInfo Backbone Id
      'gnl' => [ 'database' , 'entry_id' ],   # General database identifier
      'ref' => [ 'acc_version' , 'locus' ],   # NCBI Reference Sequence
      'lcl' => [ 'entry_id' ],                # Local Sequence identifier

      # WU-BLAST and NCBI
      'pir' => [ 'accession', 'entry_id' ],   # PIR
      'prf' => [ 'accession', 'entry_id' ],   # Protein Research Foundation
      'pat' => [ 'country', 'number', 'serial' ], # Patents

      # WU-BLAST only
      'bbm' => [ 'number' ],      # NCBI GenInfo Backbone database identifier
      'gim' => [ 'number' ],      # NCBI GenInfo Import identifier
      'gp'  => [ 'acc_version', 'locus' ],    # GenPept
      'oth' => [ 'accession', 'name', 'release' ], # Other (user-definable) identifier
      'tpd' => [ 'accession', 'name' ],       # Third party annotation, DDBJ
      'tpe' => [ 'accession', 'name' ],       # Third party annotation, EMBL
      'tpg' => [ 'accession', 'name' ],       # Third party annotation, GenBank

      # Original
      'ri'  => [ 'entry_id', 'rearray_id', 'len' ], # RIKEN FANTOM DB
    }

    # Shows array that contains IDs (or ID-like strings).
    # Returns an array of arrays of strings.
    attr_reader :list_ids

    # Shows a possibly unique identifier.
    # Returns a string (nil when no defline has been parsed).
    attr_reader :entry_id

    # Parses given string.
    # The string may hold several deflines separated by "\x01".
    def initialize(str)
      @deflines = []
      @info = {}
      @list_ids = []
      @entry_id = nil
      str.split("\x01").each do |line|
        add_defline(line)
      end
    end #def initialize

    # Parses given string and adds parsed data.
    # The first defline that is added decides entry_id.
    def add_defline(str)
      case str
      when /^\>?\s*((?:[^\|\s]*\|)+[^\s]+)\s*(.*)$/
        # NSIDs
        # examples:
        # >gi|9910844|sp|Q9UWG2|RL3_METVA 50S ribosomal protein L3P
        #
        # note: regexp (?:) means grouping without backreferences
        i = $1
        d = $2
        tks = i.split('|')
        tks << '' if i[-1,1] == '|'
        a = parse_NSIDs(tks)
        # parse_NSIDs returns an empty list when the line starts with '|'
        # (e.g. ">|abc"); use an empty identifier instead of calling
        # join on nil.
        i = a.empty? ? '' : a[0].join('|')
        a.unshift('|')
        # tokens that were not consumed as identifiers belong to the
        # description
        d = tks.join('|') + ' ' + d unless tks.empty?
        a << d
        this_line = a
        match_EC(d)
        parse_square_brackets(d).each do |x|
          if !match_EC(x, false) and x =~ /\A[A-Z]/ then
            di = [ x ]
            @list_ids << di
            @info['organism'] = x unless @info['organism']
          end
        end

      when /^\>?\s*([a-zA-Z0-9]+\:[^\s]+)\s*(.*)$/
        # examples:
        # >sce:YBR160W  CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]
        # >emb:CACDC28 [X80034] C.albicans CDC28 gene
        i = $1
        d = $2
        a = parse_ColonSepID(i)
        i = a.join(':')
        this_line = [ ':', a , d ]
        match_EC(d)
        parse_square_brackets(d).each do |x|
          if !match_EC(x, false) and x =~ /:/ then
            parse_ColonSepID(x)
          elsif x =~ /\A\s*([A-Z][A-Z0-9_\.]+)\s*\z/ then
            @list_ids << [ $1 ]
          end
        end

      when /^\>?\s*(\S+)(?:\s+(.+))?$/
        # examples:
        # >ABC12345 this is test
        i = $1
        d = $2.to_s
        @list_ids << [ i.chomp('.') ]
        this_line = [ '', [ i ], d ]
        match_EC(d)
      else
        i = str
        d = ''
        match_EC(i)
        this_line = [ '', [ i ], d ]
      end

      @deflines << this_line
      @entry_id = i unless @entry_id
    end

    # Looks for EC numbers ("EC:n.n.n.n", each part made of digits or "-")
    # in _str_. Returns [ 'EC', number ] for the last one found, or nil.
    # When _write_flag_ is true, every number found is added to list_ids
    # and @info['ec'] is set unless it already holds a number without "-".
    def match_EC(str, write_flag = true)
      di = nil
      # (?: ) groups without capturing; the original pattern spelled it
      # "(:?", which made a stray ':' legal in front of every part.
      str.scan(/EC\:((?:[\-\d]+\.){3}(?:[\-\d]+))/i) do |x|
        di = [ 'EC', x[0] ]
        if write_flag then
          @info['ec'] = di[1] if (!@info['ec'] or @info['ec'].to_s =~ /\-/)
          @list_ids << di
        end
      end
      di
    end
    private :match_EC

    # Returns the contents of every "[...]" found in _str_.
    def parse_square_brackets(str)
      r = []
      str.scan(/\[([^\]]*)\]/) do |x|
        r << x[0]
      end
      r
    end
    private :parse_square_brackets

    # Splits "db:id" at the first ':', records the pair in list_ids and
    # returns it. The second element is nil when there is no ':'.
    def parse_ColonSepID(str)
      di = str.split(':', 2)
      di << nil if di.size <= 1
      @list_ids << di
      di
    end
    private :parse_ColonSepID

    # Parses "|"-separated tokens into NSID entries, records them in
    # list_ids and returns them. Parsing stops at the first token that is
    # not a known database tag; the remaining tokens are left in _ary_.
    def parse_NSIDs(ary)
      # this method destroys ary
      data = []
      while token = ary.shift
        if labels = self.class::NSIDs[token] then
          di = [ token ]
          labels.each do |x|
            token = ary.shift
            break unless token
            if self.class::NSIDs[token] then
              # next database tag: this entry ends here
              ary.unshift(token)
              break #each
            end
            if token.length > 0 then
              di << token
            else
              di << nil
            end
          end
          data << di
        else
          if token.length > 0 then
            # UCID (uncontrolled identifiers)
            di = [ token ]
            data << di
            @info['ucid'] = token unless @info['ucid']
          end
          break #while
        end
      end #while
      @list_ids.concat data
      data
    end #def parse_NSIDs
    private :parse_NSIDs

    # Shows original string.
    # Note that the result of this method may be different from
    # original string which is given in FastaDefline.new method.
    def to_s
      @deflines.collect { |a|
        s = a[0]
        (a[1..-2].collect { |x| x.join(s) }.join(s) + ' ' + a[-1]).strip
      }.join("\x01")
    end

    # Shows description (of the first defline).
    def description
      @deflines[0].to_a[-1]
    end

    # Returns descriptions (one per defline).
    def descriptions
      @deflines.collect do |a|
        a[-1]
      end
    end

    # Shows ID-like strings.
    # Returns an array of strings.
    def id_strings
      r = []
      @list_ids.each do |a|
        if a.size >= 2 then
          r.concat a[1..-1].find_all { |x| x }
        else
          if a[0].to_s.size > 0 and a[0] =~ /\A[A-Za-z0-9\.\-\_]+\z/
            r << a[0]
          end
        end
      end
      r.concat( words(true, []).find_all do |x|
                  x =~ /\A[A-Z][A-Za-z0-9\_]*[0-9]+[A-Za-z0-9\_]+\z/ or
                    x =~ /\A[A-Z][A-Z0-9]*\_[A-Z0-9\_]+\z/
                end)
      r
    end

    # Words that are dropped by the words method.
    KillWords = [
      'an', 'the', 'this', 'that',
      'is', 'are', 'were', 'was', 'be', 'can', 'may', 'might',
      'as', 'at', 'by', 'for', 'in', 'of', 'on', 'to', 'with',
      'from', 'and', 'or', 'not',
      'dna', 'rna', 'mrna', 'cdna', 'orf',
      'aa', 'nt', 'pct', 'id', 'ec', 'sp', 'subsp',
      'similar', 'involved', 'identical', 'identity',
      'cds', 'clone', 'library', 'contig', 'contigs',
      'homolog', 'homologue', 'homologs', 'homologous',
      'protein', 'proteins', 'gene', 'genes',
      'product', 'products', 'sequence', 'sequences',
      'strain', 'strains', 'region', 'regions',
    ]

    # KillWords as a lookup table.
    KillWordsHash = {}
    KillWords.each { |x| KillWordsHash[x] = true }

    # Words matching one of these patterns are dropped by the words method.
    KillRegexpArray = [
      /\A\d{1,3}\%?\z/,
      /\A[A-Z][A-Za-z0-9\_]*[0-9]+[A-Za-z0-9\_]+\z/,
      /\A[A-Z][A-Z0-9]*\_[A-Z0-9\_]+\z/
    ]

    # Shows words used in the defline. Returns an Array.
    # Words are sorted and unique; they are downcased unless
    # _case_sensitive_ is true. Words of one character or less, words in
    # _kwhash_ and words matching one of _kill_regexp_ are removed.
    def words(case_sensitive = nil, kill_regexp = self.class::KillRegexpArray,
              kwhash = self.class::KillWordsHash)
      a = descriptions.join(' ').split(/[\.\,\;\:\(\)\[\]\{\}\<\>\"\'\`\~\/\|\?\!\&\@\#\s\x00-\x1f\x7f]+/)
      a.collect! do |x|
        x.sub!(/\A[\$\*\-\+]+/, '')
        x.sub!(/[\$\*\-\=]+\z/, '')
        if x.size <= 1 then
          nil
        elsif kwhash[x.downcase] then
          nil
        else
          if kill_regexp.find { |expr| expr =~ x } then
            nil
          else
            x
          end
        end
      end
      a.compact!
      a.collect! { |x| x.downcase } unless case_sensitive
      a.sort!
      a.uniq!
      a
    end

    # Returns an identifier by a database name.
    # The result is remembered in @info once a value has been found.
    def get(dbname)
      db = dbname.to_s
      r = nil
      unless r = @info[db] then
        di = @list_ids.find { |x| x[0] == db.to_s }
        if di and di.size <= 2 then
          r = di[-1]
        elsif di then
          labels = self.class::NSIDs[db]
          # preferred field order when the entry has several fields
          [ 'acc_version', 'entry_id',
            'locus', 'accession', 'number'].each do |x|
            if i = labels.index(x) then
              r = di[i+1]
              break if r
            end
          end
          r = di[1..-1].find { |x| x } unless r
        end
        @info[db] = r if r
      end
      r
    end

    # Returns an identifier by given type.
    def get_by_type(type_str)
      @list_ids.each do |x|
        if labels = self.class::NSIDs[x[0]] then
          if i = labels.index(type_str) then
            return x[i+1]
          end
        end
      end
      nil
    end

    # Returns identifiers by given type.
    def get_all_by_type(*type_strarg)
      d = []
      @list_ids.each do |x|
        if labels = self.class::NSIDs[x[0]] then
          type_strarg.each do |y|
            if i = labels.index(y) then
              d << x[i+1] if x[i+1]
            end
          end
        end
      end
      d
    end

    # Shows locus.
    # If the entry has more than two of such IDs,
    # only the first ID are shown.
    # Returns a string or nil.
    def locus
      unless defined?(@locus)
        @locus = get_by_type('locus')
      end
      @locus
    end

    # Shows GI.
    # If the entry has more than two of such IDs,
    # only the first ID are shown.
    # Returns a string or nil.
    def gi
      unless defined?(@gi) then
        @gi = get_by_type('gi')
      end
      @gi
    end

    # Shows accession with version number.
    # If the entry has more than two of such IDs,
    # only the first ID are shown.
    # Returns a string or nil.
    def acc_version
      unless defined?(@acc_version) then
        @acc_version = get_by_type('acc_version')
      end
      @acc_version
    end

    # Shows accession numbers (version suffixes are removed).
    # Returns an array of strings.
    def accessions
      unless defined?(@accessions) then
        @accessions = get_all_by_type('accession', 'acc_version')
        @accessions.collect! { |x| x.sub(/\..*\z/, '') }
      end
      @accessions
    end

    # Shows an accession number.
    def accession
      unless defined?(@accession) then
        if acc_version then
          @accession = acc_version.split('.')[0]
        else
          @accession = accessions[0]
        end
      end
      @accession
    end

    # Database names can be used as method names (e.g. defline.emb).
    # Returns what get(name) returns. Raises RuntimeError when nothing is
    # found and the name is not a known database tag.
    def method_missing(name, *args)
      # raise ArgumentError,
      # "wrong # of arguments(#{args.size} for 1)" if args.size >= 2
      r = get(name, *args)
      if !r and !(self.class::NSIDs[name.to_s]) then
        raise "NameError: undefined method `#{name.inspect}'"
      end
      r
    end

    # Keeps respond_to? in step with method_missing: true for known
    # database tags and for names that get can resolve.
    # The instance variables are checked first so that respond_to? is
    # safe on an object that has not been initialized yet.
    def respond_to_missing?(name, include_private = false)
      key = name.to_s
      return true if self.class::NSIDs.key?(key)
      return true if @info and @list_ids and get(key)
      super
    end

  end #class FastaDefline

end #module Bio
# INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
#
# Output the NCBI-style FASTA format string of the sequence.
# The identifier is written as "lcl|ID", where ID is
# "primary_accession.sequence_version" when a primary accession is
# present and the entry_id otherwise.
#
# Like the plain Fasta formatter, the optional :width option gives the
# number of residues per line (default 70); nil disables line wrapping.
#
# Currently, this method is used in Bio::Sequence#output like so,
#
#   s = Bio::Sequence.new('atgc')
#   puts s.output(:ncbi)                   #=> "> \natgc\n"
# ---
# *Returns*:: String object
def output
  opts = @options || {}
  width = opts.has_key?(:width) ? opts[:width] : 70
  seq = @sequence.seq
  #gi = @sequence.gi_number
  dbname = 'lcl'
  idstr = if @sequence.primary_accession.to_s.empty?
            @sequence.entry_id
          else
            "#{@sequence.primary_accession}.#{@sequence.sequence_version}"
          end
  header = "#{dbname}|#{idstr} #{@sequence.definition}"

  body = if width
           seq.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
         else
           seq.to_s + "\n"
         end
  ">#{header}\n" + body
end
#
# It does not care whether the content of the quality score is
# consistent with the sequence or not, e.g. it does not check
# length of the quality score.
#
# ---
# *Arguments*:
# * _sequence_: Bio::Sequence object
# * (optional) :header => _header_: (String) (default nil)
# * (optional) :width => _width_: (Fixnum) (default 70)
def initialize; end if false # dummy for RDoc

# INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
#
# Output the FASTA-numeric format string of the sequence: a ">header"
# line followed by the quality scores.
#
# The :width option controls the layout of the scores:
# * nil or false: all scores on a single line, separated by spaces
# * zero or negative: one score per line
# * positive: scores are separated by spaces and a new line is started
#   whenever appending the next score would exceed _width_ characters
#
# Currently, this method is used in Bio::Sequence#output like so,
#
#   s = Bio::Sequence.new('atgc')
#   s.quality_scores = [ 70, 80, 90, 100 ]
#   puts s.output(:fasta_numeric)
# ---
# *Returns*:: String object
def output
  header = @options[:header]
  width = @options.has_key?(:width) ? @options[:width] : 70
  seq = @sequence.seq.to_s
  entry_id = @sequence.entry_id ||
    "#{@sequence.primary_accession}.#{@sequence.sequence_version}"
  definition = @sequence.definition
  header ||= "#{entry_id} #{definition}"

  sc = fastanumeric_quality_scores(seq)
  main = if !width
           sc.join(' ')
         elsif width <= 0
           sc.join("\n")
         else
           line_len = 0
           sc.collect do |x|
             token = "#{x}"
             if line_len == 0
               # First token on a line is never preceded by a line
               # break, even when it is wider than the limit; breaking
               # here used to produce an empty first line.
               line_len = token.size
               token
             elsif line_len + 1 + token.size > width
               line_len = token.size
               "\n#{token}"
             else
               line_len += 1 + token.size
               " #{token}"
             end
           end.join('')
         end
  ">#{header}\n#{main}\n"
end

private

# Returns the quality scores to be printed (an empty array when the
# sequence has none).  _seq_ is unused here; subclasses may use it.
def fastanumeric_quality_scores(seq)
  @sequence.quality_scores || []
end
#
# If the sequence has no quality score type information
# and no error probabilities, but the score exists,
# the score is regarded as :phred (Phred score).
#
# ---
# *Arguments*:
# * _sequence_: Bio::Sequence object
# * (optional) :header => _header_: (String) (default nil)
# * (optional) :width => _width_: (Fixnum) (default 70)
# * (optional) :quality_score_type => _type_: (Symbol) (default nil)
# * (optional) :default_score => _score_: (Integer) default score for bases that have no valid quality scores or error probabilities (default 0)
def initialize; end if false # dummy for RDoc

private

# Returns exactly seq.length scores: the result of #qual_quality_scores
# is truncated when too long and padded with the :default_score option
# (0 when not given) when too short.
def fastanumeric_quality_scores(seq)
  scores = qual_quality_scores(seq)
  wanted = seq.length
  if scores.size > wanted
    scores[0, wanted]
  elsif scores.size < wanted
    filler = @options[:default_score] || 0
    scores + Array.new(wanted - scores.size, filler)
  else
    scores
  end
end

# Picks, and converts when needed, the quality scores to print.
# The output type is the :quality_score_type option (:phred when not
# given); a sequence without a score type is treated as :phred.
# Returns an empty array when no usable information is available.
def qual_quality_scores(seq)
  seqlen = seq.length
  return [] if seqlen <= 0

  # requested output type, and what the sequence provides
  fmt = @options[:quality_score_type]
  qsc = @sequence.quality_scores
  qsc_type = @sequence.quality_score_type
  qsc_complete = (qsc and qsc.size >= seqlen)

  # nothing to convert
  return qsc if qsc_complete and qsc_type == fmt

  fmt ||= :phred
  qsc_type ||= :phred

  # error probabilities covering the whole sequence
  ep = @sequence.error_probabilities
  if ep and ep.size >= seqlen
    if fmt == :phred
      return Bio::Sequence::QualityScore::Phred.p2q(ep[0, seqlen])
    elsif fmt == :solexa
      return Bio::Sequence::QualityScore::Solexa.p2q(ep[0, seqlen])
    end
  end

  # scores covering the whole sequence, in the other scale
  if qsc_complete
    if qsc_type == :phred and fmt == :solexa
      return Bio::Sequence::QualityScore::Phred.convert_scores_to_solexa(qsc[0, seqlen])
    elsif qsc_type == :solexa and fmt == :phred
      return Bio::Sequence::QualityScore::Solexa.convert_scores_to_phred(qsc[0, seqlen])
    end
  end

  # scores of an unknown type cannot be used
  unless qsc_type == :phred or qsc_type == :solexa
    qsc_type = nil
    qsc = nil
  end

  # partial information: use whichever source covers more bases
  qsc_cov = qsc ? qsc.size.quo(seqlen) : 0
  ep_cov = ep ? ep.size.quo(seqlen) : 0
  if qsc_cov > ep_cov
    if qsc_type == fmt and (fmt == :phred or fmt == :solexa)
      return qsc
    elsif qsc_type == :phred and fmt == :solexa
      return Bio::Sequence::QualityScore::Phred.convert_scores_to_solexa(qsc)
    elsif qsc_type == :solexa and fmt == :phred
      return Bio::Sequence::QualityScore::Solexa.convert_scores_to_phred(qsc)
    end
  elsif ep_cov > qsc_cov
    if fmt == :phred
      return Bio::Sequence::QualityScore::Phred.p2q(ep)
    elsif fmt == :solexa
      return Bio::Sequence::QualityScore::Solexa.p2q(ep)
    end
  end

  # no usable information
  []
end
# # == References # # * FASTA format (WikiPedia) # http://en.wikipedia.org/wiki/FASTA_format # # * Fasta format description (NCBI) # http://www.ncbi.nlm.nih.gov/BLAST/fasta.shtml # require 'bio/db' require 'bio/sequence' require 'bio/sequence/dblink' require 'bio/db/fasta/defline' module Bio # Treats a FASTA formatted entry, such as: # # >id and/or some comments <== definition line # ATGCATGCATGCATGCATGCATGCATGCATGCATGC <== sequence lines # ATGCATGCATGCATGCATGCATGCATGCATGCATGC # ATGCATGCATGC # # The precedent '>' can be omitted and the trailing '>' will be removed # automatically. # # === Examples # # fasta_string = <gi|398365175|ref|NP_009718.3| Cdc28p [Saccharomyces cerevisiae S288c] # MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEGVPSTAIREISLLKELKDDNI # VRLYDIVHSDAHKLYLVFEFLDLDLKRYMEGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQ # NLLINKDGNLKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGCIFAEMCNRKP # IFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFPQWRRKDLSQVVPSLDPRGIDLLDKLLAYDP # INRISARRAAIHPYFQES # END_OF_STRING # # f = Bio::FastaFormat.new(fasta_string) # # f.entry #=> ">gi|398365175|ref|NP_009718.3| Cdc28p [Saccharomyces cerevisiae S288c]\n"+ # # MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEGVPSTAIREISLLKELKDDNI\n"+ # # VRLYDIVHSDAHKLYLVFEFLDLDLKRYMEGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQ\n"+ # # NLLINKDGNLKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGCIFAEMCNRKP\n"+ # # IFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFPQWRRKDLSQVVPSLDPRGIDLLDKLLAYDP\n"+ # # INRISARRAAIHPYFQES" # # ==== Methods related to the name of the sequence # # A larger range of methods for dealing with Fasta definition lines can be found in FastaDefline, accessed through the FastaFormat#identifiers method. 
# # f.entry_id #=> "gi|398365175" # f.definition #=> "gi|398365175|ref|NP_009718.3| Cdc28p [Saccharomyces cerevisiae S288c]" # f.identifiers #=> Bio::FastaDefline instance # f.accession #=> "NP_009718" # f.accessions #=> ["NP_009718"] # f.acc_version #=> "NP_009718.3" # f.comment #=> nil # # ==== Methods related to the actual sequence # # f.seq #=> "MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEGVPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYMEGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNLKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGCIFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFPQWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES" # f.data #=> "\nMSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEGVPSTAIREISLLKELKDDNI\nVRLYDIVHSDAHKLYLVFEFLDLDLKRYMEGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQ\nNLLINKDGNLKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGCIFAEMCNRKP\nIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFPQWRRKDLSQVVPSLDPRGIDLLDKLLAYDP\nINRISARRAAIHPYFQES\n" # f.length #=> 298 # f.aaseq #=> "MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEGVPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYMEGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNLKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGCIFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFPQWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES" # f.aaseq.composition #=> {"M"=>5, "S"=>15, "G"=>21, "E"=>16, "L"=>36, "A"=>17, "N"=>8, "Y"=>13, "K"=>22, "R"=>20, "V"=>18, "T"=>7, "D"=>23, "P"=>17, "Q"=>10, "I"=>23, "H"=>7, "F"=>12, "C"=>4, "W"=>4} # f.aalen #=> 298 # # # === A less structured fasta entry # # f.entry #=> ">abc 123 456\nASDF" # # f.entry_id #=> "abc" # f.definition #=> "abc 123 456" # f.comment #=> nil # f.accession #=> nil # f.accessions #=> [] # f.acc_version #=> nil # # f.seq #=> "ASDF" # f.data #=> "\nASDF\n" # f.length #=> 4 # f.aaseq #=> "ASDF" # f.aaseq.composition #=> {"A"=>1, "S"=>1, "D"=>1, "F"=>1} # f.aalen #=> 4 # # # === References # # * FASTA format (WikiPedia) # 
#     http://en.wikipedia.org/wiki/FASTA_format
#
class FastaFormat < DB

  # Entry delimiter in flatfile text.
  DELIMITER = RS = "\n>"

  # (Integer) excess read size included in DELIMITER.
  DELIMITER_OVERRUN = 1 # '>'

  # The definition (comment) line of the FASTA formatted data.
  attr_accessor :definition

  # The sequence lines in text.
  attr_accessor :data

  # Text cut off after the first entry (nil when there was none).
  attr_reader :entry_overrun

  # Stores the definition line and the sequence text of one entry of
  # the FASTA format string.  If the argument contains more than one
  # entry, only the first entry is used.
  def initialize(str)
    first_line = str[/.*/]
    @definition = first_line.sub(/^>/, '').strip # 1st line
    rest = str.sub(/.*/, '')                     # everything after it
    rest.sub!(/^>.*/m, '')                       # drop following entries
    @entry_overrun = $&
    @data = rest
  end

  # Returns the stored entry in FASTA format. (same as to_s)
  def entry
    @entry = ">#{@definition}\n#{@data.strip}\n"
  end
  alias to_s entry

  # Executes FASTA/BLAST search by using a Bio::Fasta or a Bio::Blast
  # factory object.
  #
  #   #!/usr/bin/env ruby
  #   require 'bio'
  #
  #   factory = Bio::Fasta.local('fasta34', 'db/swissprot.f')
  #   flatfile = Bio::FlatFile.open(Bio::FastaFormat, 'queries.f')
  #   flatfile.each do |entry|
  #     p entry.definition
  #     result = entry.fasta(factory)
  #     result.each do |hit|
  #       print "#{hit.query_id} : #{hit.evalue}\t#{hit.target_id} at "
  #       p hit.lap_at
  #     end
  #   end
  #
  def query(factory)
    factory.query(entry)
  end
  alias fasta query
  alias blast query

  # Returns the sequence lines joined into one sequence object.
  # Whitespace and digits are removed.  When the data begins with a
  # "#" comment line, comment lines are collected into a hash keyed by
  # sequence position instead of being kept in the sequence.
  def seq
    return @seq if defined?(@seq)

    if /\A\s*^\#/ =~ @data
      comments = {}
      pieces = []
      pos = 0
      @data.split(/(^\#.*$)/).each do |chunk|
        if /^# ?(.*)$/ =~ chunk
          if comments[pos]
            comments[pos] << "\n" << $1
          else
            comments[pos] = $1
          end
        else
          chunk.tr!(" \t\r\n0-9", '') # lazy clean up
          pos += chunk.length
          pieces << chunk
        end
      end
      @comment = comments
      @seq = Bio::Sequence::Generic.new(pieces.join(''))
    else
      @seq = Sequence::Generic.new(@data.tr(" \t\r\n0-9", '')) # lazy clean up
    end
    @seq
  end

  # Returns comments.
  def comment
    seq
    @comment
  end

  # Returns sequence length.
  def length
    seq.length
  end

  # Returns the sequence as a Bio::Sequence::NA.
  def naseq
    Sequence::NA.new(seq)
  end

  # Returns the length of the Bio::Sequence::NA.
  def nalen
    naseq.length
  end

  # Returns the sequence as a Bio::Sequence::AA.
  def aaseq
    Sequence::AA.new(seq)
  end

  # Returns the length of the Bio::Sequence::AA.
  def aalen
    aaseq.length
  end

  # Returns sequence as a Bio::Sequence object.
  #
  # Note: If you modify the returned Bio::Sequence object,
  # the sequence or definition in this FastaFormat object
  # might also be changed (but not always be changed)
  # because of efficiency.
  #
  def to_biosequence
    Bio::Sequence.adapter(self, Bio::Sequence::Adapter::FastaFormat)
  end
  alias to_seq to_biosequence

  # Parses the FASTA defline and extracts IDs.
  # IDs are NSIDs (NCBI standard FASTA sequence identifiers)
  # or ":"-separated IDs.
  # It returns a Bio::FastaDefline instance.
  def identifiers
    return @ids if defined?(@ids)
    @ids = FastaDefline.new(@definition)
  end

  # Parses the FASTA defline (using #identifiers method), and
  # shows a possibly unique identifier.
  # It returns a string.
  def entry_id
    identifiers.entry_id
  end

  # Parses the FASTA defline (using #identifiers method), and
  # shows GI/locus/accession/accession with version number.
  # If an entry has more than one such ID, only the first one is shown.
  # It returns a string or nil.
  def gi
    identifiers.gi
  end

  # Returns an accession number.
  def accession
    identifiers.accession
  end

  # Parses the FASTA defline (using #identifiers method), and
  # shows accession numbers.
  # It returns an array of strings.
  def accessions
    identifiers.accessions
  end

  # Returns accession number with version.
  def acc_version
    identifiers.acc_version
  end

  # Returns locus.
  def locus
    identifiers.locus
  end

end #class FastaFormat
# = Bio::GO::Ontology
#
# Container class for ontologies in the DAG Edit format.
#
# == Example
#
#   c_data = File.open('component.oontology').read
#   go_c = Bio::GO::Ontology.new(c_data)
#   p go_c.bfs_shortest_path('0003673','0005632')
class Ontology < Bio::Pathway

  # Bio::GO::Ontology.parse_goids(line)
  #
  # Parses the GOID part of a line in the DAGEdit format
  #   GO:ID[ ; GO:ID...]
  # and returns the 7-digit IDs as an array of strings.
  def self.parse_goids(line)
    goids = []
    loop {
      if /^ *[$%<]\S.+?;/ =~ line
        # skip the leading "<relation><term> ;" part
        endpoint = line.index(';') + 1
        line = line[endpoint..line.size]
      elsif /^,* GO:(\d{7}),*/ =~ line
        goids << $1.clone
        endpoint = line.index(goids.last) + goids.last.size
        line = line[endpoint..line.size]
      else
        break
      end
    }
    return goids
  end

  # Returns a Hash instance of the header lines in ontology flatfile.
  attr_reader :header_lines

  # Hash from GO ID to term.
  attr_reader :id2term

  # Hash from GO ID to the first GO ID given on the same line.
  attr_reader :id2id

  # Bio::GO::Ontology.new(str)
  # The DAG Edit format ontology data parser.
  def initialize(str)
    @id2term      = {}
    @header_lines = {}
    @id2id        = {}
    adj_list = dag_edit_format_parser(str)
    super(adj_list)
  end

  # Returns a GO_Term corresponding with the given GO_ID.
  def goid2term(goid)
    term = id2term[goid]
    term = id2term[id2id[goid]] if term == nil
    return term
  end

  private

  # Constructs the adjacency list (array of Bio::Relation) for the given
  # ontology text, filling @header_lines, @id2term and @id2id on the way.
  def dag_edit_format_parser(str)
    stack    = []
    adj_list = []

    str.each_line {|line|
      if /^!(.+?):\s+(\S.+)$/ =~ line  # Parsing head lines
        tag   = $1
        value = $2
        tag.gsub!(/-/,'_')
        next if tag == 'type'
        # Store the value directly.  Building Ruby source from the file
        # content and evaluating it would execute arbitrary code placed
        # in a header line, and would break on values containing quotes.
        @header_lines[tag] = value
        next
      end

      case line
      when /^( *)([$<%])(.+?) ; GO:(\d{7})(\n*)/ # GO Term ; GO:ID
        depth = $1.length.to_i
        rel   = $2
        term  = $3
        goid  = $4
        en    = $5
        goids = parse_goids(line)   # GO:ID[ ; GO:ID...]
        stack[depth] = goids.first
        @id2term[goid] = term

        next if depth == 0

        goids.each {|id|
          @id2term[id] = term
          @id2id[id] = goids.first
          adj_list << Bio::Relation.new(stack[depth - 1], id, rel)
        }

        # NOTE(review): this used to be an endless `loop` whose body never
        # changed +line+, so a matching line hung the parser while it kept
        # appending the same relations.  The body is now applied once.
        # Parsing of further parents on the same line looks unfinished
        # upstream -- confirm the intended behaviour.
        if en == "" and /^ *([<%]) (.+?) ; GO:(\d{7})/ =~ line
          rel1  = $1
          term1 = $2
          goid1 = $3
          @id2term[goid1] = term1
          goids.each {|id|
            adj_list << Bio::Relation.new(goid1, id, rel1)
          }
        end
      end
    }
    return adj_list
  end

  # Returns an ary of GO IDs by parsing an entry line in the DAG Edit
  # format.
  def parse_goids(line)
    Ontology.parse_goids(line)
  end

  # Bio::GO::Ontology#parse_synonyms(line)
  # Returns an ary of the "synonym:Term" values found in the line.
  def parse_synonyms(line)
    synonyms = []
    loop {
      if / ; synonym:(\S.+?) *[;<%\n]/ =~ line
        synonyms << $1.clone
        endpoint = line.index(synonyms.last) + synonyms.last.size
        line = line[endpoint..line.size]
      else
        break
      end
    }
    return synonyms
  end

end # class Ontology
# Parses an entry (one tab-separated line) of the gene_association
# flatfile.  The Db_Reference, With and Db_Object_Synonym columns are
# split at "|" into arrays.  Columns missing from a short line are left
# nil (empty arrays for the three list columns) instead of raising.
def initialize(entry)
  tmp = entry.chomp.split(/\t/)
  @db                = tmp[0]
  @db_object_id      = tmp[1]
  @db_object_symbol  = tmp[2]
  @qualifier         = tmp[3]
  @goid              = tmp[4]
  @db_reference      = tmp[5].to_s.split(/\|/)
  @evidence          = tmp[6]
  @with              = tmp[7].to_s.split(/\|/)
  @aspect            = tmp[8]
  @db_object_name    = tmp[9]
  @db_object_synonym = tmp[10].to_s.split(/\|/)
  @db_object_type    = tmp[11]
  @taxon             = tmp[12] # taxon:4932
  @date              = tmp[13] # 20010118
  @assigned_by       = tmp[14]
end

# Returns GO_ID in /\d{7}/ format. Giving not nil arg, returns
# /GO:\d{7}/ style.  Returns nil when the entry has no GO ID column.
#
# * Bio::GO::GeneAssociation#goid       -> "0001234"
# * Bio::GO::GeneAssociation#goid(true) -> "GO:0001234"
def goid(org = nil)
  return @goid if org
  @goid && @goid.sub('GO:', '')
end

# Bio::GO::GeneAssociation#to_str -> a line of gene_association file.
def to_str
  return [@db, @db_object_id, @db_object_symbol, @qualifier, @goid,
          @db_reference.join("|"), @evidence, @with.join("|"), @aspect,
          @db_object_name, @db_object_synonym.join("|"), @db_object_type,
          @taxon, @date, @assigned_by].join("\t")
end
# = Container class for files in geneontology.org/go/external2go/*2go.
#
# The line syntax is:
#
#   database:<identifier> > GO:<term> ; GO:<GO_id>
#
# == Example
#
#   spkw2go = Bio::GO::External2go.new(File.read("spkw2go"))
#   spkw2go.size
#   spkw2go.each do |relation|
#     relation # -> {:db => "", :db_id => "", :go_term => "", :go_id => ""}
#   end
#   spkw2go.dbs
#
# == SAMPLE
#   !date: 2005/02/08 18:02:54
#   !Mapping of SWISS-PROT KEYWORDS to GO terms.
#   !Evelyn Camon, SWISS-PROT.
#   !
#   SP_KW:ATP synthesis > GO:ATP biosynthesis ; GO:0006754
#   ...
#
class External2go < Array

  # Returns aHash of the external2go header information
  # ({:date => aStr, :desc => anArray}).
  attr_reader :header

  # Constructor from parsing external2go file.
  # Raises a RuntimeError on a line that is neither a "!" header line
  # nor a relation line.
  def self.parser(str)
    e2g = self.new
    str.each_line do |line|
      line.chomp!
      if line =~ /^\!date: (.+)/
        e2g.header[:date] = $1
      elsif line =~ /^\!(.*)/
        e2g.header[:desc] << $1
      elsif ary = line.scan(/^(.+?):(.+) > GO:(.+) ; (GO:\d{7})/).first
        e2g << {:db_id => ary[1], :db => ary[0], :go_term => ary[2], :go_id => ary[3]}
      else
        raise("Invalid Format Line: \n #{line.inspect}\n")
      end
    end
    return e2g
  end

  # Constructor.
  # relation := {:db => aStr, :db_id => aStr, :go_term => aStr, :go_id => aStr}
  def initialize
    @header = {:date => '', :desc => []}
    super
  end

  # Bio::GO::External2go#set_date(value)
  def set_date(value)
    @header[:date] = value
  end

  # Bio::GO::External2go#set_desc(ary)
  def set_desc(ary)
    @header[:desc] = ary
  end

  # Bio::GO::External2go#to_str
  # Returns the contents in the external2go format.
  #
  # The lines are collected into one flat list before joining.  Joining
  # nested arrays inserted an empty line whenever there were no
  # description lines or no relations, and such output could not be
  # read back by External2go.parser.
  def to_str
    lines = ["!date: #{@header[:date]}"]
    lines.concat(@header[:desc].map {|e| "!#{e}" })
    lines.concat(self.map {|e|
      [e[:db], ':', e[:db_id], ' > GO:', e[:go_term], ' ; ', e[:go_id]].join
    })
    lines.join("\n")
  end

  # Returns ary of databases.
  def dbs
    self.map {|rel| rel[:db] }.uniq
  end

  # Returns ary of database IDs.
  def db_ids
    self.map {|rel| rel[:db_id] }.uniq
  end

  # Returns ary of GO Terms.
  def go_terms
    self.map {|rel| rel[:go_term] }.uniq
  end

  # Returns ary of GO IDs.
  def go_ids
    self.map {|rel| rel[:go_id] }.uniq
  end

end # class External2go
# # Create a Bio::GFF object the hard way # this_gff = "SEQ1\tEMBL\tatg\t103\t105\t.\t+\t0\n" # this_gff << "SEQ1\tEMBL\texon\t103\t172\t.\t+\t0\n" # this_gff << "SEQ1\tEMBL\tsplice5\t172\t173\t.\t+\t.\n" # this_gff << "SEQ1\tnetgene\tsplice5\t172\t173\t0.94\t+\t.\n" # this_gff << "SEQ1\tgenie\tsp5-20\t163\t182\t2.3\t+\t.\n" # this_gff << "SEQ1\tgenie\tsp5-10\t168\t177\t2.1\t+\t.\n" # this_gff << "SEQ1\tgrail\tATG\t17\t19\t2.1\t-\t0\n" # p Bio::GFF.new(this_gff) # # or create one based on a GFF-formatted file: # p Bio::GFF.new(File.open('my_data.gff') # --- # *Arguments*: # * _str_: string in GFF format # *Returns*:: Bio::GFF object def initialize(str = '') @records = Array.new str.each_line do |line| @records << Record.new(line) end end # An array of Bio::GFF::Record objects. attr_accessor :records # Represents a single line of a GFF-formatted file. See Bio::GFF for more # information. class Record # Name of the reference sequence attr_accessor :seqname # Name of the source of the feature (e.g. program that did prediction) attr_accessor :source # Name of the feature attr_accessor :feature # Start position of feature on reference sequence attr_accessor :start # End position of feature on reference sequence attr_accessor :end # Score of annotation (e.g. e-value for BLAST search) attr_accessor :score # Strand that feature is located on attr_accessor :strand # For features of type 'exon': indicates where feature begins in the reading frame attr_accessor :frame # List of tag=value pairs (e.g. to store name of the feature: ID=my_id) attr_accessor :attributes # Comments for the GFF record attr_accessor :comment # "comments" is deprecated. Instead, use "comment". def comments #warn "#{self.class.to_s}#comments is deprecated. Instead, use \"comment\"." if $VERBOSE self.comment end # "comments=" is deprecated. Instead, use "comment=". def comments=(str) #warn "#{self.class.to_s}#comments= is deprecated. Instead, use \"comment=\"." 
if $VERBOSE self.comment = str end # Creates a Bio::GFF::Record object. Is typically not called directly, but # is called automatically when creating a Bio::GFF object. # --- # *Arguments*: # * _str_: a tab-delimited line in GFF format def initialize(str) @comment = str.chomp[/#.*/] return if /^#/.match(str) @seqname, @source, @feature, @start, @end, @score, @strand, @frame, attributes, = str.chomp.split("\t") @attributes = parse_attributes(attributes) if attributes end private def parse_attributes(attributes) hash = Hash.new sc = StringScanner.new(attributes) attrs = [] token = '' while !sc.eos? if sc.scan(/[^\\\;\"]+/) then token.concat sc.matched elsif sc.scan(/\;/) then attrs.push token unless token.empty? token = '' elsif sc.scan(/\"/) then origtext = sc.matched while !sc.eos? if sc.scan(/[^\\\"]+/) then origtext.concat sc.matched elsif sc.scan(/\"/) then origtext.concat sc.matched break elsif sc.scan(/\\([\"\\])/) then origtext.concat sc.matched elsif sc.scan(/\\/) then origtext.concat sc.matched else raise 'Bug: should not reach here' end end token.concat origtext elsif sc.scan(/\\\;/) then token.concat sc.matched elsif sc.scan(/\\/) then token.concat sc.matched else raise 'Bug: should not reach here' end #if end #while attrs.push token unless token.empty? attrs.each do |x| key, value = x.split(' ', 2) key.strip! value.strip! if value hash[key] = value end hash end end #Class Record # = DESCRIPTION # Represents version 2 of GFF specification. # Its behavior is somehow different from Bio::GFF, # especially for attributes. # class GFF2 < GFF VERSION = 2 # string representation of the whole entry. def to_s ver = @gff_version || VERSION.to_s ver = ver.gsub(/[\r\n]+/, ' ') ([ "##gff-version #{ver}\n" ] + @metadata.collect { |m| m.to_s } + @records.collect{ |r| r.to_s }).join('') end # Private methods for GFF2 escaping characters. # Internal only. Users should not use this module directly. 
module Escape # unsafe characters to be escaped UNSAFE_GFF2 = /[^-_.!~*'()a-zA-Z\d\/?:@+$\[\] \x80-\xfd><;=,%^&\|`]/n # GFF2 standard identifier IDENTIFIER_GFF2 = /\A[A-Za-z][A-Za-z0-9_]*\z/n # GFF2 numeric value NUMERIC_GFF2 = /\A[-+]?([0-9]+|[0-9]*\.[0-9]*)([eE][+-]?[0-9]+)?\z/n # List of 1-letter special backslash code. # The letters other than listed here are the same as # those of without backslash, except for "x" and digits. # (Note that \u (unicode) is not supported.) BACKSLASH = { 't' => "\t", 'n' => "\n", 'r' => "\r", 'f' => "\f", 'b' => "\b", 'a' => "\a", 'e' => "\e", 'v' => "\v", # 's' => " ", }.freeze # inverted hash of BACKSLASH CHAR2BACKSLASH = BACKSLASH.invert.freeze # inverted hash of BACKSLASH, including double quote and backslash CHAR2BACKSLASH_EXTENDED = CHAR2BACKSLASH.merge({ '"' => '"', "\\" => "\\" }).freeze # prohibited characters in GFF2 columns PROHIBITED_GFF2_COLUMNS = /[\t\r\n\x00-\x1f\x7f\xfe\xff]/ # prohibited characters in GFF2 attribute tags PROHIBITED_GFF2_TAGS = /[\s\"\;\t\r\n\x00-\x1f\x7f\xfe\xff]/ private # (private) escapes GFF2 free text string def escape_gff2_freetext(str) '"' + str.gsub(UNSAFE_GFF2) do |x| "\\" + (CHAR2BACKSLASH_EXTENDED[x] || char2octal(x)) end + '"' end # (private) "x" => "\\oXXX" # "x" must be a letter. # If "x" is consisted of two bytes or more, joined with "\\". def char2octal(x) x.enum_for(:each_byte).collect { |y| sprintf("%03o", y) }.join("\\") end # (private) escapes GFF2 attribute value string def escape_gff2_attribute_value(str) freetext?(str) ? escape_gff2_freetext(str) : str end # (private) check if the given string is a free text to be quoted # by double-qoute. def freetext?(str) if IDENTIFIER_GFF2 =~ str or NUMERIC_GFF2 =~ str then false else true end end # (private) escapes normal columns in GFF2 def gff2_column_to_s(str) str = str.to_s str = str.empty? ? '.' 
: str str = str.gsub(PROHIBITED_GFF2_COLUMNS) do |x| "\\" + (CHAR2BACKSLASH[x] || char2octal(x)) end if str[0, 1] == '#' then str[0, 1] = "\\043" end str end # (private) escapes GFF2 attribute tag string def escape_gff2_attribute_tag(str) str = str.to_s str = str.empty? ? '.' : str str = str.gsub(PROHIBITED_GFF2_TAGS) do |x| "\\" + (CHAR2BACKSLASH[x] || char2octal(x)) end if str[0, 1] == '#' then str[0, 1] = "\\043" end str end # (private) dummy method, will be redefined in GFF3. def unescape(str) str end end #module Escape # Stores GFF2 record. class Record < GFF::Record include Escape # Stores GFF2 attribute's value. class Value include Escape # Creates a new Value object. # Note that the given array _values_ is directly stored in # the object. # # --- # *Arguments*: # * (optional) _values_: Array containing String objects. # *Returns*:: Value object. def initialize(values = []) @values = values end # Returns string representation of this Value object. # --- # *Returns*:: String def to_s @values.collect do |str| escape_gff2_attribute_value(str) end.join(' ') end # Returns all values in this object. # # Note that modification of the returned array would affect # original Value object. # --- # *Returns*:: Array def values @values end alias to_a values # Returns true if other == self. # Otherwise, returns false. def ==(other) return false unless other.kind_of?(self.class) or self.kind_of?(other.class) self.values == other.values rescue super(other) end end #class Value # Parses a GFF2-formatted line and returns a new # Bio::GFF::GFF2::Record object. def self.parse(str) self.new.parse(str) end # Creates a Bio::GFF::GFF2::Record object. # Is typically not called directly, but # is called automatically when creating a Bio::GFF::GFF2 object. 
#
# ---
# *Arguments* (one-argument form):
# * _str_: a tab-delimited line in GFF2 format
# *Arguments* (nine-argument form):
# * _seqname_: seqname (String or nil)
# * _source_: source (String or nil)
# * _feature_: feature type (String)
# * _start_position_: start (Integer)
# * _end_position_: end (Integer)
# * _score_: score (Float or nil)
# * _strand_: strand (String or nil)
# * _frame_: frame (Integer or nil)
# * _attributes_: attributes (Array or nil)
def initialize(*arg)
  if arg.size == 1 then
    # a single argument is treated as a GFF2 line to be parsed
    parse(arg[0])
  else
    @seqname, @source, @feature,
      start, endp, @score, @strand, frame,
      @attributes = arg
    @start = start ? start.to_i : nil
    @end   = endp  ? endp.to_i : nil
    # there is no local variable "score" here: this reads the value just
    # stored in @score through the inherited reader and normalizes it.
    @score = score ? score.to_f : nil
    @frame = frame ? frame.to_i : nil
  end
  # attributes are always kept as an Array (possibly empty)
  @attributes ||= []
end

# Comment for the GFF record
attr_accessor :comment

# "comments" is deprecated. Instead, use "comment".
# Emits a warning on every call and returns the comment.
def comments
  warn "#{self.class.to_s}#comments is deprecated. Instead, use \"comment\"."
  self.comment
end

# "comments=" is deprecated. Instead, use "comment=".
# Emits a warning on every call and stores _str_ as the comment.
def comments=(str)
  warn "#{self.class.to_s}#comments= is deprecated. Instead, use \"comment=\"."
  self.comment = str
end

# Parses a GFF2-formatted line and stores data from the string.
# Note that all existing data is wiped out, including the comment.
#
# A line whose first non-space character is "#" is stored as a
# comment-only record. Otherwise the line is split on tabs into at most
# ten columns: columns 1-8 are unescaped and "." is converted to nil,
# column 9 is parsed as attributes, and the text after the first "#"
# in the remainder (column 10), if any, becomes the comment.
# ---
# *Arguments*:
# * _string_: a line in GFF2 format (String)
# *Returns*:: the parsed attributes (Array)
def parse(string)
  # wipe the previous comment so that re-parsing never leaves stale data
  @comment = nil
  if /^\s*\#/ =~ string then
    @comment = string[/\#(.*)/, 1].chomp
    columns = []
  else
    columns = string.chomp.split("\t", 10)
    # the 10th column may lack "#"; in that case there is no comment
    # (calling chomp on the nil match result would raise NoMethodError)
    if columns[9] and (cmnt = columns[9][/\#(.*)/, 1]) then
      @comment = cmnt.chomp
    end
  end
  @seqname, @source, @feature,
    start, endp, score, @strand, frame =
    columns[0, 8].collect { |x|
      str = unescape(x)
      str == '.' ? nil : str
    }
  @start = start ? start.to_i : nil
  @end   = endp  ? endp.to_i : nil
  @score = score ? score.to_f : nil
  @frame = frame ? frame.to_i : nil
  @attributes = parse_attributes(columns[8])
end

# Returns true if the entry is empty except for comment.
# Otherwise, returns false.
def comment_only?
  if !@seqname and
      !@source and
      !@feature and
      !@start and
      !@end and
      !@score and
      !@strand and
      !@frame and
      @attributes.empty? then
    true
  else
    false
  end
end

# Return the record as a GFF2 compatible string.
# A comment-only record is written as a single "#..." line; otherwise
# the nine columns are joined by tabs and a non-blank comment is
# appended as an extra tab-separated "#..." column.
def to_s
  # line breaks inside the comment would break the one-record-per-line format
  cmnt = if defined?(@comment) and @comment and
             !@comment.to_s.strip.empty? then
           @comment.gsub(/[\r\n]+/, ' ')
         else
           false
         end
  return "\##{cmnt}\n" if self.comment_only? and cmnt
  [
   gff2_column_to_s(@seqname),
   gff2_column_to_s(@source),
   gff2_column_to_s(@feature),
   gff2_column_to_s(@start),
   gff2_column_to_s(@end),
   gff2_column_to_s(@score),
   gff2_column_to_s(@strand),
   gff2_column_to_s(@frame),
   attributes_to_s(@attributes)
  ].join("\t") +
    (cmnt ? "\t\##{cmnt}\n" : "\n")
end

# Returns true if self == other. Otherwise, returns false.
# Two records of the same class are equal when all eight columns and
# the attributes are equal; the comment is not compared.
def ==(other)
  super ||
    ((self.class == other.class and
      self.seqname == other.seqname and
      self.source  == other.source and
      self.feature == other.feature and
      self.start   == other.start and
      self.end     == other.end and
      self.score   == other.score and
      self.strand  == other.strand and
      self.frame   == other.frame and
      self.attributes == other.attributes) ? true : false)
end

# Gets the attribute value for the given tag.
#
# Note that if two or more tag-value pairs with the same name found,
# only the first value is returned.
# ---
# *Arguments*:
# * (required) _tag_: String
# *Returns*:: String, Bio::GFF::GFF2::Record::Value object, or nil.
def get_attribute(tag)
  ary = @attributes.assoc(tag)
  ary ? ary[1] : nil
end
alias attribute get_attribute

# Gets the attribute values for the given tag.
# This method always returns an array.
# ---
# *Arguments*:
# * (required) _tag_: String
# *Returns*:: Array containing String or \
# Bio::GFF::GFF2::Record::Value objects.
def get_attributes(tag)
  ary = @attributes.find_all do |x|
    x[0] == tag
  end
  # find_all returned a new array, so collect! does not touch @attributes
  ary.collect! { |x| x[1] }
  ary
end

# Sets value for the given tag.
# If the tag exists, the value of the tag is replaced with _value_.
# Note that if two or more tag-value pairs with the same name found,
# only the first tag-value pair is replaced.
#
# If the tag does not exist, the tag-value pair is newly added.
# ---
# *Arguments*:
# * (required) _tag_: String
# * (required) _value_: String or Bio::GFF::GFF2::Record::Value object.
# *Returns*:: _value_
def set_attribute(tag, value)
  pair = @attributes.find { |entry| entry[0] == tag }
  if pair
    # overwrite the value of the first pair carrying this tag
    pair[1] = value
  else
    # the tag is copied so the stored pair does not share the caller's String
    @attributes.push [ String.new(tag), value ]
  end
  value
end

# Replaces values for the given tag with new values.
# Pairs already carrying the tag are reused in order: the n-th existing
# pair receives the n-th new value. Existing pairs beyond the number of
# new values are removed, and new values beyond the number of existing
# pairs are appended as new tag-value pairs.
#
# ---
# *Arguments*:
# * (required) _tag_: String
# * (required) _values_: String or Bio::GFF::GFF2::Record::Value objects.
# *Returns*:: _self_
def replace_attributes(tag, *values)
  used = 0
  @attributes.reject! do |pair|
    next false unless pair[0] == tag
    # no new value left for this pair: drop it
    next true if used >= values.size
    pair[1] = values[used]
    used += 1
    false
  end
  # values that found no existing pair are appended at the end
  values.drop(used).each do |val|
    @attributes.push [ String.new(tag), val ]
  end
  self
end

# Adds a new tag-value pair at the end of the attributes.
# ---
# *Arguments*:
# * (required) _tag_: String
# * (required) _value_: String or Bio::GFF::GFF2::Record::Value object.
# *Returns*:: the attributes Array (the result of Array#push)
def add_attribute(tag, value)
  new_pair = [ String.new(tag), value ]
  @attributes.push(new_pair)
end

# Removes a specific tag-value pair.
#
# Note that if two or more identical tag-value pairs are found,
# only the first one is removed.
#
# ---
# *Arguments*:
# * (required) _tag_: String
# * (required) _value_: String or Bio::GFF::GFF2::Record::Value object.
# *Returns*:: if removed, the stored value. Otherwise, nil.
def delete_attribute(tag, value)
  position = @attributes.index([ tag, value ])
  return nil unless position
  @attributes.delete_at(position)[1]
end

# Removes all attributes with the specified tag.
#
# ---
# *Arguments*:
# * (required) _tag_: String
# *Returns*:: if removed, self. Otherwise, nil.
def delete_attributes(tag)
  # Array#reject! returns nil when nothing was removed
  changed = @attributes.reject! { |pair| pair[0] == tag }
  changed ? self : nil
end

# Sorts attributes order by given tag name's order.
# If a block is given, the argument _tags_ is ignored, and # yields two tag names like Array#sort!. # # --- # *Arguments*: # * (required or optional) _tags_: Array containing String objects # *Returns*:: _self_ def sort_attributes_by_tag!(tags = nil) h = {} s = @attributes.size @attributes.each_with_index { |x, i| h[x] = i } if block_given? then @attributes.sort! do |x, y| r = yield x[0], y[0] if r == 0 then r = (h[x] || s) <=> (h[y] || s) end r end else unless tags then raise ArgumentError, 'wrong number of arguments (0 for 1) or wrong argument value' end @attributes.sort! do |x, y| r = (tags.index(x[0]) || tags.size) <=> (tags.index(y[0]) || tags.size) if r == 0 then r = (h[x] || s) <=> (h[y] || s) end r end end self end # Returns hash representation of attributes. # # Note: If two or more tag-value pairs with same tag names exist, # only the first tag-value pair is used for each tag. # # --- # *Returns*:: Hash object def attributes_to_hash h = {} @attributes.each do |x| key, val = x h[key] = val unless h[key] end h end private # (private) Parses attributes. # Returns arrays def parse_attributes(str) return [] if !str or str == '.' attr_pairs = parse_attributes_string(str) attr_pairs.collect! do |x| key = x.shift val = (x.size == 1) ? x[0] : Value.new(x) [ key, val ] end attr_pairs end # (private) Parses attributes string. # Returns arrays def parse_attributes_string(str) sc = StringScanner.new(str) attr_pairs = [] tokens = [] cur_token = '' while !sc.eos? if sc.scan(/[^\\\;\"\s]+/) then cur_token.concat sc.matched elsif sc.scan(/\s+/) then tokens.push cur_token unless cur_token.empty? cur_token = '' elsif sc.scan(/\;/) then tokens.push cur_token unless cur_token.empty? cur_token = '' attr_pairs.push tokens tokens = [] elsif sc.scan(/\"/) then tokens.push cur_token unless cur_token.empty? cur_token = '' freetext = '' while !sc.eos? 
if sc.scan(/[^\\\"]+/) then freetext.concat sc.matched elsif sc.scan(/\"/) then break elsif sc.scan(/\\([\"\\])/) then freetext.concat sc[1] elsif sc.scan(/\\x([0-9a-fA-F][0-9a-fA-F])/n) then chr = sc[1].to_i(16).chr freetext.concat chr elsif sc.scan(/\\([0-7][0-7][0-7])/n) then chr = sc[1].to_i(8).chr freetext.concat chr elsif sc.scan(/\\([^x0-9])/n) then chr = Escape::BACKSLASH[sc[1]] || sc.matched freetext.concat chr elsif sc.scan(/\\/) then freetext.concat sc.matched else raise 'Bug: should not reach here' end end tokens.push freetext #p freetext # # disabled support for \; out of freetext #elsif sc.scan(/\\\;/) then # cur_token.concat sc.matched elsif sc.scan(/\\/) then cur_token.concat sc.matched else raise 'Bug: should not reach here' end #if end #while tokens.push cur_token unless cur_token.empty? attr_pairs.push tokens unless tokens.empty? return attr_pairs end # (private) string representation of attributes def attributes_to_s(attr) attr.collect do |a| tag, val = a if Escape::IDENTIFIER_GFF2 !~ tag then warn "Illegal GFF2 attribute tag: #{tag.inspect}" if $VERBOSE end tagstr = gff2_column_to_s(tag) valstr = if val.kind_of?(Value) then val.to_s else escape_gff2_attribute_value(val) end "#{tagstr} #{valstr}" end.join(' ; ') end end #class Record # Stores GFF2 meta-data. class MetaData # Creates a new MetaData object def initialize(directive, data = nil) @directive = directive @data = data end # Directive. Usually, one of "feature-ontology", "attribute-ontology", # or "source-ontology". attr_accessor :directive # data of this entry attr_accessor :data # parses a line def self.parse(line) directive, data = line.chomp.split(/\s+/, 2) directive = directive.sub(/\A\#\#/, '') if directive self.new(directive, data) end # string representation of this meta-data def to_s d = @directive.to_s.gsub(/[\r\n]+/, ' ') v = ' ' + @data.to_s.gsub(/[\r\n]+/, ' ') unless @data.to_s.empty? "\#\##{d}#{v}\n" end # Returns true if self == other. Otherwise, returns false. 
def ==(other) if self.class == other.class and self.directive == other.directive and self.data == other.data then true else false end end end #class MetaData # (private) parses metadata def parse_metadata(directive, line) case directive when 'gff-version' @gff_version ||= line.split(/\s+/)[1] else @metadata.push MetaData.parse(line) end true end private :parse_metadata # Creates a Bio::GFF::GFF2 object by building a collection of # Bio::GFF::GFF2::Record (and metadata) objects. # # --- # *Arguments*: # * _str_: string in GFF format # *Returns*:: Bio::GFF::GFF2 object def initialize(str = nil) @gff_version = nil @records = [] @metadata = [] parse(str) if str end # GFF2 version string (String or nil). nil means "2". attr_reader :gff_version # Metadata (except "##gff-version"). # Must be an array of Bio::GFF::GFF2::MetaData objects. attr_accessor :metadata # Parses a GFF2 entries, and concatenated the parsed data. # # --- # *Arguments*: # * _str_: string in GFF format # *Returns*:: self def parse(str) # parses GFF lines str.each_line do |line| if /^\#\#([^\s]+)/ =~ line then parse_metadata($1, line) else @records << GFF2::Record.new(line) end end self end end #class GFF2 # = DESCRIPTION # Represents version 3 of GFF specification. # For more information on version GFF3, see # http://song.sourceforge.net/gff3.shtml #-- # obsolete URL: # http://flybase.bio.indiana.edu/annot/gff3.html #++ class GFF3 < GFF VERSION = 3 # Creates a Bio::GFF::GFF3 object by building a collection of # Bio::GFF::GFF3::Record (and metadata) objects. # # --- # *Arguments*: # * _str_: string in GFF format # *Returns*:: Bio::GFF object def initialize(str = nil) @gff_version = nil @records = [] @sequence_regions = [] @metadata = [] @sequences = [] @in_fasta = false parse(str) if str end # GFF3 version string (String or nil). nil means "3". attr_reader :gff_version # Metadata of "##sequence-region". # Must be an array of Bio::GFF::GFF3::SequenceRegion objects. 
attr_accessor :sequence_regions

# Metadata (except "##sequence-region", "##gff-version", "###").
# Must be an array of Bio::GFF::GFF3::MetaData objects.
attr_accessor :metadata

# Sequences bundled within GFF3.
# Must be an array of Bio::Sequence objects.
attr_accessor :sequences

# Parses GFF3 entries and appends the parsed data to this object.
#
# Note that after "##FASTA" line is given,
# only fasta-formatted text is accepted.
#
# _str_ may be a String or an IO-like object (anything responding to
# "gets"). For a String, the text is first split at the first line that
# starts with ">" or "##FASTA"; the part before it is parsed as GFF
# lines and the part after it as fasta data. An empty String is
# accepted and adds nothing.
#
# ---
# *Arguments*:
# * _str_: string in GFF format
# *Returns*:: self
def parse(str)
  # if already after the ##FASTA line, parses fasta format and return
  if @in_fasta then
    parse_fasta(str)
    return self
  end

  if str.respond_to?(:gets) then
    # str is a IO-like object
    fst = nil
  else
    # str is a String
    gff, sep, fst = str.split(/^(\>|##FASTA.*)/n, 2)
    # a ">" separator is the first character of the fasta data: put it back
    fst = sep + fst if sep == '>' and fst
    # String#split returns [] for an empty string, which leaves gff nil;
    # fall back to '' so that each_line below is not called on nil.
    str = gff || ''
  end
  # parses GFF lines
  str.each_line do |line|
    if /^\#\#([^\s]+)/ =~ line then
      parse_metadata($1, line)
      # for IO input, the rest of the stream is fasta once ##FASTA was seen
      parse_fasta(str) if @in_fasta
    elsif /^\>/ =~ line then
      @in_fasta = true
      parse_fasta(str, line)
    else
      @records << GFF3::Record.new(line)
    end
  end

  # parses fasta format when str is a String and fasta data exists
  if fst then
    @in_fasta = true
    parse_fasta(fst)
  end

  self
end

# parses fasta formatted data
#
# Reads _str_ in chunks terminated by "\n>" and appends one sequence
# object per non-blank chunk to @sequences. If _line_ is given, it is
# prepended to the first chunk only. The entry_id of each sequence is
# the unescaped first whitespace-delimited word of its definition.
def parse_fasta(str, line = nil)
  str.each_line("\n>") do |seqstr|
    if line then seqstr = line + seqstr; line = nil; end
    x = seqstr.strip
    # skip blank chunks and a lone ">" left over from the separator
    next if x.empty? or x == '>'
    fst = Bio::FastaFormat.new(seqstr)
    seq = fst.to_seq
    seq.entry_id =
      unescape(fst.definition.strip.split(/\s/, 2)[0].to_s)
    @sequences.push seq
  end
end
private :parse_fasta

# string representation of whole entry.
#
# Output order: the "##gff-version" line, metadata, sequence regions,
# records, and finally a "##FASTA" section when sequences exist.
def to_s
  ver = @gff_version || VERSION.to_s
  if @sequences.size > 0 then
    seqs = "##FASTA\n" +
      @sequences.collect { |s| s.to_fasta(s.entry_id, 70) }.join('')
  else
    seqs = ''
  end

  ([ "##gff-version #{escape(ver)}\n" ] +
   @metadata.collect { |m| m.to_s } +
   @sequence_regions.collect { |m| m.to_s } +
   @records.collect{ |r| r.to_s }).join('') + seqs
end

# Private methods for escaping characters.
# Internal only. Users should not use this module directly. module Escape # unsafe characters to be escaped for normal columns UNSAFE = /[^-_.!~*'()a-zA-Z\d\/?:@+$\[\] "\x80-\xfd><;=,]/n # unsafe characters to be escaped for seqid columns # and target_id of the "Target" attribute UNSAFE_SEQID = /[^-a-zA-Z0-9.:^*$@!+_?|]/n # unsafe characters to be escaped for attribute columns UNSAFE_ATTRIBUTE = /[^-_.!~*'()a-zA-Z\d\/?:@+$\[\] "\x80-\xfd><]/n private # If str is empty, returns '.'. Otherwise, returns str. def column_to_s(str) str = str.to_s str.empty? ? '.' : str end if URI.const_defined?(:Parser) then # (private) URI::Parser object for escape/unescape GFF3 columns URI_PARSER = URI::Parser.new # (private) the same as URI::Parser#escape(str, unsafe) def _escape(str, unsafe) URI_PARSER.escape(str, unsafe) end # (private) the same as URI::Parser#unescape(str) def _unescape(str) URI_PARSER.unescape(str) end else # (private) the same as URI.escape(str, unsafe) def _escape(str, unsafe) URI.escape(str, unsafe) end # (private) the same as URI.unescape(str) def _unescape(str) URI.unescape(str) end end # Return the string corresponding to these characters unescaped def unescape(string) _unescape(string) end # Escape a column according to the specification at # http://song.sourceforge.net/gff3.shtml. def escape(string) _escape(string, UNSAFE) end # Escape seqid column according to the specification at # http://song.sourceforge.net/gff3.shtml. def escape_seqid(string) _escape(string, UNSAFE_SEQID) end # Escape attribute according to the specification at # http://song.sourceforge.net/gff3.shtml. # In addition to the normal escape rule, the following characters # are escaped: ",=;". # Returns the string corresponding to these characters escaped. def escape_attribute(string) _escape(string, UNSAFE_ATTRIBUTE) end end #module Escape include Escape # Stores meta-data "##sequence-region seqid start end". 
class SequenceRegion include Escape extend Escape # creates a new SequenceRegion class def initialize(seqid, start, endpos) @seqid = seqid @start = start ? start.to_i : nil @end = endpos ? endpos.to_i : nil end # parses given string and returns SequenceRegion class def self.parse(str) dummy, seqid, start, endpos = str.chomp.split(/\s+/, 4).collect { |x| unescape(x) } self.new(seqid, start, endpos) end # sequence ID attr_accessor :seqid # start position attr_accessor :start # end position attr_accessor :end # string representation def to_s i = escape_seqid(column_to_s(@seqid)) s = escape_seqid(column_to_s(@start)) e = escape_seqid(column_to_s(@end)) "##sequence-region #{i} #{s} #{e}\n" end # Returns true if self == other. Otherwise, returns false. def ==(other) if other.class == self.class and other.seqid == self.seqid and other.start == self.start and other.end == self.end then true else false end end end #class SequenceRegion # Represents a single line of a GFF3-formatted file. # See Bio::GFF::GFF3 for more information. class Record < GFF2::Record include GFF3::Escape # shortcut to the ID attribute def id get_attribute('ID') end # set ID attribute def id=(str) set_attribute('ID', str) end # aliases for Column 1 (formerly "seqname") alias seqid seqname alias seqid= seqname= # aliases for Column 3 (formerly "feature"). # In the GFF3 document http://song.sourceforge.net/gff3.shtml, # column3 is called "type", but we used "feature_type" # because "type" is already used by Ruby itself. alias feature_type feature alias feature_type= feature= # aliases for Column 8 alias phase frame alias phase= frame= # Parses a GFF3-formatted line and returns a new # Bio::GFF::GFF3::Record object. def self.parse(str) self.new.parse(str) end # Creates a Bio::GFF::GFF3::Record object. # Is typically not called directly, but # is called automatically when creating a Bio::GFF::GFF3 object. 
# # --- # *Arguments*: # * _str_: a tab-delimited line in GFF3 format # *Arguments*: # * _seqid_: sequence ID (String or nil) # * _source_: source (String or nil) # * _feature_type_: type of feature (String) # * _start_position_: start (Integer) # * _end_position_: end (Integer) # * _score_: score (Float or nil) # * _strand_: strand (String or nil) # * _phase_: phase (Integer or nil) # * _attributes_: attributes (Array or nil) def initialize(*arg) super(*arg) end # Parses a GFF3-formatted line and stores data from the string. # Note that all existing data is wiped out. def parse(string) super end # Return the record as a GFF3 compatible string def to_s cmnt = if defined?(@comment) and @comment and !@comment.to_s.strip.empty? then @comment.gsub(/[\r\n]+/, ' ') else false end return "\##{cmnt}\n" if self.comment_only? and cmnt [ escape_seqid(column_to_s(@seqname)), escape(column_to_s(@source)), escape(column_to_s(@feature)), escape(column_to_s(@start)), escape(column_to_s(@end)), escape(column_to_s(@score)), escape(column_to_s(@strand)), escape(column_to_s(@frame)), attributes_to_s(@attributes) ].join("\t") + (cmnt ? "\t\##{cmnt}\n" : "\n") end # Bio:GFF::GFF3::Record::Target is a class to store # data of "Target" attribute. class Target include GFF3::Escape extend GFF3::Escape # Creates a new Target object. def initialize(target_id, start, endpos, strand = nil) @target_id = target_id @start = start ? start.to_i : nil @end = endpos ? endpos.to_i : nil @strand = strand end # target ID attr_accessor :target_id # start position attr_accessor :start # end position attr_accessor :end # strand (optional). Normally, "+" or "-", or nil. attr_accessor :strand # parses "target_id start end [strand]"-style string # (for example, "ABC789 123 456 +") # and creates a new Target object. 
# def self.parse(str) target_id, start, endpos, strand = str.split(/ +/, 4).collect { |x| unescape(x) } self.new(target_id, start, endpos, strand) end # returns a string def to_s i = escape_seqid(column_to_s(@target_id)) s = escape_attribute(column_to_s(@start)) e = escape_attribute(column_to_s(@end)) strnd = escape_attribute(@strand.to_s) strnd = " " + strnd unless strnd.empty? "#{i} #{s} #{e}#{strnd}" end # Returns true if self == other. Otherwise, returns false. def ==(other) if other.class == self.class and other.target_id == self.target_id and other.start == self.start and other.end == self.end and other.strand == self.strand then true else false end end end #class Target # Bio:GFF::GFF3::Record::Gap is a class to store # data of "Gap" attribute. class Gap # Code is a class to store length of single-letter code. Code = Struct.new(:code, :length) # Code is a class to store length of single-letter code. class Code # 1-letter code (Symbol). One of :M, :I, :D, :F, or :R is expected. attr_reader :code if false #dummy for RDoc # length (Integer) attr_reader :length if false #dummy for RDoc def to_s "#{code}#{length}" end end #class code # Creates a new Gap object. # # --- # *Arguments*: # * _str_: a formatted string, or nil. def initialize(str = nil) if str then @data = str.split(/ +/).collect do |x| if /\A([A-Z])([0-9]+)\z/ =~ x.strip then Code.new($1.intern, $2.to_i) else warn "ignored unknown token: #{x}.inspect" if $VERBOSE nil end end @data.compact! else @data = [] end end # Same as new(str). 
def self.parse(str)
  new(str)
end

# (private method)
# Scans gaps and returns an array of Code objects.
#
# Each run of characters not matching _gap_regexp_ becomes a Code with
# _code_m_, and each run of consecutive matches becomes a Code with
# _code_i_; the length of a Code is the number of characters in the run.
def __scan_gap(str, gap_regexp = /[^a-zA-Z]/,
               code_i = :I, code_m = :M)
  scanner = StringScanner.new(str)
  codes = []
  loop do
    # skip_until returns the characters consumed up to and including
    # the first gap match, or nil when no gap is left
    consumed = scanner.skip_until(gap_regexp)
    break unless consumed
    run = consumed - scanner.matched_size
    codes.push Code.new(code_m, run) if run > 0
    gap = Code.new(code_i, scanner.matched_size)
    # absorb the gap characters that directly follow
    while (more = scanner.skip(gap_regexp))
      gap.length += more
    end
    codes.push gap
  end
  # whatever remains after the last gap is one final non-gap run
  codes.push Code.new(code_m, scanner.rest_size) if scanner.rest_size > 0
  codes
end
private :__scan_gap

# (private method)
# Parses given reference-target sequence alignment and
# initializes self. Existing data will be erased.
#
# Gaps in the reference are scanned as :I and gaps in the target as :D.
# Both run lists are then walked in parallel, always over a span of
# equal length. Returns true.
def __initialize_from_sequences_na(reference, target,
                                   gap_regexp = /[^a-zA-Z]/)
  ref_codes = __scan_gap(reference, gap_regexp, :I, :M)
  tgt_codes = __scan_gap(target, gap_regexp, :D, :M)
  merged = []

  until ref_codes.empty? or tgt_codes.empty?
    r = ref_codes.shift
    t = tgt_codes.shift
    # cut the longer run down to the shorter one and requeue the rest
    if r.length > t.length then
      ref_codes.unshift Code.new(r.code, r.length - t.length)
      r.length = t.length
    elsif r.length < t.length then
      tgt_codes.unshift Code.new(t.code, t.length - r.length)
      t.length = r.length
    end

    combo = [ r.code, t.code ]
    if combo == [ :M, :M ] or combo == [ :I, :M ] then
      merged.push r
    elsif combo == [ :M, :D ] then
      merged.push t
    elsif combo == [ :I, :D ] then
      # This site is ignored,
      # because both reference and target are gap
    elsif r.code == :M or r.code == :I then
      raise 'Bug: should not reach here.'
    end
  end

  # unmatched reference residues are counted as one :D run
  leftover = 0
  ref_codes.each { |c| leftover += c.length if c.code == :M }
  merged.push Code.new(:D, leftover) if leftover > 0

  # unmatched target residues are counted as one :I run
  leftover = 0
  tgt_codes.each { |c| leftover += c.length if c.code == :M }
  merged.push Code.new(:I, leftover) if leftover > 0

  @data = merged
  true
end
private :__initialize_from_sequences_na

# Creates a new Gap object from given sequence alignment.
#
# Note that sites of which both reference and target are gaps
# are silently removed.
#
# ---
# *Arguments*:
# * _reference_: reference sequence (nucleotide sequence)
# * _target_: target sequence (nucleotide sequence)
# * _gap_regexp_: regexp to identify gap
def self.new_from_sequences_na(reference, target,
                               gap_regexp = /[^a-zA-Z]/)
  # instance_eval is used because the initializer is a private method
  self.new.instance_eval do
    __initialize_from_sequences_na(reference, target, gap_regexp)
    self
  end
end

# (private method)
# scans a codon or gap in reference sequence
#
# Reads characters from the scanner _sc_ref_ until three (adjusted by
# frameshifts) gap or letter characters are collected, or the input
# ends. Each character is classified with this priority:
# space > forward frameshift > reverse frameshift > gap.
# Returns two Integers: the gap count (including positions missing at
# the end of input) and the net frameshift count.
def __scan_codon(sc_ref,
                 gap_regexp, space_regexp,
                 forward_frameshift_regexp,
                 reverse_frameshift_regexp)
  collected = []
  gaps = 0
  shift = 0
  while collected.size < 3 + shift and ch = sc_ref.scan(/./mn)
    if space_regexp === ch then
      # ignored
    elsif forward_frameshift_regexp === ch then
      # next char is forward frameshift
      shift += 1
    elsif reverse_frameshift_regexp === ch then
      # next char is reverse frameshift
      shift -= 1
    else
      gaps += 1 if gap_regexp === ch
      collected.push ch
    end
  end
  # positions that could not be read are counted as gaps
  missing = (3 + shift) - collected.size
  gaps += missing if missing > 0
  return gaps, shift
end
private :__scan_codon

# (private method)
# internal use only
#
# Adds _len_ to _cur_ when _cur_ already has the same _code_;
# otherwise creates a new Code, appends it to _data_, and makes it
# current. Returns the current Code object.
def __push_code_to_data(cur, data, code, len)
  unless cur and cur.code == code
    cur = Code.new(code, len)
    data.push cur
    return cur
  end
  cur.length += len
  return cur
end
private :__push_code_to_data

# (private method)
# Parses given reference(nuc)-target(amino) sequence alignment and
# initializes self. Existing data will be erased.
def __initialize_from_sequences_na_aa(reference, target, gap_regexp = /[^a-zA-Z]/, space_regexp = /\s/, forward_frameshift_regexp = /\>/, reverse_frameshift_regexp = /\ 0 then cur = __push_code_to_data(cur, data, :F, ref_fs) end end #len.times elsif len = sc_tgt.skip(re_one) then # always 1-letter ref_gaps, ref_fs = __scan_codon(sc_ref, gap_regexp, space_regexp, forward_frameshift_regexp, reverse_frameshift_regexp) case ref_gaps when 3 cur = __push_code_to_data(cur, data, :I, 1) when 2, 1, 0 # reverse frameshift inserted when gaps exist ref_fs -= ref_gaps # normal site cur = __push_code_to_data(cur, data, :M, 1) else raise 'Bug: should not reach here' end if ref_fs < 0 then cur = __push_code_to_data(cur, data, :R, -ref_fs) elsif ref_fs > 0 then cur = __push_code_to_data(cur, data, :F, ref_fs) end else raise 'Bug: should not reach here' end end #while if sc_ref.rest_size > 0 then rest = sc_ref.scan(/.*/mn) rest.gsub!(space_regexp, '') rest.gsub!(forward_frameshift_regexp, '') rest.gsub!(reverse_frameshift_regexp, '') rest.gsub!(gap_regexp, '') len = rest.length.div(3) cur = __push_code_to_data(cur, data, :D, len) if len > 0 len = rest.length % 3 cur = __push_code_to_data(cur, data, :F, len) if len > 0 end @data = data self end private :__initialize_from_sequences_na_aa # Creates a new Gap object from given sequence alignment. # # Note that sites of which both reference and target are gaps # are silently removed. # # For incorrect alignments that break 3:1 rule, # gap positions will be moved inside codons, # unwanted gaps will be removed, and # some forward or reverse frameshift will be inserted. # # For example, # atgg-taagac-att # M V K - I # is treated as: # atggt>I # # Incorrect combination of frameshift with frameshift or gap # may cause undefined behavior. # # Forward frameshifts are recomended to be indicated in the # target sequence. # Reverse frameshifts can be indicated in the reference sequence # or the target sequence. 
# # Priority of regular expressions: # space > forward/reverse frameshift > gap # # --- # *Arguments*: # * _reference_: reference sequence (nucleotide sequence) # * _target_: target sequence (amino acid sequence) # * gap_regexp: regexp to identify gap # * space_regexp: regexp to identify space character which is completely ignored # * forward_frameshift_regexp: regexp to identify forward frameshift # * reverse_frameshift_regexp: regexp to identify reverse frameshift def self.new_from_sequences_na_aa(reference, target, gap_regexp = /[^a-zA-Z]/, space_regexp = /\s/, forward_frameshift_regexp = /\>/, reverse_frameshift_regexp = /\gap_char: gap character def process_sequences_na(reference, target, gap_char = '-') s_ref, s_tgt = dup_seqs(reference, target) s_ref, s_tgt = __process_sequences(s_ref, s_tgt, gap_char, gap_char, 1, 1, gap_char, gap_char) if $VERBOSE and s_ref.length != s_tgt.length then warn "returned sequences not equal length" end return s_ref, s_tgt end # Processes sequences and # returns gapped sequences as an array of sequences. # reference must be a nucleotide sequence, and # target must be an amino acid sequence. # # Note for reverse frameshift: # Reverse_frameshift characers are inserted in the # reference sequence. 
# For example, alignment of "Gap=M3 R1 M2" is: # atgaagatgap_char: gap character # * space_char: space character inserted to amino sequence for matching na-aa alignment # * forward_frameshift: forward frameshift character # * reverse_frameshift: reverse frameshift character def process_sequences_na_aa(reference, target, gap_char = '-', space_char = ' ', forward_frameshift = '>', reverse_frameshift = '<') s_ref, s_tgt = dup_seqs(reference, target) s_tgt = s_tgt.gsub(/./, "\\0#{space_char}#{space_char}") ref_increment = 3 tgt_increment = 1 + space_char.length * 2 ref_gap = gap_char * 3 tgt_gap = "#{gap_char}#{space_char}#{space_char}" return __process_sequences(s_ref, s_tgt, ref_gap, tgt_gap, ref_increment, tgt_increment, forward_frameshift, reverse_frameshift) end end #class Gap private def parse_attributes(string) return [] if !string or string == '.' attr_pairs = [] string.split(';').each do |pair| key, value = pair.split('=', 2) key = unescape(key) values = value.to_s.split(',') case key when 'Target' values.collect! { |v| Target.parse(v) } when 'Gap' values.collect! { |v| Gap.parse(v) } else values.collect! { |v| unescape(v) } end attr_pairs.concat values.collect { |v| [ key, v ] } end return attr_pairs end # method parse_attributes # Return the attributes as a string as it appears at the end of # a GFF3 line def attributes_to_s(attr) return '.' if !attr or attr.empty? keys = [] hash = {} attr.each do |pair| key = pair[0] val = pair[1] keys.push key unless hash[key] hash[key] ||= [] hash[key].push val end keys.collect do |key| values = hash[key] val = values.collect do |v| if v.kind_of?(Target) then v.to_s else escape_attribute(v.to_s) end end.join(',') "#{escape_attribute(key)}=#{val}" end.join(';') end end # class GFF3::Record # This is a dummy record corresponding to the "###" metadata. 
# Parses one metadata directive line and stores the result.
#
# * 'gff-version': remembers the second whitespace separated token of
#   the line, unless a version was already recorded
# * 'FASTA': flags that the FASTA section has started
# * 'sequence-region': appends a parsed SequenceRegion
# * '#' (the "###" directive): appends a RecordBoundary to the records
# * anything else: appended to the generic metadata list
#
# Always returns true.
def parse_metadata(directive, line)
  if 'gff-version' == directive
    @gff_version ||= line.split(/\s+/)[1]
  elsif 'FASTA' == directive
    @in_fasta = true
  elsif 'sequence-region' == directive
    @sequence_regions.push SequenceRegion.parse(line)
  elsif '#' == directive
    # "###" directive
    @records.push RecordBoundary.new
  else
    @metadata.push MetaData.parse(line)
  end
  true
end
private :parse_metadata
# Internal use only. Normal users should not use this module.
#
# Bio::SQL::Sequence to Bio::Sequence adapter module.
# It is internally used in Bio::SQL::Sequence#to_biosequence.
#
module Bio::Sequence::Adapter::BioSQL

  extend Bio::Sequence::Adapter

  private

  def_biosequence_adapter :seq

  def_biosequence_adapter :entry_id

  def_biosequence_adapter :primary_accession

  def_biosequence_adapter :secondary_accessions

  def_biosequence_adapter :molecule_type

  #--
  #TODO: identify where is stored data_class in biosql
  #++

  def_biosequence_adapter :data_class

  def_biosequence_adapter :definition, :description

  def_biosequence_adapter :topology

  def_biosequence_adapter :date_created

  def_biosequence_adapter :date_modified #do |bs|
  #   Date.parse(bs.date_modified.to_s).strftime("%d-%b-%Y").upcase
  # end

  def_biosequence_adapter :division

  def_biosequence_adapter :sequence_version

  def_biosequence_adapter :keywords

  def_biosequence_adapter :species

  def_biosequence_adapter :classification, :taxonomy

  def_biosequence_adapter :references

  def_biosequence_adapter :features

  def_biosequence_adapter :comments

  # Converts the stored identifier string ("db:id" entries) into
  # Bio::Sequence::DBLink objects.
  #
  # Both ';' and ',' are accepted as entry separators: this adapter
  # historically split on ',', while Bio::SQL::Sequence#to_biosql writes
  # the entries joined with ';'.  A nil identifier gives an empty array.
  def_biosequence_adapter :other_seqids do |orig|
    orig.identifier.to_s.split(/[;,]/).collect do |dblink|
      # limit 2: keep any ':' that belongs to the id itself
      database, id = dblink.split(':', 2)
      Bio::Sequence::DBLink.new(database, id)
    end
  end

end #module Bio::Sequence::Adapter::BioSQL
# Declares the accessors for one kind of bioentry qualifier.
#
# Example:
#   bioentry_qualifier_anchor :molecule_type, :synonym=>'mol_type'
#
# defines three instance methods:
# * molecule_type: returns the qualifier values stored for the entry
#   (or, when the values parse as a date, a "DD-MON-YYYY" string)
# * molecule_type=(value): appends a value, with the next free rank
# * molecule_type_update(value, rank): overwrites the value stored at
#   _rank_, or appends it when no such row exists
#
# The qualifier term name is taken from _sym_ unless a :synonym option
# is given.  Errors raised while talking to the database are reported
# with puts and the method then returns nil (best effort, as before).
#
#TODO: implement setting for more than a qualifier-vale.
def self.bioentry_qualifier_anchor(sym, *args)
  options = args.first || Hash.new
  #options.assert_valid_keys(:rank,:synonym,:multi)
  method_reader = sym.to_s.to_sym
  method_writer_operator = (sym.to_s+"=").to_sym
  method_writer_modder = (sym.to_s+"_update").to_sym
  synonym = options[:synonym].nil? ? sym.to_s : options[:synonym]

  # Finds the qualifier term by name, creating it under the
  # 'Annotation Tags' ontology when it does not exist yet.
  find_term = lambda do
    Term.first(:conditions=>["name = ?",synonym]) ||
      Term.create({:name => synonym, :ontology=> Ontology.first(:conditions=>["name = ?",'Annotation Tags'])})
  end

  send :define_method, method_reader do
    #return an array of bioentry_qualifier_values
    begin
      term = find_term.call
      bioentry_qualifier_values = @entry.bioentry_qualifier_values.all(:conditions=>["term_id = ?",term.term_id])
      data = bioentry_qualifier_values.map{|row| row.value} unless bioentry_qualifier_values.nil?
      begin
        # Check whether the retrieved data is a date and, in that case,
        # return it as a string in GenBank/EMBL format;
        # otherwise return the array of values unchanged.
        Date.parse(data.to_s).strftime("%d-%b-%Y").upcase
      rescue ArgumentError, TypeError, NoMethodError, NameError
        data
      end
    rescue Exception => e
      # NOTE(review): rescuing Exception also hides interrupts; kept to
      # preserve the existing best-effort behavior.
      puts "Reader Error: #{synonym} #{e.message}"
    end
  end

  send :define_method, method_writer_operator do |value|
    begin
      term = find_term.call
      datas = @entry.bioentry_qualifier_values.all(:conditions=>["term_id = ?",term.term_id])
      #add an element incrementing the rank or setting the first to 1
      be_qu_va=@entry.bioentry_qualifier_values.build({:term=>term, :rank=>(datas.empty? ? 1 : datas.last.rank.succ), :value=>value})
      be_qu_va.save
    rescue Exception => e
      puts "WriterOperator= Error: #{synonym} #{e.message}"
    end
  end

  send :define_method, method_writer_modder do |value, rank|
    begin
      term = find_term.call
      # Fetch the single row for this term and rank.  The previous code
      # passed :term_id/:rank as finder options to +all+ and then called
      # value= on the result, so an existing row was never updated.
      data = @entry.bioentry_qualifier_values.first(:conditions=>["term_id = ? and rank = ?", term.term_id, rank])
      if data.nil?
        send method_writer_operator, value
      else
        data.value=value
        data.save
      end
    rescue Exception => e
      puts "WriterModder Error: #{synonym} #{e.message}"
    end
  end

end
begin #DELETE Bioentry.transaction do @entry = biodatabase.bioentries.build({:name=>bs.entry_id}) puts "primary" if $DEBUG self.primary_accession = bs.primary_accession puts "def" if $DEBUG self.definition = bs.definition unless bs.definition.nil? puts "seqver" if $DEBUG self.sequence_version = bs.sequence_version || 0 puts "divi" if $DEBUG self.division = bs.division unless bs.division.nil? puts "identifier" if $DEBUG self.identifier = bs.other_seqids.collect{|dblink| "#{dblink.database}:#{dblink.id}"}.join(';') unless bs.other_seqids.nil? @entry.save puts "secacc" if $DEBUG bs.secondary_accessions.each do |sa| puts "#{sa}" if $DEBUG #write as qualifier every secondary accession into the array self.secondary_accessions = sa end unless bs.secondary_accessions.nil? #to create the sequence entry needs to exists puts "seq" if $DEBUG puts bs.seq if $DEBUG self.seq = bs.seq unless bs.seq.nil? puts "mol" if $DEBUG self.molecule_type = bs.molecule_type unless bs.molecule_type.nil? puts "dc" if $DEBUG self.data_class = bs.data_class unless bs.data_class.nil? puts "top" if $DEBUG self.topology = bs.topology unless bs.topology.nil? puts "datec" if $DEBUG self.date_created = bs.date_created unless bs.date_created.nil? puts "datemod" if $DEBUG self.date_modified = bs.date_modified unless bs.date_modified.nil? puts "key" if $DEBUG bs.keywords.each do |kw| #write as qualifier every secondary accessions into the array self.keywords = kw end unless bs.keywords.nil? puts "spec" if $DEBUG #self.species = bs.species unless bs.species.nil? self.species = bs.species unless bs.species.empty? puts "Debug: #{bs.species}" if $DEBUG puts "Debug: feat..start" if $DEBUG bs.features.each do |feat| self.feature=feat end unless bs.features.nil? puts "Debug: feat...end" if $DEBUG bs.references.each do |reference| self.reference=reference end unless bs.references.nil? bs.comments.each do |comment| self.comment=comment end unless bs.comments.nil? 
# Sets the taxon of the entry from an organism name.
#
# A trailing "(common name)" part is removed before the lookup, with or
# without whitespace in front of the parenthesis, so that the string
# returned by #organism ("Scientific name(common name)") can be written
# back as well as the usual "Scientific name (common name)" form.
# The remaining text must match a 'scientific name' row of the
# taxon_name table; when it does not, an error message is printed and
# the entry is left unchanged.
def organism=(value)
  #FIX there is a shortcut
  scientific_name = value.gsub(/\s*\(.+\)/,'')
  taxon_name=TaxonName.first(:conditions=>["name = ? and name_class = ?",scientific_name,'scientific name'])
  if taxon_name.nil?
    puts "Error value doesn't exists in taxon_name table with scientific name constraint."
  else
    @entry.taxon_id=taxon_name.taxon_id
    @entry.save
  end
end
# Stores one feature (_feat_) for the current entry.
#
# Steps, in order:
# 1. find the term named after feat.feature, creating it under the
#    'SeqFeature Keys' ontology when missing;
# 2. look up the source term named 'EMBLGenBankSwit';
# 3. build and save a seqfeature whose rank is the current seqfeature
#    count plus one;
# 4. build and save one location per element of feat.locations;
# 5. build and save one qualifier value per qualifier of _feat_,
#    creating missing qualifier terms under 'Annotation Tags'.
#
# Progress messages are printed when $DEBUG is set.
def feature=(feat)
  #ToDo: avoid Ontology find here, probably more efficient create class variables
  #DELETE type_term_ontology = Ontology.find_or_create({:name=>'SeqFeature Keys'})
  puts "feature:type_term = #{feat.feature}" if $DEBUG
  type_term = Term.first(:conditions=>["name = ?", feat.feature]) || Term.create({:name=>feat.feature, :ontology=>Ontology.first(:conditions=>["name = ?",'SeqFeature Keys'])})
  #DELETE source_term_ontology = Ontology.find_or_create({:name=>'SeqFeature Sources'})
  puts "feature:source_term" if $DEBUG
  # NOTE(review): unlike the other terms, the source term is only looked
  # up, never created -- presumably it is expected to be preloaded in
  # the database; confirm.
  source_term = Term.first(:conditions=>["name = ?",'EMBLGenBankSwit'])
  puts "feature:seqfeature" if $DEBUG
  seqfeature = @entry.seqfeatures.build({:source_term=>source_term, :type_term=>type_term, :rank=>@entry.seqfeatures.count.succ, :display_name=>''})
  seqfeature.save
  puts "feature:location" if $DEBUG
  feat.locations.each do |loc|
    # rank is the number of locations counted so far, plus one
    location = seqfeature.locations.build({:seqfeature=>seqfeature, :start_pos=>loc.from, :end_pos=>loc.to, :strand=>loc.strand, :rank=>seqfeature.locations.count.succ})
    location.save
  end
  #DELETE qual_term_ontology = Ontology.find_or_create({:name=>'Annotation Tags'})
  puts "feature:qualifier" if $DEBUG
  feat.each do |qualifier|
    #DELETE qual_term = Term.find_or_create({:name=>qualifier.qualifier}, {:ontology=>qual_term_ontology})
    qual_term = Term.first(:conditions=>["name = ?", qualifier.qualifier]) || Term.create({:name=>qualifier.qualifier, :ontology=>Ontology.first(:conditions=>["name = ?", 'Annotation Tags'])})
    # qualifier values are always stored as strings
    qual = seqfeature.seqfeature_qualifier_values.build({:seqfeature=>seqfeature, :term=>qual_term, :value=>qualifier.value.to_s, :rank=>seqfeature.seqfeature_qualifier_values.count.succ})
    qual.save
  end
end
# Returns the references of the entry as an array of Bio::Reference
# objects.
#
# Each reference is built from a hash with the keys 'authors',
# 'sequence_position' (only when both positions are known), 'title',
# 'embl_gb_record_number' and 'xrefs', plus every "key=value" element
# stored in the '|' separated location column (see reference=).
def references
  #probably would be better to d a class refrence to collect these informations
  @entry.bioentry_references.collect do |bio_ref|
    hash = Hash.new
    # Inserts a '|' after every ". " and splits there; a missing
    # authors column gives an empty list instead of raising.
    hash['authors'] = bio_ref.reference.authors.to_s.gsub(/\.\s/, "\.\s\|").split(/\|/)
    hash['sequence_position'] = "#{bio_ref.start_pos}-#{bio_ref.end_pos}" if (bio_ref.start_pos and bio_ref.end_pos)
    hash['title'] = bio_ref.reference.title
    hash['embl_gb_record_number'] = bio_ref.rank
    #TODO: solve the problem with specific comment per reference.
    #TODO: get dbxref
    #take a look when location is build up in def reference=(value)
    bio_ref.reference.location.split('|').each do |element|
      # Split on the first '=' only, so that values which themselves
      # contain '=' (URLs, abstracts) are not truncated.
      key,value=element.split('=', 2)
      hash[key]=value
    end unless bio_ref.reference.location.nil?
    hash['xrefs'] = bio_ref.reference.dbxref ? "#{bio_ref.reference.dbxref.dbname}; #{bio_ref.reference.dbxref.accession}." : ''
    Bio::Reference.new(hash)
  end
end
# Returns the entry in FASTA format: a ">accession" header line followed
# by the sequence folded into lines of at most 60 characters.
#
# The header uses primary_accession; the previous code called an
# +accession+ method that this class does not define.
def to_fasta
  ">#{primary_accession}\n" + seq.to_s.gsub(/.{1,60}/, "\\0\n")
end
# Parser for entries of the TRANSFAC MATRIX table.
class MATRIX < TRANSFAC

  def initialize(entry)
    super(entry)
  end

  # NA      Name of the binding factor
  def na
    field_fetch('NA')
  end

  # DE      Short factor description
  def de
    field_fetch('DE')
  end

  # BF      List of linked factor entries
  def bf
    # The tag is upper case like every other TRANSFAC tag; the lower
    # case 'bf' used before never matched a field.
    field_fetch('BF')
  end

  # Returns the matrix rows as a Matrix of integers.
  #
  # Rows are the fields whose tag is a (possibly zero padded) number,
  # ordered by that number.  The last two characters of each row are
  # dropped before the remainder is split on whitespace and converted
  # with to_i (presumably the trailing consensus letter and its
  # separator -- confirm against the TRANSFAC format description).
  def ma
    ma_dat = {}
    ma_ary = []
    @orig.each do |k, v|
      if k =~ /^0*(\d+)/
        key = $1.to_i
        ma_dat[key] = fetch(k) unless ma_dat[key]
      end
    end
    ma_dat.keys.sort.each_with_index do |k, i|
      ma_dat[k].slice!(-1, 1)
      ma_dat[k].slice!(-1, 1)
      ma_ary[i] = ma_dat[k].split(/\s+/)
      ma_ary[i].each_with_index do |x, j|
        ma_ary[i][j] = x.to_i
      end
    end
    Matrix[*ma_ary]
  end

  # BA      Statistical basis
  def ba
    field_fetch('BA')
  end

end
# Parser for entries of the TRANSFAC GENE table.
#
# Every reader returns field_fetch of the tag of the same name:
#
#   sd :: SD  Short description/name of the gene
#   de :: DE
#   bc :: BC  Bucher promoter
#   bs :: BS  TRANSFAC SITE positions and accession numbers
#   co :: CO  COMPEL accession number
#   tr :: TR  TRRD accession number
class GENE < TRANSFAC

  def initialize(entry)
    super
  end

  # One reader per tag, named after the lower-cased tag.
  %w( SD DE BC BS CO TR ).each do |tag|
    define_method(tag.downcase.to_sym) { field_fetch(tag) }
  end

end
# The module provides references method.
module References
  # REFERENCE -- Returns contents of the REFERENCE records as an Array of
  # Bio::Reference objects.
  #
  # The result is built once and cached in @data['REFERENCE'].
  # For every REFERENCE record the sub-fields are mapped as follows:
  # * REFERENCE: a leading "PMID:nnn" is stored as 'pubmed'; any
  #   remaining text is appended to 'comments'
  # * AUTHORS: split on ", ", with the last element split again on
  #   " and "; then the first ',' of each name is followed by a space
  # * TITLE: stored as 'title'
  # * JOURNAL: parsed into 'journal', 'volume', 'pages', 'year' (and
  #   'medline' for the old style); when no pattern matches, the whole
  #   text is stored as 'journal'
  def references
    unless @data['REFERENCE']
      ary = []
      toptag2array(get('REFERENCE')).each do |ref|
        hash = Hash.new
        subtag2array(ref).each do |field|
          case tag_get(field)
          when /REFERENCE/
            cmnt = tag_cut(field).chomp
            if /^\s*PMID\:(\d+)\s*/ =~ cmnt then
              hash['pubmed'] = $1
              # $' is the text after the match, i.e. the rest of the line
              cmnt = $'
            end
            if cmnt and !cmnt.empty? then
              hash['comments'] ||= []
              hash['comments'].push(cmnt)
            end
          when /AUTHORS/
            authors = truncate(tag_cut(field))
            authors = authors.split(/\, /)
            # the last two authors are joined with "and" instead of ", "
            authors[-1] = authors[-1].split(/\s+and\s+/) if authors[-1]
            authors = authors.flatten.map { |a| a.sub(',', ', ') }
            hash['authors'] = authors
          when /TITLE/
            hash['title'] = truncate(tag_cut(field))
          when /JOURNAL/
            journal = truncate(tag_cut(field))
            # Patterns are tried in order, most specific first.
            case journal
            # KEGG style
            when /(.*) (\d*(?:\([^\)]+\))?)\:(\d+\-\d+) \((\d+)\)$/
              hash['journal'] = $1
              hash['volume']  = $2
              hash['pages']   = $3
              hash['year']    = $4
            # old KEGG style
            when /(.*) (\d+):(\d+\-\d+) \((\d+)\) \[UI:(\d+)\]$/
              hash['journal'] = $1
              hash['volume']  = $2
              hash['pages']   = $3
              hash['year']    = $4
              hash['medline'] = $5
            # Only journal name and year are available
            when /(.*) \((\d+)\)$/
              hash['journal'] = $1
              hash['year']    = $2
            else
              hash['journal'] = journal
            end
          end
        end
        ary.push(Reference.new(hash))
      end
      @data['REFERENCE'] = ary #.extend(Bio::References::BackwardCompatibility)
    end
    @data['REFERENCE']
  end
end #module References
# This module provides modules_as_hash method.
#
# Bio::KEGG::* internal use only.
module ModulesAsHash
  # Returns MODULE field as a Hash.
  # Each key of the hash is KEGG MODULE ID,
  # and each value is the name of the Pathway Module.
  #
  # The hash is built from modules_as_strings on the first call and
  # cached in @modules_as_hash.  (The guard used to test a misspelled
  # instance variable, so the hash was rebuilt on every call.)
  # ---
  # *Returns*:: Hash
  def modules_as_hash
    unless defined? @modules_as_hash then
      hash = {}
      modules_as_strings.each do |line|
        entry_id, name = line.split(/\s+/, 2)
        hash[entry_id] = name
      end
      @modules_as_hash = hash
    end
    @modules_as_hash
  end
end #module ModulesAsHash
def strings_as_hash(lines) hash = {} lines.each do |line| entry_id, definition = line.split(/\s+/, 2) hash[entry_id] = definition end return hash end private :strings_as_hash end #module StringsAsHash end #module Common end #class KEGG end #module Bio bio-1.4.3.0001/lib/bio/db/kegg/reaction.rb0000644000004100000410000000621012200110570017634 0ustar www-datawww-data# # = bio/db/kegg/reaction.rb - KEGG REACTION database class # # Copyright:: Copyright (C) 2004 Toshiaki Katayama # Copyright:: Copyright (C) 2009 Kozo Nishida # License:: The Ruby License # # $Id:$ # require 'bio/db' require 'bio/db/kegg/common' require 'enumerator' module Bio class KEGG class REACTION < KEGGDB DELIMITER = RS = "\n///\n" TAGSIZE = 12 include Common::PathwaysAsHash # Returns a Hash of the pathway ID and name in PATHWAY field. def pathways_as_hash; super; end if false #dummy for RDoc alias pathways pathways_as_hash include Common::OrthologsAsHash # Returns a Hash of the orthology ID and definition in ORTHOLOGY field. def orthologs_as_hash; super; end if false #dummy for RDoc alias orthologs orthologs_as_hash # Creates a new Bio::KEGG::REACTION object. # --- # *Arguments*: # * (required) _entry_: (String) single entry as a string # *Returns*:: Bio::KEGG::REACTION object def initialize(entry) super(entry, TAGSIZE) end # ID of the entry, described in the ENTRY line. # --- # *Returns*:: String def entry_id field_fetch('ENTRY')[/\S+/] end # Name of the reaction, described in the NAME line. # --- # *Returns*:: String def name field_fetch('NAME') end # Definition of the reaction, described in the DEFINITION line. # --- # *Returns*:: String def definition field_fetch('DEFINITION') end # Chemical equation, described in the EQUATION line. # --- # *Returns*:: String def equation field_fetch('EQUATION') end # KEGG RPAIR (ReactantPair) information, described in the RPAIR lines. 
# --- # *Returns*:: Array containing String def rpairs_as_strings lines_fetch('RPAIR') end # KEGG RPAIR (ReactantPair) information, described in the RPAIR lines. # Returns a hash of RPair IDs and [ name, type ] informations, for example, # { "RP12733" => [ "C00022_C00900", "trans" ], # "RP05698" => [ "C00011_C00022", "leave" ], # "RP00440" => [ "C00022_C00900", "main" ] # } # --- # *Returns*:: Hash def rpairs_as_hash unless defined? @rpairs_as_hash rps = {} rpairs_as_strings.each do |line| namespace, entry_id, name, rptype = line.split(/\s+/) rps[entry_id] = [ name, rptype ] end @rpairs_as_hash = rps end @rpairs_as_hash end alias rpairs rpairs_as_hash # Returns the content of the RPAIR entry as tokens # (RPair signature, RPair ID, , RPair type). # --- # *Returns*:: Array containing String def rpairs_as_tokens fetch('RPAIR').split(/\s+/) end # Pathway information, described in the PATHWAY lines. # --- # *Returns*:: Array containing String def pathways_as_strings lines_fetch('PATHWAY') end # Enzymes described in the ENZYME line. # --- # *Returns*:: Array containing String def enzymes unless @data['ENZYME'] @data['ENZYME'] = fetch('ENZYME').scan(/\S+/) end @data['ENZYME'] end # Orthologs described in the ORTHOLOGY lines. # --- # *Returns*:: Array containing String def orthologs_as_strings lines_fetch('ORTHOLOGY') end end # REACTION end # KEGG end # Bio bio-1.4.3.0001/lib/bio/db/kegg/orthology.rb0000644000004100000410000000634212200110570020064 0ustar www-datawww-data# # = bio/db/kegg/orthology.rb - KEGG ORTHOLOGY database class # # Copyright:: Copyright (C) 2003-2007 Toshiaki Katayama # Copyright:: Copyright (C) 2003 Masumi Itoh # License:: The Ruby License # # $Id:$ # require 'bio/db' require 'bio/db/kegg/common' module Bio class KEGG # == Description # # KO (KEGG Orthology) entry parser. 
# # == References # # * http://www.genome.jp/dbget-bin/get_htext?KO # * ftp://ftp.genome.jp/pub/kegg/genes/ko # class ORTHOLOGY < KEGGDB DELIMITER = RS = "\n///\n" TAGSIZE = 12 include Common::DblinksAsHash # Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field. def dblinks_as_hash; super; end if false #dummy for RDoc alias dblinks dblinks_as_hash include Common::GenesAsHash # Returns a Hash of the organism ID and an Array of entry IDs in GENES field. def genes_as_hash; super; end if false #dummy for RDoc alias genes genes_as_hash include Common::PathwaysAsHash # Returns a Hash of the pathway ID and name in PATHWAY field. def pathways_as_hash; super; end if false #dummy for RDoc alias pathways pathways_as_hash include Common::ModulesAsHash # Returns MODULE field as a Hash. # Each key of the hash is KEGG MODULE ID, # and each value is the name of the Pathway Module. # --- # *Returns*:: Hash def modules_as_hash; super; end if false #dummy for RDoc alias modules modules_as_hash include Common::References # REFERENCE -- Returns contents of the REFERENCE records as an Array of # Bio::Reference objects. # --- # *Returns*:: an Array containing Bio::Reference objects def references; super; end if false #dummy for RDoc # Reads a flat file format entry of the KO database. def initialize(entry) super(entry, TAGSIZE) end # Returns ID of the entry. def entry_id field_fetch('ENTRY')[/\S+/] end # Returns NAME field of the entry. def name field_fetch('NAME') end # Returns an Array of names in NAME field. def names name.split(', ') end # Returns DEFINITION field of the entry. def definition field_fetch('DEFINITION') end # Returns CLASS field of the entry. def keggclass field_fetch('CLASS') end # Returns an Array of biological classes in CLASS field. def keggclasses keggclass.gsub(/ \[[^\]]+/, '').split(/\] ?/) end # Pathways described in the PATHWAY field. 
# --- # *Returns*:: Array containing String def pathways_as_strings lines_fetch('PATHWAY') end # *OBSOLETE* Do not use this method. # Because KEGG ORTHOLOGY format is changed and PATHWAY field is added, # older "pathways" method is renamed and remain only for compatibility. # # Returns an Array of KEGG/PATHWAY ID in CLASS field. def pathways_in_keggclass keggclass.scan(/\[PATH:(.*?)\]/).flatten end # Returns MODULE field of the entry. # --- # *Returns*:: Array containing String objects def modules_as_strings lines_fetch('MODULE') end # Returns an Array of a database name and entry IDs in DBLINKS field. def dblinks_as_strings lines_fetch('DBLINKS') end # Returns an Array of the organism ID and entry IDs in GENES field. def genes_as_strings lines_fetch('GENES') end end # ORTHOLOGY end # KEGG end # Bio bio-1.4.3.0001/lib/bio/db/kegg/taxonomy.rb0000644000004100000410000002100312200110570017703 0ustar www-datawww-data# # = bio/db/kegg/taxonomy.rb - KEGG taxonomy parser class # # Copyright:: Copyright (C) 2007 Toshiaki Katayama # License:: The Ruby License # # $Id:$ # module Bio class KEGG # == Description # # Parse the KEGG 'taxonomy' file which describes taxonomic classification # of organisms. # # == References # # The KEGG 'taxonomy' file is available at # # * ftp://ftp.genome.jp/pub/kegg/genes/taxonomy # class Taxonomy def initialize(filename, orgs = []) # Stores the taxonomic tree as a linked list (implemented in Hash), so # every node need to have unique name (key) to work correctly @tree = Hash.new # Also stores the taxonomic tree as a list of arrays (full path) @path = Array.new # Also stores all leaf nodes (organism codes) of every intermediate nodes @leaves = Hash.new # tentative name for the root node (use accessor to change) @root = 'Genes' hier = Array.new level = 0 label = nil File.open(filename).each do |line| next if line.strip.empty? 
# line for taxonomic hierarchy (indent according to the number of # marks) if line[/^#/] level = line[/^#+/].length label = line[/[A-z].*/] hier[level] = sanitize(label) # line for organims name (unify different strains of a species) else tax, org, name, desc = line.chomp.split("\t") if orgs.nil? or orgs.empty? or orgs.include?(org) species, strain, = name.split('_') # (0) Grouping of the strains of the same species. # If the name of species is the same as the previous line, # add the species to the same species group. # ex. Gamma/enterobacteria has a large number of organisms, # so sub grouping of strains is needed for E.coli strains etc. # # However, if the species name is already used, need to avoid # collision of species name as the current implementation stores # the tree as a Hash, which may cause the infinite loop. # # (1) If species name == the intermediate node of other lineage # Add '_sp' to the species name to avoid the conflict (1-1), and if # 'species_sp' is already taken, use 'species_strain' instead (1-2). # ex. Bacteria/Proteobacteria/Beta/T.denitrificans/tbd # Bacteria/Proteobacteria/Epsilon/T.denitrificans_ATCC33889/tdn # -> Bacteria/Proteobacteria/Beta/T.denitrificans/tbd # Bacteria/Proteobacteria/Epsilon/T.denitrificans_sp/tdn # # (2) If species name == the intermediate node of the same lineage # Add '_sp' to the species name to avoid the conflict. # ex. 
Bacteria/Cyanobacgteria/Cyanobacteria_CYA/cya # Bacteria/Cyanobacgteria/Cyanobacteria_CYB/cya # Bacteria/Proteobacteria/Magnetococcus/Magnetococcus_MC1/mgm # -> Bacteria/Cyanobacgteria/Cyanobacteria_sp/cya # Bacteria/Cyanobacgteria/Cyanobacteria_sp/cya # Bacteria/Proteobacteria/Magnetococcus/Magnetococcus_sp/mgm sp_group = "#{species}_sp" if @tree[species] if hier[level+1] == species # case (0) else # case (1-1) species = sp_group # case (1-2) if @tree[sp_group] and hier[level+1] != species species = name end end else if hier[level] == species # case (2) species = sp_group end end # 'hier' is an array of the taxonomic tree + species and strain name. # ex. [nil, Eukaryotes, Fungi, Ascomycetes, Saccharomycetes] + # [S_cerevisiae, sce] hier[level+1] = species # sanitize(species) hier[level+2] = org ary = hier[1, level+2] warn ary.inspect if $DEBUG add_to_tree(ary) add_to_leaves(ary) add_to_path(ary) end end end return tree end attr_reader :tree attr_reader :path attr_reader :leaves attr_accessor :root def organisms(group) @leaves[group] end # Add a new path [node, subnode, subsubnode, ..., leaf] under the root node # and every intermediate nodes stores their child nodes as a Hash. def add_to_tree(ary) parent = @root ary.each do |node| @tree[parent] ||= Hash.new @tree[parent][node] = nil parent = node end end # Add a new path [node, subnode, subsubnode, ..., leaf] under the root node # and stores leaf nodes to the every intermediate nodes as an Array. def add_to_leaves(ary) leaf = ary.last ary.each do |node| @leaves[node] ||= Array.new @leaves[node] << leaf end end # Add a new path [node, subnode, subsubnode, ..., leaf] under the root node # and stores the path itself in an Array. def add_to_path(ary) @path << ary end # Compaction of intermediate nodes of the resulted taxonomic tree. # - If child node has only one child node (grandchild), make the child of # grandchild as a grandchild. # ex. 
#   Plants / Monocotyledons / grass family / osa
#   --> Plants / Monocotyledons / osa
#
# Works in place on @tree, starting from +node+ (the root by default),
# and recurses into every child after that child has been compacted.
def compact(node = root)
  # nothing to do unless the node has children
  subnodes = @tree[node]
  return unless subnodes

  # obtain grandchildren for each child
  subnodes.keys.each do |subnode|
    # Repeat for the same child until it no longer has exactly one
    # grandchild that itself has children. An explicit loop is used
    # because `retry` outside a rescue clause is a syntax error on
    # Ruby 1.9 and later.
    while (subsubnodes = @tree[subnode])
      # stop unless the number of grandchild nodes is 1
      break unless subsubnodes.keys.size == 1
      # obtain the name of the grandchild node
      subsubnode = subsubnodes.keys.first
      # obtain the children of the grandchild node; stop if it is a leaf
      subsubsubnodes = @tree[subsubnode]
      break unless subsubsubnodes
      # make the children of the grandchild node children of the child node
      @tree[subnode] = subsubsubnodes
      # delete grandchild node
      @tree[subnode].delete(subsubnode)
      warn "--- compact: #{subsubnode} is replaced by #{subsubsubnodes}" if $DEBUG
    end
    # repeat recursively
    compact(subnode)
  end
end

# Reduction of the leaf node of the resulted taxonomic tree.
# - If the parent node have only one leaf node, replace parent node
#   with the leaf node.
# ex.
#   Plants / Monocotyledons / osa
#   --> Plants / osa
#
# Works in place on @tree, starting from +node+ (the root by default),
# and recurses into every child of +node+ that was present on entry.
def reduce(node = root)
  # nothing to do unless the node has children
  subnodes = @tree[node]
  return unless subnodes

  # iterate over a snapshot of the child names, because @tree[node] is
  # modified inside the loop
  subnodes.keys.each do |subnode|
    subsubnodes = @tree[subnode]
    # if the child has exactly one grandchild ...
    if subsubnodes && subsubnodes.keys.size == 1
      # obtain the name of the grandchild node
      subsubnode = subsubnodes.keys.first
      # ... and the grandchild node is a leaf node
      unless @tree[subsubnode]
        # make the grandchild node as a child node
        @tree[node].update(subsubnodes)
        # delete child node
        @tree[node].delete(subnode)
        warn "--- reduce: #{subnode} is replaced by #{subsubnode}" if $DEBUG
      end
    end
    # repeat recursively
    reduce(subnode)
  end
end

# Traverse the taxonomic tree by the depth first search method
# under the given (root or intermediate) node.
def dfs(parent, &block) if children = @tree[parent] yield parent, children children.keys.each do |child| dfs(child, &block) end end end # Similar to the dfs method but also passes the current level of the nest # to the iterator. def dfs_with_level(parent, &block) @level ||= 0 if children = @tree[parent] yield parent, children, @level @level += 1 children.keys.each do |child| dfs_with_level(child, &block) end @level -= 1 end end # Convert the taxonomic tree structure to a simple ascii art. def to_s result = "#{@root}\n" @tree[@root].keys.each do |node| result += ascii_tree(node, " ") end return result end private # Helper method for the to_s method. def ascii_tree(node, indent) result = "#{indent}+- #{node}\n" indent += " " @tree[node].keys.each do |child| if @tree[child] result += ascii_tree(child, indent) else result += "#{indent}+- #{child}\n" end end return result end def sanitize(str) str.gsub(/[^A-z0-9]/, '_') end end # Taxonomy end # KEGG end # Bio bio-1.4.3.0001/lib/bio/db/kegg/module.rb0000644000004100000410000000667012200110570017327 0ustar www-datawww-data# # = bio/db/kegg/module.rb - KEGG MODULE database class # # Copyright:: Copyright (C) 2010 Kozo Nishida # Copyright:: Copyright (C) 2010 Toshiaki Katayama # License:: The Ruby License # # $Id:$ # require 'bio/db' require 'bio/db/kegg/common' module Bio class KEGG # == Description # # Bio::KEGG::MODULE is a parser class for the KEGG MODULE database entry. # # == References # # * http://www.kegg.jp/kegg-bin/get_htext?ko00002.keg # * ftp://ftp.genome.jp/pub/kegg/pathway/module # class MODULE < KEGGDB DELIMITER = RS = "\n///\n" TAGSIZE = 12 #-- # for a private method strings_as_hash. #++ include Common::StringsAsHash # Creates a new Bio::KEGG::MODULE object. # --- # *Arguments*: # * (required) _entry_: (String) single entry as a string # *Returns*:: Bio::KEGG::MODULE object def initialize(entry) super(entry, TAGSIZE) end # Return the ID, described in the ENTRY line. 
# ---
# *Returns*:: String
def entry_id
  entry_line = field_fetch('ENTRY')
  entry_line[/\S+/]
end

# NAME line of the entry.
# ---
# *Returns*:: String
def name
  field_fetch('NAME')
end

# DEFINITION line of the entry.
# ---
# *Returns*:: String
def definition
  field_fetch('DEFINITION')
end

# CLASS line of the entry.
# ---
# *Returns*:: String
def keggclass
  field_fetch('CLASS')
end

# Raw PATHWAY lines.
# ---
# *Returns*:: Array containing String
def pathways_as_strings
  lines_fetch('PATHWAY')
end

# PATHWAY lines converted with strings_as_hash; the result is cached
# in @pathways_as_hash.
# ---
# *Returns*:: Hash of pathway ID and its definition
def pathways_as_hash
  @pathways_as_hash ||= strings_as_hash(pathways_as_strings)
end
alias pathways pathways_as_hash

# Raw ORTHOLOGY lines.
# ---
# *Returns*:: Array containing String
def orthologs_as_strings
  lines_fetch('ORTHOLOGY')
end

# ORTHOLOGY lines converted with strings_as_hash; the result is cached
# in @orthologs_as_hash.
# ---
# *Returns*:: Hash of orthology ID and its definition
def orthologs_as_hash
  @orthologs_as_hash ||= strings_as_hash(orthologs_as_strings)
end
alias orthologs orthologs_as_hash

# Every individual ID found in the keys of orthologs_as_hash. Keys are
# split on "+", "-" and ","; the pieces are returned sorted and without
# duplicates.
# ---
# *Returns*:: Array of orthology IDs
def orthologs_as_array
  pieces = orthologs_as_hash.keys.map { |key| key.split(/\+|\-|,/) }
  pieces.flatten.sort.uniq
end

# Raw REACTION lines.
# ---
# *Returns*:: Array containing String
def reactions_as_strings
  lines_fetch('REACTION')
end

# REACTION lines converted with strings_as_hash; the result is cached
# in @reactions_as_hash.
# ---
# *Returns*:: Hash of reaction ID and its definition
def reactions_as_hash
  @reactions_as_hash ||= strings_as_hash(reactions_as_strings)
end
alias reactions reactions_as_hash

# Compounds described in the COMPOUND lines.
# --- # *Returns*:: Array containing String def compounds_as_strings lines_fetch('COMPOUND') end # Compounds described in the COMPOUND lines. # --- # *Returns*:: Hash of compound ID and its definition def compounds_as_hash unless @compounds_as_hash @compounds_as_hash = strings_as_hash(compounds_as_strings) end @compounds_as_hash end alias compounds compounds_as_hash end # MODULE end # KEGG end # Bio bio-1.4.3.0001/lib/bio/db/kegg/genes.rb0000644000004100000410000002253112200110570017135 0ustar www-datawww-data# # = bio/db/kegg/genes.rb - KEGG/GENES database class # # Copyright:: Copyright (C) 2001, 2002, 2006, 2010 # Toshiaki Katayama # License:: The Ruby License # # $Id:$ # # # == KEGG GENES parser # # See http://www.genome.jp/kegg/genes.html # # # === Examples # # require 'bio/io/fetch' # entry_string = Bio::Fetch.query('genes', 'b0002') # # entry = Bio::KEGG::GENES.new(entry_string) # # # ENTRY # p entry.entry # => Hash # # p entry.entry_id # => String # p entry.division # => String # p entry.organism # => String # # # NAME # p entry.name # => String # p entry.names # => Array # # # DEFINITION # p entry.definition # => String # p entry.eclinks # => Array # # # PATHWAY # p entry.pathway # => String # p entry.pathways # => Hash # # # POSITION # p entry.position # => String # p entry.chromosome # => String # p entry.gbposition # => String # p entry.locations # => Bio::Locations # # # MOTIF # p entry.motifs # => Hash of Array # # # DBLINKS # p entry.dblinks # => Hash of Array # # # STRUCTURE # p entry.structure # => Array # # # CODON_USAGE # p entry.codon_usage # => Hash # p entry.cu_list # => Array # # # AASEQ # p entry.aaseq # => Bio::Sequence::AA # p entry.aalen # => Fixnum # # # NTSEQ # p entry.ntseq # => Bio::Sequence::NA # p entry.naseq # => Bio::Sequence::NA # p entry.ntlen # => Fixnum # p entry.nalen # => Fixnum # module Bio autoload :Locations, 'bio/location' unless const_defined?(:Locations) autoload :Sequence, 'bio/sequence' unless 
const_defined?(:Sequence) require 'bio/db' require 'bio/db/kegg/common' class KEGG # == Description # # KEGG GENES entry parser. # # == References # # * http://www.genome.jp/kegg/genes.html # class GENES < KEGGDB DELIMITER = RS = "\n///\n" TAGSIZE = 12 include Common::DblinksAsHash # Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field. def dblinks_as_hash; super; end if false #dummy for RDoc alias dblinks dblinks_as_hash include Common::PathwaysAsHash # Returns a Hash of the pathway ID and name in PATHWAY field. def pathways_as_hash; super; end if false #dummy for RDoc alias pathways pathways_as_hash include Common::OrthologsAsHash # Returns a Hash of the orthology ID and definition in ORTHOLOGY field. def orthologs_as_hash; super; end if false #dummy for RDoc alias orthologs orthologs_as_hash # Creates a new Bio::KEGG::GENES object. # --- # *Arguments*: # * (required) _entry_: (String) single entry as a string # *Returns*:: Bio::KEGG::GENES object def initialize(entry) super(entry, TAGSIZE) end # Returns the "ENTRY" line content as a Hash. # For example, # {"organism"=>"E.coli", "division"=>"CDS", "id"=>"b0356"} # # --- # *Returns*:: Hash def entry unless @data['ENTRY'] hash = Hash.new('') if get('ENTRY').length > 30 e = get('ENTRY') hash['id'] = e[12..29].strip hash['division'] = e[30..39].strip hash['organism'] = e[40..80].strip end @data['ENTRY'] = hash end @data['ENTRY'] end # ID of the entry, described in the ENTRY line. # --- # *Returns*:: String def entry_id entry['id'] end # Division of the entry, described in the ENTRY line. # --- # *Returns*:: String def division entry['division'] # CDS, tRNA etc. end # Organism name of the entry, described in the ENTRY line. # --- # *Returns*:: String def organism entry['organism'] # H.sapiens etc. end # Returns the NAME line. # --- # *Returns*:: String def name field_fetch('NAME') end # Names of the entry as an Array, described in the NAME line. 
#
# ---
# *Returns*:: Array containing String
def names_as_array
  name.split(', ')
end
alias names names_as_array

# The method will be deprecated. Use Bio::KEGG::GENES#names.
#
# Same result as names_as_array.
#
# ---
# *Returns*:: Array containing String
def genes
  names_as_array
end

# The method will be deprecated.
# Use entry.names.first instead.
#
# First element of the Array returned by genes.
# ---
# *Returns*:: String
def gene
  genes.first
end

# DEFINITION line of the entry.
# ---
# *Returns*:: String
def definition
  field_fetch('DEFINITION')
end

# EC numbers found in the DEFINITION line. A "[EC:...]" group is looked
# for first, then a "(EC:...)" group; the content is split on whitespace.
# An empty Array is returned when neither group is present. The result
# is cached in @eclinks.
# ---
# *Returns*:: Array containing String
def eclinks
  return @eclinks if defined? @eclinks

  ec_list = definition.slice(/\[EC\:([^\]]+)\]/, 1)
  ec_list ||= definition.slice(/\(EC\:([^\)]+)\)/, 1)
  @eclinks = ec_list ? ec_list.strip.split(/\s+/) : []
end

# Raw ORTHOLOGY lines.
# ---
# *Returns*:: Array containing String
def orthologs_as_strings
  lines_fetch('ORTHOLOGY')
end

# PATHWAY field as one String; fetched once and kept in @pathway.
# ---
# *Returns*:: String
def pathway
  @pathway = fetch('PATHWAY') unless defined? @pathway
  @pathway
end

# Raw PATHWAY lines.
# ---
# *Returns*:: Array containing String
def pathways_as_strings
  lines_fetch('PATHWAY')
end

# CLASS field of the entry.
def keggclass
  field_fetch('CLASS')
end

# CLASS field split into its individual classes: every " [..." group is
# removed up to (not including) its closing "]", and the text is then
# split at each remaining "]" and an optional following space.
def keggclasses
  without_links = keggclass.gsub(/ \[[^\]]+/, '')
  without_links.split(/\] ?/)
end

# POSITION field with all whitespace removed; cached in @data['POSITION'].
# ---
# *Returns*:: String
def position
  @data['POSITION'] ||= fetch('POSITION').gsub(/\s/, '')
end

# Chromosome part of the position: the text before the first ":" when
# there is one, the whole position when it contains no ".." range,
# and nil otherwise.
# ---
# *Returns*:: String or nil
def chromosome
  return position.sub(/:.*/, '') if position[/:/]
  return nil if position[/\.\./]

  position
end

# Position with everything up to and including the first ":" removed,
# i.e. the part handed to Bio::Locations by the locations method.
# ---
# *Returns*:: String
def gbposition
  position.sub(/.*?:/, '')
end

# gbposition wrapped in a Bio::Locations object.
# ---
# *Returns*:: Bio::Locations object
def locations
  Bio::Locations.new(gbposition)
end

# Raw MOTIF lines.
# ---
# *Returns*:: Strings
def motifs_as_strings
  lines_fetch('MOTIF')
end

# MOTIF lines grouped by database name. A line that starts with
# "name:" opens a new group; any other line adds its IDs to the group
# opened most recently. The result is cached in @data['MOTIF'].
# ---
# *Returns*:: Hash
def motifs_as_hash
  return @data['MOTIF'] if @data['MOTIF']

  by_db = {}
  current_db = nil
  motifs_as_strings.each do |line|
    ids = line
    current_db, ids = line.split(/:/, 2) if line[/^\S+:/]
    by_db[current_db] ||= []
    by_db[current_db].concat(ids.strip.split(/\s+/))
  end
  @data['MOTIF'] = by_db
end
alias motifs motifs_as_hash

# The specification of the method will be changed in the future.
# Please use Bio::KEGG::GENES#motifs.
#
# Same result as motifs.
# ---
# *Returns*:: Hash
def motif
  motifs
end

# Raw DBLINKS lines.
# ---
# *Returns*:: Array containing String objects
def dblinks_as_strings
  lines_fetch('DBLINKS')
end

# IDs listed in the STRUCTURE field: a leading "PDB: " label is removed
# when present and the remainder is split on whitespace. The result is
# cached in @data['STRUCTURE'].
# ---
# *Returns*:: Array containing String
def structure
  @data['STRUCTURE'] ||= fetch('STRUCTURE').sub(/(PDB: )*/,'').split(/\s+/)
end
alias structures structure

# Codon usage data described in the CODON_USAGE lines. (Deprecated: no more exists)
#
# Maps each of the 64 codons, enumerated over the bases t, c, a, g in
# that order, to the value at the same index in cu_list. The result is
# cached in @data['CODON_USAGE']. The +codon+ argument is accepted but
# not used.
# ---
# *Returns*:: Hash
def codon_usage(codon = nil)
  return @data['CODON_USAGE'] if @data['CODON_USAGE']

  counts = cu_list
  bases = %w(t c a g)
  table = {}
  index = 0
  bases.each do |first|
    bases.each do |second|
      bases.each do |third|
        table["#{first}#{second}#{third}"] = counts[index]
        index += 1
      end
    end
  end
  @data['CODON_USAGE'] = table
end

# Codon usage numbers as a flat Array. The first line of the
# CODON_USAGE field is discarded; on every other line the first 11
# characters are dropped and the rest is read in 4-character cells,
# each converted with to_i.
# ---
# *Returns*:: Array
def cu_list
  counts = []
  table_text = get('CODON_USAGE').sub(/.*/,'') # cut 1st line
  table_text.each_line do |row|
    row.chomp.sub(/^.{11}/, '').scan(/..../) { |cell| counts.push(cell.to_i) }
  end
  counts
end

# AASEQ field with all digits removed, wrapped in Bio::Sequence::AA;
# cached in @data['AASEQ'].
# ---
# *Returns*:: Bio::Sequence::AA object
def aaseq
  @data['AASEQ'] ||= Bio::Sequence::AA.new(fetch('AASEQ').gsub(/\d+/, ''))
end

# First run of digits in the AASEQ field, as an Integer.
# ---
# *Returns*:: Integer
def aalen
  fetch('AASEQ')[/\d+/].to_i
end

# NTSEQ field with all digits removed, wrapped in Bio::Sequence::NA;
# cached in @data['NTSEQ'].
# ---
# *Returns*:: Bio::Sequence::NA object
def ntseq
  @data['NTSEQ'] ||= Bio::Sequence::NA.new(fetch('NTSEQ').gsub(/\d+/, ''))
end
alias naseq ntseq

# Returns nucleic acid sequence length.
# --- # *Returns*:: Integer def ntlen fetch('NTSEQ')[/\d+/].to_i end alias nalen ntlen end end # KEGG end # Bio bio-1.4.3.0001/lib/bio/db/kegg/genome.rb0000644000004100000410000001153712200110570017312 0ustar www-datawww-data# # = bio/db/kegg/genome.rb - KEGG/GENOME database class # # Copyright:: Copyright (C) 2001, 2002, 2007 Toshiaki Katayama # License:: The Ruby License # # $Id:$ # require 'bio/db' require 'bio/reference' require 'bio/db/kegg/common' module Bio class KEGG # == Description # # Parser for the KEGG GENOME database # # == References # # * ftp://ftp.genome.jp/pub/kegg/genomes/genome # * http://www.genome.jp/dbget-bin/www_bfind?genome # * http://www.genome.jp/kegg/catalog/org_list.html # class GENOME < KEGGDB DELIMITER = RS = "\n///\n" TAGSIZE = 12 include Common::References # REFERENCE -- Returns contents of the REFERENCE records as an Array of # Bio::Reference objects. def references; super; end if false #dummy for RDoc def initialize(entry) super(entry, TAGSIZE) end # (private) Returns a tag name of the field as a String. # Needed to redefine because of the PLASMID field. def tag_get(str) if /\APLASMID\s+/ =~ str.to_s then 'PLASMID' else super(str) end end private :tag_get # (private) Returns a String of the field without a tag name. # Needed to redefine because of the PLASMID field. def tag_cut(str) if /\APLASMID\s+/ =~ str.to_s then $' else super(str) end end private :tag_cut # ENTRY -- Returns contents of the ENTRY record as a String. def entry_id field_fetch('ENTRY')[/\S+/] end # NAME -- Returns contents of the NAME record as a String. def name field_fetch('NAME') end # DEFINITION -- Returns contents of the DEFINITION record as a String. def definition field_fetch('DEFINITION') end alias organism definition # TAXONOMY -- Returns contents of the TAXONOMY record as a Hash. def taxonomy unless @data['TAXONOMY'] taxid, lineage = subtag2array(get('TAXONOMY')) taxid = taxid ? truncate(tag_cut(taxid)) : '' lineage = lineage ? 
truncate(tag_cut(lineage)) : '' @data['TAXONOMY'] = { 'taxid' => taxid, 'lineage' => lineage, } @data['TAXONOMY'].default = '' end @data['TAXONOMY'] end # Returns NCBI taxonomy ID from the TAXONOMY record as a String. def taxid taxonomy['taxid'] end # Returns contents of the TAXONOMY/LINEAGE record as a String. def lineage taxonomy['lineage'] end # DATA_SOURCE -- Returns contents of the DATA_SOURCE record as a String. def data_source field_fetch('DATA_SOURCE') end # ORIGINAL_DB -- Returns contents of the ORIGINAL_DB record as a String. def original_db #field_fetch('ORIGINAL_DB') unless defined?(@original_db) @original_db = fetch('ORIGINAL_DB') end @original_db end # Returns ORIGINAL_DB record as an Array containing String objects. # # --- # *Arguments*: # *Returns*:: Array containing String objects def original_databases lines_fetch('ORIGINAL_DB') end # DISEASE -- Returns contents of the COMMENT record as a String. def disease field_fetch('DISEASE') end # COMMENT -- Returns contents of the COMMENT record as a String. def comment field_fetch('COMMENT') end # CHROMOSOME -- Returns contents of the CHROMOSOME records as an Array # of Hash. def chromosomes unless @data['CHROMOSOME'] @data['CHROMOSOME'] = [] toptag2array(get('CHROMOSOME')).each do |chr| hash = Hash.new('') subtag2array(chr).each do |field| hash[tag_get(field)] = truncate(tag_cut(field)) end @data['CHROMOSOME'].push(hash) end end @data['CHROMOSOME'] end # PLASMID -- Returns contents of the PLASMID records as an Array of Hash. def plasmids unless @data['PLASMID'] @data['PLASMID'] = [] toptag2array(get('PLASMID')).each do |chr| hash = Hash.new('') subtag2array(chr).each do |field| hash[tag_get(field)] = truncate(tag_cut(field)) end @data['PLASMID'].push(hash) end end @data['PLASMID'] end # STATISTICS -- Returns contents of the STATISTICS record as a Hash. 
def statistics unless @data['STATISTICS'] hash = Hash.new(0.0) get('STATISTICS').each_line do |line| case line when /nucleotides:\s+(\d+)/ hash['num_nuc'] = $1.to_i when /protein genes:\s+(\d+)/ hash['num_gene'] = $1.to_i when /RNA genes:\s+(\d+)/ hash['num_rna'] = $1.to_i end end @data['STATISTICS'] = hash end @data['STATISTICS'] end # Returns number of nucleotides from the STATISTICS record as a Fixnum. def nalen statistics['num_nuc'] end alias length nalen # Returns number of protein genes from the STATISTICS record as a Fixnum. def num_gene statistics['num_gene'] end # Returns number of rna from the STATISTICS record as a Fixnum. def num_rna statistics['num_rna'] end end # GENOME end # KEGG end # Bio bio-1.4.3.0001/lib/bio/db/kegg/kgml.rb0000644000004100000410000004144312200110570016771 0ustar www-datawww-data# # = bio/db/kegg/kgml.rb - KEGG KGML parser class # # Copyright:: Copyright (C) 2005 # Toshiaki Katayama # License:: The Ruby License # # require 'rexml/document' module Bio class KEGG # == KGML (KEGG XML) parser # # See http://www.genome.jp/kegg/xml/ for more details on KGML. # # === Note for older version users # * Most of incompatible attribute names with KGML tags are now deprecated. # Use the names of KGML tags instead of old incompatible names that will # be removed in the future. # * Bio::KGML::Entry#id (entry_id is deprecated) # * Bio::KGML::Entry#type (category is deprecated) # * Bio::KGML::Relation#entry1 (node1 is deprecated) # * Bio::KGML::Relation#entry2 (node2 is deprecated) # * Bio::KGML::Relation#type (rel is deprecated) # * Bio::KGML::Reaction#name (entry_id is deprecated) # * Bio::KGML::Reaction#type (direction is deprecated) # * New class Bio::KGML::Graphics and new method Bio::KGML::Entry#graphics. # Because two or more graphics elements may exist, following attribute # methods in Bio::KGML::Entry are now deprecated and will be removed # in the future. See rdoc of these methods for details. 
# * Bio::KEGG::KGML::Entry#label # * Bio::KEGG::KGML::Entry#shape # * Bio::KEGG::KGML::Entry#x # * Bio::KEGG::KGML::Entry#y # * Bio::KEGG::KGML::Entry#width # * Bio::KEGG::KGML::Entry#height # * Bio::KEGG::KGML::Entry#fgcolor # * Bio::KEGG::KGML::Entry#bgcolor # * Incompatible changes: Bio::KEGG::KGML::Reaction#substrates now returns # an array containing Bio::KEGG::KGML::Substrate objects, and # Bio::KEGG::KGML::Reaction#products now returns an array containing # Bio::KEGG::KGML::Product objects. The changes enable us to get id of # substrates and products. # # === Incompatible attribute names with KGML tags # # # :map -> :pathway # names() # # edge() # # === Examples # # file = File.read("kgml/hsa/hsa00010.xml") # kgml = Bio::KEGG::KGML.new(file) # # # attributes # puts kgml.name # puts kgml.org # puts kgml.number # puts kgml.title # puts kgml.image # puts kgml.link # # kgml.entries.each do |entry| # # attributes # puts entry.id # puts entry.name # puts entry.type # puts entry.link # puts entry.reaction # # attributes # entry.graphics.each do |graphics| # puts graphics.name # puts graphics.type # puts graphics.x # puts graphics.y # puts graphics.width # puts graphics.height # puts graphics.fgcolor # puts graphics.bgcolor # end # # attributes # puts entry.components # # methood # puts entry.names # end # # kgml.relations.each do |relation| # # attributes # puts relation.entry1 # puts relation.entry2 # puts relation.type # # attributes # puts relation.name # puts relation.value # end # # kgml.reactions.each do |reaction| # # attributes # puts reaction.name # puts reaction.type # # attributes # reaction.substrates.each do |substrate| # puts substrate.id # puts substrate.name # # attributes # altnames = reaction.alt[entry_id] # altnames.each do |name| # puts name # end # end # # attributes # reaction.products.each do |product| # puts product.id # puts product.name # # attributes # altnames = reaction.alt[entry_id] # altnames.each do |name| # puts name # end # end # 
end # # === References # # * http://www.genome.jp/kegg/xml/docs/ # class KGML # Creates a new KGML object. # # --- # *Arguments*: # * (required) _str_: String containing xml data # *Returns*:: Bio::KEGG::KGML object def initialize(xml) dom = REXML::Document.new(xml) parse_root(dom) parse_entry(dom) parse_relation(dom) parse_reaction(dom) end # KEGG-style ID string of this pathway map (String or nil) # ('pathway' element) attr_reader :name # "ko" (KEGG Orthology), "ec" (KEGG ENZYME), # or the KEGG 3-letter organism code (String or nil) # ('pathway' element) attr_reader :org # map number (String or nil) # ('pathway' element) attr_reader :number # title (String or nil) # ('pathway' element) attr_reader :title # image URL of this pathway map (String or nil) # ('pathway' element) attr_reader :image # information URL of this pathway map (String or nil) # ('pathway' element) attr_reader :link # entry elements (Array containing KGML::Entry objects, or nil) attr_accessor :entries # relation elements (Array containing KGML::Relations objects, or nil) attr_accessor :relations # reaction elements (Array containing KGML::Reactions objects, or nil) attr_accessor :reactions # Bio::KEGG:Entry contains an entry element in the KGML. class Entry # ID of this entry in this pathway map (Integer or nil). # ('id' attribute in 'entry' element) attr_accessor :id alias entry_id id alias entry_id= id= # KEGG-style ID string of this entry (String or nil) attr_accessor :name # type of this entry (String or nil). # Normally one of the following: # * "ortholog" # * "enzyme" # * "reaction" # * "gene" # * "group" # * "compound" # * "map" # See http://www.genome.jp/kegg/xml/docs/ for details. # ('type' attribute in 'entry' element) attr_accessor :type alias category type alias category= type= # URL pointing information about this entry (String or nil) attr_accessor :link # KEGG-style ID string of this reaction (String or nil) attr_accessor :reaction # (Deprecated?) 
('map' attribute in 'entry' element) attr_accessor :pathway # (private) get an attribute value in the graphics[-1] object def _graphics_attr(attr) if self.graphics then g = self.graphics[-1] g ? g.__send__(attr) : nil else nil end end private :_graphics_attr # (private) get an attribute value in the graphics[-1] object def _graphics_set_attr(attr, val) self.graphics ||= [] unless g = self.graphics[-1] then g = Graphics.new self.graphics.push(g) end g.__send__(attr, val) end private :_graphics_set_attr # Deprecated. # Same as self.graphics[-1].name (additional nil checks may be needed). # # label of the 'graphics' element (String or nil) # ('name' attribute in 'graphics' element) def label _graphics_attr(:name) end # Deprecated. # Same as self.graphics[-1].name= (additional nil checks may be needed). # def label=(val) _graphics_set_attr(:name=, val) end # Deprecated. # Same as self.graphics[-1].type (additional nil checks may be needed). # # shape of the 'graphics' element (String or nil) # Normally one of the following: # * "rectangle" # * "circle" # * "roundrectangle" # * "line" # If not specified, "rectangle" is the default value. # ('type' attribute in 'graphics' element) def shape _graphics_attr(:type) end # Deprecated. # Same as self.graphics[-1].type= (additional nil checks may be needed). # def shape=(val) _graphics_set_attr(:type=, val) end # Deprecated. # Same as self.graphics[-1].x (additional nil checks may be needed). # # X axis position (Integer or nil) ('graphics' element) def x _graphics_attr(:x) end # Deprecated. # Same as self.graphics[-1].x= (additional nil checks may be needed). # def x=(val) _graphics_set_attr(:x=, val) end # Deprecated. # Same as self.graphics[-1].y (additional nil checks may be needed). # # Y axis position (Integer or nil) ('graphics' element) def y _graphics_attr(:y) end # Deprecated. # Same as self.graphics[-1].y= (additional nil checks may be needed). # def y=(val) _graphics_set_attr(:y=, val) end # Deprecated. 
# Same as self.graphics[-1].width (additional nil checks may be needed). # # width (Integer or nil) ('graphics' element) def width _graphics_attr(:width) end # Deprecated. # Same as self.graphics[-1].width= (additional nil checks may be needed). # def width=(val) _graphics_set_attr(:width=, val) end # Deprecated. # Same as self.graphics[-1].height (additional nil checks may be needed). # # height (Integer or nil) ('graphics' element) def height _graphics_attr(:height) end # Deprecated. # Same as self.graphics[-1].height= (additional nil checks may be needed). # def height=(val) _graphics_set_attr(:height=, val) end # Deprecated. # Same as self.graphics[-1].fgcolor (additional nil checks may be needed). # # foreground color (String or nil) ('graphics' element) def fgcolor _graphics_attr(:fgcolor) end # Deprecated. # Same as self.graphics[-1].fgcolor= (additional nil checks may be needed). # def fgcolor=(val) _graphics_set_attr(:fgcolor=, val) end # Deprecated. # Same as self.graphics[-1].bgcolor (additional nil checks may be needed). # # background color (String or nil) ('graphics' element) def bgcolor _graphics_attr(:bgcolor) end # Deprecated. # Same as self.graphics[-1].bgcolor= (additional nil checks may be needed). # def bgcolor=(val) _graphics_set_attr(:bgcolor=, val) end # graphics elements included in this entry # (Array containing Graphics objects, or nil) attr_accessor :graphics # component elements included in this entry # (Array containing Integer objects, or nil) attr_accessor :components # the "name" attribute may contain multiple names separated # with space characters. This method returns the names # as an array. (Array containing String objects) def names @name.split(/\s+/) end end # Bio::KEGG::KGML::Graphics contains a 'graphics' element in the KGML. 
class Graphics
  # label of the 'graphics' element (String or nil)
  attr_accessor :name

  # shape of the 'graphics' element (String or nil)
  # Normally one of the following:
  # * "rectangle"
  # * "circle"
  # * "roundrectangle"
  # * "line"
  # If not specified, "rectangle" is the default value.
  attr_accessor :type

  # X axis position (Integer or nil)
  attr_accessor :x

  # Y axis position (Integer or nil)
  attr_accessor :y

  # polyline coordinates
  # (Array containing Array of [ x, y ] pair of Integer values)
  attr_accessor :coords

  # width (Integer or nil)
  attr_accessor :width

  # height (Integer or nil)
  attr_accessor :height

  # foreground color (String or nil)
  attr_accessor :fgcolor

  # background color (String or nil)
  attr_accessor :bgcolor
end #class Graphics

# Bio::KEGG::KGML::Relation contains a relation element in the KGML.
class Relation
  # the first entry of the relation (Integer or nil)
  # ('entry1' attribute in 'relation' element)
  attr_accessor :entry1
  # node1 / node1= are the older names kept as aliases of entry1 / entry1=.
  alias node1 entry1
  alias node1= entry1=

  # the second entry of the relation (Integer or nil)
  # ('entry2' attribute in 'relation' element)
  attr_accessor :entry2
  # node2 / node2= are the older names kept as aliases of entry2 / entry2=.
  alias node2 entry2
  alias node2= entry2=

  # type of this relation (String or nil).
  # Normally one of the following:
  # * "ECrel"
  # * "PPrel"
  # * "GErel"
  # * "PCrel"
  # * "maplink"
  # ('type' attribute in 'relation' element)
  attr_accessor :type
  # rel / rel= are the older names kept as aliases of type / type=.
  alias rel type
  alias rel= type=

  # interaction and/or relation type (String or nil).
  # See http://www.genome.jp/kegg/xml/docs/ for details.
  # ('name' attribute in 'subtype' element)
  attr_accessor :name

  # interaction and/or relation information (String or nil).
  # See http://www.genome.jp/kegg/xml/docs/ for details.
  # ('value' attribute in 'subtype' element)
  attr_accessor :value

  # (Deprecated?)
  # Returns the 'value' attribute converted with to_i (Integer).
  # An unset (nil) or non-numeric value therefore yields 0.
  def edge
    @value.to_i
  end
end

# Bio::KEGG::KGML::Reaction contains a reaction element in the KGML.
class Reaction
  # ID of this reaction (Integer or nil)
  attr_accessor :id

  # KEGG-style ID string of this reaction (String or nil)
  # ('name' attribute in 'reaction' element)
  attr_accessor :name
  # entry_id / entry_id= are the older names kept as aliases of name / name=.
  alias entry_id name
  alias entry_id= name=

  # type of this reaction (String or nil).
  # Normally "reversible" or "irreversible".
  # ('type' attribute in 'reaction' element)
  attr_accessor :type
  # direction / direction= are the older names kept as aliases of type / type=.
  alias direction type
  alias direction= type=

  # Substrates. The KGML parser fills this with Substrate objects; the
  # KEGG-style ID string of each substrate is available through its #name.
  # (Array containing Substrate objects, or nil)
  attr_accessor :substrates

  # Products. The KGML parser fills this with Product objects; the
  # KEGG-style ID string of each product is available through its #name.
  # (Array containing Product objects, or nil)
  attr_accessor :products

  # alt element (Hash). The KGML parser keys it by substrate/product name
  # and stores an Array of the 'name' attributes of the nested 'alt' elements.
  attr_accessor :alt
end

# Bio::KEGG::KGML::SubstrateProduct contains a substrate element
# or a product element in the KGML.
#
# Please do not use SubstrateProduct directly.
# Instead, please use Substrate or Product class.
class SubstrateProduct
  # ID of this substrate or product (Integer or nil)
  attr_accessor :id

  # name of this substrate or product (String or nil)
  attr_accessor :name

  # Creates a new object
  #
  # ---
  # *Arguments*:
  # * (optional) _id_: ID of the substrate or product (default nil)
  # * (optional) _name_: name of the substrate or product (default nil)
  def initialize(id = nil, name = nil)
    # ||= keeps any value that was already assigned before this runs.
    @id ||= id
    @name ||= name
  end
end #class SubstrateProduct

# Bio::KEGG::KGML::Substrate contains a substrate element in the KGML.
class Substrate < SubstrateProduct
end

# Bio::KEGG::KGML::Product contains a product element in the KGML.
class Product < SubstrateProduct end private def parse_root(dom) root = dom.root.attributes @name = root["name"] @org = root["org"] @number = root["number"] @title = root["title"] @image = root["image"] @link = root["link"] end def parse_entry(dom) @entries = Array.new dom.elements.each("/pathway/entry") { |node| attr = node.attributes entry = Entry.new entry.id = attr["id"].to_i entry.name = attr["name"] entry.type = attr["type"] # implied entry.link = attr["link"] entry.reaction = attr["reaction"] entry.pathway = attr["map"] node.elements.each("graphics") { |graphics| g = Graphics.new attr = graphics.attributes g.x = attr["x"].to_i g.y = attr["y"].to_i g.type = attr["type"] g.name = attr["name"] g.width = attr["width"].to_i g.height = attr["height"].to_i g.fgcolor = attr["fgcolor"] g.bgcolor = attr["bgcolor"] if str = attr["coords"] then coords = [] tmp = str.split(',') tmp.collect! { |n| n.to_i } while xx = tmp.shift yy = tmp.shift coords.push [ xx, yy ] end g.coords = coords else g.coords = nil end entry.graphics ||= [] entry.graphics.push g } node.elements.each("component") { |component| attr = component.attributes entry.components ||= [] entry.components << attr["id"].to_i } @entries << entry } end def parse_relation(dom) @relations = Array.new dom.elements.each("/pathway/relation") { |node| attr = node.attributes relation = Relation.new relation.entry1 = attr["entry1"].to_i relation.entry2 = attr["entry2"].to_i relation.type = attr["type"] node.elements.each("subtype") { |subtype| attr = subtype.attributes relation.name = attr["name"] relation.value = attr["value"] } @relations << relation } end def parse_reaction(dom) @reactions = Array.new dom.elements.each("/pathway/reaction") { |node| attr = node.attributes reaction = Reaction.new reaction.id = attr["id"].to_i reaction.name = attr["name"] reaction.type = attr["type"] substrates = Array.new products = Array.new hash = Hash.new node.elements.each("substrate") { |substrate| id = 
substrate.attributes["id"].to_i name = substrate.attributes["name"] substrates << Substrate.new(id, name) substrate.elements.each("alt") { |alt| hash[name] ||= Array.new hash[name] << alt.attributes["name"] } } node.elements.each("product") { |product| id = product.attributes["id"].to_i name = product.attributes["name"] products << Product.new(id, name) product.elements.each("alt") { |alt| hash[name] ||= Array.new hash[name] << alt.attributes["name"] } } reaction.substrates = substrates reaction.products = products reaction.alt = hash @reactions << reaction } end end # KGML end # KEGG end # Bio bio-1.4.3.0001/lib/bio/db/kegg/enzyme.rb0000644000004100000410000000560312200110570017344 0ustar www-datawww-data# # = bio/db/kegg/enzyme.rb - KEGG/ENZYME database class # # Copyright:: Copyright (C) 2001, 2002, 2007 Toshiaki Katayama # License:: The Ruby License # # $Id:$ # require 'bio/db' require 'bio/db/kegg/common' module Bio class KEGG class ENZYME < KEGGDB DELIMITER = RS = "\n///\n" TAGSIZE = 12 include Common::DblinksAsHash # Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field. def dblinks_as_hash; super; end if false #dummy for RDoc alias dblinks dblinks_as_hash include Common::PathwaysAsHash # Returns a Hash of the pathway ID and name in PATHWAY field. def pathways_as_hash; super; end if false #dummy for RDoc alias pathways pathways_as_hash include Common::OrthologsAsHash # Returns a Hash of the orthology ID and definition in ORTHOLOGY field. def orthologs_as_hash; super; end if false #dummy for RDoc alias orthologs orthologs_as_hash include Common::GenesAsHash # Returns a Hash of the organism ID and an Array of entry IDs in GENES field. def genes_as_hash; super; end if false #dummy for RDoc alias genes genes_as_hash def initialize(entry) super(entry, TAGSIZE) end # ENTRY def entry field_fetch('ENTRY') end def entry_id entry[/EC (\S+)/, 1] end def obsolete? entry[/Obsolete/] ? 
true : false end # NAME def names field_fetch('NAME').split(/\s*;\s*/) end def name names.first end # CLASS def classes lines_fetch('CLASS') end # SYSNAME def sysname field_fetch('SYSNAME') end # REACTION def reaction field_fetch('REACTION') end # ALL_REAC ';' def all_reac field_fetch('ALL_REAC') end def iubmb_reactions all_reac.sub(/;\s*\(other\).*/, '').split(/\s*;\s*/) end def kegg_reactions reac = all_reac if reac[/\(other\)/] reac.sub(/.*\(other\)\s*/, '').split(/\s*;\s*/) else [] end end # SUBSTRATE def substrates field_fetch('SUBSTRATE').split(/\s*;\s*/) end # PRODUCT def products field_fetch('PRODUCT').split(/\s*;\s*/) end # INHIBITOR def inhibitors field_fetch('INHIBITOR').split(/\s*;\s*/) end # COFACTOR def cofactors field_fetch('COFACTOR').split(/\s*;\s*/) end # COMMENT def comment field_fetch('COMMENT') end # PATHWAY def pathways_as_strings lines_fetch('PATHWAY') end # ORTHOLOGY def orthologs_as_strings lines_fetch('ORTHOLOGY') end # GENES def genes_as_strings lines_fetch('GENES') end # DISEASE def diseases lines_fetch('DISEASE') end # MOTIF def motifs lines_fetch('MOTIF') end # STRUCTURES def structures unless @data['STRUCTURES'] @data['STRUCTURES'] = fetch('STRUCTURES').sub(/(PDB: )*/,'').split(/\s+/) end @data['STRUCTURES'] end # REFERENCE # DBLINKS def dblinks_as_strings lines_fetch('DBLINKS') end end # ENZYME end # KEGG end # Bio bio-1.4.3.0001/lib/bio/db/kegg/glycan.rb0000644000004100000410000000602512200110570017311 0ustar www-datawww-data# # = bio/db/kegg/glycan.rb - KEGG GLYCAN database class # # Copyright:: Copyright (C) 2004 Toshiaki Katayama # License:: The Ruby License # # $Id:$ # require 'bio/db' require 'bio/db/kegg/common' module Bio class KEGG class GLYCAN < KEGGDB DELIMITER = RS = "\n///\n" TAGSIZE = 12 include Common::DblinksAsHash # Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field. 
def dblinks_as_hash; super; end if false #dummy for RDoc alias dblinks dblinks_as_hash include Common::PathwaysAsHash # Returns a Hash of the pathway ID and name in PATHWAY field. def pathways_as_hash; super; end if false #dummy for RDoc alias pathways pathways_as_hash include Common::OrthologsAsHash # Returns a Hash of the orthology ID and definition in ORTHOLOGY field. def orthologs_as_hash; super; end if false #dummy for RDoc alias orthologs orthologs_as_hash def initialize(entry) super(entry, TAGSIZE) end # ENTRY def entry_id field_fetch('ENTRY')[/\S+/] end # NAME def name field_fetch('NAME') end # COMPOSITION def composition unless @data['COMPOSITION'] hash = Hash.new(0) fetch('COMPOSITION').scan(/\((\S+)\)(\d+)/).each do |key, val| hash[key] = val.to_i end @data['COMPOSITION'] = hash end @data['COMPOSITION'] end # MASS def mass unless @data['MASS'] @data['MASS'] = field_fetch('MASS')[/[\d\.]+/].to_f end @data['MASS'] end # CLASS def keggclass field_fetch('CLASS') end # COMPOUND def compounds unless @data['COMPOUND'] @data['COMPOUND'] = fetch('COMPOUND').split(/\s+/) end @data['COMPOUND'] end # REACTION def reactions unless @data['REACTION'] @data['REACTION'] = fetch('REACTION').split(/\s+/) end @data['REACTION'] end # PATHWAY def pathways_as_strings lines_fetch('PATHWAY') end # ENZYME def enzymes unless @data['ENZYME'] field = fetch('ENZYME') if /\(/.match(field) # old version @data['ENZYME'] = field.scan(/\S+ \(\S+\)/) else @data['ENZYME'] = field.scan(/\S+/) end end @data['ENZYME'] end # ORTHOLOGY def orthologs_as_strings unless @data['ORTHOLOGY'] @data['ORTHOLOGY'] = lines_fetch('ORTHOLOGY') end @data['ORTHOLOGY'] end # COMMENT def comment field_fetch('COMMENT') end # REMARK def remark field_fetch('REMARK') end # REFERENCE def references unless @data['REFERENCE'] ary = Array.new lines = lines_fetch('REFERENCE') lines.each do |line| if /^\d+\s+\[PMID/.match(line) ary << line else ary.last << " #{line.strip}" end end @data['REFERENCE'] = ary end 
@data['REFERENCE'] end # DBLINKS def dblinks_as_strings unless @data['DBLINKS'] @data['DBLINKS'] = lines_fetch('DBLINKS') end @data['DBLINKS'] end # ATOM, BOND def kcf return "#{get('NODE')}#{get('EDGE')}" end end # GLYCAN end # KEGG end # Bio bio-1.4.3.0001/lib/bio/db/kegg/keggtab.rb0000644000004100000410000002155512200110570017445 0ustar www-datawww-data# # = bio/db/kegg/keggtab.rb - KEGG keggtab class # # Copyright:: Copyright (C) 2001 Mitsuteru C. Nakao # Copyright (C) 2003, 2006 Toshiaki Katayama # License:: The Ruby License # # $Id: keggtab.rb,v 1.10 2007/04/05 23:35:41 trevor Exp $ # module Bio class KEGG # == Description # # Parse 'keggtab' KEGG database definition file which also includes # Taxonomic category of the KEGG organisms. # # == References # # The 'keggtab' file is included in # # * ftp://ftp.genome.jp/pub/kegg/tarfiles/genes.tar.gz # * ftp://ftp.genome.jp/pub/kegg/tarfiles/genes.weekly.last.tar.Z # # == Format # # File format is something like # # # KEGGTAB # # # # name type directory abbreviation # # # enzyme enzyme $BIOROOT/db/ideas/ligand ec # ec alias enzyme # (snip) # # Human # h.sapiens genes $BIOROOT/db/kegg/genes hsa # H.sapiens alias h.sapiens # hsa alias h.sapiens # (snip) # # # # Taxonomy # # # (snip) # animals alias hsa+mmu+rno+dre+dme+cel # eukaryotes alias animals+plants+protists+fungi # genes alias eubacteria+archaea+eukaryotes # class Keggtab # Path for keggtab file and optionally set bioroot top directory. # Environmental variable BIOROOT overrides bioroot. def initialize(file_path, bioroot = nil) @bioroot = ENV['BIOROOT'] || bioroot @db_names = Hash.new @database = Hash.new @taxonomy = Hash.new File.open(file_path) do |f| parse_keggtab(f.read) end end # Returns a string of the BIOROOT path prefix. attr_reader :bioroot attr_reader :db_names # Bio::KEGG::Keggtab::DB class DB # Create a container object for database definitions. 
def initialize(db_name, db_type, db_path, db_abbrev) @name = db_name @type = db_type @path = db_path @abbrev = db_abbrev @aliases = Array.new end # Database name. (e.g. 'enzyme', 'h.sapies', 'e.coli', ...) attr_reader :name # Definition type. (e.g. 'enzyme', 'alias', 'genes', ...) attr_reader :type # Database flat file path. (e.g. '$BIOROOT/db/kegg/genes', ...) attr_reader :path # Short name for the database. (e.g. 'ec', 'hsa', 'eco', ...) # korg and keggorg are alias for abbrev method. attr_reader :abbrev # Array containing all alias names for the database. # (e.g. ["H.sapiens", "hsa"], ["E.coli", "eco"], ...) attr_reader :aliases alias korg abbrev alias keggorg abbrev end # DB section # Returns a hash containing DB definition section of the keggtab file. # If database name is given as an argument, returns a Keggtab::DB object. def database(db_abbrev = nil) if db_abbrev @database[db_abbrev] else @database end end # Returns an Array containing all alias names for the database. # (e.g. 'hsa' -> ["H.sapiens", "hsa"], 'hpj' -> ["H.pylori_J99", "hpj"]) def aliases(db_abbrev) if @database[db_abbrev] @database[db_abbrev].aliases end end # Returns a canonical database name for the abbreviation. # (e.g. 'ec' -> 'enzyme', 'hsa' -> 'h.sapies', ...) def name(db_abbrev) if @database[db_abbrev] @database[db_abbrev].name end end # Returns an absolute path for the flat file database. # (e.g. '/bio/db/kegg/genes', ...) 
def path(db_abbrev)
  # Looks the database up by its abbreviation; unknown abbreviations give nil.
  db = @database[db_abbrev]
  return nil unless db

  # Block form of sub inserts the replacement literally, so a BIOROOT that
  # contains backslashes is not interpreted as a regexp back-reference.
  dir = @bioroot ? db.path.sub(/\$BIOROOT/) { @bioroot } : db.path
  "#{dir}/#{db.name}"
end

# deprecated
#
# Returns the alias names registered for the database name _db_name_,
# or nil when the name is unknown.
def alias_list(db_name)
  db = @db_names[db_name]
  db.aliases if db
end

# deprecated
#
# Same as #path but looked up by database name instead of abbreviation.
# Returns nil for an unknown name (consistent with #path).
def db_path(db_name)
  db = @db_names[db_name]
  return nil unless db

  dir = @bioroot ? db.path.sub(/\$BIOROOT/) { @bioroot } : db.path
  "#{dir}/#{db_name}"
end

# deprecated
#
# Returns the first DB object whose abbrev equals _db_abbrev_, or nil.
def db_by_abbrev(db_abbrev)
  @db_names.values.find { |db| db.abbrev == db_abbrev }
end

# deprecated
#
# Returns the database name for the abbreviation, or nil when unknown.
def name_by_abbrev(db_abbrev)
  db = db_by_abbrev(db_abbrev)
  db.name if db
end

# deprecated
#
# Resolves the abbreviation to a database name and returns its #db_path.
def db_path_by_abbrev(db_abbrev)
  db_path(name_by_abbrev(db_abbrev))
end


# Taxonomy section

# Returns a hash containing Taxonomy section of the keggtab file.
# If argument is given, returns a List of all child nodes belongs
# to the label node.
# (e.g. "eukaryotes" -> ["animals", "plants", "protists", "fungi"], ...)
def taxonomy(node = nil)
  node ? @taxonomy[node] : @taxonomy
end

# List of all node labels from Taxonomy section.
# (e.g. ["actinobacteria", "animals", "archaea", "bacillales", ...)
def taxa_list
  @taxonomy.keys.sort
end

# Returns the direct child nodes of _node_ (default: the 'genes' root),
# or nil when the label is not in the Taxonomy section.
def child_nodes(node = 'genes')
  @taxonomy[node]
end

# Returns an array of organism names included in the specified taxon
# label. (e.g. 'proteobeta' -> ["nme", "nma", "rso"])
# This method has taxo2keggorgs, taxon2korgs, and taxon2keggorgs aliases.
#
# Notes on the actual return value:
# * any label of exactly three characters is treated as an organism code
#   and returned as-is (a String, not an Array);
# * the result mirrors the taxonomy tree, so a taxon whose children are
#   themselves taxa yields nested Arrays;
# * an unknown label gives nil (also as an element of a parent's Array).
def taxo2korgs(node = 'genes')
  return node if node.length == 3

  children = @taxonomy[node]
  return nil unless children

  children.map { |child| taxo2korgs(child) }
end
alias taxo2keggorgs taxo2korgs
alias taxon2korgs taxo2korgs
alias taxon2keggorgs taxo2korgs

# Returns an array of taxonomy names the organism belongs.
# (e.g. 'eco' -> ['proteogamma','proteobacteria','eubacteria','genes'])
# This method has aliases as keggorg2taxo, korg2taxonomy, keggorg2taxonomy.
def korg2taxo(keggorg) tmp = Array.new traverse = Proc.new {|keggorg| @taxonomy.each do |k,v| if v.include?(keggorg) tmp.push(k) traverse.call(k) break end end } traverse.call(keggorg) return tmp end alias keggorg2taxo korg2taxo alias korg2taxonomy korg2taxo alias keggorg2taxonomy korg2taxo private def parse_keggtab(keggtab) in_taxonomy = nil keggtab.each do |line| case line when /^# Taxonomy/ # beginning of the taxonomy section in_taxonomy = true when /^#|^$/ next when /(^\w\S+)\s+(\w+)\s+(\$\S+)\s+(\w+)/ # db db_name = $1 db_type = $2 db_path = $3 db_abbrev = $4 @db_names[db_name] = Bio::KEGG::Keggtab::DB.new(db_name, db_type, db_path, db_abbrev) when /(^\w\S+)\s+alias\s+(\w.+\w)/ # alias db_alias = $1 db_name = $2#.downcase if in_taxonomy @taxonomy.update(db_alias => db_name.split('+')) elsif @db_names[db_name] @db_names[db_name].aliases.push(db_alias) end end end # convert keys-by-names hash @db_names to keys-by-abbrev hash @database @db_names.each do |k,v| @database[v.abbrev] = v end end end # Keggtab end # KEGG end # Bio if __FILE__ == $0 begin require 'pp' alias p pp rescue LoadError end if ARGV.empty? 
prefix = ENV['BIOROOT'] || '/bio' keggtab_file = "#{prefix}/etc/keggtab" else keggtab_file = ARGV.shift end puts "= Initialize: keggtab = Bio::KEGG::Keggtab.new(file)" keggtab = Bio::KEGG::Keggtab.new(keggtab_file) puts "\n--- Bio::KEGG::Keggtab#bioroot # -> String" p keggtab.bioroot puts "\n== Methods for DB section" puts "\n--- Bio::KEGG::Keggtab#database # -> Hash" p keggtab.database puts "\n--- Bio::KEGG::Keggtab#database('eco') # -> Keggtab::DB" p keggtab.database('eco') puts "\n--- Bio::KEGG::Keggtab#name('eco') # -> String" p keggtab.name('eco') puts "\n--- Bio::KEGG::Keggtab#path('eco') # -> String" p keggtab.path('eco') puts "\n--- Bio::KEGG::Keggtab#aliases(abbrev) # -> Array" puts "\n++ keggtab.aliases('eco')" p keggtab.aliases('eco') puts "\n++ keggtab.aliases('vg')" p keggtab.aliases('vg') puts "\n== Methods for Taxonomy section" puts "\n--- Bio::KEGG::Keggtab#taxonomy # -> Hash" p keggtab.taxonomy puts "\n--- Bio::KEGG::Keggtab#taxonomy('archaea') # -> Hash" p keggtab.taxonomy('archaea') puts "\n--- Bio::KEGG::Keggtab#taxa_list # -> Array" p keggtab.taxa_list puts "\n--- Bio::KEGG::Keggtab#taxo2korgs(node) # -> Array" puts "\n++ keggtab.taxo2korgs('proteobeta')" p keggtab.taxo2korgs('proteobeta') puts "\n++ keggtab.taxo2korgs('eubacteria')" p keggtab.taxo2korgs('eubacteria') puts "\n++ keggtab.taxo2korgs('archaea')" p keggtab.taxo2korgs('archaea') puts "\n++ keggtab.taxo2korgs('eukaryotes')" p keggtab.taxo2korgs('eukaryotes') puts "\n--- Bio::KEGG::Keggtab#korg2taxo(keggorg) # -> Array" puts "\n++ keggtab.korg2taxo('eco')" p keggtab.korg2taxo('eco') puts "\n++ keggtab.korg2taxo('plants')" p keggtab.korg2taxo('plants') end bio-1.4.3.0001/lib/bio/db/kegg/pathway.rb0000644000004100000410000001410612200110570017510 0ustar www-datawww-data# # = bio/db/kegg/pathway.rb - KEGG PATHWAY database class # # Copyright:: Copyright (C) 2010 Kozo Nishida # Copyright:: Copyright (C) 2010 Toshiaki Katayama # License:: The Ruby License # # $Id:$ # require 'bio/db' 
require 'bio/db/kegg/common' module Bio class KEGG # == Description # # Bio::KEGG::PATHWAY is a parser class for the KEGG PATHWAY database entry. # # == References # # * http://www.genome.jp/kegg/pathway.html # * ftp://ftp.genome.jp/pub/kegg/pathway/pathway # class PATHWAY < KEGGDB DELIMITER = RS = "\n///\n" TAGSIZE = 12 include Common::DblinksAsHash # Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field. def dblinks_as_hash; super; end if false #dummy for RDoc alias dblinks dblinks_as_hash include Common::PathwaysAsHash # Returns a Hash of the pathway ID and name in PATHWAY field. def pathways_as_hash; super; end if false #dummy for RDoc alias pathways pathways_as_hash include Common::OrthologsAsHash # Returns a Hash of the orthology ID and definition in ORTHOLOGY field. def orthologs_as_hash; super; end if false #dummy for RDoc alias orthologs orthologs_as_hash include Common::References # REFERENCE -- Returns contents of the REFERENCE records as an Array of # Bio::Reference objects. # --- # *Returns*:: an Array containing Bio::Reference objects def references; super; end if false #dummy for RDoc include Common::ModulesAsHash # Returns MODULE field as a Hash. # Each key of the hash is KEGG MODULE ID, # and each value is the name of the Pathway Module. # --- # *Returns*:: Hash def modules_as_hash; super; end if false #dummy for RDoc alias modules modules_as_hash #-- # for a private method strings_as_hash. #++ include Common::StringsAsHash # Creates a new Bio::KEGG::PATHWAY object. # --- # *Arguments*: # * (required) _entry_: (String) single entry as a string # *Returns*:: Bio::KEGG::PATHWAY object def initialize(entry) super(entry, TAGSIZE) end # Return the ID of the pathway, described in the ENTRY line. # --- # *Returns*:: String def entry_id field_fetch('ENTRY')[/\S+/] end # Name of the pathway, described in the NAME line. 
# ---
# *Returns*:: String
def name
  field_fetch('NAME')
end

# Description of the pathway, described in the DESCRIPTION line.
# ---
# *Returns*:: String
def description
  field_fetch('DESCRIPTION')
end
alias definition description

# Return the name of the KEGG class, described in the CLASS line.
# ---
# *Returns*:: String
def keggclass
  field_fetch('CLASS')
end

# Pathways described in the PATHWAY_MAP lines.
# ---
# *Returns*:: Array containing String
def pathways_as_strings
  lines_fetch('PATHWAY_MAP')
end

# Returns MODULE field of the entry.
# ---
# *Returns*:: Array containing String objects
def modules_as_strings
  lines_fetch('MODULE')
end

# Disease described in the DISEASE lines.
# ---
# *Returns*:: Array containing String
def diseases_as_strings
  lines_fetch('DISEASE')
end

# Diseases described in the DISEASE lines.
# The Hash is built once from diseases_as_strings and then cached.
# ---
# *Returns*:: Hash of disease ID and its definition
def diseases_as_hash
  @diseases_as_hash ||= strings_as_hash(diseases_as_strings)
end
alias diseases diseases_as_hash

# Returns an Array of a database name and entry IDs in DBLINKS field.
# ---
# *Returns*:: Array containing String
def dblinks_as_strings
  lines_fetch('DBLINKS')
end

# Orthologs described in the ORTHOLOGY lines.
# ---
# *Returns*:: Array containing String
def orthologs_as_strings
  lines_fetch('ORTHOLOGY')
end

# Organism described in the ORGANISM line.
# ---
# *Returns*:: String
def organism
  field_fetch('ORGANISM')
end

# Genes described in the GENE lines.
# ---
# *Returns*:: Array containing String
def genes_as_strings
  lines_fetch('GENE')
end

# Genes described in the GENE lines.
# The Hash is built once from genes_as_strings and then cached.
# ---
# *Returns*:: Hash of gene ID and its definition
def genes_as_hash
  @genes_as_hash ||= strings_as_hash(genes_as_strings)
end
alias genes genes_as_hash

# Enzymes described in the ENZYME lines.
# --- # *Returns*:: Array containing String def enzymes_as_strings lines_fetch('ENZYME') end alias enzymes enzymes_as_strings # Reactions described in the REACTION lines. # --- # *Returns*:: Array containing String def reactions_as_strings lines_fetch('REACTION') end # Reactions described in the REACTION lines. # --- # *Returns*:: Hash of reaction ID and its definition def reactions_as_hash unless @reactions_as_hash @reactions_as_hash = strings_as_hash(reactions_as_strings) end @reactions_as_hash end alias reactions reactions_as_hash # Compounds described in the COMPOUND lines. # --- # *Returns*:: Array containing String def compounds_as_strings lines_fetch('COMPOUND') end # Compounds described in the COMPOUND lines. # --- # *Returns*:: Hash of compound ID and its definition def compounds_as_hash unless @compounds_as_hash @compounds_as_hash = strings_as_hash(compounds_as_strings) end @compounds_as_hash end alias compounds compounds_as_hash # Returns REL_PATHWAY field of the entry. # --- # *Returns*:: Array containing String objects def rel_pathways_as_strings lines_fetch('REL_PATHWAY') end # Returns REL_PATHWAY field as a Hash. Each key of the hash is # Pathway ID, and each value is the name of the pathway. # --- # *Returns*:: Hash def rel_pathways_as_hash unless defined? @rel_pathways_as_hash then hash = {} rel_pathways_as_strings.each do |line| entry_id, name = line.split(/\s+/, 2) hash[entry_id] = name end @rel_pathways_as_hash = hash end @rel_pathways_as_hash end alias rel_pathways rel_pathways_as_hash # KO pathway described in the KO_PATHWAY line. 
# --- # *Returns*:: String def ko_pathway field_fetch('KO_PATHWAY') end end # PATHWAY end # KEGG end # Bio bio-1.4.3.0001/lib/bio/db/kegg/brite.rb0000644000004100000410000000117712200110570017144 0ustar www-datawww-data# # = bio/db/kegg/brite.rb - KEGG/BRITE database class # # Copyright:: Copyright (C) 2001 Toshiaki Katayama # License:: The Ruby License # # $Id: brite.rb,v 0.8 2007/04/05 23:35:41 trevor Exp $ # require 'bio/db' module Bio class KEGG # == Note # # This class is not completely implemented, but obsolete as the original # database BRITE has changed it's meaning. # class BRITE < KEGGDB DELIMITER = RS = "\n///\n" TAGSIZE = 12 def initialize(entry) super(entry, TAGSIZE) end # ENTRY # DEFINITION # RELATION # FACTORS # INTERACTION # SOURCE # REFERENCE end # BRITE end # KEGG end # Bio bio-1.4.3.0001/lib/bio/db/kegg/compound.rb0000644000004100000410000000501512200110570017656 0ustar www-datawww-data# # = bio/db/kegg/compound.rb - KEGG COMPOUND database class # # Copyright:: Copyright (C) 2001, 2002, 2004, 2007 Toshiaki Katayama # License:: The Ruby License # # $Id:$ # require 'bio/db' require 'bio/db/kegg/common' module Bio class KEGG # == Description # # Bio::KEGG::COMPOUND is a parser class for the KEGG COMPOUND database entry. # KEGG COMPOUND is a chemical structure database. # # == References # # * http://www.genome.jp/kegg/compound/ # class COMPOUND < KEGGDB DELIMITER = RS = "\n///\n" TAGSIZE = 12 include Common::DblinksAsHash # Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field. def dblinks_as_hash; super; end if false #dummy for RDoc alias dblinks dblinks_as_hash include Common::PathwaysAsHash # Returns a Hash of the pathway ID and name in PATHWAY field. def pathways_as_hash; super; end if false #dummy for RDoc alias pathways pathways_as_hash # Creates a new Bio::KEGG::COMPOUND object. 
module Bio
class KEGG

# Holds the intensity values of a KEGG EXPRESSION data set and derives
# simple statistics, log2 ratios and colour codes from them.
#
# Input is tab separated text, one ORF per line; lines starting with '#'
# are ignored. The first column is the ORF name. From the remaining
# columns the four values at positions 2..5 are kept as Floats. Throughout
# this class v[0] - v[1] is treated as the control intensity and
# v[2] - v[3] as the target intensity (presumably signal minus background;
# the file itself does not say).
class EXPRESSION

  # Parses the data set.
  # ---
  # *Arguments*:
  # * (required) _entry_: (String) whole data set, lines separated by "\n"
  def initialize(entry)
    @orf2val   = Hash.new('')
    @orf2rgb   = Hash.new('')
    @orf2ratio = Hash.new('')
    @max_intensity = 10000
    entry.split("\n").each do |line|
      next if /^#/ =~ line
      # Blank lines (including a stray "\r") carry no data; skipping them
      # avoids calling collect on nil further down.
      next if line.strip.empty?
      columns = line.split("\t")
      orf = columns.shift
      @orf2val[orf] = columns[2, 4].collect { |x| x.to_f }
    end
  end

  # Hash: ORF name => Array of the four Float values read for it.
  attr_reader :orf2val
  # Hash: ORF name => "rrggbb" String, filled in by #val2rgb.
  attr_reader :orf2rgb
  # Hash: ORF name => log2 ratio (Float), filled in by #logy_minus_logx.
  attr_reader :orf2ratio
  # Intensity used to scale colour levels in #val2rgb (fixed to 10000).
  attr_reader :max_intensity

  # Mean control intensity. NaN when no ORF was read.
  def control_avg
    mean(control_intensities)
  end

  # Mean target intensity. NaN when no ORF was read.
  def target_avg
    mean(target_intensities)
  end

  # Population variance of the control intensities.
  def control_var
    variance(control_intensities)
  end

  # Population variance of the target intensities.
  def target_var
    variance(target_intensities)
  end

  # Standard deviation of the control intensities.
  def control_sd
    Math.sqrt(control_var)
  end

  # Standard deviation of the target intensities.
  def target_sd
    Math.sqrt(target_var)
  end

  # ORFs ordered by descending log2 ratio.
  #
  # Without _threshold_ the first _num_ entries are returned. With a
  # _threshold_, _num_ is ignored and exactly the leading entries whose
  # ratio is greater than _threshold_ are returned (possibly none, possibly
  # all of them).
  # ---
  # *Returns*:: Array of [orf, ratio] pairs
  def up_regulated(num=20, threshold=nil)
    logy_minus_logx
    ranked = @orf2ratio.to_a.sort { |a, b| b[1] <=> a[1] }
    select_ranked(ranked, num, threshold) { |ratio| ratio > threshold }
  end

  # ORFs ordered by ascending log2 ratio; see #up_regulated. With a
  # _threshold_ the entries whose ratio is less than it are returned.
  # ---
  # *Returns*:: Array of [orf, ratio] pairs
  def down_regulated(num=20, threshold=nil)
    logy_minus_logx
    ranked = @orf2ratio.to_a.sort { |a, b| a[1] <=> b[1] }
    select_ranked(ranked, num, threshold) { |ratio| ratio < threshold }
  end

  # ORFs ordered by descending absolute log2 ratio; see #up_regulated. With
  # a _threshold_ the entries whose absolute ratio exceeds it are returned.
  # ---
  # *Returns*:: Array of [orf, ratio] pairs
  def regulated(num=20, threshold=nil)
    logy_minus_logx
    ranked = @orf2ratio.to_a.sort { |a, b| b[1].abs <=> a[1].abs }
    select_ranked(ranked, num, threshold) { |ratio| ratio.abs > threshold }
  end

  # Fills #orf2ratio with log2(target intensity) - log2(control intensity)
  # for every ORF. Math.log10 decides what happens for non-positive
  # intensities; nothing is caught here.
  def logy_minus_logx
    to_log2 = 1.0/Math.log10(2)
    @orf2val.each do |k, v|
      @orf2ratio[k] = to_log2*(Math.log10(v[2]-v[3]) - Math.log10(v[0]-v[1]))
    end
  end

  # Fills #orf2rgb with an "rrggbb" String per ORF: red encodes the target
  # intensity, green the control intensity, blue is always "00".
  def val2rgb
    # Integer division on purpose left as in the original: with the fixed
    # max_intensity of 10000 one colour step is 39 intensity units.
    col_unit = @max_intensity/255
    @orf2val.each do |k, v|
      g = hex_level((v[0] - v[1])/col_unit)
      r = hex_level((v[2] - v[3])/col_unit)
      @orf2rgb[k] = r + g + "00"
    end
  end

  private

  # Control intensity of every ORF, in Hash order.
  def control_intensities
    @orf2val.values.collect { |v| v[0] - v[1] }
  end

  # Target intensity of every ORF, in Hash order.
  def target_intensities
    @orf2val.values.collect { |v| v[2] - v[3] }
  end

  # Arithmetic mean; 0.0/0 (NaN) for an empty list.
  def mean(values)
    values.inject(0.0) { |sum, x| sum + x } / values.size
  end

  # Population variance (divides by n, not n - 1).
  def variance(values)
    avg = mean(values)
    values.inject(0.0) { |sum, x| sum + (x - avg)*(x - avg) } / values.size
  end

  # Cuts an already sorted list of [orf, ratio] pairs. The block receives a
  # ratio and tells whether it still passes the threshold. take_while stops
  # at the first failing pair without including it and copes with an empty
  # list or a list in which every pair passes.
  def select_ranked(ranked, num, threshold)
    return ranked.first(num) if threshold.nil?
    ranked.take_while { |_orf, ratio| yield(ratio) }
  end

  # Two-digit hex String for a colour level, clamped to 0..255 so that
  # negative levels cannot produce strings such as "..f".
  def hex_level(level)
    format("%02x", [[level.to_i, 0].max, 255].min)
  end

end # class EXPRESSION

end # class KEGG
end # module Bio
require 'bio/db'
require 'bio/db/kegg/common'

module Bio
class KEGG

# == Description
#
# Bio::KEGG::DRUG is a parser class for one KEGG DRUG database entry
# (KEGG DRUG is a drug information database). Fields are read through
# +field_fetch+, +lines_fetch+ and +get+, all defined outside this class.
#
# == References
#
# * http://www.genome.jp/kegg/drug/
#
class DRUG < KEGGDB

  DELIMITER = RS = "\n///\n"
  TAGSIZE = 12

  include Common::DblinksAsHash
  # DBLINKS field as a Hash of DB name => Array of entry IDs.
  def dblinks_as_hash; super; end if false #dummy for RDoc
  alias dblinks dblinks_as_hash

  include Common::PathwaysAsHash
  # PATHWAY field as a Hash of pathway ID => name.
  def pathways_as_hash; super; end if false #dummy for RDoc
  alias pathways pathways_as_hash

  # Creates a new Bio::KEGG::DRUG object.
  # ---
  # *Arguments*:
  # * (required) _entry_: (String) single entry as a string
  # *Returns*:: Bio::KEGG::DRUG object
  def initialize(entry)
    super(entry, TAGSIZE)
  end

  # ID of the entry: the first non-blank token of the ENTRY line.
  # ---
  # *Returns*:: String
  def entry_id
    field_fetch('ENTRY').slice(/\S+/)
  end

  # All names from the NAME field, split at ';' (surrounding whitespace
  # is dropped).
  # ---
  # *Returns*:: Array containing String objects
  def names
    name_field = field_fetch('NAME')
    name_field.split(/\s*;\s*/)
  end

  # The first entry of #names.
  # ---
  # *Returns*:: String
  def name
    names[0]
  end

  # Chemical formula (FORMULA line).
  # ---
  # *Returns*:: String
  def formula
    field_fetch('FORMULA')
  end

  # Molecular weight: the MASS line converted with to_f.
  # ---
  # *Returns*:: Float
  def mass
    mass_field = field_fetch('MASS')
    mass_field.to_f
  end

  # Biological or chemical activity (ACTIVITY line).
  # ---
  # *Returns*:: String
  def activity
    field_fetch('ACTIVITY')
  end

  # REMARK lines.
  # ---
  # *Returns*:: String
  def remark
    field_fetch('REMARK')
  end

  # Raw PATHWAY lines (KEGG Pathway IDs with short descriptions).
  # ---
  # *Returns*:: Array containing String objects
  def pathways_as_strings
    lines_fetch('PATHWAY')
  end

  # Raw DBLINKS lines (database names and IDs).
  # ---
  # *Returns*:: Array containing String objects
  def dblinks_as_strings
    lines_fetch('DBLINKS')
  end

  # The ATOM block immediately followed by the BOND block.
  # ---
  # *Returns*:: String
  def kcf
    atom_part = get('ATOM').to_s
    bond_part = get('BOND').to_s
    atom_part + bond_part
  end

  # COMMENT lines.
  # ---
  # *Returns*:: String
  def comment
    field_fetch('COMMENT')
  end

  # Raw PRODUCTS lines (product names).
  # ---
  # *Returns*:: Array containing String objects
  def products
    lines_fetch('PRODUCTS')
  end

end # DRUG

end # KEGG
end # Bio
# # File format 1: # # ## begin ## # "Contig 1" (1,934) # Contig Length: 934 bases # Average Length/Sequence: 467 bases # Total Sequence Length: 1869 bases # Top Strand: 2 sequences # Bottom Strand: 2 sequences # Total: 4 sequences # ^^ # ATGACGTATCCAAAGAGGCGTTACCGGAGAAGAAGACACCGCCCCCGCAGTCCTCTTGGCCAGATCCTCCGCCGCCGCCCCTGGCTCGTCCACCCCCGCCACAGTTACCGCTGGAGAAGGAAAAATGGCATCTTCAWCACCCGCCTATCCCGCAYCTTCGGAWRTACTATCAAGCGAACCACAGTCAGAACGCCCTCCTGGGCGGTGGACATGATGAGATTCAATATTAATGACTTTCTTCCCCCAGGAGGGGGCTCAAACCCCCGCTCTGTGCCCTTTGAATACTACAGAATAAGAAAGGTTAAGGTTGAATTCTGGCCCTGCTCCCCGATCACCCAGGGTGACAGGGGAATGGGCTCCAGTGCTGWTATTCTAGMTGATRRCTTKGTAACAAAGRCCACAGCCCTCACCTATGACCCCTATGTAAACTTCTCCTCCCGCCATACCATAACCCAGCCCTTCTCCTACCRCTCCCGYTACTTTACCCCCAAACCTGTCCTWGATKCCACTATKGATKACTKCCAACCAAACAACAAAAGAAACCAGCTGTGGSTGAGACTACAWACTGCTGGAAATGTAGACCWCGTAGGCCTSGGCACTGCGTKCGAAAACAGTATATACGACCAGGAATACAATATCCGTGTMACCATGTATGTACAATTCAGAGAATTTAATCTTAAAGACCCCCCRCTTMACCCKTAATGAATAATAAMAACCATTACGAAGTGATAAAAWAGWCTCAGTAATTTATTYCATATGGAAATTCWSGGCATGGGGGGGAAAGGGTGACGAACKKGCCCCCTTCCTCCSTSGMYTKTTCYGTAGCATTCYTCCAMAAYACCWAGGCAGYAMTCCTCCSATCAAGAGcYTSYACAGCTGGGACAGCAGTTGAGGAGGACCATTCAAAGGGGGTCGGATTGCTGGTAATCAGA # ## end ## # # # File format 2: # # ## begin ## # ^^: 350,935 # Contig 1 (1,935) # Contig Length: 935 bases # Average Length/Sequence: 580 bases # Total Sequence Length: 2323 bases # Top Strand: 2 sequences # Bottom Strand: 2 sequences # Total: 4 sequences # ^^ # 
ATGTCGGGGAAATGCTTGACCGCGGGCTACTGCTCATCATTGCTTTCTTTGTGGTATATCGTGCCGTTCTGTTTTGCTGTGCTCGTCAACGCCAGCGGCGACAGCAGCTCTCATTTTCAGTCGATTTATAACTTGACGTTATGTGAGCTGAATGGCACGAACTGGCTGGCAGACAACTTTAACTGGGCTGTGGAGACTTTTGTCATCTTCCCCGTGTTGACTCACATTGTTTCCTATGGTGCACTCACTACCAGTCATTTTCTTGACACAGTTGGTCTAGTTACTGTGTCTACCGCCGGGTTTTATCACGGGCGGTACGTCTTGAGTAGCATCTACGCGGTCTGTGCTCTGGCTGCGTTGATTTGCTTCGCCATCAGGTTTGCGAAGAACTGCATGTCCTGGCGCTACTCTTGCACTAGATACACCAACTTCCTCCTGGACACCAAGGGCAGACTCTATCGTTGGCGGTCGCCTGTCATCATAGAGAAAGGGGGTAAGGTTGAGGTCGAAGGTCATCTGATCGATCTCAAAAGAGTTGTGCTTGATGGCTCTGTGGCGACACCTTTAACCAGAGTTTCAGCGGAACAATGGGGTCGTCCCTAGACGACTTTTGCCATGATAGTACAGCCCCACAGAAGGTGCTCTTGGCGTTTTCCATCACCTACACGCCAGTGATGATATATGCCCTAAAGGTAAGCCGCGGCCGACTTTTGGGGCTTCTGCACCTTTTGATTTTTTTGAACTGTGCCTTTACTTTCGGGTACATGACATTCGTGCACTTTCGGAGCACGAACAAGGTCGCGCTCACTATGGGAGCAGTAGTCGCACTCCTTTGGGGGGTGTACTCAGCCATAGAAACCTGGAAATTCATCACCTCCAGATGCCGTTGTGCTTGCTAGGCCGCAAGTACATTCTGGCCCCTGCCCACCACGTTG # ## end ## # # File format 3 (non-standard Lasergene header): # # ## begin ## # LOCUS PRU87392 15411 bp RNA linear VRL 17-NOV-2000 # DEFINITION Porcine reproductive and respiratory syndrome virus strain VR-2332, # complete genome. # ACCESSION U87392 AF030244 U00153 # VERSION U87392.3 GI:11192298 # [...cut...] 
# 3'UTR 15261..15411 # polyA_site 15409 # ORIGIN # ^^ # atgacgtataggtgttggctctatgccttggcatttgtattgtcaggagctgtgaccattggcacagcccaaaacttgctgcacagaaacacccttctgtgatagcctccttcaggggagcttagggtttgtccctagcaccttgcttccggagttgcactgctttacggtctctccacccctttaaccatgtctgggatacttgatcggtgcacgtgtacccccaatgccagggtgtttatggcggagggccaagtctactgcacacgatgcctcagtgcacggtctctccttcccctgaacctccaagtttctgagctcggggtgctaggcctattctacaggcccgaagagccactccggtggacgttgccacgtgcattccccactgttgagtgctcccccgccggggcctgctggctttctgcaatctttccaatcgcacgaatgaccagtggaaacctgaacttccaacaaagaatggtacgggtcgcagctgagctttacagagccggccagctcacccctgcagtcttgaaggctctacaagtttatgaacggggttgccgctggtaccccattgttggacctgtccctggagtggccgttttcgccaattccctacatgtgagtgataaacctttcccgggagcaactcacgtgttgaccaacctgccgctcccgcagagacccaagcctgaagacttttgcccctttgagtgtgctatggctactgtctatgacattggtcatgacgccgtcatgtatgtggccgaaaggaaagtctcctgggcccctcgtggcggggatgaagtgaaatttgaagctgtccccggggagttgaagttgattgcgaaccggctccgcacctccttcccgccccaccacacagtggacatgtctaagttcgccttcacagcccctgggtgtggtgtttctatgcgggtcgaacgccaacacggctgccttcccgctgacactgtccctgaaggcaactgctggtggagcttgtttgacttgcttccactggaagttcagaacaaagaaattcgccatgctaaccaatttggctaccagaccaagcatggtgtctctggcaagtacctacagcggaggctgca[...cut...] 
# Reader for one DNAStar Lasergene sequence file.
#
# The input consists of an optional '^^:' line, a free-text header, a '^^'
# separator line and the sequence data. When the first header line looks
# like 'NAME (n,m)' the header counts as a standard Lasergene header and
# its numeric fields are parsed into the readers below.
class Lasergene
  # Entire header before the sequence
  attr_reader :comments

  # Sequence
  #
  # Bio::Sequence::NA or Bio::Sequence::AA object
  attr_reader :sequence

  # Name of sequence
  # * Parsed from standard Lasergene header
  attr_reader :name

  # Contig length, length of present sequence
  # * Parsed from standard Lasergene header
  attr_reader :contig_length

  # Average length per sequence
  # * Parsed from standard Lasergene header
  attr_reader :average_length

  # Length of parent sequence
  # * Parsed from standard Lasergene header
  attr_reader :total_length

  # Number of top strand sequences
  # * Parsed from standard Lasergene header
  attr_reader :top_strand_sequences

  # Number of bottom strand sequences
  # * Parsed from standard Lasergene header
  attr_reader :bottom_strand_sequences

  # Number of sequences
  # * Parsed from standard Lasergene header
  attr_reader :total_sequences

  DELIMITER_1 = '^\^\^:' # Match '^^:' at the beginning of a line
  DELIMITER_2 = '^\^\^'  # Match '^^' at the beginning of a line

  # ---
  # *Arguments*
  # * _lines_: the file content, either as one String or as an Array of
  #   line Strings
  def initialize(lines)
    process(lines)
  end

  # Is the comment header recognized as standard Lasergene format?
  #
  # ---
  # *Arguments*
  # * _none_
  # *Returns*:: +true+ _or_ +false+
  def standard_comment?
    @standard_comment
  end

  # Sequence
  #
  # Bio::Sequence::NA or Bio::Sequence::AA object
  def seq
    @sequence
  end

  # Name of sequence
  # * Parsed from standard Lasergene header
  def entry_id
    @name
  end

  #########
  protected
  #########

  # Splits the input into header and sequence and fills every attribute.
  #
  # Raises InputError (resolved from the enclosing scope; it is not defined
  # in this class) when a delimiter occurs more than once or the '^^'
  # separator is missing.
  def process(lines)
    # If the data from the file is passed as one big String instead of
    # broken into an Array, convert lines to an Array
    lines = lines.tr("\r", '').split("\n") if lines.kind_of? String

    # Compile the patterns once instead of once per line.
    header_marker = Regexp.new(DELIMITER_1)
    data_marker   = Regexp.new(DELIMITER_2)

    delimiter_1_indices = []
    delimiter_2_indices = []
    lines.each_with_index do |line, index|
      # DELIMITER_2 also matches a '^^:' line, so DELIMITER_1 is tried first.
      if line.match header_marker
        delimiter_1_indices << index
      elsif line.match data_marker
        delimiter_2_indices << index
      end
    end

    raise InputError, "More than one delimiter of type '#{DELIMITER_1}'" if delimiter_1_indices.size > 1
    raise InputError, "More than one delimiter of type '#{DELIMITER_2}'" if delimiter_2_indices.size > 1
    raise InputError, "No comment to data separator of type '#{DELIMITER_2}'" if delimiter_2_indices.size < 1

    data_marker_index = delimiter_2_indices[0]
    # toss out DELIMITER_1 and anything preceding it
    header_start = delimiter_1_indices.empty? ? 0 : delimiter_1_indices[0] + 1
    # Exclusive range: with the separator on the very first line an
    # inclusive 0..-1 would select the whole file as header.
    @comments = lines[header_start...data_marker_index]

    @standard_comment = false
    if @comments[0] =~ %r{(.+)\s+\(\d+,\d+\)}
      # if we have a standard Lasergene comment
      @standard_comment = true
      @name = $1
      @comments.each { |comment| parse_header_field(comment) }
    end

    @comments = @comments.join('')
    @sequence = Bio::Sequence.auto( lines[ (data_marker_index + 1) .. -1 ].join('') )
  end

  # Stores the number found in one line of a standard header, if the line
  # is one of the known fields; other lines are ignored.
  def parse_header_field(comment)
    case comment
    when %r{Contig Length:\s+(\d+)}           then @contig_length = $1.to_i
    when %r{Average Length/Sequence:\s+(\d+)} then @average_length = $1.to_i
    when %r{Total Sequence Length:\s+(\d+)}   then @total_length = $1.to_i
    when %r{Top Strand:\s+(\d+)}              then @top_strand_sequences = $1.to_i
    when %r{Bottom Strand:\s+(\d+)}           then @bottom_strand_sequences = $1.to_i
    when %r{Total:\s+(\d+)}                   then @total_sequences = $1.to_i
    end
  end

end # Lasergene
# Autoload definition
module Bio
  module PhyloXML
    autoload :Parser, 'bio/db/phyloxml/phyloxml_parser'
    autoload :Writer, 'bio/db/phyloxml/phyloxml_writer'
  end
end

module Bio

  # General-purpose taxonomy record: a plain container of attributes.
  class Taxonomy
    # Code/abbreviation/mnemonic of the taxon, such as Bsu for Bacillus
    # subtilis. Pattern: [a-zA-Z0-9_]{2,10}
    attr_accessor :code

    # String.
    attr_accessor :scientific_name

    # An array of strings
    attr_accessor :common_names

    # value comes from list: domain kingdom, subkingdom, branch, infrakingdom,
    # superphylum, phylum, subphylum, infraphylum, microphylum, superdivision,
    # division, subdivision, infradivision, superclass, class, subclass,
    # infraclass, superlegion, legion, sublegion, infralegion, supercohort,
    # cohort, subcohort, infracohort, superorder, order, suborder,
    # superfamily, family, subfamily, supertribe, tribe, subtribe, infratribe,
    # genus, subgenus, superspecies, species, subspecies, variety, subvariety,
    # form, subform, cultivar, unknown, other
    attr_accessor :rank

    # Authority associated with the 'scientific_name', such as
    # 'J. G. Cooper, 1863'.
    attr_accessor :authority

    # An array of strings. Holds synonyms for scientific names or common names.
    attr_accessor :synonyms

    # Starts with empty lists of common names and synonyms.
    def initialize
      @synonyms = []
      @common_names = []
    end
  end

  module PhyloXML

    # Taxonomy element of PhyloXML: Bio::Taxonomy plus the attributes that
    # only exist on the XML level.
    class Taxonomy < Bio::Taxonomy
      # String. Unique identifier of a taxon.
      attr_accessor :taxonomy_id

      # Used to link other elements to a taxonomy (on the xml-level)
      attr_accessor :id_source

      # Uri object
      attr_accessor :uri

      # Array of Other objects. Used to save additional information from
      # other than PhyloXML namespace.
      attr_accessor :other

      # Starts with the empty lists of Bio::Taxonomy and an empty +other+.
      def initialize
        super
        @other = []
      end

      # Converts elements to xml representation. Called by PhyloXML::Writer
      # class.
      # ---
      # *Returns*:: the 'taxonomy' LibXML::XML::Node that was built
      def to_xml
        element = LibXML::XML::Node.new('taxonomy')
        element["type"] = @type unless @type.nil?
        element["id_source"] = @id_source unless @id_source.nil?

        code_pattern = Regexp.new("^[a-zA-Z0-9_]{2,10}$")
        children = [
          [:complex, 'id', @taxonomy_id],
          [:pattern, 'code', @code, code_pattern],
          [:simple, 'scientific_name', @scientific_name],
          [:simple, 'authority', @authority],
          [:simplearr, 'common_name', @common_names],
          [:simplearr, 'synonym', @synonyms],
          [:simple, 'rank', @rank],
          [:complex, 'uri', @uri]
        ]
        PhyloXML::Writer.generate_xml(element, self, children)
        #@todo anything else

        element
      end
    end

  end
end
# == Description
# Class to hold clade element of phyloXML.
class Node

  # Events at the root node of a clade (e.g. one gene duplication).
  attr_accessor :events

  # String. Used to link other elements to a clade (node) (on the xml-level).
  attr_accessor :id_source

  # String. Name of the node.
  attr_accessor :name

  # Float. Branch width for this node (including parent branch). Applies for
  # the whole clade unless overwritten in sub-clades.
  attr_reader :width

  # Stores the width converted with to_f.
  def width=(str)
    @width = str.to_f
  end

  # Array of Taxonomy objects. Describes taxonomic information for a clade.
  attr_accessor :taxonomies

  # Array of Confidence objects. Indicates the support for a clade/parent
  # branch.
  attr_accessor :confidences

  # BranchColor object. Apply for the whole clade unless overwritten in
  # sub-clade.
  attr_accessor :color

  # Id object
  attr_accessor :node_id

  # Array of Sequence objects. Represents a molecular sequence (Protein, DNA,
  # RNA) associated with a node.
  attr_accessor :sequences

  # BinaryCharacters object. The names and/or counts of binary characters
  # present, gained, and lost at the root of a clade.
  attr_accessor :binary_characters

  # Array of Distribution objects. The geographic distribution of the items
  # of a clade (species, sequences), intended for phylogeographic
  # applications.
  attr_accessor :distributions

  # Date object. A date associated with a clade/node.
  attr_accessor :date

  # Array of Reference objects. A literature reference for a clade.
  attr_accessor :references

  # An array of Property objects, for example depth for sea animals.
  attr_accessor :properties

  # Array of Other objects. Used to save additional information from other
  # than PhyloXML namespace.
  attr_accessor :other

  # Every list-valued attribute starts out as an empty Array.
  def initialize
    @confidences = []
    @sequences = []
    @taxonomies = []
    @distributions = []
    @references = []
    @properties = []
    @other = []
  end

  # Converts to a Bio::Tree::Node object. If it contains several taxonomies,
  # the scientific name and taxonomy id of the first taxonomy are used.
  #
  # If there are several confidence values, the first with bootstrap type
  # will be returned as Bio::Tree::Node#bootstrap
  #
  #  tree = phyloxmlparser.next_tree
  #
  #  node = tree.get_node_by_name("A").to_biotreenode
  #
  # ---
  # *Returns*:: Bio::Tree::Node
  def to_biotreenode
    node = Bio::Tree::Node.new
    node.name = @name

    #@todo what if there are more?
    first_taxonomy = @taxonomies[0]
    unless first_taxonomy.nil?
      node.scientific_name = first_taxonomy.scientific_name
      # The accessor is taxonomy_id; the former spelling "taxononmy_id"
      # raised NoMethodError for every node that carried a taxonomy.
      node.taxonomy_id = first_taxonomy.taxonomy_id
    end

    bootstrap = @confidences.find { |confidence| confidence.type == "bootstrap" }
    node.bootstrap = bootstrap.value unless bootstrap.nil?

    node
  end

  # Extracts the relevant information from node (specifically taxonomy and
  # sequence) to create Bio::Sequence object. Node can have several
  # sequences, so parameter to this method is to specify which sequence to
  # extract.
  #
  # The classification of the result lists the scientific names of all
  # taxonomies; a taxonomy of rank "species" also sets the species.
  #
  # ---
  # *Arguments*:
  # * (optional) _seq_i_: index into #sequences, 0 by default
  # *Returns*:: Bio::Sequence
  def extract_biosequence(seq_i=0)
    seq = @sequences[seq_i].to_biosequence
    seq.classification = []
    @taxonomies.each do |t|
      seq.classification << t.scientific_name
      seq.species = t.scientific_name if t.rank == "species"
    end

    #seq.division => .. http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#3_2
    # It doesn't seem there is anything in PhyloXML corresponding to this.

    seq
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # ---
  # *Arguments*:
  # * _branch_length_: value written as the branch length; nothing is
  #   written when it is nil
  # * _write_branch_length_as_subelement_: when true the branch length
  #   becomes a 'branch_length' child element, otherwise an attribute
  # *Returns*:: the 'clade' LibXML::XML::Node that was built
  def to_xml(branch_length,  write_branch_length_as_subelement)
    clade = LibXML::XML::Node.new('clade')

    PhyloXML::Writer.generate_xml(clade, self, [[:simple, 'name', @name]])

    unless branch_length.nil?
      if write_branch_length_as_subelement
        clade << LibXML::XML::Node.new('branch_length', branch_length.to_s)
      else
        clade["branch_length"] = branch_length.to_s
      end
    end

    #generate all elements, except clade
    PhyloXML::Writer.generate_xml(clade, self, [
        [:attr, "id_source"],
        [:objarr, 'confidence', 'confidences'],
        [:simple, 'width', @width],
        # The branch colour lives in @color (see attr_accessor :color);
        # @branch_color was never assigned, so the colour was never written.
        [:complex, 'branch_color', @color],
        [:simple, 'node_id', @node_id],
        [:objarr, 'taxonomy', 'taxonomies'],
        [:objarr, 'sequence', 'sequences'],
        [:complex, 'events', @events],
        [:complex, 'binary_characters', @binary_characters],
        [:objarr, 'distribution', 'distributions'],
        [:complex, 'date', @date],
        [:objarr, 'reference', 'references'],
        [:objarr, 'property', 'properties']])

    clade
  end

end #Node
def to_xml #@todo add unit test events = LibXML::XML::Node.new('events') PhyloXML::Writer.generate_xml(events, self, [ [:simple, 'type', @type], [:simple, 'duplications', @duplications], [:simple, 'speciations', @speciations], [:simple, 'losses', @losses], [:complex, 'confidence', @confidence]]) return events end end # A general purpose confidence element. For example this can be used to express # the bootstrap support value of a clade (in which case the 'type' attribute # is 'bootstrap'). class Confidence # String. The type of confidence measure, for example, bootstrap. attr_accessor :type # Float. The value of confidence measure. attr_accessor :value def initialize(type, value) @type = type @value = value.to_f end # Converts elements to xml representation. Called by PhyloXML::Writer class. def to_xml if @type == nil raise "Type is a required attribute for confidence." else confidence = LibXML::XML::Node.new('confidence', @value.to_s) confidence["type"] = @type return confidence end end end # == Description # # The geographic distribution of the items of a clade (species, sequences), # intended for phylogeographic applications. class Distribution # String. Free text description of location. attr_accessor :desc # Array of Point objects. Holds coordinates of the location. attr_accessor :points # Array of Polygon objects. attr_accessor :polygons def initialize @points = [] @polygons = [] end # Converts elements to xml representation. Called by PhyloXML::Writer class. def to_xml distr = LibXML::XML::Node.new('distribution') PhyloXML::Writer.generate_xml(distr, self, [ [:simple, 'desc', @desc], [:objarr, 'point', 'points'], [:objarr, 'polygon', 'polygons']]) return distr end end #Distribution class # == Description # # The coordinates of a point with an optional altitude. Required attribute # 'geodetic_datum' is used to indicate the geodetic datum (also called # 'map datum'), for example Google's KML uses 'WGS84'. class Point # Float. 
Latitude attr_accessor :lat # Float. Longitute attr_accessor :long # Float. Altitude attr_accessor :alt # String. Altitude unit. attr_accessor :alt_unit # Geodedic datum / map datum attr_accessor :geodetic_datum def lat=(str) @lat = str.to_f unless str.nil? end def long=(str) @long = str.to_f unless str.nil? end def alt=(str) @alt = str.to_f unless str.nil? end # Converts elements to xml representation. Called by PhyloXML::Writer class. def to_xml raise "Geodedic datum is a required attribute of Point element." if @geodetic_datum.nil? p = LibXML::XML::Node.new('point') p["geodetic_datum"] = @geodetic_datum p["alt_unit"] = @alt_unit if @alt_unit != nil PhyloXML::Writer.generate_xml(p, self, [ [:simple, 'lat', @lat], [:simple, 'long', @long], [:simple, 'alt', @alt]]) return p #@todo check if characters are correctly generated, like Zuric end end # == Description # # A polygon defined by a list of Points objects. class Polygon # Array of Point objects. attr_accessor :points def initialize @points = [] end # Converts elements to xml representation. Called by PhyloXML::Writer class. def to_xml if @points.length > 2 pol = LibXML::XML::Node.new('polygon') @points.each do |p| pol << p.to_xml end return pol end end end # == Description # Element Sequence is used to represent a molecular sequence (Protein, DNA, # RNA) associated with a node. class Sequence # Type of sequence (rna, dna, protein) attr_accessor :type # Full name (e.g. muscle Actin ) attr_accessor :name # String. Used to link with other elements. attr_accessor :id_source # String. One intended use for 'id_ref' is to link a sequence to a taxonomy # (via the taxonomy's 'id_source') in the case of multiple sequences and taxonomies per node. attr_accessor :id_ref # short (maximal ten characters) symbol of the sequence (e.g. 'ACTM') attr_accessor :symbol # Accession object. Holds source and identifier for the sequence. attr_accessor :accession # String. 
Location of a sequence on a genome/chromosome attr_accessor :location # String. The actual sequence is stored here. attr_reader :mol_seq # Boolean. used to indicated that this molecular sequence is aligned with # all other sequences in the same phylogeny for which 'is aligned' is true # as well (which, in most cases, means that gaps were introduced, and that # all sequences for which 'is aligned' is true must have the same length) attr_reader :is_aligned # Uri object attr_accessor :uri # Array of Annotation objects. Annotations of molecular sequence. attr_accessor :annotations # DomainArchitecture object. Describes domain architecture of a protein. attr_accessor :domain_architecture # Array of Other objects. Used to save additional information from other than # PhyloXML namspace. attr_accessor :other def initialize @annotations = [] @other = [] end def is_aligned=(str) if str=='true' @is_aligned=true elsif str=='false' @is_aligned = false else @is_aligned = nil end end def is_aligned? @is_aligned end def mol_seq=(str) if str =~ /^[a-zA-Z\.\-\?\*_]+$/ @mol_seq = str else raise "mol_seq element of Sequence does not follow the pattern." end end # Converts elements to xml representation. Called by PhyloXML::Writer class. def to_xml seq = LibXML::XML::Node.new('sequence') if @type != nil if ["dna", "rna", "protein"].include?(@type) seq["type"] = @type else raise "Type attribute of Sequence has to be one of dna, rna or a." 
end end PhyloXML::Writer.generate_xml(seq, self, [ [:attr, 'id_source'], [:attr, 'id_ref'], [:pattern, 'symbol', @symbol, Regexp.new("^\\S{1,10}$")], [:complex, 'accession', @accession], [:simple, 'name', @name], [:simple, 'location', @location]]) if @mol_seq != nil molseq = LibXML::XML::Node.new('mol_seq', @mol_seq) molseq["is_aligned"] = @is_aligned.to_s if @is_aligned != nil seq << molseq end PhyloXML::Writer.generate_xml(seq, self, [ #[:pattern, 'mol_seq', @mol_seq, Regexp.new("^[a-zA-Z\.\-\?\*_]+$")], [:complex, 'uri', @uri], [:objarr, 'annotation', 'annotations'], [:complex, 'domain_architecture', @domain_architecture]]) #@todo test domain_architecture #any return seq end # converts Bio::PhyloXML:Sequence to Bio::Sequence object. # --- # *Returns*:: Bio::Sequence def to_biosequence #type is not a required attribute in phyloxml (nor any other Sequence #element) it might not hold any value, so we will not check what type it is. seq = Bio::Sequence.auto(@mol_seq) seq.id_namespace = @accession.source seq.entry_id = @accession.value # seq.primary_accession = @accession.value could be this seq.definition = @name #seq.comments = @name //this one? if @uri != nil h = {'url' => @uri.uri, 'title' => @uri.desc } ref = Bio::Reference.new(h) seq.references << ref end seq.molecule_type = 'RNA' if @type == 'rna' seq.molecule_type = 'DNA' if @type == 'dna' #@todo deal with the properties. There might be properties which look #like bio sequence attributes or features return seq end end # == Description # Element Accession is used to capture the local part in a sequence # identifier. class Accession #String. Source of the accession id. Example: "UniProtKB" attr_accessor :source #String. Value of the accession id. Example: "P17304" attr_accessor :value # Converts elements to xml representation. Called by PhyloXML::Writer class. def to_xml raise "Source attribute is required for Accession object." 
if @source == nil accession = LibXML::XML::Node.new('accession', @value) accession['source'] = @source return accession end end # A uniform resource identifier. In general, this is expected to be an URL # (for example, to link to an image on a website, in which case the 'type' # attribute might be 'image' and 'desc' might be 'image of a California # sea hare') class Uri # String. Description of the uri. For example, image of a California sea hare' attr_accessor :desc # String. For example, image. attr_accessor :type # String. URL of the resource. attr_accessor :uri # Converts elements to xml representation. Called by PhyloXML::Writer class. def to_xml if @uri != nil xml_node = LibXML::XML::Node.new('uri', @uri) Writer.generate_xml(xml_node, self, [ [:attr, 'desc'], [:attr, 'type']]) return xml_node end end end # == Description # # The annotation of a molecular sequence. It is recommended to annotate by # using the optional 'ref' attribute (some examples of acceptable values # for the ref attribute: 'GO:0008270', 'KEGG:Tetrachloroethene degradation', # 'EC:1.1.1.1'). class Annotation # String. For example, 'GO:0008270', 'KEGG:Tetrachloroethene degradation', # 'EC:1.1.1.1' attr_accessor :ref # String attr_accessor :source # String. evidence for a annotation as free text (e.g. 'experimental') attr_accessor :evidence # String. Type of the annotation. attr_accessor :type # String. Free text description. attr_accessor :desc # Confidence object. Type and value of support for a annotation. attr_accessor :confidence # Array of Property objects. Allows for further, typed and referenced # annotations from external resources attr_accessor :properties # Uri object. attr_accessor :uri def initialize #@todo add unit test for this, since didn't break anything when changed from property to properties @properties = [] end # Converts elements to xml representation. Called by PhyloXML::Writer class. 
def to_xml annot = LibXML::XML::Node.new('annotation') annot["ref"] = @ref if @ref != nil PhyloXML::Writer.generate_xml(annot, self, [[:simple, 'desc', @desc], [:complex, 'confidence', @confidence], [:objarr, 'property', 'properties'], [:complex, 'uri', @uri]]) return annot end end class Id # The provider of Id, for example, NCBI. attr_accessor :provider # The value of Id. attr_accessor :value # Converts elements to xml representation. Called by PhyloXML::Writer class. def to_xml xml_node = LibXML::XML::Node.new('id', @value) xml_node["provider"] = @provider if @provider != nil return xml_node end end # == Description # This indicates the color of a node when rendered (the color applies # to the whole node and its children unless overwritten by the # color(s) of sub clades). class BranchColor #Integer attr_reader :red, :green, :blue def red=(str) @red = str.to_i end def green=(str) @green = str.to_i end def blue=(str) @blue = str.to_i end # Converts elements to xml representation. Called by PhyloXML::Writer class. def to_xml #@todo add unit test if @red == nil raise "Subelement red of BranchColor element should not be nil" elsif @green == nil raise "Subelement green of BranchColor element should not be nil" elsif @blue == nil raise "Subelement blue of BranchColor element should not be nil" end c = LibXML::XML::Node.new('branch_color') PhyloXML::Writer.generate_xml(c, self, [ [:simple, 'red', @red], [:simple, 'green', @green], [:simple, 'blue', @blue]]) return c end end # == Description # A date associated with a clade/node. Its value can be numerical by # using the 'value' element and/or free text with the 'desc' element' # (e.g. 'Silurian'). If a numerical value is used, it is recommended to # employ the 'unit' attribute to indicate the type of the numerical # value (e.g. 'mya' for 'million years ago'). class Date # String. Units in which value is stored. attr_accessor :unit # Free text description of the date. attr_accessor :desc # Integer. 
Minimum and maximum of the value. attr_reader :minimum, :maximum # Integer. Value of the date. attr_reader :value def minimum=(str) @minimum = str.to_i end def maximum=(str) @maximum = str.to_i end def value= (str) @value = str.to_i end # Returns value + unit, for exampe "7 mya" def to_s return "#{value} #{unit}" end # Converts elements to xml representation. Called by PhyloXML::Writer class. def to_xml date = LibXML::XML::Node.new('date') PhyloXML::Writer.generate_xml(date, self, [ [:attr, 'unit'], [:simple, 'desc', @desc], [:simple, 'value', @value], [:simple, 'minimum', @minimum], [:simple, 'maximum', @maximum]]) return date end end # == Description # This is used describe the domain architecture of a protein. Attribute # 'length' is the total length of the protein class DomainArchitecture # Integer. Total length of the protein attr_accessor :length # Array of ProteinDomain objects. attr_reader :domains def length=(str) @length = str.to_i end def initialize @domains = [] end # Converts elements to xml representation. Called by PhyloXML::Writer class. def to_xml xml_node = LibXML::XML::Node.new('domain_architecture') PhyloXML::Writer.generate_xml(xml_node, self,[ [:attr, 'length'], [:objarr, 'domain', 'domains']]) return xml_node end end # == Description # To represent an individual domain in a domain architecture. The # name/unique identifier is described via the 'id' attribute. class ProteinDomain #Float, for example to store E-values 4.7E-14 attr_accessor :confidence # String attr_accessor :id, :value # Integer. Beginning of the domain. attr_reader :from # Integer. End of the domain. attr_reader :to def from=(str) @from = str.to_i end def to=(str) @to = str.to_i end def confidence=(str) @confidence = str.to_f end # Converts elements to xml representation. Called by PhyloXML::Writer class. def to_xml if @from == nil raise "from attribute of ProteinDomain class is required." elsif @to == nil raise "to attribute of ProteinDomain class is required." 
else xml_node = LibXML::XML::Node.new('domain', @value) xml_node["from"] = @from.to_s xml_node["to"] = @to.to_s xml_node["id"] = @id if @id != nil xml_node["confidence"] = @confidence.to_s return xml_node end end end #Property allows for typed and referenced properties from external resources #to be attached to 'Phylogeny', 'Clade', and 'Annotation'. The value of a #property is its mixed (free text) content. Attribute 'datatype' indicates #the type of a property and is limited to xsd-datatypes (e.g. 'xsd:string', #'xsd:boolean', 'xsd:integer', 'xsd:decimal', 'xsd:float', 'xsd:double', #'xsd:date', 'xsd:anyURI'). Attribute 'applies_to' indicates the item to #which a property applies to (e.g. 'node' for the parent node of a clade, #'parent_branch' for the parent branch of a clade). Attribute 'id_ref' allows #to attached a property specifically to one element (on the xml-level). #Optional attribute 'unit' is used to indicate the unit of the property. #An example: 200 class Property # String attr_accessor :ref, :unit, :id_ref, :value # String attr_reader :datatype, :applies_to def datatype=(str) #@todo add unit test or maybe remove, if assume that xml is valid. unless ['xsd:string','xsd:boolean','xsd:decimal','xsd:float','xsd:double', 'xsd:duration','xsd:dateTime','xsd:time','xsd:date','xsd:gYearMonth', 'xsd:gYear','xsd:gMonthDay','xsd:gDay','xsd:gMonth','xsd:hexBinary', 'xsd:base64Binary','xsd:anyURI','xsd:normalizedString','xsd:token', 'xsd:integer','xsd:nonPositiveInteger','xsd:negativeInteger', 'xsd:long','xsd:int','xsd:short','xsd:byte','xsd:nonNegativeInteger', 'xsd:unsignedLong','xsd:unsignedInt','xsd:unsignedShort', 'xsd:unsignedByte','xsd:positiveInteger'].include?(str) raise "Warning: #{str} is not in the list of allowed values." end @datatype = str end def applies_to=(str) unless ['phylogeny','clade','node','annotation','parent_branch','other'].include?(str) puts "Warning: #{str} is not in the list of allowed values." 
end @applies_to = str end # Converts elements to xml representation. Called by PhyloXML::Writer class. def to_xml #@todo write unit test for this raise "ref is an required element of property" if @ref.nil? raise "datatype is an required element of property" if @datatype.nil? raise "applies_to is an required element of property" if @applies_to.nil? property = LibXML::XML::Node.new('property') Writer.generate_xml(property, self, [ [:attr, 'ref'], [:attr, 'unit'], [:attr, 'datatype'], [:attr, 'applies_to'], [:attr, 'id_ref']]) property << @value if @value != nil return property end end # == Description # A literature reference for a clade. It is recommended to use the 'doi' # attribute instead of the free text 'desc' element whenever possible. class Reference # String. Digital Object Identifier. attr_accessor :doi # String. Free text description. attr_accessor :desc # Converts elements to xml representation. Called by PhyloXML::Writer class. def to_xml ref = LibXML::XML::Node.new('reference') Writer.generate_xml(ref, self, [ [:attr, 'doi'], [:simple, 'desc', @desc]]) return ref end end # == Description # # This is used to express a typed relationship between two clades. # For example it could be used to describe multiple parents of a clade. class CladeRelation # Float attr_accessor :distance # String. Id of the referenced parents of a clade. attr_accessor :id_ref_0, :id_ref_1 # String attr_accessor :type # Confidence object attr_accessor :confidence def distance=(str) @distance = str.to_f end # Converts elements to xml representation. Called by PhyloXML::Writer class. def to_xml if @id_ref_0 == nil or @id_ref_1 == nil or @type == nil raise "Attributes id_ref_0, id_ref_1, type are required elements by SequenceRelation element." 
else cr = LibXML::XML::Node.new('clade_relation') Writer.generate_xml(cr, self, [ [:attr, 'id_ref_0'], [:attr, 'id_ref_1'], [:attr, 'distance'], [:attr, 'type'], [:complex, 'confidence', @confidnece]]) return cr end end end # == Description # The names and/or counts of binary characters present, gained, and # lost at the root of a clade. class BinaryCharacters attr_accessor :bc_type, :gained, :lost, :present, :absent attr_reader :gained_count, :lost_count, :present_count, :absent_count def gained_count=(str) @gained_count = str.to_i end def lost_count=(str) @lost_count = str.to_i end def present_count=(str) @present_count = str.to_i end def absent_count=(str) @absent_count = str.to_i end def initialize @gained = [] @lost = [] @present = [] @absent = [] end # Converts elements to xml representation. Called by PhyloXML::Writer class. def to_xml bc = LibXML::XML::Node.new('binary_characters') bc['type'] = @bc_type PhyloXML::Writer.generate_xml(bc, self, [ [:attr, 'gained_count'], [:attr, 'lost_count'], [:attr, 'present_count'], [:attr, 'absent_count']]) if not @gained.empty? gained_xml = LibXML::XML::Node.new('gained') PhyloXML::Writer.generate_xml(gained_xml, self, [[:simplearr, 'bc', @gained]]) bc << gained_xml end if not @lost.empty? lost_xml = LibXML::XML::Node.new('lost') PhyloXML::Writer.generate_xml(lost_xml, self, [[:simplearr, 'bc', @lost]]) bc << lost_xml end if not @present.empty? present_xml = LibXML::XML::Node.new('present') PhyloXML::Writer.generate_xml(present_xml, self, [[:simplearr, 'bc', @present]]) bc << present_xml end if not @absent.empty? absent_xml = LibXML::XML::Node.new('absent') PhyloXML::Writer.generate_xml(absent_xml, self, [[:simplearr, 'bc', @absent]]) bc << absent_xml end return bc end end # == Description # This is used to express a typed relationship between two sequences. # For example it could be used to describe an orthology (in which case # attribute 'type' is 'orthology'). 
class SequenceRelation # String attr_accessor :id_ref_0, :id_ref_1, :type # Float attr_reader :distance #@todo it has Confidences objects. def distance=(str) @distance = str.to_f if str != nil end def type=(str) #@todo do warning instead? #@todo do validation at actually writing xml allowed_values = ["orthology", "one_to_one_orthology", "super_orthology", "paralogy", "ultra_paralogy", "xenology", "unknown", "other"] if not allowed_values.include? str raise "SequenceRelation#type has to be one one of #{allowed_values.join("; ")}" else @type = str end end # Converts elements to xml representation. Called by PhyloXML::Writer class. def to_xml if @id_ref_0 == nil or @id_ref_1 == nil or @type == nil raise "Attributes id_ref_0, id_ref_1, type are required elements by SequenceRelation element." else sr = LibXML::XML::Node.new('sequence_relation') sr['id_ref_0'] = @id_ref_0 sr['id_ref_1'] = @id_ref_1 sr['distance'] = @distance.to_s if @distance != nil sr['type'] = @type return sr end end end class Other attr_accessor :element_name, :attributes, :children, :value def initialize @children = [] @attributes = Hash.new end # Converts elements to xml representation. Called by PhyloXML::Writer class. def to_xml o = LibXML::XML::Node.new(@element_name) @attributes.each do |key, value| o[key] = value end o << value if value != nil children.each do |child_node| o << child_node.to_xml end return o end end end #module PhyloXML end #end module Bio bio-1.4.3.0001/lib/bio/db/phyloxml/phyloxml.xsd0000644000004100000410000007651412200110570021054 0ustar www-datawww-data phyloXML is an XML language to describe evolutionary trees and associated data. Version: 1.10. License: dual-licensed under the LGPL or Ruby's License. Copyright (c) 2008-2009 Christian M Zmasek. 'phyloxml' is the name of the root element. Phyloxml contains an arbitrary number of 'phylogeny' elements (each representing one phylogeny) possibly followed by elements from other namespaces. 
Element Phylogeny is used to represent a phylogeny. The required attribute 'rooted' is used to indicate whether the phylogeny is rooted or not. The attribute 'rerootable' can be used to indicate that the phylogeny is not allowed to be rooted differently (i.e. because it is associated with root dependent data, such as gene duplications). The attribute 'type' can be used to indicate the type of phylogeny (i.e. 'gene tree'). It is recommended to use the attribute 'branch_length_unit' if the phylogeny has branch lengths. Element clade is used in a recursive manner to describe the topology of a phylogenetic tree. Element Clade is used in a recursive manner to describe the topology of a phylogenetic tree. The parent branch length of a clade can be described either with the 'branch_length' element or the 'branch_length' attribute (it is not recommended to use both at the same time, though). Usage of the 'branch_length' attribute allows for a less verbose description. Element 'confidence' is used to indicate the support for a clade/parent branch. Element 'events' is used to describe such events as gene-duplications at the root node/parent branch of a clade. Element 'width' is the branch width for this clade (including parent branch). Both 'color' and 'width' elements apply for the whole clade unless overwritten in-sub clades. Attribute 'id_source' is used to link other elements to a clade (on the xml-level). Element Taxonomy is used to describe taxonomic information for a clade. Element 'code' is intended to store UniProt/Swiss-Prot style organism codes (e.g. 'APLCA' for the California sea hare 'Aplysia californica') or other styles of mnemonics (e.g. 'Aca'). Element 'authority' is used to keep the authority, such as 'J. G. Cooper, 1863', associated with the 'scientific_name'. Element 'id' is used for a unique identifier of a taxon (for example '6500' with 'ncbi_taxonomy' as 'provider' for the California sea hare). 
Attribute 'id_source' is used to link other elements to a taxonomy (on the xml-level). Element Sequence is used to represent a molecular sequence (Protein, DNA, RNA) associated with a node. 'symbol' is a short (maximal ten characters) symbol of the sequence (e.g. 'ACTM') whereas 'name' is used for the full name (e.g. 'muscle Actin'). 'location' is used for the location of a sequence on a genome/chromosome. The actual sequence can be stored with the 'mol_seq' element. Attribute 'type' is used to indicate the type of sequence ('dna', 'rna', or 'protein'). One intended use for 'id_ref' is to link a sequence to a taxonomy (via the taxonomy's 'id_source') in case of multiple sequences and taxonomies per node. Element 'mol_seq' is used to store molecular sequences. The 'is_aligned' attribute is used to indicated that this molecular sequence is aligned with all other sequences in the same phylogeny for which 'is aligned' is true as well (which, in most cases, means that gaps were introduced, and that all sequences for which 'is aligned' is true must have the same length). Element Accession is used to capture the local part in a sequence identifier (e.g. 'P17304' in 'UniProtKB:P17304', in which case the 'source' attribute would be 'UniProtKB'). This is used describe the domain architecture of a protein. Attribute 'length' is the total length of the protein To represent an individual domain in a domain architecture. The name/unique identifier is described via the 'id' attribute. 'confidence' can be used to store (i.e.) E-values. Events at the root node of a clade (e.g. one gene duplication). The names and/or counts of binary characters present, gained, and lost at the root of a clade. A literature reference for a clade. It is recommended to use the 'doi' attribute instead of the free text 'desc' element whenever possible. The annotation of a molecular sequence. 
It is recommended to annotate by using the optional 'ref' attribute (some examples of acceptable values for the ref attribute: 'GO:0008270', 'KEGG:Tetrachloroethene degradation', 'EC:1.1.1.1'). Optional element 'desc' allows for a free text description. Optional element 'confidence' is used to state the type and value of support for a annotation. Similarly, optional attribute 'evidence' is used to describe the evidence for a annotation as free text (e.g. 'experimental'). Optional element 'property' allows for further, typed and referenced annotations from external resources. Property allows for typed and referenced properties from external resources to be attached to 'Phylogeny', 'Clade', and 'Annotation'. The value of a property is its mixed (free text) content. Attribute 'datatype' indicates the type of a property and is limited to xsd-datatypes (e.g. 'xsd:string', 'xsd:boolean', 'xsd:integer', 'xsd:decimal', 'xsd:float', 'xsd:double', 'xsd:date', 'xsd:anyURI'). Attribute 'applies_to' indicates the item to which a property applies to (e.g. 'node' for the parent node of a clade, 'parent_branch' for the parent branch of a clade). Attribute 'id_ref' allows to attached a property specifically to one element (on the xml-level). Optional attribute 'unit' is used to indicate the unit of the property. An example: <property datatype="xsd:integer" ref="NOAA:depth" applies_to="clade" unit="METRIC:m"> 200 </property> A uniform resource identifier. In general, this is expected to be an URL (for example, to link to an image on a website, in which case the 'type' attribute might be 'image' and 'desc' might be 'image of a California sea hare'). A general purpose confidence element. For example this can be used to express the bootstrap support value of a clade (in which case the 'type' attribute is 'bootstrap'). A general purpose identifier element. Allows to indicate the provider (or authority) of an identifier. 
The geographic distribution of the items of a clade (species, sequences), intended for phylogeographic applications. The location can be described either by free text in the 'desc' element and/or by the coordinates of one or more 'Points' (similar to the 'Point' element in Google's KML format) or by 'Polygons'. The coordinates of a point with an optional altitude (used by element 'Distribution'). Required attributes are the 'geodetic_datum' used to indicate the geodetic datum (also called 'map datum', for example Google's KML uses 'WGS84'). Attribute 'alt_unit' is the unit for the altitude (e.g. 'meter'). A polygon defined by a list of 'Points' (used by element 'Distribution'). A date associated with a clade/node. Its value can be numerical by using the 'value' element and/or free text with the 'desc' element' (e.g. 'Silurian'). If a numerical value is used, it is recommended to employ the 'unit' attribute to indicate the type of the numerical value (e.g. 'mya' for 'million years ago'). The elements 'minimum' and 'maximum' are used the indicate a range/confidence interval This indicates the color of a clade when rendered (the color applies to the whole clade unless overwritten by the color(s) of sub clades). This is used to express a typed relationship between two sequences. For example it could be used to describe an orthology (in which case attribute 'type' is 'orthology'). This is used to express a typed relationship between two clades. For example it could be used to describe multiple parents of a clade. bio-1.4.3.0001/lib/bio/db/phyloxml/phyloxml_parser.rb0000644000004100000410000007337312200110570022235 0ustar www-datawww-data# # = bio/db/phyloxml_parser.rb - PhyloXML parser # # Copyright:: Copyright (C) 2009 # Diana Jaunzeikare # License:: The Ruby License # # $Id:$ # # == Description # # This file containts parser for PhyloXML. # # == Requirements # # Libxml2 XML parser is required. 
Install libxml-ruby bindings from # http://libxml.rubyforge.org or # # gem install -r libxml-ruby # # == References # # * http://www.phyloxml.org # # * https://www.nescent.org/wg_phyloinformatics/PhyloSoC:PhyloXML_support_in_BioRuby require 'uri' require 'libxml' require 'bio/tree' require 'bio/db/phyloxml/phyloxml_elements' module Bio module PhyloXML # == Description # # Bio::PhyloXML::Parser is for parsing phyloXML format files. # # == Requirements # # Libxml2 XML parser is required. Install libxml-ruby bindings from # http://libxml.rubyforge.org or # # gem install -r libxml-ruby # # == Usage # # require 'bio' # # # Create new phyloxml parser # phyloxml = Bio::PhyloXML::Parser.open('example.xml') # # # Print the names of all trees in the file # phyloxml.each do |tree| # puts tree.name # end # # # == References # # http://www.phyloxml.org/documentation/version_100/phyloxml.xsd.html # class Parser include LibXML # After parsing all the trees, if there is anything else in other xml format, # it is saved in this array of PhyloXML::Other objects attr_reader :other # Initializes LibXML::Reader and reads the file until it reaches the first # phylogeny element. # # Example: Create a new Bio::PhyloXML::Parser object. # # p = Bio::PhyloXML::Parser.open("./phyloxml_examples.xml") # # If the optional code block is given, Bio::PhyloXML object is passed to # the block as an argument. When the block terminates, the Bio::PhyloXML # object is automatically closed, and the open method returns the value # of the block. # # Example: Get the first tree in the file. # # tree = Bio::PhyloXML::Parser.open("example.xml") do |px| # px.next_tree # end # # --- # *Arguments*: # * (required) _filename_: Path to the file to parse. # * (optional) _validate_: Whether to validate the file against schema or not. Default value is true. 
# *Returns*:: (without block) Bio::PhyloXML::Parser object
      # *Returns*:: (with block) the value of the block
      def self.open(filename, validate=true)
        parser = new(nil, validate)
        parser.instance_eval do
          filename = _secure_filename(filename)
          _validate(:file, filename) if validate
          # XML::Parser::Options::NONET for security reason
          reader_options = { :options => LibXML::XML::Parser::Options::NONET }
          @reader = XML::Reader.file(filename, reader_options)
          _skip_leader
        end
        return parser unless block_given?

        # With a block: hand the parser over, always close it afterwards and
        # give back whatever the block returned.
        begin
          yield parser
        ensure
          parser.close if parser && !parser.closed?
        end
      end

      # Initializes LibXML::Reader and reads the file until it reaches the first
      # phylogeny element.
      #
      # Create a new Bio::PhyloXML::Parser object.
      #
      #   p = Bio::PhyloXML::Parser.open_uri("http://www.phyloxml.org/examples/apaf.xml")
      #
      # If the optional code block is given, Bio::PhyloXML object is passed to
      # the block as an argument. When the block terminates, the Bio::PhyloXML
      # object is automatically closed, and the open_uri method returns the
      # value of the block.
      #
      # ---
      # *Arguments*:
      # * (required) _uri_: (URI or String) URI to the data to parse
      # * (optional) _validate_: For URI reader, the "validate" option is ignored and no validation is executed.
      # *Returns*:: (without block) Bio::PhyloXML::Parser object
      # *Returns*:: (with block) the value of the block
      def self.open_uri(uri, validate=true)
        if uri.kind_of?(URI)
          uri = uri.to_s
        else
          # raises error if not a String
          uri = uri.to_str
          # raises error if invalid URI
          URI.parse(uri)
        end

        parser = new(nil, validate)
        parser.instance_eval do
          @reader = XML::Reader.file(uri)
          _skip_leader
        end
        return parser unless block_given?

        begin
          yield parser
        ensure
          parser.close if parser && !parser.closed?
        end
      end

      # Special class for closed PhyloXML::Parser object.
      # It raises error for any methods except essential methods.
      #
      # Bio::PhyloXML internal use only.
class ClosedPhyloXMLParser #:nodoc:
  # Every message sent to this placeholder means the parser was used
  # after it had been closed.
  def method_missing(*arg)
    raise LibXML::XML::Error, 'closed PhyloXML::Parser object'
  end
end #class ClosedPhyloXMLParser

# Closes the LibXML::Reader held by this parser and replaces it with a
# ClosedPhyloXMLParser placeholder.
# It also closes the opened file if it is created by using
# Bio::PhyloXML::Parser.open method.
#
# When closed object is closed again, or closed object is used,
# it raises LibXML::XML::Error.
# ---
# *Returns*:: nil
def close
  @reader.close
  @reader = ClosedPhyloXMLParser.new
  nil
end

# Tells whether the parser has been closed by the close method or
# equivalent.
# ---
# *Returns*:: true or false
def closed?
  @reader.kind_of?(ClosedPhyloXMLParser)
end

# Creates a parser that reads from an IO object. The reader is advanced
# until it reaches the first phylogeny element.
#
#   p = Bio::PhyloXML::Parser.for_io($stdin)
#
# ---
# *Arguments*:
# * (required) _io_: IO object
# * (optional) _validate_: For IO reader, the "validate" option is ignored and no validation is executed.
# *Returns*:: Bio::PhyloXML::Parser object
def self.for_io(io, validate=true)
  parser = new(nil, validate)
  parser.instance_eval do
    reader_options = { :options => LibXML::XML::Parser::Options::NONET }
    @reader = XML::Reader.io(io, reader_options)
    _skip_leader
  end
  parser
end

# (private) returns PhyloXML schema, loaded from the phyloxml.xsd file
# located next to this source file.
def _schema
  xsd_path = File.join(File.dirname(__FILE__),'phyloxml.xsd')
  XML::Schema.document(XML::Document.file(xsd_path))
end
private :_schema

# (private) do validation against the PhyloXML schema.
# Raises RuntimeError when validation fails or an XML error occurs.
# ---
# *Arguments*:
# * (required) _data_type_: :file for filename, :string for string
# * (required) _arg_: filename or string
# *Returns*:: (undefined)
def _validate(data_type, arg)
  parser_flags =
    LibXML::XML::Parser::Options::NOERROR |   # no error messages
    LibXML::XML::Parser::Options::NOWARNING | # no warning messages
    LibXML::XML::Parser::Options::NONET       # no network access
  options = { :options => parser_flags }

  if data_type == :file
    # No validation when special file e.g. FIFO (named pipe)
    return unless File.file?(arg)
    document = XML::Document.file(arg, options)
  elsif data_type == :string
    document = XML::Document.string(arg, options)
  else
    # no validation for unknown data type
    return
  end

  # The schema is loaded outside the rescue below, so errors raised while
  # reading phyloxml.xsd itself are not re-wrapped.
  schema = _schema
  begin
    valid = document.validate_schema(schema) do |msg, _flag|
      # The document of libxml-ruby says that the block is called
      # when validation failed, but it seems it is never called
      # even when validation failed!
      raise "Validation of the XML document against phyloxml.xsd schema failed. #{msg}"
    end
  rescue LibXML::XML::Error => evar
    raise "Validation of the XML document against phyloxml.xsd schema failed, or XML error occurred. #{evar.message}"
  end
  raise "Validation of the XML document against phyloxml.xsd schema failed." unless valid
end
private :_validate

# (private) It seems that LibXML::XML::Reader reads from the network
# even if LibXML::XML::Parser::Options::NONET is set.
# So, for URI-like filename, the first '://' is replaced with ':/'.
# For example, "http://a/b" is changed to "http:/a/b".
def _secure_filename(filename)
  # Only names that start with a scheme-like prefix are rewritten.
  return filename unless /\A[a-zA-Z]+\:\/\// =~ filename
  filename.sub(/\:\/\//, ':/')
end
private :_secure_filename

# (private) advances the reader until it is positioned on a phylogeny
# start element.
def _skip_leader
  # Have to leave this way, if accepting strings, instead of files
  until is_element?('phylogeny')
    @reader.read
  end
  nil
end
private :_skip_leader

# Initializes LibXML::Reader and reads the PhyloXML-formatted string
# until it reaches the first phylogeny element.
#
# Create a new Bio::PhyloXML::Parser object.
#
#   str = File.read("./phyloxml_examples.xml")
#   p = Bio::PhyloXML::Parser.new(str)
#
#
# Deprecated usage: Reads data from a file. str is a filename.
#
#   p = Bio::PhyloXML::Parser.new("./phyloxml_examples.xml")
#
# Taking filename is deprecated. Use Bio::PhyloXML::Parser.open(filename).
#
# NOTE: schema validation is only performed for the deprecated filename
# form; a PhyloXML-formatted string is parsed without validation.
#
# ---
# *Arguments*:
# * (required) _str_: PhyloXML-formatted string
# * (optional) _validate_: Whether to validate the file against schema or not. Default value is true.
# *Returns*:: Bio::PhyloXML::Parser object
def initialize(str, validate=true)
  @other = []

  # The class-level constructors (open, open_uri, for_io) pass nil and
  # set up @reader themselves.
  return unless str

  # XML::Parser::Options::NONET for security reason
  reader_options = { :options => LibXML::XML::Parser::Options::NONET }

  # For compatibility, if filename-like string is given,
  # treat it as a filename.
  if /[\<\>\r\n]/ !~ str and File.exist?(str) then
    # assume that str is filename
    warn "Bio::PhyloXML::Parser.new(filename) is deprecated. Use Bio::PhyloXML::Parser.open(filename)."
    filename = _secure_filename(str)
    _validate(:file, filename) if validate
    # Use the same reader options as Parser.open; previously this branch
    # opened the file without NONET.
    @reader = XML::Reader.file(filename, reader_options)
    _skip_leader
    return
  end

  # initialize for string
  @reader = XML::Reader.string(str, reader_options)
  _skip_leader
end

# Iterate through all trees in the file.
#
#  phyloxml = Bio::PhyloXML::Parser.open('example.xml')
#  phyloxml.each do |tree|
#    puts tree.name
#  end
#
# Iteration stops as soon as next_tree returns nil.
def each
  while tree = next_tree
    yield tree
  end
end

# Access the specified tree in the file. It parses trees until the specified
# tree is reached. The count is relative to the current reader position,
# because every call consumes trees through next_tree.
#
#  # Get 3rd tree in the file (starts counting from 0).
#  parser = PhyloXML::Parser.open('phyloxml_examples.xml')
#  tree = parser[2]
#
def [](i)
  tree = nil
  (i+1).times do
    tree = self.next_tree
  end
  return tree
end

# Parse and return the next phylogeny tree. If there are no more phylogeny
# element, nil is returned. If there is something else besides phylogeny
# elements, it is saved in the PhyloXML::Parser#other.
#
#  p = Bio::PhyloXML::Parser.open("./phyloxml_examples.xml")
#  tree = p.next_tree
#
# ---
# *Returns*:: Bio::PhyloXML::Tree
def next_tree()

  if not is_element?('phylogeny')
    if @reader.node_type == XML::Reader::TYPE_END_ELEMENT
      if is_end_element?('phyloxml')
        return nil
      else
        @reader.read
        @reader.read
        if is_end_element?('phyloxml')
          return nil
        end
      end
    end
    # phyloxml can hold only phylogeny and "other" elements. If this is not
    # phylogeny element then it is other. Also, "other" always comes after
    # all phylogenies
    @other << parse_other
    #return nil for tree, since this is not valid phyloxml tree.
    return nil
  end

  tree = Bio::PhyloXML::Tree.new

  # keep track of current node in clades array/stack. Current node is the
  # last element in the clades array
  clades = []
  clades.push tree

  #keep track of current edge to be able to parse branch_length tag
  current_edge = nil

  # we are going to parse clade iteratively by pointing (and changing) to
  # the current node in the tree. Since the property element is both in
  # clade and in the phylogeny, we need some boolean to know if we are
  # parsing the clade (there can be only max 1 clade in phylogeny) or
  # parsing phylogeny
  parsing_clade = false

  while not is_end_element?('phylogeny') do
    break if is_end_element?('phyloxml')

    # parse phylogeny elements, except clade
    if not parsing_clade

      if is_element?('phylogeny')
        @reader["rooted"] == "true" ? tree.rooted = true : tree.rooted = false
        @reader["rerootable"] == "true" ? tree.rerootable = true : tree.rerootable = false
        parse_attributes(tree, ["branch_length_unit", 'type'])
      end

      parse_simple_elements(tree, [ "name", 'description', "date"])

      if is_element?('confidence')
        tree.confidences << parse_confidence
      end

    end

    if @reader.node_type == XML::Reader::TYPE_ELEMENT
      case @reader.name
      when 'clade'
        #parse clade element

        parsing_clade = true

        node= Bio::PhyloXML::Node.new

        branch_length = @reader['branch_length']

        parse_attributes(node, ["id_source"])

        #add new node to the tree
        tree.add_node(node)
        # The first clade will always be root since by xsd schema phyloxml can
        # have 0 to 1 clades in it.
        if tree.root == nil
          tree.root = node
        else
          current_edge = tree.add_edge(clades[-1], node,
                                       Bio::Tree::Edge.new(branch_length))
        end
        clades.push node
        #end if clade element
      else
        parse_clade_elements(clades[-1], current_edge) if parsing_clade
      end
    end

    #end clade element, go one parent up
    if is_end_element?('clade')

      #if we have reached the closing tag of the top-most clade, then our
      # current node should point to the root. If that's the case, we are done
      # parsing the clade element
      if clades[-1] == tree.root
        parsing_clade = false
      else
        # set current node (clades[-1]) to the previous clade in the array
        clades.pop
      end
    end

    #parsing phylogeny elements
    if not parsing_clade

      if @reader.node_type == XML::Reader::TYPE_ELEMENT
        case @reader.name
        when 'property'
          tree.properties << parse_property

        when 'clade_relation'
          clade_relation = CladeRelation.new
          parse_attributes(clade_relation, ["id_ref_0", "id_ref_1", "distance", "type"])

          #@ add unit test for this
          if not @reader.empty_element?
            @reader.read
            if is_element?('confidence')
              clade_relation.confidence = parse_confidence
            end
          end
          tree.clade_relations << clade_relation

        when 'sequence_relation'
          sequence_relation = SequenceRelation.new
          parse_attributes(sequence_relation, ["id_ref_0", "id_ref_1", "distance", "type"])
          if not @reader.empty_element?
            @reader.read
            if is_element?('confidence')
              sequence_relation.confidence = parse_confidence
            end
          end
          tree.sequence_relations << sequence_relation
        when 'phylogeny'
          #do nothing
        else
          tree.other << parse_other
          #puts "Not recognized element. #{@reader.name}"
        end
      end
    end
    # go to next element
    @reader.read
  end #end while not end of phylogeny
  #move on to the next tag after /phylogeny which is text, since phylogeny
  #end tag is empty element, which value is nil, therefore need to move to
  #the next meaningful element (therefore @reader.read twice)
  @reader.read
  @reader.read

  return tree
end

# return tree of specified name.
# @todo Implement this method.
#    def get_tree_by_name(name)
#      while not is_end_element?('phyloxml')
#        if is_element?('phylogeny')
#          @reader.read
#          @reader.read
#
#          if is_element?('name')
#            @reader.read
#            if @reader.value == name
#              puts "equasl"
#              tree = next_tree
#              puts tree
#            end
#          end
#        end
#        @reader.read
#      end
#
#    end

private

####
# Utility methods
###

# True when the reader is positioned on a start element named _str_.
# Note on precedence: "and" binds looser than the ternary, so this reads
# as (node type check) and ((name == str) ? true : false).
def is_element?(str)
  @reader.node_type == XML::Reader::TYPE_ELEMENT and @reader.name == str ? true : false
end

# True when the reader is positioned on an end element named _str_.
def is_end_element?(str)
  @reader.node_type==XML::Reader::TYPE_END_ELEMENT and @reader.name == str ? true : false
end

# Sanity check: raises RuntimeError unless the reader is positioned on the
# end element named _str_. Returns nil otherwise.
def has_reached_end_element?(str)
  if not(is_end_element?(str))
    raise "Warning: Should have reached element here"
  end
end

# Parses a simple XML element, i.e. one that only contains text.
# When the reader is on the start element _name_, it reads the value and
# assigns it through the "name=" writer of _object_ (for a speciations
# element holding 1 this is object.speciations = 1). It then checks that
# the end tag has been reached and raises if not.
# Does nothing when the reader is not on that element.
def parse_simple_element(object, name)
  if is_element?(name)
    @reader.read
    object.send("#{name}=", @reader.value)
    @reader.read
    has_reached_end_element?(name)
  end
end

# Tries parse_simple_element for every element name in _elements_.
def parse_simple_elements(object, elements)
  elements.each do |elmt|
    parse_simple_element(object, elmt)
  end
end

#Parses list of attributes
#use for the code like: clade_relation.type = @reader["type"]
# Each attribute is assigned even when the reader returns nil for it.
def parse_attributes(object, arr_of_attrs)
  arr_of_attrs.each do |attr|
    object.send("#{attr}=", @reader[attr])
  end
end

# Handles the element the reader currently points at while a clade is
# being parsed, storing the result in _current_node_ (or, for
# branch_length, in _current_edge_). Called once per reader step from
# next_tree.
def parse_clade_elements(current_node, current_edge)
  #no loop inside, loop is already outside

  if @reader.node_type == XML::Reader::TYPE_ELEMENT
    case @reader.name
    when 'branch_length'
      # @todo add unit test for this. current_edge is nil, if the root clade
      # has branch_length attribute.
      @reader.read
      branch_length = @reader.value
      current_edge.distance = branch_length.to_f if current_edge != nil
      @reader.read
    when 'width'
      @reader.read
      current_node.width = @reader.value
      @reader.read
    when 'name'
      @reader.read
      current_node.name = @reader.value
      @reader.read
    when 'events'
      current_node.events = parse_events
    when 'confidence'
      current_node.confidences << parse_confidence
    when 'sequence'
      current_node.sequences << parse_sequence
    when 'property'
      current_node.properties << parse_property
    when 'taxonomy'
      current_node.taxonomies << parse_taxonomy
    when 'distribution'
      current_node.distributions << parse_distribution
    when 'node_id'
      id = Id.new
      id.type = @reader["type"]
      @reader.read
      id.value = @reader.value
      @reader.read
      #has_reached_end_element?('node_id')
      #@todo write unit test for this. There is no example of this in the example files
      current_node.id = id
    when 'color'
      # NOTE(review): the reader is still on the color start element here,
      # so the three parse_simple_element calls below look like no-ops
      # (each one requires the reader to be on red/green/blue) -- confirm
      # against a document that contains a color element.
      color = BranchColor.new
      parse_simple_element(color, 'red')
      parse_simple_element(color, 'green')
      parse_simple_element(color, 'blue')
      current_node.color = color
      #@todo add unit test for this
    when 'date'
      date = Date.new
      date.unit = @reader["unit"]
      #move to the next token, which is always empty, since date tag does not
      # have text associated with it
      @reader.read
      @reader.read #now the token is the first tag under date tag
      while not(is_end_element?('date'))
        parse_simple_element(date, 'desc')
        parse_simple_element(date, 'value')
        parse_simple_element(date, 'minimum')
        parse_simple_element(date, 'maximum')
        @reader.read
      end
      current_node.date = date
    when 'reference'
      reference = Reference.new()
      reference.doi = @reader['doi']
      if not @reader.empty_element?
        while not is_end_element?('reference')
          parse_simple_element(reference, 'desc')
          @reader.read
        end
      end
      current_node.references << reference
    when 'binary_characters'
      current_node.binary_characters = parse_binary_characters
    when 'clade'
      #do nothing
    else
      # anything unrecognized is kept as a generic PhyloXML::Other object
      current_node.other << parse_other
      #puts "No match found in parse_clade_elements.(#{@reader.name})"
    end

  end

end #parse_clade_elements

# Parses an events element (reader on its start tag) and returns a
# PhyloXML::Events object.
def parse_events()
  events = PhyloXML::Events.new
  @reader.read #go to next element
  while not(is_end_element?('events')) do
    parse_simple_elements(events, ['type', 'duplications',
                                   'speciations', 'losses'])
    if is_element?('confidence')
      events.confidence = parse_confidence
      #@todo could add unit test for this (example file does not have this case)
    end
    @reader.read
  end
  return events
end #parse_events

# Parses a taxonomy element (reader on its start tag) and returns a
# PhyloXML::Taxonomy object. Unrecognized children are stored in
# taxonomy.other.
def parse_taxonomy
  taxonomy = PhyloXML::Taxonomy.new
  parse_attributes(taxonomy, ["id_source"])
  @reader.read
  while not(is_end_element?('taxonomy')) do

    if @reader.node_type == XML::Reader::TYPE_ELEMENT
      case @reader.name
      when 'code'
        @reader.read
        taxonomy.code = @reader.value
        @reader.read
      when 'scientific_name'
        @reader.read
        taxonomy.scientific_name = @reader.value
        @reader.read
      when 'rank'
        @reader.read
        taxonomy.rank = @reader.value
        @reader.read
      when 'authority'
        @reader.read
        taxonomy.authority = @reader.value
        @reader.read
      when 'id'
        taxonomy.taxonomy_id = parse_id('id')
      when 'common_name'
        @reader.read
        taxonomy.common_names << @reader.value
        @reader.read
        #has_reached_end_element?('common_name')
      when 'synonym'
        @reader.read
        taxonomy.synonyms << @reader.value
        @reader.read
        #has_reached_end_element?('synonym')
      when 'uri'
        taxonomy.uri = parse_uri
      else
        taxonomy.other << parse_other
      end
    end

    @reader.read #move to next tag in the loop
  end
  return taxonomy
end #parse_taxonomy

private

# Parses a sequence element (reader on its start tag) and returns a
# Sequence object. Unrecognized children are stored in sequence.other.
def parse_sequence
  sequence = Sequence.new
  parse_attributes(sequence, ["type", "id_source", "id_ref"])

  @reader.read
  while not(is_end_element?('sequence'))

    if @reader.node_type == XML::Reader::TYPE_ELEMENT
      case @reader.name
      when 'symbol'
        @reader.read
        sequence.symbol = @reader.value
        @reader.read
      when 'name'
        @reader.read
        sequence.name = @reader.value
        @reader.read
      when 'location'
        @reader.read
        sequence.location = @reader.value
        @reader.read
      when 'mol_seq'
        sequence.is_aligned = @reader["is_aligned"]
        @reader.read
        sequence.mol_seq = @reader.value
        @reader.read
        has_reached_end_element?('mol_seq')
      when 'accession'
        sequence.accession = Accession.new
        sequence.accession.source = @reader["source"]
        @reader.read
        sequence.accession.value = @reader.value
        @reader.read
        has_reached_end_element?('accession')
      when 'uri'
        sequence.uri = parse_uri
      when 'annotation'
        sequence.annotations << parse_annotation
      when 'domain_architecture'
        sequence.domain_architecture = DomainArchitecture.new
        sequence.domain_architecture.length = @reader["length"]
        @reader.read
        @reader.read
        while not(is_end_element?('domain_architecture'))
          sequence.domain_architecture.domains << parse_domain
          @reader.read #go to next domain element
        end
      else
        sequence.other << parse_other
        #@todo add unit test
      end
    end

    @reader.read
  end
  return sequence
end #parse_sequence

# Parses a uri element: the desc and type attributes plus the text value.
# Returns a Uri object.
def parse_uri
  uri = Uri.new
  parse_attributes(uri, ["desc", "type"])
  parse_simple_element(uri, 'uri')
  return uri
end

# Parses an annotation element and returns an Annotation object.
# Children are only read when the element is not empty.
def parse_annotation
  annotation = Annotation.new

  parse_attributes(annotation, ['ref', 'source', 'evidence', 'type'])

  if not @reader.empty_element?
    while not(is_end_element?('annotation'))
      parse_simple_element(annotation, 'desc') if is_element?('desc')

      annotation.confidence = parse_confidence if is_element?('confidence')

      annotation.properties << parse_property if is_element?('property')

      if is_element?('uri')
        annotation.uri = parse_uri
      end

      @reader.read
    end

  end
  return annotation
end

# Parses a property element (attributes and text value) and returns a
# Property object. Raises unless the end tag follows the value.
def parse_property
  property = Property.new
  parse_attributes(property, ["ref", "unit", "datatype", "applies_to", "id_ref"])
  @reader.read
  property.value = @reader.value
  @reader.read
  has_reached_end_element?('property')
  return property
end #parse_property

# Parses a confidence element and returns Confidence.new(type, value),
# where value is the element text converted with to_f.
def parse_confidence
  type = @reader["type"]
  @reader.read
  value = @reader.value.to_f
  @reader.read
  has_reached_end_element?('confidence')
  return Confidence.new(type, value)
end #parse_confidence

# Parses a distribution element (desc, point and polygon children) and
# returns a Distribution object.
def parse_distribution
  distribution = Distribution.new
  @reader.read
  while not(is_end_element?('distribution')) do

    parse_simple_element(distribution, 'desc')

    distribution.points << parse_point if is_element?('point')
    distribution.polygons << parse_polygon if is_element?('polygon')

    @reader.read
  end
  return distribution
end #parse_distribution

# Parses a point element and returns a Point object. lat and long are
# assigned as read; alt is converted with to_f.
def parse_point
  point = Point.new

  point.geodetic_datum = @reader["geodetic_datum"]
  point.alt_unit = @reader["alt_unit"]

  @reader.read
  while not(is_end_element?('point')) do

    parse_simple_elements(point, ['lat', 'long'] )

    if is_element?('alt')
      @reader.read
      point.alt = @reader.value.to_f
      @reader.read
      has_reached_end_element?('alt')
    end
    #advance reader
    @reader.read
  end
  return point
end #parse_point

# Parses a polygon element and returns a Polygon object holding its
# points. Prints a warning to standard output when fewer than 3 points
# were found.
def parse_polygon
  polygon = Polygon.new
  @reader.read
  while not(is_end_element?('polygon')) do
    polygon.points << parse_point if is_element?('point')
    @reader.read
  end

  #@todo should check for it at all? Probably not if xml is valid.
  if polygon.points.length <3
    puts "Warning: should have at least 3 points"
  end
  return polygon
end #parse_polygon

# Parses an id-like element named _tag_name_ (provider attribute and text
# value) and returns an Id object.
def parse_id(tag_name)
  id = Id.new
  id.provider = @reader["provider"]
  @reader.read
  id.value = @reader.value
  @reader.read #@todo shouldn't there be another read?
  has_reached_end_element?(tag_name)
  return id
end #parse_id

# Parses a domain element and returns a ProteinDomain object. One extra
# read is done after the end tag has been checked.
def parse_domain
  domain = ProteinDomain.new
  parse_attributes(domain, ["from", "to", "confidence", "id"])

  @reader.read
  domain.value = @reader.value
  @reader.read
  has_reached_end_element?('domain')
  @reader.read

  return domain
end

# Parses a binary_characters element and returns a
# PhyloXML::BinaryCharacters object. The lost/gained/absent/present lists
# are only read when the element is not empty.
def parse_binary_characters
  b = PhyloXML::BinaryCharacters.new
  b.bc_type = @reader['type']

  parse_attributes(b, ['gained_count', 'absent_count', 'lost_count', 'present_count'])
  if not @reader.empty_element?
    @reader.read
    while not is_end_element?('binary_characters')

      parse_bc(b, 'lost')
      parse_bc(b, 'gained')
      parse_bc(b, 'absent')
      parse_bc(b, 'present')

      @reader.read
    end
  end
  return b
end #parse_binary_characters

# When the reader is on the start element _element_, appends the text of
# every bc child to the collection returned by object.send(element).
def parse_bc(object, element)
  if is_element?(element)
    @reader.read
    while not is_end_element?(element)
      if is_element?('bc')
        @reader.read
        object.send(element) << @reader.value
        @reader.read
        has_reached_end_element?('bc')
      end
      @reader.read
    end
  end
end #parse_bc

# Parses the current element generically into a PhyloXML::Other object:
# element name, attributes, text value and (recursively) child elements.
def parse_other
  other_obj = PhyloXML::Other.new
  other_obj.element_name = @reader.name
  #parse attributes
  code = @reader.move_to_first_attribute
  while code ==1
    other_obj.attributes[@reader.name] = @reader.value
    code = @reader.move_to_next_attribute
  end

  while not is_end_element?(other_obj.element_name) do
    @reader.read
    if @reader.node_type == XML::Reader::TYPE_ELEMENT
      other_obj.children << parse_other #recursive call to parse children
    elsif @reader.node_type == XML::Reader::TYPE_TEXT
      other_obj.value = @reader.value
    end
  end
  #just a check
  has_reached_end_element?(other_obj.element_name)
  return other_obj
end #parse_other

end #class phyloxmlParser

end #module PhyloXML

end #module Bio
bio-1.4.3.0001/lib/bio/db/phyloxml/phyloxml_writer.rb0000644000004100000410000001477312200110570022254 0ustar www-datawww-data# # = bio/db/phyloxml_writer.rb - PhyloXML writer # # Copyright:: Copyright (C) 2009 # Diana Jaunzeikare # License:: The Ruby License # # $Id:$ # # == Description # # This file containts writer for PhyloXML. # # == Requirements # # Libxml2 XML parser is required. Install libxml-ruby bindings from # http://libxml.rubyforge.org or # # gem install -r libxml-ruby # # == References # # * http://www.phyloxml.org # # * https://www.nescent.org/wg_phyloinformatics/PhyloSoC:PhyloXML_support_in_BioRuby require 'libxml' require 'bio/db/phyloxml/phyloxml_elements' module Bio module PhyloXML # == Description # # Bio::PhyloXML::Writer is for writing phyloXML (version 1.10) format files. # # == Requirements # # Libxml2 XML parser is required. Install libxml-ruby bindings from # http://libxml.rubyforge.org or # # gem install -r libxml-ruby # # == Usage # # require 'bio' # # # Create new phyloxml parser # phyloxml = Bio::PhyloXML::Parser.open('example.xml') # # # Read in some trees from file # tree1 = phyloxml.next_tree # tree2 = phyloxml.next_tree # # # Create new phyloxml writer # writer = Bio::PhyloXML::Writer.new('tree.xml') # # # Write tree to the file tree.xml # writer.write(tree1) # # # Add another tree to the file # writer.write(tree2) # # == References # # http://www.phyloxml.org/documentation/version_100/phyloxml.xsd.html class Writer include LibXML SCHEMA_LOCATION = 'http://www.phyloxml.org http://www.phyloxml.org/1.10/phyloxml.xsd' attr_accessor :write_branch_length_as_subelement # # Create new Writer object. As parameters provide filename of xml file # you wish to create. Optional parameter is whether to indent or no. # Default is true. By default branch_length is written as subelement of # clade element. 
#
# ---
# *Arguments*:
# * (required) _filename_: path of the XML file to create
# * (optional) _indent_: whether the saved XML is indented. Default value is true.
def initialize(filename, indent=true)
  @write_branch_length_as_subelement = true #default value
  @filename = filename
  @indent = indent

  @doc = XML::Document.new()
  @doc.root = XML::Node.new('phyloxml')
  @root = @doc.root
  @root['xmlns:xsi'] = 'http://www.w3.org/2001/XMLSchema-instance'
  @root['xsi:schemaLocation'] = SCHEMA_LOCATION
  @root['xmlns'] = 'http://www.phyloxml.org'

  #@todo save encoding to be UTF-8. (However it is the default one).
  #it gives error NameError: uninitialized constant LibXML::XML::Encoding
  #@doc.encoding = XML::Encoding::UTF_8

  # Honor the indent argument for the initial (tree-less) document as well;
  # previously this first save always used :indent => true.
  @doc.save(@filename, :indent => @indent)
end

#
# Write a tree to a file in phyloxml format. The tree is appended to the
# in-memory document and the whole document is saved again.
#
#   require 'bio'
#   writer = Bio::PhyloXML::Writer.new('tree.xml')
#   writer.write(tree)
#
def write(tree)
  phylogeny = XML::Node.new('phylogeny')
  @root << phylogeny

  # Elements that come before the clade element.
  PhyloXML::Writer.generate_xml(phylogeny, tree, [
      [:attr, 'rooted'],
      [:simple, 'name', tree.name],
      [:complex, 'id', tree.phylogeny_id],
      [:simple, 'description', tree.description],
      [:simple, 'date', tree.date],
      [:objarr, 'confidence', 'confidences']])

  # The root clade has no parent edge, hence no branch length.
  root_clade = tree.root.to_xml(nil, @write_branch_length_as_subelement)

  phylogeny << root_clade

  tree.children(tree.root).each do |node|
    root_clade << node_to_xml(tree, node, tree.root)
  end

  # Elements that come after the clade element.
  Bio::PhyloXML::Writer::generate_xml(phylogeny, tree, [
      [:objarr, 'clade_relation', 'clade_relations'],
      [:objarr, 'sequence_relation', 'sequence_relations'],
      [:objarr, 'property', 'properties']] )

  @doc.save(@filename, :indent => @indent)
end #writer#write

#
# PhyloXML Schema allows to save data in different xml format after all
# phylogeny elements. This method is to write these additional data.
# # parser = PhyloXML::Parser.open('phyloxml_examples.xml') # writer = PhyloXML::Writer.new('new.xml') # # parser.each do |tree| # writer.write(tree) # end # # # When all the trees are read in by the parser, whats left is saved at # # PhyloXML::Parser#other # writer.write(parser.other) # def write_other(other_arr) other_arr.each do |other_obj| @root << other_obj.to_xml end @doc.save(@filename, :indent => @indent) end #class method # # Used by to_xml methods of PhyloXML element classes. Generally not to be # invoked directly. # def self.generate_xml(root, elem, subelement_array) #example usage: generate_xml(node, self, [[ :complex,'accession', ], [:simple, 'name', @name], [:simple, 'location', @location]]) subelement_array.each do |subelem| if subelem[0] == :simple root << XML::Node.new(subelem[1], subelem[2].to_s) if subelem[2] != nil and not subelem[2].to_s.empty? elsif subelem[0] == :complex root << subelem[2].send("to_xml") if subelem[2] != nil elsif subelem[0] == :pattern #seq, self, [[:pattern, 'symbol', @symbol, "\S{1,10}"] if subelem[2] != nil if subelem[2] =~ subelem[3] root << XML::Node.new(subelem[1], subelem[2]) else raise "#{subelem[2]} is not a valid value of #{subelem[1]}. It should follow pattern #{subelem[3]}" end end elsif subelem[0] == :objarr #[:objarr, 'annotation', 'annotations']]) obj_arr = elem.send(subelem[2]) obj_arr.each do |arr_elem| root << arr_elem.to_xml end elsif subelem[0] == :simplearr # [:simplearr, 'common_name', @common_names] subelem[2].each do |elem_val| root << XML::Node.new(subelem[1], elem_val) end elsif subelem[0] == :attr #[:attr, 'rooted'] obj = elem.send(subelem[1]) if obj != nil root[subelem[1]] = obj.to_s end else raise "Not supported type of element by method generate_xml." 
end end return root end private def node_to_xml(tree, node, parent) edge = tree.get_edge(parent, node) branch_length = edge.distance clade = node.to_xml(branch_length, @write_branch_length_as_subelement) tree.children(node).each do |new_node| clade << node_to_xml(tree, new_node, node) end return clade end end end end bio-1.4.3.0001/lib/bio/db/aaindex.rb0000644000004100000410000001603612200110570016533 0ustar www-datawww-data# # = bio/db/aaindex.rb - AAindex database class # # Copyright:: Copyright (C) 2001 # KAWASHIMA Shuichi # Copyright:: Copyright (C) 2006 # Mitsuteru C. Nakao # License:: The Ruby License # # $Id:$ # # == Description # # Classes for Amino Acid Index Database (AAindex and AAindex2). # * AAindex Manual: http://www.genome.jp/dbget-bin/show_man?aaindex # # == Examples # # aax1 = Bio::AAindex.auto("PRAM900102.aaindex1") # aax2 = Bio::AAindex.auto("DAYM780301.aaindex2") # # aax1 = Bio::AAindex1.new("PRAM900102.aaindex1") # aax1.entry_id # aax1.index # # aax2 = Bio::AAindex2.new("DAYM780301.aaindex2") # aax2.entry_id # aax2.matrix # aax2.matrix[2,2] # aax2.matrix('R', 'A') # aax2['R', 'A'] # # == References # # * http://www.genome.jp/aaindex/ # require "bio/db" require "matrix" module Bio # Super class for AAindex1 and AAindex2 class AAindex < KEGGDB # Delimiter DELIMITER ="\n//\n" # Delimiter RS = DELIMITER # Bio::DB API TAGSIZE = 2 # Auto detecter for two AAindex formats. # returns a Bio::AAindex1 object or a Bio::AAindex2 object. def self.auto(str) case str when /^I /m Bio::AAindex1.new(str) when /^M /m Bio::AAindex2.new(str) else raise end end # def initialize(entry) super(entry, TAGSIZE) end # Returns entry_id in the H line. def entry_id if @data['entry_id'] @data['entry_id'] else @data['entry_id'] = field_fetch('H') end end # Returns definition in the D line. def definition if @data['definition'] @data['definition'] else @data['definition'] = field_fetch('D') end end # Returns database links in the R line. # cf.) 
['LIT:123456', 'PMID:12345678'] def dblinks if @data['ref'] @data['ref'] else @data['ref'] = field_fetch('R').split(' ') end end # Returns authors in the A line. def author if @data['author'] @data['author'] else @data['author'] = field_fetch('A') end end # Returns title in the T line. def title if @data['title'] @data['title'] else @data['title'] = field_fetch('T') end end # Returns journal name in the J line. def journal if @data['journal'] @data['journal'] else @data['journal'] = field_fetch('J') end end # Returns comment (if any). def comment if @data['comment'] @data['comment'] else @data['comment'] = field_fetch('*') end end end # Class for AAindex1 format. class AAindex1 < AAindex def initialize(entry) super(entry) end # Returns correlation_coefficient (Hash) in the C line. # # cf.) {'ABCD12010203' => 0.999, 'CDEF123456' => 0.543, ...} def correlation_coefficient if @data['correlation_coefficient'] @data['correlation_coefficient'] else hash = {} ary = field_fetch('C').split(' ') ary.each do |x| next unless x =~ /^[A-Z]/ hash[x] = ary[ary.index(x) + 1].to_f end @data['correlation_coefficient'] = hash end end # Returns the index (Array) in the I line. 
# # an argument: :string, :float, :zscore or :integer def index(type = :float) aa = %w( A R N D C Q E G H I L K M F P S T W Y V ) values = field_fetch('I', 1).split(' ') if values.size != 20 raise "Invalid format in #{entry_id} : #{values.inspect}" end if type == :zscore and values.size > 0 sum = 0.0 values.each do |a| sum += a.to_f end mean = sum / values.size # / 20 var = 0.0 values.each do |a| var += (a.to_f - mean) ** 2 end sd = Math.sqrt(var) end if type == :integer figure = 0 values.each do |a| figure = [ figure, a[/\..*/].length - 1 ].max end end hash = {} aa.each_with_index do |a, i| case type when :string hash[a] = values[i] when :float hash[a] = values[i].to_f when :zscore hash[a] = (values[i].to_f - mean) / sd when :integer hash[a] = (values[i].to_f * 10 ** figure).to_i end end return hash end end # Class for AAindex2 format. class AAindex2 < AAindex def initialize(entry) super(entry) end # Returns row labels. def rows if @data['rows'] @data['rows'] else label_data @rows end end # Returns col labels. def cols if @data['cols'] @data['cols'] else label_data @cols end end # Returns the value of amino acids substitution (aa1 -> aa2). def [](aa1 = nil, aa2 = nil) matrix[cols.index(aa1), rows.index(aa2)] end # Returns amino acids matrix in Matrix. def matrix(aa1 = nil, aa2 = nil) return self[aa1, aa2] if aa1 and aa2 if @data['matrix'] @data['matrix'] else ma = [] label_data.each_line do |line| ma << line.strip.split(/\s+/).map {|x| x.to_f } end ma_len = ma.size ma.each do |row| row_size = row.size if row_size < ma_len (row_size..ma_len-1).each do |i| row[i] = ma[i][row_size-1] end end end mat = Matrix[*ma] @data['matrix'] = mat end end # Returns amino acids matrix in Matrix for the old format (<= ver 5.0). 
def old_matrix # for AAindex <= ver 5.0 return @data['matrix'] if @data['matrix'] @aa = {} # used to determine row/column of the aa attr_reader :aa alias_method :aa, :rows alias_method :aa, :cols field = field_fetch('I') case field when / (ARNDCQEGHILKMFPSTWYV)\s+(.*)/ # 20x19/2 matrix aalist = $1 values = $2.split(/\s+/) 0.upto(aalist.length - 1) do |i| @aa[aalist[i].chr] = i end ma = Array.new 20.times do ma.push(Array.new(20)) # 2D array of 20x(20) end for i in 0 .. 19 do for j in i .. 19 do ma[i][j] = values[i + j*(j+1)/2].to_f ma[j][i] = ma[i][j] end end @data['matrix'] = Matrix[*ma] when / -ARNDCQEGHILKMFPSTWYV / # 21x20/2 matrix (with gap) raise NotImplementedError when / ACDEFGHIKLMNPQRSTVWYJ- / # 21x21 matrix (with gap) raise NotImplementedError end end private def label_data if @data['data'] @data['data'] else label, data = get('M').split("\n", 2) if /M rows = (\S+), cols = (\S+)/.match(label) rows, cols = $1, $2 @rows = rows.split('') @cols = cols.split('') end @data['data'] = data end end end # class AAindex2 end # module Bio bio-1.4.3.0001/lib/bio/db/nexus.rb0000644000004100000410000015556712200110570016301 0ustar www-datawww-data# # = bio/db/nexus.rb - Nexus Standard phylogenetic tree parser / formatter # # Copyright:: Copyright (C) 2006 Christian M Zmasek # # License:: The Ruby License # # $Id: nexus.rb,v 1.3 2007/04/05 23:35:40 trevor Exp $ # # == Description # # This file contains classes that implement a parser for NEXUS formatted # data as well as objects to store, access, and write the parsed data. # # The following five blocks: # taxa, characters, distances, trees, data # are recognizable and parsable. # # The parser can deal with (nested) comments (indicated by square brackets), # unless the comments are inside a command or data item (e.g. # "Dim[comment]ensions" or inside a matrix). 
# # Single or double quoted TaxLabels are processed as follows (by way # of example): "mus musculus" -> mus_musculus # # # == USAGE # # require 'bio/db/nexus' # # # Create a new parser: # nexus = Bio::Nexus.new( nexus_data_as_string ) # # # Get first taxa block: # taxa_block = nexus.get_taxa_blocks[ 0 ] # # Get number of taxa: # number_of_taxa = taxa_block.get_number_of_taxa.to_i # # Get name of first taxon: # first_taxon = taxa_block.get_taxa[ 0 ] # # # Get first data block: # data_block = nexus.get_data_blocks[ 0 ] # # Get first characters name: # seq_name = data_block.get_row_name( 0 ) # # Get first characters row named "taxon_2" as Bio::Sequence sequence: # seq_tax_2 = data_block.get_sequences_by_name( "taxon_2" )[ 0 ] # # Get third characters row as Bio::Sequence sequence: # seq_2 = data_block.get_sequence( 2 ) # # Get first characters row named "taxon_3" as String: # string_tax_3 = data_block.get_characters_strings_by_name( "taxon_3" ) # # Get name of first taxon: # taxon_0 = data_block.get_taxa[ 0 ] # # Get characters matrix as Bio::Nexus::NexusMatrix (names are in column 0) # characters_matrix = data_block.get_matrix # # # Get first characters block (same methods as Nexus::DataBlock except # # it lacks get_taxa method): # characters_block = nexus.get_characters_blocks[ 0 ] # # # Get trees block(s): # trees_block = nexus.get_trees_blocks[ 0 ] # # Get first tree named "best" as String: # string_fish = trees_block.get_tree_strings_by_name( "best" )[ 0 ] # # Get first tree named "best" as Bio::Db::Newick object: # tree_fish = trees_block.get_trees_by_name( "best" )[ 0 ] # # Get first tree as Bio::Db::Newick object: # tree_first = trees_block.get_tree( 0 ) # # # Get distances block(s): # distances_blocks = nexus.get_distances_blocks # # Get matrix as Bio::Nexus::NexusMatrix object: # matrix = distances_blocks[ 0 ].get_matrix # # Get value (column 0 are names): # val = matrix.get_value( 1, 5 ) # # # Get blocks for which no class exists (private blocks): # 
private_blocks = nexus.get_blocks_by_name( "my_block" ) # # Get first block names "my_block": # my_block_0 = private_blocks[ 0 ] # # Get first token in first block names "my_block": # first_token = my_block_0.get_tokens[ 0 ] # # # == References # # * Maddison DR, Swofford DL, Maddison WP (1997). NEXUS: an extensible file # format for systematic information. # Syst Biol. 1997 46(4):590-621. # require 'bio/sequence' require 'bio/tree' require 'bio/db/newick' module Bio # == DESCRIPTION # Bio::Nexus is a parser for nexus formatted data. # It contains classes and constants enabling the representation and # processing of nexus data. # # == USAGE # # # Parsing a nexus formatted string str: # nexus = Bio::Nexus.new( nexus_str ) # # # Obtaining of the nexus blocks as array of GenericBlock or # # any of its subclasses (such as DistancesBlock): # blocks = nexus.get_blocks # # # Getting a block by name: # my_blocks = nexus.get_blocks_by_name( "my_block" ) # # # Getting distance blocks: # distances_blocks = nexus.get_distances_blocks # # # Getting trees blocks: # trees_blocks = nexus.get_trees_blocks # # # Getting data blocks: # data_blocks = nexus.get_data_blocks # # # Getting characters blocks: # character_blocks = nexus.get_characters_blocks # # # Getting taxa blocks: # taxa_blocks = nexus.get_taxa_blocks # class Nexus END_OF_LINE = "\n" INDENTENTION = " " DOUBLE_QUOTE = "\"" SINGLE_QUOTE = "'" BEGIN_NEXUS = "#NEXUS" DELIMITER = ";" BEGIN_BLOCK = "Begin" END_BLOCK = "End" + DELIMITER BEGIN_COMMENT = "[" END_COMMENT = "]" TAXA = "Taxa" CHARACTERS = "Characters" DATA = "Data" DISTANCES = "Distances" TREES = "Trees" TAXA_BLOCK = TAXA + DELIMITER CHARACTERS_BLOCK = CHARACTERS + DELIMITER DATA_BLOCK = DATA + DELIMITER DISTANCES_BLOCK = DISTANCES + DELIMITER TREES_BLOCK = TREES + DELIMITER DIMENSIONS = "Dimensions" FORMAT = "Format" NTAX = "NTax" NCHAR = "NChar" DATATYPE = "DataType" TAXLABELS = "TaxLabels" MATRIX = "Matrix" # End of constants. 
# Nexus parse error class,
# raised when nexus formatted data cannot be parsed.
class NexusParseError < RuntimeError; end

# Builds a parser for 'nexus_str' and parses the String immediately.
#
# ---
# *Arguments*:
# * (required) _nexus_str_: String - nexus formatted data
def initialize( nexus_str )
  @blocks = []
  @current_cmd = @current_subcmd = nil
  @current_block_name = @current_block = nil
  parse( nexus_str )
end

# Returns every block collected from the String handed to
# Bio::Nexus.new( nexus_str ).
#
# ---
# *Returns*:: Array of GenericBlocks or any of its subclasses
def get_blocks
  @blocks
end

# A convenience method which returns a new Array holding those
# blocks whose name equals 'name'.
#
# ---
# *Arguments*:
# * (required) _name_: String
# *Returns*:: Array of GenericBlocks or any of its subclasses
def get_blocks_by_name( name )
  @blocks.select { | candidate | name == candidate.get_name }
end

# A convenience method which returns an array of
# all data blocks.
#
# ---
# *Returns*:: Array of DataBlocks
def get_data_blocks
  block_name = DATA_BLOCK.chomp( ";" ).downcase
  get_blocks_by_name( block_name )
end

# A convenience method which returns an array of
# all characters blocks.
#
# ---
# *Returns*:: Array of CharactersBlocks
def get_characters_blocks
  block_name = CHARACTERS_BLOCK.chomp( ";" ).downcase
  get_blocks_by_name( block_name )
end

# A convenience method which returns an array of
# all trees blocks.
#
# ---
# *Returns*:: Array of TreesBlocks
def get_trees_blocks
  block_name = TREES_BLOCK.chomp( ";" ).downcase
  get_blocks_by_name( block_name )
end

# A convenience method which returns an array of
# all distances blocks.
#
# ---
# *Returns*:: Array of DistancesBlock
def get_distances_blocks
  block_name = DISTANCES_BLOCK.chomp( ";" ).downcase
  get_blocks_by_name( block_name )
end

# A convenience method which returns an array of
# all taxa blocks.
#
# ---
# *Returns*:: Array of TaxaBlocks
def get_taxa_blocks
  get_blocks_by_name( TAXA_BLOCK.chomp( ";").downcase )
end

# Returns a String listing how many of each blocks it parsed.
#
# ---
# *Returns*:: String
def to_s
  str = String.new
  if get_blocks.length < 1
    str << "empty"
  else
    str << "number of blocks: " << get_blocks.length.to_s
    if get_characters_blocks.length > 0
      str << " [characters blocks: " << get_characters_blocks.length.to_s << "] "
    end
    if get_data_blocks.length > 0
      str << " [data blocks: " << get_data_blocks.length.to_s << "] "
    end
    if get_distances_blocks.length > 0
      str << " [distances blocks: " << get_distances_blocks.length.to_s << "] "
    end
    if get_taxa_blocks.length > 0
      str << " [taxa blocks: " << get_taxa_blocks.length.to_s << "] "
    end
    if get_trees_blocks.length > 0
      str << " [trees blocks: " << get_trees_blocks.length.to_s << "] "
    end
  end
  str
end
alias to_str to_s

private

# The master method for parsing.
# Stores the resulting block in array @blocks.
#
# The String is cut into tokens, quoted tokens are re-assembled (their
# words joined by "_"), tokens inside square bracket comments are
# dropped, and everything between "Begin <name>;" and "End;" is handed
# to process_token.
#
# ---
# *Arguments*:
# * (required) _str_: String - the String to be parsed
# *Raises*:: NexusParseError on nested blocks, on "End;" without an
#            open block, and on a quoted token which is never closed
def parse( str )
  str = str.chop if str[-1..-1] == ';'
  # NOTE(review): inside the character class "+" is a literal plus
  # sign, so tokens are also split at "+" (e.g. "1e+5") -- confirm
  # whether /[\s=]+/ was intended before changing it.
  ary = str.split(/[\s+=]/)
  ary.collect! { |x| x.strip!; x.empty? ? nil : x }
  ary.compact!

  comment_level = 0

  # Main loop
  while token = ary.shift
    # Quotes (ignored inside comments, where an apostrophe is just
    # text and must not swallow the closing bracket):
    if comment_level == 0 && quoted?( token )
      token = read_quoted_token( token, ary )
    end
    # Comments:
    open = token.count( BEGIN_COMMENT )
    close = token.count( END_COMMENT )
    comment = comment_level > 0
    comment_level = comment_level + open - close
    if ( open > 0 && open == close )
      next
    elsif comment_level > 0 || comment
      next
    elsif equal?( token, END_BLOCK )
      end_block()
    elsif equal?( token, BEGIN_BLOCK )
      begin_block()
      @current_block_name = token = ary.shift
      @current_block_name.downcase!
      @current_block = create_block()
      @blocks.push( @current_block )
    elsif ( @current_block_name != nil )
      process_token( token.chomp( DELIMITER ), ary )
    end
  end # main loop
  @blocks.compact!
end # parse

# Returns true if 'token' starts with a single or a double quote.
#
# ---
# *Arguments*:
# * (required) _token_: String
# *Returns*:: true or false
def quoted?( token )
  token.index( SINGLE_QUOTE ) == 0 || token.index( DOUBLE_QUOTE ) == 0
end

# Completes a quoted token: following tokens are appended (joined by
# "_") until the closing quote is reached, then a trailing ";" and the
# surrounding quotes are removed.  This shifts tokens from ary.
#
# ---
# *Arguments*:
# * (required) _token_: String - token starting with a quote
# * (required) _ary_: Array - the remaining tokens
# *Returns*:: String
# *Raises*:: NexusParseError if the tokens run out before the quote
#            is closed
def read_quoted_token( token, ary )
  quote = token[ 0, 1 ]
  until quote_closed?( token, quote )
    following = ary.shift
    if following == nil
      raise NexusParseError, "Quoted token is not terminated: " + token
    end
    token << "_" << following
  end
  token = token.chop if token[-1..-1] == ';'
  token.slice( 1, token.length - 2 )
end

# Returns true if 'token' (ignoring one trailing ";") is longer than
# the opening quote alone and ends with 'quote'.
#
# ---
# *Arguments*:
# * (required) _token_: String
# * (required) _quote_: String - the opening quote character
# *Returns*:: true or false
def quote_closed?( token, quote )
  body = ( token[-1..-1] == ';' ) ? token.chop : token
  body.length > 1 && body[-1..-1] == quote
end

# Operations required when beginning of block encountered.
#
# ---
def begin_block()
  if @current_block_name != nil
    raise NexusParseError, "Cannot have nested nexus blocks (\"end;\" might be missing)"
  end
  reset_command_state()
end

# Operations required when ending of block encountered.
#
# ---
def end_block()
  if @current_block_name == nil
    raise NexusParseError, "Cannot have two or more \"end;\" tokens in sequence"
  end
  @current_block_name = nil
end

# This calls various process_token_for_<BLOCK NAME>_block methods
# depending on state of @current_block_name.
#
# ---
# *Arguments*:
# * (required) _token_: String
# * (required) _ary_: Array
def process_token( token, ary )
  case @current_block_name
  when TAXA_BLOCK.downcase
    process_token_for_taxa_block( token )
  when CHARACTERS_BLOCK.downcase
    process_token_for_character_block( token, ary )
  when DATA_BLOCK.downcase
    process_token_for_data_block( token, ary )
  when DISTANCES_BLOCK.downcase
    process_token_for_distances_block( token, ary )
  when TREES_BLOCK.downcase
    process_token_for_trees_block( token, ary )
  else
    process_token_for_generic_block( token )
  end
end

# Resets @current_cmd and @current_subcmd to nil.
#
# ---
def reset_command_state()
  @current_cmd = nil
  @current_subcmd = nil
end

# Creates GenericBlock (or any of its subclasses) the type of
# which is determined by the state of @current_block_name.
#
# ---
# *Returns*:: GenericBlock (or any of its subclasses) object
def create_block()
  case @current_block_name
  when TAXA_BLOCK.downcase
    return Bio::Nexus::TaxaBlock.new( @current_block_name )
  when CHARACTERS_BLOCK.downcase
    return Bio::Nexus::CharactersBlock.new( @current_block_name )
  when DATA_BLOCK.downcase
    return Bio::Nexus::DataBlock.new( @current_block_name )
  when DISTANCES_BLOCK.downcase
    return Bio::Nexus::DistancesBlock.new( @current_block_name )
  when TREES_BLOCK.downcase
    return Bio::Nexus::TreesBlock.new( @current_block_name )
  else
    return Bio::Nexus::GenericBlock.new( @current_block_name )
  end
end

# This processes the tokens (between Begin Taxa; and End;) for a taxa block
# Example of a currently parseable taxa block:
#   Begin Taxa;
#    Dimensions NTax=4;
#    TaxLabels fish [comment] 'african frog' "rat snake" 'red mouse';
#   End;
#
# A command word (Dimensions, TaxLabels) or sub-command word (NTax)
# only changes the parser state; any other token is stored in the
# current block according to that state.
#
# ---
# *Arguments*:
# * (required) _token_: String
def process_token_for_taxa_block( token )
  if ( equal?( token, DIMENSIONS ) )
    @current_cmd = DIMENSIONS
    @current_subcmd = nil
  elsif ( equal?( token, TAXLABELS ) )
    @current_cmd = TAXLABELS
    @current_subcmd = nil
  elsif ( @current_cmd == DIMENSIONS && equal?( token, NTAX ) )
    @current_subcmd = NTAX
  elsif ( cmds_equal_to?( DIMENSIONS, NTAX ) )
    @current_block.set_number_of_taxa( token )
  elsif ( cmds_equal_to?( TAXLABELS, nil ) )
    @current_block.add_taxon( token )
  end
end

# This processes the tokens (between Begin Characters; and End;) for a
# character block
# Example of a currently parseable character block:
#   Begin Characters;
#    Dimensions NChar=20
#               NTax=4;
#    Format DataType=DNA
#           Missing=x
#           Gap=- MatchChar=.;
#    Matrix
#     fish  ACATA GAGGG TACCT CTAAG
#     frog  ACTTA GAGGC TACCT CTAGC
#     snake ACTCA CTGGG TACCT TTGCG
#     mouse ACTCA GACGG TACCT TTGCG;
#   End;
#
# The first token after "Matrix" starts make_matrix, which consumes the
# rest of the matrix from ary.
#
# ---
# *Arguments*:
# * (required) _token_: String
# * (required) _ary_: Array
def process_token_for_character_block( token, ary )
  if ( equal?( token, DIMENSIONS ) )
    @current_cmd = DIMENSIONS
    @current_subcmd = nil
  elsif ( equal?( token, FORMAT ) )
    @current_cmd = FORMAT
    @current_subcmd = nil
  elsif ( equal?( token, MATRIX ) )
    @current_cmd = MATRIX
    @current_subcmd = nil
  elsif ( @current_cmd == DIMENSIONS && equal?( token, NTAX ) )
    @current_subcmd = NTAX
  elsif ( @current_cmd == DIMENSIONS && equal?( token, NCHAR ) )
    @current_subcmd = NCHAR
  elsif ( @current_cmd == FORMAT && equal?( token, DATATYPE ) )
    @current_subcmd = DATATYPE
  elsif ( @current_cmd == FORMAT && equal?( token, CharactersBlock::MISSING ) )
    @current_subcmd = CharactersBlock::MISSING
  elsif ( @current_cmd == FORMAT && equal?( token, CharactersBlock::GAP ) )
    @current_subcmd = CharactersBlock::GAP
  elsif ( @current_cmd == FORMAT && equal?( token, CharactersBlock::MATCHCHAR ) )
    @current_subcmd = CharactersBlock::MATCHCHAR
  elsif ( cmds_equal_to?( DIMENSIONS, NTAX ) )
    @current_block.set_number_of_taxa( token )
  elsif ( cmds_equal_to?( DIMENSIONS, NCHAR ) )
    @current_block.set_number_of_characters( token )
  elsif ( cmds_equal_to?( FORMAT, DATATYPE ) )
    @current_block.set_datatype( token )
  elsif ( cmds_equal_to?( FORMAT, CharactersBlock::MISSING ) )
    @current_block.set_missing( token )
  elsif ( cmds_equal_to?( FORMAT, CharactersBlock::GAP ) )
    @current_block.set_gap_character( token )
  elsif ( cmds_equal_to?( FORMAT, CharactersBlock::MATCHCHAR ) )
    @current_block.set_match_character( token )
  elsif ( cmds_equal_to?( MATRIX, nil ) )
    @current_block.set_matrix( make_matrix( token, ary,
                               @current_block.get_number_of_characters, true ) )
  end
end

# This processes the tokens (between Begin Trees; and End;) for a trees block
# Example of a currently parseable trees block:
#   Begin Trees;
#    Tree best=(fish,(frog,(snake, mouse)));
#    Tree other=(snake,(frog,( fish, mouse)));
#   End;
#
# The token following "Tree" is the tree name; the tokens after it are
# shifted from ary and concatenated (without separator) up to and
# including the first one containing ";".
#
# ---
# *Arguments*:
# * (required) _token_: String
# * (required) _ary_: Array
# *Raises*:: NexusParseError if the tokens run out before a ";" is found
def process_token_for_trees_block( token, ary )
  if ( equal?( token, TreesBlock::TREE ) )
    @current_cmd = TreesBlock::TREE
    @current_subcmd = nil
  elsif ( cmds_equal_to?( TreesBlock::TREE, nil ) )
    @current_block.add_tree_name( token )
    tree_string = ary.shift
    # tree_string becomes nil as soon as the token list is exhausted.
    until tree_string.nil? || tree_string.index( ";" )
      following = ary.shift
      tree_string = following && ( tree_string << following )
    end
    if tree_string.nil?
      raise NexusParseError, "Tree \"" + token + "\" is not terminated by \";\""
    end
    @current_block.add_tree( tree_string )
    @current_cmd = nil
  end
end

# This processes the tokens (between Begin Distances; and End;) for a
# distances block.
# Example of a currently parseable distances block:
#   Begin Distances;
#    Dimensions nchar=20 ntax=5;
#    Format Triangle=Upper;
#    Matrix
#     taxon_1 0.0  1.0  2.0  4.0  7.0
#     taxon_2 1.0  0.0  3.0  5.0  8.0
#     taxon_3 3.0  4.0  0.0  6.0  9.0
#     taxon_4 7.0  3.0  1.0  0.0  9.5
#     taxon_5 1.2  1.3  1.4  1.5  0.0;
#   End;
#
# ---
# *Arguments*:
# * (required) _token_: String
# * (required) _ary_: Array
def process_token_for_distances_block( token, ary )
  if ( equal?( token, DIMENSIONS ) )
    @current_cmd = DIMENSIONS
    @current_subcmd = nil
  elsif ( equal?( token, FORMAT ) )
    @current_cmd = FORMAT
    @current_subcmd = nil
  elsif ( equal?( token, MATRIX ) )
    @current_cmd = MATRIX
    @current_subcmd = nil
  elsif ( @current_cmd == DIMENSIONS && equal?( token, NTAX ) )
    @current_subcmd = NTAX
  elsif ( @current_cmd == DIMENSIONS && equal?( token, NCHAR ) )
    @current_subcmd = NCHAR
  elsif ( @current_cmd == FORMAT && equal?( token, DATATYPE ) )
    @current_subcmd = DATATYPE
  elsif ( @current_cmd == FORMAT && equal?( token, DistancesBlock::TRIANGLE ) )
    @current_subcmd = DistancesBlock::TRIANGLE
  elsif ( cmds_equal_to?( DIMENSIONS, NTAX ) )
    @current_block.set_number_of_taxa( token )
  elsif ( cmds_equal_to?( DIMENSIONS, NCHAR ) )
    @current_block.set_number_of_characters( token )
  elsif ( cmds_equal_to?( FORMAT, DistancesBlock::TRIANGLE ) )
    @current_block.set_triangle( token )
  elsif ( cmds_equal_to?( MATRIX, nil ) )
    @current_block.set_matrix( make_matrix( token, ary,
                               @current_block.get_number_of_taxa, false ) )
  end
end

# This processes the tokens (between Begin Data; and End;) for a data
# block.
# Example of a currently parseable data block:
#   Begin Data;
#    Dimensions ntax=5 nchar=14;
#    Format Datatype=RNA gap=# MISSING=x MatchChar=^;
#    TaxLabels ciona cow [comment] ape 'purple urchin' "green lizard";
#    Matrix
#     taxon_1 A- CCGTCGA-GTTA
#     taxon_2 T- CCG-CGA-GATA
#     taxon_3 A- C-GTCGA-GATA
#     taxon_4 A- CCTCGA--GTTA
#     taxon_5 T- CGGTCGT-CTTA;
#   End;
#
# ---
# *Arguments*:
# * (required) _token_: String
# * (required) _ary_: Array
def process_token_for_data_block( token, ary )
  if ( equal?( token, DIMENSIONS ) )
    @current_cmd = DIMENSIONS
    @current_subcmd = nil
  elsif ( equal?( token, FORMAT ) )
    @current_cmd = FORMAT
    @current_subcmd = nil
  elsif ( equal?( token, TAXLABELS ) )
    @current_cmd = TAXLABELS
    @current_subcmd = nil
  elsif ( equal?( token, MATRIX ) )
    @current_cmd = MATRIX
    @current_subcmd = nil
  elsif ( @current_cmd == DIMENSIONS && equal?( token, NTAX ) )
    @current_subcmd = NTAX
  elsif ( @current_cmd == DIMENSIONS && equal?( token, NCHAR ) )
    @current_subcmd = NCHAR
  elsif ( @current_cmd == FORMAT && equal?( token, DATATYPE ) )
    @current_subcmd = DATATYPE
  elsif ( @current_cmd == FORMAT && equal?( token, CharactersBlock::MISSING ) )
    @current_subcmd = CharactersBlock::MISSING
  elsif ( @current_cmd == FORMAT && equal?( token, CharactersBlock::GAP ) )
    @current_subcmd = CharactersBlock::GAP
  elsif ( @current_cmd == FORMAT && equal?( token, CharactersBlock::MATCHCHAR ) )
    @current_subcmd = CharactersBlock::MATCHCHAR
  elsif ( cmds_equal_to?( DIMENSIONS, NTAX ) )
    @current_block.set_number_of_taxa( token )
  elsif ( cmds_equal_to?( DIMENSIONS, NCHAR ) )
    @current_block.set_number_of_characters( token )
  elsif ( cmds_equal_to?( FORMAT, DATATYPE ) )
    @current_block.set_datatype( token )
  elsif ( cmds_equal_to?( FORMAT, CharactersBlock::MISSING ) )
    @current_block.set_missing( token )
  elsif ( cmds_equal_to?( FORMAT, CharactersBlock::GAP ) )
    @current_block.set_gap_character( token )
  elsif ( cmds_equal_to?( FORMAT, CharactersBlock::MATCHCHAR ) )
    @current_block.set_match_character( token )
  elsif ( cmds_equal_to?( TAXLABELS, nil ) )
    @current_block.add_taxon( token )
  elsif ( cmds_equal_to?( MATRIX, nil ) )
    @current_block.set_matrix( make_matrix( token, ary,
                               @current_block.get_number_of_characters, true ) )
  end
end

# Makes a NexusMatrix out of token from token Array ary
# Used by process_token_for_X_block methods which contain
# data in a matrix form. Column 0 contains names.
# This will shift tokens from ary.
#
# A row is complete once 'size' value columns have been filled; the
# next token is then taken as the name of the following row.  Reading
# stops at the first token containing ";".
# ---
# *Arguments*:
# * (required) _token_: String
# * (required) _ary_: Array
# * (required) _size_: Integer
# * (optional) _scan_token_: true or false
# *Returns*:: NexusMatrix
# *Raises*:: NexusParseError if the tokens run out before a ";" is found
def make_matrix( token, ary, size, scan_token = false )
  matrix = NexusMatrix.new
  col = -1
  row = 0
  done = false
  while ( !done )
    if ( col == -1 )
      # name
      col = 0
      matrix.set_value( row, col, token ) # name is in col 0
    else
      # values
      col = add_token_to_matrix( token, scan_token, matrix, row, col )
      if ( col == size.to_i )
        col = -1
        row += 1
      end
    end
    token = ary.shift
    if ( token == nil )
      raise NexusParseError, "Matrix is not terminated by \";\""
    end
    if ( token.index( DELIMITER ) != nil )
      last = token.chomp( ";" )
      # A ";" standing on its own carries no value; storing the empty
      # remainder would add a spurious (empty) cell to the matrix.
      unless last.empty?
        col = add_token_to_matrix( last, scan_token, matrix, row, col )
      end
      done = true
    end
  end # while
  matrix
end

# Helper method for make_matrix.
#
# ---
# *Arguments*:
# * (required) _token_: String
# * (required) _scan_token_: true or false - true: scan the token into
#                                            single characters, one per
#                                            column; false: add the
#                                            whole token to one column
# * (required) _matrix_: NexusMatrix - the matrix to which to add token
# * (required) _row_: Integer - the row for matrix
# * (required) _col_: Integer - the last column filled so far
# *Returns*:: Integer - the last column filled after adding token
def add_token_to_matrix( token, scan_token, matrix, row, col )
  if ( scan_token )
    token.scan(/./) { |w|
      col += 1
      matrix.set_value( row, col, w )
    }
  else
    col += 1
    matrix.set_value( row, col, token )
  end
  col
end

# This processes the tokens (between Begin Taxa; and End;) for a block
# for which a specific parser is not available.
# Example of a currently parseable generic block:
#   Begin Taxa;
#    token1 token2 token3 ...
#   End;
#
# ---
# *Arguments*:
# * (required) _token_: String
def process_token_for_generic_block( token )
  @current_block.add_token( token )
end

# Returns true if Strings str1 and str2 are
# equal - ignoring case.  Returns false if either is nil.
#
# ---
# *Arguments*:
# * (required) _str1_: String
# * (required) _str2_: String
# *Returns*:: true or false
def equal?( str1, str2 )
  return false if str1 == nil || str2 == nil
  str1.downcase == str2.downcase
end

# Returns true if @current_cmd == command
# and @current_subcmd == subcommand, false otherwise
# ---
# *Arguments*:
# * (required) _command_: String
# * (required) _subcommand_: String
# *Returns*:: true or false
def cmds_equal_to?( command, subcommand )
  return ( @current_cmd == command && @current_subcmd == subcommand )
end

# Classes to represent nexus data follow.

# == DESCRIPTION
# Bio::Nexus::GenericBlock represents a generic nexus block.
# It is mainly intended to be extended into more specific classes,
# although it is used for blocks not represented by more specific
# block classes.
# It has a name and a array for the tokenized content of a
# nexus block.
#
# == USAGE
#
#   require 'bio/db/nexus'
#
#   # Create a new parser:
#   nexus = Bio::Nexus.new( nexus_data_as_string )
#
#   # Get blocks for which no class exists (private blocks)
#   as Nexus::GenericBlock:
#   private_blocks = nexus.get_blocks_by_name( "my_block" )
#   # Get first block names "my_block":
#   my_block_0 = private_blocks[ 0 ]
#   # Get first token in first block names "my_block":
#   first_token = my_block_0.get_tokens[ 0 ]
#   # Get name of block (would return "my_block" in this case):
#   name = my_block_0.get_name
#   # Return data of block as nexus formatted String:
#   name = my_block_0.to_nexus
#
class GenericBlock

  # Creates a new GenericBlock object named 'name'.
  # A trailing ";" is removed from the name.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    @name = name.chomp(";")
    @tokens = Array.new
  end

  # Gets the name of this block.
  #
  # ---
  # *Returns*:: String
  def get_name
    @name
  end

  # Returns contents as Array of Strings.
  #
  # ---
  # *Returns*:: Array
  def get_tokens
    @tokens
  end

  # Same as to_nexus.
  #
  # ---
  # *Returns*:: String
  def to_s
    to_nexus
  end
  alias to_str to_s

  # Should return a String describing this block as nexus formatted data.
  # ---
  # *Returns*:: String
  def to_nexus
    "generic block \"" + get_name + "\" [do not know how to write in nexus format]"
  end

  # Adds a token to this.
  #
  # ---
  # *Arguments*:
  # * (required) _token_: String
  def add_token( token )
    @tokens.push( token )
  end

end # class GenericBlock


# == DESCRIPTION
# Bio::Nexus::TaxaBlock represents a taxa nexus block.
#
# = Example of Taxa block:
# Begin Taxa;
#  Dimensions NTax=4;
#  TaxLabels fish [comment] 'african frog' "rat snake" 'red mouse';
# End;
#
# == USAGE
#
#   require 'bio/db/nexus'
#
#   # Create a new parser:
#   nexus = Bio::Nexus.new( nexus_data_as_string )
#
#   # Get first taxa block:
#   taxa_block = nexus.get_taxa_blocks[ 0 ]
#   # Get number of taxa:
#   number_of_taxa = taxa_block.get_number_of_taxa.to_i
#   # Get name of first taxon:
#   first_taxon = taxa_block.get_taxa[ 0 ]
#
class TaxaBlock < GenericBlock

  # Creates a new TaxaBlock object named 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @number_of_taxa = 0
    @taxa = Array.new
  end

  # Returns a String describing this block as nexus formatted data.
  # ---
  # *Returns*:: String
  def to_nexus
    line_1 = String.new
    line_1 << DIMENSIONS
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      # to_s: the number is a String when set by the parser but may be
      # an Integer when set by a caller, and String#<< would append an
      # Integer as the character with that codepoint.
      line_1 << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    line_1 << DELIMITER
    line_2 = String.new
    line_2 << TAXLABELS << " " << Nexus::Util::array_to_string( get_taxa )
    line_2 << DELIMITER
    Nexus::Util::to_nexus_helper( TAXA_BLOCK, [ line_1, line_2 ] )
  end

  # Gets the "number of taxa" property.
  #
  # ---
  # *Returns*:: Integer
  def get_number_of_taxa
    @number_of_taxa
  end

  # Gets the taxa of this block.
  #
  # ---
  # *Returns*:: Array
  def get_taxa
    @taxa
  end

  # Sets the "number of taxa" property.
  #
  # ---
  # *Arguments*:
  # * (required) _number_of_taxa_: Integer
  def set_number_of_taxa( number_of_taxa )
    @number_of_taxa = number_of_taxa
  end

  # Adds a taxon name to this block.
  #
  # ---
  # *Arguments*:
  # * (required) _taxon_: String
  def add_taxon( taxon )
    @taxa.push( taxon )
  end

end # class TaxaBlock


# == DESCRIPTION
# Bio::Nexus::CharactersBlock represents a characters nexus block.
#
# = Example of Characters block:
# Begin Characters;
#  Dimensions NChar=20
#             NTax=4;
#  Format DataType=DNA
#         Missing=x
#         Gap=- MatchChar=.;
#  Matrix
#   fish  ACATA GAGGG TACCT CTAAG
#   frog  ACTTA GAGGC TACCT CTAGC
#   snake ACTCA CTGGG TACCT TTGCG
#   mouse ACTCA GACGG TACCT TTGCG;
# End;
#
#
# == USAGE
#
#   require 'bio/db/nexus'
#
#   # Create a new parser:
#   nexus = Bio::Nexus.new( nexus_data_as_string )
#
#
#   # Get first characters block (same methods as Nexus::DataBlock except
#   # it lacks get_taxa method):
#   characters_block = nexus.get_characters_blocks[ 0 ]
#
class CharactersBlock < GenericBlock

  MISSING = "Missing"
  GAP = "Gap"
  MATCHCHAR = "MatchChar"

  # Creates a new CharactersBlock object named 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @number_of_taxa = 0
    @number_of_characters = 0
    @data_type = String.new
    @gap_character = String.new
    @missing = String.new
    @match_character = String.new
    @matrix = NexusMatrix.new
  end

  # Returns a String describing this block as nexus formatted data.
  #
  # ---
  # *Returns*:: String
  def to_nexus
    line_1 = String.new
    line_1 << DIMENSIONS
    # to_s on both dimensions: String#<< would append an Integer as the
    # character with that codepoint instead of its decimal digits.
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      line_1 << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    if ( Nexus::Util::larger_than_zero( get_number_of_characters ) )
      line_1 << " " << NCHAR << "=" << get_number_of_characters.to_s
    end
    line_1 << DELIMITER

    line_2 = String.new
    line_2 << FORMAT
    if ( Nexus::Util::longer_than_zero( get_datatype ) )
      line_2 << " " << DATATYPE << "=" << get_datatype
    end
    if ( Nexus::Util::longer_than_zero( get_missing ) )
      line_2 << " " << MISSING << "=" << get_missing
    end
    if ( Nexus::Util::longer_than_zero( get_gap_character ) )
      line_2 << " " << GAP << "=" << get_gap_character
    end
    if ( Nexus::Util::longer_than_zero( get_match_character ) )
      line_2 << " " << MATCHCHAR << "=" << get_match_character
    end
    line_2 << DELIMITER

    line_3 = String.new
    line_3 << MATRIX
    Nexus::Util::to_nexus_helper( CHARACTERS_BLOCK,
                                  [ line_1, line_2, line_3 ] +
                                  get_matrix.to_nexus_row_array )
  end

  # Gets the "number of taxa" property.
  #
  # ---
  # *Returns*:: Integer
  def get_number_of_taxa
    @number_of_taxa
  end

  # Gets the "number of characters" property.
  #
  # ---
  # *Returns*:: Integer
  def get_number_of_characters
    @number_of_characters
  end

  # Gets the "datatype" property.
  # ---
  # *Returns*:: String
  def get_datatype
    @data_type
  end

  # Gets the "gap character" property.
  # ---
  # *Returns*:: String
  def get_gap_character
    @gap_character
  end

  # Gets the "missing" property.
  # ---
  # *Returns*:: String
  def get_missing
    @missing
  end

  # Gets the "match character" property.
  # ---
  # *Returns*:: String
  def get_match_character
    @match_character
  end

  # Gets the matrix.
  # ---
  # *Returns*:: Bio::Nexus::NexusMatrix
  def get_matrix
    @matrix
  end

  # Returns character data as Bio::Sequence object Array
  # for matrix rows named 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array of Bio::Sequence
  def get_sequences_by_name( name )
    seq_strs = get_characters_strings_by_name( name )
    seqs = Array.new
    seq_strs.each do | seq_str |
      seqs.push( create_sequence( seq_str, name ) )
    end
    seqs
  end

  # Returns the characters in the matrix at row 'row' as
  # Bio::Sequence object. Column 0 of the matrix is set as
  # the definition of the Bio::Sequence object.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: Bio::Sequence
  def get_sequence( row )
    create_sequence( get_characters_string( row ), get_row_name( row ) )
  end

  # Returns the String in the matrix at row 'row' and column 0,
  # which usually is interpreted as a sequence name (if the matrix
  # contains molecular sequence characters).
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: String
  def get_row_name( row )
    get_matrix.get_name( row )
  end

  # Returns character data as String Array
  # for matrix rows named 'name'.
  #
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array of Strings
  def get_characters_strings_by_name( name )
    get_matrix.get_row_strings_by_name( name, "" )
  end

  # Returns character data as String
  # for matrix row 'row'.
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: String
  def get_characters_string( row )
    get_matrix.get_row_string( row, "" )
  end

  # Sets the "number of taxa" property.
  # ---
  # *Arguments*:
  # * (required) _number_of_taxa_: Integer
  def set_number_of_taxa( number_of_taxa )
    @number_of_taxa = number_of_taxa
  end

  # Sets the "number of characters" property.
  # ---
  # *Arguments*:
  # * (required) _number_of_characters_: Integer
  def set_number_of_characters( number_of_characters )
    @number_of_characters = number_of_characters
  end

  # Sets the "data type" property.
  # ---
  # *Arguments*:
  # * (required) _data_type_: String
  def set_datatype( data_type )
    @data_type = data_type
  end

  # Sets the "gap character" property.
  # ---
  # *Arguments*:
  # * (required) _gap_character_: String
  def set_gap_character( gap_character )
    @gap_character = gap_character
  end

  # Sets the "missing" property.
  # ---
  # *Arguments*:
  # * (required) _missing_: String
  def set_missing( missing )
    @missing = missing
  end

  # Sets the "match character" property.
  # ---
  # *Arguments*:
  # * (required) _match_character_: String
  def set_match_character( match_character )
    @match_character = match_character
  end

  # Sets the matrix.
  # ---
  # *Arguments*:
  # * (required) _matrix_: Bio::Nexus::NexusMatrix
  def set_matrix( matrix )
    @matrix = matrix
  end

  private

  # Creates a Bio::Sequence object with sequence 'seq_str'
  # and definition 'definition'.
  # ---
  # *Arguments*:
  # * (required) _seq_str_: String
  # * (optional) _definition_: String
  # *Returns*:: Bio::Sequence
  def create_sequence( seq_str, definition = "" )
    seq = Bio::Sequence.auto( seq_str )
    seq.definition = definition
    seq
  end

end # class CharactersBlock


# == DESCRIPTION
# Bio::Nexus::DataBlock represents a data nexus block.
# A data block is a Bio::Nexus::CharactersBlock with the added
# capability to store taxa names.
#
# = Example of Data block:
# Begin Data;
#  Dimensions ntax=5 nchar=14;
#  Format Datatype=RNA gap=# MISSING=x MatchChar=^;
#  TaxLabels ciona cow [comment] ape 'purple urchin' "green lizard";
#  Matrix
#   taxon_1 A- CCGTCGA-GTTA
#   taxon_2 T- CCG-CGA-GATA
#   taxon_3 A- C-GTCGA-GATA
#   taxon_4 A- CCTCGA--GTTA
#   taxon_5 T- CGGTCGT-CTTA;
# End;
#
#
# == USAGE
#
#   require 'bio/db/nexus'
#
#   # Create a new parser:
#   nexus = Bio::Nexus.new( nexus_data_as_string )
#
#
#   # Get first data block:
#   data_block = nexus.get_data_blocks[ 0 ]
#   # Get first characters name:
#   seq_name = data_block.get_row_name( 0 )
#   # Get first characters row named "taxon_2" as Bio::Sequence sequence:
#   seq_tax_2 = data_block.get_sequences_by_name( "taxon_2" )[ 0 ]
#   # Get third characters row as Bio::Sequence sequence:
#   seq_2 = data_block.get_sequence( 2 )
#   # Get first characters row named "taxon_3" as String:
#   string_tax_3 = data_block.get_characters_strings_by_name( "taxon_3" )
#   # Get name of first taxon:
#   taxon_0 = data_block.get_taxa[ 0 ]
#   # Get characters matrix as Bio::Nexus::NexusMatrix (names are in column 0)
#   characters_matrix = data_block.get_matrix
#
class DataBlock < CharactersBlock

  # Creates a new DataBlock object named 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @taxa = Array.new
  end

  # Returns a String describing this block as nexus formatted data.
  # ---
  # *Returns*:: String
  def to_nexus
    line_1 = String.new
    line_1 << DIMENSIONS
    # to_s on both dimensions: they are Strings when set by the parser
    # but may be Integers when set by a caller, and String#<< would
    # append an Integer as the character with that codepoint.
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      line_1 << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    if ( Nexus::Util::larger_than_zero( get_number_of_characters ) )
      line_1 << " " << NCHAR << "=" << get_number_of_characters.to_s
    end
    line_1 << DELIMITER

    line_2 = String.new
    line_2 << FORMAT
    if ( Nexus::Util::longer_than_zero( get_datatype ) )
      line_2 << " " << DATATYPE << "=" << get_datatype
    end
    if ( Nexus::Util::longer_than_zero( get_missing ) )
      line_2 << " " << MISSING << "=" << get_missing
    end
    if ( Nexus::Util::longer_than_zero( get_gap_character ) )
      line_2 << " " << GAP << "=" << get_gap_character
    end
    if ( Nexus::Util::longer_than_zero( get_match_character ) )
      line_2 << " " << MATCHCHAR << "=" << get_match_character
    end
    line_2 << DELIMITER

    line_3 = String.new
    line_3 << TAXLABELS << " " << Nexus::Util::array_to_string( get_taxa )
    line_3 << DELIMITER

    line_4 = String.new
    line_4 << MATRIX
    Nexus::Util::to_nexus_helper( DATA_BLOCK,
                                  [ line_1, line_2, line_3, line_4 ] +
                                  get_matrix.to_nexus_row_array )
  end

  # Gets the taxa of this block.
  # ---
  # *Returns*:: Array
  def get_taxa
    @taxa
  end

  # Adds a taxon name to this block.
  # ---
  # *Arguments*:
  # * (required) _taxon_: String
  def add_taxon( taxon )
    @taxa.push( taxon )
  end

end # class DataBlock


# == DESCRIPTION
# Bio::Nexus::DistancesBlock represents a distances nexus block.
# == DESCRIPTION
# Bio::Nexus::DistancesBlock represents a distances nexus block.
#
# = Example of Distances block:
# Begin Distances;
#  Dimensions nchar=20 ntax=5;
#  Format Triangle=Upper;
#  Matrix
#  taxon_1 0.0 1.0 2.0 4.0 7.0
#  taxon_2 1.0 0.0 3.0 5.0 8.0
#  taxon_3 3.0 4.0 0.0 6.0 9.0
#  taxon_4 7.0 3.0 1.0 0.0 9.5
#  taxon_5 1.2 1.3 1.4 1.5 0.0;
# End;
#
#
# == USAGE
#
#   require 'bio/db/nexus'
#
#   # Create a new parser:
#   nexus = Bio::Nexus.new( nexus_data_as_string )
#
#   # Get distances block(s):
#   distances_blocks = nexus.get_distances_blocks
#   # Get matrix as Bio::Nexus::NexusMatrix object:
#   matrix = distances_blocks[ 0 ].get_matrix
#   # Get value (column 0 are names):
#   val = matrix.get_value( 1, 5 )
#
class DistancesBlock < GenericBlock
  TRIANGLE = "Triangle"

  # Creates a new DistancesBlock object named 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @number_of_taxa = 0
    @number_of_characters = 0
    @triangle = String.new
    @matrix = NexusMatrix.new
  end

  # Returns a String describing this block as nexus formatted data.
  # The result consists of a "Dimensions" line, a "Format" line and the
  # matrix rows (values separated by a single space).
  # ---
  # *Returns*:: String
  def to_nexus
    line_1 = String.new
    line_1 << DIMENSIONS
    # The counts are Integers by default (see initialize), so they are
    # converted with to_s: String#<< given an Integer would append the
    # character with that codepoint instead of the decimal digits.
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      line_1 << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    if ( Nexus::Util::larger_than_zero( get_number_of_characters ) )
      line_1 << " " << NCHAR << "=" << get_number_of_characters.to_s
    end
    line_1 << DELIMITER

    line_2 = String.new
    line_2 << FORMAT
    if ( Nexus::Util::longer_than_zero( get_triangle ) )
      line_2 << " " << TRIANGLE << "=" << get_triangle
    end
    line_2 << DELIMITER

    line_3 = String.new
    line_3 << MATRIX

    Nexus::Util::to_nexus_helper( DISTANCES_BLOCK,
                                  [ line_1, line_2, line_3 ] +
                                  get_matrix.to_nexus_row_array( " " ) )
  end

  # Gets the "number of taxa" property.
  # ---
  # *Returns*:: Integer
  def get_number_of_taxa
    @number_of_taxa
  end

  # Gets the "number of characters" property.
  # ---
  # *Returns*:: Integer
  def get_number_of_characters
    @number_of_characters
  end

  # Gets the "triangle" property.
  # ---
  # *Returns*:: String
  def get_triangle
    @triangle
  end

  # Gets the matrix.
  # ---
  # *Returns*:: Bio::Nexus::NexusMatrix
  def get_matrix
    @matrix
  end

  # Sets the "number of taxa" property.
  # ---
  # *Arguments*:
  # * (required) _number_of_taxa_: Integer
  def set_number_of_taxa( number_of_taxa )
    @number_of_taxa = number_of_taxa
  end

  # Sets the "number of characters" property.
  # ---
  # *Arguments*:
  # * (required) _number_of_characters_: Integer
  def set_number_of_characters( number_of_characters )
    @number_of_characters = number_of_characters
  end

  # Sets the "triangle" property.
  # ---
  # *Arguments*:
  # * (required) _triangle_: String
  def set_triangle( triangle )
    @triangle = triangle
  end

  # Sets the matrix.
  # ---
  # *Arguments*:
  # * (required) _matrix_: Bio::Nexus::NexusMatrix
  def set_matrix( matrix )
    @matrix = matrix
  end

end # class DistancesBlock
# == DESCRIPTION
# Bio::Nexus::TreesBlock represents a trees nexus block.
#
# = Example of Trees block:
# Begin Trees;
#  Tree best=(fish,(frog,(snake, mouse)));
#  Tree other=(snake,(frog,( fish, mouse)));
# End;
#
#
# == USAGE
#
#   require 'bio/db/nexus'
#
#   # Create a new parser:
#   nexus = Bio::Nexus.new( nexus_data_as_string )
#   # Get trees block(s):
#   trees_block = nexus.get_trees_blocks[ 0 ]
#   # Get first tree named "best" as String:
#   string_fish = trees_block.get_tree_strings_by_name( "best" )[ 0 ]
#   # Get first tree named "best" as Bio::Db::Newick object:
#   tree_fish = trees_block.get_trees_by_name( "best" )[ 0 ]
#   # Get first tree as Bio::Db::Newick object:
#   tree_first = trees_block.get_tree( 0 )
#
class TreesBlock < GenericBlock
  TREE = "Tree"

  # Creates a new TreesBlock object named 'name' that holds no trees yet.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @trees      = Array.new
    @tree_names = Array.new
  end

  # Returns a String describing this block as nexus formatted data,
  # one "Tree <name>=<tree>" line per stored tree.
  # ---
  # *Returns*:: String
  def to_nexus
    tree_lines = Array.new
    # Names and tree strings are stored in two parallel arrays.
    @trees.each_index do | idx |
      tree_lines.push( TREE + " " + @tree_names[ idx ] + "=" + @trees[ idx ] )
    end
    Nexus::Util::to_nexus_helper( TREES_BLOCK, tree_lines )
  end

  # Returns an array of strings describing trees.
  # ---
  # *Returns*:: Array
  def get_tree_strings
    @trees
  end

  # Returns an array of tree names.
  # ---
  # *Returns*:: Array
  def get_tree_names
    @tree_names
  end

  # Returns an array of strings describing trees
  # for which name matches the tree name.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array
  def get_tree_strings_by_name( name )
    matches = Array.new
    @tree_names.each_with_index do | tree_name, idx |
      matches.push( @trees[ idx ] ) if tree_name == name
    end
    matches
  end

  # Returns tree i (same order as in nexus data) as
  # newick parsed tree object.
  # ---
  # *Arguments*:
  # * (required) _i_: Integer
  # *Returns*:: the object returned by Bio::Newick#tree
  def get_tree( i )
    Bio::Newick.new( @trees[ i ] ).tree
  end

  # Returns an array of newick parsed tree objects
  # for which name matches the tree name.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array of parsed trees (see get_tree)
  def get_trees_by_name( name )
    matches = Array.new
    @tree_names.each_with_index do | tree_name, idx |
      matches.push( get_tree( idx ) ) if tree_name == name
    end
    matches
  end

  # Adds a tree name to this block.
  # ---
  # *Arguments*:
  # * (required) _tree_name_: String
  def add_tree_name( tree_name )
    @tree_names.push( tree_name )
  end

  # Adds a tree to this block.
  # ---
  # *Arguments*:
  # * (required) _tree_as_string_: String
  def add_tree( tree_as_string )
    @trees.push( tree_as_string )
  end

end # class TreesBlock
# == DESCRIPTION
# Bio::Nexus::NexusMatrix represents a characters or distance matrix,
# where the names are stored in column zero.
#
#
# == USAGE
#
#   require 'bio/db/nexus'
#
#   # Create a new parser:
#   nexus = Bio::Nexus.new( nexus_data_as_string )
#   # Get distances block(s):
#   distances_block = nexus.get_distances_blocks[ 0 ]
#   # Get matrix as Bio::Nexus::NexusMatrix object:
#   matrix = distances_block.get_matrix
#   # Get value (column 0 are names):
#   val = matrix.get_value( 1, 5 )
#   # Return first row as String (all columns except column 0),
#   # values are separated by "_":
#   row_str_0 = matrix.get_row_string( 0, "_" )
#   # Return all rows named "ciona" as String (all columns except column 0),
#   # values are separated by "+":
#   ciona_rows = matrix.get_row_strings_by_name( "ciona", "+" )
class NexusMatrix

  # Nexus matrix error class.
  class NexusMatrixError < RuntimeError; end

  # Creates new, empty NexusMatrix.
  def initialize()
    # Sparse storage: row index => { column index => value }.
    @rows = Hash.new
    @max_row = -1
    @max_col = -1
  end

  # Sets the value at row 'row' and column 'col' to 'value'.
  # Grows the matrix (maximal row/column) as needed.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # * (required) _col_: Integer
  # * (required) _value_: Object
  # *Raises*:: NexusMatrixError if row or col is negative
  def set_value( row, col, value )
    if ( ( row < 0 ) || ( col < 0 ) )
      raise( NexusMatrixError, "attempt to use negative values for row or column" )
    end
    set_max_row( row ) if row > get_max_row
    set_max_col( col ) if col > get_max_col
    row_map = ( @rows[ row ] ||= Hash.new )
    row_map[ col ] = value
  end

  # Returns the value at row 'row' and column 'col'.
  # Returns nil for a cell inside the matrix bounds that was never set.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # * (required) _col_: Integer
  # *Returns*:: Object
  # *Raises*:: NexusMatrixError if row or col is out of range
  def get_value( row, col )
    if ( ( row > get_max_row ) || ( row < 0 ) )
      raise( NexusMatrixError, "value for row (" + row.to_s +
             ") is out of range [max row: " + get_max_row.to_s + "]" )
    elsif ( ( col > get_max_col ) || ( col < 0 ) )
      raise( NexusMatrixError, "value for column (" + col.to_s +
             ") is out of range [max column: " + get_max_col.to_s + "]" )
    end
    r = @rows[ row ]
    return nil if r.nil? || r.empty?
    r[ col ]
  end

  # Returns the maximal columns number.
  # ---
  # *Returns*:: Integer
  def get_max_col
    @max_col
  end

  # Returns the maximal row number.
  # ---
  # *Returns*:: Integer
  def get_max_row
    @max_row
  end

  # Returns true if matrix is empty.
  #
  # ---
  # *Returns*:: true or false
  def is_empty?
    get_max_col < 0 || get_max_row < 0
  end

  # Convenience method which return the value of
  # column 0 and row 'row' which is usually the name.
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: String
  def get_name( row )
    get_value( row, 0 ).to_s
  end

  # Returns the values of columns 1 to maximal column length
  # in row 'row' concatenated as string. Individual values can be
  # separated by 'spacer' (which is also appended after the last value).
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # * (optional) _spacer_: String
  # *Returns*:: String
  def get_row_string( row, spacer = "" )
    row_str = String.new
    return row_str if is_empty?
    ( 1 .. get_max_col ).each do | col |
      # to_s keeps unset (nil) cells and numeric values from breaking
      # String#<<, which raises on nil/Float and treats an Integer as a
      # codepoint.
      row_str << get_value( row, col ).to_s << spacer.to_s
    end
    row_str
  end

  # Returns all rows as Array of Strings separated by 'spacer'
  # for which column 0 is 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # * (optional) _spacer_: String
  # *Returns*:: Array
  def get_row_strings_by_name( name, spacer = "" )
    row_strs = Array.new
    return row_strs if is_empty?
    ( 0 .. get_max_row ).each do | row |
      if ( get_value( row, 0 ) == name )
        row_strs.push( get_row_string( row, spacer ) )
      end
    end
    row_strs
  end

  # Returns matrix as String, returns "empty" if empty.
  # ---
  # *Returns*:: String
  def to_s
    return "empty" if is_empty?
    str = String.new
    to_nexus_row_array( " ", false ).each do | row |
      str << row << END_OF_LINE
    end
    str
  end
  alias to_str to_s

  # Helper method to produce nexus formatted data.
  # Every row starts with the name from column 0, padded to the longest
  # name (at least 10 characters), followed by the row values.
  # ---
  # *Arguments*:
  # * (optional) _spacer_: String
  # * (optional) _append_delimiter_: true or false
  # *Returns*:: Array
  def to_nexus_row_array( spacer = "", append_delimiter = true )
    ary = Array.new
    return ary if is_empty?

    # Width of the name column.
    max_length = 10
    ( 0 .. get_max_row ).each do | row |
      l = get_name( row ).length
      max_length = l if l > max_length
    end

    ( 0 .. get_max_row ).each do | row |
      row_str = String.new
      ary.push( row_str )
      name = get_name( row ).ljust( max_length + 1 )
      row_str << name << " " << get_row_string( row, spacer )
      # get_row_string leaves a trailing spacer; remove it.
      if ( spacer != nil && spacer.length > 0 )
        row_str.chomp!( spacer )
      end
      # Only the last row carries the delimiter.
      if ( append_delimiter && row == get_max_row )
        row_str << DELIMITER
      end
    end
    ary
  end

  private

  # Returns row data as Hash (column index => value), or nil if the
  # row has never been written to.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: Hash or nil
  def get_row( row )
    @rows[ row ]
  end

  # Sets maximal column number.
  # ---
  # *Arguments*:
  # * (required) _max_col_: Integer
  def set_max_col( max_col )
    @max_col = max_col
  end

  # Sets maximal row number.
  # ---
  # *Arguments*:
  # * (required) _max_row_: Integer
  def set_max_row( max_row )
    @max_row = max_row
  end

end # NexusMatrix
# = DESCRIPTION
# Bio::Nexus::Util is a class containing static helper methods
#
class Util

  # Helper method to produce nexus formatted data: wraps 'lines' in the
  # begin/end markers of a block, one indented line per non-nil entry.
  # ---
  # *Arguments*:
  # * (required) _block_: String appended after the begin marker
  # * (required) _lines_: Array of Strings (nil entries are skipped)
  # *Returns*:: String
  def self.to_nexus_helper( block, lines )
    out = String.new
    out << BEGIN_BLOCK << " " << block << END_OF_LINE
    lines.each do | entry |
      next if entry.nil?
      out << INDENTENTION << entry << END_OF_LINE
    end
    out << END_BLOCK << END_OF_LINE
    out
  end

  # Returns the elements of 'ary' concatenated into one String,
  # separated by " ".
  # ---
  # *Arguments*:
  # * (required) _ary_: Array
  # *Returns*:: String
  def self.array_to_string( ary )
    joined = String.new
    ary.each { | element | joined << element << " " }
    # Drop the single separator left behind the last element.
    joined.chomp( " " )
  end

  # Returns true if Integer i is not nil and larger than 0.
  # ---
  # *Arguments*:
  # * (required) _i_: Integer
  # *Returns*:: true or false
  def self.larger_than_zero( i )
    return false if i.nil?
    i.to_i > 0
  end

  # Returns true if String str is not nil and longer than 0.
  # ---
  # *Arguments*:
  # * (required) _str_: String
  # *Returns*:: true or false
  def self.longer_than_zero( str )
    return false if str.nil?
    str.length > 0
  end

end # class Util
# == Description
#
# This class inherits from the SangerChromatogram superclass. It captures the information contained
# within an ABIF format chromatogram file generated by DNA sequencing. See the SangerChromatogram class
# for usage.
class Abif < SangerChromatogram
  # Maps the numeric element type of a directory entry to a type name.
  # User defined data types have tags numbers >= 1024
  DATA_TYPES = { 1 => 'byte', 2 => 'char', 3 => 'word', 4 => 'short', 5 => 'long', 7 => 'float', 8 => 'double', 10 => 'date', 11 => 'time', 18 => 'pString', 19 => 'cString', 12 => 'thumb', 13 => 'bool', 6 => 'rational', 9 => 'BCD', 14 => 'point', 15 => 'rect', 16 => 'vPoint', 17 => 'vRect', 20 => 'tag', 128 => 'deltaComp', 256 => 'LZWComp', 384 => 'deltaLZW', 1024 => 'user'}

  # Specifies how to pack each data type (String#unpack directives).
  # Not every name in DATA_TYPES has an entry here (e.g. 'BCD', 'deltaComp').
  # NOTE(review): 'short' uses 'n', which decodes an unsigned 16-bit value;
  # confirm against the ABIF specification whether shorts should be signed.
  PACK_TYPES = { 'byte' => 'C', 'char' => 'c', 'word' => 'n', 'short' => 'n', 'long' => 'N', 'date' => 'nCC', 'time' => 'CCCC', 'pString' => 'CA*', 'cString' => 'Z*', 'float' => 'g', 'double' => 'G', 'bool' => 'C', 'thumb' => 'NNCC', 'rational' => 'NN', 'point' => 'nn', 'rect' => 'nnnn', 'vPoint' => 'NN', 'vRect' => 'NNNN', 'tag' => 'NN'}

  #sequence attributes

  # The sample title as entered when sequencing the sample (String)
  attr_accessor :sample_title
  # The chemistry used when sequencing e.g Dye terminators => 'term.' (String)
  attr_accessor :chemistry

  # see SangerChromatogram class for how to create an Abif object and its usage
  #
  # *Arguments*:
  # * (required) _string_: (String) the complete content of an ABIF file
  def initialize(string)
    header = string.slice(0,128)
    # read in header info
    @chromatogram_type, @version, @directory_tag_name, @directory_tag_number,
      @directory_element_type, @directory_element_size, @directory_number_of_elements,
      @directory_data_size, @directory_data_offset, @directory_data_handle =
      header.unpack("a4 n a4 N n n N N N N")
    @version = @version/100.to_f
    get_directory_entries(string)
    # get sequence
    @sequence = @directory_entries["PBAS"][1].data.map{|char| char.chr.downcase}.join("")
    #get peak indices
    @peak_indices = @directory_entries["PLOC"][1].data
    #get qualities
    @qualities = @directory_entries["PCON"][1].data
    # get sample title
    @sample_title = @directory_entries["SMPL"][1].data
    # use the PDMF entry with tag number 2 when present, else tag number 1
    pdmf = @directory_entries["PDMF"]
    @dye_mobility = (pdmf.size > 2 ? pdmf[2] : pdmf[1]).data
    #get trace data
    @chemistry = @directory_entries["phCH"][1].data
    # FWO_ gives the base order of the DATA entries 9..12
    base_order = @directory_entries["FWO_"][1].data.map{|char| char.chr.downcase}
    (9..12).each do |data_index|
      self.instance_variable_set("@#{base_order[data_index-9]}trace", @directory_entries["DATA"][data_index].data)
    end
  end

  # Returns the data for the name.
  # If not found (unknown name, or no entry with the given tag number),
  # returns nil.
  # ---
  # *Arguments*:
  # * (required) _name_: (String) name of the data
  # * (required) tag_number: (Integer) tag number (default 1)
  # *Returns*:: any data type or nil
  def data(name, tag_number = 1)
    entries = @directory_entries[name]
    entry = entries ? entries[tag_number] : nil
    entry ? entry.data : nil
  end

  private

  # Reads all directory entries of the file into @directory_entries,
  # a Hash of entry name => Array indexed by tag number.
  def get_directory_entries(string)
    @directory_entries = Hash.new
    offset = @directory_data_offset
    @directory_number_of_elements.times do
      entry = DirectoryEntry.new
      entry_fields = string.slice(offset, @directory_element_size)
      entry.name, entry.tag_number, entry.element_type, entry.element_size, entry.number_of_elements, entry.data_size, entry.data_offset = entry_fields.unpack("a4 N n n N N N")
      # populate the entry with the data it refers to
      if entry.data_size > 4
        get_entry_data(entry, string)
      else
        # data of 4 bytes or less is stored inside the directory entry itself
        get_entry_data(entry, entry_fields)
      end
      (@directory_entries[entry.name] ||= Array.new)[entry.tag_number] = entry
      offset += @directory_element_size
    end
  end

  # Decodes the data an entry refers to and stores it in entry.data.
  # 'string' is the whole file when the data is larger than 4 bytes,
  # otherwise the raw bytes of the directory entry.
  def get_entry_data(entry, string)
    if entry.data_size > 4
      raw_data = string.slice(entry.data_offset, entry.data_size)
    else
      # bytes 20..23 of a directory entry hold the data itself
      raw_data = string.slice(20,4)
    end
    # user defined data (type > 1023) is not processed as yet by this
    # bioruby module; types without a known pack directive are likewise
    # kept as raw bytes instead of failing.
    pack_type = entry.element_type > 1023 ? nil : PACK_TYPES[DATA_TYPES[entry.element_type]]
    if pack_type.nil?
      entry.data = raw_data
    else
      unpack_string = pack_type.match(/\*/) ? pack_type : "#{pack_type}#{entry.number_of_elements}"
      entry.data = raw_data.unpack(unpack_string)
      if pack_type == "CA*" # pascal string where the first byte is a character count and should therefore be removed
        entry.data.shift
      end
    end
  end

  # One entry of the ABIF directory together with its decoded data.
  class DirectoryEntry
    attr_accessor :name, :tag_number, :element_type, :element_size, :number_of_elements, :data_size, :data_offset
    attr_accessor :data
  end

end
# == Description
#
# This is the Superclass for the Abif and Scf classes that allow importing of the common scf
# and abi sequence chromatogram formats
# The following attributes are Common to both the Abif and Scf subclasses
#
# * *chromatogram_type* (String): This is extracted from the chromatogram file itself and will
#   probably be either .scf or ABIF for Scf and Abif files respectively.
# * *version* (String): The version of the Scf or Abif file
# * *sequence* (String): the sequence contained within the chromatogram as a string.
# * *qualities* (Array): the quality scores of each base as an array of integers. These will
#   probably be phred scores.
# * *peak_indices* (Array): if the sequence traces contained within the chromatogram are imagined
#   as being plotted on an x,y graph, the peak indices are the x positions of the peaks that
#   represent the nucleotides bases found in the sequence from the chromatogram. For example if
#   the peak_indices are [16,24,37,49 ....] and the sequence is AGGT...., at position 16 the
#   traces in the chromatogram were base-called as an A, position 24 a G, position 37 a G,
#   position 49 a T etc
# * *atrace*, *ctrace*, *gtrace*, *ttrace* (Array): If the sequence traces contained within
#   the chromatogram are imagined as being plotted on an x,y graph, these attributes are arrays of
#   y positions for each of the 4 nucleotide bases along the length of the x axis. If these were
#   plotted joined by lines of different colours then the resulting graph should look like the
#   original chromatogram file when viewed in a chromatogram viewer such as Chromas, 4Peaks or
#   FinchTV.
# * *dye_mobility* (String): The mobility of the dye used when sequencing. This can influence the
#   base calling
#
# == Usage
#   filename = "path/to/sequence_chromatogram_file"
#
# for Abif files
#   chromatogram_ff = Bio::Abif.open(filename)
# for Scf files
#   chromatogram_ff = Bio::Scf.open(filename)
#
#   chromatogram = chromatogram_ff.next_entry
#   chromatogram.to_seq # => returns a Bio::Sequence object
#   chromatogram.sequence # => returns the sequence contained within the chromatogram as a string
#   chromatogram.qualities # => returns an array of quality values for each base
#   chromatogram.atrace # => returns an array of the a trace y positions
#
class SangerChromatogram
  # The type of chromatogram file .scf for Scf files and ABIF for Abif files
  attr_accessor :chromatogram_type
  # The Version of the Scf or Abif file (String)
  attr_accessor :version
  # The sequence contained within the chromatogram (String)
  attr_accessor :sequence
  # An array of quality scores for each base in the sequence (Array)
  attr_accessor :qualities
  # An array 'x' positions (see description) on the trace where the bases occur/have been called (Array)
  attr_accessor :peak_indices
  # An array of 'y' positions (see description) for the 'A' trace from the chromatogram (Array)
  attr_accessor :atrace
  # An array of 'y' positions (see description) for the 'C' trace from the chromatogram (Array)
  attr_accessor :ctrace
  # An array of 'y' positions (see description) for the 'G' trace from the chromatogram (Array)
  attr_accessor :gtrace
  # An array of 'y' positions (see description) for the 'T' trace from the chromatogram (Array)
  attr_accessor :ttrace
  # The mobility of the dye used when sequencing (String)
  attr_accessor :dye_mobility

  # Opens 'filename' through Bio::FlatFile, using the receiving class
  # (this class or a subclass) as the entry format.
  def self.open(filename)
    Bio::FlatFile.open(self, filename)
  end

  # Returns a Bio::Sequence::NA object based on the sequence from the chromatogram
  def seq
    Bio::Sequence::NA.new(@sequence)
  end

  # Returns a Bio::Sequence object based on the sequence from the chromatogram
  def to_biosequence
    Bio::Sequence.adapter(self, Bio::Sequence::Adapter::SangerChromatogram)
  end
  alias :to_seq :to_biosequence

  # Returns the sequence from the chromatogram as a string
  def sequence_string
    @sequence
  end

  # Reverses and complements the current chromatogram object including its sequence, traces
  # and qualities
  def complement!
    # each trace becomes the reversed trace of its complementary base
    @atrace, @ttrace = @ttrace.reverse, @atrace.reverse
    @ctrace, @gtrace = @gtrace.reverse, @ctrace.reverse

    # per-base qualities are swapped the same way, if they exist
    unless @aqual.nil?
      @aqual, @tqual = @tqual.reverse, @aqual.reverse
      @cqual, @gqual = @gqual.reverse, @cqual.reverse
    end

    # called-base qualities simply run in the opposite direction
    @qualities = @qualities.reverse

    # mirror the peak positions along the trace, then restore ascending order
    trace_length = @atrace.size
    @peak_indices = @peak_indices.map { |position| trace_length - position }.reverse

    # reverse-complement the sequence (IUPAC ambiguity codes included)
    @sequence = @sequence.reverse.tr('atgcnrykmswbvdh','tacgnyrmkswvbhd')
  end

  # Returns a new chromatogram object of the appropriate subclass (scf or abi) where the
  # sequence, traces and qualities have all been reversed and complemented
  def complement
    copy = self.dup
    copy.complement!
    copy
  end

end
# == Description
#
# This class inherits from the SangerChromatogram superclass. It captures the information contained
# within an scf format chromatogram file generated by DNA sequencing. See the SangerChromatogram class
# for usage
class Scf < SangerChromatogram
  # sequence attributes

  # The quality of each base at each position along the length of the sequence is captured
  # by the nqual attributes where n is one of a, c, g or t. Generally the quality will be
  # high for the base that is called at a particular position and low for all the other bases.
  # However at positions of poor sequence quality, more than one base may have similar top scores.
  # By analysing the nqual attributes it may be possible to determine if the base calling was
  # correct or not.

  # The quality of the A base at each sequence position
  attr_accessor :aqual
  # The quality of the C base at each sequence position
  attr_accessor :cqual
  # The quality of the G base at each sequence position
  attr_accessor :gqual
  # The quality of the T base at each sequence position
  attr_accessor :tqual
  # A hash of extra information extracted from the chromatogram file
  attr_accessor :comments

  # see SangerChromatogram class for how to create an Scf object and its usage
  #
  # *Arguments*:
  # * (required) _string_: (String) the complete content of an scf file
  def initialize(string)
    header = string.slice(0,128)
    # read in header info
    @chromatogram_type, @samples, @sample_offset, @bases, @bases_left_clip, @bases_right_clip,
      @bases_offset, @comment_size, @comments_offset, @version, @sample_size, @code_set,
      @header_spare = header.unpack("a4 NNNNNNNN a4 NN N20")
    get_traces(string)
    get_bases_peakIndices_and_qualities(string)
    get_comments(string)
    if @comments["DYEP"]
      @dye_mobility = @comments["DYEP"]
    else
      # NOTE(review): "Unnown" looks like a typo for "Unknown"; kept as is
      # because callers may compare against this exact value.
      @dye_mobility = "Unnown"
    end
  end

  private

  # Reads the four traces into @atrace, @ctrace, @gtrace and @ttrace.
  # Version "3.00" files store one delta-encoded trace after the other;
  # version "2.00" files interleave the a, c, g and t samples.
  def get_traces(string)
    if @version == "3.00"
      # read in trace info
      offset = @sample_offset
      length = @samples * @sample_size
      # determine whether the data is stored in 1 byte as an unsigned byte or 2 bytes as an unsigned short
      byte = (@sample_size == 2) ? "n" : "c"
      ["a" , "c" , "g" , "t"].each do |base|
        trace_read = string.slice(offset,length).unpack("#{byte}#{@samples}")
        # convert offsets
        (0..trace_read.size-1).each do |sample_num|
          if trace_read[sample_num] > 30000
            trace_read[sample_num] = trace_read[sample_num] - 65536
          end
        end
        # For 8-bit data we need to emulate a signed/unsigned
        # cast that is implicit in the C implementations.....
        if @sample_size == 1
          (0..trace_read.size-1).each do |sample_num|
            trace_read[sample_num] += 256 if trace_read[sample_num] < 0
          end
        end
        trace_read = convert_deltas_to_values(trace_read)
        self.instance_variable_set("@#{base}trace", trace_read)
        offset += length
      end
    elsif @version == "2.00"
      @atrace = []
      @ctrace = []
      @gtrace = []
      @ttrace = []
      # read in trace info
      offset = @sample_offset
      length = @samples * @sample_size * 4
      # determine whether the data is stored in 1 byte as an unsigned byte or 2 bytes as an unsigned short
      byte = (@sample_size == 2) ? "n" : "c"
      trace_read = string.slice(offset,length).unpack("#{byte}#{@samples*4}")
      (0..(@samples-1)*4).step(4) do |offset2|
        @atrace << trace_read[offset2]
        @ctrace << trace_read[offset2+1]
        @gtrace << trace_read[offset2+2]
        @ttrace << trace_read[offset2+3]
      end
    end
  end

  # Reads the called bases together with their peak indices and qualities
  # into @sequence, @peak_indices, @aqual/@cqual/@gqual/@tqual and
  # @qualities.
  def get_bases_peakIndices_and_qualities(string)
    if @version == "3.00"
      # now go and get the peak index information
      offset = @bases_offset
      length = @bases * 4
      get_v3_peak_indices(string,offset,length)
      # now go and get the accuracy information
      offset += length
      get_v3_accuracies(string,offset,length)
      # OK, now go and get the base information.
      offset += length
      length = @bases
      get_v3_sequence(string,offset,length)
      #combine accuracies to get quality scores
      @qualities = convert_accuracies_to_qualities
    elsif @version == "2.00"
      @peak_indices = []
      @aqual = []
      @cqual = []
      @gqual = []
      @tqual = []
      @qualities = []
      @sequence = ""
      # now go and get the base information
      offset = @bases_offset
      length = @bases * 12
      all_bases_info = string.slice(offset,length)

      # each base is described by a 12 byte record
      (0..length-1).step(12) do |offset2|
        base_info = all_bases_info.slice(offset2,12).unpack("N C C C C a C3")
        @peak_indices << base_info[0]
        @aqual << base_info[1]
        @cqual << base_info[2]
        @gqual << base_info[3]
        @tqual << base_info[4]
        @sequence += base_info[5].downcase
        # the quality of a position is the accuracy of the called base
        case base_info[5].downcase
        when "a"
          @qualities << base_info[1]
        when "c"
          @qualities << base_info[2]
        when "g"
          @qualities << base_info[3]
        when "t"
          @qualities << base_info[4]
        else
          @qualities << 0
        end
      end
    end
  end

  # Reads length/4 big-endian 32 bit peak indices starting at offset.
  def get_v3_peak_indices(string,offset,length)
    @peak_indices = string.slice(offset,length).unpack("N#{length/4}")
  end

  # Reads the a, c, g and t accuracy arrays (one byte per base each),
  # which are stored one after the other starting at offset.
  def get_v3_accuracies(string,offset,length)
    qualities   = string.slice(offset,length)
    qual_length = length/4
    qual_offset = 0
    ["a" , "c" , "g" , "t"].each do |base|
      self.instance_variable_set("@#{base}qual",qualities.slice(qual_offset,qual_length).unpack("C#{qual_length}"))
      qual_offset += qual_length
    end
  end

  # Reads the called bases (one byte per base) as a lower case String.
  def get_v3_sequence(string,offset,length)
    @sequence = string.slice(offset,length).unpack("a#{length}").join('').downcase
  end

  # Replaces, in place, every element of trace_read by the running sum of
  # the elements up to and including it, and does so twice (two
  # accumulation passes). Returns trace_read.
  def convert_deltas_to_values(trace_read)
    2.times do
      p_sample = 0
      (0..trace_read.size-1).each do |sample_num|
        trace_read[sample_num] = trace_read[sample_num] + p_sample
        p_sample = trace_read[sample_num]
      end
    end
    return trace_read
  end

  # Returns, for every position of the sequence, the accuracy of the base
  # that was called there (0 for anything other than a, c, g or t).
  def convert_accuracies_to_qualities
    qualities = Array.new
    (0..@sequence.length-1).each do |base_pos|
      case sequence.slice(base_pos,1)
      when "a"
        qualities << @aqual[base_pos]
      when "c"
        qualities << @cqual[base_pos]
      when "g"
        qualities << @gqual[base_pos]
      when "t"
        qualities << @tqual[base_pos]
      else
        qualities << 0
      end
    end
    return qualities
  end

  # Parses the comment section ("KEY=value" lines) into the @comments Hash.
  # Lines that do not contain "KEY=value" are ignored.
  def get_comments(string)
    @comments = Hash.new
    # slice returns nil when the offset lies behind the end of the data;
    # to_s turns that into an empty comment section
    comment_string = string.slice(@comments_offset,@comment_size).to_s
    comment_string = comment_string.gsub(/\0/, "")
    comment_string.split("\n").each do |comment|
      # only store real key/value pairs; a failed match would otherwise
      # add a nil => nil entry
      if comment =~ /(\w+)=(.*)/
        @comments[$1] = $2
      end
    end
  end

end
# # == Usage # # Examples: # # require 'bio' # # puts "### na = Bio::NucleicAcid.new" # na = Bio::NucleicAcid.new # # puts "# na.to_re('yrwskmbdhvnatgc')" # p na.to_re('yrwskmbdhvnatgc') # # puts "# Bio::NucleicAcid.to_re('yrwskmbdhvnatgc')" # p Bio::NucleicAcid.to_re('yrwskmbdhvnatgc') # # puts "# na.weight('A')" # p na.weight('A') # # puts "# Bio::NucleicAcid.weight('A')" # p Bio::NucleicAcid.weight('A') # # puts "# na.weight('atgc')" # p na.weight('atgc') # # puts "# Bio::NucleicAcid.weight('atgc')" # p Bio::NucleicAcid.weight('atgc') # module Bio class NucleicAcid module Data # IUPAC code # * Faisst and Meyer (Nucleic Acids Res. 20:3-26, 1992) # * http://www.ncbi.nlm.nih.gov/collab/FT/ NAMES = { 'y' => '[tc]', 'r' => '[ag]', 'w' => '[at]', 's' => '[gc]', 'k' => '[tg]', 'm' => '[ac]', 'b' => '[tgc]', 'd' => '[atg]', 'h' => '[atc]', 'v' => '[agc]', 'n' => '[atgc]', 'a' => 'a', 't' => 't', 'g' => 'g', 'c' => 'c', 'u' => 'u', 'A' => 'Adenine', 'T' => 'Thymine', 'G' => 'Guanine', 'C' => 'Cytosine', 'U' => 'Uracil', 'Y' => 'pYrimidine', 'R' => 'puRine', 'W' => 'Weak', 'S' => 'Strong', 'K' => 'Keto', 'M' => 'aroMatic', 'B' => 'not A', 'D' => 'not C', 'H' => 'not G', 'V' => 'not T', } WEIGHT = { # Calculated by BioPerl's Bio::Tools::SeqStats.pm :-) 'a' => 135.15, 't' => 126.13, 'g' => 151.15, 'c' => 111.12, 'u' => 112.10, :adenine => 135.15, :thymine => 126.13, :guanine => 151.15, :cytosine => 111.12, :uracil => 112.10, :deoxyribose_phosphate => 196.11, :ribose_phosphate => 212.11, :hydrogen => 1.00794, :water => 18.015, } def weight(x = nil, rna = nil) if x if x.length > 1 if rna phosphate = WEIGHT[:ribose_phosphate] else phosphate = WEIGHT[:deoxyribose_phosphate] end hydrogen = WEIGHT[:hydrogen] water = WEIGHT[:water] total = 0.0 x.each_byte do |byte| base = byte.chr.downcase if WEIGHT[base] total += WEIGHT[base] + phosphate - hydrogen * 2 else raise "Error: invalid nucleic acid '#{base}'" end end total -= water * (x.length - 1) else WEIGHT[x.to_s.downcase] end else 
WEIGHT end end def [](x) NAMES[x] end # backward compatibility def names NAMES end alias na names def name(x) NAMES[x.to_s.upcase] end def to_re(seq, rna = false) replace = { 'y' => '[tcy]', 'r' => '[agr]', 'w' => '[atw]', 's' => '[gcs]', 'k' => '[tgk]', 'm' => '[acm]', 'b' => '[tgcyskb]', 'd' => '[atgrwkd]', 'h' => '[atcwmyh]', 'v' => '[agcmrsv]', 'n' => '[atgcyrwskmbdhvn]' } replace.default = '.' str = seq.to_s.downcase str.gsub!(/[^atgcu]/) { |na| replace[na] } if rna str.tr!("t", "u") end Regexp.new(str) end end # as instance methods include Data # as class methods extend Data end end # module Bio bio-1.4.3.0001/lib/bio/data/aa.rb0000644000004100000410000001162112200110570016022 0ustar www-datawww-data# # = bio/data/aa.rb - Amino Acids # # Copyright:: Copyright (C) 2001, 2005 # Toshiaki Katayama # License:: The Ruby License # # $Id:$ # module Bio class AminoAcid module Data # IUPAC code # * http://www.iupac.org/ # * http://www.chem.qmw.ac.uk/iubmb/newsletter/1999/item3.html # * http://www.ebi.ac.uk/RESID/faq.html NAMES = { 'A' => 'Ala', 'C' => 'Cys', 'D' => 'Asp', 'E' => 'Glu', 'F' => 'Phe', 'G' => 'Gly', 'H' => 'His', 'I' => 'Ile', 'K' => 'Lys', 'L' => 'Leu', 'M' => 'Met', 'N' => 'Asn', 'P' => 'Pro', 'Q' => 'Gln', 'R' => 'Arg', 'S' => 'Ser', 'T' => 'Thr', 'V' => 'Val', 'W' => 'Trp', 'Y' => 'Tyr', 'B' => 'Asx', # D/N 'Z' => 'Glx', # E/Q 'J' => 'Xle', # I/L 'U' => 'Sec', # 'uga' (stop) 'O' => 'Pyl', # 'uag' (stop) 'X' => 'Xaa', # (unknown) 'Ala' => 'alanine', 'Cys' => 'cysteine', 'Asp' => 'aspartic acid', 'Glu' => 'glutamic acid', 'Phe' => 'phenylalanine', 'Gly' => 'glycine', 'His' => 'histidine', 'Ile' => 'isoleucine', 'Lys' => 'lysine', 'Leu' => 'leucine', 'Met' => 'methionine', 'Asn' => 'asparagine', 'Pro' => 'proline', 'Gln' => 'glutamine', 'Arg' => 'arginine', 'Ser' => 'serine', 'Thr' => 'threonine', 'Val' => 'valine', 'Trp' => 'tryptophan', 'Tyr' => 'tyrosine', 'Asx' => 'asparagine/aspartic acid [DN]', 'Glx' => 'glutamine/glutamic acid [EQ]', 'Xle' => 
'isoleucine/leucine [IL]', 'Sec' => 'selenocysteine', 'Pyl' => 'pyrrolysine', 'Xaa' => 'unknown [A-Z]', } # AAindex FASG760101 - Molecular weight (Fasman, 1976) # Fasman, G.D., ed. # Handbook of Biochemistry and Molecular Biology", 3rd ed., # Proteins - Volume 1, CRC Press, Cleveland (1976) WEIGHT = { 'A' => 89.09, 'C' => 121.15, # 121.16 according to the Wikipedia 'D' => 133.10, 'E' => 147.13, 'F' => 165.19, 'G' => 75.07, 'H' => 155.16, 'I' => 131.17, 'K' => 146.19, 'L' => 131.17, 'M' => 149.21, 'N' => 132.12, 'P' => 115.13, 'Q' => 146.15, 'R' => 174.20, 'S' => 105.09, 'T' => 119.12, 'U' => 168.06, 'V' => 117.15, 'W' => 204.23, 'Y' => 181.19, } def weight(x = nil) if x if x.length > 1 total = 0.0 x.each_byte do |byte| aa = byte.chr.upcase if WEIGHT[aa] total += WEIGHT[aa] else raise "Error: invalid amino acid '#{aa}'" end end total -= NucleicAcid.weight[:water] * (x.length - 1) else WEIGHT[x] end else WEIGHT end end def [](x) NAMES[x] end # backward compatibility def names NAMES end alias aa names def name(x) str = NAMES[x] if str and str.length == 3 NAMES[str] else str end end def to_1(x) case x.to_s.length when 1 x when 3 three2one(x) else name2one(x) end end alias one to_1 def to_3(x) case x.to_s.length when 1 one2three(x) when 3 x else name2three(x) end end alias three to_3 def one2three(x) if x and x.length != 1 raise ArgumentError else NAMES[x] end end def three2one(x) if x and x.length != 3 raise ArgumentError else reverse[x] end end def one2name(x) if x and x.length != 1 raise ArgumentError else three2name(NAMES[x]) end end def name2one(x) str = reverse[x.to_s.downcase] if str and str.length == 3 three2one(str) else str end end def three2name(x) if x and x.length != 3 raise ArgumentError else NAMES[x] end end def name2three(x) reverse[x.downcase] end def to_re(seq) replace = { 'B' => '[DNB]', 'Z' => '[EQZ]', 'J' => '[ILJ]', 'X' => '[ACDEFGHIKLMNPQRSTVWYUOX]', } replace.default = '.' 
str = seq.to_s.upcase str.gsub!(/[^ACDEFGHIKLMNPQRSTVWYUO]/) { |aa| replace[aa] } Regexp.new(str) end private def reverse hash = Hash.new NAMES.each do |k, v| hash[v] = k end hash end end # as instance methods include Data # as class methods extend Data private # override when used as an instance method to improve performance alias orig_reverse reverse def reverse unless @reverse @reverse = orig_reverse end @reverse end end end # module Bio bio-1.4.3.0001/lib/bio/data/codontable.rb0000644000004100000410000005616512200110570017567 0ustar www-datawww-data# # = bio/data/codontable.rb - Codon Table # # Copyright:: Copyright (C) 2001, 2004 # Toshiaki Katayama # License:: The Ruby License # # $Id:$ # # == Data source # # Data in this class is converted from the NCBI's genetic codes page. # # * (()) # # === Examples # # Obtain a codon table No.1 -- Standard (Eukaryote) # # table = Bio::CodonTable[1] # # Obtain a copy of the codon table No.1 to modify. In this example, # reassign a seleno cystein ('U') to the 'tga' codon. # # table = Bio::CodonTable.copy(1) # table['tga'] = 'U' # # Create a new codon table by your own from the Hash which contains # pairs of codon and amino acid. You can also define the table name # in the second argument. # # hash = { 'ttt' => 'F', 'ttc' => 'ttc', ... } # table = Bio::CodonTable.new(hash, "my codon table") # # Obtain a translated amino acid by codon. # # table = Bio::CodonTable[1] # table['ttt'] # => F # # Reverse translation of a amino acid into a list of relevant codons. # # table = Bio::CodonTable[1] # table.revtrans("A") # => ["gcg", "gct", "gca", "gcc"] # module Bio class CodonTable # Select a codon table by number. This method will return one of the # hard coded codon tables in this class as a Bio::CodonTable object. 
def self.[](i)
  # The returned object wraps the very Hash stored in TABLES (no copy is
  # made), so writing through it changes the hard coded table; use
  # Bio::CodonTable.copy to get an independent object.
  hash = TABLES[i]
  raise "ERROR: Unknown codon table No.#{i}" unless hash
  self.new(hash, DEFINITIONS[i], STARTS[i], STOPS[i])
end

# Similar to Bio::CodonTable[num] but returns a copied codon table.
# You can modify the codon table without influencing hard coded tables.
def self.copy(i)
  # A Marshal round trip yields a deep copy of the table object.
  Marshal.load(Marshal.dump(self[i]))
end

# Create your own codon table by giving a Hash table of codons and relevant
# amino acids.  You can also define the table's name as a second
# argument.
#
# Two Arrays 'start' and 'stop' can be specified which contain a list of
# start and stop codons used by 'start_codon?' and 'stop_codon?' methods.
# When 'stop' is empty, the stop codons are derived from the table by
# generate_stop.
def initialize(hash, definition = nil, start = [], stop = [])
  @table = hash
  @definition = definition
  @start = start
  @stop = stop.empty? ? generate_stop : stop
end

# Accessor methods for a Hash of the currently selected codon table.
attr_accessor :table

# Accessor methods for the name of the currently selected table.
attr_accessor :definition

# Accessor methods for an Array which contains a list of start or stop
# codons respectively.
attr_accessor :start, :stop

# Translate a codon into a relevant amino acid.  This method is used for
# translating a DNA sequence into amino acid sequence.
def [](codon)
  @table[codon]
end

# Modify the codon table.  Use with caution as it may break hard coded
# tables.  If you want to modify existing table, you should use copy
# method instead of [] method to generate CodonTable object to be modified.
#
#   # This is OK.
#   table = Bio::CodonTable.copy(1)
#   table['tga'] = 'U'
#
#   # Not recommended as it overrides the hard coded table
#   table = Bio::CodonTable[1]
#   table['tga'] = 'U'
#
# The reverse table memoized by revtrans in @reverse is discarded here, so
# that the next revtrans call reflects the new assignment.
def []=(codon, aa)
  @reverse = nil
  @table[codon] = aa
end

# Iterates on codon table hash.
# # table = Bio::CodonTable[1] # table.each do |codon, aa| # puts "#{codon} -- #{aa}" # end # def each(&block) @table.each(&block) end # Reverse translation of a amino acid into a list of relevant codons. # # table = Bio::CodonTable[1] # table.revtrans("A") # => ["gcg", "gct", "gca", "gcc"] # def revtrans(aa) unless @reverse @reverse = {} @table.each do |k, v| @reverse[v] ||= [] @reverse[v] << k end end @reverse[aa.upcase] end # Returns true if the codon is a start codon in the currently selected # codon table, otherwise false. def start_codon?(codon) @start.include?(codon.downcase) end # Returns true if the codon is a stop codon in the currently selected # codon table, otherwise false. def stop_codon?(codon) @stop.include?(codon.downcase) end def generate_stop list = [] @table.each do |codon, aa| if aa == '*' list << codon end end return list end private :generate_stop DEFINITIONS = { 1 => "Standard (Eukaryote)", 2 => "Vertebrate Mitochondrial", 3 => "Yeast Mitochondorial", 4 => "Mold, Protozoan, Coelenterate Mitochondrial and Mycoplasma/Spiroplasma", 5 => "Invertebrate Mitochondrial", 6 => "Ciliate Macronuclear and Dasycladacean", 9 => "Echinoderm Mitochondrial", 10 => "Euplotid Nuclear", 11 => "Bacteria", 12 => "Alternative Yeast Nuclear", 13 => "Ascidian Mitochondrial", 14 => "Flatworm Mitochondrial", 15 => "Blepharisma Macronuclear", 16 => "Chlorophycean Mitochondrial", 21 => "Trematode Mitochondrial", 22 => "Scenedesmus obliquus mitochondrial", 23 => "Thraustochytrium Mitochondrial", } STARTS = { 1 => %w(ttg ctg atg gtg), # gtg added (cf. 
NCBI #SG1 document) 2 => %w(att atc ata atg gtg), 3 => %w(ata atg), 4 => %w(tta ttg ctg att atc ata atg gtg), 5 => %w(ttg att atc ata atg gtg), 6 => %w(atg), 9 => %w(atg gtg), 10 => %w(atg), 11 => %w(ttg ctg att atc ata atg gtg), 12 => %w(ctg atg), 13 => %w(atg), 14 => %w(atg), 15 => %w(atg), 16 => %w(atg), 21 => %w(atg gtg), 22 => %w(atg), 23 => %w(att atg gtg), } STOPS = { 1 => %w(taa tag tga), 2 => %w(taa tag aga agg), 3 => %w(taa tag), 4 => %w(taa tag), 5 => %w(taa tag), 6 => %w(tga), 9 => %w(taa tag), 10 => %w(taa tag), 11 => %w(taa tag tga), 12 => %w(taa tag tga), 13 => %w(taa tag), 14 => %w(tag), 15 => %w(taa tga), 16 => %w(taa tga), 21 => %w(taa tag), 22 => %w(tca taa tga), 23 => %w(tta taa tag tga), } TABLES = { # codon table 1 1 => { 'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C', 'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C', 'tta' => 'L', 'tca' => 'S', 'taa' => '*', 'tga' => '*', 'ttg' => 'L', 'tcg' => 'S', 'tag' => '*', 'tgg' => 'W', 'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R', 'ctc' => 'L', 'ccc' => 'P', 'cac' => 'H', 'cgc' => 'R', 'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R', 'ctg' => 'L', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R', 'att' => 'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S', 'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S', 'ata' => 'I', 'aca' => 'T', 'aaa' => 'K', 'aga' => 'R', 'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => 'R', 'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G', 'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G', 'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G', 'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G', }, # codon table 2 2 => { 'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C', 'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C', 'tta' => 'L', 'tca' => 'S', 'taa' => '*', 'tga' => 'W', 'ttg' => 'L', 'tcg' => 'S', 'tag' => '*', 'tgg' => 'W', 'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R', 'ctc' => 'L', 'ccc' => 'P', 'cac' => 'H', 
'cgc' => 'R', 'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R', 'ctg' => 'L', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R', 'att' => 'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S', 'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S', 'ata' => 'M', 'aca' => 'T', 'aaa' => 'K', 'aga' => '*', 'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => '*', 'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G', 'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G', 'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G', 'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G', }, # codon table 3 3 => { 'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C', 'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C', 'tta' => 'L', 'tca' => 'S', 'taa' => '*', 'tga' => 'W', 'ttg' => 'L', 'tcg' => 'S', 'tag' => '*', 'tgg' => 'W', 'ctt' => 'T', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R', 'ctc' => 'T', 'ccc' => 'P', 'cac' => 'H', 'cgc' => 'R', 'cta' => 'T', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R', 'ctg' => 'T', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R', 'att' => 'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S', 'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S', 'ata' => 'M', 'aca' => 'T', 'aaa' => 'K', 'aga' => 'R', 'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => 'R', 'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G', 'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G', 'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G', 'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G', }, # codon table 4 4 => { 'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C', 'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C', 'tta' => 'L', 'tca' => 'S', 'taa' => '*', 'tga' => 'W', 'ttg' => 'L', 'tcg' => 'S', 'tag' => '*', 'tgg' => 'W', 'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R', 'ctc' => 'L', 'ccc' => 'P', 'cac' => 'H', 'cgc' => 'R', 'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R', 'ctg' => 'L', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R', 'att' => 'I', 'act' => 'T', 
'aat' => 'N', 'agt' => 'S', 'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S', 'ata' => 'I', 'aca' => 'T', 'aaa' => 'K', 'aga' => 'R', 'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => 'R', 'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G', 'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G', 'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G', 'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G', }, # codon table 5 5 => { 'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C', 'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C', 'tta' => 'L', 'tca' => 'S', 'taa' => '*', 'tga' => 'W', 'ttg' => 'L', 'tcg' => 'S', 'tag' => '*', 'tgg' => 'W', 'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R', 'ctc' => 'L', 'ccc' => 'P', 'cac' => 'H', 'cgc' => 'R', 'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R', 'ctg' => 'L', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R', 'att' => 'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S', 'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S', 'ata' => 'M', 'aca' => 'T', 'aaa' => 'K', 'aga' => 'S', 'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => 'S', 'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G', 'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G', 'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G', 'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G', }, # codon table 6 6 => { 'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C', 'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C', 'tta' => 'L', 'tca' => 'S', 'taa' => 'Q', 'tga' => '*', 'ttg' => 'L', 'tcg' => 'S', 'tag' => 'Q', 'tgg' => 'W', 'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R', 'ctc' => 'L', 'ccc' => 'P', 'cac' => 'H', 'cgc' => 'R', 'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R', 'ctg' => 'L', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R', 'att' => 'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S', 'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S', 'ata' => 'I', 'aca' => 'T', 'aaa' => 'K', 'aga' => 'R', 'atg' => 'M', 
'acg' => 'T', 'aag' => 'K', 'agg' => 'R', 'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G', 'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G', 'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G', 'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G', }, # codon table 9 9 => { 'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C', 'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C', 'tta' => 'L', 'tca' => 'S', 'taa' => '*', 'tga' => 'W', 'ttg' => 'L', 'tcg' => 'S', 'tag' => '*', 'tgg' => 'W', 'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R', 'ctc' => 'L', 'ccc' => 'P', 'cac' => 'H', 'cgc' => 'R', 'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R', 'ctg' => 'L', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R', 'att' => 'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S', 'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S', 'ata' => 'I', 'aca' => 'T', 'aaa' => 'N', 'aga' => 'S', 'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => 'S', 'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G', 'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G', 'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G', 'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G', }, # codon table 10 10 => { 'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C', 'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C', 'tta' => 'L', 'tca' => 'S', 'taa' => '*', 'tga' => 'C', 'ttg' => 'L', 'tcg' => 'S', 'tag' => '*', 'tgg' => 'W', 'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R', 'ctc' => 'L', 'ccc' => 'P', 'cac' => 'H', 'cgc' => 'R', 'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R', 'ctg' => 'L', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R', 'att' => 'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S', 'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S', 'ata' => 'I', 'aca' => 'T', 'aaa' => 'K', 'aga' => 'R', 'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => 'R', 'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G', 'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G', 
'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G', 'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G', }, # codon table 11 11 => { 'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C', 'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C', 'tta' => 'L', 'tca' => 'S', 'taa' => '*', 'tga' => '*', 'ttg' => 'L', 'tcg' => 'S', 'tag' => '*', 'tgg' => 'W', 'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R', 'ctc' => 'L', 'ccc' => 'P', 'cac' => 'H', 'cgc' => 'R', 'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R', 'ctg' => 'L', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R', 'att' => 'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S', 'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S', 'ata' => 'I', 'aca' => 'T', 'aaa' => 'K', 'aga' => 'R', 'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => 'R', 'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G', 'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G', 'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G', 'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G', }, # codon table 12 12 => { 'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C', 'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C', 'tta' => 'L', 'tca' => 'S', 'taa' => '*', 'tga' => '*', 'ttg' => 'L', 'tcg' => 'S', 'tag' => '*', 'tgg' => 'W', 'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R', 'ctc' => 'L', 'ccc' => 'P', 'cac' => 'H', 'cgc' => 'R', 'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R', 'ctg' => 'S', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R', 'att' => 'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S', 'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S', 'ata' => 'I', 'aca' => 'T', 'aaa' => 'K', 'aga' => 'R', 'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => 'R', 'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G', 'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G', 'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G', 'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G', }, # codon table 13 13 => { 'ttt' => 
'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C', 'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C', 'tta' => 'L', 'tca' => 'S', 'taa' => '*', 'tga' => 'W', 'ttg' => 'L', 'tcg' => 'S', 'tag' => '*', 'tgg' => 'W', 'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R', 'ctc' => 'L', 'ccc' => 'P', 'cac' => 'H', 'cgc' => 'R', 'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R', 'ctg' => 'L', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R', 'att' => 'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S', 'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S', 'ata' => 'M', 'aca' => 'T', 'aaa' => 'K', 'aga' => 'G', 'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => 'G', 'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G', 'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G', 'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G', 'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G', }, # codon table 14 14 => { 'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C', 'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C', 'tta' => 'L', 'tca' => 'S', 'taa' => 'Y', 'tga' => 'W', 'ttg' => 'L', 'tcg' => 'S', 'tag' => '*', 'tgg' => 'W', 'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R', 'ctc' => 'L', 'ccc' => 'P', 'cac' => 'H', 'cgc' => 'R', 'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R', 'ctg' => 'L', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R', 'att' => 'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S', 'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S', 'ata' => 'I', 'aca' => 'T', 'aaa' => 'N', 'aga' => 'S', 'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => 'S', 'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G', 'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G', 'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G', 'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G', }, # codon table 15 15 => { 'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C', 'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C', 'tta' => 'L', 'tca' => 'S', 'taa' => '*', 'tga' 
=> '*', 'ttg' => 'L', 'tcg' => 'S', 'tag' => 'Q', 'tgg' => 'W', 'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R', 'ctc' => 'L', 'ccc' => 'P', 'cac' => 'H', 'cgc' => 'R', 'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R', 'ctg' => 'L', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R', 'att' => 'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S', 'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S', 'ata' => 'I', 'aca' => 'T', 'aaa' => 'K', 'aga' => 'R', 'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => 'R', 'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G', 'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G', 'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G', 'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G', }, # codon table 16 16 => { 'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C', 'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C', 'tta' => 'L', 'tca' => 'S', 'taa' => '*', 'tga' => '*', 'ttg' => 'L', 'tcg' => 'S', 'tag' => 'L', 'tgg' => 'W', 'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R', 'ctc' => 'L', 'ccc' => 'P', 'cac' => 'H', 'cgc' => 'R', 'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R', 'ctg' => 'L', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R', 'att' => 'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S', 'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S', 'ata' => 'I', 'aca' => 'T', 'aaa' => 'K', 'aga' => 'R', 'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => 'R', 'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G', 'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G', 'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G', 'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G', }, # codon table 21 21 => { 'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C', 'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C', 'tta' => 'L', 'tca' => 'S', 'taa' => '*', 'tga' => 'W', 'ttg' => 'L', 'tcg' => 'S', 'tag' => '*', 'tgg' => 'W', 'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R', 'ctc' => 'L', 'ccc' => 'P', 
'cac' => 'H', 'cgc' => 'R', 'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R', 'ctg' => 'L', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R', 'att' => 'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S', 'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S', 'ata' => 'M', 'aca' => 'T', 'aaa' => 'N', 'aga' => 'S', 'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => 'S', 'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G', 'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G', 'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G', 'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G', }, # codon table 22 22 => { 'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C', 'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C', 'tta' => 'L', 'tca' => '*', 'taa' => '*', 'tga' => '*', 'ttg' => 'L', 'tcg' => 'S', 'tag' => 'L', 'tgg' => 'W', 'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R', 'ctc' => 'L', 'ccc' => 'P', 'cac' => 'H', 'cgc' => 'R', 'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R', 'ctg' => 'L', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R', 'att' => 'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S', 'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S', 'ata' => 'I', 'aca' => 'T', 'aaa' => 'K', 'aga' => 'R', 'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => 'R', 'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G', 'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G', 'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G', 'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G', }, # codon table 23 23 => { 'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C', 'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C', 'tta' => '*', 'tca' => 'S', 'taa' => '*', 'tga' => '*', 'ttg' => 'L', 'tcg' => 'S', 'tag' => '*', 'tgg' => 'W', 'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R', 'ctc' => 'L', 'ccc' => 'P', 'cac' => 'H', 'cgc' => 'R', 'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R', 'ctg' => 'L', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R', 'att' => 
'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S', 'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S', 'ata' => 'I', 'aca' => 'T', 'aaa' => 'K', 'aga' => 'R', 'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => 'R', 'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G', 'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G', 'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G', 'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G', }, } end # CodonTable end # module Bio bio-1.4.3.0001/lib/bio/pathway.rb0000644000004100000410000005410412200110570016210 0ustar www-datawww-data# # = bio/pathway.rb - Binary relations and Graph algorithms # # Copyright: Copyright (C) 2001 # Toshiaki Katayama , # Shuichi Kawashima # License:: The Ruby License # # $Id:$ # require 'matrix' module Bio # Bio::Pathway is a general graph object initially constructed by the # list of the (()) objects. The basic concept of the # Bio::Pathway object is to store a graph as an adjacency list (in the # instance variable @graph), and converting the list into an adjacency # matrix by calling to_matrix method on demand. However, in some # cases, it is convenient to have the original list of the # (())s, Bio::Pathway object also stores the list (as # the instance variable @relations) redundantly. # # Note: you can clear the @relations list by calling clear_relations! # method to reduce the memory usage, and the content of the @relations # can be re-generated from the @graph by to_relations method. class Pathway # Initial graph (adjacency list) generation from the list of Relation. # # Generate Bio::Pathway object from the list of Bio::Relation objects. # If the second argument is true, undirected graph is generated. 
#
#   r1 = Bio::Relation.new('a', 'b', 1)
#   r2 = Bio::Relation.new('a', 'c', 5)
#   r3 = Bio::Relation.new('b', 'c', 3)
#   list = [ r1, r2, r3 ]
#   g = Bio::Pathway.new(list, 'undirected')
#
def initialize(relations, undirected = false)
  @undirected = undirected
  @relations = relations
  @graph = {}		# adjacency list expression of the graph
  @index = {}		# numbering each node in matrix
  @label = {}		# additional information on each node
  self.to_list		# generate adjacency list
end

# Read-only accessor for the internal list of the Bio::Relation objects
attr_reader :relations

# Read-only accessor for the adjacency list of the graph.
attr_reader :graph

# Read-only accessor for the row/column index (@index) of the
# adjacency matrix.  Contents of the hash @index is created by
# calling to_matrix method.
attr_reader :index

# Accessor for the hash of the label assigned to the each node.  You can
# label some of the nodes in the graph by passing a hash to the label
# and select subgraphs which contain labeled nodes only by subgraph method.
#
#   hash = { 1 => 'red', 2 => 'green', 5 => 'black' }
#   g.label = hash
#   g.label
#   g.subgraph    # => new graph consists of the node 1, 2, 5 only
#
attr_accessor :label

# Returns true or false respond to the internal state of the graph.
def directed?
  @undirected ? false : true
end

# Returns true or false respond to the internal state of the graph.
def undirected?
  @undirected ? true : false
end

# Changes the internal state of the graph from 'undirected' to
# 'directed' and re-generate adjacency list.  The undirected graph
# can be converted to directed graph, however, the edge between two
# nodes will be simply doubled to both ends.
#
# Note: this method can not be used without the list of the
# Bio::Relation objects (internally stored in @relations variable).
# Thus if you already called clear_relations! method, call
# to_relations first.
def directed
  if undirected?
    @undirected = false
    self.to_list
  end
end

# Changes the internal state of the graph from 'directed' to
# 'undirected' and re-generate adjacency list.
#
# Note: this method can not be used without the list of the
# Bio::Relation objects (internally stored in @relations variable).
# Thus if you already called clear_relations! method, call
# to_relations first.
def undirected
  if directed?
    @undirected = true
    self.to_list
  end
end

# Clear @relations array to reduce the memory usage.
def clear_relations!
  @relations.clear
end

# Reconstruct @relations from the adjacency list @graph.
# One Relation is created for every (from, to) entry stored in @graph.
def to_relations
  @relations.clear
  @graph.each_key do |from|
    @graph[from].each do |to, w|
      @relations << Relation.new(from, to, w)
    end
  end
  return @relations
end


# Graph (adjacency list) generation from the Relations
#
# Generate the adjacency list @graph from @relations (called by
# initialize and in some other cases when @relations has been changed).
def to_list
  @graph.clear
  @relations.each do |rel|
    append(rel, false)	# append to @graph without push to @relations
  end
end

# Add a Bio::Relation object 'rel' to the @graph and @relations.
# If the second argument is false, @relations is not modified (only
# useful when generating @graph from @relations internally).
def append(rel, add_rel = true)
  @relations.push(rel) if add_rel
  # make sure both end points exist as nodes, even without outgoing edges
  @graph[rel.from] = {} if @graph[rel.from].nil?
  @graph[rel.to] = {} if @graph[rel.to].nil?
  @graph[rel.from][rel.to] = rel.relation
  @graph[rel.to][rel.from] = rel.relation if @undirected
end

# Remove an edge indicated by the Bio::Relation object 'rel' from the
# @graph and the @relations.
#
# Nodes that are not present in @graph are skipped, so deleting a
# relation whose end points were never added does not raise.
def delete(rel)
  @relations.delete_if do |x|
    x === rel
  end
  from_edges = @graph[rel.from]
  from_edges.delete(rel.to) if from_edges
  if @undirected
    to_edges = @graph[rel.to]
    to_edges.delete(rel.from) if to_edges
  end
end

# Returns the number of the nodes in the graph.
def nodes
  @graph.keys.length
end

# Returns the number of the edges in the graph.
def edges edges = 0 @graph.each_value do |v| edges += v.size end edges end # Convert adjacency list to adjacency matrix # # Returns the adjacency matrix expression of the graph as a Matrix # object. If the first argument was assigned, the matrix will be # filled with the given value. The second argument indicates the # value of the diagonal constituents of the matrix besides the above. # # The result of this method depends on the order of Hash#each # (and each_key, etc.), which may be variable with Ruby version # and Ruby interpreter variations (JRuby, etc.). # For a workaround to remove such dependency, you can use @index # to set order of Hash keys. Note that this behavior might be # changed in the future. Be careful that @index is overwritten by # this method. # def to_matrix(default_value = nil, diagonal_value = nil) #-- # Note: following code only fills the outer Array with the reference # to the same inner Array object. # # matrix = Array.new(nodes, Array.new(nodes)) # # so create a new Array object for each row as follows: #++ matrix = Array.new nodes.times do matrix.push(Array.new(nodes, default_value)) end if diagonal_value nodes.times do |i| matrix[i][i] = diagonal_value end end # assign index number if @index.empty? then # assign index number for each node @graph.keys.each_with_index do |k, i| @index[k] = i end else # begin workaround removing depencency to order of Hash#each # assign index number from the preset @index indices = @index.to_a indices.sort! { |i0, i1| i0[1] <=> i1[1] } indices.collect! { |i0| i0[0] } @index.clear v = 0 indices.each do |k, i| if @graph[k] and !@index[k] then @index[k] = v; v += 1 end end @graph.each_key do |k| unless @index[k] then @index[k] = v; v += 1 end end # end workaround removing depencency to order of Hash#each end if @relations.empty? # only used after clear_relations! 
@graph.each do |from, hash| hash.each do |to, relation| x = @index[from] y = @index[to] matrix[x][y] = relation end end else @relations.each do |rel| x = @index[rel.from] y = @index[rel.to] matrix[x][y] = rel.relation matrix[y][x] = rel.relation if @undirected end end Matrix[*matrix] end # Pretty printer of the adjacency matrix. # # The dump_matrix method accepts the same arguments as to_matrix. # Useful when you want to check the internal state of the matrix # (for debug purpose etc.) easily. # # This method internally calls to_matrix method. # Read documents of to_matrix for important informations. # def dump_matrix(*arg) matrix = self.to_matrix(*arg) sorted = @index.sort {|a,b| a[1] <=> b[1]} "[# " + sorted.collect{|x| x[0]}.join(", ") + "\n" + matrix.to_a.collect{|row| ' ' + row.inspect}.join(",\n") + "\n]" end # Pretty printer of the adjacency list. # # Useful when you want to check the internal state of the adjacency # list (for debug purpose etc.) easily. # # The result of this method depends on the order of Hash#each # (and each_key, etc.), which may be variable with Ruby version # and Ruby interpreter variations (JRuby, etc.). # For a workaround to remove such dependency, you can use @index # to set order of Hash keys. Note that this behavior might be # changed in the future. # def dump_list # begin workaround removing depencency to order of Hash#each if @index.empty? then pref = nil enum = @graph else pref = {}.merge(@index) i = pref.values.max @graph.each_key do |node| pref[node] ||= (i += 1) end graph_to_a = @graph.to_a graph_to_a.sort! { |x, y| pref[x[0]] <=> pref[y[0]] } enum = graph_to_a end # end workaround removing depencency to order of Hash#each list = "" enum.each do |from, hash| list << "#{from} => " # begin workaround removing depencency to order of Hash#each if pref then ary = hash.to_a ary.sort! 
{ |x,y| pref[x[0]] <=> pref[y[0]] } hash = ary end # end workaround removing depencency to order of Hash#each a = [] hash.each do |to, relation| a.push("#{to} (#{relation})") end list << a.join(", ") + "\n" end list end # Select labeled nodes and generate subgraph # # This method select some nodes and returns new Bio::Pathway object # consists of selected nodes only. If the list of the nodes (as # Array) is assigned as the argument, use the list to select the # nodes from the graph. If no argument is assigned, internal # property of the graph @label is used to select the nodes. # # hash = { 'a' => 'secret', 'b' => 'important', 'c' => 'important' } # g.label = hash # g.subgraph # list = [ 'a', 'b', 'c' ] # g.subgraph(list) # def subgraph(list = nil) if list @label.clear list.each do |node| @label[node] = true end end sub_graph = Pathway.new([], @undirected) @graph.each do |from, hash| next unless @label[from] sub_graph.graph[from] ||= {} hash.each do |to, relation| next unless @label[to] sub_graph.append(Relation.new(from, to, relation)) end end return sub_graph end # Not implemented yet. def common_subgraph(graph) raise NotImplementedError end # Not implemented yet. def clique raise NotImplementedError end # Returns completeness of the edge density among the surrounded nodes. # # Calculates the value of cliquishness around the 'node'. This value # indicates completeness of the edge density among the surrounded nodes. # # Note: cliquishness (clustering coefficient) for a directed graph # is also calculated. # Reference: http://en.wikipedia.org/wiki/Clustering_coefficient # # Note: Cliquishness (clustering coefficient) for a node that has # only one neighbor node is undefined. Currently, it returns NaN, # but the behavior may be changed in the future. 
# def cliquishness(node) neighbors = @graph[node].keys sg = subgraph(neighbors) if sg.graph.size != 0 edges = sg.edges nodes = neighbors.size complete = (nodes * (nodes - 1)) return edges.quo(complete) else return 0.0 end end # Returns frequency of the nodes having same number of edges as hash # # Calculates the frequency of the nodes having the same number of edges # and returns the value as Hash. def small_world freq = Hash.new(0) @graph.each_value do |v| freq[v.size] += 1 end return freq end # Breadth first search solves steps and path to the each node and # forms a tree contains all reachable vertices from the root node. # This method returns the result in 2 hashes - 1st one shows the # steps from root node and 2nd hash shows the structure of the tree. # # The weight of the edges are not considered in this method. def breadth_first_search(root) visited = {} distance = {} predecessor = {} visited[root] = true distance[root] = 0 predecessor[root] = nil queue = [ root ] while from = queue.shift next unless @graph[from] @graph[from].each_key do |to| unless visited[to] visited[to] = true distance[to] = distance[from] + 1 predecessor[to] = from queue.push(to) end end end return distance, predecessor end # Alias for the breadth_first_search method. alias bfs breadth_first_search # Calculates the shortest path between two nodes by using # breadth_first_search method and returns steps and the path as Array. def bfs_shortest_path(node1, node2) distance, route = breadth_first_search(node1) step = distance[node2] node = node2 path = [ node2 ] while node != node1 and route[node] node = route[node] path.unshift(node) end return step, path end # Depth first search yields much information about the structure of # the graph especially on the classification of the edges. This # method returns 5 hashes - 1st one shows the timestamps of each # node containing the first discoverd time and the search finished # time in an array. 
The 2nd, 3rd, 4th, and 5th hashes contain 'tree # edges', 'back edges', 'cross edges', 'forward edges' respectively. # # If $DEBUG is true (e.g. ruby -d), this method prints the progression # of the search. # # The weight of the edges are not considered in this method. # # Note: The result of this method depends on the order of Hash#each # (and each_key, etc.), which may be variable with Ruby version # and Ruby interpreter variations (JRuby, etc.). # For a workaround to remove such dependency, you can use @index # to set order of Hash keys. Note that this bahavior might be # changed in the future. def depth_first_search visited = {} timestamp = {} tree_edges = {} back_edges = {} cross_edges = {} forward_edges = {} count = 0 # begin workaround removing depencency to order of Hash#each if @index.empty? then preference_of_nodes = nil else preference_of_nodes = {}.merge(@index) i = preference_of_nodes.values.max @graph.each_key do |node0| preference_of_nodes[node0] ||= (i += 1) end end # end workaround removing depencency to order of Hash#each dfs_visit = Proc.new { |from| visited[from] = true timestamp[from] = [count += 1] ary = @graph[from].keys # begin workaround removing depencency to order of Hash#each if preference_of_nodes then ary = ary.sort_by { |node0| preference_of_nodes[node0] } end # end workaround removing depencency to order of Hash#each ary.each do |to| if visited[to] if timestamp[to].size > 1 if timestamp[from].first < timestamp[to].first # forward edge (black) p "#{from} -> #{to} : forward edge" if $DEBUG forward_edges[from] = to else # cross edge (black) p "#{from} -> #{to} : cross edge" if $DEBUG cross_edges[from] = to end else # back edge (gray) p "#{from} -> #{to} : back edge" if $DEBUG back_edges[from] = to end else # tree edge (white) p "#{from} -> #{to} : tree edge" if $DEBUG tree_edges[to] = from dfs_visit.call(to) end end timestamp[from].push(count += 1) } ary = @graph.keys # begin workaround removing depencency to order of Hash#each if 
preference_of_nodes then ary = ary.sort_by { |node0| preference_of_nodes[node0] } end # end workaround removing depencency to order of Hash#each ary.each do |node| unless visited[node] dfs_visit.call(node) end end return timestamp, tree_edges, back_edges, cross_edges, forward_edges end # Alias for the depth_first_search method. alias dfs depth_first_search # Topological sort of the directed acyclic graphs ("dags") by using # depth_first_search. def dfs_topological_sort # sorted by finished time reversely and collect node names only timestamp, = self.depth_first_search timestamp.sort {|a,b| b[1][1] <=> a[1][1]}.collect {|x| x.first } end # Dijkstra method to solve the shortest path problem in the weighted graph. def dijkstra(root) distance, predecessor = initialize_single_source(root) @graph[root].each do |k, v| distance[k] = v predecessor[k] = root end queue = distance.dup queue.delete(root) while queue.size != 0 min = queue.min {|a, b| a[1] <=> b[1]} u = min[0] # extranct a node having minimal distance @graph[u].each do |k, v| # relaxing procedure of root -> 'u' -> 'k' if distance[k] > distance[u] + v distance[k] = distance[u] + v predecessor[k] = u end end queue.delete(u) end return distance, predecessor end # Bellman-Ford method for solving the single-source shortest-paths # problem in the graph in which edge weights can be negative. def bellman_ford(root) distance, predecessor = initialize_single_source(root) for i in 1 ..(self.nodes - 1) do @graph.each_key do |u| @graph[u].each do |v, w| # relaxing procedure of root -> 'u' -> 'v' if distance[v] > distance[u] + w distance[v] = distance[u] + w predecessor[v] = u end end end end # negative cyclic loop check @graph.each_key do |u| @graph[u].each do |v, w| if distance[v] > distance[u] + w return false end end end return distance, predecessor end # Floyd-Wardshall alogrithm for solving the all-pairs shortest-paths # problem on a directed graph G = (V, E). 
def floyd_warshall inf = 1 / 0.0 m = self.to_matrix(inf, 0) d = m.dup n = self.nodes for k in 0 .. n - 1 do for i in 0 .. n - 1 do for j in 0 .. n - 1 do if d[i, j] > d[i, k] + d[k, j] d[i, j] = d[i, k] + d[k, j] end end end end return d end # Alias for the floyd_warshall method. alias floyd floyd_warshall # Kruskal method for finding minimam spaninng trees def kruskal # initialize rel = self.to_relations.sort{|a, b| a <=> b} index = [] for i in 0 .. (rel.size - 1) do for j in (i + 1) .. (rel.size - 1) do if rel[i] == rel[j] index << j end end end index.sort{|x, y| y<=>x}.each do |idx| rel[idx, 1] = [] end mst = [] seen = Hash.new() @graph.each_key do |x| seen[x] = nil end i = 1 # initialize end rel.each do |r| if seen[r.node[0]] == nil seen[r.node[0]] = 0 end if seen[r.node[1]] == nil seen[r.node[1]] = 0 end if seen[r.node[0]] == seen[r.node[1]] && seen[r.node[0]] == 0 mst << r seen[r.node[0]] = i seen[r.node[1]] = i elsif seen[r.node[0]] != seen[r.node[1]] mst << r v1 = seen[r.node[0]].dup v2 = seen[r.node[1]].dup seen.each do |k, v| if v == v1 || v == v2 seen[k] = i end end end i += 1 end return Pathway.new(mst) end private def initialize_single_source(root) inf = 1 / 0.0 # inf.infinite? -> true distance = {} predecessor = {} @graph.each_key do |k| distance[k] = inf predecessor[k] = nil end distance[root] = 0 return distance, predecessor end end # Pathway # Bio::Relation is a simple object storing two nodes and the relation of them. # The nodes and the edge (relation) can be any Ruby object. You can also # compare Bio::Relation objects if the edges have Comparable property. class Relation # Create new binary relation object consists of the two object 'node1' # and 'node2' with the 'edge' object as the relation of them. def initialize(node1, node2, edge) @node = [node1, node2] @edge = edge end attr_accessor :node, :edge # Returns one node. def from @node[0] end # Returns another node. def to @node[1] end def relation @edge end # Used by eql? 
method def hash @node.sort.push(@edge).hash end # Compare with another Bio::Relation object whether havind same edges # and same nodes. The == method compares Bio::Relation object's id, # however this case equality === method compares the internal property # of the Bio::Relation object. def ===(rel) if self.edge == rel.edge if self.node[0] == rel.node[0] and self.node[1] == rel.node[1] return true elsif self.node[0] == rel.node[1] and self.node[1] == rel.node[0] return true else return false end else return false end end # Method eql? is an alias of the === method and is used with hash method # to make uniq arry of the Bio::Relation objects. # # a1 = Bio::Relation.new('a', 'b', 1) # a2 = Bio::Relation.new('b', 'a', 1) # a3 = Bio::Relation.new('b', 'c', 1) # p [ a1, a2, a3 ].uniq alias eql? === # Used by the each method to compare with another Bio::Relation object. # This method is only usable when the edge objects have the property of # the module Comparable. def <=>(rel) unless self.edge.kind_of? 
Comparable raise "[Error] edges are not comparable" end if self.edge > rel.edge return 1 elsif self.edge < rel.edge return -1 elsif self.edge == rel.edge return 0 end end end # Relation end # Bio bio-1.4.3.0001/lib/bio/shell.rb0000644000004100000410000000201412200110570015633 0ustar www-datawww-data# # = bio/shell.rb - Loading all BioRuby shell features # # Copyright:: Copyright (C) 2005, 2006 # Toshiaki Katayama # License:: The Ruby License # # $Id:$ # require 'bio' require 'yaml' require 'open-uri' require 'fileutils' require 'pp' module Bio::Shell require 'bio/shell/setup' require 'bio/shell/irb' require 'bio/shell/web' require 'bio/shell/script' require 'bio/shell/core' require 'bio/shell/interface' require 'bio/shell/object' require 'bio/shell/demo' require 'bio/shell/plugin/entry' require 'bio/shell/plugin/seq' require 'bio/shell/plugin/midi' require 'bio/shell/plugin/codon' require 'bio/shell/plugin/flatfile' require 'bio/shell/plugin/obda' require 'bio/shell/plugin/das' require 'bio/shell/plugin/keggapi' require 'bio/shell/plugin/soap' require 'bio/shell/plugin/emboss' require 'bio/shell/plugin/blast' require 'bio/shell/plugin/psort' require 'bio/shell/plugin/ncbirest' require 'bio/shell/plugin/togows' extend Ghost end bio-1.4.3.0001/lib/bio/reference.rb0000644000004100000410000004527112200110570016476 0ustar www-datawww-data# # = bio/reference.rb - Journal reference classes # # Copyright:: Copyright (C) 2001, 2006, 2008 # Toshiaki Katayama , # Ryan Raaum , # Jan Aerts # License:: The Ruby License # # $Id:$ # require 'enumerator' module Bio # = DESCRIPTION # # A class for journal reference information. # # = USAGE # # hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ], # 'title' => "Title of the study.", # 'journal' => "Theor. J. Hoge", # 'volume' => 12, # 'issue' => 3, # 'pages' => "123-145", # 'year' => 2001, # 'pubmed' => 12345678, # 'medline' => 98765432, # 'abstract' => "Hoge fuga. 
# = DESCRIPTION
#
# A class for journal reference information.
#
# = USAGE
#
#   hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ],
#           'title' => "Title of the study.",
#           'journal' => "Theor. J. Hoge",
#           'volume' => 12,
#           'issue' => 3,
#           'pages' => "123-145",
#           'year' => 2001,
#           'pubmed' => 12345678,
#           'medline' => 98765432,
#           'abstract' => "Hoge fuga. ...",
#           'url' => "http://example.com",
#           'mesh' => [],
#           'affiliations' => []}
#   ref = Bio::Reference.new(hash)
#
#   # Formats in the BiBTeX style.
#   ref.format("bibtex")
#
#   # Short-cut for Bio::Reference#format("bibtex")
#   ref.bibtex
#
class Reference

  # Author names in an Array, [ "Hoge, J.P.", "Fuga, F.B." ].
  attr_reader :authors

  # String with title of the study
  attr_reader :title

  # String with journal name
  attr_reader :journal

  # volume number (typically Fixnum)
  attr_reader :volume

  # issue number (typically Fixnum)
  attr_reader :issue

  # page range (typically String, e.g. "123-145")
  attr_reader :pages

  # year of publication (typically Fixnum)
  attr_reader :year

  # pubmed identifier (typically Fixnum)
  attr_reader :pubmed

  # medline identifier (typically Fixnum)
  attr_reader :medline

  # DOI identifier (typically String, e.g. "10.1126/science.1110418")
  attr_reader :doi

  # Abstract text in String.
  attr_reader :abstract

  # An URL String.
  attr_reader :url

  # MeSH terms in an Array.
  attr_reader :mesh

  # Affiliations in an Array.
  attr_reader :affiliations

  # Sequence number in EMBL/GenBank records
  attr_reader :embl_gb_record_number

  # Position in a sequence that this reference refers to
  attr_reader :sequence_position

  # Comments for the reference (typically Array of String, or nil)
  attr_reader :comments

  # Create a new Bio::Reference object from a Hash of values.
  # Data is extracted from the values for keys:
  #
  # * authors - expected value: Array of Strings
  # * title - expected value: String
  # * journal - expected value: String
  # * volume - expected value: Fixnum or String
  # * issue - expected value: Fixnum or String
  # * pages - expected value: String
  # * year - expected value: Fixnum or String
  # * pubmed - expected value: Fixnum or String
  # * medline - expected value: Fixnum or String
  # * abstract - expected value: String
  # * url - expected value: String
  # * mesh - expected value: Array of Strings
  # * affiliations - expected value: Array of Strings
  #
  # The keys doi, embl_gb_record_number, sequence_position and comments
  # are also read.  Missing values default to '' or [] (nil for doi, url,
  # embl_gb_record_number, sequence_position and comments).
  # ---
  # *Arguments*:
  # * (required) _hash_: Hash
  # *Returns*:: Bio::Reference object
  def initialize(hash)
    @authors  = hash['authors'] || [] # [ "Hoge, J.P.", "Fuga, F.B." ]
    @title    = hash['title']   || '' # "Title of the study."
    @journal  = hash['journal'] || '' # "Theor. J. Hoge"
    @volume   = hash['volume']  || '' # 12
    @issue    = hash['issue']   || '' # 3
    @pages    = hash['pages']   || '' # 123-145
    @year     = hash['year']    || '' # 2001
    @pubmed   = hash['pubmed']  || '' # 12345678
    @medline  = hash['medline'] || '' # 98765432
    @doi      = hash['doi']
    @abstract = hash['abstract'] || ''
    @url      = hash['url']
    @mesh     = hash['mesh'] || []
    @embl_gb_record_number = hash['embl_gb_record_number'] || nil
    @sequence_position = hash['sequence_position'] || nil
    @comments = hash['comments']
    @affiliations = hash['affiliations'] || []
  end

  # If _other_ is equal with the self, returns true.
  # Otherwise, returns false.
  # ---
  # *Arguments*:
  # * (required) _other_: any object
  # *Returns*:: true or false
  def ==(other)
    return true if super(other)
    return false unless other.instance_of?(self.class)
    [ :authors, :title, :journal, :volume, :issue, :pages, :year,
      :pubmed, :medline, :doi, :abstract, :url, :mesh,
      :embl_gb_record_number, :sequence_position, :comments,
      :affiliations ].all? do |m|
      begin
        self.__send__(m) == other.__send__(m)
      rescue NoMethodError, ArgumentError, NameError
        false
      end
    end
  end

  # Formats the reference in a given style.
  #
  # Styles:
  # 0. nil - general
  # 1. endnote - Endnote
  # 2. bibitem - Bibitem (option available)
  # 3. bibtex - BiBTeX (option available)
  # 4. rd - rd (option available)
  # 5. nature - Nature (option available)
  # 6. science - Science
  # 7. genome_biol - Genome Biology
  # 8. genome_res - Genome Research
  # 9. nar - Nucleic Acids Research
  # 10. current - Current Biology
  # 11. trends - Trends in *
  # 12. cell - Cell Press
  #
  # The style 'embl' is accepted as well.  Unknown styles fall back to the
  # general style.  See individual methods for details.  Basic usage is:
  #
  #   # ref is Bio::Reference object
  #   # using simplest possible call (for general style)
  #   puts ref.format
  #
  #   # output in Nature style
  #   puts ref.format("nature")      # alternatively, puts ref.nature
  #
  #   # output in Nature short style (see Bio::Reference#nature)
  #   puts ref.format("nature",true) # alternatively, puts ref.nature(true)
  # ---
  # *Arguments*:
  # * (optional) _style_: String with style identifier
  # * (optional) _options_: Options for styles accepting one
  # *Returns*:: String
  def format(style = nil, *options)
    case style
    when 'embl'
      return embl
    when 'endnote'
      return endnote
    when 'bibitem'
      return bibitem(*options)
    when 'bibtex'
      return bibtex(*options)
    when 'rd'
      return rd(*options)
    when /^nature$/i
      return nature(*options)
    when /^science$/i
      return science
    when /^genome\s*_*biol/i
      return genome_biol
    when /^genome\s*_*res/i
      return genome_res
    when /^nar$/i
      return nar
    when /^current/i
      return current
    when /^trends/i
      return trends
    when /^cell$/i
      return cell
    else
      return general
    end
  end

  # Returns reference formatted in the Endnote style.
  #
  #   # ref is a Bio::Reference object
  #   puts ref.endnote
  #
  #     %0 Journal Article
  #     %A Hoge, J.P.
  #     %A Fuga, F.B.
  #     %D 2001
  #     %T Title of the study.
  #     %J Theor. J. Hoge
  #     %V 12
  #     %N 3
  #     %P 123-145
  #     %M 12345678
  #     %U http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&dopt=Citation&list_uids=12345678
  #     %X Hoge fuga. ...
  #
  # (The %U line is the url attribute, or the URL built by pubmed_url
  # when no url is set.)  Every field is tested with to_s.empty?, so
  # non-String values are handled uniformly.
  # ---
  # *Returns*:: String
  def endnote
    lines = []
    lines << "%0 Journal Article"
    @authors.each do |author|
      lines << "%A #{author}"
    end
    lines << "%D #{@year}" unless @year.to_s.empty?
    lines << "%T #{@title}" unless @title.to_s.empty?
    lines << "%J #{@journal}" unless @journal.to_s.empty?
    lines << "%V #{@volume}" unless @volume.to_s.empty?
    lines << "%N #{@issue}" unless @issue.to_s.empty?
    lines << "%P #{@pages}" unless @pages.to_s.empty?
    lines << "%M #{@pubmed}" unless @pubmed.to_s.empty?
    u = @url.to_s.empty? ? pubmed_url : @url
    lines << "%U #{u}" unless u.empty?
    lines << "%X #{@abstract}" unless @abstract.to_s.empty?
    @mesh.each do |term|
      lines << "%K #{term}"
    end
    lines << "%+ #{@affiliations.join(' ')}" unless @affiliations.empty?
    return lines.join("\n")
  end

  # Returns reference formatted in the EMBL style.
  #
  #   # ref is a Bio::Reference object
  #   puts ref.embl
  #
  #     RP   1-1859
  #     RX   PUBMED; 1907511.
  #     RA   Oxtoby E., Dunn M.A., Pancoro A., Hughes M.A.;
  #     RT   "Nucleotide and derived amino acid sequence of the cyanogenic
  #     RT   beta-glucosidase (linamarase) from white clover (Trifolium repens L.)";
  #     RL   Plant Mol. Biol. 17(2):209-219(1991).
  def embl
    r = self
    # reference_format_embl is invoked inside the formatter instance
    Bio::Sequence::Format::NucFormatter::Embl.new('').instance_eval {
      reference_format_embl(r)
    }
  end

  # Returns reference formatted in the bibitem style
  #
  #   # ref is a Bio::Reference object
  #   puts ref.bibitem
  #
  #     \bibitem{PMID:12345678}
  #     Hoge, J.P., Fuga, F.B.
  #     Title of the study.,
  #     {\em Theor. J. Hoge}, 12(3):123--145, 2001.
  # ---
  # *Arguments*:
  # * (optional) _item_: label string (default: "PMID:#{pubmed}").
  # *Returns*:: String
  def bibitem(item = nil)
    item  = "PMID:#{@pubmed}" unless item
    pages = @pages.sub('-', '--')
    # every line of the here document is stripped before joining
    return <<-"END".enum_for(:each_line).collect {|line| line.strip}.join("\n")
      \\bibitem{#{item}}
      #{@authors.join(', ')}
      #{@title},
      {\\em #{@journal}}, #{@volume}(#{@issue}):#{pages}, #{@year}.
    END
  end

  # Returns reference formatted in the BiBTeX style.
  #
  #   # ref is a Bio::Reference object
  #   puts ref.bibtex
  #
  #     @article{PMID:12345678,
  #       author  = {Hoge, J.P. and Fuga, F.B.},
  #       title   = {Title of the study.},
  #       journal = {Theor. J. Hoge},
  #       year    = {2001},
  #       volume  = {12},
  #       number  = {3},
  #       pages   = {123--145},
  #     }
  #
  #   # using a different section (e.g. "book")
  #   # (but not really configured for anything other than articles)
  #   puts ref.bibtex("book")
  #
  # ---
  # *Arguments*:
  # * (optional) _section_: BiBTeX section as String
  # * (optional) _label_: Label string cited by LaTeX documents.
  #   Default is "PMID:#{pubmed}".
  # * (optional) _keywords_: Hash of additional keywords,
  #   e.g. { 'abstract' => 'This is abstract.' }.
  #   You can also override default keywords.
  #   To disable default keywords, specify false as
  #   value, e.g. { 'url' => false, 'year' => false }.
  # *Returns*:: String
  def bibtex(section = nil, label = nil, keywords = {})
    section = "article" unless section
    authors = authors_join(' and ', ' and ')
    thepages = pages.to_s.empty? ? nil : pages.sub(/\-/, '--')
    label = "PMID:#{pubmed}" unless label
    theurl = if !(url.to_s.empty?) then
               url
             elsif pmurl = pubmed_url and !(pmurl.to_s.empty?) then
               pmurl
             else
               nil
             end
    hash = {
      'author'  => authors.empty?    ? nil : authors,
      'title'   => title.to_s.empty? ? nil : title,
      'number'  => issue.to_s.empty? ? nil : issue,
      'pages'   => thepages,
      'url'     => theurl
    }
    keys = %w( author title journal year volume number pages url )
    # remaining default keywords are taken from the attribute readers
    keys.each do |k|
      hash[k] = self.__send__(k.intern) unless hash.has_key?(k)
    end
    # a nil value in keywords keeps the default; false disables the keyword
    hash.merge!(keywords) { |k, v1, v2| v2.nil? ? v1 : v2 }
    bib = [ "@#{section}{#{label}," ]
    keys.concat((hash.keys - keys).sort)
    keys.each do |kw|
      ref = hash[kw]
      # NOTE(review): leading indentation reproduced as visible in this
      # dump, where whitespace runs appear collapsed - confirm the width.
      bib.push " #{kw.ljust(12)} = {#{ref}}," if ref
    end
    bib.push "}\n"
    return bib.join("\n")
  end

  # Returns reference formatted in a general/generic style.
  #
  #   # ref is a Bio::Reference object
  #   puts ref.general
  #
  #     Hoge, J.P., Fuga, F.B. (2001). "Title of the study." Theor. J. Hoge 12:123-145.
  # ---
  # *Returns*:: String
  def general
    authors = @authors.join(', ')
    "#{authors} (#{@year}). \"#{@title}\" #{@journal} #{@volume}:#{@pages}."
  end

  # Return reference formatted in the RD style.
  #
  #   # ref is a Bio::Reference object
  #   puts ref.rd
  #
  #     == Title of the study.
  #
  #     * Hoge, J.P. and Fuga, F.B.
  #
  #     * Theor. J. Hoge 2001 12:123-145 [PMID:12345678]
  #
  #     Hoge fuga. ...
  #
  # An optional string argument can be supplied, but does nothing
  # (it is only used when @abstract is nil).
  # ---
  # *Arguments*:
  # * (optional) str: String (default nil)
  # *Returns*:: String
  def rd(str = nil)
    @abstract ||= str
    lines = []
    lines << "== " + @title
    lines << "* " + authors_join(' and ')
    lines << "* #{@journal} #{@year} #{@volume}:#{@pages} [PMID:#{@pubmed}]"
    lines << @abstract
    return lines.join("\n\n")
  end

  # Formats in the Nature Publishing Group
  # (http://www.nature.com) style.
  #
  #   # ref is a Bio::Reference object
  #   puts ref.nature
  #
  #     Hoge, J.P. & Fuga, F.B. Title of the study. Theor. J. Hoge 12, 123-145 (2001).
  #
  #   # optionally, output short version
  #   puts ref.nature(true)  # or puts ref.nature(short=true)
  #
  #     Hoge, J.P. & Fuga, F.B. Theor. J. Hoge 12, 123-145 (2001).
  # ---
  # *Arguments*:
  # * (optional) _short_: Boolean (default false)
  # *Returns*:: String
  def nature(short = false)
    if short
      if @authors.size > 4
        authors = "#{@authors[0]} et al."
      elsif @authors.size == 1
        authors = "#{@authors[0]}"
      else
        authors = authors_join(' & ')
      end
      "#{authors} #{@journal} #{@volume}, #{@pages} (#{@year})."
    else
      authors = authors_join(' & ')
      "#{authors} #{@title} #{@journal} #{@volume}, #{@pages} (#{@year})."
    end
  end

  # Returns reference formatted in the
  # Science[http://www.sciencemag.org] style.
  #
  #   # ref is a Bio::Reference object
  #   puts ref.science
  #
  #     J.P. Hoge, F.B. Fuga, Theor. J. Hoge 12 123 (2001).
  # ---
  # *Returns*:: String
  def science
    if @authors.size > 4
      authors = rev_name(@authors[0]) + " et al."
    else
      authors = @authors.collect { |name| rev_name(name) }.join(', ')
    end
    page_from, = @pages.split('-')
    "#{authors}, #{@journal} #{@volume} #{page_from} (#{@year})."
  end

  # Returns reference formatted in the Genome Biology
  # (http://genomebiology.com) style.
  #
  #   # ref is a Bio::Reference object
  #   puts ref.genome_biol
  #
  #     Hoge JP, Fuga FB: Title of the study. Theor J Hoge 2001, 12:123-145.
  # ---
  # *Returns*:: String
  def genome_biol
    authors = @authors.collect { |name| strip_dots(name) }.join(', ')
    journal = strip_dots(@journal)
    "#{authors}: #{@title} #{journal} #{@year}, #{@volume}:#{@pages}."
  end

  # Returns reference formatted in the Current Biology
  # (http://current-biology.com) style. (Same as the Genome Biology style)
  #
  #   # ref is a Bio::Reference object
  #   puts ref.current
  #
  #     Hoge JP, Fuga FB: Title of the study. Theor J Hoge 2001, 12:123-145.
  # ---
  # *Returns*:: String
  def current
    self.genome_biol
  end

  # Returns reference formatted in the Genome Research
  # (http://genome.org) style.
  #
  #   # ref is a Bio::Reference object
  #   puts ref.genome_res
  #
  #     Hoge, J.P. and Fuga, F.B. 2001.
  #       Title of the study. Theor. J. Hoge 12: 123-145.
  # ---
  # *Returns*:: String
  def genome_res
    authors = authors_join(' and ')
    # NOTE(review): indentation after the line break reproduced as visible
    # in this dump, where whitespace runs appear collapsed - confirm.
    "#{authors} #{@year}.\n #{@title} #{@journal} #{@volume}: #{@pages}."
  end

  # Returns reference formatted in the Nucleic Acids Research
  # (http://nar.oxfordjournals.org) style.
  #
  #   # ref is a Bio::Reference object
  #   puts ref.nar
  #
  #     Hoge, J.P. and Fuga, F.B. (2001) Title of the study. Theor. J. Hoge, 12, 123-145.
  # ---
  # *Returns*:: String
  def nar
    authors = authors_join(' and ')
    "#{authors} (#{@year}) #{@title} #{@journal}, #{@volume}, #{@pages}."
  end

  # Returns reference formatted in the
  # CELL[http://www.cell.com] Press style.
  #
  #   # ref is a Bio::Reference object
  #   puts ref.cell
  #
  #     Hoge, J.P. and Fuga, F.B. (2001). Title of the study. Theor. J. Hoge 12, 123-145.
  # ---
  # *Returns*:: String
  def cell
    authors = authors_join(' and ')
    "#{authors} (#{@year}). #{@title} #{@journal} #{@volume}, #{pages}."
  end

  # Returns reference formatted in the
  # TRENDS[http://www.trends.com] style.
  #
  #   # ref is a Bio::Reference object
  #   puts ref.trends
  #
  #     Hoge, J.P. and Fuga, F.B. (2001) Title of the study. Theor. J. Hoge 12, 123-145
  # ---
  # *Returns*:: String
  def trends
    if @authors.size > 2
      authors = "#{@authors[0]} et al."
    elsif @authors.size == 1
      authors = "#{@authors[0]}"
    else
      authors = authors_join(' and ')
    end
    "#{authors} (#{@year}) #{@title} #{@journal} #{@volume}, #{@pages}"
  end

  # Returns a valid URL for pubmed records, or an empty String when no
  # pubmed identifier is set.
  #
  # *Returns*:: String
  def pubmed_url
    unless @pubmed.to_s.empty?
      head = "http://www.ncbi.nlm.nih.gov/pubmed"
      return "#{head}/#{@pubmed}"
    end
    ''
  end

  private

  # Removes commas and periods from the given string (nil for nil).
  def strip_dots(data)
    data.tr(',.', '') if data
  end

  # Joins the author names with _sep_, using _amp_ before the last author.
  # Returns an empty String when there are no authors.
  def authors_join(amp, sep = ', ')
    authors = @authors.clone
    if authors.length > 1
      last = authors.pop
      authors.join(sep) + "#{amp}" + last
    elsif authors.length == 1
      authors.pop
    else
      ""
    end
  end

  # Converts "Family, Initials" into "Initials Family".  Names without a
  # comma are returned as they are.  The separator is a comma followed by
  # any amount of whitespace (including none), in line with the comma
  # test; a name without initials yields the family name alone.
  def rev_name(name)
    if name =~ /,/
      family, initial = name.split(/,\s*/)
      name = initial.to_s.empty? ? family.to_s : "#{initial} #{family}"
    end
    return name
  end

end
# Parser for a SOSUI output report.
#
# The first line of +output_report+ is taken as the query identifier
# (a leading ">" is removed) and the second line as the prediction.
# Transmembrane helix lines are parsed only when the prediction
# contains "MEMBRANE".
#
# Empty or header-only input yields empty strings instead of raising
# NoMethodError.
def initialize(output_report)
  entry = output_report.split(/\n/)
  @entry_id = entry[0].to_s.strip.sub(/^>/, '')
  @prediction = entry[1].to_s.strip
  @tms = 0
  @tmhs = []
  parse_tmh(entry) if /MEMBRANE/ =~ @prediction
end

private

# Parser for TMH lines.
#
# Scans every line of +entry+ (an Array of report lines): the
# "NUMBER OF TM HELIX" line sets the helix count and every "TM n from- to
# GRADE SEQUENCE" line appends a TMH object to @tmhs.
def parse_tmh(entry)
  entry.each do |line|
    if /NUMBER OF TM HELIX = (\d+)/ =~ line
      # Stored as an Integer so that it has the same type as the initial 0.
      @tms = $1.to_i
    elsif /TM (\d+) +(\d+)- *(\d+) (\w+) +(\w+)/ =~ line
      from, to, grade, seq = $2.to_i, $3.to_i, $4, $5
      @tmhs.push(TMH.new(Range.new(from, to), grade, seq))
    end
  end
end

# = Bio::SOSUI::Report::TMH
# Container class for transmembrane helix information.
#
#  TM 1   31-  53 SECONDARY   HIRMTFLRKVYSILSLQVLLTTV
class TMH

  # Returns a Range of the transmembrane helix positions.
  attr_reader :range

  # Returns the grade of the helix as given in the report line
  # (``PRIMARY'' or ``SECONDARY'').
  attr_reader :grade

  # Returns the sequence of the transmembrane helix.
  attr_reader :sequence

  # Sets values.
  def initialize(range, grade, sequence)
    @range = range
    @grade = grade
    @sequence = sequence
  end
end
# == A client for PSORT WWW Server
#
# Clients of the PSORT WWW servers, which predict protein subcellular
# localization. PSORT1 and PSORT2 are implemented; IPSORT, PSORTB and
# WoLF_PSORT are empty placeholder classes.
#
# == References
#
# * http://psort.hgc.jp/
#
class PSORT

  # A Hash of CGI endpoints of the PSORT servers, keyed by site
  # (:IMSUT, :Okazaki, :Peking) and then by program (:PSORT1, :PSORT2).
  # Every value is a URI object.
  #
  # NOTE(review): the original comment listed the Peking host as
  # "srs.pku.edu.cn:8088" while both URIs say "src.pku.edu.cn:8088";
  # the URIs are kept as they were -- confirm which spelling is right.
  ServerURI = {
    :IMSUT   => {
      :PSORT1 => URI.parse("http://psort.hgc.jp/cgi-bin/okumura.pl"),
      :PSORT2 => URI.parse("http://psort.hgc.jp/cgi-bin/runpsort.pl") },
    :Okazaki => {
      :PSORT1 => URI.parse("http://psort.nibb.ac.jp/cgi-bin/okumura.pl"),
      :PSORT2 => URI.parse("http://psort.nibb.ac.jp/cgi-bin/runpsort.pl") },
    :Peking  => {
      # Was "http:///src..." (three slashes), which parses with no host.
      :PSORT1 => URI.parse("http://src.pku.edu.cn:8088/cgi-bin/okumura.pl"),
      :PSORT2 => URI.parse("http://src.pku.edu.cn:8088/cgi-bin/runpsort.pl") },
  }

  # = Generic CGI client class
  # A generic CGI client class for Bio::PSORT::* classes.
  # The class provides an interface for CGI argument processing and output
  # report parsing. Subclasses implement the private methods make_args and
  # parse_report.
  #
  # == Example
  #
  #   class NewClient < CGIDriver
  #     def initialize(host, path)
  #       super(host, path)
  #     end
  #
  #     private
  #
  #     def make_args(query)
  #       # ...
  #     end
  #
  #     def parse_report(output)
  #       # ...
  #     end
  #   end
  #
  class CGIDriver

    # CGI query argument in Hash ({key => value, ...}).
    attr_accessor :args

    # CGI output raw text
    attr_reader :report

    # Sets remote host name and cgi path or uri.
    #
    # When +host+ (converted with to_s) starts with "http:" it is used as
    # the complete URI and +path+ is ignored. Otherwise the URI is built
    # as http://host/path; leading slashes of +path+ are dropped so that
    # no "//" is produced, and a nil +path+ is treated as empty.
    #
    # == Examples
    #
    #   CGIDriver.new("localhost", "/cgi-bin/psort_www.pl")
    #
    #   CGIDriver.new("http://localhost/cgi-bin/psort_www.pl")
    #
    #   CGIDriver.new(URI.parse("http://localhost/cgi-bin/psort_www.pl"))
    #
    def initialize(host = '', path = '')
      uri = case host.to_s
            when /^http:/
              host.to_s
            else
              "http://#{host}/#{path.to_s.sub(%r{\A/+}, '')}"
            end
      @uri = URI.parse(uri)
      @args = {}
      @report = ''
    end

    # Executes a CGI ``query'' and returns the parsed report.
    #
    # The POST body is built by make_args, sent to the host, port and
    # path of the configured URI, stored raw in #report and then passed
    # through parse_report.
    def exec(query)
      data = make_args(query)
      result = nil
      # The port is passed explicitly; otherwise servers that do not
      # listen on port 80 (e.g. ":8088") could not be reached.
      Bio::Command.start_http(@uri.host, @uri.port) {|http|
        result = http.post(@uri.path, data)
      }
      @report = result.body
      parse_report(@report)
    end

    private

    # Bio::CGIDriver#make_args. An API skeleton.
    def make_args(args_hash)
      # The routine should be provided in the inherited class
    end

    # Bio::CGIDriver#parse_report. An API skeleton.
    def parse_report(result_body)
      # The routine should be provided in the inherited class
    end

    # Erases HTML tags
    def erase_html_tags(str)
      return str.gsub(/<\S.*?>/, '')
    end

    # Returns CGI argument text in String (key=value&) from a Hash
    # ({key=>value}). Keys and values are CGI-escaped.
    def args_join(hash, delim = '&')
      pairs = hash.collect do |key, val|
        CGI.escape(key.to_s) + '=' + CGI.escape(val.to_s)
      end
      return pairs.join(delim)  # not ';' but '&' in the psort cgi script.
    end

  end # class CGIDriver


  # = Bio::PSORT::PSORT1
  #
  # Bio::PSORT::PSORT1 is a wrapper class for the original PSORT program.
  #
  # == Example
  #
  #  serv = Bio::PSORT::PSORT1.imsut
  #  serv.title = 'Query_title_splited_by_white space'
  #  serv.exec(seq, false)  # seq.class => String
  #  serv.exec(seq)
  #  report = serv.exec(Bio::FastaFormat.new(seq))
  #  report_raw = serv.exec(Bio::FastaFormat.new(seq), false)
  #
  # == References
  #
  # 1. Nakai, K. and Kanehisa, M., A knowledge base for predicting protein
  #    localization sites in eukaryotic cells, Genomics 14, 897-911 (1992).
  #    [PMID:1478671]
  #
  class PSORT1

    autoload :Report, 'bio/appl/psort/report'

    # Returns a PSORT1 object whose driver (Bio::PSORT::PSORT1::Remote)
    # connects to the IMSUT server.
    def self.imsut
      self.new(Remote.new(ServerURI[:IMSUT][:PSORT1]))
    end

    # Returns a PSORT1 object whose driver (Bio::PSORT::PSORT1::Remote)
    # connects to the NIBB server.
    def self.okazaki
      self.new(Remote.new(ServerURI[:Okazaki][:PSORT1]))
    end

    # Returns a PSORT1 object whose driver (Bio::PSORT::PSORT1::Remote)
    # connects to the Peking server.
    def self.peking
      self.new(Remote.new(ServerURI[:Peking][:PSORT1]))
    end

    # Sets a cgi client (Bio::PSORT::PSORT1::Remote).
    #
    def initialize(driver, origin = 'yeast', title = 'MYSEQ')
      @serv = driver
      @origin = origin  # Gram-positive bacterium, Gram-negative bacterium,
                        # yeast, animal, plant
      @title = title
      @sequence = ''
    end

    # An accessor of the origin argument. Default setting is "yeast".
    # Usable values:
    # 1. Gram-positive bacterium
    # 2. Gram-negative bacterium
    # 3. yeast
    # 4. animal
    # 5. plant
    attr_accessor :origin

    # An accessor of the query sequence argument.
    attr_accessor :sequence

    # An accessor of the title argument. Default setting is 'MYSEQ'.
    # The value is automatically set from the entry id if you use a
    # query in Bio::FastaFormat and the title is still the default.
    attr_accessor :title

    # Executes the query (faa) and returns the result of the driver:
    # a Bio::PSORT::PSORT1::Report by default.
    #
    # The ``faa'' argument accepts a sequence both in String and in
    # Bio::FastaFormat; anything else is wrapped by Bio::FastaFormat.new.
    #
    # If the second argument is ``parsing = false'', the output text is
    # returned without any parsing.
    def exec(faa, parsing = true)
      unless faa.is_a?(Bio::FastaFormat)
        return self.exec(Bio::FastaFormat.new(faa), parsing)
      end
      @title = faa.entry_id if @title == 'MYSEQ'
      @sequence = faa.seq
      @serv.args = {'title' => @title, 'origin' => @origin}
      @serv.parsing = parsing
      @serv.exec(@sequence)
    end


    # = Bio::PSORT::PSORT1::Remote
    # PSORT1 specific CGIDriver.
    class Remote < CGIDriver

      # Accessor for the target domain (origin).
      # Target domains:
      # 1. Gram-positive bacterium
      # 2. Gram-negative bacterium
      # 3. yeast
      # 4. animal
      # 5. plant
      attr_accessor :origin

      # Accessor for the query title.
      attr_accessor :title

      # Accessor for the output parsing flag. Default setting is true.
      attr_accessor :parsing

      # Sets remote ``host'' and cgi ``path''.
      def initialize(host, path = nil, title = 'MYSEQ', origin = 'yeast')
        @title = title
        @origin = origin
        @parsing = true
        super(host, path)
      end

      private

      # Returns parsed CGI argument.
      # An API implementation.
      def make_args(query)
        @args.update({'sequence' => query})
        return args_join(@args)
      end

      # Returns parsed output report (raw text without HTML tags when
      # parsing is disabled).
      # An API implementation.
      def parse_report(str)
        str = erase_html_tags(str)
        str = Bio::PSORT::PSORT1::Report.parser(str) if @parsing
        return str
      end

    end # Class Remote

  end # class PSORT1


  # = Bio::PSORT::PSORT2
  # Bio::PSORT::PSORT2 is a wrapper class for the PSORT II program.
  #
  # == Example
  #
  #  serv = Bio::PSORT::PSORT2.imsut
  #  serv.title = 'Query_title_splited_by_white space'
  #  serv.exec(seq, false)  # seq.class => String
  #  serv.exec(seq)
  #  report = serv.exec(Bio::FastaFormat.new(seq))
  #  report_raw = serv.exec(Bio::FastaFormat.new(seq), false)
  #
  # == References
  # 1. Nakai, K. and Horton, P., PSORT: a program for detecting the sorting
  #    signals of proteins and predicting their subcellular localization,
  #    Trends Biochem. Sci, 24(1) 34-35 (1999).
  #    [PMID:10087920]
  class PSORT2

    autoload :Report, 'bio/appl/psort/report'

    # Returns a PSORT2 object whose driver (Bio::PSORT::PSORT2::Remote)
    # connects to the given host/path or URI. See ServerURI for the
    # predefined servers.
    def self.remote(host, path = nil)
      self.new(Remote.new(host, path))
    end

    # Returns a PSORT2 object whose driver (Bio::PSORT::PSORT2::Remote)
    # connects to the IMSUT server.
    def self.imsut
      self.remote(ServerURI[:IMSUT][:PSORT2])
    end

    # Returns a PSORT2 object whose driver (Bio::PSORT::PSORT2::Remote)
    # connects to the NIBB server.
    def self.okazaki
      self.remote(ServerURI[:Okazaki][:PSORT2])
    end

    # Returns a PSORT2 object whose driver (Bio::PSORT::PSORT2::Remote)
    # connects to the Peking server.
    def self.peking
      self.remote(ServerURI[:Peking][:PSORT2])
    end

    # An accessor of the origin argument.
    # Default setting is ``yeast''.
    attr_accessor :origin

    # An accessor of the title argument. Default setting is an empty
    # String. While it is empty (or nil), the value is automatically set
    # from the entry id if you use a query in Bio::FastaFormat.
    attr_accessor :title

    # Sets a server CGI Driver (Bio::PSORT::PSORT2::Remote).
    def initialize(driver, origin = 'yeast')
      @serv = driver
      @origin = origin
      @title = ''
    end

    # Executes PSORT II prediction and returns Report object
    # (Bio::PSORT::PSORT2::Report) if parsing = true.
    # Returns PSORT II report in text if parsing = false.
    def exec(faa, parsing = true)
      unless faa.is_a?(Bio::FastaFormat)
        return self.exec(Bio::FastaFormat.new(faa), parsing)
      end
      # @title is initialised to '' (never nil), so test for "unset"
      # with empty?; comparing with nil never picked up the entry id.
      @title = faa.entry_id if @title.to_s.empty?
      @sequence = faa.seq
      @serv.args = {'origin' => @origin, 'title' => @title}
      @serv.parsing = parsing
      @serv.exec(@sequence)
    end


    # = Bio::PSORT::PSORT2::Remote
    # PSORT2 specific CGIDriver
    class Remote < CGIDriver

      # Sets remote ``host'' and cgi ``path''.
      def initialize(host, path)
        @origin = 'yeast'
        super(host, path)
        @parsing = true
      end

      # An accessor of the origin argument.
      # Default setting is ``yeast''.
      attr_accessor :origin

      # An accessor of the output parsing.
      # Default setting is ``true''.
      attr_accessor :parsing

      private

      # Returns parsed CGI argument.
      # An API implementation.
      def make_args(query)
        @args.update({'sequence' => query})
        return args_join(@args)
      end

      # Returns parsed output report (raw text without HTML tags when
      # parsing is disabled).
      # An API implementation.
      def parse_report(str)
        # NOTE(review): the pattern was damaged in the extracted source
        # (an HTML tag was stripped); "<hr>" is restored on that
        # assumption -- confirm against the upstream file.
        str = str.gsub(/\n<hr>/i, Report::BOUNDARY)
        str = erase_html_tags(str)
        str = Bio::PSORT::PSORT2::Report.parser(str, self.args['title']) if @parsing
        return str
      end

    end # class Remote

  end # class PSORT2


  # Placeholder for an iPSORT client (not implemented).
  class IPSORT
  end # class IPSORT


  # Placeholder for a PSORT-B client (not implemented).
  class PSORTB
  end # class PSORTB


  # Placeholder for a WoLF-PSORT client (not implemented).
  class WoLF_PSORT
  end # class WoLF_PSORT

end # class PSORT
# Sets options by String.
# The string is split into words with Shellwords.shellwords and stored
# as the option Array. Kept for backward compatibility.
def option=(str)
  @options = Shellwords.shellwords(str)
end

# Executes the hmmer search and returns the report
# (Bio::HMMER::Report object).
#
# The command line is the program name, the options, the hmm file and
# the sequence file, in this order. The raw output of the command is
# kept in @output.
def query
  command = [ @program, *@options ]
  command.concat([ @hmmfile, @seqfile ])
  @output = Bio::Command.query_command(command, nil)
  parse_result(@output)
end

private

# Wraps the raw output +data+ in a Report object.
def parse_result(data)
  Report.new(data)
end
# = Bio::PTS1 - A web service client class for the PTS1 predictor.
#
# PTS1 stands for peroxisomal targeting signal type 1. The client posts
# a query sequence to the prediction server and wraps the returned HTML
# in a Bio::PTS1::Report.
#
# == Examples
#
#   pts1 = Bio::PTS1.new
#   report = pts1.exec(">title\nKLMFKTEGPDSD")
#   report.prediction
#   report.cterm
#
class PTS1

  # Organism specific parameter value: function names and the numeric
  # codes that are sent to the server.
  FUNCTION = {
    'METAZOA-specific' => 1,
    'FUNGI-specific'   => 2,
    'GENERAL'          => 3,
  }

  # Output report (the Bio::PTS1::Report of the last #exec, or nil).
  attr_reader :output

  # Short-cut for Bio::PTS1.new('METAZOA-specific')
  def self.new_with_metazoa_function
    self.new('METAZOA-specific')
  end

  # Short-cut for Bio::PTS1.new('FUNGI-specific')
  def self.new_with_fungi_function
    self.new('FUNGI-specific')
  end

  # Short-cut for Bio::PTS1.new('GENERAL')
  def self.new_with_general_function
    self.new('GENERAL')
  end


  # Constructs Bio::PTS1 web service client.
  #
  # +func+ is a function name or its numeric code; see Bio::PTS1::FUNCTION.
  #
  # == Examples
  #
  #   serv_default_metazoa_specific = Bio::PTS1.new
  #   serv_general_function = Bio::PTS1.new('GENERAL')
  #   serv_fungi_specific = Bio::PTS1.new(2)    # See Bio::PTS1::FUNCTION.
  #
  def initialize(func = 'METAZOA-specific')
    @uri = "http://mendel.imp.ac.at/jspcgi/cgi-bin/pts1/pts1.cgi"
    @output = nil
    @function = function(func)
  end


  # Sets and shows the function parameter.
  #
  # Organism specific parameter: function names (Bio::PTS1::FUNCTION.keys)
  # or their numeric codes (Bio::PTS1::FUNCTION.values).
  #
  # Without an argument the current function name is returned as a
  # String. With an argument the function is changed and a one-entry
  # Hash ({name => code}) is returned.
  #
  # Raises ArgumentError for an unknown name or code; the message lists
  # the available function names.
  #
  # == Examples
  #
  #  # sets function name parameter.
  #  serv = Bio::PTS1.new
  #  serv.function('METAZOA-specific')
  #
  #  # shows function name parameter.
  #  serv.function #=> "METAZOA-specific"
  #
  def function(func = nil)
    return @function.keys.join('') if func == nil

    if FUNCTION.values.include?(func)
      @function = Hash[*FUNCTION.find {|x| x[1] == func}]
    elsif FUNCTION[func]
      @function = {func => FUNCTION[func]}
    else
      # The third argument of raise is the backtrace, not a second
      # message line, so the hint is made part of the message itself.
      raise ArgumentError,
            "Invalid argument: #{func}. " \
            "Available function names: #{FUNCTION.keys.inspect}"
    end
    @function
  end


  # Executes the query request and returns result output in
  # Bio::PTS1::Report.
  # The query argument accepts both a String in fasta format text and
  # a Bio::FastaFormat object.
  #
  # == Examples
  #
  #   require 'bio'
  #   pts1 = Bio::PTS1.new
  #   pts1.exec(">title\nKLMFKTEGPDSD")
  #
  #   pts1.exec(Bio::FastaFormat.new(">title\nKLMFKTEGPDSD"))
  #
  def exec(query)
    seq = set_sequence_in_fastaformat(query)

    @form_data = {'function' => @function.values.join(''),
                  'sequence' => seq.seq,
                  'name'     => seq.definition }

    result = Bio::Command.post_form(@uri, @form_data)
    @output = Report.new(result.body)

    return @output
  end

  private

  # Returns +query+ itself when it already is a Bio::FastaFormat,
  # otherwise wraps it in a new Bio::FastaFormat.
  def set_sequence_in_fastaformat(query)
    if query.class == Bio::FastaFormat
      return query
    else
      return Bio::FastaFormat.new(query)
    end
  end


  # = Parser for the PTS1 prediction Report (in HTML).
  #
  # The values are kept as the Strings found in the HTML table; fields
  # that are missing from the report keep their initial values.
  class Report

    # Query sequence name.
    attr_reader :entry_id

    # Amino acids subsequence at C-terminal region.
    attr_reader :cterm

    # Score
    attr_reader :score

    # Profile
    attr_reader :profile

    # S_ppt (non accessibility)
    attr_reader :spptna

    # S_ppt (accessibility)
    attr_reader :sppta

    # False positive probability
    attr_reader :fp

    # Prediction ("Targeted", "Twilight zone" and "Not targeted")
    attr_reader :prediction

    # Raw output
    attr_reader :output

    # Parsing PTS1 HTML report.
    #
    # Raises RuntimeError when +str+ does not contain the text
    # "PTS1 query prediction".
    #
    # == Example
    #
    #   report = Bio::PTS1::Report.new(str)
    #   report.cterm
    #
    def initialize(str)
      @cterm   = ''
      @score   = 0
      @profile = 0
      @spptna  = 0
      @sppta   = 0
      @fp      = 0
      @prediction = 0

      unless /PTS1 query prediction/m =~ str
        # Same exception class as the former bare raise, now with a reason.
        raise 'Invalid PTS1 report: "PTS1 query prediction" was not found'
      end
      @output = str
      parse
    end


    private

    # Scans the report line by line; the first pattern that matches a
    # line decides which attribute receives the captured table cell.
    def parse
      @output.each_line do |line|
        case line
        when /Name<\/td><td>(\S.+)<\/td><\/tr>/
          @entry_id = $1
        when /C-terminus<\/td><td>(\w+)<\/td>/
          @cterm = $1
        when /Score<\/b><td><b>(-?\d.+?)<\/b><\/td><\/tr>/
          @score = $1
        when /Profile<\/i><\/td><td>(.+?)<\/td>/
          @profile = $1
        when /S_ppt \(non-accessibility\)<\/i><\/td><td>(.+?)<\/td>/
          @spptna = $1
        when /S_ppt \(accessibility\)<\/i><\/td><td>(.+?)<\/td>/
          @sppta = $1
        when /P\(false positive\)<\/i><\/td><td>(.+?)<\/td>/
          @fp = $1
        when /Prediction classification<\/i><\/td><td>(\w.+?)<\/td>/
          @prediction = $1
        else
        end
      end
    end

  end # class Report

end # class PTS1
#
# = bio/appl/sim4/report.rb - sim4 result parser
#
# Copyright:: Copyright (C) 2004 GOTO Naohisa <ng@bioruby.org>
# License::   The Ruby License
#
# $Id:$
#
# The sim4 report parser classes.
#
# == References
#
# * Florea, L., et al., A Computer program for aligning a cDNA sequence
#   with a genomic DNA sequence, Genome Research, 8, 967--974, 1998.
#   http://www.genome.org/cgi/content/abstract/8/9/967
#

module Bio
  class Sim4

    # Bio::Sim4::Report is the sim4 report parser class.
    # Its object may contain some Bio::Sim4::Report::Hit objects.
    class Report #< DB
      #--
      # format: A=0, A=3, or A=4
      #++

      # Delimiter of each entry. Bio::FlatFile uses it.
      # In Bio::Sim4::Report, it is nil (1 entry 1 file).
      DELIMITER = RS = nil # 1 entry 1 file

      # Creates new Bio::Sim4::Report object from String.
      # You can use Bio::FlatFile to read a file.
      # Currently, format A=0, A=3, and A=4 are supported.
      # (A=1, A=2, A=5 are NOT supported yet.)
      #
      # Note that 'seq1' in sim4 result is always regarded as 'query',
      # and 'seq2' is always regarded as 'subject'(target, hit).
      #
      # Note that first 'seq1' information is used for
      # Bio::Sim4::Report#query_id, #query_def, #query_len, and #seq1 methods.
      def initialize(text)
        @hits = []
        @all_hits = []
        overrun = ''
        # The text is cut in front of every "seq1 = " that follows an
        # empty line; each piece is one trial of pairwise alignment.
        text.each_line("\n\nseq1 = ") do |str|
          str = str.sub(/\A\s+/, '')
          str.sub!(/\n(^seq1 \= .*)/m, "\n") # remove trailing hits for sure
          # The removed tail (start of the next hit) is prepended to the
          # next piece.
          tmp = $1.to_s
          hit = Hit.new(overrun + str)
          overrun = tmp
          unless hit.instance_eval { @data.empty? } then
            @hits << hit
          end
          @all_hits << hit
        end
        @seq1 = @all_hits[0].seq1
      end

      # Returns hits of the entry.
      # Unlike Bio::Sim4::Report#all_hits, it returns
      # hits which have alignments.
      # Returns an Array of Bio::Sim4::Report::Hit objects.
      attr_reader :hits

      # Returns all hits of the entry.
      # Unlike Bio::Sim4::Report#hits, it returns
      # results of all trials of pairwise alignment.
      # This would be a Bio::Sim4 specific method.
      # Returns an Array of Bio::Sim4::Report::Hit objects.
      attr_reader :all_hits

      # Returns sequence information of 'seq1'.
      # Returns a Bio::Sim4::Report::SeqDesc object.
      # This would be a Bio::Sim4 specific method.
      attr_reader :seq1

      # Bio::Sim4::Report::SeqDesc stores sequence information of
      # query or subject of sim4 report.
      class SeqDesc
        #--
        # description/definitions of a sequence
        #++

        # Creates a new object.
        # It is designed to be called internally from Bio::Sim4::Report object.
        # Users shall not use it directly.
        def initialize(seqid, seqdef, len, filename)
          @entry_id   = seqid
          @definition = seqdef
          @len        = len
          @filename   = filename
        end

        # identifier of the sequence
        attr_reader :entry_id

        # definition of the sequence
        attr_reader :definition

        # sequence length of the sequence
        attr_reader :len

        # filename of the sequence
        attr_reader :filename

        # Parses part of sim4 result text and creates new SeqDesc object.
        # It is designed to be called internally from Bio::Sim4::Report object.
        # Users shall not use it directly.
        #
        # +str+ is a "seqN = filename (id), length bp" line, where the
        # " (id)" part is optional. +str2+ is the optional ">definition"
        # line of the sequence.
        def self.parse(str, str2 = nil)
          # The filename group is non-greedy: a greedy (.*) swallows the
          # optional " (id)" part, so the id was never captured.
          /^seq[12] \= (.*?)(?: \((.*)\))?\,\s*(\d+)\s*bp\s*$/ =~ str
          seqid = $2
          filename = $1
          len = $3.to_i
          if str2 then
            seqdef = str2.sub(/^\>\s*/, '')
            seqid = seqdef.split(/\s+/, 2)[0] unless seqid
          else
            seqdef = (seqid or filename)
            seqid = filename unless seqid
          end
          self.new(seqid, seqdef, len, filename)
        end
      end #class SeqDesc


      # Sequence segment pair of the sim4 result.
      # Similar to Bio::Blast::Report::HSP but lacks many methods.
      # For mRNA-genome mapping programs,
      # unlike other homology search programs,
      # the class is used not only for exons but also for introns.
      # (Note that intron data would not be available according to run-time
      # options of the program.)
      class SegmentPair
        #--
        # segment pair (like Bio::BLAST::*::Report::HSP)
        #++

        # Creates a new SegmentPair object.
        # It is designed to be called internally from
        # Bio::Sim4::Report::Hit object.
        # Users shall not use it directly.
        def initialize(seq1, seq2, midline = nil,
                       percent_identity = nil, direction = nil)
          @seq1 = seq1
          @seq2 = seq2
          @midline = midline
          @percent_identity = percent_identity
          @direction = direction
        end

        # Returns segment information of 'seq1'.
        # Returns a Bio::Sim4::Report::Segment object.
        # These would be Bio::Sim4 specific methods.
        attr_reader :seq1

        # Returns segment information of 'seq2'.
        # Returns a Bio::Sim4::Report::Segment object.
        # These would be Bio::Sim4 specific methods.
        attr_reader :seq2

        # Returns the "midline" of the segment pair.
        # Returns nil if no alignment data are available.
        attr_reader :midline

        # Returns percent identity of the segment pair
        # (the String captured from the report line).
        attr_reader :percent_identity

        # Returns directions of mapping.
        # Maybe one of "->", "<-", "==" or "" or nil.
        # This would be a Bio::Sim4 specific method.
        attr_reader :direction

        # Parses part of sim4 result text and creates a new SegmentPair object.
        # It is designed to be called internally from
        # Bio::Sim4::Report::Hit class.
        # Users shall not use it directly.
        #
        # +str+ is a line such as "1-5  (11-15)   100% ->"; +aln+ is an
        # Array of [seq1 alignment, midline, seq2 alignment] (may be empty).
        def self.parse(str, aln)
          /^(\d+)\-(\d+)\s*\((\d+)\-(\d+)\)\s*([\d\.]+)\%\s*([\-\<\>\=]*)/ =~ str
          self.new(Segment.new($1, $2, aln[0]),
                   Segment.new($3, $4, aln[2]),
                   aln[1], $5, $6)
        end

        # Parses part of sim4 result text and creates a new SegmentPair
        # object when the seq1 is an intron.
        # It is designed to be called internally from
        # Bio::Sim4::Report::Hit class.
        # Users shall not use it directly.
        def self.seq1_intron(prev_e, e, aln)
          self.new(Segment.new(prev_e.seq1.to+1, e.seq1.from-1, aln[0]),
                   Segment.new(nil, nil, aln[2]),
                   aln[1])
        end

        # Parses part of sim4 result text and creates a new SegmentPair
        # object when seq2 is an intron.
        # It is designed to be called internally from
        # Bio::Sim4::Report::Hit class.
        # Users shall not use it directly.
        def self.seq2_intron(prev_e, e, aln)
          self.new(Segment.new(nil, nil, aln[0]),
                   Segment.new(prev_e.seq2.to+1, e.seq2.from-1, aln[2]),
                   aln[1])
        end

        # Parses part of sim4 result text and creates a new SegmentPair
        # object for regions which can not be aligned correctly.
        # It is designed to be called internally from
        # Bio::Sim4::Report::Hit class.
        # Users shall not use it directly.
        def self.both_intron(prev_e, e, aln)
          self.new(Segment.new(prev_e.seq1.to+1, e.seq1.from-1, aln[0]),
                   Segment.new(prev_e.seq2.to+1, e.seq2.from-1, aln[2]),
                   aln[1])
        end

        #--
        # Bio::BLAST::*::Report::Hsp compatible methods
        #   Methods already defined: midline, percent_identity
        #++

        # start position of the query (the first position is 1)
        def query_from; @seq1.from; end

        # end position of the query (including its position)
        def query_to;   @seq1.to;   end

        # query sequence (with gaps) of the alignment of the segment pair.
        def qseq;       @seq1.seq;  end

        # start position of the hit(target) (the first position is 1)
        def hit_from;   @seq2.from; end

        # end position of the hit(target) (including its position)
        def hit_to;     @seq2.to;   end

        # hit(target) sequence (with gaps) of the alignment
        # of the segment pair.
        def hseq;       @seq2.seq;  end

        # Returns alignment length of the segment pair.
        # Returns nil if no alignment data are available.
        def align_len
          (@midline and @seq1.seq and @seq2.seq) ? @midline.length : nil
        end
      end #class SegmentPair

      # Segment information of a segment pair.
      class Segment
        #--
        # the segment of a sequence
        #++

        # Creates a new Segment object.
        # It is designed to be called internally from
        # Bio::Sim4::Report::SegmentPair class.
        # Users shall not use it directly.
        #
        # Positions are converted with to_i, so nil becomes 0.
        def initialize(pos_st, pos_ed, seq = nil)
          @from = pos_st.to_i
          @to   = pos_ed.to_i
          @seq  = seq
        end

        # start position of the segment (the first position is 1)
        attr_reader :from

        # end position of the segment (including its position)
        attr_reader :to

        # sequence (with gaps) of the segment
        attr_reader :seq
      end #class Segment

      # Hit object of the sim4 result.
      # Similar to Bio::Blast::Report::Hit but lacks many methods.
      class Hit

        # Parses part of sim4 result text and creates a new Hit object.
        # It is designed to be called internally from Bio::Sim4::Report class.
        # Users shall not use it directly.
        def initialize(str)
          # Paragraphs are separated by one or more empty lines.
          @data = str.split(/\n(?:\r?\n)+/)
          parse_seqdesc
        end

        # Parses sequence descriptions.
        # Consumes the leading paragraphs of @data: the "seq1 = / seq2 ="
        # lines, the optional ">" definition lines and the optional
        # "(complement)" marker.
        def parse_seqdesc
          # seq1: query, seq2: target(hit)
          a0 = @data.shift.split(/\r?\n/)
          if @data[0].to_s =~ /^\>/ then
            a1 = @data.shift.split(/\r?\n/)
          else
            a1 = []
          end
          @seq1 = SeqDesc.parse(a0[0], a1[0])
          @seq2 = SeqDesc.parse(a0[1], a1[1])

          # sub! edits @data[0] in place when it is a String.
          if @data[0].to_s.sub!(/\A\(complement\)\s*$/, '') then
            @complement = true
            @data.shift if @data[0].strip.empty?
          else
            @complement = nil
          end
        end
        private :parse_seqdesc

        # Returns sequence information of 'seq1'.
        # Returns a Bio::Sim4::Report::SeqDesc object.
        # This would be Bio::Sim4 specific method.
        attr_reader :seq1

        # Returns sequence information of 'seq2'.
        # Returns a Bio::Sim4::Report::SeqDesc object.
        # This would be Bio::Sim4 specific method.
        attr_reader :seq2

        # Returns true if the hit reports '-'(complemental) strand
        # search result.
        # Otherwise, return false or nil.
        # This would be a Bio::Sim4 specific method.
        def complement?
          @complement
        end

        # Parses segment pair.
        # Builds @exons, @introns and @segmentpairs from the exon list
        # paragraph (@data[0]) and the parsed alignment.
        def parse_segmentpairs
          aln = (self.align ? self.align.dup : [])
          exo = [] #exons
          itr = [] #introns
          sgp = [] #segmentpairs
          prev_e = nil
          return unless @data[0]
          @data[0].split(/\r?\n/).each do |str|
            # From the second exon on, the alignment entry in front of
            # the exon describes the gap after the previous exon.
            ai = (prev_e ? aln.shift : nil)
            a = (aln.shift or [])
            e = SegmentPair.parse(str, a)
            exo << e
            if ai then
              # intron data in alignment
              if ai[1].strip.empty? then
                i = SegmentPair.both_intron(prev_e, e, ai)
              elsif ai[2].strip.empty? then
                i = SegmentPair.seq1_intron(prev_e, e, ai)
              else
                i = SegmentPair.seq2_intron(prev_e, e, ai)
              end
              itr << i
              sgp << i
            end
            sgp << e
            prev_e = e
          end
          @exons        = exo
          @introns      = itr
          @segmentpairs = sgp
        end
        private :parse_segmentpairs

        # Parses alignment.
        # Builds @align, an Array of [seq1 part, midline part, seq2 part],
        # from the alignment paragraphs (@data[1..-1]).
        def parse_align
          s1 = []; ml = []; s2 = []
          blocks = []
          blocks.push [ s1, ml, s2 ]
          dat = @data[1..-1]
          return unless dat
          dat.each do |str|
            a = str.split(/\r?\n/)
            ruler = a.shift
            # First line, for example,
            # " 50 . : . : . : . : . :"
            # When the number is 0, forced to be a separated block
            if /^\s*(\d+)/ =~ ruler and $1.to_i == 0 and !ml.empty? then
              s1 = []; ml = []; s2 = []
              blocks.push [ s1, ml, s2 ]
            end
            # For example,
            # " 190 GAGTCATGCATGATACAA CTTATATATGTACTTAGCGGCA"
            # " ||||||||||||||||||<<<...<<<-||-|||||||||||||||||||"
            # " 400 GAGTCATGCATGATACAACTT...AGCGCT ATATATGTACTTAGCGGCA"
            if /^(\s*\d+\s)(.+)$/ =~ a[0] then
              # Columns of the sequence part, taken from the first line
              # and applied to all three lines.
              range = ($1.length)..($1.length + $2.chomp.length - 1)
              a.collect! { |x| x[range] }
              s1 << a.shift
              ml << a.shift
              s2 << a.shift
            end
          end #each
          alx_all = []
          blocks.each do |ary|
            s1, ml, s2 = ary
            # The midline is cut at intron marks such as "<<<...<<<";
            # the marks themselves are kept as separate elements.
            alx = ml.join('').split(/([\<\>]+\.+[\<\>]+)/)
            seq1 = s1.join(''); seq2 = s2.join('')
            i = 0
            alx.collect! do |x|
              len = x.length
              y = [ seq1[i, len], x, seq2[i, len] ]
              i += len
              y
            end
            # adds virtual intron information if necessary
            alx_all.push([ '', '', '' ]) unless alx_all.empty?
            alx_all.concat alx
          end
          @align = alx_all
        end
        private :parse_align

        # Returns exons of the hit.
        # Each exon is a Bio::Sim4::Report::SegmentPair object.
        def exons
          unless defined?(@exons); parse_segmentpairs; end
          @exons
        end

        # Returns segment pairs (exons and introns) of the hit.
        # Each segment pair is a Bio::Sim4::Report::SegmentPair object.
        # Returns an array of Bio::Sim4::Report::SegmentPair objects.
        # (Note that intron data is not always available
        # according to run-time options of the program.)
        def segmentpairs
          unless defined?(@segmentpairs); parse_segmentpairs; end
          @segmentpairs
        end

        # Returns introns of the hit.
        # Some of them would contain untranscribed regions.
        # Returns an array of Bio::Sim4::Report::SegmentPair objects.
        # (Note that intron data is not always available
        # according to run-time options of the program.)
        def introns
          unless defined?(@introns); parse_segmentpairs; end
          @introns
        end

        # Returns alignments.
        # Returns an Array of arrays.
        # Each array contains sequence of seq1, midline, sequence of seq2,
        # respectively.
        # This would be a Bio::Sim4 specific method.
        def align
          unless defined?(@align); parse_align; end
          @align
        end

        #--
        # Bio::BLAST::*::Report::Hit compatible methods
        #++

        # Length of the query sequence.
        # Same as Bio::Sim4::Report#query_len.
        def query_len;  seq1.len;        end

        # Identifier of the query sequence.
        # Same as Bio::Sim4::Report#query_id.
        def query_id;   seq1.entry_id;   end

        # Definition of the query sequence
        # Same as Bio::Sim4::Report#query_def.
        def query_def;  seq1.definition; end

        # length of the hit(target) sequence
        def target_len; seq2.len;        end

        # Identifier of the hit(target) sequence
        def target_id;  seq2.entry_id;   end

        # Definition of the hit(target) sequence
        def target_def; seq2.definition; end

        alias hit_id     target_id
        alias len        target_len
        alias definition target_def

        alias hsps exons

        # Iterates over each exon of the hit.
        # Yields a Bio::Sim4::Report::SegmentPair object.
        def each(&x) #:yields: segmentpair
          exons.each(&x)
        end
      end #class Hit

      #--
      #Bio::BLAST::*::Report compatible methods
      #++

      # Returns number of hits.
      # Same as hits.size.
      def num_hits;     @hits.size;      end

      # Iterates over each hits of the sim4 result.
      # Same as hits.each.
      # Yields a Bio::Sim4::Report::Hit object.
      def each_hit(&x) #:yields: hit
        @hits.each(&x)
      end
      alias each each_hit

      # Returns the definition of query sequence.
      # The value will be filename or (first word of) sequence definition
      # according to sim4 run-time options.
      def query_def; @seq1.definition; end

      # Returns the identifier of query sequence.
      # The value will be filename or (first word of) sequence definition
      # according to sim4 run-time options.
      def query_id;  @seq1.entry_id;   end

      # Returns the length of query sequence.
      def query_len; @seq1.len;        end
    end #class Report

  end #class Sim4
end #module Bio

=begin

= Bio::Sim4::Report

= References

* ((<URL:http://www.genome.org/cgi/content/abstract/8/9/967>))
  Florea, L., et al., A Computer program for aligning a cDNA sequence
  with a genomic DNA sequence, Genome Research, 8, 967--974, 1998.

=end
For more information on similarity searches and the BLAST # program, see http://www.ncbi.nlm.nih.gov/Education/BLASTinfo/similarity.html. # # == Usage # # require 'bio' # # # To run an actual BLAST analysis: # # 1. create a BLAST factory # remote_blast_factory = Bio::Blast.remote('blastp', 'swissprot', # '-e 0.0001', 'genomenet') # #or: # local_blast_factory = Bio::Blast.local('blastn','/path/to/db') # # # 2. run the actual BLAST by querying the factory # report = remote_blast_factory.query(sequence_text) # # # Then, to parse the report, see Bio::Blast::Report # # == See also # # * Bio::Blast::Report # * Bio::Blast::Report::Hit # * Bio::Blast::Report::Hsp # # == References # # * http://www.ncbi.nlm.nih.gov/blast/ # * http://www.ncbi.nlm.nih.gov/Education/BLASTinfo/similarity.html # * http://blast.genome.jp/ideas/ideas.html#blast # class Blast autoload :Fastacmd, 'bio/io/fastacmd' autoload :Report, 'bio/appl/blast/report' autoload :Report_tab, 'bio/appl/blast/report' autoload :Default, 'bio/appl/blast/format0' autoload :WU, 'bio/appl/blast/wublast' autoload :Bl2seq, 'bio/appl/bl2seq/report' autoload :RPSBlast, 'bio/appl/blast/rpsblast' autoload :NCBIOptions, 'bio/appl/blast/ncbioptions' autoload :Remote, 'bio/appl/blast/remote' # This is a shortcut for Bio::Blast.new: # Bio::Blast.local(program, database, options) # is equivalent to # Bio::Blast.new(program, database, options, 'local') # --- # *Arguments*: # * _program_ (required): 'blastn', 'blastp', 'blastx', 'tblastn' or 'tblastx' # * _db_ (required): name of the local database # * _options_: blastall options \ # (see http://www.genome.jp/dbget-bin/show_man?blast2) # * _blastall_: full path to blastall program (e.g. 
"/opt/bin/blastall"; DEFAULT: "blastall") # *Returns*:: Bio::Blast factory object def self.local(program, db, options = '', blastall = nil) f = self.new(program, db, options, 'local') if blastall then f.blastall = blastall end f end # Bio::Blast.remote does exactly the same as Bio::Blast.new, but sets # the remote server 'genomenet' as its default. # --- # *Arguments*: # * _program_ (required): 'blastn', 'blastp', 'blastx', 'tblastn' or 'tblastx' # * _db_ (required): name of the remote database # * _options_: blastall options \ # (see http://www.genome.jp/dbget-bin/show_man?blast2) # * _server_: server to use (DEFAULT = 'genomenet') # *Returns*:: Bio::Blast factory object def self.remote(program, db, option = '', server = 'genomenet') self.new(program, db, option, server) end # Bio::Blast.report parses given data, # and returns an array of report # (Bio::Blast::Report or Bio::Blast::Default::Report) objects, # or yields each report object when a block is given. # # Supported formats: NCBI default (-m 0), XML (-m 7), tabular (-m 8). # # --- # *Arguments*: # * _input_ (required): input data # * _parser_: type of parser. see Bio::Blast::Report.new # *Returns*:: Undefiend when a block is given. Otherwise, an Array containing report (Bio::Blast::Report or Bio::Blast::Default::Report) objects. def self.reports(input, parser = nil) begin istr = input.to_str rescue NoMethodError istr = nil end if istr then input = StringIO.new(istr) end raise 'unsupported input data type' unless input.respond_to?(:gets) # if proper parser is given, emulates old behavior. case parser when :xmlparser, :rexml ff = Bio::FlatFile.new(Bio::Blast::Report, input) if block_given? then ff.each do |e| yield e end return [] else return ff.to_a end when :tab istr = input.read unless istr rep = Report.new(istr, parser) if block_given? 
then yield rep return [] else return [ rep ] end end # preparation of the new format autodetection rule if needed if !defined?(@@reports_format_autodetection_rule) or !@@reports_format_autodetection_rule then regrule = Bio::FlatFile::AutoDetect::RuleRegexp blastxml = regrule[ 'Bio::Blast::Report', /\<\!DOCTYPE BlastOutput PUBLIC / ] blast = regrule[ 'Bio::Blast::Default::Report', /^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ] tblast = regrule[ 'Bio::Blast::Default::Report_TBlast', /^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ] tab = regrule[ 'Bio::Blast::Report_tab', /^([^\t]*\t){11}[^\t]*$/ ] auto = Bio::FlatFile::AutoDetect[ blastxml, blast, tblast, tab ] # sets priorities blastxml.is_prior_to blast blast.is_prior_to tblast tblast.is_prior_to tab # rehash auto.rehash @@report_format_autodetection_rule = auto end # Creates a FlatFile object with dummy class ff = Bio::FlatFile.new(Object, input) ff.dbclass = nil # file format autodetection 3.times do break if ff.eof? or ff.autodetect(31, @@report_format_autodetection_rule) end # If format detection failed, assumed to be tabular (-m 8) ff.dbclass = Bio::Blast::Report_tab unless ff.dbclass if block_given? then ff.each do |entry| yield entry end ret = [] else ret = ff.to_a end ret end #-- # the method Bio::Blast.reports is moved from bio/appl/blast/report.rb. #++ # Note that this is the old implementation of Bio::Blast.reports. # The aim of this method is keeping compatibility for older BLAST # XML documents which might not be parsed by the new # Bio::Blast.reports nor Bio::FlatFile. # (Though we are not sure whether such documents exist or not.) # # Bio::Blast.reports_xml parses given data, # and returns an array of Bio::Blast::Report objects, or # yields each Bio::Blast::Report object when a block is given. # # It can be used only for XML format. # For default (-m 0) format, consider using Bio::FlatFile, or # Bio::Blast.reports. # # --- # *Arguments*: # * _input_ (required): input data # * _parser_: type of parser. 
#   see Bio::Blast::Report.new
# *Returns*:: Undefined when a block is given. Otherwise, an Array containing Bio::Blast::Report objects.
def self.reports_xml(input, parser = nil)
  collected = []
  input.each_line("</BlastOutput>\n") do |chunk|
    chunk.sub!(/[^<]*(<?)/, '\1') # skip before <?xml> tag
    next if chunk.empty?          # skip trailing no hits
    report = Report.new(chunk, parser)
    # A report holding sub-reports contributes those; otherwise it
    # contributes itself.
    entries = report.reports ? report.reports : [ report ]
    if block_given?
      entries.each { |entry| yield entry }
    else
      collected.concat entries
    end
  end
  collected
end

# Program name (_-p_ option for blastall): blastp, blastn, blastx, tblastn
# or tblastx
attr_accessor :program

# Database name (_-d_ option for blastall)
attr_accessor :db

# Options for blastall
attr_reader :options

# Sets options for blastall
def options=(ary)
  @options = set_options(ary)
end

# Server to submit the BLASTs to
attr_reader :server

# Sets server to submit the BLASTs to.
# The exec_xxxx method should be defined in Bio::Blast or
# Bio::Blast::Remote::Xxxx class.
def server=(str)
  @server = str
  remote_module =
    begin
      Bio::Blast::Remote.const_get(@server.capitalize)
    rescue NameError
      nil
    end
  if remote_module && !is_a?(remote_module)
    # lazy include Bio::Blast::Remote::XXX module
    self.class.class_eval { include remote_module }
  end
  @server
end

# Full path for blastall. (default: 'blastall').
attr_accessor :blastall

# Substitution matrix for blastall -M
attr_accessor :matrix

# Filter option for blastall -F (T or F).
attr_accessor :filter

# Returns a String containing the blast execution output as is,
# in the format given by Bio::Blast#format.
attr_reader :output

# Output report format for blastall -m
#
# 0, pairwise; 1; 2; 3; 4; 5; 6; 7, XML Blast output; 8, tabular;
# 9, tabular with comment lines; 10, ASN text; 11, ASN binary [integer].
attr_accessor :format

#
attr_writer :parser # to change :xmlparser, :rexml, :tab

# Creates a Bio::Blast factory object.
#
# To run any BLAST searches, a factory has to be created that describes a
# certain BLAST pipeline: the program to use, the database to search, any
# options and the server to use. E.g.
#
#   blast_factory = Bio::Blast.new('blastn','dbsts', '-e 0.0001 -r 4', 'genomenet')
#
# ---
# *Arguments*:
# * _program_ (required): 'blastn', 'blastp', 'blastx', 'tblastn' or 'tblastx'
# * _db_ (required): name of the (local or remote) database
# * _options_: blastall options \
#   (see http://www.genome.jp/dbget-bin/show_man?blast2)
# * _server_: server to use (e.g. 'genomenet'; DEFAULT = 'local')
# *Returns*:: Bio::Blast factory object
def initialize(program, db, opt = [], server = 'local')
  @program, @db = program, db
  @blastall = 'blastall'
  @matrix, @filter = nil, nil
  @output = ''
  @parser, @format = nil, nil

  # set_options is called after the defaults above are in place;
  # its result becomes the stored option list.
  @options = set_options(opt, program, db)
  self.server = server
end

# This method submits a sequence to a BLAST factory, which performs the
# actual BLAST.
#
#   # example 1
#   seq = Bio::Sequence::NA.new('agggcattgccccggaagatcaagtcgtgctcctg')
#   report = blast_factory.query(seq)
#
#   # example 2
#   str = <<END_OF_FASTA
#   >lcl|MySequence
#   MPPSAISKISNSTTPQVQSSSAPNLTMLEGKGISVEKSFRVYSEEENQNQHKAKDSLGF
#   KELEKDAIKNSKQDKKDHKNWLETLYDQAEQKWLQEPKKKLQDLIKNSGDNSRVILKDS
#   END_OF_FASTA
#   report = blast_factory.query(str)
#
# Bug note: When multi-FASTA is given and the format is 7 (XML) or 8 (tab),
# it should return an array of Bio::Blast::Report objects,
# but it returns a single Bio::Blast::Report object.
# This is a known bug and should be fixed in the future.
#
# ---
# *Arguments*:
# * _query_ (required): single- or multiple-FASTA formatted sequence(s)
# *Returns*:: a Bio::Blast::Report (or Bio::Blast::Default::Report) object when single query is given. When multiple sequences are given as the query, it returns an array of Bio::Blast::Report (or Bio::Blast::Default::Report) objects. If it can not parse result, nil will be returned.
def query(query) case query when Bio::Sequence query = query.output(:fasta) when Bio::Sequence::NA, Bio::Sequence::AA, Bio::Sequence::Generic query = query.to_fasta('query', 70) else query = query.to_s end @output = self.__send__("exec_#{@server}", query) report = parse_result(@output) return report end # Returns options of blastall def option # backward compatibility Bio::Command.make_command_line(options) end # Set options for blastall def option=(str) # backward compatibility self.options = Shellwords.shellwords(str) end private def set_options(opt = nil, program = nil, db = nil) opt = @options unless opt # when opt is a String, splits to an array begin a = opt.to_ary rescue NameError #NoMethodError # backward compatibility a = Shellwords.shellwords(opt) end ncbiopt = NCBIOptions.new(a) if fmt = ncbiopt.get('-m') then @format = fmt.to_i else dummy = Bio::Blast::Report #dummy to load XMLParser or REXML if defined?(XMLParser) or defined?(REXML) @format ||= 7 else @format ||= 8 end end mtrx = ncbiopt.get('-M') @matrix = mtrx if mtrx fltr = ncbiopt.get('-F') @filter = fltr if fltr # special treatment for '-p' if program then @program = program ncbiopt.delete('-p') else program = ncbiopt.get('-p') @program = program if program end # special treatment for '-d' if db then @db = db ncbiopt.delete('-d') else db = ncbiopt.get('-d') @db = db if db end # returns an array of string containing options return ncbiopt.options end # parses result def parse_result(str) if @format.to_i == 0 then ary = Bio::FlatFile.open(Bio::Blast::Default::Report, StringIO.new(str)) { |ff| ff.to_a } case ary.size when 0 return nil when 1 return ary[0] else return ary end else Report.new(str, @parser) end end # returns an array containing NCBI BLAST options def make_command_line_options set_options cmd = [] if @program cmd.concat([ '-p', @program ]) end if @db cmd.concat([ '-d', @db ]) end if @format cmd.concat([ '-m', @format.to_s ]) end if @matrix cmd.concat([ '-M', @matrix ]) end if @filter 
cmd.concat([ '-F', @filter ]) end ncbiopts = NCBIOptions.new(@options) ncbiopts.make_command_line_options(cmd) end # makes command line. def make_command_line cmd = make_command_line_options cmd.unshift @blastall cmd end # Local execution of blastall def exec_local(query) cmd = make_command_line @output = Bio::Command.query_command(cmd, query) return @output end # This method is obsolete. # # Runs genomenet with '-m 8' option. # Note that the format option is overwritten. def exec_genomenet_tab(query) warn "Bio::Blast#server=\"genomenet_tab\" is deprecated." @format = 8 exec_genomenet(query) end end # class Blast end # module Bio �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/phylip/�����������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�016443� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/phylip/alignment.rb�����������������������������������������������������0000644�0000041�0000041�00000006564�12200110570�020761� 0����������������������������������������������������������������������������������������������������ustar 
�www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/phylip/alignment.rb - phylip multiple alignment format parser # # Copyright:: Copyright (C) 2006 # GOTO Naohisa <ng@bioruby.org> # # License:: The Ruby License # # $Id: alignment.rb,v 1.2 2007/04/05 23:35:40 trevor Exp $ # # = About Bio::Phylip::PhylipFormat # # Please refer document of Bio::Phylip::PhylipFormat class. # module Bio module Phylip # This is phylip multiple alignment format parser. # The two formats, interleaved and non-interleaved, are # automatically determined. # class PhylipFormat # create a new object from a string def initialize(str) @data = str.strip.split(/(?:\r\n|\r|\n)/) @first_line = @data.shift @number_of_sequences, @alignment_length = @first_line.to_s.strip.split(/\s+/).collect { |x| x.to_i } end # number of sequences attr_reader :number_of_sequences # alignment length attr_reader :alignment_length # If the alignment format is "interleaved", returns true. # If not, returns false. # It would mistake to determine if the alignment is very short. def interleaved? unless defined? @interleaved_flag then if /\A +/ =~ @data[1].to_s then @interleaved_flag = false else @interleaved_flag = true end end @interleaved_flag end # Gets the alignment. Returns a Bio::Alignment object. def alignment unless defined? @alignment then do_parse a = Bio::Alignment.new (0...@number_of_sequences).each do |i| a.add_seq(@sequences[i], @sequence_names[i]) end @alignment = a end @alignment end private def do_parse if interleaved? 
then do_parse_interleaved else do_parse_noninterleaved end end def do_parse_interleaved first_block = @data[0, @number_of_sequences] @data[0, @number_of_sequences] = '' @sequence_names = Array.new(@number_of_sequences) { '' } @sequences = Array.new(@number_of_sequences) do ' ' * @alignment_length end first_block.each_with_index do |x, i| n, s = x.split(/ +/, 2) @sequence_names[i] = n @sequences[i].replace(s.gsub(/\s+/, '')) end i = 0 @data.each do |x| if x.strip.length <= 0 then i = 0 else @sequences[i] << x.gsub(/\s+/, '') i = (i + 1) % @number_of_sequences end end @data.clear true end def do_parse_noninterleaved @sequence_names = Array.new(@number_of_sequences) { '' } @sequences = Array.new(@number_of_sequences) do ' ' * @alignment_length end curseq = nil i = 0 @data.each do |x| next if x.strip.length <= 0 if !curseq or curseq.length > @alignment_length or /^\s/ !~ x then p i n, s = x.strip.split(/ +/, 2) @sequence_names[i] = n curseq = @sequences[i] curseq.replace(s.gsub(/\s+/, '')) i += 1 else curseq << x.gsub(/\s+/, '') end end @data.clear true end end #class PhylipFormat end #module Phylip end #module Bio ��������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/phylip/distance_matrix.rb�����������������������������������������������0000644�0000041�0000041�00000005202�12200110570�022145� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/phylip/distance_matrix.rb - phylip distance matrix parser # # Copyright:: Copyright (C) 2006 # GOTO Naohisa <ng@bioruby.org> # # License:: The Ruby License # # $Id: distance_matrix.rb,v 1.3 2007/04/05 
23:35:40 trevor Exp $ # # = About Bio::Phylip::DistanceMatrix # # Please refer document of Bio::Phylip::DistanceMatrix class. # require 'matrix' module Bio module Phylip # This is a parser class for phylip distance matrix data # created by dnadist, protdist, or restdist commands. # class DistanceMatrix # creates a new distance matrix object def initialize(str) data = str.strip.split(/(?:\r\n|\r|\n)/) @otus = data.shift.to_s.strip.to_i prev = nil data.collect! do |x| if /\A +/ =~ x and prev then prev.concat x.strip.split(/\s+/) nil else prev = x.strip.split(/\s+/) prev end end data.compact! if data.size != @otus then raise "inconsistent data (OTUs=#{@otus} but #{data.size} rows)" end @otu_names = data.collect { |x| x.shift } mat = data.collect do |x| if x.size != @otus then raise "inconsistent data (OTUs=#{@otus} but #{x.size} columns)" end x.collect { |y| y.to_f } end @matrix = Matrix.rows(mat, false) @original_matrix = Matrix.rows(data, false) end # distance matrix (returns Ruby's Matrix object) attr_reader :matrix # matrix contains values as original strings. # Use it when you doubt precision of floating-point numbers. attr_reader :original_matrix # number of OTUs attr_reader :otus # names of OTUs attr_reader :otu_names # Generates a new phylip distance matrix formatted text as a string. 
def self.generate(matrix, otu_names = nil, options = {}) if matrix.row_size != matrix.column_size then raise "must be a square matrix" end otus = matrix.row_size names = (0...otus).collect do |i| name = ((otu_names and otu_names[i]) or "OTU#{i.to_s}") name end data = (0...otus).collect do |i| x = (0...otus).collect { |j| sprintf("%9.6f", matrix[i, j]) } x.unshift(sprintf("%-10s", names[i])[0, 10]) str = x[0, 7].join(' ') + "\n" 7.step(otus + 1, 7) do |k| str << ' ' + x[k, 7].join(' ') + "\n" end str end sprintf("%5d\n", otus) + data.join('') end end #class DistanceMatrix end #module Phylip end #module Bio ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/emboss.rb���������������������������������������������������������������0000644�0000041�0000041�00000014124�12200110570�016755� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/emboss.rb - EMBOSS wrapper # # Copyright:: Copyright (C) 2002, 2005 Toshiaki Katayama<k@bioruby.org> # Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk> # License:: The Ruby License # # $Id: emboss.rb,v 1.9 2008/01/10 03:51:06 ngoto Exp $ # module Bio # == Description # # This file holds classes pertaining to the EMBOSS software suite. 
# # This class provides a wrapper for the applications of the EMBOSS suite, which # is a mature and stable collection of open-source applications that can handle # a huge range of sequence formats. # Applications include: # * Sequence alignment # * Rapid database searching with sequence patterns # * Protein motif identification, including domain analysis # * Nucleotide sequence pattern analysis---for example to identify CpG islands or repeats # * Codon usage analysis for small genomes # * Rapid identification of sequence patterns in large scale sequence sets # * Presentation tools for publication # # See the emboss website for more information: http://emboss.sourceforge.net. # # # == Usage # # require 'bio' # # # Suppose that you could get the sequence for XLRHODOP by running # # the EMBOSS command +seqret embl:xlrhodop+ on the command line. # # Then you can get the output of that command in a String object # # by using Bio::EMBOSS.run method. # xlrhodop = Bio::EMBOSS.run('seqret', 'embl:xlrhodop') # puts xlrhodop # # # Or all in one go: # puts Bio::EMBOSS.run('seqret', 'embl:xlrhodop') # # # Similarly: # puts Bio::EMBOSS.run('transeq', '-sbegin', '110','-send', '1171', # 'embl:xlrhodop') # puts Bio::EMBOSS.run('showfeat', 'embl:xlrhodop') # puts Bio::EMBOSS.run('seqret', 'embl:xlrhodop', '-osformat', 'acedb') # # # A shortcut exists for this two-step process for +seqret+ and +entret+. # puts Bio::EMBOSS.seqret('embl:xlrhodop') # puts Bio::EMBOSS.entret('embl:xlrhodop') # # # You can use %w() syntax. # puts Bio::EMBOSS.run(*%w( transeq -sbegin 110 -send 1171 embl:xlrhodop )) # # # You can also use Shellwords.shellwords. # require 'shellwords' # str = 'transeq -sbegin 110 -send 1171 embl:xlrhodop' # cmd = Shellwords.shellwords(str) # puts Bio::EMBOSS.run(*cmd) # # # == Pre-requisites # # You must have the EMBOSS suite installed locally. You can download from the # project website (see References below). 
# # = Rereferences # # * http://emboss.sourceforge.net # * Rice P, Longden I and Bleasby A. \ # EMBOSS: the European Molecular Biology Open Software Suite. \ # Trends Genet. 2000 Jun ; 16(6): 276-7 # class EMBOSS # Combines the initialization and execution for the emboss +seqret+ command. # # puts Bio::EMBOSS.seqret('embl:xlrhodop') # # is equivalent to: # # object = Bio::EMBOSS.new('seqret embl:xlrhodop') # puts object.exec # --- # *Arguments*: # * (required) _arg_: argument given to the emboss seqret command # *Returns*:: String def self.seqret(arg) str = self.retrieve('seqret', arg) end # Combines the initialization and execution for the emboss +entret+ command. # # puts Bio::EMBOSS.entret('embl:xlrhodop') # # is equivalent to: # # object = Bio::EMBOSS.new('entret embl:xlrhodop') # puts object.exec # --- # *Arguments*: # * (required) _arg_: argument given to the emboss entret command # *Returns*:: String def self.entret(arg) str = self.retrieve('entret', arg) end # WARNING: Bio::EMBOSS.new will be changed in the future because # Bio::EMBOSS.new(cmd_line) is inconvenient and potential security hole. # Using Bio::EMBOSS.run(program, options...) is strongly recommended. # # Initializes a new Bio::EMBOSS object. This provides a holder that can # subsequently be executed (see Bio::EMBOSS.exec). The object does _not_ # hold any actual data when initialized. # # e = Bio::EMBOSS.new('seqret embl:xlrhodop') # # For e to actually hold data, it has to be executed: # puts e.exec # # For an overview of commands that can be used with this method, see the # emboss website. # --- # *Arguments*: # * (required) _command_: emboss command # *Returns*:: Bio::EMBOSS object def initialize(cmd_line) warn 'Bio::EMBOSS.new(cmd_line) is inconvenient and potential security hole. Using Bio::EMBOSS.run(program, options...) is strongly recommended.' @cmd_line = cmd_line + ' -stdout -auto' end # A Bio::EMBOSS object has to be executed before it can return any result. 
#   obj_A = Bio::EMBOSS.new('transeq -sbegin 110 -send 1171 embl:xlrhodop')
#   puts obj_A.result                   #=> nil
#   obj_A.exec
#   puts obj_A.result                   #=> a FASTA-formatted sequence
#
#   obj_B = Bio::EMBOSS.new('showfeat embl:xlrhodop')
#   obj_B.exec
#   puts obj_B.result
#
# Runs the stored command line through IO.popen, reads all of its output
# into the result, and closes the pipe.
#
# *Returns*:: String (the output read from the command)
def exec
  @io = IO.popen(@cmd_line, "w+")
  @result = @io.read
ensure
  # IO.popen may raise before @io is assigned (for example when the
  # program cannot be found); closing unconditionally would then raise
  # NoMethodError on nil and hide the original error.
  @io.close if @io && !@io.closed?
end

# Pipe for the command
attr_reader :io

# Result of the executed command
attr_reader :result

# Runs an emboss program and get the result as string.
# Note that "-auto -stdout" are automatically added to the options.
#
# Example 1:
#
#   result = Bio::EMBOSS.run('seqret', 'embl:xlrhodop')
#
# Example 2:
#
#   result = Bio::EMBOSS.run('water',
#                             '-asequence', 'swissprot:slpi_human',
#                             '-bsequence', 'swissprot:slpi_mouse')
#
# Example 3:
#   options = %w( -asequence swissprot:slpi_human
#                 -bsequence swissprot:slpi_mouse )
#   result = Bio::EMBOSS.run('needle', *options)
#
# For an overview of commands that can be used with this method, see the
# emboss website.
# --- # *Arguments*: # * (required) _program_: command name, or filename of an emboss program # * _options_: options given to the emboss program # *Returns*:: String def self.run(program, *options) cmd = [ program, *options ] cmd.push '-auto' cmd.push '-stdout' return Bio::Command.query_command(cmd) end private def self.retrieve(cmd, arg) cmd = [ cmd, arg, '-auto', '-stdout' ] return Bio::Command.query_command(cmd) end end # EMBOSS end # Bio ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/gcg/��������������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�015676� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/gcg/msf.rb��������������������������������������������������������������0000644�0000041�0000041�00000012156�12200110570�017015� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/gcg/msf.rb - GCG multiple sequence alignment (.msf) parser class # # Copyright:: 
Copyright (C) 2003, 2006 # Naohisa Goto <ng@bioruby.org> # License:: The Ruby License # # $Id:$ # # = About Bio::GCG::Msf # # Please refer document of Bio::GCG::Msf. # #--- # (depends on autoload) #require 'bio/appl/gcg/seq' #+++ module Bio module GCG # The msf is a multiple sequence alignment format developed by Wisconsin. # Bio::GCG::Msf is a msf format parser. class Msf #< DB # delimiter used by Bio::FlatFile DELIMITER = RS = nil # Creates a new Msf object. def initialize(str) str = str.sub(/\A[\r\n]+/, '') preamble, @data = str.split(/^\/\/$/, 2) preamble.sub!(/\A\!\![A-Z]+\_MULTIPLE\_ALIGNMENT.*/, '') @heading = $& # '!!NA_MULTIPLE_ALIGNMENT 1.0' or like this preamble.sub!(/.*\.\.\s*$/m, '') @description = $&.to_s.sub(/^.*\.\.\s*$/, '').to_s d = $&.to_s if m = /^(?:(.+)\s+)?MSF\:\s+(\d+)\s+Type\:\s+(\w)\s+(.+)\s+(Comp)?Check\:\s+(\d+)/.match(d) then @entry_id = m[1].to_s.strip @length = (m[2] ? m[2].to_i : nil) @seq_type = m[3] @date = m[4].to_s.strip @checksum = (m[6] ? m[6].to_i : nil) end @seq_info = [] preamble.each_line do |x| if /Name\: / =~ x then s = {} x.scan(/(\S+)\: +(\S*)/) { |y| s[$1] = $2 } @seq_info << s end end @description.sub!(/\A(\r\n|\r|\n)/, '') @align = nil end # description attr_reader :description # ID of the alignment attr_reader :entry_id # alignment length attr_reader :length # sequence type ("N" for DNA/RNA or "P" for protein) attr_reader :seq_type # date attr_reader :date # checksum attr_reader :checksum # heading # ('!!NA_MULTIPLE_ALIGNMENT 1.0' or whatever like this) attr_reader :heading #--- ## data (internally used, will be obsoleted) #attr_reader :data # ## seq. info. 
##   (internally used, will be obsoleted)
#attr_reader :seq_info
#+++

# symbol comparison table
# (captured from the "Symbol comparison table:" field of the description;
# nil when the field is absent)
def symbol_comparison_table
  return @symbol_comparison_table if defined?(@symbol_comparison_table)
  found = /Symbol comparison table\: +(\S+)/.match(@description)
  @symbol_comparison_table = found && found[1]
end

# gap weight
# (captured as a String from the "GapWeight:" field; nil when absent)
def gap_weight
  return @gap_weight if defined?(@gap_weight)
  found = /GapWeight\: +(\S+)/.match(@description)
  @gap_weight = found && found[1]
end

# gap length weight
# (captured as a String from the "GapLengthWeight:" field; nil when absent)
def gap_length_weight
  return @gap_length_weight if defined?(@gap_length_weight)
  found = /GapLengthWeight\: +(\S+)/.match(@description)
  @gap_length_weight = found && found[1]
end

# CompCheck field
# (Integer taken from the "CompCheck:" field; nil when absent)
def compcheck
  return @compcheck if defined?(@compcheck)
  found = /CompCheck\: +(\d+)/.match(@description)
  @compcheck = found ? found[1].to_i : nil
end

# parsing
#
# Splits the alignment body into blank-line separated chunks, collects the
# sequence fragments of each row, and stores the result in @seq_data and
# @align. Does nothing when @align is already set.
def do_parse
  return if @align

  @seq_data = Array.new(@seq_info.size) { Array.new }

  @data.split(/\r?\n\r?\n/).each do |chunk|
    next if chunk.strip.empty?
    rows = chunk.sub(/\A[\r\n]+/, '').split(/[\r\n]+/)

    # When a chunk has more rows than sequences, its first row is dropped;
    # the length of that row's leading spaces is the number of characters
    # cut from the start of every remaining row.
    prefix_width = 0
    if rows.size > @seq_info.size
      leading = rows.shift.to_s[/^ +/]
      prefix_width = leading.length if leading
    end

    rows.each_with_index do |row, idx|
      if prefix_width > 0
        row[0, prefix_width] = ''
        @seq_data[idx] << row
      else
        @seq_data[idx] << row.strip.split(/ +/, 2)[1].to_s
      end
    end
  end

  seq_class =
    case seq_type
    when 'P', 'p' then Bio::Sequence::AA
    when 'N', 'n' then Bio::Sequence::NA
    else Bio::Sequence::Generic
    end

  @seq_data.collect! do |fragments|
    residues = fragments.join('')
    residues.gsub!(/[\s\d]+/, '')
    seq_class.new(residues)
  end

  built = Bio::Alignment.new
  @seq_data.each_with_index do |sequence, idx|
    built.store(@seq_info[idx]['Name'], sequence)
  end
  @align = built
end
private :do_parse

# returns Bio::Alignment object.
def alignment do_parse @align end # gets seq data (used internally) (will be obsoleted) def seq_data do_parse @seq_data end # validates checksum def validate_checksum do_parse valid = true total = 0 @seq_data.each_with_index do |x, i| sum = Bio::GCG::Seq.calc_checksum(x) if sum != @seq_info[i]['Check'].to_i valid = false break end total += sum end return false unless valid if @checksum != 0 # "Check:" field of BioPerl is always 0 valid = ((total % 10000) == @checksum) end valid end end #class Msf end #module GCG end # module Bio ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/gcg/seq.rb��������������������������������������������������������������0000644�0000041�0000041�00000012644�12200110570�017022� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/gcg/seq.rb - GCG sequence file format class (.seq/.pep file) # # Copyright:: Copyright (C) 2003, 2006 # Naohisa Goto <ng@bioruby.org> # License:: The Ruby License # # $Id: seq.rb,v 1.3 2007/04/05 23:35:39 trevor Exp $ # # = About Bio::GCG::Seq # # Please refer document of Bio::GCG::Seq. # module Bio module GCG # # = Bio::GCG::Seq # # This is GCG sequence file format (.seq or .pep) parser class. # # = References # # * Information about GCG Wisconsin Package(R) # http://www.accelrys.com/products/gcg_wisconsin_package . 
# * EMBOSS sequence formats # http://www.hgmp.mrc.ac.uk/Software/EMBOSS/Themes/SequenceFormats.html # * BioPerl document # http://docs.bioperl.org/releases/bioperl-1.2.3/Bio/SeqIO/gcg.html class Seq #< DB # delimiter used by Bio::FlatFile DELIMITER = RS = nil # Creates new instance of this class. # str must be a GCG seq formatted string. def initialize(str) @heading = str[/.*/] # '!!NA_SEQUENCE 1.0' or like this str = str.sub(/.*/, '') str.sub!(/.*\.\.$/m, '') @definition = $&.to_s.sub(/^.*\.\.$/, '').to_s desc = $&.to_s if m = /(.+)\s+Length\:\s+(\d+)\s+(.+)\s+Type\:\s+(\w)\s+Check\:\s+(\d+)/.match(desc) then @entry_id = m[1].to_s.strip @length = (m[2] ? m[2].to_i : nil) @date = m[3].to_s.strip @seq_type = m[4] @checksum = (m[5] ? m[5].to_i : nil) end @data = str @seq = nil @definition.strip! end # ID field. attr_reader :entry_id # Description field. attr_reader :definition # "Length:" field. # Note that sometimes this might differ from real sequence length. attr_reader :length # Date field of this entry. attr_reader :date # "Type:" field, which indicates sequence type. # "N" means nucleic acid sequence, "P" means protein sequence. attr_reader :seq_type # "Check:" field, which indicates checksum of current sequence. attr_reader :checksum # heading # ('!!NA_SEQUENCE 1.0' or whatever like this) attr_reader :heading #--- ## data (internally used, will be obsoleted) #attr_reader :data #+++ # Sequence data. # The class of the sequence is Bio::Sequence::NA, Bio::Sequence::AA # or Bio::Sequence::Generic, according to the sequence type. def seq unless @seq then case @seq_type when 'N', 'n' k = Bio::Sequence::NA when 'P', 'p' k = Bio::Sequence::AA else k = Bio::Sequence end @seq = k.new(@data.tr('^-a-zA-Z.~', '')) end @seq end # If you know the sequence is AA, use this method. # Returns a Bio::Sequence::AA object. # # If you call naseq for protein sequence, # or aaseq for nucleic sequence, RuntimeError will be raised. 
def aaseq if seq.is_a?(Bio::Sequence::AA) then @seq else raise 'seq_type != \'P\'' end end # If you know the sequence is NA, use this method. # Returens a Bio::Sequence::NA object. # # If you call naseq for protein sequence, # or aaseq for nucleic sequence, RuntimeError will be raised. def naseq if seq.is_a?(Bio::Sequence::NA) then @seq else raise 'seq_type != \'N\'' end end # Validates checksum. # If validation succeeds, returns true. # Otherwise, returns false. def validate_checksum checksum == self.class.calc_checksum(seq) end #--- # class methods #+++ # Calculates checksum from given string. def self.calc_checksum(str) # Reference: Bio::SeqIO::gcg of BioPerl-1.2.3 idx = 0 sum = 0 str.upcase.tr('^A-Z.~', '').each_byte do |c| idx += 1 sum += idx * c idx = 0 if idx >= 57 end (sum % 10000) end # Creates a new GCG sequence format text. # Parameters can be omitted. # # Examples: # Bio::GCG::Seq.to_gcg(:definition=>'H.sapiens DNA', # :seq_type=>'N', :entry_id=>'gi-1234567', # :seq=>seq, :date=>date) # def self.to_gcg(hash) seq = hash[:seq] if seq.is_a?(Bio::Sequence::NA) then seq_type = 'N' elsif seq.is_a?(Bio::Sequence::AA) then seq_type = 'P' else seq_type = (hash[:seq_type] or 'P') end if seq_type == 'N' then head = '!!NA_SEQUENCE 1.0' else head = '!!AA_SEQUENCE 1.0' end date = (hash[:date] or Time.now.strftime('%B %d, %Y %H:%M')) entry_id = hash[:entry_id].to_s.strip len = seq.length checksum = self.calc_checksum(seq) definition = hash[:definition].to_s.strip seq = seq.upcase.gsub(/.{1,50}/, "\\0\n") seq.gsub!(/.{10}/, "\\0 ") w = len.to_s.size + 1 i = 1 seq.gsub!(/^/) { |x| s = sprintf("\n%*d ", w, i); i += 50; s } [ head, "\n", definition, "\n\n", "#{entry_id} Length: #{len} #{date} " \ "Type: #{seq_type} Check: #{checksum} ..\n", seq, "\n" ].join('') end end #class Seq end #module GCG end #module Bio 
��������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/bl2seq/�����������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�016326� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/bl2seq/report.rb��������������������������������������������������������0000644�0000041�0000041�00000010643�12200110570�020172� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/bl2seq/report.rb - bl2seq (BLAST 2 sequences) parser # # Copyright:: Copyright (C) 2005 Naohisa Goto <ng@bioruby.org> # License:: The Ruby License # # # Bio::Blast::Bl2seq::Report is a NCBI bl2seq (BLAST 2 sequences) output parser. # # = Acknowledgements # # Thanks to Tomoaki NISHIYAMA <tomoakin __at__ kenroku.kanazawa-u.ac.jp> # for providing bl2seq parser patches based on # lib/bio/appl/blast/format0.rb. # module Bio require 'bio/appl/blast' unless const_defined?(:Blast) class Blast class Bl2seq # Bio::Blast::Bl2seq::Report is a NCBI bl2seq (BLAST 2 sequences) output parser. # It inherits Bio::Blast::Default::Report. # Most of its methods are the same as Bio::Blast::Default::Report, # but it lacks many methods. class Report < Bio::Blast::Default::Report # Delimiter of each entry. Bio::FlatFile uses it. 
# In Bio::Blast::Bl2seq::Report, it it nil (1 entry 1 file). DELIMITER = RS = nil DELIMITER_OVERRUN = nil undef format0_parse_header undef program, version, version_number, version_date, message, converged?, reference, db # Splits headers. def format0_split_headers(data) @f0query = data.shift end private :format0_split_headers # Splits the search results. def format0_split_search(data) iterations = [] while r = data[0] and /^\>/ =~ r iterations << Iteration.new(data) end if iterations.size <= 0 then iterations << Iteration.new(data) end iterations end private :format0_split_search # Stores format0 database statistics. # Internal use only. Users must not use the class. class F0dbstat < Bio::Blast::Default::Report::F0dbstat #:nodoc: # Returns number of sequences in database. def db_num unless defined?(@db_num) parse_params @db_num = @hash['Number of Sequences'].to_i end @db_num end # Returns number of letters in database. def db_len unless defined?(@db_len) parse_params @db_len = @hash['length of database'].to_i end @db_len end end #class F0dbstat # Bio::Blast::Bl2seq::Report::Iteration stores information about # a iteration. # Normally, it may contain some Bio::Blast::Bl2seq::Report::Hit objects. # # Note that its main existance reason is to keep complatibility # between Bio::Blast::Default::Report::* classes. class Iteration < Bio::Blast::Default::Report::Iteration # Creates a new Iteration object. # It is designed to be called only internally from # the Bio::Blast::Default::Report class. # Users shall not use the method directly. def initialize(data) @f0stat = [] @f0dbstat = Bio::Blast::Default::Report::AlwaysNil.instance @hits = [] @num = 1 while r = data[0] and /^\>/ =~ r @hits << Hit.new(data) end end # Returns the hits of the iteration. # It returns an array of Bio::Blast::Bl2seq::Report::Hit objects. 
def hits; @hits; end undef message, pattern_in_database, pattern, pattern_positions, hits_found_again, hits_newly_found, hits_for_pattern, parse_hitlist, converged? end #class Iteration # Bio::Blast::Bl2seq::Report::Hit contains information about a hit. # It may contain some Bio::Blast::Default::Report::HSP objects. # All methods are the same as Bio::Blast::Default::Report::Hit class. # Please refer to Bio::Blast::Default::Report::Hit. class Hit < Bio::Blast::Default::Report::Hit end #class Hit # Bio::Blast::Bl2seq::Report::HSP holds information about the hsp # (high-scoring segment pair). # NOTE that the HSP class below is NOT used because # Ruby's constants namespace are normally statically determined # and HSP object is created in Bio::Blast::Default::Report::Hit class. # Please refer to Bio::Blast::Default::Report::HSP. class HSP < Bio::Blast::Default::Report::HSP end #class HSP end #class Report end #class Bl2seq end #class Blast end #module Bio ###################################################################### =begin = Bio::Blast::Bl2seq::Report NCBI bl2seq (BLAST 2 sequences) output parser =end ���������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/hmmer/������������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�016246� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/hmmer/report.rb���������������������������������������������������������0000644�0000041�0000041�00000034267�12200110570�020122� 0����������������������������������������������������������������������������������������������������ustar 
�www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/hmmer/report.rb - hmmsearch, hmmpfam parserer # # Copyright:: Copyright (C) 2002 # Hiroshi Suga <suga@biophys.kyoto-u.ac.jp>, # Copyright:: Copyright (C) 2005 # Masashi Fujita <fujita@kuicr.kyoto-u.ac.jp> # License:: The Ruby License # # $Id:$ # # == Description # # Parser class for hmmsearch and hmmpfam in the HMMER package. # # == Examples # # #for multiple reports in a single output file (example.hmmpfam) # Bio::HMMER.reports(File.read("example.hmmpfam")) do |report| # report.program['name'] # report.parameter['HMM file'] # report.query_info['Query sequence'] # report.hits.each do |hit| # hit.accession # hit.description # hit.score # hit.evalue # hit.hsps.each do |hsp| # hsp.accession # hsp.domain # hsp.evalue # hsp.midline # end # end # # == References # # * HMMER # http://hmmer.wustl.edu/ # require 'bio/appl/hmmer' module Bio class HMMER # A reader interface for multiple reports text into a report # (Bio::HMMER::Report). # # === Examples # # # Iterator # Bio::HMMER.reports(reports_text) do |report| # report # end # # # Array # reports = Bio::HMMER.reports(reports_text) # def self.reports(multiple_report_text) ary = [] multiple_report_text.each_line("\n//\n") do |report| if block_given? yield Report.new(report) else ary << Report.new(report) end end return ary end # A parser class for a search report by hmmsearch or hmmpfam program in the # HMMER package. 
# # === Examples # # Examples # #for multiple reports in a single output file (example.hmmpfam) # Bio::HMMER.reports(File.read("example.hmmpfam")) do |report| # report.program['name'] # report.parameter['HMM file'] # report.query_info['Query sequence'] # report.hits.each do |hit| # hit.accession # hit.description # hit.score # hit.evalue # hit.hsps.each do |hsp| # hsp.accession # hsp.domain # hsp.evalue # hsp.midline # end # end # # === References # # * HMMER # http://hmmer.wustl.edu/ # class Report # Delimiter of each entry for Bio::FlatFile support. DELIMITER = RS = "\n//\n" # A Hash contains program information used. # Valid keys are 'name', 'version', 'copyright' and 'license'. attr_reader :program # A hash contains parameters used. # Valid keys are 'HMM file' and 'Sequence file'. attr_reader :parameter # A hash contains the query information. # Valid keys are 'query sequence', 'Accession' and 'Description'. attr_reader :query_info # attr_reader :hits # Returns an Array of Bio::HMMER::Report::Hsp objects. # Under special circumstances, some HSPs do not have # parent Hit objects. If you want to access such HSPs, # use this method. attr_reader :hsps # statistics by hmmsearch. attr_reader :histogram # statistics by hmmsearch. Keys are 'mu', 'lambda', 'chi-sq statistic' and 'P(chi-square)'. attr_reader :statistical_detail # statistics by hmmsearch. attr_reader :total_seq_searched # statistics by hmmsearch. Keys are 'Total memory', 'Satisfying E cutoff' and 'Total hits'. attr_reader :whole_seq_top_hits # statistics by hmmsearch. Keys are 'Total memory', 'Satisfying E cutoff' and 'Total hits'. attr_reader :domain_top_hits # Parses a HMMER search report (by hmmpfam or hmmsearch program) and # reutrns a Bio::HMMER::Report object. # # === Examples # # hmmpfam_report = Bio::HMMER::Report.new(File.read("hmmpfam.out")) # # hmmsearch_report = Bio::HMMER::Report.new(File.read("hmmsearch.out")) # def initialize(data) # The input data is divided into six data fields, i.e. 
header, # query infomation, hits, HSPs, alignments and search statistics. # However, header and statistics data don't necessarily exist. subdata, is_hmmsearch = get_subdata(data) # if header exists, parse it if subdata["header"] @program, @parameter = parse_header_data(subdata["header"]) else @program, @parameter = [{}, {}] end @query_info = parse_query_info(subdata["query"]) @hits = parse_hit_data(subdata["hit"]) @hsps = parse_hsp_data(subdata["hsp"], is_hmmsearch) if @hsps != [] # split alignment subdata into an array of alignments aln_ary = subdata["alignment"].split(/^\S+.*?\n/).slice(1..-1) # append alignment information to corresponding Hsp aln_ary.each_with_index do |aln, i| @hsps[i].set_alignment(aln) end end # assign each Hsp object to its parent Hit hits_hash = {} @hits.each do |hit| hits_hash[hit.accession] = hit end @hsps.each do |hsp| if hits_hash.has_key?(hsp.accession) hits_hash[hsp.accession].append_hsp(hsp) end end # parse statistics (for hmmsearch) if is_hmmsearch @histogram, @statistical_detail, @total_seq_searched, \ @whole_seq_top_hits, @domain_top_hits = \ parse_stat_data(subdata["statistics"]) end end # Iterates each hit (Bio::HMMER::Report::Hit). 
def each @hits.each do |hit| yield hit end end alias :each_hit :each # Bio::HMMER::Report#get_subdata def get_subdata(data) subdata = {} header_prefix = '\Ahmm(search|pfam) - search' query_prefix = '^Query (HMM|sequence): .*\nAccession: ' hit_prefix = '^Scores for (complete sequences|sequence family)' hsp_prefix = '^Parsed for domains:' aln_prefix = '^Alignments of top-scoring domains:\n' stat_prefix = '^\nHistogram of all scores:' # if header exists, get it if data =~ /#{header_prefix}/ is_hmmsearch = ($1 == "search") # hmmsearch or hmmpfam subdata["header"] = data[/(\A.+?)(?=#{query_prefix})/m] else is_hmmsearch = false # if no header, assumed to be hmmpfam end # get query, Hit and Hsp data subdata["query"] = data[/(#{query_prefix}.+?)(?=#{hit_prefix})/m] subdata["hit"] = data[/(#{hit_prefix}.+?)(?=#{hsp_prefix})/m] subdata["hsp"] = data[/(#{hsp_prefix}.+?)(?=#{aln_prefix})/m] # get alignment data if is_hmmsearch data =~ /#{aln_prefix}(.+?)#{stat_prefix}/m subdata["alignment"] = $1 else data =~ /#{aln_prefix}(.+?)\/\/\n/m subdata["alignment"] = $1 raise "multiple reports found" if $'.length > 0 end # handle -A option of HMMER cutoff_line = '\t\[output cut off at A = \d+ top alignments\]\n\z' subdata["alignment"].sub!(/#{cutoff_line}/, '') # get statistics data subdata["statistics"] = data[/(#{stat_prefix}.+)\z/m] [subdata, is_hmmsearch] end private :get_subdata # Bio::HMMER::Report#parse_header_data def parse_header_data(data) data =~ /\A(.+? - - -$\n)(.+? 
- - -$\n)\n\z/m program_data = $1 parameter_data = $2 program = {} program['name'], program['version'], program['copyright'], \ program['license'] = program_data.split(/\n/) parameter = {} parameter_data.each_line do |x| if /^(.+?):\s+(.*?)\s*$/ =~ x parameter[$1] = $2 end end [program, parameter] end private :parse_header_data # Bio::HMMER::Report#parse_query_info def parse_query_info(data) hash = {} data.each_line do |x| if /^(.+?):\s+(.*?)\s*$/ =~ x hash[$1] = $2 elsif /\s+\[(.+)\]/ =~ x hash['comments'] = $1 end end hash end private :parse_query_info # Bio::HMMER::Report#parse_hit_data def parse_hit_data(data) data.sub!(/.+?---\n/m, '').chop! hits = [] return hits if data == "\t[no hits above thresholds]\n" data.each_line do |l| hits.push(Hit.new(l)) end hits end private :parse_hit_data # Bio::HMMER::Report#parse_hsp_data def parse_hsp_data(data, is_hmmsearch) data.sub!(/.+?---\n/m, '').chop! hsps=[] return hsps if data == "\t[no hits above thresholds]\n" data.each_line do |l| hsps.push(Hsp.new(l, is_hmmsearch)) end return hsps end private :parse_hsp_data # Bio::HMMER::Report#parse_stat_data def parse_stat_data(data) data.sub!(/\nHistogram of all scores:\n(.+?)\n\n\n%/m, '') histogram = $1.strip statistical_detail = {} data.sub!(/(.+?)\n\n/m, '') $1.each_line do |l| statistical_detail[$1] = $2.to_f if /^\s*(.+?)\s*=\s*(\S+)/ =~ l end total_seq_searched = nil data.sub!(/(.+?)\n\n/m, '') $1.each_line do |l| total_seq_searched = $2.to_i if /^\s*(.+)\s*:\s*(\S+)/ =~ l end whole_seq_top_hits = {} data.sub!(/(.+?)\n\n/m, '') $1.each_line do |l| if /^\s*(.+?):\s*(\d+)\s*$/ =~ l whole_seq_top_hits[$1] = $2.to_i elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l whole_seq_top_hits[$1] = $2 end end domain_top_hits = {} data.each_line do |l| if /^\s*(.+?):\s*(\d+)\s*$/ =~ l domain_top_hits[$1] = $2.to_i elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l domain_top_hits[$1] = $2 end end [histogram, statistical_detail, total_seq_searched, \ whole_seq_top_hits, domain_top_hits] end private 
:parse_stat_data # Container class for HMMER search hits. class Hit # An Array of Bio::HMMER::Report::Hsp objects. attr_reader :hsps # attr_reader :accession alias target_id accession alias hit_id accession alias entry_id accession # attr_reader :description alias definition description # Matching scores (total of all HSPs). attr_reader :score alias bit_score score # E-value attr_reader :evalue # Number of domains attr_reader :num # Sets hit data. def initialize(hit_data) @hsps = Array.new if /^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\S+)$/ =~ hit_data @accession, @description, @score, @evalue, @num = \ [$1, $2, $3.to_f, $4.to_f, $5.to_i] end end # Iterates on each Hsp object (Bio::HMMER::Report::Hsp). def each @hsps.each do |hsp| yield hsp end end alias :each_hsp :each # Shows the hit description. def target_def if @hsps.size == 1 "<#{@hsps[0].domain}> #{@description}" else "<#{@num.to_s}> #{@description}" end end # Appends a Bio::HMMER::Report::Hsp object. def append_hsp(hsp) @hsps << hsp end end # class Hit # Container class for HMMER search hsps. class Hsp # attr_reader :accession alias target_id accession # attr_reader :domain # attr_reader :seq_f # attr_reader :seq_t # attr_reader :seq_ft # attr_reader :hmm_f # attr_reader :hmm_t # attr_reader :hmm_ft # Score attr_reader :score alias bit_score score # E-value attr_reader :evalue # Alignment midline attr_reader :midline # attr_reader :hmmseq # attr_reader :flatseq # attr_reader :query_frame # attr_reader :target_frame # CS Line attr_reader :csline # RF Line attr_reader :rfline # Sets hsps. def initialize(hsp_data, is_hmmsearch) @is_hmmsearch = is_hmmsearch @accession, @domain, seq_f, seq_t, @seq_ft, hmm_f, hmm_t, @hmm_ft,\ score, evalue = hsp_data.split(' ') @seq_f = seq_f.to_i @seq_t = seq_t.to_i @hmm_f = hmm_f.to_i @hmm_t = hmm_t.to_i @score = score.to_f @evalue = evalue.to_f @hmmseq = '' @flatseq = '' @midline = '' @query_frame = 1 @target_frame = 1 # CS and RF lines are rarely used. 
@csline = nil @rfline = nil end # def set_alignment(alignment) # First, split the input alignment into an array of # "alignment blocks." One block usually has three lines, # i.e. hmmseq, midline and flatseq. # However, although infrequent, it can contain CS or RF lines. alignment.split(/ (?:\d+|-)\s*\n\n/).each do |blk| lines = blk.split(/\n/) cstmp = (lines[0] =~ /^ {16}CS/) ? lines.shift : nil rftmp = (lines[0] =~ /^ {16}RF/) ? lines.shift : nil aln_width = lines[0][/\S+/].length @csline = @csline.to_s + cstmp[19, aln_width] if cstmp @rfline = @rfline.to_s + rftmp[19, aln_width] if rftmp @hmmseq += lines[0][19, aln_width] @midline += lines[1][19, aln_width] @flatseq += lines[2][19, aln_width] end @csline = @csline[3...-3] if @csline @rfline = @rfline[3...-3] if @rfline @hmmseq = @hmmseq[3...-3] @midline = @midline[3...-3] @flatseq = @flatseq[3...-3] end # def query_seq @is_hmmsearch ? @hmmseq : @flatseq end # def target_seq @is_hmmsearch ? @flatseq : @hmmseq end # def target_from @is_hmmsearch ? @seq_f : @hmm_f end # def target_to @is_hmmsearch ? @seq_t : @hmm_t end # def query_from @is_hmmsearch ? @hmm_f : @seq_f end # def query_to @is_hmmsearch ? 
@hmm_t : @seq_t end end # class Hsp end # class Report end # class HMMER end # module Bio =begin # # for multiple reports in a single output file (hmmpfam) # Bio::HMMER.reports(ARGF.read) do |report| report.hits.each do |hit| hit.hsps.each do |hsp| end end end =end �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/mafft.rb����������������������������������������������������������������0000644�0000041�0000041�00000016220�12200110570�016561� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/mafft.rb - MAFFT wrapper class # # Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp> # License:: The Ruby License # # $Id: mafft.rb,v 1.18 2007/07/16 12:27:29 ngoto Exp $ # # Bio::MAFFT is a wrapper class to execute MAFFT. # MAFFT is a very fast multiple sequence alignment software. # # = Important Notes # # Though Bio::MAFFT class currently supports only MAFFT version 3, # you can use MAFFT version 5 because the class is a wrapper class. # # == References # # * K. Katoh, K. Misawa, K. Kuma and T. Miyata. # MAFFT: a novel method for rapid multiple sequence alignment based # on fast Fourier transform. Nucleic Acids Res. 30: 3059-3066, 2002. 
# http://nar.oupjournals.org/cgi/content/abstract/30/14/3059 # * http://www.biophys.kyoto-u.ac.jp/~katoh/programs/align/mafft/ # require 'tempfile' require 'bio/command' require 'bio/db/fasta' require 'bio/io/flatfile' module Bio # Bio::MAFFT is a wrapper class to execute MAFFT. # MAFFT is a very fast multiple sequence alignment software. # # Though Bio::MAFFT class currently supports only MAFFT version 3, # you can use MAFFT version 5 because the class is a wrapper class. class MAFFT autoload :Report, 'bio/appl/mafft/report' # Creates a new alignment factory. # When +n+ is a number (1,2,3, ...), performs 'fftns n'. # When +n+ is :i or 'i', performs 'fftnsi'. def self.fftns(n = nil) opt = [] if n.to_s == 'i' then self.new2(nil, 'fftnsi', *opt) else opt << n.to_s if n self.new2(nil, 'fftns', *opt) end end # Creates a new alignment factory. # Performs 'fftnsi'. def self.fftnsi self.new2(nil, 'fftnsi') end # Creates a new alignment factory. # When +n+ is a number (1,2,3, ...), performs 'nwns n'. # When +n+ is :i or 'i', performs 'nwnsi'. # In both case, if all_positive is true, add option '--all-positive'. def self.nwns(n = nil, ap = nil) opt = [] opt << '--all-positive' if ap if n.to_s == 'i' then self.new2(nil, 'nwnsi', *opt) else opt << n.to_s if n self.new2(nil, 'nwns', *opt) end end # Creates a new alignment factory. # Performs 'nwnsi'. # If +all_positive+ is true, add option '--all-positive'. def self.nwnsi(all_positive = nil) opt = [] opt << '--all-positive' if all_positive self.new2(nil, 'nwnsi', *opt) end # Creates a new alignment factory. # Performs 'nwns --all-positive n' or 'nwnsi --all-positive'. # Same as Bio::MAFFT.nwap(n, true). def self.nwap(n = nil) self.nwns(n, true) end # Creates a new alignment factory. # +dir+ is the path of the MAFFT program. # +prog+ is the name of the program. # +opt+ is options of the program. def self.new2(dir, prog, *opt) if dir then prog = File.join(dir, prog) end self.new(prog, opt) end # Creates a new alignment factory. 
# +program+ is the name of the program. # +opt+ is options of the program. def initialize(program = 'mafft', opt = []) @program = program @options = opt @command = nil @output = nil @report = nil @data_stdout = nil @exit_status = nil end # program name (usually 'mafft' in UNIX) attr_accessor :program # options attr_accessor :options # option is deprecated. Instead, please use options. def option warn "Bio::MAFFT#option is deprecated. Please use options." options end # Shows last command-line string. Returns nil or an array of String. # Note that filenames described in the command-line may already # be removed because they are temporary files. attr_reader :command #--- # last message to STDERR when executing the program. #attr_reader :log #+++ #log is deprecated (no replacement) and returns empty string. def log warn "Bio::MAFFT#log is deprecated (no replacement) and returns empty string." '' end # Shows latest raw alignment result. # Return a string. (Changed in bioruby-1.1.0). # Compatibility note: # If you want an array of Bio::FastaFormat instances, # you should use report.data instead. attr_reader :output # Shows last alignment result (instance of Bio::MAFFT::Report class) # performed by the factory. attr_reader :report # Last exit status attr_reader :exit_status # Last output to the stdout. attr_accessor :data_stdout # Clear the internal data and status, except program and options. def reset @command = nil @output = nil @report = nil @exit_status = nil @data_stdout = nil end # Executes the program. # If +seqs+ is not nil, perform alignment for seqs. # If +seqs+ is nil, simply executes the program. # # Compatibility note: When seqs is nil, # returns true if the program exits normally, and # returns false if the program exits abnormally. def query(seqs) if seqs then query_align(seqs) else exec_local(@options) @exit_status.exitstatus == 0 ? true : false end end # Note that this method will be renamed to query_alignment. # # Performs alignment for seqs. 
# +seqs+ should be Bio::Alignment or Array of sequences or nil. # # Compatibility Note: arg is deprecated and ignored. def query_align(seqs, *arg) if arg.size > 0 then warn '2nd and other arguments of Bio::MAFFT#query_align is ignored' end unless seqs.is_a?(Bio::Alignment) seqs = Bio::Alignment.new(seqs) end query_string(seqs.output_fasta(:width => 70)) end # Performs alignment for seqs. # +seqs+ should be Bio::Alignment or Array of sequences or nil. def query_alignment(seqs) query_align(seqs) end # Performs alignment for +str+. # Str should be a string that can be recognized by the program. # # Compatibility Note: arg is deprecated and ignored. def query_string(str, *arg) if arg.size > 0 then warn '2nd and other arguments of Bio::MAFFT#query_string is ignored' end begin tf_in = Tempfile.open('align') tf_in.print str ensure tf_in.close(false) end r = query_by_filename(tf_in.path, *arg) tf_in.close(true) r end # Performs alignment of sequences in the file named +fn+. # # Compatibility Note: 2nd argument (seqtype) is deprecated and ignored. def query_by_filename(fn, *arg) if arg.size > 0 then warn '2nd argument of Bio::MAFFT#query_filename is ignored' end opt = @options + [ fn ] exec_local(opt) @report = Report.new(@output) @report end private # Executes a program in the local machine. def exec_local(opt) @command = [ @program, *opt ] #STDERR.print "DEBUG: ", @command.join(" "), "\n" @data_stdout = nil @exit_status = nil Bio::Command.call_command(@command) do |io| io.close_write @data_stdout = io.read end @output = @data_stdout @exit_status = $? 
end end #class MAFFT end #module Bio ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/spidey/�����������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�016433� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/spidey/report.rb��������������������������������������������������������0000644�0000041�0000041�00000046030�12200110570�020276� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/spidey/report.rb - SPIDEY result parser # # Copyright:: Copyright (C) 2004 GOTO Naohisa <ng@bioruby.org> # License:: The Ruby License # # $Id: report.rb,v 1.10 2007/04/05 23:35:40 trevor Exp $ # # NCBI Spidey result parser. # Currently, output of default (-p 0 option) or -p 1 option are supported. # # == Notes # # The mRNA sequence is regarded as a query, and # the enomic sequence is regarded as a target (subject, hit). 
# # == References # # * Wheelan, S.J., et al., Spidey: a tool for mRNA-to-genomic alignments, # Genome Research, 11, 1952--1957, 2001. # http://www.genome.org/cgi/content/abstract/11/11/1952 # * http://www.ncbi.nlm.nih.gov/spidey/ # require 'bio' module Bio class Spidey # Spidey report parser class. # Please see bio/appl/spidey/report.rb for details. # # Its object may contain some Bio::Spidey::Report::Hit objects. class Report #< DB #-- # File format: -p 0 (default) or -p 1 options #++ # Delimiter of each entry. Bio::FlatFile uses it. DELIMITER = RS = "\n--SPIDEY " # (Integer) excess read size included in DELIMITER. DELIMITER_OVERRUN = 9 # "--SPIDEY ".length # Creates a new Bio::Spidey::Report object from String. # You can use Bio::FlatFile to read a file. def initialize(str) str = str.sub(/\A\s+/, '') str.sub!(/\n(^\-\-SPIDEY .*)/m, '') # remove trailing entries for sure @entry_overrun = $1 data = str.split(/\r?\n(?:\r?\n)+/) d0 = data.shift.to_s.split(/\r?\n/) @hit = Hit.new(data, d0) @all_hits = [ @hit ] if d0.empty? or /\ANo alignment found\.\s*\z/ =~ d0[-1] then @hits = [] else @hits = [ @hit ] end end # piece of next entry. Bio::FlatFile uses it. attr_reader :entry_overrun # Returns an Array of Bio::Spidey::Report::Hit objects. # Because current version of SPIDEY supports only 1 genomic sequences, # the number of hits is 1 or 0. attr_reader :hits # Returns an Array of Bio::Spidey::Report::Hit objects. # Unlike Bio::Spidey::Report#hits, the method returns # results of all trials of pairwise alignment. # This would be a Bio::Spidey specific method. attr_reader :all_hits # SeqDesc stores sequence information of query or subject. class SeqDesc #-- # description/definitions of a sequence #++ # Creates a new SeqDesc object. # It is designed to be called from Bio::Spidey::Report::* classes. # Users shall not call it directly. def initialize(seqid, seqdef, len) @entry_id = seqid @definition = seqdef @len = len end # Identifier of the sequence. 
attr_reader :entry_id # Definition of the sequence. attr_reader :definition # Length of the sequence. attr_reader :len # Parses piece of Spidey result text and creates a new SeqDesc object. # It is designed to be called from Bio::Spidey::Report::* classes. # Users shall not call it directly. def self.parse(str) /^(Genomic|mRNA)\:\s*(([^\s]*) (.+))\, (\d+) bp\s*$/ =~ str.to_s seqid = $3 seqdef = $2 len = ($5 ? $5.to_i : nil) self.new(seqid, seqdef, len) end end #class SeqDesc # Sequence segment pair of Spidey result. # Similar to Bio::Blast::Report::Hsp but lacks many methods. # For mRNA-genome mapping programs, unlike other homology search # programs, the class is used not only for exons but also for introns. # (Note that intron data would not be available according to run-time # options of the program.) class SegmentPair #-- # segment pair (like Bio::BLAST::*::Report::Hsp) #++ # Creates a new SegmentPair object. # It is designed to be called from Bio::Spidey::Report::* classes. # Users shall not call it directly. def initialize(genomic, mrna, midline, aaseqline, percent_identity, mismatches, gaps, splice_site, align_len) @genomic = genomic @mrna = mrna @midline = midline @aaseqline = aaseqline @percent_identity = percent_identity @mismaches = mismatches @gaps = gaps @splice_site = splice_site @align_len = align_len end # Returns segment informations of the 'Genomic'. # Returns a Bio::Spidey::Report::Segment object. # This would be a Bio::Spidey specific method. attr_reader :genomic # Returns segment informations of the 'mRNA'. # Returns a Bio::Spidey::Report::Segment object. # This would be a Bio::Spidey specific method. attr_reader :mrna # Returns the middle line of the alignment of the segment pair. # Returns nil if no alignment data are available. attr_reader :midline # Returns amino acide sequence in alignment. # Returns String, because white spaces is also important. # Returns nil if no alignment data are available. 
attr_reader :aaseqline # Returns percent identity of the segment pair. attr_reader :percent_identity # Returns mismatches. attr_reader :mismatches alias mismatch_count mismatches # Returns gaps. attr_reader :gaps # Returns splice site information. # Returns a hash which contains :d and :a for keys and # 0, 1, or nil for values. # This would be a Bio::Spidey specific methods. attr_reader :splice_site # Returns alignment length of the segment pair. # Returns nil if no alignment data are available. attr_reader :align_len # Creates a new SegmentPair object when the segment pair is an intron. # It is designed to be called internally from # Bio::Spidey::Report::* classes. # Users shall not call it directly. def self.new_intron(from, to, strand, aln) genomic = Segment.new(from, to, strand, aln[0]) mrna = Segment.new(nil, nil, nil, aln[2]) midline = aln[1] aaseqline = aln[3] self.new(genomic, mrna, midline, aaseqline, nil, nil, nil, nil, nil) end # Parses a piece of Spidey result text and creates a new # SegmentPair object. # It is designed to be called internally from # Bio::Spidey::Report::* classes. # Users shall not call it directly. def self.parse(str, strand, complement, aln) /\AExon\s*\d+(\(\-\))?\:\s*(\d+)\-(\d+)\s*\(gen\)\s+(\d+)\-(\d+)\s*\(mRNA\)\s+id\s*([\d\.]+)\s*\%\s+mismatches\s+(\d+)\s+gaps\s+(\d+)\s+splice site\s*\(d +a\)\s*\:\s*(\d+)\s+(\d+)/ =~ str if strand == 'minus' then genomic = Segment.new($3, $2, strand, aln[0]) else genomic = Segment.new($2, $3, 'plus', aln[0]) end if complement then mrna = Segment.new($4, $5, 'minus', aln[2]) else mrna = Segment.new($4, $5, 'plus', aln[2]) end percent_identity = $6 mismatches = ($7 ? $7.to_i : nil) gaps = ($8 ? $8.to_i : nil) splice_site = { :d => ($9 ? $9.to_i : nil), :a => ($10 ? $10.to_i : nil) } midline = aln[1] aaseqline = aln[3] self.new(genomic, mrna, midline, aaseqline, percent_identity, mismatches, gaps, splice_site, (midline ? 
midline.length : nil)) end #-- # Bio::BLAST::*::Report::Hsp compatible methods # Methods already defined: midline, percent_identity, # gaps, align_len, mismatch_count #++ # Returns start position of the mRNA (query) (the first position is 1). def query_from; @mrna.from; end # Returns end position (including its position) of the mRNA (query). def query_to; @mrna.to; end # Returns the sequence (with gaps) of the mRNA (query). def qseq; @mrna.seq; end # Returns strand information of the mRNA (query). # Returns 'plus', 'minus', or nil. def query_strand; @mrna.strand; end # Returns start position of the genomic (target, hit) # (the first position is 1). def hit_from; @genomic.from; end # Returns end position (including its position) of the # genomic (target, hit). def hit_to; @genomic.to; end # Returns the sequence (with gaps) of the genomic (target, hit). def hseq; @genomic.seq; end # Returns strand information of the genomic (target, hit). # Returns 'plus', 'minus', or nil. def hit_strand; @genomic.strand; end end #class SegmentPair # Segment informations of a segment pair. class Segment # Creates a new Segment object. # It is designed to be called internally from # Bio::Spidey::Report::* classes. # Users shall not call it directly. def initialize(pos_st, pos_ed, strand = nil, seq = nil) @from = pos_st ? pos_st.to_i : nil @to = pos_ed ? pos_ed.to_i : nil @strand = strand @seq = seq end # start position attr_reader :from # end position attr_reader :to # strand information attr_reader :strand # sequence data attr_reader :seq end #class Segment # Hit object of Spidey result. # Similar to Bio::Blast::Report::Hit but lacks many methods. class Hit # Creates a new Hit object. # It is designed to be called internally from # Bio::Spidey::Report::* classes. # Users shall not call it directly. def initialize(data, d0) @data = data @d0 = d0 end # Fetches fields. 
def field_fetch(t, ary)
          pattern = Regexp.new(/^#{Regexp.escape(t)}\:\s*(.+)\s*$/)
          # The block leaves the captures of the first matching line in $1.
          return nil unless ary.find { |line| pattern =~ line }
          $1.strip
        end
        private :field_fetch

        # Reads the 'Strand' field and sets @strand and @complement.
        def parse_strand
          value = field_fetch('Strand', @d0)
          if value =~ /^(.+)Reverse +complement\s*$/
            @strand, @complement = $1.strip, true
          else
            @strand, @complement = value, nil
          end
        end
        private :parse_strand

        # Strand information of the hit: 'plus', 'minus', or nil.
        # This would be a Bio::Spidey specific method.
        def strand
          parse_strand unless defined?(@strand)
          @strand
        end

        # True if the result reports 'Reverse complement';
        # otherwise false or nil.
        # This would be a Bio::Spidey specific method.
        def complement?
          parse_strand unless defined?(@complement)
          @complement
        end

        # Number of exons in the hit.
        def number_of_exons
          return @number_of_exons if defined?(@number_of_exons)
          @number_of_exons = field_fetch('Number of exons', @d0).to_i
        end

        # Number of splice sites of the hit.
        def number_of_splice_sites
          return @number_of_splice_sites if defined?(@number_of_splice_sites)
          @number_of_splice_sites =
            field_fetch('Number of splice sites', @d0).to_i
        end

        # Overall percent identity of the hit, as it appears in the report
        # (a String), or nil when the field is absent.
        def percent_identity
          return @percent_identity if defined?(@percent_identity)
          field = field_fetch('overall percent identity', @d0)
          @percent_identity =
            if /([\d\.]+)\s*\%/ =~ field.to_s
              $1
            else
              nil
            end
        end

        # Missing mRNA ends of the hit.
        def missing_mrna_ends
          return @missing_mrna_ends if defined?(@missing_mrna_ends)
          @missing_mrna_ends = field_fetch('Missing mRNA ends', @d0)
        end

        # Sequence information of the 'Genomic'.
        # Returns a Bio::Spidey::Report::SeqDesc object.
        # This would be a Bio::Spidey specific method.
        def genomic
          return @genomic if defined?(@genomic)
          genomic_line = @d0.find { |line| /^Genomic\:/ =~ line }
          @genomic = SeqDesc.parse(genomic_line)
        end

        # Returns sequence informations of the mRNA.
        # Returns a Bio::Spidey::Report::SeqDesc object.
# This would be a Bio::Spidey specific method.
        def mrna
          unless defined?(@mrna)
            @mrna = SeqDesc.parse(@d0.find { |x| /^mRNA\:/ =~ x })
          end
          @mrna
        end

        # Parses segment pairs.
        #
        # Walks the "Exon N: ..." lines of @d0 and pairs each of them, in
        # order, with one entry of #align (a [ left, body, right ] triple).
        # Sets @exons, @introns and @segmentpairs.
        def parse_segmentpairs
          aln = self.align.dup
          ex = []
          itr = []
          segpairs = []
          cflag = self.complement?
          strand = self.strand
          # Offsets from an exon boundary to the adjacent flanking position;
          # the sign depends on the strand.
          if strand == 'minus' then
            d_to = 1;  d_from = -1
          else
            d_to = -1; d_from = 1
          end
          @d0.each do |x|
            #p x
            if x =~ /^Exon\s*\d+(\(.*\))?\:/ then
              if a = aln.shift then
                y = SegmentPair.parse(x, strand, cflag, a[1])
                ex << y
                # non-empty left flank: region preceding the exon
                if a[0][0].to_s.length > 0 then
                  to = y.genomic.from + d_to
                  i0 = SegmentPair.new_intron(nil, to, strand, a[0])
                  itr << i0
                  segpairs << i0
                end
                segpairs << y
                # non-empty right flank: region following the exon
                if a[2][0].to_s.length > 0 then
                  from = y.genomic.to + d_from
                  i2 = SegmentPair.new_intron(from, nil, strand, a[2])
                  itr << i2
                  segpairs << i2
                end
              else
                # no alignment text available for this exon
                y = SegmentPair.parse(x, strand, cflag, [])
                ex << y
                segpairs << y
              end
            end
          end
          @exons = ex
          @introns = itr
          @segmentpairs = segpairs
        end
        private :parse_segmentpairs

        # Returns exons of the hit.
        # Returns an array of Bio::Spidey::Report::SegmentPair object.
        def exons
          unless defined?(@exons); parse_segmentpairs; end
          @exons
        end

        # Returns introns of the hit.
        # Some of them would contain untranscribed regions.
        # Returns an array of Bio::Spidey::Report::SegmentPair objects.
        # (Note that intron data is not always available
        # according to run-time options of the program.)
        def introns
          unless defined?(@introns); parse_segmentpairs; end
          @introns
        end

        # Returns segment pairs (exons and introns) of the hit.
        # Each segment pair is a Bio::Spidey::Report::SegmentPair object.
        # Returns an array of Bio::Spidey::Report::SegmentPair objects.
        # (Note that intron data is not always available
        # according to run-time options of the program.)
        def segmentpairs
          # The guard used to test the misspelled @segmentparis, which is
          # never defined, so every call re-parsed and replaced the
          # memoized exon/intron objects.
          unless defined?(@segmentpairs); parse_segmentpairs; end
          @segmentpairs
        end

        # Returns alignments.
        # Returns an Array of arrays.
        # This would be a Bio::Spidey specific method.
def align
          unless defined?(@align); parse_align; end
          @align
        end

        # Parses alignment lines.
        #
        # +data+ is an Array of text paragraphs; each paragraph holds up to
        # four lines. The lines at the same position of every paragraph are
        # concatenated into four long rows. Judging from how
        # SegmentPair.parse and SegmentPair.new_intron index the result,
        # the rows are genomic, midline, mRNA and amino acid sequence.
        #
        # Returns [ left, body, right ]. +body+ is the four rows with the
        # columns where row 2 starts/ends with spaces cut off; +left+ and
        # +right+ hold those cut-off columns of each row (or are empty
        # Arrays when nothing was cut).
        def parse_align_lines(data)
          misc = [ [], [], [], [] ]
          data.each do |x|
            a = x.split(/\r?\n/)
            if g = a.shift then
              misc[0] << g
              (1..3).each do |i|
                if y = a.shift then
                  # pad short lines so that all rows keep the same width
                  if y.length < g.length
                    y << ' ' * (g.length - y.length)
                  end
                  misc[i] << y
                else
                  # missing line: fill with spaces of the same width
                  misc[i] << ' ' * g.length
                end
              end
            end
          end
          misc.collect! { |x| x.join('') }
          left = []
          # leading spaces in row 2 determine the width of the left flank
          if /\A +/ =~ misc[2] then
            len = $&.size
            left = misc.collect { |x| x[0, len] }
            misc.each { |x| x[0, len] = '' }
          end
          right = []
          # trailing spaces in row 2 determine the width of the right flank
          if / +\z/ =~ misc[2] then
            len = $&.size
            right = misc.collect { |x| x[(-len)..-1] }
            misc.each { |x| x[(-len)..-1] = '' }
          end
          body = misc
          [ left, body, right ]
        end
        private :parse_align_lines

        # Parses alignments.
        #
        # Consumes @data (elements are removed with shift): runs of
        # paragraphs between paragraphs starting with "Genomic:" or
        # "Exon N:" are collected and each non-empty run is converted by
        # parse_align_lines. The result is stored in @align.
        def parse_align
          r = []
          data = @data
          while !data.empty?
            a = []
            while x = data.shift and !(x =~ /^(Genomic|Exon\s*\d+)\:/)
              a.push x
            end
            r.push parse_align_lines(a) unless a.empty?
          end
          @align = r
        end
        private :parse_align

        #--
        # Bio::BLAST::*::Report::Hit compatible methods
        #++

        # Length of the mRNA (query) sequence.
        # Same as Bio::Spidey::Report#query_len.
        def query_len; mrna.len; end

        # Identifier of the mRNA (query).
        # Same as Bio::Spidey::Report#query_id.
        def query_id; mrna.entry_id; end

        # Definition of the mRNA (query).
        # Same as Bio::Spidey::Report#query_def.
        def query_def; mrna.definition; end

        # The genomic (target) sequence length.
        def target_len; genomic.len; end

        # Identifier of the genomic (target) sequence.
        def target_id; genomic.entry_id; end

        # Definition of the genomic (target) sequence.
        def target_def; genomic.definition; end

        alias hit_id target_id
        alias len target_len
        alias definition target_def

        alias hsps exons

        # Iterates over each exon of the hit.
        # Yields Bio::Spidey::Report::SegmentPair object.
        def each(&x) #:yields: segmentpair
          exons.each(&x)
        end
      end #class Hit

      # Returns sequence information of the mRNA.
      # Returns a Bio::Spidey::Report::SeqDesc object.
      # This would be a Bio::Spidey specific method.
def mrna; @hit.mrna; end #-- #Bio::BLAST::*::Report compatible methods #++ # Returns number of hits. # Same as hits.size. def num_hits; @hits.size; end # Iterates over each hits. # Same as hits.each. # Yields a Bio::Spidey::Report::Hit object. def each_hit(&x) #:yields: hit @hits.each(&x) end alias each each_hit # Returns definition of the mRNA (query) sequence. def query_def; @hit.mrna.definition; end # Returns identifier of the mRNA (query) sequence. def query_id; @hit.mrna.entry_id; end # Returns the length of the mRNA (query) sequence. def query_len; @hit.mrna.len; end end #class Report end #class Spidey end #module Bio ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/clustalw/���������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�016774� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/clustalw/report.rb������������������������������������������������������0000644�0000041�0000041�00000012007�12200110570�020634� 0����������������������������������������������������������������������������������������������������ustar 
�www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/clustalw/report.rb - CLUSTAL W format data (*.aln) class # # Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp> # Copyright (C) 2010 Pjotr Prins <pjotr.prins@thebird.nl> # # License:: The Ruby License # # $Id: report.rb,v 1.13 2007/07/18 08:47:39 ngoto Exp $ # # Bio::ClustalW::Report is a CLUSTAL W report (*.aln file) parser. # CLUSTAL W is a very popular software for multiple sequence alignment. # # == References # # * Thompson,J.D., Higgins,D.G. and Gibson,T.J.. # CLUSTAL W: improving the sensitivity of progressive multiple sequence # alignment through sequence weighting, position-specific gap penalties # and weight matrix choice. Nucleic Acids Research, 22:4673-4680, 1994. # http://nar.oxfordjournals.org/cgi/content/abstract/22/22/4673 # * http://www.ebi.ac.uk/clustalw/ # * ftp://ftp.ebi.ac.uk/pub/software/unix/clustalw/ # require 'bio/sequence' require 'bio/db' require 'bio/alignment' require 'bio/appl/clustalw' module Bio class ClustalW # CLUSTAL W result data (*.aln file) parser class. class Report < Bio::DB # Delimiter of each entry. Bio::FlatFile uses it. # In Bio::ClustalW::Report, it it nil (1 entry 1 file). DELIMITER = nil # Creates new instance. # +str+ should be a CLUSTAL format string. # +seqclass+ should on of following: # * Class: Bio::Sequence::AA, Bio::Sequence::NA, ... # * String: 'PROTEIN', 'DNA', ... 
def initialize(str, seqclass = nil)
        @raw        = str
        # filled in lazily by do_parse
        @align      = nil
        @match_line = nil
        @header     = nil
        @seqclass =
          case seqclass
          when /PROTEIN/i
            Bio::Sequence::AA
          when /[DR]NA/i
            Bio::Sequence::NA
          else
            # a class/module is taken as is; anything else falls back to
            # the generic sequence class
            seqclass.is_a?(Module) ? seqclass : Bio::Sequence
          end
      end

      # The whole result as a string.
      attr_reader :raw

      # Sequence class (one of Bio::Sequence, Bio::Sequence::NA,
      # Bio::Sequence::AA, ...).
      attr_reader :seqclass

      # First line of the result data, for example,
      # 'CLUSTAL W (1.82) multiple sequence alignment'.
      # Returns a string.
      def header
        return @header if @header
        do_parse or @header
      end

      # Returns the sequence at row 'row' of the alignment as a
      # Bio::Sequence object. When _row_ is out of range a nil is returned.
      # ---
      # *Arguments*:
      # * (required) _row_: Integer
      # *Returns*:: Bio::Sequence
      def get_sequence(row)
        aln = alignment
        return nil if row < 0 or row >= aln.keys.size
        name   = aln.keys[row]
        result = Bio::Sequence.new(aln.to_hash[name].seq)
        result.definition = name
        result
      end

      # The "match line" of CLUSTAL's alignment result, for example,
      # ':* :* .* * .*::*. ** :* . * . '.
      # Returns a string.
      def match_line
        return @match_line if @match_line
        do_parse or @match_line
      end

      # The multiple alignment, parsed on first use.
      # Returns a Bio::Alignment object.
      def alignment
        do_parse() if !@align
        @align
      end

      # This will be deprecated. Instead, please use alignment.
      #
      # Emits a warning and returns the same object as alignment.
      def align
        warn "Bio::ClustalW#align will be deprecated. Please use \'alignment\'."
        alignment
      end

      # This will be deprecated. Instead, please use alignment.output_fasta.
      #
      # Emits a warning and returns the sequences as a fasta-format string.
      def to_fasta(*arg)
        warn "Bio::ClustalW::report#to_fasta is deprecated. Please use \'alignment.output_fasta\'"
        alignment.output_fasta(*arg)
      end

      # Compatibility note: Behavior of the method will be changed
      # in the future.
      #
      # Gets an array of the sequences.
      # Returns an array of Bio::FastaFormat objects.
def to_a alignment.to_fastaformat_array end private # Parses Clustal W result text. def do_parse return nil if @align a = @raw.split(/\r?\n\r?\n/) @header = a.shift.to_s xalign = Bio::Alignment.new @match_line = '' if a.size > 0 then a[0].gsub!(/\A(\r?\n)+/, '') a.collect! { |x| x.split(/\r?\n/) } a.each { |x| x.each { |y| y.sub!(/ +\d+\s*$/, '') }} #for -SEQNOS=on option @tagsize = ( a[0][0].rindex(/\s/) or -1 ) + 1 a.each do |x| @match_line << x.pop.to_s[@tagsize..-1] end a[0].each do |y| xalign.store(y[0, @tagsize].sub(/\s+\z/, ''), '') end a.each do |x| x.each do |y| name = y[0, @tagsize].sub(/\s+\z/, '') seq = y[@tagsize..-1] xalign[name] << seq end end xalign.collect! { |x| @seqclass.new(x) } end @align = xalign nil end end #class Report end #class ClustalW end #module Bio �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/iprscan/����������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�016575� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/iprscan/report.rb�������������������������������������������������������0000644�0000041�0000041�00000027351�12200110570�020445� 
0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/iprscan/report.rb - a class for iprscan output. # # Copyright:: Copyright (C) 2006 # Mitsuteru C. Nakao <mn@kazusa.or.jp> # License:: The Ruby License # # $Id: report.rb,v 1.9 2007/07/18 11:11:57 nakao Exp $ # # == Report classes for the iprscan program. # module Bio class Iprscan # = DESCRIPTION # Class for InterProScan report. It is used to parse results and reformat # results from (raw|xml|txt) into (html, xml, ebihtml, txt, gff3) format. # # See ftp://ftp.ebi.ac.uk/pub/software/unix/iprscan/README.html # # == USAGE # # Read a marged.txt and split each entry. # Bio::Iprscan::Report.parse_txt(File.read("marged.txt")) do |report| # report.query_id # report.matches.size # report.matches.each do |match| # match.ipr_id #=> 'IPR...' # match.ipr_description # match.method # match.accession # match.description # match.match_start # match.match_end # match.evalue # end # # report.to_gff3 # # report.to_html # end # # Bio::Iprscan::Report.parse_raw(File.read("marged.raw")) do |report| # report.class #=> Bio::Iprscan::Report # end # class Report # Entry delimiter pattern. RS = DELIMITER = "\n\/\/\n" # Qeury sequence name (entry_id). attr_accessor :query_id alias :entry_id :query_id # Qeury sequence length. attr_accessor :query_length # CRC64 checksum of query sequence. attr_accessor :crc64 # Matched InterPro motifs in Hash. Each InterPro motif have :name, # :definition, :accession and :motifs keys. And :motifs key contains # motifs in Array. Each motif have :method, :accession, :definition, # :score, :location_from and :location_to keys. 
attr_accessor :matches

      # Splits a raw-format stream into entries and yields one
      # Bio::Iprscan::Report per entry. Consecutive lines sharing the same
      # first tab-separated field form one entry.
      #
      # == USAGE
      #  Bio::Iprscan::Report.parse_raw(File.open("merged.raw")) do |report|
      #    report
      #  end
      #
      def self.parse_raw(io)
        buffer = ''
        while (row = io.gets)
          if buffer == ''
            buffer << row
          elsif buffer.split("\t").first == row.split("\t").first
            buffer << row
          else
            # first field changed: the buffered entry is complete
            yield Bio::Iprscan::Report.parse_raw_entry(buffer)
            buffer = row
          end
        end
        yield Bio::Iprscan::Report.parse_raw_entry(buffer) unless buffer == ''
      end

      # Parses one raw-format entry (one match per line, tab-separated
      # columns). Returns a Bio::Iprscan::Report object.
      def self.parse_raw_entry(str)
        report = self.new
        str.split(/\n/).each do |row|
          cols = row.split("\t")
          match = Match.new(:query_id     => cols[0],
                            :crc64        => cols[1],
                            :query_length => cols[2].to_i,
                            :method       => cols[3],
                            :accession    => cols[4],
                            :description  => cols[5],
                            :match_start  => cols[6].to_i,
                            :match_end    => cols[7].to_i,
                            :evalue       => cols[8],
                            :status       => cols[9],
                            :date         => cols[10])
          report.matches << match
          # the InterPro columns are optional
          if cols[11]
            match.ipr_id = cols[11]
            match.ipr_description = cols[12]
          end
          if cols[13]
            match.go_terms = cols[13].scan(/(\w+ \w+\:.+? \(GO:\d+\))/).flatten
          end
        end
        # query information is taken from the first match of the entry
        first_match = report.matches.first
        report.query_id = first_match.query_id
        report.query_length = first_match.query_length
        report
      end

      # Parser method for an xml formatted entry. Returns a
      # Bio::Iprscan::Report object.
      # def self.parse_xml(str)
      # end

      # Splits the txt-format entry stream and yields one
      # Bio::Iprscan::Report per entry.
      #
      # == Usage
      #
      #  Bio::Iprscan::Report.parse_txt(File.open("merged.txt")) do |report|
      #    report.class #=> Bio::Iprscan::Report
      #  end
      #
      def self.parse_txt(io)
        io.each("\n\nSequence") do |chunk|
          # The record separator ends with the first word of the next
          # entry: drop it from the tail and put it back at the head.
          chunk = chunk.sub(/Sequence$/, '') if chunk =~ /Sequence$/
          chunk = 'Sequence' + chunk unless chunk =~ /^Sequence/
          yield self.parse_txt_entry(chunk)
        end
      end

      # Parser method for a txt formatted entry. Returns a Bio::Iprscan::Report
      # object.
#
      # +str+ must start with a 'Sequence "..."' header paragraph; the
      # following paragraphs (separated by blank lines) each describe one
      # InterPro entry and its matches. Raises ArgumentError when the
      # header is missing.
      def self.parse_txt_entry(str)
        unless str =~ /^Sequence /
          raise ArgumentError, "Invalid format: \n\n#{str}"
        end
        header, *matches = str.split(/\n\n/)
        report = self.new
        report.query_id = if header =~ /Sequence \"(.+)\" / then $1 else '' end
        report.query_length = if header =~ /length: (\d+) aa./ then $1.to_i else nil end
        report.crc64 = if header =~ /crc64 checksum: (\S+) / then $1 else nil end
        ipr_line = ''
        go_annotation = ''
        matches.each do |m|
          # Columns are separated by runs of two or more spaces. Single
          # spaces occur inside a column ("Molecular Function: ...",
          # InterPro descriptions, "T[1-10] 1e-5" location groups), so
          # splitting on every space would break all the branches below.
          m = m.split(/\n/).map {|x| x.split(/ {2,}/) }
          m.each do |match|
            # a blank line carries no columns at all
            next if match.empty?
            case match[0]
            when 'method'
              # column header row; nothing to record
            when /(Molecular Function|Cellular Component|Biological Process):/
              # Array of [aspect, term, GO id] triples
              go_annotation = match[0].scan(/([MCB]\w+ \w+): (\S.+?\S) \((GO:\d+)\),*/)
            when 'InterPro'
              ipr_line = match
            else
              # one Match per "S[from-to] score" group of the location column
              pos_scores = match[3].scan(/(\S)\[(\d+)-(\d+)\] (\S+) */)
              pos_scores.each do |pos_score|
                report.matches << Match.new(:ipr_id => ipr_line[1],
                                            :ipr_description => ipr_line[2],
                                            :method => match[0],
                                            :accession => match[1],
                                            :description => match[2],
                                            :evalue => pos_score[3],
                                            :status => pos_score[0],
                                            :match_start => pos_score[1].to_i,
                                            :match_end => pos_score[2].to_i,
                                            :go_terms => go_annotation)
              end
            end
          end
        end
        report
      end

      # Splits entry stream (entries are terminated by a "//" line) and
      # yields one Bio::Iprscan::Report per entry.
      #
      # == Usage
      #   Bio::Iprscan::Report.parse_ptxt(File.open("merged.txt")) do |report|
      #     report
      #   end
      def self.parse_ptxt(io)
        io.each("\n\/\/\n") do |entry|
          yield self.parse_ptxt_entry(entry)
        end
      end

      # Parser method for a pseudo-txt formatted entry. Returns a Bio::Iprscan::Report
      # object.
#
      # == Usage
      #
      #  File.read("merged.txt").each(Bio::Iprscan::Report::RS) do |e|
      #    report = Bio::Iprscan::Report.parse_ptxt_entry(e)
      #  end
      #
      # Lines are tab-separated: a two-column line gives the query id and
      # length, an 'InterPro' line gives the InterPro entry that applies to
      # the match lines following it, and '//' ends the entry.
      def self.parse_ptxt_entry(str)
        report = self.new
        ipr_line = ''
        str.split(/\n/).each do |line|
          line = line.split("\t")
          # a blank line has no columns; it used to crash on line[4]
          next if line.empty?
          if line.size == 2
            report.query_id = line[0]
            report.query_length = line[1].to_i
          elsif line.first == '//'
            # entry terminator; nothing to record
          elsif line.first == 'InterPro'
            ipr_line = line
          else
            startp, endp = line[4].split("-")
            report.matches << Match.new(:ipr_id => ipr_line[1],
                                        :ipr_description => ipr_line[2],
                                        :method => line[0],
                                        :accession => line[1],
                                        :description => line[2],
                                        :evalue => line[3],
                                        :match_start => startp.to_i,
                                        :match_end => endp.to_i)
          end
        end
        report
      end

      # Creates an empty report (no query information, no matches).
      def initialize
        @query_id = nil
        @query_length = nil
        @crc64 = nil
        @matches = []
      end

      # Output interpro matches in the format_type.
      # Only 'raw' / :raw is supported; anything else raises NameError.
      def output(format_type)
        case format_type
        when 'raw', :raw
          format_raw
        else
          raise NameError, "Invalid format_type."
        end
      end

      # def format_html
      # end

      # def format_xml
      # end

      # def format_ebixml
      # end

      # def format_txt
      # end

      # Returns the matches as tab-separated lines, one line per match.
      #
      # The GO column accepts every shape the parsers in this class
      # produce: nil (no GO terms recorded), an empty String, an Array of
      # [aspect, term, id] triples, or an Array of preformatted Strings.
      def format_raw
        @matches.map { |match|
          go_column = Array(match.go_terms).map { |x|
            x.is_a?(Array) ? "#{x[0]}: #{x[1]} (#{x[2]})" : x.to_s
          }.join(', ')
          [self.query_id,
           self.crc64,
           self.query_length,
           match.method_name,
           match.accession,
           match.description,
           match.match_start,
           match.match_end,
           match.evalue,
           match.status,
           match.date,
           match.ipr_id,
           match.ipr_description,
           go_column
          ].join("\t")
        }.join("\n")
      end

      # def format_gff3
      # end

      # Returns a Hash (key as an Interpro ID and value as an Array of
      # Match objects). The Hash is built once and reused afterwards.
      #
      #   report.to_hash.each do |ipr_id, matches|
      #     matches.each do |match|
      #       match.ipr_id == ipr_id #=> true
      #     end
      #   end
      #
      def to_hash
        unless @ipr_ids
          @ipr_ids = {}
          @matches.each do |match|
            (@ipr_ids[match.ipr_id] ||= []) << match
          end
        end
        @ipr_ids
      end

      # == Description
      # Container class for InterProScan matches.
      #
      # == Usage
      #  match = Match.new(:query_id => ...)
# # match.ipr_id = 'IPR001234' # match.ipr_id #=> 'IPR001234' # class Match def initialize(hash) @data = Hash.new hash.each do |key, value| @data[key.to_sym] = value end end # Date for computation. def date; @data[:date]; end # CRC64 checksum of query sequence. def crc64; @data[:crc64]; end # E-value of the match def evalue; @data[:evalue]; end # Status of the match (T for true / M for marginal). def status; @data[:status]; end # the corresponding InterPro entry (if any). def ipr_id; @data[:ipr_id]; end # the length of the sequence in AA. def length; @data[:length]; end # the analysis method launched. def method_name; @data[:method]; end # the Gene Ontology description for the InterPro entry, in "Aspect :term (ID)" format. def go_terms; @data[:go_terms]; end # Id of the input sequence. def query_id; @data[:query_id]; end # the end of the domain match. def match_end; @data[:match_end]; end # the database members entry for this match. def accession; @data[:accession]; end # the database mambers description for this match. def description; @data[:description]; end # the start of the domain match. def match_start; @data[:match_start]; end # the descriotion of the InterPro entry. 
def ipr_odescription; @data[:ipr_description]; end def method_missing(name, arg = nil) if arg name = name.to_s.sub(/=$/, '') @data[name.to_sym] = arg else @data[name.to_sym] end end end # class Match end # class Report end # class Iprscan end # module Bio ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/genscan/����������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�016554� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/genscan/report.rb�������������������������������������������������������0000644�0000041�0000041�00000020633�12200110570�020420� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/genscan/report.rb - Genscan report classes # # Copyright:: Copyright (C) 2003 # Mitsuteru C. Nakao <n@bioruby.org> # License:: The Ruby License # # $Id:$ # # == Description # # # == Example # # == References # require 'bio/db/fasta' module Bio # = Bio::Genscan class Genscan # = Bio::Genscan::Report - Class for Genscan report output. # # Parser for the Genscan report output. 
# * Genscan http://genes.mit.edu/GENSCAN.html class Report # Returns Genscan version. attr_reader :genscan_version # Returns attr_reader :date_run # Returns attr_reader :time # Returns Name of query sequence. attr_reader :query_name alias_method :sequence_name, :query_name alias_method :name, :query_name # Returns Length of the query sequence. attr_reader :length # Returns C+G content of the query sequence. attr_reader :gccontent # Returns attr_reader :isochore # Returns attr_reader :matrix # Returns Array of Bio::Genscan::Report::Gene. attr_reader :predictions alias_method :prediction, :predictions alias_method :genes, :predictions # Bio::Genscan::Report.new(str) # # Parse a Genscan report output string. def initialize(report) @predictions = [] @genscan_version = nil @date_run = nil @time = nil @query_name = nil @length = nil @gccontent = nil @isochore = nil @matrix = nil report.each_line("\n") do |line| case line when /^GENSCAN/ parse_headline(line) when /^Sequence/ parse_sequence(line) when /^Parameter/ parse_parameter(line) when /^Predicted genes/ break end end # rests i = report.index(/^Predicted gene/) j = report.index(/^Predicted peptide sequence/) # genes/exons genes_region = report[i...j] genes_region.each_line("\n") do |line| if /Init|Intr|Term|PlyA|Prom|Sngl/ =~ line gn, en = line.strip.split(" +")[0].split(/\./).map {|i| i.to_i } add_exon(gn, en, line) end end # sequences (peptide|CDS) sequence_region = report[j...report.size] sequence_region.gsub!(/^Predicted .+?:/, '') sequence_region.gsub!(/^\s*$/, '') sequence_region.split(Bio::FastaFormat::RS).each do |ff| add_seq(Bio::FastaFormat.new(ff)) end end # Bio::Genscan::Report#parse_headline def parse_headline(line) tmp = line.chomp.split(/\t/) @genscan_version = tmp[0].split(' ')[1] @date_run = tmp[1].split(': ')[1] @time = tmp[2].split(': ')[1] end private :parse_headline # Bio::Genscan::Report#parse_sequence def parse_sequence(line) if /^Sequence (\S+) : (\d+) bp : (\d+[\.\d]+)% C\+G : Isochore 
(\d+.+?)$/ =~ line @query_name = $1 @length = $2.to_i @gccontent = $3.to_f @isochore = $4 else raise "Error: [#{line.inspect}]" end end private :parse_sequence # Bio::Genscan::Report#parse_parameter def parse_parameter(line) if /^Parameter matrix: (\w.+)$/ =~ line.chomp @matrix = $1 else raise "Error: [#{line}]" end end private :parse_parameter # Bio::Genscan::Report#add_gene def add_gene(gn) @predictions[gn - 1] = Gene.new(gn) end private :add_gene # Bio::Genscan::Report#add_exon def add_exon(gn, en, line) exon = Exon.parser(line) case line when /Prom/ begin @predictions[gn - 1].set_promoter(exon) rescue NameError add_gene(gn) @predictions[gn - 1].set_promoter(exon) end when /PlyA/ @predictions[gn - 1].set_polyA(exon) else begin @predictions[gn - 1].exons[en - 1] = exon rescue NameError add_gene(gn) @predictions[gn - 1].exons[en - 1] = exon end end end private :add_exon # Bio::Genscan::Report#add_seq def add_seq(seq) if /peptide_(\d+)/ =~ seq.definition gn = $1.to_i @predictions[gn - 1].set_aaseq(seq) elsif /CDS_(\d+)/ =~ seq.definition gn = $1.to_i @predictions[gn - 1].set_naseq(seq) end end private :add_seq # = Container class of predicted gene structures. class Gene # Bio::Genescan::Report::Gene.new(gene_number) def initialize(gn) @number = gn.to_i @aaseq = Bio::FastaFormat.new("") @naseq = Bio::FastaFormat.new("") @promoter = nil @exons = [] @polyA = nil end # Returns "Gn", gene number field. attr_reader :number # Returns Bio::FastaFormat object. attr_reader :aaseq # Returns Bio::FastaFormat object. attr_reader :naseq # Returns Array of Bio::Genscan::Report::Exon. attr_reader :exons # Returns Bio::Genscan::Report::Exon object. attr_reader :promoter # Returns Bio::Genscan::Report::Exon object. 
attr_reader :polyA # Bio::Genescan::Report::Gene#seq_aaseq def set_aaseq(seq) @aaseq = seq end # Bio::Genescan::Report::Gene#seq_naseq def set_naseq(seq) @naseq = seq end # Bio::Genescan::Report::Gene#seq_promoter def set_promoter(segment) @promoter = segment end # Bio::Genescan::Report::Gene#seq_polyA def set_polyA(segment) @polyA = segment end end # class Gene # = Container class of a predicted gene structure. class Exon # TYPES = { 'Init' => 'Initial exon', 'Intr' => 'Internal exon', 'Term' => 'Terminal exon', 'Sngl' => 'Single-exon gene', 'Prom' => 'Promoter', 'PlyA' => 'poly-A signal' } # Bio::Genescan::Report::Exon.parser def self.parser(line) e = line.strip.split(/ +/) case line when /PlyA/, /Prom/ e[12] = e[6].clone e[11] = 0 [6,7,8,9,10].each {|i| e[i] = nil } end self.new(e[0], e[1], e[2], e[3], e[4], e[5], e[6], e[7], e[8], e[9], e[10], e[11], e[12]) end # Returns attr_reader :gene_number # Returns "Ex", exon number field attr_reader :number # Returns "Type" field. attr_reader :exon_type # Returns "S" field. attr_reader :strand # Returns Returns first position of the region. "Begin" field. attr_reader :first # Returns Returns last position of the region. "End" field. attr_reader :last # Returns "Fr" field. attr_reader :frame # Returns "Ph" field. attr_reader :phase # Returns "CodRg" field. attr_reader :score # Returns "P" field. attr_reader :p_value # Returns "Tscr" field. 
attr_reader :t_score alias_method :coding_region_score, :score # Bio::Genescan::Report::Exon.new(gene_number, exon_type, strand, first, # end, length, frame, phase, acceptor_score, donor_score, score, p_value, # t_score) def initialize(gnex, t, s, b, e, len, fr, ph, iac, dot, cr, prob, ts) @gene_number, @number = gnex.split(".").map {|n| n.to_i } @exon_type = t @strand = s @first = b.to_i @last = e.to_i @length = len.to_i @frame = fr @phase = ph @i_ac = iac.to_i @do_t = dot.to_i @score = cr.to_i @p_value = prob.to_f @t_score = ts.to_f end # Bio::Genescan::Report::Exon#exon_type_long # # Returns a human-readable "Type" of exon. def exon_type_long TYPES[exon_type] end # Bio::Genescan::Report::Exon#range # # Returns Range object of the region. def range Range.new(@first, @last) end # Bio::Genescan::Report::Exon#acceptor_score # # "I/Ac" field. def acceptor_score @i_ac end alias_method :initiation_score, :acceptor_score # Bio::Genescan::Report::Exon#donor_score # # "Do/T" field. def donor_score @do_t end alias_method :termination_score, :donor_score end # class Exon end # class Report end # class Genscan end # module Bio �����������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/psort/������������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�016305� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/psort/report.rb���������������������������������������������������������0000644�0000041�0000041�00000030654�12200110570�020155� 
0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/psort/report.rb - PSORT systems report classes # # Copyright:: Copyright (C) 2003 # Mitsuteru C. Nakao <n@bioruby.org> # License:: The Ruby License # # $Id:$ # # == A Report classes for PSORT Systems # require 'bio/appl/psort' module Bio autoload :Sequence, 'bio/sequence' class PSORT class PSORT1 # = Bio::PSORT::PSORT1::Report # Parser class for PSORT1 output report. # # == Example class Report # Returns aBio::PSORT::PSORT1::Report. def self.parser(output_report) self.default_parser(output_report) end # Returns aBio::PSORT::PSORT1::Report. def self.default_parser(output_report) rpt = self.new rpt.raw = output_report query_info = output_report.scan(/^Query Information\n\n(.+?)\n\n/m)[0][0].split(/\n/) result_info = output_report.scan(/^Result Information\n\n(.+?)\n\n\*/m)[0][0] step1 = output_report.scan(/^\*\*\* Reasoning Step: 1\n\n(.+?)\n\n/m)[0][0] step2 = output_report.scan(/^\*\*\* Reasoning Step: 2\n\n(.+?)\n\n/m)[0][0] final_result = output_report.scan(/\n\n----- Final Results -----\n\n(.+?)\n\n\n/m)[0][0] rpt.entry_id = query_info[2].scan(/^>(\S+) */).to_s rpt.origin = query_info[0].scan(/ORIGIN (\w+)/).to_s rpt.sequence = Bio::Sequence::AA.new(query_info[3..query_info.size].to_s) # rpt.reasoning rpt.final_result = final_result.split(/\n/).map {|x| x = x.strip.split(/---/).map {|y| y.strip } { 'prediction' => x[0], 'certainty' => x[1].scan(/Certainty= (\d\.\d{3})/).to_s, 'comment' => x[1].scan(/\((\w+)\)/).to_s } } return rpt end attr_accessor :entry_id attr_accessor :origin attr_accessor :title attr_accessor :sequence attr_accessor :result_info attr_accessor :reasoning attr_accessor 
:final_result attr_accessor :raw # Constructs aBio::PSORT::PSORT1::Report object. def initialize(entry_id = '', origin = '', title = '', sequence = '', result_info = '', reasoning = {}, final_result = []) @entry_id = entry_id @origin = origin @title = title @sequence = sequence @result_info = result_info @reasoning = reasoning @final_result = final_result @raw = '' end end # class Report end # class PSORT1 class PSORT2 # Subcellular localization name codes used by PSORT2 SclNames = { 'csk' => 'cytoskeletal', 'cyt' => 'cytoplasmic', 'nuc' => 'nuclear', 'mit' => 'mitochondrial', 'ves' => 'vesicles of secretory system', 'end' => 'endoplasmic reticulum', 'gol' => 'Golgi', 'vac' => 'vacuolar', 'pla' => 'plasma membrane', 'pox' => 'peroxisomal', 'exc' => 'extracellular, including cell wall', '---' => 'other' } # Feature name codes Features = [ 'psg', # PSG: PSG score 'gvh', # GvH: GvH score 'alm', # ALOM: $xmax 'tms', # ALOM: $count 'top', # MTOP: Charge difference: $mtopscr 'mit', # MITDISC: Score: $score 'mip', # Gavel: motif at $isite 'nuc', # NUCDISC: NLS Score: $score 'erl', # KDEL: ($seg|none) 'erm', # ER Membrane Retention Signals: ($cseg|none) $scr 'pox', # SKL: ($pat|none) $scr 'px2', # PTS2: (found|none) ($#match < 0) ? 0 : ($#match+1); 'vac', # VAC: (found|none) ($#match < 0) ? 0 : ($#match+1); 'rnp', # RNA-binding motif: (found|none) ($#match < 0) ? 
0 : ($#match+1); 'act', # Actinin-type actin-binding motif: (found|none) $hit 'caa', # Prenylation motif: (2|1|0) CaaX,CXC,CC,nil 'yqr', # memYQRL: (found|none) $scr 'tyr', # Tyrosines in the tail: (none|\S+[,]) # 10 * scalar(@ylist) / ($end - $start + 1); 'leu', # Dileucine motif in the tail: (none|found) $scr 'gpi', # >>> Seem to be GPI anchored 'myr', # NMYR: (none|\w) $scr 'dna', # checking 63 PROSITE DNA binding motifs: $hit 'rib', # checking 71 PROSITE ribosomal protein motifs: $hit 'bac', # checking 33 PROSITE prokaryotic DNA binding motifs: $hit 'm1a', # $mtype eq '1a' 'm1b', # $mtype eq '1b' 'm2', # $mtype eq '2 ' 'mNt', # $mtype eq 'Nt' 'm3a', # $mtype eq '3a' 'm3b', # $mtype eq '3b' 'm_', # $mtype eq '__' tms == 0 'ncn', # NNCN: ($NetOutput[1] > $NetOutput[0]) ? $output : (-$output); 'lps', # COIL: $count 'len' # $leng ] # Feature name codes (long version). FeaturesLong = { 'psg' => 'PSG', 'gvh' => 'GvH', 'tms' => 'ALOM', 'alm' => 'ALOM', 'top' => 'MTOP', 'mit' => 'MITDISC', 'mip' => 'Gavel', 'nuc' => 'NUCDISC', 'erl' => 'KDEL', 'erm' => 'ER Membrane Retention Signals', 'pox' => 'SKL', 'px2' => 'PTS2', 'vac' => 'VAC', 'rnp' => 'RNA-binding motif', 'act' => 'Actinin-type actin-binding motif', 'caa' => 'Prenylation motif', 'yqr' => 'memYQRL', 'tyr' => 'Tyrosines in the tail', 'leu' => 'Dileucine motif in the tail', 'gpi' => '>>> Seems to be GPI anchored', 'myr' => 'NMYR', 'dna' => 'checking 63 PROSITE DNA binding motifs', 'rib' => 'checking 71 PROSITE ribosomal protein motifs', 'bac' => 'ochecking 33 PROSITE prokaryotic DNA binding motifs:', 'm1a' => '', 'm1b' => '', 'm2' => '', 'mNt' => '', 'm3a' => '', 'm3b' => '', 'm_' => '', 'ncn' => 'NNCN', 'lps' => 'COIL', 'len' => 'AA' # length of input sequence } # = Bio::PSORT::PSORT2::Report # Report parser classe for PSORT II(PSORT2). # == Example class Report # Report boundary string. BOUNDARY = '-' * 75 # Report delimiter. RS = DELIMITER = "\)\n\n#{BOUNDARY}" # entry_id of query sequence. 
attr_accessor :entry_id # Given subcellular localization (three letters code). attr_accessor :scl # Definition of query sequence. attr_accessor :definition # Sequence of query sequence. attr_accessor :seq # k parameter of k-nearest neighbors classifier. attr_accessor :k # Feature vector used the kNN prediction. attr_accessor :features # Probability vector of kNN prediction. attr_accessor :prob # Predicted subcellular localization (three letters code). attr_accessor :pred # Raw text of output report. attr_accessor :raw # Constructs aBio::PSORT::PSORT2::Report object. def initialize(raw = '', entry_id = nil, scl = nil, definition = nil, seq = nil, k = nil, features = {}, prob = {}, pred = nil) @entry_id = entry_id @scl = scl @definition = definition @seq = seq @features = features @prob = prob @pred = pred @k = k @raw = raw end # Parses output report with output format detection automatically. def self.parser(str, entry_id) case str when /^ psg:/ # default report self.default_parser(str, entry_id) when /^PSG:/ # -v report self.v_parser(str, entry_id) when /: too short length / self.too_short_parser(str, entry_id) when /PSORT II server/ tmp = self.new(ent, entry_id) else raise ArgumentError, "invalid format\n[#{str}]" end end # Parser for ``too short length'' report. # # $id: too short length ($leng), skipped\n"; def self.too_short_parser(ent, entry_id = nil) report = self.new(ent) report.entry_id = entry_id if ent =~ /^(.+)?: too short length/ report.entry_id = $1 unless report.entry_id report.scl = '---' end report end # Parser for the default report format. # ``psort report'' output. 
def self.default_parser(ent, entry_id = nil) report = self.new(ent, entry_id) ent = ent.split(/\n\n/).map {|e| e.chomp } report.set_header_line(ent[0]) # feature matrix ent[1].gsub(/\n/,' ').strip.split(/ /).map {|fe| pair = fe.split(/: /) report.features[pair[0].strip] = pair[1].strip.to_f } report.prob = self.set_kNN_prob(ent[2]) report.set_prediction(ent[3]) return report end # Returns header information. def set_header_line(str) str.sub!(/^-+\n/,'') tmp = str.split(/\t| /) @entry_id = tmp.shift.sub(/^-+/,'').strip unless @entry_id case tmp.join(' ').chomp when /\(\d+ aa\) (.+)$/ @definition = $1 else @definition = tmp.join(' ').chomp end scl = @definition.split(' ')[0] @scl = scl if SclNames.keys.index(scl) end # Returns @prob value. def self.set_kNN_prob(str) prob = Hash.new Bio::PSORT::PSORT2::SclNames.keys.each {|a| prob.update( {a => 0.0} ) } str.gsub(/\t/,'').split(/\n/).each {|a| val,scl = a.strip.split(/ %: /) key = Bio::PSORT::PSORT2::SclNames.index(scl) prob[key] = val.to_f } return prob end # Returns @prob and @k values. def set_prediction(str) case str when /prediction for (\S+?) is (\w{3}) \(k=(\d+)\)/ @entry_id ||= $1 unless @entry_id @pred = $2 @k = $3 else raise ArgumentError, "Invalid format at(#{self.entry_id}):\n[#{str}]\n" end end # Parser for the verbose output report format. # ``psort -v report'' and WWW server output. def self.v_parser(ent, entry_id = nil) report = Bio::PSORT::PSORT2::Report.new(ent, entry_id) ent = ent.split(/\n\n/).map {|e| e.chomp } ent.each_with_index {|e, i| unless /^(\w|-|\>|\t)/ =~ e j = self.__send__(:search_j, i, ent) ent[i - j] += e ent[i] = nil end if /^none/ =~ e # psort output bug j = self.__send__(:search_j, i, ent) ent[i - j] += e ent[i] = nil end } ent.compact! 
if /^ PSORT II server/ =~ ent[0] # for WWW version ent.shift delline = '' ent.each {|e| delline = e if /^Results of Subprograms/ =~ e } i = ent.index(delline) ent.delete(delline) ent.delete_at(i - 1) end report.set_header_line(ent.shift) report.seq = Bio::Sequence::AA.new(ent.shift) fent, pent = self.divent(ent) report.set_features(fent) report.prob = self.set_kNN_prob(pent[0].strip) report.set_prediction(pent[1].strip) return report end # def self.search_j(i, ent) j = 1 1.upto(ent.size) {|x| if ent[i - x] j = x break end } return j end private_class_method :search_j # Divides entry body def self.divent(entry) boundary = entry.index(BOUNDARY) return entry[0..(boundary - 1)], entry[(boundary + 2)..(entry.length)] end # Sets @features values. def set_features(features_ary) features_ary.each {|fent| key = fent.split(/\:( |\n)/)[0].strip self.features[key] = fent # unless /^\>/ =~ key } self.features['AA'] = self.seq.length end end # class Report end # class PSORT2 end # class PSORT end # module Bio ������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/targetp/����������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�016604� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/targetp/report.rb�������������������������������������������������������0000644�0000041�0000041�00000007562�12200110570�020456� 0����������������������������������������������������������������������������������������������������ustar 
�www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/targetp/report.rb - TargetP report class # # Copyright:: Copyright (C) 2003 # Mitsuteru C. Nakao <n@bioruby.org> # License:: The Ruby License # # $Id:$ # # == Description # # TargetP class for http://www.cbs.dtu.dk/services/TargetP/ # # == Example # == References # module Bio class TargetP # = A parser and container class for TargetP report. class Report # Delimiter DELIMITER = "\n \n" # Delimiter RS = DELIMITER # Returns the program version. attr_reader :version # Returns the query sequences. attr_reader :query_sequences # Returns 'included' or 'not included'. # If the value is 'included', Bio::TargetP::Report#prediction['TPlen'] # contains a valid value. attr_reader :cleavage_site_prediction # Returns ``PLANT'' or ``NON-PLANT'' networks. attr_reader :networks # Returns a Hash of the prediction results. # # {"Name"=>"MGI_2141503", "Loc."=>"_", "RC"=>3, "SP"=>0.271, # "other"=>0.844, "mTP"=>0.161, "cTP"=>0.031, "Length"=>640} # # Keys: Name, Len, SP, mTP, other, Loc, RC # Optional key for PLANT networks: cTP # Optional key in Cleavage site: TPlen # # Use 'Length' and 'Loc.' instead of 'Len' and 'Loc' respectively # for the version 1.0 report. attr_reader :prediction # Returns a Hash of cutoff values. attr_reader :cutoff # Sets output report. def initialize(str) @version = nil @query_sequences = nil @cleavage_site_prediction = nil @networks = nil @prediction = {} @cutoff = {} parse_entry(str) end alias pred prediction # Returns the name of query sequence. def name @prediction['Name'] end alias entry_id name # Returns length of query sequence. def query_len if @prediction['Len'] @prediction['Len'] else @prediction['Length'] end end alias length query_len # Returns the predicted localization signal: # 1. 
S (Signal peptide) # 2. M (mTP) # 3. C (cTP) # 4. * # 5. _ def loc if @prediction['Loc'] @prediction['Loc'] # version 1.0 else @prediction['Loc.'] # version 1.1 end end # Returns RC. def rc @prediction['RC'] end private # def parse_entry(str) labels = [] cutoff = [] values = [] str.split("\n").each {|line| case line when /targetp v(\d+.\d+)/,/T A R G E T P\s+(\d+.\d+)/ @version = $1 when /Number of (query|input) sequences:\s+(\d+)/ @query_sequences = $1.to_i when /Cleavage site predictions (\w.+)\./ @cleavage_site_prediction = $1 when /Using (\w+.+) networks/ @networks = $1 when /Name +Len/ labels = line.sub(/^\#\s*/,'').split(/\s+/) when /cutoff/ cutoff = line.split(/\s+/) cutoff.shift labels[2, 4].each_with_index {|loc, i| next if loc =~ /Loc/ @cutoff[loc] = cutoff[i].to_f } when /-----$/ when /^ +$/, '' else values = line.sub(/^\s*/,'').split(/\s+/) values.each_with_index {|val, i| label = labels[i] case label when 'RC', /Len/ val = val.to_i when 'SP','mTP','cTP','other' val = val.to_f end @prediction[label] = val } end } end end # class Report end # class TargetP end # moudel Bio ����������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/tcoffee.rb��������������������������������������������������������������0000644�0000041�0000041�00000002734�12200110570�017104� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/tcoffee.rb - T-Coffee application wrapper class # # Copyright:: Copyright (C) 2006-2007 # Jeffrey Blakeslee and John Conery University of Oregon <jeffb@uoregon.edu> # Naohisa Goto <ng@bioruby.org> # License:: The Ruby License # # 
$Id: tcoffee.rb,v 1.1 2007/07/16 12:25:50 ngoto Exp $ # # Bio::Tcoffee is a wrapper class to execute T-Coffee. # # == References # # * http://www.tcoffee.org/Projects_home_page/t_coffee_home_page.html # * Notredame, C., Higgins, D.G. and Heringa, J. # T-Coffee: A novel method for fast and accurate multiple sequence # alignment. J. Mol. Biol. 302: 205-217, 2000. # module Bio # Bio::Tcoffee is a wrapper class to execute t-coffee. # # Please refer documents in bio/apple/tcoffee.rb for references. class Tcoffee < Bio::Alignment::FactoryTemplate::FileInFileOutWithTree # default program name DEFAULT_PROGRAM = 't_coffee'.freeze # default report parser DEFAULT_PARSER = Bio::ClustalW::Report private # generates options specifying input filename. # returns an array of string def _option_input_file(fn) [ '-infile', fn ] end # generates options specifying output filename. # returns an array of string def _option_output_file(fn) [ '-outfile', fn ] end # generates options specifying output filename. # returns an array of string def _option_output_dndfile(fn) [ '-newtree', fn ] end end #class TCoffee end #module Bio ������������������������������������bio-1.4.3.0001/lib/bio/appl/muscle.rb���������������������������������������������������������������0000644�0000041�0000041�00000002576�12200110570�016765� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/muscle.rb - MUSCLE application wrapper class # # Copyright:: Copyright (C) 2006-2007 # Jeffrey Blakeslee and John Conery University of Oregon <jeffb@uoregon.edu> # Naohisa Goto <ng@bioruby.org> # License:: The Ruby License # # $Id: muscle.rb,v 1.1 2007/07/16 12:25:50 ngoto Exp $ # # # Bio::Muscle is a wrapper class 
to execute MUSCLE. # # == References # # * http://www.drive5.com/muscle/ # * Edgar R.C. # MUSCLE: multiple sequence alignment with high accuracy and # high throughput. Nucleic Acids Res. 32: 1792-1797, 2004. # * Edgar, R.C. # MUSCLE: a multiple sequence alignment method with reduced time # and space complexity. BMC Bioinformatics 5: 113, 2004. # module Bio # Bio::Muscle is a wrapper class to execute MUSCLE. # # Please refer documents in bio/apple/muscle.rb for references. class Muscle < Bio::Alignment::FactoryTemplate::StdinInFileOut # default program name DEFAULT_PROGRAM = 'muscle'.freeze # default report parser DEFAULT_PARSER = Bio::Alignment::MultiFastaFormat private # generates options specifying input filename. # returns an array of string def _option_input_file(fn) [ '-in', fn ] end # generates options specifying output filename. # returns an array of string def _option_output_file(fn) [ '-out', fn ] end end #class Muscle end #module Bio ����������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/tmhmm/������������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�016260� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/tmhmm/report.rb���������������������������������������������������������0000644�0000041�0000041�00000010462�12200110570�020123� 0����������������������������������������������������������������������������������������������������ustar 
�www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/tmhmm/report.rb - TMHMM report class # # Copyright:: Copyright (C) 2003 # Mitsuteru C. Nakao <n@bioruby.org> # License:: The Ruby License # # $Id:$ # # == Description # # # == Example # == References # require 'enumerator' module Bio # = TMHMM class for http://www.cbs.dtu.dk/services/TMHMM/ class TMHMM # Splits multiple reports into a report entry. def TMHMM.reports(data) entry = [] ent_state = '' data.each_line do |line| if /^\#/ =~ line if ent_state == 'next' ent_state = 'entry' elsif ent_state == 'tmh' ent_state = 'next' end else ent_state = 'tmh' end if ent_state != 'next' entry << line else if block_given? yield Bio::TMHMM::Report.new(entry) else Bio::TMHMM::Report.new(entry) end entry = [line] end end if block_given? yield Bio::TMHMM::Report.new(entry) else Bio::TMHMM::Report.new(entry) end end # = TMHMM report parser class. class Report # Returns an Array of Bio::TMHMM::TMH. attr_reader :tmhs # Returns attr_reader :entry_id # Returns attr_reader :query_len # Returns attr_reader :predicted_tmhs # Returns attr_reader :exp_aas_in_tmhs # Returns attr_reader :exp_first_60aa # Returns attr_reader :total_prob_of_N_in alias length query_len # def initialize(entry = nil) begin str = entry.to_str rescue NoMethodError end if str then entry = str.enum_for(:each_line) end parse_header(entry) @tmhs = parse_tmhs(entry) end # Returns an Array of Bio::TMHMM::TMH including only "TMhelix". 
def helix @tmhs.map {|t| t if t.status == 'TMhelix' }.compact end # def to_s [ [ ["Length:", @query_len], ["Number of predicted TMHs:", @predicted_tmhs], ["Exp number of AAs in THMs:", @exp_aas_in_tmhs], ["Exp number, first 60 AAs:", @exp_first_60aa], ["Total prob of N-in:", @total_prob_of_N_in] ].map {|e| "\# " + [@entry_id, e].flatten.join("\t") }, tmhs.map {|ent| ent.to_s } ].flatten.join("\n") end private # def parse_header(raw) raw.each do |line| next unless /^#/.match(line) case line when / (\S.+) Length: +(\d+)/ @entry_id = $1.strip @query_len = $2.to_i when /Number of predicted TMHs: +(\d+)/ @predicted_tmhs = $1.to_i when /Exp number of AAs in TMHs: +([\d\.]+)/ @exp_aas_in_tmhs = $1.to_f when /Exp number, first 60 AAs: +([\d\.]+)/ @exp_first_60aa = $1.to_f when /Total prob of N-in: +([\d\.]+)/ @total_prob_of_N_in = $1.to_f end end end # def parse_tmhs(raw) tmhs = [] raw.each do |line| case line when /^[^\#]/ eid,version,status,r0,r1 = line.split(/\s+/) tmhs << Bio::TMHMM::TMH.new(eid.strip, version.strip, status.strip, Range.new(r0.to_i, r1.to_i)) end end tmhs end end # class Report # = Container class of the trainsmembrane helix(TMH) and the other # segments. class TMH # Returns attr_accessor :entry_id # Returns attr_accessor :version # Returns the status of the TMH. ("outside", "TMhelix" or "inside"). attr_accessor :status # Returns an Range of TMH position. 
attr_accessor :range alias pos range # def initialize(entry_id = nil, version = nil, status = nil, range = nil) @entry_id = entry_id @version = version @status = status @range = range end # def to_s [@entry_id, @version, @status, @range.first, @range.last].join("\t") end end # class TMH end # class TMHMM end # module Bio ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/sim4.rb�����������������������������������������������������������������0000644�0000041�0000041�00000006457�12200110570�016353� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/sim4.rb - sim4 wrapper class # # Copyright:: Copyright (C) 2004 GOTO Naohisa <ng@bioruby.org> # License:: The Ruby License # # $Id: sim4.rb,v 1.10 2007/04/05 23:35:39 trevor Exp $ # # The sim4 execution wrapper class. # # == References # # * Florea, L., et al., A Computer program for aligning a cDNA sequence # with a genomic DNA sequence, Genome Research, 8, 967--974, 1998. # http://www.genome.org/cgi/content/abstract/8/9/967 # require 'tempfile' require 'bio/command' module Bio # The sim4 execution wrapper class. class Sim4 autoload :Report, 'bio/appl/sim4/report' # Creates a new sim4 execution wrapper object. # [+program+] Program name. Usually 'sim4' in UNIX. # [+database+] Default file name of database('seq2'). # [+option+] Options (array of strings). 
def initialize(program = 'sim4', database = nil, opt = [])
  @program = program
  @options = opt
  @database = database #seq2
  @command = nil
  @output = nil
  @report = nil
end

# default file name of database('seq2')
attr_accessor :database

# name of the program (usually 'sim4' in UNIX)
attr_reader :program

# options
attr_accessor :options

# option is deprecated. Instead, please use options.
def option
  warn "option is deprecated. Please use options."
  options
end

# last command-line strings executed by the object
attr_reader :command

#---
# last messages of program reported to the STDERR
#attr_reader :log
#+++

#log is deprecated (no replacement) and returns empty string.
def log
  warn "log is deprecated (no replacement) and returns empty string."
  ''
end

# last result text (String)
attr_reader :output

# last result. Returns a Bio::Sim4::Report object.
attr_reader :report

# Executes the sim4 program.
# <tt>seq1</tt> shall be a Bio::Sequence object.
# It is written to a temporary FASTA file and passed to exec_local as
# the only file name, so the second file name falls back to
# <tt>self.database</tt>.
# Returns a Bio::Sim4::Report object.
def query(seq1)
  tf = Tempfile.open('sim4')
  begin
    tf.print seq1.to_fasta('seq1', 70)
    tf.close(false)
    exec_local(tf.path)
  ensure
    # remove the temporary file even when exec_local raises
    tf.close(true)
  end
end

# Executes the sim4 program.
# Perform mRNA-genome alignment between given sequences.
# <tt>seq1</tt> and <tt>seq2</tt> should be Bio::Sequence objects.
# Each one is written to its own temporary FASTA file.
# Returns a Bio::Sim4::Report object.
def query_pairwise(seq1, seq2)
  tf = Tempfile.open('sim4')
  tf2 = nil
  begin
    tf.print seq1.to_fasta('seq1', 70)
    tf.close(false)
    tf2 = Tempfile.open('seq2')
    # the second file must hold seq2 (seq1 used to be written here by
    # mistake, so seq1 was aligned against itself)
    tf2.print seq2.to_fasta('seq2', 70)
    tf2.close(false)
    exec_local(tf.path, tf2.path)
  ensure
    # remove both temporary files even when something above raises
    tf.close(true)
    tf2.close(true) if tf2
  end
end

# Executes the sim4 program.
# Perform mRNA-genome alignment between sequences in given files.
# <tt>filename1</tt> and <tt>filename2</tt> should be file name strings.
# If <tt>filename2</tt> is not specified, using <tt>self.database</tt>.
def exec_local(filename1, filename2 = nil) @command = [ @program, filename1, (filename2 or @database), *@options ] @output = nil @report = nil Bio::Command.call_command(@command) do |io| io.close_write @output = io.read @report = Bio::Sim4::Report.new(@output) end @report end alias exec exec_local end #class Sim4 end #module Bio �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/clustalw.rb�������������������������������������������������������������0000644�0000041�0000041�00000013753�12200110570�017332� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/clustalw.rb - CLUSTAL W wrapper class # # Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp> # License:: The Ruby License # # $Id: clustalw.rb,v 1.19 2007/07/16 12:27:29 ngoto Exp $ # # Bio::ClustalW is a CLUSTAL W execution wrapper class. # It can also be called as an alignment factory. # CLUSTAL W is a very popular software for multiple sequence alignment. # # == References # # * Thompson,J.D., Higgins,D.G. and Gibson,T.J.. # CLUSTAL W: improving the sensitivity of progressive multiple sequence # alignment through sequence weighting, position-specific gap penalties # and weight matrix choice. Nucleic Acids Research, 22:4673-4680, 1994. 
# http://nar.oxfordjournals.org/cgi/content/abstract/22/22/4673 # * http://www.ebi.ac.uk/clustalw/ # * ftp://ftp.ebi.ac.uk/pub/software/unix/clustalw/ # require 'tempfile' require 'bio/command' require 'bio/sequence' require 'bio/alignment' module Bio # Bio::ClustalW is a CLUSTAL W execution wrapper class. # Its object is also called an alignment factory. # CLUSTAL W is a very popular software for multiple sequence alignment. class ClustalW autoload :Report, 'bio/appl/clustalw/report' # Creates a new CLUSTAL W execution wrapper object (alignment factory). def initialize(program = 'clustalw', opt = []) @program = program @options = opt @command = nil @output = nil @report = nil @data_stdout = nil @exit_status = nil @output_dnd = nil end # name of the program (usually 'clustalw' in UNIX) attr_accessor :program # options attr_accessor :options # option is deprecated. Instead, please use options. def option warn "Bio::ClustalW#option is deprecated. Please use options." options end # Returns last command-line strings executed by this factory. # Note that filenames described in the command-line may already # be removed because they are temporary files. # Returns an array. attr_reader :command # This method will be deprecated. # # Returns last messages of CLUSTAL W execution. def log #warn 'Bio::ClustalW#log will be deprecated.' @data_stdout end # Returns last raw alignment result (String or nil). attr_reader :output # Returns last alignment result. # Returns a Bio::ClustalW::Report object. attr_reader :report # Last exit status attr_reader :exit_status # Last output to the stdout. attr_accessor :data_stdout # Clear the internal data and status, except program and options. def reset @command = nil @output = nil @report = nil @exit_status = nil @data_stdout = nil @output_dnd = nil end # Executes the program(clustalw). # If +seqs+ is not nil, perform alignment for seqs. # If +seqs+ is nil, simply executes CLUSTAL W. 
#
# Compatibility note: When seqs is nil,
# returns true if the program exits normally, and
# returns false if the program exits abnormally.
def query(seqs)
  if seqs then
    query_align(seqs)
  else
    exec_local(@options)
    @exit_status.exitstatus == 0
  end
end

# Note that this method will be renamed to query_alignment.
#
# Performs alignment for +seqs+.
# +seqs+ should be Bio::Alignment or Array of sequences or nil.
# Anything that is not a Bio::Alignment is wrapped in one before being
# written out as FASTA text.
#
# Compatibility Note: Nucleic or amino is not determined by this method.
def query_align(seqs)
  unless seqs.is_a?(Bio::Alignment)
    seqs = Bio::Alignment.new(seqs)
  end
  query_string(seqs.output_fasta(:width => 70,
                                 :avoid_same_name => true))
end

# Performs alignment for +seqs+.
# +seqs+ should be Bio::Alignment or Array of sequences or nil.
def query_alignment(seqs)
  query_align(seqs)
end

# Performs alignment for +str+.
# +str+ should be a string that can be recognized by CLUSTAL W.
# The string is written to a temporary file which is handed to
# query_by_filename; the file is removed afterwards, also when the
# alignment raises.
#
# Compatibility Note: 2nd argument is deprecated and ignored.
def query_string(str, *arg)
  if arg.size > 0 then
    warn '2nd argument of Bio::ClustalW#query_string is ignored'
  end
  tf_in = Tempfile.open('align')
  begin
    begin
      tf_in.print str
    ensure
      # flush to disk but keep the file for CLUSTAL W to read
      tf_in.close(false)
    end
    query_by_filename(tf_in.path)
  ensure
    tf_in.close(true)
  end
end

# Performs alignment of sequences in the file named +path+.
# The alignment and the guide tree are written by the program into two
# temporary files, read back into @output and @output_dnd, and the
# temporary files are removed (also when the execution raises).
# Returns a Report object built from the alignment text.
#
# Compatibility Note: 2nd argument (seqtype) is deprecated and ignored.
def query_by_filename(path, *arg)
  if arg.size > 0 then
    warn '2nd argument of Bio::ClustalW#query_by_filename is ignored'
  end

  tf_out = nil
  tf_dnd = nil
  begin
    # create the files first so that their names can be passed as options
    tf_out = Tempfile.open('clustalout')
    tf_out.close(false)
    tf_dnd = Tempfile.open('clustaldnd')
    tf_dnd.close(false)

    opt = [ "-align",
      "-infile=#{path}",
      "-outfile=#{tf_out.path}",
      "-newtree=#{tf_dnd.path}",
      "-outorder=input"
    ]
    #opt << "-type=#{seqtype}" if seqtype
    opt.concat(@options)
    exec_local(opt)

    # reopen the files to pick up what the program wrote into them
    tf_out.open
    @output = tf_out.read
    tf_dnd.open
    @output_dnd = tf_dnd.read
  ensure
    tf_out.close(true) if tf_out
    tf_dnd.close(true) if tf_dnd
  end

  @report = Report.new(@output)
  @report
end

# Returns last alignment guide-tree (file.dnd).
attr_reader :output_dnd #--- # Returns last error messages (to stderr) of CLUSTAL W execution. #attr_reader :errorlog #+++ #errorlog is deprecated (no replacement) and returns empty string. def errorlog warn "errorlog is deprecated (no replacement) and returns empty string." '' end private # Executes the program in the local machine. def exec_local(opt) @command = [ @program, *opt ] #STDERR.print "DEBUG: ", @command.join(" "), "\n" @data_stdout = nil @exit_status = nil Bio::Command.call_command(@command) do |io| io.close_write @data_stdout = io.read end @exit_status = $? end end #class ClustalW end #module Bio ���������������������bio-1.4.3.0001/lib/bio/appl/fasta/������������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�016234� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/fasta/format10.rb�������������������������������������������������������0000644�0000041�0000041�00000021520�12200110570�020212� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/fasta/format10.rb - FASTA output (-m 10) parser # # Copyright:: Copyright (C) 2002 Toshiaki Katayama <k@bioruby.org> # License:: The Ruby License # # $Id:$ # require 'bio/appl/fasta' require 'bio/io/flatfile/splitter' module Bio class Fasta # Summarized results of the fasta execution results. 
class Report # Splitter for Bio::FlatFile class FastaFormat10Splitter < Bio::FlatFile::Splitter::Template # creates a new splitter object def initialize(klass, bstream) super(klass, bstream) @delimiter = '>>>' @real_delimiter = /^\s*\d+\>\>\>\z/ end # do nothing and returns nil def skip_leader nil end # gets an entry def get_entry p0 = stream_pos() pieces = [] overrun = nil first = true while e = stream.gets(@delimiter) pieces.push e if @real_delimiter =~ e then if first then first = nil else overrun = $& break end end end ent = (pieces.empty? ? nil : pieces.join('')) if ent and overrun then ent[-overrun.length, overrun.length] = '' stream.ungets(overrun) end p1 = stream_pos() self.entry_start_pos = p0 self.entry = ent self.entry_ended_pos = p1 return ent end end #FastaFormat10Splitter # Splitter for Bio::FlatFile FLATFILE_SPLITTER = FastaFormat10Splitter def initialize(data) # Split outputs containing multiple query sequences' results chunks = data.split(/^(\s*\d+\>\>\>.*)/, 3) if chunks.size >= 3 then if chunks[0].strip.empty? 
then qdef_line = chunks[1] data = chunks[1..2].join('') overruns = chunks[3..-1] elsif /^\>\>\>/ =~ chunks[0] then qdef_line = nil data = chunks.shift overruns = chunks else qdef_line = chunks[1] data = chunks[0..2].join('') overruns = chunks[3..-1] end @entry_overrun = overruns.join('') if qdef_line and /^ *\d+\>\>\>([^ ]+) .+ \- +(\d+) +(nt|aa)\s*$/ =~ qdef_line then @query_def = $1 @query_len = $2.to_i end end # header lines - brief list of the hits if list_start = data.index("\nThe best scores are") then data = data[(list_start + 1)..-1] data.sub!(/(.*)\n\n>>>/m, '') @list = $1 else if list_start = data.index(/\n!!\s+/) then data = data[list_start..-1] data.sub!(/\n!!\s+/, '') data.sub!(/.*/) { |x| @list = x; '' } else data = data.sub(/.*/) { |x| @list = x; '' } end end # body lines - fasta execution result program, *hits = data.split(/\n>>/) # trailing lines - log messages of the execution @log = hits.pop @log.sub!(/.*<\n/m, '') @log.strip! # parse results @program = Program.new(program) @hits = [] hits.each do |x| @hits.push(Hit.new(x)) end end # piece of next entry. Bio::FlatFile uses it. attr_reader :entry_overrun # Query definition. For older reports, the value may be nil. attr_reader :query_def # Query sequence length. For older reports, the value may be nil. attr_reader :query_len # Returns the 'The best scores are' lines as a String. attr_reader :list # Returns the trailing lines including library size, execution date, # fasta function used, and fasta versions as a String. attr_reader :log # Returns a Bio::Fasta::Report::Program object. attr_reader :program # Returns an Array of Bio::Fasta::Report::Hit objects. attr_reader :hits # Iterates on each Bio::Fasta::Report::Hit object. def each @hits.each do |x| yield x end end # Returns an Array of Bio::Fasta::Report::Hit objects having # better evalue than 'evalue_max'. 
def threshold(evalue_max = 0.1) list = [] @hits.each do |x| list.push(x) if x.evalue < evalue_max end return list end # Returns an Array of Bio::Fasta::Report::Hit objects having # longer overlap length than 'length_min'. def lap_over(length_min = 0) list = [] @hits.each do |x| list.push(x) if x.overlap > length_min end return list end # Log of the fasta execution environments. class Program def initialize(data) @definition, *program = data.split(/\n/) @program = {} pat = /;\s+([^:]+):\s+(.*)/ program.each do |x| if pat.match(x) @program[$1] = $2 end end end # Returns a String containing query and library filenames. attr_reader :definition # Accessor for a Hash containing 'mp_name', 'mp_ver', 'mp_argv', # 'pg_name', 'pg_ver, 'pg_matrix', 'pg_gap-pen', 'pg_ktup', # 'pg_optcut', 'pg_cgap', 'mp_extrap', 'mp_stats', and 'mp_KS' values. attr_reader :program end class Hit def initialize(data) score, query, target = data.split(/\n>/) @definition, *score = score.split(/\n/) @score = {} pat = /;\s+([^:]+):\s+(.*)/ score.each do |x| if pat.match(x) @score[$1] = $2 end end @query = Query.new(query) @target = Target.new(target) end attr_reader :definition, :score, :query, :target # E-value score def evalue if @score['fa_expect'] @score['fa_expect'].to_f elsif @score['sw_expect'] @score['sw_expect'].to_f elsif @score['fx_expect'] @score['fx_expect'].to_f elsif @score['tx_expect'] @score['tx_expect'].to_f end end # Bit score def bit_score if @score['fa_bits'] @score['fa_bits'].to_f elsif @score['sw_bits'] @score['sw_bits'].to_f elsif @score['fx_bits'] @score['fx_bits'].to_f elsif @score['tx_bits'] @score['tx_bits'].to_f end end def direction @score['fa_frame'] || @score['sw_frame'] || @score['fx_frame'] || @score['tx_frame'] end # Smith-Waterman score def sw @score['sw_score'].to_i end # percent identity def identity @score['sw_ident'].to_f end # overlap length def overlap @score['sw_overlap'].to_i end # Shortcuts for the methods of Bio::Fasta::Report::Hit::Query def query_id 
@query.entry_id end def target_id @target.entry_id end def query_def @query.definition end def target_def @target.definition end def query_len @query.length end # Shortcuts for the methods of Bio::Fasta::Report::Hit::Target def target_len @target.length end def query_seq @query.sequence end def target_seq @target.sequence end def query_type @query.moltype end def target_type @target.moltype end # Information on matching region def query_start @query.start end def query_end @query.stop end def target_start @target.start end def target_end @target.stop end def lap_at [ query_start, query_end, target_start, target_end ] end class Query def initialize(data) @definition, *data = data.split(/\n/) @data = {} @sequence = '' pat = /;\s+([^:]+):\s+(.*)/ data.each do |x| if pat.match(x) @data[$1] = $2 else @sequence += x end end end # Returns the definition of the entry as a String. # You can access this value by Report::Hit#query_def method. attr_reader :definition # Returns a Hash containing 'sq_len', 'sq_offset', 'sq_type', # 'al_start', 'al_stop', and 'al_display_start' values. # You can access most of these values by Report::Hit#query_* methods. attr_reader :data # Returns the sequence (with gaps) as a String. # You can access this value by the Report::Hit#query_seq method. attr_reader :sequence # Returns the first word in the definition as a String. # You can get this value by Report::Hit#query_id method. def entry_id @definition[/\S+/] end # Returns the sequence length. # You can access this value by the Report::Hit#query_len method. def length @data['sq_len'].to_i end # Returns 'p' for protein sequence, 'D' for nucleotide sequence. def moltype @data['sq_type'] end # Returns alignment start position. You can also access this value # by Report::Hit#query_start method for shortcut. def start @data['al_start'].to_i end # Returns alignment end position. You can access this value # by Report::Hit#query_end method for shortcut. 
def stop @data['al_stop'].to_i end end # Same as Bio::Fasta::Report::Hit::Query but for Target. class Target < Query; end end end # Report end # Fasta end # Bio ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/fasta.rb����������������������������������������������������������������0000644�0000041�0000041�00000013530�12200110570�016563� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/fasta.rb - FASTA wrapper # # Copyright:: Copyright (C) 2001, 2002 Toshiaki Katayama <k@bioruby.org> # License:: The Ruby License # # $Id:$ # require 'net/http' require 'uri' require 'bio/command' require 'shellwords' module Bio class Fasta autoload :Report, 'bio/appl/fasta/format10' #autoload :?????, 'bio/appl/fasta/format6' # Returns a FASTA factory object (Bio::Fasta). def initialize(program, db, opt = [], server = 'local') @format = 10 @program = program @db = db @server = server @ktup = nil @matrix = nil @output = '' begin a = opt.to_ary rescue NameError #NoMethodError # backward compatibility a = Shellwords.shellwords(opt) end @options = [ '-Q', '-H', '-m', @format.to_s, *a ] # need -a ? end attr_accessor :program, :db, :options, :server, :ktup, :matrix # Returns a String containing fasta execution output in as is format. attr_reader :output def option # backward compatibility Bio::Command.make_command_line(@options) end def option=(str) # backward compatibility @options = Shellwords.shellwords(str) end # Accessors for the -m option. 
def format=(num)
  @format = num.to_i
  value = @format.to_s
  at = @options.index('-m')
  if at
    # overwrite the argument that follows the existing '-m'
    @options[at + 1, 1] = value
  else
    @options.push('-m', value)
  end
end
attr_reader :format

# OBSOLETE. Does nothing and shows warning messages.
#
# Historically, selecting parser to use ('format6' or 'format10' were
# expected, but only 'format10' was available as a working parser).
#
def self.parser(parser)
  warn 'Bio::Fasta.parser is obsoleted and will soon be removed.'
end

# Returns a FASTA factory object (Bio::Fasta) to run FASTA search on
# local computer.
def self.local(program, db, option = '')
  new(program, db, option, 'local')
end

# Returns a FASTA factory object (Bio::Fasta) to execute FASTA search on
# remote server.
#
# For the developer, you can add server 'hoge' by adding
# exec_hoge(query) method.
#
def self.remote(program, db, option = '', server = 'genomenet')
  new(program, db, option, server)
end

# Execute FASTA search and returns Report object (Bio::Fasta::Report).
# The work is dispatched to the exec_<server> method named after @server.
def query(query)
  send("exec_#{@server}", query.to_s)
end

private

# Wraps raw result text in a Report object.
def parse_result(data)
  Report.new(data)
end

# Runs the program through Bio::Command.query_command with the query as
# its input, keeps the raw text in @output and returns the parsed report.
# The command line is: program, options, '@', database and, when set,
# the ktup value.
def exec_local(query)
  argv = [ @program, *@options ]
  argv.push('@', @db)
  argv.push(@ktup) if @ktup

  @output = Bio::Command.query_command(argv, query)
  parse_result(@output)
end

# == Available databases for Fasta.remote(@program, @db, option, 'genomenet')
#
# See http://fasta.genome.jp/ideas/ideas.html#fasta for more details.
# # ----------+-------+--------------------------------------------------- # @program | query | @db (supported in GenomeNet) # ----------+-------+--------------------------------------------------- # fasta | AA | nr-aa, genes, vgenes.pep, swissprot, swissprot-upd, # | | pir, prf, pdbstr # +-------+--------------------------------------------------- # | NA | nr-nt, genbank-nonst, gbnonst-upd, dbest, dbgss, # | | htgs, dbsts, embl-nonst, embnonst-upd, epd, # | | genes-nt, genome, vgenes.nuc # ----------+-------+--------------------------------------------------- # tfasta | AA | nr-nt, genbank-nonst, gbnonst-upd, dbest, dbgss, # | | htgs, dbsts, embl-nonst, embnonst-upd, # | | genes-nt, genome, vgenes.nuc # ----------+-------+--------------------------------------------------- # def exec_genomenet(query) host = "fasta.genome.jp" #path = "/sit-bin/nph-fasta" path = "/sit-bin/fasta" # 2005.08.12 form = { 'style' => 'raw', 'prog' => @program, 'dbname' => @db, 'sequence' => query, 'other_param' => Bio::Command.make_command_line_unix(@options), 'ktup_value' => @ktup, 'matrix' => @matrix, } form.keys.each do |k| form.delete(k) unless form[k] end report = nil begin http = Bio::Command.new_http(host) http.open_timeout = 3000 http.read_timeout = 6000 result = Bio::Command.http_post_form(http, path, form) # workaround 2006.8.1 - fixed for new batch queuing system case result.code when "302" result_location = result.header['location'] result_uri = URI.parse(result_location) result_path = result_uri.path done = false until done result = http.get(result_path) if result.body[/Your job ID is/] sleep 15 else done = true end end end @output = result.body.to_s # workaround 2005.08.12 re = %r{<A HREF="http://#{host}(/tmp/[^"]+)">Show all result</A>}i # " if path = @output[re, 1] result = http.get(path) @output = result.body txt = @output.to_s.split(/\<pre\>/)[1] raise 'cannot understand response' unless txt txt.sub!(/\<\/pre\>.*\z/m, '') txt.sub!(/.*^((T?FASTA|SSEARCH) 
(searches|compares))/m, '\1') txt.sub!(/^\<form method\=\"POST\" name\=\"clust_check\"\>.*\n/, '') txt.sub!(/^\<select +name\=\"allch\".+\r?\n/i, '') # 2009.11.26 txt.gsub!(/\<input[^\>]+value\=\"[^\"]*\"[^\>]*\>/i, '') txt.gsub!(/\<(a|form|select|input|option|img)\s+[^\>]+\>/i, '') txt.gsub!(/\<\/(a|form|select|input|option|img)\>/i, '') txt.gsub!(/\<\;/, '<') txt.gsub!(/\>\;/, '>') # 2009.11.26 @output = txt report = parse_result(@output.dup) else raise 'cannot understand response' end end return report end end # Fasta end # Bio ������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/blast/������������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�016243� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/blast/rpsblast.rb�������������������������������������������������������0000644�0000041�0000041�00000021614�12200110570�020426� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/blast/rpsblast.rb - NCBI RPS Blast default output parser # # Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org> # License:: The Ruby License # # # == Description # # NCBI RPS Blast (Reversed Position Specific Blast) default # (-m 0 option) output 
parser class, Bio::Blast::RPSBlast::Report # and related classes/modules. # # == References # # * Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, # Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), # "Gapped BLAST and PSI-BLAST: a new generation of protein database search # programs", Nucleic Acids Res. 25:3389-3402. # * ftp://ftp.ncbi.nih.gov/blast/documents/rpsblast.html # * http://www.ncbi.nlm.nih.gov/Structure/cdd/cdd_help.shtml # require 'bio/io/flatfile' module Bio require 'bio/appl/blast' unless const_defined?(:Blast) class Blast # NCBI RPS Blast (Reversed Position Specific Blast) namespace. # Currently, this module is existing only for separating namespace. # To parse RPSBlast results, see Bio::Blast::RPSBlast::Report documents. module RPSBlast # Flatfile splitter for RPS-BLAST reports. # It is internally used when reading RPS-BLAST report. # Normally, users do not need to use it directly. # # Note for Windows: RPS-BLAST results generated in Microsoft Windows # may not be parsed correctly due to the line feed code problem. # For a workaroud, convert line feed codes from Windows(DOS) to UNIX. # class RPSBlastSplitter < Bio::FlatFile::Splitter::Template # Separator used to distinguish start of each report ReportHead = /\A\n*(RPS\-BLAST|Query\=)/ # Delimiter used for IO#gets Delimiter = "\n\n" # creates a new splitter object def initialize(klass, bstream) super(klass, bstream) @entry_head = nil end # Skips leader of the entry. # In this class, only skips space characters. def skip_leader stream.skip_spaces return nil end # Rewinds the stream def rewind @entry_head = nil super end # gets an entry def get_entry p0 = stream_pos() pieces = [] flag_head = false # reached to start of header flag_body = false # reached to start of body (Query=...) while x = stream.gets(Delimiter) if ReportHead =~ x then case $1 when 'RPS-BLAST' if pieces.empty? 
then @entry_head = nil flag_head = true else stream.ungets(x) break end when 'Query=' if flag_body then stream.ungets(x) break else @entry_head = pieces.join('') if flag_head flag_body = true end else raise 'Bug: should not reach here' end end #if ReportHead... pieces.push x end #while p1 = stream_pos() self.entry_start_pos = p0 self.entry = if pieces.empty? then nil elsif !flag_head and @entry_head then @entry_head + pieces.join('') else pieces.join('') end self.entry_ended_pos = p1 return self.entry end end #class RPSBlastSplitter # NCBI RPS Blast (Reversed Position Specific Blast) # default output parser. # # It supports defalut (-m 0 option) output of the "rpsblast" command. # # Because this class inherits Bio::Blast::Default::Report, # almost all methods are eqaul to Bio::Blast::Default::Report. # Only DELIMITER (and RS) and few methods are different. # # By using Bio::FlatFile, (for example, Bio::FlatFile.open), # rpsblast result generated from multiple query sequences is # automatically splitted into multiple # Bio::BLast::RPSBlast::Report objects corresponding to # query sequences. # # Note for multi-fasta results WITH using Bio::FlatFile: # Each splitted result is concatenated with header of the # result which describes RPS-BLAST version and database # information, if possible. # # Note for multi-fasta results WITHOUT using Bio::FlatFile: # When parsing an output of rpsblast command running with # multi-fasta sequences WITHOUT using Bio::FlatFile, # each query's result is stored as an "iteration" of PSI-Blast. # This behavior may be changed in the future. # # Note for nucleotide results: This class is not tested with # nucleotide query and/or nucleotide databases. # class Report < Bio::Blast::Default::Report # Delimter of each entry for RPS-BLAST. DELIMITER = RS = "\nRPS-BLAST" # (Integer) excess read size included in DELIMITER. 
DELIMITER_OVERRUN = 9 # "RPS-BLAST"

# splitter for Bio::FlatFile support
FLATFILE_SPLITTER = RPSBlastSplitter

# Creates a new Report object from a string.
#
# Using Bio::FlatFile.open (or some other methods)
# is recommended instead of using this method directly.
# Refer Bio::Blast::RPSBlast::Report document for more information.
#
# Note for multi-fasta results WITHOUT using Bio::FlatFile:
# When parsing an output of rpsblast command running with
# multi-fasta sequences WITHOUT using Bio::FlatFile,
# each query's result is stored as an "iteration" of PSI-Blast.
# This behavior may be changed in the future.
#
# Note for nucleotide results: This class is not tested with
# nucleotide query and/or nucleotide databases.
#
def initialize(str)
  text = str.sub(/\A\s+/, '')
  # cut off everything from a following "RPS-BLAST" line onward and
  # keep it as the overrun (nil when there is none)
  text.sub!(/\n(RPS\-BLAST.*)/m, "\n")
  @entry_overrun = $1
  @entry = text

  # paragraphs separated by one or more blank lines
  data = text.split(/(?:^[ \t]*\n)+/)

  # headers are present unless the text starts directly with "Query="
  format0_split_headers(data) if data[0] && /\AQuery\=/ !~ data[0]
  @iterations = format0_split_search(data)
  format0_split_stat_params(data)
end

# Returns definition of the query.
# For a result of multi-fasta input, the first query's definition
# is returned (The same as <tt>iterations.first.query_def</tt>).
def query_def
  first_result = iterations.first
  first_result.query_def
end

# Returns length of the query.
# For a result of multi-fasta input, the first query's length
# is returned (The same as <tt>iterations.first.query_len</tt>).
def query_len
  first_result = iterations.first
  first_result.query_len
end

private

# Splits headers into the first line, reference, query line and
# database line. The consumed paragraphs are removed from +data+.
def format0_split_headers(data)
  @f0header = data.shift
  @f0references = []
  # everything up to the "Database:" paragraph is reference text
  @f0references.push(data.shift) while data[0] && /\ADatabase\:/ !~ data[0]
  @f0database = data.shift
  # In special case, a void line is inserted after database name.
  if /\A +[\d\,]+ +sequences\; +[\d\,]+ total +letters\s*\z/ =~ data[0]
    @f0database << "\n" << data.shift
  end
end

# Splits the search results.
def format0_split_search(data) iterations = [] dummystr = 'Searching..................................................done' if r = data[0] and /^Searching/ =~ r then dummystr = data.shift end while r = data[0] and /^Query\=/ =~ r iterations << Iteration.new(data, dummystr) end iterations end # Iteration class for RPS-Blast. # Though RPS-Blast does not iterate like PSI-BLAST, # it aims to store a result of single query sequence. # # Normally, the instance of the class is generated # by Bio::Blast::RPSBlast::Report object. # class Iteration < Bio::Blast::Default::Report::Iteration # Creates a new Iteration object. # It is designed to be called only internally from # the Bio::Blast::RPSBlast::Report class. # Users shall not use the method directly. def initialize(data, dummystr) if /\AQuery\=/ =~ data[0] then sc = StringScanner.new(data.shift) sc.skip(/\s*/) if sc.skip_until(/Query\= */) then q = [] begin q << sc.scan(/.*/) sc.skip(/\s*^ ?/) end until !sc.rest or r = sc.skip(/ *\( *([\,\d]+) *letters *\)\s*\z/) @query_len = sc[1].delete(',').to_i if r @query_def = q.join(' ') end end data.unshift(dummystr) super(data) end # definition of the query attr_reader :query_def # length of the query sequence attr_reader :query_len end #class Iteration end #class Report end #module RPSBlast end #module Blast end #module Bio ��������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/blast/genomenet.rb������������������������������������������������������0000644�0000041�0000041�00000022131�12200110570�020550� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/blast/genomenet.rb - 
Remote BLAST wrapper using GenomeNet # # Copyright:: Copyright (C) 2001,2008 Mitsuteru C. Nakao <n@bioruby.org> # Copyright:: Copyright (C) 2002,2003 Toshiaki Katayama <k@bioruby.org> # Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk> # Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org> # License:: The Ruby License # # require 'net/http' require 'uri' require 'bio/command' require 'shellwords' module Bio::Blast::Remote # == Description # # The Bio::Blast::Remote::GenomeNet class contains methods for running # remote BLAST searches on GenomeNet (http://blast.genome.jp/). # # == Usage # # require 'bio' # # # To run an actual BLAST analysis: # # 1. create a BLAST factory # blast_factory = Bio::Blast.remote('blastp', 'nr-aa', # '-e 0.0001', 'genomenet') # #or: # blast_factory = Bio::Blast::Remote.genomenet('blastp', 'nr-aa', # '-e 0.0001') # # # 2. run the actual BLAST by querying the factory # report = blast_factory.query(sequence_text) # # # Then, to parse the report, see Bio::Blast::Report # # === Available databases for Bio::Blast::Remote::GenomeNet # # Up-to-date available databases can be obtained by using # Bio::Blast::Remote::GenomeNet.databases(program). # Short descriptions of databases # # ----------+-------+--------------------------------------------------- # program | query | db (supported in GenomeNet) # ----------+-------+--------------------------------------------------- # blastp | AA | nr-aa, genes, vgenes.pep, swissprot, swissprot-upd, # ----------+-------+ pir, prf, pdbstr # blastx | NA | # ----------+-------+--------------------------------------------------- # blastn | NA | nr-nt, genbank-nonst, gbnonst-upd, dbest, dbgss, # ----------+-------+ htgs, dbsts, embl-nonst, embnonst-upd, epd, # tblastn | AA | genes-nt, genome, vgenes.nuc # ----------+-------+--------------------------------------------------- # # === BLAST options # # Options are basically the same as those of the blastall command # in NCBI BLAST. 
# Remote BLAST execution on GenomeNet.
# Provides the database list parser (Information) and the
# exec_genomenet method which posts a query and returns the result.
module GenomeNet

  Host = "www.genome.jp".freeze

  # Creates a remote BLAST factory using GenomeNet.
  # Returns Bio::Blast object.
  #
  # Note for future improvement: In the future, it might return
  # Bio::Blast::Remote::GenomeNet or other object.
  #
  def self.new(program, db, options = [])
    Bio::Blast.new(program, db, options, 'genomenet')
  end

  # Information for GenomeNet BLAST search.
  module Information

    include Bio::Blast::Remote::Information

    # gets information from remote host and parses database information
    #
    # Returns nil when the information has already been parsed,
    # otherwise true. Sets @databases and @database_descriptions,
    # both keyed by program name.
    def _parse_databases
      if defined? @parse_databases
        return nil if @parse_databases
      end
      databases = {}
      dbdescs = {}
      key = nil
      # Host is resolved lexically (GenomeNet::Host), the same way
      # exec_genomenet refers to it.
      http = Bio::Command.new_http(Host)
      result = http.get('/tools/blast/')
      #p result.body
      result.body.each_line do |line|
        case line
        when /\"set\_dbtype\(this\.form\,\'(prot|nucl)\'\)\"/
          key = $1
          databases[key] ||= []
          dbdescs[key] ||= {}
        when /\<input *type\=\"radio\" *name\=\"dbname\" *value\=\"([^\"]+)\"[^\>]*\>([^\<\>]+)/
          db = $1.freeze
          desc = $2.strip.freeze
          # A database listed before any molecule-type marker cannot be
          # classified; skip it instead of indexing with a nil key.
          next unless key
          databases[key].push db
          dbdescs[key][db] = desc
        end
      end

      # mine-aa and mine-nt should be removed
      [ 'prot', 'nucl' ].each do |mol|
        ary  = databases[mol] || []
        hash = dbdescs[mol]   || {}
        [ 'mine-aa', 'mine-nt' ].each do |k|
          ary.delete(k)
          hash.delete(k)
        end
        databases[mol] = ary.freeze
        dbdescs[mol]   = hash
      end

      # re-key the tables from molecule type to program name
      [ databases, dbdescs ].each do |h|
        prot = h['prot']
        nucl = h['nucl']
        h.delete('prot')
        h.delete('nucl')
        h['blastp']  = prot
        h['blastx']  = prot
        h['blastn']  = nucl
        h['tblastn'] = nucl
        h['tblastx'] = nucl
      end

      @databases = databases
      @database_descriptions = dbdescs
      @parse_databases = true
      true
    end
    private :_parse_databases

  end #module Information

  extend Information

  private

  # executes BLAST and returns result as a string
  #
  # +query+ is the sequence text posted as the 'sequence' form field.
  # The result is also stored in @output.
  # Raises RuntimeError ('cannot understand response') when the
  # "Show all result" link or the expected <pre> section is not found.
  def exec_genomenet(query)
    host = Host
    #host = "blast.genome.jp"
    #path = "/sit-bin/nph-blast"
    #path = "/sit-bin/blast" #2005.08.12
    path = "/tools-bin/blast" #2012.01.12

    options = make_command_line_options
    opt = Bio::Blast::NCBIOptions.new(options)

    program = opt.delete('-p')
    db = opt.delete('-d')

    # When database name starts with mine-aa or mine-nt,
    # space-separated list of KEGG organism codes can be given.
    # For example, "mine-aa eco bsu hsa".
    if /\A(mine-(aa|nt))\s+/ =~ db.to_s then
      db = $1
      myspecies = {}
      myspecies["myspecies-#{$2}"] = $'
    end

    matrix = opt.delete('-M') || 'blosum62'
    filter = opt.delete('-F') || 'T'

    opt_v = opt.delete('-v') || 500 # default value for GenomeNet
    opt_b = opt.delete('-b') || 250 # default value for GenomeNet

    # format, not for form parameters, but included in option string
    opt_m = opt.get('-m') || '7' # default of BioRuby GenomeNet factory
    opt.set('-m', opt_m)

    optstr = Bio::Command.make_command_line_unix(opt.options)

    form = {
      'style'          => 'raw',
      'prog'           => program,
      'dbname'         => db,
      'sequence'       => query,
      'other_param'    => optstr,
      'matrix'         => matrix,
      'filter'         => filter,
      'V_value'        => opt_v,
      'B_value'        => opt_b,
      'alignment_view' => 0,
    }

    form.merge!(myspecies) if myspecies

    # drop form fields which have no value
    form.keys.each do |k|
      form.delete(k) unless form[k]
    end

    begin
      http = Bio::Command.new_http(host)
      http.open_timeout = 300
      http.read_timeout = 600
      result = Bio::Command.http_post_form(http, path, form)
      @output = result.body

      # workaround 2008.8.13
      if result.code == '302' then
        newuri = URI.parse(result['location'])
        newpath = newuri.path
        result = http.get(newpath)
        @output = result.body
        # waiting for BLAST finished
        while /Your job ID is/ =~ @output and
            /Your result will be displayed here\.?\<br\>/i =~ @output
          if /This page will be reloaded automatically in\s*((\d+)\s*min\.)?\s*((\d+)\s*sec\.)?/ =~ @output then
            reloadtime = $2.to_i * 60 + $4.to_i
            # keep the polling interval between 1 and 300 seconds
            reloadtime = 300 if reloadtime > 300
            reloadtime = 1 if reloadtime < 1
          else
            reloadtime = 5
          end
          if $VERBOSE then
            $stderr.puts "waiting #{reloadtime} sec to reload #{newuri.to_s}"
          end
          sleep(reloadtime)
          result = http.get(newpath)
          @output = result.body
        end
      end

      # workaround 2005.08.12 + 2011.01.27 + 2011.7.22
      if /\<A +HREF=\"(http\:\/\/[\-\.a-z0-9]+\.genome\.jp)?(\/tmp\/[^\"]+)\"\>Show all result\<\/A\>/i =~ @output.to_s then
        all_prefix = $1
        all_path   = $2
        all_prefix = "http://#{Host}" if all_prefix.to_s.empty?
        all_uri = all_prefix + all_path
        @output = Bio::Command.read_uri(all_uri)
        case all_path
        when /\.txt\z/
          ; # don't touch the data
        else
          txt = @output.to_s.split(/\<pre\>/)[1]
          raise 'cannot understand response' unless txt
          txt.sub!(/\<\/pre\>.*\z/m, '')
          txt.sub!(/.*^ \-{20,}\s*/m, '')
          @output = txt
        end
      else
        raise 'cannot understand response'
      end
    end

    # for -m 0 (NCBI BLAST default) output, html tags are removed.
    if opt_m.to_i == 0 then
      #@output_bak = @output
      # String#sub! returns nil when nothing was replaced, which made
      # the following gsub! calls fail on output without a <select>
      # line; use the non-destructive form for the first step.
      txt = @output.to_s.sub(/^\<select .*/, '')
      #txt.gsub!(/^\s*\<img +src\=\"\/Fig\/arrow\_top\.gif\"\>.+$\r?\n/, '')
      txt.gsub!(/^.+\<\/form\>$/, '')
      #txt.gsub!(/^\<form *method\=\"POST\" name\=\"clust\_check\"\>.+$\r?\n/, '')
      # NOTE(review): the previous pattern allowed exactly one character
      # after "/tmp" and no closing quote, so it could not match a real
      # href; the entity names below are assumed to be the intended
      # ones (&nbsp; &gt; &lt;) -- confirm against a live response.
      txt.gsub!(/\<a href\=\"\/tmp[^\"]*\"\>\&uarr\;\&nbsp\;Top\<\/a\>/, '')
      txt.gsub!(/\<[^\>\<]+\>/m, '')
      txt.gsub!(/\&gt\;/, '>')
      txt.gsub!(/\&lt\;/, '<')
      @output = txt
    end

    return @output
  end

end # class GenomeNet
# BLAST XML output (-m 7) parser based on the XMLParser library.
# This file is automatically loaded by bio/appl/blast/report.rb
# if the XMLParser library is installed.

begin
  require 'xmlparser'
rescue LoadError
end

module Bio
class Blast
class Report

  private

  # Parses the BLAST XML text +xml+ with XMLParser and fills
  # @iterations and the report-level attributes.
  #
  # Character data is accumulated in a hash keyed by the name of the
  # innermost open element; when an element is closed the matching
  # xmlparser_parse_* method is called and the hash is reset.
  # A parse error (XMLParserError) is reported with "print" and is
  # not re-raised.
  def xmlparser_parse(xml)
    parser = XMLParser.new
    def parser.default; end

    begin
      tag_stack = Array.new
      hash = Hash.new

      parser.parse(xml) do |type, name, data|
        case type
        when XMLParser::START_ELEM
          tag_stack.push(name)
          hash.update(data)
          case name
          when 'Iteration'
            iteration = Iteration.new
            @iterations.push(iteration)
          when 'Hit'
            hit = Hit.new
            hit.query_id = @query_id
            hit.query_def = @query_def
            hit.query_len = @query_len
            @iterations.last.hits.push(hit)
          when 'Hsp'
            hsp = Hsp.new
            @iterations.last.hits.last.hsps.push(hsp)
          end
        when XMLParser::END_ELEM
          case name
          when /^BlastOutput/
            xmlparser_parse_program(name,hash)
            hash = Hash.new
          when /^Parameters$/
            xmlparser_parse_parameters(hash)
            hash = Hash.new
          when /^Iteration/
            xmlparser_parse_iteration(name, hash)
            hash = Hash.new
          when /^Hit/
            xmlparser_parse_hit(name, hash)
            hash = Hash.new
          when /^Hsp$/
            xmlparser_parse_hsp(hash)
            hash = Hash.new
          when /^Statistics$/
            xmlparser_parse_statistics(hash)
            hash = Hash.new
          end
          tag_stack.pop
        when XMLParser::CDATA
          # whitespace-only text never creates an entry, but later
          # chunks are appended to an existing entry as they come
          if hash[tag_stack.last].nil?
            hash[tag_stack.last] = data unless data.strip.empty?
          else
            hash[tag_stack.last].concat(data) if data
          end
        when XMLParser::PI
        end
      end
    rescue XMLParserError
      line = parser.line
      column = parser.column
      print "Parse error at #{line}(#{column}) : #{$!}\n"
    end
  end

  # Stores the value of a closed BlastOutput_* element (+tag+) found
  # in +hash+ into the corresponding instance variable.
  def xmlparser_parse_program(tag, hash)
    case tag
    when 'BlastOutput_program'
      @program = hash[tag]
    when 'BlastOutput_version'
      @version = hash[tag]
    when 'BlastOutput_reference'
      @reference = hash[tag]
    when 'BlastOutput_db'
      # An empty element leaves no entry in hash; to_s avoids calling
      # strip on nil in that case.
      @db = hash[tag].to_s.strip
    when 'BlastOutput_query-ID'
      @query_id = hash[tag]
    when 'BlastOutput_query-def'
      @query_def = hash[tag]
    when 'BlastOutput_query-len'
      @query_len = hash[tag].to_i
    end
  end

  # set parameter of the key as val
  #
  # The "Parameters_" prefix of +key+ is removed. 'expect' and
  # 'include' are stored as Float, 'gap-*' and 'sc-*' as Integer,
  # anything else is stored unchanged.
  def xml_set_parameter(key, val)
    #labels = {
    #  'matrix'       => 'Parameters_matrix',
    #  'expect'       => 'Parameters_expect',
    #  'include'      => 'Parameters_include',
    #  'sc-match'     => 'Parameters_sc-match',
    #  'sc-mismatch'  => 'Parameters_sc-mismatch',
    #  'gap-open'     => 'Parameters_gap-open',
    #  'gap-extend'   => 'Parameters_gap-extend',
    #  'filter'       => 'Parameters_filter',
    #  'pattern'      => 'Parameters_pattern',
    #  'entrez-query' => 'Parameters_entrez-query',
    #}
    k = key.sub(/\AParameters\_/, '')
    @parameters[k] =
      case k
      when 'expect', 'include'
        val.to_f
      when /\Agap\-/, /\Asc\-/
        val.to_i
      else
        val
      end
  end

  # Stores every entry of +hash+ through xml_set_parameter.
  def xmlparser_parse_parameters(hash)
    hash.each do |k, v|
      xml_set_parameter(k, v)
    end
  end

  # Stores the value of a closed Iteration_* element (+tag+) into the
  # last iteration.
  def xmlparser_parse_iteration(tag, hash)
    case tag
    when 'Iteration_iter-num'
      @iterations.last.num = hash[tag].to_i
    when 'Iteration_message'
      @iterations.last.message = hash[tag].to_s

    # for new BLAST XML format
    when 'Iteration_query-ID'
      @iterations.last.query_id = hash[tag].to_s
    when 'Iteration_query-def'
      @iterations.last.query_def = hash[tag].to_s
    when 'Iteration_query-len'
      @iterations.last.query_len = hash[tag].to_i
    end
  end

  # Stores the value of a closed Hit_* element (+tag+) into the last
  # hit of the last iteration.
  def xmlparser_parse_hit(tag, hash)
    hit = @iterations.last.hits.last
    case tag
    when 'Hit_num'
      hit.num = hash[tag].to_i
    when 'Hit_id'
      hit.hit_id = hash[tag].clone
    when 'Hit_def'
      hit.definition = hash[tag].clone
    when 'Hit_accession'
      hit.accession = hash[tag].clone
    when 'Hit_len'
      hit.len = hash[tag].clone.to_i
    end
  end

  # Copies the Hsp_* values collected in +hash+ into the last HSP of
  # the last hit. Numeric fields are converted with to_i / to_f, so a
  # missing field becomes 0 or 0.0.
  def xmlparser_parse_hsp(hash)
    hsp = @iterations.last.hits.last.hsps.last
    hsp.num          = hash['Hsp_num'].to_i
    hsp.bit_score    = hash['Hsp_bit-score'].to_f
    hsp.score        = hash['Hsp_score'].to_i
    hsp.evalue       = hash['Hsp_evalue'].to_f
    hsp.query_from   = hash['Hsp_query-from'].to_i
    hsp.query_to     = hash['Hsp_query-to'].to_i
    hsp.hit_from     = hash['Hsp_hit-from'].to_i
    hsp.hit_to       = hash['Hsp_hit-to'].to_i
    hsp.pattern_from = hash['Hsp_pattern-from'].to_i
    hsp.pattern_to   = hash['Hsp_pattern-to'].to_i
    hsp.query_frame  = hash['Hsp_query-frame'].to_i
    hsp.hit_frame    = hash['Hsp_hit-frame'].to_i
    hsp.identity     = hash['Hsp_identity'].to_i
    hsp.positive     = hash['Hsp_positive'].to_i
    hsp.gaps         = hash['Hsp_gaps'].to_i
    hsp.align_len    = hash['Hsp_align-len'].to_i
    hsp.density      = hash['Hsp_density'].to_i
    hsp.qseq         = hash['Hsp_qseq']
    hsp.hseq         = hash['Hsp_hseq']
    hsp.midline      = hash['Hsp_midline']
  end

  # Copies the Statistics_* values collected in +hash+ into the
  # statistics table of the last iteration. 'db-num', 'db-len' and
  # 'hsp-len' are stored as Integer, the others as Float.
  def xmlparser_parse_statistics(hash)
    labels = {
      'db-num'    => 'Statistics_db-num',
      'db-len'    => 'Statistics_db-len',
      'hsp-len'   => 'Statistics_hsp-len',
      'eff-space' => 'Statistics_eff-space',
      'kappa'     => 'Statistics_kappa',
      'lambda'    => 'Statistics_lambda',
      'entropy'   => 'Statistics_entropy'
    }
    labels.each do |k,v|
      case k
      when 'db-num', 'db-len', 'hsp-len'
        @iterations.last.statistics[k] = hash[v].to_i
      else
        @iterations.last.statistics[k] = hash[v].to_f
      end
    end
  end

end # class Report
end # class Blast
end # module Bio
# Creates a new Report object from BLAST result text.
#
# Leading whitespace is dropped, anything from a following
# "BLAST"/"TBLAST" banner line onward is cut off and kept in
# @entry_overrun, and the remaining text is split at blank lines
# before being handed to the format0_split_* methods.
def initialize(str)
  overrun = nil
  text = str.sub(/\A\s+/, '')
  # remove trailing entries for sure
  text = text.sub(/\n(T?BLAST.*)/m) { overrun = $1; "\n" }
  @entry_overrun = overrun
  @entry = text

  blocks = text.split(/(?:^[ \t]*\n)+/)
  format0_split_headers(blocks)
  @iterations = format0_split_search(blocks)
  format0_split_stat_params(blocks)
end
# Defines attributes which delegate to @f0dbstat objects.
# For every given name a reader method of that name is generated
# which forwards the call to @f0dbstat.
def self.delegate_to_f0dbstat(*names)
  names.each { |attr| module_eval("def #{attr}; @f0dbstat.#{attr}; end") }
end
# Iterates over each hit of the last iteration.
# Same as <tt>iterations.last.each_hit</tt>.
# Yields a Bio::Blast::Default::Report::Hit object.
# This is very useful in most cases, e.g. for blastall results.
def each_hit
  @iterations.last.each { |hit| yield hit }
end
# Parses the query lines (begins with "Query = ").
#
# Sets @query_def to the definition lines joined with a space and,
# when the "(N letters)" trailer is present, @query_len to N.
# Does nothing when @query_def is already defined.
def format0_parse_query
  unless defined?(@query_def)
    sc = StringScanner.new(@f0query)
    sc.skip(/\s*/)
    if sc.skip_until(/Query\= */) then
      q = []
      # Collect definition lines until the "(N letters)" trailer.
      # StringScanner#rest always returns a String, so the former
      # "!sc.rest" test could never stop the loop; stop explicitly
      # when only whitespace (or nothing) is left to scan.
      begin
        q << sc.scan(/.*/)
        sc.skip(/\s*^ ?/)
      end until (r = sc.skip(/ *\( *([\,\d]+) *letters *\)\s*\z/)) or
                sc.check(/\s*\z/)
      @query_len = sc[1].delete(',').to_i if r
      @query_def = q.join(' ')
    end
  end
end
# Splits the statistical parameters.
#
# Consumes the leading "Database:" blocks into a new F0dbstat
# object, hands every following "Lambda" block to the last
# iteration (together with the F0dbstat object), and stores the
# remaining blocks as the F0dbstat parameters.
def format0_split_stat_params(data)
  dbs = []
  while r = data.first and /^ *Database\:/ =~ r
    dbs << data.shift
  end
  @f0dbstat = self.class::F0dbstat.new(dbs)
  i = -1
  while r = data[0] and /^Lambda/ =~ r
    #i -= 1 unless /^Gapped/ =~ r
    # Always consume the block: when no iteration exists the block
    # used to stay at the head of data and the loop never ended.
    stat = data.shift
    if itr = @iterations[i] then
      itr.instance_eval { @f0stat << stat }
      dbstat = @f0dbstat
      itr.instance_eval { @f0dbstat = dbstat }
    end
  end
  @f0dbstat.f0params = data
end
# Parses parameters.
#
# Runs only once: the colon-separated lines in @f0params are read
# into @hash and the matrix, gap penalty and effective search space
# values are extracted from it.
def parse_params
  return if defined?(@parse_params)

  parse_colon_separated_params(@hash, @f0params)
  #p @hash

  matrix_text = @hash['Matrix']
  if matrix_text
    m = /blastn *matrix *\: *([e\+\-\.\d]+) +([e\+\-\.\d]+)/.match(matrix_text)
    if m
      @matrix = 'blastn'
      @sc_match = m[1].to_i
      @sc_mismatch = m[2].to_i
    else
      @matrix = matrix_text
    end
  end

  penalties = @hash['Gap Penalties']
  if penalties
    m = /Existence\: *([e\+\-\.\d]+)/.match(penalties)
    @gap_open = m[1].to_i if m
    m = /Extension\: *([e\+\-\.\d]+)/.match(penalties)
    @gap_extend = m[1].to_i if m
  end

  #@db_num = @hash['Number of Sequences'] unless defined?(@db_num)
  #@db_len = @hash['length of database'] unless defined?(@db_len)

  space = @hash['effective search space']
  @eff_space = space.tr(',', '').to_i if space

  @parse_params = true
end
private :parse_params

# Returns name of the matrix.
def matrix
  parse_params
  @matrix
end

# Returns the match score of the matrix.
def sc_match
  parse_params
  @sc_match
end

# Returns the mismatch score of the matrix.
def sc_mismatch
  parse_params
  @sc_mismatch
end

# Returns gap open penalty value.
def gap_open
  parse_params
  @gap_open
end

# Returns gap extend penalty value.
def gap_extend
  parse_params
  @gap_extend
end

# Returns effective length of the database.
def eff_space
  parse_params
  @eff_space
end

# Returns e-value threshold specified when BLAST was executed.
def expect
  parse_params
  @expect
end

# Returns number of hits.
def num_hits
  parse_params
  @num_hits
end
# Parses database statistics lines.
#
# Only the first element of @f0dbstat is examined. It sets
# @posted_date, @db_len and @db_num (each only if not yet defined)
# and, when a "Database:" line is found, @database.
def parse_dbstat
  # split into lines, keeping the line terminators
  a = @f0dbstat[0].to_s.split(/^/)
  d = []
  # number of statistics lines still to be found
  i = 3
  # Scan from the last line backwards until the three statistics
  # lines are found or no line is left.
  while i > 0 and line = a.pop
    case line
    when /^\s+Posted date\:\s*(.*)$/
      unless defined?(@posted_date)
        @posted_date = $1.strip
        # lines collected so far come after this line; drop them
        i -= 1; d.clear
      end
    when /^\s+Number of letters in database\:\s*(.*)$/
      unless defined?(@db_len)
        @db_len = $1.tr(',', '').to_i
        i -= 1; d.clear
      end
    when /^\s+Number of sequences in database\:\s*(.*)$/
      unless defined?(@db_num)
        @db_num = $1.tr(',', '').to_i
        i -= 1; d.clear
      end
    else
      # not a statistics line: keep it, in the original order
      d.unshift(line)
    end
  end #while
  # put back the lines which were popped but not discarded
  a.concat(d)
  # Everything from the "Database:" line onward forms the database
  # name; lines before it are thrown away.
  while line = a.shift
    if /^\s+Database\:\s*(.*)$/ =~ line
      a.unshift($1)
      a.each { |x| x.strip! }
      @database = a.join(' ')
      break #while
    end
  end
end #def
private :parse_dbstat
# Creates a new Iteration object.
# It is designed to be called only internally from
# the Bio::Blast::Default::Report class.
# Users shall not use the method directly.
#
# +data+ is the array of text blocks of the report; the blocks which
# belong to this iteration are removed from it.
def initialize(data)
  @f0stat = []
  @f0dbstat = AlwaysNil.instance
  @f0hitlist = []
  @hits = []
  @num = 1
  r = data.shift
  @f0message = [ r ]
  # a "Results from round N" line at the end of the first block
  # is moved into the message list
  r.gsub!(/^Results from round (\d+).*\z/) { |x|
    @num = $1.to_i
    @f0message << x
    ''
  }
  r = data.shift
  while /^Number of occurrences of pattern in the database is +(\d+)/ =~ r
    # PHI-BLAST
    @pattern_in_database = $1.to_i
    @f0message << r
    r = data.shift
  end
  if /^Results from round (\d+)/ =~ r then
    @num = $1.to_i
    @f0message << r
    r = data.shift
  end
  if r and !(/\*{5} No hits found \*{5}/ =~ r) then
    @f0hitlist << r
    # Collect hit list blocks up to the first alignment (">") block.
    # Stop as well when no block is left: the former condition
    # required a ">" block and otherwise kept appending nil forever.
    begin
      @f0hitlist << data.shift unless data.empty?
      r = data[0]
    end until r.nil? or /^\>/ =~ r
    if r and /^CONVERGED\!/ =~ r then
      r.sub!(/(.*\n)*^CONVERGED\!.*\n/) { |x| @f0hitlist << x; '' }
    end
    if defined?(@pattern_in_database) and r = data.first then
      #PHI-BLAST
      while /^\>/ =~ r
        @hits << Hit.new(data)
        r = data.first
        break unless r
        while /^Significant alignments for pattern/ =~ r
          data.shift
          r = data.first
        end
      end
    else
      #not PHI-BLAST
      while r = data[0] and /^\>/ =~ r
        @hits << Hit.new(data)
      end
    end
  end
  if /^CONVERGED\!\s*$/ =~ @f0hitlist[-1].to_s then
    @message = 'CONVERGED!'
    @flag_converged = true
  end
end
def pattern #PHI-BLAST
  # Parsed lazily, and only when the PHI-BLAST occurrence count was seen.
  if !defined?(@pattern) and defined?(@pattern_in_database)
    @pattern = nil
    @pattern_positions = []
    @f0message.each do |msg|
      scanner = StringScanner.new(msg)
      next unless scanner.skip_until(/^ *pattern +([^\s]+)/)
      # Only the first pattern string found is kept.
      @pattern ||= scanner[1]
      scanner.skip_until(/(?:^ *| +)at position +(\d+) +of +query +sequence/)
      @pattern_positions << scanner[1].to_i
    end
  end
  @pattern
end

# (PHI-BLAST) Returns the positions of the pattern in the query.
# Returns nil if it is not a PHI-BLAST result.
def pattern_positions #PHI-BLAST
  pattern
  @pattern_positions
end

# (PSI-BLAST)
# Hits that were found again in this iteration, as an array of
# Bio::Blast::Default::Report::Hit objects.
def hits_found_again
  parse_hitlist
  @hits_found_again
end

# (PSI-BLAST)
# Hits that were newly found in this iteration, as an array of
# Bio::Blast::Default::Report::Hit objects.
def hits_newly_found
  parse_hitlist
  @hits_newly_found
end

# (PHI-BLAST) Hits for pattern: an array of arrays of hits, one inner
# array per "Significant matches for pattern" section (nil if no such
# section was seen).
def hits_for_pattern
  parse_hitlist
  @hits_for_pattern
end

# Parses the list of hits (once) and sorts the hits into the
# found-again / newly-found / unknown / for-pattern groups.
def parse_hitlist
  return if defined?(@parse_hitlist)
  @hits_found_again = []
  @hits_newly_found = []
  @hits_unknown_state = []
  offset = 0                    # number of hit-list rows handled so far
  bucket = @hits_newly_found    # group receiving the hits of the next rows
  header_pending = true
  @f0hitlist.each do |chunk|
    scanner = StringScanner.new(chunk)
    if header_pending
      # The first chunk is the list header; it only selects the group.
      header_pending = false
      if scanner.skip_until(/^Sequences used in model and found again\:\s*$/)
        bucket = @hits_found_again
      end
      next
    end
    next if scanner.skip(/^CONVERGED\!$/)
    if scanner.skip(/^Sequences not found previously or not previously below threshold\:\s*$/)
      bucket = @hits_newly_found
      next
    end
    if scanner.skip(/^Sequences.+\:\s*$/)
      # possibly a bug or unknown format?
      bucket = @hits_unknown_state
      next
    end
    if scanner.skip(/^Significant (matches|alignments) for pattern/)
      # PHI-BLAST; nothing to do for 'alignments'
      if scanner[1] == 'matches'
        @hits_for_pattern = [] unless defined?(@hits_for_pattern)
        bucket = []
        @hits_for_pattern << bucket
      end
      next
    end
    rows = chunk.split(/^/).reject { |row| row.empty? }
    if offset + rows.size > @hits.size
      # Rows without a parsed alignment get a Hit built from the row
      # itself.  The row is reversed so that the two right-most columns
      # (e-value, bit score) can be split off from the definition.
      ((@hits.size - offset)...(rows.size)).each do |idx|
        fields = rows[idx].strip.reverse.split(/\s+/, 3)
        fields.each { |field| field.reverse! }
        hit = Hit.new([ fields.pop.to_s.sub(/\.+\z/, '') ])
        bits = fields.pop.to_s
        bits = '1' + bits if bits[0] == ?e
        bits = bits.empty? ? nil : bits.to_f
        expect = fields.pop.to_s
        expect = '1' + expect if expect[0] == ?e
        expect = expect.empty? ? (1.0/0.0) : expect.to_f
        hit.instance_eval { @bit_score = bits; @evalue = expect }
        @hits << hit
      end
    end
    bucket.concat(@hits[offset, rows.size])
    offset += rows.size
  end
  @hits_found_again.each do |hit|
    hit.instance_eval { @again = true }
  end
  @parse_hitlist = true
end
private :parse_hitlist

# Parses statistics for the iteration (once).
# Each element of @f0stat is a header row of labels followed by a row
# of numbers; an element starting with "Gapped" fills the gapped values.
def parse_stat
  return if defined?(@parse_stat)
  @f0stat.each do |text|
    scanner = StringScanner.new(text)
    scanner.skip(/\s*/)
    gapped = scanner.skip(/Gapped\s*/) ? true : nil
    labels = []
    while label = scanner.scan(/\w+/)
      labels << label
      scanner.skip(/ */)
    end
    scanner.skip(/\s*/)
    values = {}
    while number = scanner.scan(/[e\+\-\.\d]+/)
      values[labels.shift] = number
      scanner.skip(/ */)
    end
    lam, kap, ent = %w[Lambda K H].map do |key|
      (raw = values[key]) ? raw.to_f : nil
    end
    if gapped
      @gapped_lambda = lam
      @gapped_kappa = kap
      @gapped_entropy = ent
    else
      @lambda = lam
      @kappa = kap
      @entropy = ent
    end
  end
  @parse_stat = true
end
private :parse_stat

# Defines attributes which call +parse_stat+ before accessing.
def self.method_after_parse_stat(*names)
  # For every given name, generates a reader method that runs parse_stat
  # and then returns the instance variable of the same name.
  names.each do |x|
    module_eval("def #{x}; parse_stat; @#{x}; end")
  end
end
private_class_method :method_after_parse_stat

# NOTE: the "attr_reader ... if false" lines below never execute; the
# real readers are generated by the method_after_parse_stat calls.
# (presumably they exist only so that documentation tools list the
# attributes -- TODO confirm)

# lambda of the database
attr_reader :lambda if false #dummy
method_after_parse_stat :lambda

# kappa of the database
attr_reader :kappa if false #dummy
method_after_parse_stat :kappa

# entropy of the database
attr_reader :entropy if false #dummy
method_after_parse_stat :entropy

# gapped lambda of the database
attr_reader :gapped_lambda if false #dummy
method_after_parse_stat :gapped_lambda

# gapped kappa of the database
attr_reader :gapped_kappa if false #dummy
method_after_parse_stat :gapped_kappa

# gapped entropy of the database
attr_reader :gapped_entropy if false #dummy
method_after_parse_stat :gapped_entropy

# Defines attributes which delegate to @f0dbstat objects.
# For every given name, generates a method that forwards the call of
# the same name to @f0dbstat.
def self.delegate_to_f0dbstat(*names)
  names.each do |x|
    module_eval("def #{x}; @f0dbstat.#{x}; end")
  end
end
private_class_method :delegate_to_f0dbstat

# name (title or filename) of the database
attr_reader :database if false #dummy
delegate_to_f0dbstat :database

# posted date of the database
attr_reader :posted_date if false #dummy
delegate_to_f0dbstat :posted_date

# number of sequences in database
attr_reader :db_num if false #dummy
delegate_to_f0dbstat :db_num

# number of letters in database
attr_reader :db_len if false #dummy
delegate_to_f0dbstat :db_len

# effective length of the database
attr_reader :eff_space if false #dummy
delegate_to_f0dbstat :eff_space

# e-value threshold specified when BLAST was executed
attr_reader :expect if false #dummy
delegate_to_f0dbstat :expect
end #class Iteration

# Bio::Blast::Default::Report::Hit contains information about a hit.
# It may contain some Bio::Blast::Default::Report::HSP objects.
class Hit
  # Creates a new Hit object.
  # It is designed to be called only internally from the
  # Bio::Blast::Default::Report::Iteration class.
  # Users should not call the method directly.
def initialize(data)
  # The first chunk is the hit name; every following chunk that starts
  # with whitespace and "Score" begins an HSP, which consumes its own
  # chunks from +data+.
  @f0hitname = data.shift
  @hsps = []
  while (chunk = data[0]) and /\A\s+Score/ =~ chunk
    @hsps << HSP.new(data)
  end
  @again = false
end

# Hsp(high-scoring segment pair)s of the hit, as an array of
# Bio::Blast::Default::Report::HSP objects.
attr_reader :hsps

# Iterates over each hsp(high-scoring segment pair) of the hit,
# yielding a Bio::Blast::Default::Report::HSP object.
def each
  @hsps.each do |hsp|
    yield hsp
  end
end

# (PSI-BLAST)
# True if the hit is found again in the iteration,
# false or nil otherwise.
def found_again?
  @again
end

# Score of the first hsp, or nil when the hit has no hsp.
def score
  first_hsp = @hsps.first
  first_hsp ? first_hsp.score : nil
end

# Bit score of the hit (shown in hit list of BLAST result).
# A value already stored in @bit_score wins; otherwise the first
# hsp's bit score is used.
def bit_score
  if !defined?(@bit_score) and (first_hsp = @hsps.first)
    @bit_score = first_hsp.bit_score
  end
  @bit_score
end

# E-value of the hit (shown in hit list of BLAST result).
# A value already stored in @evalue wins; otherwise the first
# hsp's e-value is used.
def evalue
  if !defined?(@evalue) and (first_hsp = @hsps.first)
    @evalue = first_hsp.evalue
  end
  @evalue
end

# Parses the name of the hit (once) into @definition and @len.
def parse_hitname
  return if defined?(@parse_hitname)
  scanner = StringScanner.new(@f0hitname)
  scanner.skip(/\s*/)
  scanner.skip(/\>/)
  parts = []
  length_found = nil
  # Definition lines are collected until the text ends or the trailing
  # "Length = N" is reached.
  loop do
    parts << scanner.scan(/.*/)
    scanner.skip(/\s*/)
    break unless scanner.rest?
    length_found = scanner.skip(/ *Length *\= *([\,\d]+)\s*\z/)
    break if length_found
  end
  @len = length_found ? scanner[1].delete(',').to_i : nil
  @definition = parts.join(" ")
  @parse_hitname = true
end
private :parse_hitname

# Length of the hit (nil when the name carries no "Length =").
def len
  parse_hitname
  @len
end

# Definition of the hit.
def definition
  parse_hitname
  @definition
end

# First whitespace-delimited word of the definition.
def target_id
  definition[/^\s*(\S+)/, 1]
end

#--
# Aliases to keep compatibility with Bio::Fasta::Report::Hit.
alias target_def definition
alias target_len len
#++

# Sends the given method to the first hsp, or returns nil if
# there are no hsps.
def hsp_first(m)
  first_hsp = hsps.first
  first_hsp ? first_hsp.send(m) : nil
end
private :hsp_first

#--
# Shortcut methods for the best Hsp
# (Compatibility method with FASTA)
#++

# Same as hsps.first.identity.
# Returns nil if there are no hsp in the hit.
# (Compatibility method with FASTA) def identity; hsp_first :identity; end # Same as hsps.first.align_len. # Returns nil if there are no hsp in the hit. # (Compatibility method with FASTA) def overlap; hsp_first :align_len; end # Same as hsps.first.qseq. # Returns nil if there are no hsp in the hit. # (Compatibility method with FASTA) def query_seq; hsp_first :qseq; end # Same as hsps.first.hseq. # Returns nil if there are no hsp in the hit. # (Compatibility method with FASTA) def target_seq; hsp_first :hseq; end # Same as hsps.first.midline. # Returns nil if there are no hsp in the hit. # (Compatibility method with FASTA) def midline; hsp_first :midline; end # Same as hsps.first.query_from. # Returns nil if there are no hsp in the hit. # (Compatibility method with FASTA) def query_start; hsp_first :query_from; end # Same as hsps.first.query_to. # Returns nil if there are no hsp in the hit. # (Compatibility method with FASTA) def query_end; hsp_first :query_to; end # Same as hsps.first.hit_from. # Returns nil if there are no hsp in the hit. # (Compatibility method with FASTA) def target_start; hsp_first :hit_from; end # Same as hsps.first.hit_to. # Returns nil if there are no hsp in the hit. # (Compatibility method with FASTA) def target_end; hsp_first :hit_to; end # Returns an array which contains # [ query_start, query_end, target_start, target_end ]. # (Compatibility method with FASTA) def lap_at [ query_start, query_end, target_start, target_end ] end end #class Hit # Bio::Blast::Default::Report::HSP holds information about the hsp # (high-scoring segment pair). class HSP # Creates new HSP object. # It is designed to be called only internally from the # Bio::Blast::Default::Report::Hit class. # Users should not call the method directly. def initialize(data) @f0score = data.shift @f0alignment = [] while r = data[0] and /^(Query|Sbjct)\:/ =~ r @f0alignment << data.shift end end # Parses scores, identities, positives, gaps, and so on. 
def parse_score
  # Runs only once; results are cached in instance variables.
  # Raises ScanError when the score text contains something that none
  # of the branches below recognizes, or when the alignment lengths
  # reported by Identities/Positives/Gaps disagree.
  unless defined?(@parse_score)
    sc = StringScanner.new(@f0score)
    while sc.rest?
      sc.skip(/\s*/)
      if sc.skip(/Expect(?:\(\d+\))? *\= *([e\+\-\.\d]+)/) then
        ev = sc[1].to_s
        # a value written like "e-10" is read as "1e-10"
        ev = '1' + ev if ev[0] == ?e
        @evalue = ev.to_f
      elsif sc.skip(/Score *\= *([e\+\-\.\d]+) *bits *\( *([e\+\-\.\d]+) *\)/) then
        bs = sc[1]
        bs = '1' + bs if bs[0] == ?e
        @bit_score = bs.to_f
        @score = sc[2].to_i
      elsif sc.skip(/(Identities|Positives|Gaps) *\= (\d+) *\/ *(\d+) *\(([\.\d]+) *\% *\)/) then
        # the denominator is the aligned length; it must be the same
        # in every "x/y (z%)" item
        alen = sc[3].to_i
        @align_len = alen unless defined?(@align_len)
        raise ScanError if alen != @align_len
        case sc[1]
        when 'Identities'
          @identity = sc[2].to_i
          @percent_identity = sc[4].to_i
        when 'Positives'
          @positive = sc[2].to_i
          @percent_positive = sc[4].to_i
        when 'Gaps'
          @gaps = sc[2].to_i
          @percent_gaps = sc[4].to_i
        else
          raise ScanError
        end
      elsif sc.skip(/Strand *\= *(Plus|Minus) *\/ *(Plus|Minus)/) then
        @query_strand = sc[1]
        @hit_strand = sc[2]
        if sc[1] == sc[2] then
          @query_frame = 1
          @hit_frame = 1
        elsif sc[1] == 'Plus' then # Plus/Minus
          # complement sequence against xml(-m 7)
          # In xml(-m 8), -1=>Plus, 1=>Minus ???
          #@query_frame = -1
          #@hit_frame = 1
          @query_frame = 1
          @hit_frame = -1
        else # Minus/Plus
          @query_frame = -1
          @hit_frame = 1
        end
      elsif sc.skip(/Frame *\= *([\-\+]\d+)( *\/ *([\-\+]\d+))?/) then
        @query_frame = sc[1].to_i
        # the hit frame is present only in the "Frame = q / h" form
        if sc[2] then
          @hit_frame = sc[3].to_i
        end
      elsif sc.skip(/Score *\= *([e\+\-\.\d]+) +\(([e\+\-\.\d]+) *bits *\)/) then
        #WU-BLAST
        @score = sc[1].to_i
        bs = sc[2]
        bs = '1' + bs if bs[0] == ?e
        @bit_score = bs.to_f
      elsif sc.skip(/P *\= * ([e\+\-\.\d]+)/) then
        #WU-BLAST
        @p_sum_n = nil
        pv = sc[1]
        pv = '1' + pv if pv[0] == ?e
        @pvalue = pv.to_f
      elsif sc.skip(/Sum +P *\( *(\d+) *\) *\= *([e\+\-\.\d]+)/) then
        #WU-BLAST
        @p_sum_n = sc[1].to_i
        pv = sc[2]
        pv = '1' + pv if pv[0] == ?e
        @pvalue = pv.to_f
      elsif sc.skip(/Method\:\s*(.+)/) then
        # signature of composition-based statistics method
        # for example, "Method: Composition-based stats."
        @stat_method = sc[1]
      else
        raise ScanError
      end
      # skip the separator (whitespace and an optional comma) between items
      sc.skip(/\s*\,?\s*/)
    end
    @parse_score = true
  end
end
private :parse_score

# Defines attributes which call parse_score before accessing.
# For every given name, generates a reader method that runs parse_score
# and then returns the instance variable of the same name.
def self.method_after_parse_score(*names)
  names.each do |x|
    module_eval("def #{x}; parse_score; @#{x}; end")
  end
end
private_class_method :method_after_parse_score

# NOTE: the "attr_reader ... if false" lines below never execute; the
# real readers are generated by the method_after_parse_score calls.

# bit score
attr_reader :bit_score if false #dummy
method_after_parse_score :bit_score

# score
attr_reader :score if false #dummy
method_after_parse_score :score

# e-value
attr_reader :evalue if false #dummy
method_after_parse_score :evalue

# frame of the query
attr_reader :query_frame if false #dummy
method_after_parse_score :query_frame

# frame of the hit
attr_reader :hit_frame if false #dummy
method_after_parse_score :hit_frame

# Identity (number of identical nucleotides or amino acids)
attr_reader :identity if false #dummy
method_after_parse_score :identity

# percent of identical nucleotides or amino acids
attr_reader :percent_identity if false #dummy
method_after_parse_score :percent_identity

# Positives (number of positive hit amino acids or nucleotides)
attr_reader :positive if false #dummy
method_after_parse_score :positive

# percent of positive hit amino acids or nucleotides
attr_reader :percent_positive if false #dummy
method_after_parse_score :percent_positive

# Gaps (number of gaps)
attr_reader :gaps if false #dummy
method_after_parse_score :gaps

# percent of gaps
attr_reader :percent_gaps if false #dummy
method_after_parse_score :percent_gaps

# aligned length
attr_reader :align_len if false #dummy
method_after_parse_score :align_len

# strand of the query ("Plus" or "Minus" or nil)
attr_reader :query_strand if false #dummy
method_after_parse_score :query_strand

# strand of the hit ("Plus" or "Minus" or nil)
attr_reader :hit_strand if false #dummy
method_after_parse_score :hit_strand

# statistical method for calculating evalue and/or score
# (nil or a string)
# (note that composition-based statistics for blastp or tblastn
# were enabled by default after NCBI BLAST 2.2.17)
attr_reader :stat_method if false #dummy
method_after_parse_score :stat_method

# Parses alignments.
# Runs only once.  Fills @qseq, @hseq, @midline and the from/to
# positions of query and hit.  Raises ScanError when the lines do not
# come in the expected order or a line is not recognized.
def parse_alignment
  unless defined?(@parse_alignment)
    qpos1 = nil
    qpos2 = nil
    spos1 = nil
    spos2 = nil
    qseq = []
    sseq = []
    mseq = []
    pos_st = nil   # width of the "Query: N " prefix, i.e. the column
                   # where the sequence text starts
    len_seq = 0    # length of the sequence text of the current Query line
    nextline = :q  # kind of line expected next (:q, :m or :s)
    @f0alignment.each do |x|
      sc = StringScanner.new(x)
      while sc.rest?
        #p pos_st, len_seq
        #p nextline.to_s
        if r = sc.skip(/Query\: *(\d+) */) then
          pos_st = r
          pos1 = sc[1]
          len_seq = sc.skip(/[^ ]*/)
          seq = sc[0]
          sc.skip(/ *(\d+) *\n/)
          pos2 = sc[1]
          raise ScanError unless nextline == :q
          # the start position is taken from the first Query line only,
          # the end position from the last one
          qpos1 = pos1.to_i unless qpos1
          qpos2 = pos2.to_i
          qseq << seq
          nextline = :m
        elsif r = sc.scan(/Sbjct\: *(\d+) *.+ +(\d+) *\n/) then
          pos1 = sc[1]
          pos2 = sc[2]
          raise ScanError unless pos_st
          raise ScanError unless len_seq
          # the subject text is cut at the same columns as the query text
          seq = r[pos_st, len_seq]
          if nextline == :m then
            # no middle line was seen for this row: use a blank one
            mseq << (' ' * len_seq)
          end
          spos1 = pos1.to_i unless spos1
          spos2 = pos2.to_i
          sseq << seq
          nextline = :q
        elsif r = sc.scan(/ {6}.+/) then
          # middle line
          raise ScanError unless nextline == :m
          mseq << r[pos_st, len_seq]
          sc.skip(/\n/)
          nextline = :s
        elsif r = sc.skip(/pattern +\d+.+/) then
          # PHI-BLAST
          # do nothing
          sc.skip(/\n/)
        else
          raise ScanError
        end
      end #while
    end #each
    #p qseq, sseq, mseq
    @qseq = qseq.join('')
    @hseq = sseq.join('')
    @midline = mseq.join('')
    @query_from = qpos1
    @query_to = qpos2
    @hit_from = spos1
    @hit_to = spos2
    @parse_alignment = true
  end #unless
end #def
private :parse_alignment

# Defines attributes which call parse_alignment before accessing.
def self.method_after_parse_alignment(*names)
  # For every given name, generates a reader method that runs
  # parse_alignment and then returns the instance variable of the
  # same name.
  names.each do |x|
    module_eval("def #{x}; parse_alignment; @#{x}; end")
  end
end
private_class_method :method_after_parse_alignment

# NOTE: the "attr_reader ... if false" lines below never execute; the
# real readers are generated by the method_after_parse_alignment calls.

# query sequence (with gaps) of the alignment of the hsp
attr_reader :qseq if false #dummy
method_after_parse_alignment :qseq

# hit sequence (with gaps) of the alignment of the hsp
attr_reader :hseq if false #dummy
method_after_parse_alignment :hseq

# middle line of the alignment of the hsp
attr_reader :midline if false #dummy
method_after_parse_alignment :midline

# start position of the query (the first position is 1)
attr_reader :query_from if false #dummy
method_after_parse_alignment :query_from

# end position of the query (including its position)
attr_reader :query_to if false #dummy
method_after_parse_alignment :query_to

# start position of the hit (the first position is 1)
attr_reader :hit_from if false #dummy
method_after_parse_alignment :hit_from

# end position of the hit (including its position)
attr_reader :hit_to if false #dummy
method_after_parse_alignment :hit_to
end #class HSP
end #class Report

# NCBI BLAST default (-m 0 option) output parser for TBLAST.
# All methods are equal to Bio::Blast::Default::Report.
# Only DELIMITER (and RS) is different.
class Report_TBlast < Report
  # Delimiter of each entry for TBLAST. Bio::FlatFile uses it.
  DELIMITER = RS = "\nTBLAST"
  # (Integer) excess read size included in DELIMITER.
DELIMITER_OVERRUN = 6 # "TBLAST" end #class Report_TBlast end #module Default end #class Blast end #module Bio ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/blast/ddbj.rb�����������������������������������������������������������0000644�0000041�0000041�00000007021�12200110570�017473� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/blast/ddbj.rb - Remote BLAST wrapper using DDBJ web service # # Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org> # License:: The Ruby License # require 'bio/io/ddbjrest' module Bio::Blast::Remote # Remote BLAST factory using DDBJ Web API for Biology # (http://xml.nig.ac.jp/). # module DDBJ # Creates a remote BLAST factory using DDBJ. # Returns Bio::Blast object. # # Note for future improvement: In the future, it might return # Bio::Blast::Remote::DDBJ or other object. # def self.new(program, db, options = []) Bio::Blast.new(program, db, options, 'ddbj') end # Information about DDBJ BLAST. module Information include Bio::Blast::Remote::Information # (private) parse database information def _parse_databases if defined? 
@parse_databases return nil if @parse_databases end drv = Bio::DDBJ::REST::Blast.new str = drv.getSupportDatabaseList databases = {} dbdescs = {} keys = [ 'blastn', 'blastp' ] keys.each do |key| databases[key] ||= [] dbdescs[key] ||= {} end prefix = '' prefix_count = 0 str.each_line do |line| a = line.strip.split(/\s*\-\s*/, 2) case a.size when 1 prefix = a[0].to_s.strip prefix += ': ' unless prefix.empty? prefix_count = 0 next #each_line when 0 prefix = '' if prefix_count > 0 next #each_line end name = a[0].to_s.strip.freeze desc = a[1].to_s.strip key = case desc when /\(NT\)\s*$/ 'blastn' when /\(AA\)\s*$/ 'blastp' else warn "DDBJ BLAST: could not determine the database is NT or AA: #{line.chomp}" if $VERBOSE next #each_line end desc = (prefix + desc).freeze prefix_count += 1 databases[key].push name dbdescs[key][name] = desc end databases['blastp'] ||= [] dbdescs['blastp'] ||= [] databases['blastn'].freeze databases['blastp'].freeze databases['blastx'] = databases['blastp'] dbdescs['blastx'] = dbdescs['blastp'] databases['tblastn'] = databases['blastn'] dbdescs['tblastn'] = dbdescs['blastn'] databases['tblastx'] = databases['blastn'] dbdescs['tblastx'] = dbdescs['blastn'] @databases = databases @database_descriptions = dbdescs @parse_databases = true true end private :_parse_databases end #module Information extend Information # executes BLAST and returns result as a string def exec_ddbj(query) options = make_command_line_options opt = Bio::Blast::NCBIOptions.new(options) # REST objects are cached @ddbj_remote_blast ||= Bio::DDBJ::REST::Blast.new @ddbj_request_manager ||= Bio::DDBJ::REST::RequestManager.new program = opt.delete('-p') db = opt.delete('-d') optstr = Bio::Command.make_command_line_unix(opt.options) # using searchParamAsync qid = @ddbj_remote_blast.searchParamAsync(program, db, query, optstr) @output = qid result = @ddbj_request_manager.wait_getAsyncResult(qid) @output = result return @output end end #module DDBJ # for lazy load DDBJ module Ddbj = 
DDBJ end #module Bio::Blast::Remote ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/blast/report.rb���������������������������������������������������������0000644�0000041�0000041�00000042243�12200110570�020110� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/blast/report.rb - BLAST Report class # # Copyright:: Copyright (C) 2003 Toshiaki Katayama <k@bioruby.org> # License:: The Ruby License # require 'bio/io/flatfile' module Bio require 'bio/appl/blast' unless const_defined?(:Blast) class Blast # = Bio::Blast::Report # # Parsed results of the blast execution for Tab-delimited and XML output # format. Tab-delimited reports are consists of # # Query id, # Subject id, # percent of identity, # alignment length, # number of mismatches (not including gaps), # number of gap openings, # start of alignment in query, # end of alignment in query, # start of alignment in subject, # end of alignment in subject, # expected value, # bit score. # # according to the MEGABLAST document (README.mbl). As for XML output, # see the following DTDs. 
# # * http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd # * http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.mod # * http://www.ncbi.nlm.nih.gov/dtd/NCBI_Entity.mod # class Report #-- # require lines moved here to avoid circular require #++ require 'bio/appl/blast/xmlparser' require 'bio/appl/blast/rexml' require 'bio/appl/blast/format8' # for Bio::FlatFile support (only for XML data) DELIMITER = RS = "</BlastOutput>\n" # Specify to use XMLParser to parse XML (-m 7) output. def self.xmlparser(data) self.new(data, :xmlparser) end # Specify to use REXML to parse XML (-m 7) output. def self.rexml(data) self.new(data, :rexml) end # Specify to use tab delimited output parser. def self.tab(data) self.new(data, :tab) end def auto_parse(data) if /<?xml/.match(data[/.*/]) if defined?(XMLParser) xmlparser_parse(data) @reports = blastxml_split_reports else rexml_parse(data) @reports = blastxml_split_reports end else tab_parse(data) end end private :auto_parse # Passing a BLAST output from 'blastall -m 7' or '-m 8' as a String. # Formats are auto detected. def initialize(data, parser = nil) @iterations = [] @parameters = {} case parser when :xmlparser # format 7 xmlparser_parse(data) @reports = blastxml_split_reports when :rexml # format 7 rexml_parse(data) @reports = blastxml_split_reports when :tab # format 8 tab_parse(data) when false # do not parse, creates an empty object else auto_parse(data) end end # Returns an Array of Bio::Blast::Report::Iteration objects. attr_reader :iterations # Returns a Hash containing execution parameters. Valid keys are: # 'matrix', 'expect', 'include', 'sc-match', 'sc-mismatch', # 'gap-open', 'gap-extend', 'filter' attr_reader :parameters #-- # Shortcut for BlastOutput values. #++ # program name (e.g. "blastp") (String) attr_reader :program # BLAST version (e.g. 
"blastp 2.2.18 [Mar-02-2008]") (String) attr_reader :version # reference (String) attr_reader :reference # database name or title (String) attr_reader :db # query ID (String) attr_reader :query_id # query definition line (String) attr_reader :query_def # query length (Integer) attr_reader :query_len # Matrix used (-M) : shortcuts for @parameters def matrix; @parameters['matrix']; end # Expectation threshold (-e) : shortcuts for @parameters def expect; @parameters['expect']; end # Inclusion threshold (-h) : shortcuts for @parameters def inclusion; @parameters['include']; end # Match score for NT (-r) : shortcuts for @parameters def sc_match; @parameters['sc-match']; end # Mismatch score for NT (-q) : shortcuts for @parameters def sc_mismatch; @parameters['sc-mismatch']; end # Gap opening cost (-G) : shortcuts for @parameters def gap_open; @parameters['gap-open']; end # Gap extension cost (-E) : shortcuts for @parameters def gap_extend; @parameters['gap-extend']; end # Filtering options (-F) : shortcuts for @parameters def filter; @parameters['filter']; end # PHI-BLAST pattern : shortcuts for @parameters def pattern; @parameters['pattern']; end # Limit of request to Entrez : shortcuts for @parameters def entrez_query; @parameters['entrez-query']; end # Iterates on each Bio::Blast::Report::Iteration object. (for blastpgp) def each_iteration @iterations.each do |x| yield x end end # Iterates on each Bio::Blast::Report::Hit object of the the last Iteration. # Shortcut for the last iteration's hits (for blastall) def each_hit @iterations.last.each do |x| yield x end end alias each each_hit # Returns a Array of Bio::Blast::Report::Hits of the last iteration. # Shortcut for the last iteration's hits def hits @iterations.last.hits end # Returns a Hash containing execution statistics of the last iteration. # Valid keys are: # 'db-num', 'db-len', 'hsp-len', 'eff-space', 'kappa', 'lambda', 'entropy' # Shortcut for the last iteration's statistics. 
def statistics @iterations.last.statistics end # Number of sequences in BLAST db def db_num; statistics['db-num']; end # Length of BLAST db def db_len; statistics['db-len']; end # Effective HSP length def hsp_len; statistics['hsp-len']; end # Effective search space def eff_space; statistics['eff-space']; end # Karlin-Altschul parameter K def kappa; statistics['kappa']; end # Karlin-Altschul parameter Lamba def lambda; statistics['lambda']; end # Karlin-Altschul parameter H def entropy; statistics['entropy']; end # Returns a String (or nil) containing execution message of the last # iteration (typically "CONVERGED"). # Shortcut for the last iteration's message (for checking 'CONVERGED') def message @iterations.last.message end # Bio::Blast::Report::Iteration class Iteration def initialize @message = nil @statistics = {} @num = 1 @hits = [] end # Returns an Array of Bio::Blast::Report::Hit objects. attr_reader :hits # Returns a Hash containing execution statistics. # Valid keys are: # 'db-len', 'db-num', 'eff-space', 'entropy', 'hsp-len', 'kappa', 'lambda' attr_reader :statistics # Returns the number of iteration counts. attr_accessor :num # Returns a String (or nil) containing execution message (typically # "CONVERGED"). attr_accessor :message # Iterates on each Bio::Blast::Report::Hit object. def each @hits.each do |x| yield x end end # query ID, only available for new BLAST XML format attr_accessor :query_id # query definition, only available for new BLAST XML format attr_accessor :query_def # query length, only available for new BLAST XML format attr_accessor :query_len end #class Iteration # Bio::Blast::Report::Hit class Hit def initialize @hsps = [] end # Returns an Array of Bio::Blast::Report::Hsp objects. attr_reader :hsps # Hit number attr_accessor :num # SeqId of subject attr_accessor :hit_id # Length of subject attr_accessor :len # Definition line of subject attr_accessor :definition # Accession attr_accessor :accession # Iterates on each Hsp object. 
def each
  @hsps.each do |x|
    yield x
  end
end

# Compatible method with Bio::Fasta::Report::Hit class.
attr_accessor :query_id

# Compatible method with Bio::Fasta::Report::Hit class.
attr_accessor :query_def

# Compatible method with Bio::Fasta::Report::Hit class.
attr_accessor :query_len

# Compatible method with Bio::Fasta::Report::Hit class.
alias target_id accession

# Compatible method with Bio::Fasta::Report::Hit class.
alias target_def definition

# Compatible method with Bio::Fasta::Report::Hit class.
alias target_len len

# Shortcut methods for the best Hsp, some are also compatible with
# Bio::Fasta::Report::Hit class.
# NOTE: each of them calls a method on @hsps.first, which is nil while
# the hit has no HSP, so they raise NoMethodError in that case.
def evalue; @hsps.first.evalue; end
def bit_score; @hsps.first.bit_score; end
def identity; @hsps.first.identity; end
def percent_identity; @hsps.first.percent_identity; end
def overlap; @hsps.first.align_len; end

def query_seq; @hsps.first.qseq; end
def target_seq; @hsps.first.hseq; end
def midline; @hsps.first.midline; end

def query_start; @hsps.first.query_from; end
def query_end; @hsps.first.query_to; end
def target_start; @hsps.first.hit_from; end
def target_end; @hsps.first.hit_to; end

# Returns [ query_start, query_end, target_start, target_end ].
def lap_at
  [ query_start, query_end, target_start, target_end ]
end
end

# Bio::Blast::Report::Hsp
class Hsp
  def initialize
    @hsp = {}
  end
  attr_reader :hsp

  # HSP number
  attr_accessor :num
  # Score (in bits) of HSP
  attr_accessor :bit_score
  # Score of HSP
  attr_accessor :score
  # E-value of HSP
  attr_accessor :evalue
  # Start of HSP in query
  attr_accessor :query_from
  # End of HSP in query
  attr_accessor :query_to
  # Start of HSP in subject
  attr_accessor :hit_from
  # End of HSP in subject
  attr_accessor :hit_to
  # Start of PHI-BLAST pattern
  attr_accessor :pattern_from
  # End of PHI-BLAST pattern
  attr_accessor :pattern_to
  # Translation frame of query
  attr_accessor :query_frame
  # Translation frame of subject
  attr_accessor :hit_frame
  # Number of identities in HSP
  attr_accessor :identity
  # Number of positives in HSP
  attr_accessor :positive
  # Number of gaps in HSP
  attr_accessor :gaps
  # Length of the alignment used
  attr_accessor :align_len
  # Score density
  attr_accessor :density
  # Alignment string for the query (with gaps)
  attr_accessor :qseq
  # Alignment string for subject (with gaps)
  attr_accessor :hseq
  # Formatting middle line
  attr_accessor :midline
  # Available only for '-m 8' format outputs.
  attr_accessor :percent_identity
  # Available only for '-m 8' format outputs.
  attr_accessor :mismatch_count
end

# When the report contains results for multiple query sequences,
# returns an array of Bio::Blast::Report objects corresponding to
# the multiple queries.
# Otherwise, returns nil.
#
# Note for "No hits found":
# When no hits found for a query sequence, the result for the query
# is completely void and no information available in the result XML,
# including query ID and query definition.
# The only trace is that iteration number is skipped.
# This means that if the no-hit query is the last query,
# the query can not be detected, because the result XML is
# completely the same as the result XML without the query.
attr_reader :reports

private

# (private method)
# In new BLAST XML (blastall >= 2.2.14), results of multiple queries
# are stored in <Iteration>. This method splits iterations into
# multiple Bio::Blast objects and returns them as an array.
# Returns nil when no iteration carries query_id, query_def or
# query_len.
def blastxml_split_reports
  unless self.iterations.find { |iter|
      iter.query_id || iter.query_def || iter.query_len
    } then
    # traditional BLAST XML format, or blastpgp result.
    return nil
  end

  # new BLAST XML format (blastall 2.2.14 or later)
  origin = self
  reports = []
  prev_iternum = 0
  # true until the report-level query_* values of the origin have been
  # handed to the first report (or first filler iteration) created
  firsttime = true

  orig_iters = self.iterations
  orig_iters.each do |iter|
    blast = self.class.new(nil, false)
    # When no hits found, the iteration is skipped in NCBI BLAST XML.
    # So, filled with empty report object.
    if prev_iternum + 1 < iter.num then
      ((prev_iternum + 1)...(iter.num)).each do |num|
        empty_i = Iteration.new
        empty_i.num = num
        empty_i.instance_eval {
          if firsttime then
            @query_id = origin.query_id
            @query_def = origin.query_def
            @query_len = origin.query_len
            firsttime = false
          end
        }
        empty = self.class.new(nil, false)
        empty.instance_eval {
          # query_* are copied from the empty_i
          @query_id = empty_i.query_id
          @query_def = empty_i.query_def
          @query_len = empty_i.query_len
          # others are copied from the origin
          @program = origin.program
          @version = origin.version
          @reference = origin.reference
          @db = origin.db
          @parameters.update(origin.parameters)
          # the empty_i is added to the iterations
          @iterations.push empty_i
        }
        reports.push empty
      end
    end

    blast.instance_eval {
      if firsttime then
        @query_id = origin.query_id
        @query_def = origin.query_def
        @query_len = origin.query_len
        firsttime = false
      end
      # query_* are copied from the iter (only the values it provides)
      @query_id = iter.query_id if iter.query_id
      @query_def = iter.query_def if iter.query_def
      @query_len = iter.query_len if iter.query_len
      # others are copied from the origin
      @program = origin.program
      @version = origin.version
      @reference = origin.reference
      @db = origin.db
      @parameters.update(origin.parameters)
      # rewrites hit's query_id, query_def, query_len
      iter.hits.each do |h|
        h.query_id = @query_id
        h.query_def = @query_def
        h.query_len = @query_len
      end
      # the iter is added to the iterations
      @iterations.push iter
    }
    prev_iternum = iter.num
    reports.push blast
  end #orig_iters.each

  # This object's iterations is set as first report's iterations
  @iterations.clear
  if rep = reports.first then
    @iterations = rep.iterations
  end

  return reports
end

# Flatfile splitter for NCBI BLAST XML format.
# It is internally used when reading BLAST XML.
# Normally, users do not need to use it directly.
class BlastXmlSplitter < Bio::FlatFile::Splitter::Default # creates a new splitter object def initialize(klass, bstream) super(klass, bstream) @parsed_entries = [] @raw_unsupported = false end # rewinds def rewind ret = super @parsed_entries.clear @raw_unsupported = false ret end # do nothing def skip_leader nil end # get an entry and return the entry as a string def get_entry if @parsed_entries.empty? then @raw_unsupported = false ent = super prepare_parsed_entries(ent) self.parsed_entry = @parsed_entries.shift else raise 'not supported for new BLAST XML format' end ent end # get an entry as a Bio::Blast::Report object def get_parsed_entry if @parsed_entries.empty? then ent = get_entry else self.parsed_entry = @parsed_entries.shift self.entry = nil @raw_unsupported = true end self.parsed_entry end # current raw entry as a String def entry raise 'not supported for new BLAST XML format' if @raw_unsupported super end # start position of the entry def entry_start_pos if entry_pos_flag then raise 'not supported for new BLAST XML format' if @raw_unsupported end super end # (end position of the entry) + 1 def entry_ended_pos if entry_pos_flag then raise 'not supported for new BLAST XML format' if @raw_unsupported end super end private # (private method) to prepare parsed entry def prepare_parsed_entries(ent) if ent then blast = dbclass.new(ent) if blast.reports and blast.reports.size >= 1 then # new blast xml using <Iteration> for multiple queries @parsed_entries.concat blast.reports else # traditional blast xml @parsed_entries.push blast end end end end #class BlastXmlSplitter # splitter for Bio::FlatFile support FLATFILE_SPLITTER = BlastXmlSplitter end # Report # NCBI BLAST tabular (-m 8) output parser. # All methods are equal to Bio::Blast::Report. # Only DELIMITER (and RS) is different. # class Report_tab < Report # Delimter of each entry. Bio::FlatFile uses it. 
DELIMITER = RS = nil end #class Report_tabular end # Blast end # Bio #if __FILE__ == $0 =begin begin # p is suitable than pp for the following test script require 'pp' alias p pp rescue end # for multiple xml reports (iterates on each Blast::Report) Bio::Blast.reports(ARGF) do |rep| rep.iterations.each do |itr| itr.hits.each do |hit| hit.hsps.each do |hsp| end end end end # for multiple xml reports (returns Array of Blast::Report) reps = Bio::Blast.reports(ARGF.read) # for a single report (xml or tab) format auto detect, parser auto selected rep = Bio::Blast::Report.new(ARGF.read) # to use xmlparser explicitly for a report rep = Bio::Blast::Report.xmlparser(ARGF.read) # to use resml explicitly for a report rep = Bio::Blast::Report.rexml(ARGF.read) # to use a tab delimited report rep = Bio::Blast::Report.tab(ARGF.read) =end #end �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/blast/format8.rb��������������������������������������������������������0000644�0000041�0000041�00000003701�12200110570�020151� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/blast/format8.rb - BLAST tab-delimited output (-m 8) parser # # Copyright:: Copyright (C) 2002, 2003, 2007 Toshiaki Katayama <k@bioruby.org> # License:: The Ruby License # # $Id:$ # # == Note # # This file is automatically loaded by bio/appl/blast/report.rb # module Bio class Blast 
class Report

  private

  # Parses NCBI BLAST tab-delimited (-m 8) output.
  #
  # One Iteration is created and pushed onto @iterations. Consecutive rows
  # that share the same query ID and target ID are grouped into one Hit,
  # and every row becomes one Hsp of that Hit. Hit numbers count up from 1
  # over the whole report; Hsp numbers restart at 1 for every Hit.
  #
  # Blank lines and lines starting with '#' are skipped. They used to be
  # passed to tab_parse_hsp, which raised NoMethodError on the missing
  # e-value column.
  #
  # @query_id and @query_def are set to the first whitespace-delimited
  # token of the first record line (nil when there is no record).
  #
  # ---
  # *Arguments*:
  # * _data_: (String) text of the whole report
  def tab_parse(data)
    iteration = Iteration.new
    @iterations.push(iteration)
    @query_id = @query_def = nil

    query_prev = nil
    target_prev = nil
    hit = nil
    hit_num = 1
    hsp_num = 1
    seen_record = false
    data.each_line do |line|
      line = line.chomp
      # nothing to parse on blank lines and comment lines
      next if line.strip.empty? or line[0, 1] == '#'

      unless seen_record
        @query_id = @query_def = line[/\S+/]
        seen_record = true
      end

      ary = line.split("\t")
      query_id, target_id, hsp = tab_parse_hsp(ary)
      # a change of query or target starts a new hit
      if hit.nil? or query_prev != query_id or target_prev != target_id
        hit = Hit.new
        hit.num = hit_num
        hit_num += 1
        hit.query_id = hit.query_def = query_id
        hit.accession = hit.definition = target_id
        iteration.hits.push(hit)
        hsp_num = 1
      end
      hsp.num = hsp_num
      hsp_num += 1
      hit.hsps.push(hsp)
      query_prev = query_id
      target_prev = target_id
    end
  end

  # Builds an Hsp from the columns of one tab-delimited row.
  #
  # The twelve columns are read in this order: query ID, target ID,
  # percent identity, alignment length, mismatch count, gaps, query from,
  # query to, hit from, hit to, e-value, bit score.
  #
  # ---
  # *Arguments*:
  # * _ary_: (Array of String) columns of one row
  # *Returns*:: query ID, target ID and the new Hsp object
  def tab_parse_hsp(ary)
    query_id, target_id,
      percent_identity,
      align_len,
      mismatch_count,
      gaps,
      query_from,
      query_to,
      hit_from,
      hit_to,
      evalue,
      bit_score = *ary

    hsp = Hsp.new
    hsp.align_len        = align_len.to_i
    hsp.gaps             = gaps.to_i
    hsp.query_from       = query_from.to_i
    hsp.query_to         = query_to.to_i
    hsp.hit_from         = hit_from.to_i
    hsp.hit_to           = hit_to.to_i
    hsp.evalue           = evalue.strip.to_f
    hsp.bit_score        = bit_score.to_f

    hsp.percent_identity = percent_identity.to_f
    hsp.mismatch_count   = mismatch_count.to_i

    return query_id, target_id, hsp
  end

end
# bio/appl/blast/report.rb
#

begin
  require 'rexml/document'
rescue LoadError
end

module Bio
class Blast
class Report

  private

  # Parses BLAST XML text with REXML: the report-level fields first,
  # then every <Iteration> element, each of which is appended to
  # @iterations.
  def rexml_parse(xml)
    document = REXML::Document.new(xml)
    rexml_parse_program(document)
    document.elements.each("*//Iteration") do |node|
      @iterations.push(rexml_parse_iteration(node))
    end
  end

  # Reads the children of the root element that carry text.
  # The children of BlastOutput_param/Parameters are handed to
  # xml_set_parameter; the other non-blank texts fill the
  # program / version / reference / db / query_* instance variables.
  def rexml_parse_program(dom)
    fields = {}
    dom.root.each_element_with_text do |node|
      if node.name == 'BlastOutput_param'
        node.elements["Parameters"].each_element_with_text do |param|
          xml_set_parameter(param.name, param.text)
        end
      else
        value = node.text
        fields[node.name] = value unless value.strip.empty?
      end
    end
    @program, @version, @reference, @db, @query_id, @query_def =
      fields.values_at('BlastOutput_program', 'BlastOutput_version',
                       'BlastOutput_reference', 'BlastOutput_db',
                       'BlastOutput_query-ID', 'BlastOutput_query-def')
    @query_len = fields['BlastOutput_query-len'].to_i
  end

  # Builds an Iteration object from an <Iteration> element.
  # Statistics values containing a non-digit character are stored as
  # Float, the others as Integer.
  def rexml_parse_iteration(e)
    iteration = Iteration.new
    e.elements.each do |child|
      case child.name
      when 'Iteration_iter-num' then iteration.num = child.text.to_i
      when 'Iteration_message'  then iteration.message = child.text
      when 'Iteration_hits'
        child.elements.each("Hit") do |hit_node|
          iteration.hits.push(rexml_parse_hit(hit_node))
        end
      when 'Iteration_stat'
        child.elements["Statistics"].each_element_with_text do |stat|
          raw = stat.text
          key = stat.name.sub(/Statistics_/, '')
          iteration.statistics[key] = (raw =~ /\D/) ? raw.to_f : raw.to_i
        end
      # the following three exist only in the new BLAST XML format
      when 'Iteration_query-ID'  then iteration.query_id = child.text
      when 'Iteration_query-def' then iteration.query_def = child.text
      when 'Iteration_query-len' then iteration.query_len = child.text.to_i
      end
    end
    iteration
  end

  # Builds a Hit object from a <Hit> element. The query_* values of the
  # hit are copied from this report.
  def rexml_parse_hit(e)
    hit = Hit.new
    hit.query_id, hit.query_def, hit.query_len = @query_id, @query_def, @query_len

    attrs = {}
    e.elements.each do |child|
      if child.name == 'Hit_hsps'
        child.elements.each("Hsp") do |hsp_node|
          hit.hsps.push(rexml_parse_hsp(hsp_node))
        end
      else
        attrs[child.name] = child.text
      end
    end

    hit.num        = attrs['Hit_num'].to_i
    hit.hit_id     = attrs['Hit_id']
    hit.len        = attrs['Hit_len'].to_i
    hit.definition = attrs['Hit_def']
    hit.accession  = attrs['Hit_accession']
    hit
  end

  # Builds an Hsp object from an <Hsp> element.
  # Each row of the table below is: attribute, XML tag, conversion
  # (nil means the text is stored as is). Missing tags end up as
  # 0, 0.0 or nil depending on the conversion.
  def rexml_parse_hsp(e)
    text_of = {}
    e.each_element_with_text { |child| text_of[child.name] = child.text }

    hsp = Hsp.new
    [
      [ :num,          'Hsp_num',          :to_i ],
      [ :bit_score,    'Hsp_bit-score',    :to_f ],
      [ :score,        'Hsp_score',        :to_i ],
      [ :evalue,       'Hsp_evalue',       :to_f ],
      [ :query_from,   'Hsp_query-from',   :to_i ],
      [ :query_to,     'Hsp_query-to',     :to_i ],
      [ :hit_from,     'Hsp_hit-from',     :to_i ],
      [ :hit_to,       'Hsp_hit-to',       :to_i ],
      [ :pattern_from, 'Hsp_pattern-from', :to_i ],
      [ :pattern_to,   'Hsp_pattern-to',   :to_i ],
      [ :query_frame,  'Hsp_query-frame',  :to_i ],
      [ :hit_frame,    'Hsp_hit-frame',    :to_i ],
      [ :identity,     'Hsp_identity',     :to_i ],
      [ :positive,     'Hsp_positive',     :to_i ],
      [ :gaps,         'Hsp_gaps',         :to_i ],
      [ :align_len,    'Hsp_align-len',    :to_i ],
      [ :density,      'Hsp_density',      :to_i ],
      [ :qseq,         'Hsp_qseq',         nil ],
      [ :hseq,         'Hsp_hseq',         nil ],
      [ :midline,      'Hsp_midline',      nil ]
    ].each do |attr, tag, conv|
      raw = text_of[tag]
      hsp.__send__("#{attr}=", conv ? raw.__send__(conv) : raw)
    end
    hsp
  end

end
end
end
�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/blast/wublast.rb��������������������������������������������������������0000644�0000041�0000041�00000034262�12200110570�020260� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/blast/wublast.rb - WU-BLAST default output parser # # Copyright:: Copyright (C) 2003, 2008 Naohisa GOTO <ng@bioruby.org> # License:: The Ruby License # # # == Description # # WU-BLAST default output parser. # # The parser is still incomplete and may contain many bugs, # because I didn't have WU-BLAST license. # It was tested under web-based WU-BLAST results and # obsolete version downloaded from http://blast.wustl.edu/ . # # == References # # * http://blast.wustl.edu/ # * http://www.ebi.ac.uk/blast2/ # module Bio require 'bio/appl/blast' unless const_defined?(:Blast) class Blast module WU #:nodoc: # Bio::Blast::WU::Report parses WU-BLAST default output # and stores information in the data. # It may contain a Bio::Blast::WU::Report::Iteration object. # Because it inherits Bio::Blast::Default::Report, # please also refer Bio::Blast::Default::Report. class Report < Default::Report # Returns parameters (???) def parameters parse_parameters @parameters end # Returns parameter matrix (???) 
# Returns the parameter matrix collected by parse_parameters.
def parameter_matrix
  parse_parameters
  @parameter_matrix
end

# Returns e-value threshold specified when BLAST was executed.
def expect
  parse_parameters
  @parameters['E']
end

# Returns warning messages.
# On the first call the warnings of every iteration are appended to the
# header warnings; the result is then reused.
def warnings
  return @warnings if defined?(@warnings)
  @warnings = @f0warnings
  iterations.each { |itr| @warnings.concat(itr.warnings) }
  @warnings
end

# Returns notice messages, with every run of whitespace squeezed into
# a single space.
def notice
  @notice = @f0notice.to_s.gsub(/\s+/, ' ').strip unless defined?(@notice)
  @notice
end

# (WU-BLAST) Returns record number of the query.
# It may only be available for reports with multiple queries.
# Returns an Integer or nil.
def query_record_number
  format0_parse_query
  @query_record_number
end

# (WU-BLAST) Returns exit code for the execution.
# Returns an Integer or nil.
def exit_code
  defined?(@exit_code) ? @exit_code : nil
end

# (WU-BLAST) Returns the message bundled with the exit code output.
# The message will be shown when WU-BLAST ignores a fatal error
# due to the command line option "-nonnegok", "-novalidctxok",
# or "-shortqueryok".
#
# Returns a String or nil.
def exit_code_message
  defined?(@exit_code_message) ? @exit_code_message : nil
end

# (WU-BLAST) Returns "NOTE:" information.
# Returns nil or an array containing String.
def notes
  defined?(@notes) ? @notes : nil
end

# (WU-BLAST) Returns fatal error information.
# Returns nil or an array containing String.
def fatal_errors
  defined?(@fatal_errors) ? @fatal_errors : nil
end

# Returns the name (filename or title) of the database.
# A trailing "Searching... done" line and a trailing
# "N sequences; M total letters" line are dropped from the name, and
# the remaining lines are joined with a space.
def db
  return @db if defined?(@db)
  if /Database *\: *(.*)/m =~ @f0database
    lines = $1.split(/^/)
    lines.pop if lines.size > 1 and
      /\ASearching\..+ done\s*\z/ =~ lines[-1]
    lines.pop if lines.size > 1 and
      /\A +[\d\,]+ +sequences\; +[\d\,]+ total +letters\.?\s*\z/ =~ lines[-1]
    @db = lines.collect { |ln| ln.sub(/\s+\z/, '') }.join(' ')
  end
  @db
end

private

# Parses the query lines (begins with "Query = ").
# Parses the query section (@f0query) once and sets @query_def,
# @query_len and @query_record_number.
#
# The definition lines following "Query=" are collected until the
# trailing "(N letters)" or "(N letters; record M)" part is found.
# The scan also stops when the end of the text is reached or when no
# more text can be consumed. The previous loop condition tested
# StringScanner#rest, which is an empty String (never nil) at the end
# of input, so a query section without the "letters" part never
# terminated.
def format0_parse_query
  unless defined?(@query_def)
    sc = StringScanner.new(@f0query)
    sc.skip(/\s*/)
    if sc.skip_until(/Query\= */) then
      q = []
      r = nil
      loop do
        start = sc.pos
        line = sc.scan(/.*/)
        sc.skip(/\s*^ ?/)
        # nothing consumed: repeating would scan the same place forever
        break if sc.pos == start
        q << line
        r = sc.skip(/ *\( *([\,\d]+) *letters *(\; *record *([\,\d]+) *)?\)\s*\z/)
        break if r or sc.eos?
      end
      # sc[n] refers to the "letters" match only when r is set
      @query_len = sc[1].delete(',').to_i if r
      @query_record_number = sc[3].delete(',').to_i if r and sc[2]
      @query_def = q.join(' ')
    end
  end
end

# Splits headers.
# Consumes leading elements of _data_ (an Array of String): the header,
# then "Reference:", "Copyright" and "Notice:" entries, the query
# section, "WARNING:" and "NOTE:" entries, an optional translation
# information entry and the database entry.
# Stops before the database part when the next entry begins with
# "Parameters:" or "EXIT CODE".
def format0_split_headers(data)
  @f0header = data.shift
  @f0references = []
  while r = data.first
    case r
    when /^Reference\: /
      @f0references.push data.shift
    when /^Copyright /
      @f0copyright = data.shift
    when /^Notice\: /
      @f0notice = data.shift
    when /^Query\= /
      break
    else
      break
    end
  end
  @f0query = data.shift
  @f0warnings ||= []
  while r = data.first
    case r
    when /^WARNING\: /
      @f0warnings << data.shift
    when /^NOTE\: /
      @notes ||= []
      @notes << data.shift
    else
      break #from the above "while"
    end
  end
  return if r = data.first and /\A(Parameters\:|EXIT CODE *\d+)/ =~ r
  if r = data.first and !(/^Database\: / =~ r)
    @f0translate_info = data.shift
  end
  @f0database = data.shift
end

# Splits search data.
# Leading "WARNING:" entries are moved to @f0warnings; the rest of
# _data_ is passed to Iteration.new.
# Returns an Array containing the single Iteration object.
def format0_split_search(data)
  @f0warnings ||= []
  while r = data.first and r =~ /^WARNING\: /
    @f0warnings << data.shift
  end
  [ Iteration.new(data) ]
end

# Splits statistics parameters.
# Entries before "Statistics:" go to @f0wu_params, the "Statistics:"
# entry is dropped and the entries after it go to @f0wu_stats.
# "EXIT CODE", "FATAL:" and "WARNING:" entries are stored separately.
# The resulting F0dbstat object is also set into the first iteration.
def format0_split_stat_params(data)
  @f0warnings ||= []
  while r = data.first and r =~ /^WARNING\: /
    @f0warnings << data.shift
  end
  @f0wu_params = []
  @f0wu_stats = []
  ary = @f0wu_params
  while r = data.shift
    case r
    when /\AStatistics\:/
      ary = @f0wu_stats
    when /\AEXIT CODE *(\d+)\s*(.*)$/
      @exit_code = $1.to_i
      if $2 and !$2.empty? then
        @exit_code_message = r.sub(/\AEXIT CODE *(\d+)\s*/, '')
      end
      r = nil
    when /\AFATAL\: /
      @fatal_errors ||= []
      @fatal_errors.push r
      r = nil
    when /\AWARNING\: /
      @f0warnings ||= []
      @f0warnings << r
      r = nil
    end
    ary << r if r
  end
  @f0dbstat = F0dbstat.new(@f0wu_stats)
  itr = @iterations[0]
  x = @f0dbstat
  itr.instance_eval { @f0dbstat = x } if itr
end

# Splits parameters.
# Fills @parameters (a Hash) and @parameter_matrix (an Array) from
# @f0wu_params. "key=value" lines are stored as Strings, other
# non-empty lines are stored with the value true, and the value of
# 'E' is converted to Float.
def parse_parameters
  unless defined?(@parse_parameters)
    @parameters = {}
    @parameter_matrix = []
    @f0wu_params.each do |x|
      if /^ Query/ =~ x then
        @parameter_matrix << x
      else
        x.split(/^/).each do |y|
          if /\A\s*(.+)\s*\=\s*(.*)\s*/ =~ y then
            @parameters[$1] = $2
          elsif /\AParameters\:/ =~ y then
            ; #ignore this
          elsif /\A\s*(.+)\s*$/ =~ y then
            @parameters[$1] = true
          end
        end
      end
    end
    if ev = @parameters['E'] then
      # a value beginning with "e" gets "1" put in front of it
      ev = '1' + ev if ev[0] == ?e
      @parameters['E'] = ev.to_f
    end
    @parse_parameters = true
  end
end

# Stores database statistics.
# Internal use only. Users must not use the class.
class F0dbstat < Default::Report::F0dbstat #:nodoc:
  def initialize(ary)
    @f0stat = ary
    @hash = {}
  end

  #--
  #undef :f0params
  #undef :matrix, :gap_open, :gap_extend,
  #  :eff_space, :expect, :sc_match, :sc_mismatch,
  #  :num_hits
  #++

  # Parses database statistics.
  def parse_dbstat
    unless defined?(@parse_dbstat)
      parse_colon_separated_params(@hash, @f0stat)
      @database = @hash['Database']
      @posted_date = @hash['Posted']
      if val = @hash['# of letters in database'] then
        @db_len = val.tr(',', '').to_i
      end
      if val = @hash['# of sequences in database'] then
        @db_num = val.tr(',', '').to_i
      end
      @parse_dbstat = true
    end #unless
  end #def
  private :parse_dbstat

end #class F0dbstat

#--
#class Frame
#end #class FrameParams
#++

# Iteration class for WU-BLAST report.
# Though WU-BLAST does not iterate like PSI-BLAST,
# Bio::Blast::WU::Report::Iteration aims to keep compatibility
# with Bio::Blast::Default::Report::* classes.
# It may contain some Bio::Blast::WU::Report::Hit objects.
# Because it inherits Bio::Blast::Default::Report::Iteration,
# please also refer Bio::Blast::Default::Report::Iteration.
class Iteration < Default::Report::Iteration
  # Creates a new Iteration object.
  # It is designed to be called only internally from
  # the Bio::Blast::WU::Report class.
  # Users shall not use the method directly.
  #
  # _data_ is an Array of String; the entries belonging to this
  # iteration are removed from its head.
  def initialize(data)
    @f0stat = []
    @f0dbstat = Default::Report::AlwaysNil.instance
    @f0hitlist = []
    @hits = []
    @num = 1
    @f0message = []
    @f0warnings = []

    head = data.first
    return unless head
    # no search result part at all
    return if /\AParameters\:$/ =~ head or /\AEXIT CODE *\d+/ =~ head

    @f0hitlist << data.shift
    summary = data.shift
    return unless summary
    return if /\*{3} +NONE +\*{3}/ =~ summary

    @f0hitlist << summary
    while (entry = data.first) and /^WARNING\: / =~ entry
      @f0warnings << data.shift
    end
    while (entry = data.first) and /^\>/ =~ entry
      @hits << Hit.new(data)
    end
  end

  # Returns warning messages.
  def warnings
    @f0warnings
  end

  private

  # Parses hit list.
  # Every line of the hit list is split from its right end into
  # N, P value, score (and reading frame when the heading contains both
  # "Reading" and "Frame"), and the values are stored into the hit at
  # the same position. A Hit is created from the definition column
  # when no hit exists at that position.
  def parse_hitlist
    return if defined?(@parse_hitlist)

    heading = @f0hitlist.shift.to_s
    flag_tblast = (/Reading/ =~ heading and /Frame/ =~ heading) ? true : nil
    columns = flag_tblast ? 5 : 4

    index = 0
    @f0hitlist.each do |chunk|
      chunk.split(/^/).reject { |ln| ln.empty? }.each do |ln|
        ln.strip!
        # split from the right: reverse, split, and reverse each field back
        ln.reverse!
        fields = ln.split(/\s+/, columns)
        fields.each { |f| f.reverse! }
        definition = fields.pop
        hit = @hits[index]
        unless hit
          hit = Hit.new([ definition.to_s.sub(/\.+\z/, '') ])
          @hits[index] = hit
        end
        fields.pop if flag_tblast #ignore Reading Frame
        raw_score = fields.pop
        score = raw_score ? raw_score.to_i : nil
        pval = fields.pop.to_s
        pval = '1' + pval if pval[0] == ?e
        pval = pval.empty? ? (1.0/0.0) : pval.to_f
        nnum = fields.pop.to_i
        hit.instance_eval {
          @score = score
          @pvalue = pval
          @n_number = nnum
        }
        index += 1
      end
    end
    @parse_hitlist = true
  end
end #class Iteration

# Bio::Blast::WU::Report::Hit contains information about a hit.
# It may contain some Bio::Blast::WU::Report::HSP objects.
#
# Because it inherits Bio::Blast::Default::Report::Hit,
# please also refer Bio::Blast::Default::Report::Hit.
class Hit < Default::Report::Hit # Creates a new Hit object. # It is designed to be called only internally from the # Bio::Blast::WU::Report::Iteration class. # Users should not call the method directly. def initialize(data) @f0hitname = data.shift @hsps = [] while r = data.first if r =~ /^\s*(?:Plus|Minus) +Strand +HSPs\:/ then data.shift r = data.first end if /\A\s+Score/ =~ r then @hsps << HSP.new(data) else break end end @again = false end # Returns score. def score @score end # p-value attr_reader :pvalue # n-number (???) attr_reader :n_number end #class Hit # Bio::Blast::WU::Report::HSP holds information about the hsp # (high-scoring segment pair). # # Because it inherits Bio::Blast::Default::Report::HSP, # please also refer Bio::Blast::Default::Report::HSP. class HSP < Default::Report::HSP # p-value attr_reader :pvalue if false #dummy method_after_parse_score :pvalue # p_sum_n (???) attr_reader :p_sum_n if false #dummy method_after_parse_score :p_sum_n end #class HSP end #class Report # WU-BLAST default output parser for TBLAST. # All methods are equal to Bio::Blast::WU::Report. # Only DELIMITER (and RS) is different. class Report_TBlast < Report # Delimter of each entry for TBLAST. Bio::FlatFile uses it. DELIMITER = RS = "\nTBLAST" # (Integer) excess read size included in DELIMITER. 
DELIMITER_OVERRUN = 6 # "TBLAST" end #class Report_TBlast end #module WU end #class Blast end #module Bio ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/blast/remote.rb���������������������������������������������������������0000644�0000041�0000041�00000006010�12200110570�020060� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/blast/remote.rb - remote BLAST wrapper basic module # # Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org> # License:: The Ruby License # module Bio require 'bio/appl/blast' unless const_defined?(:Blast) class Blast # Bio::Blast::Remote is a namespace for Remote Blast factory. module Remote autoload :GenomeNet, 'bio/appl/blast/genomenet' autoload :Genomenet, 'bio/appl/blast/genomenet' autoload :DDBJ, 'bio/appl/blast/ddbj' autoload :Ddbj, 'bio/appl/blast/ddbj' # creates a remote BLAST factory using GenomeNet def self.genomenet(program, db, options = []) GenomeNet.new(program, db, options) #Bio::Blast.new(program, db, options, 'genomenet') end # creates a remote BLAST factory using DDBJ Web service def self.ddbj(program, db, options = []) DDBJ.new(program, db, options) #Bio::Blast.new(program, db, options, 'ddbj') end # Common methods for meta-information processing # (e.g. list of databases). 
module Information # (private) parses database information and stores data def _parse_databases raise NotImplementedError end private :_parse_databases # Returns a list of available nucleotide databases. # # Note: see the note of databases method. # # --- # *Returns*:: Array containing String objects def nucleotide_databases _parse_databases @databases['blastn'] end # Returns a list of available protein databases. # # Note: see the note of databases method. # --- # *Returns*:: Array containing String objects def protein_databases _parse_databases @databases['blastp'] end # Returns a list of available databases for given program. # # Note: It parses remote sites to obtain database information # (e.g. http://blast.genome.jp/ for Bio::Blast::Remote::GenomeNet). # Thus, if the site is changed, this method can not return correct data. # Please tell BioRuby developers when the site is changed. # # --- # *Arguments*: # * _program_ (required): blast program('blastn', 'blastp', 'blastx', 'tblastn' or 'tblastx') # *Returns*:: Array containing String objects def databases(program) _parse_databases @databases[program] || [] end # Returns a short description of given database. # # Note: see the note of databases method. # --- # *Arguments*: # * _program_ (required): 'blastn', 'blastp', 'blastx', 'tblastn' or 'tblastx' # * _db_ (required): database name # *Returns*:: String def database_description(program, db) _parse_databases h = @database_descriptions[program] h ? (h[db] || '') : '' end # Resets data and clears cached data in this module. 
# A class to parse and store NCBI-tools style command-line options.
# It is internally used in Bio::Blast and some other classes.
#
# Options are kept as an array of pairs: [ '-x', value ] for an option
# and [ word ] for an argument that is not an option.
class NCBIOptions

  # creates a new object from an array
  def initialize(options = [])
    @option_pairs = _parse_options(options)
  end

  # (protected) option pairs. internal use only.
  attr_reader :option_pairs
  protected :option_pairs

  # (private) parse options from given array
  #
  # '-x value' and '-xvalue' both give the pair [ '-x', 'value' ].
  # An option at the end of the array without a value gets the value ''.
  # Anything else (including a single '-') gives a pair of size one.
  def _parse_options(options)
    i = 0
    pairs = []
    while i < options.size
      opt = options[i].to_s
      if m = /\A(\-.)/.match(opt) then
        key = m[1]
        if m.post_match.empty? then
          i += 1
          # explicit bounds check instead of rescuing every error
          val = (i < options.size) ? options[i] : ''
        else
          val = m.post_match
        end
        pairs.push([ key, val ])
      elsif '-' == opt then
        pairs.push [ opt ]
      else
        #warn "Arguments must start with \'-\'" if $VERBOSE
        pairs.push [ opt ]
      end
      i += 1
    end
    pairs
  end
  private :_parse_options

  # Normalize options.
  # For two or more same options (e.g. '-p blastn -p blastp'),
  # only the last option is used. (e.g. '-p blastp' for above example).
  #
  # Note that completely illegal options are left untouched.
  #
  # ---
  # *Returns*:: self
  def normalize!
    hash = {}
    newpairs = []
    @option_pairs.reverse_each do |pair|
      if pair.size == 2 then
        key = pair[0]
        unless hash[key] then
          newpairs.push pair
          hash[key] = pair
        end
      else
        newpairs.push pair
      end
    end
    newpairs.reverse!
    @option_pairs = newpairs
    self
  end

  # current options as an array of strings
  def options
    @option_pairs.flatten
  end

  # parses a string and returns a new object
  def self.parse(str)
    options = Shellwords.shellwords(str)
    self.new(options)
  end

  # (private) key string to regexp
  def _key_to_regexp(key)
    key = key.sub(/\A\-/, '')
    Regexp.new('\A\-' + Regexp.escape(key) + '\z')
  end
  private :_key_to_regexp

  # Return the option.
  # ---
  # *Arguments*:
  # * _key_: option name as a string, e.g. 'm', 'p', or '-m', '-p'.
  # *Returns*:: String or nil
  def get(key)
    re = _key_to_regexp(key)

    # Note: the last option is used when two or more same option exist.
    value = nil
    @option_pairs.reverse_each do |pair|
      if re =~ pair[0] then
        value = pair[1]
        break
      end
    end
    return value
  end

  # Delete the given option.
  # ---
  # *Arguments*:
  # * _key_: option name as a string, e.g. 'm', 'p', or '-m', '-p'.
  # *Returns*:: String or nil
  def delete(key)
    re = _key_to_regexp(key)

    # Note: the last option is used for return value
    # when two or more same option exist.
    oldvalue = nil
    @option_pairs = @option_pairs.delete_if do |pair|
      if re =~ pair[0] then
        oldvalue = pair[1]
        true
      else
        false
      end
    end
    return oldvalue
  end

  # Sets the option to given value.
  #
  # For example, if you want to set '-p blastall' option,
  #   obj.set('p', 'blastall')
  # or
  #   obj.set('-p', 'blastall')
  # (above two are equivalent).
  #
  # ---
  # *Arguments*:
  # * _key_: option name as a string, e.g. 'm', 'p'.
  # * _value_: value as a string, e.g. '7', 'blastp'.
  # *Returns*:: previous value; String or nil
  def set(key, value)
    re = _key_to_regexp(key)
    oldvalue = nil
    flag = false
    # Note: only the last options is modified for multiple same options.
    @option_pairs.reverse_each do |pair|
      if re =~ pair[0] then
        oldvalue = pair[1]
        pair[1] = value
        flag = true
        break
      end
    end
    unless flag then
      key = "-#{key}" unless key[0, 1] == '-'
      @option_pairs.push([ key, value ])
    end
    oldvalue
  end

  # Adds options from given array.
  # Note that existing options will also be normalized.
  # ---
  # *Arguments*:
  # * _options_: options as an Array of String objects.
  # *Returns*:: self
  def add_options(options)
    @option_pairs.concat _parse_options(options)
    self.normalize!
    self
  end

  # If self == other, returns true. Otherwise, returns false.
  # Two objects are equal when their options arrays are equal;
  # an object that cannot give its options is never equal.
  def ==(other)
    return true if super(other)
    begin
      oopts = other.options
    rescue
      return false
    end
    return self.options == oopts
  end

  # Returns an array for command-line options.
  # prior_options are preferred to be used.
  #
  # Every option given in prior_options replaces the same option of
  # this object, and the prior options are placed first.
  # A prior argument that is not an option (does not begin with '-')
  # is only placed first; it no longer removes the option whose letter
  # it happens to spell (e.g. 'm' used to remove '-m').
  def make_command_line_options(prior_options = [])
    newopts = self.class.new(self.options)
    #newopts.normalize!
    prior_pairs = _parse_options(prior_options)
    prior_pairs.each do |pair|
      newopts.delete(pair[0]) if pair[0][0, 1] == '-'
    end
    newopts.option_pairs[0, 0] = prior_pairs
    newopts.options
  end

end #class NCBIOptions
# # == Important Notes # # In BLAT results, the start position of a sequnece is numbered as 0. # On the other hand, in many other homology search programs, # the start position of a sequence is numbered as 1. # To keep compatibility, the BLAT parser adds 1 to every position number. # # == References # # * Kent, W.J., BLAT--the BLAST-like alignment tool, # Genome Research, 12, 656--664, 2002. # http://www.genome.org/cgi/content/abstract/12/4/656 # * http://genome.ucsc.edu/goldenPath/help/blatSpec.html require 'bio' module Bio class Blat # Bio::Blat::Report is a BLAT report parser class. # Its object may contain some Bio::Blat::Report::Hits objects. # # In BLAT results, the start position of a sequnece is numbered as 0. # On the other hand, in many other homology search programs, # the start position of a sequence is numbered as 1. # To keep compatibility, the BLAT parser adds 1 to every position number # except Bio::Blat::Report::Seqdesc and some Bio::Blat specific methods. # # Note that Bio::Blat::Report#query_def, #query_id, #query_len methods # simply return first hit's query_*. # If multiple query sequences are given, these values # will be incorrect. # class Report #< DB # Delimiter of each entry. Bio::FlatFile uses it. # In Bio::Blat::Report, it it nil (1 entry 1 file). DELIMITER = RS = nil # 1 file 1 entry # Splitter for Bio::FlatFile FLATFILE_SPLITTER = Bio::FlatFile::Splitter::LineOriented # Creates a new Bio::Blat::Report object from BLAT result text (String). # You can use Bio::FlatFile to read a file. # Currently, results created with options -out=psl (default) or # -out=pslx are supported. def initialize(text = '') flag = false head = [] @hits = [] text.each_line do |line| if flag then @hits << Hit.new(line) else # for headerless data if /^\d/ =~ line then flag = true redo end line = line.chomp if /\A\-+\s*\z/ =~ line flag = true else head << line end end end @columns = parse_header(head) unless head.empty? 
end # Adds a header line if the header data is not yet given and # the given line is suitable for header. # Returns self if adding header line is succeeded. # Otherwise, returns false (the line is not added). def add_header_line(line) return false if defined? @columns line = line.chomp case line when /^\d/ @columns = (defined? @header_lines) ? parse_header(@header_lines) : [] return false when /\A\-+\s*\z/ @columns = (defined? @header_lines) ? parse_header(@header_lines) : [] return self else @header_lines ||= [] @header_lines.push line end end # Adds a line to the entry if the given line is regarded as # a part of the current entry. # If the current entry (self) is empty, or the line has the same # query name, the line is added and returns self. # Otherwise, returns false (the line is not added). def add_line(line) if /\A\s*\z/ =~ line then return @hits.empty? ? self : false end hit = Hit.new(line.chomp) if @hits.empty? or @hits.first.query.name == hit.query.name then @hits.push hit return self else return false end end # hits of the result. # Returns an Array of Bio::Blat::Report::Hit objects. attr_reader :hits # Returns descriptions of columns. # Returns an Array. # This would be a Bio::Blat specific method. attr_reader :columns # Parses headers. def parse_header(ary) while x = ary.shift if /psLayout version (\S+)/ =~ x then @psl_version = $1 break elsif !(x.strip.empty?) ary.unshift(x) break end end a0 = ary.collect { |x| x.split(/\t/) } k = [] a0.each do |x| x.each_index do |i| y = x[i].strip k[i] = k[i].to_s + (y.sub!(/\-\z/, '') ? y : y + ' ') end end k.each { |x| x.strip! } k end private :parse_header # version of the psl format (String or nil). attr_reader :psl_version # Bio::Blat::Report::SeqDesc stores sequence information of # query or subject of the BLAT report. # It also includes some hit information. class SeqDesc # Creates a new SeqDesc object. # It is designed to be called internally from Bio::Blat::Report class. # Users shall not use it directly. 
# Sequence segment pair of BLAT result.
# Similar to Bio::Blast::Report::Hsp but lacks many methods.
class SegmentPair
  # Creates a new SegmentPair object.
  # It is designed to be called internally from Bio::Blat::Report class.
  # Users shall not use it directly.
  #
  # query_len, target_len:: total lengths of the query and the target
  # strand:: strand string of the hit ('+', '-', '++', '+-', ...)
  # blksize:: length of this block
  # qstart, tstart:: 0-based start positions of the block
  # qseq, tseq:: sequences of the block (or nil if not available)
  # protein_flag:: true when the query is protein and the target is DNA
  def initialize(query_len, target_len, strand,
                 blksize, qstart, tstart, qseq, tseq,
                 protein_flag)
    @blocksize  = blksize
    @qseq       = qseq
    # Store the target sequence that was passed in. Assigning the result
    # of the (still unset) hseq reader here would always store nil.
    @hseq       = tseq
    @hit_strand = 'plus'
    w = (protein_flag ? 3 : 1) # 3 means query=protein target=dna
    case strand
    when '-'
      # query is minus strand
      @query_strand = 'minus'
      # convert positions
      @query_from = query_len - qstart
      @query_to   = query_len - qstart - blksize + 1
      # To keep compatibility with other homology search programs,
      # we add 1 to each position number.
      @hit_from   = tstart + 1
      @hit_to     = tstart + blksize * w # - 1 + 1
    when '+-'
      # hit is minus strand
      @query_strand = 'plus'
      @hit_strand = 'minus'
      # To keep compatibility with other homology search programs,
      # we add 1 to each position number.
      @query_from = qstart + 1
      @query_to   = qstart + blksize # - 1 + 1
      # convert positions
      @hit_from   = target_len - tstart
      @hit_to     = target_len - tstart - blksize * w + 1
    else #when '+', '++'
      # NOTE(review): '-+' and '--' also end up here and are treated as
      # plus/plus -- confirm whether such strands can occur in the input.
      @query_strand = 'plus'
      # To keep compatibility with other homology search programs,
      # we add 1 to each position number.
      @query_from = qstart + 1
      @query_to   = qstart + blksize # - 1 + 1
      @hit_from   = tstart + 1
      @hit_to     = tstart + blksize * w # - 1 + 1
    end
  end

  # Returns query start position.
  # CAUTION: In Blat's raw result(psl format), first position is 0.
  # To keep compatibility, the parser adds 1 to the position.
  attr_reader :query_from

  # Returns query end position.
  # CAUTION: In Blat's raw result(psl format), first position is 0.
  # To keep compatibility, the parser adds 1 to the position.
  attr_reader :query_to

  # Returns query sequence.
  # If sequence data is not available, returns nil.
  attr_reader :qseq

  # Returns strand information of the query.
  # Returns 'plus' or 'minus'.
  attr_reader :query_strand

  # Returns target (subject, hit) start position.
  # CAUTION: In Blat's raw result(psl format), first position is 0.
  # To keep compatibility, the parser adds 1 to the position.
  attr_reader :hit_from

  # Returns target (subject, hit) end position.
  # CAUTION: In Blat's raw result(psl format), first position is 0.
  # To keep compatibility, the parser adds 1 to the position.
  attr_reader :hit_to

  # Returns the target (subject, hit) sequence.
  # If sequence data is not available, returns nil.
  attr_reader :hseq

  # Returns strand information of the target (subject, hit).
  # Returns 'plus' or 'minus'.
  attr_reader :hit_strand

  # Returns block size (length) of the segment pair.
  # This would be a Bio::Blat specific method.
  attr_reader :blocksize

  # Returns alignment length of the segment pair.
  # Returns nil if no alignment data are available.
  def align_len
    @qseq ? @qseq.size : nil
  end
end #class SegmentPair
# Hit class for the BLAT result parser.
# Similar to Bio::Blast::Report::Hit but lacks many methods.
# Its object may contain some Bio::Blat::Report::SegmentPair objects.
class Hit
  # Builds a Hit from one tab-separated line of BLAT output.
  # Intended to be called from Bio::Blat::Report only.
  def initialize(str)
    @data = str.chomp.split(/\t/)
  end

  # Raw, tab-split fields of the line (Array of String).
  # Position numbers are left exactly as they appear in the input.
  attr_reader :data

  # Splits a comma-separated list, ignoring trailing commas.
  # nil is treated as an empty list.
  def split_comma(str)
    without_tail = str.to_s.sub(/\s*\,+\s*\z/, '')
    without_tail.split(/\s*\,\s*/)
  end
  private :split_comma

  # Query-side sequence information (Bio::Blat::Report::SeqDesc).
  # The object is built on first use and then reused.
  # This would be Bio::Blat specific method.
  def query
    @query ||= SeqDesc.new(*@data.values_at(4, 5, 9, 10, 11, 12),
                           split_comma(@data[19]),
                           split_comma(@data[21]))
  end

  # Target-side (hit) sequence information (Bio::Blat::Report::SeqDesc).
  # The object is built on first use and then reused.
  # This would be Bio::Blat specific method.
  def target
    @target ||= SeqDesc.new(*@data.values_at(6, 7, 13, 14, 15, 16),
                            split_comma(@data[20]),
                            split_comma(@data[22]))
  end

  # Match nucleotides.
  def match
    @data[0].to_i
  end

  # Mismatch nucleotides.
  def mismatch
    @data[1].to_i
  end

  # "rep. match".
  # Number of bases that match but are part of repeats.
  def rep_match
    @data[2].to_i
  end

  # "N's". Number of 'N' bases.
  def n_s
    @data[3].to_i
  end

  # Strand field of the hit, exactly as given in the input.
  # This would be a Bio::Blat specific method.
  def strand
    @data[8]
  end

  # Number of blocks(exons, segment pairs).
  def block_count
    @data[17].to_i
  end

  # Sizes of all blocks(exons, segment pairs) as an Array of Integer.
  def block_sizes
    @block_sizes ||= split_comma(@data[18]).collect { |size| size.to_i }
  end

  # Blocks(exons, segment pairs) of the hit, as an Array of
  # Bio::Blat::Report::SegmentPair objects (one per block_count).
  def blocks
    @blocks ||= begin
      sizes    = block_sizes
      q_starts = query.starts
      t_starts = target.starts
      q_seqs   = query.seqs
      t_seqs   = target.seqs
      is_prot  = protein?
      (0...block_count).collect do |idx|
        SegmentPair.new(query.size, target.size, strand, sizes[idx],
                        q_starts[idx], t_starts[idx],
                        q_seqs[idx], t_seqs[idx], is_prot)
      end
    end
  end
  alias exons blocks

  #--
  # Bio::BLAST::*::Report::Hit compatible methods
  #++
  alias hsps blocks

  # Length of the query sequence.
  def query_len
    query.size
  end

  # Name of the query sequence.
  def query_def
    query.name
  end
  alias query_id query_def

  # Length of the target(subject) sequence.
  def target_len
    target.size
  end
  alias len target_len

  # Name of the target(subject) sequence.
  def target_def
    target.name
  end
  alias target_id target_def
  alias definition target_def

  # Iterates over each block(exon, segment pair) of the hit.
  # Yields a Bio::Blat::Report::SegmentPair object.
  def each(&x) #:yields: segmentpair
    exons.each(&x)
  end

  #--
  # methods described in the BLAT FAQ at the UCSC genome browser.
  # (http://genome.ucsc.edu/FAQ/FAQblat#blat4)
  #++

  # Calculates the pslCalcMilliBad value defined in the
  # BLAT FAQ (http://genome.ucsc.edu/FAQ/FAQblat#blat4).
  # All arithmetic is done on Integers, so the result is an Integer.
  def milli_bad
    size_mul   = protein? ? 3 : 1
    q_ali_size = size_mul * (query.end - query.start)
    t_ali_size = target.end - target.start
    return 0 if [q_ali_size, t_ali_size].min <= 0

    # only a query span longer than the target span is penalized
    size_dif = [q_ali_size - t_ali_size, 0].max
    total = size_mul * (match + rep_match + mismatch)
    return 0 if total == 0

    penalty = mismatch * size_mul + query.gap_count +
              (3 * Math.log(1 + size_dif)).round
    1000 * penalty / total
  end

  # Calculates the percent identity compatible with the BLAT web server
  # as described in the BLAT FAQ
  # (http://genome.ucsc.edu/FAQ/FAQblat#blat4).
  def percent_identity
    100.0 - milli_bad * 0.1
  end

  # When the output data comes from the protein query, returns true.
  # Otherwise (nucleotide query), returns false.
  # It returns nil if this cannot be determined (no blocks, or the
  # second character of the strand is neither '+' nor '-').
  #
  # The algorithm is taken from the BLAT FAQ
  # (http://genome.ucsc.edu/FAQ/FAQblat#blat4).
  def protein?
    return nil if block_sizes.empty?
    target_strand = strand[1, 1]
    if target_strand == '+'
      target.end == target.starts[-1] + 3 * block_sizes[-1]
    elsif target_strand == '-'
      target.start == target.size - target.starts[-1] - 3 * block_sizes[-1]
    end
  end

  # Calculates the score compatible with the BLAT web server
  # as described in the BLAT FAQ
  # (http://genome.ucsc.edu/FAQ/FAQblat#blat4).
  def score
    size_mul = protein? ? 3 : 1
    matched  = size_mul * (match + (rep_match >> 1))
    matched - size_mul * mismatch - query.gap_count - target.gap_count
  end
end #class Hit
x.query_len : nil; end alias query_id query_def end #class Report end #class Blat end #module Bio =begin = Bio::Blat::Report BLAT result parser. (psl / pslx format) = References * ((<URL:http://www.genome.org/cgi/content/abstract/12/4/656>)) Kent, W.J., BLAT--the BLAST-like alignment tool, Genome Research, 12, 656--664, 2002. =end ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/probcons.rb�������������������������������������������������������������0000644�0000041�0000041�00000002250�12200110570�017307� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/probcons.rb - ProbCons application wrapper class # # Copyright:: Copyright (C) 2006-2007 # Jeffrey Blakeslee and John Conery University of Oregon <jeffb@uoregon.edu> # Naohisa Goto <ng@bioruby.org> # License:: The Ruby License # # $Id: probcons.rb,v 1.1 2007/07/16 12:25:50 ngoto Exp $ # # Bio::Probcons is a wrapper class to execute ProbCons # (Probabilistic Consistency-based Multiple Alignment # of Amino Acid Sequences). # # == References # # * http://probcons.stanford.edu/ # * Do, C.B., Mahabhashyam, M.S.P., Brudno, M., and Batzoglou, S. # ProbCons: Probabilistic Consistency-based Multiple Sequence Alignment. # Genome Research 15: 330-340, 2005. 
# module Bio # Bio::Probcons is a wrapper class to execute PROBCONS # (Probabilistic Consistency-based Multiple Alignment # of Amino Acid Sequences). # # Please refer documents in bio/apple/probcons.rb for references. class Probcons < Bio::Alignment::FactoryTemplate::FileInStdoutOut # default program name DEFAULT_PROGRAM = 'probcons'.freeze # default report parser DEFAULT_PARSER = Bio::Alignment::MultiFastaFormat end #class Probcons end #module Bio ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/mafft/������������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�016233� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/mafft/report.rb���������������������������������������������������������0000644�0000041�0000041�00000015007�12200110570�020076� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/mafft/report.rb - MAFFT report class # # Copyright:: Copyright (C) 2003, 2007 Naohisa Goto <ng@bioruby.org> # License:: The Ruby License # # $Id: report.rb,v 1.13 
module Bio
  module Alignment
    # Data class for fasta-formatted multiple sequence alignment data,
    # which is simply multiple entries of fasta formatted sequences.
    class MultiFastaFormat

      # delimiter for flatfile
      DELIMITER = RS = nil

      # Creates a new data object.
      # +str+ should be a (multi-)fasta formatted string.
      def initialize(str)
        ff = Bio::FlatFile.new(Bio::FastaFormat, StringIO.new(str))
        @data = ff.to_a
        @alignment = nil
        @seq_method = nil
      end

      # Gets a multiple alignment.
      # Returns a Bio::Alignment object.
      # +method+ should be one of :naseq, :aaseq, :seq, or nil (default).
      # nil means to automatically determine nucleotide or amino acid.
      #
      # This method returns the previously parsed object
      # if the same method is given (or the guessed method is the same).
      def alignment(method = nil)
        m = determine_seq_method(@data, method)
        if !@alignment or m != @seq_method then
          @seq_method = m
          @alignment = do_parse(@data, @seq_method)
        end
        @alignment
      end

      # Gets an array of the fasta formatted sequence objects.
      # Returns an array of Bio::FastaFormat objects.
      def entries
        @data
      end

      private
      # Determines which sequence accessor (:naseq, :aaseq or :seq) to use.
      # If nil is given, tries to guess DNA or protein from (at most)
      # the first three entries; :seq is returned when the guess is a tie.
      # Raises RuntimeError for any other value.
      def determine_seq_method(data, m = nil)
        case m
        when :aaseq
          :aaseq
        when :naseq
          :naseq
        when :seq
          :seq
        when nil
          # auto-detection
          score = 0
          data[0, 3].each do |e|
            k = e.to_seq.guess
            if k == Bio::Sequence::NA then
              score += 1
            elsif k == Bio::Sequence::AA then
              score -= 1
            end
          end
          if score > 0 then
            :naseq
          elsif score < 0 then
            :aaseq
          else
            :seq
          end
        else
          raise 'one of :naseq, :aaseq, :seq, or nil should be given'
        end
      end

      # Parses a result: builds a Bio::Alignment whose sequences are
      # obtained by calling +seqmethod+ on each entry and whose keys are
      # the entries' definitions.
      def do_parse(ary, seqmethod)
        a = Bio::Alignment.new
        a.add_sequences(ary) do |x|
          [ x.__send__(seqmethod), x.definition ]
        end
        a
      end
    end #class MultiFastaFormat
  end #module Alignment

  class MAFFT

    # MAFFT result parser class.
    # MAFFT is a very fast multiple sequence alignment software.
    #
    # Since a result of MAFFT is simply a multiple-fasta format,
    # the significance of this class is to keep standard form and
    # interface between Bio::ClustalW::Report.
    class Report < Bio::Alignment::MultiFastaFormat

      # Creates a new Report object.
      # +str+ should be multi-fasta formatted text as a string.
      #
      # Compatibility Note: the old usage (to get array of Bio::FastaFormat
      # objects) is deprecated.
      #
      # Compatibility Note 2: the argument +seqclass+ is deprecated.
      #
      # +seqclass+ should be one of following:
      # Class:  Bio::Sequence::AA, Bio::Sequence::NA, ...
      # String: 'PROTEIN', 'DNA', ...
      #
      def initialize(str, seqclass = nil)
        if str.is_a?(Array) then
          warn "Array of Bio::FastaFormat objects will be no longer accepted."
          @data = str
          # Keep the same initial state as the string constructor.
          @alignment = nil
          @seq_method = nil
        else
          super(str)
        end
        # Always define @seqclass so that the reader and do_parse never
        # touch an uninitialized instance variable.
        @seqclass = nil
        if seqclass then
          warn "the 2nd argument (seqclass) is deprecated."
          case seqclass
          when /PROTEIN/i
            @seqclass = Bio::Sequence::AA
          when /[DR]NA/i
            @seqclass = Bio::Sequence::NA
          else
            # anything that is a Class/Module is used as given
            @seqclass = seqclass if seqclass.is_a?(Module)
          end
        end
      end

      # sequence data. Returns an array of Bio::FastaFormat.
      attr_reader :data

      # Sequence class (Bio::Sequence::AA, Bio::Sequence::NA, ...)
      #
      # Compatibility note: This method will be removed in the future.
      attr_reader :seqclass

      # Gets a multiple alignment.
      # Returns a Bio::Alignment object.
      def alignment(method = nil)
        super
      end

      # This method will be deprecated. Instead, please use alignment.
      #
      # Gets a multiple alignment.
      # Returns a Bio::Alignment object.
      def align
        warn "Bio::MAFFT::Report#align is deprecated. Please use \'alignment\'."
        alignment
      end

      # This will be deprecated. Instead, please use alignment.output_fasta.
      #
      # Gets a fasta-format string of the sequences.
      # Returns a string.
      # Please refer to Bio::Alignment#output_fasta for arguments.
      def to_fasta(*arg)
        warn "Bio::MAFFT::Report#to_fasta is deprecated. Please use \'alignment.output_fasta\'"
        alignment.output_fasta(*arg)
      end

      # Compatibility note: Behavior of the method will be changed
      # in the future.
      #
      # Gets an array of the sequences.
      # Returns an array of Bio::FastaFormat instances.
      def to_a
        @data
      end

      private
      # Parsing a result.
      # When a sequence class was given to the constructor, every entry is
      # wrapped with that class; otherwise the generic parser is used.
      def do_parse(ary, seqmethod)
        if @seqclass then
          a = Bio::Alignment.new
          a.add_sequences(ary) do |x|
            [ @seqclass.new(x.seq), x.definition ]
          end
          a
        else
          super(ary, seqmethod)
        end
      end

    end #class Report
  end #class MAFFT
end #module Bio
module Bio
module Meme

  # == Description
  #
  # Minimal representation of one occurrence of a sequence motif as
  # reported by the MEME/MAST programs.
  #
  # TODO: integrate with Bio::Sequence class
  # TODO: parse PSSM data
  #
  class Motif

    # name of the sequence containing the motif (String)
    attr_accessor :sequence_name
    # strand (String)
    attr_accessor :strand
    # motif number (Integer)
    attr_accessor :motif
    # start position (Integer)
    attr_accessor :start_pos
    # end position (Integer)
    attr_accessor :end_pos
    # p-value (Float)
    attr_accessor :pvalue

    # Creates a new Bio::Meme::Motif object.
    # Every argument is coerced: names and strand with to_s,
    # motif number and positions with to_i, the p-value with to_f.
    def initialize(sequence_name, strand, motif, start_pos, end_pos, pvalue)
      @sequence_name, @strand = sequence_name.to_s, strand.to_s
      @motif, @start_pos, @end_pos =
        [motif, start_pos, end_pos].map { |value| value.to_i }
      @pvalue = pvalue.to_f
    end

    # Computes the motif length as end position minus start position.
    # NOTE(review): if both positions are inclusive this is one less than
    # the number of residues covered -- confirm against MAST output.
    def length
      end_pos - start_pos
    end

  end

end
end
module Bio
module Meme

  # == Description
  #
  # Bio::Meme::Mast is a wrapper for searching a database using sequence motifs.  The code
  # will read options from a Hash and run the program.  Parsing of the output is provided by
  # Bio::Meme::Mast::Report.  Before running, options[:mfile] and options[:d] must be set
  # in the constructor or Mast.config(options = {})
  #
  # == Usage
  #
  #   mast = Mast.new('/path/to/mast')
  #   or with options
  #   mast = Mast.new('/path/to/mast', {:mfile => 'meme.out', :d => '/shared/db/nr'})
  #
  #   report = Mast::Report.new(mast.run)
  #   report.each do |motif|
  #     puts motif.length
  #   end
  #
  #
  class Mast

    include Bio::Command

    autoload :Report, 'bio/appl/meme/mast/report'

    # A Hash of options for Mast
    attr_accessor :options

    DEFAULT_OPTIONS = {
      # required
      :mfile => nil,
      :d => nil,
      # optional
      :stdin => nil, # may not work as expected
      :count => nil,
      :alphabet => nil,
      :stdout => true,
      :text => false,
      :sep => false,
      :norc => false,
      :dna => false,
      :comp => false,
      :rank => nil,
      :smax => nil,
      :ev => nil,
      :mt => nil,
      :w => false,
      :bfile => nil,
      :seqp => false,
      :mf => nil,
      :df => nil,
      :minseqs => nil,
      :mev => nil,
      :m => nil,
      :diag => nil,
      :best => false,
      :remcorr => false,
      :brief => false,
      :b => false,
      :nostatus => true,
      :hit_list => true,
    }

    # The command line String to be executed
    attr_reader :cmd

    # Create a mast instance
    #
    #   m = Mast.new('/usr/local/bin/mast')
    # ---
    # *Arguments*:
    # * (required) _mast_location_: String
    # * (optional) _options_: Hash (see DEFAULT_OPTIONS)
    # *Raises*:: ArgumentError if mast program is not found
    # *Returns*:: a Bio::Meme::Mast object
    def initialize(mast_location, options = {})
      # File.exist? is used because File.exists? no longer exists in
      # current Ruby versions.
      unless File.exist?(mast_location)
        raise ArgumentError.new("mast: command not found : #{mast_location}")
      end
      @binary = mast_location
      options.empty? ? config(DEFAULT_OPTIONS) : config(options)
    end

    # Builds the command line string
    # any options passed in will be merged with DEFAULT_OPTIONS
    # Mast usage: mast <mfile> <opts> <flags>
    #
    #   mast.config({:mfile => "meme.out", :d => "/path/to/fasta/db"})
    # ---
    # *Arguments*:
    # * (required) _options_: Hash (see DEFAULT_OPTIONS)
    # *Returns*:: the command line string
    #
    # NOTE(review): option values are interpolated into the command line
    # without any quoting -- confirm callers never pass untrusted values.
    def config(options)
      @options = DEFAULT_OPTIONS.merge(options)
      mfile, opts, flags = "", "", ""
      @options.each_pair do |opt, val|
        if val.nil? or val == false
          # unset option / disabled flag: not emitted at all
          next
        elsif opt == :mfile
          # the motif file is a positional argument, not an option
          mfile = val
        elsif val == true
          flags << " -#{opt}"
        else
          opts << " -#{opt} #{val}"
        end
      end
      @cmd = "#{@binary} #{mfile + opts + flags}"
    end

    # Checks if input/database files exist and options are valid
    # *Raises*:: ArgumentError if the motifs file does not exist
    # *Raises*:: ArgumentError if the database file does not exist
    # *Raises*:: ArgumentError if there is an invalid option
    def check_options
      @options.each_key do |k|
        raise ArgumentError.new("Invalid option: #{k}") unless DEFAULT_OPTIONS.has_key?(k)
      end
      raise ArgumentError.new("Motif file not found: #{@options[:mfile]}") if @options[:mfile].nil? or !File.exist?(@options[:mfile])
      raise ArgumentError.new("Database not found: #{@options[:d]}") if @options[:d].nil? or !File.exist?(@options[:d])
    end

    # Run the mast program
    # ---
    # *Returns*:: Bio::Meme::Mast::Report object
    def run
      check_options
      call_command(@cmd) {|io| @output = io.read }
      Report.new(@output)
    end

  end # End class Mast
end # End module Meme
end # End module Bio
module Bio
module Meme
class Mast

  # == Description
  #
  # A class to parse the output from Mast
  #
  # WARNING: Currently support is only for -hit_list (machine readable) format
  #          HTML (default) output is not supported
  #
  # == Examples
  #
  class Report

    # Array of Bio::Meme::Motif objects, one per hit line.
    attr_reader :motifs

    # Parses +mast_hitlist+ (String, output of mast -hit_list).
    # Raises RuntimeError if a hit line cannot be parsed.
    def initialize(mast_hitlist)
      @motifs = parse_hit_list(mast_hitlist)
    end

    # Iterates each motif (Bio::Meme::Motif)
    def each
      @motifs.each do |motif|
        yield motif
      end
    end
    alias :each_motif :each


    private

    # Each line corresponds to one motif occurrence in one sequence.
    # The format of the hit lines is
    #    [<sequence_name> <strand><motif> <start> <end> <p-value>]+
    # where
    #    <sequence_name> is the name of the sequence containing the hit
    #    <strand>        is the strand (+ or - for DNA, blank for protein),
    #    <motif>         is the motif number,
    #    <start>         is the starting position of the hit,
    #    <end>           is the ending position of the hit, and
    #    <p-value>       is the position p-value of the hit.
    #
    # Lines starting with '#' and blank lines are ignored.
    # Fields may be separated by any amount of whitespace.
    # A line with 5 fields is taken to have no separate strand field;
    # a line with 6 fields carries the strand as its second field.
    def parse_hit_list(data)
      motifs = []
      data.each_line do |line|
        line.chomp!

        # skip comments
        next if line =~ /^#/

        # Split on runs of whitespace so that column-aligned output does
        # not produce empty fields.
        fields = line.split
        next if fields.empty?

        case fields.size
        when 5
          motifs << Motif.new(fields[0], nil, fields[1], fields[2], fields[3], fields[4])
        when 6
          motifs << Motif.new(fields[0], fields[1], fields[2], fields[3], fields[4], fields[5])
        else
          raise RuntimeError.new("Could not parse mast output")
        end
      end
      motifs
    end

  end # Result
end # Mast
end # Meme
end # Bio
# # Because most of the methods in this class are inherited from # Bio::PAML::Common, see documents of Bio::PAML::Common for details. # # == Examples # # Example 1: # # require 'bio' # # Reads multi-fasta formatted file and gets a Bio::Alignment object. # alignment = Bio::FlatFile.open(Bio::Alignment::MultiFastaFormat, # 'example.fst').alignment # # Reads newick tree from a file # tree = Bio::FlatFile.open(Bio::Newick, 'example.tree').tree # # Creates a Baseml object # baseml = Bio::PAML::Baseml.new # # Sets parameters # baseml.parameters[:runmode] = 0 # baseml.parameters[:RateAncestor] = 1 # # You can also set many parameters at a time. # baseml.parameters.update({ :alpha => 0.5, :fix_alpha => 0 }) # # Executes baseml with the alignment and the tree # report = baseml.query(alignment, tree) # class Baseml < Common autoload :Report, 'bio/appl/paml/baseml/report' # Default program name DEFAULT_PROGRAM = 'baseml'.freeze # Default parameters when running baseml. # # The parameters whose values are different from the baseml defalut # value (described in pamlDOC.pdf) in PAML 4.1 are: # seqfile, outfile, treefile, ndata, noisy, verbose # DEFAULT_PARAMETERS = { # Essential argumemts :seqfile => nil, :outfile => nil, # Optional arguments :treefile => nil, :noisy => 0, :verbose => 1, :runmode => 0, :model => 5, :Mgene => 0, :ndata => 1, :clock => 0, :fix_kappa => 0, :kappa => 2.5, :fix_alpha => 1, :alpha => 0.0, :Malpha => 0, :ncatG => 5, :fix_rho => 1, :rho => 0.0, :nparK => 0, :nhomo => 0, :getSE => 0, :RateAncestor => 0, :Small_Diff => 1e-6, :cleandata => 1, :fix_blength => 0, :method => 0 } end #class Baseml end #module Bio::PAML 
�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/paml/common.rb����������������������������������������������������������0000644�0000041�0000041�00000026211�12200110570�017706� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/paml/common.rb - Basic wrapper class common to PAML programs # # Copyright:: Copyright (C) 2008 # Michael D. Barton <mail@michaelbarton.me.uk>, # Naohisa Goto <ng@bioruby.org> # # License:: The Ruby License # # == Description # # This file contains Bio::PAML::Common, a basic wrapper class for # running PAML programs. # # == References # # * http://abacus.gene.ucl.ac.uk/software/paml.html # require 'tempfile' require 'bio/command' require 'bio/alignment' module Bio module PAML autoload :Codeml, 'bio/appl/paml/codeml' autoload :Baseml, 'bio/appl/paml/baseml' autoload :Yn00, 'bio/appl/paml/yn00' #-- # The autoload of Common::Report, Codeml::Report, Codeml::Rates, # Baseml::Report, Yn00::Report are described inside the classes. #++ # == Description # # Bio::PAML::Common is a basic wrapper class for PAML programs. # The class provides methods for generating the necessary configuration # file, and running a program. # class Common autoload :Report, 'bio/appl/paml/common_report' # Default parameters. Should be redefined in subclass. DEFAULT_PARAMETERS = {} # Default program. Should be redifined in subclass. DEFAULT_PROGRAM = nil # Parameters described in the control file. 
# (Hash)
      # Every key of the hash must be a Symbol object, and every value
      # must be a String object or nil.
      attr_accessor :parameters

      # Preferred order of parameters.
      # Written out as a literal list of Symbols.
      DEFAULT_PARAMETERS_ORDER = [
        :seqfile, :outfile, :treefile, :noisy, :verbose, :runmode,
        :seqtype, :CodonFreq, :ndata, :clock, :aaDist, :aaRatefile,
        :model, :NSsites, :icode, :Mgene,
        :fix_kappa, :kappa, :fix_omega, :omega,
        :fix_alpha, :alpha, :Malpha, :ncatG,
        :fix_rho, :rho, :nparK, :nhomo, :getSE,
        :RateAncestor, :Small_Diff, :cleandata, :fix_blength, :method
      ]

      # Builds a wrapper object for a PAML program.
      # The program is run from the given binary location, or as a
      # command found in the PATH. When program is nil, DEFAULT_PROGRAM
      # of the receiver's class is used instead.
      # The default parameters are loaded first, and the specified
      # parameters are then merged into them.
      # ---
      # *Arguments*:
      # * (optional) _program_: path to the program, or command name (String)
      # * (optional) _params_: parameters (Hash)
      def initialize(program = nil, params = {})
        if program then
          @program = program
        else
          @program = self.class::DEFAULT_PROGRAM
        end
        set_default_parameters
        current = self.parameters
        current.update(params)
      end

      # Executes the program with an already existing control file.
      # The parameters written in that file are not checked.
      # The parameters held in this object are not used and are left
      # unchanged.
      # According to the original notes, the output and report attributes
      # are cleared when the program is run; that happens outside the
      # code of this method (it only delegates to exec_local).
      #
      # Warning about PAML's behavior:
      # PAML writes supplemental output files in the current directory
      # with fixed file names which can not be changed with parameters
      # or command-line options, for example, rates, rst, and rub.
      # This behavior may overwrite existing files, especially
      # previous supplemental results.
      #
      # ---
      # *Arguments*:
      # * (required) _control_file_: file name of control file (String)
      # *Returns*:: messages printed to the standard output (String)
      def run(control_file)
        arguments = [ control_file ]
        exec_local(arguments)
      end

      # Runs the program on the internal parameters with the specified
      # sequence alignment and tree.
#
      # Note that parameters[:seqfile] and parameters[:outfile]
      # are always modified, and parameters[:treefile] is modified
      # when tree is specified.
      #
      # To prevent overwrite of existing files by PAML, this method
      # automatically creates a temporary directory and the program
      # is run inside the directory. After the end of the program,
      # the temporary directory is automatically removed.
      #
      # ---
      # *Arguments*:
      # * (required) _alignment_: Bio::Alignment object or similar object
      # * (optional) _tree_: Bio::Tree object
      # *Returns*:: Report object
      def query(alignment, tree = nil)
        seq_text = alignment.output(:phylipnon)
        tree_text = nil
        if tree then
          # First line of the tree data: the number of leaves and the
          # constant 1 (presumably the number of trees in the file).
          header = sprintf("%3d %2d\n", tree.leaves.size, 1)
          # The tree is written as plain single-line newick, without
          # bootstrap values and branch lengths.
          newick_options = {
            :indent => false,
            :bootstrap_style => :disabled,
            :branch_length_style => :disabled
          }
          newick = tree.output(:newick, newick_options)
          tree_text = "#{header}\n#{newick}"
        end
        result = _query_by_string(seq_text, tree_text)
        @report = self.class::Report.new(result)
      end

      # Executes the program with the internal parameters, taking the
      # sequence alignment and the tree as already formatted strings.
      #
      # parameters[:outfile] is always replaced. parameters[:seqfile]
      # and parameters[:treefile] are replaced only when alignment and
      # tree are given, respectively.
      #
      # RuntimeError is raised when seqfile is specified neither in
      # the argument nor in the parameters.
      #
      # For other information, see the document of query method.
      #
      # ---
      # *Arguments*:
      # * (optional) _alignment_: String
      # * (optional) _tree_: String or nil
      # *Returns*:: contents of output file (String)
      def query_by_string(alignment = nil, tree = nil)
        output_text = _query_by_string(alignment, tree)
        output_text
      end

      # (private) implementation of query_by_string().
# (private) implementation of query_by_string().
#
# Everything happens inside a directory created by Bio::Command.mktmpdir:
# 1. an (empty) output file, and the sequence / tree files when given,
#    are created there and their base names are stored in the
#    parameters (:outfile, :seqfile, :treefile);
# 2. the parameters are validated with check_parameters -- this is done
#    before the control file is written, so that an invalid
#    configuration fails without producing a control file;
# 3. the control file is written from dump_parameters and the program
#    is run with the temporary directory as its working directory;
# 4. the main output is read into @output, and every other regular file
#    in the directory whose name starts with a word character is read
#    into @supplemental_outputs (file name => content).
# All temporary files are closed and unlinked afterwards, also on error.
#
# ---
# *Arguments*:
# * (optional) _alignment_: String
# * (optional) _tree_: String or nil
# *Returns*:: contents of output file (String)
# *Raises*:: RuntimeError when check_parameters reports errors
def _query_by_string(alignment = nil, tree = nil)
  @parameters ||= {}
  Bio::Command.mktmpdir('paml') do |path|
    #$stderr.puts path.inspect
    tempfiles = []
    # Creates a temporary file in the working directory, writes
    # content into it (when given) and returns its base name.
    # The file is registered in tempfiles before anything else can fail,
    # so that the ensure clause below always cleans it up.
    create_file = lambda do |prefix, content|
      file = Tempfile.new(prefix, path)
      tempfiles.push(file)
      file.print(content) if content
      file.close(false)
      File.basename(file.path)
    end

    begin
      # outfile is always the first entry of tempfiles
      self.parameters[:outfile] = create_file.call('out', nil)
      self.parameters[:seqfile] = create_file.call('seq', alignment) if alignment
      self.parameters[:treefile] = create_file.call('tree', tree) if tree

      # check parameters
      errors = check_parameters
      if errors
        msg = errors.collect { |e| "error in parameter #{e[0]}: #{e[1]}" }
        raise RuntimeError, msg.join("; ")
      end

      # preparing control file
      ctlfn = create_file.call('control', dump_parameters)
      filenames = tempfiles.collect { |file| File.basename(file.path) }

      # exec command (exec_local stores the stdout in @data_stdout)
      exec_local([ ctlfn ], { :chdir => path })

      # get main output
      outfile = tempfiles.first
      outfile.open
      @output = outfile.read

      # get supplemental result files
      @supplemental_outputs = {}
      (Dir.entries(path) - filenames).each do |name|
        next unless /\A\w/ =~ name
        fn = File.join(path, name)
        @supplemental_outputs[name] = File.read(fn) if File.file?(fn)
      end
    ensure
      tempfiles.each { |file| file.close(true) }
    end
  end
  @output
end
private :_query_by_string
# Replaces all parameters with the ones found in a control file text.
# Lines that yield no parameter name (blank or comment-only lines)
# are skipped.
# ---
# *Arguments*:
# * (required) _str_: contents of a PAML control file (String)
# *Returns*:: parameters (Hash)
def load_parameters(str)
  loaded = {}
  str.each_line do |line|
    name, value = parse_parameter(line)
    next unless name
    loaded[name] = value
  end
  self.parameters = loaded
end

# Replaces all parameters with a fresh copy of the class constant
# DEFAULT_PARAMETERS.
# ---
# *Returns*:: parameters (Hash)
def set_default_parameters
  defaults = self.class::DEFAULT_PARAMETERS
  # merge with an empty hash to get a new Hash object
  self.parameters = defaults.merge({})
end

# Renders the parameters in control file syntax, one "name = value"
# line per parameter. Parameters listed in DEFAULT_PARAMETERS_ORDER
# come first, in that order; the others follow in hash order.
# Parameters whose value is nil or false are omitted.
# ---
# *Returns*:: string representation of the parameters (String)
def dump_parameters
  preferred = DEFAULT_PARAMETERS_ORDER
  ranked = parameters.keys.each_with_index.sort_by do |name, position|
    preferred.index(name) || (preferred.size + position)
  end
  ranked.inject('') do |text, (name, _position)|
    value = parameters[name]
    # Note: spaces are required in both side of the "=".
    text.concat "#{name.to_s} = #{value.to_s}\n" if value
    text
  end
end

private

# (private) clear attributes except program and parameters
def reset
  @command = @output = @report = nil
  @exit_status = @data_stdout = @supplemental_outputs = nil
end

# (private) parses a parameter in a line
#
# Everything from the first "*" on is treated as a comment. Name and
# value must be separated by "=" with whitespace on both sides.
# ---
# *Arguments*:
# * (required) _line_: single line string (String)
# *Returns*:: parameter name (Symbol or nil), value (String or nil)
def parse_parameter(line)
  content = line.sub(/\*.*/, '').strip
  # Note: spaces are required in both side of the "=".
  name, value = content.split(/\s+=\s+/, 2)
  name = (name && !name.empty?) ? name.to_sym : nil
  [ name, value ]
end

# (private) Runs the program with the given command-line arguments.
# No parameter checks are executed. The result attributes are cleared
# first; afterwards command, exit_status and data_stdout are set.
# ---
# *Arguments*:
# * (required) _arguments_: command-line arguments (Array of String)
# * (optional) _options_: options passed to Bio::Command.query_command (Hash)
# *Returns*:: messages printed to the standard output (String)
def exec_local(arguments, options = {})
  reset
  @command = [ @program, *arguments ]
  @data_stdout = Bio::Command.query_command(@command, nil, options)
  @exit_status = $?
  @data_stdout
end

# (private) Checks parameters.
# Currently the only check is that :seqfile is set and not empty.
# ---
# *Returns*:: nil when no errors are found, otherwise an Array
#             of [ parameter, message ] pairs
def check_parameters
  problems = []
  seqfile = parameters[:seqfile]
  if !seqfile || seqfile.empty?
    problems.push([ :seqfile, 'seqfile not specified' ])
  end
  problems.empty? ? nil : problems
end
�www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/paml/yn00/report.rb�����������������������������������������������������0000644�0000041�0000041�00000001204�12200110570�020512� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/paml/baseml/report.rb - parser class for PAML program yn00 # # Copyright:: Copyright (C) 2008 # Naohisa Goto <ng@bioruby.org> # # License:: The Ruby License # # == Description # # This file contains Bio::PAML::Yn00::Report, a parser class for a result # of yn00. # # == References # # * http://abacus.gene.ucl.ac.uk/software/paml.html # require 'bio/appl/paml/yn00' module Bio::PAML class Yn00 # UNDER CONSTRUCTION. # # Bio::PAML::Yn00::Report is a parser class for a yn00 result. 
# class Report < Bio::PAML::Common::Report end #class Report end #class Yn00 end #module Bio::PAML ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/paml/baseml/������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�017332� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/paml/baseml/report.rb���������������������������������������������������0000644�0000041�0000041�00000001224�12200110570�021171� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/paml/baseml/report.rb - parser class for PAML program baseml # # Copyright:: Copyright (C) 2008 # Naohisa Goto <ng@bioruby.org> # # License:: The Ruby License # # == Description # # This file contains Bio::PAML::Baseml::Report, a parser class for a result # of baseml. # # == References # # * http://abacus.gene.ucl.ac.uk/software/paml.html # require 'bio/appl/paml/baseml' module Bio::PAML class Baseml # UNDER CONSTRUCTION. 
# # Bio::PAML::Baseml::Report is a parser class for a baseml result. # class Report < Bio::PAML::Common::Report end #class Report end #class Baseml end #module Bio::PAML ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/paml/yn00.rb������������������������������������������������������������0000644�0000041�0000041�00000005451�12200110570�017207� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/paml/baseml.rb - Wrapper for running PAML program yn00 # # Copyright:: Copyright (C) 2008 # Naohisa Goto <ng@bioruby.org> # # License:: The Ruby License # # == Description # # This file contains Bio::PAML::Yn00, a wrapper class running yn00. # # == References # # * http://abacus.gene.ucl.ac.uk/software/paml.html # require 'bio/appl/paml/common' module Bio::PAML # == Description # # Bio::PAML::Yn00 is a wrapper for running PAML yn00 program. # # Because most of the methods in this class are inherited from # Bio::PAML::Common, see documents of Bio::PAML::Common for details. # # == Examples # # Example 1: # # require 'bio' # # Reads multi-fasta formatted file and gets a Bio::Alignment object. 
# alignment = Bio::FlatFile.open(Bio::Alignment::MultiFastaFormat, # 'example.fst').alignment # # Creates a Yn00 object # baseml = Bio::PAML::Yn00.new # # Sets parameters # baseml.parameters[:verbose] = 1 # baseml.parameters[:icode] = 0 # # You can also set many parameters at a time. # baseml.parameters.update({ :weighting => 0, :commonf3x4 => 0 }) # # Executes yn00 with the alignment # report = yn00.query(alignment) # class Yn00 < Common autoload :Report, 'bio/appl/paml/yn00/report' # Default program name DEFAULT_PROGRAM = 'yn00'.freeze # Default parameters when running baseml. # # The parameters whose values are different from the baseml defalut # value (described in pamlDOC.pdf) in PAML 4.1 are: # seqfile, outfile, treefile, ndata, noisy, verbose # DEFAULT_PARAMETERS = { # Essential argumemts :seqfile => nil, :outfile => nil, # Optional arguments :verbose => 1, :icode => 0, :weighting => 0, :commonf3x4 => 0 } # Runs the program on the internal parameters with the specified # sequence alignment. # Note that parameters[:seqfile] and parameters[:outfile] # are always modified. # # For other important information, see the document of # Bio::PAML::Common#query. # # --- # *Arguments*: # * (required) _alignment_: Bio::Alignment object or similar object # *Returns*:: Report object def query(alignment) super(alignment) end # Runs the program on the internal parameters with the specified # sequence alignment as a String object. # # For other important information, see the document of # query and Bio::PAML::Common#query_by_string methods. 
# # --- # *Arguments*: # * (required) _alignment_: Bio::Alignment object or similar object # *Returns*:: Report object def query_by_string(alignment = nil) super(alignment) end end #class Yn00 end #module Bio::PAML �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/paml/codeml/������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�017332� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/appl/paml/codeml/rates.rb����������������������������������������������������0000644�0000041�0000041�00000003634�12200110570�021003� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/appl/paml/codeml/rates.rb - Codeml rates report file parser # # Copyright:: Copyright (C) 2008 Michael D. Barton <mail@michaelbarton.me.uk> # # License:: The Ruby License # # == Description # # This file contains a class that implement a simple interface to Codeml rates estimation file # # == References # # * http://abacus.gene.ucl.ac.uk/software/paml.html # require 'delegate' require 'bio/appl/paml/codeml' module Bio::PAML class Codeml # == Description # # A simple class for parsing the codeml rates file. 
# == Description
#
# A simple class for parsing the codeml rates file.
#
# WARNING: The order of the parsed data should be correct, however it
# will not necessarily correspond to the position in the alignment. For
# instance codeml ignores columns that contain gaps, and therefore there
# will not be any estimated rate data for them.
#
# == Usage
#
#   site_rates = Bio::PAML::Codeml::Rates.new(File.open(@tmp_dir + "/rates").read)
#   site_rates.first[:freq] # => Number of times that column appears
#   site_rates[5][:rate]    # => Estimated rate of evolution
#   site_rates.last[:data]  # => The content of the column, as a string
#
#   # This class delegates to an array, so will respond to all array methods
#   site_rates.max {|x,y| x[:rate] <=> y[:rate] } # => Fastest evolving column
#   site_rates.detect {|x| x[:freq] > 1 }         # => Columns appearing more than once
class Rates < DelegateClass(Array)

  # Parses the text of a rates file and delegates to the resulting Array.
  # ---
  # *Arguments*:
  # * (required) _rates_: contents of the rates file (String)
  def initialize(rates)
    super(parse_rates(rates))
  end

  private

  # Turns every line that looks like a site record into a Hash with the
  # keys :freq (Integer), :data (String) and :rate (Float).
  # Records are stored at the index given by their site number, and the
  # unused slots are removed at the end, so the result is ordered by
  # site number. Lines that do not match are ignored.
  def parse_rates(text)
    record = /\s+(\d+)\s+(\d+)\s+([A-Z\*]+)\s+(\d+\.\d+)\s+(\d)/
    by_site = []
    text.each_line do |line|
      found = record.match(line)
      next unless found
      by_site[found[1].to_i] = {
        :freq => found[2].to_i,
        :data => found[3],
        :rate => found[4].to_f
      }
    end
    by_site.compact
  end

end
Barton <mail@michaelbarton.me.uk>, # Pjotr Prins <pjotr.prins@thebird.nl> # # License:: The Ruby License # require 'bio/appl/paml/codeml' module Bio::PAML class Codeml # == Description # # Run PAML codeml and get the results from the output file. The # Codeml::Report object is returned by Bio::PAML::Codeml.query. For # example # # codeml = Bio::PAML::Codeml.new('codeml', :runmode => 0, # :RateAncestor => 1, :alpha => 0.5, :fix_alpha => 0) # result = codeml.query(alignment, tree) # # where alignment and tree are Bioruby objects. This class assumes we have a # buffer containing the output of codeml. # # == References # # Phylogenetic Analysis by Maximum Likelihood (PAML) is a package of # programs for phylogenetic analyses of DNA or protein sequences using # maximum likelihood. It is maintained and distributed for academic use # free of charge by Ziheng Yang. Suggestion citation # # Yang, Z. 1997 # PAML: a program package for phylogenetic analysis by maximum likelihood # CABIOS 13:555-556 # # http://abacus.gene.ucl.ac.uk/software/paml.html # # == Examples # #-- # The following is not shown in the documentation # # >> require 'bio' # >> require 'bio/test/biotestfile' # >> buf = BioTestFile.read('paml/codeml/models/results0-3.txt') #++ # # Invoke Bioruby's PAML codeml parser, after having read the contents # of the codeml result file into _buf_ (for example using File.read) # # >> c = Bio::PAML::Codeml::Report.new(buf) # # Do we have two models? 
# # >> c.models.size # => 2 # >> c.models[0].name # => "M0" # >> c.models[1].name # => "M3" # # Check the general information # # >> c.num_sequences # => 6 # >> c.num_codons # => 134 # >> c.descr # => "M0-3" # # Test whether the second model M3 is significant over M0 # # >> c.significant # => true # # Now fetch the results of the first model M0, and check its values # # >> m0 = c.models[0] # >> m0.tree_length # => 1.90227 # >> m0.lnL # => -1125.800375 # >> m0.omega # => 0.58589 # >> m0.dN_dS # => 0.58589 # >> m0.kappa # => 2.14311 # >> m0.alpha # => nil # # We also have a tree (as a string) # # >> m0.tree # => "((((PITG_23265T0: 0.000004, PITG_23253T0: 0.400074): 0.000004, PITG_23257T0: 0.952614): 0.000004, PITG_23264T0: 0.445507): 0.000004, PITG_23267T0: 0.011814, PITG_23293T0: 0.092242);" # # Check the M3 and its specific values # # >> m3 = c.models[1] # >> m3.lnL # => -1070.964046 # >> m3.classes.size # => 3 # >> m3.classes[0] # => {:w=>0.00928, :p=>0.56413} # # And the tree # # >> m3.tree # => "((((PITG_23265T0: 0.000004, PITG_23253T0: 0.762597): 0.000004, PITG_23257T0: 2.721710): 0.000004, PITG_23264T0: 0.924326): 0.014562, PITG_23267T0: 0.000004, PITG_23293T0: 0.237433);" # # Next take the overall posterior analysis # # >> c.nb_sites.size # => 44 # >> c.nb_sites[0].to_a # => [17, "I", 0.988, 3.293] # # or by field # # >> codon = c.nb_sites[0] # >> codon.position # => 17 # >> codon.probability # => 0.988 # >> codon.dN_dS # => 3.293 # # with aliases # # >> codon.p # => 0.988 # >> codon.w # => 3.293 # # Now we generate special string 'graph' for positive selection. The # following returns a string the length of the input alignment and # shows the locations of positive selection: # # >> c.nb_sites.graph[0..32] # => " ** * * *" # # And with dN/dS (high values are still an asterisk *) # # >> c.nb_sites.graph_omega[0..32] # => " 3* 6 6 2" # # We also provide the raw buffers to adhere to the principle of # unexpected use. 
Test the raw buffers for content: # # >> c.header.to_s =~ /seed/ # => 1 # >> m0.to_s =~ /one-ratio/ # => 3 # >> m3.to_s =~ /discrete/ # => 3 # >> c.footer.to_s =~ /Bayes/ # => 16 # # Finally we do a test on an M7+M8 run. Again, after loading the # results file into _buf_ # #-- # >> buf78 = BioTestFile.read('paml/codeml/models/results7-8.txt') # # #++ # # Invoke Bioruby's PAML codeml parser # # >> c = Bio::PAML::Codeml::Report.new(buf78) # # Do we have two models? # # >> c.models.size # => 2 # >> c.models[0].name # => "M7" # >> c.models[1].name # => "M8" # # Assert the results are significant # # >> c.significant # => true # # Compared to M0/M3 there are some differences. The important ones # are the parameters and the full Bayesian result available for M7/M8. # This is the naive Bayesian result: # # >> c.nb_sites.size # => 10 # # And this is the full Bayesian result: # # >> c.sites.size # => 30 # >> c.sites[0].to_a # => [17, "I", 0.672, 2.847] # >> c.sites.graph[0..32] # => " ** * * *" # # Note the differences of omega with earlier M0-M3 naive Bayesian # analysis: # # >> c.sites.graph_omega[0..32] # => " 24 3 3 2" # # The locations are the same, but the omega differs. # class Report < Bio::PAML::Common::Report attr_reader :models, :header, :footer # Parse codeml output file passed with +buf+, where buf contains # the content of a codeml result file def initialize buf # split the main buffer into sections for each model, header and footer. 
# Give a short description of the models, for example 'M0-3'.
#
# Returns 'No model' when there are no models, the name of the model
# when there is exactly one, and otherwise the name of the first model
# joined by '-' with the number of the second one.
def descr
  first, second = @models
  case @models.size
  when 0
    'No model'
  when 1
    first.name
  else
    first.name + '-' + second.modelnum.to_s
  end
end

# Return the number of codons in the codeml alignment.
#
# The value is the second number on the line following "seed used = N"
# in the header, divided by 3 (integer division). Returns 0 when the
# header does not contain such a line.
def num_codons
  # Capture the number directly instead of parsing Array#to_s output,
  # whose format depends on the Ruby version and on which whitespace
  # characters happen to follow the seed line.
  @header[/seed used = \d+\n\s+\d+\s+(\d+)/, 1].to_i / 3
end

# Return the number of sequences in the codeml alignment.
#
# The value is the first number on the line following "seed used = N"
# in the header. Returns 0 when the header does not contain such a line.
def num_sequences
  @header[/seed used = \d+\n\s+(\d+)\s+\d+/, 1].to_i
end

# Return a PositiveSites (naive empirical bayesian) object
# built from the footer of the report.
def nb_sites
  PositiveSites.new("Naive Empirical Bayes (NEB)",@footer,num_codons)
end

# Return a PositiveSites Bayes Empirical Bayes (BEB) analysis
# built from the footer of the report.
def sites
  PositiveSites.new("Bayes Empirical Bayes (BEB)",@footer,num_codons)
end
# Tells whether the second model fits significantly better than the
# first one at the 1% significance level.
#
# The likelihood ratio statistic, twice the difference of the log
# likelihoods of the two models, is compared against a fixed chi-square
# critical value. Only the combinations M0-3, M1-2 and M7-8 are
# supported; for other combinations, or a different significance
# level, you'll have to calculate chi-square yourself.
#
# Returns true or false. Raises ReportError when the report does not
# contain exactly two models or the combination is not supported.
def significant
  unless @models.size == 2
    raise ReportError,"Wrong number of models #{@models.size}"
  end
  null_model, alternative = @models
  # critical values of chi-square at p=0.01
  critical_values = {
    [0, 3] => 13.2767, # df=4
    [1, 2] => 9.2103,  # df=2
    [7, 8] => 9.2103   # df=2
  }
  critical = critical_values[[null_model.modelnum, alternative.modelnum]]
  unless critical
    raise ReportError,"Significance calculation for #{descr} not supported"
  end
  2*(alternative.lnL - null_model.lnL) > critical
end

#:stopdoc:

# compatibility call for older interface (single models only)
def tree_log_likelihood
  @single.tree_log_likelihood
end

# compatibility call for older interface (single models only)
def tree_length
  @single.tree_length
end

# compatibility call for older interface (single models only)
def alpha
  @single.alpha
end

# compatibility call for older interface (single models only)
def tree
  @single.tree
end

#:startdoc:
# Model class contains one of the models of a codeml run (e.g. M0)
# which is used as a test hypothesis for positive selection. This
# class is used by Codeml::Report.
#
# All values are extracted on demand from the raw text of the model
# section. Numeric readers return 0.0 when their pattern is not found,
# except kappa, alpha and classes, which return nil when the section
# does not mention them at all.
class Model

  # Create a model using the relevant information from the codeml
  # result data (text buffer). The buffer is expected to start with
  # the model number, as in "3: discrete".
  def initialize buf
    @buf = buf
  end

  # Return the model number, i.e. the integer at the very start of the
  # buffer (all of its digits). Returns 0 when the buffer does not
  # start with a digit.
  def modelnum
    @buf[/\A\d+/].to_i
  end

  # Return the model name, e.g. 'M0' or 'M7'
  def name
    "M#{modelnum}"
  end

  # Return codeml log likelihood of model
  def lnL
    @buf[/lnL\(.+\):\s+(-?\d+(\.\d+)?)/,1].to_f
  end

  # Return codeml omega of model
  def omega
    @buf[/omega \(dN\/dS\)\s+=\s+ (-?\d+(\.\d+)?)/,1].to_f
  end
  alias dN_dS omega

  # Return codeml kappa of model, when available
  def kappa
    return nil if @buf !~ /kappa/
    @buf[/kappa \(ts\/tv\)\s+=\s+ (-?\d+(\.\d+)?)/,1].to_f
  end

  # Return codeml alpha of model, when available
  def alpha
    return nil if @buf !~ /alpha/
    @buf[/alpha .+ =\s+(-?\d+(\.\d+)?)/,1].to_f
  end

  # Return codeml tree length
  def tree_length
    @buf[/tree length\s+=\s+ (-?\d+(\.\d+)?)/,1].to_f
  end

  # Return codeml tree: the line that is followed by an empty line
  # and the "Detailed" section (String or nil)
  def tree
    @buf[/([^\n]+)\n\nDetailed/m,1]
  end

  # Return classes when available. For M3 it parses
  #
  #   dN/dS (w) for site classes (K=3)
  #   p:   0.56413  0.35613  0.07974
  #   w:   0.00928  1.98252 23.44160
  #
  # and turns it into an array of Hash, one per site class
  #
  #   >> m3.classes[0]
  #   => {:w=>0.00928, :p=>0.56413}
  #
  # All K classes listed on the "p:" line are returned. Returns nil
  # when the section does not contain the word "classes", and an empty
  # array when it does but has no "p:" line.
  def classes
    return nil if @buf !~ /classes/
    proportions = class_values('p')
    omegas      = class_values('w')
    proportions.each_with_index.map do |proportion, i|
      { :p => proportion, :w => omegas[i] }
    end
  end

  # Return the model information as a String
  def to_s
    @buf
  end

  private

  # Returns the numbers on the first line that starts with "label:"
  # as an Array of Float (empty when there is no such line).
  def class_values(label)
    @buf[/\n#{label}:(.*?)\n/, 1].to_s.split.map { |field| field.to_f }
  end

end
# A record of codon sites, across the sequences in the alignment,
# showing evidence of positive selection.
#
# This class is used for storing both codeml's full Bayesian and naive
# Bayesian analysis
class PositiveSite
  attr_reader :position, :aaref, :probability, :omega

  # Builds a record from the whitespace separated fields of one line of
  # the site table: position, amino acid, probability and omega.
  # Trailing significance markers ("*") after the probability are
  # dropped by the conversion to Float.
  def initialize fields
    @position    = fields[0].to_i
    @aaref       = fields[1].to_s
    @probability = fields[2].to_f
    @omega       = fields[3].to_f
  end

  # Return dN/dS (or omega) for this codon
  def dN_dS
    omega
  end

  alias w dN_dS

  alias p probability

  # Return contents as Array - useful for printing
  def to_a
    [ @position, @aaref, @probability, @omega ]
  end
end

# List for the positive selection sites. PAML returns:
#
#   Naive Empirical Bayes (NEB) analysis
#   Positively selected sites (*: P>95%; **: P>99%)
#   (amino acids refer to 1st sequence: PITG_23265T0)
#
#              Pr(w>1)     post mean +- SE for w
#
#       17 I      0.988*        3.293
#       18 H      1.000**       17.975
#       23 F      0.991**       6.283
#   (...)
#      131 V      1.000**       22.797
#      132 R      1.000**       10.800
#   (newline)
#
# these can be accessed using normal iterators. Also special
# methods are available for presenting this data
#
class PositiveSites < Array

  attr_reader :descr

  # Parses the site table of one analysis out of the report footer.
  #
  # The table is expected to start 6 lines below the first line that
  # contains +search+ and to end at the first blank line.
  # ---
  # *Arguments*:
  # * (required) _search_: title of the analysis to look for (String)
  # * (required) _buf_: footer of the codeml report (String)
  # * (required) _num_codons_: number of codons in the alignment (Integer)
  # *Raises*:: ReportError when +search+ is not found, or when the text
  #            ends before the table starts
  def initialize search, buf, num_codons
    @num_codons = num_codons
    if buf.index(search).nil?
      raise ReportError,"No NB sites found for #{search}"
    end
    # Set description of this class
    @descr = search
    lines = buf.split("\n")
    # find location of 'search'
    start = lines.index { |line| line.include?(search) } || 0
    first_row = start + 6
    raise ReportError,"Out of bound error for <#{buf}>" if lines[first_row].nil?
    lines[first_row..-1].each do | line |
      break if line.strip == ""
      push PositiveSite.new(line.split)
    end
    # keep the raw text: title, table header, rows and the line after
    @buf = lines[start..start+size+7].join("\n")
  end

  # Generate a graph - which is a simple string pointing out the positions
  # showing evidence of positive selection pressure.
  #
  #   >> c.sites.graph[0..32]
  #   => "                **    *       * *"
  #
  def graph
    graph_to_s(lambda { |site| "*" })
  end

  # Generate a graph - which is a simple string pointing out the positions
  # showing evidence of positive selection pressure, with dN/dS values
  # truncated to a single digit (values of 10 and above are an asterisk *)
  #
  #   >> c.sites.graph_omega[0..32]
  #   => "                24    3       3 2"
  #
  def graph_omega
    graph_to_s(lambda { |site|
      # Every site must occupy exactly one column, so only values whose
      # integer part has a single digit are printed as a number.
      site.omega.abs < 10.0 ? site.omega.to_i.to_s : "*"
    })
  end

  # Graph of amino acids of first sequence at locations
  def graph_seq
    graph_to_s(lambda { |site| site.aaref })
  end

  # Return the positive selection information as a String
  def to_s
    @buf
  end

  # :nodoc:
  # Creates a graph of sites, adjusting for gaps. This generator
  # is also called from HtmlPositiveSites. The _fill_ is used
  # to fill out the gaps.
  #
  # Column i of the result (counting from 1) stands for codon i. The
  # result is padded with _fill_ up to the number of codons, so with a
  # one-character _fill_ and one-character symbols it is exactly as
  # long as the alignment.
  def graph_to_s func, fill=' '
    ret = ""
    pos = 0
    each do | site |
      ret << fill * (site.position - pos - 1) << func.call(site)
      pos = site.position
    end
    # codons pos+1 .. @num_codons are still missing
    remaining = @num_codons - pos
    ret << fill * remaining if remaining > 0
    ret
  end
end

# Supporting error class
class ReportError < RuntimeError
end
# UNDER CONSTRUCTION.
#
# Bio::PAML::Common::Report is a basic report class for PAML program's
# results. It will have common function for baseml and codeml.
#
# Normally, users should not use this class directly.
class Report

  # Creates a new Report object.
  #
  # The body is intentionally empty for now: +str+ is accepted so that
  # all report classes share the same constructor signature, but it is
  # neither parsed nor stored here.
  # ---
  # *Arguments*:
  # * (required) _str_: presumably the contents of a PAML output file
  #   (String) -- not used by this class
  def initialize(str)
  end

end #class Report
Bio::PAML::Codeml::Report and Bio::PAML::Codeml::Rates # provide simple classes for parsing and accessing the Codeml report and # rates files respectively. # # == Examples # # Example 1: # # require 'bio' # # Reads multi-fasta formatted file and gets a Bio::Alignment object. # alignment = Bio::FlatFile.open(Bio::Alignment::MultiFastaFormat, # 'example.fst').alignment # # Reads newick tree from a file # tree = Bio::FlatFile.open(Bio::Newick, 'example.tree').tree # # Creates a Codeml object # codeml = Bio::PAML::Codeml.new # # Sets parameters # codeml.parameters[:runmode] = 0 # codeml.parameters[:RateAncestor] = 1 # # You can also set many parameters at a time. # codeml.parameters.update({ :alpha => 0.5, :fix_alpha => 0 }) # # Executes codeml with the alignment and the tree # report = codeml.query(alignment, tree) # # Example 2 (Obsolete usage): # # # Create a control file, setting some Codeml options # # Default parameters are used otherwise, see RDoc for defaults # # The names of the parameters correspond to those specified # # in the Codeml documentation # control_file = Tempfile.new('codeml_ctl') # control_file.close(false) # # Prepare output file as a temporary file # output_file = Tempfile.new('codeml_test') # output_file.close(false) # Bio::PAML::Codeml.create_control_file(config_file.path, { # :model => 1, # :fix_kappa => 1, # :aaRatefile => TEST_DATA + '/wag.dat', # :seqfile => TEST_DATA + '/abglobin.aa', # :treefile => TEST_DATA + '/abglobin.trees', # :outfile => output_file.path, # }) # # # Create an instance of Codeml specifying where the codeml binary is # codeml = Bio::PAML::Codeml.new('/path/to/codeml') # # # Run codeml using a control file # # Returns the command line output # codeml_output = codeml.run(control_file) # class Codeml < Common autoload :Report, 'bio/appl/paml/codeml/report' autoload :Rates, 'bio/appl/paml/codeml/rates' # Default program name DEFAULT_PROGRAM = 'codeml'.freeze # Default parameters when running codeml. 
# # The parameters whose values are different from the codeml defalut # value (described in pamlDOC.pdf) in PAML 4.1 are: # seqfile, outfile, treefile, ndata, noisy, verbose, cleandata # DEFAULT_PARAMETERS = { # Essential argumemts :seqfile => nil, :outfile => nil, # Optional arguments :treefile => nil, :noisy => 0, :verbose => 1, :runmode => 0, :seqtype => 2, :CodonFreq => 2, :ndata => 1, :clock => 0, :aaDist => 0, :aaRatefile => 'wag.dat', :model => 2, :NSsites => 0, :icode => 0, :Mgene => 0, :fix_kappa => 0, :kappa => 2, :fix_omega => 0, :omega => 0.4, :fix_alpha => 0, :alpha => 0.0, :Malpha => 0, :ncatG => 3, :fix_rho => 1, :rho => 0.0, :getSE => 0, :RateAncestor => 0, :Small_Diff => 0.5e-6, :cleandata => 1, :fix_blength => 0, :method => 0 } # OBSOLETE. This method should not be used. # Instead, use parameters. def options warn 'The method Codeml#options will be changed to be used for command line arguments in the future. Instead, use Codeml#parameters.' parameters end # OBSOLETE. This method should not be used. # Instead, use parameters=(hash). def options=(hash) warn 'The method Codeml#options=() will be changed to be used for command line arguments in the future. Instead, use Codeml#parameters=().' self.parameters=(hash) end # Obsolete. This method will be removed in the future. # Helper method for creating a codeml control file. # Note that default parameters are automatically merged. def self.create_control_file(parameters, filename) parameters = DEFAULT_PARAMETERS.merge(parameters) File.open(filename, 'w') do |file| parameters.each do |key, value| file.puts "#{key.to_s} = #{value.to_s}" if value end end filename end # OBSOLETE. This method will soon be removed. # Instead, use create_control_file(parameters, filename). def self.create_config_file(parameters, filename) warn "The method Codeml.create_config_file(parameters, filename) will soon be removed. Instead, use Codeml.create_control_file(filename, parameters)." 
create_control_file(parameters, filename) end # Runs the program on the internal parameters with the specified # sequence alignment and tree. # # Note that parameters[:seqfile] and parameters[:outfile] # are always modified, and parameters[:treefile] and # parameters[:aaRatefile] are modified when tree and aarate are # specified respectively. # # For other important information, see the document of # Bio::PAML::Common#query. # # --- # *Arguments*: # * (required) _alignment_: Bio::Alignment object or similar object # * (optional) _tree_: Bio::Tree object # * (optional) _aarate_: String or nil # *Returns*:: Report object def query(alignment, tree = nil, aarate = nil) begin aaratefile = prepare_aaratefile(aarate) ret = super(alignment, tree) ensure finalize_aaratefile(aaratefile) end ret end # Runs the program on the internal parameters with the specified # sequence alignment data string and tree data string. # # Note that parameters[:outfile] is always modified, and # parameters[:seqfile], parameters[:treefile], and # parameters[:aaRatefile] are modified when # alignment, tree, and aarate are specified respectively. # # It raises RuntimeError if seqfile is not specified in the argument # or in the parameter. # # For other important information, see the document of query method. 
# # --- # *Arguments*: # * (optional) _alignment_: String # * (optional) _tree_: String or nil # * (optional) _aarate_: String or nil # *Returns*:: contents of output file (String) def query_by_string(alignment = nil, tree = nil, aarate = nil) begin aaratefile = prepare_aaratefile(aarate) ret = super(alignment, tree) ensure finalize_aaratefile(aaratefile) end ret end private # (private) prepares temporary file for aaRatefile if needed def prepare_aaratefile(aarate) if aarate then aaratefile = Tempfile.new('codeml_aarate') aaratefile.print aarate aaratefile.close(false) self.parameters[:aaRatefile] = aaratefile.path end aaratefile end # (private) removes temporary file for aaRatefile if needed def finalize_aaratefile(aaratefile) aaratefile.close(true) if aaratefile end end # End class Codeml end # End module PAML end # End module Bio �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/tree/������������������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�015141� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/tree/output.rb���������������������������������������������������������������0000644�0000041�0000041�00000017321�12200110570�017032� 
0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/tree/output.rb - Phylogenetic tree formatter # # Copyright:: Copyright (C) 2004-2006 # Naohisa Goto <ng@bioruby.org> # License:: The Ruby License # # # == Description # # This file contains formatter of Newick, NHX and Phylip distance matrix. # # == References # # * http://evolution.genetics.washington.edu/phylip/newick_doc.html # * http://www.phylosoft.org/forester/NHX.html # module Bio class Tree #--- # newick output #+++ # default options DEFAULT_OPTIONS = { :indent => ' ' } def __get_option(key, options) if (r = options[key]) != nil then r elsif @options && (r = @options[key]) != nil then r else DEFAULT_OPTIONS[key] end end private :__get_option # formats Newick label (unquoted_label or quoted_label) def __to_newick_format_label(str, options) if __get_option(:parser, options) == :naive then return str.to_s end str = str.to_s if /([\(\)\,\:\[\]\_\'\x00-\x1f\x7f])/ =~ str then # quoted_label return "\'" + str.gsub(/\'/, "\'\'") + "\'" end # unquoted_label return str.gsub(/ /, '_') end private :__to_newick_format_label # formats leaf def __to_newick_format_leaf(node, edge, options) label = __to_newick_format_label(get_node_name(node), options) dist = get_edge_distance_string(edge) bs = get_node_bootstrap_string(node) if __get_option(:branch_length_style, options) == :disabled dist = nil end case __get_option(:bootstrap_style, options) when :disabled label + (dist ? ":#{dist}" : '') when :molphy label + (dist ? ":#{dist}" : '') + (bs ? "[#{bs}]" : '') when :traditional label + (bs ? bs : '') + (dist ? ":#{dist}" : '') else # default: same as molphy style label + (dist ? ":#{dist}" : '') + (bs ? 
"[#{bs}]" : '') end end private :__to_newick_format_leaf # formats leaf for NHX def __to_newick_format_leaf_NHX(node, edge, options) label = __to_newick_format_label(get_node_name(node), options) dist = get_edge_distance_string(edge) bs = get_node_bootstrap_string(node) if __get_option(:branch_length_style, options) == :disabled dist = nil end nhx = {} # bootstrap nhx[:B] = bs if bs and !(bs.empty?) # EC number nhx[:E] = node.ec_number if node.instance_eval { defined?(@ec_number) && self.ec_number } # scientific name nhx[:S] = node.scientific_name if node.instance_eval { defined?(@scientific_name) && self.scientific_name } # taxonomy id nhx[:T] = node.taxonomy_id if node.instance_eval { defined?(@taxonomy_id) && self.taxonomy_id } # :D (gene duplication or speciation) if node.instance_eval { defined?(@events) && !(self.events.empty?) } then if node.events.include?(:gene_duplication) nhx[:D] = 'Y' elsif node.events.include?(:speciation) nhx[:D] = 'N' end end # log likelihood nhx[:L] = edge.log_likelihood if edge.instance_eval { defined?(@log_likelihood) && self.log_likelihood } # width nhx[:W] = edge.width if edge.instance_eval { defined?(@width) && self.width } # merges other parameters flag = node.instance_eval { defined? @nhx_parameters } nhx.merge!(node.nhx_parameters) if flag flag = edge.instance_eval { defined? @nhx_parameters } nhx.merge!(edge.nhx_parameters) if flag nhx_string = nhx.keys.sort{ |a,b| a.to_s <=> b.to_s }.collect do |key| "#{key.to_s}=#{nhx[key].to_s}" end.join(':') nhx_string = "[&&NHX:" + nhx_string + "]" unless nhx_string.empty? label + (dist ? ":#{dist}" : '') + nhx_string end private :__to_newick_format_leaf_NHX # def __to_newick(parents, source, depth, format_leaf, options, &block) result = [] if indent_string = __get_option(:indent, options) then indent0 = indent_string * depth indent = indent_string * (depth + 1) newline = "\n" else indent0 = indent = newline = '' end out_edges = self.out_edges(source) if block_given? 
then out_edges.sort! { |edge1, edge2| yield(edge1[1], edge2[1]) } else out_edges.sort! do |edge1, edge2| o1 = edge1[1].order_number o2 = edge2[1].order_number if o1 and o2 then o1 <=> o2 else edge1[1].name.to_s <=> edge2[1].name.to_s end end end out_edges.each do |src, tgt, edge| if parents.include?(tgt) then ;; elsif self.out_degree(tgt) == 1 then result << indent + __send__(format_leaf, tgt, edge, options) else result << __to_newick([ src ].concat(parents), tgt, depth + 1, format_leaf, options) + __send__(format_leaf, tgt, edge, options) end end indent0 + "(" + newline + result.join(',' + newline) + (result.size > 0 ? newline : '') + indent0 + ')' end private :__to_newick # Returns a newick formatted string. # If block is given, the order of the node is sorted # (as the same manner as Enumerable#sort). # # Available options: # <tt>:indent</tt>:: # indent string; set false to disable (default: ' ') # <tt>:bootstrap_style</tt>:: # <tt>:disabled</tt> disables bootstrap representations. # <tt>:traditional</tt> for traditional style. # <tt>:molphy</tt> for Molphy style (default). def output_newick(options = {}, &block) #:yields: node1, node2 root = @root root ||= self.nodes.first return '();' unless root __to_newick([], root, 0, :__to_newick_format_leaf, options, &block) + __to_newick_format_leaf(root, Edge.new, options) + ";\n" end alias newick output_newick # Returns a NHX (New Hampshire eXtended) formatted string. # If block is given, the order of the node is sorted # (as the same manner as Enumerable#sort). 
# # Available options: # <tt>:indent</tt>:: # indent string; set false to disable (default: ' ') # def output_nhx(options = {}, &block) #:yields: node1, node2 root = @root root ||= self.nodes.first return '();' unless root __to_newick([], root, 0, :__to_newick_format_leaf_NHX, options, &block) + __to_newick_format_leaf_NHX(root, Edge.new, options) + ";\n" end # Returns formatted text (or something) of the tree # Currently supported format is: :newick, :nhx def output(format, *arg, &block) case format when :newick output_newick(*arg, &block) when :nhx output_nhx(*arg, &block) when :phylip_distance_matrix output_phylip_distance_matrix(*arg, &block) else raise 'Unknown format' end end #--- # This method isn't suitable to written in this file? #+++ # Generates phylip-style distance matrix as a string. # if nodes is not given, all leaves in the tree are used. # If the names of some of the given (or default) nodes # are not defined or are empty, the names are automatically generated. def output_phylip_distance_matrix(nodes = nil, options = {}) nodes = self.leaves unless nodes names = nodes.collect do |x| y = get_node_name(x) y = sprintf("%x", x.__id__.abs) if y.empty? 
y end m = self.distance_matrix(nodes) Bio::Phylip::DistanceMatrix.generate(m, names, options) end end #class Tree end #module Bio ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/�����������������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�015311� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/script.rb��������������������������������������������������������������0000644�0000041�0000041�00000000734�12200110570�017146� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/shell/script.rb - script mode for the BioRuby shell # # Copyright:: Copyright (C) 2006 # Toshiaki Katayama <k@bioruby.org> # License:: The Ruby License # # $Id: script.rb,v 1.3 2007/04/05 23:35:41 trevor Exp $ # module Bio::Shell class Script def initialize(script) Bio::Shell.cache[:binding] = TOPLEVEL_BINDING Bio::Shell.load_session eval(File.read(script), TOPLEVEL_BINDING) exit end end # Script end 
������������������������������������bio-1.4.3.0001/lib/bio/shell/demo.rb����������������������������������������������������������������0000644�0000041�0000041�00000011627�12200110570�016571� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/shell/demo.rb - demo mode for the BioRuby shell # # Copyright:: Copyright (C) 2006 # Toshiaki Katayama <k@bioruby.org> # License:: The Ruby License # # $Id: demo.rb,v 1.4 2007/04/05 23:35:41 trevor Exp $ # module Bio::Shell private def demo(part = nil) demo = Demo.new if part demo.send(part) else demo.all end end class Demo def initialize @bind = Bio::Shell.cache[:binding] end def all sequence && entry && shell && pdb && true end def tutorial end def aldh2 end def mito run(%q[entry = getent("data/kumamushi.gb")], "Load kumamushi gene from GenBank database entry ...", false) && run(%q[disp entry], "Check the contents ...", false) && run(%q[kuma = flatparse(entry)], "Parse the database entry ...", true) && run(%q[web], "Start BioRuby on Rails...", false) && run(%q[puts kuma.entry_id], "Extract entry ID ...", false) && run(%q[puts kuma.definition], "Extract definition ...", false) && run(%q[gene = kuma.seq], "Extract DNA sequence of the gene ...", true) && run(%q[doublehelix(gene)], "Show the sequence in ascii art ...", false) && run(%q[seqstat(gene)], "Statistics of the gene ...", false) && run(%q[config :color], "Change to color mode...", false) && run(%q[seqstat(gene)], "Statistics of the gene ...", false) && #run(%q[codontable], "Codontalble ...", false) && run(%q[protein = gene.translate], "Translate DNA into protein ...", true) && run(%q[comp = protein.composition], "Composition of the amino acids ...", false) && 
run(%q[pp comp], "Check the composition ...", false) && run(%q[puts protein.molecular_weight], "Molecular weight ...", false) && run(%q[midifile("data/kumamushi.mid", gene)], "Gene to music ...", false) && run(%q[`open "data/kumamushi.mid"`], "Let's listen ...", false) && true end def sequence run(%q[dna = getseq("atgc" * 100)], "Generating DNA sequence ...", true) && run(%q[doublehelix dna], "Double helix representation", false) && run(%q[protein = dna.translate], "Translate DNA into Protein ...", true) && run(%q[protein.molecular_weight], "Calculating molecular weight ...", true) && run(%q[protein.composition], "Amino acid composition ...", true) && true end def entry run(%q[kuma = getobj("gb:AF237819")], "Obtain an entry from GenBank database", false) && run(%q[kuma.definition], "Definition of the entry", true) && run(%q[kuma.naseq], "Sequence of the entry", true) && run(%q[kuma.naseq.translate], "Translate the sequence to protein", true) && run(%q[midifile("data/AF237819.mid", kuma.naseq)], "Generate gene music ...", false) && true end def shell run(%q[pwd], "Show current working directory ...", false) && run(%q[dir], "Show directory contents ...", false) && run(%q[dir "shell/session"], "Show directory contents ...", false) && true end def pdb run(%q[ent_1bl8 = getent("pdb:1bl8")], "Retrieving PDB entry 1BL8 ...", false) && run(%q[head ent_1bl8], "Head part of the entry ...", false) && run(%q[savefile("1bl8.pdb", ent_1bl8)], "Saving the original entry in file ...", false) && run(%q[disp "data/1bl8.pdb"], "Look through the entire entry ...", false) && run(%q[pdb_1bl8 = flatparse(ent_1bl8)], "Parsing the entry ...", false) && run(%q[pdb_1bl8.entry_id], "Showing the entry ID ...", true) && run(%q[pdb_1bl8.each_heterogen { |heterogen| p heterogen.resName }], "Showing each heterogen object ...", false) && true end def pdb_hetdic # run(%q[het_dic = open("http://deposit.pdb.org/het_dictionary.txt").read], # "Retrieving the het_dic database ...", false) && # 
run(%q[savefile("data/het_dictionary.txt", het_dic)], # "Saving the file ... ", false) && run(%q[het_dic.size], "Bytes of the file ...", true) && run(%q[disp "data/het_dictionary.txt"], "Take a look on the contents ...", true) && run(%q[flatindex("het_dic", "data/het_dictionary.txt")], "Creating index to make the seaarchable database ...", false) && run(%q[ethanol = flatsearch("het_dic", "EOH")], "Search an ethanol entry ...", true) && run(%q[osake = flatparse(ethanol)], "Parse the entry ...", true) && run(%q[osake.conect], "Showing connect table (conect) of the molecule ...", true) && true end private def run(cmd, msg, echo) comment(msg) splash(cmd) result = eval(cmd, @bind) if echo pp result end continue? end def comment(msg) puts "### #{msg}" end def splash(msg) Bio::Shell.splash_message_action("bioruby> #{msg}") print "bioruby> #{msg}" gets end def continue? Bio::Shell.ask_yes_or_no("Continue? [y/n] ") end end end ���������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/plugin/����������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�016607� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/plugin/psort.rb��������������������������������������������������������0000644�0000041�0000041�00000002256�12200110570�020310� 0����������������������������������������������������������������������������������������������������ustar 
�www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/shell/plugin/psort.rb - plugin for PSORT # # Copyright:: Copyright (C) 2006 # Toshiaki Katayama <k@bioruby.org> # License:: The Ruby License # # $Id: psort.rb,v 1.3 2007/04/05 23:35:41 trevor Exp $ # module Bio::Shell private def psort1(str) seq = getseq(str) if seq.is_a?(Bio::Sequence::NA) seq = seq.translate end psort = Bio::PSORT::PSORT1.imsut fasta = seq.to_fasta results = psort.exec(fasta).final_result results.each do |result| puts "#{result["certainty"].to_f*100.0}\t#{result["prediction"]}" end return results.first["prediction"] end def psort2(str) seq = getseq(str) if seq.is_a?(Bio::Sequence::NA) seq = seq.translate end psort = Bio::PSORT::PSORT2.imsut fasta = seq.to_fasta results = psort.exec(fasta).prob.sort_by{|x, y| y}.reverse results.each do |loc, prob| next if prob <= 0.0 puts "#{prob}\t#{Bio::PSORT::PSORT2::SclNames[loc]}" end return results.first.first end def psort2locations names = Bio::PSORT::PSORT2::SclNames names.sort.each do |loc, desc| puts "#{loc}\t#{desc}" end return names end end ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/plugin/keggapi.rb������������������������������������������������������0000644�0000041�0000041�00000011416�12200110570�020546� 0����������������������������������������������������������������������������������������������������ustar 
�www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/shell/plugin/keggapi.rb - plugin for KEGG API # # Copyright:: Copyright (C) 2005 # Toshiaki Katayama <k@bioruby.org> # License:: The Ruby License # # $Id: keggapi.rb,v 1.12 2007/04/05 23:35:41 trevor Exp $ # module Bio::Shell module Private module_function def keggapi_definition2tab(list) ary = [] list.each do |entry| ary << "#{entry.entry_id}\t#{entry.definition}" end return ary end end private def keggapi(wsdl = nil) if wsdl @keggapi = Bio::KEGG::API.new(wsdl) else @keggapi ||= Bio::KEGG::API.new end return @keggapi end # DBGET def binfo(db = "all") result = keggapi.binfo(db) puts result return result end def bfind(str) result = keggapi.bfind(str) return result end def bget(str) result = keggapi.bget(str) if block_given? yield result else puts result return result end end def btit(str) result = keggapi.btit(str) puts result return result end def bconv(str) result = keggapi.bconv(str) puts result return result end # DATABASES def keggdbs list = keggapi.list_databases result = Bio::Shell::Private.keggapi_definition2tab(list).join("\n") puts result return list.map {|x| x.entry_id} end def keggorgs list = keggapi.list_organisms result = Bio::Shell::Private.keggapi_definition2tab(list).sort.join("\n") puts result return list.map {|x| x.entry_id} end def keggpathways(org = "map") list = keggapi.list_pathways(org) result = Bio::Shell::Private.keggapi_definition2tab(list).join("\n") puts result return list.map {|x| x.entry_id} end # use KEGG DAS insetad def kegggenomeseq(org) result = "" require 'net/ftp' Net::FTP.open("ftp.genome.jp", "anonymous") do |ftp| path = "/pub/kegg/genomes/#{org}" list = ftp.nlst(path) file = list.grep(/.*genome$/).shift if file open("ftp://ftp.genome.jp/#{file}") do |file| result = 
file.read end end end return result end end =begin == BioRuby extensions --- get_all_best_best_neighbors_by_gene(genes_id) --- get_all_best_neighbors_by_gene(genes_id) --- get_all_reverse_best_neighbors_by_gene(genes_id) --- get_all_paralogs_by_gene(genes_id) --- get_all_genes_by_motifs(motif_id_list) --- get_all_oc_members_by_gene(genes_id) --- get_all_pc_members_by_gene(genes_id) --- get_all_genes_by_organism(org) --- get_all_linkdb_by_entry(entry_id, db) --- save_image(url, filename = nil) --- get_entries(ary = []) --- get_aaseqs(ary = []) --- get_naseqs(ary = []) --- get_definitions(ary = []) == Original KEGG API methods --- get_linkdb_by_entry(entry_id, db, start, max_results) --- get_best_best_neighbors_by_gene(genes_id, start, max_results) --- get_best_neighbors_by_gene(genes_id, start, max_results) --- get_reverse_best_neighbors_by_gene(genes_id, start, max_results) --- get_paralogs_by_gene(genes_id, start, max_results) --- get_motifs_by_gene(genes_id, db) --- get_genes_by_motifs(motif_id_list, start, max_results) --- get_ko_by_gene(genes_id) --- get_ko_by_ko_class(ko_class_id) --- get_genes_by_ko_class(ko_class_id, org, start, max_results) --- get_genes_by_ko(ko_id, org) --- get_oc_members_by_gene(genes_id, start, max_results) --- get_pc_members_by_gene(genes_id, start, max_results) --- mark_pathway_by_objects(pathway_id, object_id_list) --- color_pathway_by_objects(pathway_id, object_id_list, fg_color_list, bg_color_list) --- get_html_of_marked_pathway_by_objects(pathway_id, object_id_list) --- get_html_of_colored_pathway_by_objects(pathway_id, object_id_list, fg_color_list, bg_color_list) --- get_genes_by_pathway(pathway_id) --- get_enzymes_by_pathway(pathway_id) --- get_compounds_by_pathway(pathway_id) --- get_glycans_by_pathway(pathway_id) --- get_reactions_by_pathway(pathway_id) --- get_kos_by_pathway(pathway_id) --- get_pathways_by_genes(genes_id_list) --- get_pathways_by_enzymes(enzyme_id_list) --- get_pathways_by_compounds(compound_id_list) --- 
get_pathways_by_glycans(glycan_id_list) --- get_pathways_by_reactions(reaction_id_list) --- get_pathways_by_kos(ko_id_list, org) --- get_linked_pathways(pathway_id) --- get_genes_by_enzyme(enzyme_id, org) --- get_enzymes_by_gene(genes_id) --- get_enzymes_by_compound(compound_id) --- get_enzymes_by_glycan(glycan_id) --- get_enzymes_by_reaction(reaction_id) --- get_compounds_by_enzyme(enzyme_id) --- get_compounds_by_reaction(reaction_id) --- get_glycans_by_enzyme(enzyme_id) --- get_glycans_by_reaction(reaction_id) --- get_reactions_by_enzyme(enzyme_id) --- get_reactions_by_compound(compound_id) --- get_reactions_by_glycan(glycan_id) --- get_genes_by_organism(org, start, max_results) --- get_number_of_genes_by_organism(org) --- convert_mol_to_kcf(mol_text) =end ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/plugin/blast.rb��������������������������������������������������������0000644�0000041�0000041�00000001425�12200110570�020243� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/shell/plugin/blast.rb - plugin for BLAST services # # Copyright:: Copyright (C) 2006 # Toshiaki Katayama <k@bioruby.org> # License:: The Ruby License # # $Id: blast.rb,v 1.3 2007/04/05 23:35:41 trevor Exp $ # module Bio::Shell private # GenomeNet def keggblast(query) server = Bio::Blast.remote("blastp", "genes", "", "genomenet_tab") if query[/^>/] data = Bio::FastaFormat.new(query) desc = data.definition tmp = getseq(data.seq) else desc = "query" tmp = 
getseq(query) end if tmp.respond_to?(:translate) aaseq = tmp.translate else aaseq = tmp end fasta = aaseq.to_fasta(desc, 60) result = server.query(fasta) puts server.output return result end end �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/plugin/emboss.rb�������������������������������������������������������0000644�0000041�0000041�00000000572�12200110570�020430� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/shell/plugin/emboss.rb - methods to use EMBOSS # # Copyright:: Copyright (C) 2005 # Toshiaki Katayama <k@bioruby.org> # License:: The Ruby License # # $Id: emboss.rb,v 1.3 2007/04/05 23:35:41 trevor Exp $ # module Bio::Shell private def seqret(usa) Bio::EMBOSS.seqret(usa) end def entret(usa) Bio::EMBOSS.entret(usa) end end ��������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/plugin/soap.rb���������������������������������������������������������0000644�0000041�0000041�00000004125�12200110570�020100� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/shell/plugin/soap.rb - web services # # Copyright:: Copyright (C) 
2006 # Toshiaki Katayama <k@bioruby.org> # License:: Ruby's # # $Id: soap.rb,v 1.1 2007/07/09 11:17:09 k Exp $ # module Bio::Shell private def ncbisoap(wsdl = nil) if wsdl @ncbisoap = Bio::NCBI::SOAP.new(wsdl) else @ncbisoap ||= Bio::NCBI::SOAP.new end return @ncbisoap end def ebisoap(wsdl = nil) case wsdl when :ipscan @ebisoap = Bio::EBI::SOAP::InterProScan.new(wsdl) when :emboss @ebisoap = Bio::EBI::SOAP::Emboss.new(wsdl) when :clustalw @ebisoap = Bio::EBI::SOAP::ClustalW.new(wsdl) when :tcoffee @ebisoap = Bio::EBI::SOAP::TCoffee.new(wsdl) when :muscle @ebisoap = Bio::EBI::SOAP::Muscle.new(wsdl) when :fasta @ebisoap = Bio::EBI::SOAP::Fasta.new(wsdl) when :wublast @ebisoap = Bio::EBI::SOAP::WUBlast.new(wsdl) when :mpsrch @ebisoap = Bio::EBI::SOAP::MPsrch.new(wsdl) when :scanps @ebisoap = Bio::EBI::SOAP::ScanPS.new(wsdl) when :msd @ebisoap = Bio::EBI::SOAP::MSD.new(wsdl) when :ontology @ebisoap = Bio::EBI::SOAP::Ontology.new(wsdl) when :citation @ebisoap = Bio::EBI::SOAP::Citation.new(wsdl) when /^http/ @ebisoap = Bio::EBI::SOAP.new(wsdl) else @ebisoap ||= Bio::EBI::SOAP.new end return @ebisoap end def ddbjsoap(wsdl = nil) case wsdl when :blast @ddbjsoap = Bio::DDBJ::XML::Blast.new when :fasta @ddbjsoap = Bio::DDBJ::XML::Fasta.new when :clustalw @ddbjsoap = Bio::DDBJ::XML::ClustalW.new when :ddbj @ddbjsoap = Bio::DDBJ::XML::DDBJ.new when :gib @ddbjsoap = Bio::DDBJ::XML::Gib.new when :gtop @ddbjsoap = Bio::DDBJ::XML::Gtop.new when :pml @ddbjsoap = Bio::DDBJ::XML::PML.new when :srs @ddbjsoap = Bio::DDBJ::XML::SRS.new when :txsearch @ddbjsoap = Bio::DDBJ::XML::TxSearch.new when /^http/ @ddbjsoap = Bio::DDBJ::XML.new(wsdl) else @ddbjsoap ||= Bio::DDBJ::XML.new end return @ddbjsoap end end 
�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/plugin/codon.rb��������������������������������������������������������0000644�0000041�0000041�00000013712�12200110570�020242� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/shell/plugin/codon.rb - plugin for the codon table # # Copyright:: Copyright (C) 2005 # Toshiaki Katayama <k@bioruby.org> # License:: The Ruby License # # $Id: codon.rb,v 1.16 2007/04/05 23:35:41 trevor Exp $ # module Bio::Shell class ColoredCodonTable @@properties = { :basic => %w( H K R ), :polar => %w( S T Y Q N S ), :acidic => %w( D E ), :nonpolar => %w( F L I M V P A C W G ), :stop => %w( * ), } def initialize(number, cuhash = nil) @aacode = Bio::AminoAcid.names @table = Bio::CodonTable[number] @number = number @cuhash = cuhash setup_colors if Bio::Shell.config[:color] generate_colored_text else generate_mono_text end end attr_reader :table def setup_colors c = Bio::Shell.colors @colors = { :text => c[:none], :aa => c[:green], :start => c[:red], :stop => c[:red], :basic => c[:cyan], :polar => c[:blue], :acidic => c[:magenta], :nonpolar => c[:yellow], } end def generate_mono_text @table.each do |codon, aa| if aa == '*' code = 'STOP' aa = '' unless @cuhash else code = @aacode[aa] end if @cuhash percent = @cuhash[codon].to_s.rjust(6) 
eval("@#{codon} = '#{aa}#{percent}'") else eval("@#{codon} = ' #{code} #{aa} '") end end @hydrophilic = [ @@properties[:basic].join(" "), "(basic),", @@properties[:polar].join(" "), "(polar),", @@properties[:acidic].join(" "), "(acidic)", ].join(" ") @hydrophobic = @@properties[:nonpolar].join(" ") + " (nonpolar)" end def generate_colored_text @table.each do |codon, aa| property, = @@properties.detect {|key, list| list.include?(aa)} if aa == '*' if @cuhash color_code = "#{@colors[:stop]}STOP" color_aa = "#{@colors[:stop]}#{aa}" else color_code = "#{@colors[:stop]}STP" case codon when 'tga' color_aa = "#{@colors[:text]}U" when 'tag' color_aa = "#{@colors[:text]}O" else color_aa = "#{@colors[:text]}*" end end else color_code = "#{@colors[property]}#{@aacode[aa]}" if @table.start_codon?(codon) if @cuhash color_aa = "#{@colors[:aa]}#{aa}" else color_aa = "#{@colors[:start]}#{aa}" end else if @cuhash color_aa = "#{@colors[property]}#{aa}" else color_aa = "#{@colors[:aa]}#{aa}" end end end if @cuhash percent = @cuhash[codon].to_s.rjust(6) eval("@#{codon} = '#{color_aa}#{@colors[:text]}#{percent}'") else eval("@#{codon} = ' #{color_code} #{color_aa}#{@colors[:text]} '") end end @hydrophilic = [ "#{@colors[:basic]}basic#{@colors[:text]},", "#{@colors[:polar]}polar#{@colors[:text]},", "#{@colors[:acidic]}acidic#{@colors[:text]}" ].join(" ") @hydrophobic = "#{@colors[:nonpolar]}nonpolar" end def output header = <<-END # # = Codon table #{@number} : #{@table.definition} # # hydrophilic: #{@hydrophilic} # hydrophobic: #{@hydrophobic} END table = <<-END # # *---------------------------------------------* # | | 2nd | | # | 1st |-------------------------------| 3rd | # | | U | C | A | G | | # |-------+-------+-------+-------+-------+-----| # | U U |#{@ttt}|#{@tct}|#{@tat}|#{@tgt}| u | # | U U |#{@ttc}|#{@tcc}|#{@tac}|#{@tgc}| c | # | U U |#{@tta}|#{@tca}|#{@taa}|#{@tga}| a | # | UUU |#{@ttg}|#{@tcg}|#{@tag}|#{@tgg}| g | # |-------+-------+-------+-------+-------+-----| # | CCCC 
|#{@ctt}|#{@cct}|#{@cat}|#{@cgt}| u | # | C |#{@ctc}|#{@ccc}|#{@cac}|#{@cgc}| c | # | C |#{@cta}|#{@cca}|#{@caa}|#{@cga}| a | # | CCCC |#{@ctg}|#{@ccg}|#{@cag}|#{@cgg}| g | # |-------+-------+-------+-------+-------+-----| # | A |#{@att}|#{@act}|#{@aat}|#{@agt}| u | # | A A |#{@atc}|#{@acc}|#{@aac}|#{@agc}| c | # | AAAAA |#{@ata}|#{@aca}|#{@aaa}|#{@aga}| a | # | A A |#{@atg}|#{@acg}|#{@aag}|#{@agg}| g | # |-------+-------+-------+-------+-------+-----| # | GGGG |#{@gtt}|#{@gct}|#{@gat}|#{@ggt}| u | # | G |#{@gtc}|#{@gcc}|#{@gac}|#{@ggc}| c | # | G GGG |#{@gta}|#{@gca}|#{@gaa}|#{@gga}| a | # | GG G |#{@gtg}|#{@gcg}|#{@gag}|#{@ggg}| g | # *---------------------------------------------* # END if @cuhash text = table else text = header + table end if Bio::Shell.config[:color] text.gsub(/^\s+#/, @colors[:text]) else text.gsub(/^\s+#/, '') end end end private def codontable(num = 1, codon_usage = nil) cct = ColoredCodonTable.new(num, codon_usage) if codon_usage return cct else puts cct.output return cct.table end end def codontables tables = Bio::CodonTable::DEFINITIONS tables.sort.each do |i, definition| puts "#{i}\t#{definition}" end return tables end def aminoacids names = Bio::AminoAcid.names names.sort.each do |aa, code| if aa.length == 1 puts "#{aa}\t#{code}\t#{names[code]}" end end return names end def nucleicacids names = Bio::NucleicAcid.names %w(a t g c u r y w s k m b v h d n).each do |base| puts "#{base}\t#{names[base]}\t#{names[base.upcase]}" end return names end end ������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/plugin/ncbirest.rb�����������������������������������������������������0000644�0000041�0000041�00000003063�12200110570�020747� 0����������������������������������������������������������������������������������������������������ustar 
�www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/shell/plugin/ncbirest.rb - plugin for NCBI eUtils # # Copyright:: Copyright (C) 2009 # Naohisa Goto <ng@bioruby.org> # License:: The Ruby License # # $Id:$ # module Bio::Shell private # NCBI eUtils EFetch service. # # With 1 argument, it gets sequence(s) by using # Bio::NCBI::REST::EFetch.sequence. # Nucleotide or protein database is automatically selected for each id. # # Example: # efetch('AF237819') # # With two or more arguments, and when the 2nd argument is Symbol, # it calls the corresponding Bio::NCBI::REST::EFetch class method. # # Example: # efetch('13054692', :pubmed) # # the same as Bio::NCBI::REST::EFetch.pubmed('13054692') # # Otherwise, it acts the same as Bio::NCBI::REST.efetch. def efetch(ids, *arg) if arg.empty? then ret = Bio::NCBI::REST::EFetch.nucleotide(ids) unless /^LOCUS / =~ ret.to_s then ret = Bio::NCBI::REST::EFetch.protein(ids) end ret elsif arg[0].kind_of?(Symbol) meth = arg[0] case meth.to_s when /\A(journal|omim|pmc|pubmed|sequence|taxonomy)\z/ Bio::NCBI::REST::EFetch.__send__(meth, ids, *(arg[1..-1])) else nil end else Bio::NCBI::REST.efetch(ids, *arg) end end # NCBI eUtils EInfo def einfo Bio::NCBI::REST.einfo end # NCBI eUtils ESearch def esearch(str, *arg) Bio::NCBI::REST.esearch(str, *arg) end # Same as Bio::NCBI::REST.esearch_count def esearch_count(str, *arg) Bio::NCBI::REST.esearch_count(str, *arg) end end 
�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/plugin/midi.rb���������������������������������������������������������0000644�0000041�0000041�00000023546�12200110570�020070� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/shell/plugin/midi.rb - Sequence to MIDI converter # # Copyright:: Copyright (C) 2003, 2005 # Natsuhiro Ichinose <ichinose@genome.ist.i.kyoto-u.ac.jp>, # Toshiaki Katayama <k@bioruby.org> # License:: The Ruby License # # $Id: midi.rb,v 1.9 2007/04/05 23:35:41 trevor Exp $ # #-- # *TODO* # - add "Ohno" style # - add a accessor to drum pattern # - add a new feature to select music style (pop, trans, ryukyu, ...) # - what is the base? 
#++ class Bio::Sequence::NA class MidiTrack MidiProg = [ "Acoustic Grand Piano", "Bright Acoustic Piano", "Electric grand Piano", "Honky Tonk Piano", "Eiectric Piano 1", "Electric Piano 2", "Harpsichord", "Clavinet", "Celesra", "Glockenspiel", "Music Box", "Vibraphone", "Marimba", "Xylophone", "Tubular bells", "Dulcimer", "Drawbar Organ", "Percussive Organ", "Rock Organ", "Church Organ", "Reed Organ", "Accordion", "Harmonica", "Tango Accordion", "Nylon Accustic Guitar", "Steel Acoustic Guitar", "Jazz Electric Guitar", "Ciean Electric Guitar", "Muted Electric Guitar", "Overdrive Guitar", "Distorted Guitar", "Guitar Harmonics", "Acoustic Bass", "Electric Fingered Bass", "Electric Picked Bass", "Fretless Bass", "Slap Bass 1", "Slap Bass 2", "Syn Bass 1", "Syn Bass 2", "Violin", "Viola", "Cello", "Contrabass", "Tremolo Strings", "Pizzicato Strings", "Orchestral Harp", "Timpani", "String Ensemble 1", "String Ensemble 2 (Slow)", "Syn Strings 1", "Syn Strings 2", "Choir Aahs", "Voice Oohs", "Syn Choir", "Orchestral Hit", "Trumpet", "Trombone", "Tuba", "Muted Trumpet", "French Horn", "Brass Section", "Syn Brass 1", "Syn Brass 2", "Soprano Sax", "Alto Sax", "Tenor Sax", "Baritone Sax", "Oboe", "English Horn", "Bassoon", "Clarinet", "Piccolo", "Flute", "Recorder", "Pan Flute", "Bottle Blow", "Shakuhachi", "Whistle", "Ocarina", "Syn Square Wave", "Syn Sawtooth Wave", "Syn Calliope", "Syn Chiff", "Syn Charang", "Syn Voice", "Syn Fifths Sawtooth Wave", "Syn Brass & Lead", "New Age Syn Pad", "Warm Syn Pad", "Polysynth Syn Pad", "Choir Syn Pad", "Bowed Syn Pad", "Metal Syn Pad", "Halo Syn Pad", "Sweep Syn Pad", "SFX Rain", "SFX Soundtrack", "SFX Crystal", "SFX Atmosphere", "SFX Brightness", "SFX Goblins", "SFX Echoes", "SFX Sci-fi", "Sitar", "Banjo", "Shamisen", "Koto", "Kalimba", "Bag Pipe", "Fiddle", "Shanai", "Tinkle Bell", "Agogo", "Steel Drums", "Woodblock", "Taiko Drum", "Melodic Tom", "Syn Drum", "Reverse Cymbal", "Guitar Fret Noise", "Breath Noise", "Seashore", "Bird 
Tweet", "Telephone Ring", "Helicopter", "Applause", "Gun Shot" ] Styles = { # "Ohno" => { # # http://home.hiroshima-u.ac.jp/cato/bunkakoryuron.html # }, "Ichinose" => { :tempo => 120, :scale => [0, 2, 4, 5, 7, 9, 11], :tones => [ {:prog => 9, :base => 60, :range => 2}, {:prog => 13, :base => 48, :range => 2}, {:prog => 41, :base => 48, :range => 2}, {:prog => 44, :base => 36, :range => 2}, ] }, "Okinawan" => { :tempo => 180, :scale => [0,4,5,7,11], :tones => [ {:prog => MidiProg.index("Harpsichord"), :base => 60, :range => 2}, {:prog => MidiProg.index("Dulcimer"), :base => 48, :range => 2}, {:prog => MidiProg.index("Fretless Base"), :base => 36, :range => 1}, ] }, "Major" => { :scale => [0,2,4,5,7,9,11], }, "Minor" => { :scale => [0,2,3,5,7,9,10], }, "Harmonic minor" => { :scale => [0,2,3,5,7,9,11], }, "Whole tone" => { :scale => [0,2,4,6,8,10], }, "Half tone" => { :scale => [0,1,2,3,4,5,6,7,8,9,10,11], }, "Indian" => { :scale => [0,1,4,5,7,8,11], }, "Arabic" => { :scale => [0,2,3,6,7,8,11], }, "Spanish" => { :scale => [0,1,3,4,5,7,8,10], }, "Japanese" => { :scale => [0,2,5,7,9], }, } def initialize(channel = 0, program = nil, base = nil, range = nil, scale = nil) @channel = channel & 0xff @program = program || 0 @base = base || 60 @range = range || 2 @scale = scale || [0, 2, 4, 5, 7, 9, 11] @tunes = [] @tune = 0 @code = [] @time = 0 @range.times do |i| @scale.each do |c| @tunes.push c + i * 12 end end @ttype = { 'aa' => 1, 'at' => 0, 'ac' => 3, 'ag' => -1, 'ta' => 0, 'tt' => -1, 'tc' => 1, 'tg' => -2, 'ca' => 2, 'ct' => 1, 'cc' => 2, 'cg' => 6, 'ga' => -1, 'gt' => -3, 'gc' => 0, 'gg' => -2, } @dtype = [ { 'aa' => 2, 'at' => 4, 'ac' => 4, 'ag' => 2, 'ta' => 2, 'tt' => 4, 'tc' => 4, 'tg' => 2, 'ca' => 2, 'ct' => 3, 'cc' => 1, 'cg' => 2, 'ga' => 1, 'gt' => 2, 'gc' => 2, 'gg' => 3, }, { 'aa' => 3, 'at' => 3, 'ac' => 2, 'ag' => 3, 'ta' => 3, 'tt' => 3, 'tc' => 2, 'tg' => 2, 'ca' => 3, 'ct' => 2, 'cc' => 1, 'cg' => 1, 'ga' => 1, 'gt' => 1, 'gc' => 1, 'gg' => 1, }, { 
'aa' => 2, 'at' => 2, 'ac' => 2, 'ag' => 2, 'ta' => 1, 'tt' => 1, 'tc' => 2, 'tg' => 2, 'ca' => 2, 'ct' => 2, 'cc' => 2, 'cg' => 3, 'ga' => 2, 'gt' => 2, 'gc' => 3, 'gg' => 1, }, { 'aa' => 1, 'at' => 1, 'ac' => 1, 'ag' => 1, 'ta' => 1, 'tt' => 1, 'tc' => 1, 'tg' => 1, 'ca' => 1, 'ct' => 1, 'cc' => 1, 'cg' => 3, 'ga' => 1, 'gt' => 1, 'gc' => 1, 'gg' => 1, }, ] @code.concat [0x00, 0xc0 | (@channel & 0xff)] @code.concat icode(@program & 0xff, 1) end def icode(num, n) code = [] n.times do |i| code.push num & 0xff num >>= 8 end code.reverse end def rcode(num) code = [] code.push num & 0x7f while num > 0x7f num >>= 7 code.push num & 0x7f | 0x80 end code.reverse end def c2s(code) ans = "" code.each do |c| ans += c.chr end ans end def push(s) tt = @time % 4 t = @ttype[s[0, 2]] d = @dtype[tt][s[2, 2]] if !t.nil? && !d.nil? @tune += t @tune %= @tunes.length if tt == 0 vel = 90 elsif tt == 1 && d > 1 vel = 100 elsif tt == 2 vel = 60 else vel = 50 end @code.concat rcode(1) @code.concat [0x90 | @channel, @tunes[@tune] + @base, vel] @code.concat rcode(240 * d) @code.concat [0x80 | @channel, @tunes[@tune] + @base, 0] @time += d end end def push_silent(d) @code.concat rcode(1) @code.concat [0x90 | @channel, 0, 0] @code.concat rcode(240 * d) @code.concat [0x80 | @channel, 0, 0] @time += d; end def encode ans ="MTrk" ans += c2s(icode(@code.length + 4, 4)) ans += c2s(@code) ans += c2s([0x00, 0xff, 0x2f, 0x00]) ans end def header(num, tempo = 120) ans = "MThd" ans += c2s(icode(6, 4)) ans += c2s(icode(1, 2)) ans += c2s(icode(num + 1, 2)) ans += c2s(icode(480, 2)) ans += "MTrk" ans += c2s(icode(11, 4)) ans += c2s([0x00, 0xff, 0x51, 0x03]) ans += c2s(icode(60000000 / tempo, 3)) ans += c2s([0x00, 0xff, 0x2f, 0x00]) ans end end # MidiTrack # style: # Hash of :tempo, :scale, :tones # scale: # C C# D D# E F F# G G# A A# B # 0 1 2 3 4 5 6 7 8 9 10 11 # tones: # Hash of :prog, :base, :range -- tone, vol? 
or len?, octaves # drum: # true (with rhythm part), false (without rhythm part) def to_midi(style = {}, drum = true) default = MidiTrack::Styles["Ichinose"] if style.is_a?(String) style = MidiTrack::Styles[style] || default end tempo = style[:tempo] || default[:tempo] scale = style[:scale] || default[:scale] tones = style[:tones] || default[:tones] track = [] tones.each_with_index do |tone, i| ch = i ch += 1 if i >= 9 # skip rythm track track.push MidiTrack.new(ch, tone[:prog], tone[:base], tone[:range], scale) end if drum rhythm = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] track.push(MidiTrack.new(9, 0, 35, 2, rhythm)) end cur = 0 window_search(4) do |s| track[cur % track.length].push(s) cur += 1 end track.each do |t| t.push_silent(12) end ans = track[0].header(track.length, tempo) track.each do |t| ans += t.encode end return ans end end module Bio::Shell private def midifile(filename, seq, *args) begin print "Saving MIDI file (#{filename}) ... " File.open(filename, "w") do |file| file.puts seq.to_midi(*args) end puts "done" rescue warn "Error: Failed to save (#{filename}) : #{$!}" end end end if $0 == __FILE__ # % for i in file* # do # ruby -r bio bio/shell/plugin/midi.rb $i ${i}.mid # done include Bio::Shell seq_file = ARGV.shift mid_file = ARGV.shift Bio::FlatFile.auto(seq_file) do |ff| ff.each do |f| midifile(mid_file, f.naseq[0..1000]) end end end ����������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/plugin/obda.rb���������������������������������������������������������0000644�0000041�0000041�00000001546�12200110570�020047� 0����������������������������������������������������������������������������������������������������ustar 
�www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/shell/plugin/obda.rb - plugin for OBDA # # Copyright:: Copyright (C) 2005 # Toshiaki Katayama <k@bioruby.org> # License:: The Ruby License # # $Id: obda.rb,v 1.10 2007/04/05 23:45:11 trevor Exp $ # module Bio::Shell private def obda @obda ||= Bio::Registry.new end def obdaentry(dbname, entry_id) db = obda.get_database(dbname) unless db warn "Error: No such database (#{dbname})" return end entry = db.get_by_id(entry_id) if block_given? yield entry else return entry end end def obdadbs result = obda.databases.map {|db| db.database} return result end def biofetch(db, id, style = 'raw', format = 'default') serv = Bio::Fetch.new("http://www.ebi.ac.uk/cgi-bin/dbfetch") result = serv.fetch(db, id, style, format) return result end end ����������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/plugin/togows.rb�������������������������������������������������������0000644�0000041�0000041�00000002131�12200110570�020453� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/shell/plugin/togows.rb - plugin for TogoWS REST service # # Copyright:: Copyright (C) 2009 # Naohisa Goto <ng@bioruby.org> # License:: The Ruby License # # $Id:$ # module Bio::Shell private # Shortcut method to fetch entry(entries) by using TogoWS REST "entry" # service. Multiple databases may be used. 
# def togows(ids, *arg) Bio::TogoWS::REST.retrieve(ids, *arg) end # Fetches entry(entries) by using TogoWS REST "entry" service. # Same as Bio::TogoWS::REST.entry(database, ids, *arg). def togowsentry(database, ids, *arg) Bio::TogoWS::REST.entry(database, ids, *arg) end # Database search by using TogoWS REST "search" service. # Same as Bio::TogoWS::REST.search(database, term, *arg). def togowssearch(database, term, *arg) Bio::TogoWS::REST.search(database, term, *arg) end # Data format conversion by using TogoWS REST "convert" service. # Same as Bio::TogoWS::REST.convert(data, format_from, format_to). def togowsconvert(data, format_from, format_to) Bio::TogoWS::REST.convert(data, format_from, format_to) end end ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/plugin/seq.rb����������������������������������������������������������0000644�0000041�0000041�00000015035�12200110570�017730� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/shell/plugin/seq.rb - plugin for biological sequence manipulations # # Copyright:: Copyright (C) 2005 # Toshiaki Katayama <k@bioruby.org> # License:: The Ruby License # # $Id: seq.rb,v 1.21 2007/04/05 23:35:41 trevor Exp $ # module Bio::Shell private # Convert sequence to colored HTML string def htmlseq(str) if 
str.kind_of?(Bio::Sequence) seq = str else seq = getseq(str) end if seq.is_a?(Bio::Sequence::AA) scheme = Bio::ColorScheme::Taylor else scheme = Bio::ColorScheme::Nucleotide end html = %Q[<div style="font-family:monospace;">\n] seq.fold(50).each_byte do |c| case c.chr when "\n" html += "<br>\n" else color = scheme[c.chr] html += %Q[<span style="background:\##{color};">#{c.chr}</span>\n] end end html += "</div>\n" return html end def sixtrans(str) seq = getseq(str) [ 1, 2, 3, -1, -2, -3 ].each do |frame| title = "Translation #{frame.to_s.rjust(2)}" puts seq.translate(frame).to_fasta(title, 60) end end # Displays some basic properties of the sequence. def seqstat(str) max = 150 seq = getseq(str) rep = "\n* * * Sequence statistics * * *\n\n" if seq.moltype == Bio::Sequence::NA fwd = seq rev = seq.complement if seq.length > max dot = " ..." fwd = fwd.subseq(1, max) rev = rev.subseq(1, max) end rep << "5'->3' sequence : #{fwd.fold(70,20).strip}#{dot}\n" rep << "3'->5' sequence : #{rev.fold(70,20).strip}#{dot}\n" [ 1, 2, 3, -1, -2, -3 ].each do |frame| pep = seq.subseq(1, max+2).translate(frame).fold(70,20).strip rep << "Translation #{frame.to_s.rjust(2)} : #{pep}#{dot}\n" end rep << "Length : #{seq.length} bp\n" rep << "GC percent : #{seq.gc_percent} %\n" ary = [] seq.composition.sort.each do |base, num| percent = format("%.2f", 100.0 * num / seq.length).rjust(6) count = num.to_s.rjust(seq.length.to_s.length) ary << " #{base} - #{count} (#{percent} %)\n" end rep << "Composition : #{ary.join.strip}\n" rep << "Codon usage :\n" hash = Hash.new("0.0%") seq.codon_usage.sort.each do |codon, num| percent = format("%.1f%", 100.0 * num / (seq.length / 3)) hash[codon] = percent end rep << codontable(1, hash).output begin rep << "Molecular weight : #{seq.molecular_weight}\n" rescue rep << "Molecular weight : #{$!}\n" end begin rep << "Protein weight : #{seq.translate.chomp('*').molecular_weight}\n" rescue rep << "Protein weight : #{$!}\n" end else pep = seq if seq.length > max dot 
= " ..." pep = seq.subseq(1, max) end rep << "N->C sequence : #{pep.fold(70,20).strip}#{dot}\n" rep << "Length : #{seq.length} aa\n" names = Bio::AminoAcid.names ary = [] seq.composition.sort.each do |aa, num| percent = format("%.2f", 100.0 * num / seq.length).rjust(6) count = num.to_s.rjust(seq.length.to_s.length) code = names[aa] name = names[names[aa]] ary << " #{aa} #{code} - #{count} (#{percent} %) #{name}\n" end rep << "Composition : #{ary.join.strip}\n" begin rep << "Protein weight : #{seq.molecular_weight}\n" rescue rep << "Protein weight : #{$!}\n" end end rep << "//\n" puts rep return rep end # Displays a DNA sequence by ascii art in B-type double helix. # Argument need to be at least 16 bases in length. def doublehelix(str) seq = getseq(str) if seq.length < 16 warn "Error: Sequence must be longer than 16 bases." return end if seq.moltype != Bio::Sequence::NA warn "Error: Sequence must be a DNA sequence." return end pairs = [ [5, 0], [4, 2], [3, 3], [2, 4], [1, 4], [0, 3], [0, 2], [1, 0] ] seq.window_search(16, 16) do |subseq| pairs.each_with_index do |ij, x| base = subseq[x, 1] puts ' ' * ij[0] + base + '-' * ij[1] + base.complement + "\n" end pairs.reverse.each_with_index do |ij, x| base = subseq[x + 8, 1] puts ' ' * ij[0] + base.complement + '-' * ij[1] + base + "\n" end end end end class String def step(window_size) i = 0 0.step(self.length - window_size, window_size) do |i| yield self[i, window_size] end yield self[i + window_size .. 
-1] if i + window_size < self.length end def skip(window_size, step_size = 1) i = 0 0.step(self.length - window_size, step_size) do |i| yield [self[i, window_size], i + 1, i + window_size] end from = i + step_size to = [self.length, i + step_size + window_size].min yield [self[from, window_size], from + 1, to] if from + 1 <= to end def to_naseq Bio::Sequence::NA.new(self) end def to_aaseq Bio::Sequence::AA.new(self) end # folding both line end justified def fold(fill_column = 72, indent = 0) str = '' # size : allowed length of the actual text unless (size = fill_column - indent) > 0 warn "Error: indent > fill_column (indent is set to 0)" size = fill_column indent = 0 end 0.step(self.length - 1, size) do |n| str << ' ' * indent + self[n, size] + "\n" end return str end # folding with conscious about word boundaries with prefix string def fill(fill_column = 80, indent = 0, separater = ' ', prefix = '', first_line_only = true) # size : allowed length of the actual text unless (size = fill_column - indent) > 0 warn "Error: indent > fill_column (indent is set to 0)" size = fill_column indent = 0 end n = pos = 0 ary = [] while n < self.length pos = self[n, size].rindex(separater) if self[n, size].length < size # last line of the folded str pos = nil end if pos ary << self[n, pos+separater.length] n += pos + separater.length else # line too long or the last line ary << self[n, size] n += size end end str = ary.join("\n") str[0,0] = prefix + ' ' * (indent - prefix.length) if first_line_only head = ' ' * indent else head = prefix + ' ' * (indent - prefix.length) end str.gsub!("\n", "\n#{head}") return str.chomp end end 
���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/plugin/flatfile.rb�����������������������������������������������������0000644�0000041�0000041�00000004456�12200110570�020733� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/shell/plugin/flatfile.rb - plugin for flatfile database # # Copyright:: Copyright (C) 2005 # Toshiaki Katayama <k@bioruby.org> # License:: The Ruby License # # $Id: flatfile.rb,v 1.13 2007/04/05 23:45:11 trevor Exp $ # module Bio::Shell private def flatfile(filename) if block_given? Bio::FlatFile.auto(filename) do |flat| flat.each do |entry| yield flat.entry_raw end end else entry = '' Bio::FlatFile.auto(filename) do |flat| flat.next_entry entry = flat.entry_raw end return entry end end def flatauto(filename) if block_given? Bio::FlatFile.auto(filename) do |flat| flat.each do |entry| yield entry end end else entry = '' Bio::FlatFile.auto(filename) do |flat| entry = flat.next_entry end return entry end end def flatparse(entry) if cls = Bio::FlatFile.autodetect(entry) return cls.new(entry) end end def flatfasta(fastafile, *flatfiles) puts "Saving fasta file (#{fastafile}) ... 
" File.open(fastafile, "w") do |fasta| flatfiles.each do |flatfile| puts " converting -- #{flatfile}" Bio::FlatFile.auto(flatfile) do |flat| flat.each do |entry| header = "#{entry.entry_id} #{entry.definition}" fasta.puts entry.seq.to_fasta(header, 50) end end end end puts "done" end def flatindex(dbname, *flatfiles) begin dir = Bio::Shell.create_flat_dir(dbname) print "Creating BioFlat index (#{dir}) ... " bdb = format = options = nil Bio::FlatFileIndex.makeindex(bdb, dir, format, options, *flatfiles) puts "done" rescue warn "Error: Failed to create index (#{dir}) : #{$!}" end end def flatsearch(dbname, keyword) dir = Bio::Shell.find_flat_dir(dbname) unless dir warn "Error: Failed to open database (#{dbname})" return end entry = '' Bio::FlatFileIndex.open(dir) do |db| if results = db.include?(keyword) results.each do |entry_id| entry << db.search_primary(entry_id).to_s end else warn "Error: No hits found in #{dbname} (#{keyword})" end end return entry end end ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/plugin/das.rb����������������������������������������������������������0000644�0000041�0000041�00000002013�12200110570�017677� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/shell/plugin/keggdas.rb - plugin for KEGG DAS # # Copyright:: Copyright (C) 2006 # Toshiaki Katayama <k@bioruby.org> # License:: The Ruby License # # $Id: das.rb,v 1.3 2007/04/05 23:35:41 trevor Exp $ # module Bio class DAS def list_sequences result = "" self.get_dsn.each do |dsn| src = 
dsn.source_id self.get_entry_points(src).each do |ep| data = [src, ep.entry_id, ep.start.to_i, ep.stop.to_i, "# #{ep.description}"].join("\t") + "\n" puts data result += data end end return result end end end module Bio::Shell private # http://www.biodas.org/ # http://www.dasregistry.org/ def das(url = nil) if url @das = Bio::DAS.new(url) else @das ||= keggdas end end def keggdas(url = "http://das.hgc.jp/cgi-bin/") das(url) end def ensembl(url = "http://das.ensembl.org/") das(url) end def wormbase(url = "http://www.wormbase.org/db/") das(url) end end ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/plugin/entry.rb��������������������������������������������������������0000644�0000041�0000041�00000007235�12200110570�020304� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# # = bio/shell/plugin/entry.rb - extract entry and sequence # # Copyright:: Copyright (C) 2005 # Toshiaki Katayama <k@bioruby.org> # License:: The Ruby License # # $Id:$ # module Bio::Shell private # Read a text file and collect the first word of each line in array def readlist(filename) list = [] File.open(filename).each do |line| list << line[/^\S+/] end return list end # Obtain a Bio::Sequence::NA (DNA) or a 
Bio::Sequence::AA (Amino Acid) # sequence from # * String -- "atgcatgc" or "MQKKP" # * IO -- io = IO.popen("gzip -dc db.gz") (first entry only) # * "filename" -- "gbvrl.gbk" (first entry only) # * "db:entry" -- "embl:BUM" (entry is retrieved by the ent method) def getseq(arg) seq = "" if arg.kind_of?(Bio::Sequence) seq = arg elsif arg.respond_to?(:gets) or File.exists?(arg) ent = flatauto(arg) elsif arg[/:/] ent = getobj(arg) else tmp = arg end if ent.respond_to?(:seq) tmp = ent.seq elsif ent.respond_to?(:naseq) #seq = ent.naseq tmp = ent.naseq elsif ent.respond_to?(:aaseq) #seq = ent.aaseq tmp = ent.aaseq end if tmp and tmp.is_a?(String) and not tmp.empty? #seq = Bio::Sequence.auto(tmp).seq seq = Bio::Sequence.auto(tmp) end return seq end # Obtain a database entry from # * IO -- IO object (first entry only) # * "filename" -- local file (first entry only) # * "db:entry" -- local BioFlat, OBDA, EMBOSS, KEGG API def getent(arg) entry = "" db, entry_id = arg.to_s.strip.split(/\:/, 2) # local file if arg.respond_to?(:gets) or File.exists?(arg) puts "Retrieving entry from file (#{arg})" entry = flatfile(arg) # BioFlat in ./.bioruby/bioflat/ or ~/.bioinformatics/.bioruby/bioflat/ elsif Bio::Shell.find_flat_dir(db) puts "Retrieving entry from local BioFlat database (#{arg})" entry = flatsearch(db, entry_id) # OBDA in ~/.bioinformatics/seqdatabase.ini elsif obdadbs.include?(db) puts "Retrieving entry from OBDA (#{arg})" entry = obdaentry(db, entry_id) else # EMBOSS USA in ~/.embossrc begin str = entret(arg) rescue SystemCallError str = '' end if $? 
and $?.exitstatus == 0 and str.length != 0 puts "Retrieving entry from EMBOSS (#{arg})" entry = str # via Internet else case db.to_s.downcase when 'genbank', 'gb', 'nuccore', 'indsc' # NCBI puts "Retrieving entry from NCBI eUtils" entry = efetch(entry_id) when 'embl', 'emb', /\Aembl/, /\Auni/, 'sp', /\Aensembl/ # EBI puts "Retrieving entry from EBI Dbfetch" db = 'embl' if db == 'emb' db = 'uniprotkb' if db == 'uniprot' or db == 'sp' entry = biofetch(db, entry_id) when 'ddbj', 'dbj', 'dad' # TogoWS REST puts "Retrieving entry from TogoWS" db = 'ddbj' if db == 'dbj' entry = togowsentry(db, entry_id) else togodblist = Bio::TogoWS::REST.entry_database_list rescue [] if togodblist.include?(db) then # TogoWS REST puts "Retrieving entry from TogoWS" entry = togowsentry(db, entry_id) else # KEGG API at http://www.genome.jp/kegg/soap/ puts "Retrieving entry from KEGG API (#{arg})" entry = bget(arg) end end end end return entry end # Obtain a parsed object from sources that ent() supports. def getobj(arg) str = getent(arg) flatparse(str) end end �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/�����������������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�016423� 5����������������������������������������������������������������������������������������������������ustar 
�www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/����������������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�017720� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/��������������������������������������������������0000755�0000041�0000041�00000000000�12200110570�021401� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/������������������������������������������0000755�0000041�0000041�00000000000�12200110570�023054� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/�������������������������������0000755�0000041�0000041�00000000000�12200110570�025225� 
5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/�����������������������0000755�0000041�0000041�00000000000�12200110570�026700� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/bioruby_generator.rb���0000644�0000041�0000041�00000002706�12200110570�032753� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������class BiorubyGenerator < Rails::Generator::Base def manifest record do |m| m.directory 'app/controllers' m.directory 'app/helpers' m.directory 'app/views/bioruby' m.directory 'app/views/layouts' m.directory 'public/images/bioruby' m.directory 'public/stylesheets' m.file 'bioruby_controller.rb', 'app/controllers/bioruby_controller.rb' m.file 'bioruby_helper.rb', 'app/helpers/bioruby_helper.rb' m.file '_methods.rhtml', 'app/views/bioruby/_methods.rhtml' m.file '_classes.rhtml', 'app/views/bioruby/_classes.rhtml' m.file '_modules.rhtml', 'app/views/bioruby/_modules.rhtml' m.file '_log.rhtml', 
'app/views/bioruby/_log.rhtml' m.file '_variables.rhtml', 'app/views/bioruby/_variables.rhtml' m.file 'commands.rhtml', 'app/views/bioruby/commands.rhtml' m.file 'history.rhtml', 'app/views/bioruby/history.rhtml' m.file 'index.rhtml', 'app/views/bioruby/index.rhtml' m.file 'bioruby.rhtml', 'app/views/layouts/bioruby.rhtml' m.file 'spinner.gif', 'public/images/bioruby/spinner.gif' m.file 'bioruby-gem.png', 'public/images/bioruby/gem.png' m.file 'bioruby-link.gif', 'public/images/bioruby/link.gif' m.file 'bioruby-bg.gif', 'public/images/bioruby/bg.gif' m.file 'bioruby.css', 'public/stylesheets/bioruby.css' end end end ����������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/�������������0000755�0000041�0000041�00000000000�12200110570�030676� 5����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������././@LongLink���������������������������������������������������������������������������������������0000000�0000000�0000000�00000000147�00000000000�011567� L����������������������������������������������������������������������������������������������������ustar 
�root����������������������������root�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-gem.png��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-gem.p0000644�0000041�0000041�00000015447�12200110570�033313� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������PNG  ��� IHDR���@���@���iq���bKGD������ pHYs�� �� ~���tIME85K^��IDATxi_UyZkϐsN晈A@'(ֶZ/ToPyPkѪuAZA)$$'g{5Q_1\kz?C^&r[wqë<UkK˦DYwklZim 4Q攫Y'g׺wYmw0םˮhO~݀衻wEcYSɗ鉩iLSЛiUϧب:h4.Ch*`_D"U)؝698>~wK{[;wYT먞(V.RCs ) =N[ˬf[I&Ei32%`]yִph32*QIJ@Zyђ>y>~+9[]+8)W1Cy5(aK*),Y�lI$A'6P1O$堩% Od )cX7^rUG3zꇯ~{OE,1EAQ �iim22h 5e'"44p6z0[hU!'8B:c9}y'sj_}_޼^@ҚVOvy3ɸ#ThyuDѣ# !�M`9a:d WL9G;x].b ֏|Wݰ}/}[_~Y8JkJs괙Ēk>+H"GUJbP2'hE<S*CK5.cQM)%By{7<#;Z0Ri RIRqB9  B(P1 <h4BSy/ؠ@e-LgP" !SU ,20A6yk|k=[FSAC-8%QE %1%:=XK#:J&!gGm י `i(MDlCS xmǿ3J& #9 <:iDZ+te)=Ic`izG1X*hZhxG!t㵰D ( 8  -HМGVu3i{C0֗ f7rdb  ^ *1bCDcb�"xؐD3yNI+bx"<j_(PNb 8=#"L@K B^RLSsh,^q"Fũ &JHD@#W<F$S0D1IJS JQ"3P8HD`Di:!GJAKm︩ÏU76QWnµw4vdHU18ó=kP= A9Rd>`4f(d;B-XlJܯ|Y$ҌhiD)tpa. 
m#lA$~h‡{N^Wٻwͣ8C2n<Sk ɨ9Ydfڦi3EN O ˈIA#9T(pǹ$_t1_#ؠF(IJqfN|SM1f-?tqMpse8?z&{5`>k S,с-YKǕ4w0yc,tĢ!q4U<y%hv 0X٤k>_o o.u1)W4d@ t,(u_T(Pk^b4u \%kv%# %$B)ܷͨ=Z c.g*d ".f&/t=$>ocG?5Ǣ 5+VpwoK7.6v?$ZQԊtXhR4G G #lM{}ۮP}C*=PU$:0e? Kk|I lvRa1W/Jq'sg]|1q<G{w]|feP^Q B4VAd8ԚS$aB[l3`߲k0O'#&t/H[�)Mh@9&XeK\ ,[v,p s{Ò#ے�ti|l6l7=1\P |`ӜIT O IC=x L()8\k09 ki"EUKLt]ԴKtw;iGuq_\l9~,~8Ix K/߼󮼂{{[yb*eQܨъ8R(j*{CoԌ q$l-|&kKNȜa-q'g~[̱G+w392r ```pk(x5vR>痲FTM2eo&A X]JI^7D 2j[b>V[TIBP1>Y:_z3:c %@.{;|ڧDgW>U>f9Mo!P"h\8S҆3,alnZӸk06xt3WF̦<SxVh> r!bnjO9(@!@PB5O~o)cv F8kױwX{yubN|  Vȳ@Rc 3kvHOuc*fu ]!AA' .̼+W9$ xGw" {;keGS|1,XvS%U%R7ϴy8eļVWIV?]r 8^ ;i xZSl4;mЊʟV^tye; ʓ۔NS͈`4`8OIםǜUeVƶkQG6)ŽT\`)'1ny:0x^](=ϙ %| ! J&i4a,kg&fU^kKF( ̋+ &s)8hPleoqǮ _ME4MluP,�21g|ዟ̳៖JC<H37\'*ORHh)Q=6ͼC)4u Ru'#l-[ KX63Jc0"ZaK~0IbOK{r} Ccjr9Ŋ<wѴx@bcO{5%W^MD 4# r<uql1NwEugxodbhk' ,S^y6yt5䧜A0GM__(EoG :wD:X,"Fk8"a5wng��M:B A)CS'Pc?',c> 3mn7ß,YpXiOɵ247n}�Isw[ 8g%3yEh4ԧwi_Y�Zq#7j\9ݢ;yU) fӿH;عxus\40C|K8W>;{WJ?wh[E,B7$xQIdL9ѠugO_毟e Qjp89N~'}E]}oyNy÷~)e!A(xM5y{rsW>e̶oa?`_gQ,bQP`"k‘i5 O9zqsg?į9u>q޵>\UOG$7  T$"l9/F6+HW%,ݣ<Wq_`ţ^E=PĸRL{w(n6Dh(I{4s _ 6o}b_gYwK,S:EQ 4FԯJU'2*|t&&KQ/+W3y= &:&!1D/CVj4R*?vGˀ<pw &IB]D/JVDpDHi+Kb?Q=$i+"N| bB'0s GZ&&m[bqJ^7~ƲAǽ;vϦ@b=QI)EݧTBe'2%T7FV" \=eG?:1},X{FkTP(( �qb(ꈺM)tU~v6M>uZ|譵;_�C. 496Q̶LJ OY�z'u=۾M}YWT. ;bɇ%Nlˬ(Q3n Y5 !`ADZ;3ŋ.d#7Ms�Eg4N&*4鄌Me##fȏ1qeAkz9dw=D^ZC;ɹo~àd!)mD 9QJI(p 6%7KM>uo۸npE8dзy& Q uh3Ɏs=*hy+`r&7ld;zkUڿ;Oپe3[7>tAk-ĊbO@%* t W_n3&@> qW8n&q7G"UQX+(.x/3_810n-Q]tEJ9(fѢBZ B=BDdiF-Ju1EcѼŇOϦg"�{[7m?g9% băR gʥtQ2y2! xG4<%V`@}EL6YbyF=ު~\K,}q|y]Sy'Fl &I8JS03_K (5IE+E`[B (2 BZX  M3ڴ#֑[Kwc=W-�7<qWJfߡr"%<iᨊ2r4CN ^ ; hh"j4cmha0/)3R+JPS*e4Ji8Zt:s/{C<�7^vl˶6M,S I9Ng' Sq PhQ3w%L8)9@O9hyLؔTD@$ARҜysn{_tGRęxđW TFD3jAX\b DA4[!" 
3ų=fGhaE6{WPUP.iM[Fy[TYNĄbeZ]lu�vyb_%K-H$򐤬vm2s|ҐȁDRU薄j0IPtm *2rfFus豕~%yќ;㌉;$PtɆRob'q ΓXYh3d"TL%"#lh4HPV lw_RoyE�Cwk^hu~tZE< Ia19^ьdB>S`\}1qt^)O!mڌ1>9 F9U1�˫WOW۵k۹Ftg^gUHizLꨐP&H.I^:ls-JNNUaƼg Q6';t;޼;~[�_~j2{du)P 5ca m `8tȕ OR3b͘19$*}'~g=&1EQ:4qFDZ 9Ԙ-]M-.AT)u$0{.}-�>]Y SkC'Y[βhR0be;}`88:Dh\QW +wsu;/�'}y셧{O]y&IX"Tz=DT21J+D)ihM{z}Ĩ t+aiL-{-~7�O}`/Y~G1 ǔʼnPKXN%(92gёaʦ`S`PJtcNWgo~o4MaC(byHb"ILJ(šQVcD?1>3�{+qi=+뵈\h|BUZH |NkcUhKXC_P�]n۟SSg7K$7$􇈖v| Uga"*O0!IGUyx �ck;n=}Y[DAbMi! 4u,x^(n�~]xʿo;r<pR9ҼI(Feuh2;`gU7}:;3�`O}w6nֿDt+ޒ)V~R'Z{ wj�C7divgWroQqjjZ~(utvo8\LN͹E-MLfg{ @JjyNm����IENDB`�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������././@LongLink���������������������������������������������������������������������������������������0000000�0000000�0000000�00000000146�00000000000�011566� L����������������������������������������������������������������������������������������������������ustar �root����������������������������root�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-bg.gif���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-bg.gi0000644�0000041�0000041�00000002627�12200110570�033267� 
0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������GIF89a ���a"9g$h%#K"Gd##In&/53 C!Hb"$Js*u+"H=|,f#}-DD@@= D"Hl'978{+.97!Gj&r)0:.< Ew* E:6=22 F!Gu)-o'26m&@v+k'.?0 EA65A1y+p)"I8x*/18e"343:e$<@1e#<p'.!H0|.12z,"G!Dz,b#<:k&#Hz*#I?2n%8t)>!F>D> G EA B;m% B!Ft*o(A@"HB7B2=f$h#h& C#I2B$L@i& F>9;:$L���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������!�����,���� ���5 H*\ȰÇ#JHŋ3jȱǏ CIɓ#=XI%Gbhd ,Р%b„ ԨAiȐ/.H@E X4*c{@tI!rH=aq$H9u6sE YHt 0TI$T@9�&DC%J)#Ǝ RO1bdk:<BFJ3Р )8"$/#Ft+,HCc̰Ią&>|D 9ң@D 9!�}<Cͤ'02,"(RVhp`�d m ($h(,0(4h8<fҡbHgDh@ZX` ("~q!|\^zHyE^qdžc\W`Hgt1Er_�qN<oY&naH"Pk 6Ej VhGgAae daF| $"`tuzmPVQ W bGVQT@%KQEyGPS;@MF2K@$(�,l'�;���������������������������������������������������������������������������������������������������������././@LongLink���������������������������������������������������������������������������������������0000000�0000000�0000000�00000000150�00000000000�011561� L����������������������������������������������������������������������������������������������������ustar 
�root����������������������������root�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_variables.rhtml�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_variables.rh0000644�0000041�0000041�00000000274�12200110570�033343� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������<div id="variables"> <ul> <% local_variables.each do |var| %> <li><%= link_to_remote var, :update => "index", :url => {:action => "evaluate", :script => var} %></li> <% end %> </ul> </div>������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_log.rhtml���0000644�0000041�0000041�00000001462�12200110570�032671� 
0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������<div id="log_<%= @number %>" class="log"> <div class="input"> Input: [<%= link_to_remote @number, :url => {:action => "reload_script", :number => @number} %>] <pre class="script"> <%=h @script %> </pre> <div class="output"> Result: [<%= link_to_remote "methods", :url => {:action => "list_methods", :number => @number} %>] [<%= link_to_remote "classes", :url => {:action => "list_classes", :number => @number} %>] [<%= link_to_remote "modules", :url => {:action => "list_modules", :number => @number} %>] <div id="methods_<%= @number %>" class="methods"></div> <div id="classes_<%= @number %>" class="classes"></div> <div id="modules_<%= @number %>" class="modules"></div> <pre class="result"> <%=h @result %> </pre> <% if @output %> Output: <pre class="output"> <%=h @output %> </pre> <% end %> </div> </div> </div> ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css��0000644�0000041�0000041�00000011466�12200110570�033073� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* body */ body { margin: 0; color: #555555; background: url("/images/bioruby/bg.gif") repeat-y center; 
font-family: "trebuchet ms", verdana, arial, helvetica, sans-serif; font-size: 12px; } div#content { width: 750px; height: auto; margin: 0 auto 0 auto; text-align: left; } /* title */ div#title { width: 550px; padding-right: 200px; margin-bottom: 20px; text-align: left; background: url("/images/bioruby/gem.png") no-repeat left bottom; } div#title .titletop { color: #736451; font-size: 30px; font-weight: normal; text-align: left; text-indent: 70px; margin: 0; padding: 0; padding-top: 20px; margin-bottom: 10px; } div#title .titlesub { color: #000000; font-size: 15px; font-weight: normal; text-align: left; text-indent: 70px; margin: 0; padding: 0; border-bottom: 1px solid #eeeeee; } /* main */ div#main { width: 550px; background-color: #ffffff; padding-top: 0px; padding-left: 10px; } div#notice { background-color: #fcc; border: 1px solid #f00; } div#notice p { margin: 0; padding: 10px; } pre { color: #6e8377; background-color: #eaedeb; border-color: #6e8377; border-style: dashed; border-width: 1px; padding: 5px; width: 500px; overflow: auto; } div.log { width: 500px; margin-top: 15px; padding-top: 5px; border-top: 1px dotted #333333; } div.log div.input pre.script { background-color: #ffffeb; border-style: solid; } div.log div.output div.methods { padding: 5px; background-color: #ffffdd; border: 1px solid #ffcc00; } div.log div.output div.classes { padding: 5px; background-color: #ccffcc; border: 1px solid #00ff00; } div.log div.output div.modules { padding: 5px; background-color: #ffcccc; border: 1px solid #ff0000; } div.log div.output pre.result { border-style: dashed; } div.log div.output pre.output { border-style: dashed; } div.log hr.log { border-style: dotted none none none; border-top-width: 1px; border-color: #6e8377; width: 200px; height: 1px; } /* side */ div#side { width: 150px; float: right; margin-top: 20px; text-align: left; font-size: 12px; color: #e44268; } div#side div.title { font-weight: normal; color: #e44268; text-align: left; border-width: 0px 
0px 1px 0px; border-bottom: 1px solid #e44268; } div#side a:link { color: #ffffff; text-decoration: none; } div#side a:visited { color: #ffffff; text-decoration: none; } div#side a:hover { color: #cccccc; text-decoration: underline; } div#side ol,ul { margin: 10px; padding-left: 10px; } div#side li { color: #e44268; } div#side img { padding: 5px; /* centering */ display: block; margin-left: auto; margin-right: auto; border: 0px; } /* history */ div#history { width: 500px; } div#history div.histtime { background-color: #eaedeb; padding: 5px; } div#history div.histline { background-color: #ffffeb; padding: 5px; font-family: monospace; white-space: pre; } /* command */ div#command { width: 500px; } /* image */ img { } /* em */ em { color: #6e8377; font-style: normal; } /* link */ a { text-decoration: none; } a:link { color: #669933; } a:visited { color: #669933; } a:hover { text-decoration: underline; } /* header */ h1 { font-size: 180%; color: #ffffff; background-color: #6e8377; line-height: 64px; text-align: left; padding-left: 20px; } h2 { font-size: 160%; color: #6e8377; border-color: #b9c3be; border-style: dashed; border-width: 0px 0px 1px 0px; } h3 { font-size: 140%; color: #6e8377; border-color: #b9c3be; border-style: dotted; border-width: 0px 0px 1px 0px; } h4 { font-size: 130%; color: #6e8377; border-color: #b9c3be; border-style: solid; border-width: 0px 0px 1px 0px; } h5 { font-size: 120%; color: #6e8377; } h6 { font-size: 110%; color: #6e8377; } /* list */ dt { color: #6e8377; border-color: #b9c3be; border-style: dashed; border-width: 1px; padding: 5px; } ul { color: #6e8377; } /* table */ table { border: 1px solid #cccccc; border-collapse: collapse; } th { vertical-align: top; padding: 5px; } td { vertical-align: top; padding: 5px; } div#method_list table { border: none; } /* form */ input { background-color: #FFFFFF; padding: 2px; font-size: 10px; color: #666666; border: 1px solid #611022; margin-bottom: 2px; } input[type=submit] { background-color: 
#FFFFFF; padding: 2px; font-size: 10px; color: #ffffff; border: 1px solid #611022; background-color: #E44268; margin-bottom: 2px; } /* textarea */ textarea { background-color: #eaedeb; font-family: monospace; font-size: 12px; overflow: auto; width: 500px; padding: 5px; } /* blockquote */ blockquote { color: #6e8377; background-color: #eaedeb; border-color: #6e8377; border-style: dashed; border-width: 1px; } /* media */ @media print { div#main { margin-left: 0px; } div#side { display: none; } } @media screen { div#main { margin-left: 0px; } div#side { display: block; } } ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/index.rhtml��0000644�0000041�0000041�00000001626�12200110570�033062� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������<div id="console"> <%- if flash[:notice] -%> <div id="notice"><p><%= flash[:notice] %></p></div><br> <%- end -%> <% form_remote_tag(:url => {:action => "evaluate"}, :position => "top", :before => %(Element.show('spinner')), :complete => %(Element.hide('spinner'))) do %> <img id="spinner" src="/images/bioruby/spinner.gif" style="display:none"> <b>BioRuby script:</b> <br/> <textarea id="script" name="script" rows=10 cols=80></textarea> <br/> <input type="submit" value="Evaluate"> <input type="reset" value="Clear">    Show [ <%= link_to_remote "All", :url => {:action => "results", :limit => 0} %> | <%= link_to_remote "Last 5", :url => {:action => "results", :limit => 5} %> | <%= link_to_remote 
"Previous", :url => {:action => "results", :limit => 1} %> ] or <%= link_to "Hide", :action => "index" %> results<br/> <% end %> </div> <div id="logs"> </div> ����������������������������������������������������������������������������������������������������������././@LongLink���������������������������������������������������������������������������������������0000000�0000000�0000000�00000000151�00000000000�011562� L����������������������������������������������������������������������������������������������������ustar �root����������������������������root�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_helper.rb������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_helpe0000644�0000041�0000041�00000002205�12200110570�033450� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������module BiorubyHelper include Bio::Shell def project_workdir if Bio::Shell.cache[:savedir].match(/\.bioruby$/) Bio::Shell.cache[:workdir] else Bio::Shell.cache[:savedir] end end def have_results 
Bio::Shell.cache[:results].number > 0 end def local_variables eval("local_variables", Bio::Shell.cache[:binding]) - BiorubyController::HIDE_VARIABLES end def render_log(page) page.insert_html :top, :logs, :partial => "log" page.replace_html "variables", :partial => "variables" page.hide "methods_#{@number}" page.hide "classes_#{@number}" page.hide "modules_#{@number}" end def reference_link(class_or_module) name = class_or_module.to_s case name when /Bio::(.+)/ path = $1.split('::').join('/') url = "http://bioruby.org/rdoc/classes/Bio/#{path}.html" when /Chem::(.+)/ path = $1.split('::').join('/') url = "http://chemruby.org/rdoc/classes/Chem/#{path}.html" else path = name.split('::').join('/') url = "http://www.ruby-doc.org/core/classes/#{path}.html" end return "<a href='#{url}'>#{name}</a>" end end �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/spinner.gif��0000644�0000041�0000041�00000003006�12200110570�033042� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������GIF89a����ŽrrrcccRRRBBB111!!!������������������������������������������������! NETSCAPE2.0���!��,�������v $B(BB##(<M DP(^@ Pd"UP#ł\;1 ޞuh@${,3 _# d53"s5e!�!��,�������b $9.b# N 8C̢qH0@ J`D<H!@"_FsTX@a!3^(! 
f*F~"()VY#!�!��,�������_ $qKL*: uP#A`�!&8<$0<!>N"EZQCm@ ={$Ce)@e{!�!��,�������a $A"@"* 1&*:D.+`r4Bh 5X YA% Sfb'\L "]SWB]}(!�!��,�������G $q(4b3 Z4 0´*G2RQP&R‘�6Rx)U( 2̋6u�!��,�������o $A(⁈{.B"D(Q,Dc'H$B8)ȑ� 9L>OPUdXh) %_v# '3wz)#!�!��,�������_ $ HYG0CKdHF`J8 AX!b$0D 7�)/azSSV p$!�!��,������] $BHb1ڢD®0B 0 v+#BٌbݤFaaA&.X0@30.} Q+> R!�!��,�������c $PxB +*-kd+P )!L ?'IpJGjPh`Bb14"`Q# } No "tI+ZI!�!��,������\ $P`H* Apx80a V  AU DA 8 %B Be PDY00!�!��,������\ $$I>Q] (d2!8 r^e,)D?!х0 ԅiH0^@$gck0 _TL"q(!�! ��,�������d $dI`ӬBB 7m B7:0(P!X鲪 @@Ra KjD(2E {$ft5C%!�;��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������././@LongLink���������������������������������������������������������������������������������������0000000�0000000�0000000�00000000146�00000000000�011566� L����������������������������������������������������������������������������������������������������ustar 
�root����������������������������root�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_classes.rhtml���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_classes.rhtm0000644�0000041�0000041�00000000147�12200110570�033370� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������[ <%= @class %> ] <div id="class_list"> <%= @classes.map{ |x| reference_link(x) }.join(" > ") %> </div>�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/history.rhtml0000644�0000041�0000041�00000000333�12200110570�033446� 
0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������<div id="history"> <h2>Command history</h2> <% @history.each do |line| %> <% if line[/^# /] %> <div class="histtime"><%= line %></div> <% else %> <div class="histline"><%= line %></div> <% end %> <% end %> </div> �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������././@LongLink���������������������������������������������������������������������������������������0000000�0000000�0000000�00000000146�00000000000�011566� L����������������������������������������������������������������������������������������������������ustar 
�root����������������������������root�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/commands.rhtml���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/commands.rhtm0000644�0000041�0000041�00000000216�12200110570�033372� 0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������<div id="command"> <h2>BioRuby shell commands</h2> <ul> <% @bioruby_commands.each do |cmd| %> <li><%= cmd %></li> <% end %> </ul> </div>����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������bio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.rhtml0000644�0000041�0000041�00000002316�12200110570�033423� 
0����������������������������������������������������������������������������������������������������ustar �www-data������������������������www-data���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <title>BioRuby shell on Rails <%= stylesheet_link_tag "bioruby.css" %> <%= javascript_include_tag :defaults %>
    Project
    • <%= link_to "#{File.basename(project_workdir)}", "file://#{project_workdir}" %>
    Functions
    • <%= link_to "Console", :action => "index" %>
    • <%= link_to "History", :action => "history" %>
    • <%= link_to "Commands", :action => "commands" %>
    Local variables
    <%= render :partial => "variables" %>

    BioRuby shell on Rails

    Web interface for the BioRuby library

    <%= yield %>
    ././@LongLink0000000000000000000000000000015000000000000011561 Lustar rootrootbio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-link.gifbio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-link.0000644000004100000410000000530612200110570033311 0ustar www-datawww-dataGIF89ad>::zj::zrr~::~ffV>~ZZj**~rrFFVVʒrrj&&*j^^RRZZ::jj>..NNzަzzBBjJJZZzJJ*rrVRR>&&z**ZZff~BBbbV::jjھ*&&z66zznnjRR~JJz^^22FFFFV**>""z""rrzzffjBBzffz22zRR^^z vvnn*~~zJJVNNVVRRNNjj>66 zvv>zBB~NNz::>>>BBzVVjbb~zzj..>z..VFF^^JJzj66>22V22~jj~VVzzzjZZ~~~zzzbbzjjzNNzFF~**z&&z>>zZZ>***bbjjj**""~^^***V..~FFJJbb~22>>~RRj22jffNN~nn~..jNNV>>jVVj>>~~vvjFF~66!Created with The GIMP! ,dHp Y8h+W3"2%"!B2޸*ԅdI^%+V bfV2dDٙ'Ϥ JA.'JƎ/ iB)Wdr̙)hEgI>J@ xRphp4JYgMܹ#/o2w颦3.kmˏeX=5:rE۸sf| Q\d_"K,3f͞?];Z/Cv+k^o{ݼ xa䇙7Xɕqh}xulg{_gWq%(b Ec7,7,$PM4VE㈎|L3 d;2/P@@ R%B^{u!`%q~JXI7u H`gفƝ#_`Dkpra]hpW\rY@"UWrgYhtZj6GM(ndޖ`t%}h$(b> cM0gxZv޹2)! pDЌj_ VB#i x1fp~&Nhc3B6S ͪs M`1ʜ!YhgەVi :M^^b  'a. 
evlڨƧlzH/x t(@ 6/j|&'w_<ʮ6!m^n2f] _;lEݟ5etVwݣ1-Fa~^6 '$x fMXyB-tt0kDҠEf*4@]4` 8@*m%D*KCڶ;AwfaZ S[,\u@&`Ac`  ЀU4#E8 @/XaD0A6@ Z8@/ȀH:a8P vPEN j#C* FX eVlk_N TbqJHQ;YbM /c@d$rFJ\LkmBzBd'WQ}U_z!: x@xڹ!h֘g4"rLTKXЃZӇ- p@ը H37OGԝ/D e)<'?xA"2DDgH`j'hC f Hrh'0vXxJB8g  I u@d 8 hdPCpݩbR0VGQnln4*W&v58 =FDBAӃ8L}AAÂ&Ї405k3J۠*XEj:±!l :fsCe 4K80AKT4I4ZXV3 P \C=е }|0 MLd>& g8h4Z@T &5Q*.mlh_n1Wz7 zi; > D{P@Mx7o,sUu\ɍ;VNojI{V*Z%>L"<L8d-tM"lkFf*ճ3R>9chXŢ iI.H3mMs:u@;././@LongLink0000000000000000000000000000015500000000000011566 Lustar rootrootbio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_controller.rbbio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_contr0000644000004100000410000000702412200110570033504 0ustar www-datawww-dataclass BiorubyController < ApplicationController HIDE_METHODS = Object.methods + [ "singleton_method_added" ] HIDE_MODULES = [ Base64::Deprecated, Base64, PP::ObjectMixin, Bio::Shell, ] HIDE_MODULES << WEBrick if defined?(WEBrick) HIDE_VARIABLES = [ "_", "irb", "_erbout", ] SECURITY_NOTICE = "For security purposes, this functionality is only available to local requests." def index unless local_request? flash[:notice] = SECURITY_NOTICE end end def evaluate if local_request? begin @script = params[:script].strip # write out to history Bio::Shell.store_history(@script) # evaluate ruby script @result = eval(@script, Bio::Shell.cache[:binding]) # *TODO* need to handle with output of print/puts/p/pp etc. here @output = nil rescue @result = $! 
@output = nil end else @result = SECURITY_NOTICE @output = nil end @number = Bio::Shell.cache[:results].store(@script, @result, @output) render :update do |page| render_log(page) end end def list_methods number = params[:number].to_i script, result, output = Bio::Shell.cache[:results].restore(number) @class = result.class @methods = (result.methods - HIDE_METHODS).sort render :update do |page| page.replace_html "methods_#{number}", :partial => "methods" page.visual_effect :toggle_blind, "methods_#{number}", :duration => 0.5 end end def list_classes number = params[:number].to_i script, result, output = Bio::Shell.cache[:results].restore(number) class_name = result.class @class = class_name @classes = [] loop do @classes.unshift(class_name) if class_name == Object break else class_name = class_name.superclass end end render :update do |page| page.replace_html "classes_#{number}", :partial => "classes" page.visual_effect :toggle_blind, "classes_#{number}", :duration => 0.5 end end def list_modules number = params[:number].to_i script, result, output = Bio::Shell.cache[:results].restore(number) @class = result.class @modules = result.class.included_modules - HIDE_MODULES render :update do |page| page.replace_html "modules_#{number}", :partial => "modules" page.visual_effect :toggle_blind, "modules_#{number}", :duration => 0.5 end end def reload_script number = params[:number].to_i script, result, output = Bio::Shell.cache[:results].restore(number) render :update do |page| page.replace_html :script, script end end def results if Bio::Shell.cache[:results].number > 0 limit = params[:limit].to_i max_num = Bio::Shell.cache[:results].number min_num = [ max_num - limit + 1, 1 ].max min_num = 1 if limit == 0 render :update do |page| # delete all existing results in the current DOM for clean up page.select(".log").each do |element| #page.hide element page.remove element end # add selected results to the current DOM min_num.upto(max_num) do |@number| #page.show 
"log_#{@number}" @script, @result, @output = Bio::Shell.cache[:results].restore(@number) if @script render_log(page) end end end end end def commands @bioruby_commands = Bio::Shell.private_instance_methods.sort end def history @history = File.readlines(Bio::Shell.history_file) end end ././@LongLink0000000000000000000000000000014600000000000011566 Lustar rootrootbio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_modules.rhtmlbio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_modules.rhtm0000644000004100000410000000015612200110570033403 0ustar www-datawww-data[ <%= @class %> ]
    <%= @modules.map {|x| reference_link(x) }.sort.join("
    ") %>
    ././@LongLink0000000000000000000000000000014600000000000011566 Lustar rootrootbio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_methods.rhtmlbio-1.4.3.0001/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_methods.rhtm0000644000004100000410000000032712200110570033376 0ustar www-datawww-data[ <%= @class %> ]
    <%- step = @methods.size / 4 + 1 -%> <%- 0.step(@methods.size, step) do |i| -%> <%- end -%>
    <%= @methods[i, step].join("
    ") %>
    bio-1.4.3.0001/lib/bio/shell/core.rb0000644000004100000410000003123612200110570016573 0ustar www-datawww-data# # = bio/shell/core.rb - internal methods for the BioRuby shell # # Copyright:: Copyright (C) 2005, 2006 # Toshiaki Katayama # License:: The Ruby License # # $Id:$ # module Bio::Shell::Core SHELLDIR = "shell" DATADIR = "data" SESSION = File.join(SHELLDIR, "session") PLUGIN = File.join(SHELLDIR, "plugin") SCRIPT = File.join(SHELLDIR, "script.rb") CONFIG = File.join(SESSION, "config") OBJECT = File.join(SESSION, "object") HISTORY = File.join(SESSION, "history") BIOFLAT = File.join(DATADIR, "bioflat") MARSHAL = [ Marshal::MAJOR_VERSION, Marshal::MINOR_VERSION ] MESSAGE = "...BioRuby in the shell..." ESC_SEQ = { :k => "\e[30m", :black => "\e[30m", :r => "\e[31m", :red => "\e[31m", :ruby => "\e[31m", :g => "\e[32m", :green => "\e[32m", :y => "\e[33m", :yellow => "\e[33m", :b => "\e[34m", :blue => "\e[34m", :m => "\e[35m", :magenta => "\e[35m", :c => "\e[36m", :cyan => "\e[36m", :w => "\e[37m", :white => "\e[37m", :n => "\e[00m", :none => "\e[00m", :reset => "\e[00m", } def colors ESC_SEQ end def shell_dir File.join(@cache[:savedir], SHELLDIR) end def data_dir File.join(@cache[:savedir], DATADIR) end def session_dir File.join(@cache[:savedir], SESSION) end def plugin_dir File.join(@cache[:savedir], PLUGIN) end def script_file File.join(@cache[:savedir], SCRIPT) end def script_dir File.dirname(script_file) end def config_file File.join(@cache[:savedir], CONFIG) end def object_file File.join(@cache[:savedir], OBJECT) end def history_file File.join(@cache[:savedir], HISTORY) end def bioflat_dir File.join(@cache[:savedir], BIOFLAT) end def ask_yes_or_no(message) loop do STDERR.print "#{message}" answer = gets if answer.nil? 
# readline support might be broken return false elsif /^\s*[Nn]/.match(answer) return false elsif /^\s*[Yy]/.match(answer) return true else # loop end end end end module Bio::Shell::Ghost include Bio::Shell::Core # A hash to store persistent configurations attr_accessor :config # A hash to store temporal (per session) configurations attr_accessor :cache ### save/restore the environment def configure(savedir) @config = {} @cache = { :savedir => savedir, :workdir => Dir.pwd, } create_save_dir load_config load_plugin end def load_session load_object unless @cache[:mode] == :script load_history opening_splash open_history end end def save_session unless @cache[:mode] == :script closing_splash end if create_save_dir_ask #save_history # changed to use our own... close_history save_object save_config end #STDERR.puts "Leaving directory '#{@cache[:workdir]}'" end ### directories def create_save_dir create_real_dir(session_dir) create_real_dir(plugin_dir) create_real_dir(data_dir) end def create_save_dir_ask if File.directory?(session_dir) @cache[:save] = true end unless @cache[:save] if ask_yes_or_no("Save session in '#{@cache[:workdir]}' directory? [y/n] ") create_real_dir(session_dir) create_real_dir(plugin_dir) create_real_dir(data_dir) create_real_dir(bioflat_dir) @cache[:save] = true else @cache[:save] = false end end return @cache[:save] end def create_real_dir(dir) unless File.directory?(dir) begin STDERR.print "Creating directory (#{dir}) ... 
" FileUtils.makedirs(dir) STDERR.puts "done" rescue warn "Error: Failed to create directory (#{dir}) : #{$!}" end end end ### bioflat def create_flat_dir(dbname) dir = File.join(bioflat_dir, dbname.to_s.strip) unless File.directory?(dir) FileUtils.makedirs(dir) end return dir end def find_flat_dir(dbname) dir = File.join(bioflat_dir, dbname.to_s.strip) if File.exists?(dir) return dir else return nil end end ### config def load_config load_config_file(config_file) end def load_config_file(file) if File.exists?(file) STDERR.print "Loading config (#{file}) ... " if hash = YAML.load(File.read(file)) @config.update(hash) end STDERR.puts "done" end end def save_config save_config_file(config_file) end def save_config_file(file) begin STDERR.print "Saving config (#{file}) ... " File.open(file, "w") do |f| f.puts @config.to_yaml end STDERR.puts "done" rescue warn "Error: Failed to save (#{file}) : #{$!}" end end def config_show @config.each do |k, v| STDERR.puts "#{k}\t= #{v.inspect}" end end def config_echo bind = Bio::Shell.cache[:binding] flag = ! @config[:echo] @config[:echo] = IRB.conf[:ECHO] = flag eval("conf.echo = #{flag}", bind) STDERR.puts "Echo #{flag ? 'on' : 'off'}" end def config_color bind = Bio::Shell.cache[:binding] flag = ! @config[:color] @config[:color] = flag if flag IRB.conf[:PROMPT_MODE] = :BIORUBY_COLOR eval("conf.prompt_mode = :BIORUBY_COLOR", bind) else IRB.conf[:PROMPT_MODE] = :BIORUBY eval("conf.prompt_mode = :BIORUBY", bind) end end def config_pager(cmd = nil) @config[:pager] = cmd end def config_splash flag = ! @config[:splash] @config[:splash] = flag STDERR.puts "Splash #{flag ? 'on' : 'off'}" opening_splash end def config_message(str = nil) str ||= MESSAGE @config[:message] = str opening_splash end ### plugin def load_plugin load_plugin_dir(plugin_dir) end def load_plugin_dir(dir) if File.directory?(dir) Dir.glob("#{dir}/*.rb").sort.each do |file| STDERR.print "Loading plugin (#{file}) ... 
" load file STDERR.puts "done" end end end ### object def check_marshal if @config[:marshal] and @config[:marshal] != MARSHAL raise "Marshal version mismatch" end end def load_object begin check_marshal load_object_file(object_file) rescue warn "Error: Load aborted : #{$!}" end end def load_object_file(file) if File.exists?(file) STDERR.print "Loading object (#{file}) ... " begin bind = Bio::Shell.cache[:binding] hash = Marshal.load(File.read(file)) hash.each do |k, v| begin Thread.current[:restore_value] = v eval("#{k} = Thread.current[:restore_value]", bind) rescue STDERR.puts "Warning: object '#{k}' couldn't be loaded : #{$!}" end end rescue warn "Error: Failed to load (#{file}) : #{$!}" end STDERR.puts "done" end end def save_object save_object_file(object_file) end def save_object_file(file) begin STDERR.print "Saving object (#{file}) ... " File.rename(file, "#{file}.old") if File.exist?(file) File.open(file, "w") do |f| bind = Bio::Shell.cache[:binding] list = eval("local_variables", bind) list.collect! { |x| x.to_s } list -= ["_"] hash = {} list.each do |elem| value = eval(elem, bind) if value begin Marshal.dump(value) hash[elem] = value rescue # value could not be dumped. end end end Marshal.dump(hash, f) @config[:marshal] = MARSHAL end STDERR.puts "done" rescue File.rename("#{file}.old", file) if File.exist?("#{file}.old") warn "Error: Failed to save (#{file}) : #{$!}" end end ### history def open_history @cache[:histfile] = File.open(history_file, "a") @cache[:histfile].sync = true end def store_history(line) Bio::Shell.cache[:histfile].puts "# #{Time.now}" Bio::Shell.cache[:histfile].puts line end def close_history if @cache[:histfile] STDERR.print "Saving history (#{history_file}) ... " @cache[:histfile].close STDERR.puts "done" end end def load_history if @cache[:readline] load_history_file(history_file) end end def load_history_file(file) if File.exists?(file) STDERR.print "Loading history (#{file}) ... 
" File.open(file).each do |line| unless line[/^# /] Readline::HISTORY.push line.chomp end end STDERR.puts "done" end end # not used (use open_history/close_history instead) def save_history if @cache[:readline] save_history_file(history_file) end end def save_history_file(file) begin STDERR.print "Saving history (#{file}) ... " File.open(file, "w") do |f| f.puts Readline::HISTORY.to_a end STDERR.puts "done" rescue warn "Error: Failed to save (#{file}) : #{$!}" end end ### script def script(mode = nil) case mode when :begin, "begin", :start, "start" @cache[:script] = true script_begin when :end, "end", :stop, "stop" @cache[:script] = false script_end save_script else if @cache[:script] @cache[:script] = false script_end save_script else @cache[:script] = true script_begin end end end def script_begin STDERR.puts "-- 8< -- 8< -- 8< -- Script -- 8< -- 8< -- 8< --" @script_begin = Readline::HISTORY.size end def script_end STDERR.puts "-- >8 -- >8 -- >8 -- Script -- >8 -- >8 -- >8 --" @script_end = Readline::HISTORY.size - 2 end def save_script if @script_begin and @script_end and @script_begin <= @script_end if File.exists?(script_file) message = "Overwrite script file (#{script_file})? [y/n] " else message = "Save script file (#{script_file})? [y/n] " end if ask_yes_or_no(message) save_script_file(script_file) else STDERR.puts " ... save aborted." end elsif @script_begin and @script_end and @script_begin - @script_end == 1 STDERR.puts " ... script aborted." else STDERR.puts "Error: Script range #{@script_begin}..#{@script_end} is invalid" end end def save_script_file(file) begin STDERR.print "Saving script (#{file}) ... 
" File.open(file, "w") do |f| f.puts "#!/usr/bin/env bioruby" f.puts f.puts Readline::HISTORY.to_a[@script_begin..@script_end] f.puts end STDERR.puts "done" rescue @script_begin = nil warn "Error: Failed to save (#{file}) : #{$!}" end end ### splash def splash_message @config[:message] ||= MESSAGE @config[:message].to_s.split(//).join(" ") end def splash_message_color str = splash_message ruby = colors[:ruby] none = colors[:none] return str.sub(/R u b y/) { "#{ruby}R u b y#{none}" } end def splash_message_action(message = nil) s = message || splash_message l = s.length x = " " 0.step(l,2) do |i| l1 = l-i; l2 = l1/2; l4 = l2/2 STDERR.print "#{s[0,i]}#{x*l1}#{s[i,1]}\r" sleep(0.001) STDERR.print "#{s[0,i]}#{x*l2}#{s[i,1]}#{x*(l1-l2)}\r" sleep(0.002) STDERR.print "#{s[0,i]}#{x*l4}#{s[i,1]}#{x*(l2-l4)}\r" sleep(0.004) STDERR.print "#{s[0,i+1]}#{x*l4}\r" sleep(0.008) end end def splash_message_action_color(message = nil) s = message || splash_message l = s.length c = colors x = " " 0.step(l,2) do |i| l1 = l-i; l2 = l1/2; l4 = l2/2 STDERR.print "#{c[:n]}#{s[0,i]}#{x*l1}#{c[:y]}#{s[i,1]}\r" sleep(0.001) STDERR.print "#{c[:n]}#{s[0,i]}#{x*l2}#{c[:g]}#{s[i,1]}#{x*(l1-l2)}\r" sleep(0.002) STDERR.print "#{c[:n]}#{s[0,i]}#{x*l4}#{c[:r]}#{s[i,1]}#{x*(l2-l4)}\r" sleep(0.004) STDERR.print "#{c[:n]}#{s[0,i+1]}#{x*l4}\r" sleep(0.008) end end def opening_splash STDERR.puts if @config[:splash] if @config[:color] splash_message_action_color else splash_message_action end end if @config[:color] STDERR.print splash_message_color else STDERR.print splash_message end STDERR.puts STDERR.puts STDERR.print " Version : BioRuby #{Bio::BIORUBY_VERSION_ID}" STDERR.print " / Ruby #{RUBY_VERSION}" STDERR.puts STDERR.puts end def closing_splash STDERR.puts STDERR.puts if @config[:color] STDERR.print splash_message_color else STDERR.print splash_message end STDERR.puts STDERR.puts end end bio-1.4.3.0001/lib/bio/shell/interface.rb0000644000004100000410000001053412200110570017601 0ustar 
www-datawww-data# # = bio/shell/interface.rb - core user interface of the BioRuby shell # # Copyright:: Copyright (C) 2005 # Toshiaki Katayama # License:: The Ruby License # # $Id: interface.rb,v 1.19 2007/11/15 07:08:49 k Exp $ # module Bio::Shell private ### work space def ls bind = Bio::Shell.cache[:binding] list = eval("local_variables", bind).reject { |x| eval(x, bind).nil? } puts list.inspect return list end def rm(name) bind = Bio::Shell.cache[:binding] list = eval("local_variables", bind).reject { |x| eval(x, bind).nil? } begin if list.include?(name.to_s) eval("#{name} = nil", bind) else raise end rescue warn "Usage: rm :var or rm 'var' (rm var is not valid)" end end ### script def script(mode = nil) Bio::Shell.script(mode) end ### object def reload_object Bio::Shell.load_object end ### plugin def reload_plugin Bio::Shell.load_plugin end ### config def config(mode = :show, *opts) case mode when :show, "show" Bio::Shell.config_show when :echo, "echo" Bio::Shell.config_echo when :color, "color" Bio::Shell.config_color when :splash, "splash" Bio::Shell.config_splash when :pager, "pager" Bio::Shell.config_pager(*opts) when :message, "message" Bio::Shell.config_message(*opts) else puts "Invalid mode (#{mode}) - :show, :echo, :color, :splash, :massage" end end def reload_config Bio::Shell.load_config end ### pager def pager(cmd = nil) unless Bio::Shell.config[:pager] cmd ||= ENV['PAGER'] end Bio::Shell.config_pager(cmd) puts "Pager is set to '#{cmd ? 
cmd : 'off'}'" end def disp(*objs) # The original idea is from http://sheepman.parfait.ne.jp/20050215.html if cmd = Bio::Shell.config[:pager] pg = IO.popen(cmd, "w") begin stdout_save = STDOUT.clone STDOUT.reopen(pg) objs.each do |obj| if obj.is_a?(String) if File.exists?(obj) system("#{cmd} #{obj}") else obj.display end else pp obj end end ensure STDOUT.reopen(stdout_save) stdout_save.close pg.close end else objs.each do |obj| if obj.is_a?(String) obj.display else pp obj end end end end def head(arg, num = 10) str = "" if File.exists?(arg) File.open(arg) do |file| num.times do if line = file.gets str << line end end end else arg.to_s.each_with_index do |line, i| break if i >= num str << line end end puts str return str end ### file save def savefile(file, *objs) datadir = Bio::Shell.data_dir message = "Save file '#{file}' in '#{datadir}' directory? [y/n] " if ! file[/^#{datadir}/] and Bio::Shell.ask_yes_or_no(message) file = File.join(datadir, file) end if File.exists?(file) message = "Overwrite existing '#{file}' file? [y/n] " if ! Bio::Shell.ask_yes_or_no(message) puts " ... save aborted." return end end begin print "Saving file (#{file}) ... 
" File.open(file, "w") do |f| objs.each do |obj| f.puts obj.to_s end end puts "done" rescue warn "Error: Failed to save (#{file}) : #{$!}" end end ### file system def cd(dir = ENV['HOME']) if dir Dir.chdir(dir) end puts Dir.pwd.inspect end def pwd puts Dir.pwd.inspect end def dir(file = nil) if file if File.directory?(file) files = Dir.glob("#{file}/*") else files = Dir.glob(file) end else files = Dir.glob("*") end if files str = " UGO Date Byte File\n" str << "------ ------------------------------ ----------- ------------\n" files.sort.each { |f| stat = File.lstat(f) mode = format("%6o", stat.mode) date = stat.mtime byte = stat.size name = f.inspect str << format("%s %30s%13d %s\n", mode, date, byte, name) } puts str return files.sort end end end bio-1.4.3.0001/lib/bio/shell/web.rb0000644000004100000410000000426612200110570016423 0ustar www-datawww-data# # = bio/shell/web.rb - GUI for the BioRuby shell # # Copyright:: Copyright (C) 2006 # Nobuya Tanaka , # Toshiaki Katayama # License:: The Ruby License # # $Id: web.rb,v 1.5 2007/06/26 08:38:38 k Exp $ # module Bio::Shell class Web def initialize Bio::Shell.cache[:binding] = binding Bio::Shell.cache[:results] ||= Results.new install_rails setup_rails start_rails end private def setup_rails puts puts ">>>" puts ">>> open http://localhost:3000/bioruby" puts ">>>" puts puts '(You can change the port number by adding "-- -p 4000" option)' puts end def install_rails savedir = Bio::Shell.cache[:savedir] path = File.join(savedir, "script", "generate") unless File.exist?(path) puts "Installing Rails application for BioRuby shell ... " system("rails #{savedir}") puts "done" end path = File.join(savedir, "app", "controllers", "bioruby_controller.rb") unless File.exist?(path) basedir = File.dirname(__FILE__) puts "Installing Rails plugin for BioRuby shell ... 
" FileUtils.cp_r("#{basedir}/rails/.", savedir) Dir.chdir(savedir) do system("./script/generate bioruby shell") end puts "done" end end def start_rails begin Bio::Shell.cache[:rails] = Thread.new do Dir.chdir(Bio::Shell.cache[:savedir]) do require './config/boot' require 'commands/server' end end end end class Results attr_accessor :number, :script, :result, :output def initialize @number = 0 @script = [] @result = [] @output = [] end def store(script, result, output) @number += 1 @script[@number] = script @result[@number] = result @output[@number] = output return @number end def restore(number) return @script[number], @result[number], @output[number] end end end private # *TODO* stop irb and start rails? #def web #end end bio-1.4.3.0001/lib/bio/shell/setup.rb0000644000004100000410000000500212200110570016773 0ustar www-datawww-data# # = bio/shell/setup.rb - setup initial environment for the BioRuby shell # # Copyright:: Copyright (C) 2006 # Toshiaki Katayama # License:: The Ruby License # # $Id: setup.rb,v 1.8 2007/06/28 11:21:40 k Exp $ # require 'getoptlong' class Bio::Shell::Setup def initialize check_ruby_version # command line options getoptlong # setup working directory savedir = setup_savedir # load configuration and plugins Bio::Shell.configure(savedir) # set default to irb mode Bio::Shell.cache[:mode] = @mode || :irb case Bio::Shell.cache[:mode] when :web # setup rails server Bio::Shell::Web.new when :irb # setup irb server Bio::Shell::Irb.new when :script # run bioruby shell script Bio::Shell::Script.new(@script) end end def check_ruby_version if RUBY_VERSION < "1.8.2" raise "BioRuby shell runs on Ruby version >= 1.8.2" end end # command line argument (working directory or bioruby shell script file) def getoptlong opts = GetoptLong.new opts.set_options( [ '--rails', '-r', GetoptLong::NO_ARGUMENT ], [ '--web', '-w', GetoptLong::NO_ARGUMENT ], [ '--console', '-c', GetoptLong::NO_ARGUMENT ], [ '--irb', '-i', GetoptLong::NO_ARGUMENT ] ) opts.each_option do 
|opt, arg| case opt when /--rails/, /--web/ @mode = :web when /--console/, /--irb/ @mode = :irb end end end def setup_savedir arg = ARGV.shift # Options after the '--' argument are not parsed by GetoptLong and # are passed to irb or rails. This hack preserve the first option # when working directory of the project is not given. if arg and arg[/^-/] ARGV.unshift arg arg = nil end if arg.nil? # run in the current directory if File.exist?(Bio::Shell::Core::HISTORY) savedir = Dir.pwd else savedir = File.join(ENV['HOME'].to_s, ".bioruby") install_savedir(savedir) end elsif File.file?(arg) # run file as a bioruby shell script savedir = File.join(File.dirname(arg), "..") @script = arg @mode = :script else # run in new or existing directory if arg[/^#{File::SEPARATOR}/] savedir = arg else savedir = File.join(Dir.pwd, arg) end install_savedir(savedir) end return savedir end def install_savedir(savedir) FileUtils.makedirs(savedir) end end # Setup bio-1.4.3.0001/lib/bio/shell/irb.rb0000644000004100000410000000443612200110570016421 0ustar www-datawww-data# # = bio/shell/irb.rb - CUI for the BioRuby shell # # Copyright:: Copyright (C) 2006 # Toshiaki Katayama # License:: The Ruby License # # $Id: irb.rb,v 1.3 2007/04/05 23:35:41 trevor Exp $ # module Bio::Shell class Irb def initialize require 'irb' begin require 'irb/completion' Bio::Shell.cache[:readline] = true rescue LoadError Bio::Shell.cache[:readline] = false end IRB.setup(nil) setup_irb start_irb end def start_irb Bio::Shell.cache[:irb] = IRB::Irb.new # needed for method completion IRB.conf[:MAIN_CONTEXT] = Bio::Shell.cache[:irb].context # store binding for evaluation Bio::Shell.cache[:binding] = IRB.conf[:MAIN_CONTEXT].workspace.binding # overwrite gets to store history with time stamp io = IRB.conf[:MAIN_CONTEXT].io io.class.class_eval do alias_method :irb_original_gets, :gets end def io.gets line = irb_original_gets if line Bio::Shell.store_history(line) end return line end if File.exists?("./config/boot.rb") require 
"./config/boot" require "./config/environment" #require 'commands/console' end end def setup_irb # set application name IRB.conf[:AP_NAME] = 'bioruby' # change prompt for bioruby $_ = Bio::Shell.colors IRB.conf[:PROMPT][:BIORUBY_COLOR] = { :PROMPT_I => "bio#{$_[:ruby]}ruby#{$_[:none]}> ", :PROMPT_S => "bio#{$_[:ruby]}ruby#{$_[:none]}%l ", :PROMPT_C => "bio#{$_[:ruby]}ruby#{$_[:none]}+ ", :RETURN => " ==> %s\n" } IRB.conf[:PROMPT][:BIORUBY] = { :PROMPT_I => "bioruby> ", :PROMPT_S => "bioruby%l ", :PROMPT_C => "bioruby+ ", :RETURN => " ==> %s\n" } if Bio::Shell.config[:color] IRB.conf[:PROMPT_MODE] = :BIORUBY_COLOR else IRB.conf[:PROMPT_MODE] = :BIORUBY end # echo mode (uncomment to off by default) #IRB.conf[:ECHO] = Bio::Shell.config[:echo] || false # irb/input-method.rb >= v1.5 (not in 1.8.2) #IRB.conf[:SAVE_HISTORY] = 100000 # not nicely works #IRB.conf[:AUTO_INDENT] = true end end # Irb end bio-1.4.3.0001/lib/bio/shell/object.rb0000644000004100000410000000216212200110570017105 0ustar www-datawww-data# # = bio/shell/object.rb - Object extension for the BioRuby shell # # Copyright:: Copyright (C) 2006 # Nobuya Tanaka , # Toshiaki Katayama # License:: The Ruby License # # $Id: object.rb,v 1.3 2007/04/05 23:35:41 trevor Exp $ # require 'pp' require 'cgi' require 'yaml' ### Object extention class Object # Couldn't work for Fixnum (Marshal) attr_accessor :memo def output(format = :yaml) case format when :yaml self.to_yaml when :html format_html when :inspect format_pp when :png # *TODO* when :svg # *TODO* when :graph # *TODO* (Gruff, RSRuby etc.) else #self.inspect.to_s.fold(80) self.to_s end end private def format_html "
    #{CGI.escapeHTML(format_pp)}
    " end def format_pp str = "" PP.pp(self, str) return str end end class Hash private def format_html html = "" html += "" @data.each do |k, v| html += "" end html += "
    #{k}#{v}
    " return html end end