Bio-FeatureIO-1.6.905000755000765000024 012402372043 14272 5ustar00cjfieldsstaff000000000000Changes100644000765000024 142412402372043 15647 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905Revision history for Perl extension Bio::FeatureIO. 1.6.905 2014-09-05 12:34:22-05:00 America/Chicago * Minor testing fix: move Bio::SeqFeature::Annotated tests to release-only tests, since this requires networking parameter checks via Module::Build. 1.6.904 2014-09-02 15:23:07-05:00 America/Chicago * Fix bugtracker URL 1.6.903 2014-09-02 14:22:48-05:00 America/Chicago * Require Tree::DAG_Node * Remove bundled Test::More/Exception/Warn dists (rely on Bio::Root::Test, though this will be switched to using Test::Most) 1.6.902 2014-08-30 01:00:25-05:00 America/Chicago * Release version that supercedes latest on CPAN * No code changes 1.6.0 2014-08-30 00:53:58-05:00 America/Chicago * initial split off from BioPerl core t000755000765000024 012402372043 14456 5ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905bed.t100644000765000024 137012402372043 15536 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t# -*-Perl-*- Test Harness script for Bioperl # $Id: FeatureIO.t 15112 2008-12-08 18:12:38Z sendu $ use strict; use warnings; use Bio::Root::Test; use Bio::FeatureIO; my ($io, $f, $s, $fcount, $scount); ################################################################################ # # use FeatureIO::bed to read a bed file # ok($io = Bio::FeatureIO->new(-file => test_input_file('1.bed'))); ok($f = $io->next_feature); # Check correct conversion of [0, feature-end+1) bed-coordinates into [1, feature-end] # bioperl coordinates. (here: bed [0, 10)) is($f->start, 1); is($f->end, 10); # Check field values. 
my @tags = $f->get_tag_values("Name"); is(scalar(@tags), 1); is($tags[0], "test-coordinates-1"); is($f->seq_id, "chr1"); done_testing(); exit; gff.t100644000765000024 2107212402372043 15567 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t# -*-Perl-*- Test Harness script for Bioperl # $Id: FeatureIO.t 15112 2008-12-08 18:12:38Z sendu $ use strict; use warnings; use Bio::Root::Test; use Bio::FeatureIO; # this is mainly GFF3-specific, GFF2/GTF to be added my ($io, $f, $s, $fcount, $scount); ################################################################################ # # use FeatureIO::gff to read a FASTA file. # $fcount = 0; $scount = 0; ok( $io = Bio::FeatureIO->new( -file => test_input_file('dna1.fa') ) ); #read features while($f = $io->next_feature()){ $fcount++; } is($fcount, 0); #then try to read sequences again. should get seqs now while($s = $io->next_seq()){ $scount++; if ($scount == 1) { is($s->id, 'Test1'); } } is($scount, 1); ################################################################################ # # use FeatureIO::gff to read a GFF3 file. $fcount = 0; $scount = 0; ok( $io = Bio::FeatureIO->new( -file => test_input_file('knownGene.gff3') ) ); #try to read sequences first. 
should be undef while($s = $io->next_seq()){ $scount++; } is($scount,0); #then read features while($f = $io->next_feature()) { $fcount++; if ($fcount == 1) { isa_ok($f, 'Bio::SeqFeatureI'); isa_ok($f, 'Bio::SeqFeature::Annotated'); is($f->primary_tag, 'mRNA'); is($f->primary_id, 'A00469'); is($f->seq_id, 'chr17'); is($f->source_tag, 'UCSC'); is($f->score, '.'); is($f->start, 62467934); is($f->end, 62469545); is($f->strand, -1); } elsif ($fcount == 10) { is($f->primary_tag, 'three_prime_UTR'); is($f->primary_id, undef); # no ID attribute is($f->seq_id, 'chr9'); is($f->source_tag, 'UCSC'); is($f->score, '.'); is($f->start, 90517946); is($f->end, 90518841); is($f->strand, -1); } elsif ($fcount == 15) { is($f->primary_tag, 'match'); is($f->primary_id, 'blastresult.1'); is($f->seq_id, 'chr9'); is($f->source_tag, 'BLASTN'); is($f->score, '0.0'); is($f->start, 90518850); is($f->end, 90521248); is($f->strand, 1); } } is($fcount, 15); #then try to read sequences again. should still be undef while($s = $io->next_seq()){ $scount++; } is($scount,1); ################################################################################ # # use FeatureIO::gff to read a GFF3 file w/ directivized FASTA tail # $fcount = 0; $scount = 0; ok( $io = Bio::FeatureIO->new( -file => test_input_file('hybrid1.gff3') ) ); #try to read sequences first. should be undef while($s = $io->next_seq()){ $scount++; } is($scount , 0); #then read features while($f = $io->next_feature()){ $fcount++; if ($fcount == 1) { is($f->primary_tag, 'mRNA'); is($f->primary_id, 'A00469'); is($f->seq_id, 'chr17'); is($f->source_tag, 'UCSC'); is($f->score, '.'); is($f->start, 62467934); is($f->end, 62469545); is($f->strand, -1); } elsif ($fcount == 5) { is($f->primary_tag, 'CDS'); is($f->primary_id, undef); is($f->seq_id, 'chr17'); is($f->source_tag, 'UCSC'); is($f->score, '.'); is($f->start, 62469076); is($f->end, 62469236); is($f->strand, -1); } } is($fcount , 6); #then try to read sequences again. 
while($s = $io->next_seq()){ $scount++; if ($scount == 1) { is($s->id, 'A00469'); } } is($scount , 1); ################################################################################ # # use FeatureIO::gff to read a GFF3 file w/ non-directivized FASTA tail # $fcount = 0; $scount = 0; ok( $io = Bio::FeatureIO->new( -file => test_input_file('hybrid2.gff3') ) ); #try to read sequences. should be undef while($s = $io->next_seq()){ $scount++; } is($scount , 0); $scount = 0; #then read features while($f = $io->next_feature()){ $fcount++; if ($fcount == 1) { is($f->primary_tag, 'mRNA'); is($f->primary_id, 'A00469'); is($f->seq_id, 'chr17'); is($f->source_tag, 'UCSC'); is($f->score, '.'); is($f->start, 62467934); is($f->end, 62469545); is($f->strand, -1); } elsif ($fcount == 5) { is($f->primary_tag, 'CDS'); is($f->primary_id, undef); is($f->seq_id, 'chr17'); is($f->source_tag, 'UCSC'); is($f->score, '.'); is($f->start, 62469076); is($f->end, 62469236); is($f->strand, -1); } } is($fcount , 6); #try to read sequences. should be undef while($s = $io->next_seq()){ $scount++; if ($scount == 1) { is($s->id, 'A00469'); } } is($scount , 1); ################################################################################ # # use FeatureIO::gff to read a GFF3 file of directives # $fcount = 0; $scount = 0; ok( $io = Bio::FeatureIO->new(-file => test_input_file('directives.gff3'), -verbose => -1)); #read features while($f = $io->next_feature()){ $fcount++; if ($fcount == 1) { is($f->primary_tag, 'region'); is($f->primary_id, undef); is($f->seq_id, 'foo'); is($f->source_tag, '.'); is($f->score, '.'); is($f->start, 1); is($f->end, 100); is($f->strand, 1); } } is($fcount , 1); #sequence-region ################################################################################ # # use FeatureIO::gff to read a GFF3 file as aggregated feature groups # $fcount = 0; $scount = 0; ok( $io = Bio::FeatureIO->new( -file => test_input_file('knownGene.gff3') ) ); #try to read sequences first. 
should be undef while($s = $io->next_seq()){ $scount++; } is($scount , 0); #read feature groups my @f = $io->next_feature_group(); is(@f, 3); if (@f) { is($f[0]->primary_tag,'mRNA'); my %types; my $ct = 0; for my $subf ($f[0]->get_SeqFeatures) { $types{$subf->primary_tag}++; $ct++ } is($ct, 7); is($types{'three_prime_UTR'}, 1); is($types{'CDS'}, 5); is($types{'five_prime_UTR'}, 1); %types = (); $ct = 0; is($f[1]->primary_tag,'mRNA'); for my $subf ($f[1]->get_SeqFeatures) { $types{$subf->primary_tag}++; $ct++ } is($ct, 5); is($types{'three_prime_UTR'}, 1); is($types{'CDS'}, 2); is($types{'five_prime_UTR'}, 2); %types = (); $ct = 0; is($f[2]->primary_tag,'match'); for my $subf ($f[2]->get_SeqFeatures) { $types{$subf->primary_tag}++; $ct++ } is($ct, 0); } @f = $io->next_feature_group(); is(@f, 0); #then try to read sequences again. while($s = $io->next_seq()){ $scount++; } is($scount, 1); ################################################################################ # # use FeatureIO::gff to read GFF3 where aggregated feature groups are denoted # using '###'. # # The advantage of using this is the method can be used iteratively w/o worrying # about possibly diffuse parent-child relationships spread throughout the file. $fcount = 0; $scount = 0; ok( $io = Bio::FeatureIO->new( -file => test_input_file('knownGene2.gff3') ) ); #try to read sequences first. 
should be undef while($s = $io->next_seq()){ $scount++; } is($scount , 0); #read feature groups @f = $io->next_feature_group(); is(@f, 1); is($f[0]->primary_tag,'mRNA'); my %types; my $ct = 0; for my $subf ($f[0]->get_SeqFeatures) { $types{$subf->primary_tag}++; $ct++ } is($ct, 7); is($types{'three_prime_UTR'}, 1); is($types{'CDS'}, 5); is($types{'five_prime_UTR'}, 1); @f = $io->next_feature_group(); is(@f, 1); %types = (); $ct = 0; is($f[0]->primary_tag,'mRNA'); for my $subf ($f[0]->get_SeqFeatures) { $types{$subf->primary_tag}++; $ct++ } is($ct, 5); is($types{'three_prime_UTR'}, 1); is($types{'CDS'}, 2); is($types{'five_prime_UTR'}, 2); @f = $io->next_feature_group(); is(@f, 1); %types = (); $ct = 0; is($f[0]->primary_tag,'match'); for my $subf ($f[0]->get_SeqFeatures) { $types{$subf->primary_tag}++; $ct++ } is($ct, 0); #try to read sequences first. should be undef while($s = $io->next_seq()){ $scount++; } is($scount , 1); ################################################################################ # # use FeatureIO::gff to read GFF3 where aggregated feature groups are iterated # through using fast() # # The advantage of using this is the method can be used iteratively; unlike # using '###', this relies on the user trusting the data for features in the # record is grouped together. 
TODO: { local $TODO = 'Add clustering groups based on grouping within the file'; ok(0); } done_testing(); exit; ptt.t100644000765000024 205212402372043 15611 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t# -*-Perl-*- Test Harness script for Bioperl # $Id: FeatureIO.t 15112 2008-12-08 18:12:38Z sendu $ use strict; use warnings; use Bio::Root::Test; use Bio::FeatureIO; my ($io, $f, $s, $fcount, $scount); $fcount = 0; my $ptt_in = Bio::FeatureIO->new( -file => test_input_file('test.ptt'), -format => 'ptt', ); ok($ptt_in); while (my $f = $ptt_in->next_feature) { $fcount++; if ($fcount==2) { # 2491..3423 + 310 24217063 metF LB002 - COG0685E 5,10-methylenetetrahydrofolate reductase is( $f->start , 2491 ); is( $f->end , 3423 ); is( $f->strand, 1); is( ($f->get_tag_values('PID'))[0],'24217063' ); is( ($f->get_tag_values('Gene'))[0], 'metF' ); is( ($f->get_tag_values('Synonym'))[0], 'LB002' ); ok( ! $f->has_tag('Code') ); is( ($f->get_tag_values('COG'))[0],'COG0685E' ); is( ($f->get_tag_values('Product'))[0], '5,10-methylenetetrahydrofolate reductase' ); } } is($fcount , 367, 'ptt file'); done_testing(); exit; LICENSE100644000765000024 4376512402372043 15417 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905This software is copyright (c) 2014 by BioPerl Team . This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. Terms of the Perl programming language system itself a) the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version, or b) the "Artistic License" --- The GNU General Public License, Version 1, February 1989 --- This software is Copyright (c) 2014 by BioPerl Team . This is free software, licensed under: The GNU General Public License, Version 1, February 1989 GNU GENERAL PUBLIC LICENSE Version 1, February 1989 Copyright (C) 1989 Free Software Foundation, Inc. 
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The license agreements of most software companies try to keep users at the mercy of those companies. By contrast, our General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. The General Public License applies to the Free Software Foundation's software and to any other program whose authors commit to using it. You can use it for your programs, too. When we speak of free software, we are referring to freedom, not price. Specifically, the General Public License is designed to make sure that you have the freedom to give away or sell copies of free software, that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of a such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must tell them their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. 
If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any work containing the Program or a portion of it, either verbatim or with modifications. Each licensee is addressed as "you". 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this General Public License and to the absence of any warranty; and give any other recipients of the Program a copy of this General Public License along with the Program. You may charge a fee for the physical act of transferring a copy. 2. 
You may modify your copy or copies of the Program or any portion of it, and copy and distribute such modifications under the terms of Paragraph 1 above, provided that you also do the following: a) cause the modified files to carry prominent notices stating that you changed the files and the date of any change; and b) cause the whole of any work that you distribute or publish, that in whole or in part contains the Program or any part thereof, either with or without modifications, to be licensed at no charge to all third parties under the terms of this General Public License (except that you may choose to grant warranty protection to some or all third parties, at your option). c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the simplest and most usual way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this General Public License. d) You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. Mere aggregation of another independent work with the Program (or its derivative) on a volume of a storage or distribution medium does not bring the other work under the scope of these terms. 3. 
You may copy and distribute the Program (or a portion or derivative of it, under Paragraph 2) in object code or executable form under the terms of Paragraphs 1 and 2 above provided that you also do one of the following: a) accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Paragraphs 1 and 2 above; or, b) accompany it with a written offer, valid for at least three years, to give any third party free (except for a nominal charge for the cost of distribution) a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Paragraphs 1 and 2 above; or, c) accompany it with the information you received as to where the corresponding source code may be obtained. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form alone.) Source code for a work means the preferred form of the work for making modifications to it. For an executable file, complete source code means all the source code for all modules it contains; but, as a special exception, it need not include source code for modules which are standard libraries that accompany the operating system on which the executable file runs, or for standard header files or definitions files that accompany that operating system. 4. You may not copy, modify, sublicense, distribute or transfer the Program except as expressly provided under this General Public License. Any attempt otherwise to copy, modify, sublicense, distribute or transfer the Program is void, and will automatically terminate your rights to use the Program under this License. However, parties who have received copies, or rights to use copies, from you under this General Public License will not have their licenses terminated so long as such parties remain in full compliance. 5. 
By copying, distributing or modifying the Program (or any work based on the Program) you indicate your acceptance of this license to do so, and all its terms and conditions. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. 7. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of the license which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the license, you may choose any version ever published by the Free Software Foundation. 8. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS Appendix: How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to humanity, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) 19yy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) 19xx name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (a program to direct compilers to make passes at assemblers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice That's all there is to it! --- The Artistic License 1.0 --- This software is Copyright (c) 2014 by BioPerl Team . 
This is free software, licensed under: The Artistic License 1.0 The Artistic License Preamble The intent of this document is to state the conditions under which a Package may be copied, such that the Copyright Holder maintains some semblance of artistic control over the development of the package, while giving the users of the package the right to use and distribute the Package in a more-or-less customary fashion, plus the right to make reasonable modifications. Definitions: - "Package" refers to the collection of files distributed by the Copyright Holder, and derivatives of that collection of files created through textual modification. - "Standard Version" refers to such a Package if it has not been modified, or has been modified in accordance with the wishes of the Copyright Holder. - "Copyright Holder" is whoever is named in the copyright or copyrights for the package. - "You" is you, if you're thinking about copying or distributing this Package. - "Reasonable copying fee" is whatever you can justify on the basis of media cost, duplication charges, time of people involved, and so on. (You will not be required to justify it to the Copyright Holder, but only to the computing community at large as a market that must bear the fee.) - "Freely Available" means that no fee is charged for the item itself, though there may be fees involved in handling the item. It also means that recipients of the item may redistribute it under the same conditions they received it. 1. You may make and give away verbatim copies of the source form of the Standard Version of this Package without restriction, provided that you duplicate all of the original copyright notices and associated disclaimers. 2. You may apply bug fixes, portability fixes and other modifications derived from the Public Domain or from the Copyright Holder. A Package modified in such a way shall still be considered the Standard Version. 3. 
You may otherwise modify your copy of this Package in any way, provided that you insert a prominent notice in each changed file stating how and when you changed that file, and provided that you do at least ONE of the following: a) place your modifications in the Public Domain or otherwise make them Freely Available, such as by posting said modifications to Usenet or an equivalent medium, or placing the modifications on a major archive site such as ftp.uu.net, or by allowing the Copyright Holder to include your modifications in the Standard Version of the Package. b) use the modified Package only within your corporation or organization. c) rename any non-standard executables so the names do not conflict with standard executables, which must also be provided, and provide a separate manual page for each non-standard executable that clearly documents how it differs from the Standard Version. d) make other distribution arrangements with the Copyright Holder. 4. You may distribute the programs of this Package in object code or executable form, provided that you do at least ONE of the following: a) distribute a Standard Version of the executables and library files, together with instructions (in the manual page or equivalent) on where to get the Standard Version. b) accompany the distribution with the machine-readable source of the Package with your modifications. c) accompany any non-standard executables with their corresponding Standard Version executables, giving the non-standard executables non-standard names, and clearly documenting the differences in manual pages (or equivalent), together with instructions on where to get the Standard Version. d) make other distribution arrangements with the Copyright Holder. 5. You may charge a reasonable copying fee for any distribution of this Package. You may charge any fee you choose for support of this Package. You may not charge a fee for this Package itself. 
However, you may distribute this Package in aggregate with other (possibly commercial) programs as part of a larger (possibly commercial) software distribution provided that you do not advertise this Package as a product of your own. 6. The scripts and library files supplied as input to or produced as output from the programs of this Package do not automatically fall under the copyright of this Package, but belong to whomever generated them, and may be sold commercially, and may be aggregated with this Package. 7. C or perl subroutines supplied by you and linked into this Package shall not be considered part of this Package. 8. The name of the Copyright Holder may not be used to endorse or promote products derived from this software without specific prior written permission. 9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. The End dist.ini100644000765000024 112512402372043 16016 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905name = Bio-FeatureIO version = 1.6.905 author = BioPerl Team abstract = Modules for reading, writing, and manipulating sequence features license = Perl_5 copyright_holder = BioPerl Team dist = Bio-FeatureIO repository = http://github.com/bioperl/Bio-FeatureIO repository_at = github repository_type = git [@BioPerl] -remove = PodWeaver -remove = Test::NoTabs -remove = EOLTests [RunExtraTests] [MetaResources] bugtracker.web = http://github.com/bioperl/Bio-FeatureIO/issues [Prereqs] Tree::DAG_Node = 0 META.yml100644000765000024 1707512402372043 15656 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905--- abstract: 'Modules for reading, writing, and manipulating sequence features' author: - 'BioPerl Team ' build_requires: Bio::Root::Test: '0' File::Spec: '0' IO::Handle: '0' IPC::Open3: '0' Test::More: '0' perl: '5.006' warnings: '0' configure_requires: ExtUtils::MakeMaker: '6.30' dynamic_config: 0 
generated_by: 'Dist::Zilla version 5.019, CPAN::Meta::Converter version 2.141520' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: Bio-FeatureIO requires: Bio::AnnotatableI: '0' Bio::Annotation::Collection: '0' Bio::Annotation::Comment: '0' Bio::Annotation::DBLink: '0' Bio::Annotation::OntologyTerm: '0' Bio::Annotation::SimpleValue: '0' Bio::Annotation::Target: '0' Bio::FeatureHolderI: '0' Bio::LocatableSeq: '0' Bio::Location::Simple: '0' Bio::Ontology::OntologyStore: '0' Bio::OntologyIO: '0' Bio::Root::IO: '0' Bio::Root::Root: '0' Bio::SeqFeature::AnnotationAdaptor: '0' Bio::SeqFeature::Generic: '0' Bio::SeqFeature::TypedSeqFeatureI: '0' Bio::SeqIO: '0' Bio::Tools::GFF: '0' Data::Dumper: '0' Scalar::Util: '0' Symbol: '0' Tree::DAG_Node: '0' URI::Escape: '0' XML::DOM: '0' XML::DOM::XPath: '0' base: '0' constant: '0' strict: '0' resources: bugtracker: http://github.com/bioperl/Bio-FeatureIO/issues homepage: https://metacpan.org/release/Bio-FeatureIO repository: git://github.com/bioperl/bio-featureio.git version: 1.6.905 x_Dist_Zilla: perl: version: '5.018001' plugins: - class: Dist::Zilla::Plugin::GatherDir name: '@BioPerl/@Filter/GatherDir' version: '5.019' - class: Dist::Zilla::Plugin::PruneCruft name: '@BioPerl/@Filter/PruneCruft' version: '5.019' - class: Dist::Zilla::Plugin::ManifestSkip name: '@BioPerl/@Filter/ManifestSkip' version: '5.019' - class: Dist::Zilla::Plugin::MetaYAML name: '@BioPerl/@Filter/MetaYAML' version: '5.019' - class: Dist::Zilla::Plugin::License name: '@BioPerl/@Filter/License' version: '5.019' - class: Dist::Zilla::Plugin::ExtraTests name: '@BioPerl/@Filter/ExtraTests' version: '5.019' - class: Dist::Zilla::Plugin::ExecDir name: '@BioPerl/@Filter/ExecDir' version: '5.019' - class: Dist::Zilla::Plugin::ShareDir name: '@BioPerl/@Filter/ShareDir' version: '5.019' - class: Dist::Zilla::Plugin::MakeMaker config: Dist::Zilla::Role::TestRunner: default_jobs: 1 name: 
'@BioPerl/@Filter/MakeMaker' version: '5.019' - class: Dist::Zilla::Plugin::Manifest name: '@BioPerl/@Filter/Manifest' version: '5.019' - class: Dist::Zilla::Plugin::TestRelease name: '@BioPerl/@Filter/TestRelease' version: '5.019' - class: Dist::Zilla::Plugin::ConfirmRelease name: '@BioPerl/@Filter/ConfirmRelease' version: '5.019' - class: Dist::Zilla::Plugin::UploadToCPAN name: '@BioPerl/@Filter/UploadToCPAN' version: '5.019' - class: Dist::Zilla::Plugin::MetaConfig name: '@BioPerl/MetaConfig' version: '5.019' - class: Dist::Zilla::Plugin::MetaJSON name: '@BioPerl/MetaJSON' version: '5.019' - class: Dist::Zilla::Plugin::PkgVersion name: '@BioPerl/PkgVersion' version: '5.019' - class: Dist::Zilla::Plugin::PodSyntaxTests name: '@BioPerl/PodSyntaxTests' version: '5.019' - class: Dist::Zilla::Plugin::RunExtraTests config: Dist::Zilla::Role::TestRunner: default_jobs: 1 name: '@BioPerl/RunExtraTests' version: '0.022' - class: Dist::Zilla::Plugin::NextRelease name: '@BioPerl/NextRelease' version: '5.019' - class: Dist::Zilla::Plugin::Test::Compile config: Dist::Zilla::Plugin::Test::Compile: bail_out_on_fail: '0' fail_on_warning: author fake_home: '0' filename: t/00-compile.t module_finder: - ':InstallModules' needs_display: '0' phase: test script_finder: - ':ExecFiles' skips: [] name: '@BioPerl/Test::Compile' version: '2.046' - class: Dist::Zilla::Plugin::PodCoverageTests name: '@BioPerl/PodCoverageTests' version: '5.019' - class: Dist::Zilla::Plugin::MojibakeTests name: '@BioPerl/MojibakeTests' version: '0.7' - class: Dist::Zilla::Plugin::AutoPrereqs name: '@BioPerl/AutoPrereqs' version: '5.019' - class: Dist::Zilla::Plugin::AutoMetaResources name: '@BioPerl/AutoMetaResources' version: '1.20' - class: Dist::Zilla::Plugin::MetaResources name: '@BioPerl/MetaResources' version: '5.019' - class: Dist::Zilla::Plugin::Authority name: '@BioPerl/Authority' version: '1.006' - class: Dist::Zilla::Plugin::Git::Check config: Dist::Zilla::Plugin::Git::Check: untracked_files: die 
Dist::Zilla::Role::Git::DirtyFiles: allow_dirty: - Changes - dist.ini allow_dirty_match: [] changelog: Changes Dist::Zilla::Role::Git::Repo: repo_root: . name: '@BioPerl/Git::Check' version: '2.023' - class: Dist::Zilla::Plugin::Git::Commit config: Dist::Zilla::Plugin::Git::Commit: add_files_in: [] commit_msg: v%v%n%n%c time_zone: local Dist::Zilla::Role::Git::DirtyFiles: allow_dirty: - Changes - dist.ini allow_dirty_match: [] changelog: Changes Dist::Zilla::Role::Git::Repo: repo_root: . name: '@BioPerl/Git::Commit' version: '2.023' - class: Dist::Zilla::Plugin::Git::Tag config: Dist::Zilla::Plugin::Git::Tag: branch: ~ signed: 0 tag: Bio-FeatureIO-v1.6.905 tag_format: '%N-v%v' tag_message: '%N-v%v' time_zone: local Dist::Zilla::Role::Git::Repo: repo_root: . name: '@BioPerl/Git::Tag' version: '2.023' - class: Dist::Zilla::Plugin::RunExtraTests config: Dist::Zilla::Role::TestRunner: default_jobs: 1 name: RunExtraTests version: '0.022' - class: Dist::Zilla::Plugin::MetaResources name: MetaResources version: '5.019' - class: Dist::Zilla::Plugin::Prereqs config: Dist::Zilla::Plugin::Prereqs: phase: runtime type: requires name: Prereqs version: '5.019' - class: Dist::Zilla::Plugin::FinderCode name: ':InstallModules' version: '5.019' - class: Dist::Zilla::Plugin::FinderCode name: ':IncModules' version: '5.019' - class: Dist::Zilla::Plugin::FinderCode name: ':TestFiles' version: '5.019' - class: Dist::Zilla::Plugin::FinderCode name: ':ExecFiles' version: '5.019' - class: Dist::Zilla::Plugin::FinderCode name: ':ShareFiles' version: '5.019' - class: Dist::Zilla::Plugin::FinderCode name: ':MainModule' version: '5.019' - class: Dist::Zilla::Plugin::FinderCode name: ':AllFiles' version: '5.019' - class: Dist::Zilla::Plugin::FinderCode name: ':NoFiles' version: '5.019' zilla: class: Dist::Zilla::Dist::Builder config: is_trial: '0' version: '5.019' x_authority: cpan:BIOPERLML MANIFEST100644000765000024 135412402372043 15507 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905# 
This file was automatically generated by Dist::Zilla::Plugin::Manifest v5.019. Changes LICENSE MANIFEST META.json META.yml Makefile.PL README.md dist.ini lib/Bio/FeatureIO.pm lib/Bio/FeatureIO/bed.pm lib/Bio/FeatureIO/gff.pm lib/Bio/FeatureIO/gtf.pm lib/Bio/FeatureIO/interpro.pm lib/Bio/FeatureIO/ptt.pm lib/Bio/FeatureIO/vecscreen_simple.pm lib/Bio/SeqFeature/Annotated.pm t/00-compile.t t/FeatureIO.t t/FeatureIO.x t/bed.t t/data/1.bed t/data/directives.gff3 t/data/dna1.fa t/data/hybrid1.gff3 t/data/hybrid2.gff3 t/data/knownGene.gff3 t/data/knownGene2.gff3 t/data/test.ptt t/data/vecscreen_simple.test_output t/gff.t t/ptt.t t/release-SeqFeature/Annotated.t t/release-mojibake.t t/release-pod-coverage.t t/release-pod-syntax.t t/vecscreen.t README.md100644000765000024 150612402372043 15634 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905# Bio-FeatureIO Bio::FeatureIO is a BioPerl-based parser for feature data from common biological sequence formats, such as GFF3, GTF, and BED. # Installation To install this module type the following: ``` perl Build.PL ./Build ./Build test ./Build install ``` # Dependencies Beyond the core BioPerl distribution, this module requires these other modules and libraries: * [URI::Escape](https://metacpan.org/pod/URI::Escape) - for Bio::FeatureIO::gff * [XML::DOM::XPath](https://metacpan.org/pod/XML::DOM::XPath) - for Bio::FeatureIO::interpro COPYRIGHT AND LICENCE Copyright (C) 2010,2014 by Chris Fields and Allen Day This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.10.1 or, at your option, any later version of Perl 5 you may have available. 
META.json100644000765000024 2761612402372043 16030 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905{ "abstract" : "Modules for reading, writing, and manipulating sequence features", "author" : [ "BioPerl Team " ], "dynamic_config" : 0, "generated_by" : "Dist::Zilla version 5.019, CPAN::Meta::Converter version 2.141520", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "Bio-FeatureIO", "prereqs" : { "configure" : { "requires" : { "ExtUtils::MakeMaker" : "6.30" } }, "develop" : { "requires" : { "Pod::Coverage::TrustPod" : "0", "Test::Pod" : "1.41", "Test::Pod::Coverage" : "1.08" } }, "runtime" : { "requires" : { "Bio::AnnotatableI" : "0", "Bio::Annotation::Collection" : "0", "Bio::Annotation::Comment" : "0", "Bio::Annotation::DBLink" : "0", "Bio::Annotation::OntologyTerm" : "0", "Bio::Annotation::SimpleValue" : "0", "Bio::Annotation::Target" : "0", "Bio::FeatureHolderI" : "0", "Bio::LocatableSeq" : "0", "Bio::Location::Simple" : "0", "Bio::Ontology::OntologyStore" : "0", "Bio::OntologyIO" : "0", "Bio::Root::IO" : "0", "Bio::Root::Root" : "0", "Bio::SeqFeature::AnnotationAdaptor" : "0", "Bio::SeqFeature::Generic" : "0", "Bio::SeqFeature::TypedSeqFeatureI" : "0", "Bio::SeqIO" : "0", "Bio::Tools::GFF" : "0", "Data::Dumper" : "0", "Scalar::Util" : "0", "Symbol" : "0", "Tree::DAG_Node" : "0", "URI::Escape" : "0", "XML::DOM" : "0", "XML::DOM::XPath" : "0", "base" : "0", "constant" : "0", "strict" : "0" } }, "test" : { "requires" : { "Bio::Root::Test" : "0", "File::Spec" : "0", "IO::Handle" : "0", "IPC::Open3" : "0", "Test::More" : "0", "perl" : "5.006", "warnings" : "0" } } }, "release_status" : "stable", "resources" : { "bugtracker" : { "mailto" : "bioperl-l@bioperl.org", "web" : "http://github.com/bioperl/Bio-FeatureIO/issues" }, "homepage" : "https://metacpan.org/release/Bio-FeatureIO", "repository" : { "type" : "git", "url" : "git://github.com/bioperl/bio-featureio.git", "web" : 
"https://github.com/bioperl/bio-featureio" } }, "version" : "1.6.905", "x_Dist_Zilla" : { "perl" : { "version" : "5.018001" }, "plugins" : [ { "class" : "Dist::Zilla::Plugin::GatherDir", "name" : "@BioPerl/@Filter/GatherDir", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::PruneCruft", "name" : "@BioPerl/@Filter/PruneCruft", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::ManifestSkip", "name" : "@BioPerl/@Filter/ManifestSkip", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::MetaYAML", "name" : "@BioPerl/@Filter/MetaYAML", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::License", "name" : "@BioPerl/@Filter/License", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::ExtraTests", "name" : "@BioPerl/@Filter/ExtraTests", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::ExecDir", "name" : "@BioPerl/@Filter/ExecDir", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::ShareDir", "name" : "@BioPerl/@Filter/ShareDir", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::MakeMaker", "config" : { "Dist::Zilla::Role::TestRunner" : { "default_jobs" : 1 } }, "name" : "@BioPerl/@Filter/MakeMaker", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::Manifest", "name" : "@BioPerl/@Filter/Manifest", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::TestRelease", "name" : "@BioPerl/@Filter/TestRelease", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::ConfirmRelease", "name" : "@BioPerl/@Filter/ConfirmRelease", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::UploadToCPAN", "name" : "@BioPerl/@Filter/UploadToCPAN", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::MetaConfig", "name" : "@BioPerl/MetaConfig", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::MetaJSON", "name" : "@BioPerl/MetaJSON", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::PkgVersion", "name" : "@BioPerl/PkgVersion", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::PodSyntaxTests", "name" : 
"@BioPerl/PodSyntaxTests", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::RunExtraTests", "config" : { "Dist::Zilla::Role::TestRunner" : { "default_jobs" : 1 } }, "name" : "@BioPerl/RunExtraTests", "version" : "0.022" }, { "class" : "Dist::Zilla::Plugin::NextRelease", "name" : "@BioPerl/NextRelease", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::Test::Compile", "config" : { "Dist::Zilla::Plugin::Test::Compile" : { "bail_out_on_fail" : "0", "fail_on_warning" : "author", "fake_home" : "0", "filename" : "t/00-compile.t", "module_finder" : [ ":InstallModules" ], "needs_display" : "0", "phase" : "test", "script_finder" : [ ":ExecFiles" ], "skips" : [] } }, "name" : "@BioPerl/Test::Compile", "version" : "2.046" }, { "class" : "Dist::Zilla::Plugin::PodCoverageTests", "name" : "@BioPerl/PodCoverageTests", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::MojibakeTests", "name" : "@BioPerl/MojibakeTests", "version" : "0.7" }, { "class" : "Dist::Zilla::Plugin::AutoPrereqs", "name" : "@BioPerl/AutoPrereqs", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::AutoMetaResources", "name" : "@BioPerl/AutoMetaResources", "version" : "1.20" }, { "class" : "Dist::Zilla::Plugin::MetaResources", "name" : "@BioPerl/MetaResources", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::Authority", "name" : "@BioPerl/Authority", "version" : "1.006" }, { "class" : "Dist::Zilla::Plugin::Git::Check", "config" : { "Dist::Zilla::Plugin::Git::Check" : { "untracked_files" : "die" }, "Dist::Zilla::Role::Git::DirtyFiles" : { "allow_dirty" : [ "Changes", "dist.ini" ], "allow_dirty_match" : [], "changelog" : "Changes" }, "Dist::Zilla::Role::Git::Repo" : { "repo_root" : "." 
} }, "name" : "@BioPerl/Git::Check", "version" : "2.023" }, { "class" : "Dist::Zilla::Plugin::Git::Commit", "config" : { "Dist::Zilla::Plugin::Git::Commit" : { "add_files_in" : [], "commit_msg" : "v%v%n%n%c", "time_zone" : "local" }, "Dist::Zilla::Role::Git::DirtyFiles" : { "allow_dirty" : [ "Changes", "dist.ini" ], "allow_dirty_match" : [], "changelog" : "Changes" }, "Dist::Zilla::Role::Git::Repo" : { "repo_root" : "." } }, "name" : "@BioPerl/Git::Commit", "version" : "2.023" }, { "class" : "Dist::Zilla::Plugin::Git::Tag", "config" : { "Dist::Zilla::Plugin::Git::Tag" : { "branch" : null, "signed" : 0, "tag" : "Bio-FeatureIO-v1.6.905", "tag_format" : "%N-v%v", "tag_message" : "%N-v%v", "time_zone" : "local" }, "Dist::Zilla::Role::Git::Repo" : { "repo_root" : "." } }, "name" : "@BioPerl/Git::Tag", "version" : "2.023" }, { "class" : "Dist::Zilla::Plugin::RunExtraTests", "config" : { "Dist::Zilla::Role::TestRunner" : { "default_jobs" : 1 } }, "name" : "RunExtraTests", "version" : "0.022" }, { "class" : "Dist::Zilla::Plugin::MetaResources", "name" : "MetaResources", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::Prereqs", "config" : { "Dist::Zilla::Plugin::Prereqs" : { "phase" : "runtime", "type" : "requires" } }, "name" : "Prereqs", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::FinderCode", "name" : ":InstallModules", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::FinderCode", "name" : ":IncModules", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::FinderCode", "name" : ":TestFiles", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::FinderCode", "name" : ":ExecFiles", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::FinderCode", "name" : ":ShareFiles", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::FinderCode", "name" : ":MainModule", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::FinderCode", "name" : ":AllFiles", "version" : "5.019" }, { "class" : "Dist::Zilla::Plugin::FinderCode", "name" : 
":NoFiles", "version" : "5.019" } ], "zilla" : { "class" : "Dist::Zilla::Dist::Builder", "config" : { "is_trial" : "0" }, "version" : "5.019" } }, "x_authority" : "cpan:BIOPERLML" } Makefile.PL100644000765000024 566112402372043 16335 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905 # This file was automatically generated by Dist::Zilla::Plugin::MakeMaker v5.019. use strict; use warnings; use ExtUtils::MakeMaker 6.30; my %WriteMakefileArgs = ( "ABSTRACT" => "Modules for reading, writing, and manipulating sequence features", "AUTHOR" => "BioPerl Team ", "CONFIGURE_REQUIRES" => { "ExtUtils::MakeMaker" => "6.30" }, "DISTNAME" => "Bio-FeatureIO", "EXE_FILES" => [], "LICENSE" => "perl", "NAME" => "Bio::FeatureIO", "PREREQ_PM" => { "Bio::AnnotatableI" => 0, "Bio::Annotation::Collection" => 0, "Bio::Annotation::Comment" => 0, "Bio::Annotation::DBLink" => 0, "Bio::Annotation::OntologyTerm" => 0, "Bio::Annotation::SimpleValue" => 0, "Bio::Annotation::Target" => 0, "Bio::FeatureHolderI" => 0, "Bio::LocatableSeq" => 0, "Bio::Location::Simple" => 0, "Bio::Ontology::OntologyStore" => 0, "Bio::OntologyIO" => 0, "Bio::Root::IO" => 0, "Bio::Root::Root" => 0, "Bio::SeqFeature::AnnotationAdaptor" => 0, "Bio::SeqFeature::Generic" => 0, "Bio::SeqFeature::TypedSeqFeatureI" => 0, "Bio::SeqIO" => 0, "Bio::Tools::GFF" => 0, "Data::Dumper" => 0, "Scalar::Util" => 0, "Symbol" => 0, "Tree::DAG_Node" => 0, "URI::Escape" => 0, "XML::DOM" => 0, "XML::DOM::XPath" => 0, "base" => 0, "constant" => 0, "strict" => 0 }, "TEST_REQUIRES" => { "Bio::Root::Test" => 0, "File::Spec" => 0, "IO::Handle" => 0, "IPC::Open3" => 0, "Test::More" => 0, "warnings" => 0 }, "VERSION" => "1.6.905", "test" => { "TESTS" => "t/*.t t/release-SeqFeature/*.t" } ); my %FallbackPrereqs = ( "Bio::AnnotatableI" => 0, "Bio::Annotation::Collection" => 0, "Bio::Annotation::Comment" => 0, "Bio::Annotation::DBLink" => 0, "Bio::Annotation::OntologyTerm" => 0, "Bio::Annotation::SimpleValue" => 0, "Bio::Annotation::Target" => 0, 
"Bio::FeatureHolderI" => 0, "Bio::LocatableSeq" => 0, "Bio::Location::Simple" => 0, "Bio::Ontology::OntologyStore" => 0, "Bio::OntologyIO" => 0, "Bio::Root::IO" => 0, "Bio::Root::Root" => 0, "Bio::Root::Test" => 0, "Bio::SeqFeature::AnnotationAdaptor" => 0, "Bio::SeqFeature::Generic" => 0, "Bio::SeqFeature::TypedSeqFeatureI" => 0, "Bio::SeqIO" => 0, "Bio::Tools::GFF" => 0, "Data::Dumper" => 0, "File::Spec" => 0, "IO::Handle" => 0, "IPC::Open3" => 0, "Scalar::Util" => 0, "Symbol" => 0, "Test::More" => 0, "Tree::DAG_Node" => 0, "URI::Escape" => 0, "XML::DOM" => 0, "XML::DOM::XPath" => 0, "base" => 0, "constant" => 0, "strict" => 0, "warnings" => 0 ); unless ( eval { ExtUtils::MakeMaker->VERSION(6.63_03) } ) { delete $WriteMakefileArgs{TEST_REQUIRES}; delete $WriteMakefileArgs{BUILD_REQUIRES}; $WriteMakefileArgs{PREREQ_PM} = \%FallbackPrereqs; } delete $WriteMakefileArgs{CONFIGURE_REQUIRES} unless eval { ExtUtils::MakeMaker->VERSION(6.52) }; WriteMakefile(%WriteMakefileArgs); data000755000765000024 012402372043 15367 5ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t1.bed100644000765000024 3512402372043 16301 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t/datachr1 0 10 test-coordinates-1 FeatureIO.t100644000765000024 51112402372043 16603 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t# -*-Perl-*- Test Harness script for Bioperl # $Id: FeatureIO.t 15112 2008-12-08 18:12:38Z sendu $ use strict; use warnings; use Bio::Root::Test; use_ok($_) for qw( Bio::FeatureIO Bio::FeatureIO::gff Bio::FeatureIO::ptt Bio::FeatureIO::vecscreen_simple Bio::SeqFeature::Annotated ); done_testing(); exit; FeatureIO.x100644000765000024 1527612402372043 16665 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t# -*-Perl-*- Test Harness script for Bioperl # $Id: FeatureIO.t 15112 2008-12-08 18:12:38Z sendu $ use strict; BEGIN { use lib '.'; use Bio::Root::Test; test_begin(-tests => 35, -requires_module => 'Graph'); use_ok('Bio::FeatureIO'); } my $io; my $f; my $s; 
my $fcount; my $scount; ################################################################################ # # use FeatureIO::gff to read a FASTA file. # $fcount = 0; $scount = 0; ok( $io = Bio::FeatureIO->new( -file => test_input_file('dna1.fa') ) ); #read features while($f = $io->next_feature()){ warn $f; $fcount++; } is($fcount, 0); #then try to read sequences again. should get seqs now while($s = $io->next_seq()){ $scount++; } is($scount, 1); ################################################################################ # # use FeatureIO::gff to read a GFF3 file. $fcount = 0; $scount = 0; ok( $io = Bio::FeatureIO->new( -file => test_input_file('knownGene.gff3') ) ); #try to read sequences first. should be undef while($s = $io->next_seq()){ $scount++; } is($scount,0); #then read features while($f = $io->next_feature()){ $fcount++; } is($fcount, 15); #then try to read sequences again. should still be undef while($s = $io->next_seq()){ $scount++; } is($scount,0); ################################################################################ # # use FeatureIO::gff to read a GFF3 file w/ directivized FASTA tail # $fcount = 0; $scount = 0; ok( $io = Bio::FeatureIO->new( -file => test_input_file('hybrid1.gff3') ) ); #try to read sequences first. should be undef while($s = $io->next_seq()){ $scount++; } is($scount , 0); #then read features while($f = $io->next_feature()){ $fcount++; } is($fcount , 6); #then try to read sequences again. while($s = $io->next_seq()){ $scount++; } is($scount , 1); ################################################################################ # # use FeatureIO::gff to read a GFF3 file w/ non-directivized FASTA tail # $fcount = 0; $scount = 0; ok( $io = Bio::FeatureIO->new( -file => test_input_file('hybrid2.gff3') ) ); #try to read sequences first. 
should be undef while($s = $io->next_seq()){ $scount++; } is($scount , 0); #then read features while($f = $io->next_feature()){ $fcount++; } is($fcount , 6); ################################################################################ # # use FeatureIO::gff to read a GFF3 file of directives # $fcount = 0; $scount = 0; ok( $io = Bio::FeatureIO->new(-file => test_input_file('directives.gff3'), -verbose => test_debug() ? test_debug() : -1)); #read features while($f = $io->next_feature()){ $fcount++; } is($fcount , 1); #sequence-region ################################################################################ # # use FeatureIO::gff to read a GFF3 file as aggregated feature groups # $fcount = 0; $scount = 0; ok( $io = Bio::FeatureIO->new( -file => test_input_file('hybrid1.gff3') ) ); #try to read sequences first. should be undef while($s = $io->next_seq()){ $scount++; } is($scount , 0); #read feature groups $f = $io->next_feature_group(); is($f , 1); $f = $io->next_feature_group(); is($f , 0); #then try to read sequences again. while($s = $io->next_seq()){ $scount++; } is($scount , 1); ################################################################################ # # use FeatureIO::gff to read a PTT file. # $fcount = 0; my $ptt_in = Bio::FeatureIO->new( -file => test_input_file('test.ptt'), -format => 'ptt', ); ok($ptt_in); while (my $f = $ptt_in->next_feature) { $fcount++; if ($fcount==2) { # 2491..3423 + 310 24217063 metF LB002 - COG0685E 5,10-methylenetetrahydrofolate reductase is( $f->start , 2491 ); is( $f->end , 3423 ); cmp_ok( $f->strand, '>', 0 ); is( ($f->get_tag_values('PID'))[0],'24217063' ); is( ($f->get_tag_values('Gene'))[0], 'metF' ); is( ($f->get_tag_values('Synonym'))[0], 'LB002' ); ok( ! 
$f->has_tag('Code') ); is( ($f->get_tag_values('COG'))[0],'COG0685E' ); is( ($f->get_tag_values('Product'))[0], '5,10-methylenetetrahydrofolate reductase' ); } } is($fcount , 367); ################################################################################ # # use FeatureIO::vecscreen_simple to read a vecscreen file # { my @expected_features = ( { 'seq_id' => 'C02HBa0072A04.1', 'primary_tag' => 'moderate_match', 'end' => '60548', 'start' => '60522' }, { 'seq_id' => 'SL_FOS91h17_SP6_0', 'primary_tag' => 'strong_match', 'end' => '122', 'start' => '60' }, { 'seq_id' => 'SL_FOS91h18_T7_0', 'primary_tag' => 'strong_match', 'end' => '102', 'start' => '35' }, { 'seq_id' => 'SL_FOS91h18_T7_0', 'primary_tag' => 'moderate_match', 'end' => '103', 'start' => '76' }, { 'seq_id' => 'SL_FOS91h18_T7_0', 'primary_tag' => 'weak_match', 'end' => '104', 'start' => '82' }, { 'seq_id' => 'SL_FOS91h18_T7_0', 'primary_tag' => 'suspect_origin', 'end' => '34', 'start' => '1' }, { 'seq_id' => 'SL_FOS91i01_SP6_0', 'primary_tag' => 'strong_match', 'end' => '110', 'start' => '46' }, { 'seq_id' => 'SL_FOS91i01_SP6_0', 'primary_tag' => 'suspect_origin', 'end' => '45', 'start' => '1' }, { 'seq_id' => 'SL_FOS92b12_T7_0', 'primary_tag' => 'strong_match', 'end' => '108', 'start' => '41' }, { 'seq_id' => 'SL_FOS92b12_T7_0', 'primary_tag' => 'moderate_match', 'end' => '109', 'start' => '82' }, { 'seq_id' => 'SL_FOS92b12_T7_0', 'primary_tag' => 'weak_match', 'end' => '110', 'start' => '88' }, { 'seq_id' => 'SL_FOS92b12_T7_0', 'primary_tag' => 'weak_match', 'end' => '1329', 'start' => '1313' }, { 'seq_id' => 'SL_FOS92b12_T7_0', 'primary_tag' => 'suspect_origin', 'end' => '40', 'start' => '1' }, { 'seq_id' => 'SL_FOS92b12_T7_0', 'primary_tag' => 'suspect_origin', 'end' => '1334', 'start' => '1330' } ); my @vs_features; my $vs_in = Bio::FeatureIO->new( -file => test_input_file('vecscreen_simple.test_output'), -format => 'vecscreen_simple', ); ok( $vs_in ); while(my $feat = $vs_in->next_feature) { push 
@vs_features,$feat; } #convert the array of feature objects to something that can more easily be checked with is_deeply @vs_features = map { my $f = $_; my $rec = { map {$_ => $f->$_()} qw/start end primary_tag seq_id/ }; } @vs_features; is_deeply(\@vs_features,\@expected_features,'vecscreen_simple gets the correct features'); } vecscreen.t100644000765000024 574212402372043 16770 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t# -*-Perl-*- Test Harness script for Bioperl # $Id: FeatureIO.t 15112 2008-12-08 18:12:38Z sendu $ use strict; use warnings; use Bio::Root::Test; use Bio::FeatureIO; { my @expected_features = ( { 'seq_id' => 'C02HBa0072A04.1', 'primary_tag' => 'moderate_match', 'end' => '60548', 'start' => '60522' }, { 'seq_id' => 'SL_FOS91h17_SP6_0', 'primary_tag' => 'strong_match', 'end' => '122', 'start' => '60' }, { 'seq_id' => 'SL_FOS91h18_T7_0', 'primary_tag' => 'strong_match', 'end' => '102', 'start' => '35' }, { 'seq_id' => 'SL_FOS91h18_T7_0', 'primary_tag' => 'moderate_match', 'end' => '103', 'start' => '76' }, { 'seq_id' => 'SL_FOS91h18_T7_0', 'primary_tag' => 'weak_match', 'end' => '104', 'start' => '82' }, { 'seq_id' => 'SL_FOS91h18_T7_0', 'primary_tag' => 'suspect_origin', 'end' => '34', 'start' => '1' }, { 'seq_id' => 'SL_FOS91i01_SP6_0', 'primary_tag' => 'strong_match', 'end' => '110', 'start' => '46' }, { 'seq_id' => 'SL_FOS91i01_SP6_0', 'primary_tag' => 'suspect_origin', 'end' => '45', 'start' => '1' }, { 'seq_id' => 'SL_FOS92b12_T7_0', 'primary_tag' => 'strong_match', 'end' => '108', 'start' => '41' }, { 'seq_id' => 'SL_FOS92b12_T7_0', 'primary_tag' => 'moderate_match', 'end' => '109', 'start' => '82' }, { 'seq_id' => 'SL_FOS92b12_T7_0', 'primary_tag' => 'weak_match', 'end' => '110', 'start' => '88' }, { 'seq_id' => 'SL_FOS92b12_T7_0', 'primary_tag' => 'weak_match', 'end' => '1329', 'start' => '1313' }, { 'seq_id' => 'SL_FOS92b12_T7_0', 'primary_tag' => 'suspect_origin', 'end' => '40', 'start' => '1' }, { 'seq_id' => 
'SL_FOS92b12_T7_0', 'primary_tag' => 'suspect_origin', 'end' => '1334', 'start' => '1330' } ); my @vs_features; my $vs_in = Bio::FeatureIO->new( -file => test_input_file('vecscreen_simple.test_output'), -format => 'vecscreen_simple', ); ok( $vs_in ); while(my $feat = $vs_in->next_feature) { push @vs_features,$feat; } #convert the array of feature objects to something that can more easily be checked with is_deeply @vs_features = map { my $f = $_; my $rec = { map {$_ => $f->$_()} qw/start end primary_tag seq_id/ }; } @vs_features; is_deeply(\@vs_features,\@expected_features,'vecscreen_simple gets the correct features'); } done_testing(); exit; dna1.fa100644000765000024 33412402372043 16642 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t/data>Test1 AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC TTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAA TATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATGAAACGCATTAGCACCACC 00-compile.t100644000765000024 233412402372043 16652 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/tuse 5.006; use strict; use warnings; # this test was generated with Dist::Zilla::Plugin::Test::Compile 2.046 use Test::More tests => 8 + ($ENV{AUTHOR_TESTING} ? 1 : 0); my @module_files = ( 'Bio/FeatureIO.pm', 'Bio/FeatureIO/bed.pm', 'Bio/FeatureIO/gff.pm', 'Bio/FeatureIO/gtf.pm', 'Bio/FeatureIO/interpro.pm', 'Bio/FeatureIO/ptt.pm', 'Bio/FeatureIO/vecscreen_simple.pm', 'Bio/SeqFeature/Annotated.pm' ); # no fake home requested my $inc_switch = -d 'blib' ? 
'-Mblib' : '-Ilib'; use File::Spec; use IPC::Open3; use IO::Handle; open my $stdin, '<', File::Spec->devnull or die "can't open devnull: $!"; my @warnings; for my $lib (@module_files) { # see L my $stderr = IO::Handle->new; my $pid = open3($stdin, '>&STDERR', $stderr, $^X, $inc_switch, '-e', "require q[$lib]"); binmode $stderr, ':crlf' if $^O eq 'MSWin32'; my @_warnings = <$stderr>; waitpid($pid, 0); is($?, 0, "$lib loaded ok"); if (@_warnings) { warn @_warnings; push @warnings, @_warnings; } } is(scalar(@warnings), 0, 'no warnings found') or diag 'got warnings: ', explain \@warnings if $ENV{AUTHOR_TESTING}; test.ptt100644000765000024 6306212402372043 17266 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t/dataLeptospira interrogans serovar Lai str. 56601 chromosome II, complete sequence - 0..358943 367 proteins Location Strand Length PID Gene Synonym Code COG Product 15..2531 + 838 24217062 - LB001 - - hypothetical protein 2491..3423 + 310 24217063 metF LB002 - COG0685E 5,10-methylenetetrahydrofolate reductase 3716..3823 - 35 24217064 - LB004 - - hypothetical protein 3779..3886 + 35 24217065 - LB003 - - hypothetical protein 4066..4890 - 274 24217066 - LB005 - COG2107R hypothetical protein 4975..5085 + 36 24217067 - LB006 - - hypothetical protein 5239..5754 + 171 24217068 - LB007 - COG0561R hypothetical protein 5762..6112 + 116 24217069 - LB008 - COG0561R hypothetical protein 6083..6373 - 96 24217070 - LB009 - - hypothetical protein 6410..7279 + 289 24217071 hemA LB010 - COG0373H glutamyl-tRNA reductase 7249..8892 + 547 24217072 hemC LB011 - - porphobilinogen deaminase 8861..9814 + 317 24217073 hemB LB012 - - delta-aminolevulinic acid dehydratase 9811..11142 + 443 24217074 hemL LB013 - COG0001H glutamate-1-semialdehyde aminotransferase 11142..12059 + 305 24217075 - LB014 - COG0642T two-component hybrid sensor and regulator 12056..12748 + 230 24217076 - LB015 - COG0745TK two-component response regulator 12708..13760 + 350 24217077 hemE LB016 - COG0407H 
uroporphyrinogen decarboxylase 13667..15106 + 479 24217078 hemF LB017 - COG0635H coproporphyrinogen III oxidase 15186..16169 + 327 24217079 - LB018 - - hypothetical protein 16244..16384 + 46 24217080 - LB019 - - hypothetical protein 16381..17664 + 427 24217081 hemG LB020 - COG1232H protoporphyrinogen oxidase 17941..18075 - 44 24217082 - LB021 - - hypothetical protein 18395..19468 + 357 24217083 - LB022 - COG3547L transposase 19839..20135 - 98 24217084 - LB023 - - ferrochelatase 20176..20748 - 190 24217085 - LB024 - - ferrochelatase 21008..22150 - 380 24217086 - LB025 - - hypothetical protein 22312..23229 + 305 24217087 parA3 LB026 - COG1192D ParA protein 23222..23929 + 235 24217088 parB3 LB027 - COG1475K ParB protein 24029..24631 + 200 24217089 - LB028 - COG0431R putative reductase 24684..24791 - 35 24217090 - LB029 - - hypothetical protein 25233..26396 - 387 24217091 - LB030 - COG3274S probable intercellular adhesion protein C 26476..26817 - 113 24217092 - LB032 - - hypothetical protein 26492..26851 + 119 24217093 - LB031 - COG0664T cyclic nucleotide dependent protein kinase 26903..27112 - 69 24217094 - LB033 - - unknown protein confirmed by proteomics 27247..28002 + 251 24217095 - LB034 - COG1028IQR 3-oxoacyl-(acyl carrier protein) reductase 27978..28529 + 183 24217096 amsI LB035 - COG0394T low molecular weight phosphotyrosine protein phosphatase 28559..29827 + 422 24217097 ndh LB036 - COG1252C NADH dehydrogenase 29824..30282 + 152 24217098 - LB037 - - putative sucrose/H+ symporter 30314..30475 - 53 24217099 - LB038 - - hypothetical protein 30707..31315 - 202 24217100 - LB039 - - hypothetical protein 31361..31627 - 88 24217101 - LB040 - - hypothetical protein 31683..32507 + 274 24217102 - LB041 - COG1028IQR 3-oxoacyl-(acyl carrier protein) reductase 32517..32939 + 140 24217103 - LB042 - COG3832S hypothetical protein 33001..33372 + 123 24217104 - LB043 - - unknown protein confirmed by proteomics 33697..34767 + 356 24217105 - LB044 - - hypothetical protein 
35199..35384 - 61 24217106 - LB045 - - hypothetical protein 35388..35525 - 45 24217107 - LB046 - - hypothetical protein 36004..37326 + 440 24217108 - LB047 - COG2849S hypothetical protein 37916..38989 + 357 24217109 - LB048 - COG3547L putative transposase 39952..40101 + 49 24217110 - LB049 - - hypothetical protein 40098..40712 + 204 24217111 - LB050 - - hypothetical protein 40773..41846 + 357 24217112 - LB051 - COG0714R magnesium chelatase, putative 41853..42683 + 276 24217113 - LB052 - COG1721R hypothetical protein 42680..43549 + 289 24217114 - LB053 - - hypothetical protein 43522..44484 + 320 24217115 vwa1 LB054 - - von Willebrand factor type A domain containing protein 44481..45524 + 347 24217116 vwa2 LB055 - - von Willebrand factor type A domain containing protein 45467..46210 + 247 24217117 - LB056 - - TPR-repeat-containing protein 46207..47850 + 547 24217118 batD LB057 - - BatD 47847..49670 + 607 24217119 htpG2 LB058 - COG0326O heat shock protein 90 49688..50257 + 189 24217120 - LB059 - - hypothetical protein 50238..50729 - 163 24217121 - LB060 - - hypothetical protein 50959..52557 + 532 24217122 - LB061 - COG3211R hypothetical protein 52565..52909 - 114 24217123 - LB062 - COG1366T anti-anti-sigma factor 52906..53925 - 339 24217124 - LB063 - COG0061G hypothetical protein 53937..54044 - 35 24217125 - LB064 - - hypothetical protein 54100..54918 + 272 24217126 - LB065 - - hypothetical protein 54915..55394 + 159 24217127 - LB066 - - hypothetical protein 55509..55622 + 37 24217128 - LB067 - - hypothetical protein 55772..56593 + 273 24217129 - LB068 - COG1398I fatty acid desaturase 57309..57893 + 194 24217130 - LB069 - - hypothetical protein 59172..59822 + 216 24217131 - LB070 - COG2085R hypothetical protein 60390..60638 + 82 24217132 - LB071 - - hypothetical protein 61294..61851 + 185 24217133 - LB072 - - hypothetical protein 61981..62949 - 322 24217134 - LB073 - COG1703E lysine arginine ornithine transport system kinase 62957..64969 - 670 24217135 mcm2 LB074 - 
COG2185I methylmalonyl-CoA mutase 65000..65104 - 34 24217136 - LB076 - - hypothetical protein 65076..66413 + 445 24217137 - LB075 - - hypothetical protein 67553..68245 + 230 24217138 - LB077 - - hypothetical protein 68865..69938 - 357 24217139 - LB078 - COG4232OC TPR-repeat-containing protein 70067..70345 + 92 24217140 hisE LB079 - COG0140E phosphoribosyl-ATP pyrophosphohydrolase 70349..72280 + 643 24217141 - LB080 - - hypothetical protein 72258..72386 + 42 24217142 - LB081 - - hypothetical protein 72749..73513 + 254 24217143 - LB082 - COG1028IQR 3-ketoacyl-acyl carrier protein reductase 74006..74239 + 77 24217144 acp LB083 - COG0236IQ acyl carrier protein 74293..75036 + 247 24217145 - LB084 - COG0571K ribonuclease III 75411..75998 + 195 24217146 - LB085 - COG1051F,COG0494LR MutT/nudix family protein 75995..77080 + 361 24217147 aroB LB086 - COG0337E 3-dehydroquinate synthase 77080..77961 + 293 24217148 - LB087 - COG0668M hypothetical protein 78288..78662 + 124 24217149 - LB088 - - hypothetical protein 79179..80735 + 518 24217150 - LB089 - COG2509R Uncharacterized FAD-dependent dehydrogenase 80785..81324 + 179 24217151 - LB090 - - hypothetical protein 81632..81871 - 79 24217152 - LB091 - - hypothetical protein 81925..82335 - 136 24217153 - LB092 - - hypothetical protein 82392..84329 - 645 24217154 fadD LB093 - COG1022I probable long-chain-fatty-acid--CoA ligase 84491..87430 + 979 24217155 cyaA17 LB094 - COG2114T,COG0840NT adenylate cyclase 87459..87566 - 35 24217156 - LB095 - - hypothetical protein 87988..88740 - 250 24217157 - LB096 - COG0500QR hypothetical protein 88780..90210 - 476 24217158 - LB098 - - Predicted xylanase/chitin deacetilase 90205..90318 + 37 24217159 - LB097 - - hypothetical protein 90403..90918 - 171 24217160 - LB099 - - hypothetical protein 91165..91323 + 52 24217161 - LB100 - - hypothetical protein 91526..91648 - 40 24217162 - LB101 - - hypothetical protein 91672..92481 + 269 24217163 - LB102 - COG0647G phospholysine phosphohistidine inorganic 
pyrophosphate phosphatase 92488..92916 - 142 24217164 - LB103 - COG0824R hypothetical protein 93181..93759 - 192 24217165 - LB104 - COG1309K putative TetR-family transcriptional regulator 94594..95610 + 338 24217166 - LB105 - COG0640K,COG0500QR transcriptional regulator, ArsR family 95612..96922 + 436 24217167 - LB106 - COG0499H S-adenosyl-L-homocysteine hydrolase 96995..97294 + 99 24217168 - LB107 - - ferredoxin 97341..101084 + 1247 24217169 metH LB108 - COG0646E,COG1410E B12-dependent homocysteine-N5-methyltetrahydrofolate transmethylase 101193..101531 + 112 24217170 - LB109 - - hypothetical protein 101979..102911 - 310 24217171 - LB110 - - putative outermembrane protein 103023..104318 + 431 24217172 - LB111 - COG0205G diphosphate--fructose-6-phosphate 1-phosphotransferase 104517..106262 - 581 24217173 - LB112 - COG2203T,COG2208TK putative regulatory protein contains GAF domain 106440..107324 - 294 24217174 argB LB114 - COG0548E acetylglutamate kinase 107290..108132 + 280 24217175 - LB113 - COG1028IQR 3-oxoacyl-(acyl carrier protein) reductase 108927..109031 - 34 24217176 - LB115 - - hypothetical protein 109072..110814 - 580 24217177 - LB116 - COG3975R hypothetical protein 110807..111298 - 163 24217178 - LB117 - COG1225O bacterioferritin comigratory protein 111451..113616 + 721 24217179 cyaA18 LB118 - COG2114T adenylate cyclase 114803..115147 + 114 24217180 - LB119 - - hypothetical protein 115194..116291 + 365 24217181 - LB120 - - hypothetical protein 116308..117120 + 270 24217182 - LB121 - COG1028IQR 3-oxoacyl-(acyl carrier protein) reductase 117169..118068 - 299 24217183 - LB122 - COG0582L site-specific integrase/recombinase XerD related protein 118184..118723 - 179 24217184 aroK LB123 - COG0703E shikimic acid kinase I 118720..119571 - 283 24217185 - LB124 - COG1639T HD-GYP domain (HD superfamily hydrolase) 119577..120128 - 183 24217186 cheD2 LB125 - COG1871NT chemotaxis protein 120255..121481 + 408 24217187 traB LB126 - COG1916S pheromone shutdown protein 
121805..122368 + 187 24217188 - LB127 - - hypothetical protein 122598..122750 - 50 24217189 - LB129 - - hypothetical protein 122743..122871 + 42 24217190 - LB128 - - hypothetical protein 123146..124291 - 381 24217191 - LB130 - - transcriptional regulator, AraC family 124576..125685 - 369 24217192 - LB131 - - putative transcriptional regulator, araC family protein 126389..126511 + 40 24217193 - LB132 - - hypothetical protein 126928..128532 + 534 24217194 - LB133 - - probable protein containing EAL domain 129379..129828 + 149 24217195 - LB134 - - hypothetical protein 130629..130736 - 35 24217196 - LB135 - - hypothetical protein 130768..131166 - 132 24217197 - LB136 - COG1366T anti-sigma factor antagonist 131177..132175 - 332 24217198 - LB137 - - hypothetical protein 132172..133629 - 485 24217199 - LB138 - - hypothetical protein 133632..135125 - 497 24217200 - LB139 - COG2208TK putative regulation protein contains HAMP domain 136387..136539 + 50 24217201 - LB140 - - hypothetical protein 136739..137446 - 235 24217202 - LB141 - - similar to putative lipoprotein qlp42 137699..138250 - 183 24217203 - LB142 - - hypothetical protein 138314..139405 - 363 24217204 - LB143 - COG4254S similar to putative lipoprotein qlp42 139426..139947 - 173 24217205 - LB144 - COG1595K RNA polymerase ECF-type sigma factor 140082..140189 + 35 24217206 - LB145 - - hypothetical protein 141064..141711 + 215 24217207 - LB146 - - hypothetical protein 141831..142478 + 215 24217208 - LB147 - - conserved hyperthetical protein 142975..143316 - 113 24217209 - LB148 - - hypothetical protein 143870..144493 - 207 24217210 pmgA LB149 - COG0406G phosphoglycerate mutase 144510..145457 - 315 24217211 cbiB LB150 - COG1270H cobalamin biosynthesis protein B 145461..146933 - 490 24217212 cbiP LB151 - COG1492H cobyric acid synthase 148052..148672 - 206 24217213 cobP LB152 - COG2087H cobinamide kinase 148612..149349 - 245 24217214 - LB153 - COG1865S hypothetical protein 149292..150656 - 454 24217215 cbiA LB154 - 
COG1797H cobyrinic acid-diamide synthase 150656..151192 - 178 24217216 cobA LB155 - COG2109H cob(I)alamin adenosyltransferase 151189..151953 - 254 24217217 cbiF LB156 - COG2875H precorrin-3 methylase 151967..153460 - 497 24217218 cbiH LB157 - COG1010H precorrin methylase 153457..154614 - 385 24217219 cbiG LB158 - COG2073H cobalamin biosynthesis protein 154601..155365 - 254 24217220 cobI LB159 - COG2243H precorrin-2 methyltransferase 155362..156597 - 411 24217221 cbiE LB160 - COG2241H,COG2242H precorrin-6Y methylase 156594..157265 - 223 24217222 cbiC LB161 - - cobalamin biosynthesis precorrin isomerase 157262..158365 - 367 24217223 cbiD LB162 - - cobalt-precorrin-6A synthase 158369..159118 - 249 24217224 - LB163 - COG1018C Oxidoreductase FAD-binding family protein 159124..159495 - 123 24217225 - LB164 - - unknown protein confirmed by proteomics 159516..160685 - 389 24217226 cbiX LB165 - COG2138S,COG3411C putative cbiX protein 161652..161771 + 39 24217227 - LB166 - - hypothetical protein 162903..163043 - 46 24217228 - LB168 - - hypothetical protein 163042..165165 + 707 24217229 - LB167 - - hypothetical protein 165171..165743 - 190 24217230 - LB169 - - hypothetical protein 165785..167674 + 629 24217231 - LB170 - COG1086MG similar to capsular polysaccharide biosynthesis protein 167667..168137 + 156 24217232 - LB171 - - hypothetical protein 168134..169744 + 536 24217233 - LB172 - COG2252R hypothetical protein 169746..169901 + 51 24217234 - LB173 - - hypothetical protein 170186..171073 + 295 24217235 htpX LB174 - COG0501O heat shock protein HtpX 171180..171356 - 58 24217236 - LB175 - - hypothetical protein 171595..172128 + 177 24217237 - LB176 - COG0503F adenine phosphoribosyltransferase 172166..173680 + 504 24217238 - LB177 - COG0265O putative serine protease 173695..175278 + 527 24217239 - LB178 - COG0265O putative serine protease 175702..175815 - 37 24217240 - LB179 - - hypothetical protein 175960..176091 - 43 24217241 - LB180 - - hypothetical protein 176650..177882 - 
410 24217242 - LB181 - - hypothetical protein 177879..179456 - 525 24217243 - LB182 - - hypothetical protein 179453..179845 - 130 24217244 - LB183 - COG0735P transcriptional regulator (Fur family) 180591..180731 - 46 24217245 - LB184 - - hypothetical protein 181088..181198 - 36 24217246 - LB185 - - hypothetical protein 181530..182207 + 225 24217247 - LB186 - COG5398P heme oxygenase 182182..183390 + 402 24217248 - LB187 - - hypothetical protein 183377..183514 - 45 24217249 - LB188 - - hypothetical protein 183575..184105 - 176 24217250 - LB190 - - hypothetical protein 184098..184205 + 35 24217251 - LB189 - - hypothetical protein 184996..187128 - 710 24217252 - LB191 - COG4771P,COG1629P putative TonB-dependent outer membrane receptor protein 187107..187517 - 136 24217253 - LB192 - - hypothetical protein 187719..187829 + 36 24217254 - LB193 - - hypothetical protein 187833..188411 - 192 24217255 - LB194 - - unknown protein confirmed by proteomics 188742..188870 + 42 24217256 - LB195 - - hypothetical protein 189081..189608 + 175 24217257 - LB196 - - putative LRR repeat family protein 189621..190394 - 257 24217258 - LB197 - - hypothetical protein 190812..191117 - 101 24217259 - LB198 - COG0718S hypothetical protein 191222..192577 - 451 24217260 - LB199 - - putative outermembrane protein 192685..192810 - 41 24217261 - LB200 - - hypothetical protein 192835..194457 - 540 24217262 lonA LB201 - COG1067O putative ATP-dependent protease LA 194483..195541 - 352 24217263 - LB202 - - queuosine biosynthesis protein 195737..197182 + 481 24217264 - LB203 - - hypothetical protein 197202..198344 + 380 24217265 mtfA LB204 - COG0438M mannosyltransferase A 198307..198921 + 204 24217266 - LB205 - - hypothetical protein 199150..199296 + 48 24217267 - LB206 - - hypothetical protein 199351..200739 + 462 24217268 - LB207 - - hypothetical protein 200811..202223 + 470 24217269 - LB208 - COG1696M alginate o-acetyltransferase 202238..203398 + 386 24217270 - LB209 - - hypothetical protein 
203395..204792 + 465 24217271 - LB210 - COG1696M alginate o-acetyltransferase 204799..205869 + 356 24217272 - LB211 - - hypothetical protein 205866..206210 + 114 24217273 - LB212 - COG1694R hypothetical protein 206966..207379 - 137 24217274 exbD2 LB213 - COG0848U putative biopolymer transport protein 207376..207903 - 175 24217275 - LB214 - COG0811U MotA/ExbB proton channel family protein 208518..209267 + 249 24217276 ubiE LB215 - COG2226H ubiquinone/menaquinone biosynthesis methlytransferase 209350..211110 + 586 24217277 - LB216 - - hypothetical protein 211107..211502 + 131 24217278 - LB217 - - hypothetical protein 211547..212938 + 463 24217279 - LB218 - COG2339S hypothetical protein 212921..213655 - 244 24217280 - LB219 - - hypothetical protein 213749..214984 - 411 24217281 - LB220 - - hypothetical protein 215512..215634 + 40 24217282 - LB221 - - hypothetical protein 216332..216784 - 150 24217283 - LB222 - COG1734T hypothetical protein 217221..218072 - 283 24217284 - LB223 - COG2801L putative transposase 218069..218374 - 101 24217285 - LB224 - COG2963L putative transposase 219779..222361 + 860 24217286 - LB225 - - hypothetical protein 222462..222995 + 177 24217287 - LB226 - - Fimh-like protein 224010..224129 + 39 24217288 - LB227 - - hypothetical protein 224104..225336 + 410 24217289 - LB228 - - Putative AraC-type Regulator 225796..225906 - 36 24217290 - LB229 - - hypothetical protein 226283..226582 + 99 24217291 - LB230 - COG2963L putative transposase 226579..227424 + 281 24217292 - LB231 - COG2801L putative transposase 228469..228888 - 139 24217293 - LB232 - - hypothetical protein 229573..229791 - 72 24217294 - LB233 - - hypothetical protein 229900..230004 - 34 24217295 - LB234 - - hypothetical protein 230358..232076 - 572 24217296 - LB235 - COG5001T probable protein contain EAL family signaling protein 232421..232525 - 34 24217297 - LB236 - - hypothetical protein 232542..233129 - 195 24217298 - LB237 - - GGDEF family protein 233172..234227 - 351 24217299 - 
LB238 - - putative CACHE family protein 234316..234507 + 63 24217300 - LB239 - - hypothetical protein 234521..236095 - 524 24217301 - LB240 - - GGDEF family protein 236543..239152 + 869 24217302 - LB241 - COG0642T two-component hybrid sensor and regulator 239627..240328 - 233 24217303 - LB242 - - hypothetical protein 240371..240778 - 135 24217304 - LB243 - - hypothetical protein 241061..242065 - 334 24217305 - LB244 - - hypothetical protein 242934..244322 - 462 24217306 - LB245 - COG0591ER probable sodium:solute symporter 244678..244803 - 41 24217307 - LB246 - - hypothetical protein 245062..245250 - 62 24217308 - LB247 - - hypothetical protein 245365..246822 + 485 24217309 - LB248 - - hypothetical protein 246884..246997 - 37 24217310 - LB249 - - hypothetical protein 247072..247353 - 93 24217311 - LB250 - - hypothetical protein 247350..249314 - 654 24217312 - LB251 - - hypothetical protein 249442..250320 - 292 24217313 - LB252 - COG1752R hypothetical protein 250333..250566 - 77 24217314 - LB253 - - hypothetical protein 250911..252125 - 404 24217315 - LB254 - COG2814G putative sugar transport protein 252638..253927 + 429 24217316 - LB255 - COG0427C 4-Hydroxybutyrate CoA-transferase 253999..254133 + 44 24217317 - LB256 - - hypothetical protein 254196..254714 + 172 24217318 - LB257 - - hypothetical protein 254727..257126 + 799 24217319 - LB258 - COG4870O Cysteine protease 257127..257249 + 40 24217320 - LB259 - - hypothetical protein 257596..257706 - 36 24217321 - LB260 - - hypothetical protein 257787..259139 - 450 24217322 - LB261 - - hypothetical protein 259245..260663 - 472 24217323 galF LB262 - - UDP-glucose pyrophosphorylase 260651..261100 - 149 24217324 - LB263 - - hypothetical protein 261110..262027 + 305 24217325 - LB264 - - hypothetical protein 262747..263928 + 393 24217326 - LB265 - COG0596R hypothetical protein 264495..265445 - 316 24217327 - LB266 - COG0385R hypothetical protein 266337..267488 - 383 24217328 - LB267 - COG0520E aminotransferase 267499..268368 
- 289 24217329 - LB268 - COG1633S hypothetical protein 268431..269057 - 208 24217330 - LB269 - COG3714S hypothetical protein 269054..269176 - 40 24217331 - LB270 - - hypothetical protein 269322..269507 + 61 24217332 - LB271 - COG0861P hypothetical protein 269486..270124 + 212 24217333 - LB272 - COG0861P hypothetical protein 270216..272096 + 626 24217334 mcm2 LB273 - COG2185I methylmalonyl-CoA mutase 272093..274291 + 732 24217335 mcm3 LB274 - COG2185I methylmalonyl-CoA mutase 274304..275359 + 351 24217336 argK LB275 - COG1703E arginine/ornithine transport system ATPase 276450..276827 + 125 24217337 - LB276 - - hypothetical protein 276984..277673 + 229 24217338 - LB277 - - hypothetical protein 277675..278124 + 149 24217339 - LB278 - - hypothetical protein 278133..280793 + 886 24217340 - LB279 - - hypothetical protein 280793..281308 + 171 24217341 - LB280 - - hypothetical protein 281387..282001 + 204 24217342 exbB LB281 - COG0811U transport protein ExbB 282091..282495 + 134 24217343 exbD3 LB282 - COG0848U transport protein ExbD 282505..283155 + 216 24217344 - LB283 - COG0810M TonB protein 284176..284292 + 38 24217345 - LB284 - - hypothetical protein 284338..285132 + 264 24217346 - LB285 - COG2908S hypothetical protein 285144..286796 + 550 24217347 gltB2 LB286 - COG0069E glutamate synthase 287132..287860 + 242 24217348 - LB287 - - hypothetical protein 287857..288297 + 146 24217349 - LB288 - COG0824R hypothetical protein 288431..288862 + 143 24217350 - LB289 - - hypothetical protein 289090..291633 + 847 24217351 - LB290 - COG0642T,COG2202T two-component hybrid sensor and regulator 291637..292335 - 232 24217352 - LB291 - COG1028IQR 3-oxoacyl-[acyl-carrier protein] reductase 292348..293097 - 249 24217353 - LB292 - COG0235G probable sugar aldolase 293099..293632 - 177 24217354 - LB293 - COG1791S probable ARD family methionine salvage pathway enzyme 293655..294371 - 238 24217355 - LB294 - COG0231J elongation factor P 294557..295630 + 357 24217356 - LB295 - COG3547L putative 
transposase 296868..297005 + 45 24217357 - LB296 - - hypothetical protein 297444..298343 + 299 24217358 pstS LB297 - COG0226P phosphate-binding protein PstS 298418..299515 + 365 24217359 - LB298 - COG1509E L-lysine 2,3-aminomutase 299490..300128 + 212 24217360 - LB299 - - receptor tyrosine kinase 300168..300389 + 73 24217361 - LB300 - - hypothetical protein 300382..301155 + 257 24217362 - LB301 - - hypothetical protein 301358..301474 - 38 24217363 - LB302 - - hypothetical protein 301790..302635 - 281 24217364 - LB303 - COG2801L putative transposase 302632..302937 - 101 24217365 - LB304 - - putative transposase 303419..303913 - 164 24217366 - LB305 - COG3216S hypothetical protein 304190..304294 - 34 24217367 - LB306 - - hypothetical protein 304360..304818 - 152 24217368 - LB307 - COG1238S hypothetical protein 304824..304964 - 46 24217369 - LB308 - - hypothetical protein 305503..306762 + 419 24217370 - LB309 - - similar to Fimh-like protein 306995..307822 - 275 24217371 pyrF LB310 - COG0284F orotidine-5'-monophosphate decarboxylase 307834..308679 - 281 24217372 speE2 LB311 - COG0421E spermidine synthase 308686..309552 - 288 24217373 - LB312 - - hypothetical protein 309558..309662 - 34 24217374 - LB314 - - hypothetical protein 309639..309782 + 47 24217375 - LB313 - - hypothetical protein 310349..310483 + 44 24217376 - LB315 - - hypothetical protein 310505..311047 - 180 24217377 - LB316 - - unknown protein confirmed by proteomics 311077..312246 + 389 24217378 - LB317 - COG0787M alanine racemase 312297..313442 + 381 24217379 - LB318 - - hypothetical protein 313463..314374 + 303 24217380 - LB319 - - hypothetical protein 314371..315162 + 263 24217381 - LB320 - - hypothetical protein 316118..316777 - 219 24217382 - LB321 - - hypothetical protein 316778..319093 - 771 24217383 - LB322 - COG0642T two-component hybrid sensor and regulator 319149..319766 + 205 24217384 - LB323 - COG0741M lytic transglycosylase 319842..319994 - 50 24217385 - LB324 - - hypothetical protein 
320145..320873 + 242 24217386 - LB325 - COG1309K transcriptional regulator, TetR family 321371..321490 + 39 24217387 - LB326 - - hypothetical protein 321518..323791 - 757 24217388 acn LB327 - COG1048C aconitate hydratase 324060..325211 - 383 24217389 ompA LB328 - COG1360N,COG2885M outer membrane protein OmpA 325387..326403 - 338 24217390 lysS2 LB329 - COG2269J lysyl-tRNA synthetase 326504..326608 - 34 24217391 - LB331 - - hypothetical protein 326607..326936 + 109 24217392 - LB330 - - hypothetical protein 327256..327666 - 136 24217393 - LB332 - - hypothetical protein 327874..328605 + 243 24217394 - LB333 - COG0745TK two-component response regulator 329173..329832 - 219 24217395 - LB334 - - hypothetical protein 329835..330245 - 136 24217396 phhB LB335 - - pterin-4-alpha-carbinolamine dehydratase 330224..330352 - 42 24217397 - LB336 - - hypothetical protein 330565..331380 + 271 24217398 - LB337 - - hypothetical protein 331365..331493 + 42 24217399 - LB338 - - hypothetical protein 331510..331692 + 60 24217400 - LB339 - - hypothetical protein 332599..333030 - 143 24217401 - LB340 - - hypothetical protein 333122..333505 + 127 24217402 - LB341 - - hypothetical protein 333502..333771 + 89 24217403 - LB342 - - hypothetical protein 333876..334343 + 155 24217404 - LB343 - - cytochrome-c oxidase chain III 334727..334846 - 39 24217405 - LB344 - - hypothetical protein 334894..335967 - 357 24217406 - LB345 - COG3547L putative transposase 336410..336715 + 101 24217407 - LB346 - COG2963L putative transposase 337384..337512 + 42 24217408 - LB347 - - hypothetical protein 337515..337955 + 146 24217409 - LB348 - - hypothetical protein 338085..338201 + 38 24217410 - LB349 - - hypothetical protein 339199..339609 - 136 24217411 - LB350 - - hypothetical protein 339811..340188 - 125 24217412 - LB351 - - putative outermembrane protein 340238..340336 - 32 24217413 - LB352 - - unknown protein confirmed by proteomics 340442..341902 - 486 24217414 pyk2 LB353 - - pyruvate kinase 341941..343902 - 
653 24217415 - LB354 - - hypothetical protein 343915..344964 - 349 24217416 asd LB355 - COG0136E aspartate-semialdehyde dehydrogenase 345016..346608 - 530 24217417 - LB356 - - hypothetical protein 346596..346715 - 39 24217418 - LB357 - - hypothetical protein 347158..348723 + 521 24217419 - LB358 - - hypothetical protein 348694..350157 - 487 24217420 - LB359 - COG1696M alginate O-acetylation protein 350989..351204 - 71 24217421 - LB360 - - hypothetical protein 351865..352251 - 128 24217422 - LB361 - - hypothetical protein 352294..353268 - 324 24217423 - LB362 - - hypothetical protein 353351..354448 - 365 24217424 - LB363 - - hypothetical protein 354363..355733 - 456 24217425 - LB364 - COG2204T transcriptional regulator (FIS family) 355978..356730 + 250 24217426 parA4 LB365 - COG1192D ParA protein 356714..357559 + 281 24217427 parB4 LB366 - COG1475K ParB family protein 357993..358931 + 312 24217428 - LB367 - - hypothetical protein hybrid1.gff3100644000765000024 102512402372043 17636 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t/data##gff-version 3 chr17 UCSC mRNA 62467934 62469545 . - . ID=A00469;Dbxref=AFFX-U133:205840_x_at,Locuslink:2688,Genbank-mRNA:A00469,Swissprot:P01241,PFAM:PF00103,AFFX-U95:1332_f_at,Swissprot:SOMA_HUMAN;Note=growth%20hormone%201;Alias=GH1 chr17 UCSC CDS 62468039 62468236 . - 1 Parent=A00469 chr17 UCSC CDS 62468490 62468654 . - 2 Parent=A00469 chr17 UCSC CDS 62468747 62468866 . - 1 Parent=A00469 chr17 UCSC CDS 62469076 62469236 . - 1 Parent=A00469 chr17 UCSC CDS 62469497 62469506 . - 0 Parent=A00469 ##FASTA >A00469 GATTACA GATTACA hybrid2.gff3100644000765000024 101512402372043 17636 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t/data##gff-version 3 chr17 UCSC mRNA 62467934 62469545 . - . ID=A00469;Dbxref=AFFX-U133:205840_x_at,Locuslink:2688,Genbank-mRNA:A00469,Swissprot:P01241,PFAM:PF00103,AFFX-U95:1332_f_at,Swissprot:SOMA_HUMAN;Note=growth%20hormone%201;Alias=GH1 chr17 UCSC CDS 62468039 62468236 . 
- 1 Parent=A00469 chr17 UCSC CDS 62468490 62468654 . - 2 Parent=A00469 chr17 UCSC CDS 62468747 62468866 . - 1 Parent=A00469 chr17 UCSC CDS 62469076 62469236 . - 1 Parent=A00469 chr17 UCSC CDS 62469497 62469506 . - 0 Parent=A00469 >A00469 GATTACA GATTACA Bio000755000765000024 012402372043 15472 5ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/libFeatureIO.pm100644000765000024 3337212402372043 20043 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/lib/Bio# $Id: FeatureIO.pm 16108 2009-09-16 17:07:49Z cjfields $ # # BioPerl module for Bio::FeatureIO # # Please direct questions and support issues to # # Reimplementation by Chris Fields # # Original implementation by Allen Day # # Copyright Chris Fields # # You may distribute this module under the same terms as perl itself # # POD documentation - main docs before the code =head1 NAME Bio::FeatureIO - Handler for FeatureIO =head1 SYNOPSIS use Bio::FeatureIO; #read from a file $in = Bio::FeatureIO->new(-file => "my.gff" , -format => 'GFF'); #read from a filehandle $in = Bio::FeatureIO->new(-fh => \*GFF , -format => 'GFF'); #read features already attached to a sequence my $feat = Bio::FeatureIO->new(-seq => $seq , -format => 'features'); #read new features for existing sequence my $seq = Bio::FeatureIO->new(-seq => $seq , -format => 'Das'); #write out features $out = Bio::FeatureIO->new(-file => ">outputfilename" , -format => 'GFF' , -version => 3); while ( my $feature = $in->next_feature() ) { $out->write_feature($feature); } =head1 DESCRIPTION An I/O iterator subsystem for genomic sequence features. Bio::FeatureIO is a handler module for the formats in the FeatureIO set (eg, Bio::FeatureIO::GFF). It is the officially sanctioned way of getting at the format objects, which most people should use. The Bio::FeatureIO system can be thought of like biological file handles. 
They are attached to filehandles with smart formatting rules (e.g., GFF format, or BED format) and can either read or write feature objects (Bio::SeqFeature objects, or more correctly, Bio::FeatureHolderI implementing objects, of which Bio::SeqFeature is one such object). If you want to know what to do with a Bio::SeqFeatureI object, read L<Bio::SeqFeatureI>. The idea is that you request a stream object for a particular format. All the stream objects have a notion of an internal file that is read from or written to. A particular FeatureIO object instance is configured for either input or output. A specific example of a stream object is the Bio::FeatureIO::gff object. Each stream object has functions: $stream->next_feature(); $stream->write_feature($feature); =head1 SUPPORTED FORMATS name module ----------------------------------- BED bed.pm GFF gff.pm GTF gtf.pm InterPro (IPRScan 4.0) interpro.pm PTT (NCBI protein table) ptt.pm =head1 CONSTRUCTORS =head2 Bio::FeatureIO-E<gt>new() $featureIO = Bio::FeatureIO->new(-file => 'filename', -format=>$format); $featureIO = Bio::FeatureIO->new(-fh => \*FILEHANDLE, -format=>$format); $featureIO = Bio::FeatureIO->new(-seq => $seq, -format=>$format); The new() class method constructs a new Bio::FeatureIO object. The returned object can be used to retrieve or print feature objects. new() accepts the following parameters: =over 4 =item -file A file path to be opened for reading or writing. The usual Perl conventions apply: 'file' # open file for reading '>file' # open file for writing '>>file' # open file for appending '+<file' # open file read/write 'command |' # open a pipe from the command '| command' # open a pipe to the command =item -fh You may provide new() with a previously-opened filehandle. For example, to read from STDIN: $featureIO = Bio::FeatureIO->new(-fh => \*STDIN); Note that you must pass filehandles as references to globs. If neither a filehandle nor a filename is specified, then the module will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt> semantics. A string filehandle is handy if you want to modify the output in memory, before printing it out.
The following program reads features from a GFF file and prints them out in GTF format with some HTML tags: use Bio::FeatureIO; use IO::String; my $in = Bio::FeatureIO->new('-file' => "my.gff" , '-format' => 'gff'); while ( my $f = $in->next_feature() ) { # the output handle is reset for every feature my $stringio = IO::String->new($string); my $out = Bio::FeatureIO->new('-fh' => $stringio, '-format' => 'gtf'); # output goes into $string $out->write_feature($f); # modify $string $string =~ s|(>)(\w+)|$1<font color="Red">$2</font>|g; # print into STDOUT print $string; } =item -format Specify the format of the file. See above for the list of supported formats. =item -flush By default, all files (or filehandles) opened for writing features will be flushed after each write_feature() (making the file immediately usable). If you don't need this facility and would like to marginally improve the efficiency of writing multiple features to the same file (or filehandle), pass the -flush option '0' or any other value that evaluates as defined but false: my $f1 = Bio::FeatureIO->new -file => "<a.f1", -format => "f1"; my $f2 = Bio::FeatureIO->new -file => ">a.f2", -format => "f2", -flush => 0; # go as fast as we can! while($feature = $f1->next_feature) { $f2->write_feature($feature) } =back =head2 Bio::FeatureIO-E<gt>newFh() $fh = Bio::FeatureIO->newFh(-fh => \*FILEHANDLE, -format=>$format); $fh = Bio::FeatureIO->newFh(-format => $format); # etc. This constructor behaves like new(), but returns a tied filehandle rather than a Bio::FeatureIO object. You can read features from this object using the familiar E<lt>E<gt> operator, and write to it using print(). The usual array and $_ semantics work. For example, you can read all feature objects into an array like this: @features = <$fh>; Other operations, such as read(), sysread(), write(), close(), and printf() are not supported. =head1 OBJECT METHODS See below for more detailed summaries. The main methods are: =over 3 =item next_feature Fetch the next feature from the stream.
=item write_feature Write the specified feature(s) to the stream. =item feature_factory This gets/sets the specific Bio::Factory::FeatureFactoryI =back The following methods delegate to the inter =over 3 =item feature_class Set the specific Bio::SeqFeatureI class to return =item type_features Boolean flag, ensures the returned features are typed =item unflatten_features Ensure the returned features are unflattened =back =head2 TIEHANDLE(), READLINE(), PRINT() These provide the tie interface. See L<perltie> for more details. =head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to one of the Bioperl mailing lists. Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Support Please direct usage questions or support issues to the mailing list: I<bioperl-l@bioperl.org> rather than to the module maintainer directly. Many experienced and responsive experts will be able to look at the problem and quickly address it. Please include a thorough description of the problem with code and data examples if at all possible. =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track of the bugs and their resolution. Bug reports can be submitted via the web: http://bugzilla.open-bio.org/ =head1 AUTHOR - Allen Day Email allenday@ucla.edu =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut #' Let the code begin...
package Bio::FeatureIO; BEGIN { $Bio::FeatureIO::AUTHORITY = 'cpan:BIOPERLML'; } $Bio::FeatureIO::VERSION = '1.6.905'; use strict; use Symbol; use base qw(Bio::Root::Root Bio::Root::IO); =head2 new Title : new Usage : $stream = Bio::FeatureIO->new(-file => $filename, -format => 'Format') Function: Returns a new feature stream Returns : A Bio::FeatureIO stream initialised with the appropriate format Args : Named parameters: -file => $filename -fh => filehandle to attach to -format => format =cut my $entry = 0; sub new { my ($caller,@args) = @_; my $class = ref($caller) || $caller; # or do we want to call SUPER on an object if $caller is an # object? if( $class =~ /Bio::FeatureIO::(\S+)/ ) { my ($self) = $class->SUPER::new(@args); $self->_initialize(@args); return $self; } else { my %param = @args; @param{ map { lc $_ } keys %param } = values %param; # lowercase keys my $format = $param{'-format'} || $class->_guess_format( $param{-file} || $ARGV[0] ); if( ! $format ) { if ($param{-file}) { $format = $class->_guess_format($param{-file}); } elsif ($param{-fh}) { $format = $class->_guess_format(undef); } } $format = "\L$format"; # normalize capitalization to lower case return unless( $class->_load_format_module($format) ); return "Bio::FeatureIO::$format"->new(@args); } } =head2 newFh Title : newFh Usage : $fh = Bio::FeatureIO->newFh(-file=>$filename,-format=>'Format') Function: does a new() followed by an fh() Example : $fh = Bio::FeatureIO->newFh(-file=>$filename,-format=>'Format') $feature = <$fh>; # read a feature object print $fh $feature; # write a feature object Returns : filehandle tied to the Bio::FeatureIO::Fh class Args : See L =cut sub newFh { my $class = shift; return unless my $self = $class->new(@_); return $self->fh; } =head2 fh Title : fh Usage : $obj->fh Function: Example : $fh = $obj->fh; # make a tied filehandle $feature = <$fh>; # read a feature object print $fh $feature; # write a feature object Returns : filehandle tied to Bio::FeatureIO class 
Args : none =cut sub fh { my $self = shift; my $class = ref($self) || $self; my $s = Symbol::gensym; tie $$s,$class,$self; return $s; } # _initialize is chained for all FeatureIO classes sub _initialize { my($self, %arg) = @_; # flush is initialized by the Root::IO init # initialize the IO part $self->seq($arg{-seq}); $self->_initialize_io(%arg); } =head2 next_feature Title : next_feature Usage : $feature = stream->next_feature Function: Reads the next feature object from the stream and returns it. Certain driver modules may encounter entries in the stream that are either misformatted or that use syntax not yet understood by the driver. If such an incident is recoverable, e.g., by dismissing a feature of a feature table or some other non-mandatory part of an entry, the driver will issue a warning. In the case of a non-recoverable situation an exception will be thrown. Do not assume that you can resume parsing the same stream after catching the exception. Note that you can always turn recoverable errors into exceptions by calling $stream->verbose(2). Returns : a Bio::SeqFeatureI feature object Args : none See L, L =cut sub next_feature { my ($self, $seq) = @_; $self->throw_not_implemented; } =head2 write_feature Title : write_feature Usage : $stream->write_feature($feature) Function: writes the $feature object into the stream Returns : 1 for success and 0 for error Args : Bio::SeqFeature object =cut sub write_feature { my ($self, $seq) = @_; $self->throw_not_implemented(); } =head2 _load_format_module Title : _load_format_module Usage : *INTERNAL FeatureIO stuff* Function: Loads up (like use) a module at run time on demand Example : Returns : Args : =cut sub _load_format_module { my ($self, $format) = @_; my $class = ref($self) || $self; my $module = $class."::$format";#"Bio::Feature::" . 
$format; my $ok; eval { $ok = $self->_load_module($module); }; if ( $@ ) { print STDERR <seq() OR $obj->seq($newSeq) Example : Returns : Bio::SeqI object Args : newSeq (optional) =cut sub seq { my $self = shift; my $val = shift; $self->{'seq'} = $val if defined($val); return $self->{'seq'}; } =head2 _filehandle Title : _filehandle Usage : $obj->_filehandle($newval) Function: This method is deprecated. Call _fh() instead. Example : Returns : value of _filehandle Args : newvalue (optional) =cut sub _filehandle { my ($self,@args) = @_; return $self->_fh(@args); } =head2 _guess_format Title : _guess_format Usage : $obj->_guess_format($filename) Function: guess format based on file suffix Example : Returns : guessed format of filename (lower case) Args : Notes : See "SUPPORTED FORMATS" =cut sub _guess_format { my $class = shift; return unless $_ = shift; return 'gff' if /\.gff3?$/i; return 'gff' if /\.gtf$/i; return 'bed' if /\.bed$/i; return 'ptt' if /\.ptt$/i; return 'gff'; #the default } sub DESTROY { my $self = shift; $self->close(); } sub TIEHANDLE { my ($class,$val) = @_; return bless {'featio' => $val}, $class; } sub READLINE { my $self = shift; return $self->{'featio'}->next_feature() unless wantarray; my (@list, $obj); push @list, $obj while $obj = $self->{'featio'}->next_feature(); return @list; } sub PRINT { my $self = shift; $self->{'featio'}->write_feature(@_); } 1; release-mojibake.t100644000765000024 64412402372043 20166 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t#!perl BEGIN { unless ($ENV{RELEASE_TESTING}) { require Test::More; Test::More::plan(skip_all => 'these tests are for release candidate testing'); } } use strict; use warnings qw(all); use Test::More; ## no critic (ProhibitStringyEval, RequireCheckingReturnValueOfEval) eval q(use Test::Mojibake); plan skip_all => q(Test::Mojibake required for source encoding testing) if $@; all_files_encoding_ok(); knownGene.gff3100644000765000024 273512402372043 20240 
0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t/data##gff-version 3 chr17 UCSC mRNA 62467934 62469545 . - . ID=A00469;Dbxref=AFFX-U133:205840_x_at,Locuslink:2688,Genbank-mRNA:A00469,Swissprot:P01241,PFAM:PF00103,AFFX-U95:1332_f_at,Swissprot:SOMA_HUMAN;Note=growth%20hormone%201;Alias=GH1 chr17 UCSC three_prime_UTR 62467934 62468038 . - . Parent=A00469 chr17 UCSC CDS 62468039 62468236 . - 1 Parent=A00469 chr17 UCSC CDS 62468490 62468654 . - 2 Parent=A00469 chr17 UCSC CDS 62468747 62468866 . - 1 Parent=A00469 chr17 UCSC CDS 62469076 62469236 . - 1 Parent=A00469 chr17 UCSC CDS 62469497 62469506 . - 0 Parent=A00469 chr17 UCSC five_prime_UTR 62469507 62469545 . - . Parent=A00469 chr9 UCSC mRNA 90517946 90527968 . - . ID=AB000114;Ontology_term=GO:0007155,GO:0005194;Ontology_term=GO:0005578;Dbxref=AFFX-U95:41031_at,Genbank-protein:BAA19055,Unigene:Hs.94070,AFFX-U133:205907_s_at,Genbank-mRNA:AB000114,Locuslink:4958,Swissprot:Q99983,Swissprot:OMD_HUMAN,Refseq-mRNA:NM_005014,Refseq-protein:NP_005005,PFAM:PF01462,PFAM:00560;Note=osteomodulin;Alias=OMD; #. UCSC protein . . . . . ID=BAA19055;Parent=AB000114 chr9 UCSC three_prime_UTR 90517946 90518841 . - . Parent=AB000114 chr9 UCSC CDS 90518842 90519167 . - 1 Parent=AB000114 chr9 UCSC CDS 90520309 90521248 . - 0 Parent=AB000114 chr9 UCSC five_prime_UTR 90521249 90521264 . - . Parent=AB000114 chr9 UCSC five_prime_UTR 90527892 90527968 . - . Parent=AB000114 chr9 BLASTN match 90518850 90521248 0.0 + . ID=blastresult.1;Target=BC046356.1 178 1828;Gap=M78 I759 M1561 ##FASTA >A00469 GATTACAGATTACA directives.gff3100644000765000024 17612402372043 20423 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t/data##gff-version 3 ##sequence-region foo 1 100 ##feature-ontology bar ##attribute-ontology baz ##source-ontology boo ### ##FASTA knownGene2.gff3100644000765000024 274612402372043 20324 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t/data##gff-version 3 chr17 UCSC mRNA 62467934 62469545 . - . 
ID=A00469;Dbxref=AFFX-U133:205840_x_at,Locuslink:2688,Genbank-mRNA:A00469,Swissprot:P01241,PFAM:PF00103,AFFX-U95:1332_f_at,Swissprot:SOMA_HUMAN;Note=growth%20hormone%201;Alias=GH1 chr17 UCSC three_prime_UTR 62467934 62468038 . - . Parent=A00469 chr17 UCSC CDS 62468039 62468236 . - 1 Parent=A00469 chr17 UCSC CDS 62468490 62468654 . - 2 Parent=A00469 chr17 UCSC CDS 62468747 62468866 . - 1 Parent=A00469 chr17 UCSC CDS 62469076 62469236 . - 1 Parent=A00469 chr17 UCSC CDS 62469497 62469506 . - 0 Parent=A00469 chr17 UCSC five_prime_UTR 62469507 62469545 . - . Parent=A00469 ### chr9 UCSC mRNA 90517946 90527968 . - . ID=AB000114;Ontology_term=GO:0007155,GO:0005194;Ontology_term=GO:0005578;Dbxref=AFFX-U95:41031_at,Genbank-protein:BAA19055,Unigene:Hs.94070,AFFX-U133:205907_s_at,Genbank-mRNA:AB000114,Locuslink:4958,Swissprot:Q99983,Swissprot:OMD_HUMAN,Refseq-mRNA:NM_005014,Refseq-protein:NP_005005,PFAM:PF01462,PFAM:00560;Note=osteomodulin;Alias=OMD; #. UCSC protein . . . . . ID=BAA19055;Parent=AB000114 chr9 UCSC three_prime_UTR 90517946 90518841 . - . Parent=AB000114 chr9 UCSC CDS 90518842 90519167 . - 1 Parent=AB000114 chr9 UCSC CDS 90520309 90521248 . - 0 Parent=AB000114 chr9 UCSC five_prime_UTR 90521249 90521264 . - . Parent=AB000114 chr9 UCSC five_prime_UTR 90527892 90527968 . - . Parent=AB000114 ### chr9 BLASTN match 90518850 90521248 0.0 + . ID=blastresult.1;Target=BC046356.1 178 1828;Gap=M78 I759 M1561 ##FASTA >A00469 GATTACA GATTACA release-pod-syntax.t100644000765000024 45612402372043 20514 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t#!perl BEGIN { unless ($ENV{RELEASE_TESTING}) { require Test::More; Test::More::plan(skip_all => 'these tests are for release candidate testing'); } } # This file was automatically generated by Dist::Zilla::Plugin::PodSyntaxTests. 
use Test::More; use Test::Pod 1.41; all_pod_files_ok(); FeatureIO000755000765000024 012402372043 17315 5ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/lib/Biobed.pm100644000765000024 1561612402372043 20576 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/lib/Bio/FeatureIO=pod =head1 NAME Bio::FeatureIO::bed - read/write features from UCSC BED format =head1 SYNOPSIS my $in = Bio::FeatureIO->new(-format => 'bed', -file => 'file.bed'); while (my $feat = $in->next_feature) { # do something with $feat (a Bio::SeqFeature::Annotated object) } my $out = Bio::FeatureIO->new(-format=>'bed'); for my $feat ($seq->get_SeqFeatures) { $out->write_feature($feat); } =head1 DESCRIPTION See L<http://www.genome.ucsc.edu/goldenPath/help/customTrack.html#BED>. Currently for read and write only the first 6 fields (chr, start, end, name, score, strand) are supported. =head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to the Bioperl mailing list. Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Support Please direct usage questions or support issues to the mailing list: I<bioperl-l@bioperl.org> rather than to the module maintainer directly. Many experienced and responsive experts will be able to look at the problem and quickly address it. Please include a thorough description of the problem with code and data examples if at all possible. =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track of the bugs and their resolution. Bug reports can be submitted via the web: http://bugzilla.open-bio.org/ =head1 AUTHOR - Allen Day Email allenday@ucla.edu =head1 CONTRIBUTORS Sendu Bala, bix@sendu.me.uk =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut # Let the code begin... 
package Bio::FeatureIO::bed;
BEGIN {
  $Bio::FeatureIO::bed::AUTHORITY = 'cpan:BIOPERLML';
}
$Bio::FeatureIO::bed::VERSION = '1.6.905';
use strict;
use base qw(Bio::FeatureIO);
use Bio::SeqFeature::Annotated;
use Bio::Annotation::SimpleValue;
use Bio::OntologyIO;
use Scalar::Util qw(looks_like_number);

=head2 _initialize

 Title   : _initialize
 Function: initializes BED for reading/writing
 Args    : all optional:
           name          description
           ----------------------------------------------------------
           -name         the name for the BED track, stored in header
                         name defaults to localtime()
           -description  the description for the BED track, stored in
                         header.  defaults to localtime().
           -use_score    whether or not the score attribute of features
                         should be used when rendering them.  the
                         higher the score the darker the color.
                         defaults to 0 (false)

=cut

sub _initialize {
  my($self,%arg) = @_;

  $self->SUPER::_initialize(%arg);

  $self->name($arg{-name} || scalar(localtime()));
  $self->description($arg{-description} || scalar(localtime()));
  $self->use_score($arg{-use_score} || 0);

  # The track header line is only emitted when the stream is opened for
  # writing.
  $self->_print(sprintf('track name="%s" description="%s" useScore=%d',
                        $self->name,
                        $self->description,
                        $self->use_score ? 1 : 0
                       )."\n") if $self->mode eq 'w';
}

=head2 use_score

 Title   : use_score
 Usage   : $obj->use_score($newval)
 Function: should score be used to adjust feature color when rendering?  set to true if so.
 Example :
 Returns : value of use_score (a scalar)
 Args    : on set, new value (a scalar or undef, optional)

=cut

sub use_score{
  my $self = shift;

  return $self->{'use_score'} = shift if @_;
  return $self->{'use_score'};
}

=head2 name

 Title   : name
 Usage   : $obj->name($newval)
 Function: name of BED track
 Example :
 Returns : value of name (a scalar)
 Args    : on set, new value (a scalar or undef, optional)

=cut

sub name{
  my $self = shift;

  return $self->{'name'} = shift if @_;
  return $self->{'name'};
}

=head2 description

 Title   : description
 Usage   : $obj->description($newval)
 Function: description of BED track
 Example :
 Returns : value of description (a scalar)
 Args    : on set, new value (a scalar or undef, optional)

=cut

sub description{
  my $self = shift;

  return $self->{'description'} = shift if @_;
  return $self->{'description'};
}

=head2 write_feature

 Title   : write_feature
 Usage   : $stream->write_feature($feature)
 Function: writes $feature, followed by each of its sub-features, as
           one BED line apiece.  The feature's 1-based inclusive
           coordinates are converted to BED's 0-based half-open
           coordinates, the inverse of what next_feature() does.
 Returns : nothing meaningful
 Args    : a Bio::SeqFeature::Annotated object; anything else throws

=cut

sub write_feature {
  my($self,$feature) = @_;
  $self->throw("only Bio::SeqFeature::Annotated objects are writeable")
    unless Scalar::Util::blessed($feature) && $feature->isa('Bio::SeqFeature::Annotated');

  my $chrom = $feature->seq_id || '';

  # BED start is 0-based, so shift the 1-based start down by one.  BED end
  # is the first position past the feature, which is numerically the same
  # as the 1-based inclusive end, so it is written unchanged.
  my $start       = $feature->start;
  my $chrom_start = $start ? $start - 1 : 0;
  my $chrom_end   = $feature->end || 0;

  #try to make a reasonable name
  my $name = undef;
  my @v;
  if (@v = ($feature->annotation->get_Annotations('Name'))){
    $name = $v[0];
    $self->warn("only using first of feature's multiple names: ".join ',', map {$_->value} @v) if scalar(@v) > 1;
  } elsif (@v = ($feature->annotation->get_Annotations('ID'))){
    $name = $v[0];
    $self->warn("only using first of feature's multiple IDs: ".join ',', map {$_->value} @v) if scalar(@v) > 1;
  } else {
    $name = 'anonymous';
  }

  # name, chrom and score may each be annotation objects instead of plain
  # strings
  if (ref($name)) {
    $name = $name->value;
  }
  if (ref($chrom)) {
    $chrom = $chrom->value;
  }

  my $score = $feature->score || 0;
  if (ref($score) && $score->can('value')) {
    $score = $score->value;
  }

  # Only the minus strand is written as '-'; unknown or unset strand
  # defaults to '+'.
  my $strand = ($feature->strand || 0) < 0 ? '-' : '+';

  my $thick_start  = '';  #not implemented, used for CDS
  my $thick_end    = '';  #not implemented, used for CDS
  my $reserved     = 0;
  my $block_count  = '';  #not implemented, used for sub features
  my $block_sizes  = '';  #not implemented, used for sub features
  my $block_starts = '';  #not implemented, used for sub features

  $self->_print(join("\t",($chrom,$chrom_start,$chrom_end,$name,$score,$strand,$thick_start,$thick_end,$reserved,$block_count,$block_sizes, $block_starts))."\n");
  $self->write_feature($_) foreach $feature->get_SeqFeatures();
}

=head2 next_feature

 Title   : next_feature
 Usage   : $feature = $stream->next_feature
 Function: reads the next BED record, skipping lines whose second and
           third columns are not numbers (track/browser headers, blank
           lines), and converts its 0-based half-open coordinates to
           1-based inclusive ones.
 Returns : a Bio::SeqFeature::Annotated object, or nothing at the end
           of the input
 Args    : none

=cut

sub next_feature {
  my $self = shift;

  # Loop instead of recursing so that a long run of header lines cannot
  # grow the call stack.
  while (my $line = $self->_readline) {
    my ($seq_id, $start, $end, $name, $score, $strand) = split ' ', $line;

    # skip what is probably a header line
    next unless looks_like_number($start) && looks_like_number($end);

    # A missing strand column defaults to the plus strand; '.' (or any
    # other value) means the feature is unstranded.
    my $bp_strand = (!defined($strand) || $strand eq '+') ? 1
                  : $strand eq '-'                        ? -1
                  :                                         0;

    # start is 0 based, need it 1-based;
    # end is one beyond the feature ends and thus already 1-based
    my $feature = Bio::SeqFeature::Annotated->new(-start  => $start + 1,
                                                  -end    => $end,
                                                  $score ? (-score => $score) : (),
                                                  -strand => $bp_strand);
    $feature->seq_id($seq_id);
    if ($name) {
      my $sv = Bio::Annotation::SimpleValue->new(-tagname => 'Name', -value => $name);
      $feature->annotation->add_Annotation($sv);
    }

    return $feature;
  }

  return;
}

1;
gff.pm100644000765000024 6560612402372043 20612 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/lib/Bio/FeatureIO
=pod

=head1 NAME

Bio::FeatureIO::gff - read/write GFF feature files

=head1 SYNOPSIS

  my $feature; #get a Bio::SeqFeature::Annotated somehow
  my $featureOut = Bio::FeatureIO->new(
    -format => 'gff',
    -version => 3,
    -fh => \*STDOUT,
    -validate_terms => 1, #boolean. validate ontology terms online? default 0 (false).
  );
  $featureOut->write_feature($feature);

=head1 DESCRIPTION

 Currently implemented:

 version         read?   write?
 ------------------------------
 GFF 1             N       N
 GFF 2             N       N
 GFF 2.5 (GTF)     N       Y
 GFF 3             Y       Y

=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
the Bioperl mailing list.  Your participation is much appreciated.

  bioperl-l@bioperl.org                 - General discussion
  http://bioperl.org/wiki/Mailing_list  - About the mailing lists

=head2 Support

Please direct usage questions or support issues to the mailing list:

I<bioperl-l@bioperl.org>

rather than to the module maintainer directly. Many experienced and
responsive experts will be able to look at the problem and quickly
address it. Please include a thorough description of the problem
with code and data examples if at all possible.

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via
the web:

  http://bugzilla.open-bio.org/

=head1 AUTHOR

 Allen Day, <allenday@ucla.edu>

=head1 CONTRIBUTORS

 Steffen Grossmann, <grossman@molgen.mpg.de>
 Scott Cain, <scain@cpan.org>
 Rob Edwards <rob@salmonella.org>

=head1 APPENDIX

The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _

=cut


# Let the code begin...

package Bio::FeatureIO::gff;
BEGIN {
  $Bio::FeatureIO::gff::AUTHORITY = 'cpan:BIOPERLML';
}
$Bio::FeatureIO::gff::VERSION = '1.6.905';
use strict;

#these are alphabetical, keep them that way.
use Bio::Annotation::DBLink; use Bio::Annotation::OntologyTerm; use Bio::Annotation::SimpleValue; use Bio::Annotation::Target; use Bio::FeatureIO; use Bio::Ontology::OntologyStore; use Bio::OntologyIO; use Bio::SeqFeature::Annotated; use Bio::SeqIO; use URI::Escape; use base qw(Bio::FeatureIO); use constant DEFAULT_VERSION => 3; my $RESERVED_TAGS = "ID|Name|Alias|Parent|Target|Gap|Derives_from|Note|Dbxref|dbxref|Ontology_term|Index|CRUD"; sub _initialize { my($self,%arg) = @_; $self->SUPER::_initialize(%arg); $self->version( $arg{-version} || DEFAULT_VERSION); $self->validate($arg{-validate_terms} || 0); if ($arg{-file} =~ /^>.*/ ) { $self->_print("##gff-version " . $self->version() . "\n"); } else { my $directive; while(($directive = $self->_readline()) && ($directive =~ /^##/) ){ $self->_handle_directive($directive); } $self->_pushback($directive); } #need to validate against SOFA, no SO if ($self->validate) { $self->so( Bio::Ontology::OntologyStore->get_ontology('Sequence Ontology Feature Annotation') ); } } =head2 next_feature() Usage : my $feature = $featureio->next_feature(); Function: reads a feature record from a GFF stream and returns it as an object. Returns : a Bio::SeqFeature::Annotated object Args : N/A =cut sub next_feature { my $self = shift; my $gff_string; my($f) = $self->_buffer_feature(); if($f){ return $f; } return if $self->fasta_mode(); # be graceful about empty lines or comments, and make sure we return undef # if the input is consumed while(($gff_string = $self->_readline()) && defined($gff_string)) { next if $gff_string =~ /^\s*$/; #skip blank lines next if $gff_string =~ /^\#[^#]/; #skip comments, but not directives last; } return unless $gff_string; # looks like we went into FASTA mode without a directive. 
if($gff_string =~ /^>/){ my $pos = tell($self->_fh); # TODO: SeqIO::fasta uses _fh directly, whereas _pushback uses an in-memory # buffer; this completely breaks with piped data, so fix for files and punt # on the rest for now if ($pos >= 0) { seek($self->_fh, -length($gff_string), 1); } else { $self->warn("In-line parsing of FASTA not yet supported"); $self->_pushback($gff_string); } $self->fasta_mode(1); return; } # got a directive elsif($gff_string =~ /^##/){ $self->_handle_directive($gff_string); return $self->_buffer_feature(); } # got a feature else { return $self->_handle_feature($gff_string); } } =head2 next_feature_group Title : next_feature_group Usage : @feature_group = $stream->next_feature_group Function: Reads the next feature_group from $stream and returns it. Feature groups in GFF3 files are separated by '###' directives. The features in a group might form a hierarchical structure. The complete hierarchy of features is returned, i.e. the returned array represents only the top-level features. Lower-level features can be accessed using the 'get_SeqFeatures' method recursively. Example : # getting the complete hierarchy of features in a GFF3 file my @toplevel_features; while (my @fg = $stream->next_feature_group) { push(@toplevel_features, @fg); } Returns : an array of Bio::SeqFeature::Annotated objects Args : none =cut sub next_feature_group { my $self = shift; my $feat; my %seen_ids; my @all_feats; my @toplevel_feats; $self->{group_not_done} = 1; while ($self->{group_not_done} && ($feat = $self->next_feature()) && defined($feat)) { # we start by collecting all features in the group and # memorizing those which have an ID attribute my $anno_ID = $feat->get_Annotations('ID'); if(ref($anno_ID)) { my $attr_ID = $anno_ID->value; $self->throw("Oops! 
ID $attr_ID exists more than once in your file!") if (exists($seen_ids{$attr_ID})); $seen_ids{$attr_ID} = $feat; } push(@all_feats, $feat); } # assemble the top-level features foreach $feat (@all_feats) { my @parents = $feat->get_Annotations('Parent'); if (@parents) { foreach my $parent (@parents) { my $parent_id = $parent->value; $self->throw("Parent with ID $parent_id not found!") unless (exists($seen_ids{$parent_id})); $seen_ids{$parent_id}->add_SeqFeature($feat); } } else { push(@toplevel_feats, $feat); } } return @toplevel_feats; } =head2 next_seq() access the FASTA section (if any) at the end of the GFF stream. note that this method will return undef if not all features in the stream have been handled =cut sub next_seq() { my $self = shift; return unless $self->fasta_mode(); #first time next_seq has been called. initialize Bio::SeqIO instance if(!$self->seqio){ $self->seqio( Bio::SeqIO->new(-format => 'fasta', -fh => $self->_fh()) ); } return $self->seqio->next_seq(); } =head2 write_feature() Usage : $featureio->write_feature( Bio::SeqFeature::Annotated->new(...) ); Function: writes a feature in GFF format. the GFF version used is governed by the '-version' argument passed to Bio::FeatureIO->new(), and defaults to GFF version 3. Returns : ###FIXME Args : a Bio::SeqFeature::Annotated object. 
=cut sub write_feature { my($self,$feature) = @_; if (!$feature) { $self->throw("gff.pm cannot write_feature unless you give a feature to write.\n"); } $self->throw("only Bio::SeqFeature::Annotated objects are writeable") unless $feature->isa('Bio::SeqFeature::Annotated'); if($self->version == 1){ return $self->_write_feature_1($feature); } elsif($self->version == 2){ return $self->_write_feature_2($feature); } elsif($self->version == 2.5){ return $self->_write_feature_25($feature); } elsif($self->version == 3){ return $self->_write_feature_3($feature); } else { $self->throw(sprintf("don't know how to write GFF version %s",$self->version)); } } ################################################################################ =head1 ACCESSORS =cut =head2 fasta_mode() Usage : $obj->fasta_mode($newval) Function: Example : Returns : value of fasta_mode (a scalar) Args : on set, new value (a scalar or undef, optional) Side effect when setting: rewind the file handle a little bit to get the last carriage return that was swallowed when the previous line was processed. =cut sub fasta_mode { my($self,$val) = @_; $self->{'fasta_mode'} = $val if defined($val); if ($val && $val == 1) { # seek $self->_fh(), -1, 1; #rewind 1 byte to get the previous line's \n $self->_pushback("\n"); } return $self->{'fasta_mode'}; } =head2 seqio() Usage : $obj->seqio($newval) Function: holds a Bio::SeqIO instance for handling the GFF3 ##FASTA section. 
Returns : value of seqio (a scalar) Args : on set, new value (a scalar or undef, optional) =cut sub seqio { my($self,$val) = @_; $self->{'seqio'} = $val if defined($val); return $self->{'seqio'}; } =head2 sequence_region() Usage : Function: ###FIXME Returns : Args : =cut sub sequence_region { my ($self,$k,$v) = @_; if(defined($k) && defined($v)){ $self->{'sequence_region'}{$k} = $v; return $v; } elsif(defined($k)){ return $self->{'sequence_region'}{$k}; } else { return; } } =head2 so() Usage : $obj->so($newval) Function: holds a Sequence Ontology instance Returns : value of so (a scalar) Args : on set, new value (a scalar or undef, optional) =cut sub so { my $self = shift; my $val = shift; ###FIXME validate $val object's type $self->{so} = $val if defined($val); return $self->{so}; } =head2 validate() Usage : $obj->validate($newval) Function: true if encountered ontology terms in next_feature() mode should be validated. Returns : value of validate (a scalar) Args : on set, new value (a scalar or undef, optional) =cut sub validate { my($self,$val) = @_; $self->{'validate'} = $val if defined($val); return $self->{'validate'}; } =head2 version() Usage : $obj->version($newval) Function: version of GFF to read/write. valid values are 1, 2, 2.5, and 3. Returns : value of version (a scalar) Args : on set, new value (a scalar or undef, optional) =cut sub version { my $self = shift; my $val = shift; my %valid = map {$_=>1} (1, 2, 2.5, 3); if(defined $val && $valid{$val}){ return $self->{'version'} = $val; } elsif(defined($val)){ $self->throw('invalid version. 
valid versions: '.join(' ', sort keys %valid)); } return $self->{'version'}; } ################################################################################ =head1 INTERNAL METHODS =cut =head2 _buffer_feature() Usage : Function: ###FIXME Returns : Args : =cut sub _buffer_feature { my ($self,$f) = @_; if ( $f ) { push @{ $self->{'buffer'} }, $f; return $f; } elsif ( $self->{'buffer'} ) { return shift @{ $self->{'buffer'} }; } else { return; } } =head1 _handle_directive() this method is called for lines beginning with '##'. =cut sub _handle_directive { my($self,$directive_string) = @_; $directive_string =~ s/^##//; #remove escape my($directive,@arg) = split /\s+/, $directive_string; if($directive eq 'gff-version'){ my $version = $arg[0]; if($version != 3){ $self->throw("this is not a gff version 3 document, it is version '$version'"); } } elsif($directive eq 'sequence-region'){ # RAE: Sequence regions are in the format sequence-region seqid start end # for these we want to store the seqid, start, and end. Then when we validate # we want to make sure that the features are within the seqid/start/end $self->throw('Both start and end for sequence region should be defined') unless $arg[1] && $arg[2]; my $fta = Bio::Annotation::OntologyTerm->new(); $fta->name( 'region'); my $f = Bio::SeqFeature::Annotated->new(); $f->seq_id( $arg[0] ); $f->start( $arg[1] ); $f->end( $arg[2] ); $f->strand(1); $f->type( $fta ); #cache this in sequence_region(), we may need it for validation later. $self->sequence_region($f->seq_id => $f); #NOTE: is this the right thing to do -- treat this as a feature? 
-allenday #buffer it to be returned by next_feature() $self->_buffer_feature($f); } elsif($directive eq 'feature-ontology'){ $self->warn("'##$directive' directive handling not yet implemented"); } elsif($directive eq 'attribute-ontology'){ $self->warn("'##$directive' directive handling not yet implemented"); } elsif($directive eq 'source-ontology'){ $self->warn("'##$directive' directive handling not yet implemented"); } elsif($directive eq 'FASTA' or $directive =~ /^>/){ #next_seq() will take care of this. $self->fasta_mode(1); return; } elsif($directive eq '#'){ #all forward references resolved $self->{group_not_done} = 0; } elsif($directive eq 'organism') { my $organism = $arg[0]; $self->organism($organism); } else { $self->throw("don't know what do do with directive: '##".$directive."'"); } } =head1 _handle_feature() this method is called for each line not beginning with '#'. it parses the line and returns a Bio::SeqFeature::Annotated object. =cut sub _handle_feature { my($self,$feature_string) = @_; my $feat = Bio::SeqFeature::Annotated->new(); my($seq,$source,$type,$start,$end,$score,$strand,$phase,$attribute_string) = split /\t/, $feature_string; $feat->seq_id($seq); $feat->source_tag($source); $feat->start($start) unless $start eq '.'; $feat->end($end) unless $end eq '.'; $feat->strand($strand eq '+' ? 1 : $strand eq '-' ? -1 : 0); $feat->score($score); $feat->phase($phase); my $fta = Bio::Annotation::OntologyTerm->new(); if($self->validate()){ # RAE Added a couple of validations based on the GFF3 spec at http://song.sourceforge.net/gff3.shtml # 1. Validate the id if ($seq =~ /[^a-zA-Z0-9\.\-\:\^\*\$\@\!\+\_\?]/) { # I just escaped everything. 
$self->throw("Validation Error: seqid ($seq) contains characters that are not [a-zA-Z0-9.:^*\$\@!+_?\-] and not escaped"); } if ($seq =~ /\s/) { $self->throw("Validation Error: seqid ($seq) contains unescaped whitespace") } # NOTE i think we're handling this in as a directive, and this test may be removed -allenday if ($seq =~ /^>/) { $self->throw("Validation Error: seqid ($seq) begins with a >") } # 2. Validate the starts and stops. # these need to be within the region's bounds, and # also start <= end. bail out if either is not true. if ($start > $end) { $self->throw("Validation Error: start ($start) must be less than or equal to end in $seq"); } my $region = $self->sequence_region($seq); # NOTE: we can only validate against sequence-region that are declared in the file. # if i reference some region from elsewhere, can't validate. if we want to be really strict # we should bail out here. -allenday if ( defined($region) && $start < $region->start() || $end > $region->end() ) { $self->throw("Validation Error: sequence location ($seq from $start to $end) does not appear to lie within a defined sequence-region") } # 3. Validate the strand. # In the unvalidated version +=1 and -=-1. Everything else is 0. We just need to warn when it is not [+-.?] $self->throw("Validation Error: strand is not one of [+-.?] at $seq") if ($strand =~ /^[^\+\-\.\?]$/); # 4. 
Validate the phase to be one of [.012] $self->throw("Validation Error: phase is not one of [.012] at $seq") if ($phase =~ /^[^\.012]$/); my $feature_type; if($type =~ /^\D+:\d+$/){ #looks like an identifier ($feature_type) = $self->so->find_terms(-identifier => $type); } else { #looks like a name ($feature_type) = $self->so->find_terms(-name => $type); } if(!$feature_type){ $self->throw("Validation Error: couldn't find ontology term for '$type'."); } $fta->term($feature_type); } else { if($type =~ /^\D+:\d+$/){ #looks like an identifier $fta->identifier($type) } else { $fta->name($type); } } $feat->type($fta); my %attr = (); chomp $attribute_string; unless ( $attribute_string eq '.' ) { my @attributes = split ';', $attribute_string; foreach my $attribute (@attributes){ my($key,$values) = split '=', $attribute; # remove leading and trailing quotes from values $values =~ s/^["']//; $values =~ s/["']$//; #' terminate the quote for emacs my @values = map{uri_unescape($_)} split ',', $values; #minor hack to allow for multiple instances of the same tag if ($attr{$key}) { my @tmparray = @{$attr{$key}}; push @tmparray, @values; $attr{$key} = [@tmparray]; } else { $attr{$key} = [@values]; } } } #Handle Dbxref attributes if($attr{Dbxref} or $attr{dbxref}){ foreach my $value (@{ $attr{Dbxref} }, @{ $attr{dbxref} }){ my $a = Bio::Annotation::DBLink->new(); my($db,$accession) = $value =~ /^(.+?):(.+)$/; if(!$db or !$accession){ #dbxref malformed $self->throw("Error in line:\n$feature_string\nDbxref value '$value' did not conform to GFF3 specification"); next; } $a->database($db); $a->primary_id($accession); $feat->add_Annotation('Dbxref',$a); } } #Handle Ontology_term attributes if($attr{Ontology_term}){ foreach my $id (@{ $attr{Ontology_term} }){ my $a = Bio::Annotation::OntologyTerm->new(); if($self->validate()){ my $ont_name = Bio::Ontology::OntologyStore->guess_ontology($id); my $ont = Bio::Ontology::OntologyStore->get_ontology($ont_name); my($term) = 
$ont->find_terms(-identifier => $id); $a->term($term); } else { $a->identifier($id); } $feat->add_Annotation('Ontology_term',$a); } } #Handle Gap attributes if($attr{Gap}){ for my $value (@{ $attr{Gap} }) { my $a = Bio::Annotation::SimpleValue->new(); $a->value($value); $feat->add_Annotation('Gap',$a); } } #Handle Target attributes if($attr{Target}){ my $target_collection = Bio::Annotation::Collection->new(); foreach my $target_string (@{ $attr{Target} } ) { #only replace + for space if + has been used in place of it #that is, + could also mean plus strand, and we don't want #to accidentally remove it #presumably you can't use + for space and + for strand in the same string. $target_string =~ s/\+/ /g unless $target_string =~ / /; my ($t_id,$tstart,$tend,$strand,$extra) = split /\s+/, $target_string; if (!$tend || $extra) { # too much or too little stuff in the string $self->throw("The value in the Target string, $target_string, does not conform to the GFF3 specification"); } my $a = Bio::Annotation::Target->new( -target_id => $t_id, -start => $tstart, -end => $tend, ); if ($strand && $strand eq '+') { $strand = 1; } elsif ($strand && $strand eq '-') { $strand = -1; } else { $strand = ''; } $a->strand($strand) if $strand; $feat->add_Annotation('Target',$a); } } #Handle ID attribute. May only have one ID, throw error otherwise if($attr{ID}){ if(scalar( @{ $attr{ID} } ) > 1){ $self->throw("Error in line:\n$feature_string\nA feature may have at most one ID value"); } #ID's must be unique in the file if ($self->{'allIDs'}->{${$attr{ID}}[0]} && $self->validate()) { $self->throw("Validation Error: The ID ${$attr{ID}}[0] occurs more than once in the file, but should be unique"); } $self->{'allIDs'}->{${$attr{ID}}[0]} = 1; my $a = Bio::Annotation::SimpleValue->new(); $a->value( @{ $attr{ID} }[0] ); $feat->add_Annotation('ID',$a); } #Handle Name attribute. 
May only have one Name, throw error otherwise if($attr{Name}){ if(scalar( @{ $attr{Name} } ) > 1){ $self->throw("Error in line:\n$feature_string\nA feature may have at most one Name value"); } my $a = Bio::Annotation::SimpleValue->new(); $a->value( @{ $attr{Name} }[0] ); $feat->add_Annotation('Name',$a); } foreach my $other_canonical (qw(Alias Parent Note Derives_from Index CRUD)){ if($attr{$other_canonical}){ foreach my $value (@{ $attr{$other_canonical} }){ my $a = Bio::Annotation::SimpleValue->new(); $a->value($value); $feat->add_Annotation($other_canonical,$a); } } } my @non_reserved_tags = grep {/^[a-z]/} keys %attr; foreach my $non_reserved_tag (@non_reserved_tags) { next if ($non_reserved_tag eq 'dbxref'); foreach my $value (@{ $attr{$non_reserved_tag} }){ $feat = $self->_handle_non_reserved_tag($feat,$non_reserved_tag,$value); } } my @illegal_tags = grep {!/($RESERVED_TAGS)/} grep {/^[A-Z]/} keys %attr; if (@illegal_tags > 0) { my $tags = join(", ", @illegal_tags); $self->throw("The following tag(s) are illegal and are causing this parser to die: $tags"); } return $feat; } =head2 _handle_non_reserved_tag() Usage : $self->_handle_non_reserved_tag($feature,$tag,$value) Function: Deal with non-reserved word tags in the ninth column Returns : An updated Bio::SeqFeature::Annotated object Args : A Bio::SeqFeature::Annotated and a tag/value pair Note that this method can be overridden in a subclass to provide special handling of non-reserved word tags. 
=cut

sub _handle_non_reserved_tag {
  my $self = shift;
  my ($feat,$tag,$value) = @_;

  # to customize through subclassing and overriding:
  #if ($tag eq 'someTagOfInterest') {
  #  do something different
  # else { do what is below

  # 'comment' tags become Comment annotations; every other tag is stored
  # as a SimpleValue.  (Named $annotation rather than $a so the sort()
  # package variable is not shadowed.)
  my $annotation = $tag eq 'comment'
    ? Bio::Annotation::Comment->new()
    : Bio::Annotation::SimpleValue->new();

  $annotation->value($value);
  $feat->add_Annotation($tag,$annotation);

  return $feat;
}

=head2 organism

Gets/sets the organism from the organism directive

=cut

sub organism {
  # Unpack both arguments at once; the previous "my $x = shift if @_"
  # form has undefined behaviour in Perl (the lexical may keep its value
  # between calls).
  my ($self, $organism) = @_;
  $self->{'organism'} = $organism if defined($organism);
  return $self->{'organism'};
}

=head1 _write_feature_1()

write a feature in GFF v1 format.  currently not implemented.

=cut

sub _write_feature_1 {
  my($self,$feature) = @_;
  $self->throw(sprintf("write_feature unimplemented for GFF version %s",$self->version));
}

=head1 _write_feature_2()

write a feature in GFF v2 format.  currently not implemented.

=cut

sub _write_feature_2 {
  my($self,$feature) = @_;
  $self->throw(sprintf("write_feature unimplemented for GFF version %s",$self->version));
}

=head1 _write_feature_25()

write a feature in GFF v2.5 (aka GTF) format.

Args: the feature to write and, on recursive calls for subfeatures, the
group (ID annotation) of the top-level feature.  Subfeatures that carry no
gene_id / transcript_id annotation of their own inherit that group.

=cut

sub _write_feature_25 {
  my($self,$feature,$group) = @_;

  #the top-level feature is an aggregate of all subfeatures
  my ($transcript_id, $gene_id) = (($feature->get_Annotations('transcript_id'))[0], ($feature->get_Annotations('gene_id'))[0]);
  if(!defined($group)){
    $group = ($feature->get_Annotations('ID'))[0];
  }
  # Fall back to the group for every feature, not only the top-level one;
  # previously the group was passed down to subfeatures but never used, so
  # they were written with empty gene_id/transcript_id values.
  $transcript_id ||= $group;
  $gene_id       ||= $group;

  my $seq    = ref($feature->seq_id) ? $feature->seq_id->value : $feature->seq_id;
  my $source = $feature->source->value;
  my $type   = $feature->type->name;
  $type = 'EXON' if $type eq 'exon'; #a GTF peculiarity, inconsistent with the sequence ontology.
  my $min    = $feature->start   || '.';
  my $max    = $feature->end     || '.';
  my $strand = $feature->strand == 1 ? '+' : $feature->strand == -1 ? '-' : '.';
  my $score  = defined($feature->score) ? (ref($feature->score) ? $feature->score->value : $feature->score) : '.'; # score is optional
  my $frame  = defined($feature->frame) ? (ref($feature->frame) ? $feature->frame->value : $feature->frame) : (ref($feature->phase) ? $feature->phase->value : $feature->phase);

  #these are the only valid types in a GTF document
  if($type eq 'EXON' or $type eq 'CDS' or $type eq 'start_codon' or $type eq 'stop_codon'){
    my $attr = sprintf('gene_id "%s"; transcript_id "%s";',$gene_id ? $gene_id->value : '',$transcript_id ? $transcript_id->value : '');
    my $outstring = sprintf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
                            $seq,$source,$type,$min,$max,$score,$strand,$frame eq '.' ? 0 : $frame,$attr);

    $self->_print($outstring);
  }

  foreach my $subfeat ($feature->get_SeqFeatures){
    $self->_write_feature_25($subfeat,$group);
  }
}

=head1 _write_feature_3()

write a feature in GFF v3 format.

Undefined score and phase values are written as '.'.  Throws if the
feature carries more than one ID annotation.  Subfeatures are written
recursively after their parent.

=cut

sub _write_feature_3 {
  my($self,$feature) = @_;
  my $seq    = ref($feature->seq_id) ? $feature->seq_id->value : $feature->seq_id;

  my $source_ann = $feature->source();
  my $source     = $source_ann ? $source_ann->value : "unknownsource";

  my $type_ann = $feature->type();
  my $type     = $type_ann ? $type_ann->name : "region";

  my $min    = $feature->start   || '.';
  my $max    = $feature->end     || '.';
  my $strand = $feature->strand == 1 ? '+' : $feature->strand == -1 ? '-' : '.';
  # An undefined score/phase must still occupy its column, so use '.'
  # instead of letting sprintf print an empty string for undef.
  my $score  = defined($feature->score) ? (ref($feature->score) ? $feature->score->value : $feature->score) : '.';
  my $phase  = defined($feature->phase) ? (ref($feature->phase) ? $feature->phase->value : $feature->phase) : '.';

  my @attr;
  if(my @v = ($feature->get_Annotations('Name'))){
    my $vstring = join ',', map {uri_escape($_->value)} @v;
    push @attr, "Name=$vstring";
  }
  if(my @v = ($feature->get_Annotations('ID'))){
    my $vstring = join ',', map {uri_escape($_->value)} @v;
    # Validate before recording the attribute; double quotes so that the
    # newline in the message is a real newline.
    $self->throw("GFF3 features may have at most one ID, feature with these IDs is invalid:\n".$vstring) if scalar(@v) > 1;
    push @attr, "ID=$vstring";
  }
  if(my @v = ($feature->get_Annotations('Parent'))){
    my $vstring = join ',', map {uri_escape($_->value)} @v;
    push @attr, "Parent=$vstring";
  }
  if(my @v = ($feature->get_Annotations('dblink'))){
    my $vstring = join ',', map {uri_escape($_->database .':'. $_->primary_id)} @v;
    push @attr, "Dbxref=$vstring";
  }
  if(my @v = ($feature->get_Annotations('ontology_term'))){
    my $vstring = join ',', map {uri_escape($_->identifier)} @v;
    push @attr, "Ontology_term=$vstring";
  }
  if(my @v = ($feature->get_Annotations('comment'))){
    my $vstring = join ',', map {uri_escape($_->text)} @v;
    push @attr, "Note=$vstring";
  }
  if(my @v = ($feature->get_Annotations('Target'))){
    my %strand_map = ( 1=>'+', 0=>'', -1=>'-', '+' => '+', '-' => '-' );
    my $vstring = join ',', map {
      uri_escape($_->target_id).' '.$_->start.' '.$_->end.(defined $_->strand ? ' '.$strand_map{$_->strand} : '')
    } @v;
    push @attr, "Target=$vstring";
  }

  my $attr = join ';', @attr;

  my $outstring = sprintf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
                          $seq,$source,$type,$min,$max,$score,$strand,$phase,$attr);

  $self->_print($outstring);

  foreach my $subfeat ($feature->get_SeqFeatures){
    $self->_write_feature_3($subfeat);
  }
}

1;
gtf.pm100644000765000024 412012402372043 20570 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/lib/Bio/FeatureIO
# $Id: gtf.pm 16108 2009-09-16 17:07:49Z cjfields $
#
# BioPerl module for Bio::FeatureIO::gtf
#
# Please direct questions and support issues to <bioperl-l@bioperl.org>
#
# Cared for by Allen Day <allenday@ucla.edu>
#
# Copyright Allen Day
#
# You may distribute this module under the same terms as perl itself

# POD documentation - main docs before the code

=head1 NAME

Bio::FeatureIO::gtf - read write features in GTF format

=head1 SYNOPSIS

L<Bio::FeatureIO::gff>

=head1 DESCRIPTION

GTF, is also known as GFF v2.5.  This class is simply a subclass
of Bio::FeatureIO::gff that initializes with -version =E<gt> 2.5.

=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
the Bioperl mailing list.  Your participation is much appreciated.

  bioperl-l@bioperl.org                  - General discussion
  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists

=head2 Support

Please direct usage questions or support issues to the mailing list:

I<bioperl-l@bioperl.org>

rather than to the module maintainer directly. Many experienced and
responsive experts will be able to look at the problem and quickly
address it. Please include a thorough description of the problem
with code and data examples if at all possible.

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution.
Bug reports can be submitted via the web:

  http://bugzilla.open-bio.org/

=head1 AUTHOR - Allen Day

Email allenday@ucla.edu

=head1 APPENDIX

The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _

=cut

# Let the code begin...

package Bio::FeatureIO::gtf;
BEGIN {
  $Bio::FeatureIO::gtf::AUTHORITY = 'cpan:BIOPERLML';
}
$Bio::FeatureIO::gtf::VERSION = '1.6.905';
use base qw(Bio::FeatureIO::gff);
use strict;

# Object preamble - inherits from Bio::Root::Root

# Force the GFF version to 2.5 (GTF), whatever the caller passed, then let
# the gff parent class do the real initialisation.  Always returns 1.
sub _initialize {
  my $self  = shift;
  my %param = (@_, -version => 2.5);

  $self->SUPER::_initialize(%param);

  return 1;
}

1;
ptt.pm100644000765000024 1357412402372043 20654 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/lib/Bio/FeatureIO
=pod

=head1 NAME

Bio::FeatureIO::ptt - read/write features in PTT format

=head1 SYNOPSIS

 # read features
 my $fin = Bio::FeatureIO->new(-file=>'genes.ptt', -format=>'ptt');
 my @cds;
 while (my $f = $fin->next_feature) {
   push @cds, $f if $f->strand > 0;
 }

 # write features (NOT IMPLEMENTED)
 my $fout = Bio::FeatureIO->new(-fh=>\*STDOUT, -format=>'ptt');
 for my $f (@cds) {
   $fout->write_feature($f);
 }

=head1 DESCRIPTION

The PTT file format is a table of protein features.
It is used mainly by NCBI who produce PTT files for
all their published genomes found in L.
It has the following format:

=over 4

=item Line 1

Description of sequence to which the features belong
 eg. "Leptospira interrogans chromosome II, complete sequence - 0..358943"

It is usually equivalent to the DEFINITION line of a Genbank file,
with the length of the sequence appended. It is unclear why "0" is
used as a starting range, it should be "1".

=item Line 2

Number of feature lines in the table
 eg. "367 proteins"

=item Line 3

Column headers, tab separated
 eg. "Location  Strand  Length  PID Gene  Synonym Code  COG Product"

 Location : "begin..end" span of feature
 Strand   : "+" or "-"
 Length   : number of amino acids excluding the stop codon
 PID      : analogous to Genbank /db_xref="GI:xxxxxxxxx"
 Gene     : analogous to Genbank /gene="xxxx"
 Synonym  : analogous to Genbank /locus_tag="xxxx"
 Code     : not documented here ("-" in the example feature line below)
 COG      : CDD COG code with COG letter categories appended
 Product  : analogous to Genbank /product="xxxx"

=item Line 4 onwards

Feature lines, nine columns, tab separated, "-" used for empty fields
 eg. "2491..3423  + 310 24217063  metF  LB002 - COG0685E  5,10-methylenetetrahydrofolate reductase"

=back

=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
the Bioperl mailing list.  Your participation is much appreciated.

  bioperl-l@bioperl.org                  - General discussion
  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists

=head2 Support

Please direct usage questions or support issues to the mailing list:

I<bioperl-l@bioperl.org>

rather than to the module maintainer directly. Many experienced and
responsive experts will be able to look at the problem and quickly
address it. Please include a thorough description of the problem
with code and data examples if at all possible.

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via
the web:

  http://bugzilla.open-bio.org/

=head1 AUTHOR - Torsten Seemann

Email torsten.seemann AT infotech.monash.edu.au

=head1 CONTRIBUTORS

Based on bed.pm and gff.pm by Allen Day.

=head1 APPENDIX

The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _

=cut

# Let the code begin...
package Bio::FeatureIO::ptt;
BEGIN {
  $Bio::FeatureIO::ptt::AUTHORITY = 'cpan:BIOPERLML';
}
$Bio::FeatureIO::ptt::VERSION = '1.6.905';

use strict;
use base qw(Bio::FeatureIO);
use Bio::SeqFeature::Generic;

# map tab-separated column number to field name
our %NAME_OF = (
  0 => 'Location',
  1 => 'Strand',
  2 => 'Length',
  3 => 'PID',
  4 => 'Gene',
  5 => 'Synonym',
  6 => 'Code',
  7 => 'COG',
  8 => 'Product',
);
our $NUM_COL = 9;

=head2 _initialize

 Title   : _initialize
 Function: Reading? parses the three header lines of the input
           (description, protein count, column headers)
           Writing? nothing is done
 Throws  : if the description line is missing or the second line is
           not of the form "N proteins"

=cut

sub _initialize {
  my($self,%arg) = @_;

  $self->SUPER::_initialize(%arg);

  if ($self->mode eq 'r') {
    # Line 1
    my $desc = $self->_readline();
    defined($desc) or $self->throw("Missing description line (line 1)");
    chomp $desc;
    $self->description($desc);
    # Line 2
    my $line = $self->_readline();
    (defined($line) && $line =~ m/^(\d+) proteins/)
      or $self->throw("Invalid protein count");
    $self->protein_count($1);
    # Line 3 (column headers) is read and discarded
    $self->_readline();
  }
}

=head2 next_feature

 Title   : next_feature
 Usage   : $io->next_feature()
 Function: read the next feature from the PTT file
 Example :
 Args    :
 Returns : Bio::SeqFeatureI object, or nothing in write mode or at
           end of file
 Throws  : if the line does not have exactly nine tab-separated
           columns, or its location or strand column is malformed

=cut

sub next_feature {
  my $self = shift;
  $self->mode eq 'r' || return; # returns if can't read next_feature when we're in write mode

  my $line = $self->_readline() or return; # returns if end of file, no more features?
  chomp $line;
  my @col = split m/\t/, $line;
  # The count can be wrong in either direction, so report what was found
  # rather than claiming there are "too many" columns.
  @col==$NUM_COL
    or $self->throw("Expected $NUM_COL columns in PTT line, found ".scalar(@col));

  $col[0] =~ m/(\d+)\.\.(\d+)/ or $self->throw("Invalid location (column 1)");
  my $feat = Bio::SeqFeature::Generic->new(-start=>$1, -end=>$2, -primary=>'CDS');
  $col[1] =~ m/^([+-])$/ or $self->throw("Invalid strand (column 2)");
  $feat->strand($1 eq '+' ? +1 : -1);
  # Columns 2..8 become tags named after the column; "-" marks an empty field
  for my $i (2 .. $NUM_COL-1) {
    $feat->add_tag_value($NAME_OF{$i}, $col[$i]) if $col[$i] ne '-';
  }
  return $feat;
}

=head2 write_feature (NOT IMPLEMENTED)

 Title   : write_feature
 Usage   : $io->write_feature($feature)
 Function: write a Bio::SeqFeatureI object in PTT format
 Example :
 Args    : Bio::SeqFeatureI object
 Returns :

=cut

sub write_feature {
  shift->throw_not_implemented;
}

=head2 description

 Title   : description
 Usage   : $obj->description($newval)
 Function: set/get the PTT file description for/from line one
 Example :
 Returns : value of description (a scalar)
 Args    : on set, new value (a scalar or undef, optional)

=cut

sub description {
  my $self = shift;
  return $self->{'description'} = shift if @_;
  return $self->{'description'};
}

=head2 protein_count

 Title   : protein_count
 Usage   : $obj->protein_count($newval)
 Function: set/get the PTT protein count for/from line two
 Example :
 Args    : on set, new value (a scalar or undef, optional)
 Returns : value of protein_count (a scalar)

=cut

sub protein_count {
  my $self = shift;
  return $self->{'protein_count'} = shift if @_;
  return $self->{'protein_count'};
}

1;
release-pod-coverage.t100644000765000024 57212402372043 20760 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t
#!perl

BEGIN {
  unless ($ENV{RELEASE_TESTING}) {
    require Test::More;
    Test::More::plan(skip_all => 'these tests are for release candidate testing');
  }
}

# This file was automatically generated by Dist::Zilla::Plugin::PodCoverageTests.

use Test::Pod::Coverage 1.08;
use Pod::Coverage::TrustPod;

all_pod_coverage_ok({ coverage_class => 'Pod::Coverage::TrustPod' });
interpro.pm100644000765000024 1276612402372043 21711 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/lib/Bio/FeatureIO

=head1 NAME

Bio::FeatureIO::interpro - read features from InterPro XML

=head1 SYNOPSIS

  my $in = Bio::FeatureIO->new(-format=>'interpro');
  while (my $feat = $in->next_feature) {
    # do something with the Bio::SeqFeatureI object
  }

=head1 DESCRIPTION

See L.
=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
the Bioperl mailing list.  Your participation is much appreciated.

  bioperl-l@bioperl.org                  - General discussion
  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists

=head2 Support

Please direct usage questions or support issues to the mailing list:

I<bioperl-l@bioperl.org>

rather than to the module maintainer directly. Many experienced and
responsive experts will be able to look at the problem and quickly
address it. Please include a thorough description of the problem
with code and data examples if at all possible.

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via
the web:

  http://bugzilla.open-bio.org/

=head1 AUTHOR - Allen Day

Email allenday@ucla.edu

=head1 APPENDIX

The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _

=cut

# Let the code begin...

package Bio::FeatureIO::interpro;
BEGIN {
  $Bio::FeatureIO::interpro::AUTHORITY = 'cpan:BIOPERLML';
}
$Bio::FeatureIO::interpro::VERSION = '1.6.905';

use strict;
use base qw(Bio::FeatureIO);
use Bio::SeqFeature::Annotated;
use Bio::OntologyIO;

use Bio::Annotation::Comment;
use Bio::Annotation::DBLink;
use Bio::Annotation::OntologyTerm;
use Bio::Annotation::SimpleValue;
use Bio::Annotation::Target;

use URI::Escape;
use XML::DOM;
use XML::DOM::XPath;

# Set up the XML parser and skip any preamble so that the stream is
# positioned on the first line containing a <protein element.
# NOTE(review): the '<protein' pattern was lost from this copy of the file
# (markup stripped); it is restored here to match the '/protein' XPath
# queries used by next_feature -- confirm against the released module.
sub _initialize {
  my($self,%arg) = @_;

  $self->SUPER::_initialize(%arg);
  $self->xml_parser(XML::DOM::Parser->new());
  my $buf;
  while(defined($buf = $self->_readline()) && $buf !~ /<protein/){
    next;
  }
  # hand the first record line back, unless we ran off the end of the input
  $self->_pushback($buf) if defined($buf);
}

# Return the next feature.  Each <protein>...</protein> record is parsed
# in one go; one feature is built per interpro/match/location element and
# queued, so later calls drain the queue before reading the next record.
# Returns nothing once the input holds no further record.
sub next_feature {
  my $self =shift;
  my $ok = 0; #true if there is another record in stream
  my $record = ''; #holds the record to be parsed and returned.

  #try to dump buffer from last record before moving on to next record
  my $f = $self->_shift_feature_buffer();
  if($f){
    return $f;
  }

  # accumulate one complete record
  while(defined(my $buf = $self->_readline())){
    $ok = 1 if $buf =~ m!<protein!;
    $record .= $buf;
    last if $buf =~ m!</protein>!;
  }
  return unless $ok;

  my $dom = $self->xml_parser->parse($record);

  my ($pNode) = $dom->findnodes('/protein');

  my @iNodes = $pNode->findnodes('/protein/interpro');

  foreach my $iNode (@iNodes){
    my @cNodes = $iNode->findnodes('classification');
    my @mNodes = $iNode->findnodes('match');

    #we don't handle these
    #my @nNodes = $iNode->findnodes('contains');
    #my @fNodes = $iNode->findnodes('found_in');

    foreach my $mNode (@mNodes){
      my @lNodes = $mNode->findnodes('location');
      foreach my $lNode (@lNodes){
        my $feature = Bio::SeqFeature::Annotated->new(
          -start  => $lNode->getAttribute('start'),
          -end    => $lNode->getAttribute('end'),
          -score  => $lNode->getAttribute('score'),
        );
        # seq_id() returns a plain string, so it must be used as a setter;
        # calling ->value on its result would be a method call on a string.
        $feature->seq_id($pNode->getAttribute('id'));

        # source() only accepts annotation objects and throws on strings;
        # source_tag() is the string-accepting setter.
        $feature->source_tag( $lNode->getAttribute('evidence') );

        my $t = Bio::Annotation::OntologyTerm->new(-identifier => 'SO:0000417', -name => 'polypeptide_domain');
        $feature->add_Annotation('type',$t);

        my $c = Bio::Annotation::Comment->new(-tagname => 'comment', -text => $iNode->getAttribute('name'));
        $feature->add_Annotation($c);

        my $d = Bio::Annotation::DBLink->new();
        $d->database($mNode->getAttribute('dbname'));
        $d->primary_id($mNode->getAttribute('id'));
        $d->optional_id($mNode->getAttribute('name'));
        $feature->annotation->add_Annotation('dblink',$d);

        my $s = Bio::Annotation::SimpleValue->new(-tagname => 'status', -value => $lNode->getAttribute('status'));
        $feature->annotation->add_Annotation($s);

        foreach my $cNode (@cNodes){
          my $o = Bio::Annotation::OntologyTerm->new(-identifier => $cNode->getAttribute('id'));
          $feature->annotation->add_Annotation('ontology_term',$o);
        }

        $self->_push_feature_buffer($feature);
      }
    }
  }

  return $self->_shift_feature_buffer;
}

=head2 _push_feature_buffer()

 Usage   :
 Function:
Returns : Args : =cut sub _push_feature_buffer { my ($self,$f) = @_; if(ref($f)){ push @{ $self->{feature_buffer} }, $f; } } =head2 _shift_feature_buffer() Usage : Function: Returns : Args : =cut sub _shift_feature_buffer { my ($self) = @_; return $self->{feature_buffer} ? shift @{ $self->{feature_buffer} } : undef; } =head2 xml_parser() Usage : $obj->xml_parser($newval) Function: Example : Returns : value of xml_parser (a scalar) Args : on set, new value (a scalar or undef, optional) =cut sub xml_parser { my($self,$val) = @_; $self->{'xml_parser'} = $val if defined($val); return $self->{'xml_parser'}; } 1; SeqFeature000755000765000024 012402372043 17536 5ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/lib/BioAnnotated.pm100644000765000024 10052512402372043 22214 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/lib/Bio/SeqFeature# $Id$ # # BioPerl module for Bio::SeqFeature::Annotated # # Please direct questions and support issues to # # Cared for by Allen Day # # Copyright Allen Day # # You may distribute this module under the same terms as perl itself # POD documentation - main docs before the code =head1 NAME Bio::SeqFeature::Annotated - PLEASE PUT SOMETHING HERE =head1 SYNOPSIS # none yet, complain to authors =head1 DESCRIPTION None yet, complain to authors. =head1 Implemented Interfaces This class implements the following interfaces. =over 4 =item Bio::SeqFeatureI Note that this includes implementing Bio::RangeI. =item Bio::AnnotatableI =item Bio::FeatureHolderI Features held by a feature are essentially sub-features. =back =head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to one of the Bioperl mailing lists. Your participation is much appreciated. 
bioperl-l@bioperl.org - General discussion http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Support Please direct usage questions or support issues to the mailing list: I rather than to the module maintainer directly. Many experienced and reponsive experts will be able look at the problem and quickly address it. Please include a thorough description of the problem with code and data examples if at all possible. =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track the bugs and their resolution. Bug reports can be submitted via the web: http://bugzilla.open-bio.org/ =head1 AUTHOR - Allen Day Allen Day Eallenday at ucla.eduE =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut package Bio::SeqFeature::Annotated; BEGIN { $Bio::SeqFeature::Annotated::AUTHORITY = 'cpan:BIOPERLML'; } $Bio::SeqFeature::Annotated::VERSION = '1.6.905'; use strict; use Bio::Annotation::Collection; use Bio::Annotation::OntologyTerm; use Bio::Annotation::Target; use Bio::LocatableSeq; use Bio::Location::Simple; use Bio::Ontology::OntologyStore; use Bio::Tools::GFF; use Bio::SeqFeature::AnnotationAdaptor; use Data::Dumper; use URI::Escape; use base qw(Bio::Root::Root Bio::SeqFeature::TypedSeqFeatureI Bio::AnnotatableI Bio::FeatureHolderI); our %tagclass = ( comment => 'Bio::Annotation::Comment', dblink => 'Bio::Annotation::DBLink', description => 'Bio::Annotation::SimpleValue', gene_name => 'Bio::Annotation::SimpleValue', ontology_term => 'Bio::Annotation::OntologyTerm', reference => 'Bio::Annotation::Reference', __DEFAULT__ => 'Bio::Annotation::SimpleValue', ); our %tag2text = ( 'Bio::Annotation::Comment' => 'text', 'Bio::Annotation::DBLink' => 'primary_id', 'Bio::Annotation::SimpleValue' => 'value', 'Bio::Annotation::SimpleValue' => 'value', 'Bio::Annotation::OntologyTerm' => 'name', 'Bio::Annotation::Reference' => 'title', __DEFAULT__ => 'value', ); 
######################################
#get_SeqFeatures
#display_name
#primary_tag
#source_tag                       x with warning
#has_tag
#get_tag_values
#get_tagset_values
#get_all_tags
#attach_seq
#seq                              x
#entire_seq                       x
#seq_id
#gff_string
#_static_gff_handler
#start                            x
#end                              x
#strand                           x
#location
#primary_id

=head1 PREAMBLE

Okay, where to start...

The original idea for this class appears to be to lump all SeqFeatureI data
(primary_tag, source_tag, etc) together as AnnotationI objects in a
Bio::Annotation::Collection. The type is then checked against SOFA.

There have been several requests to have type checking be optionally run.

 Bio::FeatureHolderI::create_hierarchy_from_ParentIDs
 Bio::FeatureHolderI::feature_count
 Bio::FeatureHolderI::get_all_SeqFeatures
 Bio::FeatureHolderI::set_ParentIDs_from_hierarchy
 Bio::RangeI::contains
 Bio::RangeI::disconnected_ranges
 Bio::RangeI::equals
 Bio::RangeI::intersection
 Bio::RangeI::offsetStranded
 Bio::RangeI::overlap_extent
 Bio::RangeI::overlaps
 Bio::RangeI::subtract
 Bio::RangeI::union
 Bio::SeqFeature::Annotated::Dumper
 Bio::SeqFeature::Annotated::MAX_TYPE_CACHE_MEMBERS
 Bio::SeqFeature::Annotated::add_Annotation
 Bio::SeqFeature::Annotated::add_SeqFeature
 Bio::SeqFeature::Annotated::add_tag_value
 Bio::SeqFeature::Annotated::add_target
 Bio::SeqFeature::Annotated::annotation
 Bio::SeqFeature::Annotated::attach_seq
 Bio::SeqFeature::Annotated::display_name
 Bio::SeqFeature::Annotated::each_target
 Bio::SeqFeature::Annotated::end
 Bio::SeqFeature::Annotated::entire_seq
 Bio::SeqFeature::Annotated::frame
 Bio::SeqFeature::Annotated::from_feature
 Bio::SeqFeature::Annotated::get_Annotations
 Bio::SeqFeature::Annotated::get_SeqFeatures
 Bio::SeqFeature::Annotated::get_all_tags
 Bio::SeqFeature::Annotated::get_tag_values
 Bio::SeqFeature::Annotated::get_tagset_values
 Bio::SeqFeature::Annotated::has_tag
 Bio::SeqFeature::Annotated::length
 Bio::SeqFeature::Annotated::location
 Bio::SeqFeature::Annotated::name
 Bio::SeqFeature::Annotated::new
 Bio::SeqFeature::Annotated::phase
Bio::SeqFeature::Annotated::primary_tag Bio::SeqFeature::Annotated::remove_Annotations Bio::SeqFeature::Annotated::remove_SeqFeatures Bio::SeqFeature::Annotated::remove_tag Bio::SeqFeature::Annotated::score Bio::SeqFeature::Annotated::seq Bio::SeqFeature::Annotated::seq_id Bio::SeqFeature::Annotated::source Bio::SeqFeature::Annotated::source_tag Bio::SeqFeature::Annotated::start Bio::SeqFeature::Annotated::strand Bio::SeqFeature::Annotated::type Bio::SeqFeature::Annotated::uri_escape Bio::SeqFeature::Annotated::uri_unescape Bio::SeqFeature::TypedSeqFeatureI::croak Bio::SeqFeature::TypedSeqFeatureI::ontology_term Bio::SeqFeatureI::generate_unique_persistent_id Bio::SeqFeatureI::gff_string Bio::SeqFeatureI::primary_id Bio::SeqFeatureI::spliced_seq =cut sub new { my ( $caller, @args) = @_; my ($self) = $caller->SUPER::new(@args); $self->_initialize(@args); return $self; } sub _initialize { my ($self,@args) = @_; my ($start, $end, $strand, $frame, $phase, $score, $name, $annot, $location, $display_name, # deprecate $seq_id, $type,$source,$feature ) = $self->_rearrange([qw(START END STRAND FRAME PHASE SCORE NAME ANNOTATION LOCATION DISPLAY_NAME SEQ_ID TYPE SOURCE FEATURE )], @args); defined $start && $self->start($start); defined $end && $self->end($end); defined $strand && $self->strand($strand); defined $frame && $self->frame($frame); defined $phase && $self->phase($phase); defined $score && $self->score($score); defined $source && ref($source) ? $self->source($source) : $self->source_tag($source); defined $type && ref($type) ? 
$self->type($type) : $self->primary_tag($type); defined $location && $self->location($location); defined $annot && $self->annotation($annot); defined $feature && $self->from_feature($feature); if( defined($display_name) && defined($name) ){ $self->throw('Cannot define (-id and -seq_id) or (-name and -display_name) attributes'); } defined $seq_id && $self->seq_id($seq_id); defined ($name || $display_name) && $self->name($name || $display_name); } =head1 ATTRIBUTE ACCESSORS FOR Bio::SeqFeature::Annotated =cut =head2 from_feature Usage: $obj->from_feature($myfeature); Desc : initialize this object with the contents of another feature object. Useful for converting objects like L to this class Ret : nothing meaningful Args : a single object of some other feature type, Side Effects: throws error on failure Example: =cut sub from_feature { my ($self,$feat,%opts) = @_; # should deal with any SeqFeatureI implementation (i.e. we don't want to # automatically force a OO-heavy implementation on all classes) ref($feat) && ($feat->isa('Bio::SeqFeatureI')) or $self->throw('invalid arguments to from_feature'); #TODO: add overrides in opts for these values, so people don't have to screw up their feature object #if they don't want to ### set most of the data foreach my $fieldname (qw/ start end strand frame score location seq_id source_tag primary_tag/) { #no strict 'refs'; #using symbolic refs, yes, but using them for methods is allowed now $self->$fieldname( $feat->$fieldname ); } # now pick up the annotations/tags of the other feature # We'll use AnnotationAdaptor to convert everything over my %no_copy = map {$_ => 1} qw/seq_id source type frame phase score/; my $adaptor = Bio::SeqFeature::AnnotationAdaptor->new(-feature => $feat); for my $key ( $adaptor->get_all_annotation_keys() ) { next if $no_copy{$key}; my @values = $adaptor->get_Annotations($key); @values = _aggregate_scalar_annotations(\%opts,$key,@values); foreach my $val (@values) { $self->add_Annotation($key,$val) } } } 
#given a key and its values, make the values into
#Bio::Annotation::\w+ objects
#
# Args   : an options hashref (currently unused here), the annotation key,
#          and the list of values for that key (plain scalars or objects)
# Returns: a list of annotation objects:
#            Target                a single Bio::Annotation::Target built
#                                  from 3 or 4 scalar values
#            dblink / Dbxref       a Bio::Annotation::DBLink for each
#                                  SimpleValue of the form "db:id";
#                                  anything else is passed through
#            Ontology_term         Bio::Annotation::OntologyTerm objects
#            comment               Bio::Annotation::Comment objects
#            any other key         the values, scalars wrapped as
#                                  Bio::Annotation::SimpleValue
sub _aggregate_scalar_annotations {
  my ($opts,$key,@values) = @_;

  #anything that's not an object, make it a SimpleValue
  @values = map { ref($_) ? $_ : Bio::Annotation::SimpleValue->new(-value => $_) } @values;

  #try to make Target objects
  if($key eq 'Target' && (@values == 3 || @values == 4)
     && @values == grep {$_->isa('Bio::Annotation::SimpleValue')} @values
    ) {
    @values = map {$_->value} @values;
    #make a strand if it doesn't have one, enforcing start <= end
    if(@values == 3) {
      if($values[1] <= $values[2]) {
        $values[3] = '+';
      } else {
        @values[1,2] = @values[2,1];
        $values[3] = '-';
      }
    }
    return ( Bio::Annotation::Target->new( -target_id => $values[0],
                                           -start     => $values[1],
                                           -end       => $values[2],
                                           -strand    => $values[3],
                                         )
           );
  }
  #try to make DBLink objects
  elsif($key eq 'dblink' || $key eq 'Dbxref') {
    return map {
      my $ann = $_;
      # Look for the colon in the annotation's *value*.  Matching against
      # the object itself tests its stringified form, which always contains
      # "::" from the class name and so converted every value.
      if( $ann->isa('Bio::Annotation::SimpleValue') && $ann->value =~ /:/ ) {
        #convert to a DBLink if it has a colon in it; split on the first
        #colon only so that ids containing colons are kept whole
        my ($db,$id) = split /:/, $ann->value, 2;
        Bio::Annotation::DBLink->new( -database   => $db,
                                      -primary_id => $id,
                                    );
      } else { #otherwise leave as it is
        $ann
      }
    } @values;
  }
  #make OntologyTerm objects
  elsif($key eq 'Ontology_term') {
    return map { Bio::Annotation::OntologyTerm->new(-identifier => $_->value) } @values
  }
  #make Comment objects
  elsif($key eq 'comment') {
    return map { Bio::Annotation::Comment->new( -text => $_->value ) } @values;
  }

  return @values;
}

=head2 seq_id()

 Usage   : $obj->seq_id($newval)
 Function: holds a string corresponding to the unique
           seq_id of the sequence underlying the feature
           (e.g. database accession or primary key).
 Returns : string representing the seq_id.
 Args    : on set, some string or a Bio::Annotation::SimpleValue object.
=cut

sub seq_id {
  my $self = shift;
  my $val  = shift;

  if (defined($val)) {
    # plain scalars are unescaped and wrapped; SimpleValue objects are
    # used as they are; any other object leaves $term undefined
    my $term;
    if (!ref($val)) {
      $term = Bio::Annotation::SimpleValue->new(-value => uri_unescape($val));
    }
    elsif ($val->isa('Bio::Annotation::SimpleValue')) {
      $term = $val;
    }

    # reject unusable arguments and ids that start with '>'
    $self->throw('give seq_id() a scalar or Bio::Annotation::SimpleValue object, not '.$val)
      if !defined($term) || $term->value =~ /^>/;

    $self->remove_Annotations('seq_id');
    $self->add_Annotation('seq_id', $term);
  }

  # make sure we always have something
  unless ($self->get_Annotations('seq_id')) {
    $self->seq_id('.');
  }

  my ($first) = $self->get_Annotations('seq_id');
  return $first->value;
}

=head2 name()

 Usage   : $obj->name($newval)
 Function: human-readable name for the feature.
 Returns : value of name (a scalar)
 Args    : on set, new value (a scalar or undef, optional)

=cut

sub name {
  my $self = shift;
  my $val  = shift;

  if (defined($val)) {
    $self->{'name'} = $val;
  }

  return $self->{'name'};
}

=head2 type()

 Usage   : $obj->type($newval)
 Function: a SOFA type for the feature.
 Returns : Bio::Annotation::OntologyTerm object representing the type.
           NB: to get a string, use primary_tag().
 Args    : on set, Bio::Annotation::OntologyTerm object.
           NB: to set a string (SOFA name or identifier), use primary_tag()

=cut

use constant MAX_TYPE_CACHE_MEMBERS => 20;
sub type {
  my $self = shift;
  my $val  = shift;

  if (defined($val)) {
    # strings are refused here; primary_tag() is the string interface
    $self->throw("give type() a Bio::Annotation::OntologyTerm object, not a string")
      unless ref($val);

    #we have the wrong type of object
    $self->throw('give type() a SOFA term name, identifier, or Bio::Annotation::OntologyTerm object, not '.$val)
      unless $val->isa('Bio::Annotation::OntologyTerm');

    $self->remove_Annotations('type');
    $self->add_Annotation('type',$val);
  }

  return $self->get_Annotations('type');
}

=head2 source()

 Usage   : $obj->source($newval)
 Function: holds the source of the feature.
 Returns : a Bio::Annotation::SimpleValue representing the source.
NB: to get a string, use source_tag() Args : on set, a Bio::Annotation::SimpleValue object. NB: to set a string, use source_tag() =cut sub source { my($self,$val) = @_; if (defined($val)) { my $term; if (!ref($val)) { $self->throw("give source() a Bio::Annotation::SimpleValue object, not a string"); #$term = Bio::Annotation::SimpleValue->new(-value => uri_unescape($val)); } elsif (ref($val) && $val->isa('Bio::Annotation::SimpleValue')) { $term = $val; } else { $self->throw('give source() a scalar or Bio::Annotation::SimpleValue object, not '.$val); } $self->remove_Annotations('source'); $self->add_Annotation('source', $term); } unless ($self->get_Annotations('source')) { $self->source(Bio::Annotation::SimpleValue->new(-value => '.')); } return $self->get_Annotations('source'); } =head2 score() Usage : $score = $feat->score() $feat->score($score) Function: holds a value corresponding to the score of the feature. Returns : a string representing the score. Args : on set, a scalar or a Bio::Annotation::SimpleValue object. =cut sub score { my $self = shift; my $val = shift; if(defined($val)){ my $term = undef; if (!ref($val)) { $term = Bio::Annotation::SimpleValue->new(-value => $val); } elsif (ref($val) && $val->isa('Bio::Annotation::SimpleValue')) { $term = $val; } if ($term->value ne '.' && (!defined($term) || ($term->value !~ /^[+-]?\d+\.?\d*(e-\d+)?/))) { $self->throw("'$val' is not a valid score"); } $self->remove_Annotations('score'); $self->add_Annotation('score', $term); } $self->score('.') unless scalar($self->get_Annotations('score')); # make sure we always have something return ($self->get_Annotations('score'))[0]->display_text; } =head2 phase() Usage : $phase = $feat->phase() $feat->phase($phase) Function: get/set on phase information Returns : a string 0,1,2,'.' Args : on set, one of 0,1,2,'.' or a Bio::Annotation::SimpleValue object holding one of 0,1,2,'.' as its value. 
=cut

sub phase {
  my $self = shift;
  my $val = shift;

  if(defined($val)){
    my $term = undef;
    if (!ref($val)) {
      $term = Bio::Annotation::SimpleValue->new(-value => $val);
    }
    elsif (ref($val) && $val->isa('Bio::Annotation::SimpleValue')) {
      $term = $val;
    }
    if (!defined($term) || ($term->value !~ /^[0-2.]$/)) {
      $self->throw("'$val' is not a valid phase");
    }
    $self->remove_Annotations('phase');
    $self->add_Annotation('phase', $term);
  }

  $self->phase('.') unless $self->get_Annotations('phase'); # make sure we always have something

  return ($self->get_Annotations('phase'))[0]->value;
}

=head2 frame()

 Usage   : $frame = $feat->frame()
           $feat->frame($frame)
 Function: get/set on frame information
 Returns : a string 0,1,2,'.'
 Args    : on set, one of 0,1,2,'.' or a Bio::Annotation::SimpleValue
           object holding one of 0,1,2,'.' as its value.

=cut

sub frame {
  my $self = shift;
  my $val = shift;

  if(defined($val)){
    my $term = undef;
    if (!ref($val)) {
      $term = Bio::Annotation::SimpleValue->new(-value => $val);
    }
    elsif (ref($val) && $val->isa('Bio::Annotation::SimpleValue')) {
      $term = $val;
    }
    if (!defined($term) || ($term->value !~ /^[0-2.]$/)) {
      $self->throw("'$val' is not a valid frame");
    }
    $self->remove_Annotations('frame');
    $self->add_Annotation('frame', $term);
  }

  $self->frame('.') unless $self->get_Annotations('frame'); # make sure we always have something

  return ($self->get_Annotations('frame'))[0]->value;
}

############################################################

=head1 SHORTCUT METHODS TO ACCESS Bio::AnnotatableI INTERFACE METHODS

=cut

=head2 add_Annotation()

 Usage   :
 Function: $obj->add_Annotation() is a shortcut to $obj->annotation->add_Annotation
 Returns :
 Args    :

=cut

sub add_Annotation {
  my ($self,@args) = @_;
  return $self->annotation->add_Annotation(@args);
}

=head2 remove_Annotations()

 Usage   :
 Function: $obj->remove_Annotations() is a shortcut to $obj->annotation->remove_Annotations
 Returns :
 Args    :

=cut

sub remove_Annotations {
  my ($self,@args) = @_;
  return $self->annotation->remove_Annotations(@args);
}

############################################################

=head1 INTERFACE METHODS FOR Bio::SeqFeatureI

Note that no methods are deprecated.  Any SeqFeatureI methods must return
strings (no objects).

=cut

=head2 display_name()

Alias for name().

=cut

sub display_name {
  my $self = shift;
  return $self->name(@_);
}

=head2 primary_tag()

 Usage   : $tag = $feat->primary_tag()
           $feat->primary_tag('exon')
 Function: get/set the feature type as a string
 Returns : the name of the type annotation, or nothing if no type is set
 Args    : on set, a SOFA term name or an identifier of the form
           "prefix:digits"; references and false values are ignored
 Throws  : if no SOFA term matches the given string

=cut

sub primary_tag {
  my $self = shift;
  if (@_) {
    my $val = shift;
    if(!ref($val) && $val){
      #we have a plain text annotation coming in.  try to map it to SOFA.

      our %__type_cache; #a little cache of plaintext types we've already seen

      #clear our cache if it gets too big
      if(scalar(keys %__type_cache) > MAX_TYPE_CACHE_MEMBERS) {
        %__type_cache = ();
      }

      #use a cached ontology term, or look up a new one, throwing
      #up if not found
      my $soterm = $__type_cache{$val};
      unless ($soterm) {
        my $sofa = Bio::Ontology::OntologyStore->get_instance->get_ontology('Sequence Ontology OBO');
        ($soterm) = $val =~ /^\D+:\d+$/ #does it look like an ident?
          ? ($sofa->find_terms(-identifier => $val))[0] #yes, lookup by ident
          : ($sofa->find_terms(-name => $val))[0];      #no, lookup by name

        #throw if it's not in SOFA
        unless($soterm){
          $self->throw("couldn't find a SOFA term matching type '$val'.");
        }

        # Remember the lookup.  The cache was consulted but never filled
        # before, so every call repeated the ontology search.
        $__type_cache{$val} = $soterm;
      }

      # Only the ontology term is cached; each feature still gets its own
      # annotation object wrapping it.
      my $term = Bio::Annotation::OntologyTerm->new;
      $term->term($soterm);

      $self->type($term);
    }
  }

  my $t = $self->type() || return;
  return $t->name;
}

=head2 source_tag()

 Usage   : $source = $feat->source_tag()
           $feat->source_tag('somesource')
 Function: get/set the feature source as a string
 Returns : the display text of the source annotation
 Args    : on set, a string (it is URI-unescaped before being stored);
           references and false values are ignored

=cut

sub source_tag {
  my $self = shift;
  if (@_) {
    my $val = shift;
    if(!ref($val) && $val){
      my $term = Bio::Annotation::SimpleValue->new(-value => uri_unescape($val));
      $self->source($term);
    }
  }

  my $t = $self->source() || return;
  return $t->display_text;
}

=head2 attach_seq()

 Usage   : $sf->attach_seq($seq)
 Function: Attaches a Bio::Seq object to this feature.
This Bio::Seq object is for the *entire* sequence: ie from 1 to 10000
 Returns : TRUE on success
 Args    : a Bio::PrimarySeqI compliant object

=cut

sub attach_seq {
  my ($self, $seq) = @_;

  my $is_primary_seq = $seq && ref($seq) && $seq->isa("Bio::PrimarySeqI");
  $self->throw("Must attach Bio::PrimarySeqI objects to SeqFeatures")
    unless $is_primary_seq;

  $self->{'seq'} = $seq;

  # hand the same sequence down to every sub-feature
  for my $subfeat ($self->get_SeqFeatures()) {
    $subfeat->attach_seq($seq);
  }
  return 1;
}

=head2 seq()

 Usage   : $tseq = $sf->seq()
 Function: returns the part of the attached sequence covered by this
           feature, reverse-complemented when the strand is -1
 Returns : sub seq (a Bio::PrimarySeqI compliant object) of the attached
           sequence bounded by start & end, or undef if there is no
           sequence attached
 Args    : none

=cut

sub seq {
  my ($self) = @_;

  my $whole = $self->entire_seq();
  return unless defined($whole);

  my $piece  = $whole->trunc($self->start(), $self->end());
  my $strand = $self->strand;
  if (defined($strand) && $strand == -1) {
    $piece = $piece->revcom;
  }
  return $piece;
}

=head2 entire_seq()

 Usage   : $whole_seq = $sf->entire_seq()
 Function: gives the entire sequence that this seqfeature is attached to
 Returns : a Bio::PrimarySeqI compliant object, or undef if there is no
           sequence attached
 Args    : none

=cut

sub entire_seq {
  my ($self) = @_;
  return $self->{'seq'};
}

############################################################

=head1 INTERFACE METHODS FOR Bio::RangeI

as inherited via Bio::SeqFeatureI

=cut

=head2 length()

 Usage   : $feature->length()
 Function: Get the feature length computed as $feat->end - $feat->start + 1
 Returns : integer
 Args    : none

=cut

sub length {
  my ($self) = @_;
  my $end   = $self->end();
  my $start = $self->start();
  return $end - $start + 1;
}

=head2 start()

 Usage   : $obj->start($newval)
 Function: Get/set on the start coordinate of the feature; the value is
           kept in the location object
 Returns : integer
 Args    : on set, new value (a scalar or undef, optional)

=cut

sub start {
  my $self  = shift;
  my $value = shift;
  return $self->location->start($value);
}

=head2 end()

 Usage   : $obj->end($newval)
 Function: Get/set on the end coordinate of the feature
 Returns : integer
 Args    : on set,
new value (a scalar or undef, optional)

=cut

sub end {
  my $self  = shift;
  my $value = shift;
  return $self->location->end($value);
}

=head2 strand()

 Usage   : $strand = $feat->strand($newval)
 Function: get/set on strand information, being 1,-1 or 0
 Returns : -1,1 or 0
 Args    : on set, new value (a scalar or undef, optional)

=cut

sub strand {
  my ($self, @args) = @_;
  return $self->location->strand(@args);
}

############################################################

=head1 INTERFACE METHODS FOR Bio::FeatureHolderI

This includes methods for retrieving, adding, and removing
features. Since this is already a feature, features held by this
feature holder are essentially sub-features.

=cut

=head2 get_SeqFeatures

 Usage   : @feats = $feat->get_SeqFeatures();
 Function: Returns an array of Bio::SeqFeatureI objects
 Returns : An array
 Args    : none

=cut

sub get_SeqFeatures {
  my ($self) = @_;
  my $subfeats = $self->{'sub_array'} || [];
  return @$subfeats;
}

=head2 add_SeqFeature()

 Usage   : $feat->add_SeqFeature($subfeat);
           $feat->add_SeqFeature($subfeat,'EXPAND')
 Function: adds a SeqFeature into the subSeqFeature array.
           With no 'EXPAND' qualifier, subfeat is tested as to whether
           it lies inside the parent; if it does not, a warning is
           issued and the sub-feature is ignored.

           If EXPAND is used, the parent's start/end/strand will
           be adjusted so that it grows to accommodate the new
           subFeature
 Example :
 Returns : nothing when the sub-feature is missing or ignored
 Args    : a Bio::SeqFeatureI object
 Throws  : if the argument does not implement Bio::SeqFeatureI

=cut

sub add_SeqFeature {
  my ($self, $val, $expand) = @_;

  return unless $val;

  unless (ref($val) && $val->isa('Bio::SeqFeatureI')) {
    my $what = ref($val) ? ref($val) : $val;
    $self->throw($what ." does not implement Bio::SeqFeatureI.");
  }

  if ($expand && ($expand eq 'EXPAND')) {
    $self->_expand_region($val);
  }
  elsif (!$self->contains($val)) {
    $self->warn("$val is not contained within parent feature, and expansion is not valid, ignoring.");
    return;
  }

  push(@{$self->{'sub_array'}}, $val);
}

=head2 remove_SeqFeatures()

 Usage   : $obj->remove_SeqFeatures
 Function: Removes all sub SeqFeatures.
If you want to remove only a subset, remove that subset from the
           returned array, and add back the rest.
 Returns : The array of Bio::SeqFeatureI implementing sub-features that was
           deleted from this feature.
 Args    : none

=cut

sub remove_SeqFeatures {
  my ($self) = @_;

  my @subfeats = @{$self->{'sub_array'} || []};
  $self->{'sub_array'} = []; # zap the array.
  return @subfeats;
}

############################################################

=head1 INTERFACE METHODS FOR Bio::AnnotatableI

=cut

=head2 annotation()

 Usage   : $obj->annotation($annot_obj)
 Function: Get/set the annotation collection object for annotating this
           feature.
 Returns : A Bio::AnnotationCollectionI object
 Args    : newvalue (optional)

=cut

sub annotation {
  my ($obj,$value) = @_;

  # we are smart if someone references the object and there hasn't been
  # one set yet
  if (defined $value || ! defined $obj->{'annotation'} ) {
    $value = Bio::Annotation::Collection->new() unless ( defined $value );
    $obj->{'annotation'} = $value;
  }
  return $obj->{'annotation'};
}

############################################################

=head2 location()

 Usage   : my $location = $seqfeature->location()
 Function: returns a location object suitable for identifying location
           of feature on sequence or parent feature
 Returns : Bio::LocationI object
 Args    : [optional] Bio::LocationI object to set the value to.

=cut

sub location {
  my($self, $value ) = @_;

  if (defined($value)) {
    unless (ref($value) and $value->isa('Bio::LocationI')) {
      $self->throw("object $value pretends to be a location but ".
                   "does not implement Bio::LocationI");
    }
    $self->{'location'} = $value;
  }
  elsif (! $self->{'location'}) {
    # guarantees a real location object is returned every time
    $self->{'location'} = Bio::Location::Simple->new();
  }
  return $self->{'location'};
}

=head2 add_target()

 Usage   : $seqfeature->add_target(Bio::LocatableSeq->new(...));
 Function: adds a target location on another reference sequence for this feature
 Returns : true on success (the object that was added)
 Args    : a Bio::LocatableSeq object
 Throws  : if the argument is not a Bio::LocatableSeq

=cut

sub add_target {
  my ($self,$seq) = @_;

  # This used to read "seq->isa(...)" without the sigil, which asks a class
  # literally named "seq" and is therefore always false, so every
  # reference passed in was rejected.
  $self->throw("$seq is not a Bio::LocatableSeq, bailing out")
    unless ref($seq) and $seq->isa('Bio::LocatableSeq');

  push @{ $self->{'targets'} }, $seq;
  return $seq;
}

=head2 each_target()

 Usage   : @targets = $seqfeature->each_target();
 Function: Returns a list of Bio::LocatableSeqs which are the locations of this object.
           To obtain the "primary" location, see L</location()>.
 Returns : a list of 0..N Bio::LocatableSeq objects
 Args    : none

=cut

sub each_target {
  my ($self) = @_;
  return $self->{'targets'} ? @{ $self->{'targets'} } : ();
}

=head2 _expand_region

 Title   : _expand_region
 Usage   : $self->_expand_region($feature);
 Function: Expand the total region covered by this feature to
           accommodate the given feature.

           May be called whenever any kind of subfeature is added to this
           feature. add_SeqFeature() already does this.
 Returns :
 Args    : A Bio::SeqFeatureI implementing object.

=cut

sub _expand_region {
  my ($self, $feat) = @_;

  # only a warning: processing carries on with whatever was passed in
  if (! $feat->isa('Bio::SeqFeatureI')) {
    $self->warn("$feat does not implement Bio::SeqFeatureI");
  }
  # if this doesn't have start/end set - forget it!
  if ((! defined($self->start())) && (! defined $self->end())) {
    # no coordinates of our own yet, so simply take over the sub-feature's
    $self->start($feat->start());
    $self->end($feat->end());
    $self->strand($feat->strand) unless defined($self->strand());
    # $self->strand($feat->strand) unless $self->strand();
  }
  else {
    # grow to the union of our own range and the sub-feature's range
    my $range = $self->union($feat);
    $self->start($range->start);
    $self->end($range->end);
    $self->strand($range->strand);
  }
}

=head2 get_Annotations

 Usage   : my $parent   = $obj->get_Annotations('Parent');
           my @parents = $obj->get_Annotations('Parent');
 Function: a wrapper around Bio::Annotation::Collection::get_Annotations().
 Returns : returns annotations as
           Bio::Annotation::Collection::get_Annotations() does, but
           additionally returns a single scalar in scalar context
           instead of list context so that if an annotation tag
           contains only a single value, you can do:

           $parent = $feature->get_Annotations('Parent');

           instead of:

           ($parent) = ($feature->get_Annotations('Parent'))[0];

           if the 'Parent' tag has multiple values and is called in a
           scalar context, the number of annotations is returned.
 Args    : an annotation tag name.

=cut

sub get_Annotations {
  my $self = shift;

  my @annotations = $self->annotation->get_Annotations(@_);

  if (wantarray) {
    return @annotations;
  }
  elsif (scalar(@annotations) == 1) {
    # exactly one annotation: hand back the object itself
    return $annotations[0];
  }
  else {
    # zero or several annotations: hand back the count
    return scalar(@annotations);
  }
}

=head1 Bio::SeqFeatureI implemented methods

These are specialized implementations of SeqFeatureI methods which call the
internal Bio::Annotation::AnnotationCollection object. Just prior to the 1.5
release the below methods were moved from Bio::SeqFeatureI to
Bio::AnnotatableI, and having Bio::SeqFeatureI inherit Bio::AnnotatableI. This
behavior forced all Bio::SeqFeatureI-implementing classes to use
Bio::AnnotationI objects for any data. It is the consensus of the core
developers that this be rolled back in favor of a more flexible approach by
rolling back the above changes and making this class Bio::AnnotatableI. The
SeqFeatureI tag-related methods are reimplemented in order to approximate the
same behavior as before.
The methods below allow mapping of the "get_tag_values()"-style annotation
access to Bio::AnnotationCollectionI. These need not be implemented in a
Bio::AnnotationCollectionI compliant class, as they are built on top of the
methods.  For usage, see Bio::SeqFeatureI.

=cut

=head2 has_tag

 Usage   : $count = $obj->has_tag($tag)
 Function: asks the annotation collection, in scalar context, for the
           annotations stored under $tag
 Args    : tag name

=cut

sub has_tag {
  my $self = shift;
  my $tag  = shift;
  my $found = $self->annotation->get_Annotations($tag);
  return $found;
}

=head2 add_tag_value

 Usage   : $obj->add_tag_value($tag, @values)
 Function: wraps each value in the annotation class registered for $tag
           (or the default class) and adds it to the annotation collection
 Returns : 1
 Args    : tag name, followed by one or more values

=cut

sub add_tag_value {
  my ($self, $tag, @vals) = @_;

  for my $val (@vals) {
    my $class  = $tagclass{$tag} || $tagclass{__DEFAULT__};
    my $setter = $tag2text{$class};

    my $ann = $class->new();
    $ann->$setter($val);
    $self->annotation->add_Annotation($tag, $ann);
  }

  return 1;
}

=head2 get_tag_values

 Usage   : @annotations = $obj->get_tag_values($tag)
 Function: returns annotations corresponding to $tag
 Returns : a list of scalars
 Args    : tag name

=cut

sub get_tag_values {
  my ($self, $tag) = @_;

  unless ($tagclass{$tag}) {
    if ($self->annotation->get_Annotations($tag)) {
      #new tag, haven't seen it yet but it exists.  add to registry
      my ($first) = $self->annotation->get_Annotations($tag);
      my $ann_class = ref($first);
      # we can only register if there's a method known for obtaining the value
      $tagclass{$tag} = $ann_class if exists($tag2text{$ann_class});
    }
  }

  my $class  = $tagclass{$tag} || $tagclass{__DEFAULT__};
  my $getter = $tag2text{$class};

  my @values;
  for my $ann ($self->annotation->get_Annotations($tag)) {
    push @values, $ann->$getter;
  }
  return @values;
}

=head2 get_tagset_values

 Usage   : @annotations = $obj->get_tagset_values($tag1,$tag2)
 Function: returns annotations corresponding to a list of tags.
           this is a convenience method equivalent to multiple calls
           to get_tag_values with each tag in the list.
 Returns : a list of scalars, as get_tag_values() returns them.
Args    : a list of tag names

=cut

sub get_tagset_values {
  my ($self,@tags) = @_;

  # Delegate to get_tag_values() so the documented equivalence holds,
  # including its registration of the annotation class for tags that
  # have not been seen before.
  return map { $self->get_tag_values($_) } @tags;
}

=head2 get_all_tags

 Usage   : @tags = $obj->get_all_tags()
 Function: returns a list of annotation tag names.
 Returns : a list of tag names
 Args    : none

=cut

sub get_all_tags {
  my ($self,@args) = @_;
  return $self->annotation->get_all_annotation_keys(@args);
}

=head2 remove_tag

 Usage   : See remove_Annotations().
 Function:
 Returns :
 Args    :
 Note    : Contrary to what the name suggests, this method removes
           all annotations corresponding to $tag, not just a
           single annotation.

=cut

sub remove_tag {
  my ($self,@args) = @_;
  return $self->annotation->remove_Annotations(@args);
}

1;
release-SeqFeature000755000765000024 012402372043 20140 5ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/tAnnotated.t100644000765000024 563712402372043 22415 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t/release-SeqFeature
BEGIN {
  unless ($ENV{RELEASE_TESTING}) {
    require Test::More;
    Test::More::plan(skip_all => 'these tests are for release candidate testing');
  }
}

# -*-Perl-*- Test Harness script for Bioperl
# $Id$

use strict;

BEGIN {
    use Bio::Root::Test;

    test_begin(
               -requires_modules => [qw(URI::Escape Graph::Directed)],
              );

    use_ok('Bio::SeqFeature::Generic');
    use_ok('Bio::SeqFeature::Annotated');
}

my $sfa = Bio::SeqFeature::Annotated->new(-start => 1,
                                          -end => 5,
                                          -strand => "+",
                                          -frame => 2,
                                          -type => 'nucleotide_motif',
                                          -phase => 2,
                                          -score => 12,
                                          -source => 'program_b',
                                          -display_name => 'test.annot',
                                          -seq_id => 'test.displayname' );

isa_ok($sfa, "Bio::SeqFeatureI",'isa SeqFeatureI');
isa_ok($sfa, "Bio::AnnotatableI",'isa AnnotatableI');
ok (defined $sfa);
my $loc = $sfa->location;
ok $loc->isa("Bio::Location::Simple");

ok $sfa->display_name eq 'test.annot';

#test bsfa::from_feature
my $sfg = Bio::SeqFeature::Generic->new ( -start => 400,
                                          -end => 440,
                                          -strand => 1,
                                          -primary => 'nucleotide_motif',
                                          -source => 'program_a',
                                          -tag => {
                                                   silly => 20,
                                                   new => 1
                                                  }
                                        );
my $sfa2;
$sfa2 = Bio::SeqFeature::Annotated->new(-feature => $sfg);

is $sfa2->type->name,'nucleotide_motif';
is $sfa2->primary_tag, 'nucleotide_motif';
is $sfa2->source->display_text,'program_a';
is $sfa2->source_tag,'program_a';
is $sfa2->strand,1;
is $sfa2->start,400;
is $sfa2->end,440;
is $sfa2->get_Annotations('silly')->value,20;
is $sfa2->get_Annotations('new')->value,1;

my $sfaa = Bio::SeqFeature::Annotated->new(-feature => $sfa);
is $sfaa->type->name,'nucleotide_motif';
is $sfaa->primary_tag, 'nucleotide_motif';
is $sfaa->source->display_text,'program_b';
is $sfaa->source_tag,'program_b';
is $sfaa->strand,1;
is $sfaa->start,1;
is $sfaa->end,5;
is $sfaa->score,12;

my $sfa3 = Bio::SeqFeature::Annotated->new( -start => 1,
                                            -end => 5,
                                            -strand => "+",
                                            -frame => 2,
                                            -phase => 2,
                                            -score => 12,
                                            -display_name => 'test.annot',
                                            -seq_id => 'test.displayname' );
$sfa3->from_feature($sfg);

is $sfa3->type->name,'nucleotide_motif', 'type->name';
is $sfa3->primary_tag, 'nucleotide_motif', 'primary_tag';
is $sfa3->source->display_text,'program_a';
is $sfa3->source_tag,'program_a';
is $sfa3->strand,1;
is $sfa3->start,400;
is $sfa3->end,440;
is $sfa3->get_Annotations('silly')->value,20;
is $sfa3->get_Annotations('new')->value,1;

# Note there is an API conflict with SeqFeature::Generic, where score is a
# simple scalar, and here it is a Bio::Annotation::SimpleValue
# By popular vote there is no operator overloading, so this needs to be
# resolved
is $sfa3->score(), 12;
$sfa3->score(11);
is $sfa3->score(), 11;
$sfa3->score(0);
is $sfa3->score(), 0; # test that setting to 0 no longer is overridden to set score to '.' (fixed in Bio::SeqFeature::Annotated version 1.3.7)

done_testing();
exit;
vecscreen_simple.test_output100644000765000024 165512402372043 23405 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/t/data>Vector C02HBa0064B17.2 Screen: VecScreen Database: NCBI UniVec vector sequences
No hits found

>Vector C02HBa0066C13.1 Screen: VecScreen Database: NCBI UniVec vector sequences
No hits found

>Vector C02HBa0072A04.1 Screen: VecScreen Database: NCBI UniVec vector sequences
Moderate match
60522 60548

>Vector SL_FOS91h17_SP6_0 Screen: VecScreen Database: NCBI UniVec vector sequences
Strong match
60 122

>Vector SL_FOS91h18_T7_0 Screen: VecScreen Database: NCBI UniVec vector sequences
Strong match
35 102
Moderate match
76 103
Weak match
82 104
Suspect origin
1 34

>Vector SL_FOS91i01_SP6_0 Screen: VecScreen Database: NCBI UniVec vector sequences
Strong match
46 110
Suspect origin
1 45

>Vector SL_FOS92b12_T7_0 Screen: VecScreen Database: NCBI UniVec vector sequences
Strong match
41 108
Moderate match
82 109
Weak match
88 110
1313 1329
Suspect origin
1 40
1330 1334

vecscreen_simple.pm100644000765000024 1163712402372043 23371 0ustar00cjfieldsstaff000000000000Bio-FeatureIO-1.6.905/lib/Bio/FeatureIO=pod

=head1 NAME

Bio::FeatureIO::vecscreen_simple - read/write features from NCBI vecscreen -f 3
output

=head1 SYNOPSIS

    # read features
    my $fin = Bio::FeatureIO->new(-file=>'vecscreen.out',
                                  -format=>'vecscreen_simple');
    my @vec_regions;
    while (my $f = $fin->next_feature) {
      push @vec_regions, $f;
    }

    # write features NOT IMPLEMENTED

=head1 DESCRIPTION

vecscreen is a system for quickly identifying segments of a nucleic
acid sequence that may be of vector origin. NCBI developed vecscreen
to minimize the incidence and impact of vector contamination in public
sequence databases. GenBank Annotation Staff use vecscreen to verify
that sequences submitted for inclusion in the database are free from
contaminating vector sequence.
Any sequence can be screened for vector contamination using vecscreen.

This module provides parsing for vecscreen '-f 3' output, described in
the vecscreen documentation as 'Text list, no alignments'

=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
the Bioperl mailing list.  Your participation is much appreciated.

  bioperl-l@bioperl.org                  - General discussion
  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists

=head2 Support

Please direct usage questions or support issues to the mailing list:

I<bioperl-l@bioperl.org>

rather than to the module maintainer directly. Many experienced and
responsive experts will be able to look at the problem and quickly
address it. Please include a thorough description of the problem
with code and data examples if at all possible.

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via
the web:

  http://bugzilla.open-bio.org/

=head1 AUTHOR - Robert Buels

Email rmb32 AT cornell.edu

=head1 CONTRIBUTORS

Based on ptt.pm by Torsten Seeman

=head1 APPENDIX

The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _

=cut

# Let the code begin...

package Bio::FeatureIO::vecscreen_simple;
BEGIN {
  $Bio::FeatureIO::vecscreen_simple::AUTHORITY = 'cpan:BIOPERLML';
}
$Bio::FeatureIO::vecscreen_simple::VERSION = '1.6.905';

use strict;
use base qw(Bio::FeatureIO);
use Bio::SeqFeature::Generic;

=head2 _initialize

 Title   : _initialize
 Function: Reading? resets the parser state (current sequence name and
           match type) to empty
           Writing? not implemented; throws an exception

=cut

sub _initialize {
  my($self,%arg) = @_;

  $self->SUPER::_initialize(%arg);

  if ($self->mode eq 'r') {
    # nothing has been read yet: no current sequence, no current match type
    $self->{parse_state}->{seqname}   = '';
    $self->{parse_state}->{matchtype} = '';
  }
  else {
    $self->throw('vecscreen_simple feature writing not implemented');
  }
}

=head2 next_feature

 Title   : next_feature
 Usage   : $io->next_feature()
 Function: read the next feature from the vecscreen output file
 Args    : none
 Returns : Bio::SeqFeatureI object, or nothing at the end of the input
 Throws  : on a non-blank line that appears outside a '>Vector' record,
           or that is neither a match type nor a coordinate pair

=cut

sub next_feature {
  my $self = shift;
  return unless $self->mode eq 'r'; # returns if can't read next_feature when we're in write mode

  # Test for definedness explicitly: the implicit defined() that Perl adds
  # to while() conditions covers <FH>/readline only, not method calls, so
  # a final line consisting of "0" would otherwise end the loop silently.
  while ( defined( my $line = $self->_readline() ) ) {
    chomp $line;

    if ( $line =~ /^>Vector (\S+)/ ) {
      # record header: remember which sequence the following lines describe
      $self->{parse_state}{seqname} = $1;
    }
    elsif ( $line =~ /^\s*WARNING/ ) {
      $self->warn("$self->{parse_state}{seqname}: vecscreen says: $line\n");
    }
    elsif ( $line =~ /\S/ ) {
      $self->{parse_state}{seqname}
        or $self->throw("Unexpected line in vecscreen output '$line'");

      #if it's not a vector line, it should be either a match type or
      #a coordinates line
      my $lcline = lc $line;

      if ( $line =~ /^(\d+)\s+(\d+)\s*$/ ) {
        my ($s,$e) = ($1,$2);

        my $matchtype = $self->{parse_state}{matchtype};
        $matchtype =~ s/\s/_/g; #replace whitespace with underscores for the primary tag
        return Bio::SeqFeature::Generic->new( -start   => $s,
                                              -end     => $e,
                                              -primary => $matchtype,
                                              -seq_id  => $self->{parse_state}{seqname},
                                            );
      }
      elsif ( $lcline eq 'no hits found' ) {
        $self->{parse_state}{seqname} = '';
      }
      elsif ( grep $lcline eq $_, 'strong match', 'moderate match', 'weak match', 'suspect origin') {
        # applies to every coordinate line until the next match type line
        $self->{parse_state}{matchtype} = $lcline;
      }
      else {
        $self->throw("Parse error. Expected a match type or coordinate line but got '$line'");
      }
    }
    else {
      #blank line, ignore it and reset parser
      $self->{parse_state}{seqname} = ''; #< a line with whitespace
                                          #indicates a boundary
                                          #between output for
                                          #different sequences
      $self->{parse_state}{matchtype} = '';
    }
  }

  return;
}

=head2 write_feature (NOT IMPLEMENTED)

 Title   : write_feature
 Usage   : $io->write_feature($feature)
 Function: write a Bio::SeqFeatureI object in vecscreen -f 3 format
 Example :
 Args    : Bio::SeqFeatureI object
 Returns :

=cut

sub write_feature {
  shift->throw_not_implemented;
}

###
1;#do not remove
###