Bio-Variation-1.7.5000755000766000024 013604710571 14243 5ustar00cjfieldsstaff000000000000Changes100644000766000024 104513604710571 15617 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5Summary of important user-visible changes for Bio-Variation ----------------------------------------------------------- 1.7.5 2020-01-06 13:58:44-06:00 America/Chicago * Remove Bio::DB::EMBL and Bio::DB::NCBIHelper dependencies, which cause a circular dependency (see issue #3) 1.7.4 2019-12-07 16:56:21-06:00 America/Chicago * Add named module (very simple for now) * Minor release to address CPAN indexing and permissions issues 1.7.3 2019-12-07 16:40:04-06:00 America/Chicago * First release after split from bioperl-live. t000755000766000024 013604710571 14427 5ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5SNP.t100644000766000024 121513604710571 15413 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/t# -*-Perl-*- Test Harness script for Bioperl # $Id$ use strict; BEGIN { use lib '.'; use Bio::Root::Test; test_begin(-tests => 13); use_ok('Bio::Variation::SNP'); } my($a); # # SNP # ok $a = Bio::Variation::SNP->new(); is $a->id('123'), 123; eval { $a->di('123'); }; ok $@; is $a->validated('by-cluster'), 'by-cluster'; my @alleles = ('A', 'T'); is $a->validated(\@alleles), \@alleles; is $a->desc('abc'), 'abc'; # Bio::Variation::Allele method is $a->chromosome('X'), 'X'; # Bio::Variation::Allele method ok my $s = $a->add_subsnp; ok $s->is_subsnp; is $s->handle('HGBASE'), 'HGBASE'; ok $a->add_subsnp; is $a->each_subsnp, 2; LICENSE100644000766000024 4371313604710571 15361 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5This software is copyright (c) 2020 by See individual modules. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. Terms of the Perl programming language system itself a) the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version, or b) the "Artistic License" --- The GNU General Public License, Version 1, February 1989 --- This software is Copyright (c) 2020 by See individual modules. This is free software, licensed under: The GNU General Public License, Version 1, February 1989 GNU GENERAL PUBLIC LICENSE Version 1, February 1989 Copyright (C) 1989 Free Software Foundation, Inc. 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The license agreements of most software companies try to keep users at the mercy of those companies. By contrast, our General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. The General Public License applies to the Free Software Foundation's software and to any other program whose authors commit to using it. You can use it for your programs, too. When we speak of free software, we are referring to freedom, not price. Specifically, the General Public License is designed to make sure that you have the freedom to give away or sell copies of free software, that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of a such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must tell them their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any work containing the Program or a portion of it, either verbatim or with modifications. Each licensee is addressed as "you". 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this General Public License and to the absence of any warranty; and give any other recipients of the Program a copy of this General Public License along with the Program. You may charge a fee for the physical act of transferring a copy. 2. You may modify your copy or copies of the Program or any portion of it, and copy and distribute such modifications under the terms of Paragraph 1 above, provided that you also do the following: a) cause the modified files to carry prominent notices stating that you changed the files and the date of any change; and b) cause the whole of any work that you distribute or publish, that in whole or in part contains the Program or any part thereof, either with or without modifications, to be licensed at no charge to all third parties under the terms of this General Public License (except that you may choose to grant warranty protection to some or all third parties, at your option). c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the simplest and most usual way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this General Public License. d) You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. Mere aggregation of another independent work with the Program (or its derivative) on a volume of a storage or distribution medium does not bring the other work under the scope of these terms. 3. You may copy and distribute the Program (or a portion or derivative of it, under Paragraph 2) in object code or executable form under the terms of Paragraphs 1 and 2 above provided that you also do one of the following: a) accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Paragraphs 1 and 2 above; or, b) accompany it with a written offer, valid for at least three years, to give any third party free (except for a nominal charge for the cost of distribution) a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Paragraphs 1 and 2 above; or, c) accompany it with the information you received as to where the corresponding source code may be obtained. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form alone.) Source code for a work means the preferred form of the work for making modifications to it. For an executable file, complete source code means all the source code for all modules it contains; but, as a special exception, it need not include source code for modules which are standard libraries that accompany the operating system on which the executable file runs, or for standard header files or definitions files that accompany that operating system. 4. You may not copy, modify, sublicense, distribute or transfer the Program except as expressly provided under this General Public License. Any attempt otherwise to copy, modify, sublicense, distribute or transfer the Program is void, and will automatically terminate your rights to use the Program under this License. However, parties who have received copies, or rights to use copies, from you under this General Public License will not have their licenses terminated so long as such parties remain in full compliance. 5. By copying, distributing or modifying the Program (or any work based on the Program) you indicate your acceptance of this license to do so, and all its terms and conditions. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. 7. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of the license which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the license, you may choose any version ever published by the Free Software Foundation. 8. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS Appendix: How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to humanity, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) 19yy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) 19xx name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (a program to direct compilers to make passes at assemblers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice That's all there is to it! --- The Artistic License 1.0 --- This software is Copyright (c) 2020 by See individual modules. This is free software, licensed under: The Artistic License 1.0 The Artistic License Preamble The intent of this document is to state the conditions under which a Package may be copied, such that the Copyright Holder maintains some semblance of artistic control over the development of the package, while giving the users of the package the right to use and distribute the Package in a more-or-less customary fashion, plus the right to make reasonable modifications. Definitions: - "Package" refers to the collection of files distributed by the Copyright Holder, and derivatives of that collection of files created through textual modification. - "Standard Version" refers to such a Package if it has not been modified, or has been modified in accordance with the wishes of the Copyright Holder. - "Copyright Holder" is whoever is named in the copyright or copyrights for the package. - "You" is you, if you're thinking about copying or distributing this Package. - "Reasonable copying fee" is whatever you can justify on the basis of media cost, duplication charges, time of people involved, and so on. (You will not be required to justify it to the Copyright Holder, but only to the computing community at large as a market that must bear the fee.) - "Freely Available" means that no fee is charged for the item itself, though there may be fees involved in handling the item. It also means that recipients of the item may redistribute it under the same conditions they received it. 1. You may make and give away verbatim copies of the source form of the Standard Version of this Package without restriction, provided that you duplicate all of the original copyright notices and associated disclaimers. 2. You may apply bug fixes, portability fixes and other modifications derived from the Public Domain or from the Copyright Holder. A Package modified in such a way shall still be considered the Standard Version. 3. You may otherwise modify your copy of this Package in any way, provided that you insert a prominent notice in each changed file stating how and when you changed that file, and provided that you do at least ONE of the following: a) place your modifications in the Public Domain or otherwise make them Freely Available, such as by posting said modifications to Usenet or an equivalent medium, or placing the modifications on a major archive site such as ftp.uu.net, or by allowing the Copyright Holder to include your modifications in the Standard Version of the Package. b) use the modified Package only within your corporation or organization. c) rename any non-standard executables so the names do not conflict with standard executables, which must also be provided, and provide a separate manual page for each non-standard executable that clearly documents how it differs from the Standard Version. d) make other distribution arrangements with the Copyright Holder. 4. You may distribute the programs of this Package in object code or executable form, provided that you do at least ONE of the following: a) distribute a Standard Version of the executables and library files, together with instructions (in the manual page or equivalent) on where to get the Standard Version. b) accompany the distribution with the machine-readable source of the Package with your modifications. c) accompany any non-standard executables with their corresponding Standard Version executables, giving the non-standard executables non-standard names, and clearly documenting the differences in manual pages (or equivalent), together with instructions on where to get the Standard Version. d) make other distribution arrangements with the Copyright Holder. 5. You may charge a reasonable copying fee for any distribution of this Package. You may charge any fee you choose for support of this Package. You may not charge a fee for this Package itself. However, you may distribute this Package in aggregate with other (possibly commercial) programs as part of a larger (possibly commercial) software distribution provided that you do not advertise this Package as a product of your own. 6. The scripts and library files supplied as input to or produced as output from the programs of this Package do not automatically fall under the copyright of this Package, but belong to whomever generated them, and may be sold commercially, and may be aggregated with this Package. 7. C or perl subroutines supplied by you and linked into this Package shall not be considered part of this Package. 8. The name of the Copyright Holder may not be used to endorse or promote products derived from this software without specific prior written permission. 9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. The End dist.ini100644000766000024 50613604710571 15751 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5name = Bio-Variation version = 1.7.5 author = Allen Day author = Heikki Lehvaslaiho copyright_holder = See individual modules license = Perl_5 ;; Modules should be fixed so that these don't have to be removed. [@BioPerl] -remove = PodWeaver -remove = Test::EOL -remove = Test::NoTabs META.yml100644000766000024 1637213604710571 15626 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5--- abstract: 'BioPerl variation-related functionality' author: - 'Allen Day ' - 'Heikki Lehvaslaiho ' build_requires: Bio::Root::Test: '0' File::Spec: '0' IO::Handle: '0' IPC::Open3: '0' Test::More: '0' lib: '0' perl: '5.006' configure_requires: ExtUtils::MakeMaker: '0' dynamic_config: 0 generated_by: 'Dist::Zilla version 6.012, CPAN::Meta::Converter version 2.150010' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: Bio-Variation requires: Bio::DBLinkContainerI: '0' Bio::PrimarySeq: '0' Bio::Root::IO: '0' Bio::Root::Root: '0' Bio::SeqFeature::Generic: '0' Bio::SeqIO: '0' Bio::Tools::CodonTable: '0' Getopt::Long: '0' IO::String: '0' Text::Wrap: '0' XML::Twig: '0' XML::Writer: '0.4' base: '0' constant: '0' strict: '0' utf8: '0' vars: '0' warnings: '0' resources: bugtracker: https://github.com/bioperl/bio-variation/issues homepage: https://metacpan.org/release/Bio-Variation repository: git://github.com/bioperl/bio-variation.git version: 1.7.5 x_Dist_Zilla: perl: version: '5.028001' plugins: - class: Dist::Zilla::Plugin::GatherDir config: Dist::Zilla::Plugin::GatherDir: exclude_filename: [] exclude_match: [] follow_symlinks: 0 include_dotfiles: 0 prefix: '' prune_directory: [] root: . name: '@BioPerl/@Filter/GatherDir' version: '6.012' - class: Dist::Zilla::Plugin::PruneCruft name: '@BioPerl/@Filter/PruneCruft' version: '6.012' - class: Dist::Zilla::Plugin::ManifestSkip name: '@BioPerl/@Filter/ManifestSkip' version: '6.012' - class: Dist::Zilla::Plugin::MetaYAML name: '@BioPerl/@Filter/MetaYAML' version: '6.012' - class: Dist::Zilla::Plugin::License name: '@BioPerl/@Filter/License' version: '6.012' - class: Dist::Zilla::Plugin::ExtraTests name: '@BioPerl/@Filter/ExtraTests' version: '6.012' - class: Dist::Zilla::Plugin::ExecDir name: '@BioPerl/@Filter/ExecDir' version: '6.012' - class: Dist::Zilla::Plugin::ShareDir name: '@BioPerl/@Filter/ShareDir' version: '6.012' - class: Dist::Zilla::Plugin::MakeMaker config: Dist::Zilla::Role::TestRunner: default_jobs: 1 name: '@BioPerl/@Filter/MakeMaker' version: '6.012' - class: Dist::Zilla::Plugin::Manifest name: '@BioPerl/@Filter/Manifest' version: '6.012' - class: Dist::Zilla::Plugin::TestRelease name: '@BioPerl/@Filter/TestRelease' version: '6.012' - class: Dist::Zilla::Plugin::ConfirmRelease name: '@BioPerl/@Filter/ConfirmRelease' version: '6.012' - class: Dist::Zilla::Plugin::UploadToCPAN name: '@BioPerl/@Filter/UploadToCPAN' version: '6.012' - class: Dist::Zilla::Plugin::MetaConfig name: '@BioPerl/MetaConfig' version: '6.012' - class: Dist::Zilla::Plugin::MetaJSON name: '@BioPerl/MetaJSON' version: '6.012' - class: Dist::Zilla::Plugin::PkgVersion name: '@BioPerl/PkgVersion' version: '6.012' - class: Dist::Zilla::Plugin::PodSyntaxTests name: '@BioPerl/PodSyntaxTests' version: '6.012' - class: Dist::Zilla::Plugin::Test::Compile config: Dist::Zilla::Plugin::Test::Compile: bail_out_on_fail: '0' fail_on_warning: author fake_home: 0 filename: t/00-compile.t module_finder: - ':InstallModules' needs_display: 0 phase: test script_finder: - ':PerlExecFiles' skips: [] switch: [] name: '@BioPerl/Test::Compile' version: '2.058' - class: Dist::Zilla::Plugin::PodCoverageTests name: '@BioPerl/PodCoverageTests' version: '6.012' - class: Dist::Zilla::Plugin::MojibakeTests name: '@BioPerl/MojibakeTests' version: '0.8' - class: Dist::Zilla::Plugin::AutoPrereqs name: '@BioPerl/AutoPrereqs' version: '6.012' - class: Dist::Zilla::Plugin::AutoMetaResources name: '@BioPerl/AutoMetaResources' version: '1.21' - class: Dist::Zilla::Plugin::MetaResources name: '@BioPerl/MetaResources' version: '6.012' - class: Dist::Zilla::Plugin::Encoding name: '@BioPerl/Encoding' version: '6.012' - class: Dist::Zilla::Plugin::NextRelease name: '@BioPerl/NextRelease' version: '6.012' - class: Dist::Zilla::Plugin::Git::Check config: Dist::Zilla::Plugin::Git::Check: untracked_files: die Dist::Zilla::Role::Git::DirtyFiles: allow_dirty: - Changes - dist.ini allow_dirty_match: [] changelog: Changes Dist::Zilla::Role::Git::Repo: git_version: 2.20.1 repo_root: . name: '@BioPerl/Git::Check' version: '2.045' - class: Dist::Zilla::Plugin::Git::Commit config: Dist::Zilla::Plugin::Git::Commit: add_files_in: [] commit_msg: v%v%n%n%c Dist::Zilla::Role::Git::DirtyFiles: allow_dirty: - Changes - dist.ini allow_dirty_match: [] changelog: Changes Dist::Zilla::Role::Git::Repo: git_version: 2.20.1 repo_root: . Dist::Zilla::Role::Git::StringFormatter: time_zone: local name: '@BioPerl/Git::Commit' version: '2.045' - class: Dist::Zilla::Plugin::Git::Tag config: Dist::Zilla::Plugin::Git::Tag: branch: ~ changelog: Changes signed: 0 tag: Bio-Variation-v1.7.5 tag_format: '%N-v%v' tag_message: '%N-v%v' Dist::Zilla::Role::Git::Repo: git_version: 2.20.1 repo_root: . Dist::Zilla::Role::Git::StringFormatter: time_zone: local name: '@BioPerl/Git::Tag' version: '2.045' - class: Dist::Zilla::Plugin::FinderCode name: ':InstallModules' version: '6.012' - class: Dist::Zilla::Plugin::FinderCode name: ':IncModules' version: '6.012' - class: Dist::Zilla::Plugin::FinderCode name: ':TestFiles' version: '6.012' - class: Dist::Zilla::Plugin::FinderCode name: ':ExtraTestFiles' version: '6.012' - class: Dist::Zilla::Plugin::FinderCode name: ':ExecFiles' version: '6.012' - class: Dist::Zilla::Plugin::FinderCode name: ':PerlExecFiles' version: '6.012' - class: Dist::Zilla::Plugin::FinderCode name: ':ShareFiles' version: '6.012' - class: Dist::Zilla::Plugin::FinderCode name: ':MainModule' version: '6.012' - class: Dist::Zilla::Plugin::FinderCode name: ':AllFiles' version: '6.012' - class: Dist::Zilla::Plugin::FinderCode name: ':NoFiles' version: '6.012' zilla: class: Dist::Zilla::Dist::Builder config: is_trial: '0' version: '6.012' x_generated_by_perl: v5.28.1 x_serialization_backend: 'YAML::Tiny version 1.73' MANIFEST100644000766000024 154113604710571 15456 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5# This file was automatically generated by Dist::Zilla::Plugin::Manifest v6.012. Changes LICENSE MANIFEST META.json META.yml Makefile.PL bin/bp_flanks dist.ini lib/Bio/Variation.pm lib/Bio/Variation/AAChange.pm lib/Bio/Variation/AAReverseMutate.pm lib/Bio/Variation/Allele.pm lib/Bio/Variation/DNAMutation.pm lib/Bio/Variation/IO.pm lib/Bio/Variation/IO/flat.pm lib/Bio/Variation/IO/xml.pm lib/Bio/Variation/RNAChange.pm lib/Bio/Variation/SNP.pm lib/Bio/Variation/SeqDiff.pm lib/Bio/Variation/VariantI.pm t/00-compile.t t/AAChange.t t/AAReverseMutate.t t/Allele.t t/DNAMutation.t t/RNAChange.t t/SNP.t t/SeqDiff.t t/Variation_IO.t t/author-mojibake.t t/author-pod-coverage.t t/author-pod-syntax.t t/data/mutations.dat t/data/mutations.old.dat t/data/mutations.old.xml t/data/mutations.xml t/data/polymorphism.dat t/data/polymorphism.old.xml t/data/polymorphism.xml META.json100644000766000024 2665513604710571 16003 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5{ "abstract" : "BioPerl variation-related functionality", "author" : [ "Allen Day ", "Heikki Lehvaslaiho " ], "dynamic_config" : 0, "generated_by" : "Dist::Zilla version 6.012, CPAN::Meta::Converter version 2.150010", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : 2 }, "name" : "Bio-Variation", "prereqs" : { "configure" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "develop" : { "requires" : { "Pod::Coverage::TrustPod" : "0", "Test::Mojibake" : "0", "Test::Pod" : "1.41", "Test::Pod::Coverage" : "1.08" } }, "runtime" : { "requires" : { "Bio::DBLinkContainerI" : "0", "Bio::PrimarySeq" : "0", "Bio::Root::IO" : "0", "Bio::Root::Root" : "0", "Bio::SeqFeature::Generic" : "0", "Bio::SeqIO" : "0", "Bio::Tools::CodonTable" : "0", "Getopt::Long" : "0", "IO::String" : "0", "Text::Wrap" : "0", "XML::Twig" : "0", "XML::Writer" : "0.4", "base" : "0", "constant" : "0", "strict" : "0", "utf8" : "0", "vars" : "0", "warnings" : "0" } }, "test" : { "requires" : { "Bio::Root::Test" : "0", "File::Spec" : "0", "IO::Handle" : "0", "IPC::Open3" : "0", "Test::More" : "0", "lib" : "0", "perl" : "5.006" } } }, "release_status" : "stable", "resources" : { "bugtracker" : { "mailto" : "bioperl-l@bioperl.org", "web" : "https://github.com/bioperl/bio-variation/issues" }, "homepage" : "https://metacpan.org/release/Bio-Variation", "repository" : { "type" : "git", "url" : "git://github.com/bioperl/bio-variation.git", "web" : "https://github.com/bioperl/bio-variation" } }, "version" : "1.7.5", "x_Dist_Zilla" : { "perl" : { "version" : "5.028001" }, "plugins" : [ { "class" : "Dist::Zilla::Plugin::GatherDir", "config" : { "Dist::Zilla::Plugin::GatherDir" : { "exclude_filename" : [], "exclude_match" : [], "follow_symlinks" : 0, "include_dotfiles" : 0, "prefix" : "", "prune_directory" : [], "root" : "." } }, "name" : "@BioPerl/@Filter/GatherDir", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::PruneCruft", "name" : "@BioPerl/@Filter/PruneCruft", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::ManifestSkip", "name" : "@BioPerl/@Filter/ManifestSkip", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::MetaYAML", "name" : "@BioPerl/@Filter/MetaYAML", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::License", "name" : "@BioPerl/@Filter/License", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::ExtraTests", "name" : "@BioPerl/@Filter/ExtraTests", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::ExecDir", "name" : "@BioPerl/@Filter/ExecDir", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::ShareDir", "name" : "@BioPerl/@Filter/ShareDir", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::MakeMaker", "config" : { "Dist::Zilla::Role::TestRunner" : { "default_jobs" : 1 } }, "name" : "@BioPerl/@Filter/MakeMaker", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::Manifest", "name" : "@BioPerl/@Filter/Manifest", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::TestRelease", "name" : "@BioPerl/@Filter/TestRelease", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::ConfirmRelease", "name" : "@BioPerl/@Filter/ConfirmRelease", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::UploadToCPAN", "name" : "@BioPerl/@Filter/UploadToCPAN", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::MetaConfig", "name" : "@BioPerl/MetaConfig", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::MetaJSON", "name" : "@BioPerl/MetaJSON", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::PkgVersion", "name" : "@BioPerl/PkgVersion", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::PodSyntaxTests", "name" : "@BioPerl/PodSyntaxTests", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::Test::Compile", "config" : { "Dist::Zilla::Plugin::Test::Compile" : { "bail_out_on_fail" : 0, "fail_on_warning" : "author", "fake_home" : 0, "filename" : "t/00-compile.t", "module_finder" : [ ":InstallModules" ], "needs_display" : 0, "phase" : "test", "script_finder" : [ ":PerlExecFiles" ], "skips" : [], "switch" : [] } }, "name" : "@BioPerl/Test::Compile", "version" : "2.058" }, { "class" : "Dist::Zilla::Plugin::PodCoverageTests", "name" : "@BioPerl/PodCoverageTests", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::MojibakeTests", "name" : "@BioPerl/MojibakeTests", "version" : "0.8" }, { "class" : "Dist::Zilla::Plugin::AutoPrereqs", "name" : "@BioPerl/AutoPrereqs", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::AutoMetaResources", "name" : "@BioPerl/AutoMetaResources", "version" : "1.21" }, { "class" : "Dist::Zilla::Plugin::MetaResources", "name" : "@BioPerl/MetaResources", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::Encoding", "name" : "@BioPerl/Encoding", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::NextRelease", "name" : "@BioPerl/NextRelease", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::Git::Check", "config" : { "Dist::Zilla::Plugin::Git::Check" : { "untracked_files" : "die" }, "Dist::Zilla::Role::Git::DirtyFiles" : { "allow_dirty" : [ "Changes", "dist.ini" ], "allow_dirty_match" : [], "changelog" : "Changes" }, "Dist::Zilla::Role::Git::Repo" : { "git_version" : "2.20.1", "repo_root" : "." } }, "name" : "@BioPerl/Git::Check", "version" : "2.045" }, { "class" : "Dist::Zilla::Plugin::Git::Commit", "config" : { "Dist::Zilla::Plugin::Git::Commit" : { "add_files_in" : [], "commit_msg" : "v%v%n%n%c" }, "Dist::Zilla::Role::Git::DirtyFiles" : { "allow_dirty" : [ "Changes", "dist.ini" ], "allow_dirty_match" : [], "changelog" : "Changes" }, "Dist::Zilla::Role::Git::Repo" : { "git_version" : "2.20.1", "repo_root" : "." }, "Dist::Zilla::Role::Git::StringFormatter" : { "time_zone" : "local" } }, "name" : "@BioPerl/Git::Commit", "version" : "2.045" }, { "class" : "Dist::Zilla::Plugin::Git::Tag", "config" : { "Dist::Zilla::Plugin::Git::Tag" : { "branch" : null, "changelog" : "Changes", "signed" : 0, "tag" : "Bio-Variation-v1.7.5", "tag_format" : "%N-v%v", "tag_message" : "%N-v%v" }, "Dist::Zilla::Role::Git::Repo" : { "git_version" : "2.20.1", "repo_root" : "." }, "Dist::Zilla::Role::Git::StringFormatter" : { "time_zone" : "local" } }, "name" : "@BioPerl/Git::Tag", "version" : "2.045" }, { "class" : "Dist::Zilla::Plugin::FinderCode", "name" : ":InstallModules", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::FinderCode", "name" : ":IncModules", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::FinderCode", "name" : ":TestFiles", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::FinderCode", "name" : ":ExtraTestFiles", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::FinderCode", "name" : ":ExecFiles", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::FinderCode", "name" : ":PerlExecFiles", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::FinderCode", "name" : ":ShareFiles", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::FinderCode", "name" : ":MainModule", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::FinderCode", "name" : ":AllFiles", "version" : "6.012" }, { "class" : "Dist::Zilla::Plugin::FinderCode", "name" : ":NoFiles", "version" : "6.012" } ], "zilla" : { "class" : "Dist::Zilla::Dist::Builder", "config" : { "is_trial" : 0 }, "version" : "6.012" } }, "x_generated_by_perl" : "v5.28.1", "x_serialization_backend" : "Cpanel::JSON::XS version 4.09" } Allele.t100644000766000024 157113604710571 16156 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/t# -*-Perl-*- Test Harness script for Bioperl # $Id$ use strict; BEGIN { use lib '.'; use Bio::Root::Test; test_begin(-tests => 14); use_ok('Bio::Variation::Allele'); } my($a,$trunc,$rev); $a = Bio::Variation::Allele->new(-seq=>'ACTGACTGACTG', -display_id => 'new-id', -alphabet => 'dna', -accession_number => 'X677667', -desc=>'Sample Bio::Seq object'); isa_ok($a, 'Bio::Variation::Allele'); is $a->accession_number(), 'X677667'; is $a->seq(), 'ACTGACTGACTG'; is $a->display_id(),'new-id' ; is $a->desc, 'Sample Bio::Seq object'; is $a->alphabet(), 'dna'; ok defined($trunc = $a->trunc(1,4)); is $trunc->seq(), 'ACTG'; ok defined($rev = $a->revcom()); is $rev->seq(), 'CAGTCAGTCAGT'; $a->is_reference(1); ok $a->is_reference; $a->repeat_unit('ACTG'); is $a->repeat_unit, 'ACTG'; $a->repeat_count(3); is $a->repeat_count, 3; SeqDiff.t100644000766000024 436113604710571 16301 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/t# -*-Perl-*- Test Harness script for Bioperl # $Id$ use strict; BEGIN { use lib '.'; use Bio::Root::Test; test_begin(-tests => 44); use_ok('Bio::Variation::SeqDiff'); use_ok('Bio::Variation::DNAMutation'); use_ok('Bio::Variation::Allele'); } my ($obj, $mm, $aa, $dna, $m); ok $obj = Bio::Variation::SeqDiff->new(); ok $obj->id('id'); is $obj->id, 'id'; ok $obj->sysname('sysname'); is $obj->sysname, 'sysname'; $obj->trivname('trivname'); is $obj->trivname, 'trivname'; ok $obj->chromosome('chr'); is $obj->chromosome, 'chr'; ok $obj->description('desc'); is $obj->description, 'desc'; ok $obj->numbering('numbering'); is $obj->numbering, 'numbering'; ok $obj->offset(100); is $obj->offset, 100; # 12345678901234567890 ok $obj->dna_ori ('gctgctgatcgatcgtagctagctag'); is $obj->dna_ori, 'gctgctgatcgatcgtagctagctag'; # generate mutated DNA seq from the mutation ok $m = Bio::Variation::DNAMutation->new(-isMutation => 1, -start=>14, -end=>14); ok $a = Bio::Variation::Allele->new(-seq=>'c'); $b = Bio::Variation::Allele->new(-seq=>'g'); ok $m->allele_ori($a); ok $m->allele_mut($b); ok $obj->add_Variant($m); my $m2 = Bio::Variation::DNAMutation->new(-isMutation => 1, -start=>19, -end=>19); my $a2 = Bio::Variation::Allele->new(-seq=>'c'); my $b2 = Bio::Variation::Allele->new(-seq=>'g'); $m2->allele_ori($a2); $m2->allele_mut($b2); $obj->add_Variant($m2); #ok $obj->dna_mut('gctgctgatcggtcgtagctagctag'); is $obj->dna_mut, 'gctgctgatcgatggtaggtagctag'; ok $obj->rna_ori('gctgctgatcgatcgtagctagctag'); is $obj->rna_ori, 'gctgctgatcgatcgtagctagctag'; $obj->rna_mut('gctgctgatcgatcgtagctagctag'); is $obj->rna_mut, 'gctgctgatcgatcgtagctagctag'; ok $obj->aa_ori('MHYTRD'); is $obj->aa_ori, 'MHYTRD'; ok $obj->aa_mut('MHGTRD'); is $obj->aa_mut, 'MHGTRD'; foreach $mm ( $obj->each_Variant ) { $mm->primary_tag('a'); isa_ok($mm,'Bio::Variation::VariantI'); } ok $obj->gene_symbol('fos'); is $obj->gene_symbol, 'fos'; ok $obj->rna_offset(10); is $obj->rna_offset, 10; ok $obj->rna_id('transcript#3'); is $obj->rna_id, 'transcript#3'; ok $dna = $obj->seqobj('dna_ori'); isa_ok($dna,'Bio::PrimarySeq'); $obj->aa_mut(''); $aa = $obj->seqobj('aa_mut'); ok not defined $aa; eval { $dna = $obj->seqobj('dna_ri'); }; ok $@; Makefile.PL100644000766000024 414613604710571 16303 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5# This file was automatically generated by Dist::Zilla::Plugin::MakeMaker v6.012. use strict; use warnings; use 5.006; use ExtUtils::MakeMaker; my %WriteMakefileArgs = ( "ABSTRACT" => "BioPerl variation-related functionality", "AUTHOR" => "Allen Day , Heikki Lehvaslaiho ", "CONFIGURE_REQUIRES" => { "ExtUtils::MakeMaker" => 0 }, "DISTNAME" => "Bio-Variation", "EXE_FILES" => [ "bin/bp_flanks" ], "LICENSE" => "perl", "MIN_PERL_VERSION" => "5.006", "NAME" => "Bio::Variation", "PREREQ_PM" => { "Bio::DBLinkContainerI" => 0, "Bio::PrimarySeq" => 0, "Bio::Root::IO" => 0, "Bio::Root::Root" => 0, "Bio::SeqFeature::Generic" => 0, "Bio::SeqIO" => 0, "Bio::Tools::CodonTable" => 0, "Getopt::Long" => 0, "IO::String" => 0, "Text::Wrap" => 0, "XML::Twig" => 0, "XML::Writer" => "0.4", "base" => 0, "constant" => 0, "strict" => 0, "utf8" => 0, "vars" => 0, "warnings" => 0 }, "TEST_REQUIRES" => { "Bio::Root::Test" => 0, "File::Spec" => 0, "IO::Handle" => 0, "IPC::Open3" => 0, "Test::More" => 0, "lib" => 0 }, "VERSION" => "1.7.5", "test" => { "TESTS" => "t/*.t" } ); my %FallbackPrereqs = ( "Bio::DBLinkContainerI" => 0, "Bio::PrimarySeq" => 0, "Bio::Root::IO" => 0, "Bio::Root::Root" => 0, "Bio::Root::Test" => 0, "Bio::SeqFeature::Generic" => 0, "Bio::SeqIO" => 0, "Bio::Tools::CodonTable" => 0, "File::Spec" => 0, "Getopt::Long" => 0, "IO::Handle" => 0, "IO::String" => 0, "IPC::Open3" => 0, "Test::More" => 0, "Text::Wrap" => 0, "XML::Twig" => 0, "XML::Writer" => "0.4", "base" => 0, "constant" => 0, "lib" => 0, "strict" => 0, "utf8" => 0, "vars" => 0, "warnings" => 0 ); unless ( eval { ExtUtils::MakeMaker->VERSION(6.63_03) } ) { delete $WriteMakefileArgs{TEST_REQUIRES}; delete $WriteMakefileArgs{BUILD_REQUIRES}; $WriteMakefileArgs{PREREQ_PM} = \%FallbackPrereqs; } delete $WriteMakefileArgs{CONFIGURE_REQUIRES} unless eval { ExtUtils::MakeMaker->VERSION(6.52) }; WriteMakefile(%WriteMakefileArgs); AAChange.t100644000766000024 347413604710571 16353 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/t# -*-Perl-*- Test Harness script for Bioperl # $Id$ use strict; BEGIN { use lib '.'; use Bio::Root::Test; test_begin(-tests => 27); use_ok('Bio::Variation::Allele'); use_ok('Bio::Variation::AAChange'); use_ok('Bio::Variation::RNAChange'); } ok my $obj = Bio::Variation::AAChange->new(); isa_ok $obj, 'Bio::Variation::AAChange'; $obj->start(3); is $obj->start, 3; $obj->end(3); is $obj->end, 3; $obj->length(3); is $obj->length, 3; $obj->strand('1'); is $obj->strand, '1'; is $obj->primary_tag, 'Variation'; $obj->source_tag('source'); is $obj->source_tag, 'source'; $obj->frame(2); is $obj->frame,2; $obj->score(2); is $obj->score, 2; $obj->isMutation(1); ok $obj->isMutation; my $a1 = Bio::Variation::Allele->new(-seq => 'V'); $obj->allele_ori($a1); is $obj->allele_ori->seq, 'V'; my $a2 = Bio::Variation::Allele->new('-seq' => 'A'); $obj->add_Allele($a2); is $obj->allele_mut->seq, 'A'; is $obj->similarity_score, 0; $obj->upStreamSeq('upStreamSeq'); is $obj->upStreamSeq, 'upStreamSeq'; $obj->dnStreamSeq('dnStreamSeq'); is $obj->dnStreamSeq, 'dnStreamSeq' ; is $obj->label, 'substitution, conservative'; $obj->status('proven'); is $obj->status, 'proven'; $obj->proof('experimental'); is $obj->proof, 'experimental'; $obj->region('region'); is $obj->region, 'region'; $obj->region_value('region_value'); is $obj->region_value, 'region_value'; $obj->numbering('coding'); is $obj->numbering, 'coding'; my $obj2 = Bio::Variation::RNAChange->new(-start => 7, -end => 7, -cds_end => 100, -codon_pos => 1, -upStreamSeq => 'acgcgcgcgc', -dnStreamSeq => 'acgcgcgcgc' ); $obj2->label('missense'); $obj->RNAChange($obj2); is $obj->trivname, 'V3A', "Trivial name is [". $obj->trivname. "]"; $obj->mut_number(2); is $obj->mut_number, 2; bin000755000766000024 013604710571 14734 5ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5bp_flanks100644000766000024 2275513604710571 17011 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/bin#!/usr/bin/perl # -*-Perl-*- # # Heikki Lehvaslaiho # Finding flanking sequences for a variant. # # # v. 1 16 Mar 2001 # v. 1.1 9 Aug 2001 interface change, more info in fasta header # v. 2.0 23 Nov 2001 new code from the flanks CGI program # support for EMBL-like positions # v. 3.0 21 Feb 2003 new command line interface use Bio::PrimarySeq; use Bio::SeqIO; use Getopt::Long; use strict; use warnings; use constant VERSION => '3.0'; my $help = ''; my $flank = 100; # flank length on both sides of the region my $in_format = 'EMBL'; # format of the file to read in # my $db_class = 'Bio::DB::EMBL'; # class of my @pos; # position(s) in the sequence GetOptions ("help" => \$help, "flanklength=i" => \$flank, "position=s" => \@pos ); @pos = split(/,/,join(',',@pos)); system("perldoc $0") if $help; system("perldoc $0") unless @ARGV; print STDERR "\nYou need to provide --position option\n" and system("perldoc $0") unless @pos; my $file = shift; $file || system("perldoc $0"); my $seq = get_seq($file); exit 0 unless $seq; &extract($seq, \@pos, $flank); # ## end main # sub get_seq { my ($file) = @_; my $IN_FORMAT = 'EMBL'; # format of the local file on disk if (-e $file ) { # local file my $in = Bio::SeqIO->new('-file' => $file, '-format' => $IN_FORMAT); $seq = $in->next_seq(); } elsif ($file =~ /\./) { # sequence version from GenBank eval { Bio::SeqIO->_load_module("Bio::DB::GenBank"); my $gb = Bio::DB::GenBank->new(); $seq = $gb->get_Seq_by_version($file); }; if ($@) { die "Encountered an error: $@"; } } else { # plain accession mumber from more reliable EMBL eval { Bio::SeqIO->_load_module("Bio::DB::EMBL"); my $gb = Bio::DB::EMBL->new(); $seq = $gb->get_Seq_by_acc($file); }; if ($@) { die "Encountered an error: $@"; } } print STDERR "Could not find sequence [$file]" && return unless $seq; return $seq; } sub extract { my ($seq, $pos, $flank) = @_; my ($out_seq); my $OUT_FORMAT = 'FASTA'; # output format, going into STDOUT my $strand = 1; # default for the forward strand my $out = Bio::SeqIO->new('-format' => $OUT_FORMAT); my $count = 1; foreach my $idpos (@$pos) { my ($id, $pos_range, $start, $end, $allele_len); my $inbetween = 0; # handle 23^24 notation as well as plain integer (24) # but set flag and make corrections when needed if ($idpos =~ /:/ ) { # id and position separator ($id, $pos_range) = split /:/, $idpos; } else { # no id $id = $count; $count++; $pos_range = $idpos; } $strand = -1 if $pos_range =~ /-$/; # opposite strand $pos_range = $1 if $pos_range =~ /(.+)-/; # remove trailing '-' if ($pos_range =~ /\^/) { # notation 23^24 used ($start, $end) = split /\^/, $pos_range; print STDERR $id, ": Give adjacent nucleotides flanking '^' character, not [", $start, "] and [", $end, "]\n" and next unless $end == $start + 1; $end = $start; $inbetween = 1; } else { # notation 23..24 used ($start, $end) = split /\.\./, $pos_range; } $end ||= $start; # notation 23 used print STDERR $id, ": Start can not be larger than end. Not [", $start, "] and [", $end, "]\n" and next if $start > $end; $allele_len = $end - $start; # sanity checks next unless defined $start && $start =~ /\d+/ && $start != 0; print STDERR "Position '$start' not in sequence '$file'\n", and next if $start < 1 or $start > $seq->length; print STDERR "End position '$end' not in sequence '$file'\n", and next if $end < 1 or $end > $seq->length; # determine nucleotide positions # left edge my $five_start = $start - $flank; $five_start = 1 if $five_start < 1; # not outside the sequence # right edge my $three_end = $start + $allele_len + $flank; $three_end = $seq->length if $start + $allele_len + $flank > $seq->length; $three_end-- if $inbetween; # extract the sequences my $five_prime = lc $seq->subseq($five_start , $start - 1); # left flank my $snp = uc $seq->subseq($start, $end); # allele (always > 0 length) $snp = lc $snp if $inbetween; my $three_prime; # right flank if ($end < $seq->length) { # make sure we are not beyond reference sequece $three_prime = lc $seq->subseq($end + 1, $three_end); } else { $three_prime = ''; } # allele positions in local, extracted coordinates my $locpos = length($five_prime) + 1; my $loc_end; if ($allele_len) { $loc_end = "..". ($locpos+$allele_len); } else { $loc_end = ''; $loc_end = '^'. ($locpos+1) if $inbetween; } # build FASTA id and description line my $fastaid = uc($id). "_". uc($file). " oripos=$pos_range strand=$strand allelepos=$locpos$loc_end"; #build BioPerl sequence objects if ($strand == -1) { my $five_prime_seq = Bio::PrimarySeq->new(-alphabet=>'dna',-seq=>$five_prime); my $snp_seq = Bio::PrimarySeq->new(-alphabet=>'dna',-seq=>$snp); my $three_prime_seq = Bio::PrimarySeq->new(-alphabet=>'dna',-seq=>$three_prime); my $str = $three_prime_seq->revcom->seq. " ". $snp_seq->revcom->seq. " ". $five_prime_seq->revcom->seq; $str =~ s/ //g; $out_seq = Bio::PrimarySeq->new(-id => $fastaid, -alphabet=>'dna', -seq => $str ); } else { my $str = $five_prime. " ". $snp. " ". $three_prime; $str =~ s/ //g; $out_seq = Bio::PrimarySeq->new(-id => $fastaid, -alphabet=>'dna', -seq => $str ); } $out->write_seq($out_seq); # print sequence out } } =head1 NAME bp_flanks - finding flanking sequences for a variant in a sequence position =head1 SYNOPSIS bp_flanks --position POS [-p POS ...] [--flanklen INT] accession | filename =head1 DESCRIPTION This script allows you to extract a subsequence around a region of interest from an existing sequence. The output if fasta formatted sequence entry where the header line contains additional information about the location. =head1 OPTIONS The script takes one unnamed argument which be either a file name in the local file system or a nucleotide sequence accession number. -p Position uses simple nucleotide sequence feature table --position notation to define the region of interest, typically a SNP or microsatellite repeat around which the flanks are defined. There can be more than one position option or you can give a comma separated list to one position option. The format of a position is: [id:] int | range | in-between [-] The optional id is the name you want to call the new sequence. If it not given in joins running number to the entry name with an underscore. The position is either a point (e.g. 234), a range (e.g 250..300) or insertion point between nucleotides (e.g. 234^235) If the position is not completely within the source sequence the output sequence will be truncated and it will print a warning. The optional hyphen [-] at the end of the position indicates that that you want the retrieved sequence to be in the opposite strand. -f Defaults to 100. This is the length of the nucleotides --flanklen sequence retrieved on both sides of the given position. If the source file does not contain =head1 OUTPUT FORMAT The output is a fasta formatted entry where the description file contains tag=value pairs for information about where in the original sequence the subsequence was taken. The ID of the fasta entry is the name given at the command line joined by hyphen to the filename or accesion of the source sequence. If no id is given a series of consecutive integers is used. The tag=value pairs are: =over 3 =item oripos=int position in the source file =item strand=1|-1 strand of the sequence compared to the source sequence =item allelepos=int position of the region of interest in the current entry. The tag is the same as used by dbSNP@NCBI =back The sequence highlights the allele variant position by showing it in upper case and rest of the sequence in lower case characters. =head1 EXAMPLE % bp_flanks ~/seq/ar.embl >1_/HOME/HEIKKI/SEQ/AR.EMBL oripos=100 strand=1 allelepos=100 taataactcagttcttatttgcacctacttcagtggacactgaatttggaaggtggagga ttttgtttttttcttttaagatctgggcatcttttgaatCtacccttcaagtattaagag acagactgtgagcctagcagggcagatcttgtccaccgtgtgtcttcttctgcacgagac tttgaggctgtcagagcgct =head1 TODO The input files are assumed to be in EMBL format and the sequences are retrieved only from the EMB database. Make this more generic and use the registry. head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to the Bioperl mailing lists Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track the bugs and their resolution. Bug reports can be submitted via the web: https://github.com/bioperl/bioperl-live/issues =head1 AUTHOR - Heikki Lehvaslaiho Email: Eheikki-at-bioperl-dot-orgE =cut RNAChange.t100644000766000024 417213604710571 16506 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/t# -*-Perl-*- Test Harness script for Bioperl # $Id$ use strict; BEGIN { use lib '.'; use Bio::Root::Test; test_begin(-tests => 31); use_ok('Bio::Variation::Allele'); use_ok('Bio::Variation::RNAChange'); use_ok('Bio::Variation::AAChange'); } ok my $obj = Bio::Variation::RNAChange->new(); $obj->start(4); is $obj->start, 4; $obj->end(4); is $obj->end, 4; $obj->length(1); is $obj->length, 1; $obj->strand('1'); is $obj->strand, '1'; is ($obj->primary_tag, 'Variation' ); $obj->source_tag('source'); is ($obj->source_tag, 'source' ); $obj->frame(2); is ($obj->frame, 2 ); $obj->score(2); is ($obj->score, 2 ); #test gff string #$obj->dna_mut('dna_mut'); #if ($obj->dna_mut eq 'dna_mut' ) { # print "ok 11\n"; #} else { # print "not ok 11\n"; #} my $a1 = Bio::Variation::Allele->new(-seq => 'g'); $obj->allele_ori($a1); is( $obj->allele_ori->seq, 'g' ); my $a2 = Bio::Variation::Allele->new('-seq' => 'a'); $obj->allele_mut($a2); is($obj->allele_mut->seq, 'a' ); $obj->upStreamSeq('gaagattcagccaagctcaaggatg'); is ($obj->upStreamSeq, 'gaagattcagccaagctcaaggatg' ); $obj->cds_end(1000); is ($obj->cds_end, 1000 ); $obj->dnStreamSeq('aagtgcagttagggctgggaagggt'); is ($obj->dnStreamSeq, 'aagtgcagttagggctgggaagggt' ); $obj->codon_pos(1); is ($obj->codon_pos, 1 ); my $obj3 = Bio::Variation::AAChange->new(); $obj3->start(2); $obj->AAChange($obj3); $obj3->allele_ori($a1); $obj3->allele_mut($a2); is ($obj->label, 'missense' , "label is". $obj->label); $obj->status('proven'); is ($obj->status, 'proven' ); $obj->proof('experimental'); is ($obj->proof, 'experimental' ); is ($obj->restriction_changes, '-BccI' ); $obj->region('coding'); is ($obj->region, 'coding' ); $obj->numbering('coding'); is ($obj->numbering, 'coding' ); is ($obj->codon_ori, 'gaa', "Codon_ori is |". $obj->codon_ori. "|"); is($obj->codon_mut, 'aaa' , "Codon_mut is |". $obj->codon_mut. "|"); $obj->codon_pos(1); is ($obj->codon_pos, 1 ); is( $obj->codon_table, 1 ); $obj->codon_table(3); is ( $obj->codon_table, 3 ); $obj->mut_number(2); is ( $obj->mut_number, 2 ); $obj->verbose(2); is ( $obj->verbose, 2 ); 00-compile.t100644000766000024 605513604710571 16627 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/tuse 5.006; use strict; use warnings; # this test was generated with Dist::Zilla::Plugin::Test::Compile 2.058 use Test::More; plan tests => 13 + ($ENV{AUTHOR_TESTING} ? 1 : 0); my @module_files = ( 'Bio/Variation.pm', 'Bio/Variation/AAChange.pm', 'Bio/Variation/AAReverseMutate.pm', 'Bio/Variation/Allele.pm', 'Bio/Variation/DNAMutation.pm', 'Bio/Variation/IO.pm', 'Bio/Variation/IO/flat.pm', 'Bio/Variation/IO/xml.pm', 'Bio/Variation/RNAChange.pm', 'Bio/Variation/SNP.pm', 'Bio/Variation/SeqDiff.pm', 'Bio/Variation/VariantI.pm' ); my @scripts = ( 'bin/bp_flanks' ); # no fake home requested my @switches = ( -d 'blib' ? '-Mblib' : '-Ilib', ); use File::Spec; use IPC::Open3; use IO::Handle; open my $stdin, '<', File::Spec->devnull or die "can't open devnull: $!"; my @warnings; for my $lib (@module_files) { # see L my $stderr = IO::Handle->new; diag('Running: ', join(', ', map { my $str = $_; $str =~ s/'/\\'/g; q{'} . $str . q{'} } $^X, @switches, '-e', "require q[$lib]")) if $ENV{PERL_COMPILE_TEST_DEBUG}; my $pid = open3($stdin, '>&STDERR', $stderr, $^X, @switches, '-e', "require q[$lib]"); binmode $stderr, ':crlf' if $^O eq 'MSWin32'; my @_warnings = <$stderr>; waitpid($pid, 0); is($?, 0, "$lib loaded ok"); shift @_warnings if @_warnings and $_warnings[0] =~ /^Using .*\bblib/ and not eval { +require blib; blib->VERSION('1.01') }; if (@_warnings) { warn @_warnings; push @warnings, @_warnings; } } foreach my $file (@scripts) { SKIP: { open my $fh, '<', $file or warn("Unable to open $file: $!"), next; my $line = <$fh>; close $fh and skip("$file isn't perl", 1) unless $line =~ /^#!\s*(?:\S*perl\S*)((?:\s+-\w*)*)(?:\s*#.*)?$/; @switches = (@switches, split(' ', $1)) if $1; close $fh and skip("$file uses -T; not testable with PERL5LIB", 1) if grep { $_ eq '-T' } @switches and $ENV{PERL5LIB}; my $stderr = IO::Handle->new; diag('Running: ', join(', ', map { my $str = $_; $str =~ s/'/\\'/g; q{'} . $str . q{'} } $^X, @switches, '-c', $file)) if $ENV{PERL_COMPILE_TEST_DEBUG}; my $pid = open3($stdin, '>&STDERR', $stderr, $^X, @switches, '-c', $file); binmode $stderr, ':crlf' if $^O eq 'MSWin32'; my @_warnings = <$stderr>; waitpid($pid, 0); is($?, 0, "$file compiled ok"); shift @_warnings if @_warnings and $_warnings[0] =~ /^Using .*\bblib/ and not eval { +require blib; blib->VERSION('1.01') }; # in older perls, -c output is simply the file portion of the path being tested if (@_warnings = grep { !/\bsyntax OK$/ } grep { chomp; $_ ne (File::Spec->splitpath($file))[2] } @_warnings) { warn @_warnings; push @warnings, @_warnings; } } } is(scalar(@warnings), 0, 'no warnings found') or diag 'got warnings: ', ( Test::More->can('explain') ? Test::More::explain(\@warnings) : join("\n", '', @warnings) ) if $ENV{AUTHOR_TESTING}; DNAMutation.t100644000766000024 460513604710571 17104 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/t# -*-Perl-*- Test Harness script for Bioperl # $Id$ use strict; BEGIN { use lib '.'; use Bio::Root::Test; test_begin(-tests => 37); use_ok('Bio::Variation::DNAMutation'); use_ok('Bio::Variation::Allele'); } my($obj,$a1,$a2,$obj2); $obj = Bio::Variation::DNAMutation -> new; ok defined $obj; $obj->start(3); is $obj->start, 3; $obj->end(3); is $obj->end, 3; $obj->length(2); is $obj->length, 2; $obj->strand('1'); is $obj->strand, '1'; is $obj->primary_tag, 'Variation'; $obj->source_tag('source'); is $obj->source_tag, 'source'; $obj->frame(2); is $obj->frame,2; $obj->score(2); is $obj->score, 2; if( $obj->can('dna_mut') ) { #test gff string $obj->dna_mut('dna_mut'); is( $obj->dna_mut,'dna_mut'); } $a1 = Bio::Variation::Allele->new(-seq => 'c'); $obj->allele_ori($a1); is $obj->allele_ori->seq, 'c'; $a2 = Bio::Variation::Allele->new('-seq' => 'g'); $obj->allele_mut($a2); is $obj->allele_mut->seq, 'g'; $obj->upStreamSeq('agcacctcccggcgccagtttgctg'); is $obj->upStreamSeq, 'agcacctcccggcgccagtttgctg'; $obj->dnStreamSeq('tgctgcagcagcagcagcagcagca'); is $obj->dnStreamSeq, 'tgctgcagcagcagcagcagcagca'; is $obj->label, 'point, transversion' ; $obj->status('proven'); is $obj->status, 'proven'; $obj->proof('experimental'); is $obj->proof, 'experimental'; is $obj->restriction_changes, '-BbvI, +BstXI, -Fnu4HI, -TseI'; $obj->region('region'); is $obj->region, 'region'; $obj->region_value('region_value'); is $obj->region_value, 'region_value'; $obj->region_dist(-5); is $obj->region_dist, -5; $obj->numbering('coding'); is $obj->numbering, 'coding'; ok not $obj->CpG; $obj->mut_number(2); is $obj->mut_number, 2; ok defined ($obj2 = Bio::Variation::DNAMutation -> new ('-mut_number' => 2)); is $obj2->mut_number, 2; $obj->isMutation(1); ok $obj->isMutation; $obj->add_Allele($a1); $obj->add_Allele($a2); is scalar ($obj->each_Allele), 2; $obj = Bio::Variation::DNAMutation->new ('-start' => 23, '-end' => 24, '-length' => 2, '-upStreamSeq' => 'gt', '-dnStreamSeq' => 'at', '-proof' => 'experimental', '-isMutation' => 1, '-mut_number' => 2 ); is $obj->start(), 23; is $obj->end(), 24; is $obj->length(), 2; is $obj->upStreamSeq(), 'gt'; is $obj->dnStreamSeq(), 'at'; is $obj->proof(), 'experimental'; is $obj->mut_number(), 2; ok $obj->isMutation; Variation_IO.t100644000766000024 512213604710571 17277 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/t# -*-Perl-*- Test Harness script for Bioperl # $Id$ use strict; BEGIN { use lib '.'; use Bio::Root::Test; test_begin(-tests => 26, -requires_modules => ['Text::Wrap 98', 'XML::Writer']); use_ok('Bio::Variation::IO'); } sub io { my ($t_file, $o_file, $out_format) = @_; my $res; my ($o_ext) = $out_format eq 'flat' ? 'dat' : 'xml'; my ($o_format) = $out_format; my ($t_name) = $t_file =~ /(.*)....$/; my( $before ); { local $/ = undef; open my $BEFORE, '<', "$t_name.$o_ext" or die "Could not read file '$t_name.$o_ext': $!\n"; $before = <$BEFORE>; close $BEFORE; } ok $before;#"Error in reading input file [$t_name.$o_ext]"; my $in = Bio::Variation::IO->new( -file => $t_file); my @entries ; while (my $e = $in->next) { push @entries, $e; } my $count = scalar @entries; cmp_ok @entries, '>', 0;# "No SeqDiff objects [$count]"; my $out = Bio::Variation::IO->new( -FILE => ">$o_file", -FORMAT => $o_format); my $out_ok = 1; foreach my $e (@entries) { $out->write($e) or $out_ok = 0; } undef($out); # Flush to disk ok $out_ok;# "error writing variants"; my( $after ); { local $/ = undef; open my $AFTER, '<', $o_file or die "Could not read file '$o_file': $!\n"; $after = <$AFTER>; close $AFTER; } ok $after;# "Error in reading in again the output file [$o_file]"; is $before, $after, "test output file compared to input"; print STDERR `diff $t_file $o_file` if $before ne $after; } io (test_input_file('mutations.dat'), test_output_file(), 'flat'); #1..5 io (test_input_file('polymorphism.dat'), test_output_file(), 'flat'); #6..10 SKIP: { test_skip(-tests => 15, -requires_modules => [qw(XML::Twig XML::Writer IO::String)]); eval { if( $XML::Writer::VERSION >= 0.5 ) { io (test_input_file('mutations.xml'), test_output_file(), 'xml'); #10..12 } else { io (test_input_file('mutations.old.xml'), test_output_file(), 'xml'); #10..12 } }; eval { if( $XML::Writer::VERSION >= 0.5 ) { io (test_input_file('polymorphism.xml'), test_output_file(), 'xml'); #13..14 } else { io (test_input_file('polymorphism.old.xml'), test_output_file(), 'xml'); #13..14 } }; eval { if( $XML::Writer::VERSION >= 0.5 ) { io (test_input_file('mutations.dat'), test_output_file(), 'xml'); #15..25 } else { io (test_input_file('mutations.old.dat'), test_output_file(), 'xml'); #15..25 } }; } AAReverseMutate.t100644000766000024 173713604710571 17761 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/t# -*-Perl-*- Test Harness script for Bioperl # $Id$ use strict; BEGIN { use lib '.'; use Bio::Root::Test; test_begin(-tests => 16); use_ok('Bio::Variation::AAReverseMutate'); } my $obj = Bio::Variation::AAReverseMutate->new ('-aa_ori' => 'F', '-aa_mut' => 'S' ); ok defined $obj; isa_ok($obj, 'Bio::Variation::AAReverseMutate'); is $obj->aa_ori, 'F'; is $obj->aa_mut, 'S'; my @points = $obj->each_Variant; # F>S has two solutions is scalar @points, 2; $obj->codon_ori('ttc'); ok defined $obj; #now there should be only one left @points = $obj->each_Variant; is scalar @points, 1; $obj->codon_table(3); is $obj->codon_table, 3; #Check the returned object my $rna = pop @points; isa_ok($rna, 'Bio::Variation::RNAChange'); is $rna->length, 1; is $rna->allele_ori->seq, 't'; is $rna->allele_mut->seq, 'c'; is $rna->codon_ori, 'ttc', "Codon_ori is |". $rna->codon_ori. "|"; is $rna->codon_pos, 2; $obj->codon_table(11); is $obj->codon_table, 11; author-mojibake.t100644000766000024 35313604710571 20016 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/t#!perl BEGIN { unless ($ENV{AUTHOR_TESTING}) { print qq{1..0 # SKIP these tests are for testing by the author\n}; exit } } use strict; use warnings qw(all); use Test::More; use Test::Mojibake; all_files_encoding_ok(); data000755000766000024 013604710571 15340 5ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/tmutations.dat100644000766000024 3117413604710571 20243 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/t/dataID M20132:(362)c.+4G>A; E2K Feature DNA; 1 Feature /label: point, transition Feature /proof: computed Feature /location: 4 Feature /upflank: gaagattcagccaagctcaaggatg Feature /change: g>a Feature /dnflank: aagtgcagttagggctgggaagggt Feature /re_site: -BccI Feature RNA; 1 Feature /label: missense Feature /proof: experimental Feature /location: 4 (M20132::366) Feature /upflank: gaagattcagccaagctcaaggatg Feature /change: g>a Feature /dnflank: aagtgcagttagggctgggaagggt Feature /re_site: -BccI Feature /codon_table: 1 Feature /codon: gaa>aaa; 1 Feature /region: coding Feature AA; 1 Feature /label: substitution, conservative Feature /proof: computed Feature /location: 2 Feature /change: E>K // ID M20132:(362)c.+14T>A; L5X Feature DNA; 1 Feature /label: point, transversion Feature /proof: computed Feature /location: 14 Feature /upflank: ccaagctcaaggatggaagtgcagt Feature /change: t>a Feature /dnflank: agggctgggaagggtctaccctcgg Feature RNA; 1 Feature /label: nonsense Feature /proof: experimental Feature /location: 14 (M20132::376) Feature /upflank: ccaagctcaaggatggaagtgcagt Feature /change: t>a Feature /dnflank: agggctgggaagggtctaccctcgg Feature /codon_table: 1 Feature /codon: tta>taa; 2 Feature /region: coding Feature AA; 1 Feature /label: truncation Feature /proof: computed Feature /location: 5 Feature /change: L>* // ID M20132:(362)c.+4G>A; E2K Feature DNA; 1 Feature /label: point, transition Feature /proof: computed Feature /location: 4 Feature /upflank: gaagattcagccaagctcaaggatg Feature /change: g>a Feature /dnflank: aagtgcagttagggctgggaagggt Feature /re_site: -BccI Feature RNA; 1 Feature /label: missense Feature /proof: experimental Feature /location: 4 (M20132::366) Feature /upflank: gaagattcagccaagctcaaggatg Feature /change: g>a Feature /dnflank: aagtgcagttagggctgggaagggt Feature /re_site: -BccI Feature /codon_table: 1 Feature /codon: gaa>aaa; 1 Feature /region: coding Feature AA; 1 Feature /label: substitution, conservative Feature /proof: computed Feature /location: 2 Feature /change: E>K // ID M20132:(362)c.+100delATCCAG; I34del-2 Feature DNA; 1 Feature /label: deletion Feature /proof: computed Feature /location: 100..105 Feature /upflank: tctgttccagagcgtgcgcgaagtg Feature /change: atccag> Feature /dnflank: aacccgggccccaggcacccagagg Feature /re_site: -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV Feature RNA; 1 Feature /label: inframe, deletion Feature /proof: experimental Feature /location: 100..105 (M20132::462..467) Feature /upflank: tctgttccagagcgtgcgcgaagtg Feature /change: atccag> Feature /dnflank: aacccgggccccaggcacccagagg Feature /re_site: -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV Feature /codon_table: 1 Feature /codon: atc>-; 1 Feature /region: coding Feature AA; 1 Feature /label: deletion Feature /proof: computed Feature /location: 34..35 Feature /change: IQ> // ID M20132:(362)c.+101delT; I34delX172 Feature DNA; 1 Feature /label: deletion Feature /proof: computed Feature /location: 101 Feature /upflank: ctgttccagagcgtgcgcgaagtga Feature /change: t> Feature /dnflank: ccagaacccgggccccaggcaccca Feature /re_site: -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I Feature RNA; 1 Feature /label: frameshift, deletion Feature /proof: experimental Feature /location: 101 (M20132::463) Feature /upflank: ctgttccagagcgtgcgcgaagtga Feature /change: t> Feature /dnflank: ccagaacccgggccccaggcaccca Feature /re_site: -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I Feature /codon_table: 1 Feature /codon: atc>-; 2 Feature /region: coding Feature AA; 1 Feature /label: out-of-frame translation, truncation Feature /proof: computed Feature /location: 34 Feature /change: I>TRTRAPGTQRPRAQHLPAPVCCCCSSSSSSSSSSSSSSSSSSSSKRLAP Feature GSSSSSRVRMVLPKPIVEAPQATWSWMRNSNLHSRSRPWSATPREVASQSLEPPWPPAR Feature GCRSSCQHLRTRMTQLPHPRCPCWAPLSPA* // ID M20132:(362)c.+101insGGGCCC; I34ins+2 Feature DNA; 1 Feature /label: insertion Feature /proof: computed Feature /location: 100^101 Feature /upflank: ctgttccagagcgtgcgcgaagtga Feature /change: >gggccc Feature /dnflank: tccagaacccgggccccaggcaccc Feature /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, Feature +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, Feature +SduI Feature RNA; 1 Feature /label: inframe, insertion Feature /proof: experimental Feature /location: 100^101 (M20132::462^463) Feature /upflank: ctgttccagagcgtgcgcgaagtga Feature /change: >gggccc Feature /dnflank: tccagaacccgggccccaggcaccc Feature /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, Feature +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, Feature +SduI Feature /codon_table: 1 Feature /codon: atc>-; 2 Feature /region: coding Feature AA; 1 Feature /label: insertion, complex Feature /proof: computed Feature /location: 34 Feature /change: I>RAL // ID M20132:(362)c.+100insG; I34ins81X Feature DNA; 1 Feature /label: insertion Feature /proof: computed Feature /location: 99^100 Feature /upflank: tctgttccagagcgtgcgcgaagtg Feature /change: >g Feature /dnflank: atccagaacccgggccccaggcacc Feature /re_site: +BamHI, +BinI, +NlaIV, +XhoII Feature RNA; 1 Feature /label: frameshift, insertion Feature /proof: experimental Feature /location: 99^100 (M20132::461^462) Feature /upflank: tctgttccagagcgtgcgcgaagtg Feature /change: >g Feature /dnflank: atccagaacccgggccccaggcacc Feature /re_site: +BamHI, +BinI, +NlaIV, +XhoII Feature /codon_table: 1 Feature /codon: atc>-; 1 Feature /region: coding Feature AA; 1 Feature /label: out-of-frame translation, truncation Feature /proof: computed Feature /location: 34 Feature /change: I>DPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD* // ID M20132:(362)c.+100AT>GGGCCC; I34ins82X Feature DNA; 1 Feature /label: complex Feature /proof: computed Feature /location: 100..101 Feature /upflank: tctgttccagagcgtgcgcgaagtg Feature /change: at>gggccc Feature /dnflank: ccagaacccgggccccaggcaccca Feature /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, Feature +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI Feature RNA; 1 Feature /label: frameshift, complex Feature /proof: experimental Feature /location: 100..101 (M20132::462..463) Feature /upflank: tctgttccagagcgtgcgcgaagtg Feature /change: at>gggccc Feature /dnflank: ccagaacccgggccccaggcaccca Feature /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, Feature +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI Feature /codon_table: 1 Feature /codon: atc>-; 1 Feature /region: coding Feature AA; 1 Feature /label: out-of-frame translation, truncation Feature /proof: computed Feature /location: 34 Feature /change: I>GPPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD* // ID M20132:(362+1)c.-1G>A Feature DNA; 1 Feature /label: point, transition Feature /proof: computed Feature /location: -1 Feature /upflank: ggtggaagattcagccaagctcaag Feature /change: g>a Feature /dnflank: atggaagtgcagttagggctgggaa Feature /re_site: -BccI, -FokI, +Hpy178III Feature RNA; 1 Feature /label: unknown Feature /proof: experimental Feature /location: -1 (M20132::361) Feature /upflank: ggtggaagattcagccaagctcaag Feature /change: g>a Feature /dnflank: atggaagtgcagttagggctgggaa Feature /re_site: -BccI, -FokI, +Hpy178III Feature /region: 5'UTR // ID M20132:(362)c.+2766T>C Feature DNA; 1 Feature /label: point, transition Feature /proof: computed Feature /location: 2766 Feature /upflank: tctatttccacacccagtgaagcat Feature /change: t>c Feature /dnflank: ggaaaccctatttccccaccccagc Feature /re_site: +Hpy188I, +SfaNI, -XcmI Feature RNA; 1 Feature /label: unknown Feature /proof: experimental Feature /location: 2766 (M20132::3128) Feature /upflank: tctatttccacacccagtgaagcat Feature /change: t>c Feature /dnflank: ggaaaccctatttccccaccccagc Feature /re_site: +Hpy188I, +SfaNI, -XcmI Feature /region: 3'UTR // ID J02933:(521)g.+12165A>G Feature DNA; 1 Feature /label: point, transition Feature /proof: experimental Feature /location: 12165 (J02933::12686) Feature /upflank: cgcacacctgtggtgcctgccaccc Feature /change: a>g Feature /dnflank: ctgggttgcccatgattcatttttg Feature /re_site: +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, Feature -TspRI Feature /region: 3'UTR; (+1027) Feature RNA; 1 Feature /label: unknown Feature /proof: computed Feature /location: 2428 Feature /upflank: cgcacacctgtggtgcctgccaccc Feature /change: a>g Feature /dnflank: ctgggttgcccatgattcatttttg Feature /re_site: +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, Feature -TspRI Feature /region: 3'UTR; (-1) // ID J02933:(521)g.+4G>T; V2F Feature DNA; 1 Feature /label: point, transversion Feature /proof: computed Feature /location: 4 (J02933::525) Feature /upflank: gcagcactgcagagatttcatcatg Feature /change: g>t Feature /dnflank: tctcccaggccctcaggctcctctg Feature /re_site: -BsmAI, -Eco31I Feature /region: exon; 1 (+4) Feature RNA; 1 Feature /label: missense Feature /proof: experimental Feature /location: 4 Feature /upflank: gcagcactgcagagatttcatcatg Feature /change: g>t Feature /dnflank: tctcccaggccctcaggctcctctg Feature /re_site: -BsmAI, -Eco31I Feature /codon_table: 1 Feature /codon: gtc>ttc; 1 Feature /region: coding Feature AA; 1 Feature /label: substitution, nonconservative Feature /proof: computed Feature /location: 2 Feature /change: V>F // ID J02933:(521)g.+1168G>T; D34Y Feature DNA; 1 Feature /label: point, transversion Feature /proof: computed Feature /location: 1168 (J02933::1689) Feature /upflank: taaggcctcaggaggagaaacacgg Feature /change: g>t Feature /dnflank: acatgccgtggaagccggggcctca Feature /re_site: -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, Feature +Tsp4CI Feature /region: exon; 1 (-29) Feature RNA; 1 Feature /label: missense Feature /proof: experimental Feature /location: 100 Feature /upflank: taaggcctcaggaggagaaacacgg Feature /change: g>t Feature /dnflank: acatgccgtggaagccggggcctca Feature /re_site: -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, Feature +Tsp4CI Feature /codon_table: 1 Feature /codon: gac>tac; 1 Feature /region: coding Feature AA; 1 Feature /label: substitution, nonconservative Feature /proof: computed Feature /location: 34 Feature /change: D>Y // ID J02933:(521+1)g.-4C>G Feature DNA; 1 Feature /label: point, transversion Feature /proof: computed Feature /location: -4 (J02933::518) Feature /upflank: ggcaggggcagcactgcagagattt Feature /change: c>g Feature /dnflank: atcatggtctcccaggccctcaggc Feature /re_site: +BclI, +DpnI, +MboI Feature /region: 5'UTR; (-4) Feature RNA; 1 Feature /label: unknown Feature /proof: experimental Feature /location: -4 Feature /upflank: ggcaggggcagcactgcagagattt Feature /change: c>g Feature /dnflank: atcatggtctcccaggccctcaggc Feature /re_site: +BclI, +DpnI, +MboI Feature /region: 5'UTR; (+31) // mutations.xml100644000766000024 4145313604710571 20274 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/t/data computed gaagattcagccaagctcaaggatg g a aagtgcagttagggctgggaagggt -BccI experimental gaagattcagccaagctcaaggatg g a aagtgcagttagggctgggaagggt -BccI coding computed E K computed ccaagctcaaggatggaagtgcagt t a agggctgggaagggtctaccctcgg experimental ccaagctcaaggatggaagtgcagt t a agggctgggaagggtctaccctcgg coding computed L * computed gaagattcagccaagctcaaggatg g a aagtgcagttagggctgggaagggt -BccI experimental gaagattcagccaagctcaaggatg g a aagtgcagttagggctgggaagggt -BccI coding computed E K computed tctgttccagagcgtgcgcgaagtg atccag aacccgggccccaggcacccagagg -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV experimental tctgttccagagcgtgcgcgaagtg atccag aacccgggccccaggcacccagagg -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV coding computed IQ computed ctgttccagagcgtgcgcgaagtga t ccagaacccgggccccaggcaccca -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I experimental ctgttccagagcgtgcgcgaagtga t ccagaacccgggccccaggcaccca -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I coding computed I TRTRAPGTQRPRAQHLPAPVCCCCSSSSSSSSSSSSSSSSSSSSKRLAPGSSSSSRVRMVLPKPIVEAPQATWSWMRNSNLHSRSRPWSATPREVASQSLEPPWPPARGCRSSCQHLRTRMTQLPHPRCPCWAPLSPA* computed ctgttccagagcgtgcgcgaagtga gggccc tccagaacccgggccccaggcaccc +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, +SduI experimental ctgttccagagcgtgcgcgaagtga gggccc tccagaacccgggccccaggcaccc +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, +SduI coding computed I RAL computed tctgttccagagcgtgcgcgaagtg g atccagaacccgggccccaggcacc +BamHI, +BinI, +NlaIV, +XhoII experimental tctgttccagagcgtgcgcgaagtg g atccagaacccgggccccaggcacc +BamHI, +BinI, +NlaIV, +XhoII coding computed I DPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD* computed tctgttccagagcgtgcgcgaagtg at gggccc ccagaacccgggccccaggcaccca +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI experimental tctgttccagagcgtgcgcgaagtg at gggccc ccagaacccgggccccaggcaccca +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI coding computed I GPPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD* computed ggtggaagattcagccaagctcaag g a atggaagtgcagttagggctgggaa -BccI, -FokI, +Hpy178III experimental ggtggaagattcagccaagctcaag g a atggaagtgcagttagggctgggaa -BccI, -FokI, +Hpy178III 5'UTR computed tctatttccacacccagtgaagcat t c ggaaaccctatttccccaccccagc +Hpy188I, +SfaNI, -XcmI experimental tctatttccacacccagtgaagcat t c ggaaaccctatttccccaccccagc +Hpy188I, +SfaNI, -XcmI 3'UTR experimental cgcacacctgtggtgcctgccaccc a g ctgggttgcccatgattcatttttg +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, -TspRI 3'UTR computed cgcacacctgtggtgcctgccaccc a g ctgggttgcccatgattcatttttg +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, -TspRI 3'UTR computed gcagcactgcagagatttcatcatg g t tctcccaggccctcaggctcctctg -BsmAI, -Eco31I exon experimental gcagcactgcagagatttcatcatg g t tctcccaggccctcaggctcctctg -BsmAI, -Eco31I coding computed V F computed taaggcctcaggaggagaaacacgg g t acatgccgtggaagccggggcctca -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, +Tsp4CI exon experimental taaggcctcaggaggagaaacacgg g t acatgccgtggaagccggggcctca -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, +Tsp4CI coding computed D Y computed ggcaggggcagcactgcagagattt c g atcatggtctcccaggccctcaggc +BclI, +DpnI, +MboI 5'UTR experimental ggcaggggcagcactgcagagattt c g atcatggtctcccaggccctcaggc +BclI, +DpnI, +MboI 5'UTR Bio000755000766000024 013604710571 15443 5ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/libVariation.pm100644000766000024 202013604710571 20067 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/lib/Biopackage Bio::Variation; $Bio::Variation::VERSION = '1.7.5'; use utf8; use strict; use warnings; 1; # ABSTRACT: BioPerl variation-related functionality # AUTHOR: Heikki Lehväslaiho # OWNER: See the individual modules for their copyright holders # LICENSE: Perl_5 =head1 SYNOPSIS See L for examples. =head1 DESCRIPTION These classes are part of "Computational Mutation Expression Toolkit" project at European Bioinformatics Institute , but they are written to be as general as possinble. Bio::Variation name space contains modules to store sequence variation information as differences between the reference sequence and changes sequences. Also included are classes to write out and recrete objects from EMBL-like flat files and XML. Lastly, there are simple classes to calculate values for sequence change objects. See "Computational Mutation Expression Toolkit" web pages for more information: http://www.ebi.ac.uk/mutations/toolkit/ =cut __END__ author-pod-syntax.t100644000766000024 45413604710571 20345 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/t#!perl BEGIN { unless ($ENV{AUTHOR_TESTING}) { print qq{1..0 # SKIP these tests are for testing by the author\n}; exit } } # This file was automatically generated by Dist::Zilla::Plugin::PodSyntaxTests. use strict; use warnings; use Test::More; use Test::Pod 1.41; all_pod_files_ok(); polymorphism.dat100644000766000024 500413604710571 20733 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/t/dataID M20132:(362)[c.+4G|A|T;c.+31C|A]; [E2|K|X;Q11|K] Feature DNA; 1.1 Feature /label: point, transition Feature /proof: computed Feature /location: 4 Feature /upflank: gaagattcagccaagctcaaggatg Feature /change: g|a Feature /dnflank: aagtgcagttagggctgggaagggt Feature /re_site: -BccI Feature DNA; 1.2 Feature /label: point, transversion Feature /proof: computed Feature /location: 4 Feature /upflank: gaagattcagccaagctcaaggatg Feature /change: g|t Feature /dnflank: aagtgcagttagggctgggaagggt Feature /re_site: -BccI Feature RNA; 1.1 Feature /label: missense Feature /proof: experimental Feature /location: 4 (M20132::366) Feature /upflank: gaagattcagccaagctcaaggatg Feature /change: g|a Feature /dnflank: aagtgcagttagggctgggaagggt Feature /re_site: -BccI Feature /codon_table: 1 Feature /codon: gaa|aaa; 1 Feature /region: coding Feature RNA; 1.2 Feature /label: nonsense Feature /proof: experimental Feature /location: 4 (M20132::366) Feature /upflank: gaagattcagccaagctcaaggatg Feature /change: g|t Feature /dnflank: aagtgcagttagggctgggaagggt Feature /re_site: -BccI Feature /codon_table: 1 Feature /codon: gaa|taa; 1 Feature /region: coding Feature AA; 1.1 Feature /label: substitution, conservative Feature /proof: computed Feature /location: 2 Feature /change: E|K Feature AA; 1.2 Feature /label: truncation Feature /proof: computed Feature /location: 2 Feature /change: E|* Feature DNA; 2 Feature /label: point, transversion Feature /proof: computed Feature /location: 31 Feature /upflank: gaagattcagccaagctcaaggatg Feature /change: c|a Feature /dnflank: aagtgcagttagggctgggaagggt Feature /re_site: -CviRI, -SfaNI Feature RNA; 2 Feature /label: missense Feature /proof: experimental Feature /location: 31 (M20132::393) Feature /upflank: gaagattcagccaagctcaaggatg Feature /change: c|a Feature /dnflank: aagtgcagttagggctgggaagggt Feature /re_site: -CviRI, -SfaNI Feature /codon_table: 1 Feature /codon: caa|aaa; 1 Feature /region: coding Feature AA; 2 Feature /label: substitution, conservative Feature /proof: computed Feature /location: 11 Feature /change: Q|K // polymorphism.xml100644000766000024 667013604710571 20775 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/t/data computed gaagattcagccaagctcaaggatg g a aagtgcagttagggctgggaagggt -BccI computed gaagattcagccaagctcaaggatg g t aagtgcagttagggctgggaagggt -BccI experimental gaagattcagccaagctcaaggatg g a aagtgcagttagggctgggaagggt -BccI coding experimental gaagattcagccaagctcaaggatg g t aagtgcagttagggctgggaagggt -BccI coding computed E K computed E * computed gaagattcagccaagctcaaggatg c a aagtgcagttagggctgggaagggt -CviRI, -SfaNI experimental gaagattcagccaagctcaaggatg c a aagtgcagttagggctgggaagggt -CviRI, -SfaNI coding computed Q K Variation000755000766000024 013604710571 17377 5ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/lib/BioIO.pm100644000766000024 2230013604710571 20421 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/lib/Bio/Variation# # BioPerl module for Bio::Variation::IO # # Please direct questions and support issues to # # Cared for by Heikki Lehvaslaiho # # Copyright Heikki Lehvaslaiho # # You may distribute this module under the same terms as perl itself # # POD documentation - main docs before the code =head1 NAME Bio::Variation::IO - Handler for sequence variation IO Formats =head1 SYNOPSIS use Bio::Variation::IO; $in = Bio::Variation::IO->new(-file => "inputfilename" , -format => 'flat'); $out = Bio::Variation::IO->new(-file => ">outputfilename" , -format => 'xml'); while ( my $seq = $in->next() ) { $out->write($seq); } # or use Bio::Variation::IO; #input file format can be read from the file extension (dat|xml) $in = Bio::Variation::IO->newFh(-file => "inputfilename"); $out = Bio::Variation::IO->newFh(-format => 'xml'); # World's shortest flat<->xml format converter: print $out $_ while <$in>; =head1 DESCRIPTION Bio::Variation::IO is a handler module for the formats in the Variation IO set (eg, Bio::Variation::IO::flat). It is the officially sanctioned way of getting at the format objects, which most people should use. The structure, conventions and most of the code is inherited from L module. The main difference is that instead of using methods next_seq and write_seq, you drop '_seq' from the method names. The idea is that you request a stream object for a particular format. All the stream objects have a notion of an internal file that is read from or written to. A particular SeqIO object instance is configured for either input or output. A specific example of a stream object is the Bio::Variation::IO::flat object. Each stream object has functions $stream->next(); and $stream->write($seqDiff); also $stream->type() # returns 'INPUT' or 'OUTPUT' As an added bonus, you can recover a filehandle that is tied to the SeqIO object, allowing you to use the standard EE and print operations to read and write sequence objects: use Bio::Variation::IO; $stream = Bio::Variation::IO->newFh(-format => 'flat'); # read from standard input while ( $seq = <$stream> ) { # do something with $seq } and print $stream $seq; # when stream is in output mode This makes the simplest ever reformatter #!/usr/local/bin/perl $format1 = shift; $format2 = shift; use Bio::Variation::IO; $in = Bio::Variation::IO->newFh(-format => $format1 ); $out = Bio::Variation::IO->newFh(-format => $format2 ); print $out $_ while <$in>; =head1 CONSTRUCTORS =head2 Bio::Variation::IO-Enew() $seqIO = Bio::Variation::IO->new(-file => 'filename', -format=>$format); $seqIO = Bio::Variation::IO->new(-fh => \*FILEHANDLE, -format=>$format); $seqIO = Bio::Variation::IO->new(-format => $format); The new() class method constructs a new Bio::Variation::IO object. The returned object can be used to retrieve or print BioSeq objects. new() accepts the following parameters: =over 4 =item -file A file path to be opened for reading or writing. The usual Perl conventions apply: 'file' # open file for reading '>file' # open file for writing '>>file' # open file for appending '+new(-fh => \*STDIN); Note that you must pass filehandles as references to globs. If neither a filehandle nor a filename is specified, then the module will read from the @ARGV array or STDIN, using the familiar EE semantics. =item -format Specify the format of the file. Supported formats include: flat pseudo EMBL format xml seqvar xml format If no format is specified and a filename is given, then the module will attempt to deduce it from the filename. If this is unsuccessful, Fasta format is assumed. The format name is case insensitive. 'FLAT', 'Flat' and 'flat' are all supported. =back =head2 Bio::Variation::IO-EnewFh() $fh = Bio::Variation::IO->newFh(-fh => \*FILEHANDLE, -format=>$format); $fh = Bio::Variation::IO->newFh(-format => $format); # etc. #e.g. $out = Bio::Variation::IO->newFh( '-FORMAT' => 'flat'); print $out $seqDiff; This constructor behaves like new(), but returns a tied filehandle rather than a Bio::Variation::IO object. You can read sequences from this object using the familiar EE operator, and write to it using print(). The usual array and $_ semantics work. For example, you can read all sequence objects into an array like this: @mutations = <$fh>; Other operations, such as read(), sysread(), write(), close(), and printf() are not supported. =head1 OBJECT METHODS See below for more detailed summaries. The main methods are: =head2 $sequence = $seqIO-Enext() Fetch the next sequence from the stream. =head2 $seqIO-Ewrite($sequence [,$another_sequence,...]) Write the specified sequence(s) to the stream. =head2 TIEHANDLE(), READLINE(), PRINT() These provide the tie interface. See L for more details. =head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to the Bioperl mailing lists Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Support Please direct usage questions or support issues to the mailing list: I rather than to the module maintainer directly. Many experienced and reponsive experts will be able look at the problem and quickly address it. Please include a thorough description of the problem with code and data examples if at all possible. =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track the bugs and their resolution. Bug reports can be submitted via the web: https://github.com/bioperl/bioperl-live/issues =head1 AUTHOR - Heikki Lehvaslaiho Email: heikki-at-bioperl-dot-org =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut # Let the code begin... package Bio::Variation::IO; $Bio::Variation::IO::VERSION = '1.7.5'; use strict; use base qw(Bio::SeqIO Bio::Root::IO); =head2 new Title : new Usage : $stream = Bio::Variation::IO->new(-file => $filename, -format => 'Format') Function: Returns a new seqstream Returns : A Bio::Variation::IO::Handler initialised with the appropriate format Args : -file => $filename -format => format -fh => filehandle to attach to =cut sub new { my ($class, %param) = @_; my ($format); @param{ map { lc $_ } keys %param } = values %param; # lowercase keys $format = $param{'-format'} || $class->_guess_format( $param{-file} || $ARGV[0] ) || 'flat'; $format = "\L$format"; # normalize capitalization to lower case return unless $class->_load_format_module($format); return "Bio::Variation::IO::$format"->new(%param); } =head2 format Title : format Usage : $format = $stream->format() Function: Get the variation format Returns : variation format Args : none =cut # format() method inherited from Bio::Root::IO sub _load_format_module { my ($class, $format) = @_; my $module = "Bio::Variation::IO::" . $format; my $ok; eval { $ok = $class->_load_module($module); }; if ( $@ ) { print STDERR <next Function: reads the next $seqDiff object from the stream Returns : a Bio::Variation::SeqDiff object Args : =cut sub next { my ($self, $seq) = @_; $self->throw("Sorry, you cannot read from a generic Bio::Variation::IO object."); } sub next_seq { my ($self, $seq) = @_; $self->throw("These are not sequence objects. Use method 'next' instead of 'next_seq'."); $self->next($seq); } =head2 write Title : write Usage : $stream->write($seq) Function: writes the $seq object into the stream Returns : 1 for success and 0 for error Args : Bio::Variation::SeqDiff object =cut sub write { my ($self, $seq) = @_; $self->throw("Sorry, you cannot write to a generic Bio::Variation::IO object."); } sub write_seq { my ($self, $seq) = @_; $self->warn("These are not sequence objects. Use method 'write' instead of 'write_seq'."); $self->write($seq); } =head2 _guess_format Title : _guess_format Usage : $obj->_guess_format($filename) Function: Example : Returns : guessed format of filename (lower case) Args : =cut sub _guess_format { my $class = shift; return unless $_ = shift; return 'flat' if /\.dat$/i; return 'xml' if /\.xml$/i; } 1; author-pod-coverage.t100644000766000024 53613604710571 20613 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/t#!perl BEGIN { unless ($ENV{AUTHOR_TESTING}) { print qq{1..0 # SKIP these tests are for testing by the author\n}; exit } } # This file was automatically generated by Dist::Zilla::Plugin::PodCoverageTests. use Test::Pod::Coverage 1.08; use Pod::Coverage::TrustPod; all_pod_coverage_ok({ coverage_class => 'Pod::Coverage::TrustPod' }); mutations.old.xml100644000766000024 4147113604710571 21051 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/t/data computed gaagattcagccaagctcaaggatg g a aagtgcagttagggctgggaagggt -BccI experimental gaagattcagccaagctcaaggatg g a aagtgcagttagggctgggaagggt -BccI coding computed E K computed ccaagctcaaggatggaagtgcagt t a agggctgggaagggtctaccctcgg experimental ccaagctcaaggatggaagtgcagt t a agggctgggaagggtctaccctcgg coding computed L * computed gaagattcagccaagctcaaggatg g a aagtgcagttagggctgggaagggt -BccI experimental gaagattcagccaagctcaaggatg g a aagtgcagttagggctgggaagggt -BccI coding computed E K computed tctgttccagagcgtgcgcgaagtg atccag aacccgggccccaggcacccagagg -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV experimental tctgttccagagcgtgcgcgaagtg atccag aacccgggccccaggcacccagagg -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV coding computed IQ computed ctgttccagagcgtgcgcgaagtga t ccagaacccgggccccaggcaccca -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I experimental ctgttccagagcgtgcgcgaagtga t ccagaacccgggccccaggcaccca -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I coding computed I TRTRAPGTQRPRAQHLPAPVCCCCSSSSSSSSSSSSSSSSSSSSKRLAPGSSSSSRVRMVLPKPIVEAPQATWSWMRNSNLHSRSRPWSATPREVASQSLEPPWPPARGCRSSCQHLRTRMTQLPHPRCPCWAPLSPA* computed ctgttccagagcgtgcgcgaagtga gggccc tccagaacccgggccccaggcaccc +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, +SduI experimental ctgttccagagcgtgcgcgaagtga gggccc tccagaacccgggccccaggcaccc +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, +SduI coding computed I RAL computed tctgttccagagcgtgcgcgaagtg g atccagaacccgggccccaggcacc +BamHI, +BinI, +NlaIV, +XhoII experimental tctgttccagagcgtgcgcgaagtg g atccagaacccgggccccaggcacc +BamHI, +BinI, +NlaIV, +XhoII coding computed I DPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD* computed tctgttccagagcgtgcgcgaagtg at gggccc ccagaacccgggccccaggcaccca +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI experimental tctgttccagagcgtgcgcgaagtg at gggccc ccagaacccgggccccaggcaccca +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI coding computed I GPPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD* computed ggtggaagattcagccaagctcaag g a atggaagtgcagttagggctgggaa -BccI, -FokI, +Hpy178III experimental ggtggaagattcagccaagctcaag g a atggaagtgcagttagggctgggaa -BccI, -FokI, +Hpy178III 5'UTR computed tctatttccacacccagtgaagcat t c ggaaaccctatttccccaccccagc +Hpy188I, +SfaNI, -XcmI experimental tctatttccacacccagtgaagcat t c ggaaaccctatttccccaccccagc +Hpy188I, +SfaNI, -XcmI 3'UTR experimental cgcacacctgtggtgcctgccaccc a g ctgggttgcccatgattcatttttg +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, -TspRI 3'UTR computed cgcacacctgtggtgcctgccaccc a g ctgggttgcccatgattcatttttg +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, -TspRI 3'UTR computed gcagcactgcagagatttcatcatg g t tctcccaggccctcaggctcctctg -BsmAI, -Eco31I exon experimental gcagcactgcagagatttcatcatg g t tctcccaggccctcaggctcctctg -BsmAI, -Eco31I coding computed V F computed taaggcctcaggaggagaaacacgg g t acatgccgtggaagccggggcctca -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, +Tsp4CI exon experimental taaggcctcaggaggagaaacacgg g t acatgccgtggaagccggggcctca -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, +Tsp4CI coding computed D Y computed ggcaggggcagcactgcagagattt c g atcatggtctcccaggccctcaggc +BclI, +DpnI, +MboI 5'UTR experimental ggcaggggcagcactgcagagattt c g atcatggtctcccaggccctcaggc +BclI, +DpnI, +MboI 5'UTR mutations.old.dat100644000766000024 3117413604710571 21020 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/t/dataID M20132:(362)c.+4G>A; E2K Feature DNA; 1 Feature /label: point, transition Feature /proof: computed Feature /location: 4 Feature /upflank: gaagattcagccaagctcaaggatg Feature /change: g>a Feature /dnflank: aagtgcagttagggctgggaagggt Feature /re_site: -BccI Feature RNA; 1 Feature /label: missense Feature /proof: experimental Feature /location: 4 (M20132::366) Feature /upflank: gaagattcagccaagctcaaggatg Feature /change: g>a Feature /dnflank: aagtgcagttagggctgggaagggt Feature /re_site: -BccI Feature /codon_table: 1 Feature /codon: gaa>aaa; 1 Feature /region: coding Feature AA; 1 Feature /label: substitution, conservative Feature /proof: computed Feature /location: 2 Feature /change: E>K // ID M20132:(362)c.+14T>A; L5X Feature DNA; 1 Feature /label: point, transversion Feature /proof: computed Feature /location: 14 Feature /upflank: ccaagctcaaggatggaagtgcagt Feature /change: t>a Feature /dnflank: agggctgggaagggtctaccctcgg Feature RNA; 1 Feature /label: nonsense Feature /proof: experimental Feature /location: 14 (M20132::376) Feature /upflank: ccaagctcaaggatggaagtgcagt Feature /change: t>a Feature /dnflank: agggctgggaagggtctaccctcgg Feature /codon_table: 1 Feature /codon: tta>taa; 2 Feature /region: coding Feature AA; 1 Feature /label: truncation Feature /proof: computed Feature /location: 5 Feature /change: L>* // ID M20132:(362)c.+4G>A; E2K Feature DNA; 1 Feature /label: point, transition Feature /proof: computed Feature /location: 4 Feature /upflank: gaagattcagccaagctcaaggatg Feature /change: g>a Feature /dnflank: aagtgcagttagggctgggaagggt Feature /re_site: -BccI Feature RNA; 1 Feature /label: missense Feature /proof: experimental Feature /location: 4 (M20132::366) Feature /upflank: gaagattcagccaagctcaaggatg Feature /change: g>a Feature /dnflank: aagtgcagttagggctgggaagggt Feature /re_site: -BccI Feature /codon_table: 1 Feature /codon: gaa>aaa; 1 Feature /region: coding Feature AA; 1 Feature /label: substitution, conservative Feature /proof: computed Feature /location: 2 Feature /change: E>K // ID M20132:(362)c.+100delATCCAG; I34del-2 Feature DNA; 1 Feature /label: deletion Feature /proof: computed Feature /location: 100..105 Feature /upflank: tctgttccagagcgtgcgcgaagtg Feature /change: atccag> Feature /dnflank: aacccgggccccaggcacccagagg Feature /re_site: -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV Feature RNA; 1 Feature /label: inframe, deletion Feature /proof: experimental Feature /location: 100..105 (M20132::462..467) Feature /upflank: tctgttccagagcgtgcgcgaagtg Feature /change: atccag> Feature /dnflank: aacccgggccccaggcacccagagg Feature /re_site: -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV Feature /codon_table: 1 Feature /codon: atc>-; 1 Feature /region: coding Feature AA; 1 Feature /label: deletion Feature /proof: computed Feature /location: 34..35 Feature /change: IQ> // ID M20132:(362)c.+101delT; I34delX172 Feature DNA; 1 Feature /label: deletion Feature /proof: computed Feature /location: 101 Feature /upflank: ctgttccagagcgtgcgcgaagtga Feature /change: t> Feature /dnflank: ccagaacccgggccccaggcaccca Feature /re_site: -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I Feature RNA; 1 Feature /label: frameshift, deletion Feature /proof: experimental Feature /location: 101 (M20132::463) Feature /upflank: ctgttccagagcgtgcgcgaagtga Feature /change: t> Feature /dnflank: ccagaacccgggccccaggcaccca Feature /re_site: -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I Feature /codon_table: 1 Feature /codon: atc>-; 2 Feature /region: coding Feature AA; 1 Feature /label: out-of-frame translation, truncation Feature /proof: computed Feature /location: 34 Feature /change: I>TRTRAPGTQRPRAQHLPAPVCCCCSSSSSSSSSSSSSSSSSSSSKRLAP Feature GSSSSSRVRMVLPKPIVEAPQATWSWMRNSNLHSRSRPWSATPREVASQSLEPPWPPAR Feature GCRSSCQHLRTRMTQLPHPRCPCWAPLSPA* // ID M20132:(362)c.+101insGGGCCC; I34ins+2 Feature DNA; 1 Feature /label: insertion Feature /proof: computed Feature /location: 100^101 Feature /upflank: ctgttccagagcgtgcgcgaagtga Feature /change: >gggccc Feature /dnflank: tccagaacccgggccccaggcaccc Feature /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, Feature +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, Feature +SduI Feature RNA; 1 Feature /label: inframe, insertion Feature /proof: experimental Feature /location: 100^101 (M20132::462^463) Feature /upflank: ctgttccagagcgtgcgcgaagtga Feature /change: >gggccc Feature /dnflank: tccagaacccgggccccaggcaccc Feature /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, Feature +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, Feature +SduI Feature /codon_table: 1 Feature /codon: atc>-; 2 Feature /region: coding Feature AA; 1 Feature /label: insertion, complex Feature /proof: computed Feature /location: 34 Feature /change: I>RAL // ID M20132:(362)c.+100insG; I34ins81X Feature DNA; 1 Feature /label: insertion Feature /proof: computed Feature /location: 99^100 Feature /upflank: tctgttccagagcgtgcgcgaagtg Feature /change: >g Feature /dnflank: atccagaacccgggccccaggcacc Feature /re_site: +BamHI, +BinI, +NlaIV, +XhoII Feature RNA; 1 Feature /label: frameshift, insertion Feature /proof: experimental Feature /location: 99^100 (M20132::461^462) Feature /upflank: tctgttccagagcgtgcgcgaagtg Feature /change: >g Feature /dnflank: atccagaacccgggccccaggcacc Feature /re_site: +BamHI, +BinI, +NlaIV, +XhoII Feature /codon_table: 1 Feature /codon: atc>-; 1 Feature /region: coding Feature AA; 1 Feature /label: out-of-frame translation, truncation Feature /proof: computed Feature /location: 34 Feature /change: I>DPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD* // ID M20132:(362)c.+100AT>GGGCCC; I34ins82X Feature DNA; 1 Feature /label: complex Feature /proof: computed Feature /location: 100..101 Feature /upflank: tctgttccagagcgtgcgcgaagtg Feature /change: at>gggccc Feature /dnflank: ccagaacccgggccccaggcaccca Feature /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, Feature +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI Feature RNA; 1 Feature /label: frameshift, complex Feature /proof: experimental Feature /location: 100..101 (M20132::462..463) Feature /upflank: tctgttccagagcgtgcgcgaagtg Feature /change: at>gggccc Feature /dnflank: ccagaacccgggccccaggcaccca Feature /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, Feature +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI Feature /codon_table: 1 Feature /codon: atc>-; 1 Feature /region: coding Feature AA; 1 Feature /label: out-of-frame translation, truncation Feature /proof: computed Feature /location: 34 Feature /change: I>GPPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD* // ID M20132:(362+1)c.-1G>A Feature DNA; 1 Feature /label: point, transition Feature /proof: computed Feature /location: -1 Feature /upflank: ggtggaagattcagccaagctcaag Feature /change: g>a Feature /dnflank: atggaagtgcagttagggctgggaa Feature /re_site: -BccI, -FokI, +Hpy178III Feature RNA; 1 Feature /label: unknown Feature /proof: experimental Feature /location: -1 (M20132::361) Feature /upflank: ggtggaagattcagccaagctcaag Feature /change: g>a Feature /dnflank: atggaagtgcagttagggctgggaa Feature /re_site: -BccI, -FokI, +Hpy178III Feature /region: 5'UTR // ID M20132:(362)c.+2766T>C Feature DNA; 1 Feature /label: point, transition Feature /proof: computed Feature /location: 2766 Feature /upflank: tctatttccacacccagtgaagcat Feature /change: t>c Feature /dnflank: ggaaaccctatttccccaccccagc Feature /re_site: +Hpy188I, +SfaNI, -XcmI Feature RNA; 1 Feature /label: unknown Feature /proof: experimental Feature /location: 2766 (M20132::3128) Feature /upflank: tctatttccacacccagtgaagcat Feature /change: t>c Feature /dnflank: ggaaaccctatttccccaccccagc Feature /re_site: +Hpy188I, +SfaNI, -XcmI Feature /region: 3'UTR // ID J02933:(521)g.+12165A>G Feature DNA; 1 Feature /label: point, transition Feature /proof: experimental Feature /location: 12165 (J02933::12686) Feature /upflank: cgcacacctgtggtgcctgccaccc Feature /change: a>g Feature /dnflank: ctgggttgcccatgattcatttttg Feature /re_site: +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, Feature -TspRI Feature /region: 3'UTR; (+1027) Feature RNA; 1 Feature /label: unknown Feature /proof: computed Feature /location: 2428 Feature /upflank: cgcacacctgtggtgcctgccaccc Feature /change: a>g Feature /dnflank: ctgggttgcccatgattcatttttg Feature /re_site: +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, Feature -TspRI Feature /region: 3'UTR; (-1) // ID J02933:(521)g.+4G>T; V2F Feature DNA; 1 Feature /label: point, transversion Feature /proof: computed Feature /location: 4 (J02933::525) Feature /upflank: gcagcactgcagagatttcatcatg Feature /change: g>t Feature /dnflank: tctcccaggccctcaggctcctctg Feature /re_site: -BsmAI, -Eco31I Feature /region: exon; 1 (+4) Feature RNA; 1 Feature /label: missense Feature /proof: experimental Feature /location: 4 Feature /upflank: gcagcactgcagagatttcatcatg Feature /change: g>t Feature /dnflank: tctcccaggccctcaggctcctctg Feature /re_site: -BsmAI, -Eco31I Feature /codon_table: 1 Feature /codon: gtc>ttc; 1 Feature /region: coding Feature AA; 1 Feature /label: substitution, nonconservative Feature /proof: computed Feature /location: 2 Feature /change: V>F // ID J02933:(521)g.+1168G>T; D34Y Feature DNA; 1 Feature /label: point, transversion Feature /proof: computed Feature /location: 1168 (J02933::1689) Feature /upflank: taaggcctcaggaggagaaacacgg Feature /change: g>t Feature /dnflank: acatgccgtggaagccggggcctca Feature /re_site: -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, Feature +Tsp4CI Feature /region: exon; 1 (-29) Feature RNA; 1 Feature /label: missense Feature /proof: experimental Feature /location: 100 Feature /upflank: taaggcctcaggaggagaaacacgg Feature /change: g>t Feature /dnflank: acatgccgtggaagccggggcctca Feature /re_site: -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, Feature +Tsp4CI Feature /codon_table: 1 Feature /codon: gac>tac; 1 Feature /region: coding Feature AA; 1 Feature /label: substitution, nonconservative Feature /proof: computed Feature /location: 34 Feature /change: D>Y // ID J02933:(521+1)g.-4C>G Feature DNA; 1 Feature /label: point, transversion Feature /proof: computed Feature /location: -4 (J02933::518) Feature /upflank: ggcaggggcagcactgcagagattt Feature /change: c>g Feature /dnflank: atcatggtctcccaggccctcaggc Feature /re_site: +BclI, +DpnI, +MboI Feature /region: 5'UTR; (-4) Feature RNA; 1 Feature /label: unknown Feature /proof: experimental Feature /location: -4 Feature /upflank: ggcaggggcagcactgcagagattt Feature /change: c>g Feature /dnflank: atcatggtctcccaggccctcaggc Feature /re_site: +BclI, +DpnI, +MboI Feature /region: 5'UTR; (+31) // SNP.pm100644000766000024 1146713604710571 20566 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/lib/Bio/Variation# bioperl module for Bio::Variation::SNP # # Copyright Allen Day , Stan Nelson # Human Genetics, UCLA Medical School, University of California, Los Angeles =head1 NAME Bio::Variation::SNP - submitted SNP =head1 SYNOPSIS $SNP = Bio::Variation::SNP->new (); =head1 DESCRIPTION Inherits from Bio::Variation::SeqDiff and Bio::Variation::Allele, with additional methods that are (db)SNP specific (ie, refSNP/subSNP IDs, batch IDs, validation methods). =head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to one of the Bioperl mailing lists. Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Support Please direct usage questions or support issues to the mailing list: I rather than to the module maintainer directly. Many experienced and reponsive experts will be able look at the problem and quickly address it. Please include a thorough description of the problem with code and data examples if at all possible. =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track the bugs and their resolution. Bug reports can be submitted via the web: https://github.com/bioperl/bioperl-live/issues =head1 AUTHOR Allen Day Eallenday@ucla.eduE =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut # Let the code begin... package Bio::Variation::SNP; $Bio::Variation::SNP::VERSION = '1.7.5'; use strict; use vars qw($AUTOLOAD); use Bio::Root::Root; use base qw(Bio::Variation::SeqDiff Bio::Variation::Allele); =head2 get/set-able methods Usage : $is = $snp->method() Function: for getting/setting attributes Returns : a value. probably a scalar. Args : if you're trying to set an attribute, pass in the new value. Methods: -------- id type observed seq_5 seq_3 ncbi_build ncbi_chr_hits ncbi_ctg_hits ncbi_seq_loc ucsc_build ucsc_chr_hits ucsc_ctg_hits heterozygous heterozygous_SE validated genotype handle batch_id method locus_id symbol mrna protein functional_class =cut #' my %OK_AUTOLOAD = ( id => '', type => '', observed => [], seq_5 => '', seq_3 => '', ncbi_build => '', ncbi_chr_hits => '', ncbi_ctg_hits => '', ncbi_seq_loc => '', ucsc_build => '', ucsc_chr_hits => '', ucsc_ctg_hits => '', heterozygous => '', heterozygous_SE => '', validated => '', genotype => '', handle => '', batch_id => '', method => '', locus_id => '', symbol => '', mrna => '', protein => '', functional_class => '', ); sub AUTOLOAD { my $self = shift; my $param = $AUTOLOAD; $param =~ s/.*:://; $self->throw(__PACKAGE__." doesn't implement $param") unless defined $OK_AUTOLOAD{$param}; if( ref $OK_AUTOLOAD{$param} eq 'ARRAY' ) { push @{$self->{$param}}, shift if @_; return $self->{$param}->[scalar(@{$self->{$param}}) - 1]; } else { $self->{$param} = shift if @_; return $self->{$param}; } } #foreach my $slot (keys %RWSLOT){ # no strict "refs"; #add class methods to package # *$slot = sub { # shift; # $RWSLOT{$slot} = shift if @_; # return $RWSLOT{$slot}; # }; #} =head2 is_subsnp Title : is_subsnp Usage : $is = $snp->is_subsnp() Function: returns 1 if $snp is a subSNP Returns : 1 or undef Args : NONE =cut sub is_subsnp { return shift->{is_subsnp}; } =head2 subsnp Title : subsnp Usage : $subsnp = $snp->subsnp() Function: returns the currently active subSNP of $snp Returns : Bio::Variation::SNP Args : NONE =cut sub subsnp { my $self = shift; return $self->{subsnps}->[ scalar($self->each_subsnp) - 1 ]; } =head2 add_subsnp Title : add_subsnp Usage : $subsnp = $snp->add_subsnp() Function: pushes the previous value returned by subsnp() onto a stack, accessible with each_subsnp(). Sets return value of subsnp() to a new Bio::Variation::SNP object, and returns that object. Returns : Bio::Varitiation::SNP Args : NONE =cut sub add_subsnp { my $self = shift; $self->throw("add_subsnp(): cannot add subSNP to subSNP, only to refSNP") if $self->is_subsnp; my $subsnp = Bio::Variation::SNP->new; push @{$self->{subsnps}}, $subsnp; $self->subsnp->{is_subsnp} = 1; return $self->subsnp; } =head2 each_subsnp Title : each_subsnp Usage : @subsnps = $snp->each_subsnp() Function: returns a list of the subSNPs of a refSNP Returns : list Args : NONE =cut sub each_subsnp { my $self = shift; $self->throw("each_subsnp(): cannot be called on a subSNP") if $self->is_subsnp; return @{$self->{subsnps}}; } 1; polymorphism.old.xml100644000766000024 667113604710571 21553 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/t/data computed gaagattcagccaagctcaaggatg g a aagtgcagttagggctgggaagggt -BccI computed gaagattcagccaagctcaaggatg g t aagtgcagttagggctgggaagggt -BccI experimental gaagattcagccaagctcaaggatg g a aagtgcagttagggctgggaagggt -BccI coding experimental gaagattcagccaagctcaaggatg g t aagtgcagttagggctgggaagggt -BccI coding computed E K computed E * computed gaagattcagccaagctcaaggatg c a aagtgcagttagggctgggaagggt -CviRI, -SfaNI experimental gaagattcagccaagctcaaggatg c a aagtgcagttagggctgggaagggt -CviRI, -SfaNI coding computed Q K Allele.pm100644000766000024 1476213604710571 21325 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/lib/Bio/Variation# # BioPerl module for Bio::Variation::Allele # # Please direct questions and support issues to # # Cared for by Heikki Lehvaslaiho # # Copyright Heikki Lehvaslaiho # # You may distribute this module under the same terms as perl itself # POD documentation - main docs before the code =head1 NAME Bio::Variation::Allele - Sequence object with allele-specific attributes =head1 SYNOPSIS $allele1 = Bio::Variation::Allele->new ( -seq => 'A', -id => 'AC00001.1', -alphabet => 'dna', -is_reference => 1 ); =head1 DESCRIPTION List of alleles describe known sequence alternatives in a variable region. Alleles are contained in Bio::Variation::VariantI complying objects. See L for details. Bio::Varation::Alleles are PrimarySeqI complying objects which can contain database cross references as specified in Bio::DBLinkContainerI interface, too. A lot of the complexity with dealing with Allele objects are caused by null alleles; Allele objects that have zero length sequence string. In addition describing the allele by its sequence , it possible to give describe repeat structure within the sequence. This done using methods repeat_unit (e.g. 'ca') and repeat_count (e.g. 7). =head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to the Bioperl mailing lists Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Support Please direct usage questions or support issues to the mailing list: I rather than to the module maintainer directly. Many experienced and reponsive experts will be able look at the problem and quickly address it. Please include a thorough description of the problem with code and data examples if at all possible. =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track the bugs and their resolution. Bug reports can be submitted via the web: https://github.com/bioperl/bioperl-live/issues =head1 AUTHOR - Heikki Lehvaslaiho Email: heikki-at-bioperl-dot-org =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut # Let the code begin... package Bio::Variation::Allele; $Bio::Variation::Allele::VERSION = '1.7.5'; use strict; # Object preamble - inheritance use base qw(Bio::PrimarySeq Bio::DBLinkContainerI); sub new { my($class, @args) = @_; my $self = $class->SUPER::new(@args); my($is_reference, $repeat_unit, $repeat_count) = $self->_rearrange([qw(IS_REFERENCE REPEAT_UNIT REPEAT_COUNT )], @args); $is_reference && $self->is_reference($is_reference); $repeat_unit && $self->repeat_unit($repeat_unit); $repeat_count && $self->repeat_count($repeat_count); return $self; # success - we hope! } =head2 is_reference Title : is_reference Usage : $obj->is_reference() Function: sets and returns boolean values. Unset values return false. Example : $obj->is_reference() Returns : boolean Args : optional true of false value =cut sub is_reference { my ($self,$value) = @_; if( defined $value) { $value ? ($value = 1) : ($value = 0); $self->{'is_reference'} = $value; } if( ! exists $self->{'is_reference'} ) { return 0; } else { return $self->{'is_reference'}; } } =head2 add_DBLink Title : add_DBLink Usage : $self->add_DBLink($ref) Function: adds a link object Example : Returns : Args : =cut sub add_DBLink{ my ($self,$com) = @_; if( ! $com->isa('Bio::Annotation::DBLink') ) { $self->throw("Is not a link object but a [$com]"); } push(@{$self->{'link'}},$com); } =head2 each_DBLink Title : each_DBLink Usage : foreach $ref ( $self->each_DBlink() ) Function: gets an array of DBlink of objects Example : Returns : Args : =cut sub each_DBLink{ my ($self) = @_; return @{$self->{'link'}}; } =head2 repeat_unit Title : repeat_unit Usage : $obj->repeat_unit('ca'); Function: Sets and returns the sequence of the repeat_unit the allele is composed of. Example : Returns : string Args : string =cut sub repeat_unit { my ($self,$value) = @_; if( defined $value) { $self->{'repeat_unit'} = $value; } if ($self->{'seq'} && $self->{'repeat_unit'} && $self->{'repeat_count'} ) { $self->warn("Repeats do not add up!") if ( $self->{'repeat_unit'} x $self->{'repeat_count'}) ne $self->{'seq'}; } return $self->{'repeat_unit'}; } =head2 repeat_count Title : repeat_count Usage : $obj->repeat_count(); Function: Sets and returns the number of repeat units in the allele. Example : Returns : string Args : string =cut sub repeat_count { my ($self,$value) = @_; if( defined $value) { if ( not $value =~ /^\d+$/ ) { $self->throw("[$value] for repeat_count has to be a positive integer\n"); } else { $self->{'repeat_count'} = $value; } } if ($self->{'seq'} && $self->{'repeat_unit'} && $self->{'repeat_count'} ) { $self->warn("Repeats do not add up!") if ( $self->{'repeat_unit'} x $self->{'repeat_count'}) ne $self->{'seq'}; } return $self->{'repeat_count'}; } =head2 count Title : count Usage : $obj->count(); Function: Sets and returns the number of times this allele was observed. Example : Returns : string Args : string =cut sub count { my ($self,$value) = @_; if( defined $value) { if ( not $value =~ /^\d+$/ ) { $self->throw("[$value] for count has to be a positive integer\n"); } else { $self->{'count'} = $value; } } return $self->{'count'}; } =head2 frequency Title : frequency Usage : $obj->frequency(); Function: Sets and returns the frequency of the allele in the observed population. Example : Returns : string Args : string =cut sub frequency { my ($self,$value) = @_; if( defined $value) { if ( not $value =~ /^\d+$/ ) { $self->throw("[$value] for frequency has to be a positive integer\n"); } else { $self->{'frequency'} = $value; } } return $self->{'frequency'}; } 1; IO000755000766000024 013604710571 17706 5ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/lib/Bio/Variationxml.pm100644000766000024 3473413604710571 21237 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/lib/Bio/Variation/IO# BioPerl module for Bio::Variation::IO::xml # # Please direct questions and support issues to # # Cared for by Heikki Lehvaslaiho # # Copyright Heikki Lehvaslaiho # # You may distribute this module under the same terms as perl itself # # POD documentation - main docs before the code =head1 NAME Bio::Variation::IO::xml - XML sequence variation input/output stream =head1 SYNOPSIS Do not use this module directly. Use it via the Bio::Variation::IO class. =head1 DESCRIPTION This object can transform L objects to and from XML file databases. The XML format, although consistent, is still evolving. The current DTD for it is at L. =head1 REQUIREMENTS To use this code you need the module L which creates an interface to L to read XML and modules L and L to write XML out. =head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to the Bioperl mailing lists Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Support Please direct usage questions or support issues to the mailing list: I rather than to the module maintainer directly. Many experienced and reponsive experts will be able look at the problem and quickly address it. Please include a thorough description of the problem with code and data examples if at all possible. =head2 Reporting Bugs report bugs to the Bioperl bug tracking system to help us keep track the bugs and their resolution. Bug reports can be submitted via the web: https://github.com/bioperl/bioperl-live/issues =head1 AUTHOR - Heikki Lehvaslaiho Email: heikki-at-bioperl-dot-org =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut # Let the code begin... package Bio::Variation::IO::xml; $Bio::Variation::IO::xml::VERSION = '1.7.5'; use vars qw($seqdiff $var $prevdnaobj $prevrnaobj $prevaaobj); use strict; use XML::Twig; use XML::Writer 0.4; use IO::String; use Bio::Variation::SeqDiff; use Bio::Variation::DNAMutation; use Bio::Variation::RNAChange; use Bio::Variation::AAChange; use Bio::Variation::Allele; use base qw(Bio::Variation::IO); # _initialize is where the heavy stuff will happen when new is called sub new { my ($class,@args) = @_; my $self = bless {}, $class; $self->_initialize(@args); return $self; } sub _initialize { my($self,@args) = @_; return unless $self->SUPER::_initialize(@args); } =head2 next Title : next Usage : $haplo = $stream->next() Function: returns the next seqDiff in the stream Returns : Bio::Variation::SeqDiff object Args : NONE =cut sub _seqDiff { my ($t, $term)= @_; $seqdiff->id( $term->att('id') ); $seqdiff->alphabet( $term->att('moltype') ); $seqdiff->offset( $term->att('offset') ); foreach my $child ($term->children) { _variant($t, $child); } } sub _variant { my ($t, $term)= @_; my $var; my $att = $term->atts(); my ($variation_number, $change_number) = split /\./, $att->{number}; # if more than two alleles if ($variation_number and $change_number and $change_number > 1 ) { my $a3 = Bio::Variation::Allele->new; $a3->seq( $term->first_child_text('allele_mut') ) if $term->first_child_text('allele_mut'); if ($term->gi eq 'DNA') { $prevdnaobj->add_Allele($a3); } elsif ($term->gi eq 'RNA') { $prevrnaobj->add_Allele($a3); } else { # AA $prevaaobj->add_Allele($a3); } } else { # create new variants if ($term->gi eq 'DNA') { $var = Bio::Variation::DNAMutation->new(); } elsif ($term->gi eq 'RNA') { $var = Bio::Variation::RNAChange->new(); } else { # AA $var = Bio::Variation::AAChange->new(); } # these are always present $var->start( $att->{start} ); $var->end( $att->{end}); $var->length($att->{len}); $var->mut_number( $att->{number}); $var->upStreamSeq($term->first_child_text('upFlank')); $var->dnStreamSeq($term->first_child_text('dnFlank')); $var->proof($term->first_child_text('proof')); # region my $region = $term->first_child('region'); if ($region) { $var->region($region->text); my $region_atts = $region->atts; $var->region_value( $region_atts->{value} ) if $region_atts->{value}; $var->region_dist( $region_atts->{dist} ) if $region_atts->{dist}; } # alleles my $a1 = Bio::Variation::Allele->new; $a1->seq($term->first_child_text('allele_ori') ) if $term->first_child_text('allele_ori'); $var->allele_ori($a1); my $a2 = Bio::Variation::Allele->new; $a2->seq($term->first_child_text('allele_mut') ) if $term->first_child_text('allele_mut'); $var->isMutation(1) if $term->att('isMutation'); $var->allele_mut($a2); $var->add_Allele($a2); $var->length( $term->att('length') ); $seqdiff->add_Variant($var); # variant specific code if ($term->gi eq 'DNA') { $prevdnaobj = $var; } elsif ($term->gi eq 'RNA') { my $codon = $term->first_child('codon'); if ($codon) { my $codon_atts = $codon->atts; $var->codon_table( $codon->att('codon_table') ) if $codon_atts->{codon_table} and $codon_atts->{codon_table} != 1; $var->codon_pos( $codon->att('codon_pos') ) if $codon_atts->{codon_pos}; } $prevdnaobj->RNAChange($var); $var->DNAMutation($prevdnaobj); $prevrnaobj = $var; } else { $prevrnaobj->AAChange($var); $var->RNAChange($prevrnaobj); $prevaaobj = $var; } } } sub next { my( $self ) = @_; local $/ = "\n"; return unless my $entry = $self->_readline; # print STDERR "|$entry|"; return unless $entry =~ /^\W*new; # create new parser object my $twig_handlers = {'seqDiff' => \&_seqDiff }; my $t = XML::Twig->new ( TwigHandlers => $twig_handlers, KeepEncoding => 1 ); $t->parse($entry); return $seqdiff; } =head2 write Title : write Usage : $stream->write(@haplos) Function: writes the $seqDiff objects into the stream Returns : 1 for success and 0 for error Args : Bio::Variation::SeqDiff object =cut sub write { my ($self,@h) = @_; if( !defined $h[0] ) { $self->throw("Attempting to write with no information!"); } my $str; my $output = IO::String->new($str); my $w = XML::Writer->new(OUTPUT => $output, DATA_MODE => 1, DATA_INDENT => 4 ); foreach my $h (@h) { # # seqDiff # $h->alphabet || $self->throw("Moltype of the reference sequence is not set!"); my $hasAA = 0; foreach my $mut ($h->each_Variant) { $hasAA = 1 if $mut->isa('Bio::Variation::AAChange'); } if ($hasAA) { $w->startTag("seqDiff", "id" => $h->id, "moltype" => $h->alphabet, "offset" => $h->offset, "sysname" => $h->sysname, "trivname" => $h->trivname ); } else { $w->startTag("seqDiff", "id" => $h->id, "moltype" => $h->alphabet, "offset" => $h->offset, "sysname" => $h->sysname ); } my @allvariants = $h->each_Variant; #print "allvars:", scalar @allvariants, "\n"; my %variants = (); foreach my $mut ($h->each_Variant) { #print STDERR $mut->mut_number, "\t", $mut, "\t", #$mut->proof, "\t", scalar $mut->each_Allele, "\n"; push @{$variants{$mut->mut_number} }, $mut; } foreach my $var (sort keys %variants) { foreach my $mut (@{$variants{$var}}) { # # DNA # if( $mut->isa('Bio::Variation::DNAMutation') ) { $mut->isMutation(0) if not $mut->isMutation; my @alleles = $mut->each_Allele; my $count = 0; foreach my $allele (@alleles) { $count++; my ($variation_number, $change_number) = split /\./, $mut->mut_number; if ($change_number and $change_number != $count){ $mut->mut_number("$change_number.$count"); } $mut->allele_mut($allele); $w->startTag("DNA", "number" => $mut->mut_number, "start" => $mut->start, "end" => $mut->end, "length" => $mut->length, "isMutation" => $mut->isMutation ); if ($mut->label) { foreach my $label (split ', ', $mut->label) { $w->startTag("label"); $w->characters($label); $w->endTag; } } if ($mut->proof) { $w->startTag("proof"); $w->characters($mut->proof ); $w->endTag; } if ($mut->upStreamSeq) { $w->startTag("upFlank"); $w->characters($mut->upStreamSeq ); $w->endTag; } #if ( $mut->isMutation) { #if ($mut->allele_ori) { $w->startTag("allele_ori"); $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq ; $w->endTag; #} #if ($mut->allele_mut) { $w->startTag("allele_mut"); $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq; $w->endTag; #} #} if ($mut->dnStreamSeq) { $w->startTag("dnFlank"); $w->characters($mut->dnStreamSeq ); $w->endTag; } if ($mut->restriction_changes) { $w->startTag("restriction_changes"); $w->characters($mut->restriction_changes); $w->endTag; } if ($mut->region) { if($mut->region_value and $mut->region_dist) { $w->startTag("region", "value" => $mut->region_value, "dist" => $mut->region_dist ); } elsif($mut->region_value) { $w->startTag("region", "value" => $mut->region_value ); } elsif($mut->region_dist) { $w->startTag("region", "dist" => $mut->region_dist ); } else { $w->startTag("region"); } $w->characters($mut->region ); $w->endTag; } $w->endTag; #DNA } } # # RNA # elsif( $mut->isa('Bio::Variation::RNAChange') ) { $mut->isMutation(0) if not $mut->isMutation; my @alleles = $mut->each_Allele; my $count = 0; foreach my $allele (@alleles) { $count++; my ($variation_number, $change_number) = split /\./, $mut->mut_number; if ($change_number and $change_number != $count){ $mut->mut_number("$change_number.$count"); } $mut->allele_mut($allele); $w->startTag("RNA", "number" => $mut->mut_number, "start" => $mut->start, "end" => $mut->end, "length" => $mut->length, "isMutation" => $mut->isMutation ); if ($mut->label) { foreach my $label (split ', ', $mut->label) { $w->startTag("label"); $w->characters($label ); $w->endTag; } } if ($mut->proof) { $w->startTag("proof"); $w->characters($mut->proof ); $w->endTag; } if ($mut->upStreamSeq) { $w->startTag("upFlank"); $w->characters($mut->upStreamSeq ); $w->endTag; } #if ( $mut->isMutation) { if ($mut->allele_ori) { $w->startTag("allele_ori"); $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq ; $w->endTag; } if ($mut->allele_mut) { $w->startTag("allele_mut"); $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq ; $w->endTag; } #} if ($mut->dnStreamSeq) { $w->startTag("dnFlank"); $w->characters($mut->dnStreamSeq ); $w->endTag; } if ($mut->region eq 'coding') { if (! $mut->codon_mut) { $w->startTag("codon", "codon_ori" => $mut->codon_ori, "codon_pos" => $mut->codon_pos ); } else { $w->startTag("codon", "codon_ori" => $mut->codon_ori, "codon_mut" => $mut->codon_mut, "codon_pos" => $mut->codon_pos ); } $w->endTag; } if ($mut->codon_table != 1) { $w->startTag("codon_table"); $w->characters($mut->codon_table); $w->endTag; } if ($mut->restriction_changes) { $w->startTag("restriction_changes"); $w->characters($mut->restriction_changes); $w->endTag; } if ($mut->region) { if($mut->region_value and $mut->region_dist) { $w->startTag("region", "value" => $mut->region_value, "dist" => $mut->region_dist ); } elsif($mut->region_value) { $w->startTag("region", "value" => $mut->region_value ); } elsif($mut->region_dist) { $w->startTag("region", "dist" => $mut->region_dist ); } else { $w->startTag("region"); } $w->characters($mut->region ); $w->endTag; } $w->endTag; #RNA } } # # AA # elsif( $mut->isa('Bio::Variation::AAChange') ) { $mut->isMutation(0) if not $mut->isMutation; my @alleles = $mut->each_Allele; my $count = 0; foreach my $allele (@alleles) { $count++; my ($variation_number, $change_number) = split /\./, $mut->mut_number; if ($change_number and $change_number != $count){ $mut->mut_number("$change_number.$count"); } $mut->allele_mut($allele); $w->startTag("AA", "number" => $mut->mut_number, "start" => $mut->start, "end" => $mut->end, "length" => $mut->length, "isMutation" => $mut->isMutation ); if ($mut->label) { foreach my $label (split ', ', $mut->label) { $w->startTag("label"); $w->characters($label ); $w->endTag; } } if ($mut->proof) { $w->startTag("proof"); $w->characters($mut->proof ); $w->endTag; } #if ( $mut->isMutation) { if ($mut->allele_ori) { $w->startTag("allele_ori"); $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq; $w->endTag; } if ($mut->allele_mut) { $w->startTag("allele_mut"); $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq; $w->endTag; } #} if ($mut->region) { if($mut->region_value and $mut->region_dist) { $w->startTag("region", "value" => $mut->region_value, "dist" => $mut->region_dist ); } elsif($mut->region_value) { $w->startTag("region", "value" => $mut->region_value ); } elsif($mut->region_dist) { $w->startTag("region", "dist" => $mut->region_dist ); } else { $w->startTag("region"); } $w->characters($mut->region ); $w->endTag; } $w->endTag; #AA } } } } } $w->endTag; $w->end; $self->_print($str); $output = undef; return 1; } 1; SeqDiff.pm100644000766000024 6071313604710571 21445 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/lib/Bio/Variation# bioperl module for Bio::Variation::SeqDiff # # Please direct questions and support issues to # # Cared for by Heikki Lehvaslaiho # # Copyright Heikki Lehvaslaiho # # You may distribute this module under the same terms as perl itself # # POD documentation - main docs before the code # cds_end definition? =head1 NAME Bio::Variation::SeqDiff - Container class for mutation/variant descriptions =head1 SYNOPSIS $seqDiff = Bio::Variation::SeqDiff->new ( -id => $M20132, -alphabet => 'rna', -gene_symbol => 'AR' -chromosome => 'X', -numbering => 'coding' ); # get a DNAMutation object somehow $seqDiff->add_Variant($dnamut); print $seqDiff->sys_name(), "\n"; =head1 DESCRIPTION SeqDiff stores Bio::Variation::VariantI object references and descriptive information common to all changes in a sequence. Mutations are understood to be any kind of sequence markers and are expected to occur in the same chromosome. See L for details. The methods of SeqDiff are geared towards describing mutations in human genes using gene-based coordinate system where 'A' of the initiator codon has number 1 and the one before it -1. This is according to conventions of human genetics. There will be class Bio::Variation::Genotype to describe markers in different chromosomes and diploid genototypes. Classes implementing Bio::Variation::VariantI interface are Bio::Variation::DNAMutation, Bio::Variation::RNAChange, and Bio::Variation::AAChange. See L, L, L, and L for more information. Variant objects can be added using two ways: an array passed to the constructor or as individual Variant objects with add_Variant method. =head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to the Bioperl mailing lists Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Support Please direct usage questions or support issues to the mailing list: I rather than to the module maintainer directly. Many experienced and reponsive experts will be able look at the problem and quickly address it. Please include a thorough description of the problem with code and data examples if at all possible. =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track the bugs and their resolution. Bug reports can be submitted via the web: https://github.com/bioperl/bioperl-live/issues =head1 AUTHOR - Heikki Lehvaslaiho Email: heikki-at-bioperl-dot-org =head1 CONTRIBUTORS Eckhard Lehmann, ecky@e-lehmann.de =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut # Let the code begin... package Bio::Variation::SeqDiff; $Bio::Variation::SeqDiff::VERSION = '1.7.5'; use strict; use Bio::Tools::CodonTable; use Bio::PrimarySeq; use base qw(Bio::Root::Root); =head2 new Title : new Usage : $seqDiff = Bio::Variation::SeqDiff->new; Function: generates a new Bio::Variation::SeqDiff Returns : reference to a new object of class SeqDiff Args : =cut sub new { my($class,@args) = @_; my $self = $class->SUPER::new(@args); my($id, $sysname, $trivname, $chr, $gene_symbol, $desc, $alphabet, $numbering, $offset, $rna_offset, $rna_id, $cds_end, $dna_ori, $dna_mut, $rna_ori, $rna_mut, $aa_ori, $aa_mut #@variants, @genes ) = $self->_rearrange([qw(ID SYSNAME TRIVNAME CHR GENE_SYMBOL DESC ALPHABET NUMBERING OFFSET RNA_OFFSET RNA_ID CDS_END DNA_ORI DNA_MUT RNA_ORI AA_ORI AA_MUT )], @args); #my $make = $self->SUPER::_initialize(@args); $id && $self->id($id); $sysname && $self->sysname($sysname); $trivname && $self->trivname($trivname); $chr && $self->chromosome($chr); $gene_symbol && $self->gene_symbol($chr); $desc && $self->description($desc); $alphabet && $self->alphabet($alphabet); $numbering && $self->numbering($numbering); $offset && $self->offset($offset); $rna_offset && $self->rna_offset($rna_offset); $rna_id && $self->rna_id($rna_id); $cds_end && $self->cds_end($cds_end); $dna_ori && $self->dna_ori($dna_ori); $dna_mut && $self->dna_mut($dna_mut); $rna_ori && $self->rna_ori($rna_ori); $rna_mut && $self->rna_mut($rna_mut); $aa_ori && $self->aa_ori ($aa_ori); $aa_mut && $self->aa_mut ($aa_mut); $self->{ 'variants' } = []; #@variants && push(@{$self->{'variants'}},@variants); $self->{ 'genes' } = []; #@genes && push(@{$self->{'genes'}},@genes); return $self; # success - we hope! } =head2 id Title : id Usage : $obj->id(H0001); $id = $obj->id(); Function: Sets or returns the id of the seqDiff. Should be used to give the collection of variants a UID without semantic associations. Example : Returns : value of id, a scalar Args : newvalue (optional) =cut sub id { my ($self,$value) = @_; if (defined $value) { $self->{'id'} = $value; } else { return $self->{'id'}; } } =head2 sysname Title : sysname Usage : $obj->sysname('5C>G'); $sysname = $obj->sysname(); Function: Sets or returns the systematic name of the seqDiff. The name should follow the HUGO Mutation Database Initiative approved nomenclature. If called without first setting the value, will generate it from L objects attached. Example : Returns : value of sysname, a scalar Args : newvalue (optional) =cut sub sysname { my ($self,$value) = @_; if (defined $value) { $self->{'sysname'} = $value; } elsif (not defined $self->{'sysname'}) { my $sysname = ''; my $c = 0; foreach my $mut ($self->each_Variant) { if( $mut->isa('Bio::Variation::DNAMutation') ) { $c++; if ($c == 1 ) { $sysname = $mut->sysname ; } else { $sysname .= ";". $mut->sysname; } } } $sysname = "[". $sysname. "]" if $c > 1; $self->{'sysname'} = $sysname; } return $self->{'sysname'}; } =head2 trivname Title : trivname Usage : $obj->trivname('[A2G;T56G]'); $trivname = $obj->trivname(); Function: Sets or returns the trivial name of the seqDiff. The name should follow the HUGO Mutation Database Initiative approved nomenclature. If called without first setting the value, will generate it from L objects attached. Example : Returns : value of trivname, a scalar Args : newvalue (optional) =cut sub trivname { my ($self,$value) = @_; if (defined $value) { $self->{'trivname'} = $value; } elsif (not defined $self->{'trivname'}) { my $trivname = ''; my $c = 0; foreach my $mut ($self->each_Variant) { if( $mut->isa('Bio::Variation::AAChange') ) { $c++; if ($c == 1 ) { $trivname = $mut->trivname ; } else { $trivname .= ";". $mut->trivname; } } } $trivname = "[". $trivname. "]" if $c > 1; $self->{'trivname'} = $trivname; } else { return $self->{'trivname'}; } } =head2 chromosome Title : chromosome Usage : $obj->chromosome('X'); $chromosome = $obj->chromosome(); Function: Sets or returns the chromosome ("linkage group") of the seqDiff. Example : Returns : value of chromosome, a scalar Args : newvalue (optional) =cut sub chromosome { my ($self,$value) = @_; if (defined $value) { $self->{'chromosome'} = $value; } else { return $self->{'chromosome'}; } } =head2 gene_symbol Title : gene_symbol Usage : $obj->gene_symbol('FOS'); $gene_symbol = $obj->gene_symbol; Function: Sets or returns the gene symbol for the studied CDS. Example : Returns : value of gene_symbol, a scalar Args : newvalue (optional) =cut sub gene_symbol { my ($self,$value) = @_; if (defined $value) { $self->{'gene_symbol'} = $value; } else { return $self->{'gene_symbol'}; } } =head2 description Title : description Usage : $obj->description('short description'); $descr = $obj->description(); Function: Sets or returns the short description of the seqDiff. Example : Returns : value of description, a scalar Args : newvalue (optional) =cut sub description { my ($self,$value) = @_; if (defined $value) { $self->{'description'} = $value; } else { return $self->{'description'}; } } =head2 alphabet Title : alphabet Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ } Function: Returns the type of primary reference sequence being one of 'dna', 'rna' or 'protein'. This is case sensitive. Returns : a string either 'dna','rna','protein'. Args : none =cut sub alphabet { my ($self,$value) = @_; my %type = (dna => 1, rna => 1, protein => 1); if( defined $value ) { if ($type{$value}) { $self->{'alphabet'} = $value; } else { $self->throw("$value is not valid alphabet value!"); } } return $self->{'alphabet'}; } =head2 numbering Title : numbering Usage : $obj->numbering('coding'); $numbering = $obj->numbering(); Function: Sets or returns the string giving the numbering schema used to describe the variants. Example : Returns : value of numbering, a scalar Args : newvalue (optional) =cut sub numbering { my ($self,$value) = @_; if (defined $value) { $self->{'numbering'} = $value; } else { return $self->{'numbering'}; } } =head2 offset Title : offset Usage : $obj->offset(124); $offset = $obj->offset(); Function: Sets or returns the offset from the beginning of the DNA sequence to the coordinate start used to describe variants. Typically the beginning of the coding region of the gene. The cds_start should be 1 + offset. Example : Returns : value of offset, a scalar Args : newvalue (optional) =cut sub offset { my ($self,$value) = @_; if (defined $value) { $self->{'offset'} = $value; } elsif (not defined $self->{'offset'} ) { return $self->{'offset'} = 0; } else { return $self->{'offset'}; } } =head2 cds_start Title : cds_start Usage : $obj->cds_start(123); $cds_start = $obj->cds_start(); Function: Sets or returns the cds_start from the beginning of the DNA sequence to the coordinate start used to describe variants. Typically the beginning of the coding region of the gene. Needs to be and is implemented as 1 + offset. Example : Returns : value of cds_start, a scalar Args : newvalue (optional) =cut sub cds_start { my ($self,$value) = @_; if (defined $value) { $self->{'offset'} = $value - 1; } else { return $self->{'offset'} + 1; } } =head2 cds_end Title : cds_end Usage : $obj->cds_end(321); $cds_end = $obj->cds_end(); Function: Sets or returns the position of the last nucleotitide of the termination codon. The coordinate system starts from cds_start. Example : Returns : value of cds_end, a scalar Args : newvalue (optional) =cut sub cds_end { my ($self,$value) = @_; if (defined $value) { $self->{'cds_end'} = $value; } else { return $self->{'cds_end'}; #$self->{'cds_end'} = CORE::length($self->SeqDiff->rna_ori)/3; } } =head2 rna_offset Title : rna_offset Usage : $obj->rna_offset(124); $rna_offset = $obj->rna_offset(); Function: Sets or returns the rna_offset from the beginning of the RNA sequence to the coordinate start used to describe variants. Typically the beginning of the coding region of the gene. Example : Returns : value of rna_offset, a scalar Args : newvalue (optional) =cut sub rna_offset { my ($self,$value) = @_; if (defined $value) { $self->{'rna_offset'} = $value; } elsif (not defined $self->{'rna_offset'} ) { return $self->{'rna_offset'} = 0; } else { return $self->{'rna_offset'}; } } =head2 rna_id Title : rna_id Usage : $obj->rna_id('transcript#3'); $rna_id = $obj->rna_id(); Function: Sets or returns the ID for original RNA sequence of the seqDiff. Example : Returns : value of rna_id, a scalar Args : newvalue (optional) =cut sub rna_id { my ($self,$value) = @_; if (defined $value) { $self->{'rna_id'} = $value; } else { return $self->{'rna_id'}; } } =head2 add_Variant Title : add_Variant Usage : $obj->add_Variant($variant) Function: Pushes one Bio::Variation::Variant into the list of variants. At the same time, creates a link from the Variant to SeqDiff using its SeqDiff method. Example : Returns : 1 when succeeds, 0 for failure. Args : Variant object =cut sub add_Variant { my ($self,$value) = @_; if (defined $value) { if( ! $value->isa('Bio::Variation::VariantI') ) { $self->throw("Is not a VariantI complying object but a [$self]"); return 0; } else { push(@{$self->{'variants'}},$value); $value->SeqDiff($self); return 1; } } else { return 0; } } =head2 each_Variant Title : each_Variant Usage : $obj->each_Variant(); Function: Returns a list of Variants. Example : Returns : list of Variants Args : none =cut sub each_Variant{ my ($self,@args) = @_; return @{$self->{'variants'}}; } =head2 add_Gene Title : add_Gene Usage : $obj->add_Gene($gene) Function: Pushes one L into the list of genes. Example : Returns : 1 when succeeds, 0 for failure. Args : Bio::LiveSeq::Gene object See L for more information. =cut sub add_Gene { my ($self,$value) = @_; if (defined $value) { if( ! $value->isa('Bio::LiveSeq::Gene') ) { $value->throw("Is not a Bio::LiveSeq::Gene object but a [$value]"); return 0; } else { push(@{$self->{'genes'}},$value); return 1; } } else { return 0; } } =head2 each_Gene Title : each_Gene Usage : $obj->each_Gene(); Function: Returns a list of Ls. Example : Returns : list of Genes Args : none =cut sub each_Gene{ my ($self,@args) = @_; return @{$self->{'genes'}}; } =head2 dna_ori Title : dna_ori Usage : $obj->dna_ori('atgctgctgctgct'); $dna_ori = $obj->dna_ori(); Function: Sets or returns the original DNA sequence string of the seqDiff. Example : Returns : value of dna_ori, a scalar Args : newvalue (optional) =cut sub dna_ori { my ($self,$value) = @_; if (defined $value) { $self->{'dna_ori'} = $value; } else { return $self->{'dna_ori'}; } } =head2 dna_mut Title : dna_mut Usage : $obj->dna_mut('atgctggtgctgct'); $dna_mut = $obj->dna_mut(); Function: Sets or returns the mutated DNA sequence of the seqDiff. If sequence has not been set generates it from the original sequence and DNA mutations. Example : Returns : value of dna_mut, a scalar Args : newvalue (optional) =cut sub dna_mut { my ($self,$value) = @_; if (defined $value) { $self->{'dna_mut'} = $value; } else { $self->_set_dnamut() unless $self->{'dna_mut'}; return $self->{'dna_mut'}; } } sub _set_dnamut { my $self = shift; return unless $self->{'dna_ori'} && $self->each_Variant; $self->{'dna_mut'} = $self->{'dna_ori'}; foreach ($self->each_Variant) { next unless $_->isa('Bio::Variation::DNAMutation'); next unless $_->isMutation; my ($s, $la, $le); #lies the mutation less than 25 bases after the start of sequence? if ($_->start < 25) { $s = 0; $la = $_->start - 1; } else { $s = $_->start - 25; $la = 25; } #is the mutation an insertion? $_->end($_->start) unless $_->allele_ori->seq; #does the mutation end greater than 25 bases before the end of #sequence? if (($_->end + 25) > length($self->{'dna_mut'})) { $le = length($self->{'dna_mut'}) - $_->end; } else { $le = 25; } $_->dnStreamSeq(substr($self->{'dna_mut'}, $s, $la)); $_->upStreamSeq(substr($self->{'dna_mut'}, $_->end, $le)); my $s_ori = $_->dnStreamSeq . $_->allele_ori->seq . $_->upStreamSeq; my $s_mut = $_->dnStreamSeq . $_->allele_mut->seq . $_->upStreamSeq; (my $str = $self->{'dna_mut'}) =~ s/$s_ori/$s_mut/; $self->{'dna_mut'} = $str; } } =head2 rna_ori Title : rna_ori Usage : $obj->rna_ori('atgctgctgctgct'); $rna_ori = $obj->rna_ori(); Function: Sets or returns the original RNA sequence of the seqDiff. Example : Returns : value of rna_ori, a scalar Args : newvalue (optional) =cut sub rna_ori { my ($self,$value) = @_; if (defined $value) { $self->{'rna_ori'} = $value; } else { return $self->{'rna_ori'}; } } =head2 rna_mut Title : rna_mut Usage : $obj->rna_mut('atgctggtgctgct'); $rna_mut = $obj->rna_mut(); Function: Sets or returns the mutated RNA sequence of the seqDiff. Example : Returns : value of rna_mut, a scalar Args : newvalue (optional) =cut sub rna_mut { my ($self,$value) = @_; if (defined $value) { $self->{'rna_mut'} = $value; } else { return $self->{'rna_mut'}; } } =head2 aa_ori Title : aa_ori Usage : $obj->aa_ori('MAGVLL*'); $aa_ori = $obj->aa_ori(); Function: Sets or returns the original protein sequence of the seqDiff. Example : Returns : value of aa_ori, a scalar Args : newvalue (optional) =cut sub aa_ori { my ($self,$value) = @_; if (defined $value) { $self->{'aa_ori'} = $value; } else { return $self->{'aa_ori'}; } } =head2 aa_mut Title : aa_mut Usage : $obj->aa_mut('MA*'); $aa_mut = $obj->aa_mut(); Function: Sets or returns the mutated protein sequence of the seqDiff. Example : Returns : value of aa_mut, a scalar Args : newvalue (optional) =cut sub aa_mut { my ($self,$value) = @_; if (defined $value) { $self->{'aa_mut'} = $value; } else { return $self->{'aa_mut'}; } } =head2 seqobj Title : seqobj Usage : $dnaobj = $obj->seqobj('dna_mut'); Function: Returns the any original or mutated sequences as a Bio::PrimarySeq object. Example : Returns : Bio::PrimarySeq object for the requested sequence Args : string, method name for the sequence requested See L for more information. =cut sub seqobj { my ($self,$value) = @_; my $out; my %valid_obj = map {$_, 1} qw(dna_ori rna_ori aa_ori dna_mut rna_mut aa_mut); $valid_obj{$value} || $self->throw("Sequence type '$value' is not a valid type (". join(',', map "'$_'", sort keys %valid_obj) .") lowercase"); my ($alphabet) = $value =~ /([^_]+)/; my $id = $self->id; $id = $self->rna_id if $self->rna_id; $alphabet = 'protein' if $alphabet eq 'aa'; $out = Bio::PrimarySeq->new ( '-seq' => $self->{$value}, '-display_id' => $id, '-accession_number' => $self->id, '-alphabet' => $alphabet ) if $self->{$value} ; return $out; } =head2 alignment Title : alignment Usage : $obj->alignment Function: Returns a pretty RNA/AA sequence alignment from linked objects. Under construction: Only simple coding region point mutations work. Example : Returns : Args : none =cut sub alignment { my $self = shift; my (@entry, $text); my $maxflanklen = 12; foreach my $mut ($self->each_Variant) { if( $mut->isa('Bio::Variation::RNAChange') ) { my $upflank = $mut->upStreamSeq; my $dnflank = $mut->dnStreamSeq; my $cposd = $mut->codon_pos; my $rori = $mut->allele_ori->seq; my $rmut = $mut->allele_mut->seq; my $rseqoriu = ''; my $rseqmutu = ''; my $rseqorid = ''; my $rseqmutd = ''; my $aaseqmutu = ''; my (@rseqori, @rseqmut ); # point if ($mut->DNAMutation->label =~ /point/) { if ($cposd == 1 ) { my $nt2d = substr($dnflank, 0, 2); push @rseqori, $rori. $nt2d; push @rseqmut, uc ($rmut). $nt2d; $dnflank = substr($dnflank, 2); } elsif ($cposd == 2) { my $ntu = chop $upflank; my $ntd = substr($dnflank, 0, 1); push @rseqori, $ntu. $rori. $ntd; push @rseqmut, $ntu. uc ($rmut). $ntd; $dnflank = substr($dnflank, 1); } elsif ($cposd == 3) { my $ntu1 = chop $upflank; my $ntu2 = chop $upflank; push (@rseqori, $ntu2. $ntu1. $rori); push (@rseqmut, $ntu2. $ntu1. uc $rmut); } } #deletion elsif ($mut->DNAMutation->label =~ /deletion/) { if ($cposd == 2 ) { $rseqorid = chop $upflank; $rseqmutd = $rseqorid; } for (my $i=1; $i<=$mut->length; $i++) { my $ntd .= substr($mut->allele_ori, $i-1, 1); $rseqorid .= $ntd; if (length($rseqorid) == 3 ) { push (@rseqori, $rseqorid); push (@rseqmut, " "); $rseqorid = ''; } } if ($rseqorid) { $rseqorid .= substr($dnflank, 0, 3-$rseqorid); push (@rseqori, $rseqorid); push (@rseqmut, " "); $dnflank = substr($dnflank,3-$rseqorid); } } $upflank = reverse $upflank; # loop throught the flanks for (my $i=1; $i<=length($dnflank); $i++) { last if $i > $maxflanklen; my $ntd .= substr($dnflank, $i-1, 1); my $ntu .= substr($upflank, $i-1, 1); $rseqmutd .= $ntd; $rseqorid .= $ntd; $rseqmutu = $ntu. $rseqmutu; $rseqoriu = $ntu. $rseqoriu; if (length($rseqorid) == 3 and length($rseqorid) == 3) { push (@rseqori, $rseqorid); push (@rseqmut, $rseqmutd); $rseqorid = $rseqmutd =''; } if (length($rseqoriu) == 3 and length($rseqoriu) == 3) { unshift (@rseqori, $rseqoriu); unshift (@rseqmut, $rseqmutu); $rseqoriu = $rseqmutu =''; } #print "|i=$i, $cposd, $rseqmutd, $rseqorid\n"; #print "|i=$i, $cposu, $rseqmutu, $rseqoriu\n\n"; } push (@rseqori, $rseqorid); unshift (@rseqori, $rseqoriu); push (@rseqmut, $rseqmutd); unshift (@rseqmut, $rseqmutu); return unless $mut->AAChange; #translate my $tr = Bio::Tools::CodonTable->new('-id' => $mut->codon_table); my $apos = $mut->AAChange->start; my $aposmax = CORE::length($self->aa_ori); #terminator codon no my $rseqori; my $rseqmut; my $aaseqori; my $aaseqmut = ""; for (my $i = 0; $i <= $#rseqori; $i++) { my $a = ''; $a = $tr->translate($rseqori[$i]) if length($rseqori[$i]) == 3; if (length($a) != 1 or $apos - ( $maxflanklen/2 -1) + $i < 1 or $apos - ( $maxflanklen/2 -1) + $i > $aposmax ) { $aaseqori .= " "; } else { $aaseqori .= " ". $a. " "; } my $b = ''; if (length($rseqmut[$i]) == 3) { if ($rseqmut[$i] eq ' ') { $b = "_"; } else { $b = $tr->translate($rseqmut[$i]); } } if (( $b ne $a and length($b) == 1 and $apos - ( $maxflanklen/2 -1) + $i >= 1 ) or ( $apos - ( $maxflanklen/2 -1) + $i >= $aposmax and $mut->label =~ 'termination') ) { $aaseqmut .= " ". $b. " "; } else { $aaseqmut .= " "; } if ($i == 0 and length($rseqori[$i]) != 3) { my $l = 3 - length($rseqori[$i]); $rseqori[$i] = (" " x $l). $rseqori[$i]; $rseqmut[$i] = (" " x $l). $rseqmut[$i]; } $rseqori .= $rseqori[$i]. " " if $rseqori[$i] ne ''; $rseqmut .= $rseqmut[$i]. " " if $rseqmut[$i] ne ''; } # collect the results push (@entry, "\n" ); $text = " ". $aaseqmut; push (@entry, $text ); $text = "Variant : ". $rseqmut; push (@entry, $text ); $text = "Reference: ". $rseqori; push (@entry, $text ); $text = " ". $aaseqori; push (@entry, $text ); push (@entry, "\n" ); } } my $res; foreach my $line (@entry) { $res .= "$line\n"; } return $res; } 1; flat.pm100644000766000024 5103413604710571 21355 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/lib/Bio/Variation/IO# BioPerl module for Bio::Variation::IO::flat # # Please direct questions and support issues to # # Cared for by Heikki Lehvaslaiho # # Copyright Heikki Lehvaslaiho # # You may distribute this module under the same terms as perl itself # # POD documentation - main docs before the code =head1 NAME Bio::Variation::IO::flat - flat file sequence variation input/output stream =head1 SYNOPSIS Do not use this module directly. Use it via the Bio::Variation::IO class. =head1 DESCRIPTION This object can transform Bio::Variation::SeqDiff objects to and from flat file databases. =head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to the Bioperl mailing lists Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Support Please direct usage questions or support issues to the mailing list: I rather than to the module maintainer directly. Many experienced and reponsive experts will be able look at the problem and quickly address it. Please include a thorough description of the problem with code and data examples if at all possible. =head2 Reporting Bugs report bugs to the Bioperl bug tracking system to help us keep track the bugs and their resolution. Bug reports can be submitted via the web: https://github.com/bioperl/bioperl-live/issues =head1 AUTHOR - Heikki Lehvaslaiho Email: heikki-at-bioperl-dot-org =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut # Let the code begin... package Bio::Variation::IO::flat; $Bio::Variation::IO::flat::VERSION = '1.7.5'; use strict; use Text::Wrap; use Bio::Variation::SeqDiff; use Bio::Variation::DNAMutation; use Bio::Variation::RNAChange; use Bio::Variation::AAChange; use Bio::Variation::Allele; use base qw(Bio::Variation::IO); sub new { my($class, @args) = @_; my $self = bless {}, $class; $self->_initialize(@args); return $self; } sub _initialize { my($self,@args) = @_; return unless $self->SUPER::_initialize(@args); } =head2 next Title : next Usage : $haplo = $stream->next() Function: returns the next seqDiff in the stream Returns : Bio::Variation::SeqDiff object Args : NONE =cut sub next { my( $self ) = @_; local $/ = '//'; return unless my $entry = $self->_readline; return if $entry =~ /^\s+$/; $entry =~ /\s*ID\s+\S+/ || $self->throw("We do need an ID!"); my ($id, $offset, $alphabet) = $entry =~ /\s*ID +([^:]+)..(\d+)[^\)]*.\[?([cg])?/ or $self->throw("Can't parse ID line"); # $self->throw("$1|$2|$3"); my $h =Bio::Variation::SeqDiff->new(-id => $id, -offset => $offset, ); if ($alphabet) { if ($alphabet eq 'g') { $alphabet = 'dna'; } elsif ($alphabet eq 'c') { $alphabet = 'rna'; } $h->alphabet($alphabet); } # # DNA # my @dna = split ( / DNA;/, $entry ); shift @dna; my $prevdnaobj; foreach my $dna (@dna) { $dna =~ s/Feature[ \t]+//g; ($dna) = split "RNA; ", $dna; #$self->warn("|$dna|") ; #exit; my ($mut_number, $proof, $location, $upflank, $change, $dnflank) = $dna =~ m|\W+([\d\.]+).+/proof: (\w+).+/location: ([^ \n]+).+/upflank: ([ \n\w]+).+/change: ([^ /]+).+/dnflank: ([ \n\w]+)|s; $change =~ s/[ \n]//g; my ($ori, $mut) = split /[>\|]/, $change; my ($variation_number, $change_number) = split /\./, $mut_number; #$self->warn("|$mut_number|>|$variation_number|$change_number|"); my $dnamut; if ($change_number and $change_number > 1 ) { my $a3 = Bio::Variation::Allele->new; $a3->seq($mut) if $mut; #$dnamut->add_Allele($a3); $prevdnaobj->add_Allele($a3); } else { $upflank =~ s/[ \n]//g; $dnflank =~ s/[ \n]//g; my ($region, $junk, $region_value, $junk2, $region_dist) = $dna =~ m|.+/region: ([\w\']+)(; )?(\w+)?( ?\(\+?)?(-?\d+)?|s; #my $s = join ("|", $mut_number, $proof, $location, $upflank, # $change, $dnflank, $region, $region_value, $region_dist, $1,$2,$3,$4,$5); #$self->warn($s); #exit; my ($start, $sep, $end) = $location =~ /(-?\d+)(.)?\D?(-?\d+)?/; $end = $start if not defined $end ; my ($len) = $end - $start +1; $len = 0, $start = $end if defined $sep and $sep eq '^'; my $ismut = 0; $ismut = 1 if $change =~ m/>/; $dnamut = Bio::Variation::DNAMutation->new ('-start' => $start, '-end' => $end, '-length' => $len, '-upStreamSeq' => $upflank, '-dnStreamSeq' => $dnflank, '-proof' => $proof, '-mut_number' => $mut_number ); $prevdnaobj = $dnamut; my $a1 = Bio::Variation::Allele->new; $a1->seq($ori) if $ori; $dnamut->allele_ori($a1); my $a2 = Bio::Variation::Allele->new; $a2->seq($mut) if $mut; $dnamut->add_Allele($a2); if ($ismut) { $dnamut->isMutation(1); $dnamut->allele_mut($a2); } $dnamut->region($region) if defined $region; $dnamut->region_value($region_value) if defined $region_value; $dnamut->region_dist($region_dist) if defined $region_dist; $h->add_Variant($dnamut); $dnamut->SeqDiff($h); } } # # RNA # my @rna = split ( / RNA;/, $entry ); shift @rna; my $prevrnaobj; foreach my $rna (@rna) { $rna = substr ($rna, 0, index($rna, 'Feature AA')); $rna =~ s/Feature[ \t]+//g; ($rna) = split "DNA; ", $rna; #$self->warn("|$rna|") ; my ($mut_number, $proof, $location, $upflank, $change, $dnflank) = $rna =~ m|\W+([\d\.]+).+/proof: (\w+).+/location: ([^ \n]+).+/upflank: (\w+).+/change: ([^/]+).+/dnflank: (\w+)|s ;#' my ($region, $junk, $region_value, $junk2, $region_dist) = $rna =~ m|.+/region: ([\w\']+)(; )?(\w+)?( ?\(\+?)?(-?\d+)?|s; #my $s = join ("|", $mut_number, $proof, $location, $upflank, # $change, $dnflank, $region, $region_value, $region_dist, $1,$2,$3,$4,$5); #$self->warn($s); #exit; $change =~ s/[ \n]//g; my ($ori, $mut) = split /[>\|]/, $change; my $rnamut; my ($variation_number, $change_number) = split /\./, $mut_number; if ($change_number and $change_number > 1 ) { my $a3 = Bio::Variation::Allele->new; $a3->seq($mut) if $mut; #$rnamut->add_Allele($a3); $prevrnaobj->add_Allele($a3); } else { my ($start, $sep, $end) = $location =~ /(-?\d+)(.)?\D?(-?\d+)?/; $end = $start if not defined $end ; my ($len) = $end - $start + 1; $len = 0, $start = $end if defined $sep and $sep eq '^'; my $ismut; $ismut = 1 if $change =~ m/>/; my ($codon_table) = $rna =~ m|.+/codon_table: (\d+)|s; my ($codon_pos) = $rna =~ m|.+/codon:[^;]+; ([123])|s; $rnamut = Bio::Variation::RNAChange->new ('-start' => $start, '-end' => $end, '-length' => $len, '-upStreamSeq' => $upflank, '-dnStreamSeq' => $dnflank, '-proof' => $proof, '-mut_number' => $mut_number ); $prevrnaobj = $rnamut; my $a1 = Bio::Variation::Allele->new; $a1->seq($ori) if $ori; $rnamut->allele_ori($a1); my $a2 = Bio::Variation::Allele->new; $a2->seq($mut) if $mut; $rnamut->add_Allele($a2); if ($ismut) { $rnamut->isMutation(1); $rnamut->allele_mut($a2); } $rnamut->region($region) if defined $region; $rnamut->region_value($region_value) if defined $region_value; $rnamut->region_dist($region_dist) if defined $region_dist; $rnamut->codon_table($codon_table) if $codon_table; $rnamut->codon_pos($codon_pos) if $codon_pos; $h->add_Variant($rnamut); foreach my $mut ($h->each_Variant) { if ($mut->isa('Bio::Variation::DNAMutation') ) { if ($mut->mut_number == $rnamut->mut_number) { $rnamut->DNAMutation($mut); $mut->RNAChange($rnamut); } } } } } # # AA # my @aa = split ( / AA;/, $entry ); shift @aa; my $prevaaobj; foreach my $aa (@aa) { $aa = substr ($aa, 0, index($aa, 'Feature AA')); $aa =~ s/Feature[ \t]+//g; ($aa) = split "DNA; ", $aa; #$self->warn("|$aa|") ; my ($mut_number, $proof, $location, $change) = $aa =~ m|\W+([\d\.]+).+/proof: (\w+).+/location: ([^ \n]+)./change: ([^/;]+)|s; $change =~ s/[ \n]//g; #my $s = join ("|", $mut_number, $proof, $location, $change); #$self->warn($s); #exit; $change =~ s/[ \n]//g; $change =~ s/DNA$//; my ($ori, $mut) = split /[>\|]/, $change; #print "------$location----$ori-$mut-------------\n"; my ($variation_number, $change_number) = split /\./, $mut_number; my $aamut; if ($change_number and $change_number > 1 ) { my $a3 = Bio::Variation::Allele->new; $a3->seq($mut) if $mut; $prevaaobj->add_Allele($a3); } else { my ($start, $sep, $end) = $location =~ /(-?\d+)(.)?\D?(-?\d+)?/; $end = $start if not defined $end ; my ($len) = $end - $start + 1; $len = 0, $start = $end if defined $sep and $sep eq '^'; my $ismut; $ismut = 1 if $change =~ m/>/; my ($region) = $aa =~ m|.+/region: (\w+)|s ; $aamut = Bio::Variation::AAChange->new ('-start' => $start, '-end' => $end, '-length' => $len, '-proof' => $proof, '-mut_number' => $mut_number ); $prevaaobj = $aamut; my $a1 = Bio::Variation::Allele->new; $a1->seq($ori) if $ori; $aamut->allele_ori($a1); my $a2 = Bio::Variation::Allele->new; $a2->seq($mut) if $mut; $aamut->add_Allele($a2); if ($ismut) { $aamut->isMutation(1); $aamut->allele_mut($a2); } $region && $aamut->region($region); $h->add_Variant($aamut); foreach my $mut ($h->each_Variant) { if ($mut->isa('Bio::Variation::RNAChange') ) { if ($mut->mut_number == $aamut->mut_number) { $aamut->RNAChange($mut); $mut->AAChange($aamut); } } } } } return $h; } =head2 write Title : write Usage : $stream->write(@seqDiffs) Function: writes the $seqDiff object into the stream Returns : 1 for success and 0 for error Args : Bio::Variation::SeqDiff object =cut sub write { my ($self,@h) = @_; #$columns = 75; #default for Text::Wrap my %tag = ( 'ID' => 'ID ', 'Description' => 'Description ', 'FeatureKey' => 'Feature ', 'FeatureQual' => "Feature ", 'FeatureWrap' => "Feature ", 'ErrorComment' => 'Comment ' #'Comment' => 'Comment -!-', #'CommentLine' => 'Comment ', ); if( !defined $h[0] ) { $self->throw("Attempting to write with no information!"); } foreach my $h (@h) { my @entry =(); my ($text, $tmp, $tmp2, $sep); my ($count) = 0; $text = $tag{ID}; $text .= $h->id; $text .= ":(". $h->offset; $text .= "+1" if $h->sysname =~ /-/; $text .= ")". $h->sysname; $text .= "; ". $h->trivname if $h->trivname; push (@entry, $text); #Variants need to be ordered accoding to mutation_number attribute #put them into a hash of arrays holding the Variant objects #This is necessary for cases like several distict mutations present # in the same sequence. my @allvariants = $h->each_Variant; my %variants = (); foreach my $mut ($h->each_Variant) { push @{$variants{$mut->mut_number} }, $mut; } #my ($variation_number, $change_number) = split /\./, $mut_number; foreach my $var (sort keys %variants) { #print $var, ": ", join (" ", @{$variants{$var}}), "\n"; foreach my $mut (@{$variants{$var}}) { # # DNA # if ( $mut->isa('Bio::Variation::DNAMutation') ) { #collect all non-reference alleles $self->throw("allele_ori needs to be defined in [$mut]") if not $mut->allele_ori; if ($mut->isMutation) { $sep = '>'; } else { $sep = '|'; } my @alleles = $mut->each_Allele; #push @alleles, $mut->allele_mut if $mut->allele_mut; my $count = 0; # two alleles foreach my $allele (@alleles) { $count++; my ($variation_number, $change_number) = split /\./, $mut->mut_number; if ($change_number and $change_number != $count){ $mut->mut_number("$change_number.$count"); } $mut->allele_mut($allele); push (@entry, $tag{FeatureKey}. 'DNA'. "; ". $mut->mut_number ); #label $text=$tag{FeatureQual}. '/label: '. $mut->label; push (@entry, $text); #proof if ($mut->proof) { $text = $tag{FeatureQual}. '/proof: '. $mut->proof; push (@entry, $text) ; } #location $text = $tag{FeatureQual}. '/location: '; #$mut->id. '; '. $mut->start; if ($mut->length > 1 ) {# if ($mut->end - $mut->start ) { my $l = $mut->start + $mut->length -1; $text .= $mut->start. '..'. $l; } elsif ($mut->length == 0) { my $tmp_start = $mut->start - 1; $tmp_start-- if $tmp_start == 0; $text .= $tmp_start. '^'. $mut->end; } else { $text .= $mut->start; } if ($h->alphabet && $h->alphabet eq 'dna') { $tmp = $mut->start + $h->offset; $tmp-- if $tmp <= 0; $mut->start < 1 && $tmp++; #$text.= ' ('. $h->id. '::'. $tmp; $tmp2 = $mut->end + $h->offset; if ( $mut->length > 1 ) { $mut->end < 1 && $tmp2++; $text.= ' ('. $h->id. '::'. $tmp. "..". $tmp2; } elsif ($mut->length == 0) { $tmp--; $tmp-- if $tmp == 0; $text .= ' ('. $h->id. '::'. $tmp. '^'. $tmp2; } else { $text.= ' ('. $h->id. '::'. $tmp; } $text .= ')'; } push (@entry, $text); #sequence push (@entry, $tag{FeatureQual}. '/upflank: '. $mut->upStreamSeq ); $text = ''; $text = $mut->allele_ori->seq if $mut->allele_ori->seq; $text .= $sep; $text .= $mut->allele_mut->seq if $mut->allele_mut->seq; push (@entry, wrap($tag{FeatureQual}. '/change: ', $tag{FeatureWrap}, $text) ); push (@entry, $tag{FeatureQual}. '/dnflank: '. $mut->dnStreamSeq ); #restriction enzyme if ($mut->restriction_changes ne '') { $text = $mut->restriction_changes; $text = wrap($tag{FeatureQual}. '/re_site: ', $tag{FeatureWrap}, $text); push (@entry, $text ); } #region if ($mut->region ) { $text = $tag{FeatureQual}. '/region: '. $mut->region; $text .= ';' if $mut->region_value or $mut->region_dist; $text .= ' '. $mut->region_value if $mut->region_value; if ($mut->region_dist ) { $tmp = ''; $tmp = '+' if $mut->region_dist > 1; $text .= " (". $tmp. $mut->region_dist. ')'; } push (@entry, $text); } #CpG if ($mut->CpG) { push (@entry, $tag{FeatureQual}. "/CpG" ); } } } # # RNA # elsif ($mut->isa('Bio::Variation::RNAChange') ) { #collect all non-reference alleles $self->throw("allele_ori needs to be defined in [$mut]") if not $mut->allele_ori; my @alleles = $mut->each_Allele; #push @alleles, $mut->allele_mut if $mut->allele_mut; if ($mut->isMutation) { $sep = '>'; } else { $sep = '|'; } my $count = 0; # two alleles foreach my $allele (@alleles) { $count++; my ($variation_number, $change_number) = split /\./, $mut->mut_number; if ($change_number and $change_number != $count){ $mut->mut_number("$change_number.$count"); } $mut->allele_mut($allele); push (@entry, $tag{FeatureKey}. 'RNA'. "; ". $mut->mut_number ); #label $text=$tag{FeatureQual}. '/label: '. $mut->label; push (@entry, $text); #proof if ($mut->proof) { $text = $tag{FeatureQual}. '/proof: '. $mut->proof; push (@entry, $text) ; } #location $text = $tag{FeatureQual}. '/location: ' ; if ($mut->length > 1 ) { $text .= $mut->start. '..'. $mut->end; $tmp2 = $mut->end + $h->offset; } elsif ($mut->length == 0) { my $tmp_start = $mut->start; $tmp_start--; $tmp_start-- if $tmp_start == 0; $text .= $tmp_start. '^'. $mut->end; } else { $text .= $mut->start; } if ($h->alphabet && $h->alphabet eq 'rna') { $tmp = $mut->start + $h->offset; $tmp-- if $tmp <= 0; #$mut->start < 1 && $tmp++; #$text.= ' ('. $h->id. '::'. $tmp; $tmp2 = $mut->end + $h->offset; #$mut->end < 1 && $tmp2++; if ( $mut->length > 1 ) { $text.= ' ('. $h->id. '::'. $tmp. "..". $tmp2; } elsif ($mut->length == 0) { $tmp--; $text .= ' ('. $h->id. '::'. $tmp. '^'. $tmp2; } else { $text.= ' ('. $h->id. '::'. $tmp; } $text .= ')'; } push (@entry, $text); #sequence push (@entry, $tag{FeatureQual}. '/upflank: '. $mut->upStreamSeq ); $text = ''; $text = $mut->allele_ori->seq if $mut->allele_ori->seq; $text .= $sep; $text .= $mut->allele_mut->seq if $mut->allele_mut->seq; push (@entry, wrap($tag{FeatureQual}. '/change: ', $tag{FeatureWrap}, $text) ); push (@entry, $tag{FeatureQual}. '/dnflank: '. $mut->dnStreamSeq ); #restriction if ($mut->restriction_changes ne '') { $text = $mut->restriction_changes; $text = wrap($tag{FeatureQual}. '/re_site: ', $tag{FeatureWrap}, $text); push (@entry, $text ); } #coding if ($mut->region eq 'coding') { #codon table $text = $tag{FeatureQual}. '/codon_table: '; $text .= $mut->codon_table; push (@entry, $text); #codon $text = $tag{FeatureQual}. '/codon: '. $mut->codon_ori. $sep; if ($mut->DNAMutation->label =~ /.*point/) { $text .= $mut->codon_mut; } else { $text .= '-'; } $text .= "; ". $mut->codon_pos; push (@entry, $text); } #region if ($mut->region ) { $text = $tag{FeatureQual}. '/region: '. $mut->region; $text .= ';' if $mut->region_value or $mut->region_dist; $text .= ' '. $mut->region_value if $mut->region_value; if ($mut->region_dist ) { $tmp = ''; $tmp = '+' if $mut->region_dist > 1; $text .= " (". $tmp. $mut->region_dist. ')'; } push (@entry, $text); } } } # # AA # elsif ($mut->isa('Bio::Variation::AAChange')) { #collect all non-reference alleles $self->throw("allele_ori needs to be defined in [$mut]") if not $mut->allele_ori; if ($mut->isMutation) { $sep = '>'; } else { $sep = '|'; } my @alleles = $mut->each_Allele; #push @alleles, $mut->allele_mut if $mut->allele_mut; my $count = 0; # two alleles foreach my $allele (@alleles) { $count++; my ($variation_number, $change_number) = split /\./, $mut->mut_number; if ($change_number and $change_number != $count){ $mut->mut_number("$change_number.$count"); } $mut->allele_mut($allele); push (@entry, $tag{FeatureKey}. 'AA'. "; ". $mut->mut_number ); #label $text=$tag{FeatureQual}. '/label: '. $mut->label; push (@entry, $text) ; #proof if ($mut->proof) { $text = $tag{FeatureQual}. '/proof: '. $mut->proof; push (@entry, $text) ; } #location $text = $tag{FeatureQual}. '/location: '. #$mut->id. '; '. $mut->start; $mut->start; if ($mut->length > 1 ) { $tmp = $mut->start + $mut->length -1; $text .= '..'. $tmp; } push (@entry, $text); #sequence $text = ''; $text = $mut->allele_ori->seq if $mut->allele_ori->seq; $text .= $sep; $text .= $mut->allele_mut->seq if $mut->allele_mut->seq; push (@entry, wrap($tag{FeatureQual}. '/change: ', $tag{FeatureWrap}, $text) ); #region if ($mut->region ) { $text = $tag{FeatureQual}. '/region: '. $mut->region; $text .= ';' if $mut->region_value or $mut->region_dist; $text .= ' '. $mut->region_value if $mut->region_value; if ($mut->region_dist ) { $tmp = ''; $tmp = '+' if $mut->region_dist > 1; $text .= " (". $tmp. $mut->region_dist. ')'; } push (@entry, $text); } } } } } push (@entry, "//" ); my $str = join ("\n", @entry). "\n"; $str =~ s/\t/ /g; $self->_print($str); } return 1; } 1; AAChange.pm100644000766000024 3310513604710571 21506 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/lib/Bio/Variation# # BioPerl module for Bio::Variation::AAChange # # Please direct questions and support issues to # # Cared for by Heikki Lehvaslaiho # # Copyright Heikki Lehvaslaiho # # You may distribute this module under the same terms as perl itself # POD documentation - main docs before the code =head1 NAME Bio::Variation::AAChange - Sequence change class for polypeptides =head1 SYNOPSIS $aamut = Bio::Variation::AAChange->new ('-start' => $start, '-end' => $end, '-length' => $len, '-proof' => $proof, '-isMutation' => 1, '-mut_number' => $mut_number ); my $a1 = Bio::Variation::Allele->new; $a1->seq($ori) if $ori; $aamut->allele_ori($a1); my $a2 = Bio::Variation::Allele->new; $a2->seq($mut) if $mut; $aachange->add_Allele($a2); $aachange->allele_mut($a2); print "\n"; # add it to a SeqDiff container object $seqdiff->add_Variant($rnachange); # and create links to and from RNA level variant objects $aamut->RNAChange($rnachange); $rnachange->AAChange($rnachange); =head1 DESCRIPTION The instantiable class Bio::Variation::RNAChange describes basic sequence changes at polypeptide level. It uses methods defined in superclass Bio::Variation::VariantI, see L for details. If the variation described by a AAChange object has a known Bio::Variation::RNAAChange object, create the link with method AAChange(). See L for more information. =head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to the Bioperl mailing lists Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Support Please direct usage questions or support issues to the mailing list: I rather than to the module maintainer directly. Many experienced and reponsive experts will be able look at the problem and quickly address it. Please include a thorough description of the problem with code and data examples if at all possible. =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track the bugs and their resolution. Bug reports can be submitted via the web: https://github.com/bioperl/bioperl-live/issues =head1 AUTHOR - Heikki Lehvaslaiho Email: heikki-at-bioperl-dot-org =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut # Let the code begin... package Bio::Variation::AAChange; $Bio::Variation::AAChange::VERSION = '1.7.5'; use vars qw($MATRIX); use strict; # Object preamble - inheritance use base qw(Bio::Variation::VariantI); BEGIN { my $matrix = << "__MATRIX__"; # Matrix made by matblas from blosum62.iij # * column uses minimum score # BLOSUM Clustered Scoring Matrix in 1/2 Bit Units # Blocks Database = /data/blocks_5.0/blocks.dat # Cluster Percentage: >= 62 # Entropy = 0.6979, Expected = -0.5209 A R N D C Q E G H I L K M F P S T W Y V B Z X * A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4 N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4 D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4 C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4 E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4 H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4 I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4 L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4 K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4 M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4 F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4 P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4 S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4 T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4 W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4 Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4 V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4 B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4 Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4 * -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 __MATRIX__ my %blosum = (); $matrix =~ /^ +(.+)$/m; my @aas = split / +/, $1; foreach my $aa (@aas) { my $tmp = $aa; $tmp = "\\$aa" if $aa eq '*'; $matrix =~ /^($tmp) +([-+]?\d.*)$/m; my @scores = split / +/, $2 if defined $2; my $count = 0; foreach my $ak (@aas) { $blosum{$aa}->{$aas[$count]} = $scores[$count]; $count++; } } sub _matrix; $MATRIX = \%blosum; } sub new { my($class,@args) = @_; my $self = $class->SUPER::new(@args); my ($start, $end, $length, $strand, $primary, $source, $frame, $score, $gff_string, $allele_ori, $allele_mut, $upstreamseq, $dnstreamseq, $label, $status, $proof, $re_changes, $region, $region_value, $region_dist, $numbering, $mut_number, $ismutation) = $self->_rearrange([qw(START END LENGTH STRAND PRIMARY SOURCE FRAME SCORE GFF_STRING ALLELE_ORI ALLELE_MUT UPSTREAMSEQ DNSTREAMSEQ LABEL STATUS PROOF RE_CHANGES REGION REGION_VALUE REGION_DIST NUMBERING MUT_NUMBER ISMUTATION )],@args); $self->primary_tag("Variation"); $self->{ 'alleles' } = []; $start && $self->start($start); $end && $self->end($end); $length && $self->length($length); $strand && $self->strand($strand); $primary && $self->primary_tag($primary); $source && $self->source_tag($source); $frame && $self->frame($frame); $score && $self->score($score); $gff_string && $self->_from_gff_string($gff_string); $allele_ori && $self->allele_ori($allele_ori); $allele_mut && $self->allele_mut($allele_mut); $upstreamseq && $self->upstreamseq($upstreamseq); $dnstreamseq && $self->dnstreamseq($dnstreamseq); $label && $self->label($label); $status && $self->status($status); $proof && $self->proof($proof); $region && $self->region($region); $region_value && $self->region_value($region_value); $region_dist && $self->region_dist($region_dist); $numbering && $self->numbering($numbering); $mut_number && $self->mut_number($mut_number); $ismutation && $self->isMutation($ismutation); return $self; # success - we hope! } =head2 RNAChange Title : RNAChange Usage : $mutobj = $self->RNAChange; : $mutobj = $self->RNAChange($objref); Function: Returns or sets the link-reference to a mutation/change object. If there is no link, it will return undef Returns : an obj_ref or undef =cut sub RNAChange { my ($self,$value) = @_; if (defined $value) { if( ! $value->isa('Bio::Variation::RNAChange') ) { $self->throw("Is not a Bio::Variation::RNAChange object but a [$self]"); return; } else { $self->{'RNAChange'} = $value; } } unless (exists $self->{'RNAChange'}) { return; } else { return $self->{'RNAChange'}; } } =head2 label Title : label Usage : $obj->label(); Function: Sets and returns mutation event label(s). If value is not set, or no argument is given returns false. Each instantiable subclass of L needs to implement this method. Valid values are listed in 'Mutation event controlled vocabulary' in http://www.ebi.ac.uk/mutations/recommendations/mutevent.html. Example : Returns : string Args : string =cut sub label { my ($self) = @_; my ($o, $m, $type); $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq; $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq; if ($self->start == 1 ) { if ($o and substr($o, 0, 1) ne substr($m, 0, 1)) { $type = 'no translation'; } elsif ($o and $m and $o eq $m ) { $type = 'silent'; } # more ... } elsif ($o and substr($o, 0, 1) eq '*' ) { if ($m and substr($o, 0, 1) ne substr($m, 0, 1)) { $type = 'post-elongation'; } elsif ($m and $o eq $m ) { $type = 'silent, conservative'; } } elsif ($o and $m and $o eq $m) { $type = 'silent, conservative'; } elsif ($m and $m eq '*') { $type = 'truncation'; } elsif ($o and $m and $o eq $m) { $type = 'silent, conservative'; } elsif (not $m or ($o and $m and length($o) > length($m) and substr($m, -1, 1) ne '*')) { $type = 'deletion'; if ($o and $m and $o !~ $m and $o !~ $m) { $type .= ', complex'; } } elsif (not $o or ($o and $m and length($o) < length($m) and substr($m, -1, 1) ne '*' ) ) { $type = 'insertion'; if ($o and $m and $o !~ $m and $o !~ $m) { $type .= ', complex'; } } elsif ($o and $m and $o ne $m and length $o == 1 and length $m == 1 ) { $type = 'substitution'; my $value = $self->similarity_score; if (defined $value) { my $cons = ($value < 0) ? 'nonconservative' : 'conservative'; $type .= ", ". $cons; } } else { $type = 'out-of-frame translation, truncation'; } $self->{'label'} = $type; return $self->{'label'}; } =head2 similarity_score Title : similarity_score Usage : $self->similarity_score Function: Measure for evolutionary conservativeness of single amino substitutions. Uses BLOSUM62. Negative numbers are noncoservative changes. Returns : integer, undef if not single amino acid change =cut sub similarity_score { my ($self) = @_; my ($o, $m, $type); $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq; $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq; return unless $o and $m and length $o == 1 and length $m == 1; return unless $o =~ /[ARNDCQEGHILKMFPSTWYVBZX*]/i and $m =~ /[ARNDCQEGHILKMFPSTWYVBZX*]/i; return $MATRIX->{"\U$o"}->{"\U$m"}; } =head2 trivname Title : trivname Usage : $self->trivname Function: Given a Bio::Variation::AAChange object with linked Bio::Variation::RNAChange and Bio::Variation::DNAMutation objects, this subroutine creates a string corresponding to the 'trivial name' of the mutation. Trivial name is specified in Antonorakis & MDI Nomenclature Working Group: Human Mutation 11:1-3, 1998. Returns : string =cut sub trivname { my ($self,$value) = @_; if( defined $value) { $self->{'trivname'} = $value; } else { my ( $aaori, $aamut,$aamutsymbol, $aatermnumber, $aamutterm) = ('', '', '', '', ''); my $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq; #my $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq; $aaori = substr ($o, 0, 1) if $o; $aaori =~ tr/\*/X/; my $sep; if ($self->isMutation) { $sep = '>'; } else { $sep = '|'; } my $trivname = $aaori. $self->start; $trivname .= $sep if $sep eq '|'; my @alleles = $self->each_Allele; foreach my $allele (@alleles) { my $m = $allele->seq if $allele->seq; $self->allele_mut($allele); #$trivname .= $sep. uc $m if $m; $aamutterm = substr ($m, -1, 1) if $m; if ($self->RNAChange->label =~ /initiation codon/ and ( $o and $m and $o ne $m)) { $aamut = 'X'; } elsif (CORE::length($o) == 1 and CORE::length($m) == 1 ) { $aamutsymbol = ''; $aamut = $aamutterm; } elsif ($self->RNAChange->label =~ /deletion/) { $aamutsymbol = 'del'; if ($aamutterm eq '*') { $aatermnumber = $self->start + length($m) -1; $aamut = 'X'. $aatermnumber; } if ($self->RNAChange && $self->RNAChange->label =~ /inframe/){ $aamut = '-'. length($self->RNAChange->allele_ori->seq)/3 ; } } elsif ($self->RNAChange->label =~ /insertion/) { $aamutsymbol = 'ins'; if (($aamutterm eq '*') && (length($m)-1 != 0)) { $aatermnumber = $self->start + length($m)-1; $aamut = $aatermnumber. 'X'; } if ($self->RNAChange->label =~ /inframe/){ $aamut = '+'. int length($self->RNAChange->allele_mut->seq)/3 ; } } elsif ($self->RNAChange->label =~ /complex/ ) { my $diff = length($m) - length($o); if ($diff >= 0 ) { $aamutsymbol = 'ins'; } else { $aamutsymbol = 'del' ; } if (($aamutterm eq '*') && (length($m)-1 != 0)) { $aatermnumber = $self->start + length($m)-1; $aamut = $aatermnumber. 'X'; } if ($self->RNAChange->label =~ /inframe/){ if ($diff >= 0 ) { $aamut = '+'. $diff ; } else { $aamut = $diff ; } } } elsif ($self->label =~ /truncation/) { $aamut = $m; } else { $aamutsymbol = ''; $aamut = $aamutterm; } $aamut =~ tr/\*/X/; $trivname .= $aamutsymbol. $aamut. $sep; } chop $trivname; $self->{'trivname'} = $trivname; } return $self->{'trivname'}; } 1; VariantI.pm100644000766000024 6526613604710571 21651 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/lib/Bio/Variation# # BioPerl module for Bio::Variation::VariantI # # Please direct questions and support issues to # # Cared for by Heikki Lehvaslaiho # # Copyright Heikki Lehvaslaiho # # You may distribute this module under the same terms as perl itself # POD documentation - main docs before the code =head1 NAME Bio::Variation::VariantI - Sequence Change SeqFeature abstract class =head1 SYNOPSIS #get Bio::Variant::VariantI somehow print $var->restriction_changes, "\n"; foreach $allele ($var->each_Allele) { #work on Bio::Variation::Allele objects } =head1 DESCRIPTION This superclass defines common methods to basic sequence changes. The instantiable classes Bio::Variation::DNAMutation, Bio::Variation::RNAChange and Bio::Variation::AAChange use them. See L, L, and L for more information. These classes store information, heavy computation to determine allele sequences is done elsewhere. The database cross-references are implemented as Bio::Annotation::DBLink objects. The methods to access them are defined in Bio::DBLinkContainerI. See L and L for details. Bio::Variation::VariantI redifines and extends Bio::SeqFeature::Generic for sequence variations. This class describes specific sequence change events. These events are always from a specific reference sequence to something different. See L for more information. IMPORTANT: The notion of reference sequence permeates all Bio::Variation classes. This is especially important to remember when dealing with Alleles. In a polymorphic site, there can be a large number of alleles. One of then has to be selected to be the reference allele (allele_ori). ALL the rest has to be passed to the Variant using the method add_Allele, including the mutated allele in a canonical mutation. The IO modules and generated attributes depend on it. They ignore the allele linked to using allele_mut and circulate each Allele returned by each_Allele into allele_mut and calculate the changes between that and allele_ori. =head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to the Bioperl mailing lists Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Support Please direct usage questions or support issues to the mailing list: I rather than to the module maintainer directly. Many experienced and reponsive experts will be able look at the problem and quickly address it. Please include a thorough description of the problem with code and data examples if at all possible. =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track the bugs and their resolution. Bug reports can be submitted via the web: https://github.com/bioperl/bioperl-live/issues =head1 AUTHOR - Heikki Lehvaslaiho Email: heikki-at-bioperl-dot-org =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut # Let the code begin... package Bio::Variation::VariantI; $Bio::Variation::VariantI::VERSION = '1.7.5'; use strict; # Object preamble - inheritance use base qw(Bio::Root::Root Bio::SeqFeature::Generic Bio::DBLinkContainerI); =head2 id Title : id Usage : $obj->id Function: Read only method. Returns the id of the variation object. The id is the id of the first DBLink object attached to this object. Example : Returns : scalar Args : none =cut sub id { my ($self) = @_; my @ids = $self->each_DBLink; my $id = $ids[0] if scalar @ids > 0; return $id->database. "::". $id->primary_id if $id; } =head2 add_Allele Title : add_Allele Usage : $self->add_Allele($allele) Function: Adds one Bio::Variation::Allele into the list of alleles. Note that the method forces the convention that nucleotide sequence is in lower case and amino acds are in upper case. Example : Returns : 1 when succeeds, 0 for failure. Args : Allele object =cut sub add_Allele { my ($self,$value) = @_; if (defined $value) { if( ! $value->isa('Bio::Variation::Allele') ) { my $com = ref $value; $self->throw("Is not a Allele object but a [$com]"); return 0; } else { if ( $self->isa('Bio::Variation::AAChange') ) { $value->seq( uc $value->seq) if $value->seq; } else { $value->seq( lc $value->seq) if $value->seq; } push(@{$self->{'alleles'}},$value); $self->allele_mut($value); #???? return 1; } } else { return 0; } } =head2 each_Allele Title : alleles Usage : $obj->each_Allele(); Function: Returns a list of Bio::Variation::Allele objects Example : Returns : list of Alleles Args : none =cut sub each_Allele{ my ($self,@args) = @_; return @{$self->{'alleles'}}; } =head2 isMutation Title : isMutation Usage : print join('/', $obj->each_Allele) if not $obj->isMutation; Function: Returns or sets the boolean value indicating that the variant described is a canonical mutation with two alleles assinged to be the original (wild type) allele and mutated allele, respectively. If this value is not set, it is assumed that the Variant describes polymorphisms. Returns : a boolean =cut sub isMutation { my ($self,$value) = @_; if (defined $value) { if ($value ) { $self->{'isMutation'} = 1; } else { $self->{'isMutation'} = 0; } } return $self->{'isMutation'}; } =head2 allele_ori Title : allele_ori Usage : $obj->allele_ori(); Function: Links to and returns the Bio::Variation::Allele object. If value is not set, returns false. All other Alleles are compared to this. Amino acid sequences are stored in upper case characters, others in lower case. Example : Returns : string Args : string See L for more. =cut sub allele_ori { my ($self,$value) = @_; if( defined $value) { if ( ! ref $value || ! $value->isa('Bio::Variation::Allele')) { $self->throw("Value is not Bio::Variation::Allele but [$value]"); } else { if ( $self->isa('Bio::Variation::AAChange') ) { $value->seq( uc $value->seq) if $value->seq; } else { $value->seq( lc $value->seq) if $value->seq; } $self->{'allele_ori'} = $value; } } return $self->{'allele_ori'}; } =head2 allele_mut Title : allele_mut Usage : $obj->allele_mut(); Function: Links to and returns the Bio::Variation::Allele object. Sets and returns the mutated allele sequence. If value is not set, returns false. Amino acid sequences are stored in upper case characters, others in lower case. Example : Returns : string Args : string See L for more. =cut sub allele_mut { my ($self,$value) = @_; if( defined $value) { if ( ! ref $value || ! $value->isa('Bio::Variation::Allele')) { $self->throw("Value is not Bio::Variation::Allele but [$value]"); } else { if ( $self->isa('Bio::Variation::AAChange') ) { $value->seq( uc $value->seq) if $value->seq; } else { $value->seq( lc $value->seq) if $value->seq; } $self->{'allele_mut'} = $value; } } return $self->{'allele_mut'}; } =head2 length Title : length Usage : $obj->length(); Function: Sets and returns the length of the affected original allele sequence. If value is not set, returns false == 0. Value 0 means that the variant position is before the start=end sequence position. (Value 1 would denote a point mutation). This follows the convension to report an insertion (2insT) in equivalent way to a corresponding deletion (2delT) (Think about indel polymorpism ATC <=> AC where the origianal state is not known ). Example : Returns : string Args : string =cut sub length { my ($self,$value) = @_; if ( defined $value) { $self->{'length'} = $value; } if ( ! exists $self->{'length'} ) { return 0; } return $self->{'length'}; } =head2 upStreamSeq Title : upStreamSeq Usage : $obj->upStreamSeq(); Function: Sets and returns upstream flanking sequence string. If value is not set, returns false. The sequence should be >=25 characters long, if possible. Example : Returns : string or false Args : string =cut sub upStreamSeq { my ($self,$value) = @_; if( defined $value) { $self->{'upstreamseq'} = $value; } return $self->{'upstreamseq'}; } =head2 dnStreamSeq Title : dnStreamSeq Usage : $obj->dnStreamSeq(); Function: Sets and returns dnstream flanking sequence string. If value is not set, returns false. The sequence should be >=25 characters long, if possible. Example : Returns : string or false Args : string =cut sub dnStreamSeq { my ($self,$value) = @_; if( defined $value) { $self->{'dnstreamseq'} = $value; } return $self->{'dnstreamseq'}; } =head2 label Title : label Usage : $obj->label(); Function: Sets and returns mutation event label(s). If value is not set, or no argument is given returns false. Each instantiable class needs to implement this method. Valid values are listed in 'Mutation event controlled vocabulary' in http://www.ebi.ac.uk/mutations/recommendations/mutevent.html. Example : Returns : string Args : string =cut sub label { my ($self,$value) = @_; $self->throw_not_implemented(); } =head2 status Title : status Usage : $obj->status() Function: Returns the status of the sequence change object. Valid values are: 'suspected' and 'proven' Example : $obj->status('proven'); Returns : scalar Args : valid string (optional, for setting) =cut sub status { my ($self,$value) = @_; my %status = (suspected => 1, proven => 1 ); if( defined $value) { $value = lc $value; if ($status{$value}) { $self->{'status'} = $value; } else { $self->throw("$value is not valid status value!"); } } if( ! exists $self->{'status'} ) { return "$self"; } return $self->{'status'}; } =head2 proof Title : proof Usage : $obj->proof() Function: Returns the proof of the sequence change object. Valid values are: 'computed' and 'experimental'. Example : $obj->proof('computed'); Returns : scalar Args : valid string (optional, for setting) =cut sub proof { my ($self,$value) = @_; my %proof = (computed => 1, experimental => 1 ); if( defined $value) { $value = lc $value; if ($proof{$value}) { $self->{'proof'} = $value; } else { $self->throw("$value is not valid proof value!"); } } return $self->{'proof'}; } =head2 region Title : region Usage : $obj->region(); Function: Sets and returns the name of the sequence region type or protein domain at this location. If value is not set, returns false. Example : Returns : string Args : string =cut sub region { my ($self,$value) = @_; if( defined $value) { $self->{'region'} = $value; } return $self->{'region'}; } =head2 region_value Title : region_value Usage : $obj->region_value(); Function: Sets and returns the name of the sequence region_value or protein domain at this location. If value is not set, returns false. Example : Returns : string Args : string =cut sub region_value { my ($self,$value) = @_; if( defined $value) { $self->{'region_value'} = $value; } return $self->{'region_value'}; } =head2 region_dist Title : region_dist Usage : $obj->region_dist(); Function: Sets and returns the distance tot the closest region (i.e. intro/exon or domain) boundary. If distance is not set, returns false. Example : Returns : integer Args : integer =cut sub region_dist { my ($self,$value) = @_; if( defined $value) { if ( not $value =~ /^[+-]?\d+$/ ) { $self->throw("[$value] for region_dist has to be an integer\n"); } else { $self->{'region_dist'} = $value; } } return $self->{'region_dist'}; } =head2 numbering Title : numbering Usage : $obj->numbering() Function: Returns the numbering chema used locating sequnce features. Valid values are: 'entry' and 'coding' Example : $obj->numbering('coding'); Returns : scalar Args : valid string (optional, for setting) =cut sub numbering { my ($self,$value) = @_; my %numbering = (entry => 1, coding => 1 ); if( defined $value) { $value = lc $value; if ($numbering{$value}) { $self->{'numbering'} = $value; } else { $self->throw("'$value' is not a valid for numbering!"); } } if( ! exists $self->{'numbering'} ) { return "$self"; } return $self->{'numbering'}; } =head2 mut_number Title : mut_number Usage : $num = $obj->mut_number; : $num = $obj->mut_number($number); Function: Returns or sets the number identifying the order in which the mutation has been issued. Numbers shouldstart from 1. If the number has never been set, the method will return '' If you want the output from IO modules look nice and, for multivariant/allele variations, make sense you better set this attribute. Returns : an integer =cut sub mut_number { my ($self,$value) = @_; if (defined $value) { $self->{'mut_number'} = $value; } unless (exists $self->{'mut_number'}) { return (''); } else { return $self->{'mut_number'}; } } =head2 SeqDiff Title : SeqDiff Usage : $mutobj = $obj->SeqDiff; : $mutobj = $obj->SeqDiff($objref); Function: Returns or sets the link-reference to the umbrella Bio::Variation::SeqDiff object. If there is no link, it will return undef Note: Adding a variant into a SeqDiff object will automatically set this value. Returns : an obj_ref or undef See L for more information. =cut sub SeqDiff { my ($self,$value) = @_; if (defined $value) { if( ! $value->isa('Bio::Variation::SeqDiff') ) { $self->throw("Is not a Bio::Variation::SeqDiff object but a [$value]"); return; } else { $self->{'seqDiff'} = $value; } } unless (exists $self->{'seqDiff'}) { return; } else { return $self->{'seqDiff'}; } } =head2 add_DBLink Title : add_DBLink Usage : $self->add_DBLink($ref) Function: adds a link object Example : Returns : Args : =cut sub add_DBLink{ my ($self,$com) = @_; if( $com && ! $com->isa('Bio::Annotation::DBLink') ) { $self->throw("Is not a link object but a [$com]"); } $com && push(@{$self->{'link'}},$com); } =head2 each_DBLink Title : each_DBLink Usage : foreach $ref ( $self->each_DBlink() ) Function: gets an array of DBlink of objects Example : Returns : Args : =cut sub each_DBLink{ my ($self) = @_; return @{$self->{'link'}}; } =head2 restriction_changes Title : restriction_changes Usage : $obj->restriction_changes(); Function: Returns a string containing a list of restriction enzyme changes of form +EcoRI, separated by commas. Strings need to be valid restriction enzyme names as stored in REBASE. allele_ori and allele_mut need to be assigned. Example : Returns : string Args : string =cut sub restriction_changes { my ($self) = @_; if (not $self->{'re_changes'}) { my %re = &_enzymes; # complain if used on AA data if ($self->isa('Bio::Variation::AAChange')) { $self->throw('Restriction enzymes do not bite polypeptides!'); } #sanity checks $self->warn('Upstream sequence is empty!') if $self->upStreamSeq eq ''; $self->warn('Downstream sequence is empty!') if $self->dnStreamSeq eq ''; # $self->warn('Original allele sequence is empty!') # if $self->allele_ori eq ''; # $self->warn('Mutated allele sequence is empty!') # if $self->allele_mut eq ''; #reuse the non empty DNA level list at RNA level if the flanks are identical #Hint: Check DNAMutation object first if ($self->isa('Bio::Variation::RNAChange') and $self->DNAMutation and $self->upStreamSeq eq $self->DNAMutation->upStreamSeq and $self->dnStreamSeq eq $self->DNAMutation->dnStreamSeq and $self->DNAMutation->restriction_changes ne '' ) { $self->{'re_changes'} = $self->DNAMutation->restriction_changes; } else { #maximum length of a type II restriction site in the current REBASE my ($le_dn) = 15; my ($le_up) = $le_dn; #reduce the flank lengths if the desired length is not available $le_dn = CORE::length ($self->dnStreamSeq) if $le_dn > CORE::length ($self->dnStreamSeq); $le_up = CORE::length ($self->upStreamSeq) if $le_up > CORE::length ($self->upStreamSeq); #Build sequence strings to compare my ($oriseq, $mutseq); $oriseq = $mutseq = substr($self->upStreamSeq, -$le_up, $le_up); $oriseq .= $self->allele_ori->seq if $self->allele_ori->seq; $mutseq .= $self->allele_mut->seq if $self->allele_mut->seq; $oriseq .= substr($self->dnStreamSeq, 0, $le_dn); $mutseq .= substr($self->dnStreamSeq, 0, $le_dn); # ... and their reverse complements my $oriseq_rev = _revcompl ($oriseq); my $mutseq_rev = _revcompl ($mutseq); # collect results into a string my $rec = ''; foreach my $enz (sort keys (%re)) { my $site = $re{$enz}; my @ori = ($oriseq=~ /$site/g); my @mut = ($mutseq=~ /$site/g); my @ori_r = ($oriseq_rev =~ /$site/g); my @mut_r = ($mutseq_rev =~ /$site/g); $rec .= '+'. $enz. ", " if (scalar @ori < scalar @mut) or (scalar @ori_r < scalar @mut_r); $rec .= '-'. $enz. ", " if (scalar @ori > scalar @mut) or (scalar @ori_r > scalar @mut_r); } $rec = substr($rec, 0, CORE::length($rec) - 2) if $rec ne ''; $self->{'re_changes'} = $rec; } } return $self->{'re_changes'} } sub _revcompl { # side effect: lower case letters my ($seq) = shift; $seq = lc $seq; $seq =~ tr/acgtrymkswhbvdnx/tgcayrkmswdvbhnx/; return CORE::reverse $seq; } sub _enzymes { #REBASE version 005 type2.005 my %enzymes = ( 'AarI' => 'cacctgc', 'AatII' => 'gacgtc', 'AccI' => 'gt[ac][gt]ac', 'AceIII' => 'cagctc', 'AciI' => 'ccgc', 'AclI' => 'aacgtt', 'AcyI' => 'g[ag]cg[ct]c', 'AflII' => 'cttaag', 'AflIII' => 'ac[ag][ct]gt', 'AgeI' => 'accggt', 'AhaIII' => 'tttaaa', 'AloI' => 'gaac[acgt][acgt][acgt][acgt][acgt][acgt]tcc', 'AluI' => 'agct', 'AlwNI' => 'cag[acgt][acgt][acgt]ctg', 'ApaBI' => 'gca[acgt][acgt][acgt][acgt][acgt]tgc', 'ApaI' => 'gggccc', 'ApaLI' => 'gtgcac', 'ApoI' => '[ag]aatt[ct]', 'AscI' => 'ggcgcgcc', 'AsuI' => 'gg[acgt]cc', 'AsuII' => 'ttcgaa', 'AvaI' => 'c[ct]cg[ag]g', 'AvaII' => 'gg[at]cc', 'AvaIII' => 'atgcat', 'AvrII' => 'cctagg', 'BaeI' => 'ac[acgt][acgt][acgt][acgt]gta[ct]c', 'BalI' => 'tggcca', 'BamHI' => 'ggatcc', 'BbvCI' => 'cctcagc', 'BbvI' => 'gcagc', 'BbvII' => 'gaagac', 'BccI' => 'ccatc', 'Bce83I' => 'cttgag', 'BcefI' => 'acggc', 'BcgI' => 'cga[acgt][acgt][acgt][acgt][acgt][acgt]tgc', 'BciVI' => 'gtatcc', 'BclI' => 'tgatca', 'BetI' => '[at]ccgg[at]', 'BfiI' => 'actggg', 'BglI' => 'gcc[acgt][acgt][acgt][acgt][acgt]ggc', 'BglII' => 'agatct', 'BinI' => 'ggatc', 'BmgI' => 'g[gt]gccc', 'BplI' => 'gag[acgt][acgt][acgt][acgt][acgt]ctc', 'Bpu10I' => 'cct[acgt]agc', 'BsaAI' => '[ct]acgt[ag]', 'BsaBI' => 'gat[acgt][acgt][acgt][acgt]atc', 'BsaXI' => 'ac[acgt][acgt][acgt][acgt][acgt]ctcc', 'BsbI' => 'caacac', 'BscGI' => 'cccgt', 'BseMII' => 'ctcag', 'BsePI' => 'gcgcgc', 'BseRI' => 'gaggag', 'BseSI' => 'g[gt]gc[ac]c', 'BsgI' => 'gtgcag', 'BsiI' => 'cacgag', 'BsiYI' => 'cc[acgt][acgt][acgt][acgt][acgt][acgt][acgt]gg', 'BsmAI' => 'gtctc', 'BsmI' => 'gaatgc', 'Bsp1407I' => 'tgtaca', 'Bsp24I' => 'gac[acgt][acgt][acgt][acgt][acgt][acgt]tgg', 'BspGI' => 'ctggac', 'BspHI' => 'tcatga', 'BspLU11I' => 'acatgt', 'BspMI' => 'acctgc', 'BspMII' => 'tccgga', 'BsrBI' => 'ccgctc', 'BsrDI' => 'gcaatg', 'BsrI' => 'actgg', 'BstEII' => 'ggt[acgt]acc', 'BstXI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt]tgg', 'BtrI' => 'cacgtc', 'BtsI' => 'gcagtg', 'Cac8I' => 'gc[acgt][acgt]gc', 'CauII' => 'cc[cg]gg', 'Cfr10I' => '[ag]ccgg[ct]', 'CfrI' => '[ct]ggcc[ag]', 'CjeI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt]gt', 'CjePI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt][acgt]tc', 'ClaI' => 'atcgat', 'CviJI' => '[ag]gc[ct]', 'CviRI' => 'tgca', 'DdeI' => 'ct[acgt]ag', 'DpnI' => 'gatc', 'DraII' => '[ag]gg[acgt]cc[ct]', 'DraIII' => 'cac[acgt][acgt][acgt]gtg', 'DrdI' => 'gac[acgt][acgt][acgt][acgt][acgt][acgt]gtc', 'DrdII' => 'gaacca', 'DsaI' => 'cc[ag][ct]gg', 'Eam1105I' => 'gac[acgt][acgt][acgt][acgt][acgt]gtc', 'EciI' => 'ggcgga', 'Eco31I' => 'ggtctc', 'Eco47III' => 'agcgct', 'Eco57I' => 'ctgaag', 'EcoNI' => 'cct[acgt][acgt][acgt][acgt][acgt]agg', 'EcoRI' => 'gaattc', 'EcoRII' => 'cc[at]gg', 'EcoRV' => 'gatatc', 'Esp3I' => 'cgtctc', 'EspI' => 'gct[acgt]agc', 'FauI' => 'cccgc', 'FinI' => 'gggac', 'Fnu4HI' => 'gc[acgt]gc', 'FnuDII' => 'cgcg', 'FokI' => 'ggatg', 'FseI' => 'ggccggcc', 'GdiII' => 'cggcc[ag]', 'GsuI' => 'ctggag', 'HaeI' => '[at]ggcc[at]', 'HaeII' => '[ag]gcgc[ct]', 'HaeIII' => 'ggcc', 'HaeIV' => 'ga[ct][acgt][acgt][acgt][acgt][acgt][ag]tc', 'HgaI' => 'gacgc', 'HgiAI' => 'g[at]gc[at]c', 'HgiCI' => 'gg[ct][ag]cc', 'HgiEII' => 'acc[acgt][acgt][acgt][acgt][acgt][acgt]ggt', 'HgiJII' => 'g[ag]gc[ct]c', 'HhaI' => 'gcgc', 'Hin4I' => 'ga[cgt][acgt][acgt][acgt][acgt][acgt][acg]tc', 'HindII' => 'gt[ct][ag]ac', 'HindIII' => 'aagctt', 'HinfI' => 'ga[acgt]tc', 'HpaI' => 'gttaac', 'HpaII' => 'ccgg', 'HphI' => 'ggtga', 'Hpy178III' => 'tc[acgt][acgt]ga', 'Hpy188I' => 'tc[acgt]ga', 'Hpy99I' => 'cg[at]cg', 'KpnI' => 'ggtacc', 'Ksp632I' => 'ctcttc', 'MaeI' => 'ctag', 'MaeII' => 'acgt', 'MaeIII' => 'gt[acgt]ac', 'MboI' => 'gatc', 'MboII' => 'gaaga', 'McrI' => 'cg[ag][ct]cg', 'MfeI' => 'caattg', 'MjaIV' => 'gt[acgt][acgt]ac', 'MluI' => 'acgcgt', 'MmeI' => 'tcc[ag]ac', 'MnlI' => 'cctc', 'MseI' => 'ttaa', 'MslI' => 'ca[ct][acgt][acgt][acgt][acgt][ag]tg', 'MstI' => 'tgcgca', 'MwoI' => 'gc[acgt][acgt][acgt][acgt][acgt][acgt][acgt]gc', 'NaeI' => 'gccggc', 'NarI' => 'ggcgcc', 'NcoI' => 'ccatgg', 'NdeI' => 'catatg', 'NheI' => 'gctagc', 'NlaIII' => 'catg', 'NlaIV' => 'gg[acgt][acgt]cc', 'NotI' => 'gcggccgc', 'NruI' => 'tcgcga', 'NspBII' => 'c[ac]gc[gt]g', 'NspI' => '[ag]catg[ct]', 'PacI' => 'ttaattaa', 'Pfl1108I' => 'tcgtag', 'PflMI' => 'cca[acgt][acgt][acgt][acgt][acgt]tgg', 'PleI' => 'gagtc', 'PmaCI' => 'cacgtg', 'PmeI' => 'gtttaaac', 'PpiI' => 'gaac[acgt][acgt][acgt][acgt][acgt]ctc', 'PpuMI' => '[ag]gg[at]cc[ct]', 'PshAI' => 'gac[acgt][acgt][acgt][acgt]gtc', 'PsiI' => 'ttataa', 'PstI' => 'ctgcag', 'PvuI' => 'cgatcg', 'PvuII' => 'cagctg', 'RleAI' => 'cccaca', 'RsaI' => 'gtac', 'RsrII' => 'cgg[at]ccg', 'SacI' => 'gagctc', 'SacII' => 'ccgcgg', 'SalI' => 'gtcgac', 'SanDI' => 'ggg[at]ccc', 'SapI' => 'gctcttc', 'SauI' => 'cct[acgt]agg', 'ScaI' => 'agtact', 'ScrFI' => 'cc[acgt]gg', 'SduI' => 'g[agt]gc[act]c', 'SecI' => 'cc[acgt][acgt]gg', 'SexAI' => 'acc[at]ggt', 'SfaNI' => 'gcatc', 'SfeI' => 'ct[ag][ct]ag', 'SfiI' => 'ggcc[acgt][acgt][acgt][acgt][acgt]ggcc', 'SgfI' => 'gcgatcgc', 'SgrAI' => 'c[ag]ccgg[ct]g', 'SimI' => 'gggtc', 'SmaI' => 'cccggg', 'SmlI' => 'ct[ct][ag]ag', 'SnaBI' => 'tacgta', 'SnaI' => 'gtatac', 'SpeI' => 'actagt', 'SphI' => 'gcatgc', 'SplI' => 'cgtacg', 'SrfI' => 'gcccgggc', 'Sse232I' => 'cgccggcg', 'Sse8387I' => 'cctgcagg', 'Sse8647I' => 'agg[at]cct', 'SspI' => 'aatatt', 'Sth132I' => 'cccg', 'StuI' => 'aggcct', 'StyI' => 'cc[at][at]gg', 'SwaI' => 'atttaaat', 'TaqI' => 'tcga', 'TaqII' => 'gaccga', 'TatI' => '[at]gtac[at]', 'TauI' => 'gc[cg]gc', 'TfiI' => 'ga[at]tc', 'TseI' => 'gc[at]gc', 'Tsp45I' => 'gt[cg]ac', 'Tsp4CI' => 'ac[acgt]gt', 'TspEI' => 'aatt', 'TspRI' => 'ca[cg]tg[acgt][acgt]', 'Tth111I' => 'gac[acgt][acgt][acgt]gtc', 'Tth111II' => 'caa[ag]ca', 'UbaGI' => 'cac[acgt][acgt][acgt][acgt]gtg', 'UbaPI' => 'cgaacg', 'VspI' => 'attaat', 'XbaI' => 'tctaga', 'XcmI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt][acgt][acgt][acgt]tgg', 'XhoI' => 'ctcgag', 'XhoII' => '[ag]gatc[ct]', 'XmaIII' => 'cggccg', 'XmnI' => 'gaa[acgt][acgt][acgt][acgt]ttc' ); return %enzymes; } 1; RNAChange.pm100644000766000024 3726213604710571 21655 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/lib/Bio/Variation# # BioPerl module for Bio::Variation::RNAChange # # Please direct questions and support issues to # # Cared for by Heikki Lehvaslaiho # # Copyright Heikki Lehvaslaiho # # You may distribute this module under the same terms as perl itself # POD documentation - main docs before the code =head1 NAME Bio::Variation::RNAChange - Sequence change class for RNA level =head1 SYNOPSIS $rnachange = Bio::Variation::RNAChange->new ('-start' => $start, '-end' => $end, '-length' => $len, '-codon_pos' => $cp, '-upStreamSeq' => $upflank, '-dnStreamSeq' => $dnflank, '-proof' => $proof, '-isMutation' => 1, '-mut_number' => $mut_number ); $a1 = Bio::Variation::Allele->new; $a1->seq('a'); $rnachange->allele_ori($a1); my $a2 = Bio::Variation::Allele->new; $a2->seq('t'); $rnachange->add_Allele($a2); $rnachange->allele_mut($a2); print "The codon change is ", $rnachange->codon_ori, ">", $rnachange->codon_mut, "\n"; # add it to a SeqDiff container object $seqdiff->add_Variant($rnachange); # and create links to and from DNA level mutation objects $rnachange->DNAMutation($dnamut); $dnamut->RNAChange($rnachange); =head1 DESCRIPTION The instantiable class Bio::Variation::DNAMutation describes basic sequence changes at RNA molecule level. It uses methods defined in superclass Bio::Variation::VariantI. See L for details. You are normally expected to create a corresponding Bio::Variation::DNAMutation object even if mutation is defined at RNA level. The numbering follows then cDNA numbering. Link the DNAMutation object to the RNAChange object using the method DNAMutation(). If the variation described by a RNAChange object is translated, link the corresponding Bio::Variation::AAChange object to it using method AAChange(). See L and L for more information. =head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to the Bioperl mailing lists Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Support Please direct usage questions or support issues to the mailing list: I rather than to the module maintainer directly. Many experienced and reponsive experts will be able look at the problem and quickly address it. Please include a thorough description of the problem with code and data examples if at all possible. =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track the bugs and their resolution. Bug reports can be submitted via the web: https://github.com/bioperl/bioperl-live/issues =head1 AUTHOR - Heikki Lehvaslaiho Email: heikki-at-bioperl-dot-org =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut # Let the code begin... package Bio::Variation::RNAChange; $Bio::Variation::RNAChange::VERSION = '1.7.5'; use strict; # Object preamble - inheritance use Bio::Tools::CodonTable; use base qw(Bio::Variation::VariantI); sub new { my($class,@args) = @_; my $self = $class->SUPER::new(@args); my ($start, $end, $length, $strand, $primary, $source, $frame, $score, $gff_string, $allele_ori, $allele_mut, $upstreamseq, $dnstreamseq, $label, $status, $proof, $region, $region_value, $region_dist, $numbering, $mut_number, $isMutation, $codon_ori, $codon_mut, $codon_pos, $codon_table, $cds_end) = $self->_rearrange([qw(START END LENGTH STRAND PRIMARY SOURCE FRAME SCORE GFF_STRING ALLELE_ORI ALLELE_MUT UPSTREAMSEQ DNSTREAMSEQ LABEL STATUS PROOF REGION REGION_VALUE REGION_DIST NUMBERING MUT_NUMBER ISMUTATION CODON_ORI CODON_MUT CODON_POS TRANSLATION_TABLE CDS_END )],@args); $self->primary_tag("Variation"); $self->{ 'alleles' } = []; $start && $self->start($start); $end && $self->end($end); $length && $self->length($length); $strand && $self->strand($strand); $primary && $self->primary_tag($primary); $source && $self->source_tag($source); $frame && $self->frame($frame); $score && $self->score($score); $gff_string && $self->_from_gff_string($gff_string); $allele_ori && $self->allele_ori($allele_ori); $allele_mut && $self->allele_mut($allele_mut); $upstreamseq && $self->upStreamSeq($upstreamseq); $dnstreamseq && $self->dnStreamSeq($dnstreamseq); $label && $self->label($label); $status && $self->status($status); $proof && $self->proof($proof); $region && $self->region($region); $region_value && $self->region_value($region_value); $region_dist && $self->region_dist($region_dist); $numbering && $self->numbering($numbering); $mut_number && $self->mut_number($mut_number); $isMutation && $self->isMutation($isMutation); $codon_ori && $self->codon_ori($codon_ori); $codon_mut && $self->codon_mut($codon_mut); $codon_pos && $self->codon_pos($codon_pos); $codon_table && $self->codon_table($codon_table); $cds_end && $self->cds_end($cds_end); return $self; # success - we hope! } =head2 codon_ori Title : codon_ori Usage : $obj->codon_ori(); Function: Sets and returns codon_ori triplet. If value is not set, creates the codon triplet from the codon position and flanking sequences. The string has to be three characters long. The character content is not checked. Example : Returns : string Args : string =cut sub codon_ori { my ($self,$value) = @_; if (defined $value) { if (length $value != 3) { $self->warn("Codon string \"$value\" is not three characters long"); } $self->{'codon_ori'} = $value; } elsif (! $self->{'codon_ori'}) { my $codon_ori = ''; if ($self->region eq 'coding' && $self->start && $self->start >= 1) { $self->warn('Codon position is not defined') if not defined $self->codon_pos; $self->warn('Upstream flanking sequence is not defined') if not defined $self->upStreamSeq; $self->warn('Downstream flanking sequence is not defined') if not defined $self->dnStreamSeq; my $cpos = $self->codon_pos; $codon_ori = substr($self->upStreamSeq, -$cpos +1 , $cpos-1); $codon_ori .= substr($self->allele_ori->seq, 0, 4-$cpos) if $self->allele_ori and $self->allele_ori->seq; $codon_ori .= substr($self->dnStreamSeq, 0, 3-length($codon_ori)); } $self->{'codon_ori'} = lc $codon_ori; } return $self->{'codon_ori'}; } =head2 codon_mut Title : codon_mut Usage : $obj->codon_mut(); Function: Sets and returns codon_mut triplet. If value is not set, creates the codon triplet from the codon position and flanking sequences. Return undef for other than point mutations. Example : Returns : string Args : string =cut sub codon_mut { my ($self,$value) = @_; if (defined $value) { if (length $value != 3 ) { $self->warn("Codon string \"$value\" is not three characters long"); } $self->{'codon_mut'} = $value; } else { my $codon_mut = ''; if ($self->allele_ori->seq and $self->allele_mut->seq and CORE::length($self->allele_ori->seq) == 1 and CORE::length($self->allele_mut->seq) == 1 and $self->region eq 'coding' and $self->start >= 1) { $self->warn('Codon position is not defined') if not defined $self->codon_pos; $self->warn('Upstream flanking sequnce is not defined') if not defined $self->upStreamSeq; $self->warn('Downstream flanking sequnce is not defined') if not defined $self->dnStreamSeq; $self->throw('Mutated allele is not defined') if not defined $self->allele_mut; my $cpos = $self->codon_pos; $codon_mut = substr($self->upStreamSeq, -$cpos +1 , $cpos-1); $codon_mut .= substr($self->allele_mut->seq, 0, 4-$cpos) if $self->allele_mut and $self->allele_mut->seq; $codon_mut .= substr($self->dnStreamSeq, 0, 3-length($codon_mut)); $self->{'codon_mut'} = lc $codon_mut; } } return $self->{'codon_mut'}; } =head2 codon_pos Title : codon_pos Usage : $obj->codon_pos(); Function: Sets and returns the position of the mutation start in the codon. If value is not set, returns false. Example : Returns : 1,2,3 Args : none if get, the new value if set =cut sub codon_pos { my ($self,$value) = @_; if( defined $value) { if ( $value !~ /[123]/ ) { $self->throw("'$value' is not a valid codon position"); } $self->{'codon_pos'} = $value; } return $self->{'codon_pos'}; } =head2 codon_table Title : codon_table Usage : $obj->codon_table(); Function: Sets and returns the codon table id of the RNA If value is not set, returns 1, 'universal' code, as the default. Example : Returns : integer Args : none if get, the new value if set =cut sub codon_table { my ($self,$value) = @_; if( defined $value) { if ( not $value =~ /^\d$/ ) { $self->throw("'$value' is not a valid codon table ID\n". "Has to be a positive integer. Defaulting to 1\n"); } else { $self->{'codon_table'} = $value; } } if( ! exists $self->{'codon_table'} ) { return 1; } else { return $self->{'codon_table'}; } } =head2 DNAMutation Title : DNAMutation Usage : $mutobj = $obj->DNAMutation; : $mutobj = $obj->DNAMutation($objref); Function: Returns or sets the link-reference to a mutation/change object. If there is no link, it will return undef Returns : an obj_ref or undef =cut sub DNAMutation { my ($self,$value) = @_; if (defined $value) { if( ! $value->isa('Bio::Variation::DNAMutation') ) { $self->throw("Is not a Bio::Variation::DNAMutation object but a [$self]"); return; } else { $self->{'DNAMutation'} = $value; } } unless (exists $self->{'DNAMutation'}) { return; } else { return $self->{'DNAMutation'}; } } =head2 AAChange Title : AAChange Usage : $mutobj = $obj->AAChange; : $mutobj = $obj->AAChange($objref); Function: Returns or sets the link-reference to a mutation/change object. If there is no link, it will return undef Returns : an obj_ref or undef =cut sub AAChange { my ($self,$value) = @_; if (defined $value) { if( ! $value->isa('Bio::Variation::AAChange') ) { $self->throw("Is not a Bio::Variation::AAChange object but a [$self]"); return; } else { $self->{'AAChange'} = $value; } } unless (exists $self->{'AAChange'}) { return; } else { return $self->{'AAChange'}; } } =head2 exons_modified Title : exons_modified Usage : $modified = $obj->exons_modified; : $modified = $obj->exons_modified(1); Function: Returns or sets information (example: a simple boolean flag) about the modification of exons as a result of a mutation. =cut sub exons_modified { my ($self,$value)=@_; if (defined($value)) { $self->{'exons_modified'}=$value; } return ($self->{'exons_modified'}); } =head2 region Title : region Usage : $obj->region(); Function: Sets and returns the name of the sequence region type or protein domain at this location. If value is not set, returns false. Example : Returns : string Args : string =cut sub region { my ($self,$value) = @_; if( defined $value) { $self->{'region'} = $value; } elsif (not defined $self->{'region'}) { $self->warn('Mutation start position is not defined') if not defined $self->start and $self->verbose; $self->warn('Mutation end position is not defined') if not defined $self->end and $self->verbose; $self->warn('Length of the CDS is not defined, the mutation can be beyond coding region!') if not defined $self->cds_end and $self->verbose; $self->region('coding'); if ($self->end && $self->end < 0 ){ $self->region('5\'UTR'); } elsif ($self->start && $self->cds_end && $self->start > $self->cds_end ) { $self->region('3\'UTR'); } } return $self->{'region'}; } =head2 cds_end Title : cds_end Usage : $cds_end = $obj->get_cds_end(); Function: Sets or returns the cds_end from the beginning of the DNA sequence to the coordinate start used to describe variants. Should be the location of the last nucleotide of the terminator codon of the gene. Example : Returns : value of cds_end, a scalar Args : =cut sub cds_end { my ($self, $value) = @_; if (defined $value) { $self->warn("[$value] is not a good value for sequence position") if not $value =~ /^\d+$/ ; $self->{'cds_end'} = $value; } else { $self->{'cds_end'} = $self->SeqDiff->cds_end if $self->SeqDiff; } return $self->{'cds_end'}; } =head2 label Title : label Usage : $obj->label(); Function: Sets and returns mutation event label(s). If value is not set, or no argument is given returns false. Each instantiable subclass of L needs to implement this method. Valid values are listed in 'Mutation event controlled vocabulary' in http://www.ebi.ac.uk/mutations/recommendations/mutevent.html. Example : Returns : string Args : string =cut sub label { my ($self) = @_; my ($o, $m, $type); $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq; $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq; my $ct = Bio::Tools::CodonTable -> new ( -id => $self->codon_table ); if ($o and $m and CORE::length($o) == 1 and CORE::length($m) == 1) { if (defined $self->AAChange) { if ($self->start > 0 and $self->start < 4 ) { $type = 'initiation codon'; } elsif ($self->codon_ori && $ct->is_ter_codon($self->codon_ori) ) { #AAChange->allele_ori and $self->AAChange->allele_ori->seq eq '*' ) { $type = 'termination codon'; } elsif ($self->codon_mut && $ct->is_ter_codon($self->codon_mut) ) { #elsif ($self->AAChange->allele_mut and $self->AAChange->allele_mut->seq eq "*") { $type = 'nonsense'; } elsif ($o and $m and ($o eq $m or $self->AAChange->allele_ori->seq eq $self->AAChange->allele_mut->seq)) { $type = 'silent'; } else { $type = 'missense'; } } else { $type = 'unknown'; } } else { my $len = 0; $len = CORE::length($o) if $o; $len -= CORE::length($m) if $m; if ($len%3 == 0 ) { $type = 'inframe'; } else { $type = 'frameshift'; } if (not $m ) { $type .= ', '. 'deletion'; } elsif (not $o ) { $type .= ', '. 'insertion'; } else { $type .= ', '. 'complex'; } if ($self->codon_ori && $ct->is_ter_codon($self->codon_ori) ) { $type .= ', '. 'termination codon'; } } $self->{'label'} = $type; return $self->{'label'}; } =head2 _change_codon_pos Title : _change_codon_pos Usage : $newCodonPos = _change_codon_pos($myCodonPos, 5) Function: Keeps track of the codon position in a changeing sequence Returns : codon_pos = integer 1, 2 or 3 Args : valid codon position signed integer offset to a new location in sequence =cut sub _change_codon_pos ($$) { my ($cpos, $i) = @_; $cpos = ($cpos + $i%3)%3; if ($cpos > 3 ) { $cpos = $cpos - 3; } elsif ($cpos < 1 ) { $cpos = $cpos + 3; } return $cpos; } 1; DNAMutation.pm100644000766000024 2334313604710571 22245 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/lib/Bio/Variation# # BioPerl module for Bio::Variation::DNAMutation # # Please direct questions and support issues to # # Cared for by Heikki Lehvaslaiho # # Copyright Heikki Lehvaslaiho # # You may distribute this module under the same terms as perl itself # POD documentation - main docs before the code =head1 NAME Bio::Variation::DNAMutation - DNA level mutation class =head1 SYNOPSIS $dnamut = Bio::Variation::DNAMutation->new ('-start' => $start, '-end' => $end, '-length' => $len, '-upStreamSeq' => $upflank, '-dnStreamSeq' => $dnflank, '-proof' => $proof, '-isMutation' => 1, '-mut_number' => $mut_number ); $a1 = Bio::Variation::Allele->new; $a1->seq('a'); $dnamut->allele_ori($a1); my $a2 = Bio::Variation::Allele->new; $a2->seq('t'); $dnamut->add_Allele($a2); print "Restriction changes are ", $dnamut->restriction_changes, "\n"; # add it to a SeqDiff container object $seqdiff->add_Variant($dnamut); =head1 DESCRIPTION The instantiable class Bio::Variation::DNAMutation describes basic sequence changes in genomic DNA level. It uses methods defined in superclass Bio::Variation::VariantI. See L for details. If the variation described by a DNAMutation object is transcibed, link the corresponding Bio::Variation::RNAChange object to it using method RNAChange(). See L for more information. =head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to the Bioperl mailing lists Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Support Please direct usage questions or support issues to the mailing list: I rather than to the module maintainer directly. Many experienced and reponsive experts will be able look at the problem and quickly address it. Please include a thorough description of the problem with code and data examples if at all possible. =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track the bugs and their resolution. Bug reports can be submitted via the web: https://github.com/bioperl/bioperl-live/issues =head1 AUTHOR - Heikki Lehvaslaiho Email: heikki-at-bioperl-dot-org =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut # Let the code begin... package Bio::Variation::DNAMutation; $Bio::Variation::DNAMutation::VERSION = '1.7.5'; use strict; # Object preamble - inheritance use base qw(Bio::Variation::VariantI); sub new { my($class,@args) = @_; my $self = $class->SUPER::new(@args); my ($start, $end, $length, $strand, $primary, $source, $frame, $score, $gff_string, $allele_ori, $allele_mut, $upstreamseq, $dnstreamseq, $label, $status, $proof, $region, $region_value, $region_dist, $numbering, $cpg, $mut_number, $ismutation) = $self->_rearrange([qw(START END LENGTH STRAND PRIMARY SOURCE FRAME SCORE GFF_STRING ALLELE_ORI ALLELE_MUT UPSTREAMSEQ DNSTREAMSEQ LABEL STATUS PROOF REGION REGION_VALUE REGION_DIST NUMBERING CPG MUT_NUMBER ISMUTATION )], @args); $self->primary_tag("Variation"); $self->{ 'alleles' } = []; $start && $self->start($start); $end && $self->end($end); $length && $self->length($length); $strand && $self->strand($strand); $primary && $self->primary_tag($primary); $source && $self->source_tag($source); $frame && $self->frame($frame); $score && $self->score($score); $gff_string && $self->_from_gff_string($gff_string); $allele_ori && $self->allele_ori($allele_ori); $allele_mut && $self->allele_mut($allele_mut); $upstreamseq && $self->upStreamSeq($upstreamseq); $dnstreamseq && $self->dnStreamSeq($dnstreamseq); $label && $self->label($label); $status && $self->status($status); $proof && $self->proof($proof); $region && $self->region($region); $region_value && $self->region_value($region_value); $region_dist && $self->region_dist($region_dist); $numbering && $self->numbering($numbering); $mut_number && $self->mut_number($mut_number); $ismutation && $self->isMutation($ismutation); $cpg && $self->CpG($cpg); return $self; # success - we hope! } =head2 CpG Title : CpG Usage : $obj->CpG() Function: sets and returns boolean values for variation hitting a CpG site. Unset value return -1. Example : $obj->CpG() Returns : boolean Args : optional true of false value =cut sub CpG { my ($obj,$value) = @_; if( defined $value) { $value ? ($value = 1) : ($value = 0); $obj->{'cpg'} = $value; } elsif (not defined $obj->{'label'}) { $obj->{'cpg'} = $obj->_CpG_value; } else { return $obj->{'cpg'}; } } sub _CpG_value { my ($self) = @_; if ($self->allele_ori eq $self->allele_mut and length ($self->allele_ori) == 1 ) { # valid only for point mutations # CpG methylation-mediated deamination: # CG -> TG | CG -> CA substitutions # implementation here is less strict: if CpG dinucleotide was hit if ( ( ($self->allele_ori eq 'c') && (substr($self->upStreamSeq, 0, 1) eq 'g') ) || ( ($self->allele_ori eq 'g') && (substr($self->dnStreamSeq, -1, 1) eq 'c') ) ) { return 1; } else { return 0; } } else { $self->warn('CpG makes sense only in the context of point mutation'); return; } } =head2 RNAChange Title : RNAChange Usage : $mutobj = $obj->RNAChange; : $mutobj = $obj->RNAChange($objref); Function: Returns or sets the link-reference to a mutation/change object. If there is no link, it will return undef Returns : an obj_ref or undef =cut sub RNAChange { my ($self,$value) = @_; if (defined $value) { if( ! $value->isa('Bio::Variation::RNAChange') ) { $self->throw("Is not a Bio::Variation::RNAChange object but a [$self]"); return; } else { $self->{'RNAChange'} = $value; } } unless (exists $self->{'RNAChange'}) { return; } else { return $self->{'RNAChange'}; } } =head2 label Title : label Usage : $obj->label(); Function: Sets and returns mutation event label(s). If value is not set, or no argument is given returns false. Each instantiable subclass of L needs to implement this method. Valid values are listed in 'Mutation event controlled vocabulary' in http://www.ebi.ac.uk/mutations/recommendations/mutevent.html. Example : Returns : string Args : string =cut sub label { my ($self, $value) = @_; my ($o, $m, $type); $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq; $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq; if (not $o and not $m ) { $self->warn("[DNAMutation, label] Both alleles should not be empty!\n"); $type = 'no change'; # is this enough? } elsif ($o && $m && length($o) == length($m) && length($o) == 1) { $type = 'point'; $type .= ", ". _point_type_label($o, $m); } elsif (not $o ) { $type = 'insertion'; } elsif (not $m ) { $type = 'deletion'; } else { $type = 'complex'; } $self->{'label'} = $type; return $self->{'label'}; } sub _point_type_label { my ($o, $m) = @_; my ($type); my %transition = ('a' => 'g', 'g' => 'a', 'c' => 't', 't' => 'c'); $o = lc $o; $m = lc $m; if ($o eq $m) { $type = 'no change'; } elsif ($transition{$o} eq $m ) { $type = 'transition'; } else { $type = 'transversion'; } } =head2 sysname Title : sysname Usage : $self->sysname Function: This subroutine creates a string corresponding to the 'systematic name' of the mutation. Systematic name is specified in Antonorakis & MDI Nomenclature Working Group: Human Mutation 11:1-3, 1998. Returns : string =cut sub sysname { my ($self,$value) = @_; if( defined $value) { $self->{'sysname'} = $value; } else { $self->warn('Mutation start position is not defined') if not defined $self->start; my $sysname = ''; # show the alphabet only if $self->SeqDiff->alphabet is set; my $mol = ''; if ($self->SeqDiff ) { if ($self->SeqDiff && $self->SeqDiff->alphabet && $self->SeqDiff->alphabet eq 'dna') { $mol = 'g.'; } elsif ($self->SeqDiff->alphabet && $self->SeqDiff->alphabet eq 'rna') { $mol = 'c.'; } } my $sep; if ($self->isMutation) { $sep = '>'; } else { $sep = '|'; } my $sign = '+'; $sign = '' if $self->start < 1; $sysname .= $mol ;#if $mol; $sysname .= $sign. $self->start; my @alleles = $self->each_Allele; $self->allele_mut($alleles[0]); $sysname .= 'del' if $self->label =~ /deletion/; $sysname .= 'ins' if $self->label =~ /insertion/; $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq; #push @alleles, $self->allele_mut if $self->allele_mut; foreach my $allele (@alleles) { $self->allele_mut($allele); $sysname .= $sep if $self->label =~ /point/ or $self->label =~ /complex/; $sysname .= uc $self->allele_mut->seq if $self->allele_mut->seq; } $self->{'sysname'} = $sysname; #$self->{'sysname'} = $sign. $self->start. # uc $self->allele_ori->seq. $sep. uc $self->allele_mut->seq; } return $self->{'sysname'}; } 1; AAReverseMutate.pm100644000766000024 1653513604710571 23124 0ustar00cjfieldsstaff000000000000Bio-Variation-1.7.5/lib/Bio/Variation# # BioPerl module for Bio::Variation::AAReverseMutate # # Please direct questions and support issues to # # Cared for by Heikki Lehvaslaiho # # Copyright Heikki Lehvaslaiho # # You may distribute this module under the same terms as perl itself # POD documentation - main docs before the code =head1 NAME Bio::Variation::AAReverseMutate - point mutation and codon information from single amino acid changes =head1 SYNOPSIS $aamut = Bio::Variation::AAReverseMutate->new (-aa_ori => 'F', -aa_mut => 'S', -codon_ori => 'ttc', # optional -codon_table => '3' # defaults to 1 ); @points = $aamut->each_Variant; if (scalar @points > 0 ) { foreach $rnachange ( @points ) { # $rnachange is a Bio::Variation::RNAChange object print " ", $rnachange->allele_ori->seq, ">", $rnachange->allele_mut->seq, " in ", $rnachange->codon_ori, ">", $rnachange->codon_mut, " at position ", $rnachange->codon_pos, "\n"; } } else { print "No point mutations possible\n", } =head1 DESCRIPTION Bio::Variation::AAReverseMutate objects take in reference and mutated amino acid information and deduces potential point mutations at RNA level leading to this change. The choice can be further limited by letting the object know what is the the codon in the reference sequence. The results are returned as L objects. =head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to the Bioperl mailing lists Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Support Please direct usage questions or support issues to the mailing list: I rather than to the module maintainer directly. Many experienced and reponsive experts will be able look at the problem and quickly address it. Please include a thorough description of the problem with code and data examples if at all possible. =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track the bugs and their resolution. Bug reports can be submitted via the web: https://github.com/bioperl/bioperl-live/issues =head1 AUTHOR - Heikki Lehvaslaiho Email: heikki-at-bioperl-dot-org =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut # Let the code begin... package Bio::Variation::AAReverseMutate; $Bio::Variation::AAReverseMutate::VERSION = '1.7.5'; use strict; # Object preamble - inheritance use Bio::Tools::CodonTable; use Bio::Variation::RNAChange; use Bio::Variation::Allele; use base qw(Bio::Root::Root); sub new { my($class,@args) = @_; my $self = $class->SUPER::new(@args); my ($aa_ori, $aa_mut, $codon_ori, $codon_table) = $self->_rearrange([qw(AA_ORI AA_MUT CODON CODON_TABLE )],@args); $aa_ori && $self->aa_ori($aa_ori); $aa_mut && $self->aa_mut($aa_mut); $codon_ori && $self->codon_ori($codon_ori); $codon_table && $self->codon_table($codon_table); return $self; # success - we hope! } =head2 aa_ori Title : aa_ori Usage : $obj->aa_ori(); Function: Sets and returns original aa sequence. If value is not set, returns false. Amino acid sequences are stored in upper case characters, others in lower case. Example : Returns : string Args : single character amino acid code =cut sub aa_ori { my ($self,$value) = @_; if( defined $value) { if ( uc($value) !~ /^[ARNDCQEGHILKMFPSTWYVBZX*]$/ ) { $self->throw("'$value' is not a valid one letter amino acid symbol\n"); } else { $self->{'aa_ori'} = uc $value; } } return $self->{'aa_ori'}; } =head2 aa_mut Title : aa_mut Usage : $obj->aa_mut(); Function: Sets and returns the mutated allele sequence. If value is not set, returns false. Example : Returns : string Args : single character amino acid code =cut sub aa_mut { my ($self,$value) = @_; if( defined $value) { if ( uc($value) !~ /^[ARNDCQEGHILKMFPSTWYVBZX*]$/ ) { $self->throw("'$value' is not a valid one letter amino acid symbol\n"); } else { $self->{'aa_mut'} = uc $value; } } return $self->{'aa_mut'}; } =head2 codon_ori Title : codon_ori Usage : $obj->codon_ori(); Function: Sets and returns codon_ori triplet. If value is not set, returns false. The string has to be three characters long. The character content is not checked. Example : Returns : string Args : string =cut sub codon_ori { my ($self,$value) = @_; if( defined $value) { if (length $value != 3 or lc $value =~ /[^atgc]/) { $self->warn("Codon string \"$value\" is not valid unique codon"); } $self->{'codon_ori'} = lc $value; } return $self->{'codon_ori'}; } =head2 codon_table Title : codon_table Usage : $obj->codon_table(); Function: Sets and returns the codon table id of the RNA If value is not set, returns 1, 'universal' code, as the default. Example : Returns : integer Args : none if get, the new value if set =cut sub codon_table { my ($self,$value) = @_; if( defined $value) { if ( not $value =~ /^\d+$/ ) { $self->throw("'$value' is not a valid codon table ID\n". "Has to be a positive integer. Defaulting to 1\n"); } else { $self->{'codon_table'} = $value; } } if( ! exists $self->{'codon_table'} ) { return 1; } else { return $self->{'codon_table'}; } } =head2 each_Variant Title : each_Variant Usage : $obj->each_Variant(); Function: Returns a list of Variants. Example : Returns : list of Variants Args : none =cut sub each_Variant{ my ($self,@args) = @_; $self->throw("aa_ori is not defined\n") if not defined $self->aa_ori; $self->throw("aa_mut is not defined\n") if not defined $self->aa_mut; my (@points, $codon_pos, $allele_ori, $allele_mut); my $ct = Bio::Tools::CodonTable->new( '-id' => $self->codon_table ); foreach my $codon_ori ($ct->revtranslate($self->aa_ori)) { next if $self->codon_ori and $self->codon_ori ne $codon_ori; foreach my $codon_mut ($ct->revtranslate($self->aa_mut)) { my $k = 0; my $length = 0; $codon_pos = $allele_ori = $allele_mut = undef; while ($k<3) { my $nt_ori = substr ($codon_ori, $k, 1); my $nt_mut = substr ($codon_mut, $k, 1); if ($nt_ori ne $nt_mut) { $length++; $codon_pos = $k+1; $allele_ori = $nt_ori; $allele_mut = $nt_mut; } $k++; } if ($length == 1) { my $rna = Bio::Variation::RNAChange->new ('-length' => '1', '-codon_ori' => $codon_ori, '-codon_mut' => $codon_mut, '-codon_pos' => $codon_pos, '-isMutation' => 1 ); my $all_ori = Bio::Variation::Allele->new('-seq'=>$allele_ori); $rna->allele_ori($all_ori); my $all_mut = Bio::Variation::Allele->new('-seq'=>$allele_mut); $rna->allele_mut($all_mut); push @points, $rna; } } } return @points; } 1;