STAR-Parser-0.59/ 42755 37455 1276 0 10035306136 12164 5ustar wbluhmrcsbSTAR-Parser-0.59/bin/ 42755 37455 1276 0 10035306136 12734 5ustar wbluhmrcsbSTAR-Parser-0.59/bin/query.pl100555 37455 1276 6422 7302532147 14524 0ustar wbluhmrcsb#! /usr/local/bin/perl -w ############################## # # # query.pl # # # # Queries a data structure # # using the STAR::DataBlock # # module # # # # Wolfgang Bluhm, SDSC # # # ############################## use strict; use STAR::DataBlock; use STAR::Dictionary; my ($s, $i); my ($string, $dict); my (@item_data); my @selected; my $data = STAR::DataBlock->new($ARGV[0]); # 1-arg constructor # #this just retrieves an already blessed #object, so ok even if it's a Dictionary #which inherits from DataBlock # could also replace the above one-liner # with the following two lines: # # my $data = STAR::DataBlock->new; # no-arg constructor # $data = STAR::DataBlock::retrieve($ARGV[0]); $dict = 0; $dict = 1 if $data->type eq "dictionary"; if ( $dict ) { print "-"x62,"\n"; print "Query dictionary by save block and item name.\n"; print "save can be: - (not in a save block),\n"; print " A_CATEGORY (e.g. ENTITY),\n"; print " _an_item (e.g. _entity.id)\n"; print "Capitalization may vary with dictionary.\n"; print "Item examples: _dictionary.version ", "_dictionary_history.revision\n"; print " _category.description ", "_category_examples.case\n"; print " _item_linked.child_name ", "_item_description.description\n"; print "For items with multiple values: ", "choose index (e.g.: 1, 4-6)\n"; print "-"x62,"\n"; } else { print "-"x62,"\n"; print "Query ",$data->title," by item name.\n"; print "For items with multiple values: ", "choose index (e.g.: 1, 4-6)\n"; print "-"x62,"\n"; } do { if ( $dict ) { print "save: "; chomp ($s = ); } else { $s = "-"; } print "item: "; chomp ($i = ); @item_data = $data->get_item_data( -save=>$s, -item=>$i ); @selected = (); if ( $#item_data < 0 ) { # returned null, item doesn't exist print "item $i doesn't exist\n"; } else { if ( $#item_data > 0 ) { print "index (range: 0..", $#item_data, "): "; chomp ( $string = ); @selected = selection(); } else { push @selected, 0; } foreach (@selected) { print "[$_] " unless ( $#item_data == 0 ); print $item_data[$_]; print "\n"; } } } while (print("Continue with query? ") && =~ /\by/i); sub selection { while ( $string =~ /\d+/ ) { if ( $string =~ /^\D*(\d+)\-(\d+)(.*)/ ) { #range (e.g. 1-3) push @selected, ($1..$2); $string = $3; } elsif ( $string =~ /^\D*(\d+)(.*)/ ) { #one number push @selected, $1; $string = $2; } } return @selected; } =head1 DESCRIPTION This script provides a simple interactive query interface to the data structure of a file of dictionary (.cob files). Query is by item only (for a data file), or by save block and item (for a dictionary file). =head1 USAGE perl query.pl =cut STAR-Parser-0.59/bin/dependent.pl100555 37455 1276 2337 7302530715 15325 0ustar wbluhmrcsb#! /usr/local/bin/perl -w ######################### # # # Script dependent.pl # # Wolfgang Bluhm, SDSC # # # ######################### # writes a list of all dependent item definitions in dictionary use STAR::DataBlock; use STAR::Dictionary; my $dict = STAR::Dictionary->new($ARGV[0]); my @saves = $dict->get_save_blocks; # these describe both items and cats my ($save, $mand); my (@depend_items, $depend_item); open (OUT, ">$ARGV[1]"); print OUT "Item\n"; print OUT "\tDependent items\n"; print OUT "---------------------------\n\n"; foreach $save ( @saves ) { if ( $save =~ /\./ ) { #this is an item @depend_items = $dict->get_item_data(-save=>$save, -item=>"_item_dependent.dependent_name"); if ( $#depend_items >=0 ) { print OUT "$save\n"; foreach $depend_item ( @depend_items ) { print OUT "\t$depend_item\n"; } } } } close OUT; =head1 DESCRIPTION Reads the saved data structure of a dictionary (.cob file), and outputs a file with a list of dependent item definitions contained in the dictionary. =head1 USAGE perl dependent.pl =cut STAR-Parser-0.59/bin/write.pl100555 37455 1276 1355 7302532147 14511 0ustar wbluhmrcsb#! /usr/local/bin/perl -w ############################ # # # Script write.pl # # Wolfgang Bluhm, SDSC # # # ############################ #Reads a .cob file (CIF object) and writes it as a CIF file use STAR::DataBlock; use STAR::Writer; use strict; if ( !$ARGV[0] || !$ARGV[1] ) { print "Usage: write.pl \n"; exit; } my $data = STAR::DataBlock->new($ARGV[0]); STAR::Writer->write_cif( -dataref=>$data, -file=>$ARGV[1] ); =head1 DESCRIPTION Reads in the data structure of a file or dictionary (.cob file), and writes it out as a .cif file. =head1 USAGE perl write.pl =cut STAR-Parser-0.59/bin/parse.pl100555 37455 1276 2426 7322364304 14471 0ustar wbluhmrcsb#! /usr/local/bin/perl -w ################################### # # # Script parse.pl # # Wolfgang Bluhm, SDSC # # # ################################### # A simple application script for the # parsing module STAR::Parser use STAR::Parser; use strict; use Getopt::Std; use vars qw($opt_d $opt_D $opt_l $opt_s); my ($file, $dict, @objs, $obj, $title); my $options = ""; getopt(''); $options .= 'd' if ( $opt_d ); #debug $options .= 'l' if ( $opt_l ); #logfile $dict = 1 if ( $opt_D ); #Dictionary $file = $ARGV[0]; @objs = STAR::Parser->parse(-file=>$file, -dict=>$dict, -options=>$options); foreach $obj ( @objs ) { if ( $opt_s ) { # save data structure $title =($obj->title).".cob"; $obj->store($title); } } =head1 DESCRIPTION Parses a STAR-compliant file (e.g. .cif file or dictionary). Command line options including saving the parsed data structure =head1 USAGE perl parse.pl [-dDls] <.cif file> -d writes debugging log to STDERR -D file to be parsed is a dictionary -l writes program activity log to STDERR -s Saves each entry as a .cob file to disk =cut STAR-Parser-0.59/bin/check.pl100555 37455 1276 2140 7302530715 14424 0ustar wbluhmrcsb#! /usr/local/bin/perl -w ######################### # # # Script check.pl # # Wolfgang Bluhm, SDSC # # # ######################### # a simple application script for the module STAR::Checker use STAR::Checker; use strict; use Getopt::Std; use vars qw($opt_d $opt_l); my ($data, $dict, $options, $check); $options=""; getopt(''); $options .= 'd' if ( $opt_d ); #debug $options .= 'l' if ( $opt_l ); #log if ( !$ARGV[0] || !$ARGV[1] ) { print "Usage: check.pl [-dl] DataBlock Dictionary\n"; exit; } $data = STAR::DataBlock->new($ARGV[0]); $dict = STAR::Dictionary->new($ARGV[1]); $check=STAR::Checker->check(-datablock=>$data, -dictionary=>$dict, -options=>$options); print STDERR "Checker found ", $check?"no ":"", "problems.\n"; =head1 DESCRIPTION Checks the data representation of a cif file (.cob file) against a specified dictionary (.cob file). =head1 USAGE perl check.pl [-dl] -d write debug information to STDERR -l write activity log to STDERR =cut STAR-Parser-0.59/bin/keys.pl100555 37455 1276 1323 7302532147 14325 0ustar wbluhmrcsb#! /usr/local/bin/perl -w ############################# # # # Script keys.pl # # # # Writes out a hierarchical # # list of hash keys # # # ############################# use strict; use STAR::DataBlock; use STAR::Dictionary; my $data = STAR::DataBlock->new($ARGV[0]); open (OUT, ">$ARGV[1]"); print OUT $data->get_keys; close OUT; =head1 DESCRIPTION Reads a data structure (.cob file). Outputs a file that contains a hierarchically formatted list of all the hash keys (data blocks. save blocks, categories, items) present in the data structure. =head1 USAGE perl keys.pl =cut STAR-Parser-0.59/bin/filterDict.pl100555 37455 1276 1370 7302532147 15445 0ustar wbluhmrcsb#! /usr/local/bin/perl -w ################################### # # # Script filterDict.pl # # Wolfgang Bluhm, SDSC # # # ################################### use STAR::Filter; use strict; my $dict = STAR::Dictionary->new($ARGV[0]); my $dict_filtered = STAR::Filter->filter_dict(-dict=>$dict); my $outname = $ARGV[0].'.filtered'; $dict_filtered->store($outname); =head1 DESCRIPTION Reads the data structure of a dictionary (.cob file). Enters a very simple interactive dialog that prompts the user for each save block in the dictionary whether to retain it. Outputs a new file (original .cob file + ".filtered"). =head1 USAGE perl filterDict.pl =cut STAR-Parser-0.59/bin/parseMulti.pl100555 37455 1276 10013 7430354263 15516 0ustar wbluhmrcsb#! /usr/local/bin/perl =head1 DESCRIPTION This script will attempt to parse all cif files in a given directory and save the parsed binaries. =head1 USAGE parseMulti.pl [-i -r -s -o -f -l -c] Options: -i input directory -r recursively search all subdirectories -o output directory -f filter through dictionary -l log file -c compress the binaries -s size limit: skip files that are greater than MB uncompressed Comments: -i defaults to working directory if omitted -o defaults to working directory if omitted -l defaults to cifParse.log if omitted =cut use STAR::Parser; use STAR::Filter; use strict; use Getopt::Std; use vars qw( $opt_i $opt_r $opt_o $opt_f $opt_l $opt_c $opt_s ); getopt('iofls'); $opt_i or $opt_i = "."; $opt_o or $opt_o = "."; $opt_l or $opt_l = "cifParse.log"; my $compress = "/bin/compress -f"; my $uncompress = "/bin/uncompress -f"; my @tmp; # temporary file list (find command output) my @files; # file list my $file; # one file my $uncompressed; # uncompressed file my $status; # status of system call my $id; # pdbid my $parse_opt; # parse options my $data; # parsed data object my $filtered; # filtered data object my $dict; # dictionary my $date; # date and time my $size; # size limit for files (uncompressed, in MB) my $pwd; # working directory if ( -e "temp.cif.Z" or -e "temp.cif" ) { die "Please remove file(s) temp.cif* from working directory"; } $pwd = `pwd`; # open log file # open (LOG, ">$opt_l"); print LOG "Working directory: $pwd"; print LOG "Directory of cif files: $opt_i\n"; print LOG "Subdirectories included? ", $opt_r?"yes":"no","\n"; print LOG "Size limit for uncompressed files? ", $opt_s?"$opt_s MB":"none", "\n"; print LOG "Dictionary used for filtering: ", $opt_f?"$opt_f":"none","\n"; print LOG "\n"; # open dictionary # if ( $opt_f ) { $opt_f =~ /\.cob/ or die "Dictionary must be a binary (.cob file)"; $dict = STAR::Dictionary->new( $opt_f ); } # assemble file list # if ( $opt_r ) { @tmp = `find $opt_i -name "*.cif" -print`; @tmp = ( @tmp, `find $opt_i -name "*.cif.Z" -print` ); } else { @tmp = `ls -1 $opt_i/*.cif $opt_i/*.cif.Z`; } foreach ( @tmp ) { /^(.*\.cif[\.Z]*)/; push @files, $1; } $date = `date`; print LOG "Started parsing: $date"; # process all files # foreach $file ( sort @files ) { $file =~ /(....)\.cif/; $id = $1; if ( $file =~ /^(.*)\.Z/ ) { $uncompressed = $1; eval{ system( "cp -f $file temp.cif.Z; $uncompress temp.cif.Z" ); }; if ( ! $@ ) { &parse( "temp.cif", $file ); } else { print LOG "Could not uncompress $file\n"; } } else { &parse( $file, $file ); } } eval{ system( "/bin/rm -f temp.cif" ); }; $date = `date`; print LOG "Finished parsing: $date"; close LOG; exit(0); sub parse { if ( $opt_s ) { $size = -s "$_[0]"; if ( $size > ( $opt_s * 1048576 ) ) { print LOG "File $_[1] ($size bytes uncompressed) exceeds $opt_s MB size limit\n"; return; } } eval { ( $data ) = STAR::Parser->parse(-file=>$_[0]); }; if ( $@ ) { print LOG "Could not parse $_[1]\n"; return; } else { print LOG "Parsed $_[1]\n"; if ( $opt_f ) { eval { $filtered = STAR::Filter->filter_through_dict(-data=>$data, -dict=>$dict); }; if ( $@ ) { print LOG "Could not filter $_[0]\n"; return; } else { $filtered->store( "$opt_o/$id.cob" ); } } else { $data->store( "$opt_o/$id.cob" ); } } if ( $opt_c ) { eval{ system( "$compress $opt_o/$id.cob" ); }; if ( $@ ) { print LOG "Could not compress $opt_o/$id.cob\n"; } } return; } STAR-Parser-0.59/bin/mandatory.pl100555 37455 1276 4335 7302532147 15356 0ustar wbluhmrcsb#! /usr/local/bin/perl -w ######################### # # # Sript mandatory.pl # # Wolfgang Bluhm, SDSC # # # ######################### # writes a list with all categories and items in the dictionary # grouped by whether they are listed as mandatory or not use STAR::DataBlock; use STAR::Dictionary; my $dict = STAR::Dictionary->new($ARGV[0]); my @saves = $dict->get_save_blocks; # these describe both items and cats my ($save, $mand); my @mand_items; my @opt_items; my @mand_cats; my @opt_cats; open (OUT, ">$ARGV[1]"); foreach $save ( @saves ) { if ( $save eq "-" ) { next; } elsif ( $save !~ /\./ ) { #this is a category $mand = ($dict->get_item_data(-save=>$save, -item=>"_category.mandatory_code"))[0]; if ( $mand eq "yes" ) { push @mand_cats, $save; } else { push @opt_cats, $save; } } else { #this is an item $mand = ($dict->get_item_data(-save=>$save, -item=>"_item.mandatory_code"))[0]; if ( $mand eq "yes" ) { push @mand_items, $save; } else { push @opt_items, $save; } } } print OUT "Summary:\n"; print OUT "--------\n"; print OUT $#mand_cats+1, " mandatory categories\n"; print OUT $#opt_cats+1, " optional categories\n"; print OUT $#mand_items+1, " mandatory items\n"; print OUT $#opt_items+1, " optional items\n"; print OUT "\n\nMandatory Categories:\n"; print OUT "---------------------\n"; foreach ( @mand_cats ) { print OUT "$_\n"} print OUT "\n\nOptional Categories:\n"; print OUT "--------------------\n"; foreach ( @opt_cats ) { print OUT "$_\n"} print OUT "\n\nMandatory Items:\n"; print OUT "----------------\n"; foreach ( @mand_items ) { print OUT "$_\n"} print OUT "\n\nOptional Items:\n"; print OUT "---------------\n"; foreach ( @opt_items ) { print OUT "$_\n"} close OUT; =head1 DESCRIPTION Reads in the data structure of a dictionary (.cob file), and outputs a file which lists the mandatory status for each category and item described. =head1 USAGE perl mandatory.pl =cut STAR-Parser-0.59/bin/parentChild.pl100555 37455 1276 2525 7302532147 15614 0ustar wbluhmrcsb#! /usr/local/bin/perl -w ######################### # # # Script parentChild.pl # # Wolfgang Bluhm, SDSC # # # ######################### # writes a list of all dependent item definitions in dictionary use STAR::DataBlock; use STAR::Dictionary; my $dict = STAR::Dictionary->new($ARGV[0]); my @saves = $dict->get_save_blocks; # these describe both items and cats my ($save, $i ); my (@parent_items, @child_items); open (OUT, ">$ARGV[1]"); foreach $save ( @saves ) { if ( 1 ) { #all @parent_items = $dict->get_item_data(-save=>$save, -item=>"_item_linked.parent_name"); @child_items = $dict->get_item_data(-save=>$save, -item=>"_item_linked.child_name"); if ( $#parent_items >=0 ) { print OUT "save block: $save\n"; print OUT "parent\t\t\t child\n"; foreach $i ( 0..$#parent_items ) { print OUT $parent_items[$i],"\t ",$child_items[$i],"\n"; } print OUT "\n"; } } } close OUT; =head1 DESCRIPTION Reads the data structure of a dictionary (.cob file) and outputs a file listing all parent-child relationships in the dictionary. =head1 USAGE perl parentChild.pl =cut STAR-Parser-0.59/bin/filter.pl100555 37455 1276 1522 7302532147 14640 0ustar wbluhmrcsb#! /usr/local/bin/perl -w ################################### # # # Script filter.pl # # Wolfgang Bluhm, SDSC # # # ################################### use STAR::Filter; use strict; my $data = STAR::DataBlock->new($ARGV[0]); my $dict = STAR::Dictionary->new($ARGV[1]); my $out = STAR::Filter->filter_through_dict(-data=>$data, -dict=>$dict); my $outname = $ARGV[0].'.filtered'; $out->store($outname); =head1 DESCRIPTION Reads a data structure (.cob file) and filters it through a dictionary (.cob file). Only items present in the dictionary are retained in the file. Outputs a new data structure file (.cob file -- original name + ".filtered"). =head1 USAGE perl filter.pl =cut STAR-Parser-0.59/bin/create.pl100555 37455 1276 2434 7302530715 14620 0ustar wbluhmrcsb#! /usr/local/bin/perl -w ########################## # # # create.pl # # # # simple script to test # # new methods for data # # inserting # # # # Wolfgang Bluhm, SDSC # # # ########################## use strict; use STAR::DataBlock; my $i; my ( @items, @item_data ); my $data = STAR::DataBlock->new; $data->title('newly_created'); $data->type('data'); $data->file_name('none'); $data->starting_line(0); $data->ending_line(0); #how would one handle these? @items = ('_citation.title', '_citation.id', '_citation_author.citation_id', '_citation_author.name' ); @item_data = ( ['This is just a dummy title', 'Another one'], [1,2], [1,1,2], ['Doe J.','Bear G.B.','Ghost W.'] ); foreach $i ( 0..$#items ) { $data->set_item_data( -item=> $items[$i], -dataref=>$item_data[$i]); } $data->store($ARGV[0]); =head1 DESCRIPTION A very primitive example using method calls for creating a new data structure. (Data to be inserted is hard-coded into the script :-) , and saved as the specified file.) =head1 USAGE perl create.pl =cut STAR-Parser-0.59/README100444 37455 1276 6750 7302530715 13132 0ustar wbluhmrcsbThis document describes the Perl modules and scripts contained in this distribution for parsing STAR compliant data files and dictionaries. It contains the following parts: -- General description -- Installation instructions -- Usage instructions -- Copyright notice General description ------------------- This distribution contains a set of Perl modules for parsing STAR compliant data files and dictionaries, for example CIF or mmCIF data files and dictionaries. While these tools are not limited to CIF or mmCIF files, they do not allow the presence of nested loops. The following modules are included in this distribution: STAR::Parser STAR::DataBlock STAR::Dictionary STAR::Writer STAR::Checker STAR::Filter Some simple examples of application scripts are also included. Installation instructions ------------------------- Unzip and untar the archive: % gunzip STAR.tar.gz % tar xvf STAR.tar % cd STAR Install the perl modules: % perl Makefile.PL % make % make test % make install These commands should install the modules into a default location on your system. Alternatively, you may create a STAR directory in a location of your choice, copy the "*.pm" files into the newly created STAR directory, and include a "use lib" line in all of the "*.pl" scripts in the bin directory: use lib ""; The modules require Storable to be installed in your Perl distribution. (See ftp://ftp.cpan.org/pub/perl/CPAN/modules/by-module/Storable/ ) Usage instructions ------------------ Users are expected to have a working knowledge of Perl and a basic familiarity of CIF or some other STAR compliant data file formats. Detailed documentation for each "*.pm" Perl module can be extracted with pod2html: % pod2html Parser.pm > Parser.html or viewed with perldoc: % perldoc Parser.pm Each "*.pl" script has a minimum description and usage information embedded, which can be viewed with perldoc: % perldoc parse.pl The included scripts are a mixture of basic utility scripts (e.g. parse.pl or check.pl), and very simplistic examples that are meant to test certain methods in the modules (e.g. create.pl). Most users would generally be expected to write their own customized scripts. The included documentation should be sufficient for all the scripts and modules. As a general comment, please be aware of the type of files each script operates on: parse.pl reads a CIF text file (.cif file). It has an option of saving the parsed data structure as a ".cob" (cif object) file. Note that the file extentions are by convention only, and are not enforced. write.pl reads a data structure (.cob file) and outputs a CIF text file (.cif file). All other scripts operate on data structures (.cob files), *NOT* on CIF text files (.cif files). Therefore, a user will probably want to first parse files and dictionaries, save the data structures (as .cob files) and then apply some of the other scripts to the saved data structures: % perl parse.pl -s 1LEP.cif % perl parse.pl -Ds mmcif1000.dic % perl check.pl -l 1LEP.cob cif_mm.dic.cob etc. More information on mmCIF can be found at these web sites: http://pdb.rutgers.edu/mmcif/ http://ndbserver.rutgers.edu/mmcif/ Additional questions or comments may be directed to the author of these modules and scripts: Wolfgang Bluhm mail@wbluhm.com Copyright notice ---------------- A full copyright statement is provided with the distribution (c) 2000 University of California, San Diego STAR-Parser-0.59/t/ 42755 37455 1276 0 10035306136 12427 5ustar wbluhmrcsbSTAR-Parser-0.59/t/filter.t100444 37455 1276 1221 7302532151 14147 0ustar wbluhmrcsb# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl test.pl' ######################### We start with some black magic to print on failure. # Change 1..1 below to 1..last_test_to_print . # (It may become useful if the test is moved to ./t subdirectory.) BEGIN { $| = 1; print "1..1\n"; } END {print "not ok 1\n" unless $loaded;} use STAR::Filter; $loaded = 1; print "ok 1\n"; ######################### End of black magic. # Insert your test code below (better if it prints "ok 13" # (correspondingly "not ok 13") depending on the success of chunk 13 # of the test code): STAR-Parser-0.59/t/dataBlock.t100444 37455 1276 1224 7302532151 14551 0ustar wbluhmrcsb# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl test.pl' ######################### We start with some black magic to print on failure. # Change 1..1 below to 1..last_test_to_print . # (It may become useful if the test is moved to ./t subdirectory.) BEGIN { $| = 1; print "1..1\n"; } END {print "not ok 1\n" unless $loaded;} use STAR::DataBlock; $loaded = 1; print "ok 1\n"; ######################### End of black magic. # Insert your test code below (better if it prints "ok 13" # (correspondingly "not ok 13") depending on the success of chunk 13 # of the test code): STAR-Parser-0.59/t/checker.t100444 37455 1276 1222 7302532151 14267 0ustar wbluhmrcsb# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl test.pl' ######################### We start with some black magic to print on failure. # Change 1..1 below to 1..last_test_to_print . # (It may become useful if the test is moved to ./t subdirectory.) BEGIN { $| = 1; print "1..1\n"; } END {print "not ok 1\n" unless $loaded;} use STAR::Checker; $loaded = 1; print "ok 1\n"; ######################### End of black magic. # Insert your test code below (better if it prints "ok 13" # (correspondingly "not ok 13") depending on the success of chunk 13 # of the test code): STAR-Parser-0.59/t/writer.t100444 37455 1276 1221 7302532151 14176 0ustar wbluhmrcsb# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl test.pl' ######################### We start with some black magic to print on failure. # Change 1..1 below to 1..last_test_to_print . # (It may become useful if the test is moved to ./t subdirectory.) BEGIN { $| = 1; print "1..1\n"; } END {print "not ok 1\n" unless $loaded;} use STAR::Writer; $loaded = 1; print "ok 1\n"; ######################### End of black magic. # Insert your test code below (better if it prints "ok 13" # (correspondingly "not ok 13") depending on the success of chunk 13 # of the test code): STAR-Parser-0.59/t/dictionary.t100444 37455 1276 1225 7302532151 15033 0ustar wbluhmrcsb# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl test.pl' ######################### We start with some black magic to print on failure. # Change 1..1 below to 1..last_test_to_print . # (It may become useful if the test is moved to ./t subdirectory.) BEGIN { $| = 1; print "1..1\n"; } END {print "not ok 1\n" unless $loaded;} use STAR::Dictionary; $loaded = 1; print "ok 1\n"; ######################### End of black magic. # Insert your test code below (better if it prints "ok 13" # (correspondingly "not ok 13") depending on the success of chunk 13 # of the test code): STAR-Parser-0.59/t/parser.t100444 37455 1276 1221 7302532151 14156 0ustar wbluhmrcsb# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl test.pl' ######################### We start with some black magic to print on failure. # Change 1..1 below to 1..last_test_to_print . # (It may become useful if the test is moved to ./t subdirectory.) BEGIN { $| = 1; print "1..1\n"; } END {print "not ok 1\n" unless $loaded;} use STAR::Parser; $loaded = 1; print "ok 1\n"; ######################### End of black magic. # Insert your test code below (better if it prints "ok 13" # (correspondingly "not ok 13") depending on the success of chunk 13 # of the test code): STAR-Parser-0.59/CHANGES100444 37455 1276 2321 10035305655 13253 0ustar wbluhmrcsbREVISION HISTORY: version 0.59 (released 08 Apr 2004): ----------------------------------- 1) Extended the DDL1 fix from 0.58 to items within loops 2) Fatal parsing errors now cause the Parser to 'die' instead of 'exit' to allow exception handling with 'eval' statements in wrapper scripts. 3) Included parseMulti.pl script, a wrapper script which attempts to parse all cif files in a given directory with multiple command line options. version 0.58 (released 10 July 2001): ------------------------------------- 1) Added limited compatibility with Data Defition Language 1 (DDL1). Earlier versions assumed DDL2-style item names with explict category names (_this_category.this_item). Now, the category is assigned the default value "-" (minus sign), if the item name does not contain a "." (period). 2) Improved proper recognition of "data_" strings as either data block headers or other legitimate text, depending on the position in the CIF file. version 0.57 (released 15 May 2001): ------------------------------------ A closing semicolon after text that extended over several lines does not have to be on a line by itelf. This wrong assumption from previous versions has been corrected. STAR-Parser-0.59/MANIFEST100444 37455 1276 647 7430354262 13365 0ustar wbluhmrcsbMANIFEST COPYRIGHT README CHANGES Makefile.PL bin/check.pl bin/create.pl bin/dependent.pl bin/filter.pl bin/filterDict.pl bin/keys.pl bin/mandatory.pl bin/parentChild.pl bin/parse.pl bin/parseMulti.pl bin/query.pl bin/write.pl lib/STAR/Checker.pm lib/STAR/DataBlock.pm lib/STAR/Dictionary.pm lib/STAR/Filter.pm lib/STAR/Parser.pm lib/STAR/Writer.pm t/parser.t t/dataBlock.t t/dictionary.t t/writer.t t/checker.t t/filter.t STAR-Parser-0.59/COPYRIGHT100444 37455 1276 3000 7302530402 13517 0ustar wbluhmrcsb Copyright 2000 The Regents of the University of California All Rights Reserved Permission to use, copy, modify and distribute any part of this PDB software for educational, research and non-profit purposes, without fee, and without a written agreement is hereby granted, provided that the above copyright notice, this paragraph and the following three paragraphs appear in all copies. Those desiring to incorporate this PDB Software into commercial products or use for commercial purposes should contact the Technology Transfer Office, University of California, San Diego, 9500 Gilman Drive, La Jolla, CA 92093-0910, Ph: (619) 534-5815, FAX: (619) 534-7345. In no event shall the University of California be liable to any party for direct, indirect, special, incidental, or consequential damages, including lost profits, arising out of the use of this PDB software, even if the University of California has been advised of the possibility of such damage. The PDB software provided herein is on an "as is" basis, and the University of California has no obligation to provide maintenance, support, updates, enhancements, or modifications. The University of California makes no representations and extends no warranties of any kind, either implied or express, including, but not limited to, the implied warranties of merchantability or fitness for a particular purpose, or that the use of the pdb software will not infringe any patent, trademark or other rights. STAR-Parser-0.59/lib/ 42755 37455 1276 0 10035306136 12732 5ustar wbluhmrcsbSTAR-Parser-0.59/lib/STAR/ 42755 37455 1276 0 10035306136 13503 5ustar wbluhmrcsbSTAR-Parser-0.59/lib/STAR/Writer.pm100444 37455 1276 21351 7302532151 15411 0ustar wbluhmrcsbpackage STAR::Writer; use STAR::DataBlock; use STAR::Dictionary; use strict; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); $VERSION = '0.01'; # $Id: Writer.pm,v 1.2 2000/12/19 22:54:56 helgew Exp $ RCS Identification #################### # Constructor: new # #################### sub new { my $proto = shift; my $class = ref($proto) || $proto; my $self = {}; bless ($self,$class); return $self; } ########################## # Class method write_cif # ########################## sub write_cif { my ($self,@parameters) = @_; my ($file, $data); my $options = ""; # single unnamed parameter doesn't make sense here, # need -dataref and -file while ($_ = shift @parameters) { $file = shift @parameters if /-file/; $data = shift @parameters if /-dataref/; $options = shift @parameters if /-options/; } my ($d, $s, $c, $i); #data, save, category, item my ($m); # loop counter my ($string, $dict, $log, $debug); my ($cat, $item, @cats, @items, $next); $dict = 1 if ( $data->type eq 'dictionary' ); $log = 1 if ( $options =~ /l/ ); $debug = 1 if ( $options =~ /d/ ); $data->add_quotes; open (OUT, ">$file"); print STDERR "writing $file\n" if ( $log ); foreach $d ( sort keys %{$data->{DATA}} ) { print OUT "data_$d\n"; foreach $s ( sort keys %{$data->{DATA}{$d}} ) { print STDERR "$s\n" if $debug ; # for debugging unless ( $s eq '-' ) { print OUT "\n","#"x(length($s)+4),"\n"; print OUT "# $s #\n"; print OUT "#"x(length($s)+4),"\n\n"; print OUT "save_$s\n"; } if ( $dict && ( $s eq '-' ) ) { print OUT "\n##############\n"; print OUT "# DICTIONARY #\n"; print OUT "##############\n\n"; } foreach $c ( sort keys %{$data->{DATA}{$d}{$s}} ) { print STDERR "\t$c\n" if $debug ; #for debugging unless ( $dict ) { print OUT "\n","#"x(length($c)+4),"\n"; print OUT "# $c #\n"; print OUT "#"x(length($c)+4),"\n\n"; } @items = sort keys %{$data->{DATA}{$d}{$s}{$c}}; if ( $#{$data->{DATA}{$d}{$s}{$c}{$items[0]}} == 0 ) { print STDERR "\t\tin items if\n" if $debug; #debugging foreach $item ( @items ) { print STDERR "\t\t$item\n" if $debug; #for debugging print OUT $item, " "; $next = $data->{DATA}{$d}{$s}{$c}{$item}[0]; if ( $next =~ /^;/ || length($item.$next) >= 77 ) { print OUT "\n"; } print OUT "$next\n"; } } else { #loop print STDERR "\t\tin items else\n" if $debug ; #debugging print OUT "loop_\n"; foreach $item ( @items ) { #items in loop print OUT "$item\n"; } #values in loop: foreach $m (0..$#{$data->{DATA}{$d}{$s}{$c}{$items[0]}}) { $string=''; foreach $i ( @items ) { $next = $data->{DATA}{$d}{$s}{$c}{$i}[$m]; if ( $next =~ /^;/ ) { $string .="\n" unless ( $string eq '' ); print OUT $string,$next; $string = ''; } elsif ( length($string.$next) >= 80 ) { $string .= "\n"; print OUT $string; $string = $next.' '; } else { $string .= $next; $string .= ' '; } } print OUT "$string\n"; } } } print OUT "save_\n" unless ( $s eq '-' ); } } print STDERR "writing $file\n" if $log; close (OUT); } ########################## # Class method write_xml # ########################## # This is very premature and "undocumented". # Just keeping the code here for convenience. sub write_xml { my ($self,@parameters) = @_; my ($file, $data); my $options = ""; # single unnamed parameter doesn't make sense here, # need -dataref and -file while ($_ = shift @parameters) { $file = shift @parameters if /-file/; $data = shift @parameters if /-dataref/; $options = shift @parameters if /-options/; } my ($d, $s, $c, $i); #data, save, category, item my ($dx, $sx, $cx, $ix); #xml compatible my ($m); # loop counter my ($string, $dict, $log); my ($cat, $item, @cats, @items, $next); $dict = 1 if ( $data->type eq 'dictionary' ); $log = 1 if ( $options =~ /l/ ); open (OUT, ">$file"); print STDERR "writing $file\n" if ( $log ); foreach $d ( sort keys %{$data->{DATA}} ) { $dx = $d; print OUT "\n"; foreach $s ( sort keys %{$data->{DATA}{$d}} ) { $sx = $s; print OUT "\t\n"; foreach $c ( sort keys %{$data->{DATA}{$d}{$s}} ) { $cx = xml_tag($c); print OUT "\t\t<$cx>\n"; @items = sort keys %{$data->{DATA}{$d}{$s}{$c}}; foreach $m (0..$#{$data->{DATA}{$d}{$s}{$c}{$items[0]}}) { foreach $item ( @items ) { $item =~ /^\S+?\.(.*)/; $ix = xml_tag($1); $next = $data->{DATA}{$d}{$s}{$c}{$item}[$m]; $next = xml_data($next); if ( $next =~ /\n/s ) { print OUT "\n"; } print OUT "\t\t\t<$ix i=\"$m\">"; print OUT "$next"; if ( $next =~ /\n/s ) { print OUT "\t\t\t\n"; } else { print OUT "\n"; } } } print OUT "\t\t\n"; } print OUT "\t\n"; } print OUT "\n"; } print STDERR "writing $file\n" if $log; close (OUT); } ########### # xml_tag # ########### sub xml_tag { my $string = shift; $string =~ s/[^A-Za-z0-9_\.-]/_/g; $string =~ s/^(\d.*)/_$1/; return $string; } ############ # xml_data # ############ sub xml_data { my $string = shift; $string =~ s/&/&/gs; $string =~ s//>/gs; $string =~ s/'/'/gs; $string =~ s/"/"/gs; return $string; } 1; __END__ =head1 NAME STAR::Writer - Perl extension for writing STAR::DataBlock objects as files. =head2 Version This documentation refers to version 0.01 of this module. =head1 SYNOPSIS use STAR::Writer; STAR::Writer->write_cif( -dataref=>$data, -file=>$file ); =head1 DESCRIPTION This module will provide several methods for writing STAR::DataBlocks as files in different format. Currently, there is a write_cif method, which writes a STAR::DataBlock or STAR::Dictionary object as a file in CIF (STAR) format. =head1 CLASS METHODS =head2 write_cif Usage: STAR::Writer->write_cif( -dataref=>$data, -file=>$file [, -options=>$options ] ); Write the STAR::DataBlock object referenced by $data to the file specified by $file. C<$options> are C<'l'> for logging activity (to STDERR) and C<'d'> for debugging. =head1 COMMENTS Categories and items are currently written out in alphabetical order. Obviously, this is of no importance to automated parsing. However, it may not be desirable for visual inspection of files. =head1 AUTHOR Wolfgang Bluhm, mail@wbluhm.com =head2 Acknowledgments Thanks to Phil Bourne, Helge Weissig, Anne Kuller, Doug Greer, Michele Bluhm, and others for support, help, and comments. =head1 COPYRIGHT A full copyright statement is provided with the distribution Copyright (c) 2000 University of California, San Diego =head1 SEE ALSO STAR::Parser, STAR::Dictionary. =cut STAR-Parser-0.59/lib/STAR/Checker.pm100444 37455 1276 22634 7302532150 15505 0ustar wbluhmrcsbpackage STAR::Checker; use STAR::DataBlock; use STAR::Dictionary; use strict; use Time::localtime; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); $VERSION = '0.02'; # $Id: Checker.pm,v 1.2 2000/12/19 22:54:56 helgew Exp $ RCS Identification #################### # Constructor: new # #################### sub new { my $proto = shift; my $class = ref($proto) || $proto; my $self = {}; bless ($self,$class); return $self; } ####################### # Class method: check # ####################### sub check { my ($self, @parameters) = @_; my ($data,$dict,$options); $options = ""; while ($_ = shift @parameters) { $data = shift @parameters if /-datablock/; $dict = shift @parameters if /-dictionary/; $options = shift @parameters if /-options/; } my ($n, $d, $save, @saves, $cat, @cats, $item, @items); my (@depend_items, $depend_item); my ($value, @values); my (%dict_lookup, %item_lookup, %cat_lookup); my (@parent_items, @child_items); my (%cp_hash, $cp_hash_ref); #child parent hash my ($mand); my ($construct, @constructs, $code, @code_data, @codes, %item_types); my ($debug, $log,$problem); $log = 1 if $options =~ /l/; $debug = 1 if $options =~ /d/; if ( $data->type eq 'dictionary' ) { print STDERR "Method check_against_dict is to be invoked only on\n", "DataBlock objects, not on dictionaries themselves.\n"; return; } print STDERR "-"x50,"\n" if $log; print STDERR "$0 ", ctime(),"\n" if $log; print STDERR "Checking ",$data->title, " against ",$dict->title,"\n" if $log; @items = $data->get_items; @cats = $data->get_categories; @saves = $dict->get_save_blocks; #make a dictionary lookup hash -- keys: lowercase, values: original case foreach $save (@saves) { $dict_lookup{lc($save)} = $save; } #same for an file item lookup hash foreach $item (@items) { $item_lookup{lc($item)} = $item; } #same for a file category lookup hash foreach $cat (@cats) { $cat_lookup{lc($cat)} = $cat; } # 1) checking whether items are present in dictionary # --------------------------------------------------- print STDERR "Checking whether items are present in dictionary\n" if $log; foreach $item (@items) { if ( ! exists $dict_lookup{lc($item)} ) { $problem=1; print STDERR "\t$item not in dictionary\n" if $log; } } # 2) checking for presence of mandatory items in file # --------------------------------------------------- print STDERR "Checking whether mandatory items ", "are present in file\n" if $log; foreach $save ( @saves ) { if ( $save =~ /^(_\S+?)\.\S+/ ) { # $save is item, not cat $cat = $1; $item = $save; $mand = ($dict->get_item_data(-save=>$save, -item=>"_item.mandatory_code"))[0]; if ( $mand eq "yes" ) { #item is mandatory if ( exists $cat_lookup{lc($cat)} ) { #the cat is in the file if ( ! exists $item_lookup{lc($item)} ) { #oops, should've #been present $problem=1; print STDERR "\t$item not present\n" if $log; } } } } } # 3) checking for presence of dependent items in file # --------------------------------------------------- print STDERR "Checking whether dependent items", " are present in file\n" if $log; foreach $item ( @items ) { if ( exists $dict_lookup{lc($item)} ) { @depend_items = $dict->get_item_data( -save=>$dict_lookup{lc($item)}, -item=>"_item_dependent.dependent_name"); foreach $depend_item ( @depend_items ) { if ( ! exists $item_lookup{lc($depend_item)} ) { $problem=1; print STDERR "\t$depend_item not present ", "(required by $item)\n" if $log; } } } } # 4) checking for presence of parent items # ---------------------------------------- print STDERR "Checking for presence of parent items\n" if $log; if ( -r "cp_hash" ) { print "Retrieving previously stored cp_hash\n" if $log; $cp_hash_ref = Storable::retrieve("cp_hash"); %cp_hash = %$cp_hash_ref; } else { print "Assembling and storing new cp_hash\n" if $log; foreach $save ( @saves ) { @parent_items = $dict->get_item_data(-save=>$save, -item=>"_item_linked.parent_name"); @child_items = $dict->get_item_data(-save=>$save, -item=>"_item_linked.child_name"); if ( $#parent_items >=0 ) { foreach $n ( 0..$#parent_items ) { $cp_hash{lc($child_items[$n])} = lc($parent_items[$n]); } } } Storable::store \%cp_hash, "cp_hash"; } foreach $item ( @items ) { if ( exists $cp_hash{lc($item)} ) { if ( ! exists $item_lookup{$cp_hash{lc($item)}} ) { print STDERR "\t",$cp_hash{lc($item)}, " not present ", "(parent to $item)\n" if $log; } } } # 5) checking for correct item types # ---------------------------------- print STDERR "Checking values against type definitions\n" if $log; @constructs=$dict->get_item_data(-save=>'-', -item=>'_item_type_list.construct'); @codes=$dict->get_item_data(-save=>'-', -item=>'_item_type_list.code'); foreach $n (0..$#codes) { $item_types{$codes[$n]} = $constructs[$n]; } foreach $item ( @items ) { $code=""; print STDERR "data item: $item\n" if $debug; print STDERR "dict item: ",$dict_lookup{lc($item)},"\n" if $debug; if ($dict_lookup{lc($item)}) { $code = ($dict->get_item_data (-save=>$dict_lookup{lc($item)}, -item=>'_item_type.code'))[0]; # not all items have this defined $construct = $item_types{$code} if $code; } if ( !$code ) { print STDERR "type code undefined\n" if $debug; } else { @values = $data->get_item_data(-item=>$item); print STDERR "values 0..",$#values,"\n" if $debug; $n=0; foreach $value (@values) { if ( $value eq '.' || $value eq '?' ) { print STDERR "$n item value undefined\n" if $debug; } elsif ( $value =~ /^$construct$/ ) { print STDERR "$n type $code ok\n" if $debug; } else { $problem = 1; if ($log) { print STDERR "\t","-"x14,"\n","\ttype mismatch:\n"; print STDERR "\titem: $item\n"; print STDERR "\titeration: $n\n"; print STDERR "\tvalue: $value\n"; print STDERR "\tcode: $code\n"; print STDERR "\tconstruct: $construct\n"; } } $n++; } } } return ( $problem ? 0 : 1 ); #returns 1 if check ok (no problem) #returns 0 if problem found } 1; __END__ =head1 NAME STAR::Checker - Perl extension for checking DataBlock objects =head2 Version This documentation refers to version 0.02 of this module. =head1 SYNOPSIS use STAR::Checker; $check = STAR::Checker->check( -datablock=>$ARGV[0], -dictionary=>$ARGV[1] ); =head1 DESCRIPTION Contains the checker object, with methods for checking DataBlock object against STAR rules and against a specified dictionary. DataBlock objects are created by Parser and modified by DataBlock. =head1 CLASS METHODS =head2 check Usage: $check = STAR::Checker->check(-datablock=>$data, -dictionary=>$dict [, -options=>$options ] ); Checks the DataBlock object C<$data> against the dictionary object C<$dict> (see STAR::Parser and STAR::DataBlock). Checks 1) whether all items in the DataBlock are defined in the dictionary, 2) whether mandatory items are present in the file, 3) whether dependent items are present in the file (e.g. cartn_x makes cartn_y and cartn_z dependent), 4) whether parent items are present, and 5) whether the item values in the DataBlock conform to the item type definitions in the dictionary. Returns 1 if the check was successful (no problems were found), and 0 if the check was unsuccessful (problems were found). A list of the specific problems is written to STDERR when C<-options=E'l'> is specified. =head1 AUTHOR Wolfgang Bluhm, mail@wbluhm.com =head2 Acknowledgments Thanks to Phil Bourne, Helge Weissig, Anne Kuller, Doug Greer, Michele Bluhm, and others for support, help, and comments. =head1 COPYRIGHT A full copyright statement is provided with the distribution Copyright (c) 2000 University of California, San Diego =head1 SEE ALSO STAR::Parser, STAR::DataBlock, STAR::Dictionary. =cut STAR-Parser-0.59/lib/STAR/DataBlock.pm100444 37455 1276 42703 7322633314 15772 0ustar wbluhmrcsbpackage STAR::DataBlock; use strict; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK ); use Storable; $VERSION = '0.58'; # $Id: DataBlock.pm,v 1.6 2001/07/10 17:05:51 wbluhm Exp $ RCS Identification #################### # Constructor: new # #################### # Overloaded constructor: # # The no-arg constructor is called internally by parse in STAR::Parser # # $entry = STAR::DataBlock->new; # # To retrieve an already stored DataBlock, use this in an application: # # $data = STAR::DataBlock->new("file"); sub new { my ($proto, @parameters) = @_; my $class = ref($proto) || $proto; my $file; my $self; $file = shift @parameters unless $#parameters; # # the above is executed if and only if $#parameters == 0, # which means only if exactly one parameter is being passed # (in "unnamed parameters" style) while ($_ = shift @parameters) { $file = shift @parameters if /-file/; } if ( $file ) { $self = retrieve($file); } else { $self = {}; bless ($self,$class); } return $self; } ###################################### # Private object method: _all_tokens # ###################################### # This method was moved into Parser.pm # as a class method with version 0.58 ######################################## # Private object method: _tokens_check # ######################################## # This method (which had not been implemented yet) # would also have to become a class method in Parser. ############################# # Object method: add_quotes # ############################# # Note: This method is called by STAR:Writer->write_cif # # There may or may not be any need for # explicit user calls to this method. sub add_quotes { my ($self,@parameters) = @_; my ($d,$s,$c,$i); #data, save, category, item my ($n,$value,$log); foreach $d ( keys %{$self->{DATA}} ) { foreach $s ( keys %{$self->{DATA}{$d}} ) { foreach $c ( keys %{$self->{DATA}{$d}{$s}} ) { foreach $i ( keys %{$self->{DATA}{$d}{$s}{$c}} ) { foreach $n ( 0..$#{$self->{DATA}{$d}{$s}{$c}{$i}} ) { $value = $self->{DATA}{$d}{$s}{$c}{$i}[$n]; if ( $self->{DATA}{$d}{$s}{$c}{$i}[$n] =~ /(\n)/s ) { #if line break $self->{DATA}{$d}{$s}{$c}{$i}[$n] =~ s/^([^;].*)/;\n$1;\n/s; #if no leading ; $self->{DATA}{$d}{$s}{$c}{$i}[$n] =~ s/^;\n\n(.*)/;\n$1/s; #remove leading #blank line } elsif ( $self->{DATA}{$d}{$s}{$c}{$i}[$n] =~ /(\s+)/ ) { #if white space $self->{DATA}{$d}{$s}{$c}{$i}[$n] =~ s/^([^"'].*)/"$1"/s; #if no #leading quote } elsif ( $self->{DATA}{$d}{$s}{$c}{$i}[$n] =~ /^(_.*)/ ) { $self->{DATA}{$d}{$s}{$c}{$i}[$n] = '"'.$1.'"'; } } } } } } return $self; } ################################ # Object method: get_item_data # ################################ sub get_item_data { my ($self,@parameters) = @_; my ($d,$s,$c,$i); $d = $self->title; #default data block $s = '-'; #default save block $i = shift @parameters unless $#parameters; while ($_ = shift @parameters) { $d = shift @parameters if /-datablock/; $s = shift @parameters if /-save/; $i = shift @parameters if /-item/; } if ( $i =~ /^(\S+?)\./ ) { $c = $1; } else { $c = '-'; } return if (! exists $self->{DATA}{$d}{$s}{$c}{$i}); return @{$self->{DATA}{$d}{$s}{$c}{$i}}; } # both insert_category and insert_item may be unnecessary methods ################################## # Object method: insert_category # ################################## sub insert_category { my ($self, @parameters) = @_; my ($d, $s, $c); $d = $self->title; # default data block $s = '-'; # default save block $c = shift @parameters unless $#parameters; # single "unnamed" parameter while ( $_ = shift @parameters ) { $d = shift @parameters if /-datablock/; $s = shift @parameters if /-save/; $c = shift @parameters if /-cat/; } if ( exists $self->{DATA}{$d}{$s}{$c} ) { # category already exists # do nothing } else { $self->{DATA}{$d}{$s}{$c} = {}; #just an empty addition to the hash #no data yet print "inserted category $c\n"; } return; } ############################## # Object method: insert_item # ############################## sub insert_item { my ($self, @parameters) = @_; my ($d, $s, $c, $i); $d = $self->title; # default data block $s = '-'; # default save block $i = shift @parameters unless $#parameters; # single "unnamed" parameter while ( $_ = shift @parameters ) { $d = shift @parameters if /-datablock/; $s = shift @parameters if /-save/; $i = shift @parameters if /-item/; } if ( $i =~ /^(\S+?)\./ ) { $c = $1; } else { $c = '-'; } #has the category already been created? if ( ! exists $self->{DATA}{$d}{$s}{$c} ) { print "category $c doesn't exist\n"; $self->insert_category( -datablock=>$d, -save=>$s, -cat=>$c ); } #has the item been created before? if ( exists $self->{DATA}{$d}{$s}{$c}{$i} ) { # item already exists # do nothing } else { $self->{DATA}{$d}{$s}{$c}{$i} = (); # empty array, still no data } return; } ################################ # Object method: set_item_data # ################################ sub set_item_data { my ($self, @parameters) = @_; my ($d, $s, $c, $i, $data_ref); $d = $self->title; # default data block $s = '-'; # default save block #no single "unnamed" parameter in this case #need at least -item and -dataref while ( $_ = shift @parameters ) { $d = shift @parameters if /-datablock/; $s = shift @parameters if /-save/; $i = shift @parameters if /-item/; $data_ref = shift @parameters if /-dataref/; } if ( $i =~ /^(\S+?)\./ ) { $c = $1; } else { $c = '-'; } #does the item exist? if ( ! exists $self->{DATA}{$d}{$s}{$c}{$i} ) { $self->insert_item( -datalblock=>$d, -save=>$s, -item=>$i ); } #now add the data $self->{DATA}{$d}{$s}{$c}{$i} = $data_ref; return; } ########################### # Object method: get_keys # ########################### sub get_keys { my ($self,@parameters) = @_; my ($d, $s, $c, $i, $log); my $keys = ''; $keys .= "data\tsave\n"; $keys .= "block\tblock\tcateg.\titem\n"; $keys .= "----------------------------\n\n"; foreach $d ( sort keys %{$self->{DATA}} ) { $keys .= "$d\n"; foreach $s ( sort keys %{$self->{DATA}{$d}} ) { $keys .= "\t$s\n"; foreach $c ( sort keys %{$self->{DATA}{$d}{$s}} ) { $keys .= "\t\t$c\n"; foreach $i ( sort keys %{$self->{DATA}{$d}{$s}{$c}} ) { $keys .= "\t\t\t$i\n"; } } } } return $keys; } ############################ # Object method: get_items # ############################ sub get_items { my $self = shift; my ($d,$s,$c,$i); my (@items); foreach $d ( sort keys %{$self->{DATA}} ) { foreach $s ( sort keys %{$self->{DATA}{$d}} ) { foreach $c ( sort keys %{$self->{DATA}{$d}{$s}} ) { foreach $i ( sort keys %{$self->{DATA}{$d}{$s}{$c}} ) { push @items,$i; } } } } return @items; } ################################# # Object method: get_categories # ################################# sub get_categories { my $self = shift; my ($d, $s, $c); my (@cats); foreach $d ( sort keys %{$self->{DATA}} ) { foreach $s ( sort keys %{$self->{DATA}{$d}} ) { foreach $c ( sort keys %{$self->{DATA}{$d}{$s}} ) { push @cats,$c; } } } return @cats; } ################################# # Object method: get_attributes # ################################# sub get_attributes { my $self = shift; my $string; $string .= $self->{TITLE}; $string .= " (dictionary)" if ($self->{TYPE} eq 'dictionary'); $string .= "\n"; $string .= "File: ".$self->{FILE}." "; $string .= "Lines: ".$self->{STARTLN}; $string .= " to ".$self->{ENDLN}."\n"; return $string; } ################################# # Object methods: # # file_name, title, type, # # starting_line, ending_line # ################################# ############# # file_name # ############# sub file_name { my ($self,@parameters) = @_; $self->{FILE} = shift @parameters unless $#parameters; while ($_ = shift @parameters ) { $self->{FILE} = shift @parameters if /-file/; } return $self->{FILE}; } ######### # title # ######### sub title { my ($self,@parameters) = @_; $self->{TITLE} = shift @parameters unless $#parameters; while ($_ = shift @parameters ) { $self->{TITLE} = shift @parameters if /-title/; } return $self->{TITLE}; } ######## # type # ######## sub type { my ($self,@parameters) = @_; $self->{TYPE} = shift @parameters unless $#parameters; while ($_ = shift @parameters ) { $self->{TYPE} = shift @parameters if /-type/; } return $self->{TYPE}; } ################# # starting_line # ################# sub starting_line { my ($self,@parameters) = @_; $self->{STARTLN} = shift @parameters unless $#parameters; while ($_ = shift @parameters ) { $self->{STARTLN} = shift @parameters if /-startln/; } return $self->{STARTLN}; } ############### # ending_line # ############### sub ending_line { my ($self,@parameters) = @_; $self->{ENDLN} = shift @parameters unless $#parameters; while ($_ = shift @parameters ) { $self->{ENDLN} = shift @parameters if /-endln/; } return $self->{ENDLN}; } 1; __END__ =head1 NAME STAR::DataBlock - Perl extension for handling DataBlock objects created by STAR::Parser. =head2 Version This documentation refers to version 0.58 of this module. =head1 SYNOPSIS use STAR::DataBlock; $data_obj = STAR::DataBlock->new(-file=>$ARGV[0]); #retrieves stored file $attributes = $data_obj->get_attributes; print $attributes; @items = $data_obj->get_items; foreach $item ( @items ) { @item_data = $data_obj->get_item_data( -item=>$item ); $count{ $_ } = $#item_data + 1; # do something else (hopefully more useful) with @item_data... } =head1 DESCRIPTION This package contains class and object methods for dealing with DataBlock objects created by STAR::Parser. They include methods for such tasks as reading objects from disk, querying their data structures or writing DataBlock objects as STAR compliant files. All methods support a "named parameters" style for passing arguments. If only one argument is mandatory, then it may be passed in either a "named parameters" or "unnamed parameters" style, for example: $data_obj->get_item_data( -file=>$file, -save=>'-' ); or: $data_obj->get_item_data( -file=>$file ); or: $data_obj->get_item_data( $file ); # all of the above are the same, since -save=>'-' is the default # and therefore only one parameter needs to be specified # in "named" or "unnamed" parameter style Some methods may be invoked with on C<$options> string. Currently, only one option is supported: l writes program activity log to STDERR Future versions may support additional options. =head1 CONSTRUCTOR =head2 new Usage: $data_obj = STAR::DataBlock->new(); #creates new object $data_obj = STAR::DataBlock->new( -file=>$file ); #retrieves previously OR: $data_obj = STAR::DataBlock->new( $file ); #stored object Overloaded constructor. Called as a no-arg constructor internally by STAR::Parser. May be called with a C<$file> argument to retrieve an object previously stored with store (see below). =head1 OBJECT METHODS =head2 store Usage: $data_obj->store($file); Saves a DataBlock object to disk. This method is in Storable. =head2 get_item_data Usage: @item_data = $data_obj->get_item_data(-item=>$item[, -save=>$save_block]); Example: -------- my @names=$data_obj-> get_item_data(-item=>"_citation_author.name"); print $names[0],"\n"; #prints first citation author name This object method returns all the data for a specified item. If the C<-save> parameter is omitted, it is assumed that the item is not in a save block (i.e. C<$save='-'>). This is always the case in data files, since they do not contain save blocks. However, this class is sub-classed by STAR::Dictionary, where items may be in save blocks. The data is returned as an array, which holds one or more scalars. =head2 get_keys Usage: $keys = $data_obj->get_keys; Returns a string with a hierarchically formatted list of hash keys (data blocks, save blocks, categories, and items) found in the data structure of the DataBlock object. =head2 get_items Usage: @items = $data_obj->get_items; Returns an array with all the items present in the DataBlock. =head2 get_categories Usage: @categories = $data_obj->get_categories; Returns an array with all the categories present in the DataBlock. =head2 insert_category Usage: $data_obj->insert_category( -cat=>$cat[, -save=>$save] ); Inserts the category C<$cat> into the data structure. The default save block (if none is specified) is C<'-'>. =head2 insert_item Usage: $data_obj->insert_item( -item=>$item[, -save=>$save] ); Inserts the item C<$item> into the data structure. The default save block (if none is specified) is C<'-'>. =head2 set_item_data Usage: $data_obj->set_item_data( -item=>$item, -dataref=>$dataref[, -save=>$save] ); Sets the data of the item C<$item> to the array of data referenced by C<$dataref>. If the C<-save> parameter is omitted, the save block defaults to C<'-'>. This is always correct for data blocks. In a dictionary (which inherits from DataBlock), the save block C<'-'> contains information pertaining to the dictionary itself. =head2 Object attributes The following five methods set or retrieve attributes of a DataBlock object. In the set mode (with argument), these methods are called internally to set the attributes of a DataBlock object. In the retrieve mode (without arguments) these methods may also be called by a user to retrieve object attributes (see the above examples). =head2 file_name Usage: $data_obj->file_name($name); #set mode $name = $data_obj->file_name; #get mode Name of the file in which the DataBlock object was found =head2 title Usage: $data_obj->title($title); #set mode $title = $data_obj->title; #get mode Title of the DataBlock object =head2 type Usage: $data_obj->type($type); #set mode $type = $data_obj->type; #get mode Type of data contained (always 'data' for a DataBlock object, but 'dictionary' for an object in the sub class STAR::Dictionary) =head2 starting_line Usage: $data_obj->starting_line($startln); #set mode $startln = $data_obj->starting_line; #get mode Line number where data block started in the file =head2 ending_line Usage: $data_obj->ending_line($endln); #set mode $endln = $data_obj->ending_line; #get mode Line number where data block ended in the file =head2 get_attributes Usage: $info = $data_obj->get_attributes; Returns a string containing a descriptive list of attributes of the DataBlock object. Two examples of output: RCSB011457 File: native/1fbm.cif Lines: 1 to 5294 cif_mm.dic (dictionary) File: dictionary/mmcif_dict.txt Lines: 89 to 38008 =head1 COMMENTS This module provides no error checking of files or objects, either against the dictionary, or otherwise. Dictionary information is not currently used in the parsing of files by STAR::Parser. So, for example, information about parent-child relationships between items is not present in a DataBlock object. Functionality related to these issues is being provided in additional modules, such as STAR::Checker, and STAR::Filter. =head1 AUTHOR Wolfgang Bluhm, mail@wbluhm.com =head2 Acknowledgments Thanks to Phil Bourne, Helge Weissig, Anne Kuller, Doug Greer, Michele Bluhm, and others for support, help, and comments. =head1 COPYRIGHT A full copyright statement is provided with the distribution Copyright (c) 2000 University of California, San Diego =head1 SEE ALSO STAR::Parser, STAR::Dictionary. =cut STAR-Parser-0.59/lib/STAR/Dictionary.pm100444 37455 1276 5073 7302532150 16224 0ustar wbluhmrcsbpackage STAR::Dictionary; use strict; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK ); use STAR::DataBlock; @ISA = qw( STAR::DataBlock ); $VERSION = '0.56'; # $Id: Dictionary.pm,v 1.2 2000/12/19 22:54:56 helgew Exp $ RCS Identification #################### # Constructor: new # #################### sub new { my ($proto, @parameters) = @_; my $class = ref($proto) || $proto; my $file; my $self; $file = shift @parameters unless $#parameters; while ($_ = shift @parameters) { $file = shift @parameters if /-file/; } if ( $file ) { $self = Storable::retrieve($file); } else { $self = {}; bless ($self,$class); } return $self; } ################################## # Object method: get_save_blocks # ################################## sub get_save_blocks { my $self = shift; my ($d, $s); my (@save_blocks); foreach $d ( sort keys %{$self->{DATA}} ) { foreach $s ( sort keys %{$self->{DATA}{$d}} ) { push @save_blocks,$s; } } return @save_blocks; } 1; __END__ =head1 NAME STAR::Dictionary - Perl extension for handling dictionaries that were parsed from STAR compliant files. =head2 Version This documentation refers to version 0.56 of this module. =head1 SYNOPSIS use STAR::Dictionary; $dict_obj = STAR::Dictionary->new(-file=>$file); @items_in_dict = $dict_obj->get_save_blocks; =head1 DESCRIPTION This package contains class and object methods for Dictionary objects created by STAR::Parser. This class is a sub class of STAR::DataBlock. It supports all methods from STAR::DataBlock (see related documentation), as well as the additional method get_save_blocks. =head1 OBJECT METHODS =head2 get_save_blocks Usage: @save_blocks = $dict_obj->get_save_blocks; This methods returns an array with all save_ blocks found in the Dictionary object. Each item defined in the dictionary is described within a save block. In addition, items pertaining to the dictionary itself (such as _dictionary.version) are found outside of save blocks in the dictionary file. In the data structure of a Dictionary object, these items are gathered in a C<$s='-'> save block. =head1 AUTHOR Wolfgang Bluhm, mail@wbluhm.com =head2 Acknowledgments Thanks to Phil Bourne, Helge Weissig, Anne Kuller, Doug Greer, Michele Bluhm, and others for support, help, and comments. =head1 COPYRIGHT A full copyright statement is provided with the distribution Copyright (c) 2000 University of California, San Diego =head1 SEE ALSO STAR::Parser, STAR::DataBlock. =cut STAR-Parser-0.59/lib/STAR/Parser.pm100444 37455 1276 36724 10035303005 15412 0ustar wbluhmrcsbpackage STAR::Parser; use STAR::DataBlock; use STAR::Dictionary; use strict; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); $VERSION = '0.59'; # $Id: Parser.pm,v 1.6 2004/04/08 17:03:43 wbluhm Exp $ RCS identification #################### # Constructor: new # #################### sub new { my $proto = shift; my $class = ref($proto) || $proto; my $self = {}; bless ($self,$class); return $self; } ####################### # Class method: parse # ####################### sub parse { my ($self,@parameters) = @_; my ($file,$dict,$options); $options = ''; $file = shift @parameters unless $#parameters; while ($_ = shift @parameters) { $file = shift @parameters if /-file/; $dict = shift @parameters if /-dict/; $options = shift @parameters if /-options/; } my ($d, $s, $c, $i); # data and save blocks # category, item my ($n, $m); # loop counters my ($flag); my ($debug, $log); my (@entries, $entry); my (@cats_in_loop, @items_in_loop); my ($line_nums_ref, $flags_ref, $tokens_ref); my $token; # Here, "token" shall mean an item name (e.g. _atom.id), # or an item value, (5 examples: 1 value 'a value' . ? ) # or a value over several lines delimited by semicolons. $d = 'untitled'; # default (if no data block) $s = '-'; # default (if not in save block) $debug = 1 if ( $options =~ /d/ ); $log =1 if ( $options =~ /l/ ); ################## ### tokenizing ### ################## print STDERR "tokenizing complete file\n" if ( $log ); ($line_nums_ref, $flags_ref, $tokens_ref) = STAR::Parser->_all_tokens(-file=>$file); ### check integrity of token list -- pre-parsing check ### # this had not been implemented yet, but # would now have to be a class method in STAR::Parser if ($debug) { print STDERR "Start of tokens\n"; foreach $n (0.. $#$tokens_ref) { print STDERR "next token: ",$$flags_ref[$n], " ",$$tokens_ref[$n],"\n"; } print STDERR "End of tokens\n"; } # default data block (if no data_ in file, e.g. for ERF files) $entry = STAR::DataBlock->new; $entry->file_name($file); $entry->type('data'); $entry->title('untitled'); $entry->starting_line(1); push @entries, $entry; ############### ### parsing ### ############### until ( (shift @$flags_ref) eq 'eof' ) { $token = shift @$tokens_ref; print STDERR "next token: $token\n" if ($debug); if ( $token =~ /^data_(.*)/ ) { #data block $d = $1; $s = '-'; # default (if not in save block) print STDERR "New data block: $token\n" if ($debug); # create new "entry object" (DataBlock or Dictionary) # --------------------------------------------------- if ( $dict ) { $entry = STAR::Dictionary->new; $entry->type('dictionary'); } else { $entry = STAR::DataBlock->new; $entry->type('data'); } $entry->file_name($file); $entry->title($1); $entry->starting_line( shift @$line_nums_ref ); # next data block line number push @entries, $entry; print STDERR "parsing ",$entry->{TITLE},"\n" if ( $log ); next; } if ( $token =~ /^save_(\S+)/ ) { #save block $s = $1; print STDERR "save block: $s\n" if ($debug); } elsif ( $token =~ /^save_$/ ) { #end of save block $s = '-'; } if ( $token =~ /^loop_/ ) { #loop block print STDERR "started loop\n" if ($debug); $flag = shift @$flags_ref; $token = shift @$tokens_ref; @cats_in_loop = (); @items_in_loop = (); while ( $flag eq 'i' ) { # need to check for $flag since _something could have # also been a value (in quotes) if ( $token =~ /^(_\S+?)\.\S+/ ) { # DDL2: _category.item $c = $1; } else { # DDL1: no notion of category $c = '-'; } print STDERR "token (item) in loop: ", "$token\n" if ($debug); push @cats_in_loop, $c; push @items_in_loop, $token; $flag = shift @$flags_ref; $token = shift @$tokens_ref; } $m=0; until ( $flag ) { #if it's NOT a value, it's got a flag foreach $n (0..$#items_in_loop) { print STDERR "token (value) in loop: ", "$token\n" if ($debug); $entry->{DATA}{$d}{$s}{$cats_in_loop[$n]} {$items_in_loop[$n]}[$m] = $token; $flag = shift @$flags_ref; if ( $flag && ( $n < $#items_in_loop ) ) { die "fatal parsing error in category $cats_in_loop[$n]\n"; } $token = shift @$tokens_ref; } $m++; } print STDERR "finished loop\n" if ($debug); print STDERR "last token (to be recycled): ", "$token\n" if ($debug); # the last token was out of 'loop_' # and needs to be recycled at the top unshift @$flags_ref, $flag; unshift @$tokens_ref,$token; } elsif ( $token =~ /^_\S+/ ) { $i = $token; if ( $token =~ /^(_\S+?)\.\S+/ ) { # DDL2: _category.item $c = $1; } else { # DDL1: no notion of category $c = '-'; } $flag = shift @$flags_ref; if ( $flag ) { die "fatal parsing error in category $c\n"; } $token = shift @$tokens_ref; #this one must be a value! print STDERR "next token (value): ", "$token\n" if ($debug); $entry->{DATA}{$d}{$s}{$c}{$i}[0] = $token; } } if ($#entries > 0) { # if there is more than one entry shift @entries; # discard the default "untitled" entry } # add ending line number attributes my @ending_lines; foreach $entry ( @entries ) { push @ending_lines, ( $entry->starting_line() - 1 ); } shift @ending_lines; # first one didn't make sense push @ending_lines, ( shift @$line_nums_ref ); # last one is last line number foreach $entry ( @entries ) { $entry->ending_line( shift @ending_lines ); } if ( $log ) { foreach $entry ( @entries ) { print STDERR $entry->get_attributes; } } return @entries; } ##################################### # Private class method: _all_tokens # ##################################### # This method was moved from DataBlock to Parser in version 0.58 sub _all_tokens { my ($self, @parameters) = @_; my ($file); $file = shift @parameters unless $#parameters; while ($_ = shift @parameters) { $file = shift @parameters if /-file/; } my $multi_flag=0; my ($lines, $token, $rest); my (@line_nums, @flags, @tokens); open (IN, "<$file") or die "Can't open file $file"; while () { if ($multi_flag == 1) { if ( /^;\s(.*)/s ) { $multi_flag=0; #one value (w/o semicolons) push @flags, ''; push @tokens, $lines; #no flag $_ = $1; # continue with rest of line # closing semicolon does not have to be on line by itself } elsif ( /^;/ ) { $multi_flag=0; #one value (w/o semicolons) push @flags, ''; push @tokens, $lines; #no flag next; } else { $lines .= $_; #append next; } } elsif ( /^;(.*)/s ) { $multi_flag=1; #start $lines = $1; #newline still on next; } while ( /\S/ ) { last if ( /^\s*#/ ); if ( /^\s*["']/s ) { /^\s*(["'])(.*?)\1\s(.*)/s; #stuff in quotes is one token push @flags, ''; #it's a value, so no flag push @tokens, $2; $_ = $3; } elsif ( /^\s*(\S+)(.*)/s ) { #one token $token = $1; push @tokens, $token; $_ = $2; unless ( $token =~ /_/ ) { push @flags, ''; #without '_' certainly a value next; } if ( $token =~ /^_/ ) { push @flags, 'i'; #item } elsif ( $token =~ /^loop_/ ) { #loop push @flags, 'l'; } elsif ( $token =~ /^save_/ ) { #save push @flags, 's'; } elsif ( $token =~ /^data_/ ) { #data push @flags, 'd'; push @line_nums, $.; # next data block line number } else { push @flags, ''; #an unquoted value with '_' } } } } push @flags, 'eof'; # 'eof' added as last flag # thus there should always be one more flag push @line_nums, $. ; # last line number close (IN); return (\@line_nums, \@flags, \@tokens); } ####################################### # Private class method: _find_entries # ####################################### # This method has been obsoleted in version 0.58. # Since 0.58, files are no longer pre-parsed # for data blocks, since it does not allow # for proper functional assignment of all # 'data' strings. 1; __END__ =head1 NAME STAR::Parser - Perl extension for parsing STAR compliant files (with no nested loops). =head2 Version This documentation refers to version 0.59 of this module. =head1 SYNOPSIS use STAR::Parser; ($data) = STAR::Parser->parse('1fbm.cif'); ($dict) = STAR::Parser->parse(-file=>'mmcif_dict', -dict=>1, -options=>'l'); #logs activity =head1 DESCRIPTION STAR::Parser is one of several related Perl modules for parsing STAR compliant files (such as CIF and mmCIF files). Currently, these modules include STAR::Parser, STAR::DataBlock, STAR::Dictionary, STAR::Writer, STAR::Checker, and STAR::Filter. STAR::Parser is the parsing module, with the class method parse for parsing any STAR compliant files or dictionaries, as long as they do B contain nested loops (i.e., only B level of loop is supported). Upon parsing of a file, an array of DataBlock objects is returned (one for each data_ entry in the file). The class STAR::DataBlock contains object methods for these objects. STAR::DataBlock is automatically accessible through STAR::Parser. Upon parsing of a dictionary (indicated with the C<-dict=E1> parameter), an array of Dictionary objects is returned. STAR::Dictionary is a sub-class of STAR::DataBlock. The methods of this module and the accompanying modules (STAR::DataBlock, STAR::Checker, etc.) support "named parameters" style for passing arguments. If only one argument is mandatory, then it may be passed in either a "named parameters" or "unnamed parameters" style, for example: @objs = STAR::Parser->parse( -file=>$file, -options=>'d' ); #debugging @objs = STAR::Parser->parse( -file=>$file ); #no options or: @objs = STAR::Parser->parse( $file ); =head1 CLASS METHODS =head2 parse Usage: @objs = STAR::Parser->parse(-file=>$file[, -dict=>1, -options=>$options]); or: @objs = STAR::Parser->parse($file); Examples: 1) @objs = STAR::Parser->parse('1fbm.cif'); $data = $objs[0]; OR: ($data) = STAR::Parser->parse('1fbm.cif'); 2) @objs = STAR::Parser->parse('7files.txt'); foreach $obj (@objs) { # do something, see STAR::DataBlock } 3) @objs = STAR::Parser->parse(-file=>'mmcif_dict', -dict=>1, -options=>'l'); #logs activity $dict = @objs[0]; This method first searches the file and creates a DataBlock object for each data_ identifier found in the file. If no data_ identifier is found, then only one DataBlock object will be created (with C<$d='untitled'>, see below). If parse is invoked with the C<-dict=E1> option, then a Dictionary object is created for each data_ identifier found. Next, the method populates the data structure of each DataBlock or Dictionary object. The parsed data may be queried or accessed by object methods of the STAR::DataBlock and STAR::Dictionary modules. See the documentation for STAR::DataBlock and STAR::Dictionary. The method always returns an array of objects, even if it contains only one object (if there is only one data_ block in the file). Internally, the parsed data is stored in a multidimensional hash with keys for data blocks (C<$d>), save blocks (C<$s>), categories (C<$c>), and items (C<$i>). For a file, C<$s> will always be C<'-'>, since there are no save blocks in files. For a dictionary, C<$s> will be C<'-'> outside of save_ blocks, and C<'CATEGORY'> or C<'_item'> inside save_CATEGORY or save__item blocks (capitalization depends on the user's dictionary.) If a file is parsed that contains no data_ identifier, then C<$d> becomes C<'untitled'>. C<$c> refers to a category, such as _atom_site and C<$i> refers to an item, such as _atom_site.id. The method may be invoked with an $options string. These options are the following letters which may be concatenated in any order: d writes debugging output to STDERR l writes program activity log to STDERR =head1 COMMENTS This module provides no error checking of files or objects, either against the dictionary, or otherwise. While the module is applicable to parsing either a file or a dictionary, dictionary information is not currently used in the parsing of files. So, for example, information about parent-child relationships between items is not present in a DataBlock object. Functionality related to these issues is being provided in additional modules such as STAR::Checker, and STAR::Filter. =head1 AUTHOR Wolfgang Bluhm, mail@wbluhm.com =head2 Acknowledgments Thanks to Phil Bourne, Helge Weissig, Anne Kuller, Doug Greer, Michele Bluhm, and others for support, help, and comments. =head1 COPYRIGHT A full copyright statement is provided with the distribution Copyright (c) 2000 University of California, San Diego =head1 SEE ALSO STAR::DataBlock, STAR::Dictionary. =cut STAR-Parser-0.59/lib/STAR/Filter.pm100444 37455 1276 14554 7302532150 15370 0ustar wbluhmrcsbpackage STAR::Filter; use STAR::DataBlock; use STAR::Dictionary; use strict; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); $VERSION = '0.01'; # $Id: Filter.pm,v 1.2 2000/12/19 22:54:56 helgew Exp $ RCS Identification #################### # Constructor: new # #################### sub new { my $proto = shift; my $class = ref($proto) || $proto; my $self = {}; bless ($self,$class); return $self; } ############################# # Class method: filter_dict # ############################# # a simple interactive method which goes through # the dictionary category by category and prompts # the user whether to keep/include it # This method is preliminary and should be considered # subject to change # this filters/reduces the in-memory data representation (.cob) # of the dictionary, not the dictionary file (.cif) sub filter_dict { my ($self, @parameters) = @_; my ($dict, $dict_filtered, $options); while ($_ = shift @parameters) { $dict = shift @parameters if /-dict/; $options = shift @parameters if /-options/; } my ($d, $s, $c, $i); #data, save, category, item my (@saves, $save); my ( %keep_cat_lookup, $incl ); $dict_filtered = STAR::Dictionary->new; $dict_filtered->{TITLE} = ($dict->{TITLE})."_filtered"; $dict_filtered->{TYPE} = $dict->{TYPE}; $dict_filtered->{FILE} = $dict->{FILE}; $dict_filtered->{STARTLN} = $dict->{STARTLN}; $dict_filtered->{ENDLN} = $dict->{ENDLN}; print $dict->get_attributes; #build up keep_cat_lookup foreach $d ( keys %{$dict->{DATA}} ) { foreach $s ( sort keys %{$dict->{DATA}{$d}} ) { if ( $s eq "-" ) { #dictionary itself (no save block) $keep_cat_lookup{lc($s)} = $s; } elsif ( $s !~ /\./ ) { #it's a category, not an item print "Category $s -- include? (y/n)"; $incl = ; chomp $incl; if ( $incl =~ /y/ ) { $keep_cat_lookup{lc($s)} = $s; #hash lookup #lower case => original } } } } #filter dictionary according to keep_cat_lookup hash foreach $d ( keys %{$dict->{DATA}} ) { foreach $s ( keys %{$dict->{DATA}{$d}} ) { if ( $s !~ /\./ && $keep_cat_lookup{lc($s)} ) { #save block that's a category to be included foreach $c ( keys %{$dict->{DATA}{$d}{$s}} ) { foreach $i ( keys %{$dict->{DATA}{$d}{$s}{$c}} ) { $dict_filtered->{DATA}{$d}{$s}{$c}{$i} = $dict->{DATA}{$d}{$s}{$c}{$i}; } } } if ( $s =~ /^_(\S+)\./ && $keep_cat_lookup{lc($1)} ) { #save block that's an item in a category to be included foreach $c ( keys %{$dict->{DATA}{$d}{$s}} ) { foreach $i ( keys %{$dict->{DATA}{$d}{$s}{$c}} ) { $dict_filtered->{DATA}{$d}{$s}{$c}{$i} = $dict->{DATA}{$d}{$s}{$c}{$i}; } } } } $dict_filtered->{DATA}{$d}{"-"}{"_dictionary"} {"_dictionary.version"}[0] .= "_filtered"; } return $dict_filtered; } ##################################### # Class method: filter_through_dict # ##################################### sub filter_through_dict { my ($self,@parameters) = @_; my ($data, $out, $dict, $options); while ($_ = shift @parameters) { $data = shift @parameters if /-data/; $dict = shift @parameters if /-dict/; $options = shift @parameters if /-options/; } my ($d,$s,$c,$i); # data, save, category, item my (@items); my ($dict_item, @dict_items, %dict_lookup); @items = $data ->get_items; @dict_items = $dict->get_save_blocks; foreach $dict_item (@dict_items) { $dict_lookup{lc($dict_item)} = $dict_item; } $out = STAR::DataBlock->new; $out->{TITLE} = $data->{TITLE}; $out->{TYPE} = $data->{TYPE}; $out->{FILE} = $data->{FILE}; $out->{STARTLN} = $data->{STARTLN}; $out->{ENDLN} = $data->{ENDLN}; foreach $d ( keys %{$data->{DATA}} ) { foreach $s ( keys %{$data->{DATA}{$d}} ) { foreach $c ( keys %{$data->{DATA}{$d}{$s}} ) { foreach $i ( keys %{$data->{DATA}{$d}{$s}{$c}} ) { if ( $dict_lookup{lc($i)} ) { $out ->{DATA}{$d}{$s}{$c}{$i} = $data->{DATA}{$d}{$s}{$c}{$i}; } } } } } return $out; } 1; __END__ =head1 NAME STAR::Filter - Perl extension for filtering DataBlock objects =head2 Version This documentation refers to version 0.01 of this module. =head1 SYNOPSIS use STAR::Filter; =head1 DESCRIPTION Contains the filter object for filtering DataBlock objects. DataBlock objects are created by Parser and modified by DataBlock. =head1 CLASS METHODS =head2 filter_dict Usage: $filtered_dict = STAR::Filter->filter_dict( -dict=>$dict, -options=>$options); A (very simplistic) interactive method for filtering a STAR::Dictionary object (.cob file). The user is prompted for each category whether to include (retain) it in the filtered object. The method returns a reference to the filtered (reduced) STAR::Dictionary object. Note: This method is preliminary and subject to change. =head2 filter_through_dict Usage: $filtered_data = STAR::Filter->filter_through_dict( -data=>$data, -dict=>$dict, -options=>$options); Filters an STAR::DataBlock object through a STAR::Dictionary object. Returns a reference to a new STAR::DataBlock object in which only those items are included which were defined in the specified dictionary. =head1 AUTHOR Wolfgang Bluhm, mail@wbluhm.com =head2 Acknowledgments Thanks to Phil Bourne, Helge Weissig, Anne Kuller, Doug Greer, Michele Bluhm, and others for support, help, and comments. =head1 COPYRIGHT A full copyright statement is provided with the distribution Copyright (c) 2000 University of California, San Diego =head1 SEE ALSO STAR::Parser, STAR::DataBlock, STAR::Dictionary. =cut STAR-Parser-0.59/Makefile.PL100444 37455 1276 476 7302530715 14203 0ustar wbluhmrcsbuse ExtUtils::MakeMaker; require Config; $Verbose = 1; WriteMakefile( NAME => 'STAR::Parser', VERSION_FROM => 'lib/STAR/Parser.pm' ); package MY; sub libscan { my($self, $path) = @_; return '' if $path =~ m:\bRCS/:; return '' if $path =~ m/~$/; return '' if $path =~ m/Makefile\~?$/; $path; }