DBIx-DBStag-0.12/0000755000076500000240000000000011331570203012114 5ustar cainstaffDBIx-DBStag-0.12/cgi-bin/0000755000076500000240000000000011331570203013424 5ustar cainstaffDBIx-DBStag-0.12/cgi-bin/ubiq.cgi0000755000076500000240000005044111326157220015063 0ustar cainstaff#!/usr/local/bin/perl -w BEGIN{ eval{do "dbenv.pl"}; die $@ if $@; }; # end of sub: use strict; use lib split(/:/, $ENV{STAGLIB} || ''); use IO::String; use DBIx::DBStag; use CGI qw/:standard/; use vars qw(%IS_FORMAT_FLAT $cscheme); #$ENV{DBSTAG_TRACE}=1; # -------------------------- # MAIN ubiq(); exit 0; # -------------------------- # ++++++++++++++++++++++++++++++++++++++++++++++++++ # ubiq # # This is the core function. It does everything # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub ubiq { # ============================================= # DECLARE VARIABLES # note: the functions below are lexically closed # and can thus access these variables. # # if you're not familiar with closures you might # find this a bit confusing... 
# ============================================= %IS_FORMAT_FLAT = map {$_=>1} qw(flat-CSV flat-TSV flat-HTML-table); $cscheme = { 'keyword'=>'cyan', 'variable'=>'magenta', 'text' => 'reset', 'comment' => 'red', 'block' => 'blue', 'property' => 'green', }; my $cgi = CGI->new; my $sdbh = DBIx::DBStag->new; # child dbh my $dbh; my $stag; my $res; my $schema; my $loc; my $templates = []; my $varnames = []; my $example_input = {}; my $options = {}; my $nesting = ''; my $rows; my $template; my $template_name = ''; my %exec_argh = (); my $resources = $sdbh->resources_list; my $resources_hash = $sdbh->resources_hash; my @dbresl = grep {$_->{type} eq 'rdb'} @$resources; my @dbnames = (map {$_->{name}} @dbresl); my $W = Data::Stag->getformathandler('sxpr'); my $ofh = \*STDOUT; my $format; my $dbname; my $errmsg = ''; # ++++++++++++++++++++++++++++++++++++++++++++++++++ # keep # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub keep; # *keep = sub { join('&', map {"$_=".param(myescapeHTML($_))} grep {param($_)} qw(dbname template format save mode)); }; # end of sub: keep # ++++++++++++++++++++++++++++++++++++++++++++++++++ # url # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub url; # *url = sub { my $base = shift; my %p = @_; %p = map { my $v = param($_); $p{$_} ? ($_ => $p{$_}) : ($v ? ($_=>$v) : ()); } (keys %p, qw(dbname template format save mode)); return "$base?". 
join('&', map {"$_=".$p{$_}} keys %p); }; # end of sub: url # ++++++++++++++++++++++++++++++++++++++++++++++++++ # conn # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub conn; # *conn = sub { $dbh = DBIx::DBStag->connect($dbname) unless $dbh; }; # end of sub: conn # ++++++++++++++++++++++++++++++++++++++++++++++++++ # is_format_flat # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub is_format_flat; # *is_format_flat = sub { # my $f = shift; $IS_FORMAT_FLAT{$format}; }; # end of sub: is_format_flat # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # # BASIC LAYOUT # # headers, footers, help, etc # # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # ++++++++++++++++++++++++++++++++++++++++++++++++++ # g_title # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub g_title; # *g_title = sub { "U * B * I * Q"; }; # end of sub: g_title # ++++++++++++++++++++++++++++++++++++++++++++++++++ # short_intro # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub short_intro; # *short_intro = sub { "This is the generic UBIQ interface"; }; # end of sub: short_intro # ++++++++++++++++++++++++++++++++++++++++++++++++++ # top_of_page # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub top_of_page; # *top_of_page = sub { (h1(g_title), href("ubiq.cgi", "Ubiq"), ' | ', href("ubiq.cgi?help=1", "Help"), br, href('#templates', '>>Templates'), br, short_intro, hr, ); }; # end of sub: top_of_page # ++++++++++++++++++++++++++++++++++++++++++++++++++ # footer # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub footer; # *footer = sub { (hr, href('http://stag.sourceforge.net'), br, myfont('$Id: ubiq.cgi,v 1.8 2004/04/12 18:23:10 cmungall Exp $x', (size=>-2)), ); }; # end of sub: footer # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # # VIEW WIDGETS # # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # ++++++++++++++++++++++++++++++++++++++++++++++++++ # template_detail # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub 
template_detail; # *template_detail = sub { my $templates = shift; my @tbls = map { my $io = IO::String->new; $_->show($io, $cscheme, \&htmlcolor); my $sr = $io->string_ref; ('name), table({-border=>1}, Tr( [td(["
$$sr
"])]))) } @$templates; return '
'.join("\n", @tbls); }; # end of sub: template_detail # ++++++++++++++++++++++++++++++++++++++++++++++++++ # stag_detail # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub stag_detail; # *stag_detail = sub { # my $W = Data::Stag->getformathandler($format || 'sxpr'); # $stag->events($W); # my $out = $W->popbuffer; my $out = $stag->generate(-fmt=>$format); return resultbox($out); }; # end of sub: stag_detail # ++++++++++++++++++++++++++++++++++++++++++++++++++ # rows_detail # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub rows_detail; # *rows_detail = sub { if ($format eq 'flat-HTML-table') { my $hdr = shift @$rows; h2('Results'). table({-border=>1, -bgcolor=>'yellow'}, Tr({}, [th([@$hdr]), map {td([map {colval2cell($_)} @$_])} @$rows])); } else { my $j = "\t"; if ($format eq 'flat-CSV') { $j = ','; } my $out = join("\n", map { join($j, map {escape($_, ("\n"=>'\n', $j=>"\\$j"))} @$_) } @$rows); resultbox($out); } }; # end of sub: rows_detail # ++++++++++++++++++++++++++++++++++++++++++++++++++ # query_results # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub query_results; # *query_results = sub { ( ($stag ? stag_detail() : ''), ($rows ? 
rows_detail() : ''), ); }; # end of sub: query_results # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # # CHOOSERS # # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # ++++++++++++++++++++++++++++++++++++++++++++++++++ # template_chooser # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub template_chooser; # *template_chooser = sub { #my $templates = shift; return table(Tr({-valign=>"TOP"}, [ map { my $is_selected = $_->name eq $template_name; my $h = {}; if ($is_selected) { $h = {bgcolor=>'red'} } my $desc = $_->desc; my $name = $_->name; my $nl = "\n"; $desc =~ s/\n/\/gs; td($h, [ href("#$name", '[scroll]'), # href("#$name", '[view]'), href(url('ubiq.cgi', (template=>$name)), strong($name)), $desc.hr, ]) } @$templates, ])); }; # end of sub: template_chooser # ++++++++++++++++++++++++++++++++++++++++++++++++++ # attr_settings # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub attr_settings; # *attr_settings = sub { return unless $template; my @vals = (); my @popups = (); my @extra = (); my $basic_tbl = table(Tr({}, [ map { my $examples = ''; my $ei = $example_input->{$_} || []; while (length("@$ei") > 100) { pop @$ei; } if (@$ei) { $examples = " Examples: ".em(join(', ', @$ei)); } td([$_, textfield("attr_$_").$examples]) } @$varnames ])); my $adv_tbl = table(Tr({}, [td([ join(br, "Override SQL SELECT:", textarea(-name=>'select', -cols=>80, ), "Override SQL WHERE:", textarea(-name=>'where', -cols=>80, ), "Override Full SQL Query:", textarea(-name=>'sql', -cols=>80, ), "Use nesting hierarchy:", textarea(-name=>'nesting', -cols=>80, ), ) ])])); return ( hr, "Selected Template: ", strong($template_name), br, submit(-name=>'submit', -value=>'exectemplate'), $basic_tbl, $adv_tbl, # table({-border=>1}, # Tr({-valign=>"TOP"}, # [td([ # ])])), ("Tree/Flat format: ", popup_menu(-name=>'format', -values=>[qw(sxpr itext XML nested-HTML flat-TSV flat-CSV flat-HTML-table)]), checkbox(-name=>'save', -value=>1, -label=>'Save Results to Disk'), ' ', 
checkbox(-name=>'showsql', -value=>1, -label=>'Show SQL Statement'), ), br, submit(-name=>'submit', -value=>'exectemplate'), hr); }; # end of sub: attr_settings # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # # SETTERS # # these set variables depending on users selections # # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # ++++++++++++++++++++++++++++++++++++++++++++++++++ # setdb # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub setdb; # *setdb = sub { #$dbname = shift; return unless $dbname; msg("Set dbname to $dbname"); $res = $resources_hash->{$dbname}; if ($res) { $schema = $res->{schema} || ''; $loc = $res->{loc} || ''; msg("loc: $loc") if $loc; if ($schema) { $templates = $sdbh->find_templates_by_schema($schema); msg("schema: $schema"); } else { msg("schema not known; templates unrestricted"); $templates = $sdbh->template_list; } msg("Templates available: " . scalar(@$templates)); } else { warnmsg("Unknown $dbname"); } $res; }; # end of sub: setdb # ++++++++++++++++++++++++++++++++++++++++++++++++++ # settemplate # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub settemplate; # *settemplate = sub { my $n = shift; my @matches = grep {$_->name eq $n} @$templates; die "looking for $n, got @matches" unless @matches == 1; $template = shift @matches; $varnames = $template->get_varnames; conn; my $cachef = "./cache/cache-$dbname-$n"; $example_input = $template->get_example_input($dbh, $cachef, 1); system("chmod 777 $cachef"); $template_name = $n; }; # end of sub: settemplate # ++++++++++++++++++++++++++++++++++++++++++++++++++ # resultbox # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub resultbox; # *resultbox = sub { my $out = shift; if (param('save')) { return $out; } h2('Results'). table({-border=>1}, Tr({}, td({bgcolor=>"yellow"},["
$out
"]))); }; # end of sub: resultbox # ++++++++++++++++++++++++++++++++++++++++++++++++++ # msg # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub msg; # *msg = sub { }; # end of sub: msg # ++++++++++++++++++++++++++++++++++++++++++++++++++ # htmlcolor # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub htmlcolor; # *htmlcolor = sub { my $c = shift; if ($c eq 'reset') { ''; } else { ""; } }; # end of sub: htmlcolor # ++++++++++++++++++++++++++++++++++++++++++++++++++ # display_helppage # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub display_helppage; # *display_helppage = sub { print(header, start_html("UBIQ: Instructions"), h1("UBIQ: Instructions for use"), h3("What is this?"), p("UBIQ is a generic interface to any relational database.", "It allows web-based queries either through SQL or an", "extensible set of",strong("SQL Templates"), "which must be defined for the database of interest."), p("UBIQ will take the SQL query results and make a", strong("hierarchical"), "data structure which is displayed", "in a format such as XML or indented text"), p("This is achieved using the ", href("http://stag.sourceforge.net", "DBStag"), "perl module"), p("UBIQ is intended for advanced, generic queries.", "If you want user-friendly queries you should use an", "interface that has been custom-designed for the database", "you are interested in."), h3("Using UBIQ"), p("First of all select the database of interest and", "click 'selectdb'. (There may only be one database,", "in which case you can skip this part)."), p("Next, choose a template from the list available for", "That database. Each template should have a description of", "what kind of query it is. You can also scroll down to the full", "SQL Template definition. 
For a description of the SQL Template", "syntax, see", href("http://stag.sourceforge.net", "Stag Documentation"), ), p("After you have selected a template, you can paste in settings", "for template attributes.", "The character '*' gets treated as a wildcard.", ), p("You can now choose a format for the results.", "Most of the formats are ", strong("hierarchical."), "if a hierarchical format is selected, then UBIQ will", "perform a transformation on the flat, tabular query results", "and build a tree-type structure that should reflect the", "natural organisation of the data."), p("Hierarchical formats are XML, sxpr (Lisp S-Expressions),", "itext (indented text)."), p("Non-hierarchical formats are tables of comma/tab seperated fields,", "which can optionally formatted into an HTML table"), p("You can also choose to see the actual SQL that gets executed"), p("When you have set the parameters, you can execute the template"), p(em("Note"), "As yet, UBIQ has no means of prioritising queries,", "it is possible to launcg queries that put a large load on the", "server, please be careful"), p( "If you receive an internal server error it probably means your query was terminated", "because it was not fully constrained. If this happens, pass in more constraints.", "DO NOT keep hitting reload - this will cause the database server to slow down.", "If this service becomes overloaded, it will have to be removed" ), h3("Advanced use"), p("Yes, a SOAP interface would be nice. 
No plans as yet.", ), p(href("ubiq.cgi", "Start UBIQ")), ); }; # end of sub: display_helppage # ++++++++++++++++++++++++++++++++++++++++++++++++++ # display_htmlpage # # MAIN PAGE # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub display_htmlpage; # *display_htmlpage = sub { print( header, start_html(g_title), top_of_page, start_form(-action=>'ubiq.cgi', -method=>'GET'), # DATABASE SELECTION ("Database", popup_menu(-name=>'dbname', -values=>[sort {$a cmp $b} @dbnames], -onChange=>"submit()", ), submit(-name=>'submit', -value=>"selectdb")), # QUERY RESULTS - if present (query_results), # ERRORS - if present ($errmsg), # ATTRIBUTE CHOOSER - if template is set (attr_settings(), ($template ? template_detail([$template]) : ''), hr), # TEMPLATE CHOOSER (h3("Choose a template:"), template_chooser, hr), # TEMPLATES - all or just selected ($template ? '' : template_detail($templates)), # PERSISTENT VARS hidden('template', param('template')), end_form, footer, ); }; # end of sub: display_htmlpage # ================================ # # SETTING THINGS UP # # ================================ my @initfuncs = (); *add_initfunc = sub { push(@initfuncs, shift); }; add_initfunc(sub { $format = param('format') || 'sxpr'; $dbname = param('dbname'); if (@dbnames == 1) { # only one to choose from; autoselect $dbname = $dbnames[0]; } setdb; # sets $dbh # sets $template $varnames settemplate(param('template')) if param('template') && param('submit') ne 'selectdb'; # set variable bindings foreach (@$varnames) { my $v = param("attr_$_"); if ($v) { $v =~ s/\*/\%/g; $exec_argh{$_} = $v; } } }); if (-f 'ubiq-customize.pl') { eval `cat ubiq-customize.pl`; die $@ if $@; } $_->() foreach @initfuncs; # execute query if ($template && param('submit') eq 'exectemplate') { eval { conn(); my $no_query_params_set = !scalar(keys %exec_argh); if (param('where')) { $template->set_clause(where=>param('where')); $no_query_params_set = 0; } if (param('select')) { 
$template->set_clause(where=>param('select')); } if ($no_query_params_set) { $errmsg = h2("No Query Constraints Set"); } else { # kill killer queries my $tag = "kill$$"."TAG"; my $tagf = "/tmp/$tag"; my $t=time; print STDERR "Launched killer $tagf at $t\n"; system("touch $tagf && chmod 777 $tagf && sleep 15 && test -f $tagf && kill -9 $$ && rm $tagf &"); if (is_format_flat) { $rows = $dbh->selectall_rows( -template=>$template, -bind=>\%exec_argh, ); } else { $stag = $dbh->selectall_stag( -template=>$template, -bind=>\%exec_argh, -nesting=>$nesting, ); } # inactivate killer (killer only kills if $tagf is present) $t=time; print STDERR "Inactivated killer $tagf at $t\n"; system("rm $tagf &"); } }; if ($@) { my $err = $@; $errmsg = br.strong("Database Error:")."
$err
"; } } # WRITE HTML TO BROWSER if (param('save')) { # WRITE TO FILE print(header({-type=>"text/text"}), query_results); } if (param('help')) { # WRITE TO FILE display_helppage } else { # WRITE HTML display_htmlpage; } } # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # # CGI UTILITY FUNCTIONS # # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # ++++++++++++++++++++++++++++++++++++++++++++++++++ # href # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub href { my $url = shift; my $n = shift || $url; "
$n"; } # end of sub: href # ++++++++++++++++++++++++++++++++++++++++++++++++++ # myfont # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub myfont ($%) { my $str = shift; my %h = @_; sprintf("$str", join(' ', map {sprintf('%s="%s"', $_, $h{$_})} keys %h)); } # end of sub: myfont # ++++++++++++++++++++++++++++++++++++++++++++++++++ # escape # # escapes characters using a map # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub escape ($@) { my $s = shift || ''; my %cmap = @_; $cmap{'\\'} = '\\\\'; my @from = keys %cmap; my @to = map{$cmap{$_}} @from; my $f = join('', @from); my $t = join('', @to); $s =~ tr/$f/$t/; $s; } # end of sub: escape # ++++++++++++++++++++++++++++++++++++++++++++++++++ # myescapeHTML # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub myescapeHTML ($) { my $s = shift; return $s; } # end of sub: myescapeHTML # ++++++++++++++++++++++++++++++++++++++++++++++++++ # colval2cell # # # ++++++++++++++++++++++++++++++++++++++++++++++++++ sub colval2cell ($) { my $cell = shift; if (!defined($cell)) { return 'NULL'; } $cell; } # end of sub: colval2cell DBIx-DBStag-0.12/Changes0000644000076500000240000000544111326157220013417 0ustar cainstaffChangelog for DBIx::DBStag Version 0.01 ============ 2003-04-20 - Initial Release Version 0.02 ============ 2004-04-01 - Addition of Templates - Use of double underscore to force columns into table Version 0.03 ============ 2004-04-02 - Problem with 0.02 distribution (accidentally included blib) Version 0.04 ============ 2004-07-15 - Fixed storenode() so that it works with databases that use the primary_key="id" convention (eg GO DB) - added code to deal with in storenode - DBStag now used for loading data into GO DB (http://www.godatabase.org) Version 0.05 ============ 2004-10-01 - Added switches for caching and bulkloading Version 0.06 ============ 2004-10-29 - Efficiency improvements Version 0.07 ============ 2005-02-09 - [cjm] added cache option to storenode.pl - escapes tabs and newlines 
with -rows switch - added check-resources.pl 2005-03-06 - [cjm] added support for loading XORT-style XML - nodes are only made dbsafe if requested 2005-03-18 - [cjm] added include_metadata option 2005-04-29 - [cjm] added tests for xort-mode: chado feature Version 0.08 ============ 2005-07-15 - [cjm] removed noaliases option replaced with aliaspolicy [cjm] fixed IN and multivalued bindings [cjm] stag-storenode.pl arguments for user and password 2005-08-22 - [cjm] added stag-connect-params.pl 2005-08-26 - [cjm] Pg insert uses SEQUENCE to fetch inserted serial pk (mysql still uses max(pk)) 2005-10-28 - [cjm] DB2 DBI string fixes [cjm] improvements to stag-qsh 2006-03-11 - [cjm] back to using last_insert_id from mysql (now works on DBI/DBD::mysql) Version 0.09 ============ 2006-08-08 - [cjm] XORT ID fix 2006-08-08 - [cjm] added more docs to stag-storenode.pl - cache options 2007-08-08 - [cjm] allowed additional DBI arguments in DBSTAG_DBIMAP_FILE 2007-08-29 - [cjm] changed to DBIx::DBSchema 0.34 interface (now requires 0.34) Version 0.10 ============ 2008-02-05 - [cjm] removed default assumption that entire colset can function as UC 2008-02-05 - [cjm] use placeholders for inserts 2008-06-03 - [cjm] added missing test file to MANIFEST Version 0.11 ============ 2009-12-14 - miration to github 2010-01-21 - compensated from bug(?) in DBSchema 0.38, where default values of '' were being quoted, resulting in double quoting DBSchema 0.38 also appears to use refs for defaults. DBStag is now neutral w.r.t. version of DBSchema, it derefs if it is a reference. 
DBIx-DBStag-0.12/DBIx/0000755000076500000240000000000011331570203012702 5ustar cainstaffDBIx-DBStag-0.12/DBIx/DBStag/0000755000076500000240000000000011331570203014006 5ustar cainstaffDBIx-DBStag-0.12/DBIx/DBStag/Constraint.pm0000644000076500000240000000627511331570152016505 0ustar cainstaff# $Id: Constraint.pm,v 1.9 2007/03/05 09:12:49 cmungall Exp $ # ------------------------------------------------------- # # Copyright (C) 2003 Chris Mungall # # This module is free software. # You may distribute this module under the same terms as perl itself #--- # POD docs at end of file #--- package DBIx::DBStag::Constraint; use strict; use vars qw($VERSION @ISA @EXPORT_OK %EXPORT_TAGS $DEBUG $AUTOLOAD); use Carp; use DBI; use Data::Stag qw(:all); use DBIx::DBStag; $VERSION='0.12'; sub DEBUG { $DBIx::DBStag::DEBUG = shift if @_; return $DBIx::DBStag::DEBUG; } sub trace { my ($priority, @msg) = @_; return unless $ENV{DBSTAG_TRACE}; print "@msg\n"; } sub dmp { use Data::Dumper; print Dumper shift; } sub new { my $proto = shift; my $class = ref($proto) || $proto; my $self = {}; bless $self, $class; $self->bindings({}); $self->children([]); $self; } # takes a simple hash of bindings # and makes an ANDed set of constraints sub from_hash { my $class = shift; my $h = shift; my $cons = $class->new; $cons->bool('and'); $cons->children([ map { my $sc = $class->new; $sc->bindings({$_ => $h->{$_}}); $sc; } keys %$h ]); return $cons; } sub throw { my $self = shift; my $fmt = shift; print STDERR "\nERROR:\n"; printf STDERR $fmt, @_; print STDERR "\n"; confess; } #AND, OR, NOT sub bool { my $self = shift; $self->{_bool} = shift if @_; return $self->{_bool}; } # eg AND(cons1, cons2, cons3) sub children { my $self = shift; $self->{_children} = shift if @_; return $self->{_children}; } sub is_leaf { my $self = shift; return !scalar(@{$self->children}); } # value to replace => in option block with sub op { my $self = shift; $self->{_op} = shift if @_; return $self->{_op}; } # variable bindings - 
hash of varname => varval sub bindings { my $self = shift; $self->{_bindings} = shift if @_; return $self->{_bindings}; } 1; __END__ =head1 NAME DBIx::DBStag::Constraint - =head1 SYNOPSIS =cut =head1 DESCRIPTION A constraint is a recursive structure for representing query constraints; AND --- x=1 \ \---- OR --- y>2 \ \----- boolfunc(z) A constraint is either a bool (AND, OR, NOT) with >0 children, or it can be a leaf node representing an atomic constraint the constraint corresponds to a SQLTemplate option block; eg [ name LIKE &name& ] [ name => &name& ] [ start > &min_start& ] [ start => &start& ] [ somefunc(&x&) ] [ somefunc(&x&, &y&) ] [ somefunc(t.col, &x&, &y&) => &z& ] the constraint consists of an operator (represented by => in the option block). If no => is present, then it is a simple variable binding. A constraint can consist of multiple variable bindings =head1 WEBSITE L =head1 AUTHOR Chris Mungall > =head1 COPYRIGHT Copyright (c) 2003 Chris Mungall This module is free software. You may distribute this module under the same terms as perl itself =cut 1; DBIx-DBStag-0.12/DBIx/DBStag/Cookbook.pm0000644000076500000240000002440211326157220016120 0ustar cainstaff =head1 NAME DBIx::DBStag::Cookbook - building and querying databases from XML =head1 SYNOPSIS stag-autoddl.pl stag-storenode.pl selectall_xml.pl =head1 DESCRIPTION This will give an outline of how to build a normalised relational database from XML source data, set up SQL templates, issue relational queries that return hierarchical results (as XML or as perl objects), and autogenerate a web query front end for this data. Why would you want to do this? Well, it gives you the full power of the relational model and SQL, combined with the convenience of representations which allow for the nesting of data entities (SQL query results are typically flat relations which are inconvenient for complex hierarchical data). The dataset we will use is the CIA World Factbook. 
The web interface should end up looking something like this - L =head2 AUTOGENERATING A RELATIONAL DATABASE Download CIA world factbook in XML format; this has kindly been made available by The University of Goettingen as part of their Mondial database project, see L for details. The actual XML file is available via L Or from L =head3 Pre-processing We need to do some pre-processing of the XML to make it more database-friendly. This is necessitated by the way Stag handles attributes (Stag prefers XML documents that have a simple tree format). We also want to turn XXX_id fields into XXX_ciaid, because we prefer to use XXX_id for surrogate keys in the database. stag-mogrify.pl -w xml -r 's/text$/quant/'\ -r 's/id$/ciaid/'\ -r 's/(.*)\-//'\ cia.xml > cia-pp.xml See also L =head3 Generating the SQL DDL Next we generate the SQL B statements stag-autoddl.pl -t cia-pp2.xml cia-pp.xml > cia-schema.sql This does further post-processing of the XML, to make it suitable for relational storage; see the file B which is generated as a side-effect of running the above script. 
Load the database (the following instructions assume you have postgresql on your localhost; please consult your DBMS manual if this is not the case) createdb cia psql -a -e cia < cia-schema.sql >& create.log (check there are no errors in the log file) =head3 LOAD THE DATA Turn the processed XML into relations: stag-storenode.pl -d dbi:Pg:dbname=cia cia-pp2.xml >& load.log =head2 FETCHING TREE DATA USING SQL You can issue SQL queries (using optional stag-specific extensions) and get the results back in a hierarchical format such as XML =head3 SQL to XML via the command line Fetch countries nested under continents: selectall_xml.pl -d dbi:Pg:dbname=cia\ "SELECT * FROM continent INNER JOIN country ON (continent.name=country.continent)" Or, edit a file containing the SQL (the following query fetches data on countries bordering other countries on different continents) cat > myquery.sql select c1.*, c2.* from country AS c1 inner join borders on (c1.country_id = borders.country_id) inner join country AS c2 on (borders.country=c2.ciaid) where c1.continent != c2.continent order by c1.name, c2.name use nesting (set(c1(c2))); (the final clause is a DBStag SQL extension - it nests country c2 under country c1) Then query for XML selectall_xml.pl -d dbi:Pg:dbname=cia -f myquery.sql > myresults.xml =head3 SQL to XML via the Interactive Query Shell Query the data using the stag query shell (qsh). You type in SQL queries, and get results back as XML (or any other tree format, such as indented text or S-Expressions). 
The following can be cut and pasted directly onto the unix command line: Simple query rooted at B: stag-qsh -d dbi:Pg:dbname=cia \l SELECT * FROM country INNER JOIN country_coasts USING (country_id) WHERE country.name = 'France'; (type \q to quit stag-qsh) Or a more advanced query, still rooted at B stag-qsh -d dbi:Pg:dbname=cia \l SELECT * FROM country LEFT OUTER JOIN religions USING (country_id) LEFT OUTER JOIN languages USING (country_id) INNER JOIN continent ON (continent.name=country.continent) WHERE continent.ciaid = 'australia' USE NESTING (set(country(religions)(languages)(continent))); See L for more details on fetching hierarchical data from relational database =head2 USING TEMPLATES If you have a particular pattern of SQL you execute a lot, you can reuse this SQL by creating B =head3 Creating Templates First create a place for your templates: mkdir ./templates (do not change directory after this) The following command specifies a colon-separated path for directories containing templates (all templates must end with .stg) setenv DBSTAG_TEMPLATE_DIRS ".:templates:/usr/local/share/sql/templates" Auto-generate templates (you can customize these later): stag-autoschema.pl -w sxpr cia-pp2.xml > cia-stagschema.sxpr stag-autotemplate.pl -no_pp -s cia -dir ./templates cia-stagschema.sxpr The first command creates an S-Expression representation of the Schema; the second generates SQL templates from these. You may wish to examine a template: more templates/cia-country.stg You can hand generate as many templates as you like; see L for more details For more example templates for this schema, see L =head3 Executing Templates from the Command Line now execute a template from the command line: selectall_xml.pl -d dbi:Pg:dbname=cia /cia-country country_name=Austria You should get back a tree (rooted in B), that looks similar to this: 3 federal republic 8023244 83850 Austria 2.3 ... 1 German 100 3 ... 
=head3 Executing Templates with the Stag Query Shell You can also do this interactively using qsh First, we need to inform stag-qsh what the schema is. The schema is used to determine which templates are appropriate. Later we will discover how to set up a resources file, which will allow stag to infer the schema. Call qsh from command line: stag-qsh -d dbi:Pg:dbname=cia -s cia Interactive perl/qsh: \l t cia-country /borders_country=cid-cia-Germany (do not leave spaces at the beginning of the line) The above should fetch all countries bordering Germany If we prefer objects over hierarchical formats such as XML, we can do this using perl. For example, to print the religions of spanish speaking countries: Still in qsh (multi-line mode), type the following: # find all Spanish-speaking countries $dataset = $dbh->selectall_stag(-template=>'cia-country',-bind=>{languages_name=>'Spanish'}); # get country objects from query results @lcountry = $dataset->get_country; foreach $country (@lcountry) { printf("Country: %s\n Religions:%s\n", $country->sget_name, join(' & ', map { $_->get_name.' '.$_->get_quant.'%' } $country->get_religions)) } print "\n\nDone!\n"; \q See L for more details on using Stag objects =head2 BUILDING A CGI/WEB INTERFACE We can construct a generic but powerful default cgi interface for our data, using ubiq.cgi, which should come with your distribution. You may have to modify some of the directories below, depending on your web server set up (we assume Apache here). We want to create the CGI, and give it access to our templates: mkdir /usr/local/httpd/cgi-bin/cia cp templates/*.stg /usr/local/httpd/cgi-bin/cia cp `which ubiq.cgi` /usr/local/httpd/cgi-bin/cia chmod +x /usr/local/httpd/cgi-bin/cia/ubiq.cgi mkdir /usr/local/httpd/cgi-bin/cia/cache chmod 777 /usr/local/httpd/cgi-bin/cia/cache Set up the environment for the CGI script. 
It must be able to see the templates and the necessary perl libraries (if not installed system-wide) cat > /usr/local/httpd/cgi-bin/cia/dbenv.pl $ENV{DBSTAG_DBIMAP_FILE} = "./resources.conf"; $ENV{DBSTAG_TEMPLATE_DIRS} = ".:./templates:/usr/local/share/sql/templates"; $ENV{STAGLIB} = "/users/me/lib/DBIx-DBStag:/users/me/lib/stag"; We must create a basic resources file, currently containing one db: cat > /usr/local/httpd/cgi-bin/cia/resources.conf cia rdb Pg:cia schema=cia Fields are whitespace delimited; do not leave a space before the initial 'cia' (note that if you set DBSTAG_DBIMAP_FILE to the avove file on the command line, you can use the shortened name of B instead of B) You should be able to use the interface via http://localhost/cgi-bin/cia/ubiq.cgi You can customize this by overriding some of the existing display functions; cat > /usr/local/httpd/cgi-bin/cia/ubiq-customize.pl # --- CUSTOM SETTINGS { no warnings 'redefine'; *g_title = sub { "U * B * I * Q - CIA World Factbook"; }; *short_intro = sub { "Demo interface to CIA World Factbook" }; add_initfunc(sub { $dbname = 'cia'; $schema = 'cia'; }); } From here on you can customise the web interface, create new templates, integrate this with other data. Consult L and the script B for further details. =head2 FURTHER EXPLORATION This cookbook has focused on an example with relatively simple XML, with only a few layers of nesting. There is a more complex example you can download from the Mondial project site here: L This also integrates data on cities, which increases the depth of the XML tree. You could use the tutorial above to try and turn this XML into a database. =head1 WEBSITE L =head1 AUTHOR Chris Mungall cjm at fruitfly dot org =head1 COPYRIGHT Copyright (c) 2002 Chris Mungall This module is free software. 
You may distribute this module under the same terms as perl itself =cut DBIx-DBStag-0.12/DBIx/DBStag/SQLTemplate.pm0000644000076500000240000005050711331570165016515 0ustar cainstaff# $Id: SQLTemplate.pm,v 1.28 2007/03/05 09:12:49 cmungall Exp $ # ------------------------------------------------------- # # Copyright (C) 2003 Chris Mungall # # This module is free software. # You may distribute this module under the same terms as perl itself #--- # POD docs at end of file #--- package DBIx::DBStag::SQLTemplate; use strict; use vars qw($VERSION @ISA @EXPORT_OK %EXPORT_TAGS $DEBUG $AUTOLOAD); use Carp; use DBI; use Data::Stag qw(:all); use DBIx::DBStag; use DBIx::DBStag::Constraint; use Text::Balanced qw(extract_bracketed); #use SQL::Statement; use Parse::RecDescent; $VERSION='0.12'; our @CLAUSE_ORDER = ('select', 'from', 'where', 'group', 'order', 'having'); sub DEBUG { $DBIx::DBStag::DEBUG = shift if @_; return $DBIx::DBStag::DEBUG; } sub trace { my ($priority, @msg) = @_; return unless $ENV{DBSTAG_TRACE}; print STDERR "@msg\n"; } sub dmp { use Data::Dumper; print Dumper shift; } sub new { my $proto = shift; my $class = ref($proto) || $proto; my $self = {}; bless $self, $class; $self->cached_sth({}); $self; } sub name { my $self = shift; $self->{_name} = shift if @_; return $self->{_name}; } sub fn { my $self = shift; $self->{_fn} = shift if @_; return $self->{_fn}; } sub sth { my $self = shift; $self->{_sth} = shift if @_; return $self->{_sth}; } sub cached_sth { my $self = shift; $self->{_cached_sth} = shift if @_; return $self->{_cached_sth}; } sub sql_clauses { my $self = shift; $self->{_sql_clauses} = shift if @_; return $self->{_sql_clauses}; } sub set_clause { my $self = shift; my $ct = lc(shift); my $v = shift; $v =~ s/^ *//; my $add = 0; if ($v =~ /\+(.*)/) { $v = $1; $add = 1; } if ($ct eq 'order' || $ct eq 'group') { $v = "BY $v" unless $add; } my $is_set = 0; my $clauses = $self->sql_clauses; my @corder = @CLAUSE_ORDER; my @nu_clauses = (); foreach my 
# Return the template's metadata properties as a hash of
# name => \@values.  A property name may appear more than once in a
# template file, so every hash value is an array ref collecting all
# values seen for that name.
sub property_hash {
    my $self = shift;
    # Guard against a template whose properties were never parsed/set;
    # the sibling accessor desc() uses the same "|| []" default, and
    # without it @$pl dies on an undef reference under strict refs.
    my $pl = $self->{_properties} || [];
    my %h;
    foreach my $p (@$pl) {
        push(@{ $h{ $p->{name} } }, $p->{value});
    }
    return \%h;
}
# Given variable bindings, build the final SQL string for this template
# plus the ordered list of DBI execute() arguments (one per '?'
# placeholder).  $bind may be a listref (positional, matched against
# the template's variable order), a hashref (name => value), or a
# DBIx::DBStag::Constraint object (complex boolean constraints).
sub get_sql_and_args {
    my $self = shift;
    my $bind = shift || {};
    # NOTE(review): $where_blocks is computed here but never used;
    # _get_sql_where_and_args_from_hashmap recomputes it itself.
    my $where_blocks = $self->split_where_clause;
    my $varnames = $self->get_varnames; # ORDERED list of variables in Q
    my %argh = ();
    my ($sql, @args);
    my $where;
    # binding can be a simple array of VARVALs
    if ($bind && ref($bind) eq 'ARRAY') {
        # assume that the order of arguments specified is
        # the same order that appears in the query
        for (my $i=0; $i<@$bind; $i++) {
            if (!$varnames->[$i]) {
                # more positional values than template variables - fatal
                my $n=$i+1;
                my $c = @$varnames;
                $self->throw("Argument number $n ($bind->[$i]) has no place ".
                             "to bind; there are only $c variables in the ".
                             "template");
            }
            # relate ordering of exec args via ordering of variables in
            # template; store in a hash
            $argh{$varnames->[$i]} = $bind->[$i];
        }
    }
    if ($bind && ref($bind) eq 'HASH') {
        # binding is already specified as a hash - no need to convert
        %argh = %$bind;
        my %varnameh = map {$_=>1} @$varnames;
        my @bad = grep {!$varnameh{$_}} keys %argh;
        if (@bad) {
            $self->throw("param(s) not recognised: @bad\nValid params:\n".join("\n",map {" $_"}@$varnames));
        }
    }
    if ($bind && ref($bind) eq "DBIx::DBStag::Constraint") {
        # COMPLEX BOOLEAN CONSTRAINTS - TODO
        my $constr;
        $constr = $bind;
        ($where, @args) = $self->_get_sql_where_and_args_from_constraints($constr);
    }
    else {
        # simple rules for substituting variables
        ($where, @args) = $self->_get_sql_where_and_args_from_hashmap(\%argh);
    }
    my $sql_clauses = $self->sql_clauses;
    # Reassemble the clauses in order, splicing in the generated WHERE
    # text (or dropping the WHERE clause entirely when no option block
    # survived the binding).
    $sql = join("\n",
                map {
                    if (lc($_->{name}) eq 'where') {
                        if ($where) {
                            "WHERE $where";
                        }
                        else {
                            ''; # no WHERE clause
                        }
                    }
                    else {
                        "$_->{name} $_->{value}";
                    }
                } @$sql_clauses);
    return ($sql, @args);
}
# Takes a simple hash of variable bindings plus the template's option
# blocks [ ... ] and builds the WHERE clause: every block whose
# variables are all bound is kept (surviving blocks are ANDed), scalar
# values become DBI '?' placeholders, and the matching execute() args
# are returned alongside the SQL text.
sub _get_sql_where_and_args_from_hashmap {
    my $self = shift;
    my %argh = %{shift || {}};
    my $where_blocks = $self->split_where_clause;

    # sql clauses to be ANDed
    my @sqls = ();
    # args to be fed to DBI execute() [corresponds to placeholder ?s]
    my @args = ();
    # index of variables replaced by ?s
    my $vari = 0;
  NEXT_BLOCK:
    foreach my $wb (@$where_blocks) {
        my $where = $wb->{text};
        my $varnames = $wb->{varnames};
#       trace(0, "WHEREBLOCK: $where;; VARNAMES=@$varnames;;\n");
        my $str = $where;
        while ($str =~ /(=>)?\s*\&(\S+)\&/) {
            my $op = $1 || '';
            my $varname = $2;
            my $argval = $argh{$varname};
            if (!exists $argh{$varname}) {
                # if a variable is not set in a particular block,
                # that block is ignored, and does not form
                # part of the final query
                next NEXT_BLOCK;
            }
            if ($op) {
                # '=>' asks us to pick the operator from the bound value
                $op = '= ';
                # defined/ref guard avoids an uninitialized-value warning
                # when a variable is bound to undef or to an array ref
                if (defined $argval && !ref($argval) && $argval =~ /\%/) {
                    $op = ' LIKE ';
                }
            }
            my $replace_with;
            # replace arrays with IN (1,2,3,...)
            if (ref($argval)) {
                $replace_with = '('.join(',', map {_quote($_)} @$argval).')';
                $op = ' IN ';
            }
            else {
                $replace_with = '?';
                $args[$vari] = $argval;
                $vari++;
            }
            # \Q...\E: variable names are captured from free template
            # text with \S+ and may contain regex metacharacters; quote
            # them so the substitution is literal (the unquoted original
            # could die on e.g. "&a(b&" or substitute the wrong span).
            $str =~ s/(=>)?\s*\&\Q$varname\E\&/$op$replace_with/;
        }
        push(@sqls, $str);
    }
    my $sql = join(' AND ', @sqls);
    trace(0, "WHERE:$sql");
    return ($sql, @args);
}
# Generate SQL for this template under the given bindings, prepare it
# through $dbh (reusing a cached statement handle for identical SQL
# text), and return ($sql, $sth, @exec_args).
sub prepare {
    my ($self, $dbh, $bind) = @_;
    my ($sql, @exec_args) = $self->get_sql_and_args($bind);
    my $cache = $self->cached_sth;
    # one handle per distinct SQL string; bindings only vary @exec_args
    $cache->{$sql} = $dbh->prepare($sql) unless $cache->{$sql};
    return ($sql, $cache->{$sql}, @exec_args);
}
# Print an error banner to STDERR and die (with stack trace via
# confess).  When extra arguments are supplied, $fmt is treated as a
# printf format; otherwise it is printed verbatim.
sub throw {
    my $self = shift;
    my $fmt = shift;
    print STDERR "\nERROR:\n";
    if (@_) {
        printf STDERR $fmt, @_;
    }
    else {
        # Most call sites pass a plain message, not a format string;
        # printing it directly stops a literal '%' in the message (e.g.
        # a LIKE pattern) from being misread as a printf directive.
        print STDERR $fmt;
    }
    print STDERR "\n";
    confess;
}
} 1; __END__ =head1 NAME DBIx::DBStag::SQLTemplate - A Template for an SQL query =head1 SYNOPSIS # find template by name $template = $dbh->find_template("mydb-personq"); # execute this template, filling in the 'name' attribute $xml = $dbh->selectall_xml(-template=>$template, -bind=>{name => "fred"}); =cut =head1 DESCRIPTION A template represents a canned query that can be parameterized. Templates are collected in directories (in future it will be possible to store them in files or in the db itself). To tell DBStag where your templates are, you should set: setenv DBSTAG_TEMPLATE_DIRS "$HOME/mytemplates:/data/bioconf/templates" Your templates should end with the suffix B<.stg>, otherwise they will not be picked up You can name templates any way you like, but the standard way is to use 2 or 3 fields SCHEMA-OBJECT or SCHEMA-OBJECT-QUALIFIERS (with underscores used within fields) A template file should contain at minimum some SQL; for example: =over =item Example template 1 SELECT studio.*, movie.*, star.* FROM studio NATURAL JOIN movie NATURAL JOIN movie_to_star NATURAL JOIN star WHERE [movie.genre = &genre&] [star.lastname = &lastname&] USE NESTING (set(studio(movie(star)))) Thats all! However, there are ways to make your template more useful =item Example template 2 :SELECT studio.*, movie.*, star.* :FROM studio NATURAL JOIN movie NATURAL JOIN movie_to_star NATURAL JOIN star :WHERE [movie.genre = &genre&] [star.lastname = &lastname&] :USE NESTING (set(studio(movie(star)))) // schema: movie desc: query for fetching movies By including B<:> at the beginning it makes it easier for parsers to assemble SQL (this is not necessary for DBStag however) After the // you can add tag: value data. 
You should set B if you want the template to be available to users of a db that conforms to that schema =back =head2 GETTING A TEMPLATE The L object gives various methods for fetching templates by name, by database or by schema =head2 VARIABLES WHERE clause variables in the template look like this &foo& variables are bound at query time my $set = $dbh->selectall_stag(-template=>$t, -bind=>["bar"]); or my $set = $dbh->selectall_stag(-template=>$t, -bind=>{foo=>"bar"}); If the former is chosen, variables are bound from the bind list as they are found =head2 OPTIONAL BLOCKS WHERE [ foo = &foo& ] If foo is not bound then the part between the square brackets is left out Multiple option blocks are Bed together An option block need not contain a variable - if it contains no B<&variable&> name it is automatically Bed =head2 BINDING OPERATORS The operator can be bound at query time too WHERE [ foo => &foo& ] Will become either WHERE foo = ? or WHERE foo LIKE ? or WHERE foo IN (f0, f1, ..., fn) Depending on whether foo contains the % character, or if foo is bound to an ARRAY =head1 METHODS =head2 name Usage - $name = $template->name Returns - str Args - every template has a name that (should) uniquely identify it =head2 desc Usage - $desc = $template->desc Returns - str Args - templates have optional descriptions =cut =head2 get_varnames Usage - $varnames = $template->get_varnames Returns - listref of strs Args - Returns the names of all variable used in this template =cut =head1 WEBSITE L =head1 AUTHOR Chris Mungall > =head1 COPYRIGHT Copyright (c) 2003 Chris Mungall This module is free software. You may distribute this module under the same terms as perl itself =cut 1; DBIx-DBStag-0.12/DBIx/DBStag.pm0000644000076500000240000042440111331570136014356 0ustar cainstaff# $Id: DBStag.pm,v 1.59 2008/02/06 00:50:55 cmungall Exp $ # ------------------------------------------------------- # # Copyright (C) 2002 Chris Mungall # # This module is free software. 
# Constructor; optionally takes -dbh => $dbh to wrap an already-open
# database handle.
sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my ($dbh) = rearrange([qw(dbh)], @_);
    my $self = bless {}, $class;
    $self->dbh($dbh) if $dbh;
    return $self;
}
# Return a listref of all resource records parsed from the
# DBSTAG_DBIMAP_FILE resources file, or undef when no resources file
# is configured.
sub resources_list {
    my $self = shift;
    my $rh = $self->resources_hash;
    my $rl;
    $rl = [ values %$rh ] if $rh;
    return $rl;
}
# Look up the schema declared for a named database resource; matches
# the resource by its short name or by its full locator.  Returns the
# schema string, or nothing when the resource is unknown or declares
# no schema.
sub find_schema {
    my $self = shift;
    my $dbname = shift;
    # Fixed two defects: the method name was misspelled
    # ("resouces_list" - a guaranteed runtime error), and the grep
    # compared each record's fields against $_ (the record itself)
    # instead of against $dbname, so no lookup could ever succeed.
    my $rl = $self->resources_list || [];
    my ($r) = grep { $_->{name} eq $dbname || $_->{loc} eq $dbname } @$rl;
    return $r->{schema} if $r;
    return;
}
# True if $col is the primary-key column of some table in the schema.
# The set of PK column names is computed once from dbschema and
# memoized on the instance.
sub is_pk_col {
    my ($self, $col) = @_;
    my $idx = $self->{_is_pk_col_h};
    if (!$idx) {
        # build and cache the index of primary-key column names
        $idx = {};
        for my $tbl ($self->dbschema->tables) {
            my $pk = $self->dbschema->table($tbl)->primary_key;
            $idx->{$pk} = 1 if $pk;
        }
        $self->{_is_pk_col_h} = $idx;
    }
    return $idx->{$col};
}
# Accessor for the linking-table map: linking_table_name => [t1, t2].
# With arguments, stores a new map; always returns the current map as
# a flat hash (empty when nothing has been stored).
sub linking_tables {
    my ($self, %links) = @_;
    $self->{_linking_tables} = { %links } if %links;
    my $stored = $self->{_linking_tables} || {};
    return %$stored;
}
# Split a schema element name into (name, cardinality) where the
# cardinality suffix is one of + ? * (or '' when absent).  Hyphens are
# stripped from the name so it is safe as an RDB identifier.
sub elt_card {
    my $e = shift;
    my $card = '';
    if ($e =~ /(.*)([+?*])/) {
        ($e, $card) = ($1, $2);
    }
    # make the element RDB-safe
    $e =~ tr/-//d;
    return ($e, $card);
}
# Derive DDL (CREATE TABLE statements) for the schema implied by a
# stag node tree.  $link lists element names to be treated as linking
# tables.
sub autoddl {
    my ($self, $stag, $link) = @_;
    $stag->makeattrsnodes;
    my $schema = $stag->autoschema;
    $self->source_transforms([]);
    return $self->_autoddl($schema, undef, $link);
}
($col eq $tbl.'_id') { shift @cols; $pk = ' PRIMARY KEY'; } if ($card =~ /[\+\*]/) { my $new_name = sprintf("%s_%s", $tbl, $col); my $tf = ["$tbl/$col", "$new_name/$col"]; push(@{$self->source_transforms}, $tf); $_->name($new_name); $_->data([[$col => $_->data]]); # $self->throw("In the source data, '$col' is a multivalued\n". # "terminal (data) node. This is difficult to transform"); } else { # my $isnull = $card eq '?' ? '' : ' NOT NULL'; my $isnull = ''; push(@cols, sprintf("%s %s$isnull$pk", $col, $_->data)); } } if ($parent) { my ($pn) = elt_card($parent->element); push(@cols, sprintf("%s_id INT", $pn)); push(@cols, sprintf("FOREIGN KEY (%s_id) REFERENCES $pn(%s_id)$casc", $pn, $pn)); } my $mapping = $self->mapping || []; if (grep {$_ eq $tbl} @$tbls) { # $self->throw("$tbl has >1 parent - you need to\n". # "transform input data"); return ""; } push(@$tbls, $tbl); my $post_ddl = ''; my $pre_ddl = ''; foreach (grep {!stag_isterminal($_)} @sn) { # check for cases where we want to include FK to subnode my ($map) = grep { $_->name eq 'map' && ($_->get_table eq $tbl && ($_->get_fktable_alias eq $_->element || $_->get_fktable eq $_->element)) } @$mapping; # linking tables if ($map || grep {$_ eq $tbl} @$link) { my $ftbl = $_->element; push(@cols, sprintf("%s_id INT", $ftbl)); push(@cols, sprintf("FOREIGN KEY (%s_id) REFERENCES $ftbl(%s_id)$casc", $ftbl, $ftbl)); $pre_ddl .= $self->_autoddl($_, undef, $link, $tbls); } else { $post_ddl .= $self->_autoddl($_, $schema, $link, $tbls); } } my $ddl = sprintf("CREATE TABLE $tbl (\n%s\n);\n\n", join(",\n", map {" $_"} @cols)); return $pre_ddl . $ddl . 
# Per-element caching flag.  With a third argument, sets the flag for
# $element; always returns the flag's current value.
sub is_caching_on {
    my ($self, $element, @val) = @_;
    my $flags = $self->{_is_caching_on} ||= {};
    # a value was explicitly passed (even undef/0) => store it
    $flags->{$element} = $val[0] if @val;
    return $flags->{$element};
}
# Accessor for the tuple cache (element => unique-key => tuple).
# Lazily initializes to an empty hashref so callers never see undef.
sub cache {
    my $self = shift;
    $self->{_cache} = shift if @_;
    $self->{_cache} ||= {};
    return $self->{_cache};
}
$self->{_colvalmap}; foreach my $cols (@_) { my $h = {}; foreach (@$cols) { $self->{_colvalmap}->{$_} = $h; } } } return @{$self->{_mapgroups} || []}; } # DEPRECATED sub get_mapping_for_col { my $self = shift; my $col = shift; $self->{_colvalmap}->{$col} = {} unless $self->{_colvalmap}->{$col}; return $self->{_colvalmap}->{$col}; } # mapping of Old ID => New ID # IDs are assumed to be global across ALL tables sub id_remap_idx { my $self = shift; if (@_) { $self->{_id_remap_idx} = shift; } else { $self->{_id_remap_idx} = {} unless $self->{_id_remap_idx}; } return $self->{_id_remap_idx}; } # do the PK values in the XML represent the actual # internal db ids, or are they local to the document? # if the latter then we will create a id_remap_idx sub trust_primary_key_values { my $self = shift; $self->{_trust_primary_key_values} = shift if @_; return $self->{_trust_primary_key_values}; } sub make_stag_node_dbsafe { my $self = shift; my $node = shift; my $parent = shift; my $name = $node->name; # CJM 2007-03-05 #return if $name eq '@'; # leave attrs alone if ($name eq '@') { # descend into attrs $parent->data([grep {$_->name ne '@'} @{$parent->data},@{$node->data}]); return; } my $safename = $self->dbsafe($name,$parent); if ($name ne $safename) { $node->name($safename); } my @kids = $node->kids; foreach (@kids) { $self->make_stag_node_dbsafe($_,$node) if ref $_; } return; } sub dbsafe { my $self = shift; my $name = shift; my $parent = shift; $name = lc($name); # dbstag is designed for stag-like xml; no mixed attributes # however, we do have basic checks for mixed attributes if ($name eq '.') { $name = $parent->name.'_data'; # TODO - allow custom column } $name =~ tr/a-z0-9_//cd; return $name; } # cache the attribute nodes as they are parsed #sub current_attribute_node { # my $self = shift; # $self->{_current_attribute_node} = shift if @_; # return $self->{_current_attribute_node}; #} # lookup table; macro ID => internal database ID sub macro_id_h { my $self = shift; 
    $self->{_macro_id_h} = shift if @_;
    $self->{_macro_id_h} = {} unless $self->{_macro_id_h};
    return $self->{_macro_id_h};
}

# xort-style XML; set if an attribute is encountered
sub xort_mode {
    my $self = shift;
    $self->{_xort_mode} = shift if @_;
    return $self->{_xort_mode};
}

# The sketches below illustrate how nested elements map to FK metadata
# and the resulting store order:
#
# '(t1
#   (foo x)
#   (t2
#    (bar y)))
#
# '(fk
#   (table t2)
#   (ftable t1))
#
# alg: store t1, then t2
#
# '(t1
#   (foo x)
#   (t1_t2
#    (t2
#     (bar y))))
#
# '(fk
#   (table t1_t2)
#   (ftable t1))
# '(fk
#   (table t1_t2)
#   (ftable t2))
#
# alg: store t1, hold on t1_t2, store t2
#
# '(t1
#   (foo x)
#   (blah
#    (t2
#     (bar y))))
#
# '(fk
#   (table t1)
#   (fktable t2)
#   (fktable_alias "blah")
#   (fk "blah_id"))
#
# alg: store t2, store t1

# if set, will ensure that tbl/col names are transformed to be safe
sub force_safe_node_names {
    my $self = shift;
    $self->{_force_safe_node_names} = shift if @_;
    return $self->{_force_safe_node_names};
}

# recursively stores a Data::Stag tree node in the database.
# Public entry point: works on a duplicate of the node (the caller's
# tree is not modified), optionally sanitises names, adds linking
# tables, then delegates to _storenode.
sub storenode {
    my $self = shift;
    my $node = shift;
    my @args = @_;
    my $dupnode = $node->duplicate;
    $self->make_stag_node_dbsafe($dupnode,'') if $self->force_safe_node_names;
    $self->add_linking_tables($dupnode);
    $self->_storenode($dupnode,@args);
}

# Internal recursive store. Outline of the implementation below:
#   1. handle special elements (dbstag_metadata, _sql)
#   2. strip a XORT '@' attribute child (captures op directive / macro id)
#   3. pre-store child non-terminal nodes this row holds an FK to,
#      replacing them with their new PK values
#   4. remap placeholder PK/FK values unless trust_primary_key_values
#   5. find a usable unique-key constraint to decide UPDATE vs INSERT
#   6. perform the update/insert/delete/lookup/replace
#   7. store "delayed" child nodes that hold an FK back to this row
# $opt may carry parent_pk_id / parent_element / assigned_node_h from
# the recursive caller. Returns the primary key value of the stored
# row (or undef).
sub _storenode {
    my $self = shift;
    my $node = shift;
    my $opt = shift;
    if (!$node) {
        confess("you need to pass in a node!");
    }
    my $element = $node->element;
    return unless $node->kids;

    # inline schema metadata: mappings and linking-table declarations
    if ($element eq 'dbstag_metadata') {
        my @maps = $node->get_map;
        $self->mapping(\@maps);
        my @links = $node->get_link;
        if (@links) {
            my %h = map { ($_->sget_table => [$_->sget_from, $_->sget_to]) } @links;
            $self->linking_tables(%h);
        }
        return;
    }

    # sql can be embedded as <_sql> tags
    if ($element eq '_sql') {
        $self->_execute_sqlnode($node);
        return;
    }

    # check for XORT-style attributes
    # if ($element eq '@') {
    #     # is this check required???
    #     $self->current_attribute_node($node);
    #     $self->xort_mode(1);
    #     return;
    # }

    # pull out a XORT-style '@' attribute child, if present; it carries
    # the operation directive and/or a macro id for this node
    my $current_attribute_node;
    unless ($node->isterminal) {
        my @kids = $node->kids;
        my $changed = 0;
        @kids = map {
            if ($_->element eq '@') {
                $self->xort_mode(1);
                $current_attribute_node = $_;
                $changed = 1;
                trace(0, "GOT ATTR NODE");
                (); # omit
            }
            else {
                $_; # unchanged
            }
        } @kids;
        $node->kids(@kids) if $changed;
    }

    my $operation; # directive: force/update/lookup
    if ($current_attribute_node){
        $operation = $current_attribute_node->sget_op;
    }
    trace(0, "STORING $element\n", $node->xml) if $TRACE;

    # optional per-node progress reporting; tracenode is "element/tag"
    my $tracenode = $self->tracenode || '';
    my $tracekeyval;
    if ($tracenode && $tracenode =~ /^(\w+)\/(.*)/) {
        my $nn = $1;
        my $tag = $2;
        if ($nn eq $element) {
            $tracekeyval = $node->get($tag);
        }
    }

    my $dbh = $self->dbh;
    my $dbschema = $self->dbschema;
    my $is_caching_on = $self->is_caching_on($element) || 0;
    my $mapping = $self->mapping || [];

    # each relation has zero or one primary keys;
    # primary keys are assumed to be single-column
    my $pkcol = $self->get_pk_col($element);
    trace(0, "PKCOL: $pkcol") if $TRACE;

    # DBIx::DBSchema metadata
    my $tableobj = $dbschema->table($element);

    # -- PRE-STORE CHILD NON-TERMINALS --
    # before storing this node, we need to
    # see if we first need to store any child
    # non-terminal nodes (in order to get their
    # primary keys, to use as foreign keys in
    # the current relation)

    # store non-terminal subnodes first
    my @ntnodes = $node->ntnodes;

    # keep track of nodes that have been assigned xort-style
    my %assigned_node_h;

    # GET INFORMATION FROM SUPER-NODE
    # some nodes may have been assigned by the calling process
    # (eg if the supernode is refered to by a fk from the current table)
    # this hash maps element names to a boolean;
    # this is ONLY used in conjunction with xort-style xml
    # we set this when we want to make sure that an element value is
    # NOT macro-expanded by the expansion code
    %assigned_node_h = %{$opt->{assigned_node_h} || {}};
    # the primary key value of the supernode
    my $parent_pk_id = $opt->{parent_pk_id};
    # the element type of the supernode
    my $parent_element = $opt->{parent_element};
    # -- end of info from super-node

    # PRE-STORE
    # look through the current node's children;
    # + some of these will be nodes that must be pre-stored BEFORE
    #   the current node (because the current node has a fk to them)
    # + some of these will be nodes that must be post-stored AFTER
    #   the current node (because they have an fk to the current node)
    #
    # one or other of these situations must be true - otherwise
    # nodes should not be nested!
    my @delayed_store = (); # keep track of non-pre-stored nodes
    foreach my $nt (@ntnodes) {
        # First check for XORT-STYLE
        # xort-style XML; nodes can be nested inside a non-terminal
        # node corresponding to a FK column
        # here, what looks like a non-terminal node should actually
        # check all sub-nodes; if any of them are nonterminal and correspond
        # to a column (not a table) then add the sub-node and use the pk id
        # as the returned value
        # note: we have to explicitly check the col is not also a table
        # since some dbs (eg go db) have col names the same as tbl names
        if ($self->is_col($nt->name) && !$nt->isterminal && !$self->is_table($nt->name)) {
            my @kids = $nt->kids;
            if (@kids != 1) {
                $self->throw("non-terminal pk node should have one subnode only; ". $nt->name." has ".scalar(@kids));
            }
            # store the nested node; its PK becomes this column's value
            my $sn_val = $self->_storenode(shift @kids);
            if (!defined($sn_val)) {
                $self->throw("no returned value for ".$nt->name);
            }
            # TRANSFORM NODE: non-terminal to terminal
            # replace node with return pk ID value
            $nt->data($sn_val);
            # do NOT try and expand the value assigned to this
            # node with a xort-macro expansion later on
            $assigned_node_h{$nt->name} = 1;
            trace(0, "ASSIGNED NON-MACRO ID for ".$nt->name." TO $sn_val") if $TRACE;
            # skip this ntnode - it is now a tnode
            next;
        }
        # -- END OF xort-style check

        # we want to PRE-STORE any ntnodes that
        # are required for foreign key relationships
        # within this node;
        # ie this node N1 has a foreign key "fk_id" that
        # points to ntnode N2.
        # if there is an intermediate alias element in
        # between then we need to store the ntnode too
        #
        # check for either of these conditions
        my ($map) =
          grep {
              $_->get_table && $_->get_table eq $element &&
                ($_->get_fktable_alias && $_->get_fktable_alias eq $nt->element ||
                 ($_->get_fktable && $_->get_fktable eq $nt->element && !$_->get_fktable_alias))
            } @$mapping;

        # check to see if sub-element has FK to this element
        if (!$map) {
            # my $subtable = $dbschema->table($nt->element);
            my $table = $dbschema->table($element);
            my $ntelement = $nt->element;
            my $subpkcol = $self->get_pk_col($ntelement);
            my @cns = $table->columns;
            my $cn;  # col name (FK in current element)
            my $fcn; # foreign col name (PK in sub element)
            # HACK - ASSUME NATURAL JOIN
            # for example, a FK: person.dept_id => dept.dept_id
            if ($subpkcol ne 'id') {
                foreach (@cns) {
                    if ($_ eq $subpkcol) {
                        $cn = $_;
                        $fcn = $_;
                    }
                }
            }
            # second chance; allow base "id" style
            # for example, a FK: person.dept_id => dept.id
            if (!$cn) {
                if ($subpkcol eq 'id') {
                    foreach (@cns) {
                        if ($_ eq $ntelement."_id") {
                            $cn = $_;
                            $fcn = 'id';
                        }
                    }
                }
            }
            if ($cn) {
                # synthesise a map node equivalent to user metadata
                $map = Data::Stag->new(map=>[
                                             [table=>$element],
                                             [col=>$cn],
                                             [fktable=>$nt->element],
                                             [fkcol=>$fcn]
                                            ]);
            }
        }

        # if $map is set, then we have to pre-store this subnode
        if ($map) {
            # 1:many between this and child
            # (eg this has fk to child)
            # store child before this;
            # use fk in this
            my $fktable = $map->get_fktable;
            my $col = $map->get_col || $self->get_pk_col($fktable);
            # aliases map an extra table
            # eg table X col X.A => Y.B
            # fktable_alias = A
            my $fktable_alias = $map->get_fktable_alias;
            my $orig_nt = $nt;
            # if we have an alias, it means the actual node
            # we want to store is one beneath the alias
            if ($fktable_alias) {
                my @nts = $nt->sgetnode($map->sget_fktable);
                if (!@nts) {
                    print STDERR $nt->sxpr;
                    confess("could not get node for: ".$map->sget_fktable);
                }
                if (@nts > 1) {
                    print STDERR $nt->sxpr;
                    confess("multiple nodes for: ".$map->sget_fktable);
                }
                $nt = shift @nts;
                if (!$nt) {
                    print STDERR $map->sxpr;
                    print STDERR $orig_nt->sxpr;
                    confess("bad nodes for: ".$map->sget_fktable);
                }
            }
            my $fk_id = $self->_storenode($nt);
            if (!defined($fk_id)) {
                confess("ASSERTION ERROR: could not get foreign key val\n".
                        "trying to store: $element\n".
                        "no fk returned when storing: $fktable");
            }
            trace(0, "SETTING $element.$col=$fk_id [via ".$orig_nt->element."]") if $TRACE;
            $node->set($col, $fk_id);
            $node->unset($orig_nt->element);
            # do NOT try and expand the value assigned to this
            # node with a xort-macro expansion later on
            $assigned_node_h{$col} = 1;
            trace(0, "ASSIGNED NON-MACRO ID for ".$col) if $TRACE;
        }
        else {
            # 1:many between child and this
            # (eg child has fk to this)
            # store child after
            trace(0, "WILL STORE LATER:\n", $nt->xml) if $TRACE;
            $node->unset($nt->element);
            push(@delayed_store, $nt);
        }
        # $node->unset($nt->element); # clear it
    }
    # --- done storing kids

    # --- replace *IDs ---
    # dbstag XML allows placeholder values in primary key cols
    # (for now, PKs are always assumed to be autoincrement/serial ints)
    # placeholder PKs get remapped to a new autogenerated ID
    # all FKs referring to this get remapped too
    my @tnodes = $node->tnodes; # terminal nodes mapped to columns in db
    my %remap = ();             # indexed by column name; new PK value
    if (!$self->trust_primary_key_values) {
        foreach my $tnode (@tnodes) {
            # foreign keys in XORT mode - replace macro ID with
            # actual database foreign key value
            if ($self->is_fk_col($tnode->name) && $self->xort_mode) {
                my $v = $tnode->data;
                # -- CHECK FOR MACRO EXPANSION (XORT-STYLE) --
                # IF this tnode was originally an ntnode that
                # was collapsed to a pk val, xort style, do not
                # try and map it to a previously assigned macro
                if ($assigned_node_h{$tnode->name}) {
                    trace(0, "ALREADY CALCULATED; not a Macro ID:$v;; in $element/".$tnode->name) if $TRACE;
                    # DO NOTHING
                }
                else {
                    # NOT ASSIGNED
                    my $actual_id = $self->macro_id_h->{$v};
                    if (!defined($actual_id)) {
                        $self->throw("XORT-style Macro ID:$v is undefined;; in $element/".$tnode->name);
                    }
                    $tnode->data($actual_id);
                }
                # -- END OF MACRO EXPANSION --
            }
            elsif ($tnode->name eq $pkcol) {
                my $v = $tnode->data;
                trace(0, "REMAP $pkcol: $v => ? [do not know new value yet]") if $TRACE;
                $remap{$tnode->name} = $v; # map after insert/update
                $node->unset($tnode->name); # discard placeholder
            }
            else {
                if ($self->is_fk_col($tnode->name)) {
                    # hack!! need proper FK refs...; DBSchema wont do this
                    my $colvalmap = $self->id_remap_idx;
                    #my $colvalmap = $self->get_mapping_for_col($nt->elememt);
                    if ($colvalmap) {
                        my $v = $tnode->data;
                        my $nv = $colvalmap->{$v};
                        if ($nv) {
                            trace(0, "remapping $v => $nv") if $TRACE;
                            $tnode->data($nv);
                        }
                    }
                }
            }
        }
    }
    # -- end of ID remapping

    # --- Get columns that need updating/inserting ---
    # turn all remaining tag-val pairs into a hash
    my %store_hash = $node->pairs;
    # All columns to be stored should be terminal nodes
    # in the Stag tree; if not there is a problem
    my @refcols = grep { ref($store_hash{$_}) } keys %store_hash;
    if (@refcols) {
        foreach (@$mapping) {
            trace(0, $_->sxpr) if $TRACE;
        }
        confess("I can't store the current node; ".
                "These elements need to be mapped via FKs: ".
                join(', ', map {"\"@refcols\""} @refcols). "\n\nPerhaps you need to specify more schema metadata?");
    }
    # -- end of sanity check

    # each relation has zero or more unique keys;
    # unique keys may be compound (ie >1 column)
    my @usets = $self->get_unique_sets($element);
    trace(0, "USETS: ", map {"unique[ @$_ ]"} @usets) if $TRACE;

    # get all the columns/fields/attributes of this relation
    my @cols = $self->get_all_cols($element);
    trace(0, "COLS: @cols") if $TRACE;

    # store_node() will either perform an update or
    # an insert. if we are performing an update, we
    # need a query constraint to determine which row
    # to update.
    #
    # this hash is used to determine the key/val pairs
    my %unique_constr;

    # this is the value of the primary key of
    # the inserted/update row
    my $id;

    # if this relation has a primary key AND the stag node
    # being stored has the value of this column set, THEN
    # use this as the update constraint
    # NOTE(review): deliberately disabled via the 0 && guard.
    if (0 && $pkcol) {
        my $pk_id;
        $pk_id = $node->get($pkcol);
        if ($pk_id) {
            # unset the value of the pk in the node; there
            # is no point setting this in the UPDATE as it
            # is already part of the update constraint
            $node->unset($pkcol);
            # set the update constraint based on the PK value
            %unique_constr = ($pkcol => $pk_id);
            # return this value at the end
            $id = $pk_id;
            trace(0, "SETTING UPDATE CONSTR BASED ON PK $pkcol = $pk_id") if $TRACE;
        }
    }
    # -- end of xxxx

    # foreach my $sn ($node->kids) {
    #     my $name = $sn->element;
    #     my $nu_id = $self->id_mapping($name, $sn->data);
    #     # do the old 2 nu mapping
    #     # (the ids in the xml are just temporary
    #     #  for internal consistency)
    #     $sn->data($nu_id) if $nu_id;
    # }

    if (0) {
        # ---- EXPERIMENTAL ----
        # if no unique keys are provided, assume that all
        # non-PK columns together provide a compound unique key
        # <> expedient for now!
        if (!@usets) {
            @usets = ( [grep {$_ ne $pkcol} @cols] );
        }
    }

    if ($pkcol) {
        # make single PK the first unique key set;
        # add to beginning as this is the most efficient
        unshift(@usets, [$pkcol]);
    }

    # get the column to select to get the pk for this element
    my $select_col = $pkcol;

    # -------- find update constraint by unique keys ----
    # if the unique_constr hash is set, we know we
    # are doing an UPDATE, and we know the query
    # constraint that will be used;
    #
    # otherwise loop through all unique keys; if
    # all the columns in the key are set, then we
    # can safely use this unique key as the update
    # constraint.
    # if no update constraint can be found, this node
    # is presumed not to exist in the DB and an INSERT
    # is performed
    foreach my $uset (@usets) {
        # we already know & have the primary key
        last if %unique_constr;
        # if we are loading up a fresh/blank slate
        # database then we don't need to check for
        # existing tuples, as everything should
        # have been inserted/updated this session
        if ($self->policy_freshbulkload) {
            next;
        }
        # if an xort-style attribute has op=insert
        # this is the same as a bulkload
        if ($operation && $operation eq 'insert') {
            next;
        }
        # already tried PK
        # if (scalar(@$uset) == 1 &&
        #     $uset->[0] eq $pkcol) {
        #     next;
        # }
        trace(0, "TRYING USET: ;@$uset; [pk=$pkcol]") if $TRACE;

        # get the values of the unique key columns;
        # %constr is a candidate unique key=>val mapping
        my %constr =
          map {
              my $v = $node->sget($_);
              $_ => $v
          } @$uset;

        # each column in the unique key must be
        # non-NULL; try the next unique key if
        # this one is unsuitable
        # -- COMMENTED OUT cjm 20041012
        # mysql auto-creates defaults for non-null fields;
        # we cannot use this code:
        # UNCOMMENTED 20050304
        # -- make null value part of the key
        # -- ADDED 20041012 - make null 0/''
        foreach (keys %constr) {
            # in pg, pk cols are sequences with defaults nextval
            # skip these
            next if $self->is_pk_col($_);
            if (!defined($constr{$_})) {
                if ($self->is_fk_col($_)) {
                    # if xort-style, the container may be an implicit foreign key
                    # TODO: check element
                    if ($parent_pk_id) {
                        trace(0, "USING PARENT ELEMENT: $_ => $parent_pk_id");
                        $constr{$_} = $parent_pk_id;
                    }
                }
                else {
                    # fall back to the column's schema-declared default
                    my $colobj = $tableobj->column($_);
                    my $default_val = $colobj->default;
                    my $col_type = $colobj->type;
                    if (defined $default_val) {
                        # problem with DBIx::DBSchema
                        if ($default_val =~ /^\'(.*)\'::/) {
                            trace(0, "FIXING DEFAULT: $default_val => $1") if $TRACE;
                            $default_val = $1;
                        }
                        if (($col_type =~ /^int/ || $col_type =~ /float/) && $default_val eq '') {
                            # this SHOULDN'T be necessary, but appears to be required for
                            # some configuartions. DBSchema problem?
                            $default_val=0;
                        }
                        if (ref($default_val)) {
                            # In new versions of DBIx::DBSchema (0.38, possibly older versions),
                            # this appears to be a reference
                            $default_val = $$default_val;
                            if ($default_val eq "''") {
                                $default_val = '';
                            }
                        }
                        $constr{$_} = $default_val;
                        trace(0, "USING DEFAULT[type=$col_type] $_ => \"$constr{$_}\"") if $TRACE;
                    }
                }
            }
        }
        # TODO: check cases eg dbxref in chado; null default values...?
        next if grep { !defined($_) } values %constr;
        %unique_constr = %constr;
        if (!$select_col && @$uset == 1) {
            $select_col = $uset->[0];
        }
        trace(0, "GOT unique_constr, select_col=$select_col") if $TRACE;
        last;
    }
    # -- END OF @usets --

    # %unique_constr is set; a mapping for a unique key colset
    # if this is not set, then we must insert
    if (%unique_constr) {
        # -- IN-MEMORY CACHING --
        # check if we have already updated/inserted
        # this tuple this session; and if so, what
        # the update constraint used was
        if ($is_caching_on == 1 || $is_caching_on == 3) {
            #$self->throw("no select col for $element") unless $select_col;
            # fetch values of unique_constr from cache
            my %cached_colvals = %{$self->query_cache($element, \%unique_constr) || {}};
            # have we stored anything with uniq key %unique_constr before?
            if (%cached_colvals) {
                if ($pkcol) {
                    $id = $cached_colvals{$pkcol};
                    if ($id) {
                        # use the cached pk id for efficiency
                        #%unique_constr = {$pkcol => $id};
                        trace(0, "CACHED $pkcol = $id") if $TRACE;
                    }
                    else {
                        trace(0, "NO CACHED COLVAL FOR $pkcol :: ". join("; ",map {"$_ = $cached_colvals{$_}"} keys %cached_colvals)) if $TRACE;
                    }
                }
                # yes - has it changed?
                # NOTE(review): this condition tests the truthiness of each
                # value twice; it looks like the intent was an equality
                # comparison (eg $cached_colvals{$col} eq $store_hash{$col}).
                # As written, any truthy cached/new pair causes the column to
                # be dropped from the update even when the value has changed.
                # TODO confirm against upstream history before changing.
                foreach my $col (keys %cached_colvals) {
                    if ($cached_colvals{$col} && $store_hash{$col} &&
                        $cached_colvals{$col} && $store_hash{$col}) {
                        # don't bother re-storing anything
                        delete $store_hash{$col};
                    }
                }
                if (%store_hash) {
                    my @x = keys %store_hash;
                    trace(0, "WILL STORE: @x") if $TRACE;
                }
                else {
                    trace(0, "UNCHANGED - WILL NOT STORE; store_hash empty") if $TRACE;
                }
            }
            else {
            }
        }
        # -- END OF CACHING CHECK --

        # -- GET PK VAL $id BASED ON unique_constr --
        # (we may already have this based on memory-cache)
        if (!$id) {
            # the input node contains all the keys in %update_constr
            # - check to see if this relation exists in the DB
            my $vals;
            if ($is_caching_on >= 2) {
                # cache-only mode: never query the db
                $vals = [];
            }
            else {
                my $sql = $self->makesql($element, \%unique_constr, $select_col);
                trace(0, "SQL: $sql") if $TRACE;
                $vals = $dbh->selectcol_arrayref($sql);
            }
            if (@$vals) {
                # yes it does exist in DB; check if there is a
                # pkcol - if there is, it means we can do an update
                if ($pkcol && $select_col && $select_col eq $pkcol) {
                    # this is the value we return at the end
                    $id = $vals->[0];
                    if ($remap{$pkcol}) {
                        #my $colvalmap = $self->get_mapping_for_col($pkcol);
                        my $colvalmap = $self->id_remap_idx;
                        #my $colvalmap = $self->get_mapping_for_col($element);
                        $colvalmap->{$remap{$pkcol}} = $id;
                        trace(0, "COLVALMAP $pkcol $remap{$pkcol} = $id") if $TRACE;
                    }
                }
                else {
                    # $id not set, but we will later perform an update anyway
                }
            }
            else {
                # this node is not in the DB; force insert
                %unique_constr = ();
            }
        }
    }
    # end of get pk val

    # ---- UPDATE OR INSERT -----
    # at this stage we know if we are updating
    # or inserting, depending on whether a suitable
    # update constraint has been found
    my $this_op;
    if (%unique_constr) {
        $this_op = 'update';
    }
    else {
        $this_op = 'insert';
    }
    if (defined $operation) {
        if ($operation eq 'force') {
            # 'force' means: do whichever of update/insert applies
            $operation = $this_op;
        }
        else {
            # update/lookup/insert
            # insert: already dealt with
        }
    }
    else {
        $operation = $this_op;
    }

    if ($operation eq 'replace') {
        # replace = delete followed by insert
        if (%unique_constr) {
            $self->deleterow($element,\%unique_constr);
        }
        else {
            $self->throw("Cannot find row to delete it:\n".$node->xml);
        }
        $operation = 'insert';
    }

    if ($operation eq 'update') {
        # ** UPDATE **
        if ($self->noupdate_h->{$element}) {
            # NOTE(review): printf with an interpolated message and no
            # format args - a '%' in the data would be misinterpreted;
            # print would be safer here.
            if ($tracekeyval) {
                printf STDERR "NOUPDATE: $tracenode = $tracekeyval\n"
            }
            trace(0, sprintf("NOUPDATE on %s OR child nodes (We have %s)",
                             $element, join('; ',values %unique_constr) )) if $TRACE;
            # don't return yet; there are still the delayed nodes
            ##return $id;
        }
        else {
            # if there are no fields modified, no change
            foreach (keys %unique_constr) {
                # no point setting any column
                # that is part of the update constraint
                delete $store_hash{$_};
            }
            # only update if there are cols set that are
            # not part of unique constraint
            if (%store_hash) {
                if ($tracekeyval) {
                    printf STDERR "UPDATE: $tracenode = $tracekeyval\n"
                }
                $self->updaterow($element, \%store_hash, \%unique_constr);
                # -- CACHE RESULTS --
                if ($is_caching_on == 1 || $is_caching_on == 3) {
                    $self->update_cache($element, \%store_hash, \%unique_constr);
                }
            }
            else {
                trace(0, sprintf("NOCHANGE on %s (We have %s) id=$id",
                                 $element, join('; ',values %unique_constr) )) if $TRACE;
                if ($tracekeyval) {
                    print STDERR "NOCHANGE: $tracenode = $tracekeyval\n"
                }
            }
        }
    }
    elsif ($operation eq 'insert') {
        # ** INSERT **
        if (%store_hash) {
            $id = $self->insertrow($element, \%store_hash, $pkcol);
            if (!$id) {
                # this only happens if $self->force(1) is set
                if (@delayed_store) {
                    print STDERR "Insert on \"$element\" did not return a primary key ID.\n Possible causes: sequence not define [Pg]?\n";
                    if ($self->force) {
                        return;
                    }
                    else {
                        confess("non-recoverable error");
                    }
                }
                return;
            }
            if ($tracekeyval) {
                printf STDERR "INSERT: $tracenode $tracekeyval [val = $id]\n"
            }
            if ($pkcol) {
                if ($remap{$pkcol}) {
                    # record placeholder-PK => real-PK for later FK remaps
                    my $colvalmap = $self->id_remap_idx;
                    #my $colvalmap = $self->get_mapping_for_col($element);
                    $colvalmap->{$remap{$pkcol}} = $id;
                    trace(0, "colvalmap $remap{$pkcol} = $id") if $TRACE;
                }
            }
            # -- CACHE RESULTS --
            if ($is_caching_on) {
                my %cache_hash = %store_hash;
                if ($pkcol) {
                    $cache_hash{$pkcol} = $id;
                }
                $self->insert_into_cache($element, \%cache_hash, \@usets);
                trace(0, "CACHING: $element") if $TRACE;
            }
        }
    }
    elsif ($operation eq 'delete') {
        if (%unique_constr) {
            $self->deleterow($element,\%unique_constr);
        }
        else {
            $self->throw("Cannot find row to delete it (perhaps unique constraint not satisfied?):\n".$node->xml);
        }
    }
    elsif ($operation eq 'lookup') {
        # lookup: do nothing, already have ID
        # NOTE(review): throw() is passed sprintf-style args here; the
        # "%s" is probably never interpolated - confirm throw()'s
        # signature before relying on this message.
        if (!$id) {
            $self->throw("lookup: no ID; could not find this node in db (perhaps unique constraint not satisfied?) %s:\n",$node->xml);
        }
    }
    else {
        $self->throw("cannot do op: $operation");
    }
    # -- end of UPDATE/INSERT/LOOKUP

    # -- DELAYED STORE --
    # Any non-terminal child nodes of the current one have
    # some kind of foreign key relationship to the current
    # relation. Either it is 1:many or many:1
    #
    # if the relation for the child node has a foreign key
    # into the current relation, we need to store the current
    # relation first to get the current relation's primary key.
    #
    # we have already done this, so now is the time to store
    # any of these child nodes
    if (@delayed_store) {
        foreach my $sn (@delayed_store) {
            my $fk;                 # foreign key column in subtable
            my $snname = $sn->name; # subtable name
            # if a mapping is used (eg in metadata), then
            # this takes priority
            foreach (@$mapping) {
                if ($_->name eq 'parentfk' &&
                    $_->get_table eq $snname) {
                    $fk = $_->get_col;
                }
            }
            # no mapping, by default use the current nodes primary
            # key (this assumes eg person.address_id is a fk to
            # a table with pk address_id; we will check and possibly
            # override this later)
            if (!$fk) {
                $fk = $pkcol;
            }
            # HACK!!
            # Some databases (eg GO Database) use 'id' for pk col
            # names; fks to this table will be of form <table>_id
            if ($fk eq 'id') {
                $fk = $element . '_id';
            }
            # --SET SUBNODE FK--
            # it is necessarily true that each delayed-store subnode
            # must have some fk relationship back to the existing one
            # the subnode has a fk relation up to this one;
            # by default we assume that the subnode fk column is named
            # the same as the current pk. However, we check that this
            # is the case. If not, we deduce what the correct fk col is
            my $subtable = $dbschema->table($snname);
            if ($subtable->column($fk)) {
                # a fk col with the name as the current node pk col exists;
                # use it
                # do nothing - current value of $fk is fine
            }
            else {
                # deduce actual fk column
                # there should only be ONE subnode fk column UNSET;
                # this implicitly refers to the current node
                my @subcolumns = $subtable->columns;
                my @potential_fks = ();
                foreach my $subcolumn (@subcolumns) {
                    if ($self->is_fk_col($subcolumn) && !$self->is_pk_col($subcolumn)) {
                        # Definite foreign key
                        if (defined $sn->sget($subcolumn)) {
                            # already set
                        }
                        else {
                            push(@potential_fks, $subcolumn);
                        }
                    }
                }
                # NOTE(review): unguarded trace (no "if $TRACE"), unlike
                # the surrounding calls; also "pl val" below looks like a
                # typo for "pk val" - message text left as-is.
                trace(0, "POTENTIAL FKS: @potential_fks");
                if (!@potential_fks) {
                    $self->throw("I do not know what to do with the current ".
                                 "pl val ($id). There does not appear to be ".
                                 "a $fk column in $snname, and all fks in ". "the subtable $snname are currently set");
                }
                if (@potential_fks > 1) {
                    $self->throw("There appear to be multiple potential fks ".
                                 "[ @potential_fks ]. I do not know which ".
                                 "to choose to assign the current pk val $id".
                                 " to");
                }
                $fk = shift @potential_fks;
            }
            # -- $fk value is set
            $sn->set($fk, $id);
            # -- $fk table assigned
            trace(0, "NOW TIME TO STORE [curr pk val = $id] [fkcol = $fk] ", $sn->xml) if $TRACE;
            # store subnode, passing in info on current node
            $self->_storenode($sn,{parent_pk_id=>$id,
                                   parent_element=>$element,
                                   assigned_node_h=>{$fk=>1}});
        }
    }
    # -- end of @delayed_store

    # register this row's macro id (if any) so later XORT references
    # can be resolved to the real database id
    if ($current_attribute_node) {
        if ($id) {
            my $macro_id = $current_attribute_node->sget_id;
            if ($macro_id) {
                $self->macro_id_h->{$macro_id} = $id;
                trace(0, "SETTING MACRO ID MAP: $macro_id => $id") if $TRACE;
            }
            else {
            }
        }
    }
    return $id;
}

# --SQL directives embedded in XML--
# Execute a <_sql> directive node. op=delete: recursively evaluates
# subnodes to a value list, selects matching pkeys, and deletes them
# row by row. op=select: returns the list of $col values matching the
# node's own values. (definition continues on the following chunk line)
sub _execute_sqlnode {
    my $self = shift;
    my $sqlnode = shift;
    if ($sqlnode->element eq '_sql') {
        my $dbh = $self->dbh;
        my $op = $sqlnode->get('@/op');
        my $col = $sqlnode->get('@/col');
        my $table = $sqlnode->get('@/from');
        my $match = $sqlnode->get('@/match');
        my @subnodes = grep {$_->element ne '@'} $sqlnode->kids;
        if ($op eq 'delete') {
            my $pkey = $sqlnode->get('@/pkey');
            trace(0,"deleting from $table");
            my @vals = map {$self->_execute_sqlnode($_)} @subnodes;
            # do iteratively rather than in 1 SQL stmt
            # NOTE(review): @vals are interpolated into the IN list
            # unquoted (cf. the "# quote" marker below and the quoted
            # select branch) - values here appear to be numeric ids
            # from the recursive select, but confirm.
            if (@vals) {
                my $sql =
                  sprintf("SELECT $pkey FROM $table WHERE $match IN (%s)",
                          join(", ",@vals));
                trace(0, "SQL: $sql");
                my $ids_to_delete = $dbh->selectcol_arrayref($sql);
                # quote
                foreach my $id (@$ids_to_delete) {
                    my $delete_sql = "DELETE FROM $table WHERE $pkey=$id";
                    trace(0,"SQL: $delete_sql");
                    $dbh->do($delete_sql);
                }
            }
        }
        elsif ($op eq "select") {
            my @vals = $sqlnode->get('.');
            my $sql =
              sprintf("SELECT $col FROM $table WHERE $match IN (%s)",
                      join(", ",map {$dbh->quote($_)} @vals));
            trace(0, "SQL: $sql");
            my $ids = $dbh->selectcol_arrayref($sql);
            trace(0,"id list in select: @$ids");
            return(@$ids);
        }
        else {
            $self->throw("Do not understand SQL directive: $op")
        }
    }
    else {
        # plain value node: evaluates to its own data
        return $sqlnode->data;
    }
    return;
}

# Validate a SQL sub-directive element; only 'in' is recognised (and
# is a no-op here) - anything else is an error.
sub _process_sql {
    my $self = shift;
    my $node = shift;
    my $element = $node->element;
    if ($element eq 'in') {
    }
    else {
        $self->throw("Do not understand SQL directive: $element")
    }
}

# -- QUERYING --

# Recursively turn the bracket structure of a parsed FROM clause
# ('composite'/'leaf' nodes) into a default nesting tree: the head
# relation of each composite becomes the parent of the tail.
# Leaf nodes use their alias if one was declared. Dies on an
# unrecognised element.
sub rmake_nesting {
    my $node = shift;
    if ($node->element eq 'composite') {
        my $first = $node->getnode_first;
        my $second = $node->getnode_second;
        my $head = rmake_nesting($first->data->[0]);
        my $tail = rmake_nesting($second->data->[0]);
        if ($head->isterminal) {
            return Data::Stag->new($head->element => [$tail]);
        }
        $head->addkid($tail);
        return $head;
    }
    elsif ($node->element eq 'leaf') {
        my $alias = $node->get_alias;
        my $tn = $alias || $node->get_name;
        return Data::Stag->new($tn=>1);
    }
    else {
        die;
    }
}

# if true, a metadata tag will be added to stag nodes selected from db
sub include_metadata {
    my $self = shift;
    $self->{_include_metadata} = shift if @_;
    return $self->{_include_metadata};
}

# last SQL SELECT statement executed (as a parsed stag node)
sub last_stmt {
    my $self = shift;
    $self->{_last_stmt} = shift if @_;
    return $self->{_last_stmt};
}

# arrayref of [sql-string, @execute-args] from the last query
sub last_sql_and_args {
    my $self = shift;
    $self->{_last_sql_and_args} = shift if @_;
    return $self->{_last_sql_and_args};
}

# default SAX handler used by selectall_sax when none is passed in
sub sax_handler {
    my $self = shift;
    $self->{_sax_handler} = shift if @_;
    return $self->{_sax_handler};
}

# delegates to selectall_stag and turns tree to XML
sub selectall_xml {
    my $self = shift;
    my $stag = $self->selectall_stag(@_);
    return $stag->xml;
}

# delegates to selectall_stag and turns tree to SAX
# (candidate for optimisation - TODO - use event firing model)
# Throws if no handler is supplied and none is configured.
sub selectall_sax {
    my $self = shift;
    my ($sql, $nesting, $h) = rearrange([qw(sql nesting handler)], @_);
    my $stag = $self->selectall_stag(@_);
    $h = $h || $self->sax_handler;
    if (!$h) {
        $self->throw("You must specify the sax handler;\n".
                     "Either use \$dbh->sax_handler(\$h), or \n". "\$dbh->selectall_sax(-sql=>\$sql, handler->\$h)");
    }
    return $stag->sax($h);
}

# delegates to selectall_stag and turns tree to S-Expression
sub selectall_sxpr {
    my $self = shift;
    my $stag = $self->selectall_stag(@_);
    return $stag->sxpr;
}

# does not bother decomposing and nesting the results; just
# returns the denormalised table from the SQL query.
# arrayref of arrayrefs - rows x cols
# first row of rows is column headings
sub selectall_rows {
    my $self = shift;
    my ($sql, $nesting, $bind, $template) =
      rearrange([qw(sql nesting bind template)], @_);
    my $rows =
      $self->selectall_stag(-sql=>$sql,
                            -nesting=>$nesting,
                            -bind=>$bind,
                            -template=>$template,
                            -return_arrayref=>1,
                           );
    return $rows;
}

# ---------------------------------------
# selectall_stag(sql, nesting)
#
# Takes an sql string containing a SELECT statement,
# parses it to get the tree structure; this can be
# overridden with the nesting optional argument.
#
# The SELECT statement is executed, and the relations are
# transformed into a stag tree
#
# ---------------------------------------
sub selectall_stag {
    my $self = shift;
    my ($sql, $nesting, $bind, $template, $return_arrayref, $include_metadata, $aliaspolicy) =
      rearrange([qw(sql nesting bind template return_arrayref include_metadata aliaspolicy)], @_);
    # prepare_stag does the parsing/template expansion and returns the
    # statement handle plus column/alias/nesting bookkeeping
    my $prep_h = $self->prepare_stag(@_);
    my $cols = $prep_h->{cols};
    my $sth = $prep_h->{sth};
    my $exec_args = $prep_h->{exec_args};
    if (!defined($include_metadata)) {
        $include_metadata = $self->include_metadata;
    }
    # TODO - make this event based so we don't have to
    # load all into memory
    my $rows = $self->dbh->selectall_arrayref($sth, undef, @$exec_args);
    if ($return_arrayref) {
        # flat mode: return [\@headers, @rows] without reconstructing
        my @hdrs = ();
        for (my $i=0; $i<@$cols; $i++) {
            my $h = $prep_h->{col_aliases_ordered}->[$i] || $cols->[$i];
            push(@hdrs, $h);
        }
        return [\@hdrs, @$rows];
    }
    trace(0, sprintf("Got %d rows\n", scalar(@$rows))) if $TRACE;
    # --- reconstruct tree from relations
    my $stag =
      $self->reconstruct(
                         -rows=>$rows,
                         -cols=>$cols,
                         -alias=>$prep_h->{alias},
                         -nesting=>$prep_h->{nesting},
                         -aliaspolicy=>$aliaspolicy,
                        );
    if ($include_metadata) {
        # prepend a dbstag_metadata node recording how the query was run
        my ($last_sql, @sql_args) = @{$self->last_sql_and_args || []};
        my @kids = $stag->kids;
        my @bind_nodes;
        if ($bind && ref($bind) eq 'HASH') {
            @bind_nodes = (stag_unflatten(argset=>[%$bind]));
        }
        unshift(@kids,
                [dbstag_metadata=>[
                                   [sql=>$last_sql],
                                   [nesting=>$nesting],
                                   [template=>$template],
                                   @bind_nodes,
                                   (map {[exec_arg=>$_]} @sql_args)
                                  ]]);
        $stag->kids(@kids);
    }
    return $stag;
}

# Parse/expand a query (SQL string or template) and build everything
# selectall_stag needs: sth, exec args, column and alias maps, nesting.
# (definition continues beyond this chunk)
sub prepare_stag {
    my $self = shift;
    my ($sql, $nesting, $bind, $template, $return_arrayref, $aliaspolicy) =
      rearrange([qw(sql nesting bind template return_arrayref aliaspolicy)], @_);
    my $parser = $self->parser;
    my $sth;
    my @exec_args = ();
    # a ref in the sql slot is actually a template object
    if (ref($sql)) {
        $template = $sql;
    }
    if ($template) {
        if (!ref($template)) {
            $template = $self->find_template($template);
        }
        ($sql, @exec_args) = $template->get_sql_and_args($bind);
    }
    trace 0, "parsing_sql: $sql\n";
    # PRE-parse SQL statement for stag-specific extensions
    if ($sql =~ /(.*)\s+use\s+nesting\s*(.*)/si) {
        my ($pre, $post) = ($1, $2);
        my ($extracted, $remainder) = extract_bracketed($post, '()');
        if ($nesting) {
            $self->throw("nestings clash: $nesting vs $extracted");
        }
        $nesting = Data::Stag->parsestr($extracted);
        $sql = "$pre $remainder";
    }
    # get the parsed SQL SELECT statement as a stag node
    my $stmt = $parser->selectstmt($sql);
    if (!$stmt) {
        # there was some error parsing the SQL;
        # DBI can probably give a better explanation.
        eval {
            my $sth = $self->dbh->prepare($sql);
        };
        if ($@) {
            $self->throw("SQL ERROR:\n$@");
        }
        # DBI accepted it - must be a bug in the DBStag grammar
        $self->throw("I'm sorry but the SQL statement you gave does\n".
                     "not conform to the more limited subset of SQL\n".
                     "that DBStag supports. Please see the DBStag docs\n".
                     "for details.\n".
                     "\n".
                     "Remember to check you explicitly declare all aliases\n".
"using AS\n\n\nSQL:$sql"); } trace 0, "parsed_sql: $sql\n"; # trace 0, $stmt->xml; my $dbschema = $self->dbschema; $self->last_stmt($stmt); # stag node of FROM part of SQL my $fromstruct = $stmt->get_from; # --- aliases --- # keep a hash of table aliases # KEY: table alias # VAL: base table # for example, 'SELECT * FROM person AS p' # will result in $alias_h = { p => person } my $alias_h = {}; # build alias hash using FROM node foreach my $sn ($fromstruct->subnodes) { get_table_alias_map($sn, $alias_h); } # as well as an alias hash map, # keep an array of stag nodes representing all the aliases my @aliases = (); foreach my $alias (keys %$alias_h) { push(@aliases, Data::Stag->new(alias=>[ [name=>$alias], [table=>$alias_h->{$alias}->[0]] ])); } my $aliasstruct = Data::Stag->new(alias=>[@aliases]); # --- nestings --- # # the cartesian product that results from a SELECT can # be turned into a tree - there is more than one tree to # choose from; eg with "x NJ y NJ z" we can have trees: # [x [y [z]]] # [x [y z]] # [z [x y]] # etc # # the actual allowed nestings through the graph is constrained # by the FK relationships; we do not utilise this yet (TODO!) # later the user need only specify the root. for now they # must specify the full nesting OR allow the bracket structure # of the joins... 
# if the user did not explicitly supply a nesting, # guess one from the bracket structure of the FROM # clause (see rmake_nesting) # [TODO: be more clever in guessing the nesting using FKs] if (!$nesting) { $nesting = Data::Stag->new(top=>1); # my $cons = rmake_cons($fromstruct->data->[0], $nesting); $nesting = rmake_nesting($fromstruct->data->[0]); $nesting = Data::Stag->new(top=>[$nesting]); trace(0, "\n\nNesting:\n%s\n\n", $nesting->xml) if $TRACE; } if ($nesting && !ref($nesting)) { $nesting = Data::Stag->parsestr($nesting); } # keep an array of named relations used in the query - # the named relation is the alias if present; # eg # SELECT * FROM person AS p NATURAL JOIN department # the named relations here are 'p' and 'department' my @namedrelations = (); $fromstruct->iterate(sub { my $n = shift; if ($n->element eq 'leaf') { my $v = $n->sget_alias || $n->sget_name; push(@namedrelations, $v) } }); # --- fetch columns --- # # loop through all the columns in the SELECT clause # making them all of a standard form; eg dealing # with functions and '*' wildcards appropriately my @col_aliases_ordered = (); my @cols = map { # $_ iterator variable is over the columns # specified in the SELECT part of the query; # each column is represented as a stag node # column name my $name = $_->get_name; # column alias, if exists # eg in 'SELECT name AS n' the alias is 'n' my $col_alias = $_->get_alias; push(@col_aliases_ordered, $col_alias); # make the name the alias; prepend named relation if supplied. # eg in 'SELECT person.name AS n' the name will become # 'person.n' if ($col_alias) { $name = $col_alias; if ($_->get_table) { $name = $_->get_table . '.'. 
$name; } } my $func = $_->getnode('func'); # from here on determines returned value of the # map iteration: if ($func) { # a typical column node for a function looks like # this: # # (col # (func # (name "somefunc") # (args # (col # (name "x.foo") # (table "x"))))) # (alias "myname")) # # if a function is included, and the function # return value is aliased, use that alias; # otherwise ... my $funcname = $func->get_name; # query the function stag node for the element # 'col' my ($col) = $func->where('col', sub {shift->get_table}); my $table = $col_alias || $funcname; if (!$col_alias) { $col_alias = $funcname; } if ($col) { $table = $col->get_table; } # if ($col_alias =~ /(\w+)__(\w+)/) { # $table = $1; # $col_alias = $2; # } $name = $table . '.' . $col_alias; # return: $name; } elsif ($name =~ /^(\w+)\.\*$/) { # if the column name is of the form # RELATION.*, then replace the * with # all the actual columns from the base relation # RELATION # # the final result will be TABLE.col1, TABLE.col2,... my $tn = $1; my $tn_alias = $tn; # use base relation name to introspect schema if ($alias_h->{$tn}) { $tn = $alias_h->{$tn}->[0]; } my $tbl = $dbschema->table(lc($tn)); if (!$tbl) { confess("No such table as $tn"); } # introspect schema to get columns for this table my @cns = $tbl->columns; # trace(0, Dumper $tbl) if $TRACE; trace(0, "TN:$tn ALIAS:$tn_alias COLS:@cns") if $TRACE; # return: map { "$tn_alias.$_" } @cns; } elsif ($name =~ /^\*$/) { # if the column name is '*' (ie select all) # then replace the * with # all the actual columns from the base relations in # the query (use FROM clause) # my %got = (); my @allcols = map { my $tn = $_; my $baserelname = $alias_h->{$tn} ? $alias_h->{$tn}->[0] : $tn; my $tbl = $dbschema->table(lc($baserelname)); if (!$tbl) { confess("Don't know anything about table:$tn\n". "Maybe DBIx::DBSchema does not work for your DBMS?\n". 
"If $tn is a view, you may need to modify DBIxLLDBSchema"); } my @cns = $tbl->columns; # @cns = grep { !$got{$_}++ } @cns; map { "$tn.$_"} @cns; } @namedrelations; # This is a bit hacky; if the user specifies # SELECT * FROM... then there is no way # to introspect the actual column returned # using DBI->selectall_arrayref # # maybe we should selectall_hashref # instead? this is generally slower; also # even if we get it with a hashref, the # result can be ambiguous since DBI only # gives us the colun names back # # to get round this we just replace the * # in the user's query (ie in the actual SQL) # with the full column list my $replace = join(', ', @allcols); # rewrite SQL statement; assum only one instance of # string '*' in these cases $sql =~ s/\*/$replace/; # return: @allcols; } else { # no * wildcard in column, and not a function; # just give back the node # return: $name } } $stmt->sgetnode_cols->getnode_col; @cols = map { if (/(\w+)__(\w+)/) { "$1.$2"; } else { $_ } } @cols; # ---- end of column fetching --- trace(0, "COLS:@cols") if $TRACE; # --- execute SQL SELECT statement --- if ($template) { $sth = $template->cached_sth->{$sql}; if (!$sth) { $sth = $self->dbh->prepare($sql); $template->cached_sth->{$sql} = $sth; } # ($sql, $sth, @exec_args) = # $template->prepare($self->dbh, $bind); } else { $sth = $self->dbh->prepare($sql); } my $sql_or_sth = $sql; if ($sth) { $sql_or_sth = $sth; } trace(0, "SQL:$sql") if $TRACE; trace(0, "Exec_args: @exec_args") if $TRACE && @exec_args; $self->last_sql_and_args([$sql, @exec_args]); return { sth=>$sth, exec_args=>\@exec_args, cols=>\@cols, col_aliases_ordered=>\@col_aliases_ordered, alias=>$aliasstruct, nesting=>$nesting }; } # ============================ # get_table_alias_map(tablenode, alias hash) # # checks a tablenode (eg the stag representing # a table construct in the FROM clause) and adds # it to the alias hash if it specifies an alias # ============================ sub get_table_alias_map { my $s = shift; my 
$h = shift; # the FROM clause is natively stored as a binary tree # (in order to group the joins by brackets) - recursively # descend building the hash map if ($s->name eq 'leaf') { my $alias = $s->get_alias; if ($alias) { $h->{$alias} = [$s->get_name]; } return ($s->get_name); } elsif ($s->name eq 'composite') { my ($first, $second) = ($s->getnode_first, $s->getnode_second); my $alias = $s->get_alias; my @sn = ($first->subnodes, $second->subnodes); my @subtbls = map { get_table_alias_map($_, $h), } @sn; if ($alias) { $h->{$alias} = [@subtbls]; } return @subtbls; } else { confess $s->name; } } # ============================ # reconstruct(schema, rows, top, cols, constraints, nesting, aliasstruct) # # mainly called by: selectall_stag(...) # # takes an array of rows (ie the result of an SQL query, probably # involving JOINs, which is a denormalised relation) and # decomposes this relation into a tree structure # # in order to do this, it requires schema information, and a nesting # through the implicit result graph to build a tree # ============================ sub reconstruct { my $self = shift; my $tree = Data::Stag->new(); my ($schema, # OPTIONAL - meta data on relation $rows, # REQUIRED - relation R - array-of-array $top, # OPTIONAL - root node name $cols, # REQUIRED - array of stag nodes per column of R $constraints, # NOT USED!!! $nesting, # REQUIRED - tree representing decomposed schema $aliasstruct, # OPTIONAL - renaming of columns in R $aliaspolicy) = rearrange([qw(schema rows top cols constraints nesting alias aliaspolicy)], @_); $aliaspolicy = 'nest' unless $aliaspolicy; # --- get the schema --- # # $schema is a stag representing the schema # of the input releation R (not the schema of # the db that produced it.... hmm, this could # be misleading) # # it conforms to the following stag-struct: # #'(schema # (top? "RECORDSET-ELEMENT-NAME") # (cols? # (col+ # (relation "RELATION-NAME") # (name "COLUMN-NAME") # )) # (nesting? 
# (* "NESTING-TREE"))) # # each column represents the if (!$schema) { $schema = $tree->new(schema=>[]); } if (!ref($schema)) { # it is a string - parse it # (assume sxpr) $schema = $tree->from('sxprstr', $schema); } # TOP - this is the element name # to group the structs under. # [override if specified explicitly] if ($top) { stag_set($schema, 'top', $top); } # $top = $schema->get_top || "set"; if (!$top) { if ($nesting) { # use first element in nesting $top = $nesting->element; } else { $top = 'set'; } } my $topstruct = $tree->new($top, []); # COLS - this is the columns (attribute names) # in the order they appear # [override if specified explicitly] if ($cols) { my @ncols = map { if (ref($_)) { $_ } else { # presume it's a string # format = RELATION.ATTRIBUTENAME if (/(\w+)\.(\w+)/) { $tree->new(col=>[ [relation=>$1], [name=>$2]]); } elsif (/(\w+)/) { confess("Not implemented yet - must specify tbl for $_"); $tree->new(col=>[ [relation=>'unknown'], [name=>$2]]); } else { confess "I am confused by this column: $_"; } } } @$cols; $schema->set_cols([@ncols]); } # NESTING - this is the tree structure in # which the relations are structured # [override if specified explicitly] if ($nesting) { if (ref($nesting)) { } else { $nesting = $tree->from('sxprstr', $nesting); } $schema->set_nesting([$nesting]); } else { $nesting = $schema->sgetnode_nesting; } if (!$nesting) { confess("no nesting!"); } # --- alias structure --- # # use this to get a hash map of alias => baserelation ($aliasstruct) = $schema->getnode_aliases unless $aliasstruct; if ($aliasstruct && !ref($aliasstruct)) { $aliasstruct = $tree->from('sxprstr', $aliasstruct); } my @aliases = (); if ($aliasstruct && $aliaspolicy !~ /^a/i) { @aliases = $aliasstruct->getnode_alias; } my %alias2baserelation = map { $_->sget_name => $_->sget_table } @aliases; # column headings; (ie all columns in R) my @cols = $schema->sgetnode_cols->getnode_col(); # --- primary key info --- # set the primary key for each relation (one 
per relation); # the default is *all* the columns in that relation my %pkey_by_relationname = (); # eg {person => [person_id] my %cols_by_relationname = (); # eg {person => [person_id, fname, lname] # loop through all columns in R, setting above hash maps foreach my $col (@cols) { # the stag struct for each $col looks like this: # # (col+ # (relation "RELATION-NAME") # (name "COLUMN-NAME") # )) my $relationname = $col->get_relation; my $colname = $col->get_name; # pkey defaults to all columns in a relation # (we may override this later) $pkey_by_relationname{$relationname} = [] unless $pkey_by_relationname{$relationname}; push(@{$pkey_by_relationname{$relationname}}, $colname); # all columns in a relation # (note: same as default PK) $cols_by_relationname{$relationname} = [] unless $cols_by_relationname{$relationname}; push(@{$cols_by_relationname{$relationname}}, $colname); } my @relationnames = keys %pkey_by_relationname; # override PK if explicitly set as a constraint my @pks = $schema->findnode("primarykey"); foreach my $pk (@pks) { # $pk looks like this: # # '(primarykey # (relation "R-NAME") # (col+ "COL-NAME")) my $relationname = $pk->get_relation; my @cols = $pk->get_col; # the hash %pkey_by_relationname should # be keyed by the named relations, not the # base relations my @aliasnames = grep { $alias2baserelation{$_} eq $relationname } keys %alias2baserelation; # relation is not aliased if (!@aliasnames) { @aliasnames = ($relationname); } foreach (@aliasnames) { $pkey_by_relationname{$_} = [@cols]; } } # ------------------ # # loop through denormalised rows, # putting the columns into their # respecive relations # # eg # # <----- a -----> <-- b --> # a.1 a.2 a.3 b.1 b.2 # # algorithm: # use nesting/tree to walk through # # ------------------ #~~~ keep a hash of all relations by their primary key vals #~~~ outer key = relationname #~~~ inner key = pkval #~~~ hash val = relation structure #~~~ my %all_relation_hh = (); #~~~ foreach my $relationname 
(@relationnames) { #~~~ $all_relation_hh{$relationname} = {}; #~~~ } #~~~ keep an array of all relations #~~~ outer key = relationname #~~~ inner array = ordered list of relations #~~~ my %all_relation_ah = (); #~~~ foreach my $relationname (keys %pkey_by_relationname) { #~~~ $all_relation_ah{$relationname} = []; #~~~ } # start at top of nesting tree # # a typical nesting tree may look like this: # # '(tableA # (tableB "1") # (tableC # (tableD "1"))) # # terminals ie "1" are ignored my ($first_in_nesting) = $nesting->subnodes; if (!$first_in_nesting) { $first_in_nesting = $nesting; } my $fipname = $first_in_nesting ? $first_in_nesting->name : ''; # recursive hash representing tree # # $record = # {child_h => { # $relation_name* => { # $pk_val => $record # } # }, # struct => $stag_obj # } # # this is recursively constructed using the make_a_tree() method # below. the nesting tree (see above) is traversed depth first, # constructing both the child_h hash and the resulting Stag # structure. my $top_record_h = { child_h=>{ $fipname ? 
($fipname=>{}) : () }, struct=>$topstruct }; # loop through rows in R foreach my $row (@$rows) { my @colvals = @$row; # keep a record of all table names in # this row from R my %current_relation_h = (); for (my $i=0; $i<@cols; $i++) { my $colval = $colvals[$i]; my $col = $cols[$i]; my $relationname = $col->get_relation; my $colname = $col->get_name; my $relation = $current_relation_h{$relationname}; if (!$relation) { $relation = {}; $current_relation_h{$relationname} = $relation; } $relation->{$colname} = $colval; } # print "ROW=@$row\n"; # dmp(\%pkey_by_relationname); # dmp($top_record_h); # we now have a hash of hashes - # outer keyed by relation id # inner keyed by relation attribute name # traverse depth first down nesting; # add new nodes as children of the parent $self->make_a_tree($tree, $top_record_h, $first_in_nesting, \%current_relation_h, \%pkey_by_relationname, \%cols_by_relationname, \%alias2baserelation, $aliaspolicy); } return $topstruct; } *norm = \&reconstruct; *normalise = \&reconstruct; *normalize = \&reconstruct; # ============================ # make_a_tree(...) RECURSIVE # # called by: reconstruct(...) # # ============================ sub make_a_tree { my $self = shift; my $tree = shift; my $parent_rec_h = shift; my $node = shift; my %current_relation_h= %{shift ||{}}; my %pkey_by_relationname = %{shift ||{}}; my %cols_by_relationname = %{shift ||{}}; my %alias2baserelation = %{shift ||{}}; my $aliaspolicy = shift; my $relationname = $node->name; my $relationrec = $current_relation_h{$relationname}; my $pkcols = $pkey_by_relationname{$relationname}; my $rec; # this is the next node down in the hash tree if (!$pkcols || !@$pkcols) { # if we have no columns for a particular part of # the nesting through the relation, it means it # was ommitted from the select clause - just skip # this part of the nesting. 
# # for example: SELECT a.*, b.* FROM a NJ a_to_b NJ b # the default nesting will be: [a [a_to_b [b]]] # the relation R will have columns: # a.c1 a.c2 b.c1 b.c2 # # we want to build a resulting structure like this: # (a # (c1 "x") (c2 "y") # (b # (c1 "a") (c2 "b"))) # # so we just miss out a_to_b in the nesting, because it # has no columns in the relation R. $rec = $parent_rec_h; } else { my $pkval = CORE::join("\t", map { esctab($relationrec->{$_} || '') } @$pkcols); $rec = $parent_rec_h->{child_h}->{$relationname}->{$pkval}; if (!$rec) { my $relationcols = $cols_by_relationname{$relationname}; my $has_non_null_val = grep {defined($relationrec->{$_})} @$relationcols; return unless $has_non_null_val; my $relationstruct = $tree->new($relationname=>[ map { defined($relationrec->{$_}) ? [$_ => $relationrec->{$_}] : () } @$relationcols ]); my $parent_relationstruct = $parent_rec_h->{struct}; if (!$parent_relationstruct) { confess("no parent for $relationname"); } # if we have an aliased relation, add an extra # level of nesting my $baserelation = $alias2baserelation{$relationname}; if ($baserelation) { # $aliaspolicy eq 'nest' or 't' # nest base relations inside an alias node # OR use table name in place of alias name if ($aliaspolicy =~ /^t/i) { stag_add($parent_relationstruct, $baserelation, $relationstruct->data); } else { # nest my $baserelationstruct = Data::Stag->new($baserelation => $relationstruct->data); stag_add($parent_relationstruct, $relationname, [$baserelationstruct]); } } else { # either no aliases, or $aliaspolicy eq 'a' # (in which case columns already mapped to aliases) stag_add($parent_relationstruct, $relationstruct->name, $relationstruct->data); } $rec = {struct=>$relationstruct, child_h=>{}}; foreach ($node->subnodes) { # keep index of children by PK $rec->{child_h}->{$_->name} = {}; } $parent_rec_h->{child_h}->{$relationname}->{$pkval} = $rec; } } foreach ($node->subnodes) { $self->make_a_tree($tree, $rec, $_, \%current_relation_h, 
\%pkey_by_relationname, \%cols_by_relationname, \%alias2baserelation, $aliaspolicy); } }

# -------- GENERAL SUBS -----------

# esctab(str): protect literal tab characters in a value so it can
# safely take part in the tab-joined primary-key strings built by
# make_a_tree()/reconstruct()
sub esctab {
    my $w=shift;
    $w =~ s/\t/__MAGICTAB__/g;
    $w;
}

# makesql(-table=>..., -where=>..., -select=>..., -order=>..., -group=>..., -distinct=>...)
#
# builds a SELECT statement string.
#   table:    scalar, arrayref, or hashref of {alias => table} (rendered "table AS alias")
#   where:    scalar, arrayref (ANDed), or hashref of {col => value} equality tests
#             (values are quoted via $self->quote, which AUTOLOAD delegates to DBI)
#   select:   scalar, arrayref, or hashref of {alias => expr}; defaults to '*'
#   order:    scalar or arrayref (hashref is rejected)
#   group:    scalar or arrayref (hashref is rejected)
#   distinct: true => SELECT DISTINCT
# returns the SQL string (does not execute it)
sub makesql {
    my $self = shift;
    my ($table, $where, $select, $order, $group, $distinct) =
      rearrange([qw(table where select order group distinct)], @_);
    confess("must specify table") unless $table;

    # array of tables
    if (ref($table)) {
        if (ref($table) eq "HASH") {
            $table = [ map { "$table->{$_} AS $_" } keys %$table ];
        }
    }
    else {
        $table = [$table];
    }

    $where = [] unless $where;
    # array of ANDed where clauses
    if (ref($where)) {
        if (ref($where) eq "HASH") {
            $where = [ map { "$_ = ".$self->quote($where->{$_}) } keys %$where ];
        }
    }
    else {
        $where = [$where];
    }

    $select = ['*'] unless $select;
    # array of SELECT cols
    if (ref($select)) {
        if (ref($select) eq "HASH") {
            $select = [ map { "$select->{$_} AS $_" } keys %$select ];
        }
    }
    else {
        $select = [$select];
    }

    $order = [] unless $order;
    # array of order tables
    if (ref($order)) {
        if (ref($order) eq "HASH") {
            confess("order must be an array");
        }
    }
    else {
        $order = [$order];
    }

    $group = [] unless $group;
    # array of group tables
    if (ref($group)) {
        if (ref($group) eq "HASH") {
            confess("group must be an array");
        }
    }
    else {
        $group = [$group];
    }

    # BUGFIX: this ternary was inverted (-distinct=>1 used to *suppress*
    # DISTINCT, and omitting it used to add it)
    $distinct = $distinct ? ' DISTINCT' : '';

    # BUGFIX: the format string previously had five %s conversions for
    # six arguments, so the ORDER BY clause was silently dropped
    my $sql =
      sprintf("SELECT%s %s FROM %s%s%s%s",
              $distinct,
              join(', ', @$select),
              join(', ', @$table),
              (scalar(@$where) ? ' WHERE '.join(' AND ', @$where) : ''),
              (scalar(@$group) ? ' GROUP BY '.join(', ', @$group) : ''),
              (scalar(@$order) ? ' ORDER BY '.join(', ', @$order) : ''),
             );
    return $sql;
}

# selectval(sql, @bind)
# convenience wrapper: run a query and return the first column of the
# first row (undef if the query returns no rows)
sub selectval {
    my $self = shift;
    trace(0, "@_") if $TRACE;
    return $self->dbh->selectcol_arrayref(@_)->[0];
}

# insertrow(table, colvalh, pkcol)
# INSERTs one row; %$colvalh maps column name => value, bound through
# placeholders.  If $pkcol is given, the primary key value of the new
# row is returned (continued below).
sub insertrow {
    my $self = shift;
    my ($table, $colvalh, $pkcol) = @_;
    my $driver = $self->dbh->{Driver}->{Name};
    my @cols = keys %$colvalh;
    # NOTE: previously written as "defined($_) ? $colvalh->{$_} : undef",
    # but $_ here is the hash *key*, which is always defined, so the
    # expression always yielded $colvalh->{$_} anyway - simplified with
    # identical behaviour (undef values bind as SQL NULL)
    my @vals = map { $colvalh->{$_} } @cols;
    my @placeholders = map { '?'
} @vals;
    my $sql =
      sprintf("INSERT INTO %s (%s) VALUES (%s)",
              $table,
              join(", ", @cols),
              #join(", ", @vals),
              join(", ", @placeholders),
              );
    # degenerate case: no columns supplied - insert an all-defaults row
    if (!@cols) {
        $sql = "INSERT INTO $table DEFAULT VALUES";
    }
    trace(0, "SQL:$sql") if $TRACE;
    my $succeeded = 0;
    eval {
        my $sth = $self->dbh->prepare($sql);
        my $rval = $sth->execute(@vals);
        $succeeded = 1 if defined $rval;
    };
    if ($@) {
        if ($self->force) {
            # force mode: a failed insert is downgraded to a warning
            # what about transactions??
            $self->warn("IN SQL: $sql\nWARNING: $@\n");
            return;
        }
        else {
            confess $@;
        }
    }
    return unless $succeeded;
    my $pkval;
    if ($pkcol) {
        # primary key value may have been specified in the xml
        # (this is necessary for non-surrogate pks in tables that
        # are to be linked to via foreign keys)
        $pkval = $colvalh->{$pkcol};

        # pk was not supplied - perhaps this is a SERIAL/AUTO_INCREMENT
        # column (ie surrogate integer primary key)
        if (!$pkval) {
            # assume pk is a SERIAL / AUTO_INCREMENT
            if ($driver eq 'Pg') {
                # PostgreSQL: read the implicit <table>_<col>_seq sequence
                my $seqn = sprintf("%s_%s_seq",
                                   $table,
                                   $pkcol);
                $pkval = $self->selectval("select currval('$seqn')");
                trace(0, "CURRVAL $seqn = $pkval [Pg]") if $TRACE;
            }
            # this doesn't work on older
            # versions of DBI/DBD::mysql
            # seems to have been fixed Oct 2004 release
            elsif ($driver eq 'mysql') {
                $pkval = $self->dbh->last_insert_id(undef,undef,$table,$pkcol);
                trace(0, "CURRVAL mysql_insert_id $pkval [mysql]") if $TRACE;
            }
            else {
                # fallback for other drivers: assumes pk values ascend and
                # that no concurrent insert has happened - TODO confirm
                # this is acceptable for all supported DBMSs
                $pkval = $self->selectval("select max($pkcol) from $table");
            }
        }
        trace(0, "PKVAL = $pkval") if $TRACE;
    }
    return $pkval;
}

# updaterow(table, set, where)
# issues a single UPDATE; WHERE constraints are ANDed.  $set and $where
# may each be a hashref ({col => value}), an arrayref of SQL fragments,
# or a single SQL fragment string.  SET values are bound through
# placeholders; WHERE values are quoted inline via $dbh->quote.
sub updaterow {
    my $self = shift;
    my ($table, $set, $where) = @_;
    confess("must specify table") unless $table;
    my $dbh = $self->dbh;
    # array of WHERE cols
    if (ref($where)) {
        if (ref($where) eq "HASH") {
            $where = [ map { "$_ = ".$dbh->quote($where->{$_}) } keys %$where ];
        }
    }
    else {
        $where = [$where];
    }
    confess("must specify constraints") unless @$where;
    confess("must set update vals") unless $set;
    my @bind = ();
    # array of SET colvals
    if (ref($set)) {
        if (ref($set) eq "HASH") {
            # NOTE(review): for an undef value this pushes the *string*
            # 'NULL' as a bind parameter, which stores the literal text
            # "NULL" rather than SQL NULL; binding undef looks intended -
            # verify before changing
            $set = [ map { push(@bind, defined $set->{$_} ?
$set->{$_} : 'NULL'); "$_ = ?" } keys %$set ]; } } else { $set = [$set]; } my $sql = sprintf("UPDATE %s SET %s WHERE %s", $table, join(', ', @$set), join(' AND ', @$where), ); trace(0, "SQL:$sql [",join(', ',@bind)."]") if $TRACE; my $sth = $dbh->prepare($sql) || confess($sql."\n\t".$dbh->errstr); return $sth->execute(@bind) || confess($sql."\n\t".$sth->errstr); } sub deleterow { my $self = shift; my ($table, $where) = @_; confess("must specify table") unless $table; my $dbh = $self->dbh; # array of WHERE cols if (ref($where)) { if (ref($where) eq "HASH") { $where = [ map { "$_ = ".$dbh->quote($where->{$_}) } keys %$where ]; } } else { $where = [$where]; } confess("must specify constraints") unless @$where; my $sql = sprintf("DELETE FROM %s WHERE %s", $table, join(' AND ', @$where), ); trace(0, "SQL:$sql") if $TRACE; my $sth = $dbh->prepare($sql) || confess($sql."\n\t".$dbh->errstr); return $sth->execute() || confess($sql."\n\t".$sth->errstr); } #$::RD_HINT = 1; $::RD_AUTOACTION = q { [@item] }; sub selectgrammar { return q[ { use Data::Dumper; use Data::Stag; sub N { Data::Stag->new(@_); } } ] . q[ selectstmts: selectstmt ';' selectstmts selectstmts: selectstmt # selectstmt: /select/i selectcols /from/i fromtables selectstmt: /select/i selectq(?) selectcols /from/i fromtables where(?) group(?) having(?) combiner(?) order(?) limit(?) offset(?) 
{ N(select => [ [qual => $item{'selectq'}[0]], [cols => $item[3]], [from => $item[5]], # [where => $item[6]], # [group => $item{'group'}[0]], # [having => $item{'having'}[0]], ]); } | selectq: /all/i | /distinct/i { $item[1] } | # as: /\s+as\s+/i as: /as/i selectcols: selectexpr /\,/ selectcols { [$item[1], @{$item[3]}] } | selectcols: selectexpr { [$item[1]] } | selectexpr: bselectexpr as aliasname { my $col = $item{bselectexpr}; $col->set_alias($item{aliasname}->[1]); $col; } | selectexpr: bselectexpr { $item[1] } | bselectexpr: funccall { $item[1] } | bselectexpr: selectcol { $item[1] } | selectcol: brackselectcol operator selectcol { N(col=>[ [func => [ [name => $item[2]->[1]], [args => [$item[1],$item[3]]] ] ] ]); } ### { $item[1]} | selectcol: brackselectcol { $item[1]} | brackselectcol: '(' selectcol ')' { $item[2]} | brackselectcol: bselectcol { $item[1]} | bselectcol: /(\w+)\.(\w+)/ { N(col=>[ [name => $item[1]], [table=>$1], ]) } | bselectcol: /(\w+)\.\*/ { N(col=>[ [name => $item[1]], [table=>$1], ]) } | bselectcol: /\*/ { N(col=>[ [name => $item[1]] ]) } | bselectcol: /\w+/ { N(col=>[ [name => $item[1]] ]) } | bselectcol: expr { N(col=>[ [expr => $item[1]] ]) } | funccall: funcname '(' distinct(?) 
selectcols ')' { my $col = N(col=>[ [func => [ [name => $item[1]->[1]], [args => $item[4]] ] ] ]); $col; } | distinct: /distinct/i operator: '+' | '-' | '*' | '/' | '||' fromtables: jtable { [$item[1]] } | jtable: join_jtable { $item[1] } | join_jtable: qual_jtable jointype join_jtable { shift @{$item[2]}; my $j = N(composite=>[ [ctype=>"@{$item[2]}"], [first=>[$item[1]]], [second=>[$item[3]]] ]); $j; } | join_jtable: qual_jtable { $item[1] } | qual_jtable: alias_jtable joinqual { my $j = $item[1]; $j->setnode_qual($item[2]); $j; } | qual_jtable: alias_jtable { $item[1] } | alias_jtable: brack_jtable /as\s+/i aliasname { my $j = $item[1]; $j->set_alias($item[3][1]); $j; } | alias_jtable: brack_jtable { $item[1] } | brack_jtable: '(' jtable ')' { $item[2] } | brack_jtable: table { N(leaf=>[[name=>$item[1]->[1]]]) } | joinqual: /on\s+/i bool_expr { N(qual => [ [type=>'on'], [expr=>"@{$item[2]}"] ]) } | joinqual: /using\s+/i '(' cols ')' { N(qual =>[ [type=>'using'], [expr=>"@{$item[3]}"] ]) } | table: tablename { $item[1] } | funcname: /\w+/ tablename: /\w+/ aliasname: /\w+/ cols: col(s) col: /\w+\.\w+/ col: /\w+/ jointype: /\,/ jointype: /natural/i bjointype /join/i jointype: /natural/i /join/i jointype: bjointype /join/i jointype: /join/i bjointype: /inner/i bjointype: lrf(?) /outer/i lrf: /left/i | /right/i | /full/i bjointype: /cross/i number: float | int float: /\d*\.?\d+/ 'e' sign int float: /\d*\.\d+/ int: /\d+/ string: /\'.*?\'/ sign: '+' | '-' exprs: '(' exprs ')' exprs: expr ',' exprs exprs: expr # bool_expr - eg in where clause bool_expr: not_bool_expr boolop bool_expr | not_bool_expr not_bool_expr: '!' 
brack_bool_expr | brack_bool_expr
    brack_bool_expr: '(' bool_expr ')' | bool_exprprim
    bool_exprprim: boolval | expr
    boolval: /true/i | /false/i | /null/i
    expr: brack_expr op expr | brack_expr
    brack_expr: '(' expr ')' | exprprim
    exprprim: col | val
    val: number | string
    op: /not\s+/i /like\s+/i
    op: /like\s+/i
    op: /is\s+/i /not\s+/i
    op: /is\s+/i
    op: '=' | '!=' | '<>' | '<=' | '>=' | '<' | '>'
    boolop: /and\s+/i | /or\s+/i | /not\s+/i
#    where: /where/i /.*/
    where: /where/i bool_expr
    group: /group/i /by/i exprs
    having: /having/i /.*/
    combiner: combinekwd selectstmt
# NOTE(review): /update/i below is probably meant to be /except/i
# (SQL set combiners are UNION/INTERSECT/EXCEPT) - verify before changing
    combinekwd: /union/i | /intersect/i | /update/i
    order: /order/i /by/i orderexprs
    orderexprs: orderexpr ',' orderexprs
    orderexprs: orderexpr
    orderexpr: expr /asc/i
    orderexpr: expr /desc/i
    orderexpr: expr /using/i op
    orderexpr: expr
    limit: /limit/i /\w+/
    offset: /offset/i /\d+/
  ];
}

no strict 'refs';

# AUTOLOAD: any method not defined on DBIx::DBStag is delegated to the
# underlying DBI database handle (if connected and the handle supports
# it); DESTROY is swallowed so object teardown never delegates
sub AUTOLOAD {
    my $self = shift;
    my @args = @_;
    my $name = $AUTOLOAD;
    $name =~ s/.*://;   # strip fully-qualified portion
    if ($name eq "DESTROY") {
        # we dont want to propagate this!!
        return;
    }
    unless ($self->isa("DBIx::DBStag")) {
        confess("no such subroutine $name");
    }
    if ($self->dbh) {
        if ($TRACE) {
            # the following check may impair performance
            if (grep { ref($_) } @args) {
                $self->throw("cannot quote @args");
            }
        }
        if ($self->dbh->can($name)) {
            return $self->dbh->$name(@args);
        }
    }
    # BUGFIX: error message previously contained a stray ')' after $name
    confess("no such method:$name");
}

# rearrange(order_arrayref, @params)
# CGI/BioPerl-style named-argument unpacker: maps (-name => value)
# pairs onto the positional order given in @$order; positional calls
# (first arg does not start with '-') are passed through untouched
sub rearrange {
    my($order,@param) = @_;

    # If there are no parameters, we simply wish to return
    # an undef array which is the size of the @{$order} array.
    # NOTE(review): $#{$order} yields one fewer undef than there are
    # entries in @$order; harmless in list assignment, where missing
    # values default to undef anyway
    return (undef) x $#{$order} unless @param;

    # If we've got parameters, we need to check to see whether
    # they are named or simply listed. If they are listed, we
    # can just return them.
    return @param unless (defined($param[0]) && $param[0]=~/^-/);

    # Now we've got to do some work on the named parameters.
    # The next few lines strip out the '-' characters which
    # preceed the keys, and capitalizes them.
my $i; for ($i=0;$i<@param;$i+=2) { if (!defined($param[$i])) { cluck("Hmmm in $i ".CORE::join(";", @param)." == ".CORE::join(";",@$order)."\n"); } else { $param[$i]=~s/^\-//; $param[$i]=~tr/a-z/A-Z/; } } # Now we'll convert the @params variable into an associative array. my(%param) = @param; my(@return_array); # What we intend to do is loop through the @{$order} variable, # and for each value, we use that as a key into our associative # array, pushing the value at that key onto our return array. my($key); foreach $key (@{$order}) { $key=~tr/a-z/A-Z/; my($value) = $param{$key}; delete $param{$key}; push(@return_array,$value); } # catch user misspellings resulting in unrecognized names my(@restkeys) = keys %param; if (scalar(@restkeys) > 0) { confess("@restkeys not processed in rearrange(), did you use a non-recognized parameter name ? "); } return @return_array; } #sub loadschema { # my $self = shift; # my ($ddl, $ddlf, $dialect) = # rearrange([qw(ddl ddlf dialect)], @_); # if ($ddlf) { # my $fh = FileHandle->new($ddlf) || $self->throw("no file $ddlf"); # $ddl = join('',<$fh>); # $fh->close; # } # $self->throw("no DDL") unless $ddl; # if ($dialect) { # my $driver = $self->{_driver} || 'Pg'; # if ($driver ne $dialect) { # } # } #} 1; __END__ =head1 NAME DBIx::DBStag - Relational Database to Hierarchical (Stag/XML) Mapping =head1 SYNOPSIS use DBIx::DBStag; my $dbh = DBIx::DBStag->connect("dbi:Pg:dbname=moviedb"); my $sql = q[ SELECT studio.*, movie.*, star.* FROM studio NATURAL JOIN movie NATURAL JOIN movie_to_star NATURAL JOIN star WHERE movie.genre = 'sci-fi' AND star.lastname = 'Fisher' USE NESTING (set(studio(movie(star)))) ]; my $dataset = $dbh->selectall_stag($sql); my @studios = $dataset->get_studio; # returns nested data that looks like this - # # (studio # (name "20th C Fox") # (movie # (name "star wars") (genre "sci-fi") # (star # (firstname "Carrie")(lastname "Fisher"))))) # iterate through result tree - foreach my $studio (@studios) { printf "STUDIO: 
%s\n", $studio->get_name; my @movies = $studio->get_movie; foreach my $movie (@movies) { printf " MOVIE: %s (genre:%s)\n", $movie->get_name, $movie->get_genre; my @stars = $movie->get_star; foreach my $star (@stars) { printf " STARRING: %s:%s\n", $star->get_firstname, $star->get_lastname; } } } # manipulate data then store it back in the database my @allstars = $dataset->get("movie/studio/star"); $_->set_fullname($_->get_firstname.' '.$_->get_lastname) foreach(@allstars); $dbh->storenode($dataset); exit 0; Or from the command line: unix> selectall_xml.pl -d 'dbi:Pg:dbname=moviebase' \ 'SELECT * FROM studio NATURAL JOIN movie NATURAL \ JOIN movie_to_star NATURAL JOIN star \ USE NESTING (set(studio(movie(star))))' Or using a predefined template: unix> selectall_xml.pl -d moviebase /mdb-movie genre=sci-fi =cut =head1 DESCRIPTION This module is for mapping between relational databases and Stag objects (Structured Tags - see L). Stag objects can also be represented as XML. The module has two main uses: =over =item Querying This module can take the results of any SQL query and decompose the flattened results into a tree data structure which reflects the foreign keys in the underlying relational schema. It does this by looking at the SQL query and introspecting the database schema, rather than requiring metadata or an object model. In this respect, the module works just like a regular L handle, with a few extra methods. Queries can also make use of predefined B =item Storing Data DBStag objects can store any tree-like datastructure (such as XML documents) into a database using normalized schema that reflects the structure of the tree being stored. This is done using little or no metadata. XML can also be imported, and a relational schema automatically generated. 
=back

For a tutorial on using DBStag to build and query relational databases
from XML sources, please see L<DBIx::DBStag::Cookbook>

=head2 HOW QUERY RESULTS ARE TURNED INTO STAG/XML

This is a general overview of the rules for turning SQL query results
into a tree like data structure. You don't need to understand all
these rules to be able to use this module - you can experiment by
using the B<selectall_xml.pl> script which comes with this distribution.

=head3 Mapping Relations

Relations (i.e. tables and views) are elements (nodes) in the
tree. The elements have the same name as the relation in the database.

These nodes are always non-terminal (ie they always have child nodes)

=head3 Mapping Columns

Table and view columns of a relation are sub-elements of the table or
view to which they belong. These elements will be B<data elements>
(i.e. terminal nodes). Only the columns selected in the SQL query will
be present.

For example, the following query

  SELECT name, job FROM person;

will return a data structure that looks like this:

  (set
   (person
    (name "fred")
    (job "forklift driver"))
   (person
    (name "joe")
    (job "steamroller mechanic")))

The data is shown as a lisp-style S-Expression - it can also be
expressed as XML, or manipulated as an object within perl.

=head3 Handling table aliases

If an ALIAS is used in the FROM part of the SQL query, the relation
element will be nested inside an element with the same name as the
alias. For instance, the query

  SELECT name FROM person AS author WHERE job = 'author';

Will return a data structure like this:

  (set
   (author
    (person
     (name "Philip K Dick"))))

The underlying assumption is that aliasing is used for a purpose in
the original query; for instance, to determine the context of the
relation where it may be ambiguous.
SELECT * FROM person AS employee INNER JOIN person AS boss ON (employee.boss_id = boss.person_id) Will generate a nested result structure similar to this - (set (employee (person (person_id "...") (name "...") (salary "...") (boss (person (person_id "...") (name "...") (salary "...")))))) If we neglected the alias, we would have 'person' directly nested under 'person', and the meaning would not be obvious. Note how the contents of the SQL query dynamically modifies the schema/structure of the result tree. =head3 NOTE ON SQL SYNTAX Right now, DBStag is fussy about how you specify aliases; you must use B - you must say SELECT name FROM person AS author; instead of SELECT name FROM person author; =head3 Nesting of relations The main utility of querying using this module is in retrieving the nested relation elements from the flattened query results. Given a query over relations A, B, C, D,... there are a number of possible tree structures. Not all of the tree structures are meaningful or useful. Usually it will make no sense to nest A under B if there is no foreign key relationship linking either A to B, or B to A. This is not always the case - it may be desirable to nest A under B if there is an intermediate linking table that is required at the relational level but not required in the tree structure. DBStag will guess a structure/schema based on the ordering of the relations in your FROM clause. However, this guess can be over-ridden at either the SQL level (using DBStag specific SQL extensions) or at the API level. The default algorithm is to nest each relation element under the relation element preceeding it in the FROM clause; for instance: SELECT * FROM a NATURAL JOIN b NATURAL JOIN c If there are appropriately named foreign keys, the following data will be returned (assuming one column 'x_foo' in each of a, b and c) (set (a (a_foo "...") (b (b_foo "...") (c (c_foo "..."))))) where 'x_foo' is a column in relation 'x' This is not always desirable. 
If both b and c have foreign keys into table a, DBStag will not detect this - you have to guide it. There are two ways of doing this - you can guide by bracketing your FROM clause like this: SELECT * FROM (a NATURAL JOIN b) NATURAL JOIN c This will generate (set (a (a_foo "...") (b (b_foo "...")) (c (c_foo "...")))) Now b and c are siblings in the tree. The algorithm is similar to before: nest each relation element under the relation element preceding it; or, if the preceding item in the FROM clause is a bracketed structure, nest it under the first relational element in the bracketed structure. (Note that in MySQL you may not place brackets in the FROM clause in this way) Another way to achieve the same thing is to specify the desired tree structure using a DBStag specific SQL extension. The DBStag specific component is removed from the SQL before being presented to the DBMS. The extension is the B clause, which should come at the end of the SQL query (and is subsequently removed before processing by the DBMS). SELECT * FROM a NATURAL JOIN b NATURAL JOIN c USE NESTING (set (a (b)(c))); This will generate the same tree as above (i.e. 'b' and 'c' are siblings). Notice how the nesting in the clause is the same as the nesting in the resulting tree structure. Note that 'set' is not a table in the underlying relational schema - the result data tree requires a named top level node to group all the 'a' relations under. You can call this top level element whatever you like. 
If you are using the DBStag API directly, you can pass in the nesting structure as an argument to the select call; for instance: my $xmlstr = $dbh->selectall_xml(-sql=>q[SELECT * FROM a NATURAL JOIN b NATURAL JOIN c], -nesting=>'(set (a (b)(c)))'); or the equivalent - my $xmlstr = $dbh->selectall_xml(q[SELECT * FROM a NATURAL JOIN b NATURAL JOIN c], '(set (a (b)(c)))'); If you like, you can also use XML here (only at the API level, not at the SQL level) - my $seq = $dbh->selectall_xml(-sql=>q[SELECT * FROM a NATURAL JOIN b NATURAL JOIN c], -nesting=>q[ ]); As you can see, this is a little more verbose than the S-Expression Most command line scripts that use this module should allow pass-through via the '-nesting' switch. =head3 Aliasing of functions and expressions If you alias a function or an expression, DBStag needs to know where to put the resulting column; the column must be aliased. This is inferred from the first named column in the function or expression; for example, the SQL below uses the minus function: SELECT blah.*, foo.*, foo.x-foo.y AS z The B element will be nested under the B element You can force different nesting using a B: SELECT blah.*, foo.*, foo.x - foo.y AS blah__z This will nest the B element under the B element If you would like to override this behaviour and use the alias as the element name, pass in the -aliaspolicy=>'a' arg to the API call. If you wish to use the table names without nesting, use -aliaspolicy=>'t'. =head2 Conformance to DTD/XML-Schema DBStag returns L structures that are equivalent to a simplified subset of XML (and also a simplified subset of lisp S-Expressions). 
These structures are examples of B - a good reference is this book - Data on the Web: From Relations to Semistructured Data and XML Serge Abiteboul, Dan Suciu, Peter Buneman Morgan Kaufmann; 1st edition (January 2000) The schema for the resulting Stag structures can be seen to conform to a schema that is dynamically determined at query-time from the underlying relational schema and from the specification of the query itself. If you need to generate a DTD you can use the B script, which is part of the L distribution =head1 QUERY METHODS The following methods are for using the DBStag API to query a database =head2 connect Usage - $dbh = DBIx::DBStag->connect($DSN); Returns - L Args - see the connect() method in L This will be the first method you call to initiate a DBStag object The DSN may be a standard DBI DSN, or it can be a DBStag alias =head2 selectall_stag Usage - $stag = $dbh->selectall_stag($sql); $stag = $dbh->selectall_stag($sql, $nesting_clause); $stag = $dbh->selectall_stag(-template=>$template, -bind=>{%variable_bindings}); Returns - L Args - sql string, [nesting string], [bind hashref], [template DBIx::DBStag::SQLTemplate] Executes a query and returns a L structure An optional nesting expression can be passed in to control how the relation is decomposed into a tree. The nesting expression can be XML or an S-Expression; see above for details =cut =head2 selectall_xml Usage - $xml = $dbh->selectall_xml($sql); Returns - string Args - See selectall_stag() As selectall_stag(), but the results are transformed into an XML string =cut =head2 selectall_sxpr Usage - $sxpr = $dbh->selectall_sxpr($sql); Returns - string Args - See selectall_stag() As selectall_stag(), but the results are transformed into an S-Expression string; see L for more details. 
=cut =head2 selectall_sax Usage - $dbh->selectall_sax(-sql=>$sql, -handler=>$sax_handler); Returns - string Args - sql string, [nesting string], handler SAX As selectall_stag(), but the results are transformed into SAX events [currently this is just a wrapper to selectall_xml but a genuine event generation model will later be used] =cut =head2 selectall_rows Usage - $tbl = $dbh->selectall_rows($sql); Returns - arrayref of arrayref Args - See selectall_stag() As selectall_stag(), but the results of the SQL query are left undecomposed and unnested. The resulting structure is just a flat table; the first row is the column headings. This is similar to DBI->selectall_arrayref(). The main reason to use this over the direct DBI method is to take advantage of other stag functionality, such as templates =head2 prepare_stag PRIVATE METHOD Usage - $prepare_h = $dbh->prepare_stag(-template=>$template); Returns - hashref (see below) Args - See selectall_stag() Returns a hashref { sth=>$sth, exec_args=>\@exec_args, cols=>\@cols, col_aliases_ordered=>\@col_aliases_ordered, alias=>$aliasstruct, nesting=>$nesting }; =cut =head1 STORAGE METHODS The following methods are for using the DBStag API to store nested data in a database =head2 storenode Usage - $dbh->storenode($stag); Returns - Args - L SEE ALSO: The B script Recursively stores a stag tree structure in the database. The database schema is introspected for most of the mapping data, but you can supply your own (see later) The Stag tree/XML must be a direct mapping of the relational schema. Column and table names must correspond to element names. Elements may be nested. Different styles of XML-Relational mapping may be used: XORT-style and the more compact Stag-style =head3 XORT-style mapping With a XORT-style mapping, elements corresponding to tables can be nested under elements corresponding to foreign keys. For example, if the relational schema has a foreign key from table B to table B
, the following XML is permissable: ..
The B
node will be stored in the database and collapsed to whatever the value of the primary key is. =head3 Stag-style mapping Stag-style is more compact, but sometimes relies on the presence of a B element to specify how foreign keys are mapped =head3 Operations Operations are specified as attributes inside elements, specifying whether the node should be inserted, updated, looked up or stored/forced. Operations are optional (default is force/store). fred .. .. The above will always insert into the person table (which may be quite dangerous; if an entry with the same unique constraint exists, an error will be thrown). Assuming (streetaddr,city) is a unique constraint for the address table, this will lookup the specified address (and not modify the table) and use the returned pk value for the B foreign key The operations are: =over =item force (default) looks up (by unique constraints) first; if exists, will do an update. if does not exist, will do an insert =item insert insert only. DBMS will throw error if row with same UC exists =item update update only. DBMS will throw error if a row with the specified UC cannot be found =item lookup finds the pk value using one of the unique constraints present in the XML node =item delete NOT IMPLEMENTED deletes row that has matching UC =back Operations can be used in either XORT or Stag mode =head3 Macros Macros can be used with either XORT or Stag style mappings. Macros allow you to refer to the same node later on in the XML joe fred ... 
friend joe fred Assuming B is a unique constraint for B, and person_relationship has two foreign keys named person1_id and person2_id linking to the person table, DBStag will first lookup the two person rows by name (throwing an error if not present) and use the returned pk values to populate the person_relationship table =head3 How it works Before a node is stored, certain subnodes will be pre-stored; these are subnodes for which there is a foreign key mapping FROM the parent node TO the child node. This pre-storage is recursive. After these nodes are stored, the current node is either INSERTed or UPDATEd. The database is introspected for UNIQUE constraints; these are used as keys. If there exists a row in the database with matching key, then the node is UPDATEd; otherwise it is INSERTed. (primary keys from pre-stored nodes become foreign key values in the existing node) Subsequently, all subnodes that were not pre-stored are now post-stored. The primary key for the existing node will become foreign keys for the post-stored subnodes. =head2 force_safe_node_names Usage - $dbh->force_safe_node_names(1); Returns - bool Args - bool [optional] If this is set, then before storage, all node names are made B; they are lowercased, and the following transform is applied: tr/a-z0-9_//cd; =head2 mapping Usage - $dbh->mapping(["alias/table.col=fktable.fkcol"]); Returns - Args - array Creates a stag-relational mapping (for storing data only) Occasionally not enough information can be obtained from db introspection; you can provide extra mapping data this way. 
Occasionally your stag objects/data/XML will contain aliases that do not correspond to actual SQL relations; the aliases are intermediate nodes that provide information on which foreign key column to use For example, with data like this: (person (name "...") (favourite_film (film (....)) (least_favourite_film (film (....))))) There may only be two SQL tables: person and film; person would have two foreign key columns into film. The mapping may look like this ["favourite_film/person.favourite_film_id=film.film_id", "least_favourite_film/person.least_favourite_film_id=film.film_id"] The mapping can also be supplied in the xml that is loaded; any node named "dbstag_metadata" will not be loaded; it is used to supply the mapping. For example: favourite_film/person.favourite_film_id=film.film_id least_favourite_film/person.least_favourite_film_id=film.film_id ... =head2 mapconf Usage - $dbh->mapconf("mydb-stagmap.stm"); Returns - Args - filename sets the conf file containing the stag-relational mappings This is not of any use for a XORT-style mapping, where foreign key columns are explicitly stated See mapping() above The file contains lines like: favourite_film/person.favourite_film_id=film.film_id least_favourite_film/person.least_favourite_film_id=film.film_id =head2 noupdate_h Usage - $dbh->noupdate_h({person=>1}) Returns - Args - hashref Keys of hash are names of nodes that do not get updated - if a unique key is queried for and does not exist, the node will be inserted and subnodes will be stored; if the unique key does exist in the db, then this will not be updated; subnodes will not be stored =head2 trust_primary_key_values Usage - $dbh->trust_primary_key_values(1) Returns - bool Args - bool (optional) The default behaviour of the storenode() method is to remap all B PRIMARY KEY values it comes across. A surrogate primary key is typically a primary key of type SERIAL (or AUTO_INCREMENT) in MySQL. 
They are identifiers assigned automatically by the database with no semantics. It may be desirable to store the same data in two different databases. We would generally not expect the surrogate IDs to match between databases, even if the rest of the data does. (If you do not use surrogate primary key columns in your load xml, then you can ignore this accessor) You should NOT use this method in conjunction with Macros If you use primary key columns in your XML, and the primary keys are not surrogate, then you should set this. If this accessor is set to non-zero (true) then the primary key values in the XML will be used. If your db has surrogate/auto-increment/serial PKs, and you wish to use these PK columns in your XML, yet you want to make XML that can be exported from one db and imported into another, then the default behaviour will be fine. For example, if we extract a 'person' from a db with surrogate PK B and unique key B, we may get this: 23 fred 1234-567 If we then import this into an entirely fresh db, with no rows in table B, then the default behaviour of storenode() will create a row like this: 1 fred 1234-567 The PK val 23 has been mapped to 1 (all foreign keys that point to person.id=23 will now point to person.id=1) If we were to first call $sdbh->trust_primary_key_values(1), then person.id would remain 23. This would only be appropriate behaviour if we were storing back into the same db we retrieved from. =head2 tracenode Usage - $dbh->tracenode('person/name') Traces on STDERR inserts/updates on a particular element type (table), displaying the sub-element (column value). =head2 is_caching_on B Usage - $dbh->is_caching_on('person', 1) Returns - number Args - number 0: off (default) 1: memory-caching ON 2: memory-caching OFF, bulkload ON 3: memory-caching ON, bulkload ON IN-MEMORY CACHING By default no in-memory caching is used. If this is set to 1, then an in-memory cache is used for any particular element. 
No cache management is used, so you should be sure not to cache elements that will cause memory overloads. Setting this will not affect the final result, it is purely an efficiency measure for use with storenode(). The cache is indexed by all unique keys for that particular element/table, wherever those unique keys are set BULKLOAD If bulkload is used without memory-caching (set to 2), then only INSERTs will be performed for this element. Note that this could potentially cause a unique key violation, if the same element is present twice If bulkload is used with memory-caching (set to 3) then only INSERTs will be performed; the unique serial/autoincrement identifiers for those inserts will be cached and used. This means you can have the same element twice. However, the load must take place in one session, otherwise the contents of memory will be lost =head2 clear_cache Usage - $dbh->clear_cache; Returns - Args - none Clears the in-memory cache Caches are not automatically managed - the API user is responsible for making sure the cache does not get too big =head2 cache_summary Usage - print $dbh->cache_summary->xml Returns - L Args - Gives a summary of the size of the in-memory cache by keys. This can be used for automatic cache management: $person_cache = $dbh->cache_summary->get_person; my @index_nodes = $person_cache->tnodes; foreach (@index_nodes) { if ($_->data > MAX_PERSON_CACHE_SIZE) { $dbh->clear_cache; } } =head1 SQL TEMPLATES DBStag comes with its own SQL templating system. This allows you to reuse the same canned SQL or similar SQL queries in different contexts. See L =head2 find_template Usage - $template = $dbh->find_template("my-template-name"); Returns - L Args - str Returns an object representing a canned parameterized SQL query. 
See L for documentation on templates =head2 list_templates Usage - $templates = $dbh->list_templates(); Returns - Arrayref of L Args - Returns a list of ALL defined templates - See L =head2 find_templates_by_schema Usage - $templates = $dbh->find_templates_by_schema($schema_name); Returns - Arrayref of L Args - str Returns a list of templates for a particular schema - See L =head2 find_templates_by_dbname Usage - $templates = $dbh->find_templates_by_dbname("mydb"); Returns - Arrayref of L Args - db name Returns a list of templates for a particular db Requires resources to be set up (see below) =cut =head1 RESOURCES Generally when connecting to a database, it is necessary to specify a DBI style DSN locator. DBStag also allows you specify a B file which maps logical names to full locators The following methods allows you to use a resource list =head2 resources_list Usage - $rlist = $dbh->resources_list Returns - arrayref to a hashref Args - none Returns a list of resources; each resource is a hash {name=>"mydbname", type=>"rdb", schema=>"myschema", } =head1 SETTING UP RESOURCES The above methods rely on you having a file describing all the relational dbs available to you, and setting the env var DBSTAG_DBIMAP_FILE set (this is a B<:> separated list of paths). B Currently a resources file is a whitespace delimited text file - XML/Sxpr/IText definitions may be available later Here is an example of a resources file: # LOCAL mytestdb rdb Pg:mytestdb schema=test # SYSTEM worldfactbook rdb Pg:worldfactbook@db1.mycompany.com schema=wfb employees rdb Pg:employees@db2.mycompany.com schema=employees The first column is the B or B of the resource/db. This nickname can be used instead of the full DBI locator path (eg you can just use B instead of B The second column is the resource type - rdb is for relational database. You can use the same file to track other system datasources available to you, but DBStag is only interested in relational dbs. 
The 3rd column is a way of locating the resource - driver:name@host The 4th column is a B<;> separated list of B=B pairs; the most important tag is the B tag. Multiple dbs may share the same schema, and hence share SQL Templates =cut =head1 COMMAND LINE SCRIPTS DBStag is usable without writing any perl, you can use command line scripts and files that utilise tree structures (XML, S-Expressions) =over =item selectall_xml.pl selectall_xml.pl -d [-n ] Queries database and writes decomposed relation as XML Can also be used with templates: selectall_xml.pl -d / ... =item selectall_html.pl selectall_html.pl -d [-n ] Queries database and writes decomposed relation as HTML with nested tables indicating the nested structures. =item stag-storenode.pl stag-storenode.pl -d Stores data from a file (Supported formats: XML, Sxpr, IText - see L) in a normalized database. Gets it right most of the time. TODO - metadata help =item stag-autoddl.pl stag-autoddl.pl [-l ]* Takes data from a file (Supported formats: XML, Sxpr, IText - see L) and generates a relational schema in the form of SQL CREATE TABLE statements. =back =head1 ENVIRONMENT VARIABLES =over =item DBSTAG_TRACE setting this environment will cause all SQL statements to be printed on STDERR, as well as a full trace of how nodes are stored =back =head1 BUGS The SQL parsing can be quite particular - sometimes the SQL can be parsed by the DBMS but not by DBStag. The error messages are not always helpful. There are probably a few cases the SQL SELECT parsing grammar cannot deal with. If you want to select from views, you need to hack DBIx::DBSchema (as of v0.21) =head1 TODO Use SQL::Translator to make SQL DDL generation less Pg-specific; also for deducing foreign keys (right now foreign keys are guessed by the name of the column, eg table_id) Can we cache the grammar so that startup is not so slow? 
Improve algorithm so that events are fired rather than building up entire structure in-memory Tie in all DBI attributes accessible by hash, i.e.: $dbh->{...} Error handling =head1 WEBSITE L =head1 AUTHOR Chris Mungall > =head1 COPYRIGHT Copyright (c) 2004 Chris Mungall This module is free software. You may distribute this module under the same terms as perl itself =cut 1; DBIx-DBStag-0.12/example_templates/0000755000076500000240000000000011331570203015625 5ustar cainstaffDBIx-DBStag-0.12/example_templates/cia-country.stg0000644000076500000240000000750111326157220020610 0ustar cainstaff:SELECT * :FROM country LEFT OUTER JOIN ethnicgroups USING (country_id) LEFT OUTER JOIN religions USING (country_id) LEFT OUTER JOIN borders USING (country_id) LEFT OUTER JOIN country_coasts USING (country_id) LEFT OUTER JOIN languages USING (country_id) :WHERE [ country.government => &country_government& ] [ country.population => &country_population& ] [ country.total_area => &country_total_area& ] [ country.name => &country_name& ] [ country.inflation => &country_inflation& ] [ country.gdp_total => &country_gdp_total& ] [ country.datacode => &country_datacode& ] [ country.continent => &country_continent& ] [ country.gdp_agri => &country_gdp_agri& ] [ country.ciaid => &country_ciaid& ] [ country.population_growth => &country_population_growth& ] [ country.infant_mortality => &country_infant_mortality& ] [ country.capital => &country_capital& ] [ country.indep_date => &country_indep_date& ] [ country_id IN (SELECT country_id FROM ethnicgroups WHERE ethnicgroups.name => ðnicgroups_name&) ] [ country_id IN (SELECT country_id FROM ethnicgroups WHERE ethnicgroups.num => ðnicgroups_num&) ] [ country_id IN (SELECT country_id FROM religions WHERE religions.name => &religions_name&) ] [ country_id IN (SELECT country_id FROM religions WHERE religions.num => &religions_num&) ] [ country_id IN (SELECT country_id FROM borders WHERE borders.country => &borders_country&) ] [ country_id IN (SELECT 
country_id FROM borders WHERE borders.num => &borders_num&) ] [ country_id IN (SELECT country_id FROM country_coasts WHERE country_coasts.coasts => &country_coasts_coasts&) ] [ country.gdp_serv => &country_gdp_serv& ] [ country.gdp_ind => &country_gdp_ind& ] [ country_id IN (SELECT country_id FROM languages WHERE languages.name => &languages_name&) ] [ country_id IN (SELECT country_id FROM languages WHERE languages.num => &languages_num&) ] :USE NESTING (set (country (ethnicgroups) (religions) (borders) (country_coasts) (languages))) // ---- METADATA ---- schema: cia desc: Fetches country objects This is an AUTOGENERATED template example_input: country_government => SELECT DISTINCT government FROM country example_input: country_total_area => SELECT DISTINCT total_area FROM country example_input: country_name => SELECT DISTINCT name FROM country example_input: country_inflation => SELECT DISTINCT inflation FROM country example_input: country_gdp_total => SELECT DISTINCT gdp_total FROM country example_input: country_datacode => SELECT DISTINCT datacode FROM country example_input: country_continent => SELECT DISTINCT continent FROM country example_input: country_gdp_agri => SELECT DISTINCT gdp_agri FROM country example_input: country_ciaid => SELECT DISTINCT ciaid FROM country example_input: country_population_growth => SELECT DISTINCT population_growth FROM country example_input: country_infant_mortality => SELECT DISTINCT infant_mortality FROM country example_input: country_capital => SELECT DISTINCT capital FROM country example_input: country_indep_date => SELECT DISTINCT indep_date FROM country example_input: ethnicgroups_name => SELECT DISTINCT name FROM ethnicgroups example_input: ethnicgroups_num => SELECT DISTINCT num FROM ethnicgroups example_input: religions_name => SELECT DISTINCT name FROM religions example_input: religions_num => SELECT DISTINCT num FROM religions example_input: borders_country => SELECT DISTINCT country FROM borders example_input: 
borders_num => SELECT DISTINCT num FROM borders example_input: country_coasts_coasts => SELECT DISTINCT coasts FROM country_coasts example_input: country_gdp_serv => SELECT DISTINCT gdp_serv FROM country example_input: country_gdp_ind => SELECT DISTINCT gdp_ind FROM country example_input: languages_name => SELECT DISTINCT name FROM languages example_input: languages_num => SELECT DISTINCT num FROM languagesDBIx-DBStag-0.12/example_templates/spy-agent.stg0000644000076500000240000000040211326157220020253 0ustar cainstaff:SELECT * :FROM bureau NATURAL JOIN bureau_to_agent NATURAL JOIN agent NATURAL JOIN mission :WHERE [bureau.name => &bureau_name&] [agent.agent_code => &agent_code&] [mission.codename => &mcode&] :USE NESTING (set(agent(mission)(bureau))) // DBIx-DBStag-0.12/INSTALL0000644000076500000240000000074711326157220013161 0ustar cainstaffWHAT IS THIS? This is DBIx::DBStag, a perl module. Please see the README that comes with this distribution. HOW DO I INSTALL IT? To install this module, cd to the directory that contains this README file and type the following: perl Makefile.PL make make test make install To install this module into a specific directory, do: perl Makefile.PL PREFIX=/name/of/the/directory ...the rest is the same... Please also read the perlmodinstall man page, if available. 
DBIx-DBStag-0.12/Makefile.PL0000644000076500000240000001722411326157220014100 0ustar cainstaff#$Id: Makefile.PL,v 1.14 2007/08/29 09:33:45 cmungall Exp $ # This Makefile.PL was cut-n-pasted from the DBIx::Abstract # Nakefile.PL, by Andrew Turner use strict; BEGIN { $^W = 1 } # use warnings in Perl 5.6 parlance use ExtUtils::MakeMaker qw( prompt WriteMakefile ); use Config (); use Getopt::Long(); use Data::Dumper (); use lib '.'; use lib 't'; use vars qw($opt); $opt = { "help" => \&Usage, }; Getopt::Long::GetOptions($opt, "help", "testdsn=s", "testdriver=s", # "testdb=s", "testhost=s", # "testport=s", # "testdriver=s", "testuser=s", "testpassword=s", "testrecreate=s", ); my $hostflag = $ENV{TESTHOST} || $opt->{testhost}; my $hostdsn = $hostflag ? ";host=$hostflag" : ''; $hostflag = $hostflag ? "-h $hostflag" : ''; my $TESTDB = $ENV{TESTDB} || "test"; my $TESTDRIVER = "Pg"; #my $TESTDSN = $ENV{TESTDSN} || "dbi:Pg:dbname=test$hostdsn"; my $TESTDSN = $ENV{TESTDSN} || "dbi:Pg:dbname=test$hostdsn"; my $TESTRECREATE = $ENV{TESTRECREATE} || "dropdb $hostflag test; createdb $hostflag test"; if ($opt->{testdriver}) { if ($opt->{testdriver} eq 'mysql') { $TESTRECREATE = $ENV{TESTRECREATE} || "mysqladmin --force $hostflag drop test; mysqladmin --force $hostflag create test"; } if ($opt->{testdriver} ne 'Pg') { $TESTDSN = "$opt->{testdriver}:test$hostdsn"; print <{'help'}; my $keylen = 0; foreach my $key (keys %$opt) { $keylen = length($key) if length($key) > $keylen; } my $slen = 0; foreach my $val (values %$source) { $slen = length($val) if length($val) > $slen; } foreach my $key (sort { $a cmp $b} keys %$opt) { printf(" %-" . $keylen . "s (%-" . $slen . "s) = %s\n", $key, $source->{$key}, $opt->{$key}) } print <<"MSG"; To change these settings, see 'perl Makefile.PL --help'. MSG #sleep 5; eval { require File::Spec }; my $fileName = $@ ? 
"t/db.config" : File::Spec->catfile("t", "db.config"); #die "Failed to determine location of $fileName" unless -f $fileName; if (open(FILE, ">$fileName")) { print FILE "{ my " . Data::Dumper->Dump([$opt], ["opt"]) . " sub connect_args { return (\n". " \$opt->{'testdsn'},\n" . " \$opt->{'testuser'},\n" . " \$opt->{'testpassword'},\n" . " ) }\n". " sub recreate_cmd { return (\n". " \$opt->{'testrecreate'},\n" . " ) }\n". "} 1;\n"; close(FILE) or die "Failed to create $fileName: $!"; } WriteMakefile( 'NAME' => "DBIx", 'DISTNAME' => "DBIx-DBStag", 'ABSTRACT' => 'DBStag', 'AUTHOR' => 'Chris Mungall ', 'dist' => { 'SUFFIX' => ".gz", 'DIST_DEFAULT' => 'all tardist', 'COMPRESS' => "gzip -9f" }, 'VERSION_FROM' => "DBIx/DBStag.pm", 'PREREQ_PM' => { 'Parse::RecDescent' => 0, 'Text::Balanced' => 0, 'Data::Stag' => '0.07', 'XML::Parser::PerlSAX' => 0, 'DBIx::DBSchema' => '0.34', 'DBI' => 0, }, 'EXE_FILES' => [ 'scripts/selectall_xml.pl', 'scripts/selectall_html.pl', 'scripts/stag-autoddl.pl', 'scripts/stag-autotemplate.pl', 'scripts/stag-template2bin.pl', 'scripts/stag-storenode.pl', 'scripts/stag-ir.pl', 'scripts/stag-qsh', 'cgi-bin/ubiq.cgi', ], clean => { FILES => 'DBIx-DBStag-$(VERSION).tar.gz', }, ); eval { require "DBStagTest.pm"; }; if ($@) { print "You are missing a required module!\n"; print $@; print "Exiting!\n"; exit 1; } eval { DBStagTest::dbh()->disconnect; }; if ($@) { print STDERR <{'PL_FILES'}})) { my $from = $_; my $to = $self->{'PL_FILES'}{$_}; my $cfg = 'config.pl'; push(@output, qq| pm_to_blib: $to $to: $from \$(PERL) -I\$(INST_ARCHLIB) -I\$(INST_LIB) -I\$(PERL_ARCHLIB) \\ -I\$(PERL_LIB) -Ilib -MExtUtils::PerlPP \\ -e ppp "$from" "$to" "$cfg" |); } join('', @output); } } sub Configure { my($opt, $source, $param) = @_; if (exists($opt->{$param})) { $source->{$param} = "Users choice"; return; } if ($param eq "testdriver") { $source->{$param} = "default"; $opt->{$param} = $TESTDRIVER; } elsif ($param eq "testdb") { $source->{$param} = "default"; $opt->{$param} 
= $TESTDB; } elsif ($param eq "testdriver") { $source->{$param} = "default"; $opt->{$param} = $TESTDRIVER; } elsif ($param eq "testdsn") { $source->{$param} = "default"; $opt->{$param} = $TESTDSN; } elsif ($param eq "testrecreate") { $source->{$param} = "default"; $opt->{$param} = $TESTRECREATE; } elsif ($param eq "testuser" || $param eq "testpassword" || $param eq "testdsn" || $param eq "testhost" || $param eq "testport") { $source->{$param} = "default"; $opt->{$param} = ""; } else { die "Unknown configuration parameter: $param"; } } sub Usage { print STDERR <<"USAGE"; Usage: perl $0 [options] Possible options are: --testhost= Use the DBMS on for testing defaults to empty (localhost) --testdsn= Use the DBI datasource for running the test suite defaults to $TESTDSN --testdriver= Use the DBD driver for running the test suite defaults to $TESTDRIVER (you may also need to change testrecreate) --testuser= Use the username for running the test suite; defaults to no username --testpassword= Use the password for running the test suite; defaults to no password --testrecreate= Unix command string for dropping and creating the db --help Print this message and exit Examples: perl Makefile.PL uses the postgresql database 'test' on localhost perl Makefile.PL --testhost mydbserver uses the postgresql database 'test' on host mydbserver perl Makefile.PL -testdsn 'dbi:Pg:dbname=mytestdb;host=mydbserver' -mytestdbrecreate 'dropdb -h mydbserver mytestdb; createdb -h mydbserver mytestdb' uses the postgresql database 'mytestdb' on host mydbserver Note: the options are only important for running the test suite - if you are willing to risk an install without running the test suite, just do this: perl Makefile.PL make install USAGE exit 1; } __END__ DBIx-DBStag-0.12/MANIFEST0000644000076500000240000000236411326157220013256 0ustar cainstaffChanges INSTALL MANIFEST Makefile.PL README DBIx/DBStag.pm DBIx/DBStag/Constraint.pm DBIx/DBStag/Cookbook.pm DBIx/DBStag/SQLTemplate.pm cgi-bin/ubiq.cgi 
example_templates/cia-country.stg example_templates/spy-agent.stg scripts/selectall_html.pl scripts/selectall_xml.pl scripts/stag-autoddl.pl scripts/stag-autotemplate.pl scripts/stag-bulkload.pl scripts/stag-check-resources.pl scripts/stag-connect-parameters.pl scripts/stag-ir.pl scripts/stag-pgslurp.pl scripts/stag-qsh scripts/stag-show-template.pl scripts/stag-sl2sql.pl scripts/stag-storenode.pl scripts/stag-template2bin.pl scripts/stag-templates2scripts.pl scripts/ubiq t/DBStagTest.pm t/autoddl.t t/bond.t t/cvterm.t t/feature.t t/load-go.x t/movie.x t/norm2.t t/norm3.t t/normalize.t t/parsesql.t t/sql.x t/store1.t t/store2.t t/store2b.t t/store3.t t/store4.t t/store5.t t/template1.x t/tstore2.x t/xort-style.t t/data/CG10833.with-macros.chado-xml t/data/bond.el t/data/chado-cvterm.sql t/data/chado-feature.sql t/data/chado-fr.sql t/data/chado-pub.sql t/data/game.el t/data/mset.xml t/data/parts-data.xml t/data/parts-schema.sql t/data/relationship.chado-xml t/data/sofa.chado-xml t/data/test.chadoxml t/data/test2.chadoxml META.yml Module meta-data (added by MakeMaker) DBIx-DBStag-0.12/META.yml0000644000076500000240000000125011331570203013363 0ustar cainstaff--- #YAML:1.0 name: DBIx-DBStag version: 0.12 abstract: DBStag author: - Chris Mungall license: unknown distribution_type: module configure_requires: ExtUtils::MakeMaker: 0 build_requires: ExtUtils::MakeMaker: 0 requires: Data::Stag: 0.07 DBI: 0 DBIx::DBSchema: 0.34 Parse::RecDescent: 0 Text::Balanced: 0 XML::Parser::PerlSAX: 0 no_index: directory: - t - inc generated_by: ExtUtils::MakeMaker version 6.54 meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 DBIx-DBStag-0.12/README0000644000076500000240000004612311326157220013006 0ustar cainstaffNAME DBIx::DBStag - Relational Database to Hierarchical (Stag/XML) Mapping SYNOPSIS use DBIx::DBStag; my $dbh = DBIx::DBStag->connect("dbi:Pg:dbname=moviedb"); my $sql = q[ SELECT studio.*, movie.*, star.* FROM studio NATURAL JOIN movie 
NATURAL JOIN movie_to_star NATURAL JOIN star WHERE movie.genre = 'sci-fi' AND star.lastname = 'Fisher' USE NESTING (set(studio(movie(star)))) ]; my $dataset = $dbh->selectall_stag($sql); my @studios = $dataset->get_studio; # returns nested data that looks like this - # # (studio # (name "20th C Fox") # (movie # (name "star wars") (genre "sci-fi") # (star # (firstname "Carrie")(lastname "Fisher"))))) # iterate through result tree - foreach my $studio (@studios) { printf "STUDIO: %s\n", $studio->get_name; my @movies = $studio->get_movie; foreach my $movie (@movies) { printf " MOVIE: %s (genre:%s)\n", $movie->get_name, $movie->get_genre; my @stars = $movie->get_star; foreach my $star (@stars) { printf " STARRING: %s:%s\n", $star->get_firstname, $star->get_lastname; } } } # manipulate data then store it back in the database my @allstars = $dataset->get("movie/studio/star"); $_->set_fullname($_->get_firstname.' '.$_->get_lastname) foreach(@allstars); $dbh->storenode($dataset); Or from the command line: unix> selectall_xml -d 'dbi:Pg:dbname=spybase' 'SELECT * FROM studio NATURAL JOIN movie' DESCRIPTION This module is for mapping from databases to Stag objects (Structured Tags - see the Data::Stag manpage), which can also be represented as XML. It has two main uses: Querying This module can take the results of any SQL query and decompose the flattened results into a tree data structure which reflects the foreign keys in the underlying relational schema. It does this by looking at the SQL query and introspecting the database schema, rather than requiring metadata or an object model. In this respect, the module works just like a regular the DBI manpage handle, with some extra methods provided. Storing Data DBStag objects can store any tree-like datastructure (such as XML documents) into a database using normalized schema that reflects the structure of the tree being stored. This is done using little or no metadata. 
XML can also be imported, and a relational schema automatically generated. For a tutorial on using DBStag to build and query relational databases from XML sources, please see the DBIx::DBStag::Cookbook manpage HOW QUERYING WORKS This is a general overview of the rules for turning SQL query results into a tree like data structure. Relations Relations (i.e. tables and views) are elements (nodes) in the tree. The elements have the same name as the relation in the database. Columns Table and view columns of a relation are sub-elements of the table or view to which they belong. These elements will be data elements (i.e. terminal nodes). Only the columns selected in the SQL query will be present. For example, the following query SELECT name, job FROM person; will return a data structure that looks like this: (person (name "fred") (job "forklift driver")) (person (name "joe") (job "steamroller mechanic")) The data is shown as a lisp-style S-Expression - it can also be expressed as XML, or manipulated as an object within perl. Table aliases If an ALIAS is used in the FROM part of the SQL query, the relation element will be nested inside an element with the same name as the alias. For instance, the query SELECT name FROM person AS author WHERE job = 'author'; Will return a data structure like this: (author (person (name "Philip K Dick"))) The underlying assumption is that aliasing is used for a purpose in the original query; for instance, to determine the context of the relation where it may be ambiguous. SELECT * FROM person AS employee INNER JOIN person AS boss ON (employee.boss_id = boss.person_id) Will generate a nested result structure similar to this - (employee (person (person_id "...") (name "...") (foo "...") (boss (person (person_id "...") (name "...") (foo "..."))))) If we neglected the alias, we would have 'person' directly nested under 'person', and the meaning would not be obvious. 
Note how the contents of the SQL query dynamically modifies the schema/structure of the result tree. NOTE ON SQL SYNTAX Right now, DBStag is fussy about how you specify aliases; you must use AS - you must say SELECT name FROM person AS author; instead of SELECT name FROM person author; Nesting of relations The main utility of querying using this module is in retrieving the nested relation elements from the flattened query results. Given a query over relations A, B, C, D,... there are a number of possible tree structures. Not all of the tree structures are meaningful. Usually it will make no sense to nest A under B if there is no foreign key relationship linking either A to B, or B to A. This is not always the case - it may be desirable to nest A under B if there is an intermediate linking table that is required at the relational level but not required in the tree structure. DBStag will guess a structure/schema based on the ordering of the relations in your FROM clause. However, this guess can be over-ridden at either the SQL level (using DBStag specific SQL extensions) or at the API level. The default algorithm is to nest each relation element under the relation element preceeding it in the FROM clause; for instance: SELECT * FROM a NATURAL JOIN b NATURAL JOIN c If there are appropriately named foreign keys, the following data will be returned (assuming one row in each of a, b and c) (set (a (a_foo "...") (b (b_foo "...") (c (c_foo "..."))))) where 'x_foo' is a column in relation 'x' This is not always desirable. If both b and c have foreign keys into table a, DBStag will not detect this - you have to guide it. There are two ways of doing this - you can guide by bracketing your FROM clause like this: !!## !!## NOTE - THIS PART IS NOT SET IN STONE - THIS MAY CHANGE !!## SELECT * FROM (a NATURAL JOIN b) NATURAL JOIN c This will generate (set (a (a_foo "...") (b (b_foo "...")) (c (c_foo "...")))) Now b and c are siblings in the tree. 
The algorithm is similar to before: nest each relation element under the relation element preceeding it; or, if the preceeding item in the FROM clause is a bracketed structure, nest it under the first relational element in the bracketed structure. (Note that in MySQL you may not place brackets in the FROM clause in this way) Another way to achieve the same thing is to specify the desired tree structure using a DBStag specific SQL extension. The DBStag specific component is removed from the SQL before being presented to the DBMS. The extension is the USE NESTING clause, which should come at the end of the SQL query (and is subsequently removed before processing by the DBMS). SELECT * FROM a NATURAL JOIN b NATURAL JOIN c USE NESTING (set (a (b)(c))); This will generate the same tree as above (i.e. 'b' and 'c' are siblings). Notice how the nesting in the clause is the same as the nesting in the resulting tree structure. Note that 'set' is not a table in the underlying relational schema - the result data tree requires a named top level node to group all the 'a' relations under. You can call this top level element whatever you like. If you are using the DBStag API directly, you can pass in the nesting structure as an argument to the select call; for instance: my $seq = $dbh->selectall_xml(-sql=>q[SELECT * FROM a NATURAL JOIN b NATURAL JOIN c], -nesting=>'(set (a (b)(c)))'); or the equivalent - my $seq = $dbh->selectall_xml(q[SELECT * FROM a NATURAL JOIN b NATURAL JOIN c], '(set (a (b)(c)))'); If you like, you can also use XML here (only at the API level, not at the SQL level) - my $seq = $dbh->selectall_xml(-sql=>q[SELECT * FROM a NATURAL JOIN b NATURAL JOIN c], -nesting=>q[ ]); As you can see, this is a little more verbose. Most command line scripts that use this module should allow pass-through via the '-nesting' switch. 
Aliasing of functions and expressions If you alias a function or an expression, DBStag needs to know where to put the resulting column; the column must be aliased. This is inferred from the first named column in the function or expression; for example, in the SQL below SELECT blah.*, foo.*, foo.x - foo.y AS z The z element will be nested under the foo element You can force different nesting using a double underscore: SELECT blah.*, foo.*, foo.x - foo.y AS blah__z This will nest the z element under the blah element Conformance to DTD/XML-Schema DBStag returns the Data::Stag manpage structures that are equivalent to a simplified subset of XML (and also a simplified subset of lisp S-Expressions). These structures are examples of semi-structured data - a good reference is this book - Data on the Web: From Relations to Semistructured Data and XML Serge Abiteboul, Dan Suciu, Peter Buneman Morgan Kaufmann; 1st edition (January 2000) The schema for the resulting Stag structures can be seen to conform to a schema that is dynamically determined at query-time from the underlying relational schema and from the specification of the query itself. CLASS METHODS connect Usage - $dbh = DBIx::DBStag->connect($DSN); Returns - L Args - see the connect() method in L selectall_stag Usage - $stag = $dbh->selectall_stag($sql); $stag = $dbh->selectall_stag($sql, $nesting_clause); $stag = $dbh->selectall_stag(-template=>$template, -bind=>{%variable_bindinfs}); Returns - L Args - sql string, [nesting string], [bind hashref], [template DBIx::DBStag::SQLTemplate] Executes a query and returns a the Data::Stag manpage structure An optional nesting expression can be passed in to control how the relation is decomposed into a tree. 
The nesting expression can be XML or an S-Expression; see above for details selectall_xml Usage - $xml = $dbh->selectall_xml($sql); Returns - string Args - See selectall_stag() As selectall_stag(), but the results are transformed into an XML string selectall_sxpr Usage - $sxpr = $dbh->selectall_sxpr($sql); Returns - string Args - See selectall_stag() As selectall_stag(), but the results are transformed into an S-Expression string; see the Data::Stag manpage for more details. selectall_sax Usage - $dbh->selectall_sax(-sql=>$sql, -handler=>$sax_handler); Returns - string Args - sql string, [nesting string], handler SAX As selectall_stag(), but the results are transformed into SAX events [currently this is just a wrapper to selectall_xml but a genuine event generation model will later be used] selectall_rows Usage - $tbl = $dbh->selectall_rows($sql); Returns - arrayref of arrayref Args - See selectall_stag() As selectall_stag(), but the results of the SQL query are left undecomposed and unnested. The resulting structure is just a flat table; the first row is the column headings. This is similar to DBI->selectall_arrayref(). The main reason to use this over the direct DBI method is to take advantage of other stag functionality, such as templates prepare_stag SEMI-PRIVATE METHOD Usage - $prepare_h = $dbh->prepare_stag(-template=>$template); Returns - hashref (see below) Args - See selectall_stag() Returns a hashref { sth=>$sth, exec_args=>\@exec_args, cols=>\@cols, col_aliases_ordered=>\@col_aliases_ordered, alias=>$aliasstruct, nesting=>$nesting }; storenode Usage - $dbh->storenode($stag); Returns - Args - L Recursively stores a tree structure in the database SQL TEMPLATES DBStag comes with its own SQL templating system. This allows you to reuse the same canned SQL or similar SQL qeuries in different contexts. 
See the DBIx::DBStag::SQLTemplate manpage find_template Usage - $template = $dbh->find_template("my-template-name"); Returns - L Args - str Returns an object representing a canned paramterized SQL query. See the DBIx::DBStag::SQLTemplate manpage for documentation on templates list_templates Usage - $templates = $dbh->list_templates(); Returns - Arrayref of L Args - Returns a list of ALL defined templates - See the DBIx::DBStag::SQLTemplate manpage find_templates_by_schema Usage - $templates = $dbh->find_templates_by_schema($schema_name); Returns - Arrayref of L Args - str Returns a list of templates for a particular schema - See the DBIx::DBStag::SQLTemplate manpage find_templates_by_dbname Usage - $templates = $dbh->find_templates_by_dbname("mydb"); Returns - Arrayref of L Args - db name Returns a list of templates for a particular db Requires resources to be set up (see below) RESOURCES resources_list Usage - $rlist = $dbh->resources_list Returns - arrayref to a hashref Args - none Returns a list of resources; each resource is a hash {name=>"mydbname", type=>"rdb", schema=>"myschema", } SETTING UP RESOURCES The above methods rely on you having a file describing all the relational dbs available to you, and setting the env var DBSTAG_DBIMAP_FILE set (this is a : separated list of paths). This is alpha code - not fully documented, API may change Currently a resources file is a whitespace delimited text file - XML/Sxpr/IText definitions may be available later Here is an example of a resources file: # LOCAL mytestdb rdb Pg:mytestdb schema=test # SYSTEM worldfactbook rdb Pg:worldfactbook@db1.mycompany.com schema=wfb employees rdb Pg:employees@db2.mycompany.com schema=employees The first column is the nickname or logical name of the resource/db. This nickname can be used instead of the full DBI locator path (eg you can just use employees instead of dbi:Pg:dbname=employees;host=db2.mycompany.com The second column is the resource type - rdb is for relational database. 
You can use the same file to track other system datasources available to you, but DBStag is only interested in relational dbs. The 3rd column is a way of locating the resource - driver:name@host The 4th column is a ; separated list of tag=value pairs; the most important tag is the schema tag. Multiple dbs may share the same schema, and hence share SQL Templates COMMAND LINE SCRIPTS DBStag is usable without writing any perl, you can use command line scripts and files that utilise tree structures (XML, S-Expressions) selectall_xml.pl selectall_xml.pl -d [-n ] Queries database and writes decomposed relation as XML Can also be used with templates: selectall_xml.pl -d / ... selectall_html.pl selectall_html.pl -d [-n ] Queries database and writes decomposed relation as HTML with nested tables indicating the nested structures. stag-storenode.pl stag-storenode.pl -d Stores data from a file (Supported formats: XML, Sxpr, IText - see the Data::Stag manpage) in a normalized database. Gets it right most of the time. TODO - metadata help stag-autoddl.pl stag-autoddl.pl [-l ]* Takes data from a file (Supported formats: XML, Sxpr, IText - see the Data::Stag manpage) and generates a relational schema in the form of SQL CREATE TABLE statements. ENVIRONMENT VARIABLES DBSTAG_TRACE setting this environment will cause all SQL statements to be printed on STDERR BUGS This is alpha software! Probably several bugs. The SQL parsing can be quite particular - sometimes the SQL can be parsed by the DBMS but not by DBStag. The error messages are not always helpful. There are probably a few cases the SQL SELECT parsing grammar cannot deal with. If you want to select from views, you need to hack DBIx::DBSchema (as of v0.21) TODO Use SQL::Translator to make SQL DDL generation less Pg-specific; also for deducing foreign keys (right now foreign keys are guessed by the name of the column, eg table_id) Can we cache the grammar so that startup is not so slow? 
Improve algorithm so that events are fired rather than building up entire structure in-memory Tie in all DBI attributes accessible by hash, i.e.: $dbh->{...} Error handling WEBSITE http://stag.sourceforge.net AUTHOR Chris Mungall COPYRIGHT Copyright (c) 2004 Chris Mungall This module is free software. You may distribute this module under the same terms as perl itself DBIx-DBStag-0.12/scripts/0000755000076500000240000000000011331570203013603 5ustar cainstaffDBIx-DBStag-0.12/scripts/selectall_html.pl0000755000076500000240000000604111326157220017144 0ustar cainstaff#!/usr/local/bin/perl -w =head1 NAME selectall_html.pl =head1 SYNOPSIS selectall_html.pl -d "dbi:Pg:dbname=mydb;host=localhost" "SELECT * FROM a NATURAL JOIN b" =head1 DESCRIPTION =head1 ARGUMENTS =cut use strict; use Carp; use DBIx::DBStag; use Data::Stag; use Getopt::Long; my $debug; my $help; my $db; my $nesting; my $show; my $remove = ""; my $highlight = ""; GetOptions( "help|h"=>\$help, "db|d=s"=>\$db, "show"=>\$show, "remove|r=s"=>\$remove, "highlight=s"=>\$highlight, "nesting|p=s"=>\$nesting, ); if ($help) { system("perldoc $0"); exit 0; } my $dbh = DBIx::DBStag->connect($db); my $sql = shift @ARGV; if (!$sql) { print STDERR "Reading SQL from STDIN...\n"; $sql = ; } my $stag = $dbh->selectall_stag($sql, $nesting); my @rmlist = split(/\;\s*/, $remove); foreach (@rmlist) { my @was = $stag->findnode($_, []); } my %hih = map {$_=>1} split(/\;\s*/, $highlight); our $BORDER = 0; our $TDARGS = 'VALIGN="top"'; our $THARGS = 'BGCOLOR=#9999FF'; our $TERMCOLOUR = 'BGCOLOR=#DDDDFF'; print to_html($stag); $dbh->disconnect; sub to_html { my $stag = shift; my $row = 0; return sprintf("
%s
", join('', map { _to_html($_, $row++) } $stag->kids)); } sub _to_html { my $stag = shift; my $thisrow = shift; my @kids = $stag->kids; my $el = $stag->element; # my $type = $types{$el}; my $type = 'tr'; my $out = "<$type>"; my $pre = ''; my @hdrs = (); foreach my $kid (@kids) { next unless $kid->isterminal; my $data = $kid->data; if ($hih{$kid->element}) { $data = "$data"; } $out .= sprintf("%s", $data); push(@hdrs, $kid->element); } my @ntkids = $stag->ntnodes; my %elh = (); my @elts = (); # get unique ordered list foreach (@ntkids) { if (!$elh{$_->element}) { $elh{$_->element} = 1; push(@elts, $_->element); } } foreach my $subel (@elts) { push(@hdrs, $subel); my @stags = $stag->getnode($subel); my $inner; my @ntstags = grep {!$_->isterminal} @stags; if (@ntstags) { my $row = 0; $inner = sprintf("%s
", join('', map { _to_html($_, $row++) } @stags)); } else { $inner = join('
', map { _to_html($_) . '\n' } @stags); } $out .= sprintf("$inner"); } $out .= "\n"; if (!$thisrow) { $pre = '' . join('', map {"$_"} @hdrs) . ''; } return $pre . $out; } DBIx-DBStag-0.12/scripts/selectall_xml.pl0000755000076500000240000002550211326157220017003 0ustar cainstaff#!/usr/local/bin/perl -w # POD docs at end of file use strict; use Carp; use DBIx::DBStag; use Data::Dumper; use Getopt::Long; my $debug; my $help; my $db; my $nesting; my $show; my $file; my $user; my $pass; my $template_name; my $where; my $select; my $rows; my $writer; my $verbose; my @order; my $color; my $out; my $sgml; my $pre_sql; my $aliaspolicy; my $metadata; my @matrixcols; my @matrixcells; # cmd line interpreter gets rid of quotes; need to use backspace my @ARGV2 = (); while (my $arg = shift @ARGV) { while (substr($arg,-1) eq '\\' && @ARGV) { my $next = shift @ARGV; substr($arg,-1,1," $next"); } push(@ARGV2,$arg); } @ARGV = @ARGV2; GetOptions( "help|h"=>\$help, "rows"=>\$rows, "show"=>\$show, "sgml"=>\$sgml, "nesting|n=s"=>\$nesting, "file|f=s"=>\$file, "db|d=s"=>\$db, "user|u=s"=>\$user, "pass|p=s"=>\$pass, "template|t=s"=>\$template_name, "where|wh=s"=>\$where, "matrixcol|mcol=s@"=>\@matrixcols, "matrixcell|mcell=s@"=>\@matrixcells, "writer|w=s"=>\$writer, "select|s=s"=>\$select, "order=s@"=>\@order, "verbose|v"=>\$verbose, "aliaspolicy|alias=s"=>\$aliaspolicy, "colour|color"=>\$color, "out|o=s"=>\$out, "pre=s"=>\$pre_sql, "metadata"=>\$metadata, "trace"=>\$ENV{DBSTAG_TRACE}, ); @ARGV = map { if (/^\/(.*)/) {$template_name=$1;()} else {$_} } @ARGV; if ($help && !$template_name && !$db) { system("perldoc $0"); exit 0; } if ((@matrixcols && !@matrixcells) || (!@matrixcols && @matrixcells)) { print STDERR "-matrixcol and -matrixcell must be set together!\n"; exit 1; } if (@matrixcols) { $rows = 1; } my $H = Data::Stag->getformathandler($writer || $ENV{STAG_WRITER} || 'xml'); $H->use_color(1) if $color; if ($sgml) { $rows = 1; $H = Data::Stag->getformathandler('xml'); } my $sql; if 
($file) { open(F, $file) || die $file; $sql = join('', ); close(F); } elsif ($template_name) { # No SQL required if template provided } elsif ($help) { # deal with this later... } else { $sql = shift @ARGV; if ($sql eq '-') { print STDERR "Reading SQL from STDIN...\n"; $sql = ; } # if ($sql =~ /^\/(.*)/) { # # shorthand for a template # $template_name = $1; # $sql = ''; # } } my $template; if ($template_name) { $template = DBIx::DBStag->new->find_template($template_name); } if ($help) { if ($template) { my $varnames = $template->get_varnames; my $desc = $template->desc; # $desc =~ s/\s+/ /; if ($verbose) { require "Term/ANSIColor.pm"; $template->show(\*STDOUT, undef, sub { Term::ANSIColor::color(@_)} ); } else { $desc =~ s/\n */\n /mg; print "DESC:\n $desc\n"; } print "PARAMETERS:\n"; foreach my $vn (@$varnames) { print " $vn\n"; } my $nesting = $template->nesting; if ($nesting) { print "QUERY RESULT STRUCTURE (NESTING):\n"; print $nesting->sxpr; } } else { # show templates my $dbh = DBIx::DBStag->new; my $templates = $dbh->find_templates_by_dbname($db); foreach my $template (@$templates) { if ($verbose) { require "Term/ANSIColor.pm"; $template->show(\*STDOUT, undef, sub { Term::ANSIColor::color(@_)}, ); } else { my $desc = $template->desc || ''; $desc =~ s/\s*$//; printf "NAME: %s\nDESC: %s\n//\n", $template->name, $desc; } } } exit 0; } if (!$db) { die "you must specify a database name (logical name or dbi path) with -d"; } # QUERY DB my $dbh = DBIx::DBStag->connect($db, $user, $pass); $dbh->include_metadata($metadata); if ($pre_sql) { $dbh->do($pre_sql); } my $xml; my @sel_args = (-sql=>$sql, -nesting=>$nesting); if ($template) { if ($where) { $template->set_clause(where => $where); } if ($select) { $template->set_clause(select => $select); } if (@order) { $template->set_clause(order => join(", ",@order)); } my @args = (); my %argh = (); while (my $arg = shift @ARGV) { # print "ARG:$arg;;\n"; if ($arg =~ /(.*)\@=(.*)/) { my ($k,$v) = ($1,$2); $v = 
[split(/\,/,$v)]; $argh{$k} = $v; } elsif ($arg =~ /(.*)=(.*)/) { my ($k,$v) = ($1,$2); $argh{$k} = $v; } else { push(@args, $arg); } } my $bind = \@args; if (%argh) { $bind = \%argh; if (@args) { die("can't used mixed argument passing"); } } @sel_args = (-template=>$template, -nesting=>$nesting, -bind=>$bind); } if ($aliaspolicy) { push(@sel_args, -aliaspolicy=>$aliaspolicy); } eval { if ($rows) { my $count = 0; my $prep_h = $dbh->prepare_stag(@sel_args); my $cols = $prep_h->{cols}; my $sth = $prep_h->{sth}; my $exec_args = $prep_h->{exec_args}; my $rv = $sth->execute(@$exec_args); if (@matrixcols) { my @COL = (); my @CELL = (); for (my $i=0;$i<@$cols;$i++) { my $col = $cols->[$i]; foreach (@matrixcols) { if ($_ eq $col) { $COL[$i]=1; } } foreach (@matrixcells) { if ($_ eq $col) { $CELL[$i]=1; } } } while (my $r = $sth->fetchrow_arrayref) { my @row = (); for (my $i=0;$i<@$cols;$i++) { if ($COL[$i]) { } elsif ($COL[$i]) { } else { } } } } while (my $r = $sth->fetchrow_arrayref) { # TODO: html if ($sgml) { if (!$count) { $H->start_event('table'); $H->event(title=>"Query Results"); $H->start_event('tgroup'); $H->event('@'=>[ [cols=>scalar(@$r)]]); $H->event(thead=>[ [row=>[ map {[entry=>$_]} @$cols]]]); $H->start_event('tbody'); } $H->event(row=>[map {[entry=>$_]} @$r]); } else { # ASCII printf "%s\n", join("\t", map {esc_col_val($_)} @$r); } $count++; } } # end of ROWS mode else { # HIERARCHICAL my $fh; if ($out) { my $fh = FileHandle->new(">$out") || die "cannot write to $out"; $H->fh($fh); } else { $H->fh(\*STDOUT); } my $stag = $dbh->selectall_stag(@sel_args); $stag->events($H); $fh->close if $fh; } }; if ($@) { print "FAILED\n$@"; } $dbh->disconnect; if ($show) { my ($sql, @exec_args) = $dbh->last_sql_and_args; print "DBI SQL:\n$sql\n\nARGUMENT BINDINGS: @exec_args\n"; } #print $xml; exit 0; sub esc_col_val { my $str = shift; return '\\NULL' unless defined $str; $str =~ s/\t/\\t/g; $str =~ s/\n/\\n/g; $str; } __END__ =head1 NAME selectall_xml.pl =head1 SYNOPSIS 
selectall_xml.pl [-d ] [-f file of sql] [-nesting|n ] SQL =head1 DESCRIPTION This script will query a database using either SQL provided by the script user, or using an SQL templates; the query results will be turned into XML using the L module. The nesting of the XML can be controlled by the DBStag SQL extension "USE NESTING..." =head2 EXAMPLES selectall_xml.pl -d "dbi:Pg:dbname=mydb;host=localhost"\ "SELECT * FROM a NATURAL JOIN b" =head2 TEMPLATES A parameterized SQL template (canned query) can be used instead of specifying the full SQL For example: selectall_xml.pl -d genedb /genedb-gene gene_symbol=Adh Or: selectall_xml.pl -d genedb /genedb-gene Adh Or: selectall_xml.pl -d genedb /genedb-gene gene_symbol@=Adh,dpp,bam,indy A template is indicated by the syntactic shorthand of using a slash to precede the template name; in this case the template is called B. the -t option can also be used. All the remaining arguments are passed in as SQL template parameters. They can be passed in as either name=value pairs, or as a simple list of arguments which get passed into the template in order To use templates, you should have the environment variable B set. See B for details. 
=head2 LISTING AVAILABLE TEMPLATES FOR A DB selectall_xml.pl -d mydb -h =head2 LISTING VARIABLES FOR A TEMPLATE selectall_xml.pl /genedb-gene -h =head1 ENVIRONMENT VARIABLES =over =item DBSTAG_DBIMAP_FILE A file containing configuration details for local databases =item DBSTAG_TEMPLATE_DIRS list of directories (seperated by B<:>s) to be searched when templates are requested =back =head1 COMMAND LINE ARGUMENTS =over =item -h|help shows this page if no other arguments are given if a template is specified, gives template details if a db is specified, lists templates for that db use in conjunction with -v for full descriptions =item -d|dbname DBNAME this is either a full DBI locator string (eg B) or it can also be a shortened "nickname", which is then looked up in the file pointed at by the environment variable B =item -u|user USER database user identity =item -p|password PASS database password =item -f|file SQLFILE this is a path to a file containing SQL that will be executed, as an alternative to writing the SQL on the command line =item -n|nesting NESTING-EXPRESSIONS a bracketed expression indicating how to the resulting objects/XML should be nested. See L for details. =item -t|template TEMPLATE-NAME the name of a template; see above =item -wh|where WHERE-CLAUSE used to override the WHERE clause of the query; useful for combining with templates You can append to an existing where clause by using the prefix B<+> =item -s|select SELECT-COLS used to override the SELECT clause of the query; useful for combining with templates =item -rows sometimes it is preferable to return the results as a table rather than xml or a similar nested structure. 
specifying -rows will fetch a table, one line per row, and columns seperated by tabs =item -pre SQL a piece of SQL is that is executed immediately before the main query; e.g.: -pre "SET search_path=myschema,public" =item -o|out FILE a file to output the results to =item -w|writer WRITER writer class; can be any perl class, or one of these =over =item xml [default] =item sxpr lisp S-Expressions =item itext indented text =back =item -color shows results in color (sxpr and itext only) =item -show will show the parse of the SQL statement =back =cut DBIx-DBStag-0.12/scripts/stag-autoddl.pl0000755000076500000240000000445711326157220016551 0ustar cainstaff#!/usr/local/bin/perl -w =head1 NAME stag-autoddl.pl =head1 SYNOPSIS stag-autoddl.pl -parser XMLAutoddl -handler ITextWriter file1.txt file2.txt stag-autoddl.pl -parser MyMod::MyParser -handler MyMod::MyWriter file.txt =head1 DESCRIPTION script wrapper for the Data::Stag modules =over ARGUMENTS =item -help|h shows this document =item -p|parser FORMAT FORMAT is one of xml, sxpr or itext, or the name of a perl module xml assumed as default =item -w|writer FORMAT FORMAT is one of xml, sxpr or itext, or the name of a perl module This is only used if transforms are required on the data to turn it into relational form; the transformed xml (or other stag format) is stored in the file specified by -t =item -link|l NODE_NAME this node name will be used as a linking table multiple linking nodes can be set: -l foo2bar -l x2y -l bim2bum =item -t TRANSFORMED_FILE_NAME the transformed input file is written here =back =cut use strict; use Carp; use Data::Stag qw(:all); use DBIx::DBStag; use FileHandle; use Getopt::Long; my $parser = ""; my $handler = ""; my $mapf; my $tosql; my $toxml; my $toperl; my $debug; my $help; my @link = (); my $ofn; GetOptions( "help|h"=>\$help, "parser|format|p=s" => \$parser, "handler|writer|w=s" => \$handler, "xml"=>\$toxml, "perl"=>\$toperl, "debug"=>\$debug, "link|l=s@"=>\@link, "transform|t=s"=>\$ofn, ); 
if ($help) { system("perldoc $0"); exit 0; } my $db = DBIx::DBStag->new; my $fn = shift @ARGV; die "max 1 file" if @ARGV; autoddl($fn); sub autoddl { my $fn = shift; my $tree = Data::Stag->parse($fn, $parser); my $ddl = $db->autoddl($tree, \@link); my $transforms = $db->source_transforms; if (@$transforms) { foreach (@$transforms) { print STDERR "-- SOURCE REQUIRES TRANSFORM: $_->[0] => $_->[1]\n"; } if (!$ofn) { print STDERR "-- $fn requires transforms; consider running with -transform\n"; } else { $tree->transform(@$transforms); my $W = $tree->getformathandler($handler || 'xml'); my $ofh = FileHandle->new(">$ofn") || die("cannot write transformed file $ofn"); $W->fh($ofh); # $W->fh(\*STDOUT); $tree->events($W); $ofh->close; } } print $ddl; } DBIx-DBStag-0.12/scripts/stag-autotemplate.pl0000755000076500000240000000375111326157220017615 0ustar cainstaff#!/usr/local/bin/perl -w =head1 NAME stag-autotemplate.pl =head1 SYNOPSIS stag-autotemplate.pl \ -s my-schema-name -dir ./templates my-sample-data.xml stag-autotemplate.pl \ -s my-schema-name -dir ./templates -no_pp my-stagschema.sxpr =head1 DESCRIPTION Generates Stag SQL Templates files based on sample data or a stag-schema See the script stag-autoschema.pl for generating stag schemas from sample data =head1 ARGUMENTS =over =item -no_pp do not pre-process (the input is a stag schema, not a sample data file) =item -dir directory in which to write autogenerated templates =back =cut use strict; use Carp; use Data::Stag qw(:all); use DBIx::DBStag; use FileHandle; use Getopt::Long; my $parser = ""; my $handler = ""; my $mapf; my $tosql; my $toxml; my $toperl; my $debug; my $help; my @link = (); my $ofn; my $no_pp; my $dir = '.'; my $schema_name; GetOptions( "help|h"=>\$help, "parser|format|p=s" => \$parser, "handler|writer|w=s" => \$handler, "xml"=>\$toxml, "perl"=>\$toperl, "debug"=>\$debug, "link|l=s@"=>\@link, "transform|t=s"=>\$ofn, "schema|s=s"=>\$schema_name, "no_pp|n"=>\$no_pp, "dir|d=s"=>\$dir, ); if ($help) { 
system("perldoc $0"); exit 0; } my $db = DBIx::DBStag->new; if (!$schema_name) { print STDERR "You should consider using the -schema|s option to set schema name\n"; } my $fn = shift @ARGV; die "max 1 file" if @ARGV; autotemplate($fn); sub autotemplate { my $fn = shift; my $tree = Data::Stag->parse($fn, $parser); my $schema = $tree; if (!$no_pp) { $schema = $tree->autoschema; } my @tts = $db->autotemplate($schema); foreach my $tt (@tts) { my $base = $schema_name || 'AUTO'; my $fn = "$dir/$base-$tt->[0].stg"; open(F, ">$fn") || die("cannot open $fn"); $tt->[1] =~ s/\nschema:/\nschema: $schema_name/ if $schema_name; print F "$tt->[1]"; close(F); } } DBIx-DBStag-0.12/scripts/stag-bulkload.pl0000755000076500000240000000344211326157220016703 0ustar cainstaff#!/usr/local/bin/perl -w # POD docs at end use strict; use Carp; use Data::Stag qw(:all); use DBIx::DBStag::SimpleBulkload; use Getopt::Long; my $parser = ""; my $handler = ""; my $debug; my $help; my $loadrecord; GetOptions( "help|h"=>\$help, "parser|format|p=s" => \$parser, "handler|writer|w=s" => \$handler, "loadrecord|l=s" => \$loadrecord, "debug"=>\$debug, ); if ($help) { system("perldoc $0"); exit 0; } my @files = @ARGV; foreach my $fn (@files) { $handler = DBIx::DBStag::SimpleBulkload->new; $handler->load_on_event($loadrecord) if $loadrecord; my @pargs = (-file=>$fn, -format=>$parser, -handler=>$handler); if ($fn eq '-') { if (!$parser) { $parser = 'xml'; } @pargs = (-format=>$parser, -handler=>$handler, -fh=>\*STDIN); } my $tree = Data::Stag->parse(@pargs); } exit 0; __END__ =head1 NAME stag-bulkload.pl - creates bulkload SQL for input data =head1 SYNOPSIS # convert XML to IText stag-bulkload.pl -l person file1.xml file2.xml # use a custom parser/generator and a custom writer/generator stag-bulkload.pl -p MyMod::MyParser file.txt =head1 DESCRIPTION Creates bulkload SQL statements for an input file Works only with certain kinds of schemas, where the FK relations make a tree (not a graph); i.e. 
the only FKs are to the parent You do not need a connection to the DB It is of no use for incremental loading - it assumes integer surrogate promary keys and starts these from 1 =head1 ARGUMENTS =over =item -p|parser FORMAT FORMAT is one of xml, sxpr or itext, or the name of a perl module xml assumed as default =item -l|loadrecord NODE adds a COMMIT statement after the INSERTs for this node =back =head1 SEE ALSO L L =cut DBIx-DBStag-0.12/scripts/stag-check-resources.pl0000755000076500000240000000141411326157220020170 0ustar cainstaff#!/usr/local/bin/perl # cjm@fruitfly.org use strict; use Carp; use DBIx::DBStag; use Data::Stag qw(:all); use Data::Dumper; use Getopt::Long; my $h = {}; my $term; my @hist = (); my $match = shift; # parent dbh my $sdbh = DBIx::DBStag->new; my $resources = $sdbh->resources_list; foreach my $r (@$resources) { next unless $r->{type} eq 'rdb'; my $name = $r->{name}; eval { my $testdbh = DBIx::DBStag->connect($name); $testdbh->disconnect; }; my $ok = $@ ? 'FAIL' : 'PASS'; printf "%12s $ok\n", $name; } exit 0; __END__ =head1 NAME stag-check-resources.pl =head1 SYNOPSIS stag-check-resources.pl =head1 DESCRIPTION Iterates all resources pointed at in DBSTAG_DBIMAP_FILE and determines if they are accessible or not =cut DBIx-DBStag-0.12/scripts/stag-connect-parameters.pl0000755000076500000240000000224611326157220020701 0ustar cainstaff#!/usr/local/bin/perl # cjm@fruitfly.org # currently assumes Pg use strict; use Carp; use DBIx::DBStag; use Data::Stag qw(:all); use Data::Dumper; use Getopt::Long; my $h = {}; my $dbname = ''; my $term; my @hist = (); GetOptions( "dbname|d=s"=>\$dbname, ); my $db = shift || $dbname; # parent dbh my $sdbh = DBIx::DBStag->new; my $resource = $sdbh->resources_hash->{$db}; my $pstr = ''; if ($resource) { my $loc = $resource->{loc}; if ($loc =~ /(\w+):(\S+)\@(\S+)/) { $pstr = "-h $3 $2"; } if (!$pstr) { print STDERR "Could not resolve: $db [from $loc]\n"; exit 1; } } else { print STDERR "No such resource: $db\n"; exit 
1; } print $pstr; exit 0; __END__ =head1 NAME stag-connect-parameters.pl =head1 SYNOPSIS alias db='stag-connect-parameters.pl -d' psql `db mydb` =head1 DESCRIPTION Looks up the connection parameters for a logical dbname in the metadata file pointed at by DBSTAG_DBIMAP_FILE See L for more on this mapping =head2 ARGUMENTS =head3 -d B This is either a DBI locator or the logical name of a database in the DBSTAG_DBIMAP_FILE config file =cut DBIx-DBStag-0.12/scripts/stag-ir.pl0000755000076500000240000002277711326157220015534 0ustar cainstaff#!/usr/local/bin/perl -w # POD docs at bottom of file use strict; use Data::Stag qw(:all); use DBIx::DBStag; use Getopt::Long; my $record_type; my $unique_key; my $dir; my $fmt = ''; my $out; my $help; my $top; my $dbname; my $qf; my @query = (); my $keys; my $reset; my $verbose; my $create; my $clear; my $newonly; my $insertonly; my $transaction_size; my $all; my $ID_DOMAIN = 'VARCHAR(255)'; GetOptions("record_type|r=s"=>\$record_type, "unique_key|unique|u|k=s"=>\$unique_key, "parser|format|p=s"=>\$fmt, "out|o=s"=>\$out, "dbname|d=s"=>\$dbname, "top|t=s"=>\$top, "query|q=s@"=>\@query, "qf=s"=>\$qf, "all|a"=>\$all, "create"=>\$create, "clear"=>\$clear, "newonly"=>\$newonly, "insertonly"=>\$insertonly, "transaction_size=s"=>\$transaction_size, "help|h"=>\$help, "keys"=>\$keys, "reset"=>\$reset, "idtype=s"=>\$ID_DOMAIN, "verbose|v"=>\$verbose, ); if ($help) { system("perldoc $0"); exit 0; } my $dbh = DBIx::DBStag->connect($dbname); if ($create) { $dbh->do("CREATE TABLE $record_type (id $ID_DOMAIN NOT NULL PRIMARY KEY, xml TEXT NOT NULL)"); # No index required, because it is a primary key } if ($clear) { $dbh->do("DELETE FROM $record_type"); } my $is_store_mode = scalar(@ARGV); $dbh->dbh->{AutoCommit} = 0 if $transaction_size; if ($is_store_mode) { my $sth = $dbh->prepare("INSERT INTO $record_type (id,xml) VALUES (?, ?)"); my $sthcheck = $dbh->prepare("SELECT id FROM $record_type WHERE id = ?"); my $n=0; my $store_handler; if 
($insertonly) { my $sth = $dbh->prepare("INSERT INTO $record_type (id,xml) VALUES (?, ?)"); $store_handler = Data::Stag->makehandler($record_type=>sub { my ($self, $stag) = @_; my $id = $stag->get($unique_key); $sth->execute($id, $stag->xml); $n++; $dbh->commit if $transaction_size && $n % $transaction_size == 0; return; }); } elsif ($newonly) { # don't touch existing $store_handler = Data::Stag->makehandler($record_type=>sub { my ($self, $stag) = @_; my $id = $stag->get($unique_key); my $ids = $dbh->selectcol_arrayref($sthcheck,undef,$id); if (!@$ids) { $sth->execute($id, $stag->xml); $n++; } $dbh->commit if $transaction_size && $n % $transaction_size == 0; return; }); } else { # default clobber mode my $sthupdate = $dbh->prepare("UPDATE $record_type SET xml = ? WHERE id = ?"); $store_handler = Data::Stag->makehandler($record_type=>sub { my ($self, $stag) = @_; my $id = $stag->get($unique_key); my $ids = $dbh->selectcol_arrayref($sthcheck,undef,$id); if (@$ids) { $sthupdate->execute($stag->xml, $id); } else { $sth->execute($id, $stag->xml); } $n++; $dbh->commit if $transaction_size && $n % $transaction_size == 0; return; }); } foreach my $file (@ARGV) { my $p; if ($file eq '-') { $fmt ||= 'xml'; $p = Data::Stag->parser(-format=>$fmt, -fh=>\*STDIN); $p->handler($store_handler); $p->parse(-fh=>\*STDIN); } else { if (!-f $file) { print "the file \"$file\" does not exist\n"; } $p = Data::Stag->parser($file, $fmt); $p->handler($store_handler); $p->parse($file); } } $dbh->commit if $transaction_size; } else { # query mode if ($keys) { my $cols = $dbh->selectcol_arrayref("SELECT id FROM $record_type"); printf "$_\n", $_ foreach (@$cols); exit 0; } if ($qf) { open(F, $qf) || die "cannot open queryfile: $qf"; @query = map {chomp;$_} ; close(F); } my $fh; if ($out) { $fh = FileHandle->new(">$out") || die("cannot write to $out"); } else { $fh = \*STDOUT; } if ($top) { print $fh "\n"; } if ($all) { my $sth = $dbh->prepare("SELECT xml FROM $record_type") || die; $sth->execute; 
while (my $row = $sth->fetchrow_arrayref) { print $fh "@$row"; } } if (@query) { my $n_found = 0; my $sth = $dbh->prepare("SELECT xml FROM $record_type WHERE id = ?"); foreach my $q (@query) { my $xmls = $dbh->selectcol_arrayref($sth, undef, $q); if (!@$xmls) { print STDERR "Could not find a record indexed by key: \"$q\"\n"; next; } if (@$xmls > 1) { die "assertion error $q"; } my $xml = shift @$xmls; print $fh $xml; $n_found++; } if (!$n_found && !$top) { print STDERR "NONE FOUND!\n"; } } if ($top) { print "\n"; } $fh->close if $out; } $dbh->disconnect; exit 0; __END__ =head1 NAME stag-ir.pl - information retrieval using a simple relational index =head1 SYNOPSIS stag-ir.pl -r person -k social_security_no -d Pg:mydb myrecords.xml stag-ir.pl -d Pg:mydb -q 999-9999-9999 -q 888-8888-8888 =head1 DESCRIPTION Indexes stag nodes (XML Elements) in a simple relational db structure - keyed by ID with an XML Blob as a value Imagine you have a very large file of data, in a stag compatible format such as XML. 
You want to index all the elements of type B; each person can be uniquely identified by B, which is a direct subnode of B The first thing to do is to build the index file, which will be stored in the database mydb stag-ir.pl -r person -k social_security_no -d Pg:mydb myrecords.xml You can then use the index "person-idx" to retrieve B nodes by their social security number stag-ir.pl -d Pg:mydb -q 999-9999-9999 > some-person.xml You can export using different stag formats stag-ir.pl -d Pg:mydb -q 999-9999-9999 -w sxpr > some-person.xml You can retrieve multiple nodes (although these need to be rooted to make a valid file) stag-ir.pl -d Pg:mydb -q 999-9999-9999 -q 888-8888-8888 -top personset Or you can use a list of IDs from a file (newline delimited) stag-ir.pl -d Pg:mydb -qf my_ss_nmbrs.txt -top personset =head2 ARGUMENTS =head3 -d DB_NAME This database will be used for storing the stag nodes The name can be a logical name or DBI locator or DBStag shorthand - see L The database must already exist =head3 -clear Deletes all data from the relation type (specified with B<-r>) before loading =head3 -insertonly Does not check if the ID in the file exists in the db - will always attempt an INSERT (and will fail if ID already exists) This is the fastest way to load data (only one SQL operation per node rather than two) but is only safe if there is no existing data (Default is clobber mode - existing data with same ID will be replaced) =head3 -newonly If there is already data in the specified relation in the db, and the XML being loaded specifies an ID that is already in the db, then this node will be ignored (Default is clobber mode - existing data with same ID will be replaced) =head3 -transaction_size A commit will be performed every n UPDATEs/COMMITs (and at the end) Default is autocommit note that if you are using -insertonly, and you are using transactions, and the input file contains an ID already in the database, then the transaction will fail because this script 
will try and insert a duplicate ID =head3 -r RELATION-NAME This is the name of the stag node (XML element) that will be stored in the index; for example, with the XML below you may want to use the node name B and the unique key B ... ... ... This flag should only be used when you want to store data =head3 -k UNIQUE-KEY This node will be used as the unique/primary key for the data This node should be nested directly below the node that is being stored in the index - if it is more that one below, specify a path This flag should only be used when you want to store data =head3 -u UNIQUE-KEY Synonym for B<-k> =head3 -create If specified, this will create a table for the relation name specified below; you should use this the first time you index a relation =head3 -idtype TYPE (optional) This is the SQL datatype for the unique key; it defaults to VARCHAR(255) If you know that your id is an integer, you can specify INTEGER here If your id is always a 8-character field you can do this -idtype 'CHAR(8)' This option only makes sense when combined with the B<-c> option =head3 -p PARSER This can be the name of a stag supported format (xml, sxpr, itext) - XML is assumed by default It can also be a module name - this module is used to parse the input file into a stag stream; see L for details on writing your own parsers/event generators This flag should only be used when you want to store data =head3 -q QUERY-ID Fetches the relation/node with unique key value equal to query-id Multiple arguments can be passed by specifying -q multple times This flag should only be used when you want to query data =head3 -top NODE-NAME If this is specified in conjunction with B<-q> or B<-qf> then all the query result nodes will be nested inside a node with this name (ie this provides a root for the resulting document tree) =head3 -qf QUERY-FILE This is a file of newline-seperated IDs; this is useful for querying the index in batch =head3 -keys This will write a list of all primary keys in the 
index =head1 SEE ALSO L For more complex stag to database mapping, see L and the scripts L use file DBM indexes L is for storing fully normalised stag trees L =cut DBIx-DBStag-0.12/scripts/stag-pgslurp.pl0000755000076500000240000001125211326157220016600 0ustar cainstaff#!/usr/local/bin/perl -w =head1 NAME stag-pgslurp.pl =head1 SYNOPSIS stag-pgslurp.pl -d "dbi:Pg:dbname=mydb;host=localhost" myfile.xml =head1 DESCRIPTION This script is for storing data (specified in a nested file format such as XML or S-Expressions) in a database. It assumes a database schema corresponding to the tags in the input data already exists. =head1 MAKING DATABASE FROM XML FILES It is possible to automatically generate a database schema and populate it directly from XML files (or from Stag objects or other Stag compatible files). Of course, this is no substitute for proper relational design, but often it can be necessary to quickly generate databases from heterogeneous XML data sources, for the purposes of data mining. There are 3 steps involved: 1. Prepare the input XML (for instance, modifying db reserved words). 2. Autogenerate the CREATE TABLE statements, and make a db from these. 3. Store the XML data in the database. =head2 Step 1: Prepare input file You may need to make modifications to your XML before it can be used to make a schema. If your XML elements contain any words that are reserved by your DB you should change these. Any XML processing tool (eg XSLT) can be used. Alternatively you can use the script 'stag-mogrify' e.g. to get rid of '-' characters (this is how Stag treates attributes) and to change the element with postgresql reserved word 'date', do this: stag-mogrify.pl -xml -r 's/^date$/moddate/' -r 's/\-//g' data.xml > data.mog.xml You may also need to explicitly make elements where you will need linking tables. 
For instance, if the relationship between 'movie' and 'star' is many-to-many, and your input data looks like this: (movie (name "star wars") (star (name "mark hamill"))) You will need to *interpose* an element between these two, like this: (movie (name "star wars") (movie2star (star (name "mark hamill")))) you can do this with the -i switch: stag-mogrify.pl -xml -i movie,star,movie2star data.xml > data.mog.xml or if you simply do: stag-mogrify.pl -xml -i star data.xml > data.mog.xml the mogrifier will simply interpose an element above every time it sees 'star'; the naming rule is to use the two elements with an underscore between (in this case, 'movie_star'). =head2 Step 2: Generating CREATE TABLE statements Use the stag-autoddl.pl script; stag-autoddl.pl data.mog.xml > table.sql The default rule is to create foreign keys from the nested element to the outer element; you will want linking tables tobe treated differently (a linking table will point to parent and child elements). stag-autoddl.pl -l movie2star -l star2character data.mog.xml > table.sql Once you have done this, load the statements into your db; eg for postgresql (for other databases, use L) psql -a mydb < table.sql If something goes wrong, go back to step 1 and sort it out! Note that certain rules are followed: ever table generated gets a surrogate primary key of type 'serial'; this is used to generate foreign key relationships. The rule used is primary and foreign key names are the name of the table with the '_id' suffix. Feel free to modify the autogenerated schema at this stage (eg add uniqueness constraints) =head2 Step 3: Store the data in the db stag-pgslurp.pl -u movie -d 'dbi:Pg:mydb' data.mog.xml You generally dont need extra metadata here; everything can be infered by introspecting the database. 
The -u|unit switch controls when transactions are committed If this works, you should now be able to retreive XML from the database, eg selectall_xml.pl -d 'dbi:Pg:mydb' 'SELECT * FROM x NATURAL JOIN y' =cut use strict; use Carp; use Data::Stag; use DBIx::DBStag; use Getopt::Long; my $debug; my $help; my $db; my $unit; GetOptions( "help|h"=>\$help, "db|d=s"=>\$db, "unit|u=s"=>\$unit, ); if ($help) { system("perldoc $0"); exit 0; } #print STDERR "Connecting to $db\n"; my $fn = shift @ARGV; slurp($fn); sub slurp { my $fn = shift; my $tree = Data::Stag->parse($fn, $parser); fcmd("$DROPDB $db"); cmd("$CREATEDB $db"); my $dbh = DBIx::DBStag->connect($db); $dbh->dbh->{AutoCommit} = 0; $dbh->do($db->autoddl($tree, \@link)); if ($unit) { my $H = Data::Stag->makehandler($unit=>sub { my $self = shift; my $stag = shift; $dbh->begin_work; $dbh->storenode($stag); $dbh->commit; }); Data::Stag->parse(-file=>$fn, -handler=>$H); } else { my $stag = Data::Stag->parse($fn); $dbh->storenode($stag); $dbh->commit; } } sub cmd { my $err = system("@_"); die "error in: @_" if $err; } sub fcmd { my $err = system("@_"); } DBIx-DBStag-0.12/scripts/stag-qsh0000755000076500000240000004005211326157220015265 0ustar cainstaff#!/usr/local/bin/perl # stag-q # cjm@fruitfly.org use strict; no strict qw(vars); use Carp; use DBIx::DBStag; use Data::Stag qw(:all); use Data::Dumper; use Getopt::Long; use FileHandle; use Term::ANSIColor; my $h = {}; $| = 1; my $dbname = ''; my $connect; my $term; my @hist = (); my $XTERMMODE = 0; my $MLINEMODE = 0; my $TBLMODE = 0; my $PAGER = "less -fR"; my $cscheme = { 'keyword'=>'cyan', 'variable'=>'magenta', 'text' => 'reset', 'comment' => 'red', 'block' => 'blue', 'property' => 'green', }; my $dbimap; my $schema; GetOptions( "dbname|d=s"=>\$dbname, "connect|c"=>\$connect, "dbimap=s"=>\$dbimap, "schema|s=s"=>\$schema, ); if ($dbimap) { $ENV{DBSTAG_DBIMAP_FILE} = $dbimap; } # parent dbh my $sdbh = DBIx::DBStag->new; # child dbh my $dbh; my $stag; my $res; my $loc; my 
$templates = []; my $varnames = []; my $options = {}; my $nesting; my $rows = []; my $template; my $template_name = ''; my %exec_argh = (); my $resources = $sdbh->resources_list; my $resources_hash = $sdbh->resources_hash; my @dbresl = grep {$_->{type} eq 'rdb'} @$resources; my @dbnames = (map {$_->{name}} @dbresl); my $W = Data::Stag->getformathandler('sxpr'); my $ofh = \*STDOUT; $W->fh($ofh); $W->use_color(1); if ($connect) { db($dbname); } else { setdb($dbname) if $dbname; } shell(); exit 0; sub shell { my $prompt = $ENV{STAG_SHELL} || "Stag[\$dbname]\$template_name> "; my $quit = 0; my @lines = (); my $r; my $rv; my $outfh; my $return; my $echo; my $line; # welcome; require Term::ReadLine; require Shell; welcome(); # checkoptions; $term = shift || Term::ReadLine->new($prompt); my $rcfile = "$ENV{HOME}/.stagshellrc"; if (-f $rcfile) { open(F, $rcfile); @lines = ; close(F); } my $end_signal = ";"; while (!$quit) { my $line; if ($MLINEMODE) { $line = ''; my $thisline = $term->readline(prompt($prompt)); if ($thisline =~ /^\\/ || $thisline =~ /^\w$/ || $thisline =~ /^\w\s+/ || $thisline =~ /^\//) { $line = $thisline; } else { while (1) { if($thisline !~ /(.*)$end_signal/) { $line.= "$thisline\n"; } else { $line.= "$1\n"; last; } $thisline = $term->readline('- '); } } } else { $line = @lines ? 
shift @lines : $term->readline(prompt($prompt)); } if ($line =~ /^\^/) { $line =~ s/^\^//; print "$prompt$line"; my $e = ; if ($e =~ /^q/) { $line = ""; @lines = (); } } if ($options->{echo} && $line !~ /[\+]wait/) { if ($line =~ /^\#/) { print "$line\n"; } else { print "$prompt$line\n"; } if ($options->{sleep}) { sleep $options->{sleep}; } if ($options->{wait}) { sleep $options->{wait}; } } my ($cmd, @w) = split(' ',$line); my $rest = join(" ", @w); $_ = $cmd; addhist($line); $line =~ s/\#[^\n]*\n*/\n/gs; $line =~ s/^\s*select/:SELECT/i; # : - sql tunnel and escape everything after in quotes if ($line =~ /^:/) { if ($line =~ /\!\s*$/) { $line =~ s/\!\s*$//; # interpolate $line = eval("\"$line\""); } $line =~ s/^:/ sqlselect q\[/; $line .= ']'; } if ($line =~ /^\/\//) { # $line =~ s/^\/\/(.*)/\@r = apph\-\>$1; print tree2xml\(\@r\)/; } if ($line =~ /^\//) { # $line =~ s/^\//\@r = apph\-\>/; } if ($line =~ /^\\dd\s*(.*)/) { my $arg = $1 ? "('$1')" : ''; $line = "dshowdbs $arg"; } if ($line =~ /^\\d\s*(.*)/) { my $arg = $1 ? "('$1')" : ''; $line = "showdbs $arg;"; } if ($line =~ /^\\tt\s*(.*)/) { my $arg = $1 ? "('$1')" : ''; $line = "dshowtemplates $arg;"; } if ($line =~ /^\\t\s*(.*)/) { my $arg = $1 ? "('$1')" : ''; $line = "showtemplates $arg;"; } if ($line =~ /^\\v\s*(.*)/) { my $arg = $1 ? "('$1')" : ''; $line = "showvars $arg;"; } if ($line =~ /^d\s+(.*)/) { $line = "db '$1';"; } if ($line =~ /^w\s+(.*)/) { $line = "writer '$1';"; } if ($line =~ /^t\s+(.*)/) { $line = "template '$1';"; } if ($line =~ /^\/$/) { ex(); next; } if ($line =~ /^\/\s*(.*)$/) { ex(split(' ', $1)); next; } if ($line =~ /^\\l/) { $MLINEMODE = !$MLINEMODE; printf "MLINEMODE = %s\n", $MLINEMODE ? 'MULTI' : 'SINGLE'; $line = ''; } if ($line =~ /^\\x/) { $XTERMMODE = !$XTERMMODE; printf "XTERMMODE = %s\n", $XTERMMODE ? 'ON' : 'OFF'; $line = ''; } if ($line =~ /^\\r/) { $TBLMODE = !$TBLMODE; printf "TBLMODE = %s\n", $TBLMODE ? 
'ON' : 'OFF'; $line = ''; } if ($line =~ /^\\c/) { showresourcesfile(); next; } if ($line =~ /^\\q/) { $quit = 1; last; } if ($line =~ /^(\w+)\s*=\s*(.*)/) { $exec_argh{"$1"} = $2; msg("SETTING $1 to $2"); next; } # ! - shell and escape everything after in quotes if ($line =~ /^\!/) { $line =~ s/^\!/sh q\[/; $line .= ']'; } # ? - show if ($line =~ /^\?/) { $line =~ s/^\?/help /; } # ? - show if ($line =~ /^\#/) { next; } # + is the set command if ($line =~ /^\+/) { $line =~ s/\+//; $line = "set ".join(",", map {"q[$_]"} split(' ', $line)); } # --- EXECUTE --- print "Echo:$line\n" if $echo; $rv = eval $line; if ($@) { print STDERR $@; } print "\n"; print "$rv\n" if $return; if ($options->{sleep}) { sleep $options->{sleep}; } if ($options->{wait}) { sleep $options->{wait}; $options->{wait} = 0; } } } sub trace { $ENV{DBSTAG_TRACE} = !$ENV{DBSTAG_TRACE}; } sub setdb { $dbname = shift; msg("Set dbname to $dbname"); $res = $resources_hash->{$dbname}; if ($res) { $schema = $res->{schema}; $loc = $res->{loc} || ''; msg("loc: $loc") if $loc; } else { warnmsg("Unknown $dbname"); } if ($schema) { $templates = $sdbh->find_templates_by_schema($schema); msg("schema: $schema"); } else { msg("schema not known; templates unrestricted"); $templates = $sdbh->template_list; } msg("Templates available: " . 
scalar(@$templates)); $res; } sub db { $dbname = shift; if ($dbh) { $dbh->disconnect; } eval { $dbh = DBIx::DBStag->connect($dbname); msg("Connected to $dbname"); setdb($dbname); }; if ($@) { print STDERR "Could not connect to database '$dbname'\n"; } $dbh; } sub conn { $dbname = shift if @_; if (!$dbh) { if (!$dbname) { warnmsg("You need to set a database with 'd' first"); } else { db($dbname); } } } sub addhist { my $line = shift; next unless $line; # $term->addhistory($line); push(@hist, $line); } sub showhist { print "$_\n" foreach @hist; } sub showdbs { showfilter(shift, [sort @dbnames]); } sub dshowdbs { my @N = filter(shift, \@dbnames); my @R = map {$resources_hash->{$_}} @N; page(sub { my $fh = shift; my $r = shift @R; return 0 unless $r; printf $fh "%-20s %s\n", $r->{name}, hilite('keyword', $r->{schema}); return 1; }); } sub showfilter { foreach my $item (filter(@_)) { printf "$item\n"; } } sub writer { $W = Data::Stag->getformathandler(shift); $W->use_color(1); $W->fh($ofh); } sub filter { my $re = shift; my $list = shift || []; return grep { !$re || $_ =~ /$re/ } @$list; } sub showresourcesfile { `xterm -e less $ENV{DBSTAG_DBIMAP_FILE}`; } sub showtemplates { if ($templates) { showfilter(shift, [map {$_->name} @$templates]); } else { warnmsg("no templates for $dbname"); } } sub dshowtemplates { my @T = map {$sdbh->find_template($_)} filter(shift, [map {$_->name} @$templates]); page(sub { my $fh = shift; my $t = shift @T; return 0 unless $t; my $n = $t->name; my $hdr = hilite('comment', join("\n", "+" x 60, "++++ $n". (' ' x (50 - length($n))). "++++", ("+" x 60), "\n")); my $ftr = hilite('comment', "// -- END OF TEMPLATE --\n". 
("=" x 60)); print $fh $hdr; $t->show($fh, $cscheme, sub { Term::ANSIColor::color(@_)}); print $fh $ftr; print $fh "\n"; }); # my $fn; # my $fh = \*STDOUT; # ($fn, $fh) = opentmp(); # foreach (@t) { # $_->show($fh, $cscheme); # } # $fh->close; # if ($XTERMMODE) { # my ($pfn, $pfh) = opentmp(); # print $pfh "more $fn && sleep 3600"; # system("xterm -e sh $pfn"); # $pfh->close; # } # else { # system("more $fn"); # } # unlink $fn; } sub page { my $sub = shift; my $fn; my $fh = \*STDOUT; ($fn, $fh) = opentmp(); while ($sub->($fh)) { # } # print $fh "\n\L"; $fh->close; if ($XTERMMODE) { my ($pfn, $pfh) = opentmp(); print $pfh "$PAGER $fn"; $pfh->close; system("xterm -e sh $pfn &"); sleep(1); unlink $pfn; } else { system("$PAGER $fn"); } unlink $fn; } sub prompt { my $p = shift; $p =~ s/(\$\w+)/eval($1)/eg; $p; } sub template { my $n = shift; my @matches = grep {$_->name eq $n} @$templates; if (@matches) { if (@matches > 1) { msg("not set - these are the possibilities"); showfilter(undef, \@matches); } elsif (!@matches) { warnmsg("No templates match: $n"); } else { $template = shift @matches; $varnames = $template->get_varnames; $template_name = $n; msg("Set template to \"$n\""); msg("varnames:"); %exec_argh = (); showvars(); } } } sub showvars { foreach my $vn (@$varnames) { printf("%-20s => %s\n", hilite('keyword', $vn), defined($exec_argh{$vn}) ? 
hilite('variable', $exec_argh{$vn}) : ' - NOT SET - '); } } sub ex { my @args = @_; my $bind = {%exec_argh}; if (@args) { foreach my $arg (@args) { if ($arg =~ /(\w+)=(\S+)/) { $bind->{$1} = $2; } else { $bind = [] unless ref($bind) eq 'ARRAY'; push(@$bind, $arg); } } } conn(); $stag = $dbh->selectall_stag(-template=>$template, -bind=>$bind, -nesting=>$nesting, ); showstag(); } sub sqlselect { my $sql = shift; conn(); if ($TBLMODE) { $rows = $dbh->selectall_arrayref($sql); if ($rows) { showrows(); } else { print STDERR "Query failed\n"; } } else { $stag = $dbh->selectall_stag(-sql=>$sql, -nesting=>$nesting, ); showstag(); } } sub sql { my $sql = shift; $dbh->do($sql); } sub hilite { my $cn = shift; my $str = shift; color($cscheme->{$cn}) . $str . color('reset'); } sub showstag { my @kids = $stag->kids; if (!@kids) { msg("NO DATA"); return; } page(sub { my $fh = shift; $W->fh($fh); $stag->sax($W); return 0; }); } sub showrows { my @R = @$rows; page(sub { my $fh = shift; my $r = shift @R; return 0 unless $r; my @C = values %$cscheme; for (my $i=0; $i<@$r; $i++) { printf $fh "%s%s\t", color($C[$i%3]), $r->[$i]; # printf $fh $r->[$i], "\t"; } print $fh color('reset') . "\n"; return 1; }); } sub x { print Dumper shift; } sub msg { print "@_\n"; } sub warnmsg { print "WARNING:@_\n"; } sub welcome { msg("Hello. This is the command line interface to DBStag."); msg("Type ? or help if you are ever confused.\n"); } sub make_offering { print "Thank you! You are very kind\n"; `xv /users/cjm/stag/stag-god.jpg`; } sub help { my $s = join("\n", 'COMMAND SUMMARY', '===============', ' ? 
displays this help message', ' \d displays available databases', ' \d MATCH available databases containing regexp MATCH', ' \dd displays available databases - DETAILED', ' \dd MATCH available databases containing regexp MATCH - DETAILED', ' \t [MATCH] lists available templates (optionally matching MATCH regexp)', ' \tt [MATCH] as above, but detailed view', ' \x toggle pager - from shell to popup xterm', ' \r toggle queryresult mode - rows vs trees', ' \c show resources conf file', ' d DBNAME set the database', ' w WRITER set the tree writer (xml, sxpr or itext)', ' t TEMPLATE set the template', ' x VARIABLE show perl variable using Data::Dump', ' trace toggle DBSTAG_TRACE', ' \v show template variable bindings', ' \l toggle multi/single line mode', ' \q QUIT', ' :SQL runs SQL', ' / [ARGS] execute the template; bind using ARGS', 'INTRO', 'This is the dbstag interactive query shell', 'You can issue SQL queries or call dbstag parameterized', 'canned query templates here. This is also a perl interpreter', '', 'TEMPLATED QUERIES', 'See DBIx::DBStag::SQLTemplate for more details', 'You can execute the current template using forward-slash', 'Use the t command to set a template by name', '', 'Binding to template args:', '', 'You can bind any template argument on the command line by saying', 'VARIABLE = VALUE', '(spaces around the = are optional)', ' you do not need a $ prefix', ' you do not need quotes around the value', '', 'you can also bind variables when you execute the query, like this', '', ' /VAR1=VAL1 VAR2=VAL2', '', 'or sequentially:', ' / VAL1 VAL2', 'single value:', ' /VAL1', 'example with wildcard:', ' /*foo*', '', 'if you have already bound the variables using =, then just type "q"', '', "\n\n\n", "Other possibilities", "-------------------", 'SQL', ' type in any SQL SELECT statement, and the results will be turned', ' into a Stag tree and displayed as either xml, sxpr or itext', ' the resulting tree goes into the $stag global variable', ' You can also switch 
to table mode using the \\r toggle', ' (in this case results go into the $rows variable)', ' perl variable will not be interpolated by default - you', ' can force interpolation by ending your SQL with !', ' $x="foo"', ' select * from bar where name=\'$x\'!', '', 'Perl', ' type in any perl and it will be evaluated. don\'t use my to declare', ' variables though!', '', 'Weird stuff', '-----------', ' the xterm pager acts kind of weird depending on the stag god\'s whims', ' try running the same command again', '', 'you will need xhost+ set if you want to use the xterm pager', '', 'have fun and be careful out there', ); page(sub { my $fh = shift; print $fh $s, "\n"; return 0; }); } our $ID = 0; sub opentmp { my $fn = "/tmp/stag-tmp-$ID-$$"; $ID++; my $fh = new FileHandle "> $fn"; $fh || die($fn); return ($fn, $fh); } sub closetmp { my ($fn, $fh) = shift; $fh->close || die($fn); unlink($fn); } =head1 NAME stag-qsh - DBStag query shell =head1 SYNOPSIS stag-qsh help =head1 DESCRIPTION This is an interactive database query shell. 
For a full description, execute this script and type 'help' =head2 ARGUMENTS =head3 -d B This is either a DBI locator or the logical name of a database in the DBSTAG_DBIMAP_FILE config file =cut DBIx-DBStag-0.12/scripts/stag-show-template.pl0000755000076500000240000000142511326157220017676 0ustar cainstaff#!/usr/local/bin/perl # stag-q # cjm@fruitfly.org use strict; use Carp; use DBIx::DBStag; use Data::Stag qw(:all); use Data::Dumper; use Getopt::Long; use Term::ANSIColor; my $h = {}; my $dbname = ''; my $connect; my $term; my @hist = (); my %cscheme = ( 'keyword'=>'cyan', 'variable'=>'magenta', 'text' => 'reset', 'comment' => 'red', 'block' => 'blue', 'property' => 'green', ); GetOptions( "dbname|d=s"=>\$dbname, "connect|c"=>\$connect, ); my $match = shift; # parent dbh my $sdbh = DBIx::DBStag->new; my $all_templates = $sdbh->template_list; my @templates = @$all_templates; if ($match) { @templates = grep {$_->name =~ /$match/} @templates; } foreach my $t (@templates) { $t->show(\*STDOUT, \%cscheme); } DBIx-DBStag-0.12/scripts/stag-sl2sql.pl0000755000076500000240000000664411326157220016335 0ustar cainstaff#!/usr/local/bin/perl -w # POD docs at end of file use strict; use Carp; use FileHandle; use Getopt::Long; my $debug; my $help; GetOptions( "help|h"=>\$help, ); foreach my $fn (@ARGV) { sl2sql($fn); } exit 0; sub sl2sql { my $fn = shift; my $fh = FileHandle->new($fn) || die("can't open $fn"); my @domains = (); while(<$fh>) { chomp; s/\%.*//; s/\s+$//; next unless $_; if (/^:(\w+)\s+(.*)/) { my $cmd = $1; my $rest = $2; my $cmt = ''; if ($rest =~ /\#\s*(.*)/) { $cmt = $1; $rest =~ s/\#.*//; } my @args = split(' ', $rest); if ($cmd eq 'domain') { my $d = [shift @args, join(' ', @args)]; push(@$d, $args[0] =~ /^[A-Z]/ ? 
'primitive' : 'relation'); push(@$d, $cmt); push(@domains, $d); } if ($cmd eq 'import') { print "-- $_\n"; push(@domains, [$args[0], $args[0], 'relation', '']); } } elsif (/^\#/) { s/\#/\-\-/; print "$_\n"; } else { if (/^(\w+)\((.*)\)\s*(.*)/) { my $rel = $1; my $argstr = $2; my $sep = $3; my @cmts = (); my @constraints = (); if ($sep eq ':-') { while(<$fh>) { chomp; s/\s+$//; last unless $_; if (/\s+(.*)/) { my $line = $1; my $last; if ($line =~ /\#\s*(.*)/) { push(@cmts, $1); $line =~ s/\#.*//; } $line =~ s/\,$//; if ($line =~ /\.$/) { $line =~ s/\.$//; $last = 1; } if ($line =~ /:(.*)/) { # push(@cmts, $1); } else { push(@constraints, $line) if $line; } last if $last; } else { last; # must have indent } } if (@cmts) { push(@cmts, ''); } } my @args = split(/,\s*/, $argstr); push(@domains, [$rel, $rel, 'relation']); my @cols = map { my $col = $_; my $nullable = 0; if ($col =~ /\?$/) { $nullable = 1; $col =~ s/\?$//; } my $type = 'TEXT'; my $qual = ''; my $cmt = ''; if ($col eq '*') { $col = $rel .'_id'; $type = "SERIAL"; $nullable = 0; } else { my ($d) = grep {$_->[0] eq $col} @domains; if ($d) { $cmt = $d->[3]; $type = $d->[1]; if ($d->[2] eq 'relation') { $col .= '_id'; $qual = sprintf("REFERENCES $type(%s)", $type.'_id'); $type = "INTEGER"; } } else { warn $col; } } unless ($nullable) { $qual = "NOT NULL $qual"; } my $all = sprintf("%-20s %-12s %s", $col, $type, $qual); push(@cmts, sprintf("%-20s: $cmt", $col)) if $cmt; # if ($cmt) { # # $all = "-- $cmt\n $all"; # $all = sprintf("%-60s -- $cmt", $all); # } $all; } @args; # blank line $constraints[0] = "\n$constraints[0]" if @constraints; createtable($rel, [@cols, @constraints], [@cmts]); } else { warn $_; } } } $fh->close; } sub createtable { my $tbl = shift; my $cols = shift; my $cmts = shift || []; my $cmtstr = join('', map {"-- $_\n"} @$cmts); printf "\n\n-- RELATION: $tbl\n--\n$cmtstr--\n"; printf("CREATE TABLE $tbl (\n%s\n);\n", join(",\n", map { s/\s+$//;s/(\n*)\s*(.*)/$1 $2/;$_} @$cols)); print "-- 
****************************************\n"; } __END__ =head1 NAME stag-sl2sql.pl =head1 SYNOPSIS =head1 DESCRIPTION =cut DBIx-DBStag-0.12/scripts/stag-storenode.pl0000755000076500000240000002342011326157220017106 0ustar cainstaff#!/usr/local/bin/perl -w # POD docs at end use strict; use Carp; use Data::Stag; use DBIx::DBStag; use Getopt::Long; my $debug; my $help; my $db; my $user; my $pass; my @units; my $parser; my @mappings; my $mapconf; my @noupdate = (); my $force; my $tracenode; my $transform; my $trust_ids; my $autocommit; my %cache_h = (); my $safe; GetOptions( "help|h"=>\$help, "db|d=s"=>\$db, "user=s"=>\$user, "password|pass=s"=>\$pass, "unit|u=s@"=>\@units, "parser|p=s"=>\$parser, "mapping|m=s@"=>\@mappings, "conf|c=s"=>\$mapconf, "noupdate=s@"=>\@noupdate, "tracenode=s"=>\$tracenode, "transform|t=s"=>\$transform, "trust_ids=s"=>\$trust_ids, "cache=s%"=>\%cache_h, "force"=>\$force, "safe"=>\$safe, "autocommit"=>\$autocommit, ); if ($help) { system("perldoc $0"); exit 0; } #print STDERR "Connecting to $db\n"; my $dbh = DBIx::DBStag->connect($db, $user, $pass); eval { $dbh->dbh->{AutoCommit} = $autocommit || 0; }; if ($@) { print STDERR $@; } if ($trust_ids) { $dbh->trust_primary_key_values(1); } if ($mapconf) { $dbh->mapconf($mapconf); } if (@mappings) { $dbh->mapping(\@mappings); } @noupdate = map {split(/\,/,$_)} @noupdate; $dbh->noupdate_h({map {$_=>1} @noupdate}); $dbh->tracenode($tracenode) if $tracenode; $dbh->force(1) if $force; $dbh->force_safe_node_names(1) if $safe; foreach (keys %cache_h) { $dbh->is_caching_on($_, $cache_h{$_}); } sub store { my $self = shift; my $stag = shift; #$dbh->begin_work; eval { $dbh->storenode($stag); $dbh->commit unless $autocommit; }; if ($@) { print STDERR $@; if ($force) { print STDERR "-force set, ignoring error"; } else { exit 1; } } return; } my $thandler; if ($transform) { $thandler = Data::Stag->makehandler($transform); } foreach my $fn (@ARGV) { if ($fn eq '-' && !$parser) { $parser = 'xml'; } my $H; if 
(@units) { my $storehandler = Data::Stag->makehandler( map { $_ =>sub{store(@_)}; } @units ); if ($thandler) { $H = Data::Stag->chainhandlers([@units], $thandler, $storehandler); } else { $H = $storehandler; } } else { # if no load units are specified, store everything # nested one-level below top $H = Data::Stag->makehandler; $H->catch_end_sub(sub { my ($handler,$stag) = @_; if ($handler->depth == 1 && $stag->element ne '@') { store($handler,$stag); return; } return $stag; }); } Data::Stag->parse(-format=>$parser,-file=>$fn, -handler=>$H); } $dbh->disconnect; exit 0; __END__ =head1 NAME stag-storenode.pl =head1 SYNOPSIS stag-storenode.pl -d "dbi:Pg:dbname=mydb;host=localhost" myfile.xml =head1 DESCRIPTION This script is for storing data (specified in a nested file format such as XML or S-Expressions) in a database. It assumes a database schema corresponding to the tags in the input data already exists. =head2 ARGUMENTS =head3 -d B This is either a DBI locator or the logical name of a database in the DBSTAG_DBIMAP_FILE config file =head3 -user B db user name =head3 -password B db user password =head3 -u B This is the node/element name on which to load; a database loading event will be fired every time one of these elements is parsed; this also constitutes a whole transaction =head3 -c B This is a stag mapping file, indicating which elements are aliases =head3 -p B Default is xml; can be any stag compatible parser, OR a perl module which will parse the input file and fire stag events (see L) =head3 -t B This is the name of a perl module that will perform a transformation on the stag events/XML. See also L =head3 -noupdate B A comma-seperated (no spaces) list of nodes/elements on which no update should be performed if a unique key is found to be present in the DB =head3 -trust_ids If this flag is present, the values for primary key values are trusted; otherwise they are assumed to be surrogate internal IDs that should not be used. In this case they will be remapped. 
=head3 -tracenode B E.g. -tracenode person/name Writes out a line on STDERR for every new person inserted/updated =head3 -cache B
=B Can be specified multiple times Example: -cache 0: off (default) 1: memory-caching ON 2: memory-caching OFF, bulkload ON 3: memory-caching ON, bulkload ON IN-MEMORY CACHING By default no in-memory caching is used. If this is set to 1, then an in-memory cache is used for any particular element. No cache management is used, so you should be sure not to cache elements that will cause memory overloads. Setting this will not affect the final result, it is purely an efficiency measure for use with storenode(). The cache is indexed by all unique keys for that particular element/table, wherever those unique keys are set BULKLOAD If bulkload is used without memory-caching (set to 2), then only INSERTs will be performed for this element. Note that this could potentially cause a unique key violation, if the same element is present twice If bulkload is used with memory-caching (set to 3) then only INSERTs will be performed; the unique serial/autoincrement identifiers for those inserts will be cached and used. This means you can have the same element twice. However, the load must take place in one session, otherwise the contents of memory will be lost =head1 XML TO DB MAPPING See L for details of the actual mapping. Two styles of mapping are allowed: stag-dbxml and XORT-style XML. You do not have to specify which, they are sufficiently similar that the loader can accept either. =head1 MAKING DATABASE FROM XML FILES It is possible to automatically generate a database schema and populate it directly from XML files (or from Stag objects or other Stag compatible files). Of course, this is no substitute for proper relational design, but often it can be necessary to quickly generate databases from heterogeneous XML data sources, for the purposes of data mining. There are 3 steps involved: 1. Prepare the input XML (for instance, modifying db reserved words). 2. Autogenerate the CREATE TABLE statements, and make a db from these. 3. Store the XML data in the database. 
=head2 Step 1: Prepare input file You may need to make modifications to your XML before it can be used to make a schema. If your XML elements contain any words that are reserved by your DB you should change these. Any XML processing tool (eg XSLT) can be used. Alternatively you can use the script 'stag-mogrify' e.g. to get rid of '-' characters (this is how Stag treates attributes) and to change the element with postgresql reserved word 'date', do this: stag-mogrify.pl -xml -r 's/^date$/moddate/' -r 's/\-//g' data.xml > data.mog.xml You may also need to explicitly make elements where you will need linking tables. For instance, if the relationship between 'movie' and 'star' is many-to-many, and your input data looks like this: (movie (name "star wars") (star (name "mark hamill"))) You will need to *interpose* an element between these two, like this: (movie (name "star wars") (movie2star (star (name "mark hamill")))) you can do this with the -i switch: stag-mogrify.pl -xml -i movie,star,movie2star data.xml > data.mog.xml or if you simply do: stag-mogrify.pl -xml -i star data.xml > data.mog.xml the mogrifier will simply interpose an element above every time it sees 'star'; the naming rule is to use the two elements with an underscore between (in this case, 'movie_star'). =head2 Step 2: Generating CREATE TABLE statements Use the stag-autoddl.pl script; stag-autoddl.pl data.mog.xml > table.sql The default rule is to create foreign keys from the nested element to the outer element; you will want linking tables tobe treated differently (a linking table will point to parent and child elements). stag-autoddl.pl -l movie2star -l star2character data.mog.xml > table.sql Once you have done this, load the statements into your db; eg for postgresql (for other databases, use L) psql -a mydb < table.sql If something goes wrong, go back to step 1 and sort it out! 
Note that certain rules are followed: ever table generated gets a surrogate primary key of type 'serial'; this is used to generate foreign key relationships. The rule used is primary and foreign key names are the name of the table with the '_id' suffix. Feel free to modify the autogenerated schema at this stage (eg add uniqueness constraints) =head2 Step 3: Store the data in the db stag-storenode.pl -u movie -d 'dbi:Pg:mydb' data.mog.xml You generally dont need extra metadata here; everything can be infered by introspecting the database. The -u|unit switch controls when transactions are committed You can omit the -u switch, and every node directly under the top node will be stored. This will also be the transaction unit. If this works, you should now be able to retreive XML from the database, eg selectall_xml.pl -d 'dbi:Pg:mydb' 'SELECT * FROM x NATURAL JOIN y' =cut DBIx-DBStag-0.12/scripts/stag-template2bin.pl0000755000076500000240000000201211326157220017464 0ustar cainstaff#!/usr/local/bin/perl # cjm@fruitfly.org use strict; use Carp; use Getopt::Long; my $bindir = $ENV{DBSTAG_TEMPLATE_BINDIR}; my $force; my $quiet; my $chmod = "777"; GetOptions("bindir|b=s"=>\$bindir, "force"=>\$force, "quiet|q"=>\$quiet, "chmod=s"=>\$chmod, ); if (!$bindir) { $bindir = "/usr/local/bin"; unless ($force) { print "You did not specify -b (path to template generated binaries) on the command line\n or in \$DBSTAG_TEMPLATE_BINDIR"; print "I will use this: $bindir\n"; print "\nOK? 
[yes/no] "; my $ok = ; if ($ok !~ /^y/i) { print "Bye!\n"; exit 0; } } } foreach my $f (@ARGV) { my @parts = split(/\//, $f); my $name = $parts[-1]; if ($name =~ s/\.stg$//) { my $bin = "$bindir/$name"; open(F, ">$bin") || die("can't write to $bin"); print F "#!/bin/sh\n"; print F "selectall_xml.pl -t $name \$\@\n"; close(F); system("chmod $chmod $bin"); print "$bin\n" unless $quiet; } else { "$f doesn't look like a template (no .stg suffix)"; } } DBIx-DBStag-0.12/scripts/stag-templates2scripts.pl0000755000076500000240000000155011326157220020574 0ustar cainstaff#!/usr/local/bin/perl # cjm@fruitfly.org use strict; use Carp; use Getopt::Long; my $bindir = $ENV{DBSTAG_TEMPLATE_BINDIR}; GetOptions("bindir|b"=>\$bindir); my @dirs = @ARGV; if (!@dirs) { @dirs = split(/:/, $ENV{DBSTAG_TEMPLATE_DIRS}); print "You did not specify directories on the command line\n"; print "I will use these\n"; print "$_\n" foreach @dirs; print "\nOK? [yes/no] "; my $ok = ; if ($ok !~ /^y/i) { print "Bye!\n"; exit 0; } } if (!$bindir) { $bindir = "/usr/local/bin"; print "You did not specify -b (path to template generated binaries) on the command line\n or in \$DBSTAG_TEMPLATE_BINDIR"; print "I will use this: $bindir\n"; print "$_\n" foreach @dirs; print "\nOK? 
[yes/no] "; my $ok = ; if ($ok !~ /^y/i) { print "Bye!\n"; exit 0; } } foreach my $dir (@dirs) { } DBIx-DBStag-0.12/scripts/ubiq0000755000076500000240000003150411326157220014500 0ustar cainstaff#!/usr/local/bin/perl -w =head1 NAME ubiq =head1 SYNOPSIS ubiq [-d ] [-f file of sql] [-nesting|n ] SQL =head1 DESCRIPTION Example: ubiq -d "dbi:Pg:dbname=mydb;host=localhost"\ "SELECT * FROM a NATURAL JOIN b" =head1 ARGUMENTS =cut use strict; use Carp; use DBIx::DBStag; use Data::Dumper; use Getopt::Long; use Curses::UI; my $debug; my $help; my $dbname; my $nesting; my $show; my $file; my $user; my $pass; my $template_name; my $where; GetOptions( "help|h"=>\$help, "dbname|d=s"=>\$dbname, "show"=>\$show, "nesting|n=s"=>\$nesting, "file|f=s"=>\$file, "user|u=s"=>\$user, "pass|p=s"=>\$pass, "template|t=s"=>\$template_name, "where|w=s"=>\$where, ); if ($help) { system("perldoc $0"); exit 0; } my @C = (config=>[ setting=>[ name => 'output_format', default => 'xml', allowed => 'xml', allowed => 'sxpr', allowed => 'itext', ], ]); my $config = Data::Stag->unflatten(@C); my %confset = map { $_->get_name => $_->get_default } $config->get_setting; my $sdbh = DBIx::DBStag->new; my $dbh; my $resources = $sdbh->resources_list; my $resources_hash = $sdbh->resources_hash; my @dbresl = grep {$_->{type} eq 'rdb'} @$resources; my @dbnames = (map {$_->{name}} @dbresl); my $templates = []; my $template; if (1) { my $cui = Curses::UI->new; my $schema = ''; my $loc = ''; my $screen = 'select_db'; my %w = (); my $varnames = []; my %exec_argh = (); my $qr_obj; my $file_menu = [ { -label => 'Quit program', -value => sub {exit(0)} }, ]; my $select_menu = [ { -label => 'Choose Database', -value => sub{select_screen('select_db')}}, { -label => 'Choose Template', -value => sub{select_screen('select_template')}}, { -label => 'Query', -value => sub{select_screen('query')}}, ]; my $menu = [ { -label => 'File', -submenu => $file_menu }, { -label => 'Select', -submenu => $select_menu }, ]; $cui->add('menu', 
'Menubar', -menu => $menu); my $w0 = $cui->add( 'w0', 'Window', -border => 1, -y => -1, -height => 3, ); $w0->add('explain', 'Label', -text => "CTRL+P: previous demo CTRL+N: next demo " . "CTRL+X: menu CTRL+Q: quit" ); my $w1 = $cui->add( 'w1', 'Window', -border => 1, -y => -4, -height => 5, ); $w1->add('label_varwin', 'Label', -text => "ho\nho", -width => 60, -height => 3, ); my $w2 = $cui->add( 'w2', 'Window', -border => 1, -y => -9, -height => 6, ); $w2->add('label_detailwin', 'Label', -text => "hello", -width => 60, -height => 4, -wrapping=>1, ); my $button_template = { -label => '[ Select a template ]', -value => 'template', -onpress=> sub { select_screen('select_template'); }, }; my $button_query = { -label => '[ Create Query ]', -value => 'query', -onpress=> sub { select_screen('query'); }, }; my $button_exec_query = { -label => '[ Execute Query ]', -value => 'exec_query', -onpress=> sub { execute_query(); }, }; my %args = ( -border => 1, -titlereverse => 0, -padtop => 2, -padbottom => 14, -ipad => 1, ); $w{select_db} = $cui->add( 'window_select_db', 'Window', -title => "Select Database", %args ); my $chooser_select_db = $w{select_db}->add( 'chooser_select_db', 'Listbox', -height => 10, -values => [sort @dbnames], -vscrollbar => 'right', -onchange=> sub { # my $pop = shift->parent->getobj('chooser_select_db'); set_dbname(shift->get); # print "D=$dbname\n"; return; }, ); # $chooser_select_db->set_binding(sub { # my $pop = shift; # my $db = $pop->get; # my $res = $resources_hash->{$db}; # if ($res) { # $cui->dialog("DB: $db\n". # "LOC: $res->{loc}\n". 
# "SCHENA: $res->{schema}\n"); # } # else { # $cui->dialog("No data for $db"); # } # }, # '?'); $w{select_db}->add(undef, 'Buttonbox', -y => -3, -buttons => [ $button_template, $button_query, ] ); $w{select_template} = $cui->add( 'window_select_template', 'Window', -title => "Select Template", %args ); my $chooser_select_template = $w{select_template}->add( 'chooser_select_template', 'Listbox', -y=>5, -height => 8, -values => [1, 2], -onchange=> sub { my $pop = shift; set_template($pop->get); return; }, ); $chooser_select_template->set_binding(\&dialog_template, '?'); $w{select_template}->add(undef, 'Buttonbox', -y => -3, -buttons => [ $button_query, ] ); $w{query} = $cui->add( 'window_query', 'Window', -title => "Query Database Using Template", %args ); $w{query} = $cui->add( 'window_exec_query', 'Window', -title => "Query Results", %args ); $w{query}->add(undef, 'Buttonbox', -y => -3, -buttons => [ $button_exec_query, ] ); $w{qr} = $cui->add( 'window_qr', 'Window', -title => "Query Results", %args ); $w{select_db}->focus; $cui->set_binding( sub{ exit }, "\cQ" ); # Bind to menubar. 
$cui->set_binding( sub{ shift()->root->focus('menu') }, "\cX" ); if ($dbname) { set_dbname($dbname); } if ($template_name) { set_template($template_name); } update_varwin(); setup_query_options(); $cui->MainLoop; $cui->dialog("ubiq quitting!"); sub set_template { my $tn = shift; $template = $sdbh->find_template($tn); if ($template) { $varnames = $template->get_varnames; update_varwin(); setup_query_options(); $w{query}->intellidraw; my $detail = $w2->getobj('label_detailwin'); my $sp = $template->stag_props; my $desc = $sp->get_desc; $detail->text($desc); $w2->intellidraw; } else { $cui->dialog("no such template $tn"); } update_varwin(); } sub set_dbname { my $set = shift; $dbname = $set; my $res = $resources_hash->{$dbname}; if ($res) { $schema = $res->{schema} || ''; $loc = $res->{loc} || ''; if ($schema) { $templates = $sdbh->find_templates_by_schema($schema); } else { $templates = $sdbh->template_list; } } else { $cui->dialog("Unknown $dbname"); } $dbh = DBIx::DBStag->connect($dbname); my $w = $cui->getobj('window_select_template'); my $chooser = $w->getobj('chooser_select_template'); my @tnames = map {$_->name} @$templates; $chooser->values(\@tnames); update_varwin(); } sub update_varwin { my $label = $w1->getobj('label_varwin'); $label->text(sprintf("DBNAME:%-20s SCHEMA:%-20s\nLOC:%-20s TMPL:%-20s", $dbname || '', $schema, $loc, $template ? 
$template->name : '')); $w1->intellidraw; } sub setup_query_options { for (my $i=0; $i<@$varnames; $i++) { my $y = $i; my $vn = $varnames->[$i]; # replace_widg($w{query}, # "query_label$i", 'TextViewer', # -readonly => 1, # -singleline => 1, # -x => 1, # -y => $y, # -text => $vn, # -width => 20, # -sbborder => 1, # ); my $label = replace_widg($w{query}, "query_label$i", 'Label', -x => 1, -y => $y, -width => 40, -height => 1, -bold => 1, ); $label->text($vn); replace_widg($w{query}, "query_val_popup$i", 'TextEntry', -x => 42, -y => $y, -width => 30, -sbborder => 1, -onchange=> sub { my $v = shift->get; $exec_argh{$vn} = $v; if (!$v) { delete $exec_argh{$vn}; } } ); } } sub execute_query { $qr_obj = $dbh->selectall_stag(-template=>$template, -bind=>\%exec_argh); show_qr(); } sub show_qr { my $txt = $qr_obj->xml; open(F, ">z"); print F $txt; close(F); # $txt = substr($txt, 0, 100); replace_widg($w{qr}, "qr_textviewer", "TextViewer", -sbborder=>1, # -text=>"x\ny\n z\n 123\n", -text=>$txt, -wrapping=>1, -showoverflow=>0, ); $w{qr}->draw; $w{qr}->focus; } sub setup_complex_query_options { for (my $i=0; $i<10; $i++) { my $y = $i; replace_widg($w{cquery}, "cquery_att_popup$i", 'Popupmenu', -y => $y, -sbborder => 1, -values => $varnames, ); replace_widg($w{cquery}, "cquery_val_popup$i", 'TextEntry', -x => 20, -y => $y, -width => 20, -sbborder => 1, ); replace_widg($w{cquery}, "cquery_bool_popup$i", 'Popupmenu', -x => 50, -y => $y, -sbborder => 1, -values => [ qw(AND OR) ], ); } } sub replace_widg { my $w = shift; my $id = shift; my @args = @_; my $obj = $w->getobj($id); if ($obj) { $w->delete($id); } $w->add($id, @args); return $w->getobj($id); } sub select_screen { my $screen = shift; $w{$screen}->focus; } sub dialog_template { my $pop = shift; my $tn = $pop->get_active_value; my $t = $sdbh->find_template($tn); if ($t) { $cui->dialog("TEMPLATE: $tn\n"); } else { $cui->dialog("No data"); } } } DBIx-DBStag-0.12/t/0000755000076500000240000000000011331570203012357 5ustar 
cainstaffDBIx-DBStag-0.12/t/autoddl.t0000644000076500000240000000273211326157220014210 0ustar cainstaffuse lib 't'; BEGIN { # to handle systems with no installed Test module # we include the t dir (where a copy of Test.pm is located) # as a fallback eval { require Test; }; use Test; use DBStagTest; plan tests => 2; } use DBIx::DBStag; use DBI; use Data::Stag; use FileHandle; use strict; drop(); my $f = "t/data/game.el"; my $data = Data::Stag->parse($f); $data->where('seq', sub { my $s = shift; my @dbxref = $s->get_dbxref; $s->unset_dbxref; foreach (@dbxref) { my $sx = Data::Stag->new(seq_dbxref=>[$_]); $s->add_seq_dbxref($sx); } }); $data->where('annot', sub { my $s = shift; my @dbxref = $s->get_dbxref; $s->unset_dbxref; foreach (@dbxref) { my $sx = Data::Stag->new(annot_dbxref=>[$_]); $s->add_annot_dbxref($sx); } }); print $data->sxpr; my $dbh = dbh(); my $ddl = $dbh->autoddl($data, [qw(seq_dbxref annot_dbxref)]); print $ddl; $dbh->do($ddl); $dbh->storenode($data); my $out = $dbh->selectall_stag('SELECT * FROM game NATURAL JOIN seq NATURAL JOIN seq_dbxref NATURAL JOIN dbxref'); print $out->sxpr; my @dbxrefs = $out->get("game/seq/seq_dbxref/dbxref"); ok(@dbxrefs ==1); ok($dbxrefs[0]->get_db eq 'x'); $out = $dbh->selectall_stag('SELECT * FROM annot NATURAL JOIN fset NATURAL JOIN fspan NATURAL JOIN game NATURAL JOIN seq NATURAL JOIN seq_dbxref NATURAL JOIN dbxref USE NESTING (annotset (annot (game(seq(seq_dbxref(dbxref)))) (fset(fspan))))'); $dbh->disconnect; DBIx-DBStag-0.12/t/bond.t0000644000076500000240000000241711326157220013476 0ustar cainstaffuse lib 't'; BEGIN { # to handle systems with no installed Test module # we include the t dir (where a copy of Test.pm is located) # as a fallback eval { require Test; }; use Test; use DBStagTest; plan tests => 5; } use DBIx::DBStag; use DBI; use Data::Stag; use FileHandle; use strict; drop(); my $f = "t/data/bond.el"; my $spydata = Data::Stag->parse($f); my $dbh = dbh(); my $ddl = $dbh->autoddl($spydata); $dbh->do($ddl); 
my @kids = $spydata->kids; foreach (@kids) { $dbh->storenode($_); } my $out = $dbh->selectall_stag('SELECT * FROM agent NATURAL JOIN mission NATURAL JOIN mission_gizmo NATURAL JOIN gizmo'); my @agents = $out->get_agent; ok(@agents,1); my $agent = shift @agents; my @missions = $agent->get_mission; ok(@missions,2); my @missions_with_a_car = $out->where('mission', sub { grep { $_->get_gizmo_type eq 'car' } shift->find_gizmo }); ok(@missions_with_a_car,1); print $missions_with_a_car[0]->sxpr; ok($missions_with_a_car[0]->get_codename,'goldfinger'); $out = $dbh->selectall_stag("SELECT agent.*, bureau.*, agent.firstname || agent.lastname AS agent__fullname FROM agent NATURAL JOIN bureau_to_agent NATURAL JOIN bureau"); print $out->sxpr; ok($out->sget_agent->sget_fullname,'JamesBond'); $dbh->disconnect; DBIx-DBStag-0.12/t/cvterm.t0000644000076500000240000000447411326157220014061 0ustar cainstaffuse lib 't'; BEGIN { # to handle systems with no installed Test module # we include the t dir (where a copy of Test.pm is located) # as a fallback eval { require Test; }; use Test; use DBStagTest; plan tests => 9; } use DBIx::DBStag; use DBI; use Data::Stag; use FileHandle; use strict; open(F, "t/data/chado-cvterm.sql") || die; my $ddl = join('',); close(F); drop(); my $dbh = connect_to_cleandb(); #DBI->trace(1); $dbh->do($ddl); my $chado = Data::Stag->parse("t/data/test.chadoxml"); $dbh->storenode($_) foreach $chado->subnodes; ok(1); my $query = q[ SELECT * FROM cvterm INNER JOIN dbxref ON (cvterm.dbxref_id = dbxref.dbxref_id) INNER JOIN db ON (dbxref.db_id = db.db_id) INNER JOIN cv ON (cvterm.cv_id = cv.cv_id) WHERE cvterm.definition LIKE '%snoRNA%' USE NESTING (set(cvterm(cv)(dbxref(db)))) ]; my $termset = $dbh->selectall_stag($query); print $termset->xml; my @terms = $termset->get_cvterm; ok(@terms,1); my $term = shift @terms; ok($term->sget_cv->sget_name eq 'biological_process'); ok($term->sget_dbxref->sget_db->sget_name eq 'GO'); # find parents of 'snoRNA' $query = q[ SELECT * 
FROM cvterm AS baseterm INNER JOIN cvterm_relationship AS r ON (r.subject_id=baseterm.cvterm_id) INNER JOIN cvterm AS parentterm ON (r.object_id=parentterm.cvterm_id) INNER JOIN cvterm AS rtype ON (r.type_id=rtype.cvterm_id) WHERE baseterm.definition LIKE '%snoRNA%' USE NESTING (set(baseterm(r(parentterm)(rtype)))) ]; $termset = $dbh->selectall_stag(-sql=>$query, -aliaspolicy=>'a'); my @parents = $termset->get('baseterm/r/parentterm'); ok(@parents == 1); $termset = $dbh->selectall_stag(-sql=>$query, -aliaspolicy=>'t'); @parents = $termset->get('cvterm/cvterm_relationship/cvterm'); #both child and parent are cvterm ok(@parents == 2); $termset = $dbh->selectall_stag(-sql=>$query); @parents = $termset->get('baseterm/cvterm/r/cvterm_relationship/parentterm/cvterm'); ok(@parents == 1); # this next test uses the new style of obo2chadoxml conversion my $chado = Data::Stag->parse("t/data/test2.chadoxml"); $dbh->storenode($_) foreach $chado->subnodes; ok(1); my ($genus) = $dbh->selectrow_array("SELECT cvterm.name FROM cvterm_genus INNER JOIN cvterm ON (genus_id=cvterm.cvterm_id)"); ok($genus eq 'a'); $dbh->disconnect; DBIx-DBStag-0.12/t/data/0000755000076500000240000000000011331570203013270 5ustar cainstaffDBIx-DBStag-0.12/t/data/bond.el0000644000076500000240000000201111326157220014532 0ustar cainstaff'(spydata (bureau (name "MI7") (country "UK") (bureau_to_agent (agent (agent_code "007") (lastname "Bond") (firstname "James") (license "kill") (mission (codename "goldfinger") (description "") (mission_to_person (role "villain") (person (person_name "Auric Goldfinger"))) (mission_to_person (person (person_name "Pussy Galore"))) (mission_to_person (role "henchman") (person (person_name "oddjob"))) (mission_gizmo (gizmo (gizmo_name "Aston Martin DB5") (gizmo_type "car") (feature "ejector seat") ;;; (feature "smoke screen") ))) (mission (codename "live and let die") (description "investigate plot to flood US with heroin") (mission_to_person (person (person_name "Baron Samedi"))) 
(mission_gizmo (gizmo (gizmo_name "Rolex watch") (gizmo_type "watch")))) )))) DBIx-DBStag-0.12/t/data/CG10833.with-macros.chado-xml0000644000076500000240000005560211326157220020226 0ustar cainstaff Drosophila melanogaster Gadfly anon SO GB_polypeptide FlyBase sequence relationship pub type synonym type property type cv__SO gene cv__SO mRNA cv__SO exon cv__SO chromosome cv__relationship type part_of cv__SO polypeptide cv__pub type computer file computer_file db__anon cv__synonym type synonym synonym db__anon cv__relationship type derives_from cv__property type owner owner db__anon cv__property type sp_comment comment db__anon cv__property type protein_id protein_id db__anon cv__property type sp_status sp_status db__anon cv__property type cyto_range cyto_range db__anon cv__property type gbunit gbunit db__anon CG10833 db__Gadfly 0 Cyp28d1 Drosophila__melanogaster 1972 2004-08-09 15:56:24.057523 2004-08-09 15:56:24.057523 SO__gene CG10833 CG10833-RA db__Gadfly 0 275f2c58611a9f912c514d896a652470 Cyp28d1-RA Drosophila__melanogaster 
CCGCGTTGGCGAGAGGTTGTGCCATGTGTCCGATTTCCACGGCTCTTTTTGTAATTGCGGCCATCCTGGCCTTGATCTATGTCTTTCTAACATGGAACTTTAGCTACTGGAAGAAGAGGGGCATTCCAACGGCCAAGTCATGGCCCTTTGTGGGCAGTTTTCCCAGCGTTTTCACCCAGAAACGGAACGTAGTCTACGACATCGATGAGATCTATGAGCAGTACAAGAACACCGACAGCATTGTGGGAGTGTTTCAAACCAGAATTCCACAACTAATGGTCACAACTCCGGAATATGCGCACAAGATATATGTTAGTGACTTCCGCAGCTTCCACGACAATGAGATGGCTAAGTTTACCGACAGCAAAACGGATCCCATTTTGGCGAATAATCCGTTCGTATTGACTGGTGAGGCCTGGAAAGAAAGACGCGCCGAAGTTACACCCGGACTTTCGGCAAATCGGGTCAAAGCTGCCTATCCCGTCTCGCTGCGCGTTTGTAAAAAGTTTGTGGAATATATAAGGCGACAGAGCCTGATGGCCCCCGCCCAAGGACTAAATGCGAAGGATCTCTGCTTGTGCTACACCACCGAAGTGATTTCCGATTGTGTCCTGGGCATTTCCGCCCAGAGTTTCACGGATAATCCCACACCCATGGTGGGAATGACCAAGCGCGTCTTCGAACAGTCTTTTGGCTTCATTTTCTACACGGTGGTCGCCAATCTATGGCCACCAATCACGAAATTCTACTCCGTTTCGCTGTTCGCCAAGGACGTGGCTGCGTTCTTCTATGACCTCATGCAGAAGTGCATCCAAGTTCGGCGGGAAAGTCCGGCGGCACAGCAGCGAGATGACTTCCTCAACTACATGTTGCAGTTGCAGGAGAAAAAGGGACTGAATGCGGCGGAGCTGACCTCGCACACAATGACATTTTTAACGGACGGATTCGAGACCACCGCACAAGTGCTTACCCACACACTCCTTTTCCTGGCACGCAATCCCAAGGAGCAGATGAAGTTGAGGGAGGAGATCGGTACCGCCGAGCTGACCTTTGAACAGATAAGTGAGCTGCCCTTCACCGAAGCCTGCATCCATGAAACTTTGAGAATTTTCTCACCTGTTCTGGCTGCCCGCAAGGTGGTAACTGAGCCCTGTGAACTGACGAACAAAAACGGAGTGAGCGTGAAACTGAGACCCGGGGATGTAGTCATCATTCCTGTGAACGCCTTGCACCACGATCCCCAATACTATGAGGAACCTCAATCGTTCAAGCCCGAGCGATTCCTGAACATCAATGGCGGAGCCAAAAAGTACAGAGATCAGGGTCTATTCTTTGGGTTTGGCGATGGACCACGTATTTGCCCCGGTATGCGGTTTTCACTTACCCAAATCAAAGCTGCCCTGGTGGAAATCGTGCGAAACTTCGACATCAAGGTTAATCCCAAAACTCGCAAGGATAATGAAATTGATGATACCTACTTTATGCCAGCCTTAAAAGGCGGCGTTTGGCTGGATTTTGTTGAACGCAATTAGTTATTCAATAATGTTACGACTTTATATTAAATATAGTTTATCCAGTTGCATACATGTTATATTTCCAAACA 1603 2004-08-09 15:56:24.230779 2002-04-30 16:10:52 SO__mRNA CG10833-RA 6 0 Cyp28d1:6 Drosophila__melanogaster 238 2004-08-09 15:56:24.665877 2004-08-09 15:56:24.665877 SO__exon CG10833:6 5212450 5212212 0 0 0 0 Drosophila__melanogaster SO__chromosome 2L 1 relationship type__part_of 4 0 Cyp28d1:4 Drosophila__melanogaster 631 2004-08-09 15:56:24.587619 2004-08-09 15:56:24.587619 SO__exon 
CG10833:4 5211821 5211190 0 0 0 0 Drosophila__melanogaster SO__chromosome 2L 1 relationship type__part_of 1 0 Cyp28d1:1 Drosophila__melanogaster 217 2004-08-09 15:56:24.453082 2004-08-09 15:56:24.453082 SO__exon CG10833:1 5210695 5210478 0 0 0 0 Drosophila__melanogaster SO__chromosome 2L 1 relationship type__part_of 5 0 Cyp28d1:5 Drosophila__melanogaster 270 2004-08-09 15:56:24.626408 2004-08-09 15:56:24.626408 SO__exon CG10833:5 5212150 5211880 0 0 0 0 Drosophila__melanogaster SO__chromosome 2L 1 relationship type__part_of 2 0 Cyp28d1:2 Drosophila__melanogaster 139 2004-08-09 15:56:24.502579 2004-08-09 15:56:24.502579 SO__exon CG10833:2 5210889 5210750 0 0 0 0 Drosophila__melanogaster SO__chromosome 2L 1 relationship type__part_of 3 0 Cyp28d1:3 Drosophila__melanogaster 108 2004-08-09 15:56:24.546345 2004-08-09 15:56:24.546345 SO__exon CG10833:3 5211061 5210953 0 0 0 0 Drosophila__melanogaster SO__chromosome 2L 1 relationship type__part_of CG10833-PA db__Gadfly 0 35dca04d49d72956485a3ecefc21ada4 Cyp28d1-PA Drosophila__melanogaster MCPISTALFVIAAILALIYVFLTWNFSYWKKRGIPTAKSWPFVGSFPSVFTQKRNVVYDIDEIYEQYKNTDSIVGVFQTRIPQLMVTTPEYAHKIYVSDFRSFHDNEMAKFTDSKTDPILANNPFVLTGEAWKERRAEVTPGLSANRVKAAYPVSLRVCKKFVEYIRRQSLMAPAQGLNAKDLCLCYTTEVISDCVLGISAQSFTDNPTPMVGMTKRVFEQSFGFIFYTVVANLWPPITKFYSVSLFAKDVAAFFYDLMQKCIQVRRESPAAQQRDDFLNYMLQLQEKKGLNAAELTSHTMTFLTDGFETTAQVLTHTLLFLARNPKEQMKLREEIGTAELTFEQISELPFTEACIHETLRIFSPVLAARKVVTEPCELTNKNGVSVKLRPGDVVIIPVNALHHDPQYYEEPQSFKPERFLNINGGAKKYRDQGLFFGFGDGPRICPGMRFSLTQIKAALVEIVRNFDIKVNPKTRKDNEIDDTYFMPALKGGVWLDFVERN 502 2004-08-09 15:56:24.711814 2004-08-09 15:56:24.711814 SO__polypeptide CG10833-PA 5212376 5210501 0 0 0 0 Drosophila__melanogaster SO__chromosome 2L 1 AAF52226.1 db__GB_polypeptide 1 FBpp0078698 db__FlyBase 1 CG10833-PA db__Gadfly 0 1 0 pub type__computer file gadfly3 CG10833-PA CG10833-PA synonym type__synonym relationship type__derives_from 5212450 5210478 0 0 0 0 Drosophila__melanogaster SO__chromosome 2L 1 FBtr0079062 db__FlyBase 1 
CG10833-RA db__Gadfly 0 1 0 pub type__computer file joshk CG10833-RA CG10833-RA synonym type__synonym 1 0 pub type__computer file gadfly3 CG10833-RA CG10833-RA synonym type__synonym 0 property type__owner joshk 0 property type__sp_comment Perfect match to REAL SP with corresponding FBgn 0 property type__protein_id AAF52226 relationship type__part_of 0 property type__sp_status Perfect match to SwissProt real (computational) 0 property type__cyto_range 25C10-25C10 0 property type__gbunit AE003609 FBgn0031689 db__FlyBase 1 CG10833 db__Gadfly 1 FBan0010833 db__FlyBase 1 0 0 pub type__computer file gadfly3 28d1 28d1 synonym type__synonym 5212450 5210478 0 0 0 0 Drosophila__melanogaster SO__chromosome 2L 1 DBIx-DBStag-0.12/t/data/chado-cvterm.sql0000644000076500000240000003476511326157220016410 0ustar cainstaff-- ================================================ -- TABLE: tableinfo -- ================================================ create table tableinfo ( tableinfo_id serial not null, primary key (tableinfo_id), name varchar(30) not null, primary_key_column varchar(30) null, is_view int not null default 0, view_on_table_id int null, superclass_table_id int null, is_updateable int not null default 1, modification_date date not null default now(), constraint tableinfo_c1 unique (name) ); COMMENT ON TABLE tableinfo IS NULL; -- ================================================ -- TABLE: contact -- ================================================ create table contact ( contact_id serial not null, primary key (contact_id), name varchar(30) not null, description varchar(255) null, constraint contact_c1 unique (name) ); COMMENT ON TABLE contact IS NULL; -- ================================================ -- TABLE: db -- ================================================ create table db ( db_id serial not null, primary key (db_id), name varchar(255) not null, contact_id int, foreign key (contact_id) references contact (contact_id) on delete cascade INITIALLY DEFERRED, description 
varchar(255) null, urlprefix varchar(255) null, url varchar(255) null, constraint db_c1 unique (name) ); COMMENT ON TABLE db IS NULL; -- ================================================ -- TABLE: dbxref -- ================================================ create table dbxref ( dbxref_id serial not null, primary key (dbxref_id), db_id int not null, foreign key (db_id) references db (db_id) on delete cascade INITIALLY DEFERRED, accession varchar(255) not null, version varchar(255) not null default '', description text, constraint dbxref_c1 unique (db_id,accession,version) ); create index dbxref_idx1 on dbxref (db_id); create index dbxref_idx2 on dbxref (accession); create index dbxref_idx3 on dbxref (version); COMMENT ON TABLE dbxref IS NULL; -- ================================================ -- TABLE: project -- ================================================ create table project ( project_id serial not null, primary key (project_id), name varchar(255) not null, description varchar(255) not null, constraint project_c1 unique (name) ); COMMENT ON TABLE project IS NULL; -- See cv-intro.txt -- ================================================ -- TABLE: cv -- ================================================ create table cv ( cv_id serial not null, primary key (cv_id), name varchar(255) not null, definition text, constraint cv_c1 unique (name) ); -- ================================================ -- TABLE: cvterm -- ================================================ create table cvterm ( cvterm_id serial not null, primary key (cvterm_id), cv_id int not null, foreign key (cv_id) references cv (cv_id) on delete cascade INITIALLY DEFERRED, name varchar(1024) not null, definition text, dbxref_id int not null, foreign key (dbxref_id) references dbxref (dbxref_id) on delete set null INITIALLY DEFERRED, is_obsolete int not null default 0, is_relationshiptype int not null default 0, constraint cvterm_c1 unique (name,cv_id,is_obsolete), constraint cvterm_c2 unique (dbxref_id) ); 
create index cvterm_idx1 on cvterm (cv_id); create index cvterm_idx2 on cvterm (name); create index cvterm_idx3 on cvterm (dbxref_id); COMMENT ON TABLE cvterm IS 'A term, class or concept within an ontology or controlled vocabulary. Also used for relationship types. A cvterm can also be thought of as a node in a graph'; COMMENT ON COLUMN cvterm.cv_id IS 'The cv/ontology/namespace to which this cvterm belongs'; COMMENT ON COLUMN cvterm.name IS 'A concise human-readable name describing the meaning of the cvterm'; COMMENT ON COLUMN cvterm.definition IS 'A human-readable text definition'; COMMENT ON COLUMN cvterm.dbxref_id IS 'Primary dbxref - The unique global OBO identifier for this cvterm. Note that a cvterm may have multiple secondary dbxrefs - see also table: cvterm_dbxref'; COMMENT ON COLUMN cvterm.is_obsolete IS 'Boolean 0=false,1=true; see GO documentation for details of obsoletion. note that two terms with different primary dbxrefs may exist if one is obsolete'; COMMENT ON COLUMN cvterm.is_relationshiptype IS 'Boolean 0=false,1=true Relationship types (also known as Typedefs in OBO format, or as properties or slots) form a cv/ontology in themselves. We use this flag to indicate whether this cvterm is an actual term/concept or a relationship type'; COMMENT ON INDEX cvterm_c1 IS 'the OBO identifier is globally unique'; COMMENT ON INDEX cvterm_c2 IS 'a name can mean different things in different contexts; for example "chromosome" in SO and GO. A name should be unique within an ontology/cv. 
A name may exist twice in a cv, in both obsolete and non-obsolete forms - these will be for different cvterms with different OBO identifiers; so GO documentation for more details on obsoletion'; -- ================================================ -- TABLE: cvterm_relationship -- ================================================ create table cvterm_relationship ( cvterm_relationship_id serial not null, primary key (cvterm_relationship_id), type_id int not null, foreign key (type_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED, subject_id int not null, foreign key (subject_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED, object_id int not null, foreign key (object_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED, constraint cvterm_relationship_c1 unique (subject_id,object_id,type_id) ); COMMENT ON TABLE cvterm_relationship IS 'A relationship linking two cvterms. A relationship can be thought of as an edge in a graph, or as a natural language statement about two cvterms. 
The statement is of the form SUBJECT PREDICATE OBJECT; for example "wing part_of body"'; create index cvterm_relationship_idx1 on cvterm_relationship (type_id); create index cvterm_relationship_idx2 on cvterm_relationship (subject_id); create index cvterm_relationship_idx3 on cvterm_relationship (object_id); -- ================================================ -- TABLE: cvtermpath -- ================================================ create table cvtermpath ( cvtermpath_id serial not null, primary key (cvtermpath_id), type_id int, foreign key (type_id) references cvterm (cvterm_id) on delete set null INITIALLY DEFERRED, subject_id int not null, foreign key (subject_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED, object_id int not null, foreign key (object_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED, cv_id int not null, foreign key (cv_id) references cv (cv_id) on delete cascade INITIALLY DEFERRED, pathdistance int, constraint cvtermpath_c1 unique (subject_id,object_id,type_id,pathdistance) ); create index cvtermpath_idx1 on cvtermpath (type_id); create index cvtermpath_idx2 on cvtermpath (subject_id); create index cvtermpath_idx3 on cvtermpath (object_id); create index cvtermpath_idx4 on cvtermpath (cv_id); -- ================================================ -- TABLE: cvtermsynonym -- ================================================ create table cvtermsynonym ( cvtermsynonym_id serial not null, primary key (cvtermsynonym_id), cvterm_id int not null, foreign key (cvterm_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED, synonym varchar(1024) not null, type_id int, foreign key (type_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED, constraint cvtermsynonym_c1 unique (cvterm_id,synonym) ); create index cvtermsynonym_idx1 on cvtermsynonym (cvterm_id); -- ================================================ -- TABLE: cvterm_dbxref -- ================================================ 
create table cvterm_dbxref ( cvterm_dbxref_id serial not null, primary key (cvterm_dbxref_id), cvterm_id int not null, foreign key (cvterm_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED, dbxref_id int not null, foreign key (dbxref_id) references dbxref (dbxref_id) on delete cascade INITIALLY DEFERRED, is_for_definition int not null default 0, constraint cvterm_dbxref_c1 unique (cvterm_id,dbxref_id) ); create index cvterm_dbxref_idx1 on cvterm_dbxref (cvterm_id); create index cvterm_dbxref_idx2 on cvterm_dbxref (dbxref_id); -- ================================================ -- TABLE: cvtermprop -- ================================================ create table cvtermprop ( cvtermprop_id serial not null, primary key (cvtermprop_id), cvterm_id int not null, foreign key (cvterm_id) references cvterm (cvterm_id) on delete cascade, type_id int not null, foreign key (type_id) references cvterm (cvterm_id) on delete cascade, value text not null default '', rank int not null default 0, unique(cvterm_id, type_id, value, rank) ); -- ================================================ -- TABLE: dbxrefprop -- ================================================ create table dbxrefprop ( dbxrefprop_id serial not null, primary key (dbxrefprop_id), dbxref_id int not null, foreign key (dbxref_id) references dbxref (dbxref_id) INITIALLY DEFERRED, type_id int not null, foreign key (type_id) references cvterm (cvterm_id) INITIALLY DEFERRED, value text not null default '', rank int not null default 0, constraint dbxrefprop_c1 unique (dbxref_id,type_id,rank) ); create index dbxrefprop_idx1 on dbxrefprop (dbxref_id); create index dbxrefprop_idx2 on dbxrefprop (type_id); -- ================================================ -- TABLE: organism -- ================================================ create table organism ( organism_id serial not null, primary key (organism_id), abbreviation varchar(255) null, genus varchar(255) not null, species varchar(255) not null, common_name 
varchar(255) null, comment text null, constraint organism_c1 unique (genus,species) ); COMMENT ON COLUMN organism.species IS 'A type of organism is always uniquely identified by genus+species. When mapping from the NCBI taxonomy names.dmp file, the unique-name column must be used where it is present, as the name column is not always unique (eg environmental samples)'; -- Compared to mol5..Species, organism table lacks "approved char(1) null". -- We need to work w/ Aubrey & Michael to ensure that we don't need this in -- future [dave] -- -- in response: this is very specific to a limited use case I think; -- if it's really necessary we can have an organismprop table -- for adding internal project specific data -- [cjm] -- done (below) 19-MAY-03 [dave] -- ================================================ -- TABLE: organism_dbxref -- ================================================ create table organism_dbxref ( organism_dbxref_id serial not null, primary key (organism_dbxref_id), organism_id int not null, foreign key (organism_id) references organism (organism_id) on delete cascade INITIALLY DEFERRED, dbxref_id int not null, foreign key (dbxref_id) references dbxref (dbxref_id) on delete cascade INITIALLY DEFERRED, constraint organism_dbxref_c1 unique (organism_id,dbxref_id) ); create index organism_dbxref_idx1 on organism_dbxref (organism_id); create index organism_dbxref_idx2 on organism_dbxref (dbxref_id); -- ================================================ -- TABLE: organismprop -- ================================================ create table organismprop ( organismprop_id serial not null, primary key (organismprop_id), organism_id int not null, foreign key (organism_id) references organism (organism_id) on delete cascade INITIALLY DEFERRED, type_id int not null, foreign key (type_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED, value text null, rank int not null default 0, constraint organismprop_c1 unique (organism_id,type_id,rank) ); 
create index organismprop_idx1 on organismprop (organism_id); create index organismprop_idx2 on organismprop (type_id); CREATE VIEW cvterm_relationship_with_typename AS SELECT cvterm_relationship.*, typeterm.name AS typename, typeterm.cv_id AS typeterm_cv_id FROM cvterm_relationship INNER JOIN cvterm AS typeterm ON (type_id=typeterm.cvterm_id); CREATE VIEW cvtermprop_with_propname AS SELECT cvtermprop.*, propterm.name AS propname, propterm.cv_id AS propterm_cv_id FROM cvtermprop INNER JOIN cvterm AS propterm ON (type_id=propterm.cvterm_id); -- Cross-products, logical definitions -- These views are for advanced use - you will only need them if -- you are loading ontologies that use either advanced obo format 1.2 -- features or OWL DL ontologies. Please read the relevant documentation -- first -- keywords: defined classes, OWL, Aristotelian definitions CREATE OR REPLACE VIEW is_anonymous_cvterm AS SELECT cvterm_id FROM cvtermprop_with_propname WHERE propname='is_anonymous' AND value='1'; CREATE OR REPLACE VIEW cvterm_ldef_intersection AS SELECT * FROM cvterm_relationship_with_typename WHERE typename='intersection_of'; COMMENT ON VIEW cvterm_ldef_intersection IS 'for advanced OWL/Description Logic style definitions, chado allows the specification of an equivalentClass using intersection_of links between the defined term and the cross-product'; CREATE OR REPLACE VIEW cvterm_genus AS SELECT i.subject_id AS cvterm_id, i.object_id AS genus_id FROM cvterm_ldef_intersection AS i WHERE i.object_id NOT IN (SELECT cvterm_id FROM is_anonymous_cvterm); COMMENT ON VIEW cvterm_genus IS 'In a logical (cross-product) definition, there is a generic term (genus) and discriminating characteristics. E.g. a biosynthesis (genus) which outputs cysteine (differentia). 
The genus is the -true- is_a parent'; CREATE OR REPLACE VIEW cvterm_differentium AS SELECT i.subject_id AS cvterm_id, diff.* FROM cvterm_ldef_intersection AS i INNER JOIN cvterm_relationship AS diff ON (i.object_id=diff.subject_id) INNER JOIN is_anonymous_cvterm AS anon ON (anon.cvterm_id=i.object_id); COMMENT ON VIEW cvterm_differentium IS 'In a logical (cross-product) definition, there is a generic term (genus) and discriminating characteristics. E.g. a biosynthesis (genus) which outputs cysteine (differentia). Each differentium is a link via a relation to another cvterm which discriminates the defined term from other is_a siblings'; DBIx-DBStag-0.12/t/data/chado-feature.sql0000644000076500000240000002250611326157220016531 0ustar cainstaff -- ================================================ -- TABLE: feature -- ================================================ create table feature ( feature_id serial not null, primary key (feature_id), dbxref_id int, foreign key (dbxref_id) references dbxref (dbxref_id) on delete set null INITIALLY DEFERRED, organism_id int not null, foreign key (organism_id) references organism (organism_id) on delete cascade INITIALLY DEFERRED, name varchar(255), uniquename text not null, residues text, seqlen int, md5checksum char(32), type_id int not null, foreign key (type_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED, is_analysis boolean not null default 'false', is_obsolete boolean not null default 'false', timeaccessioned timestamp not null default current_timestamp, timelastmodified timestamp not null default current_timestamp, constraint feature_c1 unique (organism_id,uniquename,type_id) ); -- dbxref_id here is intended for the primary dbxref for this feature. 
-- Additional dbxref links are made via feature_dbxref -- name: the human-readable common name for a feature, for display -- uniquename: the unique name for a feature; may not be particularly human-readable -- timeaccessioned and timelastmodified are for handling object accession/ -- modification timestamps (as opposed to db auditing info, handled elsewhere). -- The expectation is that these fields would be available to software -- interacting with chado. -- is_obsolete is for marking records obsolete which none the less need to -- remain in the table. create sequence feature_uniquename_seq; create index feature_name_ind1 on feature(name); create index feature_idx1 on feature (dbxref_id); create index feature_idx2 on feature (organism_id); create index feature_idx3 on feature (type_id); create index feature_idx4 on feature (uniquename); create index feature_idx5 on feature (lower(name)); --This ALTER TABLE statement changes the way sequence data --is stored on disk to make extracting substrings much faster --at the expense of more disk space --ALTER TABLE feature ALTER COLUMN residues SET STORAGE EXTENDED; -- ================================================ -- TABLE: featureloc -- ================================================ -- each feature can have 0 or more locations. -- multiple locations do NOT indicate non-contiguous locations. -- instead they designate alternate locations or grouped locations; -- for instance, a feature designating a blast hit or hsp will have two -- locations, one on the query feature, one on the subject feature. -- features representing sequence variation could have alternate locations -- instantiated on a feature on the mutant strain. -- the field "rank" is used to differentiate these different locations. -- the default rank '0' is used for the main/primary location (eg in -- similarity features, 0 is query, 1 is subject), although sometimes -- this will be symmeytical and there is no primary location. 
-- -- redundant locations can also be stored - for instance, the position -- of an exon on a BAC and in global coordinates. the field "locgroup" -- is used to differentiate these groupings of locations. the default -- locgroup '0' is used for the main/primary location, from which the -- others can be derived via coordinate transformations. another -- example of redundant locations is storing ORF coordinates relative -- to both transcript and genome. redundant locations open the possibility -- of the database getting into inconsistent states; this schema gives -- us the flexibility of both 'warehouse' instantiations with redundant -- locations (easier for querying) and 'management' instantiations with -- no redundant locations. -- most features (exons, transcripts, etc) will have 1 location, with -- locgroup and rank equal to 0 -- -- an example of using both locgroup and rank: -- imagine a feature indicating a conserved region between the chromosomes -- of two different species. we may want to keep redundant locations on -- both contigs and chromosomes. we would thus have 4 locations for the -- single conserved region feature - two distinct locgroups (contig level -- and chromosome level) and two distinct ranks (for the two species). -- altresidues is used to store alternate residues of a feature, when these -- differ from feature.residues. for instance, a SNP feature located on -- a wild and mutant protein would have different alresidues. -- for alignment/similarity features, the altresidues is used to represent -- the alignment string. 
-- note on variation features; even if we don't want to instantiate a mutant -- chromosome/contig feature, we can still represent a SNP etc with 2 locations, -- one (rank 0) on the genome, the other (rank 1) would have most fields null, -- except for altresidues -- IMPORTANT: fnbeg and fnend are space-based (INTERBASE) coordinates -- this is vital as it allows us to represent zero-length -- features eg splice sites, insertion points without -- an awkward fuzzy system -- Note that nbeg and nend have been replaced with fmin and fmax, -- which are the minimum and maximum coordinates of the feature -- relative to the parent feature. By contrast, -- nbeg, nend are for feature natural begin/end -- by natural begin, end we mean these are the actual -- beginning (5' position) and actual end (3' position) -- rather than the low position and high position, as -- these terms are sometimes erroneously used. To compensate -- for the removal of nbeg and nend from featureloc, a view -- based on featureloc, dfeatureloc, is provided in sequence_views.sql. create table featureloc ( featureloc_id serial not null, primary key (featureloc_id), feature_id int not null, foreign key (feature_id) references feature (feature_id) on delete cascade INITIALLY DEFERRED, srcfeature_id int, foreign key (srcfeature_id) references feature (feature_id) on delete set null INITIALLY DEFERRED, fmin int, is_fmin_partial boolean not null default 'false', fmax int, is_fmax_partial boolean not null default 'false', strand smallint, phase int, residue_info text, locgroup int not null default 0, rank int not null default 0, constraint featureloc_c1 unique (feature_id,locgroup,rank), constraint featureloc_c2 check (fmin <= fmax) ); -- phase: phase of translation wrt srcfeature_id. 
Values are 0,1,2 create index featureloc_idx1 on featureloc (feature_id); create index featureloc_idx2 on featureloc (srcfeature_id); create index featureloc_idx3 on featureloc (srcfeature_id,fmin,fmax); -- ================================================ -- TABLE: feature_pub -- ================================================ create table feature_pub ( feature_pub_id serial not null, primary key (feature_pub_id), feature_id int not null, foreign key (feature_id) references feature (feature_id) on delete cascade INITIALLY DEFERRED, pub_id int not null, foreign key (pub_id) references pub (pub_id) on delete cascade INITIALLY DEFERRED, constraint feature_pub_c1 unique (feature_id,pub_id) ); create index feature_pub_idx1 on feature_pub (feature_id); create index feature_pub_idx2 on feature_pub (pub_id); -- ================================================ -- TABLE: featureprop -- ================================================ create table featureprop ( featureprop_id serial not null, primary key (featureprop_id), feature_id int not null, foreign key (feature_id) references feature (feature_id) on delete cascade INITIALLY DEFERRED, type_id int not null, foreign key (type_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED, value text null, rank int not null default 0, constraint featureprop_c1 unique (feature_id,type_id,rank) ); create index featureprop_idx1 on featureprop (feature_id); create index featureprop_idx2 on featureprop (type_id); -- ================================================ -- TABLE: featureprop_pub -- ================================================ create table featureprop_pub ( featureprop_pub_id serial not null, primary key (featureprop_pub_id), featureprop_id int not null, foreign key (featureprop_id) references featureprop (featureprop_id) on delete cascade INITIALLY DEFERRED, pub_id int not null, foreign key (pub_id) references pub (pub_id) on delete cascade INITIALLY DEFERRED, constraint featureprop_pub_c1 unique 
(featureprop_id,pub_id) ); create index featureprop_pub_idx1 on featureprop_pub (featureprop_id); create index featureprop_pub_idx2 on featureprop_pub (pub_id); -- ================================================ -- TABLE: feature_dbxref -- ================================================ -- links a feature to dbxrefs. Note that there is also feature.dbxref_id -- link for the primary dbxref link. create table feature_dbxref ( feature_dbxref_id serial not null, primary key (feature_dbxref_id), feature_id int not null, foreign key (feature_id) references feature (feature_id) on delete cascade INITIALLY DEFERRED, dbxref_id int not null, foreign key (dbxref_id) references dbxref (dbxref_id) on delete cascade INITIALLY DEFERRED, is_current boolean not null default 'true', constraint feature_dbxref_c1 unique (feature_id,dbxref_id) ); create index feature_dbxref_idx1 on feature_dbxref (feature_id); create index feature_dbxref_idx2 on feature_dbxref (dbxref_id); DBIx-DBStag-0.12/t/data/chado-fr.sql0000644000076500000240000001744111326157220015507 0ustar cainstaff create table feature_relationship ( feature_relationship_id serial not null, primary key (feature_relationship_id), subject_id int not null, foreign key (subject_id) references feature (feature_id) on delete cascade INITIALLY DEFERRED, object_id int not null, foreign key (object_id) references feature (feature_id) on delete cascade INITIALLY DEFERRED, type_id int not null, foreign key (type_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED, value text null, rank int not null default 0, constraint feature_relationship_c1 unique (subject_id,object_id,type_id,rank) ); create index feature_relationship_idx1 on feature_relationship (subject_id); create index feature_relationship_idx2 on feature_relationship (object_id); create index feature_relationship_idx3 on feature_relationship (type_id); create table feature_relationship_pub ( feature_relationship_pub_id serial not null, primary key 
(feature_relationship_pub_id), feature_relationship_id int not null, foreign key (feature_relationship_id) references feature_relationship (feature_relationship_id) on delete cascade INITIALLY DEFERRED, pub_id int not null, foreign key (pub_id) references pub (pub_id) on delete cascade INITIALLY DEFERRED, constraint feature_relationship_pub_c1 unique (feature_relationship_id,pub_id) ); create index feature_relationship_pub_idx1 on feature_relationship_pub (feature_relationship_id); create index feature_relationship_pub_idx2 on feature_relationship_pub (pub_id); create table feature_relationshipprop ( feature_relationshipprop_id serial not null, primary key (feature_relationshipprop_id), feature_relationship_id int not null, foreign key (feature_relationship_id) references feature_relationship (feature_relationship_id) on delete cascade, type_id int not null, foreign key (type_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED, value text null, rank int not null default 0, constraint feature_relationshipprop_c1 unique (feature_relationship_id,type_id,rank) ); create index feature_relationshipprop_idx1 on feature_relationshipprop (feature_relationship_id); create index feature_relationshipprop_idx2 on feature_relationshipprop (type_id); create table feature_relationshipprop_pub ( feature_relationshipprop_pub_id serial not null, primary key (feature_relationshipprop_pub_id), feature_relationshipprop_id int not null, foreign key (feature_relationshipprop_id) references feature_relationshipprop (feature_relationshipprop_id) on delete cascade INITIALLY DEFERRED, pub_id int not null, foreign key (pub_id) references pub (pub_id) on delete cascade INITIALLY DEFERRED, constraint feature_relationshipprop_pub_c1 unique (feature_relationshipprop_id,pub_id) ); create index feature_relationshipprop_pub_idx1 on feature_relationshipprop_pub (feature_relationshipprop_id); create index feature_relationshipprop_pub_idx2 on feature_relationshipprop_pub (pub_id); 
create table feature_cvterm ( feature_cvterm_id serial not null, primary key (feature_cvterm_id), feature_id int not null, foreign key (feature_id) references feature (feature_id) on delete cascade INITIALLY DEFERRED, cvterm_id int not null, foreign key (cvterm_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED, pub_id int not null, foreign key (pub_id) references pub (pub_id) on delete cascade INITIALLY DEFERRED, constraint feature_cvterm_c1 unique (feature_id,cvterm_id,pub_id) ); create index feature_cvterm_idx1 on feature_cvterm (feature_id); create index feature_cvterm_idx2 on feature_cvterm (cvterm_id); create index feature_cvterm_idx3 on feature_cvterm (pub_id); create table feature_cvtermprop ( feature_cvtermprop_id serial not null, primary key (feature_cvtermprop_id), feature_cvterm_id int not null, foreign key (feature_cvterm_id) references feature_cvterm (feature_cvterm_id) on delete cascade, type_id int not null, foreign key (type_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED, value text null, rank int not null default 0, constraint feature_cvtermprop_c1 unique (feature_cvterm_id,type_id,rank) ); create index feature_cvtermprop_idx1 on feature_cvtermprop (feature_cvterm_id); create index feature_cvtermprop_idx2 on feature_cvtermprop (type_id); create table feature_cvterm_dbxref ( feature_cvterm_dbxref_id serial not null, primary key (feature_cvterm_dbxref_id), feature_cvterm_id int not null, foreign key (feature_cvterm_id) references feature_cvterm (feature_cvterm_id) on delete cascade, dbxref_id int not null, foreign key (dbxref_id) references dbxref (dbxref_id) on delete cascade INITIALLY DEFERRED, constraint feature_cvterm_dbxref_c1 unique (feature_cvterm_id,dbxref_id) ); create index feature_cvterm_dbxref_idx1 on feature_cvterm_dbxref (feature_cvterm_id); create index feature_cvterm_dbxref_idx2 on feature_cvterm_dbxref (dbxref_id); COMMENT ON TABLE feature_cvterm_dbxref IS 'Additional dbxrefs for an 
association. Rows in the feature_cvterm table may be backed up by dbxrefs. For example, a feature_cvterm association that was inferred via a protein-protein interaction may be backed by by refering to the dbxref for the alternate protein. Corresponds to the WITH column in a GO gene association file (but can also be used for other analagous associations). See http://www.geneontology.org/doc/GO.annotation.shtml#file for more details'; -- ================================================ -- TABLE: synonym -- ================================================ create table synonym ( synonym_id serial not null, primary key (synonym_id), name varchar(255) not null, type_id int not null, foreign key (type_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED, synonym_sgml varchar(255) not null, constraint synonym_c1 unique (name,type_id) ); -- type_id: types would be symbol and fullname for now -- synonym_sgml: sgml-ized version of symbols create index synonym_idx1 on synonym (type_id); -- ================================================ -- TABLE: feature_synonym -- ================================================ create table feature_synonym ( feature_synonym_id serial not null, primary key (feature_synonym_id), synonym_id int not null, foreign key (synonym_id) references synonym (synonym_id) on delete cascade INITIALLY DEFERRED, feature_id int not null, foreign key (feature_id) references feature (feature_id) on delete cascade INITIALLY DEFERRED, pub_id int not null, foreign key (pub_id) references pub (pub_id) on delete cascade INITIALLY DEFERRED, is_current boolean not null default 'true', is_internal boolean not null default 'false', constraint feature_synonym_c1 unique (synonym_id,feature_id,pub_id) ); -- pub_id: the pub_id link is for relating the usage of a given synonym to the -- publication in which it was used -- is_current: the is_current bit indicates whether the linked synonym is the -- current -official- symbol for the linked feature -- 
is_internal: typically a synonym exists so that somebody querying the db with an -- obsolete name can find the object they're looking for (under its current -- name. If the synonym has been used publicly & deliberately (eg in a -- paper), it my also be listed in reports as a synonym. If the synonym -- was not used deliberately (eg, there was a typo which went public), then -- the is_internal bit may be set to 'true' so that it is known that the -- synonym is "internal" and should be queryable but should not be listed -- in reports as a valid synonym. create index feature_synonym_idx1 on feature_synonym (synonym_id); create index feature_synonym_idx2 on feature_synonym (feature_id); create index feature_synonym_idx3 on feature_synonym (pub_id); DBIx-DBStag-0.12/t/data/chado-pub.sql0000644000076500000240000001174711326157220015671 0ustar cainstaff-- We should take a look in OMG for a standard representation we might use -- instead of this. -- ================================================ -- TABLE: pub -- ================================================ create table pub ( pub_id serial not null, primary key (pub_id), title text, volumetitle text, volume varchar(255), series_name varchar(255), issue varchar(255), pyear varchar(255), pages varchar(255), miniref varchar(255), uniquename text not null, type_id int not null, foreign key (type_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED, is_obsolete boolean default 'false', publisher varchar(255), pubplace varchar(255), constraint pub_c1 unique (uniquename,type_id) ); -- title: title of paper, chapter of book, journal, etc -- volumetitle: title of part if one of a series -- series_name: full name of (journal) series -- pages: page number range[s], eg, 457--459, viii + 664pp, lv--lvii -- type_id: the type of the publication (book, journal, poem, graffiti, etc) -- is_obsolete: do we want this even though we have the relationship in pub_relationship? 
create index pub_idx1 on pub (type_id); -- ================================================ -- TABLE: pub_relationship -- ================================================ -- Handle relationships between publications, eg, when one publication -- makes others obsolete, when one publication contains errata with -- respect to other publication(s), or when one publication also -- appears in another pub (I think these three are it - at least for fb) create table pub_relationship ( pub_relationship_id serial not null, primary key (pub_relationship_id), subject_id int not null, foreign key (subject_id) references pub (pub_id) on delete cascade INITIALLY DEFERRED, object_id int not null, foreign key (object_id) references pub (pub_id) on delete cascade INITIALLY DEFERRED, type_id int not null, foreign key (type_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED, constraint pub_relationship_c1 unique (subject_id,object_id,type_id) ); create index pub_relationship_idx1 on pub_relationship (subject_id); create index pub_relationship_idx2 on pub_relationship (object_id); create index pub_relationship_idx3 on pub_relationship (type_id); -- ================================================ -- TABLE: pub_dbxref -- ================================================ -- Handle links to eg, pubmed, biosis, zoorec, OCLC, mdeline, ISSN, coden... 
create table pub_dbxref ( pub_dbxref_id serial not null, primary key (pub_dbxref_id), pub_id int not null, foreign key (pub_id) references pub (pub_id) on delete cascade INITIALLY DEFERRED, dbxref_id int not null, foreign key (dbxref_id) references dbxref (dbxref_id) on delete cascade INITIALLY DEFERRED, constraint pub_dbxref_c1 unique (pub_id,dbxref_id) ); create index pub_dbxref_idx1 on pub_dbxref (pub_id); create index pub_dbxref_idx2 on pub_dbxref (dbxref_id); -- ================================================ -- TABLE: author -- ================================================ -- using the FB author table columns create table author ( author_id serial not null, primary key (author_id), contact_id int null, foreign key (contact_id) references contact (contact_id) INITIALLY DEFERRED, surname varchar(100) not null, givennames varchar(100), suffix varchar(100), constraint author_c1 unique (surname,givennames,suffix) ); -- givennames: first name, initials -- suffix: Jr., Sr., etc -- ================================================ -- TABLE: pub_author -- ================================================ create table pub_author ( pub_author_id serial not null, primary key (pub_author_id), author_id int not null, foreign key (author_id) references author (author_id) on delete cascade INITIALLY DEFERRED, pub_id int not null, foreign key (pub_id) references pub (pub_id) on delete cascade INITIALLY DEFERRED, rank int not null, editor boolean default 'false', constraint pub_author_c1 unique (author_id,pub_id) ); -- rank: order of author in author list for this pub -- editor: indicates whether the author is an editor for linked publication create index pub_author_idx1 on pub_author (author_id); create index pub_author_idx2 on pub_author (pub_id); -- ================================================ -- TABLE: pubprop -- ================================================ create table pubprop ( pubprop_id serial not null, primary key (pubprop_id), pub_id int not null, foreign 
key (pub_id) references pub (pub_id) on delete cascade INITIALLY DEFERRED, type_id int not null, foreign key (type_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED, value text not null, rank integer, constraint pubprop_c1 unique (pub_id,type_id,value) ); create index pubprop_idx1 on pubprop (pub_id); create index pubprop_idx2 on pubprop (type_id); DBIx-DBStag-0.12/t/data/game.el0000644000076500000240000000035011326157220014525 0ustar cainstaff'(game (version "1") (seq (res "xxx") (dbxref (db "x") (acc "1")) ) (annot (dbxref (db "x") (acc "2")) (name "cg1") (fset (name "cg1-ra") (fspan (fstart 1) (fstop 2))))) DBIx-DBStag-0.12/t/data/mset.xml0000644000076500000240000001000111326157220014756 0ustar cainstaff actor wars sci-fi lucas george US han solo ford harrison princess leia fisher carrie luke skywalker hamill mark darth vader earl-jones james prowse david obi-wan kenobi guiness alec attack of the clones sci-fi lucas george US obi-wan kenobi mcgregor ewan princess amigdala portman natalie braindead horror jackson peter new zealand - - - lord of the rings fantasy jackson peter new zealand saruman lee christopher gandalf kellan ian seven samurai samurai kurosawa akira japan Kikuchiyo mifune toshiro terminator sci-fi cameron john US terminator schwarzenegger arnold terminator2 sci-fi cameron john US terminator schwarzenegger arnold barton fink odd coen joel US coen ethan US barton fink turturro john charlie meadows goodman john DBIx-DBStag-0.12/t/data/parts-data.xml0000644000076500000240000000137211326157220016061 0ustar cainstaff 1 1a 1b 1b-I 1b-II DBIx-DBStag-0.12/t/data/parts-schema.sql0000644000076500000240000000064111326157220016405 0ustar cainstaffCREATE TABLE component ( component_id SERIAL NOT NULL PRIMARY KEY, name VARCHAR(32) NOT NULL, descr TEXT, UNIQUE (name) ); CREATE TABLE part_of ( part_of_id SERIAL NOT NULL PRIMARY KEY, subject_id INTEGER NOT NULL REFERENCES component(component_id), object_id INTEGER NOT NULL REFERENCES 
component(component_id), UNIQUE (subject_id,object_id) ); DBIx-DBStag-0.12/t/data/relationship.chado-xml0000644000076500000240000007574511326157220017615 0ustar cainstaff relationship synonym_type cvterm_property_type anonymous OBO_REL internal internal cvterm_property_type cvterm_property_type comment internal is_anonymous cvterm_property_type is_anonymous OBO_REL is_a relationship is_a 1 internal intersection_of cvterm_property_type intersection_of 1 OBO_REL relationship relationship relationship 1 OBO_REL is_a is_a relationship internal is_reflexive cvterm_property_type is_reflexive 1 internal is_anti_symmetric cvterm_property_type is_anti_symmetric 1 internal is_transitive cvterm_property_type is_transitive 1 1 is_subtype_of internal exact synonym_type exact owl subClassOf OBO_REL part_of part_of relationship internal is_reflexive cvterm_property_type is_reflexive 1 internal is_anti_symmetric cvterm_property_type is_anti_symmetric 1 internal is_transitive cvterm_property_type is_transitive 1 1 OBO_REL has_part has_part relationship internal is_reflexive cvterm_property_type is_reflexive 1 internal is_anti_symmetric cvterm_property_type is_anti_symmetric 1 internal is_transitive cvterm_property_type is_transitive 1 1 OBO_REL integral_part_of integral_part_of relationship internal is_reflexive cvterm_property_type is_reflexive 1 internal is_anti_symmetric cvterm_property_type is_anti_symmetric 1 internal is_transitive cvterm_property_type is_transitive 1 1 OBO_REL has_integral_part has_integral_part relationship internal is_reflexive cvterm_property_type is_reflexive 1 internal is_anti_symmetric cvterm_property_type is_anti_symmetric 1 internal is_transitive cvterm_property_type is_transitive 1 1 OBO_REL proper_part_of proper_part_of relationship internal is_transitive cvterm_property_type is_transitive 1 1 OBO_REL has_proper_part has_proper_part relationship internal is_transitive cvterm_property_type is_transitive 1 1 OBO_REL has_improper_part has_improper_part 
relationship internal is_reflexive cvterm_property_type is_reflexive 1 internal is_transitive cvterm_property_type is_transitive 1 1 OBO_REL improper_part_of improper_part_of relationship internal is_reflexive cvterm_property_type is_reflexive 1 internal is_transitive cvterm_property_type is_transitive 1 1 OBO_REL located_in located_in relationship internal is_transitive cvterm_property_type is_transitive 1 internal is_reflexive cvterm_property_type is_reflexive 1 1 OBO_REL location_of location_of relationship internal is_transitive cvterm_property_type is_transitive 1 internal is_reflexive cvterm_property_type is_reflexive 1 1 OBO_REL contained_in contained_in relationship 1 OBO_REL contains contains relationship 1 OBO_REL adjacent_to adjacent_to relationship 1 OBO_REL transformation_of transformation_of relationship internal is_transitive cvterm_property_type is_transitive 1 1 OBO_REL transformed_into transformed_into relationship internal is_transitive cvterm_property_type is_transitive 1 1 OBO_REL derives_from derives_from relationship internal is_transitive cvterm_property_type is_transitive 1 1 OBO_REL derived_into derived_into relationship internal is_transitive cvterm_property_type is_transitive 1 1 OBO_REL preceded_by preceded_by relationship internal is_transitive cvterm_property_type is_transitive 1 1 OBO_REL precedes precedes relationship internal is_transitive cvterm_property_type is_transitive 1 1 OBO_REL has_participant has_participant relationship 1 OBO_REL participates_in participates_in relationship 1 OBO_REL has_agent has_agent relationship 1 OBO_REL agent_in agent_in relationship 1 OBO_REL instance_of instance_of relationship 1 is_a OBO_REL:part_of OBO_REL:relationship is_a OBO_REL:has_part OBO_REL:relationship is_a OBO_REL:integral_part_of OBO_REL:part_of is_a OBO_REL:has_integral_part OBO_REL:part_of is_a OBO_REL:proper_part_of OBO_REL:part_of is_a OBO_REL:has_proper_part OBO_REL:part_of is_a OBO_REL:has_improper_part OBO_REL:part_of is_a 
OBO_REL:improper_part_of OBO_REL:part_of is_a OBO_REL:located_in OBO_REL:relationship is_a OBO_REL:location_of OBO_REL:relationship is_a OBO_REL:contained_in OBO_REL:relationship is_a OBO_REL:contains OBO_REL:relationship is_a OBO_REL:adjacent_to OBO_REL:relationship is_a OBO_REL:transformation_of OBO_REL:relationship is_a OBO_REL:transformed_into OBO_REL:relationship is_a OBO_REL:derives_from OBO_REL:relationship is_a OBO_REL:derived_into OBO_REL:relationship is_a OBO_REL:preceded_by OBO_REL:relationship is_a OBO_REL:precedes OBO_REL:relationship is_a OBO_REL:has_participant OBO_REL:relationship is_a OBO_REL:participates_in OBO_REL:relationship is_a OBO_REL:has_agent OBO_REL:relationship is_a OBO_REL:agent_in OBO_REL:relationship is_a OBO_REL:instance_of OBO_REL:relationship DBIx-DBStag-0.12/t/data/sofa.chado-xml0000644000076500000240000144022711326157220016034 0ustar cainstaff relationship synonym_type cvterm_property_type OBO_REL internal internal cvterm_property_type cvterm_property_type comment OBO_REL is_a relationship is_a 1 OBO_REL derived_from derived_from relationship 1 OBO_REL part_of part_of relationship 1 SO 0000436 ARS sequence A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host. SO ma 1 autonomously_replicating_sequence SOFA SOFA SO 0000153 1 BAC (obsolete SO:0000153) sequence Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host. SO ma 1 SOFA SOFA SO 0000316 CDS sequence A contiguous RNA sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon. SO ma 1 coding_sequence SOFA SOFA SO 0000595 1 C_D_box_snoRNA_primary_transcript (obsolete SO:0000595) sequence A primary transcript encoding a small nucleolar RNA of the box C/D family. 
SO ke 1 C/D_box_snoRNA_primary_transcript SOFA SOFA SO 0000307 CpG_island sequence Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes. SO rd 1 CG_island SOFA SOFA SO 0000182 1 DNA_transposon (obsolete SO:0000182) sequence A transposon where the mechanism of transposition is via a DNA intermediate. SO ke 1 SOFA SOFA SO 0000297 1 D_loop (obsolete SO:0000297) sequence Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein. SOFA SOFA SO 0000345 EST sequence Expressed Sequence Tag: The sequence of a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long. http //genomics.phrma.org/lexicon/e.html 1 expressed_sequence_tag SOFA SOFA SO 0000668 EST_match sequence A match against an EST sequence. SO ke 1 SOFA SOFA SO 0000596 1 H_ACA_box_snoRNA_primary_transcript (obsolete SO:0000596) sequence A primary transcript encoding a small nucleolar RNA of the box H/ACA family. SO ke 1 H/ACA_box_snoRNA_primary_transcript SOFA SOFA SO 0000194 1 LINE_element (obsolete SO:0000194) sequence A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats. http www.ucl.ac.uk/~ucbhjow/b241/glossary.html 1 SOFA SOFA SO 0000186 1 LTR_retrotransposon (obsolete SO:0000186) sequence A retrotransposon flanked by long terminal repeat sequences. 
SO ke 1 SOFA SOFA SO 0000435 1 LTR_retrotransposon_poly_purine_tract (obsolete SO:0000435) sequence A polypurine tract within an LTR_retrotransposon. SO ke 1 SOFA SOFA SO 0000338 1 MITE (obsolete SO:0000338) sequence A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITES do not encode proteins. http www.pnas.org/cgi/content/full/97/18/10083 1 SOFA SOFA SO 0000236 ORF sequence A nucleic acid sequence that, when read as sequential triplets, has the potential of encoding a sequential string of amino acids. SO ma 1 open_reading_frame SOFA SOFA SO 0000154 1 PAC (obsolete SO:0000154) sequence P1 Artificial Chromosome. These vectors can hold large inserts, typically 80-200 kb, and propagate in E. coli as a single copy episome. http //www.ncbi.nlm.nih.gov/genome/guide/mouse/glossary.htm 1 SOFA SOFA SO 0000006 PCR_product sequence A region amplified by a PCR reaction. SO ke 1 amplicon SOFA SOFA SO 0000193 RFLP_fragment sequence A polymorphism detectable by the size differences in DNA fragments generated by a restriction enzyme. PMID 6247908 1 SOFA SOFA SO 0000337 RNAi_reagent sequence A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference. SO rd 1 SOFA SOFA SO 0000385 RNase_MRP_RNA sequence The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs. http //www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030 1 SOFA SOFA SO 0000386 RNase_P_RNA sequence The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. 
Its best characterised activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs. http //www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010 1 SOFA SOFA SO 0000326 SAGE_tag sequence A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts. http //www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract 1 SOFA SOFA SO 0000206 1 SINE_element (obsolete SO:0000206) sequence A repetitive element, a few hundred base pairs long,that is dispersed throughout the genome. A common human SINE is the Alu element. SO ke 1 SOFA SOFA SO 0000694 SNP sequence SNPs are single base pair positions in genomic DNA at which different sequence alternatives (alleles) exist in normal individuals in some population(s), wherein the least frequent allele has an abundance of 1% or greater. http //www.cgr.ki.se/cgb/groups/brookes/Articles/essence_of_snps_article.pdf 1 single_nucleotide_polymorphism SOFA SOFA SO 0000590 SRP_RNA sequence The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. 
The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding. http //www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017 1 7S RNA signal_recognition_particle_RNA SOFA SOFA SO 0000589 1 SRP_RNA_primary_transcript (obsolete SO:0000589) sequence A primary transcript encoding a signal recognition particle RNA. SO ke 1 signal_recognition_particle_RNA_primary_transcript SOFA SOFA SO 0000331 STS sequence Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known. http //www.biospace.com 1 sequence_tag_site SOFA SOFA SO 0000000 Sequence_Ontology sequence SOFA SOFA SO 0000235 TF_binding_site sequence A region of a molecule that binds to a transcription factor. 
SO ke 1 SOFA SOFA SO 0000398 U11_snRNA sequence U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence. PMID 9622129 1 SOFA SOFA SO 0000399 U12_snRNA sequence The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns. http //www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007 1 SOFA SOFA SO 0000403 U14_snRNA sequence U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates. http //www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016 1 SOFA SOFA SO 0000391 U1_snRNA sequence U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV. http //www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003 1 SOFA SOFA SO 0000392 U2_snRNA sequence U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). 
Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing. http //www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004 1 SOFA SOFA SO 0000393 U4_snRNA sequence U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6. http //www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015 1 SOFA SOFA SO 0000394 U4atac_snRNA sequence An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397). PMID =12409455 1 SOFA SOFA SO 0000395 U5_snRNA sequence U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation. http //www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020 1 SOFA SOFA SO 0000396 U6_snRNA sequence U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA. http //www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015 1 SOFA SOFA SO 0000397 U6atac_snRNA sequence U6atac_snRNA -An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. 
It forms a base paired complex with U4atac_snRNA (SO:0000394). http http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=1 2409455&dopt=Abstract 1 SOFA SOFA SO 0000203 UTR sequence Messenger RNA sequences that are untranslated and lie five prime and three prime to sequences which are translated. SO ke 1 untranslated_region SOFA SOFA SO 0000152 1 YAC (obsolete SO:0000152) sequence Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells. SO ma 1 SOFA SOFA SO 0000405 Y_RNA sequence Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin. http //www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019 1 SOFA SOFA SO 0000644 antisense_RNA sequence Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA. SO ke 1 SOFA SOFA SO 0000645 antisense_primary_transcript sequence The reverse complement of the primary transcript. SO ke 1 SOFA SOFA SO 0000353 assembly sequence A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences. SO ma 1 consensus_sequence SOFA SOFA SO 0000143 assembly_component sequence A region of sequence which may be used to manufacture a longer assembled, sequence. SO ke 1 SOFA SOFA SO 0000140 attenuator sequence A sequence segment located between the promoter and a structural gene that causes partial termination of transcription. 
SOFA SOFA SO 0000588 autocatalytically_spliced_intron sequence A self spliced intron. SO ke 1 SOFA SOFA SO 0000409 binding_site sequence A region on the surface of a molecule that may interact with another molecule. SO ke 1 SOFA SOFA SO 0000611 branch_site sequence A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat. SO ke 1 branch_point SOFA SOFA SO 0000317 1 cDNA_clone (obsolete SO:0000317) sequence Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propogation in a suitable host. http //seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html 1 SOFA SOFA SO 0000689 cDNA_match sequence A match against cDNA sequence. SO ke 1 SOFA SOFA SO 0000581 cap sequence A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA. http //seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html 1 SOFA SOFA SO 0000577 centromere sequence A region of chromosome where the spindle fibers attach during mitosis and meiosis. SO ke 1 SO 0000362 1 chimeric_cDNA_clone (obsolete SO:0000362) sequence A cDNA clone constructed from more than one mRNA. Usually an experimental artifact. SO ma 1 SOFA SOFA SO 0000628 chromosomal_structural_element sequence SO 0000340 chromosome sequence Structural unit composed of long DNA molecule. http //biotech.icmb.utexas.edu/search/dict-search.mhtml 1 SOFA SOFA SO 0000303 clip sequence Part of the primary transcript that is clipped off during processing. SO ke 1 SOFA SOFA SO 0000151 clone sequence A piece of DNA that has been inserted in a vector so that it can be propagated in E. coli or some other organism. http //www.geospiza.com/community/support/glossary/ 1 SOFA SOFA SO 0000103 clone_end sequence The end of the clone insert. 
SO ke 1 SOFA SOFA SO 0000179 clone_start sequence The start of the clone insert. SO ke 1 SOFA SOFA SO 0000360 codon sequence A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together signify a unique amino acid or the termination of translation. http //genomics.phrma.org/lexicon/c.html 1 comment_type codon is not part of CDS because the stop codon is not part of CDS. 0 SOFA SOFA SO 1000005 complex_substitution sequence When no simple or well defined DNA mutation event describes the observed DNA change, the keyword "complex" should be used. Usually there are multiple equally plausible explanations for the change. http //www.ebi.ac.uk/mutations/recommendations/mutevent.html 1 SOFA SOFA SO 0000371 1 conjugative_transposon (obsolete SO:0000371) sequence A transposon that encodes function required for conjugation. http //www.sci.sdsu.edu/ ~ smaloy/Glossary/C.html 1 SOFA SOFA SO 0000149 contig sequence A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unvailable bases. SO ls 1 SOFA SOFA SO 0000156 1 cosmid (obsolete SO:0000156) sequence A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as aplasmids or packaged as a phage,since they retain the lambda cos sites. SO ma 1 comment_type vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1)\\\:9-20. 0 SOFA SOFA SO 0000177 cross_genome_match sequence A nucleotide match against a sequence from another organism. SO ma 1 SOFA SOFA SO 2000061 databank_entry sequence The sequence referred to by an entry in a databank such as Genbank or SwissProt. SO ke 1 accession SOFA SOFA SO 0000464 decayed_exon sequence A non-functional descendent of an exon. SO ke 1 SOFA SOFA SO 0000045 1 delete (obsolete SO:0000045) sequence To remove a subsection of sequence. SO ke 1 SOFA SOFA SO 0000159 deletion sequence The sequence that is deleted. 
SO ke 1 SOFA SOFA SO 0000687 deletion_junction sequence The space between two bases in a sequence which marks the position where a deletion has occured. SO ke 1 SOFA SOFA SO 0000314 direct_repeat sequence he same sequence is repeated in the same direction. Example: GCTGA-----GCTGA. SO ke 1 SOFA SOFA SO 0000658 dispersed_repeat sequence A repeat that is located at dispersed sites in the genome. SO ke 1 SOFA SOFA SO 0000165 enhancer sequence A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter. SOFA SOFA SO 0000372 enzymatic_RNA sequence A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression. SO ma 1 SOFA SOFA SO 0000147 exon sequence A region of the genome that codes for portion of spliced messenger RNA (SO:0000234); may contain 5'-untranslated region (SO:0000204), all open reading frames (SO:0000236) and 3'-untranslated region (SO:0000205). SOFA SOFA SO 0000333 exon_junction sequence The boundary between two exons in a processed transcript. SO ke 1 SOFA SOFA SO 0000703 experimental_result_region sequence A region of sequence implicated in an experimental result. SO ke 1 SOFA SOFA SO 0000102 expressed_sequence_match sequence A match to an EST or cDNA sequence. SO ke 1 SOFA SOFA SO 0000640 1 external_transcribed_spacer_region (obsolete SO:0000640) sequence Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA. SO ke 1 SOFA SOFA SO 0000204 five_prime_UTR sequence A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein. SOFA SOFA SO 0000239 flanking_region sequence The DNA sequences extending on either side of a specific locus. 
http //biotech.icmb.utexas.edu/search/dict-search.mhtml 1 SOFA SOFA SO 0000238 1 foldback_element (obsolete SO:0000238) sequence A transposable element with extensive secondary structure, characterised by large modular imperfect long inverted repeats http www.genetics.org/cgi/reprint/156/4/1983.pdf 1 SOFA SOFA SO 0000158 1 fosmid (obsolete SO:0000158) sequence A cloning vector that utilises the E. coli F factor. SO ma 1 comment_type Birren BW et al. A human chromosome 22 fosmid resource\\\: mapping and analysis of 96 clones. Genomics 1996; 0 SOFA SOFA SO 0000704 gene sequence "Gene" is an abstract term used to describe a collection of transcripts and related regulatory features. SO ke 1 SOFA SOFA SO 0005855 gene_group sequence A collection of related genes. SO ma 1 SOFA SOFA SO 0000050 gene_part sequence A part of a gene. SO ke 1 SOFA SOFA SO 0000040 1 genomic_clone (obsolete SO:0000040) sequence A clone of a DNA region of a genome. SO ma 1 SOFA SOFA SO 0000688 golden_path sequence A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence. SO ls 1 SOFA SOFA SO 0000468 golden_path_fragment sequence One of the pieces of sequence that make up a golden path. SO rd 1 SOFA SOFA SO 0000603 group_II_intron sequence Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. 
Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny. http //www.sanger.ac.uk/Software/Rfam/browse/index.shtml 1 SOFA SOFA SO 0000587 group_I_intron sequence Group I catalytic introns are large self-splicing ribozymes. They catalyse their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions. http //www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028 1 SOFA SOFA SO 0000602 guide_RNA sequence short 3'-uridylated RNA that can form a perfect duplex (except for the oligoU tail (SO:0000609)) with a stretch of mature edited mRNA. http //www.rna.ucla.edu/index.html 1 gRNA SOFA SOFA SO 0000380 hammerhead_ribozyme sequence A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs. http rnaworld.bio.ukans.edu/class/RNA/RNA00/RNA_World_3.html 1 SOFA SOFA SO 0000544 1 helitron (obsolete SO:0000544) sequence A rolling circle transposon. Autonomous Helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons. http //www.pnas.org/cgi/content/full/100/11/6569 1 SOFA SOFA SO 0000046 1 insert (obsolete SO:0000046) sequence To insert a subsection of sequence. 
SO ke 1 SO 0000667 insertion sequence A region of sequence identified as having been inserted. SO ke 1 SOFA SOFA SO 0000366 insertion_site sequence The site where an insertion occurred. SO ke 1 SOFA SOFA SO 0000113 integrated_virus sequence A viral sequence which has integrated into the host genome. SO ke 1 SOFA SOFA SO 0000605 intergenic_region sequence The region between two known genes. SO ke 1 SOFA SOFA SO 0000639 1 internal_transcribed_spacer_region (obsolete SO:0000639) sequence Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs. SO ke 1 SOFA SOFA SO 0000188 intron sequence A segment of DNA that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it. SOFA SOFA SO 1000036 inversion sequence A continuous nucleotide sequence is inverted in the same position. http //www.ebi.ac.uk/mutations/recommendations/mutevent.html 1 SOFA SOFA SO 0000047 1 invert (obsolete SO:0000047) sequence To invert a subsection of sequence SO ke 1 SO 0000294 inverted_repeat sequence The sequence is complementarily repeated on the opposite strand. Example: GCTGA-----TCAGC. SO ke 1 SOFA SOFA SO 0000699 junction sequence A junction refers to an interbase location of zero in a sequence. SO ke 1 SOFA SOFA SO 0000160 1 lambda_clone (obsolete SO:0000160) sequence A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome. ISBN 0-1767-2380-8 1 SOFA SOFA SO 0000018 1 linkage_group (obsolete SO:0000018) sequence A group of loci that can be grouped in a linear order representing the different degrees of linkage among the genes concerned. ISBN 038752046 1 SO 0000110 located_sequence_feature sequence A biological feature that can be attributed to a region of biological sequence. 
SO ke 1 SOFA SOFA SO 0000286 1 long_terminal_repeat (obsolete SO:0000286) sequence A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses. LTR direct_terminal _repeat SOFA SOFA SO 0000234 mRNA sequence The intermediate molecule between DNA and protein. SO ma 1 messenger_RNA SOFA SOFA SO 0000343 match sequence A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4. SO ke 1 SOFA SOFA SO 0000039 match_part sequence A part of a match, for example an hsp from BLAST isa match_part. SO ke 1 SOFA SOFA SO 0000038 match_set sequence A collection of match parts SO ke 1 SOFA SOFA SO 0000419 mature_peptide sequence The coding sequence for the mature or final peptide or protein product following post-translational modification. http www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html 1 SOFA SOFA SO 0000161 methylated_A sequence A methylated adenine. SO ke 1 SOFA SOFA SO 0000114 methylated_C sequence A methylated deoxy-cytosine. SO ke 1 SOFA SOFA SO 0000306 methylated_base_feature sequence A nucleotide modified by methylation. SO ke 1 SOFA SOFA SO 0000580 1 methylation_guide_snoRNA_primary_transcript (obsolete SO:0000580) sequence A primary transcript encoding a methylation guide small nucleolar RNA. SO ke 1 SOFA SOFA SO 0000276 miRNA sequence Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. miRNAs are produced from precursor molecules (SO:0000647) that can form local hairpin strcutures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpinprecursor molecule. miRNAs may trigger the cleavage of their target molecules oract as translational repressors. PMID 12592000 1 micro_RNA SOFA SOFA SO 0000647 1 miRNA_primary_transcript (obsolete SO:0000647) sequence A primary transcript encoding a micro RNA. 
SO ke 1 micro_RNA_primary_transcript SOFA SOFA SO 0000289 microsatellite sequence A very short unit sequence of DNA (2 to 4 bp) that is repeated multiple times in tandem. http //www.informatics.jax.org/silver/glossary.shtml 1 SOFA SOFA SO 0000643 minisatellite sequence A repetitive sequence spanning 500 to 20,000 base pairs (a repeat unit is 5 - 30 base pairs). http //www.rerf.or.jp/eigo/glossary/minisate.htm 1 SOFA SOFA SO 0000305 modified_base_site sequence A modified nucleotide, i.e. a nucleotide other than A, T, C. G or (in RNA) U. comment_type modified base\\\:<modified_base> 0 SOFA SOFA SO 0000655 ncRNA sequence An mRNA sequence that does not encode for a protein rather the RNA molecule is the gene product. SO ke 1 noncoding_RNA SOFA SOFA SO 0000483 nc_primary_transcript sequence A primary transcript that is never translated into a protein. SO ke 1 noncoding_primary_transcript SOFA SOFA SO 0000189 1 non_LTR_retrotransposon (obsolete SO:0000189) sequence A retrotransposon without long terminal repeat sequences. SO ke 1 SOFA SOFA SO 0000433 1 non_LTR_retrotransposon_polymeric_tract (obsolete SO:0000433) sequence A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon. SO ke 1 SOFA SOFA SO 0000684 nuclease_sensitive_site sequence A region of nucleotide sequence targeting by a nuclease enzyme. SO ma 1 SOFA SOFA SO 0000347 nucleotide_match sequence A match against a nucleotide sequence. SO ke 1 SOFA SOFA SO 0000714 nucleotide_motif sequence A region of nucleotide sequence corresponding to a known motif. SO ke 1 SOFA SOFA SO 0000696 oligo sequence A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded. SO ma 1 oligonucleotide SOFA SOFA SO 0000609 1 oligo_U_tail (obsolete SO:0000609) sequence The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602). 
http //www.rna.ucla.edu/ 1 SOFA SOFA SO 0000178 operon sequence A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regualtory region. SO ma 1 SOFA SOFA SO 0000296 origin_of_replication sequence The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies. SOFA SOFA SO 0000157 1 phagemid (obsolete SO:0000157) sequence A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with "helper" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids. SO ma 1 SOFA SOFA SO 0000155 1 plasmid (obsolete SO:0000155) sequence A self-replicating circular DNA molecule that is distinct from a chromosome in the organism. SO ma 1 SOFA SOFA SO 1000008 point_mutation sequence A mutation event where a single DNA nucleotide changes into another nucleotide. http //www.ebi.ac.uk/mutations/recommendations/mutevent.html 1 SOFA SOFA SO 0000610 polyA_sequence sequence Sequence of about 100 nucleotides of A at the 3' end of most eukaryotic mRNAs. SO ke 1 SOFA SOFA SO 0000551 polyA_signal_sequence sequence The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA. SOFA SOFA SO 0000553 polyA_site sequence The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation. SOFA SOFA SO 0000104 polypeptide sequence A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversable denaturation. SO ma 1 SOFA SOFA SO 0000417 polypeptide_domain sequence A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. 
http www.molbiol.bbsrc.ac.uk/new_protein/domains.html 1 SOFA SOFA SO 0000612 polypyrimidine_tract sequence The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing. http //nar.oupjournals.org/cgi/content/full/25/4/888 1 SOFA SOFA SO 0000702 possible_assembly_error sequence A region of sequence where there may have been an error in the assembly. SO ke 1 SOFA SOFA SO 0000701 possible_base_call_error sequence A region of sequence where the validity of the base calling is questionable. SO ke 1 SOFA SOFA SO 0000502 precursor_miRNA sequence A miRNA transcript that has not been diced. SO cw 1 precursor_micro_RNA SOFA SOFA SO 0000516 precursor_rasiRNA sequence A rasiRNA transcript that has not been diced. SO cw 1 precursor_repeat_associated_small_interferring_RNA SOFA SOFA SO 0000507 precursor_siRNA sequence A siRNA transcript that has not been diced. SO cw 1 precursor_small_interferring_RNA SOFA SOFA SO 0000185 primary_transcript sequence The primary (initial, unprocessed) transcript; includes five_prime_clip (SO:0000555), five_prime_untranslated_region (SO:0000204), open reading frames (SO:0000236), introns (SO:0000188) and three_prime_ untranslated_region (three_prime_UTR), and three_prime_clip (SO:0000557). precursor_RNA SOFA SOFA SO 0000112 primer sequence A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase. http //www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html 1 SOFA SOFA SO 0005850 1 primer_binding_site (obsolete SO:0005850) sequence Non-covalent primer binding site for initiation of replication, transcription, or reverse transcription. http www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html 1 SOFA SOFA SO 0000233 processed_transcript sequence A transcript which has undergone processing. The processing may include intron removal, 5' capping and polyadenlyation. 
SO ke 1 SOFA SOFA SO 0000167 promoter sequence The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. SOFA SOFA SO 0000120 protein_coding_primary_transcript sequence A primary transcript that, at least in part, encodes one or more proteins. SO ke 1 SOFA SOFA SO 0000349 protein_match sequence A match against a protein sequence. SO ke 1 SOFA SOFA SO 0000336 pseudogene sequence A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their "normal" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its "normal" paralog). On occasion a pseudogene is functional as a consequence being "captured" by a non-paralogous gene, it is then known as a "captured_pseudogene" (SO:0100042). definition_reference: http //www.ucl.ac.uk/ ~ ucbhjow/b241/glossary.html 1 SOFA SOFA SO 0000462 pseudogenic_region sequence A non-functional descendent of a functional entitity. SO cjm 1 SOFA SOFA SO 0000252 rRNA sequence RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity. ribosomal_RNA SOFA SOFA SO 0000407 rRNA_18S sequence 18S_rRNA -A large polynucleotide which functions as a part of the small subunit of the ribosome SO ke 1 16S_rRNA 18S_rRNA SOFA SOFA SO 0000653 rRNA_28S sequence A component of the large ribosomal subunit. SO ke 1 23S_rRNA 28S_rRNA SOFA SOFA SO 0000375 rRNA_5.8S sequence 5.8S ribosomal RNA (5.8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. 
Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5.8S rRNA is also found in archaea. http //www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002 1 5.8S_rRNA SOFA SOFA SO 0000652 rRNA_5S sequence 5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilises 5S rRNA until it is required for transcription. http //www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001 1 5S_rRNA SOFA SOFA SO 0000582 1 rRNA_cleavage_snoRNA_primary_transcript (obsolete SO:0000582) sequence A primary transcript encoding an rRNA cleavage snoRNA. SO ke 1 SOFA SOFA SO 0000325 1 rRNA_large_subunit_primary_transcript (obsolete SO:0000325) sequence A primary transcript encoding a large ribosomal subunit RNA. SO ke 1 SO 0000209 1 rRNA_primary_transcript (obsolete SO:0000209) sequence A primary transcript encoding a ribosomal RNA. SO ke 1 ribosomal_RNA_primary_transcript SOFA SOFA SO 0000255 1 rRNA_small_subunit_primary_transcript (obsolete SO:0000255) sequence A primary transcript encoding a small ribosomal subunit RNA. SO ke 1 SO 0000454 rasiRNA sequence A small, 17-28-nt, small interfering RNA derived from transcripts ofrepetitive elements. http //www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284 1 repeat associated small interfering RNA SOFA SOFA SO 0000150 read sequence A sequence obtained from a single sequencing experiment. 
Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine. SO rd 1 SOFA SOFA SO 0000007 read_pair sequence A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert. SO ls 1 mate_pair SOFA SOFA SO 0000695 reagent sequence A sequence used in experiment. SO ke 1 SOFA SOFA SO 0000145 1 recoded_codon (obsolete SO:0000145) sequence . SO ke 1 SOFA SOFA SO 0000001 region sequence Continous sequence. SO ke 1 sequence SOFA SOFA SO 0005836 regulatory_region sequence A DNA sequence that controls the expression of a gene. http //www.genpromag.com/scripts/glossary.asp?LETTER=R 1 SOFA SOFA SO 1001284 regulon sequence A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal. ISBN 0198506732 1 SOFA SOFA SO 0000700 remark sequence A comment about the sequence. SO ke 1 SOFA SOFA SO 0000187 repeat_family sequence A group of characterized repeat sequences. SO ke 1 SOFA SOFA SO 0000657 repeat_region sequence A region of sequence containing one or more repeat sequences. SO ke 1 SOFA SOFA SO 0000168 restriction_enzyme_cut_site sequence A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA. SO ma 1 SOFA SOFA SO 0000412 restriction_fragment sequence Any of the individual polynucleotide sequences produced by digestion of DNA with a restriction endonuclease. http //www.agron.missouri.edu/cgi-bin/sybgw_mdb/mdb3/Term/119 1 SOFA SOFA SO 0000180 1 retrotransposon (obsolete SO:0000180) sequence A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase. http //www.genpromag.com/scripts/glossary.asp?LETTER=R 1 SOFA SOFA SO 0000139 ribosome_entry_site sequence Region in 5' UTR where ribosome assembles on mRNA. 
SO ke 1 comment_type gene\\\:<gene_id> 0 SOFA SOFA SO 0000374 ribozyme sequence An RNA with catalytic activity. SO ma 1 SOFA SOFA SO 0000013 scRNA sequence Any one of several small cytoplasmic RNA moleculespresent in the cytoplasm and sometimes nucleus of a eukaryote. http www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html 1 SOFA SOFA SO 0000012 1 scRNA_primary_transcript (obsolete SO:0000012) sequence The primary transcript of any one of several small cytoplasmic RNA moleculespresent in the cytoplasm and sometimes nucleus of a eukaryote. http www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html 1 small_cytoplasmic_RNA SOFA SOFA SO 0000413 sequence_difference sequence A region where the sequences differs from that of a specified sequence. SO ke 1 SOFA SOFA SO 0000109 sequence_variant sequence A region of sequence where variation has been observed. SO ke 1 mutation SOFA SOFA SO 0000646 siRNA sequence Small RNA molecule that is the product of a longerexogenous or endogenous dsRNA, which is either a bimolecular duplexe or very longhairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulatefrom both strands of the dsRNA. sRNAs trigger the cleavage of their target molecules. PMID 12592000 1 small_interfering_RNA SOFA SOFA SO 0000418 signal_peptide sequence The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. http www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html 1 SOFA SOFA SO 0000625 silencer sequence Combination of short DNA sequence elements which suppress the transcription of an adjacent gene or genes. SOFA SOFA SO 0000408 1 site (obsolete SO:0000408) sequence The interbase position where something (eg an aberration) occurred. SO ke 1 SOFA SOFA SO 0000370 small_regulatory_ncRNA sequence A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression. 
SO ma 1 SOFA SOFA SO 0000274 snRNA sequence Small non-coding RNA in the nucleoplasm. small_nuclear_RNA SOFA SOFA SO 0000231 1 snRNA_primary_transcript (obsolete SO:0000231) sequence A primary transcript encoding a small nuclear mRNA (SO:0000274). SO ke 1 SOFA SOFA SO 0000275 snoRNA sequence Small nucleolar RNAs (snoRNAs) are involved in the processing and modification of rRNA in the nucleolus. There are two main classes of snoRNAs: the box C/D class, and the box H/ACA class. U3 snoRNA is a member of the box C/D class. Indeed, the box C/D element is a subset of the six short sequence elements found in all U3 snoRNAs, namely boxes A, A', B, C, C', and D. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA. http //www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012 1 small_nucleolar_RNA SOFA SOFA SO 0000232 1 snoRNA_primary_transcript (obsolete SO:0000232) sequence A primary transcript encoding a small nucleolar mRNA (SO:0000275). SO ke 1 SOFA SOFA SO 0000164 splice_acceptor sequence The junction between the 3 prime end of an intron and the following exon. 
http //www.ucl.ac.uk/ ~ ucbhjow/b241/glossary.html 1 acceptor acceptor_splice_site SOFA SOFA SO 0000163 splice_donor sequence The junction between the 3 prime end of an exon and the following intron. http //www.ucl.ac.uk/ ~ ucbhjow/b241/glossary.html 1 donor donor_splice_site SOFA SOFA SO 0000344 splice_enhancer sequence Region of a transcript that regulates splicing. SO ke 1 SOFA SOFA SO 0000162 splice_site sequence The position where intron is excised. SO ke 1 SOFA SOFA SO 0000662 spliceosomal_intron sequence An intron which is spliced by the spliceosome. SO ke 1 SOFA SOFA SO 0000649 stRNA sequence Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans. PMID 11081512 1 small_temporal_RNA SOFA SOFA SO 0000648 1 stRNA_primary_transcript (obsolete SO:0000648) sequence primary_transcript:A primary transcript encoding a small temporal mRNA (SO:0000649). SO ke 1 small_temporal_RNA_primary_transcript SOFA SOFA SO 0000048 1 substitute (obsolete SO:0000048) sequence To substitute a subsection of sequence. SO ke 1 SO 1000002 substitution sequence Any change in genomic DNA caused by a single event. http //www.ebi.ac.uk/mutations/recommendations/mutevent.html 1 SOFA SOFA SO 0000148 supercontig sequence One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's. SO ls 1 scaffold SOFA SOFA SO 0000253 tRNA sequence Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. tRNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). 
tRNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position. http //www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005 1 ISBN 0198506732 1 transfer_RNA SOFA SOFA SO 0000210 1 tRNA_primary_transcript (obsolete SO:0000210) sequence A primary transcript encoding a transfer RNA (SO:0000253.) SO ke 1 transfer_RNA_primary_transcript SOFA SOFA SO 0000324 tag sequence A nucleotide sequence that may be used to identify a larger sequence. SO ke 1 SOFA SOFA SO 0000705 tandem_repeat sequence Two or more adjacent copies of a DNA sequence. http //www.sci.sdsu.edu/ ~ smaloy/Glossary/T.html 1 SOFA SOFA SO 0000390 telomerase_RNA sequence The RNA component of telomerase, a reverse transcriptase that synthesises telomeric DNA. http //www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025 1 SOFA SOFA SO 0000624 telomere sequence A specific structure at the end of a linear chromosome, required for the integrity and maintenence of the end, SO ma 1 SO 0000481 1 terminal_inverted_repeat (obsolete SO:0000481) sequence An inverted repeat (SO:0000294) occuring at the termini of a DNA transposon. SO ke 1 SO 0000208 1 terminal_inverted_repeat_element (obsolete SO:0000208) sequence A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long. http www.genetics.org/cgi/reprint/156/4/1983.pdf 1 SO 0000141 terminator sequence The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription. SOFA SOFA SO 0000205 three_prime_UTR sequence A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein. SOFA SOFA SO 0000472 tiling_path sequence A set of regions which overlap with minimal polymorphism to form a linear sequence. 
CJM SO 1 SOFA SOFA SO 0000480 1 tiling_path_clone (obsolete SO:0000480) sequence A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly.A minimal_tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly attempting to minimize the overlap between adjacent clones. (LS) SO ke 1 SO 0000474 tiling_path_fragment sequence A piece of sequence that makes up a tiling_path (SO:0000472.). SO ke 1 SOFA SOFA SO 0000584 tmRNA sequence tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. tmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and eukaryote nuclear genomes. http //www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023 1 10Sa_RNA ssrA SOFA SOFA SO 0000586 1 tmRNA_primary_transcript (obsolete SO:0000586) sequence A primary transcript encoding a tmRNA (SO:0000584). SO ke 1 10Sa_RNA_primary_transcript ssrA_primary_transcript SOFA SOFA SO 0000706 trans_splice_acceptor_site sequence The process that produces mature transcripts by combining exons of independent pre-mRNA molecules. The acceptor site lies on the 3' of these molecules SO ke 1 SO 0000638 1 transcribed_spacer_region (obsolete SO:0000638) sequence Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA. 
http //oregonstate.edu/instruction/bb492/general/glossary.html 1 SOFA SOFA SO 0000673 transcript sequence An RNA synthesized on a DNA or RNA template by an RNA polymerase. SO ma 1 SOFA SOFA SO 0000616 transcription_end_site sequence The region where transcription ends. SO ke 1 SOFA SOFA SO 0000315 transcription_start_site sequence The region where transcription begins. SO ke 1 TSS SOFA SOFA SO 1000009 1 transition (obsolete SO:1000009) sequence Change of a pyrimidine nucleotide, C or T, into an other pyrimidine nucleotide, or change of a purine nucleotide, A or G, into an other purine nucleotide. http //www.ebi.ac.uk/mutations/recommendations/mutevent.html 1 SOFA SOFA SO 0000181 translated_nucleotide_match sequence A match against a translated sequence. SO ke 1 SOFA SOFA SO 0000049 1 translocate (obsolete SO:0000049) sequence SO 0000199 1 translocation (obsolete SO:0000199) sequence A region of nucleotide sequence that has translocated to a new position. SO ke 1 SOFA SOFA SO 0000691 1 translocation_site (obsolete SO:0000691) sequence The space between two bases in a sequence which marks the position where a translocation has occurred. SO ke 1 SOFA SOFA SO 0000101 transposable_element sequence A transposon or insertion sequence. An element that can insert in a variety of DNA sequences. http //www.sci.sdsu.edu/ ~ smaloy/Glossary/T.html 1 SOFA SOFA SO 0000111 transposable_element_gene sequence A gene encoded within a transposable element. For example gag, int, env and pol are the transpable element genes of the TY element in yeast. SO ke 1 SOFA SOFA SO 0000368 transposable_element_insertion_site sequence The site in a genome where a transposable_element has inserted. SO ke 1 SOFA SOFA SO 0000434 1 transposable_element_target_site_duplication (obsolete SO:0000434) sequence A sequence of DNA that is duplicated when a transposable element inserts; usually found at each end the insertion. 
http www.koko.gov.my/CocoaBioTech/Glossaryt.html 1 SOFA SOFA SO 1000017 1 transversion (obsolete SO:1000017) sequence Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G, or vice versa. http //www.ebi.ac.uk/mutations/recommendations/mutevent.html 1 SOFA SOFA SO 0000041 1 variation_operation (obsolete SO:0000041) sequence An operation that can be applied to a sequence that results in a change. SO ke 1 SO 0000404 vault_RNA sequence A family of RNAs are found as part of the enigmatic vault ribonuceoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance. http //www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006 1 SOFA SOFA SO 0000440 1 vector (obsolete SO:0000440) sequence A DNA molecule that can be used to transfer DNA molecules between organisms. SO ma 1 SOFA SOFA SO 0000499 virtual_sequence sequence A continous piece of sequence similar to the 'virtual contig' concept of ensembl. 
SO ke 1 SOFA SOFA is_a SO 0000436 SO 0000001 is_a SO 0000316 SO 0000050 is_a SO 0000307 SO 0000001 is_a SO 0000345 SO 0000695 is_a SO 0000668 SO 0000102 is_a SO 0000236 SO 0000001 is_a SO 0000006 SO 0000695 is_a SO 0000193 SO 0000168 is_a SO 0000337 SO 0000695 is_a SO 0000385 SO 0000372 is_a SO 0000386 SO 0000374 is_a SO 0000326 SO 0000324 is_a SO 0000694 SO 1000008 is_a SO 0000590 SO 0000655 is_a SO 0000331 SO 0000324 is_a SO 0000235 SO 0000409 is_a SO 0000235 SO 0005836 is_a SO 0000398 SO 0000274 is_a SO 0000399 SO 0000274 is_a SO 0000403 SO 0000274 is_a SO 0000391 SO 0000274 is_a SO 0000392 SO 0000274 is_a SO 0000393 SO 0000274 is_a SO 0000394 SO 0000274 is_a SO 0000395 SO 0000274 is_a SO 0000396 SO 0000274 is_a SO 0000397 SO 0000274 is_a SO 0000203 SO 0000050 is_a SO 0000405 SO 0000655 is_a SO 0000644 SO 0000655 is_a SO 0000645 SO 0000185 is_a SO 0000353 SO 0000001 is_a SO 0000143 SO 0000001 is_a SO 0000140 SO 0005836 is_a SO 0000588 SO 0000188 is_a SO 0000588 SO 0000374 is_a SO 0000409 SO 0000001 is_a SO 0000611 SO 0000050 is_a SO 0000689 SO 0000102 is_a SO 0000581 SO 0000050 is_a SO 0000577 SO 0000628 is_a SO 0000628 SO 0000001 is_a SO 0000340 SO 0000001 is_a SO 0000303 SO 0000050 is_a SO 0000151 SO 0000695 is_a SO 0000103 SO 0000151 is_a SO 0000103 SO 0000699 is_a SO 0000179 SO 0000151 is_a SO 0000179 SO 0000699 is_a SO 0000360 SO 0000050 is_a SO 1000005 SO 1000002 is_a SO 0000149 SO 0000143 is_a SO 0000149 SO 0000353 is_a SO 0000177 SO 0000347 is_a SO 2000061 SO 0000695 is_a SO 0000464 SO 0000462 is_a SO 0000159 SO 0000001 is_a SO 0000159 SO 0000109 is_a SO 0000687 SO 0000109 is_a SO 0000687 SO 0000699 is_a SO 0000314 SO 0000657 is_a SO 0000658 SO 0000657 is_a SO 0000165 SO 0005836 is_a SO 0000372 SO 0000655 is_a SO 0000147 SO 0000050 is_a SO 0000333 SO 0000699 is_a SO 0000703 SO 0000700 is_a SO 0000102 SO 0000347 is_a SO 0000204 SO 0000203 is_a SO 0000239 SO 0000001 is_a SO 0000704 SO 0000001 is_a SO 0005855 SO 0000001 is_a SO 0000050 SO 0000001 is_a SO 
0000688 SO 0000353 is_a SO 0000468 SO 0000143 is_a SO 0000603 SO 0000188 is_a SO 0000587 SO 0000188 is_a SO 0000602 SO 0000655 is_a SO 0000380 SO 0000374 is_a SO 0000667 SO 0000001 is_a SO 0000667 SO 0000109 is_a SO 0000366 SO 0000109 is_a SO 0000366 SO 0000699 is_a SO 0000113 SO 0000001 is_a SO 0000605 SO 0000001 is_a SO 0000188 SO 0000050 is_a SO 1000036 SO 0000001 is_a SO 1000036 SO 0000109 is_a SO 0000294 SO 0000657 is_a SO 0000699 SO 0000110 is_a SO 0000110 SO 0000000 is_a SO 0000234 SO 0000233 is_a SO 0000343 SO 0000001 is_a SO 0000039 SO 0000343 is_a SO 0000038 SO 0000343 is_a SO 0000419 SO 0000104 is_a SO 0000161 SO 0000306 is_a SO 0000114 SO 0000306 is_a SO 0000306 SO 0000305 is_a SO 0000276 SO 0000370 is_a SO 0000289 SO 0000705 is_a SO 0000643 SO 0000705 is_a SO 0000305 SO 0000001 is_a SO 0000655 SO 0000233 is_a SO 0000483 SO 0000185 is_a SO 0000684 SO 0000001 is_a SO 0000347 SO 0000343 is_a SO 0000714 SO 0000001 is_a SO 0000696 SO 0000695 is_a SO 0000178 SO 0005855 is_a SO 0000296 SO 0000001 is_a SO 1000008 SO 1000002 is_a SO 0000610 SO 0000050 is_a SO 0000551 SO 0005836 is_a SO 0000553 SO 0000050 is_a SO 0000612 SO 0000050 is_a SO 0000702 SO 0000413 is_a SO 0000701 SO 0000413 is_a SO 0000502 SO 0000370 is_a SO 0000516 SO 0000507 is_a SO 0000507 SO 0000655 is_a SO 0000185 SO 0000673 is_a SO 0000112 SO 0000696 is_a SO 0000233 SO 0000673 is_a SO 0000167 SO 0005836 is_a SO 0000120 SO 0000185 is_a SO 0000349 SO 0000343 is_a SO 0000336 SO 0000462 is_a SO 0000462 SO 0000001 is_a SO 0000252 SO 0000655 is_a SO 0000407 SO 0000252 is_a SO 0000653 SO 0000252 is_a SO 0000375 SO 0000252 is_a SO 0000652 SO 0000252 is_a SO 0000150 SO 0000143 is_a SO 0000007 SO 0000143 is_a SO 0000695 SO 0000001 is_a SO 0000001 SO 0000110 is_a SO 0005836 SO 0000050 is_a SO 1001284 SO 0005855 is_a SO 0000700 SO 0000001 is_a SO 0000187 SO 0000657 is_a SO 0000657 SO 0000001 is_a SO 0000168 SO 0000684 is_a SO 0000412 SO 0000695 is_a SO 0000139 SO 0000050 is_a SO 0000374 SO 0000372 is_a SO 
0000013 SO 0000655 is_a SO 0000413 SO 0000700 is_a SO 0000109 SO 0000110 is_a SO 0000646 SO 0000655 is_a SO 0000625 SO 0005836 is_a SO 0000370 SO 0000655 is_a SO 0000274 SO 0000655 is_a SO 0000275 SO 0000655 is_a SO 0000164 SO 0000162 is_a SO 0000163 SO 0000162 is_a SO 0000344 SO 0005836 is_a SO 0000162 SO 0000699 is_a SO 0000662 SO 0000188 is_a SO 0000649 SO 0000655 is_a SO 1000002 SO 0000001 is_a SO 1000002 SO 0000109 is_a SO 0000148 SO 0000353 is_a SO 0000253 SO 0000655 is_a SO 0000324 SO 0000695 is_a SO 0000705 SO 0000657 is_a SO 0000390 SO 0000372 is_a SO 0000624 SO 0000628 is_a SO 0000141 SO 0005836 is_a SO 0000205 SO 0000203 is_a SO 0000472 SO 0000353 is_a SO 0000474 SO 0000143 is_a SO 0000584 SO 0000370 is_a SO 0000706 SO 0000164 is_a SO 0000673 SO 0000050 is_a SO 0000616 SO 0000699 is_a SO 0000315 SO 0000699 is_a SO 0000181 SO 0000347 is_a SO 0000101 SO 0000187 is_a SO 0000111 SO 0000704 is_a SO 0000368 SO 0000366 is_a SO 0000404 SO 0000655 is_a SO 0000499 SO 0000353 part_of SO 0000316 SO 0000234 derived_from SO 0000345 SO 0000234 part_of SO 0000203 SO 0000234 part_of SO 0000611 SO 0000662 part_of SO 0000581 SO 0000204 part_of SO 0000581 SO 0000233 part_of SO 0000628 SO 0000340 part_of SO 0000303 SO 0000185 part_of SO 0000360 SO 0000234 part_of SO 0000149 SO 0000148 part_of SO 0000147 SO 0000673 part_of SO 0000333 SO 0000233 part_of SO 0000468 SO 0000688 part_of SO 0000188 SO 0000185 part_of SO 0000039 SO 0000038 part_of SO 0000276 SO 0000502 part_of SO 0000610 SO 0000205 part_of SO 0000553 SO 0000233 derived_from SO 0000104 SO 0000316 part_of SO 0000417 SO 0000104 part_of SO 0000612 SO 0000662 derived_from SO 0000233 SO 0000185 part_of SO 0000454 SO 0000516 part_of SO 0000150 SO 0000149 part_of SO 0000007 SO 0000149 part_of SO 0005836 SO 0000704 part_of SO 0000139 SO 0000203 part_of SO 0000646 SO 0000507 part_of SO 0000418 SO 0000104 part_of SO 0000162 SO 0000185 part_of SO 0000474 SO 0000472 part_of SO 0000673 SO 0000704 part_of SO 0000616 SO 0000185 
part_of SO 0000315 SO 0000185 DBIx-DBStag-0.12/t/data/test.chadoxml0000644000076500000240000025400111326157220015776 0ustar cainstaff relationship synonym_type cvterm_property_type OBO_REL internal internal cvterm_property_type cvterm_property_type comment obo_rel is_a relationship is_a 1 obo_rel part_of part_of relationship 1 GO 0003673 Gene_Ontology Gene_Ontology GO 0008150 biological_process biological_process GO 0009987 cellular process biological_process GO 0050875 cellular physiological process biological_process GO 0008219 cell death biological_process GO 0007569 cell aging biological_process cellular senescence GO 0001303 nucleolar fragmentation during replicative aging biological_process GO 0001302 replicative cell aging biological_process GO 0008151 cell growth and/or maintenance biological_process cell physiology GO 0016043 cell organization and biogenesis biological_process GO 0006997 nuclear organization and biogenesis biological_process GO:0048287 nucleus organization and biogenesis GO 0030575 nuclear body organization and biogenesis biological_process GO 0007275 development biological_process GO 0007568 aging biological_process GO:0016280 senescence GO 0007576 nucleolar fragmentation biological_process GO 0008371 obsolete biological process biological_process GO 0046616 nucleolar fragmentation (sensu Saccharomyces) biological_process GO 0007575 nucleolar size increase biological_process GO 0046617 nucleolar size increase (sensu Saccharomyces) biological_process GO 0007582 physiological process biological_process GO 0016265 death biological_process GO 0008152 metabolism biological_process GO 0009056 catabolism biological_process GO 0009057 macromolecule catabolism biological_process GO 0006401 RNA catabolism biological_process GO 0016077 snoRNA catabolism biological_process GO 0006139 nucleobase, nucleoside, nucleotide and nucleic acid metabolism biological_process GO 0016070 RNA metabolism biological_process GO 0006396 RNA processing 
biological_process GO:0006394 GO 0043144 snoRNA processing biological_process GO 0016074 snoRNA metabolism biological_process GO 0006350 transcription biological_process GO 0006351 transcription, DNA-dependent biological_process GO 0009302 snoRNA transcription biological_process The synthesis of small nucleolar RNA (snoRNA) from a DNA template. GO curators 1 testdb 001 1 comment_type test comment 0 GO 0005575 cellular_component cellular_component GO 0005623 cell cellular_component GO 0005622 intracellular cellular_component protoplasm GO 0005694 chromosome cellular_component GO 0000785 chromatin cellular_component GO 0000790 nuclear chromatin cellular_component GO 0030874 nucleolar chromatin cellular_component GO 0000228 nuclear chromosome cellular_component GO 0005737 cytoplasm cellular_component GO 0030684 preribosome cellular_component GO 0030685 nucleolar preribosome cellular_component GO 0030687 nucleolar preribosome, large subunit precursor cellular_component 66S preribosome GO 0030688 nucleolar preribosome, small subunit precursor cellular_component 43S preribosome GO 0005634 nucleus cellular_component GO 0005730 nucleolus cellular_component GO 0005655 nucleolar ribonuclease P complex cellular_component nucleolar RNase P complex GO 0005732 small nucleolar ribonucleoprotein complex cellular_component snoRNP GO 0005654 nucleoplasm cellular_component GO 0016585 chromatin remodeling complex cellular_component chromatin remodelling complex GO 0005677 chromatin silencing complex cellular_component GO 0030869 RENT complex cellular_component GO 0016604 nuclear body cellular_component GO 0016607 nuclear speck cellular_component nuclear speckle speckle domain speckle focus splicing speckle GO 0030529 ribonucleoprotein complex cellular_component RNP GO 0030677 ribonuclease P complex cellular_component RNase P complex GO 0008370 obsolete cellular component cellular_component GO 0005733 small nucleolar RNA cellular_component snoRNA GO 0003674 molecular_function 
molecular_function GO 0005488 binding molecular_function ligand GO 0003676 nucleic acid binding molecular_function GO 0003723 RNA binding molecular_function GO 0030515 snoRNA binding molecular_function GO 0043021 ribonucleoprotein binding molecular_function RNP binding GO 0030519 snoRNP binding molecular_function GO 0008369 obsolete molecular function molecular_function GO 0030355 small nucleolar ribonucleoprotein molecular_function snoRNP GO 0005569 small nucleolar RNA molecular_function is_a GO 0009987 GO 0008150 is_a GO 0050875 GO 0007582 is_a GO 0050875 GO 0009987 is_a GO 0008219 GO 0016265 is_a GO 0008219 GO 0050875 is_a GO 0007569 GO 0007568 is_a GO 0001303 GO 0007569 is_a GO 0001302 GO 0007569 is_a GO 0008151 GO 0050875 is_a GO 0016043 GO 0008151 is_a GO 0006997 GO 0016043 is_a GO 0030575 GO 0006997 is_a GO 0007275 GO 0008150 is_a GO 0007568 GO 0007275 is_a GO 0008371 GO 0008150 is_a GO 0046616 GO 0008371 is_a GO 0007575 GO 0008371 is_a GO 0046617 GO 0008371 is_a GO 0007582 GO 0008150 is_a GO 0016265 GO 0007582 is_a GO 0008152 GO 0007582 is_a GO 0009056 GO 0008152 is_a GO 0009057 GO 0009056 is_a GO 0006401 GO 0016070 is_a GO 0006401 GO 0009057 is_a GO 0016077 GO 0016074 is_a GO 0016077 GO 0006401 is_a GO 0006139 GO 0008152 is_a GO 0016070 GO 0006139 is_a GO 0006396 GO 0016070 is_a GO 0043144 GO 0016074 is_a GO 0043144 GO 0006396 is_a GO 0016074 GO 0016070 is_a GO 0006350 GO 0006139 is_a GO 0006351 GO 0006350 is_a GO 0009302 GO 0006351 is_a GO 0005623 GO 0005575 is_a GO 0000790 GO 0000785 is_a GO 0000228 GO 0005694 is_a GO 0030684 GO 0030529 is_a GO 0030685 GO 0030684 is_a GO 0030687 GO 0030685 is_a GO 0030688 GO 0030685 is_a GO 0005655 GO 0030677 is_a GO 0005732 GO 0030529 is_a GO 0005677 GO 0016585 is_a GO 0030869 GO 0005677 is_a GO 0016607 GO 0016604 is_a GO 0030677 GO 0030529 is_a GO 0008370 GO 0005575 is_a GO 0005733 GO 0008370 is_a GO 0005488 GO 0003674 is_a GO 0003676 GO 0005488 is_a GO 0003723 GO 0003676 is_a GO 0030515 GO 0003723 is_a GO 0043021 GO 
0005488 is_a GO 0030519 GO 0043021 is_a GO 0008369 GO 0003674 is_a GO 0030355 GO 0008369 is_a GO 0005569 GO 0008369 part_of GO 0008150 GO 0003673 part_of GO 0007569 GO 0008219 part_of GO 0001303 GO 0001302 part_of GO 0007568 GO 0016265 part_of GO 0007576 GO 0007568 part_of GO 0005575 GO 0003673 part_of GO 0005622 GO 0005623 part_of GO 0005694 GO 0005622 part_of GO 0000785 GO 0005694 part_of GO 0000790 GO 0000228 part_of GO 0030874 GO 0005730 part_of GO 0030874 GO 0000790 part_of GO 0000228 GO 0005634 part_of GO 0005737 GO 0005622 part_of GO 0030684 GO 0005737 part_of GO 0030685 GO 0005730 part_of GO 0005634 GO 0005622 part_of GO 0005730 GO 0005634 part_of GO 0005655 GO 0005730 part_of GO 0005732 GO 0005730 part_of GO 0005654 GO 0005634 part_of GO 0016585 GO 0005654 part_of GO 0016604 GO 0005654 part_of GO 0030529 GO 0005622 part_of GO 0003674 GO 0003673 DBIx-DBStag-0.12/t/data/test2.chadoxml0000644000076500000240000001302311326157220016055 0ustar cainstaff relationship synonym_type cvterm_property_type anonymous OBO_REL internal internal cvterm_property_type cvterm_property_type comment internal is_anonymous cvterm_property_type is_anonymous OBO_REL is_a relationship is_a 1 internal intersection_of cvterm_property_type intersection_of 1 OBO_REL part_of dbxref__OBO_REL:part_of OBO_REL wibble wibble test 1 TEST a a test TEST b b test TEST c c test TEST ab ab test is_a TEST:c TEST:a is_a TEST:ab TEST:a wibble TEST:b TEST:c OBO_REL:part_of TEST:ab TEST:b intersection_of TEST:ab TEST:a intersection_of TEST:ab internal restriction--OBO_REL:part_of--TEST:ab restriction--OBO_REL:part_of--TEST:ab anonymous_cv is_anonymous 1 0 OBO_REL:part_of TEST:b DBIx-DBStag-0.12/t/DBStagTest.pm0000644000076500000240000000424211326157220014667 0ustar cainstaffpackage DBStagTest; use strict; use base qw(Exporter); use DBIx::DBStag; BEGIN { use Test; if (0) { plan tests=>1; skip(1, 1); exit 0; } # this file defines sub connect_args() unless (defined(do 'db.config')) { die $@ if $@; die 
"Could not reade db.config: $!\n"; } } use vars qw(@EXPORT); our $driver; #our $dbname = "dbistagtest"; #our $testdb = "dbi:Pg:dbname=$dbname;host=localhost"; @EXPORT = qw(connect_to_cleandb dbh drop cvtddl); sub dbh { # this file defines sub connect_args() # unless (defined(do 'db.config')) { # die $@ if $@; # die "Could not reade db.config: $!\n"; # } my $dbh; my @conn = connect_args(); eval { $dbh = DBIx::DBStag->connect(@conn); }; if (!$dbh) { printf STDERR "COULD NOT CONNECT USING DBI->connect(@conn)\n\n"; die; } $driver = $dbh->{_driver}; $dbh; } *connect_to_cleandb = \&dbh; sub ddl { my $dbh = dbh(); my $ddl = shift; } sub cvtddl { my $ddl = shift; if ($driver eq 'mysql') { $ddl =~ s/ serial / INTEGER AUTO_INCREMENT /i; } return $ddl; } sub alltbl { qw(person2address person address ); } sub drop { # unless (defined(do 'db.config')) { # die $@ if $@; # die "Could not reade db.config: $!\n"; # } # this sub is defined in config file my $cmd = recreate_cmd(); $cmd =~ s/\;/\;sleep 2\;/g; # if (system($cmd)) { # # allowed to fail first time... 
# # (pg sometimes won't let you create a db immediately after dropping) # sleep(2); # } if (system($cmd)) { # must pass 2nd time print STDERR "PROBLEM recreating using: $cmd\n"; } } sub zdrop { # my @t = @_; my @t = alltbl; my $dbh = dbh(); my %created = (); if (1) { use DBIx::DBSchema; my $s = DBIx::DBSchema->new_native($dbh->dbh); use Data::Dumper; %created = map {$_=>1} $s->tables; } # foreach (@t) { # eval { # $dbh->do("DROP TABLE $_"); # }; foreach (@t) { if ($created{$_}) { eval { $dbh->do("DROP TABLE $_"); }; } } $dbh->disconnect; } 1; DBIx-DBStag-0.12/t/feature.t0000644000076500000240000000443611326157220014212 0ustar cainstaffuse lib 't'; BEGIN { # to handle systems with no installed Test module # we include the t dir (where a copy of Test.pm is located) # as a fallback eval { require Test; }; use Test; use DBStagTest; plan tests => 5; } use DBIx::DBStag; use DBI; use Data::Stag; use FileHandle; use strict; unless ($ENV{DBSTAG_DEVELOPER_MODE}) { print "test feature.t takes a loooooong time, so I'm skipping it.\n"; print "if you want to run this test, set env DBSTAG_DEVELOPER_MODE\n"; ok(1) foreach (1..5); exit 0; } drop(); my $dbh = connect_to_cleandb(); foreach my $f (qw(chado-cvterm.sql chado-pub.sql chado-feature.sql chado-fr.sql )) { open(F, "t/data/$f") || die; my $ddl = join('',); close(F); $dbh->do($ddl); } foreach my $f (qw(relationship.chado-xml sofa.chado-xml CG10833.with-macros.chado-xml)) { print "parsing..\n"; my $chado = Data::Stag->parse("t/data/$f"); print "parsed; now storing..\n"; $dbh->storenode($_) foreach $chado->subnodes; } ok(1); my $fset = $dbh->selectall_stag(q[ SELECT * FROM feature LEFT OUTER JOIN dbxref ON (feature.dbxref_id = dbxref.dbxref_id) LEFT OUTER JOIN db ON (dbxref.db_id = db.db_id) INNER JOIN cvterm AS ftype ON (feature.type_id = ftype.cvterm_id) USE NESTING (set(feature(ftype)(dbxref(db)))) ]); print $fset->xml; my @features = $fset->get_feature; ok(@features,10); my ($gene) = $fset->where(feature=>sub { 
shift->find('cvterm/name') eq 'gene' }); ok($gene->sget_name eq 'Cyp28d1'); $fset = $dbh->selectall_stag(q[ SELECT subf.* FROM feature INNER JOIN feature_relationship ON (feature.feature_id = feature_relationship.object_id) INNER JOIN feature AS subf ON (subf.feature_id = feature_relationship.subject_id) WHERE feature.name='Cyp28d1-RA' ]); print $fset->xml; @features = $fset->get_subf; ok(@features,7); $fset = $dbh->selectall_stag(q[ SELECT feature.* FROM feature INNER JOIN featureloc ON (feature.feature_id = featureloc.feature_id) INNER JOIN feature AS srcf ON (srcf.feature_id = featureloc.srcfeature_id) WHERE srcf.uniquename='2L' ]); @features = $fset->get_feature; ok(@features,9); $dbh->disconnect; DBIx-DBStag-0.12/t/load-go.x0000644000076500000240000001317511326157220014105 0ustar cainstaffuse lib 't'; BEGIN { # to handle systems with no installed Test module # we include the t dir (where a copy of Test.pm is located) # as a fallback eval { require Test; }; use Test; use DBIStagTest; plan tests => 2; } use DBIx::DBIStag; use DBI; use FileHandle; use strict; use Getopt::Long; use Data::Stag; my ($fmt, $outfmt, $type); $type = 'seq'; $outfmt = 'chadosxpr'; GetOptions("fmt|i=s"=>\$fmt, "outfmt|o=s"=>\$outfmt, "type|t=s"=>\$type); my $db = shift @ARGV; my $dbh = DBIx::DBIStag->connect($db); ##my $dbh = DBIx::DBIStag->new; $dbh->mapping([ Data::Stag->from('sxprstr', '(map (table "cvterm_dbxref") (col "dbxrefstr") (fkcol "dbxrefstr") (fktable "dbxref"))'), Data::Stag->from('sxprstr', '(map (table "cvterm") (col "termtype_id") (fktable_alias "termtype") (fkcol "cvterm_id") (fktable "cvterm"))'), Data::Stag->from('sxprstr', '(map (table "cvrelationship") (col "subjterm_id") (fktable_alias "subjterm") (fkcol "cvterm_id") (fktable "cvterm"))'), Data::Stag->from('sxprstr', '(map (table "cvrelationship") (col "objterm_id") (fktable_alias "objterm") (fkcol "cvterm_id") (fktable "cvterm"))'), Data::Stag->from('sxprstr', '(map (table "cvrelationship") (col "reltype_id") 
(fktable_alias "reltype") (fkcol "cvterm_id") (fktable "cvterm"))'), ]); my %h = ( cvrelationship => sub { my ($self, $stag) = @_; return unless $self->{pass} == 2; foreach (qw(subjterm objterm)) { my $v = $stag->get($_); $stag->set($_, [Data::Stag->new(cvterm=>[ [dbxrefstr=>$v] ])]); } my $v = $stag->get('reltype'); $stag->set('reltype', [Data::Stag->new(cvterm=>[ [termname=>$v], ] ) ] ); print $stag->xml; my $dbh = $self->{dbh}; $dbh->storenode($stag); return; }, cvterm => sub { my ($self, $stag) = @_; return unless $self->{pass} == 1; print $stag->xml; my $dbh = $self->{dbh}; $dbh->storenode($stag); return; }, cvterm_dbxref => sub { my ($self, $stag) = @_; my $dbxref = $stag->duplicate; $dbxref->element('dbxref'); $dbxref->set_dbxrefstr($dbxref->get_dbname . ':' . $dbxref->get_accession); $stag->data([$dbxref]); 0; }, dbxref => sub { my ($self, $stag) = @_; $stag->element('dbxrefstr'); }, termdefintion => sub { my ($self, $stag) = @_; $stag->element('termdefinition'); }, ); my $handler = Data::Stag->makehandler(%h); $handler->{dbh} = $dbh; foreach my $f (@ARGV) { $handler->{pass} = 1; my $stag = Data::Stag->new->parse(-file=>$f, -handler=>$handler); $handler->{pass} = 2; my $stag = Data::Stag->new->parse(-file=>$f, -handler=>$handler); } exit 0; package LoadChado; use strict; use base qw(Data::Stag::BaseHandler); sub dbh { my $self = shift; $self->{_dbh} = shift if @_; return $self->{_dbh}; } sub e_cvrelationship { my $self = shift; my $stag = shift; printf "CATCH END:%s\n", $self->depth; my $dbh = $self->dbh; foreach (qw(subjterm objterm)) { my $v = $stag->get($_); $stag->set($_, [Data::Stag->new(cvterm=>[ [dbxrefstr=>$v] ])]); } my $v = $stag->get('reltype'); $stag->set('reltype', [Data::Stag->new(cvterm=>[ [termname=>$v], [termtype=>'relationship'] ] ) ] ); print $stag->sxpr; $dbh->storenode($stag); return; } sub e_cvterm { my $self = shift; my $stag = shift; } sub zcatch_end { my $self = shift; my $ev = shift; my $stag = shift; return unless $self->depth == 
1; printf "CATCH END:$ev:%s\n", $self->depth; my $dbh = $self->dbh; if ($stag->element eq 'cvrelationship') { foreach (qw(subjterm objterm)) { my $v = $stag->get($_); $stag->set($_, [Data::Stag->new(cvterm=>[ [dbxrefstr=>$v] ])]); } my $v = $stag->get('reltype'); $stag->set('reltype', [Data::Stag->new(cvterm=>[ [termname=>$v], [termtype=>'relationship'] ] ) ] ); } $stag->iterate(sub { my $n = shift; if ($n->element eq 'dbxref') { $n->element('dbxrefstr'); } if ($n->element eq 'termtype') { my $v = $n->data; $n->data([Data::Stag->new(cvterm=>[ [termname=>$v], [termtype=>'type'] ])]); } 0; }); # $dbh->store($stag); print $stag->sxpr; print "\n"; return; } 1; DBIx-DBStag-0.12/t/movie.x0000644000076500000240000000277211326157220013703 0ustar cainstaffuse lib 't'; BEGIN { # to handle systems with no installed Test module # we include the t dir (where a copy of Test.pm is located) # as a fallback eval { require Test; }; use Test; plan tests => 1; } use DBIx::DBIStag; use Data::Stag; use FileHandle; my $dbh; $dbh = getdbh(@ARGV) if @ARGV; my $mset = Data::Stag->parse("t/data/mset.xml"); my @movies = $mset->getnode('movie'); foreach my $movie (@movies) { my @movie_chars = $movie->getnode('movie_char'); $movie->unset('movie_char'); # $dbh->store_stag($movie); # stores director too foreach my $movie_char (@movie_chars) { my $actor = $movie_char->getnode_actor; $movie_char->unset('actor'); # print $actor->xml; my $actor_id = $dbh->store_stag($actor); my $role = Data::Stag->new(role=>[ [movie_name=>$movie->get_name], [movie_char_name=>$movie_char->get_name], [actor_id=>$actor_id] ]); $dbh->store_stag($role); } } my $xmlstruct = $dbh->selectall_stag(q[ SELECT bioentry.*, seqfeature.*, seqfeature_qualifier_value.*, ftype.* FROM bioentry NATURAL JOIN (seqfeature NATURAL JOIN seqfeature_qualifier_value) INNER JOIN ontology_term AS ftype ON (ftype.ontology_term_id = seqfeature_key_id) LIMIT 300 ], "(bioentry(seqfeature(seqfeature_qualifier_value 1)(ftype 1)))", ); print 
$xmlstruct->xml; sub getdbh { return DBIx::DBIStag->connect(@_); } DBIx-DBStag-0.12/t/norm2.t0000644000076500000240000001037711326157220013615 0ustar cainstaffuse lib 't'; BEGIN { # to handle systems with no installed Test module # we include the t dir (where a copy of Test.pm is located) # as a fallback eval { require Test; }; use Test; plan tests => 5; } use DBIx::DBStag; use FileHandle; my $moviedata = getmoviedata(); my ($hdr, @data) = process($moviedata); my $dbhstag = DBIx::DBStag->new; my $ss = <normalize($ss, \@data); print $mstruct->xml; my @movies = $mstruct->where('movie', sub { grep { $_->sgetnode_person->sget_lname eq 'coen' } shift->getnode_dir }); ok(@movies == 1); my $movie = shift @movies; ok($movie->sget_name eq 'barton fink'); my @characters = $movie->get_character; ok(@characters == 2); my $schema = Data::Stag->from('sxprstr', $ss); my $astruct = $schema->sgetnode_aliases; $astruct->addnode_alias(Data::Stag->new(alias=>[ [name=>'movie'], [table=>'work']])); print $schema->sxpr; my $dirstruct = $dbhstag->normalize(-schema=>$schema, -rows=>\@data, -top=>"dirset", -aliaspolicy=>'n', -nesting=>"'(dirset(dir(movie(star(character 1)))))"); print $dirstruct->sxpr, "\n"; my @dirs = $dirstruct->getnode_dir; ok(@dirs == 6); my @coens = grep {$_->sgetnode_person->sget_lname eq 'coen'} @dirs; ok(@coens == 2); exit 0; # sub process{ my $data = shift; my @data = map {chomp;[split(/\,\s*/, $_)]} split(/\n/,$data); # first line is header line my $hdr = shift @data; $hdr->[0] =~ s/^\#//; return ($hdr, @data); } sub getmoviedata { return < 1; } use DBIx::DBStag; use FileHandle; my $moviedata = getmoviedata(); my ($hdr, @data) = process($moviedata); my $dbhstag = DBIx::DBStag->new; my $ss = <from('sxprstr', $ss); #$schema->unset_alias; print $schema->sxpr; my $dirstruct = $dbhstag->normalize(-schema=>$schema, -rows=>\@data, -top=>"dirset", -aliaspolicy=>'n', -nesting=>"'(dirset(dir(movie(star(character 1)))))"); print $dirstruct->sxpr; ($hdr, @data) = 
process(getanimaldata()); my $struct = $dbhstag->normalize(-top=>"animal-set", -rows=>\@data, -cols=>$hdr, -nesting=>"'(rel 1)"); print $struct->xml; ok(1); exit 0; # sub process{ my $data = shift; my @data = map {chomp;[split(/\,\s*/, $_)]} split(/\n/,$data); # first line is header line my $hdr = shift @data; $hdr->[0] =~ s/^\#//; return ($hdr, @data); } sub getmoviedata { return < 1; } use DBIx::DBStag; use FileHandle; my $moviedata = getmoviedata(); my ($hdr, @data) = process($moviedata); my $dbhstag = DBIx::DBStag->new; # by director my $dirstruct = $dbhstag->normalize(-top=>"director-set", -rows=>\@data, -cols=>$hdr, -nesting=>"(director(film(actor(character 1))))"); print $dirstruct->xml; my $ss = <normalize($ss, \@data); print $mstruct->xml; ($hdr, @data) = process(getanimaldata()); my $struct = $dbhstag->normalize(-top=>"animal-set", -rows=>\@data, -cols=>$hdr, -nesting=>"'(rel 1)"); print $struct->xml; ok(1); exit 0; # sub process{ my $data = shift; my @data = map {chomp;[split(/\,\s*/, $_)]} split(/\n/,$data); # first line is header line my $hdr = shift @data; $hdr->[0] =~ s/^\#//; return ($hdr, @data); } sub getmoviedata { return < 26; } use DBIx::DBStag; use FileHandle; use strict; my $dbh = DBIx::DBStag->new; if (1) { my $sql = q[ SELECT avg(abs(exon.start-exon.end)) AS av FROM x ]; my $s = $dbh->parser ->selectstmt($sql); print $s->sxpr; my @cols = $s->get_cols->get_col; ok(@cols == 1); ok($cols[0]->get_alias eq 'av'); my $f = $s->get_from; my @tbls = sort map {$_->get_name} $f->find_leaf; print "T=@tbls\n"; ok("@tbls" eq "x"); } if (1) { my $sql = q[ SELECT avg(abs(y)) AS av FROM x ]; my $s = $dbh->parser ->selectstmt($sql); print $s->sxpr; my @cols = $s->get_cols->get_col; ok(@cols == 1); ok($cols[0]->get_alias eq 'av'); my $f = $s->get_from; my @tbls = sort map {$_->get_name} $f->find_leaf; print "T=@tbls\n"; ok("@tbls" eq "x"); } if (1) { my $sql = q[ SELECT * FROM f_type NATURAL JOIN featureloc INNER JOIN feature_relationship ON 
(f_type.feature_id = objfeature_id) NATURAL LEFT OUTER JOIN dbxref WHERE name = 'CG17018' ]; my $s = $dbh->parser ->selectstmt($sql); print $s->sxpr; my @cols = $s->get_cols->get_col; ok(@cols == 1); ok($cols[0]->get_name eq '*'); my $f = $s->get_from; my @tbls = sort map {$_->get_name} $f->find_leaf; print "T=@tbls\n"; ok(@tbls == 4); } if (1) { my $sql = q[ SELECT * FROM dna INNER JOIN contig USING (dna_id) NATURAL JOIN z WHERE contig_id = 5 ]; my $s = $dbh->parser ->selectstmt($sql); print $s->sxpr; my @cols = $s->get_cols->get_col; ok(@cols == 1); ok($cols[0]->get_name eq '*'); my $f = $s->get_from; my @tbls = sort map {$_->get_name} $f->find_leaf; print "T=@tbls\n"; ok(@tbls == 3); } if (1) { my $sql = q[ SELECT * FROM dna INNER JOIN contig USING (dna_id) INNER JOIN clone USING (clone_id) WHERE contig_id = 5 ]; my $s = $dbh->parser ->selectstmt($sql); print $s->sxpr; my @cols = $s->get_cols->get_col; ok(@cols == 1); ok($cols[0]->get_name eq '*'); my $f = $s->get_from; my @tbls = sort map {$_->get_name} $f->find_leaf; print "T=@tbls\n"; ok(@tbls == 3); } if (1) { my $sql = q[ SELECT * FROM x NATURAL JOIN y ]; my $s = $dbh->parser->selectstmt($sql); print $s->sxpr; my @cols = $s->get_cols->get_col; ok(@cols == 1); ok($cols[0]->get_name eq '*'); my $f = $s->get_from; my @tbls = sort map {$_->get_name} $f->find_leaf; print "@tbls\n"; ok("@tbls" eq "x y"); } if (1) { my $sql = q[ SELECT a, b AS y FROM x ]; my $s = $dbh->parser->selectstmt($sql); print $s->sxpr; my @cols = $s->get_cols->get_col; ok(@cols == 2); ok($cols[0]->get_name eq 'a'); my $f = $s->get_from; } if (1) { my $sql = q[ SELECT somefunc(x.foo), func2(bar), func3(y) AS r FROM x ]; my $s = $dbh->parser->selectstmt($sql); print $s->sxpr; my @cols = $s->get_cols->get_col; ok(@cols == 3); ok($cols[0]->get_func->get_name eq 'somefunc'); ok($cols[0]->get_func->get_args->get_col->get_name eq 'x.foo'); ok($cols[1]->get_func->get_args->get_col->get_name eq 'bar'); } if (0) { # TODO - expressions my $sql = q[ 
SELECT 5+3 FROM x ]; my $s = $dbh->parser->selectstmt($sql); # print $s->sxpr; } if (1) { my $sql = q[ SELECT transcript.name, transcript_loc.nbeg, transcript_loc.nend, exon.name, exon_loc.nbeg, exon_loc.nend FROM feature_relationship INNER JOIN f_type AS transcript ON (feature_relationship.subjfeature_id = transcript.feature_id) INNER JOIN featureloc AS transcript_loc ON (transcript_loc.feature_id = transcript.feature_id) INNER JOIN f_type AS exon ON (feature_relationship.objfeature_id = exon.feature_id) INNER JOIN featureloc AS exon_loc ON (exon_loc.feature_id = exon.feature_id) WHERE transcript.type = 'transcript' AND exon.type = 'exon' AND transcript.name = 'CG12345-RA'; ]; my $s = $dbh->parser->selectstmt($sql); print $s->sxpr; ok(1); } if (1) { my $sql = q[ SELECT F1.feature_id, F1.dbxrefstr, FL1.nbeg, FL1.nend FROM feature AS F2 INNER JOIN featureloc AS FL2 ON (F2.feature_id = FL2.feature_id), feature AS F1 INNER JOIN featureloc AS FL1 ON (F1.feature_id = FL1.feature_id) WHERE FL1.nbeg >= FL2.nbeg AND FL1.nend <= FL2.nend and F2.feature_id = 47 and FL2.srcfeature_id = FL1.srcfeature_id and F1.dbxrefstr != ''; ]; # q[ # SELECT # F1.feature_id, F1.dbxrefstr, FL1.nbeg, FL1.nend # FROM # feature F2 # INNER JOIN # featureloc FL2 ON(F2.feature_id = FL2.feature_id), # feature F1 # INNER JOIN # featureloc FL1 ON (F1.feature_id = FL1.feature_id) # WHERE # FL1.nbeg >= FL2.nbeg AND FL1.nend <= FL2.nend # and F2.feature_id = 47 and FL2.srcfeature_id = # FL1.srcfeature_id and F1.dbxrefstr != ''; # ]; my $s = $dbh->parser->selectstmt($sql); print $s->sxpr; ok(1); } DBIx-DBStag-0.12/t/sql.x0000644000076500000240000000273711326157220013364 0ustar cainstaffuse lib 't'; BEGIN { # to handle systems with no installed Test module # we include the t dir (where a copy of Test.pm is located) # as a fallback eval { require Test; }; use Test; plan tests => 1; } use DBIx::DBIStag; use Data::Stag; use FileHandle; use Parse::RecDescent; $::RD_AUTOACTION = q { [@item] }; #$::RD_TRACE = 
1; my $parser = Parse::RecDescent->new(selectgrammar()); #my $x = # $parser->selectstmt(q[ # SELECT DISTINCT a, b, count(c) AS nn FROM (p NATURAL JOIN q) INNER JOIN y ON (x.a=y.b AND c=d AND f like y) WHERE r>7 or a like 't%' GROUP BY uu ORDER BY q, i LIMIT 20 # ] # ); #my $x = $parser->bool_expr("a like 'b' or !(c != d)"); my $x = $parser->selectstmt(q[ SELECT srcseq.*, gene.*, transcript.*, exon.* FROM seq AS srcseq INNER JOIN seqfeature AS gene ON (gene.src_seq_id = srcseq.id) INNER JOIN sf_produces_sf ON gene.id = produced_by_sf_id) INNER JOIN seqfeature AS transcript ON (transcript.id = produces_sf_id) INNER JOIN exon_rank ON (transcript.id = transcript_sf_id) INNER JOIN seqfeature AS exon ON (exon.id = exon_sf_id) LIMIT 20 ]);; #my $x = $parser->selectstmt(q[SELECT seqfeature.* FROM seqfeature LIMIT 10]); use Data::Dumper; #print Dumper $x; print $x->sxpr; sub selectgrammar { return DBIx::DBIStag->selectgrammar; } DBIx-DBStag-0.12/t/store1.t0000644000076500000240000000314011326157220013763 0ustar cainstaffuse lib 't'; BEGIN { # to handle systems with no installed Test module # we include the t dir (where a copy of Test.pm is located) # as a fallback eval { require Test; }; use Test; use DBStagTest; plan tests => 2; } use DBIx::DBStag; use DBI; use Data::Stag; use FileHandle; my $ddl = <trace(1); $dbh->do(cvtddl($ddl)); my $personset = Data::Stag->from('sxprstr', $data); my @persons = $personset->getnode_person; foreach (@persons) { $dbh->storenode($_); } our $NEW_ADDRESS = "some new address"; my $rset = $dbh->selectall_stag("SELECT * FROM person WHERE fname = 'joe'", ); my $joe = $rset->getnode_person; my $OLD_ADDRESS = $joe->get_address; $joe->set_address($NEW_ADDRESS); $dbh->storenode($joe); $rset = $dbh->selectall_stag("SELECT * FROM person WHERE fname = 'joe'", ); $joe = $rset->getnode_person; ok($joe->sget_address eq $NEW_ADDRESS); $joe->unset_person_id; $joe->set_address($OLD_ADDRESS); $dbh->storenode($joe); $rset = $dbh->selectall_stag("SELECT * FROM 
person WHERE fname = 'joe'", ); $joe = $rset->getnode_person; ok($joe->sget_address eq $OLD_ADDRESS); $dbh->disconnect; DBIx-DBStag-0.12/t/store2.t0000644000076500000240000000433411326157220013772 0ustar cainstaffuse lib 't'; BEGIN { # to handle systems with no installed Test module # we include the t dir (where a copy of Test.pm is located) # as a fallback eval { require Test; }; use Test; use DBStagTest; plan tests => 2; } use DBIx::DBStag; use DBI; use Data::Stag; use FileHandle; drop(qw( person address)); my $ddl = <trace(1); $dbh->do($ddl); $dbh->mapping([ Data::Stag->from('sxprstr', '(map (table "person") (col "address_id") (fktable "address") (fkcol "address_id"))') ]); my $personset = Data::Stag->from('sxprstr', $data); my @persons = $personset->getnode_person; foreach (@persons) { $dbh->storenode($_); } our $NEW_ADDRESS = "some new address"; my $rset = $dbh->selectall_stag("SELECT * FROM person NATURAL JOIN address WHERE fname = 'joe'", ); print $rset->sxpr; my $joe = $rset->getnode_person; my $OLD_ADDRESS = $joe->sgetnode_address->sget_addressline; $joe->sgetnode_address->set_addressline($NEW_ADDRESS); $dbh->storenode($joe); $rset = $dbh->selectall_stag("SELECT * FROM person NATURAL JOIN address WHERE fname = 'joe'", ); $joe = $rset->getnode_person; ok($joe->sgetnode_address->sget_addressline eq $NEW_ADDRESS); $joe->unset_person_id; $joe->sgetnode_address->set_addressline($OLD_ADDRESS); $dbh->storenode($joe); $rset = $dbh->selectall_stag("SELECT * FROM person NATURAL JOIN address WHERE fname = 'joe'", ); $joe = $rset->getnode_person; ok($joe->sgetnode_address->sget_addressline eq $OLD_ADDRESS); $dbh->disconnect; DBIx-DBStag-0.12/t/store2b.t0000644000076500000240000000407311326157220014134 0ustar cainstaffuse lib 't'; BEGIN { # to handle systems with no installed Test module # we include the t dir (where a copy of Test.pm is located) # as a fallback eval { require Test; }; use Test; use DBStagTest; plan tests => 2; } use DBIx::DBStag; use DBI; use 
Data::Stag; use FileHandle; drop qw( person address); my $ddl = <trace(1); $dbh->do($ddl); $dbh->guess_mapping; my $personset = Data::Stag->from('sxprstr', $data); my @persons = $personset->getnode_person; foreach (@persons) { $dbh->storenode($_); } our $NEW_ADDRESS = "some new address"; my $rset = $dbh->selectall_stag("SELECT * FROM person NATURAL JOIN address WHERE fname = 'joe'", ); print $rset->sxpr; my $joe = $rset->getnode_person; my $OLD_ADDRESS = $joe->sgetnode_address->sget_addressline; $joe->sgetnode_address->set_addressline($NEW_ADDRESS); $dbh->storenode($joe); $rset = $dbh->selectall_stag("SELECT * FROM person NATURAL JOIN address WHERE fname = 'joe'", ); $joe = $rset->getnode_person; ok($joe->sgetnode_address->sget_addressline eq $NEW_ADDRESS); $joe->unset_person_id; $joe->sgetnode_address->set_addressline($OLD_ADDRESS); $dbh->storenode($joe); $rset = $dbh->selectall_stag("SELECT * FROM person NATURAL JOIN address WHERE fname = 'joe'", ); $joe = $rset->getnode_person; ok($joe->sgetnode_address->sget_addressline eq $OLD_ADDRESS); $dbh->disconnect; DBIx-DBStag-0.12/t/store3.t0000644000076500000240000000552511326157220013776 0ustar cainstaffuse lib 't'; BEGIN { # to handle systems with no installed Test module # we include the t dir (where a copy of Test.pm is located) # as a fallback eval { require Test; }; use Test; use DBStagTest; plan tests => 2; } use DBIx::DBStag; use DBI; use Data::Stag; use FileHandle; drop qw(person address ); my $ddl = <trace(1); $dbh->do($ddl); $dbh->mapping([ Data::Stag->from('sxprstr', '(map (table "person") (col "homeaddress_id") (fktable_alias "homeaddress") (fkcol "address_id") (fktable "address"))'), Data::Stag->from('sxprstr', '(map (table "person") (col "workaddress_id") (fktable_alias "workaddress") (fkcol "address_id") (fktable "address"))'), ]); my $personset = Data::Stag->from('sxprstr', $data); my @persons = $personset->getnode_person; foreach (@persons) { $dbh->storenode($_); } my $query = "SELECT * FROM person 
INNER JOIN address AS homeaddress ON (person.homeaddress_id = homeaddress.address_id) WHERE fname = 'joe'"; our $NEW_ADDRESS = "some new address"; my $rset = $dbh->selectall_stag($query ); print $rset->sxpr; my $joe = $rset->getnode_person; print $joe->sxpr; my $OLD_ADDRESS = $joe->sgetnode_homeaddress->sgetnode_address->sget_addressline; $joe->sgetnode_homeaddress->sgetnode_address->set_addressline($NEW_ADDRESS); $dbh->storenode($joe); $rset = $dbh->selectall_stag($query ); $joe = $rset->getnode_person; ok($joe->sgetnode_homeaddress->sgetnode_address->sget_addressline eq $NEW_ADDRESS); $joe->unset_person_id; $joe->sgetnode_homeaddress->sgetnode_address->set_addressline($OLD_ADDRESS); $dbh->storenode($joe); $rset = $dbh->selectall_stag($query ); $joe = $rset->getnode_person; ok($joe->sgetnode_homeaddress->sgetnode_address->sget_addressline eq $OLD_ADDRESS); $dbh->disconnect; DBIx-DBStag-0.12/t/store4.t0000644000076500000240000000566711326157220014006 0ustar cainstaffuse lib 't'; BEGIN { # to handle systems with no installed Test module # we include the t dir (where a copy of Test.pm is located) # as a fallback eval { require Test; }; use Test; use DBStagTest; plan tests => 4; } use DBIx::DBStag; use DBI; use Data::Stag; use FileHandle; use strict; drop qw( person2address person address ); my $ddl = <trace(1); $dbh->do($ddl); $dbh->trust_primary_key_values(1); $dbh->guess_mapping; my $personset = Data::Stag->from('sxprstr', $data); $dbh->linking_tables(person2address => [qw(person address)]); #$dbh->add_linking_tables($personset); #die $personset->sxpr; my @persons = $personset->getnode_person; foreach (@persons) { $dbh->storenode($_); } my @q = ("SELECT person.*, address.* FROM person NATURAL JOIN person2address NATURAL JOIN address WHERE person.fname = 'joe' ORDER BY addressline", "(personset(person(address 1)))"); my $rset = $dbh->selectall_stag(@q ); print $rset->sxpr; my $joe = $rset->getnode_person; my $first_address = $joe->sgetnode_address; my $OLD_ADDRESS = 
$first_address->sget_addressline; our $NEW_ADDRESS = $OLD_ADDRESS . "; appartment C"; $first_address->set_addressline($NEW_ADDRESS); print "added appt\n"; print $joe->sxpr, "\n"; $dbh->storenode($joe); $rset = $dbh->selectall_stag(@q ); $joe = $rset->getnode_person; my @addresses = $joe->get_address; ok(@addresses == 2); ok($joe->sgetnode_address->sget_addressline eq $NEW_ADDRESS); $joe->unset_person_id; $joe->sgetnode_address->set_addressline($OLD_ADDRESS); print "unset person_id\n"; print $joe->sxpr, "\n"; $dbh->storenode($joe); $rset = $dbh->selectall_stag(@q ); $joe = $rset->getnode_person; print $joe->sxpr, "\n"; ok($joe->sget_address->sget_addressline eq $OLD_ADDRESS); $rset = $dbh->selectall_stag(@q ); print $rset->sxpr; $joe = $rset->getnode_person; $joe->set_lname('bliggs'); $dbh->storenode($joe); $rset = $dbh->selectall_stag(@q ); print $rset->sxpr; ok($rset->get_person->get_lname eq 'bliggs'); $dbh->disconnect; DBIx-DBStag-0.12/t/store5.t0000644000076500000240000000471111326157220013774 0ustar cainstaffuse lib 't'; BEGIN { # to handle systems with no installed Test module # we include the t dir (where a copy of Test.pm is located) # as a fallback eval { require Test; }; use Test; use DBStagTest; plan tests => 4; } use DBIx::DBStag; use DBI; use Data::Stag; use FileHandle; use strict; drop qw( person2address person address ); my $ddl = <do($ddl); my $set = Data::Stag->from('sxprstr', $data); my @nodes = $set->kids; $dbh->is_caching_on('person',1); foreach (@nodes) { $dbh->storenode($_); } my $aset = $dbh->selectall_stag("SELECT address.*, person.* FROM address NATURAL JOIN person2address NATURAL JOIN person WHERE addressline = '10 Downing Street'"); print $aset->xml; my @addresses = $aset->get_address; ok(@addresses==1); my $address = shift @addresses; my @persons = $address->get_person; ok(@persons==2); ok(grep {$_->get_lname eq "attlee"} @persons); ok(grep {$_->get_lname eq "churchill"} @persons); $dbh->disconnect; 
DBIx-DBStag-0.12/t/template1.x0000644000076500000240000000241611326157220014453 0ustar cainstaffuse lib 't'; BEGIN { # to handle systems with no installed Test module # we include the t dir (where a copy of Test.pm is located) # as a fallback eval { require Test; }; use Test; use DBStagTest; plan tests => 4; } use DBIx::DBStag; use DBI; use Data::Stag; use FileHandle; use strict; drop(); my $f = "t/data/bond.el"; my $spydata = Data::Stag->parse($f); my $dbh = dbh(); my $ddl = $dbh->autoddl($spydata); $dbh->do($ddl); my @kids = $spydata->kids; foreach (@kids) { $dbh->storenode($_); } $dbh->add_template(agent => q/ SELECT * FROM agent NATURAL JOIN mission NATURAL JOIN mission_gizmo NATURAL JOIN mission_to_person NATURAL JOIN person WHERE lastname = ? /); $dbh->add_template(agent_by_mission => q/ SELECT * FROM agent NATURAL JOIN mission NATURAL JOIN mission_gizmo NATURAL JOIN mission_to_person NATURAL JOIN person WHERE agent_id IN (SELECT agent_id FROM MISSION WHERE codename = ?) /); # use 'agent' template my $bond = $dbh->fetch_agent("Bond"); $bond = $dbh->fetch_agent("mission.codename" => "goldfinger"); # use 'agent_by_mission' template $bond = $dbh->fetch_agent_by_mission(codename => "goldfinger"); $dbh->disconnect; DBIx-DBStag-0.12/t/tstore2.x0000644000076500000240000000377611326157220014173 0ustar cainstaffuse lib 't'; BEGIN { # to handle systems with no installed Test module # we include the t dir (where a copy of Test.pm is located) # as a fallback eval { require Test; }; use Test; use DBIStagTest; plan tests => 2; } use DBIx::DBIStag; use DBI; use Data::Stag; use FileHandle; my $ddl = <trace(1); $dbh->do($ddl); $dbh->guess_mapping; my $personset = Data::Stag->from('sxprstr', $data); my @persons = $personset->getnode_person; foreach (@persons) { $dbh->storenode($_); } our $NEW_ADDRESS = "some new address"; my $rset = $dbh->selectall_stag("SELECT * FROM person NATURAL JOIN address WHERE fname = 'joe'", ); print $rset->sxpr; my $joe = $rset->getnode_person; my 
$OLD_ADDRESS = $joe->sgetnode_address->sget_addressline; $joe->sgetnode_address->set_addressline($NEW_ADDRESS); $dbh->storenode($joe); $rset = $dbh->selectall_stag("SELECT * FROM person NATURAL JOIN address WHERE fname = 'joe'", ); $joe = $rset->getnode_person; ok($joe->sgetnode_address->sget_addressline eq $NEW_ADDRESS); $joe->unset_person_id; $joe->sgetnode_address->set_addressline($OLD_ADDRESS); $dbh->storenode($joe); $rset = $dbh->selectall_stag("SELECT * FROM person NATURAL JOIN address WHERE fname = 'joe'", ); $joe = $rset->getnode_person; ok($joe->sgetnode_address->sget_addressline eq $OLD_ADDRESS); DBIx-DBStag-0.12/t/xort-style.t0000644000076500000240000000243511326157220014706 0ustar cainstaffuse lib 't'; BEGIN { # to handle systems with no installed Test module # we include the t dir (where a copy of Test.pm is located) # as a fallback eval { require Test; }; use Test; use DBStagTest; plan tests => 3; } use DBIx::DBStag; use DBI; use Data::Stag; use FileHandle; use strict; drop(); my $dbh = connect_to_cleandb(); my $f = "parts-schema.sql"; open(F, "t/data/$f") || die; my $ddl = join('',); close(F); $dbh->do($ddl); $f = "parts-data.xml"; print "parsing..\n"; my $chado = Data::Stag->parse("t/data/$f"); print "parsed; now storing..\n"; $dbh->storenode($_) foreach $chado->subnodes; ok(1); my $cset = $dbh->selectall_stag(q[ SELECT * FROM component LEFT OUTER JOIN part_of ON (component.component_id=part_of.object_id) LEFT OUTER JOIN component AS c2 ON (c2.component_id=part_of.subject_id) USE NESTING (set(component(part_of(c2)))) ]); print $cset->xml; my @cs = $cset->get_component; ok(@cs,5); $cset = $dbh->selectall_stag(q[ SELECT component.* FROM component INNER JOIN part_of ON (component.component_id=part_of.subject_id) INNER JOIN component AS c2 ON (c2.component_id=part_of.object_id) WHERE c2.name='1b' ]); print $cset->xml; my @names = sort $cset->find_name; print "names=@names\n"; ok("@names", "1b-I 1b-II"); $dbh->disconnect;