DBIx-DBStag-0.12/ 0000755 0000765 0000024 00000000000 11331570203 012114 5 ustar cain staff DBIx-DBStag-0.12/cgi-bin/ 0000755 0000765 0000024 00000000000 11331570203 013424 5 ustar cain staff DBIx-DBStag-0.12/cgi-bin/ubiq.cgi 0000755 0000765 0000024 00000050441 11326157220 015063 0 ustar cain staff #!/usr/local/bin/perl -w
BEGIN{
eval{do "dbenv.pl"};
die $@ if $@;
}; # end of sub:
use strict;
use lib split(/:/, $ENV{STAGLIB} || '');
use IO::String;
use DBIx::DBStag;
use CGI qw/:standard/;
use vars qw(%IS_FORMAT_FLAT $cscheme);
#$ENV{DBSTAG_TRACE}=1;
# --------------------------
# MAIN
ubiq();
exit 0;
# --------------------------
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# ubiq
#
# This is the core function. It does everything
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub ubiq {
# =============================================
# DECLARE VARIABLES
# note: the functions below are lexically closed
# and can thus access these variables.
#
# if you're not familiar with closures you might
# find this a bit confusing...
# =============================================
%IS_FORMAT_FLAT =
map {$_=>1} qw(flat-CSV flat-TSV flat-HTML-table);
$cscheme =
{
'keyword'=>'cyan',
'variable'=>'magenta',
'text' => 'reset',
'comment' => 'red',
'block' => 'blue',
'property' => 'green',
};
my $cgi = CGI->new;
my $sdbh =
DBIx::DBStag->new;
# child dbh
my $dbh;
my $stag;
my $res;
my $schema;
my $loc;
my $templates = [];
my $varnames = [];
my $example_input = {};
my $options = {};
my $nesting = '';
my $rows;
my $template;
my $template_name = '';
my %exec_argh = ();
my $resources = $sdbh->resources_list;
my $resources_hash = $sdbh->resources_hash;
my @dbresl = grep {$_->{type} eq 'rdb'} @$resources;
my @dbnames = (map {$_->{name}} @dbresl);
my $W = Data::Stag->getformathandler('sxpr');
my $ofh = \*STDOUT;
my $format;
my $dbname;
my $errmsg = '';
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# keep
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub keep; #
*keep = sub {
join('&',
map {"$_=".param(myescapeHTML($_))} grep {param($_)} qw(dbname template format save mode));
}; # end of sub: keep
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# url
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub url; #
*url = sub {
my $base = shift;
my %p = @_;
%p = map {
my $v = param($_);
$p{$_} ?
($_ => $p{$_}) :
($v ? ($_=>$v) : ());
} (keys %p, qw(dbname template format save mode));
return "$base?".
join('&',
map {"$_=".$p{$_}} keys %p);
}; # end of sub: url
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# conn
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub conn; #
*conn = sub {
$dbh = DBIx::DBStag->connect($dbname) unless $dbh;
}; # end of sub: conn
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# is_format_flat
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub is_format_flat; #
*is_format_flat = sub {
# my $f = shift;
$IS_FORMAT_FLAT{$format};
}; # end of sub: is_format_flat
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#
# BASIC LAYOUT
#
# headers, footers, help, etc
#
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# g_title
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub g_title; #
*g_title = sub {
"U * B * I * Q";
}; # end of sub: g_title
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# short_intro
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub short_intro; #
*short_intro = sub {
"This is the generic UBIQ interface";
}; # end of sub: short_intro
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# top_of_page
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub top_of_page; #
*top_of_page = sub {
(h1(g_title),
href("ubiq.cgi", "Ubiq"),
' | ',
href("ubiq.cgi?help=1", "Help"),
br,
href('#templates', '>>Templates'),
br,
short_intro,
hr,
);
}; # end of sub: top_of_page
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# footer
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub footer; #
*footer = sub {
(hr,
href('http://stag.sourceforge.net'),
br,
myfont('$Id: ubiq.cgi,v 1.8 2004/04/12 18:23:10 cmungall Exp $x',
(size=>-2)),
);
}; # end of sub: footer
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#
# VIEW WIDGETS
#
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# template_detail
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub template_detail; #
*template_detail = sub {
my $templates = shift;
my @tbls =
map {
my $io = IO::String->new;
$_->show($io, $cscheme, \&htmlcolor);
my $sr = $io->string_ref;
('name),
table({-border=>1},
Tr(
[td(["$$sr
"])])))
} @$templates;
return ''.join("\n", @tbls);
}; # end of sub: template_detail
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# stag_detail
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub stag_detail; #
*stag_detail = sub {
# my $W = Data::Stag->getformathandler($format || 'sxpr');
# $stag->events($W);
# my $out = $W->popbuffer;
my $out = $stag->generate(-fmt=>$format);
return resultbox($out);
}; # end of sub: stag_detail
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# rows_detail
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub rows_detail; #
*rows_detail = sub {
if ($format eq 'flat-HTML-table') {
my $hdr = shift @$rows;
h2('Results').
table({-border=>1, -bgcolor=>'yellow'},
Tr({},
[th([@$hdr]),
map {td([map {colval2cell($_)} @$_])} @$rows]));
} else {
my $j = "\t";
if ($format eq 'flat-CSV') {
$j = ',';
}
my $out = join("\n",
map {
join($j,
map {escape($_, ("\n"=>'\n', $j=>"\\$j"))} @$_)
} @$rows);
resultbox($out);
}
}; # end of sub: rows_detail
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# query_results
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub query_results; #
*query_results = sub {
(
($stag ? stag_detail() : ''),
($rows ? rows_detail() : ''),
);
}; # end of sub: query_results
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#
# CHOOSERS
#
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# template_chooser
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub template_chooser; #
*template_chooser = sub {
#my $templates = shift;
return
table(Tr({-valign=>"TOP"},
[
map {
my $is_selected = $_->name eq $template_name;
my $h = {};
if ($is_selected) {
$h = {bgcolor=>'red'}
}
my $desc = $_->desc;
my $name = $_->name;
my $nl = "\n";
$desc =~ s/\n/\
/gs;
td($h,
[
href("#$name", '[scroll]'),
# href("#$name", '[view]'),
href(url('ubiq.cgi', (template=>$name)),
strong($name)),
$desc.hr,
])
} @$templates,
]));
}; # end of sub: template_chooser
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# attr_settings
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub attr_settings; #
*attr_settings = sub {
return unless $template;
my @vals = ();
my @popups = ();
my @extra = ();
my $basic_tbl =
table(Tr({},
[
map {
my $examples = '';
my $ei = $example_input->{$_} || [];
while (length("@$ei") > 100) {
pop @$ei;
}
if (@$ei) {
$examples = " Examples: ".em(join(', ', @$ei));
}
td([$_, textfield("attr_$_").$examples])
} @$varnames
]));
my $adv_tbl =
table(Tr({},
[td([
join(br,
"Override SQL SELECT:",
textarea(-name=>'select',
-cols=>80,
),
"Override SQL WHERE:",
textarea(-name=>'where',
-cols=>80,
),
"Override Full SQL Query:",
textarea(-name=>'sql',
-cols=>80,
),
"Use nesting hierarchy:",
textarea(-name=>'nesting',
-cols=>80,
),
)
])]));
return
(
hr,
"Selected Template: ",
strong($template_name),
br,
submit(-name=>'submit',
-value=>'exectemplate'),
$basic_tbl,
$adv_tbl,
# table({-border=>1},
# Tr({-valign=>"TOP"},
# [td([
# ])])),
("Tree/Flat format: ",
popup_menu(-name=>'format',
-values=>[qw(sxpr itext XML nested-HTML flat-TSV flat-CSV flat-HTML-table)]),
checkbox(-name=>'save',
-value=>1,
-label=>'Save Results to Disk'),
' ',
checkbox(-name=>'showsql',
-value=>1,
-label=>'Show SQL Statement'),
),
br,
submit(-name=>'submit',
-value=>'exectemplate'),
hr);
}; # end of sub: attr_settings
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#
# SETTERS
#
# these set variables depending on users selections
#
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# setdb
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub setdb; #
*setdb = sub {
#$dbname = shift;
return unless $dbname;
msg("Set dbname to $dbname");
$res = $resources_hash->{$dbname};
if ($res) {
$schema = $res->{schema} || '';
$loc = $res->{loc} || '';
msg("loc: $loc") if $loc;
if ($schema) {
$templates = $sdbh->find_templates_by_schema($schema);
msg("schema: $schema");
} else {
msg("schema not known; templates unrestricted");
$templates = $sdbh->template_list;
}
msg("Templates available: " . scalar(@$templates));
} else {
warnmsg("Unknown $dbname");
}
$res;
}; # end of sub: setdb
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# settemplate
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub settemplate; #
*settemplate = sub {
my $n = shift;
my @matches = grep {$_->name eq $n} @$templates;
die "looking for $n, got @matches" unless @matches == 1;
$template = shift @matches;
$varnames = $template->get_varnames;
conn;
my $cachef = "./cache/cache-$dbname-$n";
$example_input = $template->get_example_input($dbh,
$cachef,
1);
system("chmod 777 $cachef");
$template_name = $n;
}; # end of sub: settemplate
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# resultbox
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub resultbox; #
*resultbox = sub {
my $out = shift;
if (param('save')) {
return $out;
}
h2('Results').
table({-border=>1},
Tr({},
td({bgcolor=>"yellow"},["$out
"])));
}; # end of sub: resultbox
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# msg
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub msg; #
*msg = sub {
}; # end of sub: msg
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# htmlcolor
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub htmlcolor; #
*htmlcolor = sub {
my $c = shift;
if ($c eq 'reset') {
'';
} else {
"";
}
}; # end of sub: htmlcolor
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# display_helppage
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub display_helppage; #
*display_helppage = sub {
print(header,
start_html("UBIQ: Instructions"),
h1("UBIQ: Instructions for use"),
h3("What is this?"),
p("UBIQ is a generic interface to any relational database.",
"It allows web-based queries either through SQL or an",
"extensible set of",strong("SQL Templates"),
"which must be defined for the database of interest."),
p("UBIQ will take the SQL query results and make a",
strong("hierarchical"), "data structure which is displayed",
"in a format such as XML or indented text"),
p("This is achieved using the ",
href("http://stag.sourceforge.net", "DBStag"),
"perl module"),
p("UBIQ is intended for advanced, generic queries.",
"If you want user-friendly queries you should use an",
"interface that has been custom-designed for the database",
"you are interested in."),
h3("Using UBIQ"),
p("First of all select the database of interest and",
"click 'selectdb'. (There may only be one database,",
"in which case you can skip this part)."),
p("Next, choose a template from the list available for",
"That database. Each template should have a description of",
"what kind of query it is. You can also scroll down to the full",
"SQL Template definition. For a description of the SQL Template",
"syntax, see",
href("http://stag.sourceforge.net", "Stag Documentation"),
),
p("After you have selected a template, you can paste in settings",
"for template attributes.",
"The character '*' gets treated as a wildcard.",
),
p("You can now choose a format for the results.",
"Most of the formats are ", strong("hierarchical."),
"if a hierarchical format is selected, then UBIQ will",
"perform a transformation on the flat, tabular query results",
"and build a tree-type structure that should reflect the",
"natural organisation of the data."),
p("Hierarchical formats are XML, sxpr (Lisp S-Expressions),",
"itext (indented text)."),
p("Non-hierarchical formats are tables of comma/tab seperated fields,",
"which can optionally formatted into an HTML table"),
p("You can also choose to see the actual SQL that gets executed"),
p("When you have set the parameters, you can execute the template"),
p(em("Note"), "As yet, UBIQ has no means of prioritising queries,",
"it is possible to launcg queries that put a large load on the",
"server, please be careful"),
p(
"If you receive an internal server error it probably means your query was terminated",
"because it was not fully constrained. If this happens, pass in more constraints.",
"DO NOT keep hitting reload - this will cause the database server to slow down.",
"If this service becomes overloaded, it will have to be removed"
),
h3("Advanced use"),
p("Yes, a SOAP interface would be nice. No plans as yet.",
),
p(href("ubiq.cgi", "Start UBIQ")),
);
}; # end of sub: display_helppage
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# display_htmlpage
#
# MAIN PAGE
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub display_htmlpage; #
*display_htmlpage = sub {
print(
header,
start_html(g_title),
top_of_page,
start_form(-action=>'ubiq.cgi', -method=>'GET'),
# DATABASE SELECTION
("Database",
popup_menu(-name=>'dbname',
-values=>[sort {$a cmp $b} @dbnames],
-onChange=>"submit()",
),
submit(-name=>'submit',
-value=>"selectdb")),
# QUERY RESULTS - if present
(query_results),
# ERRORS - if present
($errmsg),
# ATTRIBUTE CHOOSER - if template is set
(attr_settings(),
($template ? template_detail([$template]) : ''),
hr),
# TEMPLATE CHOOSER
(h3("Choose a template:"),
template_chooser,
hr),
# TEMPLATES - all or just selected
($template ? '' : template_detail($templates)),
# PERSISTENT VARS
hidden('template', param('template')),
end_form,
footer,
);
}; # end of sub: display_htmlpage
# ================================
#
# SETTING THINGS UP
#
# ================================
my @initfuncs = ();
*add_initfunc = sub {
push(@initfuncs, shift);
};
add_initfunc(sub {
$format = param('format') || 'sxpr';
$dbname = param('dbname');
if (@dbnames == 1) {
# only one to choose from; autoselect
$dbname = $dbnames[0];
}
setdb; # sets $dbh
# sets $template $varnames
settemplate(param('template'))
if param('template') && param('submit') ne 'selectdb';
# set variable bindings
foreach (@$varnames) {
my $v = param("attr_$_");
if ($v) {
$v =~ s/\*/\%/g;
$exec_argh{$_} = $v;
}
}
});
if (-f 'ubiq-customize.pl') {
eval `cat ubiq-customize.pl`;
die $@ if $@;
}
$_->() foreach @initfuncs;
# execute query
if ($template && param('submit') eq 'exectemplate') {
eval {
conn();
my $no_query_params_set = !scalar(keys %exec_argh);
if (param('where')) {
$template->set_clause(where=>param('where'));
$no_query_params_set = 0;
}
if (param('select')) {
$template->set_clause(where=>param('select'));
}
if ($no_query_params_set) {
$errmsg = h2("No Query Constraints Set");
}
else {
# kill killer queries
my $tag = "kill$$"."TAG";
my $tagf = "/tmp/$tag";
my $t=time;
print STDERR "Launched killer $tagf at $t\n";
system("touch $tagf && chmod 777 $tagf && sleep 15 && test -f $tagf && kill -9 $$ && rm $tagf &");
if (is_format_flat) {
$rows =
$dbh->selectall_rows(
-template=>$template,
-bind=>\%exec_argh,
);
} else {
$stag =
$dbh->selectall_stag(
-template=>$template,
-bind=>\%exec_argh,
-nesting=>$nesting,
);
}
# inactivate killer (killer only kills if $tagf is present)
$t=time;
print STDERR "Inactivated killer $tagf at $t\n";
system("rm $tagf &");
}
};
if ($@) {
my $err = $@;
$errmsg =
br.strong("Database Error:")."$err
";
}
}
# WRITE HTML TO BROWSER
if (param('save')) {
# WRITE TO FILE
print(header({-type=>"text/text"}),
query_results);
}
if (param('help')) {
# WRITE TO FILE
display_helppage
}
else {
# WRITE HTML
display_htmlpage;
}
}
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#
# CGI UTILITY FUNCTIONS
#
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# href
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub href {
my $url = shift;
my $n = shift || $url;
"$n";
} # end of sub: href
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# myfont
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub myfont ($%) {
my $str = shift;
my %h = @_;
sprintf("$str",
join(' ',
map {sprintf('%s="%s"',
$_, $h{$_})} keys %h));
} # end of sub: myfont
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# escape
#
# escapes characters using a map
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub escape ($@) {
my $s = shift || '';
my %cmap = @_;
$cmap{'\\'} = '\\\\';
my @from = keys %cmap;
my @to = map{$cmap{$_}} @from;
my $f = join('', @from);
my $t = join('', @to);
$s =~ tr/$f/$t/;
$s;
} # end of sub: escape
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# myescapeHTML
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub myescapeHTML ($) {
my $s = shift;
return $s;
} # end of sub: myescapeHTML
# ++++++++++++++++++++++++++++++++++++++++++++++++++
# colval2cell
#
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++
sub colval2cell ($) {
my $cell = shift;
if (!defined($cell)) {
return 'NULL';
}
$cell;
} # end of sub: colval2cell
DBIx-DBStag-0.12/Changes 0000644 0000765 0000024 00000005441 11326157220 013417 0 ustar cain staff Changelog for DBIx::DBStag
Version 0.01
============
2003-04-20 - Initial Release
Version 0.02
============
2004-04-01 - Addition of Templates
- Use of double underscore to force columns into table
Version 0.03
============
2004-04-02 - Problem with 0.02 distribution (accidentally included blib)
Version 0.04
============
2004-07-15 - Fixed storenode() so that it works with databases
that use the primary_key="id" convention (eg GO DB)
- added code to deal with in storenode
- DBStag now used for loading data into GO DB
(http://www.godatabase.org)
Version 0.05
============
2004-10-01 - Added switches for caching and bulkloading
Version 0.06
============
2004-10-29 - Efficiency improvements
Version 0.07
============
2005-02-09 - [cjm] added cache option to storenode.pl
- escapes tabs and newlines with -rows switch
- added check-resources.pl
2005-03-06 - [cjm] added support for loading XORT-style XML
- nodes are only made dbsafe if requested
2005-03-18 - [cjm] added include_metadata option
2005-04-29 - [cjm] added tests for xort-mode: chado feature
Version 0.08
============
2005-07-15 - [cjm] removed noaliases option
replaced with aliaspolicy
[cjm] fixed IN and multivalued bindings
[cjm] stag-storenode.pl
arguments for user and password
2005-08-22 - [cjm] added stag-connect-params.pl
2005-08-26 - [cjm] Pg insert uses SEQUENCE to fetch inserted serial pk
(mysql still uses max(pk))
2005-10-28 - [cjm] DB2 DBI string fixes
[cjm] improvements to stag-qsh
2006-03-11 - [cjm] back to using last_insert_id from mysql
(now works on DBI/DBD::mysql)
Version 0.09
============
2006-08-08 - [cjm] XORT ID fix
2006-08-08 - [cjm] added more docs to stag-storenode.pl - cache options
2007-08-08 - [cjm] allowed additional DBI arguments in DBSTAG_DBIMAP_FILE
2007-08-29 - [cjm] changed to DBIx::DBSchema 0.34 interface (now requires 0.34)
Version 0.10
============
2008-02-05 - [cjm] removed default assumption that entire colset can function as UC
2008-02-05 - [cjm] use placeholders for inserts
2008-06-03 - [cjm] added missing test file to MANIFEST
Version 0.11
============
2009-12-14 - miration to github
2010-01-21 - compensated from bug(?) in DBSchema 0.38, where default values of '' were being quoted, resulting in double quoting
DBSchema 0.38 also appears to use refs for defaults. DBStag is now neutral w.r.t. version of DBSchema, it derefs if it is a reference.
DBIx-DBStag-0.12/DBIx/ 0000755 0000765 0000024 00000000000 11331570203 012702 5 ustar cain staff DBIx-DBStag-0.12/DBIx/DBStag/ 0000755 0000765 0000024 00000000000 11331570203 014006 5 ustar cain staff DBIx-DBStag-0.12/DBIx/DBStag/Constraint.pm 0000644 0000765 0000024 00000006275 11331570152 016505 0 ustar cain staff # $Id: Constraint.pm,v 1.9 2007/03/05 09:12:49 cmungall Exp $
# -------------------------------------------------------
#
# Copyright (C) 2003 Chris Mungall
#
# This module is free software.
# You may distribute this module under the same terms as perl itself
#---
# POD docs at end of file
#---
package DBIx::DBStag::Constraint;
use strict;
use vars qw($VERSION @ISA @EXPORT_OK %EXPORT_TAGS $DEBUG $AUTOLOAD);
use Carp;
use DBI;
use Data::Stag qw(:all);
use DBIx::DBStag;
$VERSION='0.12';
sub DEBUG {
$DBIx::DBStag::DEBUG = shift if @_;
return $DBIx::DBStag::DEBUG;
}
sub trace {
my ($priority, @msg) = @_;
return unless $ENV{DBSTAG_TRACE};
print "@msg\n";
}
sub dmp {
use Data::Dumper;
print Dumper shift;
}
sub new {
my $proto = shift;
my $class = ref($proto) || $proto;
my $self = {};
bless $self, $class;
$self->bindings({});
$self->children([]);
$self;
}
# takes a simple hash of bindings
# and makes an ANDed set of constraints
sub from_hash {
my $class = shift;
my $h = shift;
my $cons = $class->new;
$cons->bool('and');
$cons->children([
map {
my $sc = $class->new;
$sc->bindings({$_ => $h->{$_}});
$sc;
} keys %$h
]);
return $cons;
}
sub throw {
my $self = shift;
my $fmt = shift;
print STDERR "\nERROR:\n";
printf STDERR $fmt, @_;
print STDERR "\n";
confess;
}
#AND, OR, NOT
sub bool {
my $self = shift;
$self->{_bool} = shift if @_;
return $self->{_bool};
}
# eg AND(cons1, cons2, cons3)
sub children {
my $self = shift;
$self->{_children} = shift if @_;
return $self->{_children};
}
sub is_leaf {
my $self = shift;
return !scalar(@{$self->children});
}
# value to replace => in option block with
sub op {
my $self = shift;
$self->{_op} = shift if @_;
return $self->{_op};
}
# variable bindings - hash of varname => varval
sub bindings {
my $self = shift;
$self->{_bindings} = shift if @_;
return $self->{_bindings};
}
1;
__END__
=head1 NAME
DBIx::DBStag::Constraint -
=head1 SYNOPSIS
=cut
=head1 DESCRIPTION
A constraint is a recursive structure for representing query constraints;
AND --- x=1
\
\---- OR --- y>2
\
\----- boolfunc(z)
A constraint is either a bool (AND, OR, NOT) with >0 children,
or it can be a leaf node representing an atomic constraint
the constraint corresponds to a SQLTemplate option block; eg
[ name LIKE &name& ]
[ name => &name& ]
[ start > &min_start& ]
[ start => &start& ]
[ somefunc(&x&) ]
[ somefunc(&x&, &y&) ]
[ somefunc(t.col, &x&, &y&) => &z& ]
the constraint consists of an operator (represented by => in the
option block). If no => is present, then it is a simple variable binding.
A constraint can consist of multiple variable bindings
=head1 WEBSITE
L
=head1 AUTHOR
Chris Mungall >
=head1 COPYRIGHT
Copyright (c) 2003 Chris Mungall
This module is free software.
You may distribute this module under the same terms as perl itself
=cut
1;
DBIx-DBStag-0.12/DBIx/DBStag/Cookbook.pm 0000644 0000765 0000024 00000024402 11326157220 016120 0 ustar cain staff
=head1 NAME
DBIx::DBStag::Cookbook - building and querying databases from XML
=head1 SYNOPSIS
stag-autoddl.pl
stag-storenode.pl
selectall_xml.pl
=head1 DESCRIPTION
This will give an outline of how to build a normalised relational
database from XML source data, set up SQL templates, issue relational
queries that return hierarchical results (as XML or as perl objects),
and autogenerate a web query front end for this data.
Why would you want to do this? Well, it gives you the full power of
the relational model and SQL, combined with the convenience of
representations which allow for the nesting of data entities (SQL
query results are typically flat relations which are inconvenient for
complex hierarchical data).
The dataset we will use is the CIA World Factbook.
The web interface should end up looking something like this -
L
=head2 AUTOGENERATING A RELATIONAL DATABASE
Download CIA world factbook in XML format; this has kindly been made
available by The University of Goettingen as part of their Mondial
database project, see
L for details.
The actual XML file is available via
L
Or from
L
=head3 Pre-processing
We need to do some pre-processing of the XML to make it more
database-friendly. This is necessitated by the way Stag handles
attributes (Stag prefers XML documents that have a simple tree
format). We also want to turn XXX_id fields into XXX_ciaid, because we
prefer to use XXX_id for surrogate keys in the database.
stag-mogrify.pl -w xml -r 's/text$/quant/'\
-r 's/id$/ciaid/'\
-r 's/(.*)\-//'\
cia.xml > cia-pp.xml
See also
L
=head3 Generating the SQL DDL
Next we generate the SQL B statements
stag-autoddl.pl -t cia-pp2.xml cia-pp.xml > cia-schema.sql
This does further post-processing of the XML, to make it suitable for
relational storage; see the file B which is generated as
a side-effect of running the above script.
Load the database (the following instructions assume you have
postgresql on your localhost; please consult your DBMS manual if this
is not the case)
createdb cia
psql -a -e cia < cia-schema.sql >& create.log
(check there are no errors in the log file)
=head3 LOAD THE DATA
Turn the processed XML into relations:
stag-storenode.pl -d dbi:Pg:dbname=cia cia-pp2.xml >& load.log
=head2 FETCHING TREE DATA USING SQL
You can issue SQL queries (using optional stag-specific extensions)
and get the results back in a hierarchical format such as XML
=head3 SQL to XML via the command line
Fetch countries nested under continents:
selectall_xml.pl -d dbi:Pg:dbname=cia\
"SELECT * FROM continent INNER JOIN country ON (continent.name=country.continent)"
Or, edit a file containing the SQL (the following query fetches data
on countries bordering other countries on different continents)
cat > myquery.sql
select c1.*, c2.*
from country AS c1
inner join borders on (c1.country_id = borders.country_id)
inner join country AS c2 on (borders.country=c2.ciaid)
where c1.continent != c2.continent
order by c1.name, c2.name
use nesting (set(c1(c2)));
(the final clause is a DBStag SQL extension - it nests country c2
under country c1)
Then query for XML
selectall_xml.pl -d dbi:Pg:dbname=cia -f myquery.sql > myresults.xml
=head3 SQL to XML via the Interactive Query Shell
Query the data using the stag query shell (qsh). You type in SQL
queries, and get results back as XML (or any other tree format, such
as indented text or S-Expressions).
The following can be cut and pasted directly onto the unix command
line:
Simple query rooted at B:
stag-qsh -d dbi:Pg:dbname=cia
\l
SELECT * FROM country INNER JOIN country_coasts USING (country_id)
WHERE country.name = 'France';
(type \q to quit stag-qsh)
Or a more advanced query, still rooted at B
stag-qsh -d dbi:Pg:dbname=cia
\l
SELECT *
FROM country
LEFT OUTER JOIN religions USING (country_id)
LEFT OUTER JOIN languages USING (country_id)
INNER JOIN continent ON (continent.name=country.continent)
WHERE continent.ciaid = 'australia'
USE NESTING (set(country(religions)(languages)(continent)));
See L for more details on fetching hierarchical data
from relational database
=head2 USING TEMPLATES
If you have a particular pattern of SQL you execute a lot, you can
reuse this SQL by creating B
=head3 Creating Templates
First create a place for your templates:
mkdir ./templates
(do not change directory after this)
The following command specifies a colon-separated path for directories
containing templates (all templates must end with .stg)
setenv DBSTAG_TEMPLATE_DIRS ".:templates:/usr/local/share/sql/templates"
Auto-generate templates (you can customize these later):
stag-autoschema.pl -w sxpr cia-pp2.xml > cia-stagschema.sxpr
stag-autotemplate.pl -no_pp -s cia -dir ./templates cia-stagschema.sxpr
The first command creates an S-Expression representation of the
Schema; the second generates SQL templates from these.
You may wish to examine a template:
more templates/cia-country.stg
You can hand generate as many templates as you like; see
L for more details
For more example templates for this schema, see
L
=head3 Executing Templates from the Command Line
now execute a template from the command line:
selectall_xml.pl -d dbi:Pg:dbname=cia /cia-country country_name=Austria
You should get back a tree (rooted in B), that looks similar
to this:
3
federal republic
8023244
83850
Austria
2.3
...
1
German
100
3
...
=head3 Executing Templates with the Stag Query Shell
You can also do this interactively using qsh
First, we need to inform stag-qsh what the schema is. The schema is
used to determine which templates are appropriate. Later we will
discover how to set up a resources file, which will allow stag to
infer the schema.
Call qsh from command line:
stag-qsh -d dbi:Pg:dbname=cia -s cia
Interactive perl/qsh:
\l
t cia-country
/borders_country=cid-cia-Germany
(do not leave spaces at the beginning of the line)
The above should fetch all countries bordering Germany
If we prefer objects over hierarchical formats such as XML, we can do
this using perl. For example, to print the religions of spanish
speaking countries:
Still in qsh (multi-line mode), type the following:
# find all Spanish-speaking countries
$dataset =
$dbh->selectall_stag(-template=>'cia-country',-bind=>{languages_name=>'Spanish'});
# get country objects from query results
@lcountry = $dataset->get_country;
foreach $country (@lcountry) {
printf("Country: %s\n Religions:%s\n",
$country->sget_name,
join(' & ',
map {
$_->get_name.' '.$_->get_quant.'%'
} $country->get_religions))
}
print "\n\nDone!\n";
\q
See L for more details on using Stag objects
=head2 BUILDING A CGI/WEB INTERFACE
We can construct a generic but powerful default cgi interface for our
data, using ubiq.cgi, which should come with your distribution.
You may have to modify some of the directories below, depending on
your web server set up (we assume Apache here).
We want to create the CGI, and give it access to our templates:
mkdir /usr/local/httpd/cgi-bin/cia
cp templates/*.stg /usr/local/httpd/cgi-bin/cia
cp `which ubiq.cgi` /usr/local/httpd/cgi-bin/cia
chmod +x /usr/local/httpd/cgi-bin/cia/ubiq.cgi
mkdir /usr/local/httpd/cgi-bin/cia/cache
chmod 777 /usr/local/httpd/cgi-bin/cia/cache
Set up the environment for the CGI script. It must be able to see the
templates and the necessary perl libraries (if not installed
system-wide)
cat > /usr/local/httpd/cgi-bin/cia/dbenv.pl
$ENV{DBSTAG_DBIMAP_FILE} = "./resources.conf";
$ENV{DBSTAG_TEMPLATE_DIRS} = ".:./templates:/usr/local/share/sql/templates";
$ENV{STAGLIB} = "/users/me/lib/DBIx-DBStag:/users/me/lib/stag";
We must create a basic resources file, currently containing one db:
cat > /usr/local/httpd/cgi-bin/cia/resources.conf
cia rdb Pg:cia schema=cia
Fields are whitespace delimited; do not leave a space before the
initial 'cia'
(note that if you set DBSTAG_DBIMAP_FILE to the avove file on the
command line, you can use the shortened name of B instead of
B)
You should be able to use the interface via
http://localhost/cgi-bin/cia/ubiq.cgi
You can customize this by overriding some of the existing display functions;
cat > /usr/local/httpd/cgi-bin/cia/ubiq-customize.pl
# --- CUSTOM SETTINGS
{
no warnings 'redefine';
*g_title = sub {
"U * B * I * Q - CIA World Factbook";
};
*short_intro = sub {
"Demo interface to CIA World Factbook"
};
add_initfunc(sub {
$dbname = 'cia';
$schema = 'cia';
});
}
From here on you can customise the web interface, create new
templates, integrate this with other data. Consult L and
the script B for further details.
=head2 FURTHER EXPLORATION
This cookbook has focused on an example with relatively simple XML,
with only a few layers of nesting.
There is a more complex example you can download from the Mondial
project site here:
L
This also integrates data on cities, which increases the depth of the
XML tree.
You could use the tutorial above to try and turn this XML into a
database.
=head1 WEBSITE
L
=head1 AUTHOR
Chris Mungall
cjm at fruitfly dot org
=head1 COPYRIGHT
Copyright (c) 2002 Chris Mungall
This module is free software.
You may distribute this module under the same terms as perl itself
=cut
DBIx-DBStag-0.12/DBIx/DBStag/SQLTemplate.pm 0000644 0000765 0000024 00000050507 11331570165 016515 0 ustar cain staff # $Id: SQLTemplate.pm,v 1.28 2007/03/05 09:12:49 cmungall Exp $
# -------------------------------------------------------
#
# Copyright (C) 2003 Chris Mungall
#
# This module is free software.
# You may distribute this module under the same terms as perl itself
#---
# POD docs at end of file
#---
package DBIx::DBStag::SQLTemplate;
use strict;
use vars qw($VERSION @ISA @EXPORT_OK %EXPORT_TAGS $DEBUG $AUTOLOAD);
use Carp;
use DBI;
use Data::Stag qw(:all);
use DBIx::DBStag;
use DBIx::DBStag::Constraint;
use Text::Balanced qw(extract_bracketed);
#use SQL::Statement;
use Parse::RecDescent;
$VERSION='0.12';
our @CLAUSE_ORDER = ('select',
'from',
'where',
'group',
'order',
'having');
sub DEBUG {
$DBIx::DBStag::DEBUG = shift if @_;
return $DBIx::DBStag::DEBUG;
}
sub trace {
my ($priority, @msg) = @_;
return unless $ENV{DBSTAG_TRACE};
print STDERR "@msg\n";
}
sub dmp {
use Data::Dumper;
print Dumper shift;
}
sub new {
my $proto = shift;
my $class = ref($proto) || $proto;
my $self = {};
bless $self, $class;
$self->cached_sth({});
$self;
}
sub name {
my $self = shift;
$self->{_name} = shift if @_;
return $self->{_name};
}
sub fn {
my $self = shift;
$self->{_fn} = shift if @_;
return $self->{_fn};
}
sub sth {
my $self = shift;
$self->{_sth} = shift if @_;
return $self->{_sth};
}
sub cached_sth {
my $self = shift;
$self->{_cached_sth} = shift if @_;
return $self->{_cached_sth};
}
sub sql_clauses {
my $self = shift;
$self->{_sql_clauses} = shift if @_;
return $self->{_sql_clauses};
}
sub set_clause {
my $self = shift;
my $ct = lc(shift);
my $v = shift;
$v =~ s/^ *//;
my $add = 0;
if ($v =~ /\+(.*)/) {
$v = $1;
$add = 1;
}
if ($ct eq 'order' || $ct eq 'group') {
$v = "BY $v" unless $add;
}
my $is_set = 0;
my $clauses = $self->sql_clauses;
my @corder = @CLAUSE_ORDER;
my @nu_clauses = ();
foreach my $clause (@$clauses) {
my $n = lc($clause->{name});
next unless $n;
if ($n eq $ct) {
if ($add && $clause->{value}) {
# $clause->{value} .= " and $v";
$clause->{value} .= " $v";
}
else {
$clause->{value} = $v;
}
$is_set = 1;
}
CORDER:
while (@corder) {
my $next = $corder[0];
if (!$is_set && $next eq $ct) {
$is_set = 1;
push(@nu_clauses,
{name=>uc($ct),
value=>$v});
}
last CORDER if $next eq $n;
shift @corder;
}
push(@nu_clauses,$clause);
}
@$clauses = @nu_clauses;
$self->throw("Cannot set $ct") unless $is_set;
return;
}
sub properties {
my $self = shift;
$self->{_properties} = shift if @_;
return $self->{_properties};
}
sub property_hash {
my $self = shift;
my $pl = $self->{_properties};
my %h = ();
foreach (@$pl) {
push(@{$h{$_->{name}}},$_->{value});
}
return \%h;
}
sub schema {
my $sl = shift->property_hash->{schema} || [];
$sl->[0];
}
sub stag_props {
my $self = shift;
$self->{_stag_props} = shift if @_;
return $self->{_stag_props};
}
sub desc {
my $self = shift;
my $P = $self->properties || [];
my ($p) = grep {$_->{name} =~ /^desc/} @$P;
return $p->{value} if $p;
}
sub nesting {
my $self = shift;
my $sql_clauses = $self->sql_clauses;
my ($use) = grep {$_->{name} =~ /use/i} @$sql_clauses;
my $nesting;
if ($use) {
my $v = $use->{value};
$v =~ s/\s*nesting\s*//i;
$nesting = Data::Stag->parsestr($v);
}
return $nesting;
}
sub get_example_input {
my $self = shift;
my $dbh = shift;
my $cachefile = shift;
my $refresh = shift;
my %ei = ();
if ($cachefile && -f $cachefile && !$refresh) {
my $fh = FileHandle->new($cachefile) || $self->throw("cannot open $cachefile");
while(<$fh>) {
chomp;
my ($n, @v) = split(/\t/, $_);
$ei{$n} = \@v;
}
$fh->close;
}
my $P = $self->properties || [];
my @E = map {$_->{value}} grep {$_->{name} =~ /^example_input/} @$P;
foreach my $e (@E) {
if ($e =~ /(\S+)\s*=\>\s*(.*)/) {
my $n = $1;
my $v = $2;
my @parts = split(/,\s*/, $v);
$ei{$n} = [];
while (my $part = shift @parts) {
if ($part =~ /select/i) {
my $sql = "$part @parts LIMIT 5";
my $examples = [];
if (!$dbh) {
# no connection
last;
}
eval {
$examples =
$dbh->selectcol_arrayref($sql);
push(@{$ei{$n}},
grep {length($_) < 32} @$examples);
};
if ($@) {
$self->throw("Problem with template - invalid example_input: $e");
}
@parts = ();
}
else {
push(@{$ei{$n}}, $part);
}
}
}
}
if ($cachefile) {
my $fh = FileHandle->new(">$cachefile") ||
$self->throw("cannot write to $cachefile");
foreach my $n (keys %ei) {
print $fh join("\t", $n, @{$ei{$n}}), "\n";
}
$fh->close || $self->throw;
}
return \%ei;
}
# ---------------------------------
# given a template and a binding, this will
# create an SQL statement and a list of exec args
# - the exec args correspond to ?s in the SQL
#
# for example WHERE foo = &foo&
# called with binding foo=bar
#
# will become WHERE foo = ?
# and the exec args will be ('bar')
#
# if the template contains option blocks eg
#
# WHERE [foo = &foo&]
#
# then the part in square brackets will only be included if
# there is a binding for variable foo
#
# if multiple option blocks are included, they will be ANDed
#
#
# if this idiom appears
#
# WHERE foo => &foo&
#
# then the operator used will either be =, LIKE or IN
# depending on the value of the foo variable
#
# if the foo variable contains % it will be LIKE
# if the foo variable contains an ARRAY it will be IN
#
# (See DBI manpage for discussion of placeholders)
sub get_sql_and_args {
my $self = shift;
my $bind = shift || {};
my $where_blocks = $self->split_where_clause;
my $varnames = $self->get_varnames; # ORDERED list of variables in Q
my %argh = ();
my ($sql, @args);
my $where;
# binding can be a simple array of VARVALs
if ($bind &&
ref($bind) eq 'ARRAY') {
# assume that the order of arguments specified is
# the same order that appears in the query
for (my $i=0; $i<@$bind; $i++) {
if (!$varnames->[$i]) {
my $n=$i+1;
my $c = @$varnames;
$self->throw("Argument number $n ($bind->[$i]) has no place ".
"to bind; there are only $c variables in the ".
"template");
}
# relate ordering of exec args via ordering of variables in
# template; store in a hash
$argh{$varnames->[$i]} = $bind->[$i];
}
}
if ($bind &&
ref($bind) eq 'HASH') {
# binding is already specified as a hash - no need to convert
%argh = %$bind;
my %varnameh = map {$_=>1} @$varnames;
my @bad = grep {!$varnameh{$_}} keys %argh;
if (@bad) {
$self->throw("param(s) not recognised: @bad\nValid params:\n".join("\n",map {" $_"}@$varnames));
}
}
if ($bind && ref($bind) eq "DBIx::DBStag::Constraint") {
# COMPLEX BOOLEAN CONSTRAINTS - TODO
my $constr;
$constr = $bind;
($where, @args) = $self->_get_sql_where_and_args_from_constraints($constr);
}
else {
# simple rules for substituting variables
($where, @args) = $self->_get_sql_where_and_args_from_hashmap(\%argh);
}
my $sql_clauses = $self->sql_clauses;
$sql = join("\n",
map {
if (lc($_->{name}) eq 'where') {
if ($where) {
"WHERE $where";
}
else {
''; # no WHERE clause
}
}
else {
"$_->{name} $_->{value}";
}
} @$sql_clauses);
return ($sql, @args);
}
# takes a simple set of hash variable bindings, and
# a set of option blocks [...][...]
#
# generates SQL for every block required, replaces with
# DBI placeholders, and returns SQL plus DBI execute args list
sub _get_sql_where_and_args_from_hashmap {
my $self = shift;
my %argh = %{shift || {}};
my $where_blocks = $self->split_where_clause;
# sql clauses to be ANDed
my @sqls = ();
# args to be fed to DBI execute() [corresponds to placeholder ?s]
my @args = ();
# index of variables replaced by ?s
my $vari = 0;
NEXT_BLOCK:
foreach my $wb (@$where_blocks) {
my $where = $wb->{text};
my $varnames = $wb->{varnames};
# trace(0, "WHEREBLOCK: $where;; VARNAMES=@$varnames;;\n");
my $str = $where;
while ($str =~ /(=>)?\s*\&(\S+)\&/) {
my $op = $1 || '';
my $varname = $2;
my $argval = $argh{$varname};
if (!exists $argh{$varname}) {
# if a variable is not set in a particular block,
# that block is ignored, and does not form
# part of the final query
next NEXT_BLOCK;
}
if ($op) {
$op = '= ';
if ($argval =~ /\%/) {
$op = ' LIKE ';
}
}
my $replace_with;
# replace arrays with IN (1,2,3,...)
if (ref($argval)) {
$replace_with =
'('.join(',',
map {_quote($_)} @$argval).')';
$op = ' IN ';
}
else {
$replace_with = '?';
$args[$vari] = $argval;
$vari++;
}
$str =~ s/(=>)?\s*\&$varname\&/$op$replace_with/;
}
push(@sqls, $str);
}
my $sql = join(' AND ', @sqls);
trace(0, "WHERE:$sql");
return ($sql, @args);
}
# takes complex boolean constraints and generates SQL
sub _get_sql_where_and_args_from_constraints {
my $self = shift;
my $constr = shift;
if ($constr->is_leaf) {
my $where_blocks = $self->split_where_clause;
die("TODO");
}
else {
my $bool = $constr->bool;
my $children = $constr->children;
my @all_args = ();
my @sqls = ();
foreach my $child (@$children) {
my ($sql, @args) = $self->_get_sql_where_and_args($constr);
push(@sqls, $sql);
push(@all_args, @args);
}
my $sql = '('.join(" $bool ",
@sqls).')';
return ($sql, @all_args);
}
$self->throw("ASSERTION ERROR");
}
# splits a WHERE clause with option blocks [ x=&x& ] [ y=&y& and z=&z& ] into
# blocks, and attaches the variable names to the block
sub split_where_clause {
my $self = shift;
my $sql_clauses = $self->sql_clauses;
my $sql = '';
my ($clause) = grep {lc($_->{name}) eq 'where'} (@$sql_clauses);
my $where = $clause->{value} || '';
my $vari = 0;
my %vari_by_name = ();
# this subroutine take a where block, checks if it contains
# one or more patterns
# foo.bar => &baz&
# and adds 'baz' to the list of variable names
my $sub =
sub {
my $textin = shift;
return unless $textin;
my $str = $textin;
my @varnames = ();
while ($str =~ /(=>)?\s*\&(\S+)\&/) {
my $op = $1 || '';
my $varname = $2;
push(@varnames, $varname);
$str =~ s/(=>)?\s*\&$varname\&//;
}
return
{text=>$textin,
varnames=>\@varnames}
};
my @constrs = ();
while (1) {
my ($extracted, $remainder, $skip) =
extract_bracketed($where, '[]');
$extracted ||= '';
$remainder ||= '';
trace(0, "extraction:( E='$extracted', R='$remainder', SKIP='$skip' )\n");
$remainder =~ s/^\s+//;
$remainder =~ s/\s+$//;
$skip =~ s/^\s+//;
$skip =~ s/\s+$//;
push(@constrs,
$sub->($skip));
if ($extracted) {
$extracted =~ s/^\s*\[//;
$extracted =~ s/\]\s*$//;
push(@constrs,
$sub->($extracted));
}
else {
push(@constrs,
$sub->($remainder));
last;
}
$where = $remainder;
}
@constrs = grep {$_} @constrs;
return \@constrs;
}
sub get_varnames {
my $self = shift;
my $parts = $self->split_where_clause;
return [map {@{$_->{varnames}}} @$parts];
}
sub prepare {
my $self = shift;
my $dbh = shift;
my $bind = shift;
my ($sql, @exec_args) = $self->get_sql_and_args($bind);
my $sth = $self->cached_sth->{$sql};
if (!$sth) {
$sth = $dbh->prepare($sql);
$self->cached_sth->{$sql} = $sth;
}
return ($sql, $sth, @exec_args);
}
sub parsestr {
my $self = shift;
my $io = IO::String->new;
print $io shift;
$self->_parsefh($io);
}
sub parse {
my $self = shift;
my $fn = shift;
my $fh = FileHandle->new($fn) || $self->throw("cannot open $fn");
$self->fn($fn);
my $name = $fn;
$name =~ s/.*\///;
$name =~ s/\.\w+$//;
$self->name($name);
$self->_parsefh($fh);
}
sub _parsefh {
my $self = shift;
my $fh = shift;
my $eosql_tag_idx;
my $tag = {name=>'', value=>''};
my @tags = ();
while (<$fh>) {
chomp;
if (/^\/\//) {
$eosql_tag_idx = scalar(@tags)+1;
next;
}
if (/^:(\w+)\s*(.*)/) {
push(@tags, $tag);
$tag = {name=>$1, value => $2};
}
elsif (/^(\w+):\s*(.*)/) {
push(@tags, $tag);
$tag = {name=>$1, value => "$2"};
}
else {
if (substr($_, -1) eq '\\') {
}
else {
$_ = "\n$_";
}
$tag->{value} .= $_;
}
}
foreach (@tags) {
$_->{value} =~ s/^\s+//;
$_->{value} =~ s/\s+$//;
}
push(@tags, $tag);
if (!defined($eosql_tag_idx)) {
$eosql_tag_idx = scalar(@tags);
}
my @clauses = splice(@tags, 0, $eosql_tag_idx);
if (!@clauses) {
$self->throw("No SQL");
}
if (@clauses == 1 && !$clauses[0]->{name}) {
my $j = join('|',
'select',
'from',
'where',
'order',
'limit',
'group',
'having',
'use nesting',
);
my @parts =
split(/($j)/i, $clauses[0]->{value});
@clauses = ();
while (my ($n, $v) = splice(@parts, 0, 2)) {
push(@clauses, {name=>$n, value=>$v});
}
}
$self->sql_clauses(\@clauses);
$self->properties(\@tags);
my $sp = Data::Stag->new(properties=>[
map {
[$_->{name} => $_->{value}]
} @tags
]);
$self->stag_props($sp);
$fh->close;
}
sub throw {
my $self = shift;
my $fmt = shift;
print STDERR "\nERROR:\n";
printf STDERR $fmt, @_;
print STDERR "\n";
confess;
}
my $default_cscheme =
{
'keyword'=>'cyan',
'variable'=>'magenta',
'text' => 'reset',
'comment' => 'red',
'block' => 'blue',
'property' => 'green',
};
sub show {
my $t = shift;
my $fh = shift || \*STDOUT;
my %cscheme = %{shift || $default_cscheme};
my $colorfunc = shift;
my $n = $t->name;
my $clauses = $t->sql_clauses;
my $props = $t->properties;
my $keyword = sub {
my $color = $cscheme{keyword};
$colorfunc->($color) . "@_" . $colorfunc->('reset');
};
my $comment = sub {
my $color = $cscheme{comment};
$colorfunc->($color) . "@_" . $colorfunc->('reset');
};
my $property = sub {
my $color = $cscheme{property};
$colorfunc->($color) . "@_" . $colorfunc->('reset');
};
my $c0 = $colorfunc->('reset');
my $c1 = $colorfunc->($cscheme{variable});
my $c2 = $colorfunc->($cscheme{keyword});
my $c3 = $colorfunc->($cscheme{block});
# my $c0 = 'reset';
# my $c1 = $cscheme{variable};
# my $c2 = $cscheme{keyword};
# my $c3 = $cscheme{block};
foreach my $clause (@$clauses) {
my ($n, $c) = ($clause->{name}, $clause->{value});
print $fh $keyword->("$n ");
if ($c =~ /\[.*\]/s) {
$c =~ s/\[/$c3\[$c0/g;
$c =~ s/\]/$c3\]$c0/g;
$c =~ s/=\>/$c2=\>$c0/gs;
$c =~ s/(\&\S+\&)/$c1$1$c0/gs;
print $fh $c;
$c = '';
}
if ($n =~ /^use/i) {
$c =~ s/\(/$c3\($c0/g;
$c =~ s/\)/$c3\)$c0/g;
# print $fh $c;
# $c = '';
}
while ($c =~ /(\S+)(\s*)(.*)/s) {
my ($w, $sp, $next) = ($1, $2, $3);
if ($w =~ /^[A-Z]+$/) {
print $fh $keyword->($w);
}
else {
print $fh $w;
}
print $fh $sp;
$c = $next;
}
print $fh "\n";
}
print $fh $comment->("// -- METADATA --\n");
foreach my $p (@$props) {
my ($n, $v) = ($p->{name}, $p->{value});
print $fh $property->("$n: ");
print $fh $v;
print $fh "\n";
}
}
sub _quote {
my $v = shift;
$v =~ s/\'/\'\'/g;
"'$v'";
}
1;
__END__
=head1 NAME
DBIx::DBStag::SQLTemplate - A Template for an SQL query
=head1 SYNOPSIS
# find template by name
$template = $dbh->find_template("mydb-personq");
# execute this template, filling in the 'name' attribute
$xml = $dbh->selectall_xml(-template=>$template,
-bind=>{name => "fred"});
=cut
=head1 DESCRIPTION
A template represents a canned query that can be parameterized.
Templates are collected in directories (in future it will be possible
to store them in files or in the db itself).
To tell DBStag where your templates are, you should set:
setenv DBSTAG_TEMPLATE_DIRS "$HOME/mytemplates:/data/bioconf/templates"
Your templates should end with the suffix B<.stg>, otherwise they will
not be picked up
You can name templates any way you like, but the standard way is to
use 2 or 3 fields
SCHEMA-OBJECT
or
SCHEMA-OBJECT-QUALIFIERS
(with underscores used within fields)
A template file should contain at minimum some SQL; for example:
=over
=item Example template 1
SELECT
studio.*,
movie.*,
star.*
FROM
studio NATURAL JOIN
movie NATURAL JOIN
movie_to_star NATURAL JOIN
star
WHERE
[movie.genre = &genre&] [star.lastname = &lastname&]
USE NESTING (set(studio(movie(star))))
Thats all! However, there are ways to make your template more useful
=item Example template 2
:SELECT
studio.*,
movie.*,
star.*
:FROM
studio NATURAL JOIN
movie NATURAL JOIN
movie_to_star NATURAL JOIN
star
:WHERE
[movie.genre = &genre&] [star.lastname = &lastname&]
:USE NESTING (set(studio(movie(star))))
//
schema: movie
desc: query for fetching movies
By including B<:> at the beginning it makes it easier for parsers to
assemble SQL (this is not necessary for DBStag however)
After the // you can add tag: value data.
You should set B if you want the template to be available to
users of a db that conforms to that schema
=back
=head2 GETTING A TEMPLATE
The L object gives various methods for fetching
templates by name, by database or by schema
=head2 VARIABLES
WHERE clause variables in the template look like this
&foo&
variables are bound at query time
my $set = $dbh->selectall_stag(-template=>$t,
-bind=>["bar"]);
or
my $set = $dbh->selectall_stag(-template=>$t,
-bind=>{foo=>"bar"});
If the former is chosen, variables are bound from the bind list as
they are found
=head2 OPTIONAL BLOCKS
WHERE [ foo = &foo& ]
If foo is not bound then the part between the square brackets is left out
Multiple option blocks are Bed together
An option block need not contain a variable - if it contains no
B<&variable&> name it is automatically Bed
=head2 BINDING OPERATORS
The operator can be bound at query time too
WHERE [ foo => &foo& ]
Will become either
WHERE foo = ?
or
WHERE foo LIKE ?
or
WHERE foo IN (f0, f1, ..., fn)
Depending on whether foo contains the % character, or if foo is bound
to an ARRAY
=head1 METHODS
=head2 name
Usage - $name = $template->name
Returns - str
Args -
every template has a name that (should) uniquely identify it
=head2 desc
Usage - $desc = $template->desc
Returns - str
Args -
templates have optional descriptions
=cut
=head2 get_varnames
Usage - $varnames = $template->get_varnames
Returns - listref of strs
Args -
Returns the names of all variable used in this template
=cut
=head1 WEBSITE
L
=head1 AUTHOR
Chris Mungall >
=head1 COPYRIGHT
Copyright (c) 2003 Chris Mungall
This module is free software.
You may distribute this module under the same terms as perl itself
=cut
1;
DBIx-DBStag-0.12/DBIx/DBStag.pm 0000644 0000765 0000024 00000424401 11331570136 014356 0 ustar cain staff # $Id: DBStag.pm,v 1.59 2008/02/06 00:50:55 cmungall Exp $
# -------------------------------------------------------
#
# Copyright (C) 2002 Chris Mungall
#
# This module is free software.
# You may distribute this module under the same terms as perl itself
#---
# POD docs at end of file
#---
package DBIx::DBStag;
use strict;
use vars qw($VERSION @ISA @EXPORT_OK %EXPORT_TAGS $DEBUG $AUTOLOAD);
use Carp;
use DBI;
use Data::Stag qw(:all);
use DBIx::DBSchema;
use Text::Balanced qw(extract_bracketed);
#use SQL::Statement;
use Parse::RecDescent;
$VERSION='0.12';
our $DEBUG;
our $TRACE = $ENV{DBSTAG_TRACE};
sub DEBUG {
$DEBUG = shift if @_;
return $DEBUG;
}
sub trace {
my ($priority, @msg) = @_;
return unless $ENV{DBSTAG_TRACE};
print STDERR "@msg\n";
}
sub dmp {
use Data::Dumper;
print Dumper shift;
}
sub force {
my $self = shift;
$self->{_force} = shift if @_;
return $self->{_force};
}
sub new {
my $proto = shift;
my $class = ref($proto) || $proto;
my ($dbh) =
rearrange([qw(dbh)], @_);
my $self = {};
bless $self, $class;
if ($dbh) {
$self->dbh($dbh);
}
$self;
}
sub connect {
my $class = shift;
my $dbi = shift;
my $self;
if (ref($class)) {
$self = $class;
}
else {
$self = {};
bless $self, $class;
}
$dbi = $self->resolve_dbi($dbi);
eval {
$self->dbh(DBI->connect($dbi, @_));
};
if ($@ || !$self->dbh) {
my $mapf = $ENV{DBSTAG_DBIMAP_FILE};
if ($dbi =~ /^dbi:(\w+)/) {
print STDERR <dbh->{RaiseError} = 1;
$self->dbh->{ShowErrorStatement} = 1;
if ($dbi =~ /dbi:([\w\d]+)/) {
$self->{_driver} = $1;
}
$self->setup;
return $self;
}
sub resolve_dbi {
my $self = shift;
my $dbi = shift;
if (!$dbi) {
$self->throw("database name not provided!");
}
if ($dbi !~ /^dbi:/) {
my $rh = $self->resources_hash;
my $res =
$rh->{$dbi};
if (!$res) {
if ($dbi =~ /:/) {
$res =
{loc=>"$dbi"}
}
else {
$res =
{loc=>"Pg:$dbi"};
}
}
if ($res) {
my $loc = $res->{loc};
if ($loc =~ /(\S+?):(\S+)\@(\S+)/) {
my $dbms = $1;
my $dbn = $2;
my $host = $3;
my $extra = '';
if ($host =~ /(\S+?):(.*)/) {
$host = $1;
$extra = ":$2";
}
if ($dbms =~ /pg/i) {
$dbi = "dbi:Pg:dbname=$dbn;host=$host$extra";
}
elsif ($dbms =~ /db2/i) {
$dbi = "dbi:Pg:$dbn;host=$host$extra";
}
else {
# default - tested on MySQL
$dbi = "dbi:$dbms:database=$dbn:host=$host$extra";
}
}
elsif ($loc =~ /(\S+):(\S+)$/) {
my $dbms = $1;
my $dbn = $2;
$dbi = "dbi:$dbms:database=$dbn";
if ($dbms =~ /pg/i) {
$dbi = "dbi:Pg:dbname=$dbn";
}
}
else {
$self->throw("$dbi -> $loc does not conform to standard.\n".
":\@");
}
}
else {
$self->throw("$dbi is not a valid DBI locator.\n");
}
}
return $dbi;
}
sub resources_hash {
my $self = shift;
my $mapf = $ENV{DBSTAG_DBIMAP_FILE};
my $rh;
if ($mapf) {
if (-f $mapf) {
$rh = {};
open(F, $mapf) || $self->throw("Cannot open $mapf");
while () {
chomp;
next if /^\#/;
s/^\!//;
my @parts =split(' ', $_);
next unless (@parts >= 3);
my ($name, $type, $loc, $tagstr) =@parts;
my %tagh = ();
if ($tagstr) {
my @parts = split(/;\s*/, $tagstr);
foreach (@parts) {
my ($t, $v) = split(/\s*=\s*/, $_);
$tagh{$t} = $v;
}
}
$rh->{$name} =
{
%tagh,
name=>$name,
type=>$type,
loc=>$loc,
tagstr=>$tagstr,
};
}
close(F) || $self->throw("Cannot close $mapf");
} else {
$self->throw("$mapf does not exist");
}
}
return $rh;
}
sub resources_list {
my $self = shift;
my $rh =
$self->resources_hash;
my $rl;
if ($rh) {
$rl =
[map {$_} values %$rh];
}
return $rl;
}
sub find_template {
my $self = shift;
my $tname = shift;
my $path = $ENV{DBSTAG_TEMPLATE_DIRS} || '';
my $tl = $self->template_list;
my ($template, @rest) = grep {$tname eq $_->name} @$tl;
if (!$template) {
print STDERR "\n\nI could not find the Stag SQL template called \"$tname\".\n";
if (!$path) {
print STDERR <throw("Could not find template \"$tname\" in: $path");
}
return $template;
}
sub find_templates_by_schema {
my $self = shift;
my $schema = shift;
my $tl = $self->template_list;
my @templates = grep {$_->stag_props->tmatch('schema', $schema)} @$tl;
return \@templates;
}
sub find_templates_by_dbname {
my $self = shift;
my $dbname = shift;
my $res = $self->resources_hash->{$dbname};
my $templates;
if ($res) {
my $schema = $res->{schema} || '';
if ($schema) {
$templates = $self->find_templates_by_schema($schema);
}
else {
# unknown schema - show all templates
# $templates = $self->template_list;
}
}
else {
$self->throw("unknown db: $dbname");
}
return $templates;
}
sub template_list {
my $self = shift;
my %already_got = ();
if (!$self->{_template_list}) {
my $path = $ENV{DBSTAG_TEMPLATE_DIRS} || '.';
my @dirs = split(/:/, $path);
my @templates = ();
foreach my $dir (@dirs) {
foreach my $fn (glob("$dir/*.stg")) {
if (-f $fn) {
require "DBIx/DBStag/SQLTemplate.pm";
my $template = DBIx::DBStag::SQLTemplate->new;
$template->parse($fn);
push(@templates, $template) unless $already_got{$template->name};
$already_got{$template->name} = 1;
}
}
}
$self->{_template_list} = \@templates;
}
return $self->{_template_list};
}
sub find_schema {
my $self = shift;
my $dbname = shift;
my $rl = $self->resouces_list || [];
my ($r) = grep {$_->{name} eq $_ ||
$_->{loc} eq $_} @$rl;
if ($r) {
return $r->{schema};
}
return;
}
sub setup {
my $self = shift;
return;
}
# counter
sub next_id {
my $self = shift;
$self->{_next_id} = shift if @_;
$self->{_next_id} = 0 unless $self->{_next_id};
return ++$self->{_next_id};
}
sub dbh {
my $self = shift;
$self->{_dbh} = shift if @_;
return $self->{_dbh};
}
sub dbschema {
my $self = shift;
$self->{_dbschema} = shift if @_;
if (!$self->{_dbschema}) {
if (!$self->dbh) {
confess("you must establish connection using connect() first");
}
$self->dbschema(DBIx::DBSchema->new_native($self->dbh));
# my $sth = $self->dbh->table_info(undef, undef, undef, 'VIEW') or die $self->dbh->errstr;
# use Data::Dumper;
# print Dumper $sth->fetchall_arrayref([2,3]);
}
return $self->{_dbschema};
}
sub parser {
my $self = shift;
$self->{_parser} = shift if @_;
if (!$self->{_parser}) {
$self->{_parser} = Parse::RecDescent->new($self->selectgrammar());
}
return $self->{_parser};
}
sub warn {
my $self = shift;
my $fmt = shift;
print STDERR "\nWARNING:\n";
printf STDERR $fmt, @_;
print STDERR "\n";
}
sub throw {
my $self = shift;
my $fmt = shift;
print STDERR "\nERROR:\n";
printf STDERR $fmt, @_;
print STDERR "\n";
confess;
}
sub get_pk_col {
my $self = shift;
my $table = shift;
my $tableobj = $self->dbschema->table(lc($table));
if (!$tableobj) {
confess("Can't get table $table from db.\n".
"Maybe DBIx::DBSchema does not work with your database?");
}
return $tableobj->primary_key;
}
sub is_table {
my $self = shift;
my $tbl = shift;
return 1 if $self->dbschema->table($tbl);
}
sub is_col {
my $self = shift;
my $col = shift;
if ($self->{_is_col_h}) {
return $self->{_is_col_h}->{$col}
}
my @tablenames = $self->dbschema->tables;
my @allcols =
map {
$self->get_all_cols($_);
} @tablenames;
my %h = map {$_=>1} @allcols;
$self->{_is_col_h} = \%h;
return $self->{_is_col_h}->{$col};
}
# ASSUMPTION: pk names same as fk names
sub is_fk_col {
my $self = shift;
my $col = shift;
# HACK!!!
# currently dbschema does not know about FKs
return 1 if $col =~ /_id$/;
if ($self->{_is_fk_col_h}) {
return $self->{_is_fk_col_h}->{$col}
}
my @tablenames = $self->dbschema->tables;
my %h = ();
foreach (@tablenames) {
my $pk = $self->dbschema->table($_)->primary_key;
$h{$pk} =1 if $pk;
}
$self->{_is_fk_col_h} = \%h;
return $self->{_is_fk_col_h}->{$col};
}
sub is_pk_col {
my $self = shift;
my $col = shift;
if ($self->{_is_pk_col_h}) {
return $self->{_is_pk_col_h}->{$col}
}
my @tablenames = $self->dbschema->tables;
my %h = ();
foreach (@tablenames) {
my $pk = $self->dbschema->table($_)->primary_key;
$h{$pk} =1 if $pk;
}
$self->{_is_pk_col_h} = \%h;
return $self->{_is_pk_col_h}->{$col};
}
sub get_all_cols {
my $self = shift;
my $table = shift;
my $tableobj = $self->dbschema->table(lc($table));
if (!$tableobj) {
confess("Can't get table $table from db.\n".
"Maybe DBIx::DBSchema does not work with your database?");
}
return $tableobj->columns;
}
sub get_unique_sets {
my $self = shift;
my $table = shift;
my $tableobj = $self->dbschema->table(lc($table));
if (!$tableobj) {
confess("Can't get table $table from db.\n".
"Maybe DBIx::DBSchema does not work with your database?");
}
if ($ENV{OLD_DBIX_DBSCHEMA}) {
return @{$tableobj->unique->lol_ref || []};
}
else {
my %indices = $tableobj->indices;
my @unique_indices = grep {$_->unique} values %indices;
return map {$_->columns} @unique_indices;
}
}
sub mapconf {
my $self = shift;
my $fn = shift;
my $fh = FileHandle->new($fn) || confess("cannot open $fn");
my @mappings = <$fh>;
$fh->close;
$self->mapping(\@mappings);
}
sub mapping {
my $self = shift;
if (@_) {
my $ml = shift;
my @nu =
map {
if (ref($_)) {
Data::Stag->nodify($_);
}
else {
if (/^(\w+)\/(\w+)\.(\w+)=(\w+)\.(\w+)/) {
Data::Stag->new(map=>[
[fktable_alias=>$1],
[table=>$2],
[col=>$3],
[fktable=>$4],
[fkcol=>$5]
]);
}
elsif (/^(\w+)\.(\w+)=(\w+)\.(\w+)/) {
Data::Stag->new(map=>[
[table=>$1],
[col=>$2],
[fktable=>$3],
[fkcol=>$4]
]);
}
elsif (/^parentfk:(\w+)\.(\w+)/) {
Data::Stag->new(parentfk=>[
[table=>$1],
[col=>$2],
]);
}
else {
confess("incorrectly specified mapping: $_".
"(must be alias/tbl.col=ftbl.fcol)");
();
}
}
} @$ml;
$self->{_mapping} = \@nu;
}
return $self->{_mapping};
}
sub guess_mapping {
my $self = shift;
my $dbschema = $self->dbschema;
$self->mapping([]);
my %th =
map { $_ => $dbschema->table($_) } $dbschema->tables;
foreach my $tn (keys %th) {
my @cns = $th{$tn}->columns;
foreach my $cn (@cns) {
my $ftn = $cn;
$ftn =~ s/_id$//;
if ($th{$ftn}) {
push(@{$self->mapping},
Data::Stag->new(map=>[
[table=>$tn],
[col=>$cn],
[fktable=>$ftn],
[fkcol=>$cn]
]));
}
}
}
}
sub linking_tables {
my $self = shift;
$self->{_linking_tables} = {@_} if @_;
return %{$self->{_linking_tables} || {}};
}
sub add_linking_tables {
my $self = shift;
my %linkh = $self->linking_tables;
return unless %linkh;
my $struct = shift;
foreach my $ltname (keys %linkh) {
my ($t1, $t2) = @{$linkh{$ltname}};
$struct->where($t1,
sub {
my $n=shift;
my @v = $n->getnode($t2);
return unless @v;
$n->unset($t2);
my @nv =
map {
$n->new($ltname=>[$_]);
} @v;
# $n->setnode($ltname,
# $n->new($ltname=>[@v]));
foreach (@nv) {
$n->addkid($_);
}
0;
});
}
return;
}
# ----------------------------------------
sub elt_card {
my $e = shift;
my $c = '';
if ($e =~ /(.*)([\+\?\*])/) {
($e, $c) = ($1, $2);
}
# make the element RDB-safe
$e =~ s/\-//g;
return ($e, $c);
}
sub source_transforms {
my $self = shift;
$self->{_source_transforms} = shift if @_;
return $self->{_source_transforms};
}
sub autotemplate {
my $self = shift;
my $schema = shift;
return () unless grep {!stag_isterminal($_)} $schema->subnodes;
my @J = ();
my @W = ();
my @EXAMPLE = ();
my ($tname) = elt_card($schema->element);
my %joinpaths = ();
$schema->iterate(sub {
my $n = shift;
my $parent = shift;
my ($tbl, $card) = elt_card($n->element);
if (!$parent) {
push(@J, $tbl);
# $joinpaths{$tbl} = $tbl;
return;
}
my ($ptbl) = elt_card($parent->element);
if (stag_isterminal($n)) {
my $v = $ptbl.'_'.$tbl;
my $w = "$ptbl.$tbl => \&$v\&";
if ($ptbl eq $tname) {
push(@W,
"[ $w ]");
}
else {
my $pk = $tname.'_id';
my $subselect =
"SELECT $pk FROM $joinpaths{$ptbl}".
" WHERE $w";
push(@W,
"[ $pk IN ($subselect) ]");
}
# produce example formula for non-ints
if ($n->data eq 's') {
push(@EXAMPLE,
"$v => SELECT DISTINCT $tbl FROM $ptbl");
}
}
else {
my $jtype = 'INNER JOIN';
if ($card eq '*' || $card eq '?') {
$jtype = 'LEFT OUTER JOIN';
}
my $jcol = $ptbl.'_id';
push(@J,
"$jtype $tbl USING ($jcol)");
if ($joinpaths{$ptbl}) {
$joinpaths{$tbl} =
"$joinpaths{$ptbl} INNER JOIN $tbl USING ($jcol)";
}
else {
$joinpaths{$tbl} = $tbl;
}
}
return;
});
my $from = join("\n ", @J);
my $where = join("\n ", @W);
my $nesting = $schema->duplicate;
$nesting->iterate(sub {
my $n = shift;
if (stag_isterminal($n)) {
return;
}
my ($tbl, $card) = elt_card($n->element);
$n->element($tbl);
my @sn = $n->kids;
@sn =
grep {
my ($tbl, $card) = elt_card($_->element);
$_->element($tbl);
!stag_isterminal($_)
} @sn;
if (@sn) {
$n->kids(@sn);
}
else {
$n->data([]);
}
});
$nesting = Data::Stag->new(set=>[$nesting]);
my $nstr = $nesting->sxpr;
$nstr =~ s/^\'//;
my $tt =
join("\n",
":SELECT *",
":FROM $from",
":WHERE $where",
":USE NESTING",
"$nstr",
"",
"// ---- METADATA ----",
"schema:",
"desc: Fetches $tname objects",
" This is an AUTOGENERATED template",
"",
(map {
"example_input: $_"
} @EXAMPLE),
);
# my $template = DBIx::DBStag::SQLTemplate->new;
my @sn = $schema->subnodes;
my @tts = ();
push(@tts, $self->autotemplate($_)) foreach @sn;
return ([$tname=>$tt], @tts);
}
sub autoddl {
my $self = shift;
my $stag = shift;
my $link = shift;
$stag->makeattrsnodes;
my $schema = $stag->autoschema;
$self->source_transforms([]);;
$self->_autoddl($schema, undef, $link);
}
sub _autoddl {
my $self = shift;
my $schema = shift;
my $parent = shift;
my $link = shift || []; # link tables
my $tbls = shift || [];
my @sn = $schema->subnodes;
my ($tbl, $card) = elt_card($schema->element);
my @cols = (sprintf("%s_id serial PRIMARY KEY NOT NULL", $tbl));
my $casc = " ON DELETE CASCADE";
foreach (grep {stag_isterminal($_)} @sn) {
my ($col, $card) = elt_card($_->element);
my $pk = '';
if ($col eq $tbl.'_id') {
shift @cols;
$pk = ' PRIMARY KEY';
}
if ($card =~ /[\+\*]/) {
my $new_name = sprintf("%s_%s", $tbl, $col);
my $tf = ["$tbl/$col", "$new_name/$col"];
push(@{$self->source_transforms}, $tf);
$_->name($new_name);
$_->data([[$col => $_->data]]);
# $self->throw("In the source data, '$col' is a multivalued\n".
# "terminal (data) node. This is difficult to transform");
}
else {
# my $isnull = $card eq '?' ? '' : ' NOT NULL';
my $isnull = '';
push(@cols,
sprintf("%s %s$isnull$pk",
$col, $_->data));
}
}
if ($parent) {
my ($pn) = elt_card($parent->element);
push(@cols,
sprintf("%s_id INT", $pn));
push(@cols,
sprintf("FOREIGN KEY (%s_id) REFERENCES $pn(%s_id)$casc", $pn, $pn));
}
my $mapping = $self->mapping || [];
if (grep {$_ eq $tbl} @$tbls) {
# $self->throw("$tbl has >1 parent - you need to\n".
# "transform input data");
return "";
}
push(@$tbls, $tbl);
my $post_ddl = '';
my $pre_ddl = '';
foreach (grep {!stag_isterminal($_)} @sn) {
# check for cases where we want to include FK to subnode
my ($map) =
grep {
$_->name eq 'map' &&
($_->get_table eq $tbl &&
($_->get_fktable_alias eq $_->element ||
$_->get_fktable eq $_->element))
} @$mapping;
# linking tables
if ($map ||
grep {$_ eq $tbl} @$link) {
my $ftbl = $_->element;
push(@cols,
sprintf("%s_id INT", $ftbl));
push(@cols,
sprintf("FOREIGN KEY (%s_id) REFERENCES $ftbl(%s_id)$casc", $ftbl, $ftbl));
$pre_ddl .= $self->_autoddl($_, undef, $link, $tbls);
}
else {
$post_ddl .= $self->_autoddl($_, $schema, $link, $tbls);
}
}
my $ddl =
sprintf("CREATE TABLE $tbl (\n%s\n);\n\n",
join(",\n", map {" $_"} @cols));
return $pre_ddl . $ddl . $post_ddl;;
}
# ----------------------------------------
# CACHE METHODS
#
# we keep a cache of what is stored in
# each table
#
# cache->{$element}->{$key}->{$val}
# ----------------------------------------
# list of table names that should be cached
sub cached_tables {
my $self = shift;
$self->{_cached_tables} = shift if @_;
return $self->{_cached_tables};
}
sub is_caching_on {
my $self = shift;
my $element = shift;
$self->{_is_caching_on} = {}
unless $self->{_is_caching_on};
if (@_) {
$self->{_is_caching_on}->{$element} = shift;
}
return $self->{_is_caching_on}->{$element};
}
sub query_cache {
my $self = shift;
my $element = shift;
my $constr = shift;
my $update_h = shift;
my @keycols = sort keys %$constr;
my $cache = $self->get_tuple_idx($element, \@keycols);
my $valstr = join("\t", map {$constr->{$_}} @keycols);
# use Data::Dumper;
# print Dumper $cache;
if ($update_h) {
my $current_h = $cache->{$valstr} || {};
$current_h->{$_} = $update_h->{$_} foreach keys %$update_h;
$cache->{$valstr} = $current_h;
}
return $cache->{$valstr};
}
sub insert_into_cache {
my $self = shift;
my $element = shift;
my $insert_h = shift;
my $usets = shift;
foreach my $uset (@$usets) {
my @undef = grep {!defined $insert_h->{$_}} @$uset;
if (@undef) {
my @defined = grep {defined $insert_h->{$_}} @$uset;
trace(1,
"undefined column in unique key: @$uset IN $element/[@$uset] ".
join('; ',
map {"$_=$insert_h->{$_}"} @defined,
)
) if $TRACE;
# cannot cache undefined values
next;
}
my $cache = $self->get_tuple_idx($element, $uset);
my $valstr = join("\t", map {$insert_h->{$_}} sort @$uset);
$cache->{$valstr} = $insert_h;
}
return 1;
}
sub update_cache {
my $self = shift;
my $element = shift;
my $store_hash = shift;
my $unique_constr = shift;
my $tuple = $self->query_cache($element,
$unique_constr,
$store_hash);
return;
}
sub get_tuple_idx {
my $self = shift;
my $element = shift;
my $ukey = shift;
my @keycols = @$ukey;
@keycols = sort @keycols;
@keycols || die;
my $cache = $self->cache;
if (!$cache->{$element}) {
$cache->{$element} = {};
}
my $eltcache = $cache->{$element};
# we just use a flat perl hash - flatten the list of unique cols
# to a string with spaces between
my $k = "@keycols";
if (!$eltcache->{$k}) {
$eltcache->{$k} = {};
}
return $eltcache->{$k};
}
sub cache_summary {
my $self = shift;
my $s = Data::Stag->new(cache_summary=>[]);
my $cache = $self->cache || {};
my @elts = keys %$cache;
foreach my $elt (@elts) {
my $cnode = $cache->{$elt} || {};
my @keys = keys %$cnode;
$s->add($elt=>[map {[$_=>scalar(keys %{$cnode->{$_}})]} @keys]);
}
return $s;
}
sub cache {
my $self = shift;
$self->{_cache} = shift if @_;
$self->{_cache} = {} unless $self->{_cache};
return $self->{_cache};
}
sub clear_cache {
my $self = shift;
$self->cache({});
}
# ---- END OF CACHE METHODS ----
# set this if we are loading a fresh/blank slate DB
# (will assume database is empty and not check for
# existing tuples)
sub policy_freshbulkload {
my $self = shift;
$self->{_policy_freshbulkload} = shift if @_;
return $self->{_policy_freshbulkload};
}
sub noupdate_h {
my $self = shift;
$self->{_noupdate_h} = shift if @_;
return $self->{_noupdate_h} || {};
}
sub tracenode {
my $self = shift;
$self->{_tracenode} = shift if @_;
return $self->{_tracenode};
}
sub mapgroups {
my $self = shift;
if (@_) {
$self->{_mapgroups} = [@_];
$self->{_colvalmap} = {}
unless $self->{_colvalmap};
foreach my $cols (@_) {
my $h = {};
foreach (@$cols) {
$self->{_colvalmap}->{$_} = $h;
}
}
}
return @{$self->{_mapgroups} || []};
}
# DEPRECATED
sub get_mapping_for_col {
my $self = shift;
my $col = shift;
$self->{_colvalmap}->{$col} = {}
unless $self->{_colvalmap}->{$col};
return $self->{_colvalmap}->{$col};
}
# mapping of Old ID => New ID
# IDs are assumed to be global across ALL tables
sub id_remap_idx {
my $self = shift;
if (@_) {
$self->{_id_remap_idx} = shift;
}
else {
$self->{_id_remap_idx} = {}
unless $self->{_id_remap_idx};
}
return $self->{_id_remap_idx};
}
# do the PK values in the XML represent the actual
# internal db ids, or are they local to the document?
# if the latter then we will create a id_remap_idx
sub trust_primary_key_values {
my $self = shift;
$self->{_trust_primary_key_values} = shift if @_;
return $self->{_trust_primary_key_values};
}
sub make_stag_node_dbsafe {
my $self = shift;
my $node = shift;
my $parent = shift;
my $name = $node->name;
# CJM 2007-03-05
#return if $name eq '@'; # leave attrs alone
if ($name eq '@') {
# descend into attrs
$parent->data([grep {$_->name ne '@'} @{$parent->data},@{$node->data}]);
return;
}
my $safename = $self->dbsafe($name,$parent);
if ($name ne $safename) {
$node->name($safename);
}
my @kids = $node->kids;
foreach (@kids) {
$self->make_stag_node_dbsafe($_,$node) if ref $_;
}
return;
}
sub dbsafe {
my $self = shift;
my $name = shift;
my $parent = shift;
$name = lc($name);
# dbstag is designed for stag-like xml; no mixed attributes
# however, we do have basic checks for mixed attributes
if ($name eq '.') {
$name = $parent->name.'_data'; # TODO - allow custom column
}
$name =~ tr/a-z0-9_//cd;
return $name;
}
# cache the attribute nodes as they are parsed
#sub current_attribute_node {
# my $self = shift;
# $self->{_current_attribute_node} = shift if @_;
# return $self->{_current_attribute_node};
#}
# lookup table; macro ID => internal database ID
sub macro_id_h {
my $self = shift;
$self->{_macro_id_h} = shift if @_;
$self->{_macro_id_h} = {}
unless $self->{_macro_id_h};
return $self->{_macro_id_h};
}
# xort-style XML; set if an attribute is encountered
sub xort_mode {
my $self = shift;
$self->{_xort_mode} = shift if @_;
return $self->{_xort_mode};
}
#'(t1
# (foo x)
# (t2
# (bar y)))
#
# '(fk
# (table t2)
# (ftable t1))
#
# alg: store t1, then t2
# '(t1
# (foo x)
# (t1_t2
# (t2
# (bar y))))
#
# '(fk
# (table t1_t2)
# (ftable t1))
# '(fk
# (table t1_t2)
# (ftable t2))
#
#
# alg: store t1, hold on t1_t2, store t2
# '(t1
# (foo x)
# (blah
# (t2
# (bar y))))
#
# '(fk
# (table t1)
# (fktable t2)
# (fktable_alias "blah")
# (fk "blah_id"))
# alg: store t2, store t1
# if set, will ensure that tbl/col names are transformed to be safe
sub force_safe_node_names {
my $self = shift;
$self->{_force_safe_node_names} = shift if @_;
return $self->{_force_safe_node_names};
}
# recursively stores a Data::Stag tree node in the database
sub storenode {
my $self = shift;
my $node = shift;
my @args = @_;
my $dupnode = $node->duplicate;
$self->make_stag_node_dbsafe($dupnode,'')
if $self->force_safe_node_names;
$self->add_linking_tables($dupnode);
$self->_storenode($dupnode,@args);
}
sub _storenode {
my $self = shift;
my $node = shift;
my $opt = shift;
if (!$node) {
confess("you need to pass in a node!");
}
my $element = $node->element;
return unless $node->kids;
if ($element eq 'dbstag_metadata') {
my @maps = $node->get_map;
$self->mapping(\@maps);
my @links = $node->get_link;
if (@links) {
my %h =
map {
($_->sget_table => [$_->sget_from, $_->sget_to])
} @links;
$self->linking_tables(%h);
}
return;
}
# sql can be embedded as <_sql> tags
if ($element eq '_sql') {
$self->_execute_sqlnode($node);
return;
}
# check for XORT-style attributes
# if ($element eq '@') {
# # is this check required???
# $self->current_attribute_node($node);
# $self->xort_mode(1);
# return;
# }
my $current_attribute_node;
unless ($node->isterminal) {
my @kids = $node->kids;
my $changed = 0;
@kids =
map {
if ($_->element eq '@') {
$self->xort_mode(1);
$current_attribute_node = $_;
$changed = 1;
trace(0, "GOT ATTR NODE");
(); # omit
}
else {
$_; # unchanged
}
} @kids;
$node->kids(@kids) if $changed;
}
my $operation; # directive: force/update/lookup
if ($current_attribute_node){
$operation =
$current_attribute_node->sget_op;
}
trace(0, "STORING $element\n", $node->xml) if $TRACE;
my $tracenode = $self->tracenode || '';
my $tracekeyval;
if ($tracenode && $tracenode =~ /^(\w+)\/(.*)/) {
my $nn = $1;
my $tag = $2;
if ($nn eq $element) {
$tracekeyval = $node->get($tag);
}
}
my $dbh = $self->dbh;
my $dbschema = $self->dbschema;
my $is_caching_on = $self->is_caching_on($element) || 0;
my $mapping = $self->mapping || [];
# each relation has zero or one primary keys;
# primary keys are assumed to be single-column
my $pkcol = $self->get_pk_col($element);
trace(0, "PKCOL: $pkcol") if $TRACE;
# DBIx::DBSchema metadata
my $tableobj = $dbschema->table($element);
# -- PRE-STORE CHILD NON-TERMINALS --
# before storing this node, we need to
# see if we first need to store any child
# non-terminal nodes (in order to get their
# primary keys, to use as foreign keys in
# the current relation)
# store non-terminal subnodes first
my @ntnodes = $node->ntnodes;
# keep track of nodes that have been assigned xort-style
my %assigned_node_h;
# GET INFORMATION FROM SUPER-NODE
# some nodes may have been assigned by the calling process
# (eg if the supernode is refered to by a fk from the current table)
# this hash maps element names to a boolean;
# this is ONLY used in conjunction with xort-style xml
# we set this when we want to make sure that an element value is
# NOT macro-expanded by the expansion code
%assigned_node_h = %{$opt->{assigned_node_h} || {}};
# the primary key value of the supernode
my $parent_pk_id = $opt->{parent_pk_id};
# the element type of the supernode
my $parent_element = $opt->{parent_element};
# -- end of info from super-node
# PRE-STORE
# look through the current node's children;
# + some of these will be nodes that must be pre-stored BEFORE
# the current node (because the current node has a fk to them)
# + some of these will be nodes that must be post-stored AFTER
# the current node (because they have an fk to the current node)
#
# one or other of these situations must be true - otherwise
# nodes should not be nested!
my @delayed_store = (); # keep track of non-pre-stored nodes
foreach my $nt (@ntnodes) {
# First check for XORT-STYLE
# xort-style XML; nodes can be nested inside a non-terminal
# node corresponding to a FK column
# eg
#
# foo
#
#
# here, what looks like a non-terminal node should actually
# check all sub-nodes; if any of them are nonterminal and correspond
# to a column (not a table) then add the sub-node and use the pk id
# as the returned value
# note: we have to explicitly check the col is not also a table
# since some dbs (eg go db) have col names the same as tbl names
if ($self->is_col($nt->name) &&
!$nt->isterminal &&
!$self->is_table($nt->name)) {
my @kids = $nt->kids;
if (@kids != 1) {
$self->throw("non-terminal pk node should have one subnode only; ".
$nt->name." has ".scalar(@kids));
}
my $sn_val = $self->_storenode(shift @kids);
if (!defined($sn_val)) {
$self->throw("no returned value for ".$nt->name);
}
# TRANSFORM NODE: non-terminal to terminal
# replace node with return pk ID value
$nt->data($sn_val);
# do NOT try and expand the value assigned to this
# node with a xort-macro expansion later on
$assigned_node_h{$nt->name} = 1;
trace(0, "ASSIGNED NON-MACRO ID for ".$nt->name." TO $sn_val") if $TRACE;
# skip this ntnode - it is now a tnode
next;
}
# -- END OF xort-style check
# we want to PRE-STORE any ntnodes that
# are required for foreign key relationships
# within this node;
# ie this node N1 has a foreign key "fk_id" that
# points to ntnode N2.
# if there is an intermediate alias element in
# between then we need to store the ntnode too
#
# check for either of these conditions
my ($map) =
grep {
$_->get_table &&
$_->get_table eq $element &&
($_->get_fktable_alias &&
$_->get_fktable_alias eq $nt->element ||
($_->get_fktable &&
$_->get_fktable eq $nt->element && !$_->get_fktable_alias))
} @$mapping;
# check to see if sub-element has FK to this element
if (!$map) {
# my $subtable = $dbschema->table($nt->element);
my $table = $dbschema->table($element);
my $ntelement = $nt->element;
my $subpkcol = $self->get_pk_col($ntelement);
my @cns = $table->columns;
my $cn; # col name (FK in current element)
my $fcn; # foreign col name (PK in sub element)
# HACK - ASSUME NATURAL JOIN
# for example, a FK: person.dept_id => dept.dept_id
if ($subpkcol ne 'id') {
foreach (@cns) {
if ($_ eq $subpkcol) {
$cn = $_;
$fcn = $_;
}
}
}
# second chance; allow base "id" style
# for example, a FK: person.dept_id => dept.id
# via ...
if (!$cn) {
if ($subpkcol eq 'id') {
foreach (@cns) {
if ($_ eq $ntelement."_id") {
$cn = $_;
$fcn = 'id';
}
}
}
}
if ($cn) {
$map =
Data::Stag->new(map=>[
[table=>$element],
[col=>$cn],
[fktable=>$nt->element],
[fkcol=>$fcn]
]);
}
}
# if $map is set, then we have to pre-store this subnode
if ($map) {
# 1:many between this and child
# (eg this has fk to child)
# store child before this;
# use fk in this
my $fktable = $map->get_fktable;
my $col = $map->get_col || $self->get_pk_col($fktable);
# aliases map an extra table
# eg table X col X.A => Y.B
# fktable_alias = A
my $fktable_alias = $map->get_fktable_alias;
my $orig_nt = $nt;
# if we have an alias, it means the actual node
# we want to store is one beneath the alias;
# eg ..
# we want to actually store the node foo2
if ($fktable_alias) {
my @nts = $nt->sgetnode($map->sget_fktable);
if (!@nts) {
print STDERR $nt->sxpr;
confess("could not get node for: ".$map->sget_fktable);
}
if (@nts > 1) {
print STDERR $nt->sxpr;
confess("multiple nodes for: ".$map->sget_fktable);
}
$nt = shift @nts;
if (!$nt) {
print STDERR $map->sxpr;
print STDERR $orig_nt->sxpr;
confess("bad nodes for: ".$map->sget_fktable);
}
}
my $fk_id = $self->_storenode($nt);
if (!defined($fk_id)) {
confess("ASSERTION ERROR: could not get foreign key val\n".
"trying to store: $element\n".
"no fk returned when storing: $fktable");
}
trace(0, "SETTING $element.$col=$fk_id [via ".$orig_nt->element."]") if $TRACE;
$node->set($col, $fk_id);
$node->unset($orig_nt->element);
# do NOT try and expand the value assigned to this
# node with a xort-macro expansion later on
$assigned_node_h{$col} = 1;
trace(0, "ASSIGNED NON-MACRO ID for ".$col) if $TRACE;
}
else {
# 1:many between child and this
# (eg child has fk to this)
# store child after
trace(0, "WILL STORE LATER:\n", $nt->xml) if $TRACE;
$node->unset($nt->element);
push(@delayed_store, $nt);
}
# $node->unset($nt->element); # clear it
}
# --- done storing kids
# --- replace *IDs ---
# dbstag XML allows placeholder values in primary key cols
# (for now, PKs are always assumed to be autoincrement/serial ints)
# placeholder PKs get remapped to a new autogenerated ID
# all FKs referring to this get remapped too
my @tnodes = $node->tnodes; # terminal nodes mapped to columns in db
my %remap = (); # indexed by column name; new PK value
if (!$self->trust_primary_key_values) {
foreach my $tnode (@tnodes) {
# foreign keys in XORT mode - replace macro ID with
# actual database foreign key value
if ($self->is_fk_col($tnode->name) && $self->xort_mode) {
my $v = $tnode->data;
# -- CHECK FOR MACRO EXPANSION (XORT-STYLE) --
# IF this tnode was originally an ntnode that
# was collapsed to a pk val, xort style, do not
# try and map it to a previously assigned macro
# EXAMPLE:
# we start with A>>>
# we collapse too $v>
if ($assigned_node_h{$tnode->name}) {
trace(0, "ALREADY CALCULATED; not a Macro ID:$v;; in $element/".$tnode->name) if $TRACE;
# DO NOTHING
}
else { # NOT ASSIGNED
my $actual_id =
$self->macro_id_h->{$v};
if (!defined($actual_id)) {
$self->throw("XORT-style Macro ID:$v is undefined;; in $element/".$tnode->name);
}
$tnode->data($actual_id);
}
# -- END OF MACRO EXPANSION --
}
elsif ($tnode->name eq $pkcol) {
my $v = $tnode->data;
trace(0, "REMAP $pkcol: $v => ? [do not know new value yet]") if $TRACE;
$remap{$tnode->name} = $v; # map after insert/update
$node->unset($tnode->name); # discard placeholder
} else {
if ($self->is_fk_col($tnode->name)) {
# hack!! need proper FK refs...; DBSchema wont do this
my $colvalmap = $self->id_remap_idx;
#my $colvalmap = $self->get_mapping_for_col($nt->elememt);
if ($colvalmap) {
my $v = $tnode->data;
my $nv = $colvalmap->{$v};
if ($nv) {
trace(0, "remapping $v => $nv") if $TRACE;
$tnode->data($nv);
}
}
}
}
}
} # -- end of ID remapping
# --- Get columns that need updating/inserting ---
# turn all remaining tag-val pairs into a hash
my %store_hash = $node->pairs;
# All columns to be stored should be terminal nodes
# in the Stag tree; if not there is a problem
my @refcols = grep { ref($store_hash{$_}) } keys %store_hash;
if (@refcols) {
foreach (@$mapping) {
trace(0, $_->sxpr) if $TRACE;
}
confess("I can't store the current node; ".
"These elements need to be mapped via FKs: ".
join(', ', map {"\"@refcols\""} @refcols).
"\n\nPerhaps you need to specify more schema metadata?");
} # -- end of sanity check
# each relation has zero or more unique keys;
# unique keys may be compound (ie >1 column)
my @usets = $self->get_unique_sets($element);
trace(0, "USETS: ", map {"unique[ @$_ ]"} @usets) if $TRACE;
# get all the columns/fields/attributes of this relation
my @cols = $self->get_all_cols($element);
trace(0, "COLS: @cols") if $TRACE;
# store_node() will either perform an update or
# an insert. if we are performing an update, we
# need a query constraint to determine which row
# to update.
#
# this hash is used to determine the key/val pairs
my %unique_constr;
# this is the value of the primary key of
# the inserted/update row
my $id;
# if this relation has a primary key AND the stag node
# being stored has the value of this column set, THEN
# use this as the update constraint
if (0 && $pkcol) {
my $pk_id;
$pk_id = $node->get($pkcol);
if ($pk_id) {
# unset the value of the pk in the node; there
# is no point setting this in the UPDATE as it
# is already part of the update constraint
$node->unset($pkcol);
# set the update constraint based on the PK value
%unique_constr = ($pkcol => $pk_id);
# return this value at the end
$id = $pk_id;
trace(0, "SETTING UPDATE CONSTR BASED ON PK $pkcol = $pk_id") if $TRACE;
}
} # -- end of xxxx
# foreach my $sn ($node->kids) {
# my $name = $sn->element;
# my $nu_id = $self->id_mapping($name, $sn->data);
# # do the old 2 nu mapping
# # (the ids in the xml are just temporary
# # for internal consistency)
# $sn->data($nu_id) if $nu_id;
# }
if (0) {
# ---- EXPERIMENTAL ----
# if no unique keys are provided, assume that all
# non-PK columns together provide a compound unique key
# <> expedient for now!
if (!@usets) {
@usets = ( [grep {$_ ne $pkcol} @cols] );
}
}
if ($pkcol) {
# make single PK the first unique key set;
# add to beginning as this is the most efficient
unshift(@usets, [$pkcol]);
}
# get the column to select to get the pk for this element
my $select_col = $pkcol;
# -------- find update constraint by unique keys ----
# if the unique_constr hash is set, we know we
# are doing an UPDATE, and we know the query
# constraint that will be used;
#
# otherwise loop through all unique keys; if
# all the columns in the key are set, then we
# can safely use this unique key as the update
# constraint.
# if no update constraint can be found, this node
# is presumed not to exist in the DB and an INSERT
# is performed
foreach my $uset (@usets) {
# we already know & have the primary key
last if %unique_constr;
# if we are loading up a fresh/blank slate
# database then we don't need to check for
# existing tuples, as everything should
# have been inserted/updated this session
if ($self->policy_freshbulkload) {
next;
}
# if an xort-style attribute has op=insert
# this is the same as a bulkload
if ($operation && $operation eq 'insert') {
next;
}
# already tried PK
# if (scalar(@$uset) == 1 &&
# $uset->[0] eq $pkcol) {
# next;
# }
trace(0, "TRYING USET: ;@$uset; [pk=$pkcol]") if $TRACE;
# get the values of the unique key columns;
# %constr is a candidate unique key=>val mapping
my %constr =
map {
my $v = $node->sget($_);
$_ => $v
} @$uset;
# each column in the unique key must be
# non-NULL; try the next unique key if
# this one is unsuitable
# -- COMMENTED OUT cjm 20041012
# mysql auto-creates defaults for non-null fields;
# we cannot use this code:
# UNCOMMENTED 20050304
# -- make null value part of the key
# -- ADDED 20041012 - make null 0/''
foreach (keys %constr) {
# in pg, pk cols are sequences with defaults nextval
# skip these
next if $self->is_pk_col($_);
if (!defined($constr{$_})) {
if ($self->is_fk_col($_)) {
# if xort-style, the container may be an
# implicit foreign key
# TODO: check element
if ($parent_pk_id) {
trace(0, "USING PARENT ELEMENT: $_ => $parent_pk_id");
$constr{$_} = $parent_pk_id;
}
}
else {
my $colobj = $tableobj->column($_);
my $default_val = $colobj->default;
my $col_type = $colobj->type;
if (defined $default_val) {
# problem with DBIx::DBSchema
if ($default_val =~ /^\'(.*)\'::/) {
trace(0, "FIXING DEFAULT: $default_val => $1") if $TRACE;
$default_val = $1;
}
if (($col_type =~ /^int/ || $col_type =~ /float/) && $default_val eq '') {
# this SHOULDN'T be necessary, but appears to be required for
# some configuartions. DBSchema problem?
$default_val=0;
}
if (ref($default_val)) {
# In new versions of DBIx::DBSchema (0.38, possibly older versions),
# this appears to be a reference
$default_val = $$default_val;
if ($default_val eq "''") {
$default_val = '';
}
}
$constr{$_} = $default_val;
trace(0, "USING DEFAULT[type=$col_type] $_ => \"$constr{$_}\"") if $TRACE;
}
}
}
}
# TODO: check cases eg dbxref in chado; null default values...?
next if grep { !defined($_) } values %constr;
%unique_constr = %constr;
if (!$select_col && @$uset == 1) {
$select_col = $uset->[0];
}
trace(0, "GOT unique_constr, select_col=$select_col") if $TRACE;
last;
}
# -- END OF @usets --
# %unique_constr is set; a mapping for a unique key colset
# if this is not set, then we must insert
if (%unique_constr) {
# -- IN-MEMORY CACHING --
# check if we have already updated/inserted
# this tuple this session; and if so, what
# the update constraint used was
if ($is_caching_on == 1 || $is_caching_on == 3) {
#$self->throw("no select col for $element") unless $select_col;
# fetch values of unique_constr from cache
my %cached_colvals =
%{$self->query_cache($element,
\%unique_constr)
|| {}};
# have we stored anything with uniq key %unique_constr before?
if (%cached_colvals) {
if ($pkcol) {
$id = $cached_colvals{$pkcol};
if ($id) {
# use the cached pk id for efficiency
#%unique_constr = {$pkcol => $id};
trace(0, "CACHED $pkcol = $id") if $TRACE;
}
else {
trace(0, "NO CACHED COLVAL FOR $pkcol :: ".
join("; ",map {"$_ = $cached_colvals{$_}"} keys %cached_colvals)) if $TRACE;
}
}
# yes - has it changed?
foreach my $col (keys %cached_colvals) {
if ($cached_colvals{$col} && $store_hash{$col} &&
$cached_colvals{$col} && $store_hash{$col}) {
# don't bother re-storing anything
delete $store_hash{$col};
}
}
if (%store_hash) {
my @x = keys %store_hash;
trace(0, "WILL STORE: @x") if $TRACE;
}
else {
trace(0, "UNCHANGED - WILL NOT STORE; store_hash empty") if $TRACE;
}
}
else {
}
}
# -- END OF CACHING CHECK --
# -- GET PK VAL $id BASED ON unique_constr --
# (we may already have this based on memory-cache)
if (!$id) {
# the input node contains all the keys in %update_constr
# - check to see if this relation exists in the DB
my $vals;
if ($is_caching_on >= 2) {
$vals = [];
}
else {
my $sql =
$self->makesql($element,
\%unique_constr,
$select_col);
trace(0, "SQL: $sql") if $TRACE;
$vals =
$dbh->selectcol_arrayref($sql);
}
if (@$vals) {
# yes it does exist in DB; check if there is a
# pkcol - if there is, it means we can do an
# update and
if ($pkcol && $select_col && $select_col eq $pkcol) {
# this is the value we return at the
# end
$id = $vals->[0];
if ($remap{$pkcol}) {
#my $colvalmap = $self->get_mapping_for_col($pkcol);
my $colvalmap = $self->id_remap_idx;
#my $colvalmap = $self->get_mapping_for_col($element);
$colvalmap->{$remap{$pkcol}} = $id;
trace(0, "COLVALMAP $pkcol $remap{$pkcol} = $id") if $TRACE;
}
}
else {
# $id not set, but we will later perform an update anyway
}
}
else {
# this node is not in the DB; force insert
%unique_constr = ();
}
}
} # end of get pk val
# ---- UPDATE OR INSERT -----
# at this stage we know if we are updating
# or inserting, depending on whether a suitable
# update constraint has been found
my $this_op;
if (%unique_constr) {
$this_op = 'update';
}
else {
$this_op = 'insert';
}
if (defined $operation) {
if ($operation eq 'force') {
$operation = $this_op;
}
else {
# update/lookup/insert
# insert: already dealt with
}
}
else {
$operation = $this_op;
}
if ($operation eq 'replace') {
# replace = delete followed by insert
if (%unique_constr) {
$self->deleterow($element,\%unique_constr);
}
else {
$self->throw("Cannot find row to delete it:\n".$node->xml);
}
$operation = 'insert';
}
if ($operation eq 'update') {
# ** UPDATE **
if ($self->noupdate_h->{$element}) {
if ($tracekeyval) {
printf STDERR "NOUPDATE: $tracenode = $tracekeyval\n"
}
trace(0, sprintf("NOUPDATE on %s OR child nodes (We have %s)",
$element,
join('; ',values %unique_constr)
)) if $TRACE;
# don't return yet; there are still the delayed nodes
##return $id;
}
else {
# if there are no fields modified,
# no change
foreach (keys %unique_constr) {
# no point setting any column
# that is part of the update constraint
delete $store_hash{$_};
}
# only update if there are cols set that are
# not part of unique constraint
if (%store_hash) {
if ($tracekeyval) {
printf STDERR "UPDATE: $tracenode = $tracekeyval\n"
}
$self->updaterow($element,
\%store_hash,
\%unique_constr);
# -- CACHE RESULTS --
if ($is_caching_on == 1 || $is_caching_on == 3) {
$self->update_cache($element,
\%store_hash,
\%unique_constr);
}
}
else {
trace(0, sprintf("NOCHANGE on %s (We have %s) id=$id",
$element,
join('; ',values %unique_constr)
)) if $TRACE;
if ($tracekeyval) {
print STDERR "NOCHANGE: $tracenode = $tracekeyval\n"
}
}
}
} elsif ($operation eq 'insert') {
# ** INSERT **
if (%store_hash) {
$id =
$self->insertrow($element,
\%store_hash,
$pkcol);
if (!$id) {
# this only happens if $self->force(1) is set
if (@delayed_store) {
print STDERR "Insert on \"$element\" did not return a primary key ID.\n Possible causes: sequence not define [Pg]?\n";
if ($self->force) {
return;
}
else {
confess("non-recoverable error");
}
}
return;
}
if ($tracekeyval) {
printf STDERR "INSERT: $tracenode $tracekeyval [val = $id]\n"
}
if ($pkcol) {
if ($remap{$pkcol}) {
my $colvalmap = $self->id_remap_idx;
#my $colvalmap = $self->get_mapping_for_col($element);
$colvalmap->{$remap{$pkcol}} = $id;
trace(0, "colvalmap $remap{$pkcol} = $id") if $TRACE;
}
}
# -- CACHE RESULTS --
if ($is_caching_on) {
my %cache_hash = %store_hash;
if ($pkcol) {
$cache_hash{$pkcol} = $id;
}
$self->insert_into_cache($element,
\%cache_hash,
\@usets);
trace(0, "CACHING: $element") if $TRACE;
}
}
}
elsif ($operation eq 'delete') {
if (%unique_constr) {
$self->deleterow($element,\%unique_constr);
}
else {
$self->throw("Cannot find row to delete it (perhaps unique constraint not satisfied?):\n".$node->xml);
}
}
elsif ($operation eq 'lookup') {
# lookup: do nothing, already have ID
if (!$id) {
$self->throw("lookup: no ID; could not find this node in db (perhaps unique constraint not satisfied?) %s:\n",$node->xml);
}
}
else {
$self->throw("cannot do op: $operation");
} # -- end of UPDATE/INSERT/LOOKUP
# -- DELAYED STORE --
# Any non-terminal child nodes of the current one have
# some kind of foreign key relationship to the current
# relation. Either it is 1:many or many:1
#
# if the relation for the child node has a foreign key
# into the current relation, we need to store the current
# relation first to get the current relation's primary key.
#
# we have already done this, so now is the time to store
# any of these child nodes
if (@delayed_store) {
foreach my $sn (@delayed_store) {
my $fk; # foreign key column in subtable
my $snname = $sn->name; # subtable name
# if a mapping is used (eg in metadata), then
# this takes priority
foreach (@$mapping) {
if ($_->name eq 'parentfk' &&
$_->get_table eq $snname) {
$fk = $_->get_col;
}
}
# no mapping, by default use the current nodes primary
# key (this assumes eg person.address_id is a fk to
# a table with pk address_id; we will check and possibly
# override this later)
if (!$fk) {
$fk = $pkcol;
}
# HACK!!
# Some databases (eg GO Database) use 'id' for pk col
# names; fks to this table will be of form _id
if ($fk eq 'id') {
$fk = $element . '_id';
}
# --SET SUBNODE FK--
# it is necessarily true that each delayed-store subnode
# must have some fk relationship back to the existing one
# the subnode has a fk relation up to this one;
# by default we assume that the subnode fk column is named
# the same as the current pk. However, we check that this
# is the case. If not, we deduce what the correct fk col is
my $subtable =
$dbschema->table($snname);
if ($subtable->column($fk)) {
# a fk col with the name as the current node pk col exists;
# use it
# do nothing - current value of $fk is fine
}
else {
# deduce actual fk column
# there should only be ONE subnode fk column UNSET;
# this implicitly refers to the current node
my @subcolumns = $subtable->columns;
my @potential_fks = ();
foreach my $subcolumn (@subcolumns) {
if ($self->is_fk_col($subcolumn) &&
!$self->is_pk_col($subcolumn)) {
# Definite foreign key
if (defined $sn->sget($subcolumn)) {
# already set
}
else {
push(@potential_fks, $subcolumn);
}
}
}
trace(0, "POTENTIAL FKS: @potential_fks");
if (!@potential_fks) {
$self->throw("I do not know what to do with the current ".
"pl val ($id). There does not appear to be ".
"a $fk column in $snname, and all fks in ".
"the subtable $snname are currently set");
}
if (@potential_fks > 1) {
$self->throw("There appear to be multiple potential fks ".
"[ @potential_fks ]. I do not know which ".
"to choose to assign the current pk val $id".
" to");
}
$fk = shift @potential_fks;
}
# -- $fk value is set
$sn->set($fk, $id);
# -- $fk table assigned
trace(0, "NOW TIME TO STORE [curr pk val = $id] [fkcol = $fk] ", $sn->xml) if $TRACE;
# store subnode, passing in info on current node
$self->_storenode($sn,{parent_pk_id=>$id,
parent_element=>$element,
assigned_node_h=>{$fk=>1}});
}
} # -- end of @delayed_store
if ($current_attribute_node) {
if ($id) {
my $macro_id = $current_attribute_node->sget_id;
if ($macro_id) {
$self->macro_id_h->{$macro_id} = $id;
trace(0, "SETTING MACRO ID MAP: $macro_id => $id") if $TRACE;
}
else {
}
}
}
return $id;
}
# --SQL directives embedded in XML--
sub _execute_sqlnode {
my $self = shift;
my $sqlnode = shift;
if ($sqlnode->element eq '_sql') {
my $dbh = $self->dbh;
my $op = $sqlnode->get('@/op');
my $col = $sqlnode->get('@/col');
my $table = $sqlnode->get('@/from');
my $match = $sqlnode->get('@/match');
my @subnodes = grep {$_->element ne '@'} $sqlnode->kids;
if ($op eq 'delete') {
my $pkey = $sqlnode->get('@/pkey');
trace(0,"deleting from $table");
my @vals = map {$self->_execute_sqlnode($_)} @subnodes;
# do iteratively rather than in 1 SQL stmt
if (@vals) {
my $sql =
sprintf("SELECT $pkey FROM $table WHERE $match IN (%s)",
join(", ",@vals));
trace(0, "SQL: $sql");
my $ids_to_delete =
$dbh->selectcol_arrayref($sql); # quote
foreach my $id (@$ids_to_delete) {
my $delete_sql =
"DELETE FROM $table WHERE $pkey=$id";
trace(0,"SQL: $delete_sql");
$dbh->do($delete_sql);
}
}
}
elsif ($op eq "select") {
my @vals = $sqlnode->get('.');
my $sql =
sprintf("SELECT $col FROM $table WHERE $match IN (%s)",
join(", ",map {$dbh->quote($_)} @vals));
trace(0, "SQL: $sql");
my $ids =
$dbh->selectcol_arrayref($sql);
trace(0,"id list in select: @$ids");
return(@$ids);
}
else {
$self->throw("Do not understand SQL directive: $op")
}
}
else {
return $sqlnode->data;
}
return;
}
sub _process_sql {
my $self = shift;
my $node = shift;
my $element = $node->element;
if ($element eq 'in') {
}
else {
$self->throw("Do not understand SQL directive: $element")
}
}
# -- QUERYING --
sub rmake_nesting {
my $node = shift;
if ($node->element eq 'composite') {
my $first = $node->getnode_first;
my $second = $node->getnode_second;
my $head = rmake_nesting($first->data->[0]);
my $tail = rmake_nesting($second->data->[0]);
if ($head->isterminal) {
return
Data::Stag->new($head->element => [$tail]);
}
$head->addkid($tail);
return $head;
}
elsif ($node->element eq 'leaf') {
my $alias = $node->get_alias;
my $tn = $alias || $node->get_name;
return Data::Stag->new($tn=>1);
}
else {
die;
}
}
# if true, a metadata tag will be added to stag nodes selected from db
sub include_metadata {
my $self = shift;
$self->{_include_metadata} = shift if @_;
return $self->{_include_metadata};
}
# last SQL SELECT statement executed
sub last_stmt {
my $self = shift;
$self->{_last_stmt} = shift if @_;
return $self->{_last_stmt};
}
sub last_sql_and_args {
my $self = shift;
$self->{_last_sql_and_args} = shift if @_;
return $self->{_last_sql_and_args};
}
sub sax_handler {
my $self = shift;
$self->{_sax_handler} = shift if @_;
return $self->{_sax_handler};
}
# delegates to selectall_stag and turns tree to XML
sub selectall_xml {
my $self = shift;
my $stag = $self->selectall_stag(@_);
return $stag->xml;
}
# delegates to selectall_stag and turns tree to SAX
# (candidate for optimisation - TODO - use event firing model)
sub selectall_sax {
my $self = shift;
my ($sql, $nesting, $h) =
rearrange([qw(sql nesting handler)], @_);
my $stag = $self->selectall_stag(@_);
$h = $h || $self->sax_handler;
if (!$h) {
$self->throw("You must specify the sax handler;\n".
"Either use \$dbh->sax_handler(\$h), or \n".
"\$dbh->selectall_sax(-sql=>\$sql, handler->\$h)");
}
return $stag->sax($h);
}
# delegates to selectall_stag and turns tree to S-Expression
sub selectall_sxpr {
my $self = shift;
my $stag = $self->selectall_stag(@_);
return $stag->sxpr;
}
# does not bother decomposing and nesting the results; just
# returns the denormalised table from the SQL query.
# arrayref of arrayrefs - rows x cols
# first row of rows is column headings
sub selectall_rows {
my $self = shift;
my ($sql, $nesting, $bind, $template) =
rearrange([qw(sql nesting bind template)], @_);
my $rows =
$self->selectall_stag(-sql=>$sql,
-nesting=>$nesting,
-bind=>$bind,
-template=>$template,
-return_arrayref=>1,
);
return $rows;
}
# ---------------------------------------
# selectall_stag(sql, nesting)
#
# Takes an sql string containing a SELECT statement,
# parses it to get the tree structure; this can be
# overridden with the nesting optional argument.
#
# The SELECT statement is executed, and the relations are
# transformed into a stag tree
#
# ---------------------------------------
sub selectall_stag {
my $self = shift;
my ($sql, $nesting, $bind, $template, $return_arrayref, $include_metadata, $aliaspolicy) =
rearrange([qw(sql nesting bind template return_arrayref include_metadata aliaspolicy)], @_);
my $prep_h = $self->prepare_stag(@_);
my $cols = $prep_h->{cols};
my $sth = $prep_h->{sth};
my $exec_args = $prep_h->{exec_args};
if (!defined($include_metadata)) {
$include_metadata = $self->include_metadata;
}
# TODO - make this event based so we don't have to
# load all into memory
my $rows =
$self->dbh->selectall_arrayref($sth, undef, @$exec_args);
if ($return_arrayref) {
my @hdrs = ();
for (my $i=0; $i<@$cols; $i++) {
my $h = $prep_h->{col_aliases_ordered}->[$i] || $cols->[$i];
push(@hdrs, $h);
}
return [\@hdrs, @$rows];
}
trace(0, sprintf("Got %d rows\n", scalar(@$rows))) if $TRACE;
# --- reconstruct tree from relations
my $stag =
$self->reconstruct(
-rows=>$rows,
-cols=>$cols,
-alias=>$prep_h->{alias},
-nesting=>$prep_h->{nesting},
-aliaspolicy=>$aliaspolicy,
);
if ($include_metadata) {
my ($last_sql, @sql_args) = @{$self->last_sql_and_args || []};
my @kids = $stag->kids;
my @bind_nodes;
if ($bind && ref($bind) eq 'HASH') {
@bind_nodes = (stag_unflatten(argset=>[%$bind]));
}
unshift(@kids,
[dbstag_metadata=>[
[sql=>$last_sql],
[nesting=>$nesting],
[template=>$template],
@bind_nodes,
(map {[exec_arg=>$_]} @sql_args)
]]);
$stag->kids(@kids);
}
return $stag;
}
sub prepare_stag {
my $self = shift;
my ($sql, $nesting, $bind, $template, $return_arrayref, $aliaspolicy) =
rearrange([qw(sql nesting bind template return_arrayref aliaspolicy)], @_);
my $parser = $self->parser;
my $sth;
my @exec_args = ();
if (ref($sql)) {
$template = $sql;
}
if ($template) {
if (!ref($template)) {
$template = $self->find_template($template);
}
($sql, @exec_args) = $template->get_sql_and_args($bind);
}
trace 0, "parsing_sql: $sql\n";
# PRE-parse SQL statement for stag-specific extensions
if ($sql =~ /(.*)\s+use\s+nesting\s*(.*)/si) {
my ($pre, $post) = ($1, $2);
my ($extracted, $remainder) =
extract_bracketed($post, '()');
if ($nesting) {
$self->throw("nestings clash: $nesting vs $extracted");
}
$nesting = Data::Stag->parsestr($extracted);
$sql = "$pre $remainder";
}
# get the parsed SQL SELECT statement as a stag node
my $stmt = $parser->selectstmt($sql);
if (!$stmt) {
# there was some error parsing the SQL;
# DBI can probably give a better explanation.
eval {
my $sth = $self->dbh->prepare($sql);
};
if ($@) {
$self->throw("SQL ERROR:\n$@");
}
# DBI accepted it - must be a bug in the DBStag grammar
$self->throw("I'm sorry but the SQL statement you gave does\n".
"not conform to the more limited subset of SQL\n".
"that DBStag supports. Please see the DBStag docs\n".
"for details.\n".
"\n".
"Remember to check you explicitly declare all aliases\n".
"using AS\n\n\nSQL:$sql");
}
trace 0, "parsed_sql: $sql\n";
# trace 0, $stmt->xml;
my $dbschema = $self->dbschema;
$self->last_stmt($stmt);
# stag node of FROM part of SQL
my $fromstruct = $stmt->get_from;
# --- aliases ---
# keep a hash of table aliases
# KEY: table alias
# VAL: base table
# for example, 'SELECT * FROM person AS p'
# will result in $alias_h = { p => person }
my $alias_h = {};
# build alias hash using FROM node
foreach my $sn ($fromstruct->subnodes) {
get_table_alias_map($sn, $alias_h);
}
# as well as an alias hash map,
# keep an array of stag nodes representing all the aliases
my @aliases = ();
foreach my $alias (keys %$alias_h) {
push(@aliases,
Data::Stag->new(alias=>[
[name=>$alias],
[table=>$alias_h->{$alias}->[0]]
]));
}
my $aliasstruct = Data::Stag->new(alias=>[@aliases]);
# --- nestings ---
#
# the cartesian product that results from a SELECT can
# be turned into a tree - there is more than one tree to
# choose from; eg with "x NJ y NJ z" we can have trees:
# [x [y [z]]]
# [x [y z]]
# [z [x y]]
# etc
#
# the actual allowed nestings through the graph is constrained
# by the FK relationships; we do not utilise this yet (TODO!)
# later the user need only specify the root. for now they
# must specify the full nesting OR allow the bracket structure
# of the joins...
# if the user did not explicitly supply a nesting,
# guess one from the bracket structure of the FROM
# clause (see rmake_nesting)
# [TODO: be more clever in guessing the nesting using FKs]
if (!$nesting) {
$nesting = Data::Stag->new(top=>1);
# my $cons = rmake_cons($fromstruct->data->[0], $nesting);
$nesting = rmake_nesting($fromstruct->data->[0]);
$nesting = Data::Stag->new(top=>[$nesting]);
trace(0, "\n\nNesting:\n%s\n\n", $nesting->xml) if $TRACE;
}
if ($nesting && !ref($nesting)) {
$nesting = Data::Stag->parsestr($nesting);
}
# keep an array of named relations used in the query -
# the named relation is the alias if present;
# eg
# SELECT * FROM person AS p NATURAL JOIN department
# the named relations here are 'p' and 'department'
my @namedrelations = ();
$fromstruct->iterate(sub {
my $n = shift;
if ($n->element eq 'leaf') {
my $v = $n->sget_alias || $n->sget_name;
push(@namedrelations, $v)
}
});
# --- fetch columns ---
#
# loop through all the columns in the SELECT clause
# making them all of a standard form; eg dealing
# with functions and '*' wildcards appropriately
my @col_aliases_ordered = ();
my @cols =
map {
# $_ iterator variable is over the columns
# specified in the SELECT part of the query;
# each column is represented as a stag node
# column name
my $name = $_->get_name;
# column alias, if exists
# eg in 'SELECT name AS n' the alias is 'n'
my $col_alias = $_->get_alias;
push(@col_aliases_ordered, $col_alias);
# make the name the alias; prepend named relation if supplied.
# eg in 'SELECT person.name AS n' the name will become
# 'person.n'
if ($col_alias) {
$name = $col_alias;
if ($_->get_table) {
$name = $_->get_table . '.'. $name;
}
}
my $func = $_->getnode('func');
# from here on determines returned value of the
# map iteration:
if ($func) {
# a typical column node for a function looks like
# this:
#
# (col
# (func
# (name "somefunc")
# (args
# (col
# (name "x.foo")
# (table "x")))))
# (alias "myname"))
#
# if a function is included, and the function
# return value is aliased, use that alias;
# otherwise ...
my $funcname = $func->get_name;
# query the function stag node for the element
# 'col'
my ($col) =
$func->where('col',
sub {shift->get_table});
my $table = $col_alias || $funcname;
if (!$col_alias) {
$col_alias = $funcname;
}
if ($col) {
$table = $col->get_table;
}
# if ($col_alias =~ /(\w+)__(\w+)/) {
# $table = $1;
# $col_alias = $2;
# }
$name = $table . '.' . $col_alias;
# return:
$name;
}
elsif ($name =~ /^(\w+)\.\*$/) {
# if the column name is of the form
# RELATION.*, then replace the * with
# all the actual columns from the base relation
# RELATION
#
# the final result will be TABLE.col1, TABLE.col2,...
my $tn = $1;
my $tn_alias = $tn;
# use base relation name to introspect schema
if ($alias_h->{$tn}) {
$tn = $alias_h->{$tn}->[0];
}
my $tbl = $dbschema->table(lc($tn));
if (!$tbl) {
confess("No such table as $tn");
}
# introspect schema to get columns for this table
my @cns = $tbl->columns;
# trace(0, Dumper $tbl) if $TRACE;
trace(0, "TN:$tn ALIAS:$tn_alias COLS:@cns") if $TRACE;
# return:
map { "$tn_alias.$_" } @cns;
}
elsif ($name =~ /^\*$/) {
# if the column name is '*' (ie select all)
# then replace the * with
# all the actual columns from the base relations in
# the query (use FROM clause)
#
my %got = ();
my @allcols =
map {
my $tn = $_;
my $baserelname =
$alias_h->{$tn} ?
$alias_h->{$tn}->[0] : $tn;
my $tbl = $dbschema->table(lc($baserelname));
if (!$tbl) {
confess("Don't know anything about table:$tn\n".
"Maybe DBIx::DBSchema does not work for your DBMS?\n".
"If $tn is a view, you may need to modify DBIxLLDBSchema");
}
my @cns = $tbl->columns;
# @cns = grep { !$got{$_}++ } @cns;
map { "$tn.$_"} @cns;
} @namedrelations;
# This is a bit hacky; if the user specifies
# SELECT * FROM... then there is no way
# to introspect the actual column returned
# using DBI->selectall_arrayref
#
# maybe we should selectall_hashref
# instead? this is generally slower; also
# even if we get it with a hashref, the
# result can be ambiguous since DBI only
# gives us the colun names back
#
# to get round this we just replace the *
# in the user's query (ie in the actual SQL)
# with the full column list
my $replace = join(', ', @allcols);
# rewrite SQL statement; assum only one instance of
# string '*' in these cases
$sql =~ s/\*/$replace/;
# return:
@allcols;
}
else {
# no * wildcard in column, and not a function;
# just give back the node
# return:
$name
}
} $stmt->sgetnode_cols->getnode_col;
@cols =
map {
if (/(\w+)__(\w+)/) {
"$1.$2";
}
else {
$_
}
} @cols;
# ---- end of column fetching ---
trace(0, "COLS:@cols") if $TRACE;
# --- execute SQL SELECT statement ---
if ($template) {
$sth = $template->cached_sth->{$sql};
if (!$sth) {
$sth = $self->dbh->prepare($sql);
$template->cached_sth->{$sql} = $sth;
}
# ($sql, $sth, @exec_args) =
# $template->prepare($self->dbh, $bind);
}
else {
$sth = $self->dbh->prepare($sql);
}
my $sql_or_sth = $sql;
if ($sth) {
$sql_or_sth = $sth;
}
trace(0, "SQL:$sql") if $TRACE;
trace(0, "Exec_args: @exec_args") if $TRACE && @exec_args;
$self->last_sql_and_args([$sql, @exec_args]);
return
{
sth=>$sth,
exec_args=>\@exec_args,
cols=>\@cols,
col_aliases_ordered=>\@col_aliases_ordered,
alias=>$aliasstruct,
nesting=>$nesting
};
}
# ============================
# get_table_alias_map(tablenode, alias hash)
#
# checks a tablenode (eg the stag representing
# a table construct in the FROM clause) and adds
# it to the alias hash if it specifies an alias
# ============================
sub get_table_alias_map {
my $s = shift;
my $h = shift;
# the FROM clause is natively stored as a binary tree
# (in order to group the joins by brackets) - recursively
# descend building the hash map
if ($s->name eq 'leaf') {
my $alias = $s->get_alias;
if ($alias) {
$h->{$alias} = [$s->get_name];
}
return ($s->get_name);
}
elsif ($s->name eq 'composite') {
my ($first, $second) =
($s->getnode_first,
$s->getnode_second);
my $alias = $s->get_alias;
my @sn = ($first->subnodes, $second->subnodes);
my @subtbls = map {
get_table_alias_map($_, $h),
} @sn;
if ($alias) {
$h->{$alias} = [@subtbls];
}
return @subtbls;
}
else {
confess $s->name;
}
}
# ============================
# reconstruct(schema, rows, top, cols, constraints, nesting, aliasstruct)
#
# mainly called by: selectall_stag(...)
#
# takes an array of rows (ie the result of an SQL query, probably
# involving JOINs, which is a denormalised relation) and
# decomposes this relation into a tree structure
#
# in order to do this, it requires schema information, and a nesting
# through the implicit result graph to build a tree
# ============================
sub reconstruct {
my $self = shift;
my $tree = Data::Stag->new();
my ($schema, # OPTIONAL - meta data on relation
$rows, # REQUIRED - relation R - array-of-array
$top, # OPTIONAL - root node name
$cols, # REQUIRED - array of stag nodes per column of R
$constraints, # NOT USED!!!
$nesting, # REQUIRED - tree representing decomposed schema
$aliasstruct, # OPTIONAL - renaming of columns in R
$aliaspolicy) =
rearrange([qw(schema
rows
top
cols
constraints
nesting
alias
aliaspolicy)], @_);
$aliaspolicy = 'nest' unless $aliaspolicy;
# --- get the schema ---
#
# $schema is a stag representing the schema
# of the input releation R (not the schema of
# the db that produced it.... hmm, this could
# be misleading)
#
# it conforms to the following stag-struct:
#
#'(schema
# (top? "RECORDSET-ELEMENT-NAME")
# (cols?
# (col+
# (relation "RELATION-NAME")
# (name "COLUMN-NAME")
# ))
# (nesting?
# (* "NESTING-TREE")))
#
# each column represents the
if (!$schema) {
$schema = $tree->new(schema=>[]);
}
if (!ref($schema)) {
# it is a string - parse it
# (assume sxpr)
$schema = $tree->from('sxprstr', $schema);
}
# TOP - this is the element name
# to group the structs under.
# [override if specified explicitly]
if ($top) {
stag_set($schema, 'top', $top);
}
# $top = $schema->get_top || "set";
if (!$top) {
if ($nesting) {
# use first element in nesting
$top = $nesting->element;
}
else {
$top = 'set';
}
}
my $topstruct = $tree->new($top, []);
# COLS - this is the columns (attribute names)
# in the order they appear
# [override if specified explicitly]
if ($cols) {
my @ncols =
map {
if (ref($_)) {
$_
}
else {
# presume it's a string
# format = RELATION.ATTRIBUTENAME
if (/(\w+)\.(\w+)/) {
$tree->new(col=>[
[relation=>$1],
[name=>$2]]);
}
elsif (/(\w+)/) {
confess("Not implemented yet - must specify tbl for $_");
$tree->new(col=>[
[relation=>'unknown'],
[name=>$2]]);
}
else {
confess "I am confused by this column: $_";
}
}
} @$cols;
$schema->set_cols([@ncols]);
}
# NESTING - this is the tree structure in
# which the relations are structured
# [override if specified explicitly]
if ($nesting) {
if (ref($nesting)) {
}
else {
$nesting = $tree->from('sxprstr', $nesting);
}
$schema->set_nesting([$nesting]);
}
else {
$nesting = $schema->sgetnode_nesting;
}
if (!$nesting) {
confess("no nesting!");
}
# --- alias structure ---
#
# use this to get a hash map of alias => baserelation
($aliasstruct) = $schema->getnode_aliases unless $aliasstruct;
if ($aliasstruct && !ref($aliasstruct)) {
$aliasstruct = $tree->from('sxprstr', $aliasstruct);
}
my @aliases = ();
if ($aliasstruct && $aliaspolicy !~ /^a/i) {
@aliases = $aliasstruct->getnode_alias;
}
my %alias2baserelation =
map {
$_->sget_name => $_->sget_table
} @aliases;
# column headings; (ie all columns in R)
my @cols = $schema->sgetnode_cols->getnode_col();
# --- primary key info ---
# set the primary key for each relation (one per relation);
# the default is *all* the columns in that relation
my %pkey_by_relationname = (); # eg {person => [person_id]
my %cols_by_relationname = (); # eg {person => [person_id, fname, lname]
# loop through all columns in R, setting above hash maps
foreach my $col (@cols) {
# the stag struct for each $col looks like this:
#
# (col+
# (relation "RELATION-NAME")
# (name "COLUMN-NAME")
# ))
my $relationname = $col->get_relation;
my $colname = $col->get_name;
# pkey defaults to all columns in a relation
# (we may override this later)
$pkey_by_relationname{$relationname} = []
unless $pkey_by_relationname{$relationname};
push(@{$pkey_by_relationname{$relationname}},
$colname);
# all columns in a relation
# (note: same as default PK)
$cols_by_relationname{$relationname} = []
unless $cols_by_relationname{$relationname};
push(@{$cols_by_relationname{$relationname}},
$colname);
}
my @relationnames = keys %pkey_by_relationname;
# override PK if explicitly set as a constraint
my @pks = $schema->findnode("primarykey");
foreach my $pk (@pks) {
# $pk looks like this:
#
# '(primarykey
# (relation "R-NAME")
# (col+ "COL-NAME"))
my $relationname = $pk->get_relation;
my @cols = $pk->get_col;
# the hash %pkey_by_relationname should
# be keyed by the named relations, not the
# base relations
my @aliasnames =
grep {
$alias2baserelation{$_} eq $relationname
} keys %alias2baserelation;
# relation is not aliased
if (!@aliasnames) {
@aliasnames = ($relationname);
}
foreach (@aliasnames) {
$pkey_by_relationname{$_} = [@cols];
}
}
# ------------------
#
# loop through denormalised rows,
# putting the columns into their
# respecive relations
#
# eg
#
# <----- a -----> <-- b -->
# a.1 a.2 a.3 b.1 b.2
#
# algorithm:
# use nesting/tree to walk through
#
# ------------------
#~~~ keep a hash of all relations by their primary key vals
#~~~ outer key = relationname
#~~~ inner key = pkval
#~~~ hash val = relation structure
#~~~ my %all_relation_hh = ();
#~~~ foreach my $relationname (@relationnames) {
#~~~ $all_relation_hh{$relationname} = {};
#~~~ }
#~~~ keep an array of all relations
#~~~ outer key = relationname
#~~~ inner array = ordered list of relations
#~~~ my %all_relation_ah = ();
#~~~ foreach my $relationname (keys %pkey_by_relationname) {
#~~~ $all_relation_ah{$relationname} = [];
#~~~ }
# start at top of nesting tree
#
# a typical nesting tree may look like this:
#
# '(tableA
# (tableB "1")
# (tableC
# (tableD "1")))
#
# terminals ie "1" are ignored
my ($first_in_nesting) = $nesting->subnodes;
if (!$first_in_nesting) {
$first_in_nesting = $nesting;
}
my $fipname = $first_in_nesting ? $first_in_nesting->name : '';
# recursive hash representing tree
#
# $record =
# {child_h => {
# $relation_name* => {
# $pk_val => $record
# }
# },
# struct => $stag_obj
# }
#
# this is recursively constructed using the make_a_tree() method
# below. the nesting tree (see above) is traversed depth first,
# constructing both the child_h hash and the resulting Stag
# structure.
my $top_record_h =
{
child_h=>{ $fipname ? ($fipname=>{}) : () },
struct=>$topstruct
};
# loop through rows in R
foreach my $row (@$rows) {
my @colvals = @$row;
# keep a record of all table names in
# this row from R
my %current_relation_h = ();
for (my $i=0; $i<@cols; $i++) {
my $colval = $colvals[$i];
my $col = $cols[$i];
my $relationname = $col->get_relation;
my $colname = $col->get_name;
my $relation = $current_relation_h{$relationname};
if (!$relation) {
$relation = {};
$current_relation_h{$relationname} = $relation;
}
$relation->{$colname} = $colval;
}
# print "ROW=@$row\n";
# dmp(\%pkey_by_relationname);
# dmp($top_record_h);
# we now have a hash of hashes -
# outer keyed by relation id
# inner keyed by relation attribute name
# traverse depth first down nesting;
# add new nodes as children of the parent
$self->make_a_tree($tree,
$top_record_h,
$first_in_nesting,
\%current_relation_h,
\%pkey_by_relationname,
\%cols_by_relationname,
\%alias2baserelation,
$aliaspolicy);
}
return $topstruct;
}
*norm = \&reconstruct;
*normalise = \&reconstruct;
*normalize = \&reconstruct;
# ============================
# make_a_tree(...) RECURSIVE
#
# called by: reconstruct(...)
#
# ============================
sub make_a_tree {
my $self = shift;
my $tree = shift;
my $parent_rec_h = shift;
my $node = shift;
my %current_relation_h= %{shift ||{}};
my %pkey_by_relationname = %{shift ||{}};
my %cols_by_relationname = %{shift ||{}};
my %alias2baserelation = %{shift ||{}};
my $aliaspolicy = shift;
my $relationname = $node->name;
my $relationrec = $current_relation_h{$relationname};
my $pkcols = $pkey_by_relationname{$relationname};
my $rec; # this is the next node down in the hash tree
if (!$pkcols || !@$pkcols) {
# if we have no columns for a particular part of
# the nesting through the relation, it means it
# was ommitted from the select clause - just skip
# this part of the nesting.
#
# for example: SELECT a.*, b.* FROM a NJ a_to_b NJ b
# the default nesting will be: [a [a_to_b [b]]]
# the relation R will have columns:
# a.c1 a.c2 b.c1 b.c2
#
# we want to build a resulting structure like this:
# (a
# (c1 "x") (c2 "y")
# (b
# (c1 "a") (c2 "b")))
#
# so we just miss out a_to_b in the nesting, because it
# has no columns in the relation R.
$rec = $parent_rec_h;
}
else {
my $pkval =
CORE::join("\t",
map {
esctab($relationrec->{$_} || '')
} @$pkcols);
$rec = $parent_rec_h->{child_h}->{$relationname}->{$pkval};
if (!$rec) {
my $relationcols = $cols_by_relationname{$relationname};
my $has_non_null_val = grep {defined($relationrec->{$_})} @$relationcols;
return unless $has_non_null_val;
my $relationstruct =
$tree->new($relationname=>[
map {
defined($relationrec->{$_}) ? [$_ => $relationrec->{$_}] : ()
} @$relationcols
]);
my $parent_relationstruct = $parent_rec_h->{struct};
if (!$parent_relationstruct) {
confess("no parent for $relationname");
}
# if we have an aliased relation, add an extra
# level of nesting
my $baserelation = $alias2baserelation{$relationname};
if ($baserelation) {
# $aliaspolicy eq 'nest' or 't'
# nest base relations inside an alias node
# OR use table name in place of alias name
if ($aliaspolicy =~ /^t/i) {
stag_add($parent_relationstruct,
$baserelation,
$relationstruct->data);
}
else {
# nest
my $baserelationstruct =
Data::Stag->new($baserelation =>
$relationstruct->data);
stag_add($parent_relationstruct,
$relationname,
[$baserelationstruct]);
}
} else {
# either no aliases, or $aliaspolicy eq 'a'
# (in which case columns already mapped to aliases)
stag_add($parent_relationstruct,
$relationstruct->name,
$relationstruct->data);
}
$rec =
{struct=>$relationstruct,
child_h=>{}};
foreach ($node->subnodes) {
# keep index of children by PK
$rec->{child_h}->{$_->name} = {};
}
$parent_rec_h->{child_h}->{$relationname}->{$pkval} = $rec;
}
}
foreach ($node->subnodes) {
$self->make_a_tree($tree,
$rec,
$_,
\%current_relation_h,
\%pkey_by_relationname,
\%cols_by_relationname,
\%alias2baserelation,
$aliaspolicy);
}
}
# -------- GENERAL SUBS -----------
sub esctab {
my $w=shift;
$w =~ s/\t/__MAGICTAB__/g;
$w;
}
sub makesql {
my $self = shift;
my ($table,
$where,
$select,
$order,
$group,
$distinct) =
rearrange([qw(table
where
select
order
group
distinct)], @_);
confess("must specify table") unless $table;
# array of tables
if (ref($table)) {
if (ref($table) eq "HASH") {
$table =
[
map {
"$table->{$_} AS $_"
} keys %$table
];
}
}
else {
$table = [$table];
}
$where = [] unless $where;
# array of ANDed where clauses
if (ref($where)) {
if (ref($where) eq "HASH") {
$where =
[
map {
"$_ = ".$self->quote($where->{$_})
} keys %$where
];
}
}
else {
$where = [$where];
}
$select = ['*'] unless $select;
# array of SELECT cols
if (ref($select)) {
if (ref($select) eq "HASH") {
$select =
[
map {
"$select->{$_} AS $_"
} keys %$select
];
}
}
else {
$select = [$select];
}
$order = [] unless $order;
# array of order tables
if (ref($order)) {
if (ref($order) eq "HASH") {
confess("order must be an array");
}
}
else {
$order = [$order];
}
$group = [] unless $group;
# array of group tables
if (ref($group)) {
if (ref($group) eq "HASH") {
confess("group must be an array");
}
}
else {
$group = [$group];
}
$distinct = $distinct ? '' : ' DISTINCT';
my $sql =
sprintf("SELECT%s %s FROM %s%s%s",
$distinct,
join(', ', @$select),
join(', ', @$table),
(scalar(@$where) ?
' WHERE '.join(' AND ', @$where) : ''),
(scalar(@$group) ?
' GROUP BY '.join(', ', @$group) : ''),
(scalar(@$order) ?
' ORDER BY '.join(', ', @$order) : ''),
);
return $sql;
}
sub selectval {
my $self = shift;
trace(0, "@_") if $TRACE;
return $self->dbh->selectcol_arrayref(@_)->[0];
}
sub insertrow {
my $self = shift;
my ($table, $colvalh, $pkcol) = @_;
my $driver = $self->dbh->{Driver}->{Name};
my @cols = keys %$colvalh;
my @vals =
map {
defined($_) ? $colvalh->{$_} : undef
} @cols;
my @placeholders = map { '?' } @vals;
my $sql =
sprintf("INSERT INTO %s (%s) VALUES (%s)",
$table,
join(", ", @cols),
#join(", ", @vals),
join(", ", @placeholders),
);
if (!@cols) {
$sql = "INSERT INTO $table DEFAULT VALUES";
}
trace(0, "SQL:$sql") if $TRACE;
my $succeeded = 0;
eval {
my $sth = $self->dbh->prepare($sql);
my $rval = $sth->execute(@vals);
$succeeded = 1 if defined $rval;
};
if ($@) {
if ($self->force) {
# what about transactions??
$self->warn("IN SQL: $sql\nWARNING: $@\n");
return;
}
else {
confess $@;
}
}
return unless $succeeded;
my $pkval;
if ($pkcol) {
# primary key value may have been specified in the xml
# (this is necessary for non-surrogate pks in tables that
# are to be linked to via foreign keys)
$pkval = $colvalh->{$pkcol};
# pk was not supplied - perhaps this is a SERIAL/AUTO_INCREMENT
# column (ie surrogate integer primary key)
if (!$pkval) {
# assume pk is a SERIAL / AUTO_INCREMENT
if ($driver eq 'Pg') {
my $seqn = sprintf("%s_%s_seq",
$table,
$pkcol);
$pkval = $self->selectval("select currval('$seqn')");
trace(0, "CURRVAL $seqn = $pkval [Pg]") if $TRACE;
}
# this doesn't work on older
# versions of DBI/DBD::mysql
# seems to have been fixed Oct 2004 release
elsif ($driver eq 'mysql') {
$pkval = $self->dbh->last_insert_id(undef,undef,$table,$pkcol);
trace(0, "CURRVAL mysql_insert_id $pkval [mysql]") if $TRACE;
}
else {
$pkval = $self->selectval("select max($pkcol) from $table");
}
}
trace(0, "PKVAL = $pkval") if $TRACE;
}
return $pkval;
}
sub updaterow {
my $self = shift;
my ($table, $set, $where) = @_;
confess("must specify table") unless $table;
my $dbh = $self->dbh;
# array of WHERE cols
if (ref($where)) {
if (ref($where) eq "HASH") {
$where =
[
map {
"$_ = ".$dbh->quote($where->{$_})
} keys %$where
];
}
}
else {
$where = [$where];
}
confess("must specify constraints") unless @$where;
confess("must set update vals") unless $set;
my @bind = ();
# array of SET colvals
if (ref($set)) {
if (ref($set) eq "HASH") {
$set =
[
map {
push(@bind, defined $set->{$_} ? $set->{$_} : 'NULL');
"$_ = ?"
} keys %$set
];
}
}
else {
$set = [$set];
}
my $sql =
sprintf("UPDATE %s SET %s WHERE %s",
$table,
join(', ', @$set),
join(' AND ', @$where),
);
trace(0, "SQL:$sql [",join(', ',@bind)."]") if $TRACE;
my $sth = $dbh->prepare($sql) || confess($sql."\n\t".$dbh->errstr);
return $sth->execute(@bind) || confess($sql."\n\t".$sth->errstr);
}
sub deleterow {
my $self = shift;
my ($table, $where) = @_;
confess("must specify table") unless $table;
my $dbh = $self->dbh;
# array of WHERE cols
if (ref($where)) {
if (ref($where) eq "HASH") {
$where =
[
map {
"$_ = ".$dbh->quote($where->{$_})
} keys %$where
];
}
}
else {
$where = [$where];
}
confess("must specify constraints") unless @$where;
my $sql =
sprintf("DELETE FROM %s WHERE %s",
$table,
join(' AND ', @$where),
);
trace(0, "SQL:$sql") if $TRACE;
my $sth = $dbh->prepare($sql) || confess($sql."\n\t".$dbh->errstr);
return $sth->execute() || confess($sql."\n\t".$sth->errstr);
}
#$::RD_HINT = 1;
$::RD_AUTOACTION = q { [@item] };
sub selectgrammar {
return q[
{
use Data::Dumper;
use Data::Stag;
sub N {
Data::Stag->new(@_);
}
}
]
.
q[
selectstmts: selectstmt ';' selectstmts
selectstmts: selectstmt
# selectstmt: /select/i selectcols /from/i fromtables
selectstmt: /select/i selectq(?) selectcols /from/i fromtables where(?) group(?) having(?) combiner(?) order(?) limit(?) offset(?)
{
N(select => [
[qual => $item{'selectq'}[0]],
[cols => $item[3]],
[from => $item[5]],
# [where => $item[6]],
# [group => $item{'group'}[0]],
# [having => $item{'having'}[0]],
]);
}
|
selectq: /all/i | /distinct/i
{ $item[1] }
|
# as: /\s+as\s+/i
as: /as/i
selectcols: selectexpr /\,/ selectcols
{ [$item[1], @{$item[3]}] }
|
selectcols: selectexpr
{ [$item[1]] }
|
selectexpr: bselectexpr as aliasname
{
my $col = $item{bselectexpr};
$col->set_alias($item{aliasname}->[1]);
$col;
}
|
selectexpr: bselectexpr
{ $item[1] }
|
bselectexpr: funccall
{ $item[1] }
|
bselectexpr: selectcol
{ $item[1] }
|
selectcol: brackselectcol operator selectcol
{
N(col=>[
[func => [
[name => $item[2]->[1]],
[args => [$item[1],$item[3]]]
]
]
]);
}
### { $item[1]}
|
selectcol: brackselectcol
{ $item[1]}
|
brackselectcol: '(' selectcol ')'
{ $item[2]}
|
brackselectcol: bselectcol
{ $item[1]}
|
bselectcol: /(\w+)\.(\w+)/
{ N(col=>[
[name => $item[1]],
[table=>$1],
])
}
|
bselectcol: /(\w+)\.\*/
{ N(col=>[
[name => $item[1]],
[table=>$1],
])
}
|
bselectcol: /\*/
{ N(col=>[
[name => $item[1]]
])
}
|
bselectcol: /\w+/
{ N(col=>[
[name => $item[1]]
])
}
|
bselectcol: expr
{ N(col=>[
[expr => $item[1]]
]) }
|
funccall: funcname '(' distinct(?) selectcols ')'
{
my $col = N(col=>[
[func => [
[name => $item[1]->[1]],
[args => $item[4]]
]
]
]);
$col;
}
|
distinct: /distinct/i
operator: '+' | '-' | '*' | '/' | '||'
fromtables: jtable
{ [$item[1]] }
|
jtable: join_jtable
{ $item[1] }
|
join_jtable: qual_jtable jointype join_jtable
{
shift @{$item[2]};
my $j =
N(composite=>[
[ctype=>"@{$item[2]}"],
[first=>[$item[1]]],
[second=>[$item[3]]]
]);
$j;
}
|
join_jtable: qual_jtable
{ $item[1] }
|
qual_jtable: alias_jtable joinqual
{
my $j = $item[1];
$j->setnode_qual($item[2]);
$j;
}
|
qual_jtable: alias_jtable
{ $item[1] }
|
alias_jtable: brack_jtable /as\s+/i aliasname
{
my $j = $item[1];
$j->set_alias($item[3][1]);
$j;
}
|
alias_jtable: brack_jtable
{ $item[1] }
|
brack_jtable: '(' jtable ')'
{ $item[2] }
|
brack_jtable: table
{ N(leaf=>[[name=>$item[1]->[1]]]) }
|
joinqual: /on\s+/i bool_expr
{ N(qual => [
[type=>'on'],
[expr=>"@{$item[2]}"]
])
}
|
joinqual: /using\s+/i '(' cols ')'
{ N(qual =>[
[type=>'using'],
[expr=>"@{$item[3]}"]
])
}
|
table: tablename
{ $item[1] }
|
funcname: /\w+/
tablename: /\w+/
aliasname: /\w+/
cols: col(s)
col: /\w+\.\w+/
col: /\w+/
jointype: /\,/
jointype: /natural/i bjointype /join/i
jointype: /natural/i /join/i
jointype: bjointype /join/i
jointype: /join/i
bjointype: /inner/i
bjointype: lrf(?) /outer/i
lrf: /left/i | /right/i | /full/i
bjointype: /cross/i
number: float | int
float: /\d*\.?\d+/ 'e' sign int
float: /\d*\.\d+/
int: /\d+/
string: /\'.*?\'/
sign: '+' | '-'
exprs: '(' exprs ')'
exprs: expr ',' exprs
exprs: expr
# bool_expr - eg in where clause
bool_expr: not_bool_expr boolop bool_expr | not_bool_expr
not_bool_expr: '!' brack_bool_expr | brack_bool_expr
brack_bool_expr: '(' bool_expr ')' | bool_exprprim
bool_exprprim: boolval | expr
boolval: /true/i | /false/i | /null/i
expr: brack_expr op expr | brack_expr
brack_expr: '(' expr ')' | exprprim
exprprim: col | val
val: number | string
op: /not\s+/i /like\s+/i
op: /like\s+/i
op: /is\s+/i /not\s+/i
op: /is\s+/i
op: '=' | '!=' | '<>' | '<=' | '>=' | '<' | '>'
boolop: /and\s+/i | /or\s+/i | /not\s+/i
# where: /where/i /.*/
where: /where/i bool_expr
group: /group/i /by/i exprs
having: /having/i /.*/
combiner: combinekwd selectstmt
combinekwd: /union/i | /intersect/i | /update/i
order: /order/i /by/i orderexprs
orderexprs: orderexpr ',' orderexprs
orderexprs: orderexpr
orderexpr: expr /asc/i
orderexpr: expr /desc/i
orderexpr: expr /using/i op
orderexpr: expr
limit: /limit/i /\w+/
offset: /offset/i /\d+/
];
}
no strict 'refs';
sub AUTOLOAD {
my $self = shift;
my @args = @_;
my $name = $AUTOLOAD;
$name =~ s/.*://; # strip fully-qualified portion
if ($name eq "DESTROY") {
# we dont want to propagate this!!
return;
}
unless ($self->isa("DBIx::DBStag")) {
confess("no such subroutine $name");
}
if ($self->dbh) {
if ($TRACE) {
# the following check may impair performance
if (grep { ref($_) } @args) {
$self->throw("cannot quote @args");
}
}
if ($self->dbh->can($name)) {
return $self->dbh->$name(@args);
}
}
confess("no such method:$name)");
}
sub rearrange {
my($order,@param) = @_;
# If there are no parameters, we simply wish to return
# an undef array which is the size of the @{$order} array.
return (undef) x $#{$order} unless @param;
# If we've got parameters, we need to check to see whether
# they are named or simply listed. If they are listed, we
# can just return them.
return @param unless (defined($param[0]) && $param[0]=~/^-/);
# Now we've got to do some work on the named parameters.
# The next few lines strip out the '-' characters which
# preceed the keys, and capitalizes them.
my $i;
for ($i=0;$i<@param;$i+=2) {
if (!defined($param[$i])) {
cluck("Hmmm in $i ".CORE::join(";", @param)." == ".CORE::join(";",@$order)."\n");
}
else {
$param[$i]=~s/^\-//;
$param[$i]=~tr/a-z/A-Z/;
}
}
# Now we'll convert the @params variable into an associative array.
my(%param) = @param;
my(@return_array);
# What we intend to do is loop through the @{$order} variable,
# and for each value, we use that as a key into our associative
# array, pushing the value at that key onto our return array.
my($key);
foreach $key (@{$order}) {
$key=~tr/a-z/A-Z/;
my($value) = $param{$key};
delete $param{$key};
push(@return_array,$value);
}
# catch user misspellings resulting in unrecognized names
my(@restkeys) = keys %param;
if (scalar(@restkeys) > 0) {
confess("@restkeys not processed in rearrange(), did you use a
non-recognized parameter name ? ");
}
return @return_array;
}
#sub loadschema {
# my $self = shift;
# my ($ddl, $ddlf, $dialect) =
# rearrange([qw(ddl ddlf dialect)], @_);
# if ($ddlf) {
# my $fh = FileHandle->new($ddlf) || $self->throw("no file $ddlf");
# $ddl = join('',<$fh>);
# $fh->close;
# }
# $self->throw("no DDL") unless $ddl;
# if ($dialect) {
# my $driver = $self->{_driver} || 'Pg';
# if ($driver ne $dialect) {
# }
# }
#}
1;
__END__
=head1 NAME
DBIx::DBStag - Relational Database to Hierarchical (Stag/XML) Mapping
=head1 SYNOPSIS
use DBIx::DBStag;
my $dbh = DBIx::DBStag->connect("dbi:Pg:dbname=moviedb");
my $sql = q[
SELECT
studio.*,
movie.*,
star.*
FROM
studio NATURAL JOIN
movie NATURAL JOIN
movie_to_star NATURAL JOIN
star
WHERE
movie.genre = 'sci-fi' AND star.lastname = 'Fisher'
USE NESTING
(set(studio(movie(star))))
];
my $dataset = $dbh->selectall_stag($sql);
my @studios = $dataset->get_studio;
# returns nested data that looks like this -
#
# (studio
# (name "20th C Fox")
# (movie
# (name "star wars") (genre "sci-fi")
# (star
# (firstname "Carrie")(lastname "Fisher")))))
# iterate through result tree -
foreach my $studio (@studios) {
printf "STUDIO: %s\n", $studio->get_name;
my @movies = $studio->get_movie;
foreach my $movie (@movies) {
printf " MOVIE: %s (genre:%s)\n",
$movie->get_name, $movie->get_genre;
my @stars = $movie->get_star;
foreach my $star (@stars) {
printf " STARRING: %s:%s\n",
$star->get_firstname, $star->get_lastname;
}
}
}
# manipulate data then store it back in the database
my @allstars = $dataset->get("movie/studio/star");
$_->set_fullname($_->get_firstname.' '.$_->get_lastname)
foreach(@allstars);
$dbh->storenode($dataset);
exit 0;
Or from the command line:
unix> selectall_xml.pl -d 'dbi:Pg:dbname=moviebase' \
'SELECT * FROM studio NATURAL JOIN movie NATURAL \
JOIN movie_to_star NATURAL JOIN star \
USE NESTING (set(studio(movie(star))))'
Or using a predefined template:
unix> selectall_xml.pl -d moviebase /mdb-movie genre=sci-fi
=cut
=head1 DESCRIPTION
This module is for mapping between relational databases and Stag
objects (Structured Tags - see L). Stag objects can also
be represented as XML. The module has two main uses:
=over
=item Querying
This module can take the results of any SQL query and decompose the
flattened results into a tree data structure which reflects the
foreign keys in the underlying relational schema. It does this by
looking at the SQL query and introspecting the database schema, rather
than requiring metadata or an object model.
In this respect, the module works just like a regular L handle, with
a few extra methods.
Queries can also make use of predefined B
=item Storing Data
DBStag objects can store any tree-like datastructure (such as XML
documents) into a database using normalized schema that reflects the
structure of the tree being stored. This is done using little or no
metadata.
XML can also be imported, and a relational schema automatically generated.
=back
For a tutorial on using DBStag to build and query relational databases
from XML sources, please see L
=head2 HOW QUERY RESULTS ARE TURNED INTO STAG/XML
This is a general overview of the rules for turning SQL query results
into a tree like data structure. You don't need to understand all
these rules to be able to use this module - you can experiment by
using the B script which comes with this
distribution.
=head3 Mapping Relations
Relations (i.e. tables and views) are elements (nodes) in the
tree. The elements have the same name as the relation in the database.
These nodes are always non-terminal (ie they always have child nodes)
=head3 Mapping Columns
Table and view columns of a relation are sub-elements of the table or
view to which they belong. These elements will be B
(i.e. terminal nodes). Only the columns selected in the SQL query
will be present.
For example, the following query
SELECT name, job FROM person;
will return a data structure that looks like this:
(set
(person
(name "fred")
(job "forklift driver"))
(person
(name "joe")
(job "steamroller mechanic")))
The data is shown as a lisp-style S-Expression - it can also be
expressed as XML, or manipulated as an object within perl.
=head3 Handling table aliases
If an ALIAS is used in the FROM part of the SQL query, the relation
element will be nested inside an element with the same name as the
alias. For instance, the query
SELECT name FROM person AS author WHERE job = 'author';
Will return a data structure like this:
(set
(author
(person
(name "Philip K Dick"))))
The underlying assumption is that aliasing is used for a purpose in
the original query; for instance, to determine the context of the
relation where it may be ambiguous.
SELECT *
FROM person AS employee
INNER JOIN
person AS boss ON (employee.boss_id = boss.person_id)
Will generate a nested result structure similar to this -
(set
(employee
(person
(person_id "...")
(name "...")
(salary "...")
(boss
(person
(person_id "...")
(name "...")
(salary "..."))))))
If we neglected the alias, we would have 'person' directly nested
under 'person', and the meaning would not be obvious. Note how the
contents of the SQL query dynamically modifies the schema/structure of
the result tree.
=head3 NOTE ON SQL SYNTAX
Right now, DBStag is fussy about how you specify aliases; you must use
B - you must say
SELECT name FROM person AS author;
instead of
SELECT name FROM person author;
=head3 Nesting of relations
The main utility of querying using this module is in retrieving the
nested relation elements from the flattened query results. Given a
query over relations A, B, C, D,... there are a number of possible
tree structures. Not all of the tree structures are meaningful or
useful.
Usually it will make no sense to nest A under B if there is no foreign
key relationship linking either A to B, or B to A. This is not always
the case - it may be desirable to nest A under B if there is an
intermediate linking table that is required at the relational level
but not required in the tree structure.
DBStag will guess a structure/schema based on the ordering of the
relations in your FROM clause. However, this guess can be over-ridden
at either the SQL level (using DBStag specific SQL extensions) or at
the API level.
The default algorithm is to nest each relation element under the
relation element preceeding it in the FROM clause; for instance:
SELECT * FROM a NATURAL JOIN b NATURAL JOIN c
If there are appropriately named foreign keys, the following data will
be returned (assuming one column 'x_foo' in each of a, b and c)
(set
(a
(a_foo "...")
(b
(b_foo "...")
(c
(c_foo "...")))))
where 'x_foo' is a column in relation 'x'
This is not always desirable. If both b and c have foreign keys into
table a, DBStag will not detect this - you have to guide it. There are
two ways of doing this - you can guide by bracketing your FROM clause
like this:
SELECT * FROM (a NATURAL JOIN b) NATURAL JOIN c
This will generate
(set
(a
(a_foo "...")
(b
(b_foo "..."))
(c
(c_foo "..."))))
Now b and c are siblings in the tree. The algorithm is similar to
before: nest each relation element under the relation element
preceeding it; or, if the preceeding item in the FROM clause is a
bracketed structure, nest it under the first relational element in the
bracketed structure.
(Note that in MySQL you may not place brackets in the FROM clause in
this way)
Another way to achieve the same thing is to specify the desired tree
structure using a DBStag specific SQL extension. The DBStag specific
component is removed from the SQL before being presented to the
DBMS. The extension is the B