Net-Google-SafeBrowsing2-1.07/0000755000076400007640000000000011764517146016316 5ustar jsobrierjsobrierNet-Google-SafeBrowsing2-1.07/t/0000755000076400007640000000000011764517146016561 5ustar jsobrierjsobrierNet-Google-SafeBrowsing2-1.07/t/Net-Google-SafeBrowsing2.t0000644000076400007640000002243711762325320023354 0ustar jsobrierjsobrier# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl Net-Google-SafeBrowsing2.t' ######################### use List::Util qw(first); use Test::More qw(no_plan); BEGIN { use_ok('Net::Google::SafeBrowsing2') }; require_ok( 'Net::Google::SafeBrowsing2' ); ######################### my $gsb = Net::Google::SafeBrowsing2->new(); is( $gsb->hex_to_ascii( 'A' ), 41, 'hex_to_ascii OK'); is( $gsb->hex_to_ascii( $gsb->ascii_to_hex('11223344') ), '11223344', 'hex_to_ascii OK'); # From Google API doc, prefix is( length $gsb->prefix('abc'), 4, 'Prefix length is 4'); is( $gsb->prefix('abc'), $gsb->ascii_to_hex('ba7816bf'), 'prefix "abc" is OK'); is( $gsb->prefix('abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq'), $gsb->ascii_to_hex('248d6a61'), 'prefix is OK'); is( $gsb->prefix('a' x 1000000), $gsb->ascii_to_hex('cdc76e5c'), 'prefix "a" is OK'); # From Google API doc, URL canonicalization is( $gsb->canonical_uri('http://host/%25%32%35')->as_string, 'http://host/%25', 'canonicalization "http://host/%25%32%35" is OK'); is( $gsb->canonical_uri('http://host/%25%32%35%25%32%35')->as_string, 'http://host/%25%25', 'canonicalization "http://host/%25%32%35" is OK'); is( $gsb->canonical_uri('http://host/asdf%25%32%35asd')->as_string, 'http://host/asdf%25asd', 'canonicalization "http://host/asdf%25asd" is OK'); is( $gsb->canonical_uri('http://host/%%%25%32%35asd%%')->as_string, 'http://host/%25%25%25asd%25%25', 'canonicalization "http://host/%25%25%25asd%25%25 is OK'); is( $gsb->canonical_uri('http://www.google.com/')->as_string, 'http://www.google.com/', 'canonicalization "http://www.google.com/" is OK'); is( $gsb->canonical_uri('http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77%77%77%2E%65%62%61%79%2E%63%6F%6D/')->as_string, 'http://168.188.99.26/.secure/www.ebay.com/', 'canonicalization "http://168.188.99.26/.secure/www.ebay.com/" is OK'); is( $gsb->canonical_uri('http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/')->as_string, 'http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/', 'canonicalization "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/" is OK'); # is( $gsb->canonical_uri('http://host%23.com/%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A22%252833%252944_55%252B')->as_string, 'http://host%23.com/~a!b@c%23d$e%25f^00&11*22(33)44_55+', 'canonicalization "http://host%23.com/%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A22%252833%252944_55%252B" is OK'); # Fails because URI->new does some parsing automatically and parse ^ into %5E is( $gsb->canonical_uri('http://3279880203/blah')->as_string, 'http://195.127.0.11/blah', 'canonicalization "http://195.127.0.11/blah" is OK'); is( $gsb->canonical_uri('http://www.google.com/blah/..')->as_string, 'http://www.google.com/', 'canonicalization "http://www.google.com/blah/..." is OK'); is( $gsb->canonical_uri('www.google.com/')->as_string, 'http://www.google.com/', 'canonicalization "www.google.com/" is OK'); is( $gsb->canonical_uri('www.google.com')->as_string, 'http://www.google.com/', 'canonicalization "www.google.com" is OK'); is( $gsb->canonical_uri('http://www.evil.com/blah#frag')->as_string, 'http://www.evil.com/blah', 'canonicalization "http://www.evil.com/blah" is OK'); is( $gsb->canonical_uri('http://www.GOOgle.com/')->as_string, 'http://www.google.com/', 'canonicalization "http://www.google.com/" is OK'); # is( $gsb->canonical_uri('http://www.google.com.../')->as_string, 'http://www.google.com/', 'canonicalization "http://www.google.com/" is OK'); # Dies! is( $gsb->canonical_uri("http://www.google.com/foo\tbar\rbaz\n2")->as_string, 'http://www.google.com/foobarbaz2', 'canonicalization "http://www.google.com/foobarbaz2" is OK'); is( $gsb->canonical_uri('http://www.google.com/q?')->as_string, 'http://www.google.com/q?', 'canonicalization "http://www.google.com/q?" is OK'); is( $gsb->canonical_uri('http://www.google.com/q?r?')->as_string, 'http://www.google.com/q?r?', 'canonicalization "http://www.google.com/q?r?" is OK'); is( $gsb->canonical_uri('http://www.google.com/q?r?s')->as_string, 'http://www.google.com/q?r?s', 'canonicalization "http://www.google.com/q?r?s" is OK'); is( $gsb->canonical_uri('http://evil.com/foo#bar#baz')->as_string, 'http://evil.com/foo', 'canonicalization "http://evil.com/foo" is OK'); is( $gsb->canonical_uri('http://evil.com/foo;')->as_string, 'http://evil.com/foo;', 'canonicalization "http://evil.com/foo;" is OK'); is( $gsb->canonical_uri('http://evil.com/foo?bar;')->as_string, 'http://evil.com/foo?bar;', 'canonicalization "http://evil.com/foo?bar;" is OK'); # is( $gsb->canonical_uri("http://\x01\x80.com/")->as_string, 'http://%01%80.com/', 'canonicalization "http://%01%80.com/" is OK'); # fails is( $gsb->canonical_uri('http://notrailingslash.com')->as_string, 'http://notrailingslash.com/', 'canonicalization "http://notrailingslash.com/" is OK'); is( $gsb->canonical_uri('http://www.gotaport.com:1234/')->as_string, 'http://www.gotaport.com:1234/', 'canonicalization "http://www.gotaport.com:1234/" is OK'); is( $gsb->canonical_uri(' http://www.google.com/ ')->as_string, 'http://www.google.com/', 'canonicalization "http://www.google.com/" is OK'); is( $gsb->canonical_uri('http:// leadingspace.com/')->as_string, 'http://%20leadingspace.com/', 'canonicalization "http://%20leadingspace.com/" is OK'); is( $gsb->canonical_uri('http://%20leadingspace.com/')->as_string, 'http://%20leadingspace.com/', 'canonicalization "http://%20leadingspace.com/" (1) is OK'); is( $gsb->canonical_uri('%20leadingspace.com/')->as_string, 'http://%20leadingspace.com/', 'canonicalization "%20leadingspace.com/" is OK'); is( $gsb->canonical_uri('https://www.securesite.com/')->as_string, 'https://www.securesite.com/', 'canonicalization "https://www.securesite.com/" is OK'); is( $gsb->canonical_uri('http://host.com/ab%23cd')->as_string, 'http://host.com/ab%23cd', 'canonicalization "http://host.com/ab%23cd" is OK'); # fails is( $gsb->canonical_uri('http://host.com//twoslashes?more//slashes')->as_string, 'http://host.com/twoslashes?more//slashes', 'canonicalization "http://host.com/twoslashes?more//slashes" is OK'); # Own tests, URL canonicalization is( $gsb->canonical_uri('http://www.google.com/a/../b/../c')->as_string, 'http://www.google.com/c', 'canonicalization "http://www.google.com/a/../b/../c" is OK'); is( $gsb->canonical_uri('http://www.google.com/a/../b/..')->as_string, 'http://www.google.com/', 'canonicalization "http://www.google.com/a/../b/.." is OK'); is( $gsb->canonical_uri('http://www.google.com/a/../b/..?foo')->as_string, 'http://www.google.com/?foo', 'canonicalization "http://www.google.com/a/../b/..?foo" is OK'); is( $gsb->canonical_uri('http://www.google.com/#a#b')->as_string, 'http://www.google.com/', 'canonicalization "http://www.google.com/#a#b" is OK'); is( $gsb->canonical_uri('http://www.google.com/#a#b#c')->as_string, 'http://www.google.com/', 'canonicalization "http://www.google.com/#a#b#c" is OK'); # Form Google API doc, possible strings for lookup my @values = $gsb->canonical('http://a.b.c/1/2.html?param=1'); is( scalar @values, 8, 'Number of possible strings for "http://a.b.c/1/2.html?param=1" is OK'); my @strings = qw(a.b.c/1/2.html?param=1 a.b.c/1/2.html a.b.c/ a.b.c/1/ b.c/1/2.html?param=1 b.c/1/2.html b.c/ b.c/1/); foreach my $string (@strings) { my $found = defined first { $_ eq $string } @values; is( $found, 1, "$string is present"); } @values = $gsb->canonical('http://a.b.c.d.e.f.g/1.html'); is( scalar @values, 10, 'Number of possible strings for "http://a.b.c.d.e.f.g/1.html" is OK'); @strings = qw(a.b.c.d.e.f.g/1.html a.b.c.d.e.f.g/ c.d.e.f.g/1.html c.d.e.f.g/ d.e.f.g/1.html d.e.f.g/ e.f.g/1.html e.f.g/ f.g/1.html f.g/); foreach my $string (@strings) { my $found = defined first { $_ eq $string } @values; is( $found, 1, "$string is present"); } @values = $gsb->canonical('http://www1.rapidsoftclearon.net/'); is( scalar @values, 2, 'Number of possible strings for "http://www1.rapidsoftclearon.net/" is OK'); @strings = qw(www1.rapidsoftclearon.net/ rapidsoftclearon.net/); foreach my $string (@strings) { my $found = defined first { $_ eq $string } @values; is( $found, 1, "$string is present"); } # Own test for canonical_domain_suffix @values = $gsb->canonical_domain_suffixes('www.google.com'); is( scalar @values, 2, 'Number of possible domains for "www.google.com" is OK'); @strings = qw(www.google.com google.com); foreach my $string (@strings) { my $found = defined first { $_ eq $string } @values; is( $found, 1, "domain $string is present"); } @values = $gsb->canonical_domain_suffixes('google.com'); is( scalar @values, 1, 'Number of possible domains for "google.com" is OK'); @strings = qw(google.com); foreach my $string (@strings) { my $found = defined first { $_ eq $string } @values; is( $found, 1, "domain $string is present"); } @values = $gsb->canonical_domain_suffixes('malware.testing.google.test'); is( scalar @values, 2, 'Number of possible domains for "malware.testing.google.test" is OK'); @strings = qw(testing.google.test google.test); foreach my $string (@strings) { my $found = defined first { $_ eq $string } @values; is( $found, 1, "domain $string is present"); } # ok(2); # Module tested OKNet-Google-SafeBrowsing2-1.07/MANIFEST0000644000076400007640000000076211764517146017454 0ustar jsobrierjsobrierChanges Makefile.PL MANIFEST README t/Net-Google-SafeBrowsing2.t lib/Net/Google/SafeBrowsing2.pm lib/Net/Google/SafeBrowsing2/Storage.pm lib/Net/Google/SafeBrowsing2/Sqlite.pm lib/Net/Google/SafeBrowsing2/MySQL.pm lib/Net/Google/SafeBrowsing2/Postgres.pm lib/Net/Google/SafeBrowsing2/DBI.pm lib/Net/Google/SafeBrowsing2/Lookup.pm META.yml Module YAML meta-data (added by MakeMaker) META.json Module JSON meta-data (added by MakeMaker) Net-Google-SafeBrowsing2-1.07/Makefile.PL0000644000076400007640000000142611764515532020270 0ustar jsobrierjsobrieruse 5.008008; use ExtUtils::MakeMaker; WriteMakefile( NAME => 'Net::Google::SafeBrowsing2', VERSION_FROM => 'lib/Net/Google/SafeBrowsing2.pm', # finds $VERSION PREREQ_PM => { LWP::UserAgent => 0, URI => 0, Digest::SHA => 0, List::Util => 0, constant => 0, Net::IPAddress => 0, Test::More => 0, Text::Trim => 0, Digest::HMAC_SHA1 => 0, MIME::Base64::URLSafe => 0, MIME::Base64 => 0, String::HexConvert => 0, File::Slurp => 0, }, # e.g., Module::Name => 1.1 ($] >= 5.005 ? ## Add these new keywords supported since 5.005 (ABSTRACT_FROM => 'lib/Net/Google/SafeBrowsing2.pm', # retrieve abstract from module AUTHOR => 'Julien Sobrier ') : ()), ); Net-Google-SafeBrowsing2-1.07/META.yml0000664000076400007640000000144711764517146017577 0ustar jsobrierjsobrier--- abstract: 'Perl extension for the Google Safe Browsing v2 API. (Google Safe Browsing v1 has been deprecated by Google.)' author: - 'Julien Sobrier ' build_requires: ExtUtils::MakeMaker: 0 configure_requires: ExtUtils::MakeMaker: 0 dynamic_config: 1 generated_by: 'ExtUtils::MakeMaker version 6.62, CPAN::Meta::Converter version 2.112150' license: unknown meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 name: Net-Google-SafeBrowsing2 no_index: directory: - t - inc requires: Digest::HMAC_SHA1: 0 Digest::SHA: 0 File::Slurp: 0 LWP::UserAgent: 0 List::Util: 0 MIME::Base64: 0 MIME::Base64::URLSafe: 0 Net::IPAddress: 0 String::HexConvert: 0 Test::More: 0 Text::Trim: 0 URI: 0 constant: 0 version: 1.07 Net-Google-SafeBrowsing2-1.07/lib/0000755000076400007640000000000011764517146017064 5ustar jsobrierjsobrierNet-Google-SafeBrowsing2-1.07/lib/Net/0000755000076400007640000000000011764517146017612 5ustar jsobrierjsobrierNet-Google-SafeBrowsing2-1.07/lib/Net/Google/0000755000076400007640000000000011764517146021026 5ustar jsobrierjsobrierNet-Google-SafeBrowsing2-1.07/lib/Net/Google/SafeBrowsing2/0000755000076400007640000000000011764517146023501 5ustar jsobrierjsobrierNet-Google-SafeBrowsing2-1.07/lib/Net/Google/SafeBrowsing2/DBI.pm0000644000076400007640000004601011764517036024434 0ustar jsobrierjsobrierpackage Net::Google::SafeBrowsing2::DBI; use strict; use warnings; use base 'Net::Google::SafeBrowsing2::Storage'; use Carp; use DBI; use List::Util qw(first); our $VERSION = '0.7'; =head1 NAME Net::Google::SafeBrowsing2::DBI - Base class for all DBI-based back-end storage for the Google Safe Browsing v2 database =head1 SYNOPSIS use Net::Google::SafeBrowsing2::MySQL; my $storage = Net::Google::SafeBrowsing2::MySQL->new(host => '127.0.0.1', database => 'GoogleSafeBrowsingv2'); ... $storage->close(); =head1 DESCRIPTION This is a base implementation of L using DBI. =cut =head1 CONSTRUCTOR =over 4 =head2 new() This method should be overwritten. Arguments =over 4 =item keep_all Optional. Set to 1 to keep old information (such as expiring full hashes) in the database. 0 (delete) by default. =back =back =cut sub new { my ($class, %args) = @_; my $self = { # default arguments keep_all => 0, %args, }; bless $self, $class or croak "Can't bless $class: $!"; $self->init(); return $self; } =head1 PUBLIC FUNCTIONS =over 4 See L for a complete list of public functions. =head2 close() Cleanup old full hashes, and close the connection to the database. $storage->close(); =cut sub close { my ($self, %args) = @_; if ($self->{keep_all} == 0) { $self->{dbh}->do('DELETE FROM full_hashes WHERE timestamp < ?', { }, time() - Net::Google::SafeBrowsing2::FULL_HASH_TIME); } $self->{dbh}->disconnect; } =head2 export() Export add chunks and sub chunks to a file. The file content looks like what Google sends in redirections. The file can be used with the C function from C. This is useful when moving from one back-end storage to another $storage->export(list => MALWARE); Arguments =over 4 =item list Required. The Google Safe Browsing list to export. =item file Optional. Filename to export to. Uses "$list.dat" by default. =back =cut sub export { my ($self, %args) = @_; my $list = $args{list} || ''; my $file = $args{file} || "$list.dat"; open EXPORT, "> $file" or croak "Cannot open $file: $!\n"; binmode EXPORT; # Add chunks my $num = 0; my $chunk_data = ''; my $hostkey = ''; my $prefixes = ''; my $count = 0; my $sth = $self->{dbh}->prepare("SELECT * FROM a_chunks WHERE list = ? ORDER BY num ASC"); $sth->execute($list); while (my $row = $sth->fetchrow_hashref()) { if (($num != $row->{num} && $num != 0) || $num > 1000) { # if num is too bif, we can not print chr($num) a a single byte $chunk_data .= $hostkey; $chunk_data .= chr($count); $chunk_data .= $prefixes; print EXPORT "a:", $num, ":", length($hostkey), ":", length($chunk_data), "\n"; print EXPORT $chunk_data; $num = $row->{num}; $chunk_data = ''; $hostkey = ''; $prefixes = ''; $count = 0; } elsif ($num == 0) { $num = $row->{num}; $hostkey = $row->{hostkey}; } if ($hostkey ne $row->{hostkey} && $hostkey ne '') { $chunk_data .= $hostkey; $chunk_data .= chr($count); $chunk_data .= $prefixes; $count = 0; $prefixes = ''; } $hostkey = $row->{hostkey}; if (length($row->{prefix}) > 0) { $prefixes .= $row->{prefix}; $count++; } } $sth->finish(); $chunk_data .= $hostkey; $chunk_data .= chr($count); $chunk_data .= $prefixes; print EXPORT "a:", $num, ":", length($hostkey), ":", length($chunk_data) , "\n"; print EXPORT $chunk_data; # sub chunks $num = 0; $chunk_data = ''; $hostkey = ''; $prefixes = ''; $count = 0; $sth = $self->{dbh}->prepare("SELECT * FROM s_chunks WHERE list = ? ORDER BY num ASC"); $sth->execute($list); while (my $row = $sth->fetchrow_hashref()) { if (($num != $row->{num} && $num != 0) || $num > 1000) { # if num is too bif, we can not print chr($num) a a single byte $chunk_data .= $hostkey; $chunk_data .= chr($count) if (length($hostkey) > 0); $chunk_data .= $prefixes; print EXPORT "s:", $num, ":", length($hostkey), ":", length($chunk_data), "\n"; print EXPORT $chunk_data; $num = $row->{num}; $chunk_data = ''; $hostkey = ''; $prefixes = ''; $count = 0; } elsif ($num == 0) { $num = $row->{num}; $hostkey = $row->{hostkey}; } if ($hostkey ne $row->{hostkey} && $hostkey ne '') { $chunk_data .= $hostkey; $chunk_data .= chr($count) if (length($hostkey) > 0); $chunk_data .= $prefixes; $count = 0; $prefixes = ''; } $hostkey = $row->{hostkey}; if ($row->{add_num} > 0) { $prefixes .= $self->ascii_to_hex( sprintf("%08x", $row->{add_num}) ); } if (length($row->{prefix}) > 0 && $row->{add_num} > 0) { $prefixes .= $row->{prefix}; $count++; } } $sth->finish(); $chunk_data .= $hostkey; $chunk_data .= chr($count) if (length($hostkey) > 0); $chunk_data .= $prefixes; print EXPORT "s:", $num, ":", length($hostkey), ":", length($chunk_data) , "\n"; print EXPORT $chunk_data; CORE::close(EXPORT); } =back =cut sub init { my ($self, %args) = @_; # Should connect to database # Shoudl check if all tables exist } sub create_table_updates { my ($self, %args) = @_; my $schema = qq{ CREATE TABLE updates ( last INT NOT NULL DEFAULT '0', wait INT NOT NULL DEFAULT '0', errors INT NOT NULL DEFAULT '1800', list VARCHAR( 50 ) NOT NULL ); }; # Need to handle errors $self->{dbh}->do($schema); } sub create_table_a_chunks { my ($self, %args) = @_; my $schema = qq{ CREATE TABLE a_chunks ( hostkey VARCHAR( 8 ), prefix VARCHAR( 8 ), num INT NOT NULL, list VARCHAR( 50 ) NOT NULL ); }; $self->{dbh}->do($schema); my $index = qq{ CREATE INDEX a_chunks_hostkey ON a_chunks ( hostkey ); }; $self->{dbh}->do($index); $index = qq{ CREATE INDEX a_chunks_num_list ON a_chunks ( num, list ); }; $self->{dbh}->do($index); $index = qq{ CREATE UNIQUE INDEX a_chunks_unique ON a_chunks ( hostkey, prefix, num, list ); }; $self->{dbh}->do($index); } sub create_table_s_chunks { my ($self, %args) = @_; my $schema = qq{ CREATE TABLE s_chunks ( hostkey VARCHAR( 8 ), prefix VARCHAR( 8 ), num INT NOT NULL, add_num INT Default '0', list VARCHAR( 50 ) NOT NULL ); }; $self->{dbh}->do($schema); my $index = qq{ CREATE INDEX s_chunks_hostkey ON s_chunks ( hostkey ); }; $self->{dbh}->do($index); $index = qq{ CREATE INDEX s_chunks_num ON s_chunks ( num ); }; $self->{dbh}->do($index); $index = qq{ CREATE INDEX s_chunks_num_list ON s_chunks ( num, list ); }; $self->{dbh}->do($index); $index = qq{ CREATE UNIQUE INDEX s_chunks_unique ON s_chunks ( hostkey, prefix, num, add_num, list ); }; $self->{dbh}->do($index); } sub create_table_full_hashes { my ($self, %args) = @_; my $schema = qq{ CREATE TABLE full_hashes ( id INT AUTO_INCREMENT PRIMARY KEY, num INT, hash VARCHAR( 32 ), list VARCHAR( 50 ), timestamp INT Default '0' ); }; $self->{dbh}->do($schema); my $index = qq{ CREATE UNIQUE INDEX hash ON full_hashes ( num, hash, list ); }; $self->{dbh}->do($index); } sub create_table_full_hashes_errors { my ($self, %args) = @_; my $schema = qq{ CREATE TABLE full_hashes_errors ( id INT AUTO_INCREMENT PRIMARY KEY, errors INT Default '0', prefix VARCHAR( 8 ), timestamp INT Default '0' ); }; $self->{dbh}->do($schema); } sub create_table_mac_keys{ my ($self, %args) = @_; my $schema = qq{ CREATE TABLE mac_keys ( client_key VARCHAR( 50 ) Default '', wrapped_key VARCHAR( 50 ) Default '' ); }; $self->{dbh}->do($schema); } sub add_chunks { my ($self, %args) = @_; my $type = $args{type} || 'a'; my $chunknum = $args{chunknum} || 0; my $chunks = $args{chunks} || []; my $list = $args{'list'} || ''; # $self->{dbh}->do("PRAGMA journal_mode = OFF"); # $self->{dbh}->do("PRAGMA synchronous = OFF"); if ($type eq 's') { $self->add_chunks_s(chunknum => $chunknum, chunks => $chunks, list => $list); } elsif ($type eq 'a') { $self->add_chunks_a(chunknum => $chunknum, chunks => $chunks, list => $list); } # $self->{dbh}->do("PRAGMA journal_mode = DELETE"); # $self->{dbh}->do("PRAGMA synchronous = FULL"); } sub add_chunks_s { my ($self, %args) = @_; my $chunknum = $args{chunknum} || 0; my $chunks = $args{chunks} || []; my $list = $args{'list'} || ''; my $add = $self->{dbh}->prepare('INSERT INTO s_chunks (hostkey, prefix, num, add_num, list) VALUES (?, ?, ?, ?, ?)'); my $del = $self->{dbh}->prepare('DELETE FROM s_chunks WHERE hostkey = ? AND prefix = ? AND num = ? AND add_num = ? AND list = ?'); foreach my $chunk (@$chunks) { $del->execute( $chunk->{host}, $chunk->{prefix}, $chunknum, $chunk->{add_chunknum}, $list ); $add->execute( $chunk->{host}, $chunk->{prefix}, $chunknum, $chunk->{add_chunknum}, $list ); } if (scalar @$chunks == 0) { # keep empty chunks $del->execute( '', '', '', $chunknum, $list ); $add->execute( '', '', '', $chunknum, $list ); } } sub add_chunks_a { my ($self, %args) = @_; my $chunknum = $args{chunknum} || 0; my $chunks = $args{chunks} || []; my $list = $args{'list'} || ''; my $add = $self->{dbh}->prepare('INSERT INTO a_chunks (hostkey, prefix, num, list) VALUES (?, ?, ?, ?)'); my $del = $self->{dbh}->prepare('DELETE FROM a_chunks WHERE hostkey = ? AND prefix = ? AND num = ? AND list = ?'); foreach my $chunk (@$chunks) { $del->execute( $chunk->{host}, $chunk->{prefix}, $chunknum, $list ); $add->execute( $chunk->{host}, $chunk->{prefix}, $chunknum, $list ); } if (scalar @$chunks == 0) { # keep empty chunks $del->execute( '', '', $chunknum, $list ); $add->execute( '', '', $chunknum, $list ); } } sub get_add_chunks { my ($self, %args) = @_; my $hostkey = $args{hostkey} || ''; # my $list = $args{'list'} || ''; my @list = (); # my $rows = $self->{dbh}->selectall_arrayref("SELECT * FROM a_chunks WHERE hostkey = ? AND list = ?", { Slice => {} }, $hostkey, $list); my $rows = $self->{dbh}->selectall_arrayref("SELECT * FROM a_chunks WHERE hostkey = ?", { Slice => {} }, $hostkey); foreach my $row (@$rows) { push(@list, { chunknum => $row->{num}, prefix => $row->{prefix}, list => $row->{list}, hostkey => $hostkey }); } return @list; } sub get_sub_chunks { my ($self, %args) = @_; my $hostkey = $args{hostkey} || ''; # my $list = $args{'list'} || ''; my @list = (); # my $rows = $self->{dbh}->selectall_arrayref("SELECT * FROM s_chunks WHERE hostkey = ? AND list = ?", { Slice => {} }, $hostkey, $list); my $rows = $self->{dbh}->selectall_arrayref("SELECT * FROM s_chunks WHERE hostkey = ?", { Slice => {} }, $hostkey); foreach my $row (@$rows) { push(@list, { chunknum => $row->{num}, prefix => $row->{prefix}, addchunknum => $row->{add_num}, list => $row->{list} }); } return @list; } sub get_add_chunks_nums { my ($self, %args) = @_; my $list = $args{'list'} || ''; my @list = (); my $rows = $self->{dbh}->selectall_arrayref("SELECT DISTINCT(num) FROM a_chunks WHERE list = ? ORDER BY num ASC", { Slice => {} }, $list); foreach my $row (@$rows) { push(@list, $row->{num}); } return @list; } sub get_sub_chunks_nums { my ($self, %args) = @_; my $list = $args{'list'} || ''; my @list = (); my $rows = $self->{dbh}->selectall_arrayref("SELECT DISTINCT(num) FROM s_chunks WHERE list = ? ORDER BY num ASC", { Slice => {} }, $list); foreach my $row (@$rows) { push(@list, $row->{num}); } return @list; } sub delete_add_ckunks { my ($self, %args) = @_; my $chunknums = $args{chunknums} || []; my $list = $args{'list'} || ''; my $sth = $self->{dbh}->prepare("DELETE FROM a_chunks WHERE num = ? AND list = ?"); foreach my $num (@$chunknums) { $sth->execute($num, $list); } } sub delete_sub_ckunks { my ($self, %args) = @_; my $chunknums = $args{chunknums} || []; my $list = $args{'list'} || ''; my $sth = $self->{dbh}->prepare("DELETE FROM s_chunks WHERE num = ? AND list = ?"); foreach my $num (@$chunknums) { $sth->execute($num, $list); } } sub get_full_hashes { my ($self, %args) = @_; my $chunknum = $args{chunknum} || 0; my $timestamp = $args{timestamp} || 0; my $list = $args{list} || ''; my @hashes = (); my $rows = $self->{dbh}->selectall_arrayref("SELECT hash FROM full_hashes WHERE timestamp >= ? AND num = ? AND list = ?", { Slice => {} }, $timestamp, $chunknum, $list); foreach my $row (@$rows) { push(@hashes, $row->{hash}); } return @hashes; } sub updated { my ($self, %args) = @_; my $time = $args{'time'} || time; my $wait = $args{'wait'} || 1800; my $list = $args{'list'} || ''; if ($self->last_update(list => $list)->{'time'} == 0) { $self->{dbh}->do("INSERT INTO updates (last, wait, errors, list) VALUES (?, ?, 0, ?)", undef, $time, $wait, $list); } else { $self->{dbh}->do("UPDATE updates SET last = ?, wait = ?, errors = 0 WHERE list = ?", undef, $time, $wait, $list); } } sub update_error { my ($self, %args) = @_; my $time = $args{'time'} || time; my $list = $args{'list'} || ''; my $wait = $args{'wait'} || 60; my $errors = $args{errors} || 1; if ($self->last_update(list => $list)->{'time'} == 0) { $self->{dbh}->do("INSERT INTO updates (last, wait, errors, list) VALUES (?, ?, ?, ?)", undef, $time, $wait, $errors, $list); } else { $self->{dbh}->do("UPDATE updates SET last = ?, wait = ?, errors = ?, list = ? WHERE 1", undef, $time, $wait, $errors, $list); } } sub last_update { my ($self, %args) = @_; my $list = $args{'list'} || ''; my $rows = $self->{dbh}->selectall_arrayref("SELECT last, wait, errors FROM updates WHERE list = ? LIMIT 1", { Slice => {} }, $list); foreach my $row (@$rows) { return {'time' => $row->{'last'} || 0, 'wait' => $row->{'wait'} || 1800, errors => $row->{'errors'} || 0}; } return {'time' => 0, 'wait' => 1800}; } sub add_full_hashes { my ($self, %args) = @_; my $timestamp = $args{timestamp} || time(); my $full_hashes = $args{full_hashes} || []; foreach my $hash (@$full_hashes) { # $self->{dbh}->do("INSERT OR REPLACE INTO full_hashes (num, hash, list, timestamp) VALUES (?, ?, ?, ?)", { }, $hash->{chunknum}, $hash->{hash}, $hash->{list}, $timestamp); $self->{dbh}->do("DELETE FROM full_hashes WHERE num = ? AND hash = ? AND list = ?", { }, $hash->{chunknum}, $hash->{hash}, $hash->{list}); $self->{dbh}->do("INSERT INTO full_hashes (num, hash, list, timestamp) VALUES (?, ?, ?, ?)", { }, $hash->{chunknum}, $hash->{hash}, $hash->{list}, $timestamp); } } sub delete_full_hashes { my ($self, %args) = @_; my $chunknums = $args{chunknums} || []; my $list = $args{list} || croak "Missing list name\n"; my $sth = $self->{dbh}->prepare("DELETE FROM full_hashes WHERE num = ? AND list = ?"); foreach my $num (@$chunknums) { $sth->execute($num, $list); } } sub full_hash_error { my ($self, %args) = @_; my $timestamp = $args{timestamp} || time(); my $prefix = $args{prefix} || ''; my $rows = $self->{dbh}->selectall_arrayref("SELECT id, errors FROM full_hashes_errors WHERE prefix = ? LIMIT 1", { Slice => {} }, $prefix); if (scalar @$rows == 0) { $self->{dbh}->do("INSERT INTO full_hashes_errors (prefix, errors, timestamp) VALUES (?, 1, ?)", { }, $prefix, $timestamp); } else { my $errors = $rows->[0]->{errors} + 1; $self->{dbh}->do("UPDATE full_hashes_errors SET errors = ?, timestamp = ? WHERE id = ?", $errors, $timestamp, $rows->[0]->{id}); } } sub full_hash_ok { my ($self, %args) = @_; my $timestamp = $args{timestamp} || time(); my $prefix = $args{prefix} || ''; my $rows = $self->{dbh}->selectall_arrayref("SELECT id, errors FROM full_hashes_errors WHERE prefix = ? AND errors > 0 LIMIT 1", { Slice => {} }, $prefix); if (scalar @$rows > 0) { $self->{dbh}->do("UPDATE full_hashes_errors SET errors = 0, timestamp = ? WHERE id = ?", $timestamp, $rows->[0]->{id}); $self->{dbh}->do("DELETE FROM full_hashes_errors WHERE id = ?", $rows->[0]->{id}); } } sub get_full_hash_error { my ($self, %args) = @_; my $prefix = $args{prefix} || ''; my $rows = $self->{dbh}->selectall_arrayref("SELECT timestamp, errors FROM full_hashes_errors WHERE prefix = ? LIMIT 1", { Slice => {} }, $prefix); if (scalar @$rows == 0) { return undef; } else { return $rows->[0]; } } sub get_mac_keys { my ($self, %args) = @_; my $rows = $self->{dbh}->selectall_arrayref("SELECT client_key, wrapped_key FROM mac_keys LIMIT 1", { Slice => {} }); if (scalar @$rows == 0) { return { client_key => '', wrapped_key => '' }; } else { return $rows->[0]; } } sub add_mac_keys { my ($self, %args) = @_; my $client_key = $args{client_key} || ''; my $wrapped_key = $args{wrapped_key} || ''; $self->delete_mac_keys(); $self->{dbh}->do("INSERT INTO mac_keys (client_key, wrapped_key) VALUES (?, ?)", { }, $client_key, $wrapped_key); } sub delete_mac_keys { my ($self, %args) = @_; $self->{dbh}->do("DELETE FROM mac_keys WHERE 1"); } sub reset { my ($self, %args) = @_; my $list = $args{'list'} || ''; my $sth = $self->{dbh}->prepare('DELETE FROM s_chunks WHERE list = ?'); $sth->execute( $list ); $sth = $self->{dbh}->prepare('DELETE FROM a_chunks WHERE list = ?'); $sth->execute( $list ); $sth = $self->{dbh}->prepare('DELETE FROM full_hashes WHERE list = ?'); $sth->execute( $list ); $sth = $self->{dbh}->prepare('DELETE FROM full_hashes_errors'); $sth->execute(); $sth = $self->{dbh}->prepare('DELETE FROM updates WHERE list = ?'); $sth->execute( $list ); } sub create_range { my ($self, %args) = @_; my $numbers = $args{numbers} || []; # should already be ordered return '' if (scalar @$numbers == 0); my $range = $$numbers[0]; my $new_range = 0; for(my $i = 1; $i < scalar @$numbers; $i++) { # next if ($$numbers[$i] == $$numbers[$i-1]); # should not happen if ($$numbers[$i] != $$numbers[$i-1] + 1) { $range .= $$numbers[$i-1] if ($i > 1 && $new_range == 1); $range .= ',' . $$numbers[$i]; $new_range = 0 } elsif ($new_range == 0) { $range .= "-"; $new_range = 1; } } $range .= $$numbers[scalar @$numbers - 1] if ($new_range == 1); return $range; } =head1 CHANGELOG =over 4 =item 0.7 New C function. Keep empty sub chunks. Fix index for sub chunks. =item 0.6 Add option keep_all to keep expired full hashes. Useful for debugging. =item 0.5 Return the hostkey in get_add_chunks. =item 0.4 Fix duplicate insert of add chunks and sub chunks. =item 0.3 Add reset function to reset all tables for a given list =item 0.2 Replace "INSERT OR REPLACE" statements by DELETE + INSERT to work with all databases =back =head1 SEE ALSO See L for handling Google Safe Browsing v2. See L for the list of public functions. See L for a back-end using Sqlite. Google Safe Browsing v2 API: L =head1 AUTHOR Julien Sobrier, Ejsobrier@zscaler.comE or Ejulien@sobrier.netE =head1 COPYRIGHT AND LICENSE Copyright (C) 2011 by Julien Sobrier This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available. =cut 1;Net-Google-SafeBrowsing2-1.07/lib/Net/Google/SafeBrowsing2/Sqlite.pm0000644000076400007640000001571111762315054025275 0ustar jsobrierjsobrierpackage Net::Google::SafeBrowsing2::Sqlite; use strict; use warnings; use base 'Net::Google::SafeBrowsing2::DBI'; use Carp; use DBI; use List::Util qw(first); our $VERSION = '0.8'; =head1 NAME Net::Google::SafeBrowsing2::Sqlite - Sqlite as back-end storage for the Google Safe Browsing v2 database =head1 SYNOPSIS use Net::Google::SafeBrowsing2::Sqlite; my $storage = Net::Google::SafeBrowsing2::Sqlite->new(file => 'google-v2.db'); ... $storage->close(); =head1 DESCRIPTION This is an implementation of L using Sqlite. =cut =head1 CONSTRUCTOR =over 4 =head2 new() Create a Net::Google::SafeBrowsing2::Sqlite object my $storage = Net::Google::SafeBrowsing2::Sqlite->new(file => 'google-v2.db'); Arguments =over 4 =item file Required. File to store the database. =item keep_all Optional. Set to 1 to keep old information (such as expiring full hashes) in the database. 0 (delete) by default. =back =back =cut sub new { my ($class, %args) = @_; my $self = { # default arguments keep_all => 0, file => 'gsb2.db', %args, }; bless $self, $class or croak "Can't bless $class: $!"; $self->init(); return $self; } =head1 PUBLIC FUNCTIONS =over 4 See L for a complete list of public functions. =head2 close() Cleanup old full hashes, and close the connection to the database. $storage->close(); =back =cut sub init { my ($self, %args) = @_; $self->{dbh} = DBI->connect("dbi:SQLite:dbname=" . $self->{file}, "", ""); $self->{dbh}->do("PRAGMA journal_mode = OFF"); $self->{dbh}->do("PRAGMA synchronous = OFF"); my @tables = $self->{dbh}->tables; if (! defined first { $_ eq '"main"."updates"' || $_ eq '"updates"' } @tables) { $self->create_table_updates(); } if (! defined first { $_ eq '"main"."a_chunks"' || $_ eq '"a_chunks"' } @tables) { $self->create_table_a_chunks(); } if (! defined first { $_ eq '"main"."s_chunks"' || $_ eq '"s_chunks"' } @tables) { $self->create_table_s_chunks(); } if (! defined first { $_ eq '"main"."full_hashes"' || $_ eq '"full_hashes"' } @tables) { $self->create_table_full_hashes(); } if (! defined first { $_ eq '"main"."full_hashes_errors"' || $_ eq '"full_hashes_errors"' } @tables) { $self->create_table_full_hashes_errors(); } if (! defined first { $_ eq '"main"."mac_keys"' || $_ eq '"mac_keys"' } @tables) { $self->create_table_mac_keys(); } } sub create_table_updates { my ($self, %args) = @_; my $schema = qq{ CREATE TABLE updates ( last INTEGER DEFAULT 0, wait INTEGER DEFAULT 1800, errors INTEGER DEFAULT 0, list TEXT ); }; # Need to handle errors $self->{dbh}->do($schema); } sub create_table_a_chunks { my ($self, %args) = @_; my $schema = qq{ CREATE TABLE a_chunks ( hostkey TEXT, prefix TEXT, num INTEGER, list TEXT ); }; $self->{dbh}->do($schema); my $index = qq{ CREATE INDEX a_chunks_hostkey ON a_chunks ( hostkey ); }; $self->{dbh}->do($index); $index = qq{ CREATE INDEX a_chunks_num_list ON a_chunks ( num, list ); }; $self->{dbh}->do($index); $index = qq{ CREATE UNIQUE INDEX a_chunks_unique ON a_chunks ( hostkey, prefix, num, list ); }; $self->{dbh}->do($index); } sub create_table_s_chunks { my ($self, %args) = @_; my $schema = qq{ CREATE TABLE s_chunks ( hostkey TEXT, prefix TEXT, num INTEGER, add_num INTEGER, list TEXT ); }; $self->{dbh}->do($schema); my $index = qq{ CREATE INDEX s_chunks_hostkey ON s_chunks ( hostkey ); }; $self->{dbh}->do($index); $index = qq{ CREATE INDEX s_chunks_num ON s_chunks ( num ); }; $self->{dbh}->do($index); $index = qq{ CREATE UNIQUE INDEX s_chunks_unique ON s_chunks ( hostkey, prefix, num, add_num, list ); }; $self->{dbh}->do($index); } sub create_table_full_hashes { my ($self, %args) = @_; my $schema = qq{ CREATE TABLE full_hashes ( id INTEGER PRIMARY KEY AUTOINCREMENT, num INTEGER, hash TEXT, list TEXT, timestamp INTEGER ); }; $self->{dbh}->do($schema); my $index = qq{ CREATE UNIQUE INDEX hash ON full_hashes ( num, hash, list ); }; $self->{dbh}->do($index); } sub create_table_full_hashes_errors { my ($self, %args) = @_; my $schema = qq{ CREATE TABLE full_hashes_errors ( id INTEGER PRIMARY KEY AUTOINCREMENT, errors INTEGER, prefix TEXT, timestamp INTEGER ); }; $self->{dbh}->do($schema); } sub create_table_mac_keys{ my ($self, %args) = @_; my $schema = qq{ CREATE TABLE mac_keys ( client_key TEXT Default '', wrapped_key TEXT Default '' ); }; $self->{dbh}->do($schema); } sub add_chunks_s { my ($self, %args) = @_; my $chunknum = $args{chunknum} || 0; my $chunks = $args{chunks} || []; my $list = $args{'list'} || ''; my $add = $self->{dbh}->prepare('INSERT OR IGNORE INTO s_chunks (hostkey, prefix, num, add_num, list) VALUES (?, ?, ?, ?, ?)'); foreach my $chunk (@$chunks) { $add->execute( $chunk->{host}, $chunk->{prefix}, $chunknum, $chunk->{add_chunknum}, $list ); } } sub add_chunks_a { my ($self, %args) = @_; my $chunknum = $args{chunknum} || 0; my $chunks = $args{chunks} || []; my $list = $args{'list'} || ''; my $add = $self->{dbh}->prepare('INSERT OR IGNORE INTO a_chunks (hostkey, prefix, num, list) VALUES (?, ?, ?, ?)'); foreach my $chunk (@$chunks) { $add->execute( $chunk->{host}, $chunk->{prefix}, $chunknum, $list ); } if (scalar @$chunks == 0) { # keep empty chunks $add->execute( '', '', $chunknum, $list ); } } =head1 CHANGELOG =over 4 =item 0.2 Add close() function to clean up old full hashes, and to close the connection to the database cleanly. Add table and function to store and retrieve the Message Authentication Code (MAC) key. In some environments, the module was trying to re-create existing tables. Fixed (Thank you to Luis Alberto Perez). =item 0.3 Fix typos in the documentation. =item 0.4 Disable journalization. This speeds up updated by about 10x. =item 0.5 Use base class L. =item 0.6 Use more efficient add_chunk_a and add_chunk_s functions. =item 0.7 Add option keep_all to keep expired full hashes. Useful for debugging. =item 0.8 Index s_chunks_unique was created at the wrong place. Thanks to colinmkeith. =back =head1 SEE ALSO See L for handling Google Safe Browsing v2. See L for the list of public functions. See L for a back-end using Sqlite. Google Safe Browsing v2 API: L =head1 AUTHOR Julien Sobrier, Ejsobrier@zscaler.comE or Ejulien@sobrier.netE =head1 COPYRIGHT AND LICENSE Copyright (C) 2011 by Julien Sobrier This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available. =cut 1;Net-Google-SafeBrowsing2-1.07/lib/Net/Google/SafeBrowsing2/MySQL.pm0000644000076400007640000001652611763563000025003 0ustar jsobrierjsobrierpackage Net::Google::SafeBrowsing2::MySQL; use strict; use warnings; use base 'Net::Google::SafeBrowsing2::DBI'; use Carp; use DBI; use List::Util qw(first); our $VERSION = '0.5'; =head1 NAME Net::Google::SafeBrowsing2::MySQL - MySQL as back-end storage for the Google Safe Browsing v2 database =head1 SYNOPSIS use Net::Google::SafeBrowsing2::MySQL; my $storage = Net::Google::SafeBrowsing2::MySQL->new(host => '127.0.0.1', database => 'GoogleSafeBrowsingv2'); ... $storage->close(); =head1 DESCRIPTION This is an implementation of L using MySQL. =cut =head1 CONSTRUCTOR =over 4 =head2 new() Create a Net::Google::SafeBrowsing2::MySQL object my $storage = Net::Google::SafeBrowsing2::MySQL->new( host => '127.0.0.1', database => 'GoogleSafeBrowsingv2', username => 'foo', password => 'bar' ); Arguments =over 4 =item host Required. MySQL host name =item database Required. MySQL database name to connect to. =item username Required. MySQL username. =item password Required. MySQL password. =item port Optional. MySQL port number to connect to. =item keep_all Optional. Set to 1 to keep old information (such as expiring full hashes) in the database. 0 (delete) by default. =back =back =cut sub new { my ($class, %args) = @_; my $self = { # default arguments host => '127.0.0.1', database => 'GoogleSafeBrowsingv2', port => 3306, keep_all => 0, %args, }; bless $self, $class or croak "Can't bless $class: $!"; $self->init(); return $self; } =head1 PUBLIC FUNCTIONS =over 4 See L for a complete list of public functions. =head2 close() Cleanup old full hashes, and close the connection to the database. $storage->close(); =back =cut sub init { my ($self, %args) = @_; $self->{dbh} = DBI->connect("DBI:mysql:database=" . $self->{database} . ";host=" . $self->{host} . ";port=" . $self->{port}, $self->{username}, $self->{password}, {'RaiseError' => 1}); my @tables = $self->{dbh}->tables; if (! defined first { $_ =~ '`updates`' } @tables) { $self->create_table_updates(); } if (! defined first { $_ =~ '`a_chunks`' } @tables) { $self->create_table_a_chunks(); } if (! defined first { $_ =~ '`s_chunks`' } @tables) { $self->create_table_s_chunks(); } if (! defined first { $_ =~ '`full_hashes`' } @tables) { $self->create_table_full_hashes(); } if (! defined first { $_ =~ '`full_hashes_errors`' } @tables) { $self->create_table_full_hashes_errors(); } if (! defined first { $_ =~ '`mac_keys`' } @tables) { $self->create_table_mac_keys(); } } sub create_table_updates { my ($self, %args) = @_; my $schema = qq{ CREATE TABLE updates ( last INT NOT NULL DEFAULT '0', wait INT NOT NULL DEFAULT '0', errors INT NOT NULL DEFAULT '1800', list VARCHAR( 50 ) NOT NULL ); }; # Need to handle errors $self->{dbh}->do($schema); } sub create_table_a_chunks { my ($self, %args) = @_; my $schema = qq{ CREATE TABLE a_chunks ( hostkey VARBINARY( 8 ), prefix VARBINARY( 8 ), num INT NOT NULL, list VARCHAR( 50 ) NOT NULL ); }; $self->{dbh}->do($schema); my $index = qq{ CREATE INDEX a_chunks_hostkey ON a_chunks ( hostkey ); }; $self->{dbh}->do($index); $index = qq{ CREATE INDEX a_chunks_num_list ON a_chunks ( num, list ); }; $self->{dbh}->do($index); $index = qq{ CREATE UNIQUE INDEX a_chunks_unique ON a_chunks ( hostkey, prefix, num, list ); }; $self->{dbh}->do($index); } sub create_table_s_chunks { my ($self, %args) = @_; my $schema = qq{ CREATE TABLE s_chunks ( hostkey VARBINARY( 8 ), prefix VARBINARY( 8 ), num INT NOT NULL, add_num INT DEFAULT 0, list VARCHAR( 50 ) NOT NULL ); }; $self->{dbh}->do($schema); my $index = qq{ CREATE INDEX s_chunks_hostkey ON s_chunks ( hostkey ); }; $self->{dbh}->do($index); $index = qq{ CREATE INDEX s_chunks_num ON s_chunks ( num ); }; $self->{dbh}->do($index); $index = qq{ CREATE INDEX s_chunks_num_list ON s_chunks ( num, list ); }; $self->{dbh}->do($index); $index = qq{ CREATE UNIQUE INDEX s_chunks_unique ON s_chunks ( hostkey, prefix, num, add_num, list ); }; $self->{dbh}->do($index); } sub create_table_full_hashes { my ($self, %args) = @_; my $schema = qq{ CREATE TABLE full_hashes ( id INT AUTO_INCREMENT PRIMARY KEY, num INT, hash VARBINARY( 32 ), list VARCHAR( 50 ), timestamp INT Default '0' ); }; $self->{dbh}->do($schema); my $index = qq{ CREATE UNIQUE INDEX hash ON full_hashes ( num, hash, list ); }; $self->{dbh}->do($index); } sub create_table_full_hashes_errors { my ($self, %args) = @_; my $schema = qq{ CREATE TABLE full_hashes_errors ( id INT AUTO_INCREMENT PRIMARY KEY, errors INT Default '0', prefix VARBINARY( 8 ), timestamp INT Default '0' ); }; $self->{dbh}->do($schema); } sub create_table_mac_keys{ my ($self, %args) = @_; my $schema = qq{ CREATE TABLE mac_keys ( client_key VARCHAR( 50 ) Default '', wrapped_key VARCHAR( 50 ) Default '' ); }; $self->{dbh}->do($schema); } sub add_chunks_s { my ($self, %args) = @_; my $chunknum = $args{chunknum} || 0; my $chunks = $args{chunks} || []; my $list = $args{'list'} || ''; my $add = $self->{dbh}->prepare('INSERT IGNORE INTO s_chunks (hostkey, prefix, num, add_num, list) VALUES (?, ?, ?, ?, ?)'); foreach my $chunk (@$chunks) { $add->execute( $chunk->{host}, $chunk->{prefix}, $chunknum, $chunk->{add_chunknum}, $list ); } if (scalar @$chunks == 0) { # keep empty chunks $add->execute( '', '', $chunknum, '', $list ); } } sub add_chunks_a { my ($self, %args) = @_; my $chunknum = $args{chunknum} || 0; my $chunks = $args{chunks} || []; my $list = $args{'list'} || ''; my $add = $self->{dbh}->prepare('INSERT IGNORE INTO a_chunks (hostkey, prefix, num, list) VALUES (?, ?, ?, ?)'); foreach my $chunk (@$chunks) { # 32-byte prefix seen at chunk 69961 # If this becomes more of a problem, the schema will have to be adjusted. if (length($chunk->{prefix}) > 8) { $chunk->{prefix} = substr $chunk->{prefix}, 0, 4; } $add->execute( $chunk->{host}, $chunk->{prefix}, $chunknum, $list ); } if (scalar @$chunks == 0) { # keep empty chunks $add->execute( '', '', $chunknum, $list ); } } =head1 CHANGELOG =over 4 =item 0.5 Keep empty sub chunks. Shorten prefixes greater than 8 bytes (workaround tro keep schema tight) =item 0.4 Add option keep_all to keep expired full hashes. Useful for debugging. =item 0.3 Use more efficient add_chunk_a and add_chunk_s functions. Change data type for prefixes from VARCHAR to VARBINARY. =back =head1 SEE ALSO See L for handling Google Safe Browsing v2. See L for the list of public functions. See L for a back-end using Sqlite. Google Safe Browsing v2 API: L =head1 AUTHOR Julien Sobrier, Ejsobrier@zscaler.comE or Ejulien@sobrier.netE =head1 COPYRIGHT AND LICENSE Copyright (C) 2011 by Julien Sobrier This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available. =cut 1;Net-Google-SafeBrowsing2-1.07/lib/Net/Google/SafeBrowsing2/Storage.pm0000644000076400007640000003543711763037725025456 0ustar jsobrierjsobrierpackage Net::Google::SafeBrowsing2::Storage; use strict; use warnings; use Carp; our $VERSION = '0.4'; =head1 NAME Net::Google::SafeBrowsing2::Storage - Base class for storing the Google Safe Browsing v2 database =head1 SYNOPSIS package Net::Google::SafeBrowsing2::Sqlite; use base 'Net::Google::SafeBrowsing2::Storage'; =head1 DESCRIPTION This is the base class for implementing a storage mechanism for the Google Safe Browsing v2 database. See L for an example of implementation. This module cannot be used on its own as it does not actually store anything. All methods should redefined. Check the code to see which arguments are used, and what should be returned. =cut =head1 CONSTRUCTOR =over 4 =head2 new() Create a Net::Google::SafeBrowsing2::Storage object my $storage => Net::Google::SafeBrowsing2::Storage->new(); =back =cut sub new { my ($class, %args) = @_; my $self = { %args, }; bless $self, $class or croak "Can't bless $class: $!"; return $self; } =head1 PUBLIC FUNCTIONS =over 4 =head2 add_chunks() Add chunk information to the local database $storage->add_chunks(type => 'a', chunknum => 2154, chunks => [{host => HEX, prefix => ''}], list => 'goog-malware-shavar'); Does not return anything. Arguments =over 4 =item type Required. Type of chunk: 'a' (add chunk) or 's' (sub chunk). =item chunknum Required. Chunk number. =item chunks Required. Array of chunks For add chunks, each element of the array is an hash reference in the following format: { host => HEX, prefix => HEX } For sub chunks, each element of the array is an hash reference in the following format: { host => HEX, prefix => HEX, add_chunknum => INTEGER } =item list Required. Google Safe Browsing list name. =back =cut sub add_chunks { my ($self, %args) = @_; my $type = $args{type} || 'a'; my $chunknum = $args{chunknum} || 0; my $chunks = $args{chunks} || []; my $list = $args{'list'} || ''; # Save { type => $type, host => $chunk->{host}, prefix => $chunk->{prefix}, chunknum => $chunknum, list => $list } } =head2 get_add_chunks() Returns a list of chunks for a given host key for all lists. my @chunks = $storage->get_add_chunks(hostkey => HEX); Arguments =over 4 =item hostkey. Required. Host key. =back Return value =over 4 Array of add chunks in the same format as described above: ( { chunknum => 25121, hostkey => hex('12345678'), prefix => '', list => 'goog-malware-shavar' }, { chunknum => '25121', hostkey => hex('12345678'), prefix => hex('2fc96b9f'), list => 'goog-malware-shavar' }, ); =back =cut sub get_add_chunks { my ($self, %args) = @_; my $hostkey = $args{hostkey} || ''; return ( { chunknum => 25121, prefix => '', hostkey => $hostkey, list => 'goog-malware-shavar' }, { chunknum => '25121', prefix => $self->ascii_to_hex('2fc96b9f'), hostkey => $hostkey, list => 'goog-malware-shavar' }, ); } =head2 get_sub_chunks() Returns a list of sub chunks for a given host key for all lists. my @chunks = $storage->get_sub_chunks(hostkey => HEX); Arguments =over 4 =item hostkey Required. Host key. =back Return value =over 4 Array of add chunks in the same format as described above: ( { chunknum => 37441, prefix => '', addchunknum => 23911, list => 'goog-malware-shavar' }, { chunknum => 37441, prefix => '', addchunknum => 22107, list => 'goog-malware-shavar' }, ); =back =cut sub get_sub_chunks { my ($self, %args) = @_; my $hostkey = $args{hostkey} || ''; return ( { chunknum => 37441, prefix => '', addchunknum => 23911, list => 'goog-malware-shavar' }, { chunknum => 37441, prefix => '', addchunknum => 22107, list => 'goog-malware-shavar' }, ); } =head2 get_add_chunks_nums() Returns a list of unique add chunk numbers for a specific list. B: this list should be sorted in ascendant order. my @ids = $storage->get_add_chunks_nums(list => 'goog-malware-shavar'); Arguments =over 4 =item list Required. Google Safe Browsing list name =back Return value =over 4 Array of integers sorted in ascendant order: qw(25121 25122 25123 25124 25125 25126) =back =cut sub get_add_chunks_nums { my ($self, %args) = @_; my $list = $args{'list'} || ''; return qw(25121 25122 25123 25124 25125 25126); } =head2 get_sub_chunks_nums() Returns a list of unique sub chunk numbers for a specific list. B: this list should be sorted in ascendant order. my @ids = $storage->get_sub_chunks_nums(list => 'goog-malware-shavar'); Arguments =over 4 =item list Required. Google Safe Browsing list name =back Return value =over 4 Array of integers sorted in ascendant order: qw(37441 37442 37443 37444 37445 37446 37447 37448 37449 37450) =back =cut sub get_sub_chunks_nums { my ($self, %args) = @_; my $list = $args{'list'} || ''; return qw(37441 37442 37443 37444 37445 37446 37447 37448 37449 37450); } =head2 delete_add_chunks() Delete add chunks from the local database $storage->delete_add_chunks(chunknums => [qw/37444 37445 37446/], list => 'goog-malware-shavar'); Arguments =over 4 =item chunknums Required. Array of chunk numbers =item list Required. Google Safe Browsing list name =back No return value =cut sub delete_add_ckunks { my ($self, %args) = @_; my $chunknums = $args{chunknums} || []; my $list = $args{'list'} || ''; foreach my $num (@$chunknums) { # DELETE FROM [...] WHERE chunknumber = $num AND list = $list } } =head2 delete_sub_chunks() Delete sub chunks from the local database $storage->delete_sub_chunks(chunknums => [qw/37444 37445 37446/], list => 'goog-malware-shavar'); Arguments =over 4 =item chunknums Required. Array of chunk numbers =item list Required. Google Safe Browsing list name =back No return value =cut sub delete_sub_ckunks { my ($self, %args) = @_; my $chunknums = $args{chunknums} || []; my $list = $args{'list'} || ''; foreach my $num (@$chunknums) { # DELETE FROM [...] WHERE chunknumber = $num AND list = $list } } =head2 get_full_hashes() Return a list of full hashes $storage->get_full_hashes(chunknum => 37444, timestamp => time() - 45 * 60 * 60, list => 'goog-malware-shavar'); Arguments =over 4 =item chunknum Required. Add chunk number =item timestamp Required. Request hashes retrieved after this timestamp value. =item list Required. Google Safe Browsing list name =back Return value =over 4 Array of full hashes: (HEX, HEX, HEX) =back =cut sub get_full_hashes { my ($self, %args) = @_; my $chunknum = $args{chunknum} || 0; my $timestamp = $args{timestamp} || 0; my $list = $args{list} || ''; return ( $self->ascii_to_hex('eb9744c011d332ad9c92442d18d5a0f913328ad5623983822fc86fad1aab649d'), $self->ascii_to_hex('2ae11a967a5517e24c7be3fa0b8f56e7a13358ce3b07556dc251bc6b650f0f59') ); } =head2 updated() Save information about a successful database update $storage->updated('time' => time(), wait => 1800, list => 'goog-malware-shavar'); Arguments =over 4 =item time Required. Time of the update. =item wait Required. Number o seconds to wait before doing the next update. =item list Required. Google Safe Browsing list name. =back No return value =cut sub updated { my ($self, %args) = @_; my $time = $args{'time'} || time(); my $wait = $args{'wait'} || 1800; my $list = $args{'list'} || ''; # INSERT INTO [...] (last, wait, errors, list) VALUES (?, ?, 0, ?)", $time, $wait, $list); } =head2 update_error() Save information about a failed database update $storage->update_error('time' => time(), wait => 60, list => 'goog-malware-shavar', errors => 1); Arguments =over 4 =item time Required. Time of the update. =item wait Required. Number o seconds to wait before doing the next update. =item list Required. Google Safe Browsing list name. =item errors Required. Number of errors. =back No return value =cut sub update_error { my ($self, %args) = @_; my $time = $args{'time'} || time(); my $list = $args{'list'} || ''; my $wait = $args{'wait'} || 60; my $errors = $args{errors} || 1; # UPDATE updates SET last = $time, wait = $wait, errors = $errors, list = $list } =head2 last_update() Return information about the last database update my $info = $storage->last_update(list => 'goog-malware-shavar'); Arguments =over 4 =item list Required. Google Safe Browsing list name. =back Return value =over 4 Hash reference { time => time(), wait => 1800, errors => 0 } =back =cut sub last_update { my ($self, %args) = @_; my $list = $args{'list'} || ''; return {'time' => time(), 'wait' => 1800, errors => 0}; } =head2 add_full_hashes() Add full hashes to the local database $storage->add_full_hashes(timestamp => time(), full_hashes => [{chunknum => 2154, hash => HEX, list => 'goog-malware-shavar'}]); Arguments =over 4 =item timestamp Required. Time when the full hash was retrieved. =item full_hashes Required. Array of full hashes. Each element is an hash reference in the following format: { chunknum => INTEGER, hash => HEX, list => 'goog-malware-shavar' } =back No return value =cut sub add_full_hashes { my ($self, %args) = @_; my $timestamp = $args{timestamp} || time(); my $full_hashes = $args{full_hashes} || []; foreach my $hash (@$full_hashes) { # INSERT INTO [...] (num, hash, list, timestamp) VALUES ($hash->{chunknum}, $hash->{hash}, $hash->{list}, $timestamp); } } =head2 delete_full_hashes() Delete full hashes from the local database $storage->delete_full_hashes(chunknums => [qw/2154 2156 2158/], list => 'goog-malware-shavar'); Arguments =over 4 =item chunknums Required. Array of chunk numbers. =item list Required. Google Safe Browsing list name. =back No return value =cut sub delete_full_hashes { my ($self, %args) = @_; my $chunknums = $args{chunknums} || []; my $list = $args{list} || croak "Missing list name\n"; foreach my $num (@$chunknums) { # "DELETE FROM [...] WHERE num = $num AND list = $list } } =head2 full_hash_error() Save information about failed attempt to retrieve a full hash $storage->full_hash_error(timestamp => time(), prefix => HEX); Arguments =over 4 =item timestamp Required. Time when the Google returned an error. =item prefix Required. Host prefix. =back No return value =cut sub full_hash_error { my ($self, %args) = @_; my $timestamp = $args{timestamp} || time(); my $prefix = $args{prefix} || ''; # Add 1 to existing error count } =head2 full_hash_ok() Save information about a successful attempt to retrieve a full hash $storage->full_hash_ok(timestamp => time(), prefix => HEX); Arguments =over 4 =item timestamp Required. Time when the Google returned an error. =item prefix Required. Host prefix. =back No return value =cut sub full_hash_ok { my ($self, %args) = @_; my $timestamp = $args{timestamp} || time(); my $prefix = $args{prefix} || ''; # UPDATE full_hashes_errors SET errors = 0, timestamp = $timestamp WHERE prefix = $prefix } =head2 get_full_hash_error() Save information about a successful attempt to retrieve a full hash my $info = $storage->get_full_hash_error(prefix => HEX); Arguments =over 4 =item prefix Required. Host prefix. =back Return value =over 4 undef if there was no error Hash reference in the following format if there was an error: { timestamp => time(), errors => 3 } =back =cut sub get_full_hash_error { my ($self, %args) = @_; my $prefix = $args{prefix} || ''; # no error return undef; # some error # return { timestamp => time(), errors => 3 } } =head2 get_mac_keys() Retrieve the Message Authentication Code (MAC) keys. my $keys = $storage->get_mac_keys(); No arguments Return value =over 4 Hash reference in the following format: { client_key => '', wrapped_key => '' } =back =cut sub get_mac_keys { my ($self, %args) = @_; return { client_key => '', wrapped_key => '' } } =head2 delete_add_keys() Add the Message Authentication Code (MAC) keys. $storage->delete_mac_keys(client_key => 'KEY', wrapped_key => 'KEY'); Arguments =over 4 =item client_key Required. Client key. =item wrapped_key Required. Wrapped key. =back No return value =cut sub add_mac_keys { my ($self, %args) = @_; my $client_key = $args{client_key} || ''; my $wrapped_key = $args{wrapped_key} || ''; # INSERT INTO ... } =head2 delete_mac_keys() Delete the Message Authentication Code (MAC) keys. $storage->delete_mac_keys(); No arguments No return value =cut sub delete_mac_keys { my ($self, %args) = @_; # # DELETE FROM mac_keys WHERE 1 } =head2 reset() Remove all local data $storage->delete_mac_keys(); Arguments =over 4 =item list Required. Google Safe Browsing list name. =back No return value =cut sub reset { my ($self, %args) = @_; my $list = $args{'list'} || ''; # DELETE FROM s_chunks WHERE list = $list # DELETE FROM a_chunks WHERE list = $list # DELETE FROM full_hashes WHERE list = $list # DELETE FROM full_hashes_errors WHERE list = $list # DELETE FROM updates WHERE list = $list } =back =head1 PRIVATE FUNCTIONS These functions are not intended for debugging purpose. =over 4 =head2 hex_to_ascii() Transform hexadecimal strings to printable ASCII strings. Used mainly for debugging. print $storage->hex_to_ascii('hex value'); =cut sub hex_to_ascii { my ($self, $hex) = @_; my $ascii = ''; while (length $hex > 0) { $ascii .= sprintf("%02x", ord( substr($hex, 0, 1, '') ) ); } return $ascii; } =head2 ascii_to_hex() Transform ASCII strings to hexadecimal strings. print $storage->ascii_to_hex('ascii value'); =cut sub ascii_to_hex { my ($self, $ascii) = @_; my $hex = ''; for (my $i = 0; $i < int(length($ascii) / 2); $i++) { $hex .= chr hex( substr($ascii, $i * 2, 2) ); } return $hex; } =back =head1 CHANGELOG =over 4 =item 0.4 Add reset mehtod to empty local database. =item 0.3 Return the hostkey as part of the add chunks (get_add_chunks). =item 0.2 Add functions to store and retrieve Message Authentication Code (MAC) keys. =back =head1 SEE ALSO See L for handling Google Safe Browsing v2. See L or L for an example of storing and managing the Google Safe Browsing database. Google Safe Browsing v2 API: L =head1 AUTHOR Julien Sobrier, Ejsobrier@zscaler.comE or Ejulien@sobrier.netE =head1 COPYRIGHT AND LICENSE Copyright (C) 2011 by Julien Sobrier This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available. =cut 1;Net-Google-SafeBrowsing2-1.07/lib/Net/Google/SafeBrowsing2/Postgres.pm0000644000076400007640000001273111763552755025655 0ustar jsobrierjsobrierpackage Net::Google::SafeBrowsing2::Postgres; use strict; use warnings; use base 'Net::Google::SafeBrowsing2::DBI'; use Carp; use DBI; use List::Util qw(first); our $VERSION = '0.02'; =head1 NAME Net::Google::SafeBrowsing2::Postgres - Postgres as back-end storage for the Google Safe Browsing v2 database =head1 SYNOPSIS use Net::Google::SafeBrowsing2::Postgres; my $storage = Net::Google::SafeBrowsing2::Postgres->new( host => '127.0.0.1', database => 'google_safe_browsing', username => $username, password => $password, ); # ... $storage->close(); =head1 DESCRIPTION This is an implementation of L using Postgres. =head1 CONSTRUCTOR =over 4 =head2 new() Create a Net::Google::SafeBrowsing2::Postgres object my $storage = Net::Google::SafeBrowsing2::Postgres->new( host => '127.0.0.1', database => 'google_safe_browsing', username => $username, password => $password, ); Arguments =over 4 =item host Specifies Postgres host name. Defaults to 127.0.0.1. =item database Specifies Postgres database name. Defaults to "google_safe_browsing". =item username Specifies the username for the Postgres connection. Required. =item password Specifies the password for the Postgres connection. Required. =item keep_all Optional. Set to 1 to keep old information (such as expiring full hashes) in the database. 0 (delete) by default. =back =back =cut sub new { my ($class, %args) = @_; # Default arguments my $self = { host => '127.0.0.1', database => 'google_safe_browsing', keep_all => 0, %args, }; if (!$self->{username}) { croak "username required"; } if (!$self->{password}) { croak "password required"; } bless $self, $class; $self->init(); return $self; } =head1 PUBLIC FUNCTIONS =over 4 See L for a complete list of public functions. =head2 close() Cleanup old full hashes, and close the connection to the database. $storage->close(); =back =cut sub init { my ($self, %args) = @_; $self->{dbh} = DBI->connect( "DBI:Pg:dbname=" . $self->{database} . ";host=" . $self->{host}, $self->{username}, $self->{password}, { RaiseError => 1, }, ); my @tables = $self->{dbh}->tables; # Postgres reports normal tables (compared to internal ones) with a # prefix of "public." if (! defined first { /public\.updates/ } @tables) { $self->create_table_updates(); } if (! defined first { /public\.a_chunks/ } @tables) { $self->create_table_a_chunks(); } if (! defined first { /public\.s_chunks/ } @tables) { $self->create_table_s_chunks(); } if (! defined first { /public\.full_hashes/ } @tables) { $self->create_table_full_hashes(); } if (! defined first { /public\.full_hashes_errors/ } @tables) { $self->create_table_full_hashes_errors(); } if (! defined first { /public\.mac_keys/ } @tables) { $self->create_table_mac_keys(); } } # Overridden because Postgres uses SERIAL instead of AUTO_INCREMENT. sub create_table_full_hashes { my ($self, %args) = @_; my $schema = qq{ CREATE TABLE full_hashes ( id SERIAL PRIMARY KEY, num INT, hash VARCHAR( 32 ), list VARCHAR( 50 ), timestamp INT Default '0' ); }; $self->{dbh}->do($schema); my $index = qq{ CREATE UNIQUE INDEX hash ON full_hashes ( num, hash, list ); }; $self->{dbh}->do($index); } # Overridden because Postgres uses SERIAL instead of AUTO_INCREMENT. sub create_table_full_hashes_errors { my ($self, %args) = @_; my $schema = qq{ CREATE TABLE full_hashes_errors ( id SERIAL PRIMARY KEY, errors INT Default '0', prefix VARCHAR( 8 ), timestamp INT Default '0' ); }; $self->{dbh}->do($schema); } sub add_chunks_a { my ($self, %args) = @_; my $chunknum = $args{chunknum} || 0; my $chunks = $args{chunks} || []; my $list = $args{'list'} || ''; $self->{add_chunks_a_ins_sth} ||= $self->{dbh}->prepare('INSERT INTO a_chunks (hostkey, prefix, num, list) VALUES (?, ?, ?, ?)'); $self->{add_chunks_a_del_sth} ||= $self->{dbh}->prepare('DELETE FROM a_chunks WHERE hostkey = ? AND prefix = ? AND num = ? AND list = ?'); my $add = $self->{add_chunks_a_ins_sth}; my $del = $self->{add_chunks_a_del_sth}; foreach my $chunk (@$chunks) { # Crude workaround for longer prefixes. Although Google state that the # length varies, the overwhelming majority are 4 bytes. However, I have # seen a 32 byte one (chunk 69961). # # If this becomes more of a problem, the schema will have to be adjusted. if (length($chunk->{prefix}) > 8) { $chunk->{prefix} = substr $chunk->{prefix}, 0, 4; } $del->execute( $chunk->{host}, $chunk->{prefix}, $chunknum, $list ); $add->execute( $chunk->{host}, $chunk->{prefix}, $chunknum, $list ); } if (scalar @$chunks == 0) { # keep empty chunks $del->execute( '', '', $chunknum, $list ); $add->execute( '', '', $chunknum, $list ); } } =head1 SEE ALSO See L for handling Google Safe Browsing v2. =head1 COPYRIGHT AND LICENSE Copyright 2012 Nick Johnston, nickjohnstonsky@gmail.com. Based on C by Julien Sobrier. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available. =cut 1; Net-Google-SafeBrowsing2-1.07/lib/Net/Google/SafeBrowsing2/Lookup.pm0000644000076400007640000002146111762315054025304 0ustar jsobrierjsobrierpackage Net::Google::SafeBrowsing2::Lookup; use strict; use warnings; use Carp; use LWP::UserAgent; use URI; use Text::Trim; our $VERSION = '0.2'; =head1 NAME Net::Google::SafeBrowsing2::Lookup - Perl extension for the Google Safe Browsing v2 Lookup API. =head1 SYNOPSIS use Net::Google::SafeBrowsing2::Lookup; my $gsb = Net::Google::SafeBrowsing2::Lookup->new( key => "my key", ); my %match = $gsb->lookup(url => 'http://www.gumblar.cn/'); if ($match{'http://www.gumblar.cn/'} eq 'malware') { print "http://www.gumblar.cn/ is flagged as a dangerous site\n"; } my %matches = $gsb->lookup(urls => ['http://www.gumblar.cn/', 'http://flahupdate.co.cc']); foreach my $url (keys %matches) { print $url, " is ", $matches{$url}, "\n"; } =head1 DESCRIPTION Net::Google::SafeBrowsing2::Lookup implements the Google Safe Browsing v2 Lookup API. See the API documentation at L. If you need to check more than 10,000 URLs a day, you need to use L. The source code is available on github at L. =head1 CONSTRUCTOR =over 4 =head2 new() Create a Net::Google::SafeBrowsing2::Lookup object my $gsb = Net::Google::SafeBrowsing2::Lookup->new( key => "my key", debug => 0, ); Arguments =over 4 =item key Required. Your Google Safe Browsing API key =item debug Optional. Set to 1 to enable debugging. 0 (disabled) by default. The debug output maybe quite large and can slow down significantly the update and lookup functions. =item errors Optional. Set to 1 to show errors to STDOUT. 0 (disabled by default). =item version Optional. Google Safe Browsing version. 3.0 by default =item delay Optional. Delay, in seconds, between 2 requests to the Google server. See the C function for more details. 0 (no delay) by default =back =back =cut sub new { my ($class, %args) = @_; my $self = { # default arguments key => '', version => '3.0', debug => 0, delay => 0, # errors => 0, # last_error => '', %args, }; bless $self, $class or croak "Can't bless $class: $!"; return $self; } =head1 PUBLIC FUNCTIONS =over 4 =head2 lookup() Lookup a list URLs against the Google Safe Browsing v2 lists. my %match = $gsb->lookup(url => 'http://www.gumblar.cn'); Returns a hash C => C. The possible list of values for C are: "ok" (no match), "malware", "phishing", "malware,phishing" (match both lists) and "error". Arguments =over 4 =item url Optional. Single URL to lookup. =item urls Optional. List of URLs to lookup. The Lookup API allows only 10,000 URL checks a day. if you need more, use the L library. Each requests must contain 500 URLs at most. The lookup() method will split the list of URLS in blocks of 500 URLs if needed. =item delay Optional. If more than 500 URLs are checked, wait C seconds between consecutive requests to avoid rate limiting by Google. =back =cut sub lookup { my ($self, %args) = @_; my $url = $args{url} || ''; my @inputs = @{ $args{urls} || []}; my $delay = $args{delay} || $self->{delay} || 0; if ($url ne '') { push(@inputs, $url); } # Max is 500 URLs per request my %results = (); my $count = 0; while (scalar @inputs > 0) { my @urls = splice(@inputs, 0, 500); my $body = scalar(@urls); foreach my $input (@urls) { my $canonical = $self->canonical_uri($input); $body .= "\n$canonical"; $self->debug("$input => $canonical\n"); } $self->debug("BODY:\n$body\n\n"); my $url = "https://sb-ssl.google.com/safebrowsing/api/lookup?client=perl&apikey=" . $self->{key} . "&appver=$VERSION&pver=" . $self->{version}; sleep $delay if ($delay > 0 && $count > 0); my $res = $self->ua->post($url, Content => $body); if ($res->code == 400) { $self->error("Invalid request"); %results = ( %results, $self->errors(@urls) ); } elsif ($res->code == 401) { $self->error("Invalid API key"); %results = ( %results, $self->errors(@urls) ); } elsif ($res->code == 503) { $self->error("Server error, client may have sent too many requests"); %results = ( %results, $self->errors(@urls) ); } else { %results = ( %results, $self->parse(response => $res, urls => [@urls]) ); } $count++; } return %results; } sub parse { my ($self, %args) = @_; my $response = $args{response} || croak "Missing response\n";; my @urls = @{ $args{urls} || []}; if ($response->code == 204) { $self->debug("No match\n"); return map { $_ => 'ok' } @urls; } my %results = (); my @lines = split /\n/, $response->content; if (scalar @urls != scalar @lines) { $self->error("Number of URLs in the reponse does not match the number of URLs in the request"); $self->error( scalar(@urls) . "/" . scalar(@lines)); $self->error($response->content); return $self->errors(@urls); } for(my $i = 0; $i < scalar(@urls); $i++) { $results{$urls[$i]} = $lines[$i]; } return %results; } sub errors { my ($self, @urls) = @_; return map { $_ => 'error' } @urls; } sub ua { my ($self, %args) = @_; if (! exists $self->{ua}) { my $ua = LWP::UserAgent->new; $ua->timeout(60); $self->{ua} = $ua; } return $self->{ua}; } sub debug { my ($self, $message) = @_; print $message if ($self->{debug} > 0); } sub error { my ($self, $message) = @_; print "ERROR - ", $message, "\n" if ($self->{debug} > 0 || $self->{errors} > 0); $self->{last_error} = $message; } sub canonical_uri { my ($self, $url) = @_; $url = trim $url; # Special case for \t \r \n while ($url =~ s/^([^?]+)[\r\t\n]/$1/sgi) { } my $uri = URI->new($url)->canonical; # does not deal with directory traversing # $self->debug("0. $url => " . $uri->as_string . "\n"); if (! $uri->scheme() || $uri->scheme() eq '') { $uri = URI->new("http://$url")->canonical; } $uri->fragment(''); my $escape = $uri->as_string; # Reduce double // to single / in path while ($escape =~ s/^([a-z]+:\/\/[^?]+)\/\//$1\//sgi) { } # Remove empty fragment $escape =~ s/#$//; # canonial does not handle ../ # $self->debug("\t$escape\n"); while($escape =~ s/([^\/])\/([^\/]+)\/\.\.([\/?].*)?$/$1$3/gi) { } # May have removed ending / # $self->debug("\t$escape\n"); $escape .= "/" if ($escape =~ /^[a-z]+:\/\/[^\/\?]+$/); $escape =~ s/^([a-z]+:\/\/[^\/]+)(\?.*)$/$1\/$2/gi; # $self->debug("\t$escape\n"); # other weird case if domain = digits only, try to translte it to IP address if ((my $domain = URI->new($escape)->host) =~/^\d+$/) { my $ip = num2ip($domain); if (validaddr($ip)) { $uri = URI->new($escape); $uri->host($ip); $escape = $uri->as_string; } } # $self->debug("1. $url => $escape\n"); # Try to escape the path again $url = $escape; while (($escape = URI::Escape::uri_unescape($url)) ne $escape) { # wrong for %23 -> # $url = $escape; } # while (($escape = URI->new($url)->canonical->as_string) ne $escape) { # breask more unit tests than previous # $url = $escape; # } # Fix for %23 -> # while($escape =~ s/#/%23/sgi) { } # $self->debug("2. $url => $escape\n"); # Fix over escaping while($escape =~ s/^([^?]+)%%(%.*)?$/$1%25%25$2/sgi) { } # URI has issues with % in domains, it gets the host wrong # 1. fix the host # $self->debug("Domain: " . URI->new($escape)->host . "\n"); my $exception = 0; while ($escape =~ /^[a-z]+:\/\/[^\/]*([^a-z0-9%_.-\/:])[^\/]*(\/.*)$/) { my $source = $1; my $target = sprintf("%02x", ord($source)); $escape =~ s/^([a-z]+:\/\/[^\/]*)\Q$source\E/$1%\Q$target\E/; $exception = 1; } # 2. need to parse the path again if ($exception && $escape =~ /^[a-z]+:\/\/[^\/]+\/(.+)/) { my $source = $1; my $target = URI::Escape::uri_unescape($source); # print "Source: $source\n"; while ($target ne URI::Escape::uri_unescape($target)) { $target = URI::Escape::uri_unescape($target); } $escape =~ s/\/\Q$source\E/\/$target/; while ($escape =~ s/#/%23/sgi) { } # fragement has been removed earlier while ($escape =~ s/^([a-z]+:\/\/[^\/]+\/.*)%5e/$1\&/sgi) { } # not in the host name # while ($escape =~ s/%5e/&/sgi) { } while ($escape =~ s/%([^0-9a-f]|.[^0-9a-f])/%25$1/sgi) { } } # $self->debug("$url => $escape\n"); # $self->debug(URI->new($escape)->as_string . "\n"); return URI->new($escape); } =back =head1 CHANGELOG =over 4 =item 0.2 Documentation update. =back =head1 SEE ALSO See L for the implementation of Google Safe Browsing v2 API. =head1 AUTHOR Julien Sobrier, Ejsobrier@zscaler.comE or Ejulien@sobrier.netE =head1 COPYRIGHT AND LICENSE Copyright (C) 2012 by Julien Sobrier This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available. =cut 1; Net-Google-SafeBrowsing2-1.07/lib/Net/Google/SafeBrowsing2.pm0000644000076400007640000012541411764517100024034 0ustar jsobrierjsobrierpackage Net::Google::SafeBrowsing2; use strict; use warnings; use Carp; use LWP::UserAgent; use URI; use Digest::SHA qw(sha256); use List::Util qw(first); use Net::IPAddress; use Text::Trim; use Digest::HMAC_SHA1 qw(hmac_sha1 hmac_sha1_hex); use MIME::Base64::URLSafe; use MIME::Base64; use String::HexConvert; use File::Slurp; use Exporter 'import'; our @EXPORT = qw(DATABASE_RESET MAC_ERROR MAC_KEY_ERROR INTERNAL_ERROR SERVER_ERROR NO_UPDATE NO_DATA SUCCESSFUL MALWARE PHISHING); our $VERSION = '1.07'; =head1 NAME Net::Google::SafeBrowsing2 - Perl extension for the Google Safe Browsing v2 API. (Google Safe Browsing v1 has been deprecated by Google.) =head1 SYNOPSIS use Net::Google::SafeBrowsing2; use Net::Google::SafeBrowsing2::Sqlite; my $storage = Net::Google::SafeBrowsing2::Sqlite->new(file => 'google-v2.db'); my $gsb = Net::Google::SafeBrowsing2->new( key => "my key", storage => $storage, ); $gsb->update(); my $match = $gsb->lookup(url => 'http://www.gumblar.cn/'); if ($match eq MALWARE) { print "http://www.gumblar.cn/ is flagged as a dangerous site\n"; } $storage->close(); =head1 DESCRIPTION Net::Google::SafeBrowsing2 implements the Google Safe Browsing v2 API. The library passes most of the unit tests listed in the API documentation. See the documentation (L) for more details about the failed tests. The Google Safe Browsing database must be stored and managed locally. L uses Sqlite as the storage back-end, L uses MySQL. Other storage mechanisms (databases, memory, etc.) can be added and used transparently with this module. You may want to look at "Google Safe Browsing v2: Implementation Notes" (L), a collection of notes and real-world numbers about the API. This is intended for people who want to learn more about the API, whether as a user or to make their own implementation. The source code is available on github at L. If you do not need to inspect more than 10,000 URLs a day, you can use L with the Google Safe Browsing v2 Lookup API which does not require to store and maintain a local database. IMPORTANT: If you start with an empty database, you will need to perform several updates to retrieve all the Google Safe Browsing information. This may require up to 24 hours. This is a limitation of the Google API, not of this module. See "Google Safe Browsing v2: Implementation Notes" at L. =head1 CONSTANTS Several constants are exported by this module: =over 4 =item DATABASE_RESET Google requested to reset (empty) the local database. =item MAC_ERROR The replies from Google could not be validated with the MAC keys. =item MAC_KEY_ERROR The request for the MAC keys failed. =item INTERNAL_ERROR An internal error occurred. =item SERVER_ERROR The server sent an error back to the client. =item NO_UPDATE No update was performed, probably because it is too early to make a new request to Google Safe Browsing. =item NO_DATA No data was sent back by Google to the client, probably because the database is up to date. =item SUCCESSFUL The operation was successful. =item MALWARE Name of the Malware list in Google Safe Browsing (shortcut to 'goog-malware-shavar') =item PHISHING Name of the Phishing list in Google Safe Browsing (shortcut to 'googpub-phish-shavar') =back =cut use constant { DATABASE_RESET => -6, MAC_ERROR => -5, MAC_KEY_ERROR => -4, INTERNAL_ERROR => -3, # internal/parsing error SERVER_ERROR => -2, # Server sent an error back NO_UPDATE => -1, # no update (too early) NO_DATA => 0, # no data sent SUCCESSFUL => 1, # data sent MALWARE => 'goog-malware-shavar', PHISHING => 'googpub-phish-shavar', FULL_HASH_TIME => 45 * 60, INTERVAL_FULL_HASH_TIME => 'INTERVAL 45 MINUTE', }; =head1 CONSTRUCTOR =over 4 =head2 new() Create a Net::Google::SafeBrowsing2 object my $gsb = Net::Google::SafeBrowsing2->new( key => "my key", storage => Net::Google::SafeBrowsing2::Sqlite->new(file => 'google-v2.db'), debug => 0, mac => 0, list => MALWARE, ); Arguments =over 4 =item key Required. Your Google Safe browsing API key =item storage Required. Object which handle the storage for the Google Safe Browsing database. See L for more details. =item list Optional. The Google Safe Browsing list to handle. By default, handles both MALWARE and PHISHING. =item mac Optional. Set to 1 to enable Message Authentication Code (MAC). 0 (disabled) by default. =item debug Optional. Set to 1 to enable debugging. 0 (disabled) by default. The debug output maybe quite large and can slow down significantly the update and lookup functions. =item errors Optional. Set to 1 to show errors to STDOUT. 0 (disabled by default). =item version Optional. Google Safe Browsing version. 2.2 by default =back =back =cut sub new { my ($class, %args) = @_; my $self = { # default arguments list => ['googpub-phish-shavar', 'goog-malware-shavar'], key => '', version => '2.2', debug => 0, errors => 0, last_error => '', mac => 0, %args, }; if (! exists $self->{storage}) { use Net::Google::SafeBrowsing2::Storage; $self->{storage} = Net::Google::SafeBrowsing2::Storage->new(); } if (ref $self->{list} ne 'ARRAY') { $self->{list} = [$self->{list}]; } bless $self, $class or croak "Can't bless $class: $!"; return $self; } =head1 PUBLIC FUNCTIONS =over 4 =head2 update() Perform a database update. $gsb->update(); Return the status of the update (see the list of constants above): INTERNAL_ERROR, SERVER_ERROR, NO_UPDATE, NO_DATA or SUCCESSFUL This function can handle two lists at the same time. If one of the list should not be updated, it will automatically skip it and update the other one. It is faster to update two lists at once rather than doing them one by one. NOTE: If you start with an empty database, you will need to perform several updates to retrieve all the Google Safe Browsing information. This may require up to 24 hours. This is a limitation of the Google API, not of this module. See "Google Safe Browsing v2: Implementation Notes" at L. Arguments =over 4 =item list Optional. Update a specific list. Use the list(s) from new() by default. =item mac Optional. Set to 1 to enable Message Authentication Code (MAC). Use the value from new() by default. =item force Optional. Force the update (1). Disabled by default (0). Be careful if you set this option to 1 as too frequent updates might result in the blacklisting of your API key. =back =cut sub update { my ($self, %args) = @_; # my @lists = @{[$args{list}]} || @{$self->{list}} || croak "Missing list name\n"; my $list = $args{list}; my $force = $args{force} || 0; my $mac = $args{mac} || $self->{mac} || 0; my @lists = @{$self->{list}}; @lists = @{[$args{list}]} if (defined $list); my $result = 0; # too early to update? need to handle errors my $i = 0; while ($i < scalar @lists) { my $list = $lists[$i]; my $info = $self->{storage}->last_update(list => $list); if ($info->{'time'} + $info->{'wait'} > time && $force == 0) { $self->debug("Too early to update $list\n"); splice(@lists, $i, 1); } else { $self->debug("OK to update $list: " . time() . "/" . ($info->{'time'} + $info->{'wait'}) . "\n"); $i++; } } if (scalar @lists == 0) { $self->debug("Too early to update any list\n"); return NO_UPDATE; } # MAC? my $client_key = ''; my $wrapped_key = ''; if ($mac) { ($client_key, $wrapped_key) = $self->get_mac_keys(); if ($client_key eq '' || $wrapped_key eq '') { return MAC_KEY_ERROR; } } my $ua = $self->ua; my $url = "http://safebrowsing.clients.google.com/safebrowsing/downloads?client=api&apikey=" . $self->{key} . "&appver=$VERSION&pver=" . $self->{version}; $url .= "&wrkey=$wrapped_key" if ($mac); my $body = ''; foreach my $list (@lists) { # Report existng chunks my $a_range = $self->create_range(numbers => [$self->{storage}->get_add_chunks_nums(list => $list)]); my $s_range = $self->create_range(numbers => [$self->{storage}->get_sub_chunks_nums(list => $list)]); my $chunks_list = ''; if ($a_range ne '') { $chunks_list .= "a:$a_range"; } if ($s_range ne '') { $chunks_list .= ":" if ($a_range ne ''); $chunks_list .= "s:$s_range"; } $body .= "$list;$chunks_list"; $body .= ":mac" if ($mac); $body .= "\n"; } my $res = $ua->post($url, Content => $body); # $self->debug($res->request->as_string . "\n" . $res->as_string . "\n"); $self->debug($res->request->as_string . "\n"); $self->debug($res->as_string . "\n"); if (! $res->is_success) { $self->error("Request failed\n"); foreach my $list (@lists) { $self->update_error('time' => time(), list => $list); } return SERVER_ERROR; } my $last_update = time; my $wait = 0; my @redirections = (); my @lines = split/\s/, $res->decoded_content; $list = ''; foreach my $line (@lines) { if ($line =~ /n:\s*(\d+)\s*$/) { $self->debug("Next poll: $1 seconds\n"); $wait = $1; } elsif ($line =~ /i:\s*(\S+)\s*$/) { $self->debug("List: $1\n"); $list = $1; } elsif ($line =~ /u:\s*(\S+),(\S+)\s*$/) { $self->debug("Redirection: $1\n"); $self->debug("MAC: $2\n"); push(@redirections, [$1, $list, $2]); } elsif ($line =~ /u:\s*(\S+)\s*$/) { $self->debug("Redirection: $1\n"); push(@redirections, [$1, $list, '']); } elsif ($line =~ /ad:(\S+)$/) { $self->debug("Delete Add Chunks: $1\n"); my @nums = $self->expand_range(range => $1); $self->{storage}->delete_add_ckunks(chunknums => [@nums], list => $list); # Delete full hash as well $self->{storage}->delete_full_hashes(chunknums => [@nums], list => $list); $result = 1; } elsif ($line =~ /sd:(\S+)$/) { $self->debug("Delete Sub Chunks: $1\n"); my @nums = $self->expand_range(range => $1); $self->{storage}->delete_sub_ckunks(chunknums => [@nums], list => $list); $result = 1; } elsif ($line =~ /m:(\S+)$/ && $mac) { my $hmac = $1; $self->debug("MAC of request: $hmac\n"); # Remove this line for data my $data = $res->decoded_content; $data =~ s/^m:(\S+)\n//g; if (! $self->validate_data_mac(data => $data, key => $client_key, digest => $hmac) ) { $self->error("MAC error on main request\n"); return MAC_ERROR; } } elsif ($line =~ /e:pleaserekey/ && $mac) { $self->debug("MAC key has been expired\n"); $self->{storage}->delete_mac_keys(); return $self->update(list => $list, force => $force, mac => $mac); } elsif ($line =~ /r:pleasereset/) { $self->debug("Database must be reset\n"); $self->{storage}->reset(list => $list); return DATABASE_RESET; } } $self->debug("\n"); $result = 1 if (scalar @redirections > 0); foreach my $data (@redirections) { my $redirection = $data->[0]; $list = $data->[1]; my $hmac = $data->[2]; $self->debug("Checking redirection http://$redirection ($list)\n"); $res = $ua->get("http://$redirection"); if (! $res->is_success) { $self->error("Request to $redirection failed\n"); foreach my $list (@lists) { $self->update_error('time' => $last_update, list => $list); } return SERVER_ERROR; } $self->debug(substr($res->as_string, 0, 250) . "\n\n") if ($self->{debug}); $self->debug(substr($res->content, 0, 250) . "\n\n") if ($self->{debug}); my $data = $res->content; if ($mac && ! $self->validate_data_mac(data => $data, key => $client_key, digest => $hmac) ) { $self->error("MAC error on redirection\n"); $self->debug("Length of data: " . length($data) . "\n"); return MAC_ERROR; } my $result = $self->parse_data(data => $data, list => $list); if ($result != SUCCESSFUL) { foreach my $list (@lists) { $self->update_error('time' => $last_update, list => $list); } return $result; } } foreach my $list (@lists) { $self->debug("List update: $last_update $wait $list\n"); $self->{storage}->updated('time' => $last_update, 'wait' => $wait, list => $list); } return $result; # ok } =head2 import_chunks() Import add and sub chunks from a file. my $result = $gsb->import_chunks(list => MALWARE, file => 'malware.dat'); Return the status of the import: INTERNAL_ERROR or SUCCESSFUL. This function should be used to initialize an empty back-end storage. Arguments =over 4 =item list Required. Google list to use. =item file Required. File that contains the list of chunks. This file can be created with the C function inherited from C. =back =cut sub import_chunks { my ($self, %args) = @_; my $list = $args{list} || ''; my $file = $args{file} || "$list.dat"; my $data = read_file($file, { binmode => ':raw' }); return $self->parse_data(data => $data, list => $list); } =head2 lookup() Lookup a URL against the Google Safe Browsing database. my $match = $gsb->lookup(url => 'http://www.gumblar.cn'); Returns the name of the list if there is any match, returns an empty string otherwise. Arguments =over 4 =item list Optional. Lookup against a specific list. Use the list(s) from new() by default. =item url Required. URL to lookup. =back =cut sub lookup { my ($self, %args) = @_; my $list = $args{list} || ''; my $url = $args{url} || return ''; my @lists = @{$self->{list}}; @lists = @{[$args{list}]} if ($list ne ''); # TODO: create our own URI management for canonicalization # fix for http:///foo.com (3 ///) $url =~ s/^(https?:\/\/)\/+/$1/; my $uri = URI->new($url)->canonical; my $domain = $uri->host; my @hosts = $self->canonical_domain_suffixes($domain); # only top-3 in this case foreach my $host (@hosts) { $self->debug("Domain for key: $domain => $host\n"); my $suffix = $self->prefix("$host/"); # Don't forget trailing hash $self->debug("Host key: " . $self->hex_to_ascii($suffix) . "\n"); my $match = $self->lookup_suffix(lists => [@lists], url => $url, suffix => $suffix); return $match if ($match ne ''); } return ''; } =head2 get_lists() Returns the name of all the Google Safe Browsing lists my $@lists = $gsb->get_lists(); NOTE: this function is useless in practice because Google includes some lists which cannot be used by the Google Safe Browsing API, like lists used by the Google toolbar. =cut sub get_lists { my ($self, %args) = @_; my $url = "http://safebrowsing.clients.google.com/safebrowsing/list?client=api&apikey=" . $self->{key} . "&appver=$VERSION&pver=" . $self->{version}; my $res = $self->ua->get($url); return split/\s/, $res->decoded_content; # 1 list per line } =head2 last_error() Get/Set the last error message. print "Last error: ", $gsb->last_error(), "\n"; $gsb->last_error(''); # Reset last error NOTE: the last error message might not come from the last call. Returns an empty string if no errors. =cut sub last_error { my ($self, $message) = @_; if (defined $message) { $self->{last_error} = $message; } else { return $self->{last_error}; } } =pod =back =head1 PRIVATE FUNCTIONS These functions are not intended to be used externally. =over 4 =head2 lookup_suffix() Lookup a host prefix. =cut sub lookup_suffix { my ($self, %args) = @_; my $lists = $args{lists} || croak "Missing lists\n"; my $url = $args{url} || return ''; my $suffix = $args{suffix} || return ''; # Calculcate prefixes my @full_hashes = $self->full_hashes($url); # Get the prefixes from the first 4 bytes my @full_hashes_prefix = map (substr($_, 0, 4), @full_hashes); # Local lookup my @add_chunks = $self->local_lookup_suffix(lists => $lists, url => $url, suffix => $suffix, full_hashes_prefix => [@full_hashes_prefix]); if (scalar @add_chunks == 0) { $self->debug("No hit in local lookup\n"); return ''; } # Check against full hashes my $found = ''; # get stored full hashes foreach my $add_chunk (@add_chunks) { my @hashes = $self->{storage}->get_full_hashes( chunknum => $add_chunk->{chunknum}, timestamp => time() - FULL_HASH_TIME, list => $add_chunk->{list}); $self->debug("Full hashes already stored for chunk " . $add_chunk->{chunknum} . ": " . scalar @hashes . "\n"); foreach my $full_hash (@full_hashes) { foreach my $hash (@hashes) { if ($hash eq $full_hash && defined first { $add_chunk->{list} eq $_ } @$lists) { $self->debug("Full hash was found in storage\n"); $found = $add_chunk->{list}; last; } # elsif ($hash ne $full_hash) { # $self->debug($self->hex_to_ascii($hash) . " ne " . $self->hex_to_ascii($full_hash) . "\n\n"); # } } last if ($found ne ''); } last if ($found ne ''); } return $found if ($found ne ''); # ask for new hashes # TODO: make sure we don't keep asking for the same over and over my @hashes = $self->request_full_hash(prefixes => [ map($_->{prefix} || $_->{hostkey}, @add_chunks) ]); $self->{storage}->add_full_hashes(full_hashes => [@hashes], timestamp => time()); foreach my $full_hash (@full_hashes) { my $hash = first { $_->{hash} eq $full_hash} @hashes; next if (! defined $hash); my $list = first { $hash->{list} eq $_ } @$lists; if (defined $hash && defined $list) { # $self->debug($self->hex_to_ascii($hash->{hash}) . " eq " . $self->hex_to_ascii($full_hash) . "\n\n"); $self->debug("Match\n"); return $hash->{list}; } # elsif (defined $hash) { # $self->debug("hash: " . $self->hex_to_ascii($hash->{hash}) . "\n"); # $self->debug("list: " . $hash->{list} . "\n"); # } } $self->debug("No match\n"); return ''; } =head2 lookup_suffix() Lookup a host prefix in the local database only. =cut sub local_lookup_suffix { my ($self, %args) = @_; my $lists = $args{lists} || croak "Missing lists\n"; my $url = $args{url} || return (); my $suffix = $args{suffix} || return (); my $full_hashe_list = $args{full_hashes} || []; my $full_hashes_prefix_list = $args{full_hashes_prefix} || []; # Step 1: get all add chunks for this host key # Do it for all lists my @add_chunks = $self->{storage}->get_add_chunks(hostkey => $suffix); # return scalar @add_chunks; if (scalar @add_chunks == 0) { # no match $self->debug("No host key\n"); return @add_chunks; } # Step 2: calculcate prefixes # Get the prefixes from the first 4 bytes my @full_hashes_prefix = @{$full_hashes_prefix_list}; if (scalar @full_hashes_prefix == 0) { my @full_hashes = @{$full_hashe_list}; @full_hashes = $self->full_hashes($url) if (scalar @full_hashes == 0); @full_hashes_prefix = map (substr($_, 0, 4), @full_hashes); } # Step 3: filter out add_chunks with prefix my $i = 0; while ($i < scalar @add_chunks) { if ($add_chunks[$i]->{prefix} ne '') { my $found = 0; foreach my $hash_prefix (@full_hashes_prefix) { if ( $add_chunks[$i]->{prefix} eq $hash_prefix) { $found = 1; last; } # else { # $self->debug( $self->hex_to_ascii($add_chunks[$i]->{prefix}) . " ne " . $self->hex_to_ascii($hash_prefix) . "\n" ); # } } if ($found == 0) { $self->debug("No prefix found\n"); splice(@add_chunks, $i, 1); } else { $i++; } } else { $i++; } } if (scalar @add_chunks == 0) { $self->debug("No prefix match for any host key\n"); return @add_chunks; } # Step 4: get all sub chunks for this host key my @sub_chunks = $self->{storage}->get_sub_chunks(hostkey => $suffix); foreach my $sub_chunk (@sub_chunks) { my $i = 0; while ($i < scalar @add_chunks) { my $add_chunk = $add_chunks[$i]; if ($add_chunk->{chunknum} != $sub_chunk->{addchunknum} || $add_chunk->{list} ne $sub_chunk->{list}) { $i++; next; } if ($sub_chunk->{prefix} eq $add_chunk->{prefix}) { splice(@add_chunks, $i, 1); } else { $i++; } } } if (scalar @add_chunks == 0) { $self->debug("All add_chunks have been removed by sub_chunks\n"); } return @add_chunks; } =head2 local_lookup() Lookup a URL against the local Google Safe Browsing database URL. This should be used for debugging purpose only. See the lookup for normal use. my $match = $gsb->local_lookup(url => 'http://www.gumblar.cn'); Returns the name of the list if there is any match, returns an empty string otherwise. Arguments =over 4 =item list Optional. Lookup against a specific list. Use the list(s) from new() by default. =item url Required. URL to lookup. =back =cut sub local_lookup { my ($self, %args) = @_; my $list = $args{list} || ''; my $url = $args{url} || return ''; my @lists = @{$self->{list}}; @lists = @{[$args{list}]} if ($list ne ''); # TODO: create our own URI management for canonicalization # fix for http:///foo.com (3 ///) $url =~ s/^(https?:\/\/)\/+/$1/; my $uri = URI->new($url)->canonical; my $domain = $uri->host; my @hosts = $self->canonical_domain_suffixes($domain); # only top-3 in this case foreach my $host (@hosts) { $self->debug("Domain for key: $domain => $host\n"); my $suffix = $self->prefix("$host/"); # Don't forget trailing hash $self->debug("Host key: " . $self->hex_to_ascii($suffix) . "\n"); my @matches = $self->local_lookup_suffix(lists => [@lists], url => $url, suffix => $suffix); # return $matches[0]->{list} if (scalar @matches > 0); return $matches[0]->{list} . " " . $matches[0]->{chunknum} if (scalar @matches > 0); } return ''; } =head2 request_key() Request the Message Authentication Code (MAC) keys =cut sub get_mac_keys { my ($self, %args) = @_; my $keys = $self->{storage}->get_mac_keys(); if ($keys->{client_key} eq '' || $keys->{wrapped_key} eq '') { my ($client_key, $wrapped_key) = $self->request_mac_keys(); # $self->debug("Client key: $client_key\n"); $self->{storage}->add_mac_keys(client_key => $client_key, wrapped_key => $wrapped_key); return ($client_key, $wrapped_key); } return ($keys->{client_key}, $keys->{wrapped_key}); } =head2 request_mac_keys() Request the Message Authentication Code (MAC) keys from Google. =cut sub request_mac_keys { my ($self, %args) = @_; my $client_key = ''; my $wrapped_key = ''; my $url = "http://sb-ssl.google.com/safebrowsing/newkey?client=api&apikey=" . $self->{key} . "&appver=$VERSION&pver=" . $self->{version}; my $res = $self->ua->get($url); if (! $res->is_success) { $self->error("Key request failed: " . $res->code . "\n"); return ($client_key, $wrapped_key); } my $data = $res->decoded_content; if ($data =~ s/^clientkey:(\d+)://mi) { my $length = $1; $self->debug("MAC client key length: $length\n"); $client_key = substr($data, 0, $length, ''); $self->debug("MAC client key: $client_key\n"); substr($data, 0, 1, ''); # remove \n if ($data =~ s/^wrappedkey:(\d+)://mi) { $length = $1; $self->debug("MAC wrapped key length: $length\n"); $wrapped_key = substr($data, 0, $length, ''); $self->debug("MAC wrapped key: $wrapped_key\n"); return (decode_base64($client_key), $wrapped_key); } else { return ('', ''); } } return ($client_key, $wrapped_key); } =head2 validate_data_mac() Validate data against the MAC keys. =cut sub validate_data_mac { my ($self, %args) = @_; my $data = $args{data} || ''; my $key = $args{key} || ''; my $digest = $args{digest} || ''; # my $hash = urlsafe_b64encode trim hmac_sha1($data, decode_base64($key)); # my $hash = urlsafe_b64encode (trim (hmac_sha1($data, decode_base64($key)))); my $hash = urlsafe_b64encode(hmac_sha1($data, $key)); $hash .= '='; $self->debug("$hash / $digest\n"); # $self->debug(urlsafe_b64encode(hmac_sha1($data, decode_base64($key))) . "\n"); # $self->debug(urlsafe_b64encode(trim(hmac_sha1($data, decode_base64($key)))) . "\n"); return ($hash eq $digest); } =head2 update_error() Handle server errors during a database update. =cut sub update_error { my ($self, %args) = @_; my $time = $args{'time'} || time; my $list = $args{'list'} || ''; my $info = $self->{storage}->last_update(list => $list); $info->{errors} = 0 if (! exists $info->{errors}); my $errors = $info->{errors} + 1; my $wait = 0; $wait = $errors == 1 ? 60 : $errors == 2 ? int(30 * 60 * (rand(1) + 1)) # 30-60 mins : $errors == 3 ? int(60 * 60 * (rand(1) + 1)) # 60-120 mins : $errors == 4 ? int(2 * 60 * 60 * (rand(1) + 1)) # 120-240 mins : $errors == 5 ? int(4 * 60 * 60 * (rand(1) + 1)) # 240-480 mins : $errors > 5 ? 480 * 60 : 0; $self->{storage}->update_error('time' => $time, list => $list, 'wait' => $wait, errors => $errors); } =head2 lookup_whitelist() Lookup a host prefix and suffix in the whitelist (s chunks) =cut sub lookup_whitelist { my ($self, %args) = @_; my $suffix = $args{suffix} || return 0; my $prefix = $args{prefix} || ''; my $chuknum = $args{chunknum} || return 0; foreach my $schunknum (keys %{ $self->{s_chunks} }) { foreach my $chunk ( @{ $self->{s_chunks}->{$schunknum} }) { if ($chunk->{host} eq $suffix && ($chunk->{prefix} eq $prefix || $chunk->{prefix} eq '') && $chunk->{add_chunknum} == $chuknum) { return 1; } } } return 0; } =head2 ua() Create LWP::UserAgent to make HTTP requests to Google. =cut sub ua { my ($self, %args) = @_; if (! exists $self->{ua}) { my $ua = LWP::UserAgent->new; $ua->timeout(60); $self->{ua} = $ua; } return $self->{ua}; } =head2 parse_s() Parse data from a rediration (add asnd sub chunk information). =cut sub parse_data { my ($self, %args) = @_; my $data = $args{data} || ''; my $list = $args{list} || ''; my $chunk_num = 0; my $hash_length = 0; my $chunk_length = 0; while (length $data > 0) { # print "Length 1: ", length $data, "\n"; # 58748 my $type = substr($data, 0, 2, ''); # s:34321:4:137 # print "Length 1.5: ", length $data, "\n"; # 58746 -2 if ($data =~ /^(\d+):(\d+):(\d+)\n/sgi) { $chunk_num = $1; $hash_length = $2; $chunk_length = $3; # shorten data substr($data, 0, length($chunk_num) + length($hash_length) + length($chunk_length) + 3, ''); # print "Remove ", length($chunk_num) + length($hash_length) + length($chunk_length) + 3, "\n"; # print "Length 2: ", length $data, "\n"; # 58741 -5 my $encoded = substr($data, 0, $chunk_length, ''); # print "Length 3: ", length $data, "\n"; # 58604 -137 if ($type eq 's:') { my @chunks = $self->parse_s(value => $encoded, hash_length => $hash_length); $self->{storage}->add_chunks(type => 's', chunknum => $chunk_num, chunks => [@chunks], list => $list); # Must happen all at once => not 100% sure } elsif ($type eq 'a:') { my @chunks = $self->parse_a(value => $encoded, hash_length => $hash_length); $self->{storage}->add_chunks(type => 'a', chunknum => $chunk_num, chunks => [@chunks], list => $list); # Must happen all at once => not 100% sure } else { $self->error("Incorrect chunk type: $type, should be a: or s:\n"); return INTERNAL_ERROR;# failed } $self->debug("$type$chunk_num:$hash_length:$chunk_length OK\n"); } else { $self->error("could not parse header\n"); return INTERNAL_ERROR;# failed } } return SUCCESSFUL; } =head2 parse_s() Parse s chunks information for a database update. =cut sub parse_s { my ($self, %args) = @_; my $value = $args{value} || return (); my $hash_length = $args{hash_length} || 4; my @data = (); while (length $value > 0) { # my $host = $self->hex_to_ascii( substr($value, 0, 4, '') ); # Host hash my $host = substr($value, 0, 4, ''); # HEX # print "\t Host key: $host\n"; my $count = substr($value, 0, 1, ''); # hex value $count = ord($count); # my $add_chunk_num_hex; if ($count == 0) { # ADDCHUNKNUM only # $self->debug("\nadd_chuknum: " . substr($value, 0, 4) . " => "); my $add_chunknum = hex($self->hex_to_ascii( substr($value, 0, 4, '') ) ); #chunk num # $self->debug("$add_chunknum\n"); push(@data, { host => $host, add_chunknum => $add_chunknum, prefix => '' }); if ($self->{debug}) { $self->debug("\t" . $self->hex_to_ascii($host) . " $add_chunknum\n"); } } else { # ADDCHUNKNUM + PREFIX for(my $i = 0; $i < $count; $i++) { # my $add_chunknum = $self->hex_to_ascii( substr($value, 0, 4, '') ); #chunk num - ACII # $self->debug("\nadd_chuknum: " . substr($value, 0, 4) . " => "); my $add_chunknum = hex($self->hex_to_ascii( substr($value, 0, 4, '') )); # DEC # $self->debug("$add_chunknum\n"); # my $prefix = $self->hex_to_ascii( substr($value, 0, $hash_length, '') ); # ASCII my $prefix = substr($value, 0, $hash_length, ''); # HEX push(@data, { host => $host, add_chunknum => $add_chunknum, prefix => $prefix }); if ($self->{debug}) { $self->debug("\t" . $self->hex_to_ascii($host) . " $add_chunknum " . $self->hex_to_ascii($prefix) . "\n"); } } } } return @data; } =head2 parse_a() Parse a chunks information for a database update. =cut sub parse_a { my ($self, %args) = @_; my $value = $args{value} || return (); my $hash_length = $args{hash_length} || 4; my @data = (); while (length $value > 0) { # my $host = $self->hex_to_ascii( substr($value, 0, 4, '') ); # Host hash my $host = substr($value, 0, 4, ''); # HEX # print "\t Host key: $host\n"; my $count = substr($value, 0, 1, ''); # hex value $count = ord($count); if ($count > 0) { # ADDCHUNKNUM only for(my $i = 0; $i < $count; $i++) { # my $prefix = $self->hex_to_ascii( substr($value, 0, $hash_length, '') ); # ASCII my $prefix = substr($value, 0, $hash_length, ''); # HEX push(@data, { host => $host, prefix => $prefix }); if ($self->{debug}) { $self->debug("\t" . $self->hex_to_ascii($host) . " " . $self->hex_to_ascii($prefix) . "\n"); } } } else { push(@data, { host => $host, prefix => '' }); if ($self->{debug}) { $self->debug("\t" . $self->hex_to_ascii($host) . "\n"); } } } return @data; } =head2 hex_to_ascii() Transform hexadecimal strings to printable ASCII strings. Used mainly for debugging. print $gsb->hex_to_ascii('hex value'); =cut sub hex_to_ascii { my ($self, $hex) = @_; return String::HexConvert::ascii_to_hex($hex); # my $ascii = ''; # # while (length $hex > 0) { # $ascii .= sprintf("%02x", ord( substr($hex, 0, 1, '') ) ); # } # # return $ascii; } =head2 ascii_to_hex() Transform ASCII strings to hexadecimal strings. =cut sub ascii_to_hex { my ($self, $ascii) = @_; my $hex = ''; for (my $i = 0; $i < int(length($ascii) / 2); $i++) { $hex .= chr hex( substr($ascii, $i * 2, 2) ); } return $hex; } =head2 debug() Print debug output. =cut sub debug { my ($self, $message) = @_; print $message if ($self->{debug} > 0); } =head2 error() Print error message. =cut sub error { my ($self, $message) = @_; print "ERROR - ", $message if ($self->{debug} > 0 || $self->{errors} > 0); $self->{last_error} = $message; } =head2 canonical_domain_suffixes() Find all suffixes for a domain. =cut sub canonical_domain_suffixes { my ($self, $domain) = @_; my @domains = (); if ($domain =~ /^\d+\.\d+\.\d+\.\d+$/) { # loose check for IP address, should be enough return ($domain); } my @parts = split/\./, $domain; # take 3 components if (scalar @parts >= 3) { @parts = splice (@parts, -3, 3); push(@domains, join('.', @parts)); splice(@parts, 0, 1); } push(@domains, join('.', @parts)); return @domains; } =head2 canonical_domain() Find all canonical domains a domain. =cut sub canonical_domain { my ($self, $domain) = @_; my @domains = ($domain); if ($domain =~ /^\d+\.\d+\.\d+\.\d+$/) { # loose check for IP address, should be enough return @domains; } my @parts = split/\./, $domain; splice(@parts, 0, -6); # take 5 top most compoments while (scalar @parts > 2) { shift @parts; push(@domains, join(".", @parts) ); } return @domains; } =head2 canonical_path() Find all canonical paths for a URL. =cut sub canonical_path { my ($self, $path) = @_; my @paths = ($path); # return full path if ($path =~ /\?/) { $path =~ s/\?.*$//; push(@paths, $path); } my @parts = split /\//, $path; my $previous = ''; while (scalar @parts > 1 && scalar @paths < 6) { my $val = shift(@parts); $previous .= "$val/"; push(@paths, $previous); } return @paths; } =head2 canonical() Find all canonical URLs for a URL. =cut sub canonical { my ($self, $url) = @_; my @urls = (); # my $uri = URI->new($url)->canonical; my $uri = $self->canonical_uri($url); my @domains = $self->canonical_domain($uri->host); my @paths = $self->canonical_path($uri->path_query); foreach my $domain (@domains) { foreach my $path (@paths) { push(@urls, "$domain$path"); } } return @urls; } =head2 canonical_uri() Create a canonical URI. NOTE: URI cannot handle all the test cases provided by Google. This method is a hack to pass most of the test. A few tests are still failing. The proper way to handle URL canonicalization according to Google would be to create a new module to handle URLs. However, I believe most real-life cases are handled correctly by this function. =cut sub canonical_uri { my ($self, $url) = @_; $url = trim $url; # Special case for \t \r \n while ($url =~ s/^([^?]+)[\r\t\n]/$1/sgi) { } my $uri = URI->new($url)->canonical; # does not deal with directory traversing # $self->debug("0. $url => " . $uri->as_string . "\n"); if (! $uri->scheme() || $uri->scheme() eq '') { $uri = URI->new("http://$url")->canonical; } $uri->fragment(''); my $escape = $uri->as_string; # Reduce double // to single / in path while ($escape =~ s/^([a-z]+:\/\/[^?]+)\/\//$1\//sgi) { } # Remove empty fragment $escape =~ s/#$//; # canonial does not handle ../ # $self->debug("\t$escape\n"); while($escape =~ s/([^\/])\/([^\/]+)\/\.\.([\/?].*)?$/$1$3/gi) { } # May have removed ending / # $self->debug("\t$escape\n"); $escape .= "/" if ($escape =~ /^[a-z]+:\/\/[^\/\?]+$/); $escape =~ s/^([a-z]+:\/\/[^\/]+)(\?.*)$/$1\/$2/gi; # $self->debug("\t$escape\n"); # other weird case if domain = digits only, try to translte it to IP address if ((my $domain = URI->new($escape)->host) =~/^\d+$/) { my $ip = num2ip($domain); if (validaddr($ip)) { $uri = URI->new($escape); $uri->host($ip); $escape = $uri->as_string; } } # $self->debug("1. $url => $escape\n"); # Try to escape the path again $url = $escape; while (($escape = URI::Escape::uri_unescape($url)) ne $escape) { # wrong for %23 -> # $url = $escape; } # while (($escape = URI->new($url)->canonical->as_string) ne $escape) { # breask more unit tests than previous # $url = $escape; # } # Fix for %23 -> # while($escape =~ s/#/%23/sgi) { } # $self->debug("2. $url => $escape\n"); # Fix over escaping while($escape =~ s/^([^?]+)%%(%.*)?$/$1%25%25$2/sgi) { } # URI has issues with % in domains, it gets the host wrong # 1. fix the host # $self->debug("Domain: " . URI->new($escape)->host . "\n"); my $exception = 0; while ($escape =~ /^[a-z]+:\/\/[^\/]*([^a-z0-9%_.-\/:])[^\/]*(\/.*)$/) { my $source = $1; my $target = sprintf("%02x", ord($source)); $escape =~ s/^([a-z]+:\/\/[^\/]*)\Q$source\E/$1%\Q$target\E/; $exception = 1; } # 2. need to parse the path again if ($exception && $escape =~ /^[a-z]+:\/\/[^\/]+\/(.+)/) { my $source = $1; my $target = URI::Escape::uri_unescape($source); # print "Source: $source\n"; while ($target ne URI::Escape::uri_unescape($target)) { $target = URI::Escape::uri_unescape($target); } $escape =~ s/\/\Q$source\E/\/$target/; while ($escape =~ s/#/%23/sgi) { } # fragement has been removed earlier while ($escape =~ s/^([a-z]+:\/\/[^\/]+\/.*)%5e/$1\&/sgi) { } # not in the host name # while ($escape =~ s/%5e/&/sgi) { } while ($escape =~ s/%([^0-9a-f]|.[^0-9a-f])/%25$1/sgi) { } } # $self->debug("$url => $escape\n"); # $self->debug(URI->new($escape)->as_string . "\n"); return URI->new($escape); } =head2 canonical() Return all possible full hashes for a URL. =cut sub full_hashes { my ($self, $url) = @_; my @urls = $self->canonical($url); my @hashes = (); foreach my $url (@urls) { # $self->debug("$url\n"); push(@hashes, sha256($url)); } return @hashes; } =head2 prefix() Return a hash prefix. The size of the prefix is set to 4 bytes. =cut sub prefix { my ($self, $string) = @_; return substr(sha256($string), 0, 4); } =head2 request_full_hash() Request full full hashes for specific prefixes from Google. =cut sub request_full_hash { my ($self, %args) = @_; my $prefixes = $args{prefixes} || return (); my $size = $args{size} || length $prefixes->[0]; # # Handle errors my $i = 0; my $errors; my $delay = sub { my $time = shift; if ((time() - $errors->{timestamp}) < $time) { splice(@$prefixes, $i, 1); } else { $i++; } }; while ($i < scalar @$prefixes) { my $prefix = $prefixes->[$i]; $errors = $self->{storage}->get_full_hash_error(prefix => $prefix); if (defined $errors && $errors->{errors} > 2) { # 2 errors is OK $errors->{errors} == 3 ? $delay->(30 * 60) # 30 minutes : $errors->{errors} == 4 ? $delay->(60 * 60) # 1 hour : $delay->(2 * 60 * 60); # 2 hours } else { $i++; } } my $url = "http://safebrowsing.clients.google.com/safebrowsing/gethash?client=api&apikey=" . $self->{key} . "&appver=$VERSION&pver=" . $self->{version}; my $prefix_list = join('', @$prefixes); my $header = "$size:" . scalar @$prefixes * $size; # print @{$args{prefixes}}, "\n"; # print $$prefixes[0], "\n"; return; my $res = $self->ua->post($url, Content => "$header\n$prefix_list"); if (! $res->is_success) { $self->error("Full hash request failed\n"); $self->debug($res->as_string . "\n"); foreach my $prefix (@$prefixes) { my $errors = $self->{storage}->get_full_hash_error(prefix => $prefix); if (defined $errors && ( $errors->{errors} >=2 # backoff mode || $errors->{errors} == 1 && (time() - $errors->{timestamp}) > 5 * 60)) { # 5 minutes $self->{storage}->full_hash_error(prefix => $prefix, timestamp => time()); # more complicate than this, need to check time between 2 errors } } return (); } else { $self->debug("Full hash request OK\n"); foreach my $prefix (@$prefixes) { $self->{storage}->full_hash_ok(prefix => $prefix, timestamp => time()); } } $self->debug($res->request->as_string . "\n"); $self->debug($res->as_string . "\n"); # $self->debug(substr($res->content, 0, 250), "\n\n"); return $self->parse_full_hashes($res->content); } =head2 parse_full_hashes() Process the request for full hashes from Google. =cut sub parse_full_hashes { my ($self, $data) = @_; my @hashes = (); # goog-malware-shavar:22428:32\nHEX while (length $data > 0) { if ($data !~ /^[a-z-]+:\d+:\d+\n/) { $self->error("list not found\n"); return (); } $data =~ s/^([a-z-]+)://; my $list = $1; $data =~ s/^(\d+)://; my $chunknum = $1; $data =~ s/^(\d+)\n//; my $length = $1; my $current = 0; while ($current < $length) { my $hash = substr($data, 0, 32, ''); push(@hashes, { hash => $hash, chunknum => $chunknum, list => $list }); $current += 32; } } return @hashes; } =head2 get_a_range() Get the list of a chunks ranges for a list update. =cut sub get_a_range { my ($self, %args) = @_; my $list = $args{'list'} || ''; my @nums = $self->{storage}->get_add_chunks_nums(); # trust storage to torder list, or reorder it here? return $self->create_range(numbers => [@nums]); } =head2 get_s_range() Get the list of s chunks ranges for a list update. =cut sub get_s_range { my ($self, %args) = @_; my $list = $args{'list'} || ''; my @nums = $self->{storage}->get_sub_chunks_nums(); # trust storage to torder list, or reorder it here? return $self->create_range(numbers => [@nums]); } =head2 create_range() Create a list of ranges (1-3, 5, 7-11) from a list of numbers. =cut sub create_range { my ($self, %args) = @_; my $numbers = $args{numbers} || []; # should already be ordered return '' if (scalar @$numbers == 0); my $range = $$numbers[0]; my $new_range = 0; for(my $i = 1; $i < scalar @$numbers; $i++) { # next if ($$numbers[$i] == $$numbers[$i-1]); # should not happen if ($$numbers[$i] != $$numbers[$i-1] + 1) { $range .= $$numbers[$i-1] if ($i > 1 && $new_range == 1); $range .= ',' . $$numbers[$i]; $new_range = 0 } elsif ($new_range == 0) { $range .= "-"; $new_range = 1; } } $range .= $$numbers[scalar @$numbers - 1] if ($new_range == 1); return $range; } =head2 expand_range() Explode list of ranges (1-3, 5, 7-11) into a list of numbers (1,2,3,5,7,8,9,10,11). =cut sub expand_range { my ($self, %args) = @_; my $range = $args{range} || return (); my @list = (); my @elements = split /,/, $range; foreach my $data (@elements) { if ($data =~ /^\d+$/) { # single number push(@list, $data); } elsif ($data =~ /^(\d+)-(\d+)$/) { my $start = $1; my $end = $2; for(my $i = $start; $i <= $end; $i++) { push(@list, $i); } } } return @list; } =back =head1 CHANGELOG =over 4 =item 1.07 Add C feature to import add chunks and sub chunks from a file. =item 1.05 No code change. Move C to PRIVATE FUNCTIONS to avoid confusions. =item 1.04 Introduce L. Remind people that Google Safe Browsing v1 has been deprecated by Google. =item 1.03 The source code is available on github at L. =item 1.02 Fix uninitialized $self->{errors} variable =item 1.01 Use String::HexConvert for faster hex_to_ascii. =item 1.0 Separate the error output from the debug output. =item 0.9 Fix bug with local whitelisting (sub chunks). Fix the parsing of full hashes. =item 0.8 Reduce the number of full hash requests. =item 0.7 Add local_lookup to perform a lookup against the local database only. This function should be used for debugging purpose only. Small code optimizations. =item 0.6 Handle local database reset. =item 0.5 Update documentation. =item 0.4 Speed update the database update. The first update went down from 20 minutes to 20 minutes. =item 0.3 Fix typos in the documentation. Remove dependency on Switch (thanks to Curtis Jewel). Fix value of FULL_HASH_TIME. =item 0.2 Add support for Message Authentication Code (MAC) =back =head1 SEE ALSO See L, L and L for information on storing and managing the Google Safe Browsing database. Google Safe Browsing v2 API: L L (Google Safe Browsing v1) is deprecated by Google since 12/01/2011. =head1 AUTHOR Julien Sobrier, Ejsobrier@zscaler.comE or Ejulien@sobrier.netE =head1 COPYRIGHT AND LICENSE Copyright (C) 2012 by Julien Sobrier This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available. =cut 1;Net-Google-SafeBrowsing2-1.07/Changes0000644000076400007640000000446611764516761017625 0ustar jsobrierjsobrierRevision history for Perl extension Net::Google::SafeBrowsing2. 0.1 Wed Jul 21 16:35:48 2010 - original version 0.2 Fri Nov 05 14:45:48 2010 - add support for MAC 0.5 Mon Mar 07 16:55:00 2011 - add MySQl back-end storage - introduce base class Net::Google::SafeBrowsing2::DBI - update documentation Tue Apr 05 14:40:00 2011 - fix Net::Google::SafeBrowsing2::DBI to work with all database types 0.6 Mon Jul 25 13:21:00 2011 - Handle local database reset 0.7 Mon Jul 25 15:18:00 2011 - Add local_lookup to perform a lookup against the local database only for debugging purpose. - Fix duplicate insert of add chunks and sub chunks. 0.8 Tue Aug 02 13:05:00 2011 - Fix chunk retrieval with MySQL backend (VARCHAR to VARBINARY) - Reduce the number of full hashes requested 0.9 Tue Aug 09 15:26:00 2011 - Fix bug with local whitelisting (sub chunks). - Fix the parsing of full hashes. 1.0 Wed Aug 10 15:11:00 - Storage: add keep_all argument to keep expired full hashes in the database - Separate the error output from debug output - A lot of testing 1.01 Wed Aug 26 13:49:00 - Use String::HexConvert for faster hex_to_ascii 1.02 Thu Nov 10 10:00:00 - Fix uninitialized $self->{errors} variable 1.03 Thu Nov 21 17:24:00 - no code change - source code available on github at https://github.com/juliensobrier/Net-Google-SafeBrowsing2 - minor documentation updates 1.04 Wed Dec 07 10:26:00 - add Net::Google::SafeBrowsing2::Lookup (http://code.google.com/apis/safebrowsing/lookup_guide.html) Wed Dec 07 10:49:00 - Net::Google::SafeBrowsing2::Sqlite, xndex s_chunks_unique was created at the wrong place Tue Dec 12 15:41:00 - Net::Google::SafeBrowsing2::Lookup 0.2: documentation update 1.05 Thu Jan 12 14:26:00 - No code change. Move C to PRIVATE FUNCTIONS to avoid confusions. 1.05 Mon May 07 10:22:00 - Added Postgres storage from njohnston 1.06 Sun Jun 03 21:40:00 - Update DBI and MySQL back-end to keep empty sub chunks. Fix index for s_chunks table - MySQL: truncate prefixes longer than 8 bytes - Add a warning about the number of updates required with an empty database - Storage: reset function is required 1.07 Fri Jun 08 18:38:00 - Import/Export feature for add chunks and sub chunks. Can be used to move from one back-end storage to another.Net-Google-SafeBrowsing2-1.07/META.json0000664000076400007640000000251311764517146017742 0ustar jsobrierjsobrier{ "abstract" : "Perl extension for the Google Safe Browsing v2 API. (Google Safe Browsing v1 has been deprecated by Google.)", "author" : [ "Julien Sobrier " ], "dynamic_config" : 1, "generated_by" : "ExtUtils::MakeMaker version 6.62, CPAN::Meta::Converter version 2.112150", "license" : [ "unknown" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "Net-Google-SafeBrowsing2", "no_index" : { "directory" : [ "t", "inc" ] }, "prereqs" : { "build" : { "requires" : { "ExtUtils::MakeMaker" : 0 } }, "configure" : { "requires" : { "ExtUtils::MakeMaker" : 0 } }, "runtime" : { "requires" : { "Digest::HMAC_SHA1" : 0, "Digest::SHA" : 0, "File::Slurp" : 0, "LWP::UserAgent" : 0, "List::Util" : 0, "MIME::Base64" : 0, "MIME::Base64::URLSafe" : 0, "Net::IPAddress" : 0, "String::HexConvert" : 0, "Test::More" : 0, "Text::Trim" : 0, "URI" : 0, "constant" : 0 } } }, "release_status" : "stable", "version" : "1.07" } Net-Google-SafeBrowsing2-1.07/README0000644000076400007640000000346311762315054017174 0ustar jsobrierjsobrierNet-Google-SafeBrowsing2 version 1.04 ===================================== Net::Google::SafeBrowsing2 implements the Google Safe Browsing v2 API. Net::Google::SafeBrowsing2::Lookup implements the Google Safe Browsing v2 Lookup API (up to 10,000 URL checks a day). The library passes most of the unit tests listed in the API documentation. See the documentation (http://code.google.com/apis/safebrowsing/developers_guide_v2.html) for more details about the failed tests. The Google Safe Browsing database must be stored and managed locally. Net::Google::SafeBrowsing2::Sqlite uses Sqlite as the storage back-end, Net::Google::SafeBrowsing2::MySQL uses MySQL. Other storage mechanisms (databases, memory, etc.) can be added and used transparently with this module. You may want to look at "Google Safe Browsing v2: Implementation Notes" (http://www.zscaler.com/research/Google%20Safe%20Browsing%20v2%20API.pdf), a collection of notes and real-world numbers about the API. This is intended for people who want to learn more about the API, whether as a user or to make their own implementation. The source code is available on github at https://github.com/juliensobrier/Net-Google-SafeBrowsing2. INSTALLATION To install this module type the following: perl Makefile.PL make make test make install DEPENDENCIES This module requires these other modules and libraries: LWP::UserAgent URI Digest::SHA List::Util constant Net::IPAddress Test::More Text::Trim Digest::HMAC_SHA1 MIME::Base64::URLSafe MIME::Base64 String::HexConvert COPYRIGHT AND LICENCE Copyright (C) 2011 by Julien Sobrier This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available.