WWW-Dict-Leo-Org-1.40/0000755000175100017510000000000012651776445012746 5ustar scipscipWWW-Dict-Leo-Org-1.40/leo0000755000175100017510000003603312651776035013453 0ustar scipscip#!/usr/bin/perl # # This little handy script grabs the german/english translation for a # given term from http://dict.leo.org. Thanks the LEO folks for their # good job! # # Usage is quite simple, the script requires just one parameter, # the term to be translated. It will then return the results in # an unformatted form. # # $Id: leo,v 1.33 2008/04/22 22:23:39 scip Exp $ # # Copyleft (l) 2000-2016 by Thomas v.D. . leo may be # used and distributed under the terms of the GNU General Public License. # All other brand and product names are trademarks, registered trademarks # or service marks of their respective holders. #use lib qw(blib/lib); #use Org; use utf8; use strict; use Getopt::Long; use DB_File; use POSIX qw(isatty); use WWW::Dict::Leo::Org; use Data::Dumper; # # internal settings # my $highlight = 1; my $default_c = "\033[0m"; # reset default terminal color my $bold_c = "\033[0;34m"; # blue color my $copy_c = "\033[0;35m"; # copyright message color (green) my $version = "1.39"; my $config = $ENV{HOME} . "/.leo"; my $cache = $ENV{HOME} . "/.leo-CACHE.db"; my $debugging = 0; #defaults for config my %conf = ( use_cache => "no", use_color => "yes", use_latin => "yes" ); my %validopts = qw(use_cache 0 use_color 0 user_agent 0 use_latin 0); my %line = %validopts; my %CACHE = (); my $site = ""; my $proxy_user = ""; my $proxy_pass = ""; sub debug; my($o_s, $o_m, $o_c, $o_l, $o_v, $o_h, $o_n, $o_f, $o_d, $o_u, $o_p); isatty(1) && eval q{ use open OUT => ':locale'}; # # commandline options # Getopt::Long::Configure( qw(no_ignore_case)); if (! GetOptions ( "spelltolerance|s=s" => \$o_s, "morphology|m=s" => \$o_m, "chartolerance|c=s" => \$o_c, "language|l=s" => \$o_l, "force|f" => \$o_f, "version|v" => \$o_v, "help|h" => \$o_h, "debug|d" => \$o_d, "noescapechars|n" => \$o_n, "user|u=s" => \$o_u, "passwd|p=s" => \$o_p ) ) { &usage; } if ($o_h) { &usage; } if ($o_v) { print STDERR "leo version $version\n"; exit; } # # search term # my $string = shift; if (!$string) { $string = ; chomp $string; } if (eval { require I18N::Langinfo; require Encode; 1 }) { my $codeset = I18N::Langinfo::langinfo(I18N::Langinfo::CODESET()); if ($codeset) { for ($string) { $_ = Encode::decode($codeset, $_); } } } # # open the config, if any # if (-e $config) { open C, "<$config" or die "Could not open config $config: $!\n"; local $_; while () { chomp; next if(/^\s*#/); # ignore comments next if(/^\s*$/); # ignore empty lines s/^\s*//; # remove leading whitespace s/\s*$//; # remove trailing whitespace s/\s*#.*$//; # remove trailing comment my($opt, $val) = split /\s*=\s*/; $conf{$opt} = $val; $line{$opt} = $.; } close C; } # # validate the config # foreach my $opt (keys %conf) { if (!exists $validopts{$opt}) { print "<$opt>\n"; print STDERR "Error in config $config line: " . $line{$opt} . ". Unsupported option \"$opt\"!\n"; exit; } } # # feed config values into program # if ($conf{use_color} eq "no") { $highlight = 0; } elsif ($conf{use_color} eq "yes") { $highlight = 1; } # # open the cache, if wanted # if ($conf{use_cache} eq "yes") { eval { require DB_FileX; }; if ($@) { $conf{use_cache} = "no"; } else { dbmopen(%CACHE, $cache, 0600) or $conf{use_cache} = "no"; } } my %PARAM; if ($o_l) { $PARAM{"-Language"} = $o_l; } if(exists $ENV{http_proxy}) { $PARAM{"-Proxy"} = $ENV{http_proxy}; } if ($o_u) { $PARAM{"-ProxyUser"} = $o_u; } if ($o_p) { $PARAM{"-ProxyPass"} = $o_p; } if($o_n) { $highlight = 0; } else { # highlighting turned on, check if possible if (! isatty(1)) { $highlight = 0; } } if ($o_d) { # enable $PARAM{"-Debug"} = 1; } if($o_s) { $PARAM{"-SpellTolerance"} = $o_s; } if($o_m) { $PARAM{"-Morphology"} = $o_m; } if($o_c) { $PARAM{"-CharTolerance"} = $o_c; } if (exists $ENV{http_proxy} and $o_u) { # authenticate if (! $o_p) { # ask for it my $proxy_pass; local $| = 1; print "password: "; eval { local($|) = 1; local(*TTY); open(TTY,"/dev/tty") or die "No /dev/tty!"; system ("stty -echo ); print STDERR "\r\n"; system ("stty echo ; } chomp $proxy_pass; $PARAM{"-ProxyPass"} = $proxy_pass; } } my (@match, $lines, $maxsize); my $cache_key = join ("", sort keys %PARAM) . $string; if ($o_f && $conf{use_cache} eq "yes") { delete $CACHE{$cache_key}; } if(exists $CACHE{$cache_key} && $conf{use_cache} eq "yes") { # deliver from cache my $code = $CACHE{$cache_key}; my ($VAR1, $VAR2, $VAR3); eval $code; @match = @{$VAR1}; $lines = $VAR2; $maxsize = $VAR3; } else { my $leo = new WWW::Dict::Leo::Org(%PARAM) or die "Could not initialize WWW::Dict::Leo::Org: $!\n"; @match = $leo->translate($string); $lines = $leo->lines(); $maxsize = $leo->maxsize(); if($conf{use_cache} eq "yes") { $CACHE{$cache_key} = Dumper(\@match, $lines, $maxsize); } } if ($conf{use_cache} eq "yes") { dbmclose(%CACHE); } if(! @match) { print STDERR "Search for \"$string\" returned no results.\n"; exit 1; } $maxsize += 5; print "Found $lines matches for '$string' on dict.leo.org:\n"; # # print it out in a formated manner, keep the order of dict.leo.org # foreach my $section (@match) { utf8::decode($section->{title}) if ($conf{use_latin}); if ($highlight) { print "\n${bold_c}$section->{title}${default_c}\n"; } else { print "\n$section->{title}\n"; } foreach my $entry (@{$section->{data}}) { $entry->{left} =~ s/^(.*)$/$1 . " " x ($maxsize - length($1))/e; if ($conf{use_latin}) { utf8::decode($entry->{left}); utf8::decode($entry->{right}); } if ($highlight) { $entry->{left} =~ s/(\Q$string\E)/$bold_c . $1 . $default_c/ei; $entry->{right} =~ s/(\Q$string\E)/$bold_c . $1 . $default_c/ei; } print " $entry->{left}$entry->{right}\n"; } } print "$copy_c" if $highlight; print "\n Fetched by leo $version via http://dict.leo.org/"; print "\n Copyright (C) LEO Dictionary Team 1995-2016"; print "\n [leo] GPL Copyleft Thomas v.D. 2000-2016\n\n"; print "$default_c" if $highlight; sub parserror { my $msg = shift; print STDERR "Parse error $msg\n"; print STDERR "Could not recognize site html of target site\n"; print STDERR "dict.leo.org. This might be a bug or the site\n"; print STDERR "might have changed. Please repeat the last step\n"; print STDERR "with debugging enabled (-d) and send the output\n"; print STDERR "to the author. Thanks.\n"; exit 1; } sub usage { my $msg = shift; my $me = $0; $me =~ s(^.*/)(); print "$msg\n" if($msg); print qq(Usage: $me [-slmcfuphdv] [] Translate a term from german to english or vice versa. -s, --spelltolerance=on|off allow spelling errors -m, --morphology=none|forcedAll provide morphology information -c, --chartolerance=fuzzy|exact allow umlaut alternatives -l, --language=en|fr|de2(en|fr)|(en|fr)2de translation direction -n, --noescapechars dont use escapes for highlighting -f, --force don't use the query cache -u, --user=username user for proxy authentication -p, --passwd=password cleartext passphrase for proxy authentication -h, --help display this help and exit -d, --debug enable debugging output -v, --version output version information and exit is the string you are asking to be translated. It will be requested from STDIN if not specified on the commandline. Report bugs to . ); exit 1; } 1; =head1 NAME leo - commandline interface to http://dict.leo.org/. =head1 SYNOPSIS leo [-slmcfuphdv] [] =head1 DESCRIPTION B is a commandline interface to the german/english/french dictionary on http://dict.leo.org/. It supports almost all features which the website supports, plus more. Results will be printed to the terminal. By default the searched key word will be highlighted (which can be turned off, see below). To get faster results, B is able to cache queries if you repeatedly use the same query. B acts as a standard webbrowser as your mozilla or what so ever does, it connects to the website, exectues the query, parses the HTML result and finally prints it somewhat nicely formatted to the terminal. As of this writing B acts as: Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1.9) Gecko/20071025 Firefox/2.0.0.9 =head1 OPTIONS =over =item I<-s --spelltolerance> Allow spelling errors. Possible values: B, B or B. Default setting: B. =item I<-m --morphology> Provide morphology information. Possible values: B, B or B. Default setting: B. =item I<-c --chartolerance> Allow umlaut alternatives. Possible values: B, B or B. Default: B. =item I<-l --language> Translation direction. Possible values: B, B, B, B, B or B. B and B do select the translation direction automatically. Default: B. =item I<-n --noescapechars> Don't use escapes for highlighting. Default: do highlighting. Controllable via config file too. See below. No highlighting will be used if STDOUT is not connected to a terminal. =item I<-f --force> Don't use the query cache. Default: use the cache. This option has no effect if B is turned off in the config file. =item I<-u --user> Specify the http proxy user to use if your proxy requires authentication. Read the 'PROXY' section for more details. =item I<-p --passwd> Specify the cleartext password to use with http proxy authentication. This is not recommended and just implemented for completeness. =item I<-h --help> Display this help and exit. =item I<-v --version> Display version information and exit. =item I<-d --debug> Enable debugging output (a lot of it, beware!), which will be printed to STDERR. If you find a bug you must supply the debugging output along with your bugreport. =back B is the key word which you want to translate. If the term contains white spaces quote it using double quotes. If the B parameter is not specified, B will read it from STDIN. =head1 CONFIG B reads a config file B<.leo> in your home directory if it exists. The following variables are supported: =over =item I Turns on conversion of UTF8 characters to their latin* encoding. Default setting (if not given): B. =item I Controls the use of the cache (see later). Possible values: B or B. Default setting(if not given): B. If the commandline option B<-f> or B<--force> has been set then the cache will not be used for the query and if for this query exists an entry in the cache it will be removed from it. =item I Controls the use of escape sequences in the terminal output to highlight the key-waord in the result. Possible values: B or B. Default setting(if not given): B. You can set this option via commandline too: B<-n> or B<--noescapechars>. The config option has higher precedence. =item I You may modify the user agent as B identifies itself on the target site. The default is: User-Agent: Mozilla/5.0 (compatible; Konqueror/3.3.1; X11) =back =head1 CACHING B supports caching of queries for faster results if you repeatedly use the same query. A query consists of the given B (the key word or string) plus the translation option settings. If you, for example, execute once the following query: % leo langnase and somewhere later: % leo -c exact then B will treat the latter query as a different one than the previous one, because I behaves different when different translation options are given. =head1 PROXY B can be used with a HTTP proxy service. For this to work, you only have to set the environment variable B. It has the following format: PROTO://[USER:PASSWD@]SERVER[:PORT] The only supported protocol is B. If your proxy works without authentication, you can omit the B part. If no port is specified, B<80> will be used. Here is an example (for bash): export http_proxy=http://172.16.120.120:3128 and an example with authentication credentials: export http_proxy=http://max:34dwe2@172.16.120.120:3128 As security is always important, I have to warn you, that other users on the same machine can read your environment using the 'ps -e ..' command, so this is not recommended. The most secure way for proxy authentication is just to specify the server+port with B but no credentials, and instead use the B<-u> commandline parameter to specify a user (do not use B<-p> to specify the password, this will also be readyble in process listing). In this case, B will ask you interactively for the password. It will try its best to hide it from being displayed when you type it (as most such routines in other tools do it as well), it this fails (e.g. because you do not have the 'stty' tool installed), the password will be read from STDIN. =head1 FILES ~/.leo the config file for leo. Not required. ~/.leo-CACHE.db* the cache file. =head1 AUTHOR Thomas v.D. =head1 BUGS B depends on http://dict.leo.org/. It may break B if they change something on the site. Therefore be so kind and inform me if you encounter some weird behavior of B. In most cases it is not a bug of B itself, it is a website change on http://dict.leo.org/. In such a case repeat the failed query and use the commandline flag B<-d> (which enables debugging) and send the full output to me, thanks. =head1 COPYRIGHT B copyleft 2000-2016 Thomas v.D.. All rights reserved. http://dict.leo.org/ copyright (c) 1995-2016 LEO Dictionary Team. The search results returned by B are based on the work of the people at LEO.org. Thanks for the great work. Some time ago they told me that they are disagreed with B, or in other words: from their point of view B seems to break copyright law in some or another way. I thought a long time about this, but I must deny this. B acts as a simple web client, just like mozilla, IE or even lynx are doing. They are providing the service to the public so I use my favorite web browser to make use of it. In fact my favorite browser to view dict.leo.org is B. There is nothing wrong with that. IMHO. If you disagree or are asked by the LEO team to stop using B you may decide this for yourself. I in my case wrote kinda browser, what is not prohibited. At least not today. =head1 VERSION This is the manpage for B version B<1.39>. =cut WWW-Dict-Leo-Org-1.40/MANIFEST0000644000175100017510000000034112651776445014075 0ustar scipscipChangelog Makefile.PL README leo t/run.t Org.pm MANIFEST META.yml Module YAML meta-data (added by MakeMaker) META.json Module JSON meta-data (added by MakeMaker) WWW-Dict-Leo-Org-1.40/META.json0000644000175100017510000000165512651776445014376 0ustar scipscip{ "abstract" : "unknown", "author" : [ "unknown" ], "dynamic_config" : 1, "generated_by" : "ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.120921", "license" : [ "unknown" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "WWW-Dict-Leo-Org", "no_index" : { "directory" : [ "t", "inc" ] }, "prereqs" : { "build" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "configure" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "runtime" : { "requires" : { "Carp::Heavy" : "0", "HTML::TableParser" : "0", "IO::Socket" : "0", "MIME::Base64" : "0" } } }, "release_status" : "stable", "version" : "1.40" } WWW-Dict-Leo-Org-1.40/t/0000755000175100017510000000000012651776445013211 5ustar scipscipWWW-Dict-Leo-Org-1.40/t/run.t0000644000175100017510000000047312651776054014202 0ustar scipscip# -*-perl-*- # testscript for WWW::Dict::Leo::Org Class by Thomas v.D. use Test::More qw(no_plan); BEGIN { use_ok "WWW::Dict::Leo::Org" }; require_ok("WWW::Dict::Leo::Org"); # unfortunately I cannot add more tests, because # this would require internet connectivity which # is not the case for all cpan testers. WWW-Dict-Leo-Org-1.40/README0000644000175100017510000000203012651776134013614 0ustar scipscipINTRODUCTION WWW::Dict::Leo::Org - Interface module to http://dict.leo.org/ leo - commandline interface to WWW::Dict::Leo::Org. INSTALLATION perl Makefile.PL make make test make install DOCUMENTATION man leo perldoc WWW::Dict::Leo::Org HISTORY WWW::Dict::Leo::Org exists as the script "leo" since 1999, which is used widely on earth. Begining with version 1.28 I extracted the most significant code into an extra perlmodule (WWW::Dict::Leo::Org), so that the translation feature can be used from perl too. However, the commandline script "leo" still exists, it will be installed together with the module. Please note, that the script from 1.28 on is no more usable in standalone form, in fact it uses the module to do the job now. COPYRIGHT WWW::Dict::Leo::Org + leo Copyright (c) 2007-2016 by Thomas v.D. http://dict.leo.org/ Copyright (c) 1995-2016 LEO Dictionary Team. The search results returned by leo are based on the work of the people at LEO.org. Thanks for the great work. WWW-Dict-Leo-Org-1.40/META.yml0000644000175100017510000000076712651776445014231 0ustar scipscip--- abstract: unknown author: - unknown build_requires: ExtUtils::MakeMaker: 0 configure_requires: ExtUtils::MakeMaker: 0 dynamic_config: 1 generated_by: 'ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.120921' license: unknown meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 name: WWW-Dict-Leo-Org no_index: directory: - t - inc requires: Carp::Heavy: 0 HTML::TableParser: 0 IO::Socket: 0 MIME::Base64: 0 version: 1.40 WWW-Dict-Leo-Org-1.40/Org.pm0000644000175100017510000003023212651776436014033 0ustar scipscip# # Copyleft (l) 2000-2016 Thomas v.D. . # # leo may be # used and distributed under the terms of the GNU General Public License. # All other brand and product names are trademarks, registered trademarks # or service marks of their respective holders. package WWW::Dict::Leo::Org; $WWW::Dict::Leo::Org::VERSION = "1.40"; use strict; use warnings; use English '-no_match_vars'; use Carp::Heavy; use Carp; use IO::Socket; use MIME::Base64; use HTML::TableParser; sub debug; sub new { my ($class, %param) = @_; my $type = ref( $class ) || $class; my %settings = ( "-Host" => "pda.leo.org", "-Port" => 80, "-UserAgent" => "Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1.9) Gecko/20071025 Firefox/2.0.0.9", "-Proxy" => "", "-ProxyUser" => "", "-ProxyPass" => "", "-Debug" => 0, "-SpellTolerance" => "standard", # on, off "-Morphology" => "standard", # none, forcedAll "-CharTolerance" => "relaxed", # fuzzy, exact "-Language" => "en", # en2de, de2fr, fr2de, de2es, es2de "data" => {}, # the results "section" => [], "title" => "", "segments" => [], "Maxsize" => 0, "Linecount" => 0, ); foreach my $key (keys %param) { $settings{$key} = $param{$key}; # override defaults } my $self = \%settings; bless $self, $type; return $self; } sub translate { my($this, $term) = @_; my $linecount = 0; my $maxsize = 0; my @match = (); # # form var transitions for searchLoc # my %lang = ( de2en => 1, en2de => -1, de2fr => 1, fr2de => -1, es2de => -1, de2es => 1, es => 0, en => 0, fr => 0, speak => "ende" ); if ($this->{"-Language"}) { if ($this->{"-Language"} =~ /fr/) { # used for francaise translation $lang{speak} = "frde"; } elsif ($this->{"-Language"} =~ /es/) { $lang{speak} = "esde"; } if (exists $lang{$this->{"-Language"}}) { $this->{"-Language"} = $lang{$this->{"-Language"}}; } else { croak "Unsupported language: " . $this->{"-Language"}; } } # # cut invalid values for parameters or set defaults if unspecified # my %form = ( spellToler => { mask => [ qw(standard on off) ], val => $this->{"-SpellTolerance"} || "standard" }, deStem => { mask => [ qw(standard none forcedAll) ], val => $this->{"-Morphology"} || "standard" }, cmpType => { mask => [ qw(fuzzy exact relaxed) ], val => $this->{"-CharTolerance"} || "relaxed" }, searchLoc => { mask => [ qw(-1 0 1) ], val => $this->{"-Language"} || "0" }, ); my @form; foreach my $var (keys %form) { if (grep { $form{$var}->{val} eq $_ } @{$form{$var}->{mask}}) { push @form, $var . "=" . $form{$var}->{val}; } } # add language push @form, "lp=$lang{speak}"; # # process whitespaces # my $query = $term; $query =~ s/\s\s*/ /g; $query =~ s/\s/\+/g; push @form, "search=$query"; # # make the query cgi'ish # my $form = join "&", @form; # store for result caching $this->{Form} = $form; # # check for proxy settings and use it if exists # otherwise use direct connection # my ($url, $site); my $ip = $this->{"-Host"}; my $port = $this->{"-Port"}; my $proxy_user = $this->{"-ProxyUser"}; my $proxy_pass = $this->{"-ProxyPass"}; if ($this->{"-Proxy"}) { my $proxy = $this->{"-Proxy"}; $proxy =~ s/^http:\/\///i; if ($proxy =~ /^(.+):(.+)\@(.*)$/) { # proxy user account $proxy_user = $1; $proxy_pass = $2; $proxy = $3; $this->debug( "proxy_user: $proxy_user"); } my($host, $pport) = split /:/, $proxy; if ($pport) { $url = "http://$ip:$port/dictQuery/m-vocab/$lang{speak}/de.html"; $port = $pport; } else { $port = 80; } $ip = $host; $this->debug( "connecting to proxy:", $ip, $port); } else { $this->debug( "connecting to site:", $ip, "port", $port); $url = "/dictQuery/m-vocab/$lang{speak}/de.html"; } my $conn = new IO::Socket::INET( Proto => "tcp", PeerAddr => $ip, PeerPort => $port, ) or die "Unable to connect to $ip:$port: $!\n"; $conn->autoflush(1); $this->debug( "GET $url?$form HTTP/1.0"); print $conn "GET $url?$form HTTP/1.0\r\n"; # be nice, simulate Konqueror. print $conn qq($this->{"-UserAgent"} Host: $this->{"-Host"}:$this->{"-Port"} Accept: text/*;q=1.0, image/png;q=1.0, image/jpeg;q=1.0, image/gif;q=1.0, image/*;q=0.8, */*;q=0.5 Accept-Charset: iso-8859-1;q=1.0, *;q=0.9, utf-8;q=0.8 Accept-Language: en_US, en\r\n); if ($this->{"-Proxy"} and $proxy_user) { # authenticate # construct the auth header my $coded = encode_base64("$proxy_user:$proxy_pass"); $this->debug( "Proxy-Authorization: Basic $coded"); print $conn "Proxy-Authorization: Basic $coded\r\n"; } # finish the request print $conn "\r\n"; # # parse dict.leo.org output # my @line = <$conn>; close $conn or die "Connection failed: $!\n"; $this->debug( "connection: done"); $site = join "", @line; if ($site !~ /HTTP\/1\.(0|1) 200 OK/i) { if ($site =~ /HTTP\/1\.(0|1) (\d+) /i) { # got HTTP error my $err = $2; if ($err == 407) { croak "proxy auth required or access denied!\n"; } else { if ($site =~ /Leider konnten wir zu Ihrem Suchbegriff/ || $site =~ /found no matches for your search/ ) { return (); } else { croak "got HTTP error $err!\n"; } } } } my @request = ( { id => 2, row => sub { $this->row(@_); }, hdr => sub { $this->hdr(@_); } }, { id => 3, hdr => sub { $this->hdr(@_); }, row => sub { $this->row(@_); } }, { id => 4, hdr => sub { $this->hdr(@_); }, row => sub { $this->row(@_); } } ); $this->{Linecount} = 0; my $p = HTML::TableParser->new( \@request, { Decode => 1, Trim => 1, Chomp => 1, DecodeNBSP => 1 } ); $site=~s/ /\ \;/g; $p->parse($site); # put the rest on the stack, if any if (@{$this->{section}}) { $this->{data}->{ $this->{title} } = $this->{section}; push @{$this->{segments}}, $this->{title}; } # put back in order my @matches; foreach my $title (@{$this->{segments}}) { push @matches, { title => $title, data => $this->{data}->{$title} }; } return @matches; } sub hdr { my ( $this, $tbl_id, $line_no, $data, $udata ) = @_; if ($data->[1] && $data->[0] eq $data->[1]) { $this->debug("Probable start of a new section: $data->[1]"); if (@{$this->{section}}) { $this->{data}->{ $this->{title} } = $this->{section}; push @{$this->{segments}}, $this->{title}; } $this->{title} = $data->[1]; $this->{section} = []; } } sub row { # # divide rows into titles and lang data. # we get 2 items (left and right column), if they # are equal, it's a segment title, otherwise it's # segment content. left columns ending in HH:MM # are forumposts and ignored as well as rows with # empty left cells. my ( $this, $tbl_id, $line_no, $data, $udata ) = @_; if ($data->[1] && $data->[0] && $data->[0] ne $data->[1] && $data->[0] !~ /\d{2}:\d{2}$/) { if (length($data->[0]) > $this->{Maxsize}) { $this->{Maxsize} = length($data->[0]); } $this->debug("line: $line_no, left: $data->[0], right: $data->[1]"); push @{$this->{section}}, { left => $data->[0], right => $data->[1] }; $this->{Linecount}++; } } sub maxsize { my($this) = @_; return $this->{Maxsize}; } sub lines { my($this) = @_; return $this->{Linecount}; } sub form { my($this) = @_; return $this->{Form}; } sub debug { my($this, $msg) = @_; if ($this->{"-Debug"}) { print STDERR "%DEBUG: $msg\n"; } } 1; =encoding ISO8859-1 =head1 NAME WWW::Dict::Leo::Org - Interface module to dictionary dict.leo.org =head1 SYNOPSIS use WWW::Dict::Leo::Org; my $leo = new WWW::Dict::Leo::Org(); my @matches = $leo->translate($term); =head1 DESCRIPTION B is a module which connects to the website B and translates the given term. It returns an array of hashes. Each hash contains a left side and a right side of the result entry. =head1 OPTIONS B has several parameters, which can be supplied as a hash. All parameters are optional. =over =item I<-Host> The hostname of the dict website to use. For the moment only dict.leo.org is supported, which is also the default - therefore changing the hostname would not make much sense. =item I<-Port> The tcp port to use for connecting, the default is 80, you shouldn't change it. =item I<-UserAgent> The user-agent to send to dict.leo.org site. Currently this is the default: Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1.9) Gecko/20071025 Firefox/2.0.0.9 =item I<-Proxy> Fully qualified proxy server. Specify as you would do in the well known environment variable B, example: -Proxy => "http://192.168.1.1:3128" =item I<-ProxyUser> I<-ProxyPass> If your proxy requires authentication, use these parameters to specify the credentials. =item I<-Debug> If enabled (set to 1), prints a lot of debug information to stderr, normally only required for developers or to report bugs (see below). =back Parameters to control behavior of dict.leo.org: =over =item I<-SpellTolerance> Be tolerant to spelling errors. Default: turned on. Possible values: on, off. =item I<-Morphology> Provide morphology information. Default: standard. Possible values: standard, none, forcedAll. =item I<-CharTolerance> Allow umlaut alternatives. Default: relaxed. Possible values: fuzzy, exact, relaxed. =item I<-Language> Translation direction. Please note that dict.leo.org always translates either to or from german. The following values can be used: =over =item de Alias for B - german to english. =item fr Alias for B - german to french. =item es Alias for B - german to espaniol. =item en2de english to german. =item fr2de french to german. =item es2de espaniol to german. =back =back =head1 METHODS =head2 translate($term) Use this method after initialization to connect to dict.leo.org and translate the given term. It returns an array of hashes containing the actual results. use WWW::Dict::Leo::Org; use Data::Dumper; my $leo = new WWW::Dict::Leo::Org(); my @matches = $leo->translate("test"); print Dumper(\@matches); which prints: $VAR1 = [ { 'data' => [ { 'left' => 'check', 'right' => 'der Test' }, { 'left' => 'quiz (Amer.)', 'right' => 'der Test    [Schule]' ], 'title' => 'Unmittelbare Treffer' }, { 'data' => [ { 'left' => 'to fail a test', 'right' => 'einen Test nicht bestehen' }, { 'left' => 'to test', 'right' => 'Tests macheneinen Test machen' } ], 'title' => 'Verben und Verbzusammensetzungen' }, 'data' => [ { 'left' => 'testing  adj.', 'right' => 'im Test' } ], 'title' => 'Wendungen und Ausdrücke' } ]; You might take a look at the B script how to process this data. =head2 maxsize() Returns the size of the largest returned term (left side). =head2 lines() Returns the number of translation results. =head2 form() Returns the submitted form uri. =head1 SEE ALSO L =head1 COPYRIGHT WWW::Dict::Leo::Org - Copyright (c) 2007-2016 by Thomas v.D. L - Copyright (c) 1995-2016 LEO Dictionary Team. =head1 AUTHOR Thomas v.D. =head1 HOW TO REPORT BUGS Use L to report bugs, select the queue for B. Please don't forget to add debugging output! =head1 VERSION 1.40 =cut WWW-Dict-Leo-Org-1.40/Makefile.PL0000644000175100017510000000070712651774330014713 0ustar scipscip# # made for WWW::Dict::Leo::Org 1.33 and up use ExtUtils::MakeMaker; WriteMakefile( 'NAME' => 'WWW::Dict::Leo::Org', 'VERSION_FROM' => 'Org.pm', 'EXE_FILES' => [ 'leo' ], 'clean' => { FILES => '*~' }, 'EXCLUDE_EXT' => [ qw(README) ], 'PREREQ_PM' => { 'Carp::Heavy' => 0, 'IO::Socket' => 0, 'MIME::Base64' => 0, 'HTML::TableParser' => 0 } ); WWW-Dict-Leo-Org-1.40/Changelog0000644000175100017510000001365112651774330014555 0ustar scipscip1.39: fixed rt.cpan.org#91464: disable caching if DB_File is not found. 1.38: fixed rt.cpan.org#92944, missed translations. The problem was, that the pda.leo.org uses a differnt number of tables depending on the translation. So, now we just fetch all tables (2-4) and ignore those which are not translations (forum posts or empty cells). Applied patch rt.cpan.org#92914 (POD locale encoding). 1.37: fixed rt.cpan.org#92679, the site url and table structure changed. 1.36: applied patch rt.cpan.org#35543, which fixes handling of utf8 terminals. fixed rt.cpan.org#84196 using the suggested changes. applied patch rt.cpan.org#86641, spelling fixes. 1.35: Switched to use pda.leo.org, which is easier to parse, faster to load and seems not change that often. 1.34: Oh, if a search only returns one section, nothing have been returned. This was a bug which has been fixed. 1.33: Replaced handcrafted html parser by HTML::TableParser module. It was over seven years old, so time has come. The new parsing should now be much more stable and catch most, if not all, tinkering on dict.leo.org. The organization of the array of hashes returned by ::translate() has changed. Take a look at the example in the pod or view the supplied 'leo' script how to use it. 1.32: Fixed strange packaging bug. Fixed yet another parsing bug (there are now titles formatted different than other titles, damn). Changed default translation direction to 0, so that dict.leo.org determines automatically the direction to use. 1.31: Fixed dict.leo.org siteupdates (back to stoneage: they added some invalid html again). 1.30: Fixed bug in leo script, it did not load WWW::Dict::Leo::Org but just Org, which I did during development, but which doesn't work after installation. Updated the version in leo script to 1.30 too. 1.29: Added changelog entry for 1.28 (actually I forgot it) Added documentation for the methods of the module (forgotten too) 1.28: Transformed the script into a installable perl module. 'leo' itself still exists but uses now this module for the actual translation job. The perl module WWW::Dict::Leo::Org can be used from perl scipts and applications for translation. 1.27: fixed site updates on dict.leo.org - hey, they finally fixed some faulty html which in fact caused parse errors in the script. changed the default user agent to a some more recent one. 1.26: bugfix - removed gzip accept header. 1.25: fixed latest site update, they added javascript popup-links. Thanks to Sylvia for the hint. 1.24: bugfix: last patch didn't work with proxy. 1.23: fixed latest site update (lang must be part of the cgi url, for whatever reason). Thanks to Tobi Werth for the patch! 1.22: added more translation options (-l): - en (used so far as the default), leo.org: en<->de - fr, leo.org: fr<->de - fr2de - de2fr if no string is given as input, stdin will be read (one line) instead; if stdout is not connected to a tty, no escape chars will be used, e.g.: echo "attention" | leo -l fr | grep acht This allows leo to be used for scripting or something, you may imagine yourself the possibilities. added proxy authentication support. added a section about proxy to the man-page. 1.21: added -d flag to get debugging informations. fixed parser bug, which occured under some rare circumstances. 1.20: applied patch by almaric, which reflects latest site changes. the user agent can now be configured in the config file. 1.19: once more, they changed the site design (and - btw - it contains still HTML failures!). 1.18: reflection of DICT site changes. 1.17: there's a "more Examples..." link on the bottom of doct.leo.org if the query returned too many examples. leo did not properly respond to this, which is fixed now. But: leo does *not* fetch the pending examples at the moment! 1.16: reflection of DICT site changes. new html parser code, seems to be more stable against site changes, I think. 1.15: lost 1.14: added cache feature. added manpage, install Makefile and README file. 1.13: reflection of DICT site changes. revision 1.12 date: 2002/07/22 20:03:17; author: scip; state: Exp; lines: +131 -15 added some commandline options to reflect some new dict.leo.org features ---------------------------- revision 1.11 date: 2002/07/18 19:11:58; author: scip; state: Exp; lines: +3 -3 applied patch by Thomas Glanzmann , which fixes "no result" parsing ---------------------------- revision 1.10 date: 2002/07/17 20:56:52; author: scip; state: Exp; lines: +11 -9 updated 4 new leo (july/2002) ---------------------------- revision 1.9 date: 2001/09/21 17:19:31; author: scip; state: Exp; lines: +31 -10 added proxy support to leo ---------------------------- revision 1.8 date: 2001/06/21 23:42:17; author: scip; state: Exp; lines: +5 -5 committed ---------------------------- revision 1.7 date: 2001/06/05 21:16:24; author: scip; state: Exp; lines: +19 -6 fixed: returns now an error, if leo did not find a match added: we are now advertising us as Konqueror :-) ---------------------------- revision 1.6 date: 2001/05/26 01:48:36; author: scip; state: Exp; lines: +6 -2 added copyright message ---------------------------- revision 1.5 date: 2001/05/24 21:17:34; author: scip; state: Exp; lines: +19 -7 removed call to lynx, use IO::Socket from now on! ---------------------------- revision 1.4 date: 2001/05/24 01:39:25; author: scip; state: Exp; lines: +2 -2 added alphabetical sorting ---------------------------- revision 1.3 date: 2001/05/24 01:33:22; author: scip; state: Exp; lines: +2 -2 changed bold to blue ---------------------------- revision 1.2 date: 2001/05/24 01:15:33; author: scip; state: Exp; lines: +1 -1 initial submit ---------------------------- revision 1.1 date: 2001/05/24 01:14:32; author: scip; state: Exp; branches: 1.1.1; Initial revision ---------------------------- revision 1.1.1.1 date: 2001/05/24 01:14:32; author: scip; state: Exp; lines: +0 -0 scripts entered into cvs