WWW-Dict-Leo-Org-2.02/000755 001751 001751 00000000000 13160647124 014210 5ustar00scipscip000000 000000 WWW-Dict-Leo-Org-2.02/leo000755 001751 001751 00000037567 13160647041 014735 0ustar00scipscip000000 000000 #!/usr/bin/perl # # This little handy script grabs the german/english translation for a # given term from http://dict.leo.org. Thanks the LEO folks for their # good job! # # Usage is quite simple, the script requires just one parameter, # the term to be translated. It will then return the results in # an unformatted form. # # Copyleft (l) 2000-2017 by Thomas v.D. . leo may be # used and distributed under the terms of the GNU General Public License. # All other brand and product names are trademarks, registered trademarks # or service marks of their respective holders. use lib qw(blib/lib); use utf8; use strict; use Getopt::Long; use POSIX qw(isatty); use WWW::Dict::Leo::Org; use Data::Dumper; # # internal settings # my $highlight = 1; my $default_c = "\033[0m"; # reset default terminal color my $bold_c = "\033[0;34m"; # blue color my $copy_c = "\033[0;35m"; # copyright message color (green) my $version = "2.02"; my $config = $ENV{HOME} . "/.leo"; my $cache = $ENV{HOME} . "/.leo-CACHE.db"; my $debugging = 0; #defaults for config my %conf = ( use_cache => "no", use_color => "yes", use_latin => "yes" ); my %validopts = qw(use_cache 0 use_color 0 user_agent 0 use_latin 0); my %line = %validopts; my %CACHE = (); my $site = ""; my $proxy_user = ""; my $proxy_pass = ""; sub debug; my($o_s, $o_m, $o_c, $o_l, $o_v, $o_h, $o_n, $o_f, $o_d, $o_u, $o_p); isatty(1) && eval q{ use open OUT => ':locale'}; # # commandline options # Getopt::Long::Configure( qw(no_ignore_case)); if (! GetOptions ( "spelltolerance|s=s" => \$o_s, "morphology|m=s" => \$o_m, "chartolerance|c=s" => \$o_c, "language|l=s" => \$o_l, "force|f" => \$o_f, "version|v" => \$o_v, "help|h" => \$o_h, "debug|d" => \$o_d, "noescapechars|n" => \$o_n, "user|u=s" => \$o_u, "passwd|p=s" => \$o_p ) ) { &usage; } if ($o_h) { &usage; } if ($o_v) { print STDERR "leo version $version\n"; exit; } # # search term # my $string = shift; if (!$string) { $string = ; chomp $string; } if (eval { require I18N::Langinfo; require Encode; 1 }) { my $codeset = I18N::Langinfo::langinfo(I18N::Langinfo::CODESET()); if ($codeset) { for ($string) { $_ = Encode::decode($codeset, $_); } } } # # open the config, if any # if (-e $config) { open C, "<$config" or die "Could not open config $config: $!\n"; local $_; while () { chomp; next if(/^\s*#/); # ignore comments next if(/^\s*$/); # ignore empty lines s/^\s*//; # remove leading whitespace s/\s*$//; # remove trailing whitespace s/\s*#.*$//; # remove trailing comment my($opt, $val) = split /\s*=\s*/; $conf{$opt} = $val; $line{$opt} = $.; } close C; } # # validate the config # foreach my $opt (keys %conf) { if (!exists $validopts{$opt}) { print "<$opt>\n"; print STDERR "Error in config $config line: " . $line{$opt} . ". Unsupported option \"$opt\"!\n"; exit; } } # # feed config values into program # if ($conf{use_color} eq "no") { $highlight = 0; } elsif ($conf{use_color} eq "yes") { $highlight = 1; } # # open the cache, if wanted # if ($conf{use_cache} eq "yes") { $conf{use_cache} = "no"; no strict 'subs'; foreach my $M (qw(DB_File NDBM_File GDBM_File)) { eval { require $M; }; if (! $@) { tie(%CACHE, $M, $cache, O_RDWR|O_CREAT, 0600) or $conf{use_cache} = "no"; $conf{use_cache} = "yes"; last; } } } my %PARAM; if ($o_l) { $PARAM{"-Language"} = $o_l; } if(exists $ENV{http_proxy}) { $PARAM{"-Proxy"} = $ENV{http_proxy}; } if ($o_u) { $PARAM{"-ProxyUser"} = $o_u; } if ($o_p) { $PARAM{"-ProxyPass"} = $o_p; } if($o_n) { $highlight = 0; } else { # highlighting turned on, check if possible if (! isatty(1)) { $highlight = 0; } } if ($o_d) { # enable $PARAM{"-Debug"} = 1; } if($o_s) { $PARAM{"-SpellTolerance"} = $o_s; } if($o_m) { $PARAM{"-Morphology"} = $o_m; } if($o_c) { $PARAM{"-CharTolerance"} = $o_c; } if (exists $ENV{http_proxy} and $o_u) { # authenticate if (! $o_p) { # ask for it my $proxy_pass; local $| = 1; print "password: "; eval { local($|) = 1; local(*TTY); open(TTY,"/dev/tty") or die "No /dev/tty!"; system ("stty -echo ); print STDERR "\r\n"; system ("stty echo ; } chomp $proxy_pass; $PARAM{"-ProxyPass"} = $proxy_pass; } } my (@match, $lines, $maxsize); my $cache_key = join ("", sort keys %PARAM) . $string; if ($o_f && $conf{use_cache} eq "yes") { delete $CACHE{$cache_key}; } if(exists $CACHE{$cache_key} && $conf{use_cache} eq "yes") { # deliver from cache my $code = $CACHE{$cache_key}; my ($VAR1, $VAR2, $VAR3); eval $code; @match = @{$VAR1}; $lines = $VAR2; $maxsize = $VAR3; } else { my $leo = new WWW::Dict::Leo::Org(%PARAM) or die "Could not initialize WWW::Dict::Leo::Org: $!\n"; @match = $leo->translate($string); $lines = $leo->lines(); $maxsize = $leo->maxsize(); if($conf{use_cache} eq "yes") { $CACHE{$cache_key} = Dumper(\@match, $lines, $maxsize); } } if ($conf{use_cache} eq "yes") { dbmclose(%CACHE); } if(! @match) { print STDERR "Search for \"$string\" returned no results.\n"; exit 1; } $maxsize += 5; print "Found $lines matches for '$string' on dict.leo.org:\n"; my $fmt; my $c = "\$fmt = \" %-${maxsize}s %s\n\""; eval $c; # # print it out in a formated manner, keep the order of dict.leo.org # foreach my $section (@match) { utf8::decode($section->{title}) if ($conf{use_latin}); if ($highlight) { print "\n${bold_c}$section->{title}${default_c}\n"; } else { print "\n$section->{title}\n"; } foreach my $entry (@{$section->{data}}) { if ($conf{use_latin}) { utf8::decode($entry->{left}); utf8::decode($entry->{right}); } if ($highlight) { $entry->{left} =~ s/(\Q$string\E)/$bold_c . $1 . $default_c/ei; $entry->{right} =~ s/(\Q$string\E)/$bold_c . $1 . $default_c/ei; } printf $fmt, $entry->{left}, $entry->{right}; } } print "$copy_c" if $highlight; print "\n Fetched by leo $version via http://dict.leo.org/"; print "\n Copyright (C) LEO Dictionary Team 1995-2017"; print "\n [leo] GPL Copyleft Thomas v.D. 2000-2017\n\n"; print "$default_c" if $highlight; sub parserror { my $msg = shift; print STDERR "Parse error $msg\n"; print STDERR "Could not recognize site html of target site\n"; print STDERR "dict.leo.org. This might be a bug or the site\n"; print STDERR "might have changed. Please repeat the last step\n"; print STDERR "with debugging enabled (-d) and send the output\n"; print STDERR "to the author. Thanks.\n"; exit 1; } sub usage { my $msg = shift; my $me = $0; $me =~ s(^.*/)(); print "$msg\n" if($msg); print qq(Usage: $me [-slmcfuphdv] [] Translate a term from german to english or vice versa. -l, --language=[de2][2de] translation direction -n, --noescapechars dont use escapes for highlighting -f, --force don't use the query cache -u, --user=username user for proxy authentication -p, --passwd=password cleartext passphrase for proxy authentication -h, --help display this help and exit -d, --debug enable debugging output -v, --version output version information and exit is the string you are asking to be translated. It will be requested from STDIN if not specified on the commandline. Supported s are: en english es spanish fr french ru russian pt portuguese pl polish ch chinese You can specify only the country code, or append de2 in order to force translation to german, or preprend de2 in order to translate to the other language. Valid examples: ru to or from russian de2pl to polish es2de spanish to german Report bugs to or on https://github.com/TLINDEN/leo/issues. ); exit 1; } 1; =head1 NAME leo - commandline interface to http://dict.leo.org/. =head1 SYNOPSIS leo [-slmcfuphdv] [] =head1 DESCRIPTION B is a commandline interface to the german/english/french dictionary on http://dict.leo.org/. It supports almost all features which the website supports, plus more. Results will be printed to the terminal. By default the searched key word will be highlighted (which can be turned off, see below). To get faster results, B is able to cache queries if you repeatedly use the same query. B acts as a standard webbrowser as your mozilla or what so ever does, it connects to the website, exectues the query, parses the HTML result and finally prints it somewhat nicely formatted to the terminal. As of this writing B acts as: Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1.9) Gecko/20071025 Firefox/2.0.0.9 =head1 OPTIONS =over =item I<-s --spelltolerance> Allow spelling errors. Possible values: B, B or B. Default setting: B. =item I<-m --morphology> Provide morphology information. Possible values: B, B or B. Default setting: B. =item I<-c --chartolerance> Allow umlaut alternatives. Possible values: B, B or B. Default: B. =item I<-l --language> Translation direction. Please note that dict.leo.org always translates either to or from german. The following languages are supported: english, polish, spanish, portuguese russian and chinese. You can specify only the country code, or append B in order to force translation to german, or preprend B in order to translate to the other language. Valid examples: ru to or from russian de2pl to polish es2de spanish to german Valid country codes: en english es spanish fr french ru russian pt portuguese pl polish ch chinese Default: B. =item I<-n --noescapechars> Don't use escapes for highlighting. Default: do highlighting. Controllable via config file too. See below. No highlighting will be used if STDOUT is not connected to a terminal. =item I<-f --force> Don't use the query cache. Default: use the cache. This option has no effect if B is turned off in the config file. =item I<-u --user> Specify the http proxy user to use if your proxy requires authentication. Read the 'PROXY' section for more details. =item I<-p --passwd> Specify the cleartext password to use with http proxy authentication. This is not recommended and just implemented for completeness. =item I<-h --help> Display this help and exit. =item I<-v --version> Display version information and exit. =item I<-d --debug> Enable debugging output (a lot of it, beware!), which will be printed to STDERR. If you find a bug you must supply the debugging output along with your bugreport. =back B is the key word which you want to translate. If the term contains white spaces quote it using double quotes. If the B parameter is not specified, B will read it from STDIN. =head1 CONFIG B reads a config file B<.leo> in your home directory if it exists. The following variables are supported: =over =item I Turns on conversion of UTF8 characters to their latin* encoding. Default setting (if not given): B. =item I Controls the use of the cache (see later). Possible values: B or B. Default setting(if not given): B. If the commandline option B<-f> or B<--force> has been set then the cache will not be used for the query and if for this query exists an entry in the cache it will be removed from it. =item I Controls the use of escape sequences in the terminal output to highlight the key-waord in the result. Possible values: B or B. Default setting(if not given): B. You can set this option via commandline too: B<-n> or B<--noescapechars>. The config option has higher precedence. =item I You may modify the user agent as B identifies itself on the target site. The default is: User-Agent: Mozilla/5.0 (compatible; Konqueror/3.3.1; X11) =back =head1 CACHING B supports caching of queries for faster results if you repeatedly use the same query. A query consists of the given B (the key word or string) plus the translation option settings. If you, for example, execute once the following query: % leo langnase and somewhere later: % leo -c exact then B will treat the latter query as a different one than the previous one, because I behaves different when different translation options are given. =head1 PROXY B can be used with a HTTP proxy service. For this to work, you only have to set the environment variable B. It has the following format: PROTO://[USER:PASSWD@]SERVER[:PORT] The only supported protocol is B. If your proxy works without authentication, you can omit the B part. If no port is specified, B<80> will be used. Here is an example (for bash): export http_proxy=http://172.16.120.120:3128 and an example with authentication credentials: export http_proxy=http://max:34dwe2@172.16.120.120:3128 As security is always important, I have to warn you, that other users on the same machine can read your environment using the 'ps -e ..' command, so this is not recommended. The most secure way for proxy authentication is just to specify the server+port with B but no credentials, and instead use the B<-u> commandline parameter to specify a user (do not use B<-p> to specify the password, this will also be readyble in process listing). In this case, B will ask you interactively for the password. It will try its best to hide it from being displayed when you type it (as most such routines in other tools do it as well), it this fails (e.g. because you do not have the 'stty' tool installed), the password will be read from STDIN. =head1 FILES ~/.leo the config file for leo. Not required. ~/.leo-CACHE.db* the cache file. =head1 AUTHOR Thomas v.D. =head1 BUGS B depends on http://dict.leo.org/. It may break B if they change something on the site. Therefore be so kind and inform me if you encounter some weird behavior of B. In most cases it is not a bug of B itself, it is a website change on http://dict.leo.org/. In such a case repeat the failed query and use the commandline flag B<-d> (which enables debugging) and send the full output to me, thanks. =head1 COPYRIGHT B copyleft 2000-2017 Thomas v.D.. All rights reserved. http://dict.leo.org/ copyright (c) 1995-2017 LEO Dictionary Team. The search results returned by B are based on the work of the people at LEO.org. Thanks for the great work. Some time ago they told me that they are disagreed with B, or in other words: from their point of view B seems to break copyright law in some or another way. I thought a long time about this, but I must deny this. B acts as a simple web client, just like mozilla, IE or even lynx are doing. They are providing the service to the public so I use my favorite web browser to make use of it. In fact my favorite browser to view dict.leo.org is B. There is nothing wrong with that. IMHO. If you disagree or are asked by the LEO team to stop using B you may decide this for yourself. I in my case wrote kinda browser, what is not prohibited. At least not today. =head1 VERSION This is the manpage for B version B<2.01>. =cut WWW-Dict-Leo-Org-2.02/Makefile.PL000644 001751 001751 00000000743 13160646722 016171 0ustar00scipscip000000 000000 # # made for WWW::Dict::Leo::Org 2.01 and up use ExtUtils::MakeMaker; WriteMakefile( 'NAME' => 'WWW::Dict::Leo::Org', 'VERSION_FROM' => 'Org.pm', 'EXE_FILES' => [ 'leo' ], 'clean' => { FILES => '*~' }, 'EXCLUDE_EXT' => [ qw(README) ], 'PREREQ_PM' => { 'Carp::Heavy' => 0, 'IO::Socket::SSL' => 0, 'MIME::Base64' => 0, 'XML::Simple' => 0 } ); WWW-Dict-Leo-Org-2.02/MANIFEST000644 001751 001751 00000000365 13160647124 015345 0ustar00scipscip000000 000000 Changelog Makefile.PL README leo t/run.t samples/singular.pl Org.pm MANIFEST META.yml Module YAML meta-data (added by MakeMaker) META.json Module JSON meta-data (added by MakeMaker) WWW-Dict-Leo-Org-2.02/Changelog000644 001751 001751 00000015311 13160647010 016015 0ustar00scipscip000000 000000 2.02: Fixed rt.cpan.org#123087: add IO::Socket::SSL as dependency. 2.01: dict.leo.org now forces SSL via Cloudflare, we follow suit. 2.00: Fixed rt.cpan.org#119714 rt.cpan.org#120563 and https://github.com/TLINDEN/leo/pull/1: We're now implementing the XML interface, since the HTML interface is no longer available. Many thanks to Roland Hieber for the help! Fixed DB_File loading, now more portable. 1.45: fixed rt.cpan.org#118472. 1.44: Fixed error handling, added sample code for module usage. 1.43: Fix POD. 1.42: Better tabluar output (better column calc). However, russian grapheme length is still wrong. 1.41: Generalized lang parsing and passing to dict.leo.org, which also adds support for new languages like ru, ch or pl. Thanks to J.A.Eichler. 1.40: not logged, sorry. 1.39: fixed rt.cpan.org#91464: disable caching if DB_File is not found. 1.38: fixed rt.cpan.org#92944, missed translations. The problem was, that the pda.leo.org uses a differnt number of tables depending on the translation. So, now we just fetch all tables (2-4) and ignore those which are not translations (forum posts or empty cells). Applied patch rt.cpan.org#92914 (POD locale encoding). 1.37: fixed rt.cpan.org#92679, the site url and table structure changed. 1.36: applied patch rt.cpan.org#35543, which fixes handling of utf8 terminals. fixed rt.cpan.org#84196 using the suggested changes. applied patch rt.cpan.org#86641, spelling fixes. 1.35: Switched to use pda.leo.org, which is easier to parse, faster to load and seems not change that often. 1.34: Oh, if a search only returns one section, nothing have been returned. This was a bug which has been fixed. 1.33: Replaced handcrafted html parser by HTML::TableParser module. It was over seven years old, so time has come. The new parsing should now be much more stable and catch most, if not all, tinkering on dict.leo.org. The organization of the array of hashes returned by ::translate() has changed. Take a look at the example in the pod or view the supplied 'leo' script how to use it. 1.32: Fixed strange packaging bug. Fixed yet another parsing bug (there are now titles formatted different than other titles, damn). Changed default translation direction to 0, so that dict.leo.org determines automatically the direction to use. 1.31: Fixed dict.leo.org siteupdates (back to stoneage: they added some invalid html again). 1.30: Fixed bug in leo script, it did not load WWW::Dict::Leo::Org but just Org, which I did during development, but which doesn't work after installation. Updated the version in leo script to 1.30 too. 1.29: Added changelog entry for 1.28 (actually I forgot it) Added documentation for the methods of the module (forgotten too) 1.28: Transformed the script into a installable perl module. 'leo' itself still exists but uses now this module for the actual translation job. The perl module WWW::Dict::Leo::Org can be used from perl scipts and applications for translation. 1.27: fixed site updates on dict.leo.org - hey, they finally fixed some faulty html which in fact caused parse errors in the script. changed the default user agent to a some more recent one. 1.26: bugfix - removed gzip accept header. 1.25: fixed latest site update, they added javascript popup-links. Thanks to Sylvia for the hint. 1.24: bugfix: last patch didn't work with proxy. 1.23: fixed latest site update (lang must be part of the cgi url, for whatever reason). Thanks to Tobi Werth for the patch! 1.22: added more translation options (-l): - en (used so far as the default), leo.org: en<->de - fr, leo.org: fr<->de - fr2de - de2fr if no string is given as input, stdin will be read (one line) instead; if stdout is not connected to a tty, no escape chars will be used, e.g.: echo "attention" | leo -l fr | grep acht This allows leo to be used for scripting or something, you may imagine yourself the possibilities. added proxy authentication support. added a section about proxy to the man-page. 1.21: added -d flag to get debugging informations. fixed parser bug, which occured under some rare circumstances. 1.20: applied patch by almaric, which reflects latest site changes. the user agent can now be configured in the config file. 1.19: once more, they changed the site design (and - btw - it contains still HTML failures!). 1.18: reflection of DICT site changes. 1.17: there's a "more Examples..." link on the bottom of doct.leo.org if the query returned too many examples. leo did not properly respond to this, which is fixed now. But: leo does *not* fetch the pending examples at the moment! 1.16: reflection of DICT site changes. new html parser code, seems to be more stable against site changes, I think. 1.15: lost 1.14: added cache feature. added manpage, install Makefile and README file. 1.13: reflection of DICT site changes. revision 1.12 date: 2002/07/22 20:03:17; author: scip; state: Exp; lines: +131 -15 added some commandline options to reflect some new dict.leo.org features ---------------------------- revision 1.11 date: 2002/07/18 19:11:58; author: scip; state: Exp; lines: +3 -3 applied patch by Thomas Glanzmann , which fixes "no result" parsing ---------------------------- revision 1.10 date: 2002/07/17 20:56:52; author: scip; state: Exp; lines: +11 -9 updated 4 new leo (july/2002) ---------------------------- revision 1.9 date: 2001/09/21 17:19:31; author: scip; state: Exp; lines: +31 -10 added proxy support to leo ---------------------------- revision 1.8 date: 2001/06/21 23:42:17; author: scip; state: Exp; lines: +5 -5 committed ---------------------------- revision 1.7 date: 2001/06/05 21:16:24; author: scip; state: Exp; lines: +19 -6 fixed: returns now an error, if leo did not find a match added: we are now advertising us as Konqueror :-) ---------------------------- revision 1.6 date: 2001/05/26 01:48:36; author: scip; state: Exp; lines: +6 -2 added copyright message ---------------------------- revision 1.5 date: 2001/05/24 21:17:34; author: scip; state: Exp; lines: +19 -7 removed call to lynx, use IO::Socket from now on! ---------------------------- revision 1.4 date: 2001/05/24 01:39:25; author: scip; state: Exp; lines: +2 -2 added alphabetical sorting ---------------------------- revision 1.3 date: 2001/05/24 01:33:22; author: scip; state: Exp; lines: +2 -2 changed bold to blue ---------------------------- revision 1.2 date: 2001/05/24 01:15:33; author: scip; state: Exp; lines: +1 -1 initial submit ---------------------------- revision 1.1 date: 2001/05/24 01:14:32; author: scip; state: Exp; branches: 1.1.1; Initial revision ---------------------------- revision 1.1.1.1 date: 2001/05/24 01:14:32; author: scip; state: Exp; lines: +0 -0 scripts entered into cvs WWW-Dict-Leo-Org-2.02/samples/000755 001751 001751 00000000000 13160647124 015654 5ustar00scipscip000000 000000 WWW-Dict-Leo-Org-2.02/t/000755 001751 001751 00000000000 13160647124 014453 5ustar00scipscip000000 000000 WWW-Dict-Leo-Org-2.02/README000644 001751 001751 00000002030 13157727367 015101 0ustar00scipscip000000 000000 INTRODUCTION WWW::Dict::Leo::Org - Interface module to http://dict.leo.org/ leo - commandline interface to WWW::Dict::Leo::Org. INSTALLATION perl Makefile.PL make make test make install DOCUMENTATION man leo perldoc WWW::Dict::Leo::Org HISTORY WWW::Dict::Leo::Org exists as the script "leo" since 1999, which is used widely on earth. Begining with version 1.28 I extracted the most significant code into an extra perlmodule (WWW::Dict::Leo::Org), so that the translation feature can be used from perl too. However, the commandline script "leo" still exists, it will be installed together with the module. Please note, that the script from 1.28 on is no more usable in standalone form, in fact it uses the module to do the job now. COPYRIGHT WWW::Dict::Leo::Org + leo Copyright (c) 2007-2017 by Thomas v.D. http://dict.leo.org/ Copyright (c) 1995-2017 LEO Dictionary Team. The search results returned by leo are based on the work of the people at LEO.org. Thanks for the great work. WWW-Dict-Leo-Org-2.02/Org.pm000644 001751 001751 00000027372 13160647030 015304 0ustar00scipscip000000 000000 # # Copyleft (l) 2000-2017 Thomas v.D. . # # leo may be # used and distributed under the terms of the GNU General Public License. # All other brand and product names are trademarks, registered trademarks # or service marks of their respective holders. package WWW::Dict::Leo::Org; $WWW::Dict::Leo::Org::VERSION = "2.02"; use strict; use warnings; use English '-no_match_vars'; use Carp::Heavy; use Carp; use IO::Socket::SSL; use MIME::Base64; use XML::Simple; use Encode; sub debug; sub new { my ($class, %param) = @_; my $type = ref( $class ) || $class; my %settings = ( "-Host" => "dict.leo.org", "-Port" => 443, "-UserAgent" => "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0", "-Proxy" => "", "-ProxyUser" => "", "-ProxyPass" => "", "-Debug" => 0, "-Language" => "en", # en2de, de2fr, fr2de, de2es, es2de "data" => {}, # the results "section" => [], "title" => "", "segments" => [], "Maxsize" => 0, "Linecount" => 0, ); foreach my $key (keys %param) { $settings{$key} = $param{$key}; # override defaults } my $self = \%settings; bless $self, $type; return $self; } sub translate { my($this, $term) = @_; if (! $term) { croak "No term to translate given!"; } my $linecount = 0; my $maxsize = 0; my @match = (); # # form var transitions for searchLoc(=translation direction) and lp(=language) my %lang = ( speak => "ende" ); my @langs = qw(en es ru pt fr pl ch it); if ($this->{"-Language"}) { # en | fr | ru2en | de2pl etc # de2, 2de, de are not part of lang spec if (! grep { $this->{"-Language"} =~ /$_/ } @langs) { croak "Unsupported language: " . $this->{"-Language"}; } my $spec = $this->{"-Language"}; my $l; if ($spec =~ /(..)2de/) { $l = $1; $this->{"-Language"} = -1; $lang{speak} = "${l}de"; } elsif ($spec =~ /de2(..)/) { $l = $1; $this->{"-Language"} = 1; $lang{speak} = "${l}de"; } else { $lang{speak} = $this->{"-Language"} . 'de'; $this->{"-Language"} = 0; } } # add language my @form; push @form, "lp=$lang{speak}"; # # process whitespaces # my $query = $term; $query =~ s/\s\s*/ /g; $query =~ s/\s/\+/g; push @form, "search=$query"; # # make the query cgi'ish # my $form = join "&", @form; # store for result caching $this->{Form} = $form; # # check for proxy settings and use it if exists # otherwise use direct connection # my ($url, $site); my $ip = $this->{"-Host"}; my $port = $this->{"-Port"}; my $proxy_user = $this->{"-ProxyUser"}; my $proxy_pass = $this->{"-ProxyPass"}; if ($this->{"-Proxy"}) { my $proxy = $this->{"-Proxy"}; $proxy =~ s/^http:\/\///i; if ($proxy =~ /^(.+):(.+)\@(.*)$/) { # proxy user account $proxy_user = $1; $proxy_pass = $2; $proxy = $3; $this->debug( "proxy_user: $proxy_user"); } my($host, $pport) = split /:/, $proxy; if ($pport) { $url = "http://$ip:$port/dictQuery/m-vocab/$lang{speak}/query.xml"; $port = $pport; } else { $port = 80; } $ip = $host; $this->debug( "connecting to proxy:", $ip, $port); } else { $this->debug( "connecting to site:", $ip, "port", $port); $url = "/dictQuery/m-vocab/$lang{speak}/query.xml"; } my $conn = new IO::Socket::SSL( #Proto => "tcp", PeerAddr => $ip, PeerPort => $port, SSL_verify_mode => SSL_VERIFY_NONE ) or die "Unable to connect to $ip:$port: $!\n"; $conn->autoflush(1); $this->debug( "GET $url?$form HTTP/1.0"); print $conn "GET $url?$form HTTP/1.0\r\n"; # be nice, simulate Konqueror. print $conn qq($this->{"-UserAgent"} Host: $this->{"-Host"}:$this->{"-Port"} Accept: text/*;q=1.0, image/png;q=1.0, image/jpeg;q=1.0, image/gif;q=1.0, image/*;q=0.8, */*;q=0.5 Accept-Charset: iso-8859-1;q=1.0, *;q=0.9, utf-8;q=0.8 Accept-Language: en_US, en\r\n); if ($this->{"-Proxy"} and $proxy_user) { # authenticate # construct the auth header my $coded = encode_base64("$proxy_user:$proxy_pass"); $this->debug( "Proxy-Authorization: Basic $coded"); print $conn "Proxy-Authorization: Basic $coded\r\n"; } # finish the request print $conn "\r\n"; # # parse dict.leo.org output # $site = ""; my $got_headers = 0; while (<$conn>) { if ($got_headers) { $site .= $_; } elsif (/^\r?$/) { $got_headers = 1; } elsif ($_ !~ /HTTP\/1\.(0|1) 200 OK/i) { if (/HTTP\/1\.(0|1) (\d+) /i) { # got HTTP error my $err = $2; if ($err == 407) { croak "proxy auth required or access denied!\n"; close $conn; return (); } else { croak "got HTTP error $err!\n"; close $conn; return (); } } } } close $conn or die "Connection failed: $!\n"; $this->debug( "connection: done"); $this->{Linecount} = 0; $this->{Maxsize} = 0; # parse the XML my $xml = new XML::Simple; my $data = $xml->XMLin($site, ForceArray => [ 'section', 'entry' ], ForceContent => 1, KeyAttr => { side => 'lang' } ); my (@matches, $from_lang, $to_lang); $from_lang = substr $lang{speak}, 0, 2; $to_lang = substr $lang{speak}, 2, 2; foreach my $section (@{$data->{sectionlist}->{section}}) { my @entries; foreach my $entry (@{$section->{entry}}) { my $left = $this->parse_word($entry->{side}->{$from_lang}->{words}->{word}); my $right = $this->parse_word($entry->{side}->{$to_lang}->{words}->{word}); push @entries, { left => $left, right => $right }; if ($this->{Maxsize} < length($left)) { $this->{Maxsize} = length($left); } $this->{Linecount}++; } push @matches, { title => encode('UTF-8', $section->{sctTitle}), data => \@entries }; } return @matches; } # parse all the s and build a string sub parse_word { my ($this, $word) = @_; if (ref $word eq "HASH") { if ($word->{content}) { return encode('UTF-8', $word->{content}); } elsif ($word->{cc}) { # chinese simplified, traditional and pinyin return encode('UTF-8', $word->{cc}->{cs}->{content} . "[" . $word->{cc}->{ct}->{content} . "] " . $word->{cc}->{pa}->{content}); } } elsif (ref $word eq "ARRAY") { # FIXME: include alternatives, if any return encode('UTF-8', @{$word}[-1]->{content}); } else { return encode('UTF-8', $word); } } sub grapheme_length { my($this, $str) = @_; my $count = 0; while ($str =~ /\X/g) { $count++ }; return $count; } sub maxsize { my($this) = @_; return $this->{Maxsize}; } sub lines { my($this) = @_; return $this->{Linecount}; } sub form { my($this) = @_; return $this->{Form}; } sub debug { my($this, @msg) = @_; if ($this->{"-Debug"}) { print STDERR "%DEBUG: " . join(" ", @msg) . "\n"; } } 1; =encoding ISO8859-1 =head1 NAME WWW::Dict::Leo::Org - Interface module to dictionary dict.leo.org =head1 SYNOPSIS use WWW::Dict::Leo::Org; my $leo = new WWW::Dict::Leo::Org(); my @matches = $leo->translate($term); =head1 DESCRIPTION B is a module which connects to the website B and translates the given term. It returns an array of hashes. Each hash contains a left side and a right side of the result entry. =head1 OPTIONS B has several parameters, which can be supplied as a hash. All parameters are optional. =over =item I<-Host> The hostname of the dict website to use. For the moment only dict.leo.org is supported, which is also the default - therefore changing the hostname would not make much sense. =item I<-Port> The tcp port to use for connecting, the default is 80, you shouldn't change it. =item I<-UserAgent> The user-agent to send to dict.leo.org site. Currently this is the default: Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1.9) Gecko/20071025 Firefox/2.0.0.9 =item I<-Proxy> Fully qualified proxy server. Specify as you would do in the well known environment variable B, example: -Proxy => "http://192.168.1.1:3128" =item I<-ProxyUser> I<-ProxyPass> If your proxy requires authentication, use these parameters to specify the credentials. =item I<-Debug> If enabled (set to 1), prints a lot of debug information to stderr, normally only required for developers or to report bugs (see below). =back Parameters to control behavior of dict.leo.org: =over =item I<-Language> Translation direction. Please note that dict.leo.org always translates either to or from german. The following languages are supported: english, polish, spanish, portuguese russian and chinese. You can specify only the country code, or append B in order to force translation to german, or preprend B in order to translate to the other language. Valid examples: ru to or from russian de2pl to polish es2de spanish to german Valid country codes: en english es spanish fr french ru russian pt portuguese pl polish ch chinese Default: B. =back =head1 METHODS =head2 translate($term) Use this method after initialization to connect to dict.leo.org and translate the given term. It returns an array of hashes containing the actual results. use WWW::Dict::Leo::Org; use Data::Dumper; my $leo = new WWW::Dict::Leo::Org(); my @matches = $leo->translate("test"); print Dumper(\@matches); which prints: $VAR1 = [ { 'data' => [ { 'left' => 'check', 'right' => 'der Test' }, { 'left' => 'quiz (Amer.)', 'right' => 'der Test    [Schule]' ], 'title' => 'Unmittelbare Treffer' }, { 'data' => [ { 'left' => 'to fail a test', 'right' => 'einen Test nicht bestehen' }, { 'left' => 'to test', 'right' => 'Tests macheneinen Test machen' } ], 'title' => 'Verben und Verbzusammensetzungen' }, 'data' => [ { 'left' => 'testing  adj.', 'right' => 'im Test' } ], 'title' => 'Wendungen und Ausdrücke' } ]; You might take a look at the B script how to process this data. =head2 maxsize() Returns the size of the largest returned term (left side). =head2 lines() Returns the number of translation results. =head2 form() Returns the submitted form uri. =head1 SEE ALSO L =head1 COPYRIGHT WWW::Dict::Leo::Org - Copyright (c) 2007-2017 by Thomas v.D. L - Copyright (c) 1995-2016 LEO Dictionary Team. =head1 AUTHOR Thomas v.D. =head1 HOW TO REPORT BUGS Use L to report bugs, select the queue for B. Please don't forget to add debugging output! =head1 VERSION 2.01 =cut WWW-Dict-Leo-Org-2.02/META.json000644 001751 001751 00000001654 13160647124 015637 0ustar00scipscip000000 000000 { "abstract" : "unknown", "author" : [ "unknown" ], "dynamic_config" : 1, "generated_by" : "ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.130880", "license" : [ "unknown" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "WWW-Dict-Leo-Org", "no_index" : { "directory" : [ "t", "inc" ] }, "prereqs" : { "build" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "configure" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "runtime" : { "requires" : { "Carp::Heavy" : "0", "IO::Socket::SSL" : "0", "MIME::Base64" : "0", "XML::Simple" : "0" } } }, "release_status" : "stable", "version" : "2.02" } WWW-Dict-Leo-Org-2.02/META.yml000644 001751 001751 00000000766 13160647124 015472 0ustar00scipscip000000 000000 --- abstract: unknown author: - unknown build_requires: ExtUtils::MakeMaker: 0 configure_requires: ExtUtils::MakeMaker: 0 dynamic_config: 1 generated_by: 'ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.130880' license: unknown meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 name: WWW-Dict-Leo-Org no_index: directory: - t - inc requires: Carp::Heavy: 0 IO::Socket::SSL: 0 MIME::Base64: 0 XML::Simple: 0 version: 2.02 WWW-Dict-Leo-Org-2.02/t/run.t000644 001751 001751 00000000473 12652154625 015454 0ustar00scipscip000000 000000 # -*-perl-*- # testscript for WWW::Dict::Leo::Org Class by Thomas v.D. use Test::More qw(no_plan); BEGIN { use_ok "WWW::Dict::Leo::Org" }; require_ok("WWW::Dict::Leo::Org"); # unfortunately I cannot add more tests, because # this would require internet connectivity which # is not the case for all cpan testers. WWW-Dict-Leo-Org-2.02/samples/singular.pl000755 001751 001751 00000001653 12776406703 020055 0ustar00scipscip000000 000000 #!/usr/bin/perl use WWW::Dict::Leo::Org; # configure access to dict.leo.org my $leo = new WWW::Dict::Leo::Org( -UserAgent => 'IE 19', #-Proxy => 'http://127.0.0.1:3128', #-ProxyUser => 'me', #-ProxyPass => 'pw', -Debug => 0, -SpellTolerance => 'on', -Morphology => 'standard', -CharTolerance => 'relaxed', -Language => 'de2ru' ); # fetch matches my @matches = $leo->translate(shift || die "Usage: $0 \n"); # print the first, if any if (@matches && $leo->lines() >= 1) { printf "%s\n", $matches[0]->{data}->[0]->{left}; } else { print "fail\n"; }