HTTP-Proxy-0.300000755001750001750 012130250124 12447 5ustar00bookbook000000000000HTTP-Proxy-0.300/Build.PL000444001750001750 130212130250124 14074 0ustar00bookbook000000000000use strict; use warnings; use Module::Build; my $builder = Module::Build->new( module_name => 'HTTP::Proxy', license => 'perl', dist_author => 'Philippe "BooK" Bruhat ', dist_version_from => 'lib/HTTP/Proxy.pm', configure_requires => { 'Module::Build' => 0.38, }, test_requires => { 'Test::More' => 0, }, requires => { 'HTTP::Daemon' => 1.25, 'LWP::UserAgent' => 2, }, add_to_cleanup => [ 'HTTP-Proxy-*' ], meta_merge => { resources => { repository => 'http://github.com/book/HTTP-Proxy', }, }, ); $builder->create_build_script(); HTTP-Proxy-0.300/Makefile.PL000444001750001750 75612130250124 14546 0ustar00bookbook000000000000use ExtUtils::MakeMaker; WriteMakefile( NAME => 'HTTP::Proxy', VERSION_FROM => 'lib/HTTP/Proxy.pm', PREREQ_PM => { 'Test::More' => 0, 'HTTP::Daemon' => 1.25, 'LWP::UserAgent' => 2, }, PL_FILES => {}, ABSTRACT_FROM => 'lib/HTTP/Proxy.pm', AUTHOR => 'Philippe "BooK" Bruhat ', META_MERGE => { resources => { repository => 'http://github.com/book/HTTP-Proxy', }, }, ); HTTP-Proxy-0.300/MANIFEST000444001750001750 277012130250124 13743 0ustar00bookbook000000000000Build.PL Changes eg/adblock.pl eg/anonymiser.pl eg/ayb.pl eg/bork.pl eg/dragon.pl eg/flv.pl eg/fudd.pl eg/https.pl eg/javascript.pl eg/js.pl eg/leet.pl eg/logger.pl eg/outline.pl eg/pdf.pl eg/perlmonks.pl eg/post.pl eg/proxy-auth.pl eg/proxy.pl eg/rainbow.pl eg/README eg/rfc.pl eg/rot13.pl eg/switch.pl eg/tracker.pl eg/trim.pl eg/yahoogroups.pl lib/HTTP/Proxy.pm lib/HTTP/Proxy/BodyFilter.pm lib/HTTP/Proxy/BodyFilter/complete.pm lib/HTTP/Proxy/BodyFilter/htmlparser.pm lib/HTTP/Proxy/BodyFilter/htmltext.pm lib/HTTP/Proxy/BodyFilter/lines.pm lib/HTTP/Proxy/BodyFilter/save.pm lib/HTTP/Proxy/BodyFilter/simple.pm lib/HTTP/Proxy/BodyFilter/tags.pm lib/HTTP/Proxy/Engine.pm lib/HTTP/Proxy/Engine/Legacy.pm lib/HTTP/Proxy/Engine/NoFork.pm lib/HTTP/Proxy/Engine/ScoreBoard.pm lib/HTTP/Proxy/Engine/Threaded.pm lib/HTTP/Proxy/FilterStack.pm lib/HTTP/Proxy/HeaderFilter.pm lib/HTTP/Proxy/HeaderFilter/simple.pm lib/HTTP/Proxy/HeaderFilter/standard.pm Makefile.PL MANIFEST This list of files META.json META.yml README t/00basic.t t/01pod.t t/02pod-coverage.t t/05new.t t/10init.t t/11log.t t/15accessors.t t/15deprecated.t t/16stash.t t/17fstack.t t/18engine.t t/20dummy.t t/20keepalive.t t/22http.t t/22transparent.t t/23connect.t t/23https.t t/40push_filters.t t/41filters.t t/42will_modify.t t/50hopbyhop.t t/50standard.t t/50via.t t/51simple.t t/51simple2.t t/61simple.t t/61simple2.t t/64htmltext.t t/64lines.t t/64tags.t t/66htmlparser.t t/67complete.t t/67save.t t/71rot13.t t/90httpstatus.t t/README t/test.html t/Utils.pm HTTP-Proxy-0.300/Changes000444001750001750 4710012130250124 14121 0ustar00bookbook000000000000Revision history for Perl extension HTTP::Proxy 0.300 Sun Apr 7 2013 [IMPROVEMENTS] - Downgrade a disconnect message from ERROR to SOCKET debug level in order to reduce the amount of runtime log output. Disconnects are /very/ common in HTTP, and shoudn't be considered errors. (thanks to Salve J. Nilsen) [TEST] - include t/90httpstatus.t in the MANIFEST - fix t/67save.t to not try to create files with a "?" in their name under MSWin32 0.29 Tue Mar 19 2013 [TEST] - fix t/50hopbyhop.t to open the proxy on a random port, and avoid failures when port 8080 is "already in use" 0.28 Thu Mar 14 2013 [FIXES] - the Accept-Encoding header removal code was broken in the previous version. Now the header will be removed as soon as a body filter is configured for the proxy. [TEST] - use File::Spec in the test suite to compute portable file names, to avoid some test failures, like http://www.cpantesters.org/cpan/report/856ca676-6bf5-1014-bfa1-9d8aa3912248 0.27 Fri Mar 8 2013 [IMPROVEMENTS] - in HTTP::Proxy::HeaderFilter::standard, now remove the Accept-Encoding header only when we know we'll actually look at the response body [TESTS] - use httpstat.us to test HTTP statuses 0.26 Wed Feb 6 2013 [IMPROVEMENTS] - remove a "Use of "goto" to jump into a construct is deprecated" warning (Tom Hukins) [DOCUMENTATION] - fix RT #77685 (Tom Hukins) - improved the number of links to other modules from the documentation [TEST] - fix RT #71771 (Tom Hukins) - fix test failures in POD tests (Tom Hukins) 0.25 Sun Jul 3 00:28:10 CEST 2011 [ENHANCEMENTS] - new Engine: HTTP::Proxy::Engine::Threaded, by Angelos Karageorgiou [FIXES] - Correctly call eod() when the response has no body (closed RT ticket #48310) 0.24 Tue Jul 21 21:28:02 CEST 2009 [ENHANCEMENTS] - When a short-circuit response was send, the next response would not be filtered at all. This has been fixed. [FIXES] - yet another fix for t/23connect, proposed by Marek Rouchal (closed RT ticket #38995) [test skipped for now] - HTTP::Headers::Util's split_header_words() returns lower case tokens/keys since October 6, 2008. Fix by Maurice Aubrey. (closed RT tickets #43249, #43622) 0.23 Thu Sep 4 02:29:47 CEST 2008 [ENHANCEMENTS] - HTTP::Proxy::BodyFilter::save had an issue with cygwin because of an incorrect use of File::Spec's catdir(). This is fixed. - CONNECT requests are now forwarded to the upstream proxy, if there is one. Errors from the upstream proxy are relayed to the client. [TESTS] - t/23connect.t does not use sysread() anymore. This time the test should pass about everywhere. 0.22 Thu May 1 00:18:38 CEST 2008 [TESTS] - increased test coverage - t/23connect.t doesn't need an Internet connection any more, thus closing RT ticket #19653. - t/67complete.t tests HTTP::Proxy::BodyFilter::complete [DOCUMENTATION] - closed RT ticket #33465 (Jimbo), by explaining in a little more detail how HTTP::Proxy::BodyFilter::complete works. 0.21 Sun Apr 20 04:34:47 CEST 2008 [ENHANCEMENTS] - HTTP::Proxy::Engine::Legacy and HTTP::Proxy::Engine::ScoreBoard log the number of remaining child processes (in addition to their pids), thanks to Amos Shapira. [FIXES] - HTTP::Proxy::BodyFilter::save had a bug that prevented the 'filename' parameter to be correctly used to compute the filename to save to, and that made the proxy die the second time the filter was called. This fix allowed to close RT tickets #14548 (Max Maischein), #18644 (Mark Tilford) and #33018 (Roland Stigge and Gunnar Wolf). - HTTP::Proxy::BodyFilter::save had many other bugs, which the test suite allowed to spot and fix. [TESTS] - t/67save.t provides 96% coverage of HTTP::Proxy::BodyFilter::save, and helped fix many bugs in it. - fixed t/22http.t and t/22transparent.t not to break when the DNS wrongly resolves an invalid address. [DOCUMENTATION] - closed RT ticket #25295 (Matsuno Tokuhiro) with a doc patch. 0.20 Fri Aug 18 10:25:11 CEST 2006 [ENHANCEMENTS] - Added a will_modify() method to HTTP::Proxy::BodyFilter, that lets the proxy know if a filter may modify the content length, thus closing RT ticket #21051 (Chris Dolan) - If no filter in the current stack will modify the content length, then the header is not removed [FIXES] - closed RT tickets #3184 and #20251 (chunked encoding was enforced while transfering data between a client and server using different versions of HTTP, causing unwanted garbage to appear in the data) - removed useless "ERROR: Getting request failed:" messages when there are simply "No more requests from this connection" [INTERNALS] - Removed the HTTP::Proxy::FilterStack class from inside HTTP::Proxy and put it in its own module file - renamed HTTP::Proxy::FilterStack::active() as will_modify() for consistency reasons [TESTS] - updated t/22http.t and t/23connect.t following Ken Williams' recommandations in RT ticket #19986 [DOCUMENTATION] - patched a small inconsistency in HTTP::Proxy::BodyFilter's documentation (and closed RT ticket #20303) - fully documented HTTP::Proxy::FilterStack 0.19 Fri Apr 28 19:55:41 CEST 2006 [ENHANCEMENTS] - HTTP::Proxy::HeaderFilter::simple now lets one define an end() method as well [FIXES] - HTTP::Proxy::(Body|Header)Filter::simple now provide a default filter() that does nothing if their constructor is not given one (thanks to Merijn Brand) - close RT ticket #14548 by correcting the 'filename' check in HTTP::Proxy::BodyFilter::save (Max Maischein) - ERROR messages are always logged (Mark Tilford) [TESTS] - more tests for log() and logmask() in t/11log.t 0.18 Sun Mar 19 23:49:38 CET 2006 [ENHANCEMENTS] - the new known_methods() method can return all method names know to HTTP::Proxy (can be helpful with the method parameter of push_filter()) [FIXES] - close RT ticket #14898 by using a per-parent lockfile for HTTP::Proxy::Engine::ScoreBoard (Chris Dolan) - close RT ticket #18243 by adding missing DeltaV methods (Stephen Steneker) [EXAMPLES] - eg/perlmonks.pl - redirect perlmonks.com to perlmonks.org 0.17 Wed Sep 28 23:25:17 CEST 2005 [ENHANCEMENTS] - Thanks to Randal Schwartz, a new HTTP::Proxy::Engine::ScoreBoard engine is available. I've benchmarked a twofold speed increase. This engine is still beta, you must enable it by hand. 0.16 Thu Sep 1 19:13:55 CEST 2005 [ENHANCEMENTS] - the new HTTP::Proxy::Engine class and its subclasses now handle the life and death of child processes - the Content-Length header is now removed only if body filters will be applied on the response body - HTTP::Proxy now supports some Apache-like attributes (start_servers, max_clients, max_requests_per_child, min_spare_servers, max_spare_servers, keep_alive, max_keep_alive_requests, keep_alive_timeout) - added support for ALL WebDAV/DeltaV methods - the query argument to push_filter(), added in 0.14, should now work (thanks to Simon Cozens for spotting the problem) - the proxy now has a stash, which is a hash where filters can store data (possibly to share it). (Requested by Mark Fowler) Warning: since the proxy forks for each TCP connection, the data is only shared between filters in the same child process. [DEPRECATION] - the maxchild, maxconn and maxserve accessors are now deprecated. They will disappear in the future: + maxchild has no replacement (should be handled by the engine) + maxconn becomes max_connections + maxserve becomes max_keep_alive_requests - Information regarding the way the engine should behave must passed in the constructor or directly to the engine [NEW METHODS] - $proxy->engine() return the HTTP::Proxy::Engine instance - $proxy->new_connection() increase the TCP connections counter (should only be used by HTTP::Proxy::Engine object) [FIXES] - Makefile.PL was not playing nice with Build.PL in the previous distributions. This has been fixed. Sorry for the inconvenience - no more annoying "getsockname() on closed socket GEN0" warnings (they appeared in 0.14) [Win32 SUPPORT] - Win32 is now supported! badly supported, but supported... - until someone writes a decent engine for Win32, the default Win32 engine will be HTTP::Proxy::Engine::NoFork, which can only handle a single TCP connection at a time [EXAMPLES] - eg/yahoogroups.pl - removes ad interruptions from Yahoo! Groups - eg/https.pl - peek/poke at encrypted web pages - eg/logger.pl - improved the logger script 0.15 Tue Apr 5 21:17:40 CEST 2005 [ENHANCEMENTS] - added support for WebDAV methods (requested by Christian Laursen) - The filter selection is based on the original request and response, as it should - improved kwalitee [DEPRECATION] - the start() method is no longer supported in HTTP::Proxy::BodyFilter subclasses. Use begin() instead. [EXAMPLES] - eg/pdf.pl - save \.pdf files and send a HTML confirmation instead (idea by Emmanuel Di Prétoro) 0.14 Tue Mar 29 11:40:51 CEST 2005 [ANNOUNCE] - It's been more than a year since last release, which is bad. I now plan to release new versions more often, maybe about once a month, or when there are big changes. [DEPRECATION] - the start() method of HTTP::Proxy::Bodyfilter subclasses is renamed begin(), since it now has an end() counterpart. (On a related note, I improve my English. Be sure to check out http://www.landgren.net/perl/lt-2004.html for details) - start() in filters is therefore declared deprecated, an error message is logged. The start() method will not be called any more as from 0.15. - the FILTER constant is now named FILTERS. FILTER will disappear in 0.15 as well. [ENHANCEMENTS] - subclasses of HTTP::Proxy::BodyFilter can now have a finalisation method, named end() - the start^Wbegin() method of HTTP::Proxy::BodyFilter subclasses now receive the message as an argument - new built-in filter: HTTP::Proxy::BodyFilter::save that can save the message body to a file while browsing - new built-in filter: HTTP::Proxy::BodyFilter::complete that stores the message body in memory and passes it on to the next filters only when it's complete - logs have cleaner prefixes and the pid is always shown - should work under 5.005_03 (Thanks to Mathieu Arnold) - transparent proxying support (mostly to please Martin Zdila and Paul Makepeace) - push_filter() should now accept the query parameter [FIXES] - the FILTER constant is now named FILTERS [BUGS] - the proxy does not work under Win32, except if you force maxchild to 0 (no forking at all). - t/20dummy.t (and a few otrhers) hanged under Win32 + Ken Hirsch proposed a patch for HTTP::Daemon (and a workaround for HTTP::Proxy's daemon object) + Bruno De Fraine tracked down the problem to the fork() emulation by Windows threads under Win32 that lead to a deadlock. => both explanations cover the same problem, which I haven't been able to correct yet - all the tests that fork a proxy and a server are therefore skipped under Win32. This is ugly, and will change in the future. [EXAMPLES] - eg/rfc.pl - save rfc\d+\.txt files as we browse them - eg/js.pl - save \.js files as we browse them - eg/dragon.pl - enhance the Dragon Go Server web site - eg/fudd.pl - make the web tawk wike Ewmer J. Fudd - eg/switch.pl - switch proxies as you browse - Changed all the examples so that they can take HTTP::Proxy::new() parameters on the command-line (so, call them with logmask 3, for example) [DOCUMENTATION] - documentation for the filter initialisation methods - removed all references to the so-called "store-and-forward" mechanism (see HTTP::Proxy::BodyFilter::complete) 0.13 Wed Mar 3 17:36:31 CET 2004 [ENHANCEMENTS] - CONNECT support (but only transparently...) - the client_headers() method (similar to hop_headers()) give the filters access to the proxy's LWP::UA Client-* headers - filters are applied on all supported methods by default [FIXES] - removed everything regarding control() and control_regex(), which were not used and confusing [TESTS] - tests for CONNECT support - tests for SSL support (not working yet) [EXAMPLES] - eg/adblock.pl - a very simple adblocker - eg/trim.pl - trims whitespace from HTML pages - eg/javascript.pl - add any text right after [DOCUMENTATION] - separate COPYRIGHT and LICENSE sections in all man pages 0.12 Thu Jan 22 23:54:03 CET 2004 [ENHANCEMENTS] - send the error message to the client when the Proxy agent dies (usually because of a filter error) - the proxy now sends a X-Forwarded-For header by default (and the proxy method x_forwarded_for can toggle this) - the proxy method client_socket() gives access to the socket connected to the current client (the example in Changes for 0.10 was wrong: one can get the IP address of the connected agent from inside a filter with $self->proxy->client_socket->peerhost) [FIXES] - do not block simultaneous connections when not forking - clean up the filter chain after the body-request filters - ensure the filter stack is reinitialised between requests [TESTS] - tests for X-Forwarded-For - test the proxy against http://diveintomark.org/tests/client/http/ [EXAMPLES] - eg/post.pl - outputs the URI and parameters of all POST requests - eg/logger.pl - outputs details of GET and POST requests 0.11 Fri Jan 2 17:02:08 CET 2004 [ENHANCEMENTS] - setting maxchild to 0 prevents forking (Jim Cromie) - filters can now match on the query string - hop-by-hop headers and Max-Forwards headers are correctly supported - new mutators added to HTTP::Proxy: via, hop_headers, request, response - filters can now answer in place of the server, which allows for authorisation filters, cache (?) filters, etc. - new examples scripts: proxy-auth.pl [FIXES] - push_filter() now correctly supports several match criteria [TESTS] - all the Via: header tests are now in t/50via.t - t/50standard.t now checks headers for several request types - new tests: + t/51simple2.t - check response header filters with an actual proxy + t/61simple2.t - check response body filters with an actual proxy [INTERNALS] - new method _send_response_headers 0.10 Wed Nov 19 01:36:59 CET 2003 *** MAJOR INTERFACE CHANGES *** - new base classes HTTP::Proxy::HeaderFilter and HTTP::Proxy::BodyFilter - some useful built-in filter classes: HTTP::Proxy::BodyFilter::htmlparser HTTP::Proxy::BodyFilter::htmltext HTTP::Proxy::BodyFilter::lines HTTP::Proxy::BodyFilter::simple HTTP::Proxy::BodyFilter::tags HTTP::Proxy::HeaderFilter::simple HTTP::Proxy::HeaderFilter::standard - tests for the internal class HTTP::Proxy::FilterStack - tests for the built-in filters - the examples are up-to-date with the new interface - new/enhanced accessors: + the proxy host() attribute becomes actually useful: by default, the proxy is only usable by local user-agents (the socket is bound to localhost) + the filters proxy() accessor gives access to the proxy itself. For example, one can get the IP address of the agent connected to the proxy from inside a filter ($self->proxy->daemon->peerhost) - many documentation changes This version is NOT compatible with the previous ones regarding the way filters work. *** MAJOR INTERFACE CHANGES *** 0.09 Fri Aug 15 21:12:17 CEST 2003 - maxserve is now correctly handled - corrected a bug in the t/20keepalive.t test file that made the tests fail on some machines 0.08 Thu Mar 13 01:41:42 CET 2003 - cleaned up support for filters - added support for "buffering" filters and a new HTTP::Proxy::FilterStack class - added an anonymiser script (eg/anonymiser.pl) - the tests won't break if a local proxy is configured - the interfaces are very likely to change soon 0.07 Tue Feb 18 22:30:43 CET 2003 - the proxy now supports persistent connexions (Yay!) - and tests to check for it - and a new timeout accessor 0.06 Mon Feb 17 00:21:37 CET 2003 - better forking system and better reaping of zombies (thanks to David Landgren and Stéphane Payrard) Still won't work under Windows, though :-( - replaced verbose() by logmask(), so as to fine-tune the logging system - put some of the test functions in a test module (t::Utils) 0.05 Tue Feb 4 00:47:23 CET 2003 - explicitly refuse CONNECT - better support for TRACE method - support the Via: Header (a MUST in RFC 2616) - filters, but this needs more work 0.04 Sat Nov 30 12:19:22 CET 2002 - accept connection from other hosts - better ftp support (no test yet) - basic gopher support (no test yet) - better HTTP error handling - use CRLF in HTTP headers 0.03 Fri Nov 29 11:17:36 CET 2002 - url() method gives a url to reach the proxy - new 'control' attribute defines the control URL - better subprocess management by preforking child processes (thanks to Eric Cholet) - a children handles only one request at a time, for better performance (this means we only do HTTP/1.0 for now) - correctly handle the Proxy-Connection and Connection headers 0.02 Thu Oct 24 23:45:08 CEST 2002 - the system now forks to handle several connections - but needs better test suites 0.01 Tue Oct 1 11:54:07 CEST 2002 - original version HTTP-Proxy-0.300/META.yml000444001750001750 420612130250124 14057 0ustar00bookbook000000000000--- abstract: 'A pure Perl HTTP proxy' author: - "Philippe \"BooK\" Bruhat " build_requires: {} configure_requires: Module::Build: 0.38 dynamic_config: 1 generated_by: 'Module::Build version 0.38, CPAN::Meta::Converter version 2.120921' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 name: HTTP-Proxy provides: HTTP::Proxy: file: lib/HTTP/Proxy.pm version: 0.300 HTTP::Proxy::BodyFilter: file: lib/HTTP/Proxy/BodyFilter.pm version: 0 HTTP::Proxy::BodyFilter::complete: file: lib/HTTP/Proxy/BodyFilter/complete.pm version: 0 HTTP::Proxy::BodyFilter::htmlparser: file: lib/HTTP/Proxy/BodyFilter/htmlparser.pm version: 0 HTTP::Proxy::BodyFilter::htmltext: file: lib/HTTP/Proxy/BodyFilter/htmltext.pm version: 0 HTTP::Proxy::BodyFilter::lines: file: lib/HTTP/Proxy/BodyFilter/lines.pm version: 0 HTTP::Proxy::BodyFilter::save: file: lib/HTTP/Proxy/BodyFilter/save.pm version: 0 HTTP::Proxy::BodyFilter::simple: file: lib/HTTP/Proxy/BodyFilter/simple.pm version: 0 HTTP::Proxy::BodyFilter::tags: file: lib/HTTP/Proxy/BodyFilter/tags.pm version: 0 HTTP::Proxy::Engine: file: lib/HTTP/Proxy/Engine.pm version: 0 HTTP::Proxy::Engine::Legacy: file: lib/HTTP/Proxy/Engine/Legacy.pm version: 0 HTTP::Proxy::Engine::NoFork: file: lib/HTTP/Proxy/Engine/NoFork.pm version: 0 HTTP::Proxy::Engine::ScoreBoard: file: lib/HTTP/Proxy/Engine/ScoreBoard.pm version: 0 HTTP::Proxy::Engine::Threaded: file: lib/HTTP/Proxy/Engine/Threaded.pm version: 0 HTTP::Proxy::FilterStack: file: lib/HTTP/Proxy/FilterStack.pm version: 0 HTTP::Proxy::HeaderFilter: file: lib/HTTP/Proxy/HeaderFilter.pm version: 0 HTTP::Proxy::HeaderFilter::simple: file: lib/HTTP/Proxy/HeaderFilter/simple.pm version: 0 HTTP::Proxy::HeaderFilter::standard: file: lib/HTTP/Proxy/HeaderFilter/standard.pm version: 0 requires: HTTP::Daemon: 1.25 LWP::UserAgent: 2 resources: license: http://dev.perl.org/licenses/ repository: http://github.com/book/HTTP-Proxy version: 0.300 HTTP-Proxy-0.300/README000444001750001750 164112130250124 13466 0ustar00bookbook000000000000HTTP::Proxy ----------- This module is a pure Perl HTTP proxy. Its main use should be to record and/or modify web sessions, so as to help users create web robots, web testing suites, as well as proxy systems than can transparently alter the requests to and answers from an origin server. The eg/ directory holds a few examples. See eg/README for details. There is also a t/README file that explains the tests strategy. !WARNING!WARNING!WARNING!WARNING!WARNING!WARNING!WARNING!WARNING! The way the filters are implemented has changed in version 0.10 of HTTP::Proxy. You can now play with two dedicated filter classes and notice slight changes in the HTTP::Proxy interface. !WARNING!WARNING!WARNING!WARNING!WARNING!WARNING!WARNING!WARNING! Recommended order for reading the documentation: 1) HTTP::Proxy 2) HTTP::Proxy::HeaderFilter and HTTP::Proxy::BodyFilter 3) included standard filter classes and code examples in eg/ HTTP-Proxy-0.300/META.json000444001750001750 623012130250124 14226 0ustar00bookbook000000000000{ "abstract" : "A pure Perl HTTP proxy", "author" : [ "Philippe \"BooK\" Bruhat " ], "dynamic_config" : 1, "generated_by" : "Module::Build version 0.38, CPAN::Meta::Converter version 2.120921", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "HTTP-Proxy", "prereqs" : { "configure" : { "requires" : { "Module::Build" : "0.38" } }, "runtime" : { "requires" : { "HTTP::Daemon" : "1.25", "LWP::UserAgent" : "2" } } }, "provides" : { "HTTP::Proxy" : { "file" : "lib/HTTP/Proxy.pm", "version" : "0.300" }, "HTTP::Proxy::BodyFilter" : { "file" : "lib/HTTP/Proxy/BodyFilter.pm", "version" : 0 }, "HTTP::Proxy::BodyFilter::complete" : { "file" : "lib/HTTP/Proxy/BodyFilter/complete.pm", "version" : 0 }, "HTTP::Proxy::BodyFilter::htmlparser" : { "file" : "lib/HTTP/Proxy/BodyFilter/htmlparser.pm", "version" : 0 }, "HTTP::Proxy::BodyFilter::htmltext" : { "file" : "lib/HTTP/Proxy/BodyFilter/htmltext.pm", "version" : 0 }, "HTTP::Proxy::BodyFilter::lines" : { "file" : "lib/HTTP/Proxy/BodyFilter/lines.pm", "version" : 0 }, "HTTP::Proxy::BodyFilter::save" : { "file" : "lib/HTTP/Proxy/BodyFilter/save.pm", "version" : 0 }, "HTTP::Proxy::BodyFilter::simple" : { "file" : "lib/HTTP/Proxy/BodyFilter/simple.pm", "version" : 0 }, "HTTP::Proxy::BodyFilter::tags" : { "file" : "lib/HTTP/Proxy/BodyFilter/tags.pm", "version" : 0 }, "HTTP::Proxy::Engine" : { "file" : "lib/HTTP/Proxy/Engine.pm", "version" : 0 }, "HTTP::Proxy::Engine::Legacy" : { "file" : "lib/HTTP/Proxy/Engine/Legacy.pm", "version" : 0 }, "HTTP::Proxy::Engine::NoFork" : { "file" : "lib/HTTP/Proxy/Engine/NoFork.pm", "version" : 0 }, "HTTP::Proxy::Engine::ScoreBoard" : { "file" : "lib/HTTP/Proxy/Engine/ScoreBoard.pm", "version" : 0 }, "HTTP::Proxy::Engine::Threaded" : { "file" : "lib/HTTP/Proxy/Engine/Threaded.pm", "version" : 0 }, "HTTP::Proxy::FilterStack" : { "file" : "lib/HTTP/Proxy/FilterStack.pm", "version" : 0 }, "HTTP::Proxy::HeaderFilter" : { "file" : "lib/HTTP/Proxy/HeaderFilter.pm", "version" : 0 }, "HTTP::Proxy::HeaderFilter::simple" : { "file" : "lib/HTTP/Proxy/HeaderFilter/simple.pm", "version" : 0 }, "HTTP::Proxy::HeaderFilter::standard" : { "file" : "lib/HTTP/Proxy/HeaderFilter/standard.pm", "version" : 0 } }, "release_status" : "stable", "resources" : { "license" : [ "http://dev.perl.org/licenses/" ], "repository" : { "url" : "http://github.com/book/HTTP-Proxy" } }, "version" : "0.300" } HTTP-Proxy-0.300/eg000755001750001750 012130250124 13042 5ustar00bookbook000000000000HTTP-Proxy-0.300/eg/yahoogroups.pl000555001750001750 304512130250124 16120 0ustar00bookbook000000000000#!/usr/bin/perl -w use strict; use HTTP::Proxy qw( :log ); use HTTP::Proxy::HeaderFilter::simple; use CGI::Util qw( unescape ); my $proxy = HTTP::Proxy->new(@ARGV); $proxy->push_filter( host => 'groups.yahoo.com', response => HTTP::Proxy::HeaderFilter::simple->new( sub { my ( $self, $headers, $message ) = @_; my $location; # ads start by redirecting to 'interrupt' return unless ( $location = $headers->header('Location') ) && $location =~ m!/interrupt\?!; # fetch the ad page (we need the cookie) # use a new request to avoid modifying the original one $self->proxy->log( FILTERS, "YAHOOGROUPS", "Ad interrupt detected: fetching $location" ); my $r = $self->proxy->agent->simple_request( HTTP::Request->new( GET => $location, $message->request->headers # headers are cloned ) ); # redirect to our original destination # which was stored in the 'done' parameter # and pass the cookie along $location = unescape($location); $location =~ s|^(http://[^/]*).*done=([^&]*).*$|$1$2|; $headers->header( Location => $location ); $headers->header( Set_Cookie => $r->header('Set_Cookie') ); $self->proxy->log( FILTERS, "YAHOOGROUPS", "Set-Cookie: " . $r->header('Set_Cookie') ); } ) ); $proxy->start; HTTP-Proxy-0.300/eg/anonymiser.pl000555001750001750 106112130250124 15721 0ustar00bookbook000000000000#!/usr/bin/perl -w # yeah, I know, I write UK English ;-) use HTTP::Proxy qw( :log ); use HTTP::Proxy::HeaderFilter::simple; use strict; # a very simple proxy my $proxy = HTTP::Proxy->new( @ARGV ); # the anonymising filter $proxy->push_filter( mime => undef, request => HTTP::Proxy::HeaderFilter::simple->new( sub { $_[1]->remove_header(qw( User-Agent From Referer Cookie Cookie2 )) } ), response => HTTP::Proxy::HeaderFilter::simple->new( sub { $_[1]->remove_header(qw( Set-Cookie Set-Cookie2 )) } ) ); $proxy->start; HTTP-Proxy-0.300/eg/tracker.pl000555001750001750 162712130250124 15200 0ustar00bookbook000000000000#!/usr/bin/perl -w use HTTP::Proxy; use HTTP::Proxy::HeaderFilter::simple; use Fcntl ':flock'; use strict; # this is a tracker proxy that stores Referer, URL, CODE # and output them to STDOUT or the given file # # Example output: # # NULL http://www.perl.org/ 200 # http://www.perl.org/ http://learn.perl.org/ 200 # my $file = shift || '-'; open OUT, ">> $file" or die "Can't open $file: $!"; my $proxy = HTTP::Proxy->new( @ARGV ); # pass the args you want $proxy->push_filter( response => HTTP::Proxy::HeaderFilter::simple->new( sub { my ( $self, $headers, $message ) = @_; flock( OUT, LOCK_EX ); print OUT join( " ", $message->request->headers->header( 'Referer' ) || 'NULL', $message->request->uri, $message->code ), $/; flock( OUT, LOCK_UN ); } ) ); $proxy->start; END { close OUT; } HTTP-Proxy-0.300/eg/leet.pl000555001750001750 175612130250124 14501 0ustar00bookbook000000000000#!/usr/bin/perl -w use HTTP::Proxy qw( :log ); use HTTP::Proxy::BodyFilter::tags; use HTTP::Proxy::BodyFilter::htmltext; use strict; # a very simple proxy my $proxy = HTTP::Proxy->new( @ARGV ); my %leet = ( a => [qw( 4 /-\ @ )], b => ['|3'], c => [qw! c ( < [ !], e => [qw( e 3 )], g => [qw( g 6 )], h => [qw! h |-| )-( !], k => [qw( k |< ]{ )], i => ['i', '!'], l => [ 'l', "1", "|" ], m => [ 'm', "|V|", "|\\/|" ], n => ["|\\|"], o => ['o', "0"], s => [ "5", "Z" ], t => [ "7", "+" ], u => [qw( u \_/ )], v => [qw( v \/ )], w => [qw( vv `// )], 'y' => ['j', '`/'], z => ["2"], ); # but a complicated filter $proxy->push_filter( mime => 'text/html', response => HTTP::Proxy::BodyFilter::tags->new, response => HTTP::Proxy::BodyFilter::htmltext->new( sub { s/([a-zA-Z])/$leet{lc $1}[rand @{$leet{lc $1}}]||$1/ge; } ) ); $proxy->start; HTTP-Proxy-0.300/eg/logger.pl000555001750001750 600612130250124 15020 0ustar00bookbook000000000000#!/usr/bin/perl -w use strict; use HTTP::Proxy; use HTTP::Proxy::HeaderFilter::simple; use HTTP::Proxy::BodyFilter::simple; use CGI::Util qw( unescape ); # get the command-line parameters my %args = ( peek => [], header => [], mime => 'text/*', ); { my $args = '(' . join( '|', keys %args ) . ')'; for ( my $i = 0 ; $i < @ARGV ; $i += 2 ) { if ( $ARGV[$i] =~ /$args/o ) { if ( ref $args{$1} ) { push @{ $args{$1} }, $ARGV[ $i + 1 ]; } else { $args{$1} = $ARGV[ $i + 1 ]; } splice( @ARGV, $i, 2 ); redo if $i < @ARGV; } } } # the headers we want to see my @srv_hdr = ( qw( Content-Type Set-Cookie Set-Cookie2 WWW-Authenticate Location ), @{ $args{header} } ); my @clt_hdr = ( qw( Cookie Cookie2 Referer Referrer Authorization ), @{ $args{header} } ); # NOTE: Body request filters always receive the request body in one pass my $post_filter = HTTP::Proxy::BodyFilter::simple->new( begin => sub { $_[0]->{binary} = 0; }, filter => sub { my ( $self, $dataref, $message, $protocol, $buffer ) = @_; print STDOUT "\n", $message->method, " ", $message->uri, "\n"; print_headers( $message, @clt_hdr ); if ( $self->{binary} || $$dataref =~ /\0/ ) { $self->{binary} = 1; print STDOUT " (not printing binary data)\n"; return; } # this is from CGI.pm, method parse_params() my (@pairs) = split( /[&;]/, $$dataref ); for (@pairs) { my ( $param, $value ) = split( '=', $_, 2 ); $param = unescape($param); $value = unescape($value); printf STDOUT " %-20s => %s\n", $param, $value; } } ); my $get_filter = HTTP::Proxy::HeaderFilter::simple->new( sub { my ( $self, $headers, $message ) = @_; my $req = $message->request; if ( $req->method ne 'POST' ) { print STDOUT "\n", $req->method, " ", $req->uri, "\n"; print_headers( $req, @clt_hdr ); } print STDOUT $message->status_line, "\n"; print_headers( $message, @srv_hdr ); } ); sub print_headers { my $message = shift; for my $h (@_) { if ( $message->header($h) ) { print STDOUT " $h: $_\n" for ( $message->header($h) ); } } } # create and start the proxy my $proxy = HTTP::Proxy->new(@ARGV); # if we want to look at SOME sites if (@{$args{peek}}) { for (@{$args{peek}}) { $proxy->push_filter( host => $_, method => 'POST', request => $post_filter ); $proxy->push_filter( host => $_, response => $get_filter, mime => $args{mime}, ); } } # otherwise, peek at all sites else { $proxy->push_filter( method => 'POST', request => $post_filter ); $proxy->push_filter( response => $get_filter, mime => $args{mime} ); } $proxy->start; HTTP-Proxy-0.300/eg/adblock.pl000555001750001750 201412130250124 15133 0ustar00bookbook000000000000#!/usr/bin/perl -w use strict; use HTTP::Proxy qw( :log ); use HTTP::Proxy::HeaderFilter::simple; use vars qw( $re ); # this is a very simple ad blocker # a full-fledged ad blocker should be a module # this dot is *not* a web bug ;-) my $no = HTTP::Response->new( 200 ); $no->content_type('text/plain'); $no->content('.'); my $filter = HTTP::Proxy::HeaderFilter::simple->new( sub { my ( $self, $headers, $message ) = @_; $self->proxy->response( $no ) if $message->uri->host =~ /$re/o; } ); my $proxy = HTTP::Proxy->new( @ARGV ); $proxy->push_filter( request => $filter ); $proxy->start; # a short and basic list BEGIN { $re = join '|', map { quotemeta } qw( ads.wanadooregie.com cybermonitor.com doubleclick.com adfu.blockstackers.com bannerswap.com click2net.com clickxchange.com dimeclicks.com fastclick.net mediacharger.com mediaplex.com myaffiliateprogram.com netads.hotwired.com valueclick.com ); } HTTP-Proxy-0.300/eg/js.pl000555001750001750 55112130250124 14134 0ustar00bookbook000000000000use HTTP::Proxy; use HTTP::Proxy::BodyFilter::save; my $proxy = HTTP::Proxy->new(@ARGV); # save javascript files as we browse them $proxy->push_filter( path => qr!/.js$!, response => HTTP::Proxy::BodyFilter::save->new( template => '%f', prefix => 'javascript', multiple => 0, keep_old => 1, ) ); $proxy->start; HTTP-Proxy-0.300/eg/trim.pl000555001750001750 105012130250124 14506 0ustar00bookbook000000000000#!/usr/bin/perl -w use HTTP::Proxy qw( :log ); use HTTP::Proxy::BodyFilter::lines; use HTTP::Proxy::BodyFilter::simple; use strict; my $proxy = HTTP::Proxy->new(@ARGV); # a simple proxy that trims whitespace in HTML $proxy->push_filter( mime => 'text/html', response => HTTP::Proxy::BodyFilter::lines->new(), response => HTTP::Proxy::BodyFilter::simple->new( sub { my ($self, $dataref ) = @_; $$dataref =~ s/^\s+//m; # multi-line data $$dataref =~ s/\s+$//m; } ) ); $proxy->start; HTTP-Proxy-0.300/eg/bork.pl000555001750001750 425612130250124 14503 0ustar00bookbook000000000000#!/usr/bin/perl -w # script kindly offered by glb (Eric Cassagnard) use HTTP::Proxy qw( :log ); use HTTP::Proxy::BodyFilter::tags; use HTTP::Proxy::BodyFilter::simple; use HTTP::Proxy::BodyFilter::htmltext; use strict; my $proxy = HTTP::Proxy->new(@ARGV); my %noaccent = ( Agrave => 'A', Aacute => 'A', Acirc => 'A', Atilde => 'A', Auml => 'A', Aring => 'A', AElig => 'AE', Ccedil => 'C', Egrave => 'E', Eacute => 'E', Ecirc => 'E', Euml => 'E', Igrave => 'I', Iacute => 'I', Icirc => 'I', Iuml => 'I', Ntilde => 'N', Ograve => 'O', Oacute => 'O', Ocirc => 'O', Otile => 'O', Ouml => 'O', Oslash => 'O', Ugrave => 'U', Uacute => 'U', Ucirc => 'U', Uuml => 'U', Yacute => 'Y', agrave => 'a', aacute => 'a', acirc => 'a', atilde => 'a', auml => 'a', aring => 'a', aelig => 'ae', ccedil => 'c', egrave => 'e', eacute => 'e', ecirc => 'e', euml => 'e', igrave => 'i', iacute => 'i', icirc => 'i', iuml => 'i', ntilde => 'n', ograve => 'o', oacute => 'o', ocirc => 'o', otile => 'o', ouml => 'o', oslash => 'o', ugrave => 'u', uacute => 'u', ucirc => 'u', uuml => 'u', yacute => 'y', 'yuml' => 'y', 'Æ' => 'AE', 'æ' => 'ae', ); my $re = join '|', sort keys %noaccent; my %sounds = ( an => 'un', An => 'Un', au => 'oo', Au => 'Oo', a => 'e', A => 'E', ew => 'oo', e => 'e-a', e => 'i', E => 'I', f => 'ff', ir => 'ur', ow => 'oo', o => 'oo', O => 'Oo', o => 'u', the => 'zee', The => 'Zee', th => 't', tion => 'shun', u => 'oo', U => 'Oo', v => 'f', V => 'F', w => 'v', W => 'V' ); my $sc = join '|', sort keys %sounds; $proxy->push_filter( mime => 'text/html', response => HTTP::Proxy::BodyFilter::tags->new, response => HTTP::Proxy::BodyFilter::simple->new( sub { ${ $_[ 1 ] } =~ s/&($re);/$noaccent{$1}/go; } ), response => HTTP::Proxy::BodyFilter::htmltext->new( sub { tr{ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿ} {AAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy}; s/($sc)/$sounds{$1}/go; s/([?!]+)/$1 Bork bork bork !/go ; s/(\.+)(\s|$)/$1 Bork bork bork ! /go ; } ) ); $proxy->start; HTTP-Proxy-0.300/eg/https.pl000555001750001750 502312130250124 14701 0ustar00bookbook000000000000#!/usr/bin/perl -w use HTTP::Proxy; use HTTP::Proxy::HeaderFilter::simple; use HTTP::Proxy::BodyFilter::htmlparser; use HTTP::Proxy::BodyFilter::htmltext; use HTML::Parser; use strict; # where to find URI in tag attributes # (it actually a little more complicated, since some tags can have # several attributes that require an URI) my %links = ( a => 'href', area => 'href', base => 'href', link => 'href', frame => 'src', iframe => 'src', img => 'src', input => 'src', script => 'src', form => 'action', body => 'background', ); my $re_tags = join '|', sort keys %links; my $hrefparser = HTML::Parser->new( api_version => 3 ); # turn all https:// links to http://this_is_ssl links $hrefparser->handler( start => sub { my ( $self, $tag, $attr, $attrseq, $text ) = @_; if ( $tag =~ /^($re_tags)$/o && exists $attr->{$links{$1}} && substr( $attr->{$links{$1}}, 0, 8 ) eq "https://" ) { $attr->{$links{$1}} =~ s!^https://!http://this_is_ssl.!; $text = "<$tag " . join( ' ', map { qq($_="$attr->{$_}") } @$attrseq ) . ">"; } $self->{output} .= $text; }, "self,tagname,attr,attrseq,text" ); # by default copy everything $hrefparser->handler( default => sub { my ( $self, $text ) = @_; $self->{output} .= $text; }, "self,text" ); # the proxy itself my $proxy = HTTP::Proxy->new(@ARGV); $proxy->push_filter( mime => 'text/html', response => HTTP::Proxy::BodyFilter::htmlparser->new( $hrefparser, rw => 1 ), ); # detect https requests $proxy->push_filter( request => HTTP::Proxy::HeaderFilter::simple->new( sub { my ( $self, $headers, $message ) = @_; # find out the actual https site my $uri = $message->uri; if ( $uri =~ m!^http://this_is_ssl\.! ) { $uri->scheme("https"); my $host = $uri->host; $host =~ s!^this_is_ssl\.!!; $uri->host($host); } } ), response => HTTP::Proxy::HeaderFilter::simple->new( sub { my ( $self, $headers, $message ) = @_; # modify Location: headers in the response my $location = $headers->header( 'Location' ); if( $location =~ m!^https://! ) { $location =~ s!^https://!http://this_is_ssl.!; $headers->header( Location => $location ); } } ), ); $proxy->start; HTTP-Proxy-0.300/eg/javascript.pl000555001750001750 164512130250124 15713 0ustar00bookbook000000000000#!/usr/bin/perl -w use HTTP::Proxy; use HTML::Parser; use HTTP::Proxy::BodyFilter::htmlparser; # define the filter (the most difficult part) # filters not using HTML::Parser are much simpler :-) my $parser = HTML::Parser->new( api_version => 3 ); $parser->handler( start => sub { my ( $self, $tag, $text ) = @_; $self->{output} .= $text; $self->{output} .= "YOUR JAVASCRIPT HERE" if $tag eq 'body'; }, "self,tagname,text" ); $parser->handler( default => sub { my ($self, $text) = @_; $self->{output} .= $text; }, "self,text" ); # this is a read-write filter (rw => 1) # that is the reason why we had to copy everything into $self->{output} my $filter = HTTP::Proxy::BodyFilter::htmlparser->new( $parser, rw => 1 ); # create and launch the proxy my $proxy = HTTP::Proxy->new(@ARGV); $proxy->push_filter( response => $filter, mime => 'text/html' ); $proxy->start(); HTTP-Proxy-0.300/eg/flv.pl000555001750001750 211212130250124 14322 0ustar00bookbook000000000000#!/usr/bin/env perl use strict; use warnings; use HTTP::Proxy; use HTTP::Proxy::BodyFilter::save; use Digest::MD5 qw( md5_hex); use POSIX qw( strftime ); my $proxy = HTTP::Proxy->new(@ARGV); # a filter to save FLV files somewhere my $flv_filter = HTTP::Proxy::BodyFilter::save->new( filename => sub { my ($message) = @_; my $uri = $message->request->uri; # get the id, or fallback to some md5 hash my ($id) = ( $uri->query || '' ) =~ /id=([^&;]+)/i; $id = md5_hex($uri) unless $id; # compute the filename (including the base site name) my ($host) = $uri->host =~ /([^.]+\.[^.]+)$/; my $file = strftime "flv/%Y-%m-%d/${host}_$id.flv", localtime; # ignore it if we already have it return if -e $file && -s $file == $message->content_length; # otherwise, save return $file; }, ); # push the filter for all MIME types we want to catch for my $mime (qw( video/flv video/x-flv )) { $proxy->push_filter( mime => $mime, response => $flv_filter, ); } $proxy->start; HTTP-Proxy-0.300/eg/ayb.pl000555001750001750 353112130250124 14314 0ustar00bookbook000000000000#!/usr/bin/perl -w use HTTP::Proxy qw( :log ); use HTTP::Proxy::BodyFilter::htmlparser; use HTML::Parser; use strict; # the proxy my $proxy = HTTP::Proxy->new( @ARGV ); # all your base... my @ayb = grep { !/^$/ } split/$/m, << 'AYB'; In A.D. 2101 War was beginning. What happen ? Somebody set up us the bomb. We get signal. What ! Main screen turn on. It's You !! How are you gentlemen !! All your base are belong to us. You are on the way to destruction. What you say !! You have no chance to survive make your time. HA HA HA HA .... Take off every 'zig' !! You know what you doing. Move 'zig'. For great justice. AYB # the AYB parser # replaces heading content with the AYB text my $parser = HTML::Parser->new( api_version => 3 ); $parser->handler( start_document => sub { my $self = shift; $self->{ayb} = 0; $self->{i} = int rand @ayb; }, "self" ); $parser->handler( start => sub { my ( $self, $tag, $attr, $text ) = @_; $self->{ayb} = 1 if $tag =~ /^h\d/; if( $tag eq 'img' ) { $attr->{src} = 'http://home.uchicago.edu/~obmontoy/cats.jpg'; $text = "<$tag " . join(' ', map { qq($_="$attr->{$_}") } keys %$attr ) . ">"; } $self->{output} .= $text; }, "self,tagname,attr,text" ); $parser->handler( end => sub { my ( $self, $tag, $text ) = @_; if( $tag =~ /^h\d/ ) { $self->{ayb} = 0; } $self->{output} .= $text; }, "self,tagname,text" ); $parser->handler( default => sub { my ( $self, $text ) = @_; $self->{output} .= $self->{ayb} ? $ayb[($self->{i} += 1 ) %= @ayb] : $text; }, "self,text" ); $proxy->push_filter( mime => 'text/html', response => HTTP::Proxy::BodyFilter::htmlparser->new( $parser, rw => 1 ), ); $proxy->start; HTTP-Proxy-0.300/eg/switch.pl000555001750001750 70012130250124 15015 0ustar00bookbook000000000000#!/usr/bin/perl -w use HTTP::Proxy; use HTTP::Proxy::HeaderFilter::simple; # call this proxy as # eg/switch.pl proxy http://proxy1:port/,http://proxy2:port/ my %args = @ARGV; my @proxy = split/,/, $args{proxy}; my $proxy = HTTP::Proxy->new(@ARGV); $proxy->push_filter( request => HTTP::Proxy::HeaderFilter::simple->new( sub { shift->proxy->agent->proxy( http => $proxy[ rand @proxy ] ); } ) ); $proxy->start; HTTP-Proxy-0.300/eg/dragon.pl000555001750001750 410212130250124 15006 0ustar00bookbook000000000000#!/usr/bin/perl -w use HTTP::Proxy; use HTTP::Proxy::HeaderFilter::simple; use HTTP::Proxy::BodyFilter::simple; use HTTP::Proxy::BodyFilter::complete; use MIME::Base64; use Fcntl ':flock'; use strict; # the proxy my $proxy = HTTP::Proxy->new( @ARGV ); # the status page: # - auto-refresh (quickly at first, then more slowly) # - count the number of games and modify the title my $seen_title; $proxy->push_filter( host => 'www.dragongoserver.net', path => '^/status.php', # auto-refresh response => HTTP::Proxy::HeaderFilter::simple->new( sub { my ( $self, $headers, $response ) = @_; ($response->request->uri->query || '') =~ /reload=(\d+)/; my $n = ($1 || 0) + 1; my $delay = $n < 5 ? 30 : $n < 15 ? 60 : $n < 25 ? 300 : 3600; $headers->push_header( Refresh => "$delay;url=" . $response->request->uri->path . "?reload=$n" ); } ), # count games response => HTTP::Proxy::BodyFilter::complete->new(), response => HTTP::Proxy::BodyFilter::simple->new( filter => sub { my ( $self, $dataref, $message, $protocol, $buffer ) = @_; next if ! $$dataref; # count the games and change the title my $n = 0; $n++ while $$dataref =~ /game\.php\?gid=\d+/g; my $s = $n > 1 ? "s" : ""; $n ||= "No"; $$dataref =~ s!.*?!$n go game$s pending!s; }, ), ); # the game page: # - remove the Message: textarea # - add a link to make it appear when needed $proxy->push_filter( host => 'www.dragongoserver.net', path => '^/game.php', response => HTTP::Proxy::BodyFilter::complete->new(), response => HTTP::Proxy::BodyFilter::simple->new( sub { my $msg = '&msg=yes'; my $uri = $_[2]->request->uri; if( $uri =~ s/$msg//o ) { $msg = ''; } else { ${$_[1]} =~ s|()||; } ${$_[1]} =~ s|(Message:)|$1|; } ) ); $proxy->start; HTTP-Proxy-0.300/eg/rainbow.pl000555001750001750 415512130250124 15205 0ustar00bookbook000000000000#!/usr/bin/perl -w use HTTP::Proxy qw( :log ); use HTTP::Proxy::BodyFilter::tags; use HTTP::Proxy::BodyFilter::simple; use HTTP::Proxy::BodyFilter::htmltext; use strict; my $proxy = HTTP::Proxy->new(@ARGV); $proxy->push_filter( mime => 'text/html', response => HTTP::Proxy::BodyFilter::tags->new, # protect tags response => HTTP::Proxy::BodyFilter::simple->new( # rainbow entities sub { ${ $_[1] } =~ s/(&[#\w]+;)/rainbow($1)/eg; } ), response => HTTP::Proxy::BodyFilter::htmltext->new( # rainbow text sub { s/(\S)/rainbow($1)/eg; } ) ); sub rainbow { return sprintf qq{%s}, next_color(), shift; } # the following code courtesy David 'grinder' Landgren # but adapted for our needs use constant PI_2 => 3.14159265359 * 2; my @PRIMES = qw/11 13 17 19 23 29 31 37 41 43 47 53 59/; my $red = rand() * PI_2; my $green = rand() * PI_2; my $blue = rand() * PI_2; my $rdelta = PI_2 / $PRIMES[ rand scalar @PRIMES ]; my $gdelta = PI_2 / $PRIMES[ rand scalar @PRIMES ]; my $bdelta = PI_2 / $PRIMES[ rand scalar @PRIMES ]; my ( $rp, $gp, $bp ) = ( sin $red, sin $green, sin $blue ); my ( $rq, $gq, $bq ) = qw/ 0 0 0/; my ( $rr, $gr, $br ) = qw/ 0 0 0/; $proxy->start; sub next_color { my $rs = sin( $red += $rdelta ); my $rc = $rs * 120 + 120; my $gs = sin( $green += $gdelta ); my $gc = $gs * 120 + 120; my $bs = sin( $blue += $bdelta ); my $bc = $bs * 120 + 120; $rq = $rp <=> $rs; $gq = $gp <=> $gs; $bq = $bp <=> $bs; $rp = $rs; $gp = $gs; $bp = $bs; $rdelta = PI_2 / $PRIMES[ rand scalar @PRIMES ] if ( $rr == 1 and $rq < 1 and $rs < 1 ); $gdelta = PI_2 / $PRIMES[ rand scalar @PRIMES ] if ( $gr == 1 and $gq < 1 and $gs < 1 ); $bdelta = PI_2 / $PRIMES[ rand scalar @PRIMES ] if ( $br == 1 and $bq < 1 and $bs < 1 ); $rr = $rq; $gr = $gq; $br = $bq; $rc = ( $rc < 0 ) ? 0 : ( $rc > 255 ) ? 255 : $rc; $gc = ( $gc < 0 ) ? 0 : ( $gc > 255 ) ? 255 : $gc; $bc = ( $bc < 0 ) ? 0 : ( $bc > 255 ) ? 255 : $bc; return sprintf( "#%02x%02x%02x", $rc, $gc, $bc ); } HTTP-Proxy-0.300/eg/perlmonks.pl000555001750001750 174512130250124 15560 0ustar00bookbook000000000000#!/usr/bin/perl -w use HTTP::Proxy qw( :log ); use HTTP::Proxy::HeaderFilter::simple; use strict; # a very simple proxy my $proxy = HTTP::Proxy->new(@ARGV); # this filter redirects all requests to perlmonks.org my $filter = HTTP::Proxy::HeaderFilter::simple->new( sub { my ( $self, $headers, $message ) = @_; # modify the host part of the request $self->proxy()->log( ERROR, "FOO", $message->uri() ); $message->uri()->host('perlmonks.org'); # create a new redirect response my $res = HTTP::Response->new( 301, 'Moved to perlmonks.org', [ Location => $message->uri() ] ); # and make the proxy send it back to the client $self->proxy()->response($res); } ); # put this filter on perlmonks.com and www.perlmonks.org $proxy->push_filter( host => 'perlmonks.com', request => $filter ); $proxy->push_filter( host => 'www.perlmonks.org', request => $filter ); $proxy->start(); HTTP-Proxy-0.300/eg/proxy.pl000555001750001750 20612130250124 14676 0ustar00bookbook000000000000#!/usr/bin/perl -w use HTTP::Proxy qw( :log ); use strict; # a very simple proxy my $proxy = HTTP::Proxy->new(@ARGV); $proxy->start; HTTP-Proxy-0.300/eg/rot13.pl000555001750001750 337712130250124 14521 0ustar00bookbook000000000000#!/usr/bin/perl -w use HTTP::Proxy qw( :log ); use HTTP::Proxy::BodyFilter::tags; use HTTP::Proxy::BodyFilter::simple; use HTTP::Proxy::BodyFilter::htmltext; use strict; my $proxy = HTTP::Proxy->new(@ARGV); my %noaccent = ( Agrave => 'A', Aacute => 'A', Acirc => 'A', Atilde => 'A', Auml => 'A', Aring => 'A', AElig => 'AE', Ccedil => 'C', Egrave => 'E', Eacute => 'E', Ecirc => 'E', Euml => 'E', Igrave => 'I', Iacute => 'I', Icirc => 'I', Iuml => 'I', Ntilde => 'N', Ograve => 'O', Oacute => 'O', Ocirc => 'O', Otile => 'O', Ouml => 'O', Oslash => 'O', Ugrave => 'U', Uacute => 'U', Ucirc => 'U', Uuml => 'U', Yacute => 'Y', agrave => 'a', aacute => 'a', acirc => 'a', atilde => 'a', auml => 'a', aring => 'a', aelig => 'ae', ccedil => 'c', egrave => 'e', eacute => 'e', ecirc => 'e', euml => 'e', igrave => 'i', iacute => 'i', icirc => 'i', iuml => 'i', ntilde => 'n', ograve => 'o', oacute => 'o', ocirc => 'o', otile => 'o', ouml => 'o', oslash => 'o', ugrave => 'u', uacute => 'u', ucirc => 'u', uuml => 'u', yacute => 'y', 'yuml' => 'y', 'Æ' => 'AE', 'æ' => 'ae', ); my $re = join '|', sort keys %noaccent; $proxy->push_filter( mime => 'text/html', response => HTTP::Proxy::BodyFilter::tags->new, # protect tags response => HTTP::Proxy::BodyFilter::simple->new( # remove accents sub { ${ $_[1] } =~ s/&($re);/$noaccent{$1}/go; } ), response => HTTP::Proxy::BodyFilter::htmltext->new( # rot13 sub { tr{ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿ} {AAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy}; tr/a-zA-z/n-za-mN-ZA-M/; } ) ); $proxy->start; HTTP-Proxy-0.300/eg/post.pl000555001750001750 152412130250124 14526 0ustar00bookbook000000000000#!/usr/bin/perl -w use strict; use HTTP::Proxy qw( :log ); use HTTP::Proxy::BodyFilter::simple; use CGI::Util qw( unescape ); # NOTE: Body request filters always receive the request body in one pass my $filter = HTTP::Proxy::BodyFilter::simple->new( sub { my ( $self, $dataref, $message, $protocol, $buffer ) = @_; print STDOUT $message->method, " ", $message->uri, "\n"; # this is from CGI.pm, method parse_params my (@pairs) = split ( /[&;]/, $$dataref ); for (@pairs) { my ( $param, $value ) = split ( '=', $_, 2 ); $param = unescape($param); $value = unescape($value); printf STDOUT " %-30s => %s\n", $param, $value; } } ); my $proxy = HTTP::Proxy->new(@ARGV); $proxy->push_filter( method => 'POST', request => $filter ); $proxy->start; HTTP-Proxy-0.300/eg/fudd.pl000555001750001750 66312130250124 14446 0ustar00bookbook000000000000#!/usr/bin/perl -w # based on Google's Elmer Fudd preference setting use HTTP::Proxy; use HTTP::Proxy::BodyFilter::tags; use HTTP::Proxy::BodyFilter::htmltext; use strict; my $proxy = HTTP::Proxy->new(@ARGV); $proxy->push_filter( mime => 'text/html', response => HTTP::Proxy::BodyFilter::tags->new, response => HTTP::Proxy::BodyFilter::htmltext->new( sub { y/r/w/; s/l(?=\w)/w/g } ) ); $proxy->start; HTTP-Proxy-0.300/eg/README000444001750001750 1001512130250124 14074 0ustar00bookbook000000000000The eg/ directory holds a few example proxies. All scripts accept HTTP::Proxy constructor key/value pairs on the command-line. Example: ./proxy.pl port 3128 host 0.0.0.0 * proxy.pl A very simple proxy. Filter: none * anonymiser.pl A simple anonymizing proxy, similar in functionnality to the one shown by Randal L. Schwartz in his WebTechniques #11 column. http://www.stonehenge.com/merlyn/WebTechniques/col11.html Filter: HTTP::Proxy::HeaderFilter::simple * proxy-auth.pl A very simple proxy, with Basic authentication. Filter: HTTP::Proxy::HeaderFilter::simple * leet.pl * rot13.pl * rainbow.pl * fudd.pl These filters do simple modifications of all HTML pages. Filters: HTTP::Proxy::BodyFilter::tags HTTP::Proxy::BodyFilter::simple HTTP::Proxy::BodyFilter::htmltext * bork.pl This ffiltir elsu duis simpli mudiffixeshuns uff ell HTML pegis. Bork bork bork ! * outline.pl * ayb.pl These proxy do more complicated modifications of HTML pages, and require a HTML::Parser object to do so. All you tag are belong to us. Filter: HTTP::Proxy::BodyFilter::htmlparser * post.pl This filter outputs the request URI and the form parameters of all POST requests. Filter: HTTP::Proxy::HeaderFilter::simple * logger.pl This filter outputs the important information out of GET and POST requests: method, URI, cookies, content-type (text/*) and POST request parameters. Filter: HTTP::Proxy::HeaderFilter::simple * adblock.pl This is a very simple proxy that block ad sites. Filters: HTTP::Proxy::HeaderFilter::simple HTTP::Proxy::BodyFilter::simple * trim.pl A simple proxy that trims lines of HTML text. Filters: HTTP::Proxy::BodyFilter::lines HTTP::Proxy::BodyFilter::simple * javascript.pl A proxy that adds anything/javascript at the beginning of a HTML page. (right after the tag) Filter: HTTP::Proxy::BodyFilter::htmlparser * rfc.pl A proxy that automatically saves the files named rfc\d+\.txt to a file of the same name in the rfc/ directory. Filter: HTTP::Proxy::BodyFilter::save * dragon.pl A proxy that removes some of the shortcomings of the Dragon Go Server website (http://www.dragongoserver.net/) Filters: HTTP::Proxy::HeaderFilter::simple HTTP::Proxy::BodyFilter::simple HTTP::Proxy::BodyFilter::tags * pdf.pl Save all PDF files in the pdf/ directory, and replace it with a HTML message saying "PDF file saved." Filters: HTTP::Proxy::HeaderFilter::simple HTTP::Proxy::Body::simple HTTP::Proxy::Body::save * yahoogroups.pl Removes the advertisment interruptions from Yahoo! Groups Filter: HTTP::Proxy::HeaderFilter::simple * https.pl Modify https:// links to http:// links that the proxy will recognise. The proxy will download the pages using SSL and will then be able to modify them with its filter stacks. Filters: HTTP::Proxy::HeaderFilter::simple HTTP::Proxy::BodyFilter::htmlparser * perlmonks.pl Redirect all requests to perlmonks.com and perlmonks.org to perlmonks.org, thus keeping connection information intact (all cookies will point to a single web site). This code was used as a starting point for the "Rewrite the web" hack in Perl Hacks (O'Reilly 2006). Filter: HTTP::Proxy::HeaderFilter::simple * switch.pl Randomly switch proxies from a list given on the command line. Filter: HTTP::Proxy::HeaderFilter::simple * tracker.pl This tracker proxy stores Referer, URL, CODE and output them to STDOUT or the given file Example output: NULL http://www.perl.org/ 200 http://www.perl.org/ http://learn.perl.org/ 200 Filter: HTTP::Proxy::HeaderFilter::simple * js.pl Save JavaScript files as we browse them. Filter: HTTP::Proxy::Body::save * flv.pl Saves all FLV files in the flv/ directory, with a computed name (id taken from the URI, or MD5 hash of the URI). Filter: HTTP::Proxy::Body::save HTTP-Proxy-0.300/eg/rfc.pl000555001750001750 57312130250124 14276 0ustar00bookbook000000000000use HTTP::Proxy; use HTTP::Proxy::BodyFilter::save; my $proxy = HTTP::Proxy->new(@ARGV); # save RFC files as we browse them $proxy->push_filter( path => qr!/rfc\d+.txt!, mime => 'text/plain', response => HTTP::Proxy::BodyFilter::save->new( template => '%f', prefix => 'rfc', multiple => 0, keep_old => 1, ) ); $proxy->start; HTTP-Proxy-0.300/eg/proxy-auth.pl000555001750001750 201412130250124 15654 0ustar00bookbook000000000000#!/usr/bin/perl -w use HTTP::Proxy qw( :log ); use HTTP::Proxy::HeaderFilter::simple; use MIME::Base64 qw( encode_base64 ); use strict; # the encoded user:password pair # login: http # passwd: proxy my $token = "Basic " . encode_base64( "http:proxy", '' ); # a very simple proxy that requires authentication my $proxy = HTTP::Proxy->new(@ARGV); # the authentication filter $proxy->push_filter( request => HTTP::Proxy::HeaderFilter::simple->new( sub { my ( $self, $headers, $request ) = @_; # check the token against all credentials my $ok = 0; $_ eq $token && $ok++ for $self->proxy->hop_headers->header('Proxy-Authorization'); # no valid credential if ( !$ok ) { my $response = HTTP::Response->new(407); $response->header( Proxy_Authenticate => 'Basic realm="HTTP::Proxy"' ); $self->proxy->response($response); } } ) ); $proxy->start; HTTP-Proxy-0.300/eg/outline.pl000555001750001750 214512130250124 15220 0ustar00bookbook000000000000#!/usr/bin/perl -w use HTTP::Proxy qw( :log ); use HTTP::Proxy::BodyFilter::htmlparser; use HTML::Parser; use strict; my $parser = HTML::Parser->new( api_version => 3 ); $parser->handler( start_document => sub { my $self = shift; $self->{print} = 1 }, "self" ); $parser->handler( start => sub { my ( $self, $tag, $text ) = @_; $self->{print} = 1 if $tag =~ /^h\d/; $self->{output} .= $text if $self->{print}; $self->{print} = 0 if $tag eq 'body'; }, "self,tagname,text" ); $parser->handler( end => sub { my ( $self, $tag, $text ) = @_; $self->{print} = 1 if $tag eq 'body'; $self->{output} .= $text if $self->{print}; $self->{print} = 0 if $tag =~ /^h\d/; }, "self,tagname,text" ); $parser->handler( default => sub { my ( $self, $text ) = @_; $self->{output} .= $text if $self->{print}; }, "self,text" ); my $filter = HTTP::Proxy::BodyFilter::htmlparser->new( $parser, rw => 1 ); my $proxy = HTTP::Proxy->new(@ARGV); $proxy->push_filter( mime => 'text/html', response => $filter ); $proxy->start; HTTP-Proxy-0.300/eg/pdf.pl000555001750001750 265712130250124 14322 0ustar00bookbook000000000000#!/usr/bin/perl # # Saves all PDF files, and just confirm saving to the client # (the PDF file never arrives to the client, but is replaced by # a simple HTML file) # # Based on a request by Emmanuel Di Prétoro # use strict; use warnings; use HTTP::Proxy qw ( :log ); use HTTP::Proxy::BodyFilter::save; use HTTP::Proxy::BodyFilter::simple; use HTTP::Proxy::HeaderFilter::simple; my $proxy = HTTP::Proxy->new( @ARGV ); my $saved; $proxy->push_filter( # you should probably restrict this to certain hosts as well path => qr/\.pdf$/, mime => 'application/pdf', # save the PDF response => HTTP::Proxy::BodyFilter::save->new( template => "%f", prefix => 'pdf' ), # send a HTML message instead response => HTTP::Proxy::BodyFilter::simple->new( begin => sub { my ( $self, $message ) = @_; # for information, saorge $saved = 0; }, filter => sub { my ( $self, $dataref, $message, $protocol, $buffer ) = @_; $$dataref = $saved++ ? "" : sprintf '

Saving PDF file. Go back

', $message->request->header('referer'); } ), # change the response Content-Type response => HTTP::Proxy::HeaderFilter::simple->new( sub { my ( $self, $headers, $response ) = @_; $headers->content_type('text/html'); } ), ); $proxy->start; HTTP-Proxy-0.300/lib000755001750001750 012130250124 13215 5ustar00bookbook000000000000HTTP-Proxy-0.300/lib/HTTP000755001750001750 012130250124 13774 5ustar00bookbook000000000000HTTP-Proxy-0.300/lib/HTTP/Proxy.pm000444001750001750 12150412130250124 15653 0ustar00bookbook000000000000package HTTP::Proxy; use HTTP::Daemon; use HTTP::Date qw(time2str); use LWP::UserAgent; use LWP::ConnCache; use Fcntl ':flock'; # import LOCK_* constants use IO::Select; use Sys::Hostname; # hostname() use Carp; use strict; use vars qw( $VERSION $AUTOLOAD @METHODS @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS ); require Exporter; @ISA = qw(Exporter); @EXPORT = (); # no export by default @EXPORT_OK = qw( ERROR NONE PROXY STATUS PROCESS SOCKET HEADERS FILTERS DATA CONNECT ENGINE ALL ); %EXPORT_TAGS = ( log => [@EXPORT_OK] ); # only one tag $VERSION = '0.300'; my $CRLF = "\015\012"; # "\r\n" is not portable # standard filters use HTTP::Proxy::HeaderFilter::standard; # constants used for logging use constant ERROR => -1; # always log use constant NONE => 0; # never log use constant PROXY => 1; # proxy information use constant STATUS => 2; # HTTP status use constant PROCESS => 4; # sub-process life (and death) use constant SOCKET => 8; # low-level connections use constant HEADERS => 16; # HTTP headers use constant FILTERS => 32; # Messages from filters use constant DATA => 64; # Data received by the filters use constant CONNECT => 128; # Data transmitted by the CONNECT method use constant ENGINE => 256; # Internal information from the Engine use constant ALL => 511; # All of the above # modules that need those constants to be defined use HTTP::Proxy::Engine; use HTTP::Proxy::FilterStack; # Methods we can forward my %METHODS; # HTTP (RFC 2616) $METHODS{http} = [qw( CONNECT DELETE GET HEAD OPTIONS POST PUT TRACE )]; # WebDAV (RFC 2518) $METHODS{webdav} = [ @{ $METHODS{http} }, qw( COPY LOCK MKCOL MOVE PROPFIND PROPPATCH UNLOCK ) ]; # Delta-V (RFC 3253) $METHODS{deltav} = [ @{ $METHODS{webdav} }, qw( BASELINE-CONTROL CHECKIN CHECKOUT LABEL MERGE MKACTIVITY MKWORKSPACE REPORT UNCHECKOUT UPDATE VERSION-CONTROL ), ]; # the whole method list @METHODS = HTTP::Proxy->known_methods(); # useful regexes (from RFC 2616 BNF grammar) my %RX; $RX{token} = qr/[-!#\$%&'*+.0-9A-Z^_`a-z|~]+/; $RX{mime} = qr($RX{token}/$RX{token}); $RX{method} = '(?:' . join ( '|', @METHODS ) . ')'; $RX{method} = qr/$RX{method}/; sub new { my $class = shift; my %params = @_; # some defaults my %defaults = ( agent => undef, chunk => 4096, daemon => undef, host => 'localhost', logfh => *STDERR, logmask => NONE, max_connections => 0, max_keep_alive_requests => 10, port => 8080, stash => {}, timeout => 60, via => hostname() . " (HTTP::Proxy/$VERSION)", x_forwarded_for => 1, ); # non modifiable defaults my $self = bless { conn => 0, loop => 1 }, $class; # support for deprecated stuff { my %convert = ( maxchild => 'max_clients', maxconn => 'max_connections', maxserve => 'max_keep_alive_requests', ); while( my ($old, $new) = each %convert ) { if( exists $params{$old} ) { $params{$new} = delete $params{$old}; carp "$old is deprecated, please use $new"; } } } # get attributes $self->{$_} = exists $params{$_} ? delete( $params{$_} ) : $defaults{$_} for keys %defaults; # choose an engine with the remaining parameters $self->{engine} = HTTP::Proxy::Engine->new( %params, proxy => $self ); $self->log( PROXY, "PROXY", "Selected engine " . ref $self->{engine} ); return $self; } sub known_methods { my ( $class, @args ) = @_; @args = map { lc } @args ? @args : ( keys %METHODS ); exists $METHODS{$_} || carp "Method group $_ doesn't exist" for @args; my %seen; return grep { !$seen{$_}++ } map { @{ $METHODS{$_} || [] } } @args; } sub timeout { my $self = shift; my $old = $self->{timeout}; if (@_) { $self->{timeout} = shift; $self->agent->timeout( $self->{timeout} ) if $self->agent; } return $old; } sub url { my $self = shift; if ( not defined $self->daemon ) { carp "HTTP daemon not started yet"; return undef; } return $self->daemon->url; } # normal accessors for my $attr ( qw( agent chunk daemon host logfh port request response hop_headers logmask via x_forwarded_for client_headers engine max_connections max_keep_alive_requests ) ) { no strict 'refs'; *{"HTTP::Proxy::$attr"} = sub { my $self = shift; my $old = $self->{$attr}; $self->{$attr} = shift if @_; return $old; } } # read-only accessors for my $attr (qw( conn loop client_socket )) { no strict 'refs'; *{"HTTP::Proxy::$attr"} = sub { $_[0]{$attr} } } sub max_clients { shift->engine->max_clients( @_ ) } # deprecated methods are still supported { my %convert = ( maxchild => 'max_clients', maxconn => 'max_connections', maxserve => 'max_keep_alive_requests', ); while ( my ( $old, $new ) = each %convert ) { no strict 'refs'; *$old = sub { carp "$old is deprecated, please use $new"; goto \&$new; }; } } sub stash { my $stash = shift->{stash}; return $stash unless @_; return $stash->{ $_[0] } if @_ == 1; return $stash->{ $_[0] } = $_[1]; } sub new_connection { ++$_[0]{conn} } sub start { my $self = shift; $self->init; $SIG{INT} = $SIG{TERM} = sub { $self->{loop} = 0 }; # the main loop my $engine = $self->engine; $engine->start if $engine->can('start'); while( $self->loop ) { $engine->run; last if $self->max_connections && $self->conn >= $self->max_connections; } $engine->stop if $engine->can('stop'); $self->log( STATUS, "STATUS", "Processed " . $self->conn . " connection(s)" ); return $self->conn; } # semi-private init method sub init { my $self = shift; # must be run only once return if $self->{_init}++; $self->_init_daemon if ( !defined $self->daemon ); $self->_init_agent if ( !defined $self->agent ); # specific agent config $self->agent->requests_redirectable( [] ); $self->agent->agent(''); # for TRACE support $self->agent->protocols_allowed( [qw( http https ftp gopher )] ); # standard header filters $self->{headers}{request} = HTTP::Proxy::FilterStack->new; $self->{headers}{response} = HTTP::Proxy::FilterStack->new; # the same standard filter is used to handle headers my $std = HTTP::Proxy::HeaderFilter::standard->new(); $std->proxy( $self ); $self->{headers}{request}->push( [ sub { 1 }, $std ] ); $self->{headers}{response}->push( [ sub { 1 }, $std ] ); # standard body filters $self->{body}{request} = HTTP::Proxy::FilterStack->new(1); $self->{body}{response} = HTTP::Proxy::FilterStack->new(1); return; } # # private init methods # sub _init_daemon { my $self = shift; my %args = ( LocalAddr => $self->host, LocalPort => $self->port, ReuseAddr => 1, ); delete $args{LocalPort} unless $self->port; # 0 means autoselect my $daemon = HTTP::Daemon->new(%args) or die "Cannot initialize proxy daemon: $!"; $self->daemon($daemon); return $daemon; } sub _init_agent { my $self = shift; my $agent = LWP::UserAgent->new( env_proxy => 1, keep_alive => 2, parse_head => 0, timeout => $self->timeout, ) or die "Cannot initialize proxy agent: $!"; $self->agent($agent); return $agent; } # This is the internal "loop" that lets the child process process the # incoming connections. sub serve_connections { my ( $self, $conn ) = @_; my $response; $self->{client_socket} = $conn; # read-only $self->log( SOCKET, "SOCKET", "New connection from " . $conn->peerhost . ":" . $conn->peerport ); my ( $last, $served ) = ( 0, 0 ); while ( $self->loop() ) { my $req; { local $SIG{INT} = local $SIG{TERM} = 'DEFAULT'; $req = $conn->get_request(); } $served++; # initialisation $self->request($req); $self->response(undef); # Got a request? unless ( defined $req ) { $self->log( SOCKET, "SOCKET", "Getting request failed: " . $conn->reason ) if $conn->reason ne 'No more requests from this connection'; return; } $self->log( STATUS, "REQUEST", $req->method . ' ' . ( $req->method eq 'CONNECT' ? $req->uri->host_port : $req->uri ) ); # can we forward this method? if ( !grep { $_ eq $req->method } @METHODS ) { $response = HTTP::Response->new( 501, 'Not Implemented' ); $response->content_type( "text/plain" ); $response->content( "Method " . $req->method . " is not supported by this proxy." ); $self->response($response); goto SEND; } # transparent proxying support if( not defined $req->uri->scheme ) { if( my $host = $req->header('Host') ) { $req->uri->scheme( 'http' ); $req->uri->host( $host ); } else { $response = HTTP::Response->new( 400, 'Bad request' ); $response->content_type( "text/plain" ); $response->content("Can't do transparent proxying without a Host: header."); $self->response($response); goto SEND; } } # can we serve this protocol? if ( !$self->is_protocol_supported( my $s = $req->uri->scheme ) ) { # should this be 400 Bad Request? $response = HTTP::Response->new( 501, 'Not Implemented' ); $response->content_type( "text/plain" ); $response->content("Scheme $s is not supported by this proxy."); $self->response($response); goto SEND; } # select the request filters $self->{$_}{request}->select_filters( $req ) for qw( headers body ); # massage the request $self->{headers}{request}->filter( $req->headers, $req ); # FIXME I don't know how to get the LWP::Protocol objet... # NOTE: the request is always received in one piece $self->{body}{request}->filter( $req->content_ref, $req, undef ); $self->{body}{request}->eod; # end of data $self->log( HEADERS, "REQUEST", $req->headers->as_string ); # CONNECT method is a very special case if( ! defined $self->response and $req->method eq 'CONNECT' ) { $last = $self->_handle_CONNECT($served); return if $last; } # the header filters created a response, # we won't contact the origin server # FIXME should the response header and body be filtered? goto SEND if defined $self->response; # FIXME - don't forward requests to ourselves! # pop a response my ( $sent, $chunked ) = ( 0, 0 ); $response = $self->agent->simple_request( $req, sub { my ( $data, $response, $proto ) = @_; # first time, filter the headers if ( !$sent ) { $sent++; $self->response( $response ); # select the response filters $self->{$_}{response}->select_filters( $response ) for qw( headers body ); $self->{headers}{response} ->filter( $response->headers, $response ); ( $last, $chunked ) = $self->_send_response_headers( $served ); } # filter and send the data $self->log( DATA, "DATA", "got " . length($data) . " bytes of body data" ); $self->{body}{response}->filter( \$data, $response, $proto ); if ($chunked) { printf $conn "%x$CRLF%s$CRLF", length($data), $data if length($data); # the filter may leave nothing } else { print $conn $data; } }, $self->chunk ); # remove the header added by LWP::UA before it sends the response back $response->remove_header('Client-Date'); # the callback is not called by LWP::UA->request # in some cases (HEAD, redirect, error responses have no body) if ( !$sent ) { $self->response($response); $self->{$_}{response}->select_filters( $response ) for qw( headers body ); $self->{headers}{response} ->filter( $response->headers, $response ); } # do a last pass, in case there was something left in the buffers my $data = ""; # FIXME $protocol is undef here too $self->{body}{response}->filter_last( \$data, $response, undef ); if ( length $data ) { if ($chunked) { printf $conn "%x$CRLF%s$CRLF", length($data), $data; } else { print $conn $data; } } # last chunk print $conn "0$CRLF$CRLF" if $chunked; # no trailers either $self->response($response); # what about X-Died and X-Content-Range? if( my $died = $response->header('X-Died') ) { $self->log( ERROR, "ERROR", $died ); $sent = 0; $response = HTTP::Response->new( 500, "Proxy filter error" ); $response->content_type( "text/plain" ); $response->content($died); $self->response($response); } SEND: $response = $self->response ; # responses that weren't filtered through callbacks # (empty body or error) # FIXME some error response headers might not be filtered if ( !$sent ) { ($last, $chunked) = $self->_send_response_headers( $served ); my $content = $response->content; if ($chunked) { printf $conn "%x$CRLF%s$CRLF", length($content), $content if length($content); # the filter may leave nothing print $conn "0$CRLF$CRLF"; } else { print $conn $content; } } # FIXME ftp, gopher $conn->print( $response->content ) if defined $req->uri->scheme and $req->uri->scheme =~ /^(?:ftp|gopher)$/ and $response->is_success; $self->log( SOCKET, "SOCKET", "Connection closed by the proxy" ), last if $last || $served >= $self->max_keep_alive_requests; } $self->log( SOCKET, "SOCKET", "Connection closed by the client" ) if !$last and $served < $self->max_keep_alive_requests; $self->log( PROCESS, "PROCESS", "Served $served requests" ); $conn->close; } # INTERNAL METHOD # send the response headers for the proxy # expects $served (number of requests served) # returns $last and $chunked (last request served, chunked encoding) sub _send_response_headers { my ( $self, $served ) = @_; my ( $last, $chunked ) = ( 0, 0 ); my $conn = $self->client_socket; my $response = $self->response; # correct headers $response->remove_header("Content-Length") if $self->{body}{response}->will_modify(); $response->header( Server => "HTTP::Proxy/$VERSION" ) unless $response->header( 'Server' ); $response->header( Date => time2str(time) ) unless $response->header( 'Date' ); # this is adapted from HTTP::Daemon if ( $conn->antique_client ) { $last++ } else { my $code = $response->code; $conn->send_status_line( $code, $response->message, $self->request()->protocol() ); if ( $code =~ /^(1\d\d|[23]04)$/ ) { # make sure content is empty $response->remove_header("Content-Length"); $response->content(''); } elsif ( $response->request && $response->request->method eq "HEAD" ) { # probably OK, says HTTP::Daemon } else { if ( $conn->proto_ge("HTTP/1.1") ) { $chunked++; $response->push_header( "Transfer-Encoding" => "chunked" ); $response->push_header( "Connection" => "close" ) if $served >= $self->max_keep_alive_requests; } else { $last++; $conn->force_last_request; } } print $conn $response->headers_as_string($CRLF); print $conn $CRLF; # separates headers and content } $self->log( STATUS, "RESPONSE", $response->status_line ); $self->log( HEADERS, "RESPONSE", $response->headers->as_string ); return ($last, $chunked); } # INTERNAL method # FIXME no man-in-the-middle for now sub _handle_CONNECT { my ($self, $served) = @_; my $last = 0; my $conn = $self->client_socket; my $req = $self->request; my $upstream; # connect upstream if ( my $up = $self->agent->proxy('http') ) { # clean up authentication info from proxy URL $up =~ s{^http://[^/\@]*\@}{http://}; # forward to upstream proxy $self->log( PROXY, "PROXY", "Forwarding CONNECT request to next proxy: $up" ); my $response = $self->agent->simple_request($req); # check the upstream proxy's response my $code = $response->code; if ( $code == 407 ) { # don't forward Proxy Authentication requests my $response_407 = $response->as_string; $response_407 =~ s/^Client-.*$//mg; $response = HTTP::Response->new(502); $response->content_type("text/plain"); $response->content( "Upstream proxy ($up) " . "requested authentication:\n\n" . $response_407 ); $self->response($response); return $last; } elsif ( $code != 200 ) { # forward every other failure $self->response($response); return $last; } $upstream = $response->{client_socket}; } else { # direct connection $upstream = IO::Socket::INET->new( PeerAddr => $req->uri->host_port ); } # no upstream socket obtained if( !$upstream ) { my $response = HTTP::Response->new( 500 ); $response->content_type( "text/plain" ); $response->content( "CONNECT failed: $@"); $self->response($response); return $last; } # send the response headers (FIXME more headers required?) my $response = HTTP::Response->new(200); $self->response($response); $self->{$_}{response}->select_filters( $response ) for qw( headers body ); $self->_send_response_headers( $served ); # we now have a TCP connection $last = 1; my $select = IO::Select->new; for ( $conn, $upstream ) { $_->autoflush(1); $_->blocking(0); $select->add($_); } # loop while there is data while ( my @ready = $select->can_read ) { for (@ready) { my $data = ""; my ($sock, $peer, $from ) = $conn eq $_ ? ( $conn, $upstream, "client" ) : ( $upstream, $conn, "server" ); # read the data my $read = $sock->sysread( $data, 4096 ); # check for errors if(not defined $read ) { $self->log( ERROR, "CONNECT", "Read undef from $from ($!)" ); next; } # end of connection if ( $read == 0 ) { $_->close for ( $sock, $peer ); $select->remove( $sock, $peer ); $self->log( SOCKET, "CONNECT", "Connection closed by the $from" ); $self->log( PROCESS, "PROCESS", "Served $served requests" ); next; } # proxy the data $self->log( CONNECT, "CONNECT", "$read bytes received from $from" ); $peer->syswrite($data, length $data); } } $self->log( CONNECT, "CONNECT", "End of CONNECT proxyfication"); return $last; } sub push_filter { my $self = shift; my %arg = ( mime => 'text/*', method => join( ',', @METHODS ), scheme => 'http', host => '', path => '', query => '', ); # parse parameters for( my $i = 0; $i < @_ ; $i += 2 ) { next if $_[$i] !~ /^(mime|method|scheme|host|path|query)$/; $arg{$_[$i]} = $_[$i+1]; splice @_, $i, 2; $i -= 2; } croak "Odd number of arguments" if @_ % 2; # the proxy must be initialised $self->init; # prepare the variables for the closure my ( $mime, $method, $scheme, $host, $path, $query ) = @arg{qw( mime method scheme host path query )}; if ( defined $mime && $mime ne '' ) { $mime =~ m!/! or croak "Invalid MIME type definition: $mime"; $mime =~ s/\*/$RX{token}/; #turn it into a regex $mime = qr/^$mime(?:$|\s*;?)/; } my @method = split /\s*,\s*/, $method; for (@method) { croak "Invalid method: $_" if !/$RX{method}/ } $method = @method ? '(?:' . join ( '|', @method ) . ')' : ''; $method = qr/^$method$/; my @scheme = split /\s*,\s*/, $scheme; for (@scheme) { croak "Unsupported scheme: $_" if !$self->is_protocol_supported($_); } $scheme = @scheme ? '(?:' . join ( '|', @scheme ) . ')' : ''; $scheme = qr/$scheme/; $host ||= '.*'; $host = qr/$host/i; $path ||= '.*'; $path = qr/$path/; $query ||= '.*'; $query = qr/$query/; # push the filter and its match method on the correct stack while(@_) { my ($message, $filter ) = (shift, shift); croak "'$message' is not a filter stack" unless $message =~ /^(request|response)$/; croak "Not a Filter reference for filter queue $message" unless ref( $filter ) && ( $filter->isa('HTTP::Proxy::HeaderFilter') || $filter->isa('HTTP::Proxy::BodyFilter') ); my $stack; $stack = 'headers' if $filter->isa('HTTP::Proxy::HeaderFilter'); $stack = 'body' if $filter->isa('HTTP::Proxy::BodyFilter'); # MIME can only match on reponse my $mime = $mime; undef $mime if $message eq 'request'; # compute the match sub as a closure # for $self, $mime, $method, $scheme, $host, $path my $match = sub { return 0 if ( defined $mime ) && ( $self->response->content_type || '' ) !~ $mime; return 0 if ( $self->{request}->method || '' ) !~ $method; return 0 if ( $self->{request}->uri->scheme || '' ) !~ $scheme; return 0 if ( $self->{request}->uri->authority || '' ) !~ $host; return 0 if ( $self->{request}->uri->path || '' ) !~ $path; return 0 if ( $self->{request}->uri->query || '' ) !~ $query; return 1; # it's a match }; # push it on the corresponding FilterStack $self->{$stack}{$message}->push( [ $match, $filter ] ); $filter->proxy( $self ); } } sub is_protocol_supported { my ( $self, $scheme ) = @_; my $ok = 1; if ( !$self->agent->is_protocol_supported($scheme) ) { # double check, in case a dummy scheme was added # to be handled directly by a filter $ok = 0; $scheme eq $_ && $ok++ for @{ $self->agent->protocols_allowed }; } $ok; } sub log { my $self = shift; my $level = shift; my $fh = $self->logfh; return unless $self->logmask & $level || $level == ERROR; my ( $prefix, $msg ) = ( @_, '' ); my @lines = split /\n/, $msg; @lines = ('') if not @lines; flock( $fh, LOCK_EX ); print $fh "[" . localtime() . "] ($$) $prefix: $_\n" for @lines; flock( $fh, LOCK_UN ); } 1; __END__ =head1 NAME HTTP::Proxy - A pure Perl HTTP proxy =head1 SYNOPSIS use HTTP::Proxy; # initialisation my $proxy = HTTP::Proxy->new( port => 3128 ); # alternate initialisation my $proxy = HTTP::Proxy->new; $proxy->port( 3128 ); # the classical accessors are here! # this is a MainLoop-like method $proxy->start; =head1 DESCRIPTION This module implements a HTTP proxy, using a L to accept client connections, and a LWP::UserAgent to ask for the requested pages. The most interesting feature of this proxy object is its ability to filter the HTTP requests and responses through user-defined filters. Once the proxy is created, with the C method, it is possible to alter its behaviour by adding so-called "filters". This is done by the C method. Once the filter is ready to run, it can be launched, with the C method. This method does not normally return until the proxy is killed or otherwise stopped. An important thing to note is that the proxy is (except when running the C engine) a I proxy: it doesn't support passing information between child processes, and you can count on reliable information passing only during a single HTTP connection (request + response). =head1 FILTERS You can alter the way the default L works by plugging callbacks (filter objects, actually) at different stages of the request/response handling. When a request is received by the L object, it is filtered through a standard filter that transform this request accordingly to RFC 2616 (by adding the C header, and a few other transformations). This is the default, bare minimum behaviour. The response is also filtered in the same manner. There is a total of four filter chains: C, C, C and C. You can add your own filters to the default ones with the C method. The method pushes a filter on the appropriate filter stack. $proxy->push_filter( response => $filter ); The headers/body category is determined by the base class of the filter. There are two base classes for filters, which are L and L (the names are self-explanatory). See the documentation of those two classes to find out how to write your own header or body filters. The named parameter is used to determine the request/response part. It is possible to push the same filter on the request and response stacks, as in the following example: $proxy->push_filter( request => $filter, response => $filter ); If several filters match the message, they will be applied in the order they were pushed on their filter stack. Named parameters can be used to create the match routine. They are: method - the request method scheme - the URI scheme host - the URI authority (host:port) path - the URI path query - the URI query string mime - the MIME type (for a response-body filter) The filters are applied only when all the the parameters match the request or the response. All these named parameters have default values, which are: method => 'OPTIONS,GET,HEAD,POST,PUT,DELETE,TRACE,CONNECT' scheme => 'http' host => '' path => '' query => '' mime => 'text/*' The C parameter is a glob-like string, with a required C character and a C<*> as a joker. Thus, C<*/*> matches I responses, and C<""> those with no C header. To match any reponse (with or without a C header), use C. The C parameter is only meaningful with the C filter stack. It is ignored if passed to any other filter stack. The C and C parameters are strings consisting of comma-separated values. The C and C parameters are regular expressions. A match routine is compiled by the proxy and used to check if a particular request or response must be filtered through a particular filter. It is also possible to push several filters on the same stack with the same match subroutine: # convert italics to bold $proxy->push_filter( mime => 'text/html', response => HTTP::Proxy::BodyFilter::tags->new(), response => HTTP::Proxy::BodyFilter::simple->new( sub { ${ $_[1] } =~ s!(!$1b>!ig } ) ); For more details regarding the creation of new filters, check the L and L documentation. Here's an example of subclassing a base filter class: # fixes a common typo ;-) # but chances are that this will modify a correct URL { package FilterPerl; use base qw( HTTP::Proxy::BodyFilter ); sub filter { my ( $self, $dataref, $message, $protocol, $buffer ) = @_; $$dataref =~ s/PERL/Perl/g; } } $proxy->push_filter( response => FilterPerl->new() ); Other examples can be found in the documentation for L, L, L, L. # a simple anonymiser # see eg/anonymiser.pl for the complete code $proxy->push_filter( mime => undef, request => HTTP::Proxy::HeaderFilter::simple->new( sub { $_[1]->remove_header(qw( User-Agent From Referer Cookie )) }, ), response => HTTP::Proxy::HeaderFilter::simple->new( sub { $_[1]->remove_header(qw( Set-Cookie )); }, ) ); IMPORTANT: If you use your own L, you must install it before your calls to C, otherwise the match method will make wrong assumptions about the schemes your agent supports. NOTE: It is likely that possibility of changing the agent or the daemon may disappear in future versions. =head1 METHODS =head2 Constructor and initialisation =over 4 =item new() The C method creates a new L object. All attributes can be passed as parameters to replace the default. Parameters that are not L attributes will be ignored and passed to the chosen L object. =item init() C initialise the proxy without starting it. It is usually not needed. This method is called by C if needed. =item push_filter() The C method is used to add filters to the proxy. It is fully described in section L. =back =head2 Accessors and mutators L class has several accessors and mutators. Called with arguments, the accessor returns the current value. Called with a single argument, it sets the current value and returns the previous one, in case you want to keep it. If you call a read-only accessor with a parameter, this parameter will be ignored. The defined accessors are (in alphabetical order): =over 4 =item agent The L object used internally to connect to remote sites. =item chunk The chunk size for the L callbacks. =item client_socket (read-only) The socket currently connected to the client. Mostly useful in filters. =item client_headers This attribute holds a reference to the client headers set up by L (C, C, C, C, C, C, C, C, C, C, C, C, C). They are removed by the filter L from the request and response objects received by the proxy. If a filter (such as a SSL certificate verification filter) need to access them, it must do it through this accessor. =item conn (read-only) The number of connections processed by this L instance. =item daemon The L object used to accept incoming connections. (You usually never need this.) =item engine The L object that manages the child processes. =item hop_headers This attribute holds a reference to the hop-by-hop headers (C, C, C, C, C, C, C, C). They are removed by the filter HTTP::Proxy::HeaderFilter::standard from the request and response objects received by the proxy. If a filter (such as a proxy authorisation filter) need to access them, it must do it through this accessor. =item host The proxy L host (default: 'localhost'). This means that by default, the proxy answers only to clients on the local machine. You can pass a specific interface address or C<"">/C for any interface. This default prevents your proxy to be used as an anonymous proxy by script kiddies. =item known_methods( @groups ) (read-only) This method returns all HTTP (and extensions to HTTP) known to C. Methods are grouped by type. Known method groups are: C, C and C. Called with an empty list, this method will return all known methods. This method is case-insensitive, and will C if an unknown group name is passed. =item logfh A filehandle to a logfile (default: C<*STDERR>). =item logmask( [$mask] ) Be verbose in the logs (default: C). Here are the various elements that can be added to the mask (their values are powers of 2, starting from 0 and listed here in ascending order): NONE - Log only errors PROXY - Proxy information STATUS - Requested URL, reponse status and total number of connections processed PROCESS - Subprocesses information (fork, wait, etc.) SOCKET - Information about low-level sockets HEADERS - Full request and response headers are sent along FILTERS - Filter information DATA - Data received by the filters CONNECT - Data transmitted by the CONNECT method ENGINE - Engine information ALL - Log all of the above If you only want status and process information, you can use: $proxy->logmask( STATUS | PROCESS ); Note that all the logging constants are not exported by default, but by the C<:log> tag. They can also be exported one by one. =item loop (read-only) Internal. False when the main loop is about to be broken. =item max_clients =item maxchild The maximum number of child process the L object will spawn to handle client requests (default: depends on the engine). This method is currently delegated to the L object. C is deprecated and will disappear. =item max_connections =item maxconn The maximum number of TCP connections the proxy will accept before returning from start(). 0 (the default) means never stop accepting connections. C is deprecated. Note: C will be deprecated soon, for two reasons: 1) it is more of an L attribute, 2) not all engines will support it. =item max_keep_alive_requests =item maxserve The maximum number of requests the proxy will serve in a single connection. (same as C in Apache) C is deprecated. =item port The proxy L port (default: 8080). =item request The request originaly received by the proxy from the user-agent, which will be modified by the request filters. =item response The response received from the origin server by the proxy. It is normally C until the proxy actually receives the beginning of a response from the origin server. If one of the request filters sets this attribute, it "short-circuits" the request/response scheme, and the proxy will return this response (which is NOT filtered through the response filter stacks) instead of the expected origin server response. This is useful for caching (though Squid does it much better) and proxy authentication, for example. =item stash The stash is a hash where filters can store data to share between them. The stash() method can be used to set the whole hash (with a HASH reference). To access individual keys simply do: $proxy->stash( 'bloop' ); To set it, type: $proxy->stash( bloop => 'owww' ); It's also possibly to get a reference to the stash: my $s = $filter->proxy->stash(); $s->{bang} = 'bam'; # $proxy->stash( 'bang' ) will now return 'bam' B since the proxy forks for each TCP connection, the data is only shared between filters in the same child process. =item timeout The timeout used by the internal L (default: 60). =item url (read-only) The url where the proxy can be reached. =item via The content of the Via: header. Setting it to an empty string will prevent its addition. (default: C<$hostname (HTTP::Proxy/$VERSION)>) =item x_forwarded_for If set to a true value, the proxy will send the C header. (default: true) =back =head2 Connection handling methods =over 4 =item start() This method works like Tk's C: you hand over control to the L object you created and configured. If C is not zero, C will return after accepting at most that many connections. It will return the total number of connexions. =item serve_connections() This is the internal method used to handle each new TCP connection to the proxy. =back =head2 Other methods =over 4 =item log( $level, $prefix, $message ) Adds C<$message> at the end of C, if $level matches C. The C method also prints a timestamp. The output looks like: [Thu Dec 5 12:30:12 2002] ($$) $prefix: $message where C<$$> is the current processus id. If C<$message> is a multiline string, several log lines will be output, each line starting with C<$prefix>. =item is_protocol_supported( $scheme ) Returns a boolean indicating if $scheme is supported by the proxy. This method is only used internaly. It is essential to allow L users to create "pseudo-schemes" that LWP doesn't know about, but that one of the proxy filters can handle directly. New schemes are added as follows: $proxy->init(); # required to get an agent $proxy->agent->protocols_allowed( [ @{ $proxy->agent->protocols_allowed }, 'myhttp' ] ); =item new_connection() Increase the proxy's TCP connections counter. Only used by L objects. =back =head2 Apache-like attributes L has several Apache-like attributes that control the way the HTTP and TCP connections are handled. The following attributes control the TCP connection. They are passed to the underlying L, which may (or may not) use them to change its behaviour. =over 4 =item start_servers Number of child process to fork at the beginning. =item max_clients Maximum number of concurrent TCP connections (i.e. child processes). =item max_requests_per_child Maximum number of TCP connections handled by the same child process. =item min_spare_servers Minimum number of inactive child processes. =item max_spare_servers Maximum number of inactive child processes. =back Those attributes control the HTTP connection: =over 4 =item keep_alive Support for keep alive HTTP connections. =item max_keep_alive_requests Maximum number of HTTP connections within a single TCP connection. =item keep_alive_timeout Timeout for keep-alive connection. =back =head1 EXPORTED SYMBOLS No symbols are exported by default. The C<:log> tag exports all the logging constants. =head1 BUGS This module does not work under Windows, but I can't see why, and do not have a development platform under that system. Patches and explanations very welcome. I guess it is because C is not well supported. $proxy->maxchild(0); =over 4 =item However, David Fishburn says: This did not work for me under WinXP - ActiveState Perl 5.6, but it DOES work on WinXP ActiveState Perl 5.8. =back Several people have tried to help, but we haven't found a way to make it work correctly yet. As from version 0.16, the default engine is L. Let me know if it works better. =head1 SEE ALSO L, L, L, the examples in F. =head1 AUTHOR Philippe "BooK" Bruhat, Ebook@cpan.orgE. There is also a mailing-list: http-proxy@mongueurs.net for general discussion about L. =head1 THANKS Many people helped me during the development of this module, either on mailing-lists, IRC or over a beer in a pub... So, in no particular order, thanks to the libwww-perl team for such a terrific suite of modules, perl-qa (tips for testing), the French Perl I (for code tricks, beers and encouragements) and my growing user base... C<;-)> I'd like to particularly thank Dan Grigsby, who's been using L since 2003 (before the filter classes even existed). He is apparently making a living from a product based on L. Thanks a lot for your confidence in my work! =head1 COPYRIGHT Copyright 2002-2013, Philippe Bruhat. =head1 LICENSE This module is free software; you can redistribute it or modify it under the same terms as Perl itself. =cut HTTP-Proxy-0.300/lib/HTTP/Proxy000755001750001750 012130250124 15115 5ustar00bookbook000000000000HTTP-Proxy-0.300/lib/HTTP/Proxy/HeaderFilter.pm000444001750001750 672712130250124 20162 0ustar00bookbook000000000000package HTTP::Proxy::HeaderFilter; use strict; use Carp; sub new { my $class = shift; my $self = bless {}, $class; $self->init(@_) if $self->can('init'); return $self; } sub filter { croak "HTTP::Proxy::HeaderFilter cannot be used as a filter"; } sub proxy { my ( $self, $new ) = @_; return $new ? $self->{_hphf_proxy} = $new : $self->{_hphf_proxy}; } 1; __END__ =head1 NAME HTTP::Proxy::HeaderFilter - A base class for HTTP message header filters =head1 SYNOPSIS package MyFilter; use base qw( HTTP::Proxy::HeaderFilter ); # changes the User-Agent header in all requests # this filter must be pushed on the request stack sub filter { my ( $self, $headers, $message ) = @_; $message->headers->header( User_Agent => 'MyFilter/1.0' ); } 1; =head1 DESCRIPTION The L class is used to create filters for HTTP request/response headers. =head2 Creating a HeaderFilter A HeaderFilter is just a derived class that implements some methods called by the proxy. Of all the methods presented below, only C B be defined in the derived class. =over 4 =item filter() The signature of the C method is the following: sub filter { my ( $self, $headers, $message) = @_; ... } where C<$self> is the filter object, C<$headers> is a L object, and $message is either a L or a L object. The $headers L object is the one that will be sent to the client (if the filter is on the response stack) or origin server (if the filter is on the request stack). If C<$headers> is modified by the filter, the modified headers will be sent to the client or server. The C method (if it exists) is called by the C constructeur to perform all initisalisation tasks. It's called once in the filter lifetime. A L object is a blessed hash, and the base class reserves only hash keys that start with C<_hphf>. =item new() The constructor is defined for all subclasses. Initialisation tasks (if any) for subclasses should be done in the C method (see below). =item init() This method is called by the C constructeur to perform all initisalisation tasks. It's called once in the filter lifetime. It receives all the parameters passed to C. =back =head2 Standard HeaderFilters Standard L classes are lowercase. The following HeaderFilters are included in the L distribution: =over 4 =item simple This class lets you create a simple header filter from a code reference. =item standard This is the filter that provides standard headers handling for L. It is loaded automatically by L. =back Please read each filter's documentation for more details about their use. =head1 USEFUL METHODS FOR SUBCLASSES Some methods are available to filters, so that they can eventually use the little knowledge they might have of L's internals. They mostly are accessors. =over 4 =item proxy() Gets a reference to the L objects that owns the filter. This gives access to some of the proxy methods. =back =head1 AUTHOR Philippe "BooK" Bruhat, Ebook@cpan.orgE. =head1 SEE ALSO L, L. =head1 COPYRIGHT Copyright 2003-2013, Philippe Bruhat. =head1 LICENSE This module is free software; you can redistribute it or modify it under the same terms as Perl itself. =cut HTTP-Proxy-0.300/lib/HTTP/Proxy/FilterStack.pm000444001750001750 1375512130250124 20056 0ustar00bookbook000000000000package HTTP::Proxy::FilterStack; # Here's a description of the class internals # - filters: the list of (sub, filter) pairs that match the message, # and through which it must go # - current: the actual list of filters, which is computed during # the first call to filter() # - buffers: the buffers associated with each (selected) filter # - body : true if it's a HTTP::Proxy::BodyFilter stack use strict; use Carp; # new( $isbody ) # $isbody is true only for response-body filters stack sub new { my $class = shift; my $self = { body => shift || 0, filters => [], buffers => [], current => undef, }; $self->{type} = $self->{body} ? "HTTP::Proxy::BodyFilter" : "HTTP::Proxy::HeaderFilter"; return bless $self, $class; } # # insert( $index, [ $matchsub, $filter ], ...) # sub insert { my ( $self, $idx ) = ( shift, shift ); $_->[1]->isa( $self->{type} ) or croak("$_ is not a $self->{type}") for @_; splice @{ $self->{filters} }, $idx, 0, @_; } # # remove( $index ) # sub remove { my ( $self, $idx ) = @_; splice @{ $self->{filters} }, $idx, 1; } # # push( [ $matchsub, $filter ], ... ) # sub push { my $self = shift; $_->[1]->isa( $self->{type} ) or croak("$_ is not a $self->{type}") for @_; push @{ $self->{filters} }, @_; } sub all { return @{ $_[0]->{filters} }; } sub will_modify { return $_[0]->{will_modify}; } # # select the filters that will be used on the message # sub select_filters { my ($self, $message ) = @_; # first time we're called this round if ( not defined $self->{current} ) { # select the filters that match $self->{current} = [ map { $_->[1] } grep { $_->[0]->() } @{ $self->{filters} } ]; # create the buffers if ( $self->{body} ) { $self->{buffers} = [ ( "" ) x @{ $self->{current} } ]; $self->{buffers} = [ \( @{ $self->{buffers} } ) ]; } # start the filter if needed (and pass the message) for ( @{ $self->{current} } ) { if ( $_->can('begin') ) { $_->begin( $message ); } elsif ( $_->can('start') ) { $_->proxy->log( HTTP::Proxy::ERROR(), "DEPRECATION", "The start() filter method is *deprecated* and disappeared in 0.15!\nUse begin() in your filters instead!" ); } } # compute the "will_modify" value $self->{will_modify} = $self->{body} ? grep { $_->will_modify() } @{ $self->{current} } : 0; } } # # the actual filtering is done here # sub filter { my $self = shift; # pass the body data through the filter if ( $self->{body} ) { my $i = 0; my ( $data, $message, $protocol ) = @_; for ( @{ $self->{current} } ) { $$data = ${ $self->{buffers}[$i] } . $$data; ${ $self->{buffers}[ $i ] } = ""; $_->filter( $data, $message, $protocol, $self->{buffers}[ $i++ ] ); } } else { $_->filter(@_) for @{ $self->{current} }; $self->eod; } } # # filter what remains in the buffers # sub filter_last { my $self = shift; return unless $self->{body}; # sanity check my $i = 0; my ( $data, $message, $protocol ) = @_; for ( @{ $self->{current} } ) { $$data = ${ $self->{buffers}[ $i ] } . $$data; ${ $self->{buffers}[ $i++ ] } = ""; $_->filter( $data, $message, $protocol, undef ); } # call the cleanup routine if needed for ( @{ $self->{current} } ) { $_->end if $_->can('end'); } # clean up the mess for next time $self->eod; } # # END OF DATA cleanup method # sub eod { $_[0]->{buffers} = []; $_[0]->{current} = undef; } 1; __END__ =head1 NAME HTTP::Proxy::FilterStack - A class to manage filter stacks =head1 DESCRIPTION This class is used internally by L to manage its four filter stacks. From the point of view of L, a filter is actually a (C, C) pair. The match subroutine (generated by L's C method) is run against the current L object to find out which filters must be kept in the stack when handling this message. The filter stack maintains a set of buffers where the filters can store data. This data is appended at the beginning of the next chunk of data, until all the data has been sent. =head1 METHODS The class provides the following methods: =over 4 =item new( $isbody ) Create a new instance of L. If C<$isbody> is true, then the stack will manage body filters (subclasses of L). =item select_filters( $message ) C<$message> is the current L handled by the proxy. It is used (with the help of each filter's match subroutine) to select the subset of filters that will be applied on the given message. =item filter( @args ) This method calls all the currently selected filters in turn, with the appropriate arguments. =item filter_last() This method calls all the currently selected filters in turn, to filter the data remaining in the buffers in a single pass. =item will_modify() Return a boolean value indicating if the list of selected filters in the stack will modify the body content. The value is computed from the result of calling C on all selected filters. =item all() Return a list of all filters in the stack. =item eod() Used for END OF DATA bookkeeping. =item push() Push the given C<[ match, filterobj ]> pairs at the top of the stack. =item insert( $idx, @pairs ) Insert the given C<[ match, filterobj ]> pairs at position C<$idx> in the stack. =item remove( $idx ) Remove the C<[ match, filterobj ]> pair at position C<$idx> in the stack. =back =head1 AUTHOR Philippe "BooK" Bruhat, Ebook@cpan.orgE. =head1 COPYRIGHT Copyright 2002-2013, Philippe Bruhat. =head1 LICENSE This module is free software; you can redistribute it or modify it under the same terms as Perl itself. =cut HTTP-Proxy-0.300/lib/HTTP/Proxy/Engine.pm000444001750001750 1001412130250124 17031 0ustar00bookbook000000000000package HTTP::Proxy::Engine; use strict; use Carp; my %engines = ( MSWin32 => 'NoFork', default => 'Legacy', ); # required accessors __PACKAGE__->make_accessors( qw( max_clients )); sub new { my $class = shift; my %params = @_; # the front-end if ( $class eq 'HTTP::Proxy::Engine' ) { my $engine = delete $params{engine}; $engine = $engines{$^O} || $engines{default} unless defined $engine; $class = "HTTP::Proxy::Engine::$engine"; eval "require $class"; croak $@ if $@; } # some error checking croak "No proxy defined" unless exists $params{proxy}; croak "$params{proxy} is not a HTTP::Proxy object" unless UNIVERSAL::isa( $params{proxy}, 'HTTP::Proxy' ); # so we are an actual engine no strict 'refs'; return bless { %{"$class\::defaults"}, %params }, $class; } # run() should be defined in subclasses sub run { my $self = shift; my $class = ref $self; croak "$class doesn't define a run() method"; } sub proxy { $_[0]{proxy} } # class method sub make_accessors { my $class = shift; for my $attr (@_) { no strict 'refs'; *{"$class\::$attr"} = sub { $_[0]{$attr} = $_[1] if defined $_[1]; $_[0]{$attr}; }; } } 1; __END__ =head1 NAME HTTP::Proxy::Engine - Generic child process manager engine for HTTP::Proxy =head1 SYNOPSIS use HTTP::Proxy; # use the default engine for your system my $proxy = HTTP::Proxy->new(); # choose one my $proxy = HTTP::Proxy->new( engine => 'Old' ); =head1 DESCRIPTION The L class is a front-end to actual proxy engine classes. The role of an engine is to implement the main fork+serve loop with all the required bookkeeping. This is also a good way to test various implementation and/or try out new algorithms without too much difficulties. =head1 METHODS =over 4 =item new() Create a new engine. The parameter C is used to decide which kind of engine will be created. Other parameters are passed to the underlying engine. This method also implement the subclasses constructor (they obviously do not need the C parameter). =back =head1 CREATING YOUR OWN ENGINE It is possible to create one's own engine, by creating a simple subclass of L with the following methods: =over 4 =item start() This method should handle any initialisation required when the engine starts. =item run() This method is the main loop of the master process. It defines how child processes are forked, checked and killed. The engine MUST have a run() method, and it will be called again and again until the proxy exits. C<< $self->proxy->daemon >> returns the listening socket that can C connections. The child must call C<< $self->proxy->serve_connections() >> on the returned socket to handle actual TCP connections. =item stop() This optional method should handle any cleanup procedures when the engine stops (typically when the main proxy process is killed). =back A subclass may also define a C<%defaults> hash (with C) that contains the default values for the fields used internaly. =head1 METHODS PROVIDED TO SUBCLASSES L provides the following methods to its subclasses: =over 4 =item proxy() Return the L object that runs the engine. =item max_clients() Get or set the maximum number of TCP clients, that is to say the maximum number of forked child process. Some engines may understand a value of C<0> as I. This is what L does. =item make_accessors( @names ) Create accessors named after C<@names> in the subclass package. All accessors are read/write. This is a utility method. B =back =head1 AUTHOR Philippe "BooK" Bruhat, C<< >>. =head1 COPYRIGHT Copyright 2005-2013, Philippe Bruhat. =head1 LICENSE This module is free software; you can redistribute it or modify it under the same terms as Perl itself. =cut HTTP-Proxy-0.300/lib/HTTP/Proxy/BodyFilter.pm000444001750001750 1674412130250124 17707 0ustar00bookbook000000000000package HTTP::Proxy::BodyFilter; use strict; use Carp; sub new { my $class = shift; my $self = bless {}, $class; $self->init(@_) if $self->can('init'); return $self; } sub proxy { my ( $self, $new ) = @_; return $new ? $self->{_hpbf_proxy} = $new : $self->{_hpbf_proxy}; } sub filter { croak "HTTP::Proxy::HeaderFilter cannot be used as a filter"; } sub will_modify { 1 } # by default, we expect the filter to modify data 1; __END__ =head1 NAME HTTP::Proxy::BodyFilter - A base class for HTTP messages body filters =head1 SYNOPSIS package MyFilter; use base qw( HTTP::Proxy::BodyFilter ); # a simple modification, that may break things sub filter { my ( $self, $dataref, $message, $protocol, $buffer ) = @_; $$dataref =~ s/PERL/Perl/g; } 1; =head1 DESCRIPTION The L class is used to create filters for HTTP request/response body data. =head2 Creating a BodyFilter A BodyFilter is just a derived class that implements some methods called by the proxy. Of all the methods presented below, only C B be defined in the derived class. =over 4 =item filter() The signature of the filter() method is the following: sub filter { my ( $self, $dataref, $message, $protocol, $buffer ) = @_; ... } where C<$self> is the filter object, C<$dataref> is a reference to the chunk of body data received, C<$message> is a reference to either a L or a L object, and C<$protocol> is a reference to the L protocol object. Note that this subroutine signature looks a lot like that of the call- backs of L (except that C<$message> is either a L or a L object). C<$buffer> is a reference to a buffer where some of the unprocessed data can be stored for the next time the filter will be called (see L for details). Thanks to the built-in HTTP::Proxy::BodyFilter::* filters, this is rarely needed. It is possible to access the headers of the message with C<< $message->headers() >>. This L object is the one that was sent to the client (if the filter is on the response stack) or origin server (if the filter is on the request stack). Modifying it in the C method is useless, since the headers have already been sent. Since C<$dataref> is a I to the data string, the referent can be modified and the changes will be transmitted through the filters that follows, until the data reaches its recipient. A L object is a blessed hash, and the base class reserves only hash keys that start with C<_hpbf>. =item new() The constructor is defined for all subclasses. Initialisation tasks (if any) for subclasses should be done in the C method (see below). =item init() This method is called by the C constructeur to perform all initisalisation tasks. It's called once in the filter lifetime. It receives all the parameters passed to C. =item begin() Some filters might require initialisation before they are able to handle the data. If a C method is defined in your subclass, the proxy will call it before sending data to the C method. It's called once per HTTP message handled by the filter, before data processing begins. The method signature is as follows: sub begin { my ( $self, $message ) = @_ ... } =item end() Some filters might require finalisation after they are finished handling the data. If a C method is defined in your subclass, the proxy will call it after it has finished sending data to the C method. It's called once per HTTP message handled by the filter, after all data processing is done. This method does not expect any parameters. =item will_modify() This method return a boolean value that indicate if the filter will modify the body data on the fly. The default implementation returns a I value. =back =head2 Using a buffer to store data for a later use Some filters cannot handle arbitrary data: for example a filter that basically lowercases tag name will apply a simple regex such as C\s*(\w+)([^E]*)E/E\L$1\E$2E/g>. But the filter will fail is the chunk of data contains a tag that is cut before the final C>. It would be extremely complicated and error-prone to let each filter (and its author) do its own buffering, so the L architecture handles this too. The proxy passes to each filter, each time it is called, a reference to an empty string (C<$buffer> in the above signature) that the filter can use to store some data for next run. When the reference is C, it means that the filter cannot store any data, because this is the very last run, needed to gather all the data left in all buffers. It is recommended to store as little data as possible in the buffer, so as to avoid (badly) reproducing what L does. In particular, you have to remember that all the data that remains in the buffer after the last piece of data is received from the origin server will be sent back to your filter in one big piece. =head2 The store and forward approach L implements a I mechanism, for those filters which need to have the whole message body to work. It's enabled simply by pushing the L filter on the filter stack. The data is stored in memory by the "complete" filter, which passes it on to the following filter once the full message body has been received. =head2 Standard BodyFilters Standard L classes are lowercase. The following BodyFilters are included in the L distribution: =over 4 =item lines This filter makes sure that the next filter in the filter chain will only receive complete lines. The "chunks" of data received by the following filters with either end with C<\n> or will be the last piece of data for the current HTTP message body. =item htmltext This class lets you create a filter that runs a given code reference against text included in a HTML document (outside CscriptE> and CstyleE> tags). HTML entities are not included in the text. =item htmlparser Creates a filter from a HTML::Parser object. =item simple This class lets you create a simple body filter from a code reference. =item save Store the message body to a file. =item complete This filter stores the whole message body in memory, thus allowing some actions to be taken only when the full page has been received by the proxy. =item tags The L filter makes sure that the next filter in the filter chain will only receive complete tags. The current implementation is not 100% perfect, though. =back Please read each filter's documentation for more details about their use. =head1 USEFUL METHODS FOR SUBCLASSES Some methods are available to filters, so that they can eventually use the little knowledge they might have of HTTP::Proxy's internals. They mostly are accessors. =over 4 =item proxy() Gets a reference to the L objects that owns the filter. This gives access to some of the proxy methods. =back =head1 AUTHOR Philippe "BooK" Bruhat, Ebook@cpan.orgE. =head1 SEE ALSO L, L. =head1 COPYRIGHT Copyright 2003-2013, Philippe Bruhat. =head1 LICENSE This module is free software; you can redistribute it or modify it under the same terms as Perl itself. =cut HTTP-Proxy-0.300/lib/HTTP/Proxy/HeaderFilter000755001750001750 012130250124 17453 5ustar00bookbook000000000000HTTP-Proxy-0.300/lib/HTTP/Proxy/HeaderFilter/standard.pm000444001750001750 1057612130250124 21777 0ustar00bookbook000000000000package HTTP::Proxy::HeaderFilter::standard; use strict; use HTTP::Proxy; use HTTP::Headers::Util qw( split_header_words ); use HTTP::Proxy::HeaderFilter; use vars qw( @ISA ); @ISA = qw( HTTP::Proxy::HeaderFilter ); # known hop-by-hop headers my @hopbyhop = qw( Connection Keep-Alive Proxy-Authenticate Proxy-Authorization TE Trailers Transfer-Encoding Upgrade Proxy-Connection Public ); # standard proxy header filter (RFC 2616) sub filter { my ( $self, $headers, $message ) = @_; # the Via: header my $via = $message->protocol() || ''; if ( $self->proxy->via and $via =~ s!HTTP/!! ) { $via .= " " . $self->proxy->via; $headers->header( Via => join ', ', $message->headers->header('Via') || (), $via ); } # the X-Forwarded-For header $headers->push_header( X_Forwarded_For => $self->proxy->client_socket->peerhost ) if $message->isa( 'HTTP::Request' ) && $self->proxy->x_forwarded_for; # make a list of hop-by-hop headers my %h2h = map { (lc) => 1 } @hopbyhop; my $hop = HTTP::Headers->new(); my $client = HTTP::Headers->new(); $h2h{ lc $_->[0] } = 1 for map { split_header_words($_) } $headers->header('Connection'); # hop-by-hop headers are set aside # as well as LWP::UserAgent Client-* headers $headers->scan( sub { my ( $k, $v ) = @_; if ( $h2h{lc $k} ) { $hop->push_header( $k => $v ); $headers->remove_header($k); } if( $k =~ /^Client-/ ) { $client->push_header( $k => $v ); $headers->remove_header($k); } } ); # set the hop-by-hop and client headers in the proxy # only the end-to-end headers are left in the message $self->proxy->hop_headers($hop); $self->proxy->client_headers($client); # handle Max-Forwards if ( $message->isa('HTTP::Request') and defined $headers->header('Max-Forwards') ) { my ( $max, $method ) = ( $headers->header('Max-Forwards'), $message->method ); if ( $max == 0 ) { # answer directly TRACE ou OPTIONS if ( $method eq 'TRACE' ) { my $response = HTTP::Response->new( 200, 'OK', HTTP::Headers->new( Content_Type => 'message/http' , Content_Length => 0), $message->as_string ); $self->proxy->response($response); } elsif ( $method eq 'OPTIONS' ) { my $response = HTTP::Response->new(200); $response->header( Allow => join ', ', @HTTP::Proxy::METHODS ); $self->proxy->response($response); } } # The Max-Forwards header field MAY be ignored for all # other methods defined by this specification (RFC 2616) elsif ( $method =~ /^(?:TRACE|OPTIONS)/ ) { $headers->header( 'Max-Forwards' => --$max ); } } # no encoding accepted (gzip, compress, deflate) # if we plan to do anything with the response body $headers->remove_header( 'Accept-Encoding' ) if @{ $self->proxy->{body}{response}{filters} }; } 1; __END__ =head1 NAME HTTP::Proxy::HeaderFilter::standard - An internal filter to respect RFC2616 =head1 DESCRIPTION This is an internal filter used by HTTP::Proxy to enforce behaviour compliant with RFC 2616. =head1 METHOD This filter implements a single method that is called automatically: =over 4 =item filter() Enforce RFC 2616-compliant behaviour, by adding the C and C headers (except when the proxy was instructed not to add them), decrementing the C header and removing the hop-by-hop and L headers. Note that the filter will automatically remove the C headers if the proxy has at least one L filter. (This is to ensure that the filters will receive uncompressed data.) =back =head1 SEE ALSO L, L, RFC 2616. =head1 AUTHOR Philippe "BooK" Bruhat, Ebook@cpan.orgE. Thanks to Gisle Aas, for directions regarding the handling of the hop-by-hop headers. =head1 COPYRIGHT Copyright 2003-2013, Philippe Bruhat. =head1 LICENSE This module is free software; you can redistribute it or modify it under the same terms as Perl itself. =cut HTTP-Proxy-0.300/lib/HTTP/Proxy/HeaderFilter/simple.pm000444001750001750 612312130250124 21441 0ustar00bookbook000000000000package HTTP::Proxy::HeaderFilter::simple; use strict; use Carp; use HTTP::Proxy::HeaderFilter; use vars qw( @ISA ); @ISA = qw( HTTP::Proxy::HeaderFilter ); my $methods = join '|', qw( begin filter end ); $methods = qr/^(?:$methods)$/; sub init { my $self = shift; croak "Constructor called without argument" unless @_; if ( @_ == 1 ) { croak "Single parameter must be a CODE reference" unless ref $_[0] eq 'CODE'; $self->{_filter} = $_[0]; } else { $self->{_filter} = sub { }; # default while (@_) { my ( $name, $code ) = splice @_, 0, 2; # basic error checking croak "Parameter to $name must be a CODE reference" unless ref $code eq 'CODE'; croak "Unkown method $name" unless $name =~ $methods; $self->{"_$name"} = $code; } } } # transparently call the actual methods sub begin { goto &{ $_[0]{_begin} }; } sub filter { goto &{ $_[0]{_filter} }; } sub end { goto &{ $_[0]{_end} }; } sub can { my ( $self, $method ) = @_; return $method =~ $methods ? $self->{"_$method"} : UNIVERSAL::can( $self, $method ); } 1; __END__ =head1 NAME HTTP::Proxy::HeaderFilter::simple - A class for creating simple filters =head1 SYNOPSIS use HTTP::Proxy::HeaderFilter::simple; # a simple User-Agent filter my $filter = HTTP::Proxy::HeaderFilter::simple->new( sub { $_[1]->header( User_Agent => 'foobar/1.0' ); } ); $proxy->push_filter( request => $filter ); =head1 DESCRIPTION L can create BodyFilter without going through the hassle of creating a full-fledged class. Simply pass a code reference to the C method of your filter to the constructor, and you'll get the adequate filter. =head2 Constructor calling convention The constructor is called with a single code reference. The code reference must conform to the standard C signature for header filters: sub filter { my ( $self, $headers, $message) = @_; ... } This code reference is used for the C method. =head1 METHODS This filter "factory" defines the standard L methods, but those are only, erm, "proxies" to the actual CODE references passed to the constructor. These "proxy" methods are: =over 4 =item filter() =item begin() =item end() =back Two other methods are actually L methods, and are called automatically: =over 4 =item init() Initalise the filter instance with the code references passed to the constructor. =item can() Return the actual code reference that will be run, and not the "proxy" methods. If called with any other name than C and C, it calls C instead. =back =head1 SEE ALSO L, L. =head1 AUTHOR Philippe "BooK" Bruhat, Ebook@cpan.orgE. =head1 COPYRIGHT Copyright 2003-2013, Philippe Bruhat. =head1 LICENSE This module is free software; you can redistribute it or modify it under the same terms as Perl itself. =cut HTTP-Proxy-0.300/lib/HTTP/Proxy/Engine000755001750001750 012130250124 16322 5ustar00bookbook000000000000HTTP-Proxy-0.300/lib/HTTP/Proxy/Engine/NoFork.pm000444001750001750 262412130250124 20217 0ustar00bookbook000000000000package HTTP::Proxy::Engine::NoFork; use strict; our @ISA = qw( HTTP::Proxy::Engine ); __PACKAGE__->make_accessors( 'select' ); sub start { my $self = shift; my $proxy = $self->proxy; $self->select( IO::Select->new( $proxy->daemon ) ); # clients will not block the proxy by keeping the connection open $proxy->max_keep_alive_requests( 1 ); } sub run { my $self = shift; my $proxy = $self->proxy; # check for new connections for my $fh ( $self->select->can_read() ) { # there's only one, anyway # single-process proxy $proxy->serve_connections( $fh->accept ); $proxy->new_connection; } } 1; __END__ =head1 NAME HTTP::Proxy::Engine::NoFork - A basic, non forking HTTP::Proxy engine =head1 SYNOPSIS use HTTP::Proxy; my $proxy = HTTP::Proxy->new( engine => 'NoFork' ); =head1 DESCRIPTION The L engine runs the proxy without forking. =head1 METHODS =over 4 =item start() Initialise the engine. =item run() Implements the non-forking logic by calling C<< $proxy->serve_requests() >> directly. =back =head1 SEE ALSO L, L. =head1 AUTHOR Philippe "BooK" Bruhat, C<< >>. =head1 COPYRIGHT Copyright 2005-2013, Philippe Bruhat. =head1 LICENSE This module is free software; you can redistribute it or modify it under the same terms as Perl itself. =cut HTTP-Proxy-0.300/lib/HTTP/Proxy/Engine/Legacy.pm000444001750001750 1020112130250124 20233 0ustar00bookbook000000000000package HTTP::Proxy::Engine::Legacy; use strict; use POSIX 'WNOHANG'; use HTTP::Proxy; our @ISA = qw( HTTP::Proxy::Engine ); our %defaults = ( max_clients => 12, ); __PACKAGE__->make_accessors( qw( kids select ), keys %defaults ); sub start { my $self = shift; $self->kids( [] ); $self->select( IO::Select->new( $self->proxy->daemon ) ); } sub run { my $self = shift; my $proxy = $self->proxy; my $kids = $self->kids; # check for new connections my @ready = $self->select->can_read(1); for my $fh (@ready) { # there's only one, anyway # single-process proxy (useful for debugging) if ( $self->max_clients == 0 ) { $proxy->max_keep_alive_requests(1); # do not block simultaneous connections $proxy->log( HTTP::Proxy::PROCESS, "PROCESS", "No fork allowed, serving the connection" ); $proxy->serve_connections($fh->accept); $proxy->new_connection; next; } if ( @$kids >= $self->max_clients ) { $proxy->log( HTTP::Proxy::ERROR, "PROCESS", "Too many child process, serving the connection" ); $proxy->serve_connections($fh->accept); $proxy->new_connection; next; } # accept the new connection my $conn = $fh->accept; my $child = fork; if ( !defined $child ) { $conn->close; $proxy->log( HTTP::Proxy::ERROR, "PROCESS", "Cannot fork" ); $self->max_clients( $self->max_clients - 1 ) if $self->max_clients > @$kids; next; } # the parent process if ($child) { $conn->close; $proxy->log( HTTP::Proxy::PROCESS, "PROCESS", "Forked child process $child" ); push @$kids, $child; } # the child process handles the whole connection else { $SIG{INT} = 'DEFAULT'; $proxy->serve_connections($conn); exit; # let's die! } } $self->reap_zombies if @$kids; } sub stop { my $self = shift; my $kids = $self->kids; # wait for remaining children # EOLOOP kill INT => @$kids; $self->reap_zombies while @$kids; } # private reaper sub sub reap_zombies { my $self = shift; my $kids = $self->kids; my $proxy = $self->proxy; while (1) { my $pid = waitpid( -1, WNOHANG ); last if $pid == 0 || $pid == -1; # AS/Win32 returns negative PIDs @$kids = grep { $_ != $pid } @$kids; $proxy->{conn}++; # Cannot use the interface for RO attributes $proxy->log( HTTP::Proxy::PROCESS, "PROCESS", "Reaped child process $pid" ); $proxy->log( HTTP::Proxy::PROCESS, "PROCESS", @$kids . " remaining kids: @$kids" ); } } 1; __END__ =head1 NAME HTTP::Proxy::Engine::Legacy - The "older" HTTP::Proxy engine =head1 SYNOPSIS my $proxy = HTTP::Proxy->new( engine => 'Legacy' ); =head1 DESCRIPTION This engine reproduces the older child creation algorithm of L. Angelos Karageorgiou C<< >> reports: I with the following trick:> max_keep_alive_requests(1); max_clients(120); $HTTP::VERSION(1.0); # just in case I Id requests!> =head1 METHODS The module defines the following methods, used by L main loop: =over 4 =item start() Initialise the engine. =item run() Implements the forking logic: a new process is forked for each new incoming TCP connection. =item stop() Reap remaining child processes. =back The following method is used by the engine internally: =over 4 =item reap_zombies() Process the dead child processes. =back =head1 SEE ALSO L, L. =head1 AUTHOR Philippe "BooK" Bruhat, C<< >>. =head1 COPYRIGHT Copyright 2005-2013, Philippe Bruhat. =head1 LICENSE This module is free software; you can redistribute it or modify it under the same terms as Perl itself. =cut HTTP-Proxy-0.300/lib/HTTP/Proxy/Engine/Threaded.pm000444001750001750 437212130250124 20543 0ustar00bookbook000000000000package HTTP::Proxy::Engine::Threaded; use strict; use HTTP::Proxy; use threads; # A massive hack of Engine::Fork to use the threads stuff # Basically created to work under win32 so that the filters # can share global caches among themselves # Angelos Karageorgiou angelos@unix.gr our @ISA = qw( HTTP::Proxy::Engine ); our %defaults = ( max_clients => 60, ); __PACKAGE__->make_accessors( qw( kids select ), keys %defaults ); sub start { my $self = shift; $self->kids( [] ); $self->select( IO::Select->new( $self->proxy->daemon ) ); } sub run { my $self = shift; my $proxy = $self->proxy; my $kids = $self->kids; # check for new connections my @ready = $self->select->can_read(1); for my $fh (@ready) { # there's only one, anyway # single-process proxy (useful for debugging) # accept the new connection my $conn = $fh->accept; my $child=threads->new(\&_worker,$proxy,$conn); if ( !defined $child ) { $conn->close; $proxy->log( HTTP::Proxy::ERROR, "PROCESS", "Cannot spawn thread" ); next; } $child->detach(); } } sub stop { my $self = shift; my $kids = $self->kids; # not needed } sub _worker { my $proxy=shift; my $conn=shift; $proxy->serve_connections($conn); $conn->close(); return; } 1; __END__ =head1 NAME HTTP::Proxy::Engine::Threaded - A scoreboard-based HTTP::Proxy engine =head1 SYNOPSIS my $proxy = HTTP::Proxy->new( engine => 'Threaded' ); =head1 DESCRIPTION This module provides a threaded engine to L. =head1 METHODS The module defines the following methods, used by L main loop: =over 4 =item start() Initialize the engine. =item run() Implements the forking logic: a new process is forked for each new incoming TCP connection. =item stop() Reap remaining child processes. =back =head1 SEE ALSO L, L. =head1 AUTHOR Angelos Karageorgiou C<< >>. (Actual code) Philippe "BooK" Bruhat, C<< >>. (Documentation) =head1 COPYRIGHT Copyright 2010-2013, Philippe Bruhat. =head1 LICENSE This module is free software; you can redistribute it or modify it under the same terms as Perl itself. =cut HTTP-Proxy-0.300/lib/HTTP/Proxy/Engine/ScoreBoard.pm000444001750001750 2012312130250124 21056 0ustar00bookbook000000000000package HTTP::Proxy::Engine::ScoreBoard; use strict; use POSIX ":sys_wait_h"; # WNOHANG use Fcntl qw(LOCK_UN LOCK_EX); use IO::Handle; use File::Temp; use HTTP::Proxy; our @ISA = qw( HTTP::Proxy::Engine ); our %defaults = ( start_servers => 4, # start this many, and don't go below max_clients => 12, # don't go above max_requests_per_child => 250, # just in case there's a leak min_spare_servers => 1, # minimum idle (if 0, never start new) max_spare_servers => 12, # maximum idle (should be "single browser max") verify_delay => 60, # minimum time between kids verification ); __PACKAGE__->make_accessors( qw( kids select status_read status_write scoreboard tempfile verify_live_kids_time last_active_time last_fork_time ), keys %defaults ); sub start { my $self = shift; $self->kids( {} ); # set up the communication pipe $self->status_read( IO::Handle->new() ); $self->status_write( IO::Handle->new() ); pipe( $self->status_read(), $self->status_write() ) or die "Can't create pipe: $!"; $self->status_write()->autoflush(1); $self->select( IO::Select->new( $self->status_read() ) ); setpgrp; # set as group leader # scoreboard information $self->verify_live_kids_time( time ); $self->last_active_time( time ); $self->last_fork_time( time ); $self->scoreboard( '' ); # lockfile information $self->tempfile( File::Temp->new( UNLINK => 0, TEMPLATE => 'http-proxy-XXXX' ) ); $self->proxy()->log( HTTP::Proxy::ENGINE, "ENGINE", "Using " . $self->tempfile()->filename() . " as lockfile" ); } my %status = ( A => 'Acccept', B => 'Busy', I => 'Idle' ); sub run { my $self = shift; my $proxy = $self->proxy(); my $kids = $self->kids(); ## first phase: update scoreboard if ( $self->select()->can_read(1) ) { $self->status_read()->sysread( my $buf, 50 ) > 0 # read first 10 changes or die "bad read"; # FIXME while ( length $buf ) { my ( $pid, $status ) = unpack "NA", substr( $buf, 0, 5, "" ); $proxy->log( HTTP::Proxy::ENGINE, 'ENGINE', "Child process $pid updated to $status ($status{$status})" ); $kids->{$pid} = $status; } $self->last_active_time(time); } { my $new = join "", values %$kids; if ( $new ne $self->scoreboard() ) { $proxy->log( HTTP::Proxy::ENGINE, 'ENGINE', "ScoreBoard = $new" ); $self->scoreboard($new); } } ## second phase: delete dead kids while ( ( my $kid = waitpid( -1, WNOHANG ) ) > 0 ) { $proxy->{conn}++; # Cannot use the interface for RO attributes $proxy->log( HTTP::Proxy::PROCESS, 'PROCESS', "Reaped child process $kid" ); $proxy->log( HTTP::Proxy::PROCESS, "PROCESS", keys(%$kids) . " remaining kids: @{[ keys %$kids ]}" ); delete $kids->{$kid}; } ## third phase: verify live kids if ( time > $self->verify_live_kids_time() + $self->verify_delay() ) { for my $kid ( keys %$kids ) { next if kill 0, $kid; # shouldn't happen normally $proxy->log( HTTP::Proxy::ERROR, "ENGINE", "Child process $kid found missing" ); delete $kids->{$kid}; } $self->verify_live_kids_time(time); } ## fourth phase: launch kids my @idlers = grep $kids->{$_} eq "I", keys %$kids; if ( ( @idlers < $self->min_spare_servers() # not enough idlers or keys %$kids < $self->start_servers() # not enough overall ) and keys %$kids < $self->max_clients() # not too many please and time > $self->last_fork_time() # not too fast please ) { my $child = fork(); if ( !defined $child ) { $proxy->log( HTTP::Proxy::ERROR, "PROCESS", "Cannot fork" ); } else { if ($child) { $proxy->log( HTTP::Proxy::PROCESS, "PROCESS", "Forked child process $child" ); $kids->{$child} = "I"; $self->last_fork_time(time); } else { # child process $self->_run_child(); exit; # we're done } } } elsif ( ( @idlers > $self->max_spare_servers() # too many idlers or @idlers > $self->min_spare_servers() # too many lazy idlers and time > $self->last_active_time + $self->verify_delay() ) and keys %$kids > $self->start_servers() # not too few please ) { my $victim = $idlers[ rand @idlers ]; $proxy->log( HTTP::Proxy::ENGINE, "ENGINE", "Killing idle child process $victim" ); kill INT => $victim; # pick one at random $self->last_active_time(time); } } sub stop { my $self = shift; my $kids = $self->kids(); my $proxy = $self->proxy(); kill 'INT' => keys %$kids; # wait for remaining children while (%$kids) { my $pid = waitpid( -1, WNOHANG ); next unless $pid; $proxy->{conn}++; # WRONG for this engine! delete $kids->{$pid}; $proxy->log( HTTP::Proxy::PROCESS, "PROCESS", "Reaped child process $pid" ); $proxy->log( HTTP::Proxy::PROCESS, "PROCESS", keys(%$kids) . " remaining kids: @{[ keys %$kids ]}" ); } # remove the temporary file unlink $self->tempfile()->filename() or do { $proxy->log( HTTP::Proxy::ERROR, "ERROR", "Can't unlink @{[ $self->tempfile()->filename() ]}: $!" ); }; } sub _run_child { my $self = shift; my $proxy = $self->proxy(); my $daemon = $proxy->daemon(); my $status_write = $self->status_write(); open my $lockfh, $self->tempfile()->filename() or do { $proxy->log( HTTP::Proxy::ERROR, "ERROR", "Cannot open lock file: $!" ); exit; }; my $did = 0; # processed count while ( ++$did <= $self->max_requests_per_child() ) { flock $lockfh, LOCK_EX or do { $proxy->log( HTTP::Proxy::ERROR, "ERROR", "Cannot get flock: $!" ); exit; }; last unless $proxy->loop(); 5 == syswrite $status_write, pack "NA", $$, "A" # go accept or $proxy->log( HTTP::Proxy::ERROR, "ERROR", "status A: short write"); my $slave = $daemon->accept() or do { $proxy->log( HTTP::Proxy::ERROR, "ERROR", "Cannot accept: $!"); exit; }; flock $lockfh, LOCK_UN or do { $proxy->log( HTTP::Proxy::ERROR, "ERROR", "Cannot unflock: $!" ); exit; }; 5 == syswrite $status_write, pack "NA", $$, "B" # go busy or $proxy->log( HTTP::Proxy::ERROR, "ERROR", "status B: short write"); $slave->autoflush(1); $proxy->serve_connections($slave); # the real work is done here close $slave; 5 == syswrite $status_write, pack "NA", $$, "I" # go idle or $proxy->log( HTTP::Proxy::ERROR, "ERROR", "status I: short write"); } } 1; __END__ =head1 NAME HTTP::Proxy::Engine::ScoreBoard - A scoreboard-based HTTP::Proxy engine =head1 SYNOPSIS my $proxy = HTTP::Proxy->new( engine => 'ScoreBoard' ); =head1 DESCRIPTION This module provides a scoreboard-based engine to L. =head1 METHODS The module defines the following methods, used by L main loop: =over 4 =item start() Initialise the engine. =item run() Implements the forking logic: a new process is forked for each new incoming TCP connection. =item stop() Reap remaining child processes. =back =head1 SEE ALSO L, L. =head1 AUTHOR Philippe "BooK" Bruhat, C<< >>. Many thanks to Randal L. Schwartz for his help in implementing this module. =head1 COPYRIGHT Copyright 2005-2013, Philippe Bruhat. =head1 LICENSE This module is free software; you can redistribute it or modify it under the same terms as Perl itself. =cut HTTP-Proxy-0.300/lib/HTTP/Proxy/BodyFilter000755001750001750 012130250124 17160 5ustar00bookbook000000000000HTTP-Proxy-0.300/lib/HTTP/Proxy/BodyFilter/save.pm000444001750001750 3121612130250124 20634 0ustar00bookbook000000000000package HTTP::Proxy::BodyFilter::save; use strict; use HTTP::Proxy; use HTTP::Proxy::BodyFilter; use vars qw( @ISA ); @ISA = qw( HTTP::Proxy::BodyFilter ); use Fcntl; use File::Spec; use File::Path; use Carp; sub init { my $self = shift; # options my %args = ( template => File::Spec->catfile( '%h', '%P' ), no_host => 0, no_dirs => 0, cut_dirs => 0, prefix => '', filename => undef, multiple => 1, keep_old => 0, # no_clobber in wget parlance timestamp => 0, status => [ 200 ], @_ ); # keep_old and timestamp can't be selected together croak "Can't timestamp and keep older files at the same time" if $args{keep_old} && $args{timestamp}; croak "status must be an array reference" unless ref($args{status}) eq 'ARRAY'; croak "status must contain only HTTP codes" if grep { !/^[12345]\d\d$/ } @{ $args{status} }; croak "filename must be a code reference" if defined $args{filename} && !UNIVERSAL::isa( $args{filename}, 'CODE' ); $self->{"_hpbf_save_filename_code"} = $args{filename}; $self->{"_hpbf_save_$_"} = $args{$_} for qw( template no_host no_dirs cut_dirs prefix multiple keep_old timestamp status ); } sub begin { my ( $self, $message ) = @_; # internal data initialisation delete @{$self}{qw( _hpbf_save_filename _hpbf_save_fh )}; my $uri = $message->isa( 'HTTP::Request' ) ? $message->uri : $message->request->uri; # save only the accepted status codes if( $message->isa( 'HTTP::Response' ) ) { my $code = $message->code; return unless grep { $code eq $_ } @{ $self->{_hpbf_save_status} }; } my $file = ''; if( defined $self->{_hpbf_save_filename_code} ) { # use the user-provided callback $file = $self->{_hpbf_save_filename_code}->($message); unless ( defined $file and $file ne '' ) { $self->proxy->log( HTTP::Proxy::FILTERS, "HTBF::save", "Filter will not save $uri" ); return; } } else { # set the template variables from the URI my @segs = $uri->path_segments; # starts with an empty string shift @segs; splice(@segs, 0, $self->{_hpbf_save_cut_dirs} >= @segs ? @segs - 1 : $self->{_hpbf_save_cut_dirs} ); my %vars = ( '%' => '%', h => $self->{_hpbf_save_no_host} ? '' : $uri->host, f => $segs[-1] || 'index.html', # same default as wget p => $self->{_hpbf_save_no_dirs} ? $segs[-1] || 'index.html' : File::Spec->catfile(@segs), q => $uri->query, ); pop @segs; $vars{d} = $self->{_hpbf_save_no_dirs} ? '' : @segs ? File::Spec->catfile(@segs) : ''; $vars{P} = $vars{p} . ( $vars{q} ? "?$vars{q}" : '' ); # create the filename $file = File::Spec->catfile( $self->{_hpbf_save_prefix} || (), $self->{_hpbf_save_template} ); $file =~ s/%(.)/$vars{$1}/g; } $file = File::Spec->rel2abs( $file ); # create the directory my $dir = File::Spec->catpath( (File::Spec->splitpath($file))[ 0, 1 ], '' ); if( ! -e $dir ) { eval { mkpath( $dir ) }; if ($@) { $self->proxy->log( HTTP::Proxy::ERROR, "HTBF::save", "Unable to create directory $dir" ); return; } $self->proxy->log( HTTP::Proxy::FILTERS, "HTBF::save", "Created directory $dir" ); } # keep old file? if ( -e $file ) { if ( $self->{_hpbf_save_timestamp} ) { # FIXME timestamp } elsif ( $self->{_hpbf_save_keep_old} ) { $self->proxy->log( HTTP::Proxy::FILTERS, "HPBF::save", "Skip saving $uri" ); delete $self->{_hpbf_save_fh}; # it's a closed filehandle return; } } # open and lock the file my ( $ext, $n, $i ) = ( "", 0 ); my $flags = O_WRONLY | O_EXCL | O_CREAT; while( ! sysopen( $self->{_hpbf_save_fh}, "$file$ext", $flags ) ) { $self->proxy->log( HTTP::Proxy::ERROR, "HPBF::save", "Too many errors opening $file$ext" ), return if $i++ - $n == 10; # should be ok now if( $self->{_hpbf_save_multiple} ) { $ext = "." . ++$n while -e $file.$ext; next; } else { $flags = O_WRONLY | O_CREAT; } } # we have an open filehandle $self->{_hpbf_save_filename} = $file.$ext; binmode( $self->{_hpbf_save_fh} ); # for Win32 and friends $self->proxy->log( HTTP::Proxy::FILTERS, "HPBF::save", "Saving $uri to $file$ext" ); } sub filter { my ( $self, $dataref ) = @_; return unless exists $self->{_hpbf_save_fh}; # save the data to the file my $res = $self->{_hpbf_save_fh}->syswrite( $$dataref ); $self->proxy->log( HTTP::Proxy::ERROR, "HPBF::save", "syswrite() error: $!") if ! defined $res; # FIXME error handling } sub end { my ($self) = @_; # close file if( $self->{_hpbf_save_fh} ) { $self->{_hpbf_save_fh}->close; # FIXME error handling delete $self->{_hpbf_save_fh}; } } sub will_modify { 0 } 1; __END__ =encoding utf8 =head1 NAME HTTP::Proxy::BodyFilter::save - A filter that saves transfered data to a file =head1 SYNOPSIS use HTTP::Proxy; use HTTP::Proxy::BodyFilter::save; my $proxy = HTTP::Proxy->new; # save RFC files as we browse them $proxy->push_filter( path => qr!/rfc\d+.txt!, mime => 'text/plain', response => HTTP::Proxy::BodyFilter::save->new( template => '%f', prefix => 'rfc', keep_old => 1, ) ); $proxy->start; =head1 DESCRIPTION The L filter can save HTTP messages (responses or request) bodies to files. The name of the file is determined by a template and the URI of the request. Simply insert this filter in a filter stack, and it will save the data as it flows through the proxy. Depending on where the filter is located in the stack, the saved data can be more or less modified. This filter I create directories if it needs to! I Remember that the default C parameter for C is C and that you may need to change it for other MIME types. =head2 Constructor The constructor accepts quite a few options. Most of them control the construction of the filename that will be used to save the response body. There are two options to compute this filename: =over 4 =item * use a template =item * use your own filename creation routine =back The template option uses the following options: =over 4 =item B