WWW-Mechanize-1.86000755000765000024 013126734347 13254 5ustar00olafstaff000000000000tags100644000765000024 2665713126734347 14256 0ustar00olafstaff000000000000WWW-Mechanize-1.86!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/ !_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/ !_TAG_PROGRAM_AUTHOR Darren Hiebert /dhiebert@users.sourceforge.net/ !_TAG_PROGRAM_NAME Exuberant Ctags // !_TAG_PROGRAM_URL http://ctags.sourceforge.net /official site/ !_TAG_PROGRAM_VERSION 5.7 // AREA_CHECKS t/area_link.t /^AREA_CHECKS: {$/;" l AREA_CHECKS t/find_link.t /^AREA_CHECKS: {$/;" l AUTOCHECK_OFF t/autocheck.t /^AUTOCHECK_OFF: {$/;" l AUTOCHECK_ON t/autocheck.t /^AUTOCHECK_ON: {$/;" l BACK_TO_FIRST_PAGE t/cookies.t /^BACK_TO_FIRST_PAGE: {$/;" l BAD_PAGE t/local/failure.t /^BAD_PAGE: {$/;" l CHECK_DEATH t/die.t /^CHECK_DEATH: {$/;" l CHECK_LIVING t/die.t /^CHECK_LIVING: {$/;" l CLICK_BY_NAME t/local/click_button.t /^CLICK_BY_NAME: {$/;" l CLICK_BY_NUMBER t/local/click_button.t /^CLICK_BY_NUMBER: {$/;" l CLICK_BY_OBJECT_REFERENCE t/local/click_button.t /^CLICK_BY_OBJECT_REFERENCE: {$/;" l COOKIE_SHARING t/clone.t /^COOKIE_SHARING: {$/;" l DESTROY t/local/LocalServer.pm /^sub DESTROY {$/;" s EAT_THE_WARNING t/select.t /^EAT_THE_WARNING: { # Mech complains about the non-existent field$/;" l FIND_BY_CLASS t/find_link_id.t /^FIND_BY_CLASS: {$/;" l FIND_BY_ID t/find_link_id.t /^FIND_BY_ID: {$/;" l FIND_CLASS_BY_REGEX t/find_link_id.t /^FIND_CLASS_BY_REGEX: {$/;" l FIND_ID_BY_REGEX t/find_link_id.t /^FIND_ID_BY_REGEX: {$/;" l FIRST_COOKIE t/cookies.t /^FIRST_COOKIE: {$/;" l FIRST_FORM t/find_inputs.t /^FIRST_FORM: {$/;" l FIRST_GET t/local/reload.t /^FIRST_GET: {$/;" l FORMS lib/WWW/Mechanize.pm /^ FORMS: for my $form (@{ $self->forms }) {$/;" l FORWARD_TO_NONCOOKIE_PAGE t/cookies.t /^FORWARD_TO_NONCOOKIE_PAGE: {$/;" l GET_A_THIRD_COOKIE t/cookies.t /^GET_A_THIRD_COOKIE: {$/;" l GOOD_PAGE t/local/failure.t /^GOOD_PAGE: {$/;" l INITIAL_CLONE t/clone.t /^INITIAL_CLONE: {$/;" l INVALIDATE t/local/reload.t /^INVALIDATE: {$/;" l LANGUAGES t/live/wikipedia.t /^use constant LANGUAGES => qw( en it ja es nl pl );$/;" c LocalServer t/local/LocalServer.pm /^package LocalServer;$/;" p MY Makefile.PL /^sub MY::postamble {$/;" s MyMech t/local/overload.t /^ package MyMech;$/;" p NEW_API t/link.t /^NEW_API: {$/;" l NON_REGEX_STRING t/find_link-warnings.t /^NON_REGEX_STRING: {$/;" l NO_AGENT t/new.t /^NO_AGENT: {$/;" l NO_BASE t/link-base.t /^NO_BASE: {$/;" l NO_GET t/local/reload.t /^NO_GET: {$/;" l NO_STACK t/local/page_stack.t /^NO_STACK: {$/;" l OLD_API t/link.t /^OLD_API: {$/;" l PAIRS t/live/encoding.t /^use constant PAIRS => {$/;" c REGEX_STRING t/find_link-warnings.t /^REGEX_STRING: {$/;" l REGEX_USAGE t/find_link-warnings.t /^REGEX_USAGE: {$/;" l RELOAD t/local/reload.t /^RELOAD: {$/;" l RES_ON_NEW t/new.t /^RES_ON_NEW: {$/;" l SECOND_COOKIE t/cookies.t /^SECOND_COOKIE: {$/;" l SECOND_FORM t/find_inputs.t /^SECOND_FORM: {$/;" l SKIP t/area_link.t /^ SKIP: {$/;" l SKIP t/area_link.t /^SKIP: {$/;" l SKIP t/content.t /^SKIP: {$/;" l SKIP t/live/wikipedia.t /^SKIP: {$/;" l SKIP t/local/back.t /^SKIP: {$/;" l SKIP t/local/get.t /^SKIP: {$/;" l SKIP t/local/referer.t /^SKIP: {$/;" l SKIP t/local/reload.t /^SKIP: {$/;" l SKIP t/local/submit.t /^SKIP: {$/;" l SPACE_PADDED t/find_link-warnings.t /^SPACE_PADDED: {$/;" l STANDARD_STACK t/local/page_stack.t /^STANDARD_STACK: {$/;" l THIRD_FORM t/find_inputs.t /^THIRD_FORM: {$/;" l TestServer t/TestServer.pm /^package TestServer;$/;" p Tools t/Tools.pm /^package Tools;$/;" p UNKNOWN_ALIAS t/warnings.t /^UNKNOWN_ALIAS: {$/;" l URI lib/WWW/Mechanize/Image.pm /^sub URI {$/;" s URI lib/WWW/Mechanize/Link.pm /^sub URI {$/;" s USER_AGENT t/new.t /^USER_AGENT: {$/;" l WWW::Mechanize lib/WWW/Mechanize.pm /^package WWW::Mechanize;$/;" p WWW::Mechanize::Image lib/WWW/Mechanize/Image.pm /^package WWW::Mechanize::Image;$/;" p WWW::Mechanize::Link lib/WWW/Mechanize/Link.pm /^package WWW::Mechanize::Link;$/;" p _SUPER_put lib/WWW/Mechanize.pm /^sub _SUPER_put {$/;" s _check_unhandled_parms lib/WWW/Mechanize.pm /^sub _check_unhandled_parms {$/;" s _clean_keys lib/WWW/Mechanize.pm /^sub _clean_keys {$/;" s _die lib/WWW/Mechanize.pm /^sub _die {$/;" s _extract_forms lib/WWW/Mechanize.pm /^sub _extract_forms {$/;" s _extract_images lib/WWW/Mechanize.pm /^sub _extract_images {$/;" s _extract_links lib/WWW/Mechanize.pm /^sub _extract_links {$/;" s _image_from_token lib/WWW/Mechanize.pm /^sub _image_from_token {$/;" s _is_tainted lib/WWW/Mechanize.pm /^sub _is_tainted {$/;" s _link_from_token lib/WWW/Mechanize.pm /^sub _link_from_token {$/;" s _make_request lib/WWW/Mechanize.pm /^sub _make_request {$/;" s _match_any_image_parms lib/WWW/Mechanize.pm /^sub _match_any_image_parms {$/;" s _match_any_link_parms lib/WWW/Mechanize.pm /^sub _match_any_link_parms {$/;" s _modify_request lib/WWW/Mechanize.pm /^sub _modify_request {$/;" s _push_page_stack lib/WWW/Mechanize.pm /^sub _push_page_stack {$/;" s _reset_page lib/WWW/Mechanize.pm /^sub _reset_page {$/;" s _taintedness lib/WWW/Mechanize.pm /^sub _taintedness {$/;" s _update_page lib/WWW/Mechanize.pm /^sub _update_page {$/;" s _warn lib/WWW/Mechanize.pm /^sub _warn {$/;" s add_header lib/WWW/Mechanize.pm /^sub add_header {$/;" s agent_alias lib/WWW/Mechanize.pm /^sub agent_alias {$/;" s alt lib/WWW/Mechanize/Image.pm /^sub alt { return ($_[0])->{alt}; }$/;" s attrs lib/WWW/Mechanize/Link.pm /^sub attrs { return ($_[0])->[5]; }$/;" s back lib/WWW/Mechanize.pm /^sub back {$/;" s background t/TestServer.pm /^sub background {$/;" s base lib/WWW/Mechanize.pm /^sub base { my $self = shift; return $self->{base}; }$/;" s base lib/WWW/Mechanize/Image.pm /^sub base { return ($_[0])->{base}; }$/;" s base lib/WWW/Mechanize/Link.pm /^sub base { return ($_[0])->[4]; }$/;" s clear_credentials lib/WWW/Mechanize.pm /^sub clear_credentials {$/;" s click lib/WWW/Mechanize.pm /^sub click {$/;" s click_button lib/WWW/Mechanize.pm /^sub click_button {$/;" s clone lib/WWW/Mechanize.pm /^sub clone {$/;" s content lib/WWW/Mechanize.pm /^sub content {$/;" s content_type lib/WWW/Mechanize.pm /^sub content_type { my $self = shift; return $self->{ct}; }$/;" s cookieval t/cookies.t /^sub cookieval {$/;" s credentials lib/WWW/Mechanize.pm /^sub credentials {$/;" s creds_required t/local/LocalServer.pm /^sub creds_required {$/;" s critic Makefile.PL /^critic:$/;" l ct lib/WWW/Mechanize.pm /^sub ct { my $self = shift; return $self->{ct}; }$/;" s current_form lib/WWW/Mechanize.pm /^sub current_form {$/;" s delete_header lib/WWW/Mechanize.pm /^sub delete_header {$/;" s die lib/WWW/Mechanize.pm /^sub die {$/;" s dump_forms bin/mech-dump /^sub dump_forms {$/;" s dump_forms bin/mech-dump.orig /^sub dump_forms {$/;" s dump_forms lib/WWW/Mechanize.pm /^sub dump_forms {$/;" s dump_headers bin/mech-dump /^sub dump_headers {$/;" s dump_headers bin/mech-dump.orig /^sub dump_headers {$/;" s dump_headers lib/WWW/Mechanize.pm /^sub dump_headers {$/;" s dump_images bin/mech-dump /^sub dump_images {$/;" s dump_images bin/mech-dump.orig /^sub dump_images {$/;" s dump_images lib/WWW/Mechanize.pm /^sub dump_images {$/;" s dump_links bin/mech-dump /^sub dump_links {$/;" s dump_links bin/mech-dump.orig /^sub dump_links {$/;" s dump_links lib/WWW/Mechanize.pm /^sub dump_links {$/;" s dump_text bin/mech-dump /^sub dump_text {$/;" s dump_text lib/WWW/Mechanize.pm /^sub dump_text {$/;" s field lib/WWW/Mechanize.pm /^sub field {$/;" s find_all_images lib/WWW/Mechanize.pm /^sub find_all_images {$/;" s find_all_inputs lib/WWW/Mechanize.pm /^sub find_all_inputs {$/;" s find_all_links lib/WWW/Mechanize.pm /^sub find_all_links {$/;" s find_all_submits lib/WWW/Mechanize.pm /^sub find_all_submits {$/;" s find_image lib/WWW/Mechanize.pm /^sub find_image {$/;" s find_link lib/WWW/Mechanize.pm /^sub find_link {$/;" s follow_link lib/WWW/Mechanize.pm /^sub follow_link {$/;" s form_id lib/WWW/Mechanize.pm /^sub form_id {$/;" s form_name lib/WWW/Mechanize.pm /^sub form_name {$/;" s form_number lib/WWW/Mechanize.pm /^sub form_number {$/;" s form_with_fields lib/WWW/Mechanize.pm /^sub form_with_fields {$/;" s forms lib/WWW/Mechanize.pm /^sub forms {$/;" s get lib/WWW/Mechanize.pm /^sub get {$/;" s get_basic_credentials lib/WWW/Mechanize.pm /^sub get_basic_credentials {$/;" s get_output t/local/LocalServer.pm /^sub get_output {$/;" s handle_request t/TestServer.pm /^sub handle_request {$/;" s height lib/WWW/Mechanize/Image.pm /^sub height { return ($_[0])->{height}; }$/;" s hostname t/TestServer.pm /^sub hostname {$/;" s images lib/WWW/Mechanize.pm /^sub images {$/;" s import t/Tools.pm /^sub import {$/;" s is_html lib/WWW/Mechanize.pm /^sub is_html { my $self = shift; return defined $self->ct && ($self->ct eq 'text\/html'); }$/;" s known_agent_aliases lib/WWW/Mechanize.pm /^sub known_agent_aliases {$/;" s links lib/WWW/Mechanize.pm /^sub links {$/;" s main Makefile.PL /^package main;$/;" p name lib/WWW/Mechanize/Image.pm /^sub name { return ($_[0])->{name}; }$/;" s name lib/WWW/Mechanize/Link.pm /^sub name { return ($_[0])->[2]; }$/;" s new lib/WWW/Mechanize.pm /^sub new {$/;" s new lib/WWW/Mechanize/Image.pm /^sub new {$/;" s new lib/WWW/Mechanize/Link.pm /^sub new {$/;" s new t/TestServer.pm /^sub new {$/;" s nosend_cookies t/cookies.t /^sub nosend_cookies {$/;" s port t/local/LocalServer.pm /^sub port {$/;" s put lib/WWW/Mechanize.pm /^sub put {$/;" s quiet lib/WWW/Mechanize.pm /^sub quiet {$/;" s redirect_ok lib/WWW/Mechanize.pm /^sub redirect_ok {$/;" s reload lib/WWW/Mechanize.pm /^sub reload {$/;" s request lib/WWW/Mechanize.pm /^sub request {$/;" s res lib/WWW/Mechanize.pm /^sub res { my $self = shift; return $self->{res}; }$/;" s response lib/WWW/Mechanize.pm /^sub response { my $self = shift; return $self->{res}; }$/;" s root t/TestServer.pm /^sub root {$/;" s run t/TestServer.pm /^sub run {$/;" s save_content lib/WWW/Mechanize.pm /^sub save_content {$/;" s select lib/WWW/Mechanize.pm /^sub select {$/;" s send_cookies t/cookies.t /^sub send_cookies {$/;" s set_dispatch t/TestServer.pm /^sub set_dispatch {$/;" s set_fields lib/WWW/Mechanize.pm /^sub set_fields {$/;" s set_visible lib/WWW/Mechanize.pm /^sub set_visible {$/;" s slurp t/save_content.t /^sub slurp {$/;" s spawn t/local/LocalServer.pm /^sub spawn {$/;" s stack_depth lib/WWW/Mechanize.pm /^sub stack_depth {$/;" s status lib/WWW/Mechanize.pm /^sub status { my $self = shift; return $self->{status}; }$/;" s stop t/TestServer.pm /^sub stop {$/;" s stop t/local/LocalServer.pm /^sub stop {$/;" s submit lib/WWW/Mechanize.pm /^sub submit {$/;" s submit_form lib/WWW/Mechanize.pm /^sub submit_form {$/;" s success lib/WWW/Mechanize.pm /^sub success {$/;" s tag lib/WWW/Mechanize/Image.pm /^sub tag { return ($_[0])->{tag}; }$/;" s tag lib/WWW/Mechanize/Link.pm /^sub tag { return ($_[0])->[3]; }$/;" s tags Makefile.PL /^tags:$/;" l text lib/WWW/Mechanize.pm /^sub text {$/;" s text lib/WWW/Mechanize/Link.pm /^sub text { return ($_[0])->[1]; }$/;" s tick lib/WWW/Mechanize.pm /^sub tick {$/;" s title lib/WWW/Mechanize.pm /^sub title {$/;" s untick lib/WWW/Mechanize.pm /^sub untick {$/;" s update_html lib/WWW/Mechanize.pm /^sub update_html {$/;" s update_html t/local/overload.t /^ sub update_html {$/;" s uri lib/WWW/Mechanize.pm /^sub uri {$/;" s url lib/WWW/Mechanize/Image.pm /^sub url { return ($_[0])->{url}; }$/;" s url lib/WWW/Mechanize/Link.pm /^sub url { return ($_[0])->[0]; }$/;" s url t/local/LocalServer.pm /^sub url {$/;" s url_abs lib/WWW/Mechanize/Image.pm /^sub url_abs {$/;" s url_abs lib/WWW/Mechanize/Link.pm /^sub url_abs {$/;" s value lib/WWW/Mechanize.pm /^sub value {$/;" s warn lib/WWW/Mechanize.pm /^sub warn {$/;" s width lib/WWW/Mechanize/Image.pm /^sub width { return ($_[0])->{width}; }$/;" s INSTALL100644000765000024 220613126734347 14366 0ustar00olafstaff000000000000WWW-Mechanize-1.86This is the Perl distribution WWW-Mechanize. Installing WWW-Mechanize is straightforward. ## Installation with cpanm If you have cpanm, you only need one line: % cpanm WWW::Mechanize If it does not have permission to install modules to the current perl, cpanm will automatically set up and install to a local::lib in your home directory. See the local::lib documentation (https://metacpan.org/pod/local::lib) for details on enabling it in your environment. ## Installing with the CPAN shell Alternatively, if your CPAN shell is set up, you should just be able to do: % cpan WWW::Mechanize ## Manual installation As a last resort, you can manually install it. Download the tarball, untar it, then build it: % perl Makefile.PL % make && make test Then install it: % make install If your perl is system-managed, you can create a local::lib in your home directory to install modules to. For details, see the local::lib documentation: https://metacpan.org/pod/local::lib ## Documentation WWW-Mechanize documentation is available as POD. You can run perldoc from a shell to read the documentation: % perldoc WWW::Mechanize LICENSE100644000765000024 4367113126734347 14375 0ustar00olafstaff000000000000WWW-Mechanize-1.86This software is copyright (c) 2004-2016 by Andy Lester. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. Terms of the Perl programming language system itself a) the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version, or b) the "Artistic License" --- The GNU General Public License, Version 1, February 1989 --- This software is Copyright (c) 2004-2016 by Andy Lester. This is free software, licensed under: The GNU General Public License, Version 1, February 1989 GNU GENERAL PUBLIC LICENSE Version 1, February 1989 Copyright (C) 1989 Free Software Foundation, Inc. 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The license agreements of most software companies try to keep users at the mercy of those companies. By contrast, our General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. The General Public License applies to the Free Software Foundation's software and to any other program whose authors commit to using it. You can use it for your programs, too. When we speak of free software, we are referring to freedom, not price. Specifically, the General Public License is designed to make sure that you have the freedom to give away or sell copies of free software, that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of a such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must tell them their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any work containing the Program or a portion of it, either verbatim or with modifications. Each licensee is addressed as "you". 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this General Public License and to the absence of any warranty; and give any other recipients of the Program a copy of this General Public License along with the Program. You may charge a fee for the physical act of transferring a copy. 2. You may modify your copy or copies of the Program or any portion of it, and copy and distribute such modifications under the terms of Paragraph 1 above, provided that you also do the following: a) cause the modified files to carry prominent notices stating that you changed the files and the date of any change; and b) cause the whole of any work that you distribute or publish, that in whole or in part contains the Program or any part thereof, either with or without modifications, to be licensed at no charge to all third parties under the terms of this General Public License (except that you may choose to grant warranty protection to some or all third parties, at your option). c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the simplest and most usual way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this General Public License. d) You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. Mere aggregation of another independent work with the Program (or its derivative) on a volume of a storage or distribution medium does not bring the other work under the scope of these terms. 3. You may copy and distribute the Program (or a portion or derivative of it, under Paragraph 2) in object code or executable form under the terms of Paragraphs 1 and 2 above provided that you also do one of the following: a) accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Paragraphs 1 and 2 above; or, b) accompany it with a written offer, valid for at least three years, to give any third party free (except for a nominal charge for the cost of distribution) a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Paragraphs 1 and 2 above; or, c) accompany it with the information you received as to where the corresponding source code may be obtained. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form alone.) Source code for a work means the preferred form of the work for making modifications to it. For an executable file, complete source code means all the source code for all modules it contains; but, as a special exception, it need not include source code for modules which are standard libraries that accompany the operating system on which the executable file runs, or for standard header files or definitions files that accompany that operating system. 4. You may not copy, modify, sublicense, distribute or transfer the Program except as expressly provided under this General Public License. Any attempt otherwise to copy, modify, sublicense, distribute or transfer the Program is void, and will automatically terminate your rights to use the Program under this License. However, parties who have received copies, or rights to use copies, from you under this General Public License will not have their licenses terminated so long as such parties remain in full compliance. 5. By copying, distributing or modifying the Program (or any work based on the Program) you indicate your acceptance of this license to do so, and all its terms and conditions. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. 7. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of the license which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the license, you may choose any version ever published by the Free Software Foundation. 8. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS Appendix: How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to humanity, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) 19yy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) 19xx name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (a program to direct compilers to make passes at assemblers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice That's all there is to it! --- The Artistic License 1.0 --- This software is Copyright (c) 2004-2016 by Andy Lester. This is free software, licensed under: The Artistic License 1.0 The Artistic License Preamble The intent of this document is to state the conditions under which a Package may be copied, such that the Copyright Holder maintains some semblance of artistic control over the development of the package, while giving the users of the package the right to use and distribute the Package in a more-or-less customary fashion, plus the right to make reasonable modifications. Definitions: - "Package" refers to the collection of files distributed by the Copyright Holder, and derivatives of that collection of files created through textual modification. - "Standard Version" refers to such a Package if it has not been modified, or has been modified in accordance with the wishes of the Copyright Holder. - "Copyright Holder" is whoever is named in the copyright or copyrights for the package. - "You" is you, if you're thinking about copying or distributing this Package. - "Reasonable copying fee" is whatever you can justify on the basis of media cost, duplication charges, time of people involved, and so on. (You will not be required to justify it to the Copyright Holder, but only to the computing community at large as a market that must bear the fee.) - "Freely Available" means that no fee is charged for the item itself, though there may be fees involved in handling the item. It also means that recipients of the item may redistribute it under the same conditions they received it. 1. You may make and give away verbatim copies of the source form of the Standard Version of this Package without restriction, provided that you duplicate all of the original copyright notices and associated disclaimers. 2. You may apply bug fixes, portability fixes and other modifications derived from the Public Domain or from the Copyright Holder. A Package modified in such a way shall still be considered the Standard Version. 3. You may otherwise modify your copy of this Package in any way, provided that you insert a prominent notice in each changed file stating how and when you changed that file, and provided that you do at least ONE of the following: a) place your modifications in the Public Domain or otherwise make them Freely Available, such as by posting said modifications to Usenet or an equivalent medium, or placing the modifications on a major archive site such as ftp.uu.net, or by allowing the Copyright Holder to include your modifications in the Standard Version of the Package. b) use the modified Package only within your corporation or organization. c) rename any non-standard executables so the names do not conflict with standard executables, which must also be provided, and provide a separate manual page for each non-standard executable that clearly documents how it differs from the Standard Version. d) make other distribution arrangements with the Copyright Holder. 4. You may distribute the programs of this Package in object code or executable form, provided that you do at least ONE of the following: a) distribute a Standard Version of the executables and library files, together with instructions (in the manual page or equivalent) on where to get the Standard Version. b) accompany the distribution with the machine-readable source of the Package with your modifications. c) accompany any non-standard executables with their corresponding Standard Version executables, giving the non-standard executables non-standard names, and clearly documenting the differences in manual pages (or equivalent), together with instructions on where to get the Standard Version. d) make other distribution arrangements with the Copyright Holder. 5. You may charge a reasonable copying fee for any distribution of this Package. You may charge any fee you choose for support of this Package. You may not charge a fee for this Package itself. However, you may distribute this Package in aggregate with other (possibly commercial) programs as part of a larger (possibly commercial) software distribution provided that you do not advertise this Package as a product of your own. 6. The scripts and library files supplied as input to or produced as output from the programs of this Package do not automatically fall under the copyright of this Package, but belong to whomever generated them, and may be sold commercially, and may be aggregated with this Package. 7. C or perl subroutines supplied by you and linked into this Package shall not be considered part of this Package. 8. The name of the Copyright Holder may not be used to endorse or promote products derived from this software without specific prior written permission. 9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. The End Changes100644000765000024 15016213126734347 14675 0ustar00olafstaff000000000000WWW-Mechanize-1.86Revision history for WWW::Mechanize 1.86 2017-07-04 15:48:46Z [FIXED] - use 127.0.0.1 instead of 'localhost' in a test script to avoid the test hanging due to ipv6 issues (GH#31, see also changes in 1.85) 1.85 2017-06-28 22:06:00Z ======================================== [FIXED] - use 127.0.0.1 instead of 'localhost' in a test to avoid the test hanging due to ipv6 issues (GH#31) - Remove private logic for taint checking (Dave Doyle) - Fix Pod (simbabque) - Bump Test::More prereq to get working subtest support (Karen Etheridge) - Fix intermittent failures of taint.t (GH#108) (Kivanc Yazan) - Fix kwalitee issues (GH#107) (Kivanc Yazan) [ENHANCEMENTS] - Print section titles if mech-dump --all is invoked (GH#81) (Сергей Романов) - Add cookbook docs on dumping a req without sending it (#115) (Grigor Karavardanyan) - Document that submit only submits current form (GH#114) (nawglan) - Add Travis testing on Perl 5.26 (Karen Etheridge) - Remove obsolete and unincremented $VERSIONs in test modules (Karen Etheridge) 1.84 2017-03-07 13:34:57-05:00 America/Toronto [ENHANCEMENTS] - Parse url (href attribute) for js window.open GH#11 [FIXED] - Set STDOUT to be utf8 in mech-dump. Fixes issue GH#36 - Added --version option to mech-dump - Don't die on uri() when there has been no request. Fixes issue GH#60 - Remove old information from the cookbook. Fixes issue GH#28 - Documentation correction. Fixes issue GH#65 and GH#59 - Work around Test::More prior to 1.001004. Fixes GH#74 - Fix hostname in test. Fixes GH#73 1.83 2016-10-14 16:45:30-04:00 America/Toronto ======================================== [FIXED] - Moved live tests to be author tests. Run using dzil test --author. (Steve Scaffidi) 1.82 2016-10-06 23:00:30-04:00 America/Toronto ======================================== [ENHANCEMENTS] - Added strict_forms flag to submit_form() which sets the HTML::Form strict flag (Gareth Tunley) [FIXED] - Fixed tests which tried to access HTTPS urls when LWP::Protocol::https wasn't installed (Olaf Alders). Reported by Slaven Rezić. See https://github.com/libwww-perl/WWW-Mechanize/issues/54 1.81 2016-10-06 08:52:44-04:00 America/Toronto ======================================== [FIXED] - Work around bug in HTTP::Cookies that is triggered on reload(). See https://rt.cpan.org/Public/Bug/Display.html?id=75897 (Gianni Ceccarelli) 1.80 2016-09-24 22:38:27-04:00 America/Toronto ======================================== [FIXED] - Fixes behaviour of submit_form() when multiple filters have been supplied (Ed Avis) 1.79 2016-09-16 23:53:48-04:00 America/Toronto ======================================== [ENHANCEMENTS] - Added form_with() method. (Martin Sluka) 1.78 2016-08-08 09:18:59-04:00 America/Toronto ======================================== [OTHER CHANGES] - No changes specific to this version. First non-develepment release in about a year. 1.77 2016-08-05 12:50:12-04:00 America/Toronto (TRIAL RELEASE) ======================================== [TESTS] - Skip Wikipedia tests if LWP::Protocol::https is not installed. 1.76 2016-07-29 12:17:25-04:00 America/Toronto (TRIAL RELEASE) ======================================== [ENHANCEMENTS] - Added history() and history_count() methods. (Ricardo Signes) - click_button() now accepts ids. (Olaf Alders) - Add a more descriptive error message when ->request is called without a parameter. (Max Maischein) [DOCUMENTATION] - Document that form_id warns in addition to returning undef when a form cannot be found. (Olaf Alders) - Document use of a proxy with bin/mech-dump. (Florian Schlichting) [OTHER CHANGES] - New releases for this distribution are now generated by Dist::Zilla 1.75 2015-06-03 ======================================== [OTHER CHANGES] - WWW::Mechanize::Image and WWW::Mechanize::Link now have a defined $VERSION - fixed warning about the use of the encoding pragma (new in 5.22) (RT#91971) - fixed warning about the use of CGI::param in list context (RT#103096) 1.74 2015-01-23 ======================================== [OTHER CHANGES] - updated repository link in metadata 1.73 2013-08-24 ======================================== [TESTS] - Update t/local/back.t to use LocalServer for 404 checking to avoid fails on win32. Fix by Matt S Trout, patient diagnostics and testing provided by jayefuu of freenode #perl - Blow away more proxy env vars in LocalServer, and do it on load so that the LWP env checking doesn't happen before we've done it. [OTHER CHANGES] - Better error when passing only one parameter to follow_link 1.72 Thu Feb 2 18:37:28 EST 2012 ======================================== [DEPENDENCIES] Bumped the HTML::Form dependency to fix failures on CentOS 5 1.71 Tue Nov 14 13:50:41 EDT 2011 ======================================== [ENHANCEMENTS] Recognise application/xhtml+xml as HTML. [DOCUMENTATION] Improved docs about support of JavaScript Typo fixes. [TESTS] Updated tests as oops-music.com is in utf-8 now 1.70 Fri Aug 26 13:46:30 EDT 2011 ======================================== [ENHANCEMENTS] Mech now defaults to _not_ running live tests by default. You can still enable them by running "perl Makefile.PL --live" Thanks to RJBS for the suggestion 1.69_01 ======================================== [INTERNALS] The test suite for the local tests was updated 1.68 Fri Apr 22 01:10:40 EST 2011 ======================================== No changes from 1.67_01 1.67_01 ======================================== [ANNOUNCE] As of this release, Jesse Vincent has taken over maintenance of WWW-Mechanize. The project's repository can be found at: https://github.com/bestpractical/www-mechanize [FIXED] Added prereq for HTML::TreeBuilder. 1.66 Fri Sep 10 16:25:44 CDT 2010 ======================================== [FIXED] Fixed prerequisites on HTTP::Server::Simple on Windows. DNS checks in t/autocheck.t and t/local/failure.t improved. Thanks, Schwern. [ENHANCEMENTS] New $mech->text method returns the text from your HTML page. The exact rendering of this text is simply removing all the HTML tags, but this will change. It's pretty ugly. If anyone wants to work on a better-looking text dump, I'd love to see it. Added mech-dump --text. [DOCUMENTATION] Improvements to the docs explaining explicitly about the subclassed methods we inherit from LWP::UserAgent. Thanks, Lyle Hopkins! 1.64 Thu Jul 1 10:41:00 CDT 2010 ======================================== [THINGS THAT MAY BREAK YOUR CODE] If you've been accessing $mech->{forms} or $mech->{form} values directly, instead of going through the $mech->forms or $mech->current_form accessors, respectively, then this version of Mech will break your code. [ENHANCEMENTS] Parsing of forms has been delayed until they're actually needed. If don't use forms on a page, you'll no longer waste time and memory parsing them. $mech->title now caches the title of the page after parsing the page to find it. mech-dump now takes a --cookie-file parameter for keeping cookies between calls. Thanks, Damien Clark. [DOCUMENTATION] Typo fixes. 1.62 Sat Apr 10 23:10:07 CDT 2010 ======================================== [FIXED] Fixed a declaration in the Movable Type example in WWW::Mechanize::Examples. Quiet warnings if %ENV has undef values. $mech->follow_link() no longer dies with an inappropriate error if the link is not found. $mech->click_button() now checks to see if a form is selected. [INCOMPATIBILITIES] $mech->form_name() and $mech->form_number() no longer throw warnings if they can't find the form specified. They still return undef, though. [DOCUMENTATION] More additions to the FAQ. 1.60 Mon Aug 17 00:41:39 CDT 2009 ======================================== No new features. Exists only to skip tests that always fail on Windows. Fixed up some minor documentation problems. 1.58 Mon Jul 13 22:32:23 CDT 2009 ======================================== No new features. If you have 1.56 installed OK, you do NOT need to install 1.58. [FIXES] Removed prereq of HTTP::Response::Encoding, even though it was never used. Thanks for the catch, Gisle. 1.56 Thu Jul 9 00:36:54 CDT 2009 ======================================== [THINGS THAT MAY BREAK YOUR CODE] For a while, Mech used HTTP::Response::Encoding to try to suss out the proper encoding of the page it receives. Now, it lets LWP::UserAgent do the work, and no longer requires HTTP::Response::Encoding. [ENHANCEMENTS] Added a new dump_headers() method to dump the HTTP response headers. Added --headers flag to mech-dump to dump the HTTP response headers. [FIXES] Now requires LWP version 5.829 because HTTP::Response has memory cycle bugs. [DOCUMENTATION] Added a few notes to the FAQ, and fixed some incorrect docs. 1.55_01 Mon Jul 6 12:17:10 CDT 2009 ======================================== This is mostly a bug fix release. There will be a number of other bug fix releases in the next few days. [FIXED] New test server now randomizes the port it runs on. t/cookies.t should not hang on Windows any more. META.yml has been updated so the search.cpan.org links should be correct. Passing no_proxy would make LWP::UserAgent barf. Thanks to Mike Schilli for the fix. Cookies test would fail under Windows. Fixed, thanks to many people reporting it. [ENHANCEMENTS] $mech->submit_form() now can specify the form by ID using the form_id parameter. [DOCUMENTATION] The docs used to say that ->stack_depth(0) was an infinite stack size. This is wrong. Zero will tell Mech not to keep any history. 1.54 Mon Jan 12 00:36:08 CST 2009 ======================================== [FIXED] Removed the computers4sure test that was failing. 1.52 Tue Nov 25 09:52:30 CST 2008 ======================================== [FIXED] Improved some error messages in $mech->submit_form(). Thanks to Norbert Buchmuller. 1.51_03 Thu Nov 20 11:05:49 CST 2008 ======================================== [FIXED] The $mech->clone() method was not passing the cookie jar to its clone properly. Thanks to David Sainty. The $mech->back() can fail if there's nothing on the stack to go back to. Thanks to Dave Page. $mech->follow_link() did not complain if a link could not be found, even with autocheck on. Now it does. Thanks, Flavio Poletti. [ENHANCEMENTS] Added a $mech->form_id() method so you can look up forms by ID. Added $mech->content_type(), because $mech->ct() is too cryptic. 1.51_02 Tue Nov 18 01:30:54 CST 2008 ======================================== [STILL BROKEN] t/local/click_button.t is still failing its tests for calling ->click on an HTML::Form object. I suspect this is an LWP change, but I haven't dug into it enough yet. [FIXES] Fixed the bad credentials API that stomped on LWP::UserAgent's credentials() method. Thanks to Max Maschien and Matt Lawrence. The $mech->links method now finds links. Thanks to H.Merijn Brand. Makefile.PL explicitly requires Perl 5.8.0. URI.pm has to be version 1.36 or else URIs don't get encoded correctly. LWP has to be 5.819 or we have encoding problems. 1.51_01 Thu Nov 6 15:13:03 CST 2008 ======================================== [FIXES] Page history is now working much better. The $mech->back() method should behave more like a browser now. Most notably, it no longer restores the cookie state, just like your browser doesn't restore cookie state when you page back. It also should use much less memory. 1.50 Sun Sun Oct 26 22:42:46 CDT 2008 ======================================== [THINGS THAT MAY BREAK YOUR CODE] WWW::Mechanize now requires version 5.815 of LWP. This in itself may cause problems for you because of changes in how LWP does authentication. 1.49_01 Sat Sep 27 23:50:04 CDT 2008 ======================================== [THINGS THAT MAY BREAK YOUR CODE] The autocheck argument to the constructor is now ON by default, unless WWW::Mechanize is being subclassed. There are so many new programmers whose ->get() calls fail unchecked that I'm now putting on the seat belts for them. [FIXES] I do believe we are on the way to having all the encoding problems ironed out. This version incorporates a patch from here: http://code.google.com/p/www-mechanize/issues/detail?id=61 and tests from Miyagawa's WWW::Mechanize::DecodedContent http://search.cpan.org/dist/WWW-Mechanize-DecodedContent/ to finally fix this. [ENHANCEMENTS] You can now specify not to set up the proxy, if there is one. The proxy causes problems for Crypt::SSLeay. For details see: http://code.google.com/p/www-mechanize/issues/detail?id=39 [DOCUMENTATION] Fixed internal links. [INTERNALS] Lots of refactoring based on Schwern's "Skimmable Code" talk. http://use.perl.org/~schwern/journal/36704 http://schwern.org/~schwern/talks/Skimmable%20Code%20-%20YAPC-NA-2008.pdf 1.34 Mon Dec 10 00:30:39 CST 2007 ======================================== [FIXES] Many fixes to make the test suite more portable. 1.32 Tue Oct 30 12:02:17 CDT 2007 ======================================== [ENHANCEMENTS] Added dump methods to mirror mech-dump: * $mech->dump_images() * $mech->dump_links() * $mech->dump_forms() * $mech->dump_all() Sanity checks in the WWW::Mechanize::Image constructor. Every Image must have a "url" and "tag" field passed in to it. 1.31_02 Thu Oct 25 11:48:29 CDT 2007 ======================================== [ENHANCEMENTS] Added class, class_regex, id and id_regex limiters to find_link() and find_all_links(). Thanks to Adriano Ferreira. 1.31_01 Mon Sep 17 23:38:03 CDT 2007 ======================================== [FIXES] Mech tests now pass even if your DNS server gives A records for anything (like OpenDNS). Thanks, Miyagawa! Searching for the is now case-inensitive. A better solution would be to actually parse the HTML. [ENHANCEMENTS] mech-dump now handles --user and --password arguments for sites that require authentication. 1.30 Thu May 24 21:31:10 CDT 2007 ======================================== [DOCUMENTATION] Minor doc fixes. Thanks David Steinbrunner. 1.29_01 Tue May 22 14:02:55 CDT 2007 ======================================== Kevin Falcone and I ask for your assistance in figuring out how to handle the warnings thrown by the tests, other than hiding them. [FIXES] Overhauled how tainting was done. Stole code directly from Test::Taint. Have LWP only handle decoding of Content-Encoding, not charset. [DOCUMENTATION] Fixed the docs for $mech->submit_form()'s with_fields arg. Thanks, Peteris Krumins. 1.26 Wed May 16 14:21:29 CDT 2007 ======================================== [FIXES] Re-reversed the content decoding. This is critical for reading from sites with gzip on the fly, like Wikipedia. Content is now properly tainted. [ENHANCEMENTS] mech-dump can now pass --agent and --agent-alias flags so you can fetch from sites like Wikipedia that block LWP user agents. [INSTALLATION] The mech-dump program is now always installed. It no longer is presented as an option. 1.24 Fri May 11 15:57:56 CDT 2007 ======================================== NOTE: Version 1.24 will NOT automatically decode gzipped content for you any more. Consider it a "do not use" release. [FIXES] * Fixed failures in "make test" with some versions of HTTP::Server::Simple * RT #26593: Improved handling of charsets. Thanks Kevin Falcone. * RT #24354: find_link now handles http-equivs with quoted URLs. * Reverses the change in 1.21_01 where it decodes the content. [ENHANCEMENTS] * Added find_all_inputs() and find_all_submits() methods. Thanks, Mike O'Regan. * Test::LongString is no longer needed, so has been removed as a requirement. [TESTS] * Added a test for save_content() 1.22 Fri Mar 2 00:05:57 CST 2007 ======================================== [INTERNALS] Added new tests. Added Perl::Critic changes and a perlcriticrc file. 1.21_04 Sat Oct 7 21:35:42 CDT 2006 ======================================== [FIXES] * $mech->content( type => 'text' ) was not freeing memory. Thanks to Cat Okita for finding it. [INTERNALS] * Made the order of parms to $mech->content() not relevant. 1.21_03 Sat Oct 7 01:21:46 CDT 2006 ======================================== [THINGS THAT MAY BREAK YOUR CODE] * The methods $mech->form() and $mech->follow() have been removed. They've been deprecated since 1.10, which was released in Feb 2005. [ENHANCEMENTS] * I'm trying to nail down what seems to be a memory leak on long-running Mech programs. I'm stringifying URI::URL objects wherever I can. [INTERNALS] * No longer uses UNIVERSAL. 1.21_02 Wed Oct 4 13:14:30 CDT 2006 ======================================== [ENHANCEMENTS THAT MAY BREAK YOUR CODE] * The $mech->stack_depth() setting had no way to say "don't cache any pages at all". How silly! Now, if you set $mech->stack_depth(0), no history of pages will be kept. In the past, it would mean "Keep all pages." This means that if you want to set it to keep all pages, set it to some ridiculously large number. [DOCUMENTATION] * The docs previously refered to Compress::Gzip instead of Compress::Zlib. 1.21_01 Mon Sep 18 17:18:43 CDT 2006 ======================================== [ENHANCEMENTS] * If Compress::Zlib is installed, gzipped content is now accepted and transparently decoded. No additional syntax needed! This should save time and bandwidth in a number of cases. (Mark Stosberg) * Added a put() method. It also calls a subfunction called _SUPER_put that will be removed once LWP::UserAgent supports put(). 1.20 Sat Aug 19 09:09:08 EDT 2006 [ENHANCEMENTS] * Added new two-argument form of credentials() method. $mech->credentials($username, $password); That provides simpler visiting of password-protected resources in the vast majority of cases and still allows the other cases to be supported. (Peter Scott) [BUG FIXES] * autocheck no longer is triggered when informational responses are returned. (Mark Stosberg) [INTERNALS] * test suite no longer fails when Test::Warn is missing. (CPAN testers, Mark Stosberg) * Removed all the testing against live sites. The networking code is not actually in Mech anway, and they were prone to breaking, as the live sites changed. (Mark Stosberg) 1.19_02 Mon Aug 7 23:57:56 CDT 2006 [ENHANCEMENTS] * Add new Do-What-I-Mean submit_form() option. $mech->submit_form( with_fields => \%data ); That expresses that you want to select the first form contains all fields in \%data, and then submit the data to that form. See the docs for form_with_fields() and submit_form() for details. (Mark Stosberg, inspired by RT#6100) [BUG FIXES] * The behavior of clone() now copies over the cookie jar, which is probably what you expected it did in the first place. This fixes bug RT#13541 filed against Test::WWW::Mechanize, which was using clone() internally. (Mark Stosberg) * The correct URL is returned after redirecting. This a regression from 1.04 and was reported as RT#9059, RT#12882, and RT#12786. The documentation about this has also been clarified that we return a URI object, but that it stringifies to the URI itself. [DOCUMENTATION] * Fixed a misleading parm in the constructor. * Document the return value of set_visible (RT#6071, MJD, Mark Stosberg) * Document that form_name and form_number return an HTML::Form object (Mark Stosberg) [INTERNALS] * Made lots of little cleanups based on Perl::Critic * Fix Taint-mode warnings with Perl 5.6.1 (RT#16945) 1.18 Thu Feb 2 00:11:26 CST 2006 [TESTS] * Makefile.PL now takes four new parms: * --live/nolive turns on/off the live tests * --local/nolocal turns on/off the local tests * --mech-dump/nomech-dump installs/doesn't the mech-dump program * --all turns on all tests and installs mech-dump * Fixed some failures in tests. Non-existent URLs now have a "." postpended to them, so if someone's got a search domain with a wildcard (i.e. ignore.us) it'll ignore that. Also, Google's second link is now a https:// link, which some Mechs can't handle. Added a 'url_regex' which now makes it look at the second non-https link. Thanks to Pete Krawczyk. 1.16 Fri Oct 28 17:34:20 CDT 2005 [ENHANCEMENTS] * Sped up Mech significantly (~20% in some cases). Images and links are extracted from the HTML, and objects are created, only when they're actually needed. This will be a speedup for pages where you're only following links, or vice versa. [THINGS THAT MAY BREAK YOUR CODE] * If you've been relying on the $mech->{images} and $mech->{links} fields being populated so that you can bypass the $mech->images() and $mech->links() accessors, your code will break. That's OK, because you should have been using the accessors all along. 1.14 Tue Aug 30 17:17:40 CDT 2005 [DOCUMENTATION] * Added lots of new FAQs. Thanks to Peter Stevens. [INTERNALS] * Now requires Test::LongString. That's not too odious. [FIXES] * Tests now pass with the shuffling around that Google did. 1.13_01 Tue Apr 12 14:11:18 CDT 2005 [ENHANCEMENTS] * Now dies if you call submit_form() with a non-existing form_number or form_name. Before, it would just warn. [DOCUMENTATION] * Added an example of using credentials() in the cookbook. 1.12 Thu Feb 24 23:38:44 CST 2005 [FIXES] * Fixed RT #9026: hang in t/local/back.t under Windows XP. Thanks Andrew Savige. It also should no longer complain about being unable to clean up a temp file. 1.11_01 Mon Feb 14 00:12:48 CST 2005 [THINGS THAT MAY BREAK YOUR CODE] * Removed deprecated _parse_html() method. [FIXES] * Was incorrectly looking for INPUT tags TYPE="SUBMIT" as images. Thanks to Abe Timmerman. [ENHANCEMENTS] * Calling $mech->set_fields() with no current form now dies. Thanks to Julien Beasley. 1.10 Tue Jan 31 11:30pm-ish [FIXES] * Fixed bug where images inside of links would not be found. * Fixed test failures because of Google changes. Thanks to Offer Kaye and others who sent in patches. [DOCUMENTATION] * More samples in the FAQ. Thanks to Joshua Gatcomb. [INTERNALS] * Added explanation of running live tests against Google in Makefile.PL. 1.08 Fri Dec 24 01:01:06 CST 2004 [ENHANCEMENTS] * Added find_image() and find_all_images(). 1.06 Wed Dec 8 14:58:39 CST 2004 [INTERNALS] * Now uses the base pragma instead of setting @ISA. 1.05_04 Fri Nov 5 23:35:38 CST 2004 [ENHANCEMENTS] * Added WWW::Mechanize::Image object for representing images. * Improved the regex on the URL for META tags. * Added --images flag to mech-dump. [FIXES] * When parsing urls out of meta refresh tags, "url" may now be uppercase (RT#8230) * Behavior of back() fixed in a number of cases (RT#8109 reported by Josh Purinton, patched by Dominique Quatravaux) [INTERNALS] * Mark figured out to how to prevent his text editor from putting tabs into the code. Andy's blood pressure dropped slightly. 1.05_03 Sun Oct 31 20:54:33 CST 2004 [ENHANCEMENTS] * click_button() has a new input option for HTML::Form::SubmitInput objects (DOMQ) * content() has new options to return the page formatted as text, with a added. (RT#8087, patch by Dominique Quatravaux) * update_html() method has been added, which can be used to modify the HTML that Mech parses. It should be sub-classed instead of _parse_html(), which has been deprecated. (RT#8087, patch by Dominique Quatravaux) * select() has new option to select an option by number (RT#5789, Scott Lanning) * WWW::Mechanize::Link now has support providing all the attributes of the link through a new attrs() method, which returns them as a hashref. This is a replacement for the alt() method, added in 1.05_01. It's not backwards compatible with that, but, hey, that's what developer releases are for. (RT#8092, Rob Casey and Mark Stosberg) [FIXES] * Upload does not use the default value to prevent attacks, patch by Jan Pazdziora (RT #7843). [INTERNALS] * Improved tests and documentation for select() (RT#5789, Scott Lanning) * Improve taint-safeness on Perl 5.6.1 (RT#8042, patch by Dominique Quatravaux) * Added tests for click_button() (RT#8061, by Dominique Quatravaux) * Require URI 1.25, fixing bug which exposed itself in WWW::Mechanize (RT#3048) * Move select() to better location in docs. Document and test the return values. The return value is now "1" on success instead of the undocumented behavior of returning a form value. (RT#6138, spotted by MJD, patched by Mark Stosberg) * Possible matching tags for the find_link() 'tag_regex' attribute are now documented. (RT#2989, by Mark Stosberg) * refactored find_link() to avoid use of eval(). This should improve performance a bit and avoid potential security issues. (Mark Stosberg) 1.05_02 Sat Oct 2 16:55:59 CDT 2004 [ENHANCEMENTS] * Added the $mech->save_content( $filename ) function, so you can dump stuff to files easily. 1.05_01 Thu Sep 30 21:04:44 CDT 2004 [FIXES] * set_visible() doesn't stop setting values when it finds a zero. [ENHANCEMENTS] * WWW::Mechanize::Link has a new, easier to remember constructor interface. The old one is still supported. Support for including an 'alt' attribute was added, which is useful for links. (RT #3317). Thanks to Mark Stosberg. * When links are extracted from tags, the ALT attribute will be captured and become part of the WWW::Mechanize::Link object. (RT #3317). Patch by Mark Stosberg. [INTERNALS] * t/mech-dump.t is now more portable (RT #7690) * t/local/follow.t has new tests to confirm that 'follow*' functions work with characters like o-umlaut, even when the o-umlaut is encoded in the HTML, but not in the call to follow(). (RT #2416) By Mark Stosberg. 1.04 Wed Sep 15 23:27:53 CDT 2004 [ENHANCEMENTS] * $mech->get() now accepts a WWW::Mechanize::Link object. * $mech->stack_depth(n) lets you set the depth of the mech object's page stack. This way, if you have a Mech that does lots of stuff and never/rarely goes back(), you won't be eating up memory. Thanks to BooK and Chi-Fung. (RT #5362) [FIXES] * Fixed tests that fail under LWP >= 5.800. * Added a workaround for LWP::UserAgent->clone() when ->{proxy} is undef. (RT #6443) * The Referer was getting passed as a URI object sometimes, and that caused sadness. Eugene Haimov supplied a workaround. (RT #6372) [DOCUMENTATION] * Added Ian Langworth's listmod and John Beppu's photobucket uploader programs to WWW::Mechanize::Examples. * Minor doc tweak for find_link() * Finally added a value() func. Thanks to Spoon, who even now, months after his passing, is still contributing to Mechanize. 1.02 Tue Apr 13 22:45:10 CDT 2004 No reason to install if you have 1.00. Fixes are only in tests. [FIXES] * t/referer.t didn't cope with spaces in $FindBin::Bin. Plus, it now forces its URL to localhost. 1.00 Sat Apr 10 00:35:51 CDT 2004 I figure it's about time we hit 1.00, and this version seems like a good place to do it, because of the potential breakage described below... [THINGS THAT WILL BREAK YOUR CODE] * Header handling has changed. There is no more package variable %headers that holds all the headers to be added. They are now added on a per-object basis. If you were adding a header with add_header(), and the code relied on that header still being set later on in a later instance of the class, that code will now break, because the later instance won't have the header set. [ENHANCEMENTS] * You can now prevent a header from being sent by adding it with an undef value, as in: $mech->add_header( Referer => undef ); [FIXES] * Now correctly adds Accept-Encoding to all requests that need it. [INTERNALS] * Added new $mech->_modify_request($req) method to do all the HTTP header modification before the actual request gets sent off. Subclasses are able to override it if they want. * Removed the unused Compress::Zlib stuff. 0.76 Wed Apr 7 22:01:43 CDT 2004 [ENHANCEMENTS] * Added update_html() to let you update the HTML for the page you're on. [FIXES] * Test files account for new Google layout. [INTERNALS] * Rearranged the local tests into their own t/local/ directory. * Made the standalone tests show what server they're hitting. * Checked that it runs under LWP 5.78. 0.74 Mon Mar 22 23:36:46 CST 2004 [ENHANCEMENTS] * WWW::Mechanize now sends an Accept-Encoding header of "identity" to always enforce plaintext responses. Preliminary support for Compress::Zlib is also there, but is disabled by default. * Added click_button() and select() methods. The field() method can now take an arrayref of values, if appropriate. Thanks, Linda Lee Julien. * Added url_abs and url_abs_regex parms to find_all_links(). * URLs in META REFRESH tags are now treated as links. * t/taint.t makes sure that things that should be tainted are. [FIXES] * Still more fixes if the machine you're on doesn't have DNS pointing to it. * The local changes use localhost as the local host name, instead of whatever host name that might be on the box, but not in DNS. Thanks to David Wheeler for letting me play on his box. * The http_proxy and HTTP_PROXY environment variables get deleted during the tests that access the dummy local server. This should let your tests pass, and clear up a lot of RT tickets. 0.72 Mon Jan 26 21:07:20 CST 2004 [ENHANCEMENTS] * Added the set_visible() method, thanks to Peter Scott. [DOCUMENTATION] * Started the Cookbook at WWW::Mechanize::Cookbook.pod. [INTERNALS] * Made the globbing in Makefile.PL a little less command-line intensive. Also fixed the missing files in MANIFEST. * Added t/pod-coverage.t for testing POD coverage. 0.71_02 Mon Dec 22 14:29:13 CST 2003 [THINGS THAT MAY BREAK YOUR CODE] * Added a 5th, optional parameter to WWW::Mechanize::Link's constructor. In 0.71_01, it was at the beginning of the argument list and was required. Now it's at the end and is optional. If, in the 15 hours since 0.71_01 came out, you went and changed all your WWW::Mechanize::Link constructors, you'll have to change them around again. Otherwise, you can just ignore this change. 0.71_01 Sun Dec 21 23:48:12 CST 2003 [THINGS THAT MAY BREAK YOUR CODE] * WWW::Mechanize::Link's constructor has a new argument that needs to be passed in, at the start of the argument list. [ENHANCEMENTS] * WWW::Mechanize::Link object now takes a $base URL, and will return absolute URLs with the url_abs() method. Thanks to Ashley Pond. * Added another script to WWW::Mechanize::Examples. It's a script that didn't make it into Spidering Hacks. [INSTALL & TESTS] * Heavy use of the new Test::Memory::Cycle module. * Fixed Makefile.PL so that the tests are selected under Win32. * Changed t/mech-dump.t so that the test succeeds under Win32. * Updated t/referer.t and t/mech-dump.t so they run under VMS. Thanks to Peter Prymmer. 0.70 Sun Nov 30 23:45:27 CST 2003 [THINGS THAT MAY BREAK YOUR CODE] * Redirects are now handled better by LWP, so the code that changes POSTs to GETs on redirects has been removed. [FIXES] * Fixed redirect_ok(), which had its API changed out from under it in LWP 5.76. [ENHANCEMENTS] * New warnings in find_link() for strings that are space padded, and for text matches that are passed a regex. Thanks to Jim Cromie. [DOCUMENTATION] * Patches from Mark Stosberg and Jim Cromie. [INTERNALS] * Removed all the checking for Carp. I don't know why I was thinking that Carp wasn't core. RT #4523. Also, a big bump in requirements on LWP: We need 5.76. 0.66 Thu Nov 13 14:35:31 CST 2003 No new functionality. Fixed up some install bugs and made a few documentation tweaks, mostly to plug Spidering Hacks. 0.65 Mon Nov 10 00:11:06 CST 2003 [ENHANCEMENTS] * Made a _parse_html() method that you can override or call manually, per request from Gavin Estey. [FIXES] * Made some path naming use File::Spec->catfile so that they work correctly under Windows. * "make clean" cleans up temp flag files. [INTERNALS] * Uses the new Test::Pod 1.00 for simplicity. 0.64 October 23, 2003 11:15pm [ENHANCEMENTS] * Many new tests, based on the excellent coverage reporting created by Paul Johnson's Devel::Cover module. * The start of JavaScript support, sort of! If you have an tag that does an onClick that opens a window, Mech will find the URL from that and make that be the link for the tag. This is for things like Movable Type that pop little windows to rebuild indexes. This is subject to change in the future. I don't know if it will, but I'm not making promises. It might be so buggy I just yank the whole thing. * Big jump in requirements, since we'll soon be using Gisle's new HTML::Form stuff. Also, older versions of HTML::Form don't give output I'm expecting. [FIXES] * Fixed the t/mech-dump.t failure. 0.63 October 13, 2003 2:56pm [ENHANCEMENTS] * mech-dump defaults to dumping forms. * Added name, name_regex, tag and tag_regex options to find_link() and follow_link(). * Added tests from Jim Brandt. 0.62 October 7, 2003 8:46pm [THINGS THAT MIGHT BREAK YOUR CODE] * The parms for find_link()'s url_regex and text_regex must now be actual regex objects, as in qr// objects. They can't just be little text strings. If this is a big bummer, let me know. [ENHANCEMENTS] * Added autocheck parm, to tell your Mech object to die on any error. This saves you from having to check yourself. This closes RT #3056. * Renamed the internal _carp() method as warn(). * Added a die() method. * Can now override the warn() and die() handlers in the constructor. * find_link() now complains if it gets a *_regex parm that isn't actually a regex. See RT #3032. [FIXES] * mech-dump.t no longer runs if you're not installing mech-dump. See RT #3724. [DOCUMENTATION] * More FAQs. Thanks to Gavin Estey. 0.61 October 6, 2003 6:30pm No new functionality here. It's mostly to get the new tests into the pipeline so the CPAN testers can run 'em. [FIXES] * Missing dependency on File::Temp. Thanks, Ask. [ENHANCEMENTS] * Added the test case for the form processing problem as a .t file, since I spent so long getting it down to a simple case. * Internal code uses accessors instead of direct hash entries. Prepare for deprecation of existing hash entries! [DOCUMENTATION] * The FAQ is now its own document at WWW::Mechanize::FAQ. 0.60 September 22, 2003 10:00pm [FIXES] * Changed how t/failure.t tries to fail. It used to hit a bogus hostname in .com, but with Verisign doing its SiteFinder crap, even bogus addresses in .com succeed. [ENHANCEMENTS] * Added _make_request() to let WWW::Mechanize::Cached easily hook into the request chain. 0.59 September 3, 2003 11:56pm [FIXES] * Squelched a warning in follow() where it tries to do a regex match against an undef value. * The page stack functionality, including the back() button, was entirely broken. Now it works. Thanks to the mighty Iain Truskett for help. [ENHANCEMENTS] * Added the mech-dump script, which replaces mech-forms. It will dump forms and lists of links. Eventually it will do lists of images, too, but not yet. 0.58 August 14, 2003 11:30pm [THINGS THAT MIGHT BREAK YOUR CODE] * $mech->uri() now returns a plain string, not a URI object. The automatic stringification of the URI object was causing problems on Win32 and/or threaded Perls, and I didn't feel like figuring out why. If the non-objectness of the uri() method is a problem, let me know. * form(), form_name() and form_number() now return the HTML::Form object of the form that was chosen. They used to return a 1 or 0. This means that if you're explicitly checking for 1 or 0, instead of evaluating the return code in a boolean context, your code will break. [FIXES] * The -handling in extract_links() was incorrectly building the text. * uri() now returns a string, not a URI object. * form(), form_name() and form_number() now return the HTML::Form object of the form that was chosen. [INTERNALS] * Determination of live vs. local tests is now done in Makefile.PL, and we don't have to set those silly semaphore files any more. * Made other cleanups in Makefile.PL, like using ExtUtils::Command instead of rolling my own touch(). * Moved all the *-live.t tests into t/live/*.t, and renamed the *-local.t files to not be -local. * Added more tests for tags. 0.57 July 31, 2003 11:21pm [ENHANCEMENTS] * Added tags to those that are links per find_links(). 0.56 July 24, 2003 12:15pm [THINGS THAT MIGHT BREAK YOUR CODE] * Created agent_alias() method to do the browser string translation. Passing "Windows IE 6" to agent() will get you back exactly that string as the agent. You have to call $a->agent_alias( "Windows IE 6" ) to get the translation. Fortunately, unless you used the new functionality of agent() in the past two days since I released 0.55, it won't be a problem. [ENHANCEMENTS] * Removed the dependencies on Carp and Test::Builder. There still is a dependency on Test::Builder for Test::More, but it's no longer explicit in the Makefile.PL. Mech will use Carp if possible, but it's no longer a requirement. [INTERNALS] * Added _carp method for handling conditional warnings, rather than checking quiet() all the time. 0.55 July 22, 2003 12:10pm [ENHANCEMENTS] * Added WWW::Mechanize::Link object to encapsulate what used to be an array reference of stuff from find_link(). This replaces having to know that $link->[0] was URL and so on. However, since WWW::Mechanize::Link is a blessed arrayref, it's backwards compatible with existing code. * The WWW::Mechanize::Link object now tracks what tag the link came from (, or
Fake Signature Signature Fake Signature save_content.html100644000765000024 21713126734347 17136 0ustar00olafstaff000000000000WWW-Mechanize-1.86/t Però poi si vedrà!!! author000755000765000024 013126734347 15132 5ustar00olafstaff000000000000WWW-Mechanize-1.86/xttidyall.t100644000765000024 52213126734347 17100 0ustar00olafstaff000000000000WWW-Mechanize-1.86/xt/author# This file was automatically generated by Dist::Zilla::Plugin::Test::TidyAll v$VERSION use Test::More 0.88; use Test::Code::TidyAll 0.24; tidyall_ok( verbose => ( exists $ENV{TEST_TIDYALL_VERBOSE} ? $ENV{TEST_TIDYALL_VERBOSE} : 0 ), jobs => ( exists $ENV{TEST_TIDYALL_JOBS} ? $ENV{TEST_TIDYALL_JOBS} : 1 ), ); done_testing; WWW000755000765000024 013126734347 14427 5ustar00olafstaff000000000000WWW-Mechanize-1.86/libMechanize.pm100644000765000024 25573513126734347 17111 0ustar00olafstaff000000000000WWW-Mechanize-1.86/lib/WWWpackage WWW::Mechanize; #ABSTRACT: Handy web browsing in a Perl object use strict; use warnings; our $VERSION = '1.86'; use Tie::RefHash; use HTTP::Request 1.30; use LWP::UserAgent 5.827; use HTML::Form 1.00; use HTML::TokeParser; use Scalar::Util qw(tainted); use base 'LWP::UserAgent'; our $HAS_ZLIB; BEGIN { $HAS_ZLIB = eval 'use Compress::Zlib (); 1;'; } sub new { my $class = shift; my %parent_parms = ( agent => "WWW-Mechanize/$VERSION", cookie_jar => {}, ); my %mech_parms = ( autocheck => ($class eq 'WWW::Mechanize' ? 1 : 0), onwarn => \&WWW::Mechanize::_warn, onerror => \&WWW::Mechanize::_die, quiet => 0, stack_depth => 8675309, # Arbitrarily humongous stack headers => {}, noproxy => 0, ); my %passed_parms = @_; # Keep the mech-specific parms before creating the object. while ( my($key,$value) = each %passed_parms ) { if ( exists $mech_parms{$key} ) { $mech_parms{$key} = $value; } else { $parent_parms{$key} = $value; } } my $self = $class->SUPER::new( %parent_parms ); bless $self, $class; # Use the mech parms now that we have a mech object. for my $parm ( keys %mech_parms ) { $self->{$parm} = $mech_parms{$parm}; } $self->{page_stack} = []; $self->env_proxy() unless $mech_parms{noproxy}; # libwww-perl 5.800 (and before, I assume) has a problem where # $ua->{proxy} can be undef and clone() doesn't handle it. $self->{proxy} = {} unless defined $self->{proxy}; push( @{$self->requests_redirectable}, 'POST' ); $self->_reset_page(); return $self; } my %known_agents = ( 'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)', 'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6', 'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-us) AppleWebKit/85 (KHTML, like Gecko) Safari/85', 'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401', 'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624', 'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)', ); sub agent_alias { my $self = shift; my $alias = shift; if ( defined $known_agents{$alias} ) { return $self->agent( $known_agents{$alias} ); } else { $self->warn( qq{Unknown agent alias "$alias"} ); return $self->agent(); } } sub known_agent_aliases { return sort keys %known_agents; } sub get { my $self = shift; my $uri = shift; $uri = $uri->url if ref($uri) eq 'WWW::Mechanize::Link'; $uri = $self->base ? URI->new_abs( $uri, $self->base ) : URI->new( $uri ); # It appears we are returning a super-class method, # but it in turn calls the request() method here in Mechanize return $self->SUPER::get( $uri->as_string, @_ ); } sub put { my $self = shift; my $uri = shift; $uri = $uri->url if ref($uri) eq 'WWW::Mechanize::Link'; $uri = $self->base ? URI->new_abs( $uri, $self->base ) : URI->new( $uri ); # It appears we are returning a super-class method, # but it in turn calls the request() method here in Mechanize return $self->_SUPER_put( $uri->as_string, @_ ); } # Added until LWP::UserAgent has it. sub _SUPER_put { require HTTP::Request::Common; my($self, @parameters) = @_; my @suff = $self->_process_colonic_headers(\@parameters,1); return $self->request( HTTP::Request::Common::PUT( @parameters ), @suff ); } sub reload { my $self = shift; return unless my $req = $self->{req}; # LWP::UserAgent sets up a request_prepare handler that calls # $self->cookie_jar->add_cookie_header($req) # # HTTP::Cookies::add_cookie_header always preserves existing # cookies in a request object # # we pass an existing request to _make_request # # result: cookies will get repeated every time someone calls # ->reload, sooner or later leading to a "request too big" from # the server # # until https://rt.cpan.org/Public/Bug/Display.html?id=75897 is # fixed, let's clear the cookies from the existing request $req->remove_header('Cookie'); return $self->_update_page( $req, $self->_make_request( $req, @_ ) ); } sub back { my $self = shift; my $stack = $self->{page_stack}; return unless $stack && @{$stack}; my $popped = pop @{$self->{page_stack}}; my $req = $popped->{req}; my $res = $popped->{res}; $self->_update_page( $req, $res ); return 1; } sub history_count { my $self = shift; # If we don't have a "current" page, we certainly don't have any previous # ones. return 0 unless $self->{req} && $self->{res}; my $stack = $self->{page_stack}; return 1 unless $stack; return 1 + @$stack; } sub history { my $self = shift; my $n = shift; return undef unless $self->{req} && $self->{res}; if ($n == 0) { return { req => $self->{req}, res => $self->{res} }; } my $stack = $self->{page_stack}; return undef unless $stack && @$stack >= $n; return { req => $stack->[-$n]{req}, res => $stack->[-$n]{res} }; } sub success { my $self = shift; return $self->res && $self->res->is_success; } sub uri { my $self = shift; return $self->response ? $self->response->request->uri : undef; } sub res { my $self = shift; return $self->{res}; } sub response { my $self = shift; return $self->{res}; } sub status { my $self = shift; return $self->{status}; } sub ct { my $self = shift; return $self->{ct}; } sub content_type { my $self = shift; return $self->{ct}; } sub base { my $self = shift; return $self->{base}; } sub is_html { my $self = shift; return defined $self->ct && ($self->ct eq 'text/html' || $self->ct eq 'application/xhtml+xml'); } sub title { my $self = shift; return unless $self->is_html; if ( not defined $self->{title} ) { require HTML::HeadParser; my $p = HTML::HeadParser->new; $p->parse($self->content); $self->{title} = $p->header('Title'); } return $self->{title}; } sub content { my $self = shift; my %parms = @_; my $content = $self->{content}; if (delete $parms{raw}) { $content = $self->response()->content(); } elsif (delete $parms{decoded_by_headers}) { $content = $self->response()->decoded_content(charset => 'none'); } elsif (my $charset = delete $parms{charset}) { $content = $self->response()->decoded_content(charset => $charset); } elsif ( $self->is_html ) { if ( exists $parms{base_href} ) { my $base_href = (delete $parms{base_href}) || $self->base; $content=~s//\n/i; } if ( my $format = delete $parms{format} ) { if ( $format eq 'text' ) { $content = $self->text; } else { $self->die( qq{Unknown "format" parameter "$format"} ); } } $self->_check_unhandled_parms( %parms ); } return $content; } sub text { my $self = shift; if ( not defined $self->{text} ) { require HTML::TreeBuilder; my $tree = HTML::TreeBuilder->new(); $tree->parse( $self->content ); $tree->eof(); $tree->elementify(); # just for safety $self->{text} = $tree->as_text(); $tree->delete; } return $self->{text}; } sub _check_unhandled_parms { my $self = shift; my %parms = @_; for my $cmd ( sort keys %parms ) { $self->die( qq{Unknown named argument "$cmd"} ); } } sub links { my $self = shift; $self->_extract_links() unless $self->{links}; return @{$self->{links}} if wantarray; return $self->{links}; } sub follow_link { my $self = shift; $self->die( qq{Needs to get key-value pairs of parameters.} ) if @_ % 2; my %parms = ( n=>1, @_ ); if ( $parms{n} eq 'all' ) { delete $parms{n}; $self->warn( q{follow_link(n=>"all") is not valid} ); } my $link = $self->find_link(%parms); if ( $link ) { return $self->get( $link->url ); } if ( $self->{autocheck} ) { $self->die( 'Link not found' ); } return; } sub find_link { my $self = shift; my %parms = ( n=>1, @_ ); my $wantall = ( $parms{n} eq 'all' ); $self->_clean_keys( \%parms, qr/^(n|(text|url|url_abs|name|tag|id|class)(_regex)?)$/ ); my @links = $self->links or return; my $nmatches = 0; my @matches; for my $link ( @links ) { if ( _match_any_link_parms($link,\%parms) ) { if ( $wantall ) { push( @matches, $link ); } else { ++$nmatches; return $link if $nmatches >= $parms{n}; } } } # for @links if ( $wantall ) { return @matches if wantarray; return \@matches; } return; } # find_link # Used by find_links to check for matches # The logic is such that ALL parm criteria that are given must match sub _match_any_link_parms { my $link = shift; my $p = shift; # No conditions, anything matches return 1 unless keys %$p; return if defined $p->{url} && !($link->url eq $p->{url} ); return if defined $p->{url_regex} && !($link->url =~ $p->{url_regex} ); return if defined $p->{url_abs} && !($link->url_abs eq $p->{url_abs} ); return if defined $p->{url_abs_regex} && !($link->url_abs =~ $p->{url_abs_regex} ); return if defined $p->{text} && !(defined($link->text) && $link->text eq $p->{text} ); return if defined $p->{text_regex} && !(defined($link->text) && $link->text =~ $p->{text_regex} ); return if defined $p->{name} && !(defined($link->name) && $link->name eq $p->{name} ); return if defined $p->{name_regex} && !(defined($link->name) && $link->name =~ $p->{name_regex} ); return if defined $p->{tag} && !($link->tag && $link->tag eq $p->{tag} ); return if defined $p->{tag_regex} && !($link->tag && $link->tag =~ $p->{tag_regex} ); return if defined $p->{id} && !($link->attrs->{id} && $link->attrs->{id} eq $p->{id} ); return if defined $p->{id_regex} && !($link->attrs->{id} && $link->attrs->{id} =~ $p->{id_regex} ); return if defined $p->{class} && !($link->attrs->{class} && $link->attrs->{class} eq $p->{class} ); return if defined $p->{class_regex} && !($link->attrs->{class} && $link->attrs->{class} =~ $p->{class_regex} ); # Success: everything that was defined passed. return 1; } # Cleans the %parms parameter for the find_link and find_image methods. sub _clean_keys { my $self = shift; my $parms = shift; my $rx_keyname = shift; for my $key ( keys %$parms ) { my $val = $parms->{$key}; if ( $key !~ qr/$rx_keyname/ ) { $self->warn( qq{Unknown link-finding parameter "$key"} ); delete $parms->{$key}; next; } my $key_regex = ( $key =~ /_regex$/ ); my $val_regex = ( ref($val) eq 'Regexp' ); if ( $key_regex ) { if ( !$val_regex ) { $self->warn( qq{$val passed as $key is not a regex} ); delete $parms->{$key}; next; } } else { if ( $val_regex ) { $self->warn( qq{$val passed as '$key' is a regex} ); delete $parms->{$key}; next; } if ( $val =~ /^\s|\s$/ ) { $self->warn( qq{'$val' is space-padded and cannot succeed} ); delete $parms->{$key}; next; } } } # for keys %parms return; } # _clean_keys() sub find_all_links { my $self = shift; return $self->find_link( @_, n=>'all' ); } sub find_all_inputs { my $self = shift; my %criteria = @_; my $form = $self->current_form() or return; my @found; foreach my $input ( $form->inputs ) { # check every pattern for a match on the current hash my $matched = 1; foreach my $criterion ( sort keys %criteria ) { # Sort so we're deterministic my $field = $criterion; my $is_regex = ( $field =~ s/(?:_regex)$// ); my $what = $input->{$field}; $matched = defined($what) && ( $is_regex ? ( $what =~ $criteria{$criterion} ) : ( $what eq $criteria{$criterion} ) ); last if !$matched; } push @found, $input if $matched; } return @found; } sub find_all_submits { my $self = shift; return $self->find_all_inputs( @_, type_regex => qr/^(submit|image)$/ ); } sub images { my $self = shift; $self->_extract_images() unless $self->{images}; return @{$self->{images}} if wantarray; return $self->{images}; } sub find_image { my $self = shift; my %parms = ( n=>1, @_ ); my $wantall = ( $parms{n} eq 'all' ); $self->_clean_keys( \%parms, qr/^(n|(alt|url|url_abs|tag)(_regex)?)$/ ); my @images = $self->images or return; my $nmatches = 0; my @matches; for my $image ( @images ) { if ( _match_any_image_parms($image,\%parms) ) { if ( $wantall ) { push( @matches, $image ); } else { ++$nmatches; return $image if $nmatches >= $parms{n}; } } } # for @images if ( $wantall ) { return @matches if wantarray; return \@matches; } return; } # Used by find_images to check for matches # The logic is such that ALL parm criteria that are given must match sub _match_any_image_parms { my $image = shift; my $p = shift; # No conditions, anything matches return 1 unless keys %$p; return if defined $p->{url} && !($image->url eq $p->{url} ); return if defined $p->{url_regex} && !($image->url =~ $p->{url_regex} ); return if defined $p->{url_abs} && !($image->url_abs eq $p->{url_abs} ); return if defined $p->{url_abs_regex} && !($image->url_abs =~ $p->{url_abs_regex} ); return if defined $p->{alt} && !(defined($image->alt) && $image->alt eq $p->{alt} ); return if defined $p->{alt_regex} && !(defined($image->alt) && $image->alt =~ $p->{alt_regex} ); return if defined $p->{tag} && !($image->tag && $image->tag eq $p->{tag} ); return if defined $p->{tag_regex} && !($image->tag && $image->tag =~ $p->{tag_regex} ); # Success: everything that was defined passed. return 1; } sub find_all_images { my $self = shift; return $self->find_image( @_, n=>'all' ); } sub forms { my $self = shift; $self->_extract_forms() unless $self->{forms}; return @{$self->{forms}} if wantarray; return $self->{forms}; } sub current_form { my $self = shift; if ( !$self->{current_form} ) { $self->form_number(1); } return $self->{current_form}; } sub form_number { my ($self, $form) = @_; # XXX Should we die if no $form is defined? Same question for form_name() my $forms = $self->forms; if ( $forms->[$form-1] ) { $self->{current_form} = $forms->[$form-1]; return $self->{current_form}; } return; } sub form_name { my ($self, $form) = @_; return $self->form_with( name => $form ); } sub form_id { my ($self, $formid) = @_; defined( my $form = $self->form_with( id => $formid ) ) or $self->warn(qq{ There is no form with ID "$formid"}); return $form; } sub all_forms_with_fields { my ($self, @fields) = @_; die 'no fields provided' unless scalar @fields; my @matches; FORMS: for my $form (@{ $self->forms }) { my @fields_in_form = $form->param(); for my $field (@fields) { next FORMS unless grep { $_ eq $field } @fields_in_form; } push @matches, $form; } return @matches; } sub form_with_fields { my ($self, @fields) = @_; die 'no fields provided' unless scalar @fields; my @matches = $self->all_forms_with_fields(@fields); my $nmatches = @matches; if ( $nmatches > 0 ) { if ( $nmatches > 1 ) { $self->warn( "There are $nmatches forms with the named fields. The first one was used." ) } return $self->{current_form} = $matches[0]; } else { $self->warn( qq{There is no form with the requested fields} ); return undef; } } sub all_forms_with { my ( $self, %spec ) = @_; my @forms = $self->forms; foreach my $attr ( keys %spec ) { @forms = grep _equal( $spec{$attr}, $_->attr($attr) ), @forms or return; } return @forms; } sub form_with { my ( $self, %spec ) = @_; return if not $self->forms; my @forms = $self->all_forms_with(%spec); if ( @forms > 1 ) { # Warn if several forms matched. # For ->form_with( method => 'POST', action => '', id => undef ) we get: # >>There are 2 forms with empty action and no id and method "POST". # The first one was used.<< $self->warn( 'There are ' . @forms . ' forms ' . ( keys %spec # explain search criteria if there were any ? 'with ' . join( ' and ', # "with ... and ... and ..." map { unless ( defined $spec{$_} ) { # case $attr => undef qq{no $_}; } elsif ( $spec{$_} eq '' ) { # case $attr=> '' qq{empty $_}; } else { # case $attr => $value qq{$_ "$spec{$_}"}; } } # case $attr => undef sort keys %spec # sort keys to get deterministic messages ) : '' ) . '. The first one was used.' ); } return $self->{current_form} = $forms[0]; } # NOT an object method! # Expects two values and returns true only when either # both are defined and eq(ual) or when both are not defined. sub _equal { my ( $x, $y ) = @_; defined $x ? defined $y && $x eq $y : !defined $y; } sub field { my ($self, $name, $value, $number) = @_; $number ||= 1; my $form = $self->current_form(); if ($number > 1) { $form->find_input($name, undef, $number)->value($value); } else { if ( ref($value) eq 'ARRAY' ) { $form->param($name, $value); } else { $form->value($name => $value); } } } sub select { my ($self, $name, $value) = @_; my $form = $self->current_form(); my $input = $form->find_input($name); if (!$input) { $self->warn( qq{Input "$name" not found} ); return; } if ($input->type ne 'option') { $self->warn( qq{Input "$name" is not type "select"} ); return; } # For $mech->select($name, {n => 3}) or $mech->select($name, {n => [2,4]}), # transform the 'n' number(s) into value(s) and put it in $value. if (ref($value) eq 'HASH') { for (keys %$value) { $self->warn(qq{Unknown select value parameter "$_"}) unless $_ eq 'n'; } if (defined($value->{n})) { my @inputs = $form->find_input($name, 'option'); my @values = (); # distinguish between multiple and non-multiple selects # (see INPUTS section of `perldoc HTML::Form`) if (@inputs == 1) { @values = $inputs[0]->possible_values(); } else { foreach my $input (@inputs) { my @possible = $input->possible_values(); push @values, pop @possible; } } my $n = $value->{n}; if (ref($n) eq 'ARRAY') { $value = []; for (@$n) { unless (/^\d+$/) { $self->warn(qq{"n" value "$_" is not a positive integer}); return; } push @$value, $values[$_ - 1]; # might be undef } } elsif (!ref($n) && $n =~ /^\d+$/) { $value = $values[$n - 1]; # might be undef } else { $self->warn('"n" value is not a positive integer or an array ref'); return; } } else { $self->warn('Hash value is invalid'); return; } } # hashref if (ref($value) eq 'ARRAY') { $form->param($name, $value); return 1; } $form->value($name => $value); return 1; } sub set_fields { my $self = shift; my %fields = @_; my $form = $self->current_form or $self->die( 'No form defined' ); while ( my ( $field, $value ) = each %fields ) { if ( ref $value eq 'ARRAY' ) { $form->find_input( $field, undef, $value->[1])->value($value->[0] ); } else { $form->value($field => $value); } } # while } # set_fields() sub set_visible { my $self = shift; my $form = $self->current_form; my @inputs = $form->inputs; my $num_set = 0; for my $value ( @_ ) { # Handle type/value pairs an arrayref if ( ref $value eq 'ARRAY' ) { my ( $type, $value ) = @$value; while ( my $input = shift @inputs ) { next if $input->type eq 'hidden'; if ( $input->type eq $type ) { $input->value( $value ); $num_set++; last; } } # while } # by default, it's a value else { while ( my $input = shift @inputs ) { next if $input->type eq 'hidden'; $input->value( $value ); $num_set++; last; } # while } } # for return $num_set; } # set_visible() sub tick { my $self = shift; my $name = shift; my $value = shift; my $set = @_ ? shift : 1; # default to 1 if not passed # loop though all the inputs my $index = 0; while ( my $input = $self->current_form->find_input( $name, 'checkbox', $index ) ) { # Can't guarantee that the first element will be undef and the second # element will be the right name foreach my $val ($input->possible_values()) { next unless defined $val; if ($val eq $value) { $input->value($set ? $value : undef); return; } } # move onto the next input $index++; } # while # got self far? Didn't find anything $self->warn( qq{No checkbox "$name" for value "$value" in form} ); } # tick() sub untick { shift->tick(shift,shift,undef); } sub value { my $self = shift; my $name = shift; my $number = shift || 1; my $form = $self->current_form; if ( $number > 1 ) { return $form->find_input( $name, undef, $number )->value(); } else { return $form->value( $name ); } } # value sub click { my ($self, $button, $x, $y) = @_; for ($x, $y) { $_ = 1 unless defined; } my $request = $self->current_form->click($button, $x, $y); return $self->request( $request ); } sub click_button { my $self = shift; my %args = @_; for ( keys %args ) { if ( !/^(number|name|value|id|input|x|y)$/ ) { $self->warn( qq{Unknown click_button parameter "$_"} ); } } for ($args{x}, $args{y}) { $_ = 1 unless defined; } my $form = $self->current_form or $self->die( 'click_button: No form has been selected' ); my $request; if ( $args{name} ) { $request = $form->click( $args{name}, $args{x}, $args{y} ); } # 0 is a valid id in HTML5 elsif ( defined $args{id} ) { # HTML::Form expects ids to be prefixed with '#' my $input = $form->find_input('#' . $args{id}); $request = $input->click( $form, $args{x}, $args{y} ); } elsif ( $args{number} ) { my $input = $form->find_input( undef, 'submit', $args{number} ); $request = $input->click( $form, $args{x}, $args{y} ); } elsif ( $args{input} ) { $request = $args{input}->click( $form, $args{x}, $args{y} ); } elsif ( $args{value} ) { my $i = 1; while ( my $input = $form->find_input(undef, 'submit', $i) ) { if ( $args{value} && ($args{value} eq $input->value) ) { $request = $input->click( $form, $args{x}, $args{y} ); last; } $i++; } # while } # $args{value} return $self->request( $request ); } sub submit { my $self = shift; my $request = $self->current_form->make_request; return $self->request( $request ); } sub submit_form { my( $self, %args ) = @_; for ( keys %args ) { if ( !/^(form_(number|name|fields|id)|(with_)?fields|button|x|y|strict_forms)$/ ) { # XXX Why not die here? $self->warn( qq{Unknown submit_form parameter "$_"} ); } } my $fields; for (qw/with_fields fields/) { if ($args{$_}) { if ( ref $args{$_} eq 'HASH' ) { $fields = $args{$_}; } else { die "$_ arg to submit_form must be a hashref"; } last; } } my @filtered_sets; if ( $args{with_fields} ) { $fields || die q{must submit some 'fields' with with_fields}; my @got = $self->all_forms_with_fields(keys %{$fields}); die "There is no form with the requested fields" if not @got; push @filtered_sets, \@got; } if ( my $form_number = $args{form_number} ) { my $got = $self->form_number( $form_number ); die "There is no form numbered $form_number" if not $got; push @filtered_sets, [ $got ]; } if ( my $form_name = $args{form_name} ) { my @got = $self->all_forms_with( name => $form_name ); die qq{There is no form named "$form_name"} if not @got; push @filtered_sets, \@got; } if ( my $form_id = $args{form_id} ) { my @got = $self->all_forms_with( id => $form_id ); $self->warn(qq{ There is no form with ID "$form_id"}) if not @got; push @filtered_sets, \@got; } if (not @filtered_sets) { # No form selector was used. # Maybe a form was set separately, or we'll default to the first form. } else { # Need to intersect to apply all the various filters. # Assume that each filtered set only has a given form object once. # So we can count occurrences. # tie my %c, 'Tie::RefHash' or die; foreach (@filtered_sets) { foreach (@$_) { ++$c{$_}; } } my $expected_count = scalar @filtered_sets; my @matched = grep { $c{$_} == $expected_count } keys %c; if (not @matched) { die "There is no form that satisfies all the criteria"; } if (@matched > 1) { die "More than one form satisfies all the criteria"; } $self->{current_form} = $matched[0]; } if (defined($args{strict_forms})) { # Strict argument has been passed, set the flag as appropriate # this must be done prior to attempting to set the fields $self->current_form->strict($args{strict_forms}); } $self->set_fields( %{$fields} ) if $fields; my $response; if ( $args{button} ) { $response = $self->click( $args{button}, $args{x} || 0, $args{y} || 0 ); } else { $response = $self->submit(); } return $response; } sub add_header { my $self = shift; my $npairs = 0; while ( @_ ) { my $key = shift; my $value = shift; ++$npairs; $self->{headers}{$key} = $value; } return $npairs; } sub delete_header { my $self = shift; while ( @_ ) { my $key = shift; delete $self->{headers}{$key}; } return; } sub quiet { my $self = shift; $self->{quiet} = $_[0] if @_; return $self->{quiet}; } sub stack_depth { my $self = shift; $self->{stack_depth} = shift if @_; return $self->{stack_depth}; } sub save_content { my $self = shift; my $filename = shift; my %opts = @_; if (delete $opts{binary}) { $opts{binmode} = ':raw'; $opts{decoded_by_headers} = 1; } open( my $fh, '>', $filename ) or $self->die( "Unable to create $filename: $!" ); if ((my $binmode = delete($opts{binmode}) || '') || ($self->content_type() !~ m{^text/})) { if (length($binmode) && (substr($binmode, 0, 1) eq ':')) { binmode $fh, $binmode; } else { binmode $fh; } } print {$fh} $self->content(%opts) or $self->die( "Unable to write to $filename: $!" ); close $fh or $self->die( "Unable to close $filename: $!" ); return; } sub _get_fh_default_stdout { my $self = shift; my $p = shift || ''; if ( !$p ) { return \*STDOUT; } elsif ( !ref($p) ) { open my $fh, '>', $p or $self->die( "Unable to write to $p: $!" );; return $fh; } else { return $p; } } sub dump_headers { my $self = shift; my $fh = $self->_get_fh_default_stdout(shift); print {$fh} $self->response->headers_as_string; return; } sub dump_links { my $self = shift; my $fh = shift || \*STDOUT; my $absolute = shift; for my $link ( $self->links ) { my $url = $absolute ? $link->url_abs : $link->url; $url = '' if not defined $url; print {$fh} $url, "\n"; } return; } sub dump_images { my $self = shift; my $fh = shift || \*STDOUT; my $absolute = shift; for my $image ( $self->images ) { my $url = $absolute ? $image->url_abs : $image->url; $url = '' if not defined $url; print {$fh} $url, "\n"; } return; } sub dump_forms { my $self = shift; my $fh = shift || \*STDOUT; for my $form ( $self->forms ) { print {$fh} $form->dump, "\n"; } return; } sub dump_text { my $self = shift; my $fh = shift || \*STDOUT; my $absolute = shift; print {$fh} $self->text, "\n"; return; } sub clone { my $self = shift; my $clone = $self->SUPER::clone(); $clone->cookie_jar( $self->cookie_jar ); $clone->{headers} = { %{$self->{headers}} }; return $clone; } sub redirect_ok { my $self = shift; my $prospective_request = shift; my $response = shift; my $ok = $self->SUPER::redirect_ok( $prospective_request, $response ); if ( $ok ) { $self->{redirected_uri} = $prospective_request->uri; } return $ok; } sub request { my $self = shift; my $request = shift; _die( '->request was called without a request parameter' ) unless $request; $request = $self->_modify_request( $request ); if ( $request->method eq 'GET' || $request->method eq 'POST' ) { $self->_push_page_stack(); } return $self->_update_page($request, $self->_make_request( $request, @_ )); } sub update_html { my $self = shift; my $html = shift; $self->_reset_page; $self->{ct} = 'text/html'; $self->{content} = $html; return; } sub credentials { my $self = shift; # The latest LWP::UserAgent also supports 2 arguments, # in which case the first is host:port if (@_ == 4 || (@_ == 2 && $_[0] =~ /:\d+$/)) { return $self->SUPER::credentials(@_); } @_ == 2 or $self->die( 'Invalid # of args for overridden credentials()' ); return @$self{qw( __username __password )} = @_; } sub get_basic_credentials { my $self = shift; my @cred = grep { defined } @$self{qw( __username __password )}; return @cred if @cred == 2; return $self->SUPER::get_basic_credentials(@_); } sub clear_credentials { my $self = shift; delete @$self{qw( __username __password )}; } sub _update_page { my ($self, $request, $res) = @_; $self->{req} = $request; $self->{redirected_uri} = $request->uri->as_string; $self->{res} = $res; $self->{status} = $res->code; $self->{base} = $res->base; $self->{ct} = $res->content_type || ''; if ( $res->is_success ) { $self->{uri} = $self->{redirected_uri}; $self->{last_uri} = $self->{uri}; } if ( $res->is_error ) { if ( $self->{autocheck} ) { $self->die( 'Error ', $request->method, 'ing ', $request->uri, ': ', $res->message ); } } $self->_reset_page; # Try to decode the content. Undef will be returned if there's nothing to decompress. # See docs in HTTP::Message for details. Do we need to expose the options there? my $content = $res->decoded_content(); $content = $res->content if (not defined $content); $content .= _taintedness(); if ($self->is_html) { $self->update_html($content); } else { $self->{content} = $content; } return $res; } # _update_page our $_taintbrush; # This is lifted wholesale from Test::Taint sub _taintedness { return $_taintbrush if defined $_taintbrush; # Somehow we need to get some taintedness into our $_taintbrush. # Let's try the easy way first. Either of these should be # tainted, unless somebody has untainted them, so this # will almost always work on the first try. # (Unless, of course, taint checking has been turned off!) $_taintbrush = substr("$0$^X", 0, 0); return $_taintbrush if tainted( $_taintbrush ); # Let's try again. Maybe somebody cleaned those. $_taintbrush = substr(join('', grep { defined } @ARGV, %ENV), 0, 0); return $_taintbrush if tainted( $_taintbrush ); # If those don't work, go try to open some file from some unsafe # source and get data from them. That data is tainted. # (Yes, even reading from /dev/null works!) for my $filename ( qw(/dev/null / . ..), values %INC, $0, $^X ) { if ( open my $fh, '<', $filename ) { my $data; if ( defined sysread $fh, $data, 1 ) { $_taintbrush = substr( $data, 0, 0 ); last if tainted( $_taintbrush ); } } } # Sanity check die "Our taintbrush should have zero length!" if length $_taintbrush; return $_taintbrush; } sub _modify_request { my $self = shift; my $req = shift; # add correct Accept-Encoding header to restore compliance with # http://www.freesoft.org/CIE/RFC/2068/158.htm # http://use.perl.org/~rhesa/journal/25952 if (not $req->header( 'Accept-Encoding' ) ) { # "identity" means "please! unencoded content only!" $req->header( 'Accept-Encoding', $HAS_ZLIB ? 'gzip' : 'identity' ); } my $last = $self->{last_uri}; if ( $last ) { $last = $last->as_string if ref($last); $req->header( Referer => $last ); } while ( my($key,$value) = each %{$self->{headers}} ) { if ( defined $value ) { $req->header( $key => $value ); } else { $req->remove_header( $key ); } } return $req; } sub _make_request { my $self = shift; return $self->SUPER::request(@_); } sub _reset_page { my $self = shift; $self->{links} = undef; $self->{images} = undef; $self->{forms} = undef; $self->{current_form} = undef; $self->{title} = undef; $self->{text} = undef; return; } my %link_tags = ( a => 'href', area => 'href', frame => 'src', iframe => 'src', link => 'href', meta => 'content', ); sub _extract_links { my $self = shift; $self->{links} = []; if ( defined $self->{content} ) { my $parser = HTML::TokeParser->new(\$self->{content}); while ( my $token = $parser->get_tag( keys %link_tags ) ) { my $link = $self->_link_from_token( $token, $parser ); push( @{$self->{links}}, $link ) if $link; } # while } return; } my %image_tags = ( img => 'src', input => 'src', ); sub _extract_images { my $self = shift; $self->{images} = []; if ( defined $self->{content} ) { my $parser = HTML::TokeParser->new(\$self->{content}); while ( my $token = $parser->get_tag( keys %image_tags ) ) { my $image = $self->_image_from_token( $token, $parser ); push( @{$self->{images}}, $image ) if $image; } # while } return; } sub _image_from_token { my $self = shift; my $token = shift; my $parser = shift; my $tag = $token->[0]; my $attrs = $token->[1]; if ( $tag eq 'input' ) { my $type = $attrs->{type} or return; return unless $type eq 'image'; } require WWW::Mechanize::Image; return WWW::Mechanize::Image->new({ tag => $tag, base => $self->base, url => $attrs->{src}, name => $attrs->{name}, height => $attrs->{height}, width => $attrs->{width}, alt => $attrs->{alt}, }); } sub _link_from_token { my $self = shift; my $token = shift; my $parser = shift; my $tag = $token->[0]; my $attrs = $token->[1]; my $url = $attrs->{$link_tags{$tag}}; my $text; my $name; if ( $tag eq 'a' ) { $text = $parser->get_trimmed_text("/$tag"); $text = '' unless defined $text; my $onClick = $attrs->{onclick}; if ( $onClick && ($onClick =~ /^window\.open\(\s*'([^']+)'/) ) { $url = $1; } elsif( $url && $url =~ /^javascript\:\s*(?:void\(\s*)?window\.open\(\s*'([^']+)'/s ){ $url = $1; } } # a # Of the tags we extract from, only 'AREA' has an alt tag # The rest should have a 'name' attribute. # ... but we don't do anything with that bit of wisdom now. $name = $attrs->{name}; if ( $tag eq 'meta' ) { my $equiv = $attrs->{'http-equiv'}; my $content = $attrs->{'content'}; return unless $equiv && (lc $equiv eq 'refresh') && defined $content; if ( $content =~ /^\d+\s*;\s*url\s*=\s*(\S+)/i ) { $url = $1; $url =~ s/^"(.+)"$/$1/ or $url =~ s/^'(.+)'$/$1/; } else { undef $url; } } # meta return unless defined $url; # probably just a name link or require WWW::Mechanize::Link; return WWW::Mechanize::Link->new({ url => $url, text => $text, name => $name, tag => $tag, base => $self->base, attrs => $attrs, }); } # _link_from_token sub _extract_forms { my $self = shift; my @forms = HTML::Form->parse( $self->content, $self->base ); $self->{forms} = \@forms; for my $form ( @forms ) { for my $input ($form->inputs) { if ($input->type eq 'file') { $input->value( undef ); } } } return; } sub _push_page_stack { my $self = shift; my $req = $self->{req}; my $res = $self->{res}; return unless $req && $res && $self->stack_depth; # Don't push anything if it's a virgin object my $stack = $self->{page_stack} ||= []; if ( @{$stack} >= $self->stack_depth ) { shift @{$stack}; } push( @{$stack}, { req => $req, res => $res } ); return 1; } sub warn { my $self = shift; return unless my $handler = $self->{onwarn}; return if $self->quiet; return $handler->(@_); } sub die { my $self = shift; return unless my $handler = $self->{onerror}; return $handler->(@_); } # NOT an object method! sub _warn { require Carp; return &Carp::carp; ## no critic } # NOT an object method! sub _die { require Carp; return &Carp::croak; ## no critic } 1; # End of module __END__ =pod =encoding UTF-8 =head1 NAME WWW::Mechanize - Handy web browsing in a Perl object =head1 VERSION version 1.86 =head1 SYNOPSIS WWW::Mechanize supports performing a sequence of page fetches including following links and submitting forms. Each fetched page is parsed and its links and forms are extracted. A link or a form can be selected, form fields can be filled and the next page can be fetched. Mech also stores a history of the URLs you've visited, which can be queried and revisited. use WWW::Mechanize; my $mech = WWW::Mechanize->new(); $mech->get( $url ); $mech->follow_link( n => 3 ); $mech->follow_link( text_regex => qr/download this/i ); $mech->follow_link( url => 'http://host.com/index.html' ); $mech->submit_form( form_number => 3, fields => { username => 'mungo', password => 'lost-and-alone', } ); $mech->submit_form( form_name => 'search', fields => { query => 'pot of gold', }, button => 'Search Now' ); =head1 DESCRIPTION C, or Mech for short, is a Perl module for stateful programmatic web browsing, used for automating interaction with websites. Features include: =over 4 =item * All HTTP methods =item * High-level hyperlink and HTML form support, without having to parse HTML yourself =item * SSL support =item * Automatic cookies =item * Custom HTTP headers =item * Automatic handling of redirections =item * Proxies =item * HTTP authentication =back Mech is well suited for use in testing web applications. If you use one of the Test::*, like L modules, you can check the fetched content and use that as input to a test call. use Test::More; like( $mech->content(), qr/$expected/, "Got expected content" ); Each page fetch stores its URL in a history stack which you can traverse. $mech->back(); If you want finer control over your page fetching, you can use these methods. C and C are just high level wrappers around them. $mech->find_link( n => $number ); $mech->form_number( $number ); $mech->form_name( $name ); $mech->field( $name, $value ); $mech->set_fields( %field_values ); $mech->set_visible( @criteria ); $mech->click( $button ); L is a proper subclass of L and you can also use any of L's methods. $mech->add_header($name => $value); Please note that Mech does NOT support JavaScript, you need additional software for that. Please check L for more. =head1 IMPORTANT LINKS =over 4 =item * L The queue for bugs & enhancements in WWW::Mechanize and Test::WWW::Mechanize. Please note that the queue at L is no longer maintained. =item * L The CPAN documentation page for Mechanize. =item * L Frequently asked questions. Make sure you read here FIRST. =back =head1 CONSTRUCTOR AND STARTUP =head2 new() Creates and returns a new WWW::Mechanize object, hereafter referred to as the "agent". my $mech = WWW::Mechanize->new() The constructor for WWW::Mechanize overrides two of the parms to the LWP::UserAgent constructor: agent => 'WWW-Mechanize/#.##' cookie_jar => {} # an empty, memory-only HTTP::Cookies object You can override these overrides by passing parms to the constructor, as in: my $mech = WWW::Mechanize->new( agent => 'wonderbot 1.01' ); If you want none of the overhead of a cookie jar, or don't want your bot accepting cookies, you have to explicitly disallow it, like so: my $mech = WWW::Mechanize->new( cookie_jar => undef ); Here are the parms that WWW::Mechanize recognizes. These do not include parms that L recognizes. =over 4 =item * C<< autocheck => [0|1] >> Checks each request made to see if it was successful. This saves you the trouble of manually checking yourself. Any errors found are errors, not warnings. The default value is ON, unless it's being subclassed, in which case it is OFF. This means that standalone L instances have autocheck turned on, which is protective for the vast majority of Mech users who don't bother checking the return value of get() and post() and can't figure why their code fails. However, if L is subclassed, such as for L or L, this may not be an appropriate default, so it's off. =item * C<< noproxy => [0|1] >> Turn off the automatic call to the L C function. This needs to be explicitly turned off if you're using L to access a https site via a proxy server. Note: you still need to set your HTTPS_PROXY environment variable as appropriate. =item * C<< onwarn => \&func >> Reference to a C-compatible function, such as C<< L::carp >>, that is called when a warning needs to be shown. If this is set to C, no warnings will ever be shown. However, it's probably better to use the C method to control that behavior. If this value is not passed, Mech uses C if L is installed, or C if not. =item * C<< onerror => \&func >> Reference to a C-compatible function, such as C<< L::croak >>, that is called when there's a fatal error. If this is set to C, no errors will ever be shown. If this value is not passed, Mech uses C if L is installed, or C if not. =item * C<< quiet => [0|1] >> Don't complain on warnings. Setting C<< quiet => 1 >> is the same as calling C<< $mech->quiet(1) >>. Default is off. =item * C<< stack_depth => $value >> Sets the depth of the page stack that keeps track of all the downloaded pages. Default is effectively infinite stack size. If the stack is eating up your memory, then set this to a smaller number, say 5 or 10. Setting this to zero means Mech will keep no history. =back To support forms, WWW::Mechanize's constructor pushes POST on to the agent's C list (see also L.) =head2 $mech->agent_alias( $alias ) Sets the user agent string to the expanded version from a table of actual user strings. I<$alias> can be one of the following: =over 4 =item * Windows IE 6 =item * Windows Mozilla =item * Mac Safari =item * Mac Mozilla =item * Linux Mozilla =item * Linux Konqueror =back then it will be replaced with a more interesting one. For instance, $mech->agent_alias( 'Windows IE 6' ); sets your User-Agent to Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1) The list of valid aliases can be returned from C. The current list is: =over =item * Windows IE 6 =item * Windows Mozilla =item * Mac Safari =item * Mac Mozilla =item * Linux Mozilla =item * Linux Konqueror =back =head2 known_agent_aliases() Returns a list of all the agent aliases that Mech knows about. =head1 PAGE-FETCHING METHODS =head2 $mech->get( $uri ) Given a URL/URI, fetches it. Returns an L object. I<$uri> can be a well-formed URL string, a L object, or a L object. The results are stored internally in the agent object, but you don't know that. Just use the accessors listed below. Poking at the internals is deprecated and subject to change in the future. C is a well-behaved overloaded version of the method in L. This lets you do things like $mech->get( $uri, ':content_file' => $tempfile ); and you can rest assured that the parms will get filtered down appropriately. B Because C<:content_file> causes the page contents to be stored in a file instead of the response object, some Mech functions that expect it to be there won't work as expected. Use with caution. =head2 $mech->put( $uri, content => $content ) PUTs I<$content> to $uri. Returns an L object. I<$uri> can be a well-formed URI string, a L object, or a L object. =head2 $mech->reload() Acts like the reload button in a browser: repeats the current request. The history (as per the L method) is not altered. Returns the L object from the reload, or C if there's no current request. =head2 $mech->back() The equivalent of hitting the "back" button in a browser. Returns to the previous page. Won't go back past the first page. (Really, what would it do if it could?) Returns true if it could go back, or false if not. =head2 $mech->history_count() This returns the number of items in the browser history. This number I include the most recently made request. =head2 $mech->history($n) This returns the Ith item in history. The 0th item is the most recent request and response, which would be acted on by methods like C. The 1th item is the state you'd return to if you called C. The maximum useful value for C<$n> is C<< $mech->history_count - 1 >>. Requests beyond that bound will return C. History items are returned as hash references, in the form: { req => $http_request, res => $http_response } =head1 STATUS METHODS =head2 $mech->success() Returns a boolean telling whether the last request was successful. If there hasn't been an operation yet, returns false. This is a convenience function that wraps C<< $mech->res->is_success >>. =head2 $mech->uri() Returns the current URI as a L object. This object stringifies to the URI itself. =head2 $mech->response() / $mech->res() Return the current response as an L object. Synonym for C<< $mech->response() >> =head2 $mech->status() Returns the HTTP status code of the response. This is a 3-digit number like 200 for OK, 404 for not found, and so on. =head2 $mech->ct() / $mech->content_type() Returns the content type of the response. =head2 $mech->base() Returns the base URI for the current response =head2 $mech->forms() When called in a list context, returns a list of the forms found in the last fetched page. In a scalar context, returns a reference to an array with those forms. The forms returned are all L objects. =head2 $mech->current_form() Returns the current form as an L object. =head2 $mech->links() When called in a list context, returns a list of the links found in the last fetched page. In a scalar context it returns a reference to an array with those links. Each link is a L object. =head2 $mech->is_html() Returns true/false on whether our content is HTML, according to the HTTP headers. =head2 $mech->title() Returns the contents of the C<< >> tag, as parsed by L<HTML::HeadParser>. Returns undef if the content is not HTML. =head1 CONTENT-HANDLING METHODS =head2 $mech->content(...) Returns the content that the mech uses internally for the last page fetched. Ordinarily this is the same as C<< $mech->response()->decoded_content() >>, but this may differ for HTML documents if L<< update_html|/$mech->update_html( $html ) >> is overloaded (in which case the value passed to the base-class implementation of same will be returned), and/or extra named arguments are passed to I<content()>: =over 2 =item I<< $mech->content( format => 'text' ) >> Returns a text-only version of the page, with all HTML markup stripped. This feature requires I<HTML::TreeBuilder> to be installed, or a fatal error will be thrown. This works only if the contents are HTML. =item I<< $mech->content( base_href => [$base_href|undef] ) >> Returns the HTML document, modified to contain a C<< <base href="$base_href"> >> mark-up in the header. I<$base_href> is C<< $mech->base() >> if not specified. This is handy to pass the HTML to e.g. L<HTML::Display>. This works only if the contents are HTML. =item I<< $mech->content( raw => 1 ) >> Returns C<< $self->response()->content() >>, i.e. the raw contents from the response. =item I<< $mech->content( decoded_by_headers => 1 ) >> Returns the content after applying all C<Content-Encoding> headers but with not additional mangling. =item I<< $mech->content( charset => $charset ) >> Returns C<< $self->response()->decoded_content(charset => $charset) >> (see L<HTTP::Response> for details). =back To preserve backwards compatibility, additional parameters will be ignored unless none of C<< raw | decoded_by_headers | charset >> is specified and the text is HTML, in which case an error will be triggered. =head2 $mech->text() Returns the text of the current HTML content. If the content isn't HTML, $mech will die. The text is extracted by parsing the content, and then the extracted text is cached, so don't worry about performance of calling this repeatedly. =head1 LINK METHODS =head2 $mech->links() Lists all the links on the current page. Each link is a WWW::Mechanize::Link object. In list context, returns a list of all links. In scalar context, returns an array reference of all links. =head2 $mech->follow_link(...) Follows a specified link on the page. You specify the match to be found using the same parms that C<L<find_link()>> uses. Here some examples: =over 4 =item * 3rd link called "download" $mech->follow_link( text => 'download', n => 3 ); =item * first link where the URL has "download" in it, regardless of case: $mech->follow_link( url_regex => qr/download/i ); or $mech->follow_link( url_regex => qr/(?i:download)/ ); =item * 3rd link on the page $mech->follow_link( n => 3 ); =item * the link with the url $mech->follow_link( url => '/other/page' ); or $mech->follow_link( url => 'http://example.com/page' ); =back Returns the result of the GET method (an HTTP::Response object) if a link was found. If the page has no links, or the specified link couldn't be found, returns undef. =head2 $mech->find_link( ... ) Finds a link in the currently fetched page. It returns a L<WWW::Mechanize::Link> object which describes the link. (You'll probably be most interested in the C<url()> property.) If it fails to find a link it returns undef. You can take the URL part and pass it to the C<get()> method. If that's your plan, you might as well use the C<follow_link()> method directly, since it does the C<get()> for you automatically. Note that C<< <FRAME SRC="..."> >> tags are parsed out of the the HTML and treated as links so this method works with them. You can select which link to find by passing in one or more of these key/value pairs: =over 4 =item * C<< text => 'string', >> and C<< text_regex => qr/regex/, >> C<text> matches the text of the link against I<string>, which must be an exact match. To select a link with text that is exactly "download", use $mech->find_link( text => 'download' ); C<text_regex> matches the text of the link against I<regex>. To select a link with text that has "download" anywhere in it, regardless of case, use $mech->find_link( text_regex => qr/download/i ); Note that the text extracted from the page's links are trimmed. For example, C<< <a> foo </a> >> is stored as 'foo', and searching for leading or trailing spaces will fail. =item * C<< url => 'string', >> and C<< url_regex => qr/regex/, >> Matches the URL of the link against I<string> or I<regex>, as appropriate. The URL may be a relative URL, like F<foo/bar.html>, depending on how it's coded on the page. =item * C<< url_abs => string >> and C<< url_abs_regex => regex >> Matches the absolute URL of the link against I<string> or I<regex>, as appropriate. The URL will be an absolute URL, even if it's relative in the page. =item * C<< name => string >> and C<< name_regex => regex >> Matches the name of the link against I<string> or I<regex>, as appropriate. =item * C<< id => string >> and C<< id_regex => regex >> Matches the attribute 'id' of the link against I<string> or I<regex>, as appropriate. =item * C<< class => string >> and C<< class_regex => regex >> Matches the attribute 'class' of the link against I<string> or I<regex>, as appropriate. =item * C<< tag => string >> and C<< tag_regex => regex >> Matches the tag that the link came from against I<string> or I<regex>, as appropriate. The C<tag_regex> is probably most useful to check for more than one tag, as in: $mech->find_link( tag_regex => qr/^(a|frame)$/ ); The tags and attributes looked at are defined below, at L<< $mech->find_link() : link format >>. =back If C<n> is not specified, it defaults to 1. Therefore, if you don't specify any parms, this method defaults to finding the first link on the page. Note that you can specify multiple text or URL parameters, which will be ANDed together. For example, to find the first link with text of "News" and with "cnn.com" in the URL, use: $mech->find_link( text => 'News', url_regex => qr/cnn\.com/ ); The return value is a reference to an array containing a L<WWW::Mechanize::Link> object for every link in C<< $self->content >>. The links come from the following: =over 4 =item C<< <a href=...> >> =item C<< <area href=...> >> =item C<< <frame src=...> >> =item C<< <iframe src=...> >> =item C<< <link href=...> >> =item C<< <meta content=...> >> =back =head2 $mech->find_all_links( ... ) Returns all the links on the current page that match the criteria. The method for specifying link criteria is the same as in C<L</find_link()>>. Each of the links returned is a L<WWW::Mechanize::Link> object. In list context, C<find_all_links()> returns a list of the links. Otherwise, it returns a reference to the list of links. C<find_all_links()> with no parameters returns all links in the page. =head2 $mech->find_all_inputs( ... criteria ... ) find_all_inputs() returns an array of all the input controls in the current form whose properties match all of the regexes passed in. The controls returned are all descended from HTML::Form::Input. If no criteria are passed, all inputs will be returned. If there is no current page, there is no form on the current page, or there are no submit controls in the current form then the return will be an empty array. You may use a regex or a literal string: # get all textarea controls whose names begin with "customer" my @customer_text_inputs = $mech->find_all_inputs( type => 'textarea', name_regex => qr/^customer/, ); # get all text or textarea controls called "customer" my @customer_text_inputs = $mech->find_all_inputs( type_regex => qr/^(text|textarea)$/, name => 'customer', ); =head2 $mech->find_all_submits( ... criteria ... ) C<find_all_submits()> does the same thing as C<find_all_inputs()> except that it only returns controls that are submit controls, ignoring other types of input controls like text and checkboxes. =head1 IMAGE METHODS =head2 $mech->images Lists all the images on the current page. Each image is a WWW::Mechanize::Image object. In list context, returns a list of all images. In scalar context, returns an array reference of all images. =head2 $mech->find_image() Finds an image in the current page. It returns a L<WWW::Mechanize::Image> object which describes the image. If it fails to find an image it returns undef. You can select which image to find by passing in one or more of these key/value pairs: =over 4 =item * C<< alt => 'string' >> and C<< alt_regex => qr/regex/, >> C<alt> matches the ALT attribute of the image against I<string>, which must be an exact match. To select a image with an ALT tag that is exactly "download", use $mech->find_image( alt => 'download' ); C<alt_regex> matches the ALT attribute of the image against a regular expression. To select an image with an ALT attribute that has "download" anywhere in it, regardless of case, use $mech->find_image( alt_regex => qr/download/i ); =item * C<< url => 'string', >> and C<< url_regex => qr/regex/, >> Matches the URL of the image against I<string> or I<regex>, as appropriate. The URL may be a relative URL, like F<foo/bar.html>, depending on how it's coded on the page. =item * C<< url_abs => string >> and C<< url_abs_regex => regex >> Matches the absolute URL of the image against I<string> or I<regex>, as appropriate. The URL will be an absolute URL, even if it's relative in the page. =item * C<< tag => string >> and C<< tag_regex => regex >> Matches the tag that the image came from against I<string> or I<regex>, as appropriate. The C<tag_regex> is probably most useful to check for more than one tag, as in: $mech->find_image( tag_regex => qr/^(img|input)$/ ); The tags supported are C<< <img> >> and C<< <input> >>. =back If C<n> is not specified, it defaults to 1. Therefore, if you don't specify any parms, this method defaults to finding the first image on the page. Note that you can specify multiple ALT or URL parameters, which will be ANDed together. For example, to find the first image with ALT text of "News" and with "cnn.com" in the URL, use: $mech->find_image( image => 'News', url_regex => qr/cnn\.com/ ); The return value is a reference to an array containing a L<WWW::Mechanize::Image> object for every image in C<< $self->content >>. =head2 $mech->find_all_images( ... ) Returns all the images on the current page that match the criteria. The method for specifying image criteria is the same as in C<L</find_image()>>. Each of the images returned is a L<WWW::Mechanize::Image> object. In list context, C<find_all_images()> returns a list of the images. Otherwise, it returns a reference to the list of images. C<find_all_images()> with no parameters returns all images in the page. =head1 FORM METHODS These methods let you work with the forms on a page. The idea is to choose a form that you'll later work with using the field methods below. =head2 $mech->forms Lists all the forms on the current page. Each form is an L<HTML::Form> object. In list context, returns a list of all forms. In scalar context, returns an array reference of all forms. =head2 $mech->form_number($number) Selects the I<number>th form on the page as the target for subsequent calls to C<L</field()>> and C<L</click()>>. Also returns the form that was selected. If it is found, the form is returned as an L<HTML::Form> object and set internally for later use with Mech's form methods such as C<L</field()>> and C<L</click()>>. Emits a warning and returns undef if no form is found. The first form is number 1, not zero. =head2 $mech->form_name( $name ) Selects a form by name. If there is more than one form on the page with that name, then the first one is used, and a warning is generated. If it is found, the form is returned as an L<HTML::Form> object and set internally for later use with Mech's form methods such as C<L</field()>> and C<L</click()>>. Returns undef if no form is found. =head2 $mech->form_id( $name ) Selects a form by ID. If there is more than one form on the page with that ID, then the first one is used, and a warning is generated. If it is found, the form is returned as an L<HTML::Form> object and set internally for later use with Mech's form methods such as C<L</field()>> and C<L</click()>>. If no form is found it returns C<undef>. This will also trigger a warning, unless C<quiet> is enabled. =head2 $mech->all_forms_with_fields( @fields ) Selects a form by passing in a list of field names it must contain. All matching forms (perhaps none) are returned as a list of L<HTML::Form> objects. =head2 $mech->form_with_fields( @fields ) Selects a form by passing in a list of field names it must contain. If there is more than one form on the page with that matches, then the first one is used, and a warning is generated. If it is found, the form is returned as an L<HTML::Form> object and set internally for later used with Mech's form methods such as C<L</field()>> and C<L</click()>>. Returns undef and emits a warning if no form is found. Note that this functionality requires libwww-perl 5.69 or higher. =head2 $mech->all_forms_with( $attr1 => $value1, $attr2 => $value2, ... ) Searches for forms with arbitrary attribute/value pairs within the E<lt>formE<gt> tag. (Currently does not work for attribute C<action> due to implementation details of L<HTML::Form>.) When given more than one pair, all criteria must match. Using C<undef> as value means that the attribute in question may not be present. All matching forms (perhaps none) are returned as a list of L<HTML::Form> objects. =head2 $mech->form_with( $attr1 => $value1, $attr2 => $value2, ... ) Searches for forms with arbitrary attribute/value pairs within the E<lt>formE<gt> tag. (Currently does not work for attribute C<action> due to implementation details of L<HTML::Form>.) When given more than one pair, all criteria must match. Using C<undef> as value means that the attribute in question may not be present. If it is found, the form is returned as an L<HTML::Form> object and set internally for later used with Mech's form methods such as C<L</field()>> and C<L</click()>>. Returns undef if no form is found. =head1 FIELD METHODS These methods allow you to set the values of fields in a given form. =head2 $mech->field( $name, $value, $number ) =head2 $mech->field( $name, \@values, $number ) Given the name of a field, set its value to the value specified. This applies to the current form (as set by the L</form_name()> or L</form_number()> method or defaulting to the first form on the page). The optional I<$number> parameter is used to distinguish between two fields with the same name. The fields are numbered from 1. =head2 $mech->select($name, $value) =head2 $mech->select($name, \@values) Given the name of a C<select> field, set its value to the value specified. If the field is not C<< <select multiple> >> and the C<$value> is an array, only the B<first> value will be set. [Note: the documentation previously claimed that only the last value would be set, but this was incorrect.] Passing C<$value> as a hash with an C<n> key selects an item by number (e.g. C<< {n => 3} >> or C<< {n => [2,4]} >>). The numbering starts at 1. This applies to the current form. If you have a field with C<< <select multiple> >> and you pass a single C<$value>, then C<$value> will be added to the list of fields selected, without clearing the others. However, if you pass an array reference, then all previously selected values will be cleared. Returns true on successfully setting the value. On failure, returns false and calls C<< $self>warn() >> with an error message. =head2 $mech->set_fields( $name => $value ... ) This method sets multiple fields of the current form. It takes a list of field name and value pairs. If there is more than one field with the same name, the first one found is set. If you want to select which of the duplicate field to set, use a value which is an anonymous array which has the field value and its number as the 2 elements. # set the second foo field $mech->set_fields( $name => [ 'foo', 2 ] ); The fields are numbered from 1. This applies to the current form. =head2 $mech->set_visible( @criteria ) This method sets fields of the current form without having to know their names. So if you have a login screen that wants a username and password, you do not have to fetch the form and inspect the source (or use the F<mech-dump> utility, installed with WWW::Mechanize) to see what the field names are; you can just say $mech->set_visible( $username, $password ); and the first and second fields will be set accordingly. The method is called set_I<visible> because it acts only on visible fields; hidden form inputs are not considered. The order of the fields is the order in which they appear in the HTML source which is nearly always the order anyone viewing the page would think they are in, but some creative work with tables could change that; caveat user. Each element in C<@criteria> is either a field value or a field specifier. A field value is a scalar. A field specifier allows you to specify the I<type> of input field you want to set and is denoted with an arrayref containing two elements. So you could specify the first radio button with $mech->set_visible( [ radio => 'KCRW' ] ); Field values and specifiers can be intermixed, hence $mech->set_visible( 'fred', 'secret', [ option => 'Checking' ] ); would set the first two fields to "fred" and "secret", and the I<next> C<OPTION> menu field to "Checking". The possible field specifier types are: "text", "password", "hidden", "textarea", "file", "image", "submit", "radio", "checkbox" and "option". C<set_visible> returns the number of values set. =head2 $mech->tick( $name, $value [, $set] ) "Ticks" the first checkbox that has both the name and value associated with it on the current form. Dies if there is no named check box for that value. Passing in a false value as the third optional argument will cause the checkbox to be unticked. =head2 $mech->untick($name, $value) Causes the checkbox to be unticked. Shorthand for C<tick($name,$value,undef)> =head2 $mech->value( $name [, $number] ) Given the name of a field, return its value. This applies to the current form. The optional I<$number> parameter is used to distinguish between two fields with the same name. The fields are numbered from 1. If the field is of type file (file upload field), the value is always cleared to prevent remote sites from downloading your local files. To upload a file, specify its file name explicitly. =head2 $mech->click( $button [, $x, $y] ) Has the effect of clicking a button on the current form. The first argument is the name of the button to be clicked. The second and third arguments (optional) allow you to specify the (x,y) coordinates of the click. If there is only one button on the form, C<< $mech->click() >> with no arguments simply clicks that one button. Returns an L<HTTP::Response> object. =head2 $mech->click_button( ... ) Has the effect of clicking a button on the current form by specifying its name, value, or index. Its arguments are a list of key/value pairs. Only one of name, number, input or value must be specified in the keys. =over 4 =item * C<< name => name >> Clicks the button named I<name> in the current form. =item * C<< id => id >> Clicks the button with the id I<id> in the current form. =item * C<< number => n >> Clicks the I<n>th button in the current form. Numbering starts at 1. =item * C<< value => value >> Clicks the button with the value I<value> in the current form. =item * C<< input => $inputobject >> Clicks on the button referenced by $inputobject, an instance of L<HTML::Form::SubmitInput> obtained e.g. from $mech->current_form()->find_input( undef, 'submit' ) $inputobject must belong to the current form. =item * C<< x => x >> =item * C<< y => y >> These arguments (optional) allow you to specify the (x,y) coordinates of the click. =back =head2 $mech->submit() Submits the current form, without specifying a button to click. Actually, no button is clicked at all. Returns an L<HTTP::Response> object. This used to be a synonym for C<< $mech->click( 'submit' ) >>, but is no longer so. =head2 $mech->submit_form( ... ) This method lets you select a form from the previously fetched page, fill in its fields, and submit it. It combines the form_number/form_name, set_fields and click methods into one higher level call. Its arguments are a list of key/value pairs, all of which are optional. =over 4 =item * C<< fields => \%fields >> Specifies the fields to be filled in the current form. =item * C<< with_fields => \%fields >> Probably all you need for the common case. It combines a smart form selector and data setting in one operation. It selects the first form that contains all fields mentioned in C<\%fields>. This is nice because you don't need to know the name or number of the form to do this. (calls C<L</form_with_fields()>> and C<L</set_fields()>>). If you choose this, the form_number, form_name, form_id and fields options will be ignored. =item * C<< form_number => n >> Selects the I<n>th form (calls C<L</form_number()>>). If this parm is not specified, the currently-selected form is used. =item * C<< form_name => name >> Selects the form named I<name> (calls C<L</form_name()>>) =item * C<< form_id => ID >> Selects the form with ID I<ID> (calls C<L</form_id()>>) =item * C<< button => button >> Clicks on button I<button> (calls C<L</click()>>) =item * C<< x => x, y => y >> Sets the x or y values for C<L</click()>> =item * C<< strict_forms => bool >> Sets the HTML::Form strict flag which causes form submission to croak if any of the passed fields don't exist on the page, and/or a value doesn't exist in a select element. By default HTML::Form defaults this value to false. =back If no form is selected, the first form found is used. If I<button> is not passed, then the C<L</submit()>> method is used instead. If you want to submit a file and get its content from a scalar rather than a file in the filesystem, you can use: $mech->submit_form(with_fields => { logfile => [ [ undef, 'whatever', Content => $content ], 1 ] } ); Returns an L<HTTP::Response> object. =head1 MISCELLANEOUS METHODS =head2 $mech->add_header( name => $value [, name => $value... ] ) Sets HTTP headers for the agent to add or remove from the HTTP request. $mech->add_header( Encoding => 'text/klingon' ); If a I<value> is C<undef>, then that header will be removed from any future requests. For example, to never send a Referer header: $mech->add_header( Referer => undef ); If you want to delete a header, use C<delete_header>. Returns the number of name/value pairs added. B<NOTE>: This method was very different in WWW::Mechanize before 1.00. Back then, the headers were stored in a package hash, not as a member of the object instance. Calling C<add_header()> would modify the headers for every WWW::Mechanize object, even after your object no longer existed. =head2 $mech->delete_header( name [, name ... ] ) Removes HTTP headers from the agent's list of special headers. For instance, you might need to do something like: # Don't send a Referer for this URL $mech->add_header( Referer => undef ); # Get the URL $mech->get( $url ); # Back to the default behavior $mech->delete_header( 'Referer' ); =head2 $mech->quiet(true/false) Allows you to suppress warnings to the screen. $mech->quiet(0); # turns on warnings (the default) $mech->quiet(1); # turns off warnings $mech->quiet(); # returns the current quietness status =head2 $mech->stack_depth( $max_depth ) Get or set the page stack depth. Use this if you're doing a lot of page scraping and running out of memory. A value of 0 means "no history at all." By default, the max stack depth is humongously large, effectively keeping all history. =head2 $mech->save_content( $filename, %opts ) Dumps the contents of C<< $mech->content >> into I<$filename>. I<$filename> will be overwritten. Dies if there are any errors. If the content type does not begin with "text/", then the content is saved in binary mode (i.e. C<binmode()> is set on the output filehandle). Additional arguments can be passed as I<key>/I<value> pairs: =over =item I<< $mech->save_content( $filename, binary => 1 ) >> Filehandle is set with C<binmode> to C<:raw> and contents are taken calling C<< $self->content(decoded_by_headers => 1) >>. Same as calling: $mech->save_content( $filename, binmode => ':raw', decoded_by_headers => 1 ); This I<should> be the safest way to save contents verbatim. =item I<< $mech->save_content( $filename, binmode => $binmode ) >> Filehandle is set to binary mode. If C<$binmode> begins with ':', it is passed as a parameter to C<binmode>: binmode $fh, $binmode; otherwise the filehandle is set to binary mode if C<$binmode> is true: binmode $fh; =item I<all other arguments> are passed as-is to C<< $mech->content(%opts) >>. In particular, C<decoded_by_headers> might come handy if you want to revert the effect of line compression performed by the web server but without further interpreting the contents (e.g. decoding it according to the charset). =back =head2 $mech->dump_headers( [$fh] ) Prints a dump of the HTTP response headers for the most recent response. If I<$fh> is not specified or is undef, it dumps to STDOUT. Unlike the rest of the dump_* methods, $fh can be a scalar. It will be used as a file name. =head2 $mech->dump_links( [[$fh], $absolute] ) Prints a dump of the links on the current page to I<$fh>. If I<$fh> is not specified or is undef, it dumps to STDOUT. If I<$absolute> is true, links displayed are absolute, not relative. =head2 $mech->dump_images( [[$fh], $absolute] ) Prints a dump of the images on the current page to I<$fh>. If I<$fh> is not specified or is undef, it dumps to STDOUT. If I<$absolute> is true, links displayed are absolute, not relative. =head2 $mech->dump_forms( [$fh] ) Prints a dump of the forms on the current page to I<$fh>. If I<$fh> is not specified or is undef, it dumps to STDOUT. =head2 $mech->dump_text( [$fh] ) Prints a dump of the text on the current page to I<$fh>. If I<$fh> is not specified or is undef, it dumps to STDOUT. =head1 OVERRIDDEN LWP::UserAgent METHODS =head2 $mech->clone() Clone the mech object. The clone will be using the same cookie jar as the original mech. =head2 $mech->redirect_ok() An overloaded version of C<redirect_ok()> in L<LWP::UserAgent>. This method is used to determine whether a redirection in the request should be followed. Note that WWW::Mechanize's constructor pushes POST on to the agent's C<requests_redirectable> list. =head2 $mech->request( $request [, $arg [, $size]]) Overloaded version of C<request()> in L<LWP::UserAgent>. Performs the actual request. Normally, if you're using WWW::Mechanize, it's because you don't want to deal with this level of stuff anyway. Note that C<$request> will be modified. Returns an L<HTTP::Response> object. =head2 $mech->update_html( $html ) Allows you to replace the HTML that the mech has found. Updates the forms and links parse-trees that the mech uses internally. Say you have a page that you know has malformed output, and you want to update it so the links come out correctly: my $html = $mech->content; $html =~ s[</option>.{0,3}</td>][</option></select></td>]isg; $mech->update_html( $html ); This method is also used internally by the mech itself to update its own HTML content when loading a page. This means that if you would like to I<systematically> perform the above HTML substitution, you would overload I<update_html> in a subclass thusly: package MyMech; use base 'WWW::Mechanize'; sub update_html { my ($self, $html) = @_; $html =~ s[</option>.{0,3}</td>][</option></select></td>]isg; $self->WWW::Mechanize::update_html( $html ); } If you do this, then the mech will use the tidied-up HTML instead of the original both when parsing for its own needs, and for returning to you through L</content>. Overloading this method is also the recommended way of implementing extra validation steps (e.g. link checkers) for every HTML page received. L</warn> and L</die> would then come in handy to signal validation errors. =head2 $mech->credentials( $username, $password ) Provide credentials to be used for HTTP Basic authentication for all sites and realms until further notice. The four argument form described in L<LWP::UserAgent> is still supported. =head2 $mech->get_basic_credentials( $realm, $uri, $isproxy ) Returns the credentials for the realm and URI. =head2 $mech->clear_credentials() Remove any credentials set up with C<credentials()>. =head1 INHERITED UNCHANGED LWP::UserAgent METHODS As a subclass of L<LWP::UserAgent>, WWW::Mechanize inherits all of L<LWP::UserAgent>'s methods. Many of which are overridden or extended. The following methods are inherited unchanged. View the L<LWP::UserAgent> documentation for their implementation descriptions. This is not meant to be an inclusive list. LWP::UA may have added others. =head2 $mech->head() Inherited from L<LWP::UserAgent>. =head2 $mech->post() Inherited from L<LWP::UserAgent>. =head2 $mech->mirror() Inherited from L<LWP::UserAgent>. =head2 $mech->simple_request() Inherited from L<LWP::UserAgent>. =head2 $mech->is_protocol_supported() Inherited from L<LWP::UserAgent>. =head2 $mech->prepare_request() Inherited from L<LWP::UserAgent>. =head2 $mech->progress() Inherited from L<LWP::UserAgent>. =head1 INTERNAL-ONLY METHODS These methods are only used internally. You probably don't need to know about them. =head2 $mech->_update_page($request, $response) Updates all internal variables in $mech as if $request was just performed, and returns $response. The page stack is B<not> altered by this method, it is up to caller (e.g. L</request>) to do that. =head2 $mech->_modify_request( $req ) Modifies a L<HTTP::Request> before the request is sent out, for both GET and POST requests. We add a C<Referer> header, as well as header to note that we can accept gzip encoded content, if L<Compress::Zlib> is installed. =head2 $mech->_make_request() Convenience method to make it easier for subclasses like L<WWW::Mechanize::Cached> to intercept the request. =head2 $mech->_reset_page() Resets the internal fields that track page parsed stuff. =head2 $mech->_extract_links() Extracts links from the content of a webpage, and populates the C<{links}> property with L<WWW::Mechanize::Link> objects. =head2 $mech->_push_page_stack() The agent keeps a stack of visited pages, which it can pop when it needs to go BACK and so on. The current page needs to be pushed onto the stack before we get a new page, and the stack needs to be popped when BACK occurs. Neither of these take any arguments, they just operate on the $mech object. =head2 warn( @messages ) Centralized warning method, for diagnostics and non-fatal problems. Defaults to calling C<CORE::warn>, but may be overridden by setting C<onwarn> in the constructor. =head2 die( @messages ) Centralized error method. Defaults to calling C<CORE::die>, but may be overridden by setting C<onerror> in the constructor. =head1 WWW::MECHANIZE'S GIT REPOSITORY WWW::Mechanize is hosted at GitHub. Repository: L<https://github.com/libwww-perl/WWW-Mechanize>. Bugs: L<https://github.com/libwww-perl/WWW-Mechanize/issues>. =head1 OTHER DOCUMENTATION =head2 I<Spidering Hacks>, by Kevin Hemenway and Tara Calishain I<Spidering Hacks> from O'Reilly (L<http://www.oreilly.com/catalog/spiderhks/>) is a great book for anyone wanting to know more about screen-scraping and spidering. There are six hacks that use Mech or a Mech derivative: =over 4 =item #21 WWW::Mechanize 101 =item #22 Scraping with WWW::Mechanize =item #36 Downloading Images from Webshots =item #44 Archiving Yahoo! Groups Messages with WWW::Yahoo::Groups =item #64 Super Author Searching =item #73 Scraping TV Listings =back The book was also positively reviewed on Slashdot: L<http://books.slashdot.org/article.pl?sid=03/12/11/2126256> =head1 ONLINE RESOURCES AND SUPPORT =over 4 =item * WWW::Mechanize mailing list The Mech mailing list is at L<http://groups.google.com/group/www-mechanize-users> and is specific to Mechanize, unlike the LWP mailing list below. Although it is a users list, all development discussion takes place here, too. =item * LWP mailing list The LWP mailing list is at L<http://lists.perl.org/showlist.cgi?name=libwww>, and is more user-oriented and well-populated than the WWW::Mechanize list. =item * Perlmonks L<http://perlmonks.org> is an excellent community of support, and many questions about Mech have already been answered there. =item * L<WWW::Mechanize::Examples> A random array of examples submitted by users, included with the Mechanize distribution. =back =head1 ARTICLES ABOUT WWW::MECHANIZE =over 4 =item * L<http://www.ibm.com/developerworks/linux/library/wa-perlsecure/> IBM article "Secure Web site access with Perl" =item * L<http://www.oreilly.com/catalog/googlehks2/chapter/hack84.pdf> Leland Johnson's hack #84 in I<Google Hacks, 2nd Edition> is an example of a production script that uses WWW::Mechanize and HTML::TableContentParser. It takes in keywords and returns the estimated price of these keywords on Google's AdWords program. =item * L<http://www.perl.com/pub/a/2004/06/04/recorder.html> Linda Julien writes about using HTTP::Recorder to create WWW::Mechanize scripts. =item * L<http://www.developer.com/lang/other/article.php/3454041> Jason Gilmore's article on using WWW::Mechanize for scraping sales information from Amazon and eBay. =item * L<http://www.perl.com/pub/a/2003/01/22/mechanize.html> Chris Ball's article about using WWW::Mechanize for scraping TV listings. =item * L<http://www.stonehenge.com/merlyn/LinuxMag/col47.html> Randal Schwartz's article on scraping Yahoo News for images. It's already out of date: He manually walks the list of links hunting for matches, which wouldn't have been necessary if the C<find_link()> method existed at press time. =item * L<http://www.perladvent.org/2002/16th/> WWW::Mechanize on the Perl Advent Calendar, by Mark Fowler. =item * L<http://www.linux-magazin.de/Ausgaben/2004/03/Datenruessel/%28language%29/ger-DE> Michael Schilli's article on Mech and L<WWW::Mechanize::Shell> for the German magazine I<Linux Magazin>. =back =head2 Other modules that use Mechanize Here are modules that use or subclass Mechanize. Let me know of any others: =over 4 =item * L<Finance::Bank::LloydsTSB> =item * L<HTTP::Recorder> Acts as a proxy for web interaction, and then generates WWW::Mechanize scripts. =item * L<Win32::IE::Mechanize> Just like Mech, but using Microsoft Internet Explorer to do the work. =item * L<WWW::Bugzilla> =item * L<WWW::CheckSite> =item * L<WWW::Google::Groups> =item * L<WWW::Hotmail> =item * L<WWW::Mechanize::Cached> =item * L<WWW::Mechanize::Cached::GZip> =item * L<WWW::Mechanize::FormFiller> =item * L<WWW::Mechanize::Shell> =item * L<WWW::Mechanize::Sleepy> =item * L<WWW::Mechanize::SpamCop> =item * L<WWW::Mechanize::Timed> =item * L<WWW::SourceForge> =item * L<WWW::Yahoo::Groups> =item * L<WWW::Scripter> =back =head1 ACKNOWLEDGEMENTS Thanks to the numerous people who have helped out on WWW::Mechanize in one way or another, including Kirrily Robert for the original C<WWW::Automate>, Lyle Hopkins, Damien Clark, Ansgar Burchardt, Gisle Aas, Jeremy Ary, Hilary Holz, Rafael Kitover, Norbert Buchmuller, Dave Page, David Sainty, H.Merijn Brand, Matt Lawrence, Michael Schwern, Adriano Ferreira, Miyagawa, Peteris Krumins, Rafael Kitover, David Steinbrunner, Kevin Falcone, Mike O'Regan, Mark Stosberg, Uri Guttman, Peter Scott, Phillipe Bruhat, Ian Langworth, John Beppu, Gavin Estey, Jim Brandt, Ask Bjoern Hansen, Greg Davies, Ed Silva, Mark-Jason Dominus, Autrijus Tang, Mark Fowler, Stuart Children, Max Maischein, Meng Wong, Prakash Kailasa, Abigail, Jan Pazdziora, Dominique Quatravaux, Scott Lanning, Rob Casey, Leland Johnson, Joshua Gatcomb, Julien Beasley, Abe Timmerman, Peter Stevens, Pete Krawczyk, Tad McClellan, and the late great Iain Truskett. =head1 AUTHOR Andy Lester <andy at petdance.com> =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2004-2016 by Andy Lester. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut �����������������������������������form_with_fields.t����������������������������������������������������������������������������������100644��000765��000024�� 11234�13126734347� 17332� 0����������������������������������������������������������������������������������������������������ustar�00olaf����������������������������staff���������������������������000000��000000��WWW-Mechanize-1.86/t���������������������������������������������������������������������������������������������������������������������������������������������������#!perl -T use warnings; use strict; use Test::More 'no_plan'; use Test::Fatal; use Test::Warnings ':all'; use Test::Deep; use URI::file (); BEGIN { delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)}; # Placates taint-unsafe Cwd.pm in 5.6.1 use_ok( 'WWW::Mechanize' ); } my $mech = WWW::Mechanize->new( cookie_jar => undef, autocheck => 0 ); isa_ok( $mech, 'WWW::Mechanize' ); my $uri = URI::file->new_abs( 't/form_with_fields.html' )->as_string; $mech->get( $uri ); ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; { my $test = 'dies with no input'; like( exception { my $form = $mech->form_with_fields(); }, qr/no fields provided/, $test, ); } { my $form; cmp_deeply( [ warnings { $form = $mech->form_with_fields(qw/1b/) } ], [ re(qr/There are 2 forms with the named fields. The first one was used./) ], 'warning on ambiguous match (1)', ); isa_ok( $form, 'HTML::Form' ); is($form->attr('name'), '1st_form', 'first form matches'); } { my $form = $mech->form_with_fields('1b', 'opt[2]'); isa_ok( $form, 'HTML::Form' ); is($form->attr('name'), '2nd_form', 'second form matches'); } { my $form; cmp_deeply( [ warnings { $form = $mech->form_with_fields('4a', '4b') } ], [ re(qr/There are 2 forms with the named fields. The first one was used./) ], 'warning on ambiguous match (2)', ); isa_ok( $form, 'HTML::Form' ); is($form->attr('name'), '4th_form_1', 'fourth form matches'); } { my @forms = $mech->all_forms_with( name => '3rd_form_ambiguous' ); is( scalar @forms, 2 ); isa_ok( $forms[0], 'HTML::Form' ); isa_ok( $forms[1], 'HTML::Form' ); is($forms[0]->attr('name'), '3rd_form_ambiguous', 'first result of 3rd_form_ambiguous'); is($forms[0]->attr('name'), '3rd_form_ambiguous', 'second result of 3rd_form_ambiguous'); } { $mech->get($uri); like( exception { $mech->submit_form( with_fields => { 'xx' => '' }, ); }, qr/There is no form with the requested fields/, 'submit_form with no match (1)', ); } { $mech->get($uri); like( exception { $mech->submit_form( with_fields => { '1a' => '' }, form_number => 2, ); }, qr/There is no form that satisfies all the criteria/, 'submit_form with no match (2)', ); } { $mech->get($uri); like( exception { $mech->submit_form( form_number => 2, form_name => '3rd_form_ambiguous', ); }, qr/There is no form that satisfies all the criteria/, 'submit_form with no match (3)', ); } { $mech->get($uri); like( exception { $mech->submit_form( form_name => '3rd_form_ambiguous', ); }, qr/More than one form satisfies all the criteria/, 'submit_form with more than one match', ); } { $mech->get($uri); is( exception { $mech->submit_form( with_fields => { 'x' => '' }, form_name => '3rd_form_ambiguous', ); }, undef, 'submit_form with intersection of two criteria', ); } { $mech->get($uri); is( exception { $mech->submit_form( with_fields => { '1b' => '', 'opt[2]' => '' }, ); }, undef, ' submit_form( with_fields => %data ) ', ); } { $mech->get($uri); is( exception { $mech->submit_form( form_name => '1st_form', fields => { '1c' => 'madeup_field', }, ); }, undef, 'submit_form with invalid field and without strict_forms option succeeds', ); } { $mech->get($uri); like( exception { $mech->submit_form( form_name => '1st_form', fields => { '1c' => 'madeup_field', }, strict_forms => 1, ); }, qr/^No such field '1c'/, 'submit_form with invalid field and strict_forms option fails', ); } { $mech->get($uri); is( exception { $mech->submit_form( form_name => '1st_form', fields => { '1a' => 'value1', '1b' => 'value2', }, strict_forms => 1, ); }, undef, 'submit_form with valid fields and strict_forms option succeeds', ); }��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������page_stack.t����������������������������������������������������������������������������������������100644��000765��000024�� 4452�13126734347� 17165� 0����������������������������������������������������������������������������������������������������ustar�00olaf����������������������������staff���������������������������000000��000000��WWW-Mechanize-1.86/t/local���������������������������������������������������������������������������������������������������������������������������������������������use warnings; use strict; use Test::More; use lib 't/local'; use LocalServer; BEGIN { delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) }; use_ok( 'WWW::Mechanize' ); } my $server = LocalServer->spawn; isa_ok( $server, 'LocalServer' ); STANDARD_STACK: { my $history; my $mech = WWW::Mechanize->new; isa_ok( $mech, 'WWW::Mechanize', 'Created object' ); is( scalar @{$mech->{page_stack}}, 0, 'Page stack starts empty' ); is( $mech->history_count, 0, 'No history count to start' ); is( $mech->history(0), undef, 'No 0th history item yet' ); ok( $mech->get($server->url)->is_success, 'Got start page' ); is( scalar @{$mech->{page_stack}}, 0, 'Page stack empty after first get' ); $history = $mech->history(0); is( $history->{req}->url, $server->url, "0th history is last request"); is( $mech->history(1), undef, 'No 1th history item yet' ); is( $mech->history_count, 1, 'One history count after first get' ); $mech->_push_page_stack(); is( scalar @{$mech->{page_stack}}, 1, 'Pushed item onto page stack' ); is( $mech->history_count, 2, 'Two history count after push' ); $mech->_push_page_stack(); is( scalar @{$mech->{page_stack}}, 2, 'Pushed item onto page stack' ); is( $mech->history_count, 3, 'Three history count after push' ); $mech->back(); is( scalar @{$mech->{page_stack}}, 1, 'Popped item from page stack' ); is( $mech->history_count, 2, 'History count back to 2 post pop' ); $mech->back(); is( scalar @{$mech->{page_stack}}, 0, 'Popped item from page stack' ); is( $mech->history_count, 1, 'History count back to 1 post pop' ); $mech->back(); is( scalar @{$mech->{page_stack}}, 0, 'Cannot pop beyond end of page stack' ); is( $mech->history_count, 1, 'History count stable at 1' ); } NO_STACK: { my $mech = WWW::Mechanize->new; isa_ok( $mech, 'WWW::Mechanize', 'Created object' ); $mech->stack_depth(0); is( scalar @{$mech->{page_stack}}, 0, 'Page stack starts empty' ); ok( $mech->get($server->url)->is_success, 'Got start page' ); is( scalar @{$mech->{page_stack}}, 0, 'Page stack starts empty' ); $mech->_push_page_stack(); is( scalar @{$mech->{page_stack}}, 0, 'Pushing has no effect' ); } done_testing; ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������nonascii.html���������������������������������������������������������������������������������������100644��000765��000024�� 623�13126734347� 17344� 0����������������������������������������������������������������������������������������������������ustar�00olaf����������������������������staff���������������������������000000��000000��WWW-Mechanize-1.86/t/local���������������������������������������������������������������������������������������������������������������������������������������������<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head> <title>Query Builder
00-report-prereqs.t100644000765000024 1342613126734347 17222 0ustar00olafstaff000000000000WWW-Mechanize-1.86/t#!perl use strict; use warnings; # This test was generated by Dist::Zilla::Plugin::Test::ReportPrereqs 0.027 use Test::More tests => 1; use ExtUtils::MakeMaker; use File::Spec; # from $version::LAX my $lax_version_re = qr/(?: undef | (?: (?:[0-9]+) (?: \. | (?:\.[0-9]+) (?:_[0-9]+)? )? | (?:\.[0-9]+) (?:_[0-9]+)? ) | (?: v (?:[0-9]+) (?: (?:\.[0-9]+)+ (?:_[0-9]+)? )? | (?:[0-9]+)? (?:\.[0-9]+){2,} (?:_[0-9]+)? ) )/x; # hide optional CPAN::Meta modules from prereq scanner # and check if they are available my $cpan_meta = "CPAN::Meta"; my $cpan_meta_pre = "CPAN::Meta::Prereqs"; my $HAS_CPAN_META = eval "require $cpan_meta; $cpan_meta->VERSION('2.120900')" && eval "require $cpan_meta_pre"; ## no critic # Verify requirements? my $DO_VERIFY_PREREQS = 1; sub _max { my $max = shift; $max = ( $_ > $max ) ? $_ : $max for @_; return $max; } sub _merge_prereqs { my ($collector, $prereqs) = @_; # CPAN::Meta::Prereqs object if (ref $collector eq $cpan_meta_pre) { return $collector->with_merged_prereqs( CPAN::Meta::Prereqs->new( $prereqs ) ); } # Raw hashrefs for my $phase ( keys %$prereqs ) { for my $type ( keys %{ $prereqs->{$phase} } ) { for my $module ( keys %{ $prereqs->{$phase}{$type} } ) { $collector->{$phase}{$type}{$module} = $prereqs->{$phase}{$type}{$module}; } } } return $collector; } my @include = qw( ); my @exclude = qw( ); # Add static prereqs to the included modules list my $static_prereqs = do './t/00-report-prereqs.dd'; # Merge all prereqs (either with ::Prereqs or a hashref) my $full_prereqs = _merge_prereqs( ( $HAS_CPAN_META ? $cpan_meta_pre->new : {} ), $static_prereqs ); # Add dynamic prereqs to the included modules list (if we can) my ($source) = grep { -f } 'MYMETA.json', 'MYMETA.yml'; my $cpan_meta_error; if ( $source && $HAS_CPAN_META && (my $meta = eval { CPAN::Meta->load_file($source) } ) ) { $full_prereqs = _merge_prereqs($full_prereqs, $meta->prereqs); } else { $cpan_meta_error = $@; # capture error from CPAN::Meta->load_file($source) $source = 'static metadata'; } my @full_reports; my @dep_errors; my $req_hash = $HAS_CPAN_META ? $full_prereqs->as_string_hash : $full_prereqs; # Add static includes into a fake section for my $mod (@include) { $req_hash->{other}{modules}{$mod} = 0; } for my $phase ( qw(configure build test runtime develop other) ) { next unless $req_hash->{$phase}; next if ($phase eq 'develop' and not $ENV{AUTHOR_TESTING}); for my $type ( qw(requires recommends suggests conflicts modules) ) { next unless $req_hash->{$phase}{$type}; my $title = ucfirst($phase).' '.ucfirst($type); my @reports = [qw/Module Want Have/]; for my $mod ( sort keys %{ $req_hash->{$phase}{$type} } ) { next if $mod eq 'perl'; next if grep { $_ eq $mod } @exclude; my $file = $mod; $file =~ s{::}{/}g; $file .= ".pm"; my ($prefix) = grep { -e File::Spec->catfile($_, $file) } @INC; my $want = $req_hash->{$phase}{$type}{$mod}; $want = "undef" unless defined $want; $want = "any" if !$want && $want == 0; my $req_string = $want eq 'any' ? 'any version required' : "version '$want' required"; if ($prefix) { my $have = MM->parse_version( File::Spec->catfile($prefix, $file) ); $have = "undef" unless defined $have; push @reports, [$mod, $want, $have]; if ( $DO_VERIFY_PREREQS && $HAS_CPAN_META && $type eq 'requires' ) { if ( $have !~ /\A$lax_version_re\z/ ) { push @dep_errors, "$mod version '$have' cannot be parsed ($req_string)"; } elsif ( ! $full_prereqs->requirements_for( $phase, $type )->accepts_module( $mod => $have ) ) { push @dep_errors, "$mod version '$have' is not in required range '$want'"; } } } else { push @reports, [$mod, $want, "missing"]; if ( $DO_VERIFY_PREREQS && $type eq 'requires' ) { push @dep_errors, "$mod is not installed ($req_string)"; } } } if ( @reports ) { push @full_reports, "=== $title ===\n\n"; my $ml = _max( map { length $_->[0] } @reports ); my $wl = _max( map { length $_->[1] } @reports ); my $hl = _max( map { length $_->[2] } @reports ); if ($type eq 'modules') { splice @reports, 1, 0, ["-" x $ml, "", "-" x $hl]; push @full_reports, map { sprintf(" %*s %*s\n", -$ml, $_->[0], $hl, $_->[2]) } @reports; } else { splice @reports, 1, 0, ["-" x $ml, "-" x $wl, "-" x $hl]; push @full_reports, map { sprintf(" %*s %*s %*s\n", -$ml, $_->[0], $wl, $_->[1], $hl, $_->[2]) } @reports; } push @full_reports, "\n"; } } } if ( @full_reports ) { diag "\nVersions for all modules listed in $source (including optional ones):\n\n", @full_reports; } if ( $cpan_meta_error || @dep_errors ) { diag "\n*** WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING ***\n"; } if ( $cpan_meta_error ) { my ($orig_source) = grep { -f } 'MYMETA.json', 'MYMETA.yml'; diag "\nCPAN::Meta->load_file('$orig_source') failed with: $cpan_meta_error\n"; } if ( @dep_errors ) { diag join("\n", "\nThe following REQUIRED prerequisites were not satisfied:\n", @dep_errors, "\n" ); } pass; # vim: ts=4 sts=4 sw=4 et: find_link-warnings.t100644000765000024 315213126734347 17551 0ustar00olafstaff000000000000WWW-Mechanize-1.86/t#!perl -T use warnings; use strict; use Test::More; use URI::file; BEGIN { delete @ENV{ qw( http_proxy HTTP_PROXY PATH IFS CDPATH ENV BASH_ENV) }; } BEGIN { eval 'use Test::Warn;'; plan skip_all => "Test::Warn required to test $0" if $@; plan tests => 19; } BEGIN { use_ok( 'WWW::Mechanize' ); } my $mech = WWW::Mechanize->new( cookie_jar => undef ); isa_ok( $mech, 'WWW::Mechanize' ); my $uri = URI::file->new_abs( 't/find_link.html' )->as_string; $mech->get( $uri ); ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; REGEX_USAGE: { for my $tname (qw( TEXT NAME URL TAG )) { warning_like( sub { $mech->find_link( $tname => 'expect error' ) }, qr/Unknown link-finding parameter/, "detected usage error: $tname => 'string'" ); } } REGEX_STRING: { for my $tn (qw( text name url tag )) { my $tname = $tn.'_regex'; warning_like( sub { $mech->find_link( $tname => 'expect error' ) }, qr/passed as $tname is not a regex/, "detected usage error: $tname => 'string'" ); } } NON_REGEX_STRING: { for my $tname (qw( text name url tag )) { warning_like( sub { $mech->find_link( $tname => qr/foo/ ) }, qr/passed as '$tname' is a regex/, "detected usage error: $tname => Regex" ); } } SPACE_PADDED: { for my $tname (qw( text name url tag )) { warning_like( sub { $mech->find_link( $tname => ' a padded astring ' ) }, qr/is space-padded and cannot succeed/, "detected usage error: $tname => padded-string" ); } } LocalServer.pm100644000765000024 1455313126734347 17501 0ustar00olafstaff000000000000WWW-Mechanize-1.86/t/localpackage LocalServer; # start a fake webserver, fork, and connect to ourselves use warnings; use strict; # this has to happen here because LWP::Simple creates a $ua # on load so any time after this is too late. BEGIN { delete @ENV{qw( HTTP_PROXY http_proxy CGI_HTTP_PROXY HTTPS_PROXY https_proxy HTTP_PROXY_ALL http_proxy_all )}; } use LWP::Simple; use FindBin; use File::Spec; use File::Temp; use URI::URL qw(); use Carp qw(carp croak); =head1 SYNOPSIS use LWP::Simple qw(get); my $server = Test::HTTP::LocalServer->spawn; ok get $server->url, "Retrieve " . $server->url; $server->stop; =head1 METHODS =head2 Cspawn %ARGS> This spawns a new HTTP server. The server will stay running until C<< $server->stop >> is called. Valid arguments are: =over 4 =item * C<< html => >> scalar containing the page to be served =item * C<< file => >> filename containing the page to be served =item * C<< debug => 1 >> to make the spawned server output debug information =item * C<< eval => >> string that will get evaluated per request in the server Try to avoid characters that are special to the shell, especially quotes. A good idea for a slow server would be eval => sleep+10 =back All served HTML will have the first %s replaced by the current location. The following entries will be removed from C<%ENV>: HTTP_PROXY http_proxy CGI_HTTP_PROXY HTTPS_PROXY https_proxy HTTP_PROXY_ALL http_proxy_all =cut sub spawn { my ($class,%args) = @_; my $self = { %args }; bless $self,$class; local $ENV{TEST_HTTP_VERBOSE}; $ENV{TEST_HTTP_VERBOSE} = 1 if (delete $args{debug}); $self->{delete} = []; if (my $html = delete $args{html}) { # write the html to a temp file my ($fh,$tempfile) = File::Temp::tempfile(); binmode $fh; print $fh $html or die "Couldn't write tempfile $tempfile : $!"; close $fh; push @{$self->{delete}},$tempfile; $args{file} = $tempfile; }; my ($fh,$logfile) = File::Temp::tempfile(); close $fh; push @{$self->{delete}},$logfile; $self->{logfile} = $logfile; my $web_page = delete $args{file} || ""; my $server_file = File::Spec->catfile( $FindBin::Bin,'log-server' ); my @opts; push @opts, "-e" => qq{"} . delete($args{ eval }) . qq{"} if $args{ eval }; my $pid = open my $server, qq'$^X "$server_file" "$web_page" "$logfile" @opts|' or croak "Couldn't spawn local server $server_file : $!"; my $url = <$server>; chomp $url; die "Couldn't read back local server url" unless $url; # What is this code supposed to fix? my $lhurl = URI::URL->new( $url ); $lhurl->host( '127.0.0.1' ); $self->{_server_url} = $lhurl; $self->{_fh} = $server; $self->{_pid} = $pid; $self; }; =head2 C<< $server->port >> This returns the port of the current server. As new instances will most likely run under a different port, this is convenient if you need to compare results from two runs. =cut sub port { carp __PACKAGE__ . '::port called without a server' unless $_[0]->{_server_url}; $_[0]->{_server_url}->port }; =head2 C<< $server->url >> This returns the url where you can contact the server. This url is valid until the C<$server> goes out of scope or you call C<< $server->stop >> or C<< $server->get_log >>. =cut sub url { $_[0]->{_server_url}->abs->as_string }; =head2 C<< $server->stop >> This stops the server process by requesting a special url. =cut sub stop { my ($self) = @_; get( $self->quit_server ); undef $self->{_server_url}; if ( $self->{_fh} ) { close $self->{_fh}; delete $self->{_fh}; } }; =head2 C<< $server->kill >> This kills the server process via C. The log cannot be retrieved then. =cut sub kill { CORE::kill( 9 => $_[0]->{ _pid } ); undef $_[0]->{_server_url}; undef $_[0]->{_pid}; }; =head2 C<< $server->get_log >> This stops the server by calling C and then returns the output of the server process. This output will be a list of all requests made to the server concatenated together as a string. =cut sub get_log { my ($self) = @_; my $log = get( $self->get_server_log ); $self->stop; return $log; }; sub DESTROY { $_[0]->stop if $_[0]->{_server_url}; for my $file (@{$_[0]->{delete}}) { unlink $file or warn "Couldn't remove tempfile $file : $!\n"; }; }; =head1 URLs implemented by the server =head2 302 redirect C<< $server->redirect($target) >> This URL will issue a redirect to C<$target>. No special care is taken towards URL-decoding C<$target> as not to complicate the server code. You need to be wary about issuing requests with escaped URL parameters. =head2 404 error C<< $server->error_notfound($target) >> This URL will response with status code 404. =head2 Timeout C<< $server->error_timeout($seconds) >> This URL will send a 599 error after C<$seconds> seconds. =head2 Timeout+close C<< $server->error_close($seconds) >> This URL will send nothing and close the connection after C<$seconds> seconds. =head2 Error in response content C<< $server->error_after_headers >> This URL will send headers for a successfull response but will close the socket with an error after 2 blocks of 16 spaces have been sent. =head2 Chunked response C<< $server->chunked >> This URL will return 5 blocks of 16 spaces at a rate of one block per second in a chunked response. =head2 Other URLs All other URLs will echo back the cookies and query parameters. =cut use vars qw(%urls); %urls = ( 'quit_server' => 'quit_server', 'get_server_log' => 'get_server_log', 'redirect' => 'redirect/%s', 'error_notfound' => 'error/notfound/%s', 'error_timeout' => 'error/timeout/%s', 'error_close' => 'error/close/%s', 'error_after_headers' => 'error/after_headers', 'chunked' => 'chunks', ); for (keys %urls) { no strict 'refs'; my $name = $_; *{ $name } = sub { my $self = shift; $self->url . sprintf $urls{ $name }, @_; }; }; =head1 EXPORT None by default. =head1 COPYRIGHT AND LICENSE This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Copyright (C) 2003-2011 Max Maischein =head1 AUTHOR Max Maischein, Ecorion@cpan.orgE Please contact me if you find bugs or otherwise improve the module. More tests are also very welcome ! =head1 SEE ALSO L,L,L =cut 1; click_button.t100644000765000024 374313126734347 17546 0ustar00olafstaff000000000000WWW-Mechanize-1.86/t/localuse warnings; use strict; use lib 't/local'; use LocalServer; use Test::More 0.96; BEGIN { delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) }; use_ok( 'WWW::Mechanize' ); } my $mech = WWW::Mechanize->new(); isa_ok( $mech, 'WWW::Mechanize', 'Created the object' ); my $server = LocalServer->spawn(); isa_ok( $server, 'LocalServer' ); my $response = $mech->get( $server->url ); isa_ok( $response, 'HTTP::Response', 'Got back a response' ); ok( $response->is_success, 'Got URL' ) or die q{Can't even fetch local url}; ok( $mech->is_html, 'Local page is HTML' ); my @forms = $mech->forms; my $form = $forms[0]; subtest 'click by id' => sub { $mech->click_button(id => 0); test_click( $mech ); ok( !eval { $mech->click_button( id => 'i-do-not-exist' ); 1 }, 'Button id not found' ); }; subtest 'click by number' => sub { $mech->click_button(number => 1); test_click( $mech ); ok(! eval { $mech->click_button(number => 2); 1 }, 'Button number out of range'); }; subtest 'click by name' => sub { $mech->click_button(name => 'submit'); test_click( $mech ); ok(! eval { $mech->click_button(name => 'bogus'); 1 }, 'Button name unknown'); }; CLICK_BY_OBJECT_REFERENCE: { subtest 'click by object reference' => sub { my $clicky_button = $form->find_input( undef, 'submit' ); isa_ok( $clicky_button, 'HTML::Form::Input', 'Found the submit button' ); is( $clicky_button->value, 'Go', 'Named the right thing, too' ); my $res = $mech->click_button(input => $clicky_button); local $TODO = q{Calling ->click() on an object doesn't seem to use the submit button.}; test_click( $mech ); diag $res->request->uri; }; } sub test_click { my $mech = shift; like( $mech->uri, qr/formsubmit/, 'Clicking on button' ); like( $mech->uri, qr/submit=Go/, 'Correct button was pressed' ); like( $mech->uri, qr/cat_foo/, 'Parameters got transmitted OK' ); $mech->back; } done_testing(); referer-server100644000765000024 76513126734347 17543 0ustar00olafstaff000000000000WWW-Mechanize-1.86/t/local# Thanks to merlyn for nudging me and giving me this snippet! use HTTP::Daemon; use URI::URL; $|++; my $d = HTTP::Daemon->new or die; my $lhurl = URI::URL->new( $d->url ); $lhurl->host( "127.0.0.1" ); print $lhurl->as_string, "\n"; $counter = 5; while ($counter-- and my $c = $d->accept) { while (my $r = $c->get_request) { my $ref = $r->headers->referer || ""; $c->send_response(HTTP::Response->new(200, "OK", undef, "Referer: '$ref'")); } $c->close; undef($c); } 00-report-prereqs.dd100644000765000024 726213126734347 17327 0ustar00olafstaff000000000000WWW-Mechanize-1.86/tdo { my $x = { 'configure' => { 'requires' => { 'ExtUtils::MakeMaker' => '0' }, 'suggests' => { 'JSON::PP' => '2.27300' } }, 'develop' => { 'requires' => { 'Encode' => '0', 'Pod::Coverage::TrustPod' => '0', 'Test::Code::TidyAll' => '0.50', 'Test::More' => '0.88', 'Test::Needs' => '0', 'Test::Pod::Coverage' => '1.08', 'Test::RequiresInternet' => '0', 'constant' => '0', 'lib' => '0' } }, 'runtime' => { 'requires' => { 'Carp' => '0', 'Getopt::Long' => '0', 'HTML::Form' => '1.00', 'HTML::HeadParser' => '0', 'HTML::TokeParser' => '0', 'HTML::TreeBuilder' => '0', 'HTTP::Cookies' => '0', 'HTTP::Request' => '1.30', 'HTTP::Request::Common' => '0', 'LWP::UserAgent' => '5.827', 'Pod::Usage' => '0', 'Scalar::Util' => '0', 'Tie::RefHash' => '0', 'URI::URL' => '0', 'URI::file' => '0', 'base' => '0', 'perl' => '5.006', 'strict' => '0', 'warnings' => '0' } }, 'test' => { 'recommends' => { 'CPAN::Meta' => '2.120900' }, 'requires' => { 'CGI' => '4.32', 'Exporter' => '0', 'ExtUtils::MakeMaker' => '0', 'File::Spec' => '0', 'File::Temp' => '0', 'FindBin' => '0', 'HTTP::Daemon' => '0', 'HTTP::Response' => '0', 'HTTP::Server::Simple::CGI' => '0', 'LWP' => '0', 'LWP::Simple' => '0', 'Test::Deep' => '0', 'Test::Fatal' => '0', 'Test::More' => '0.96', 'Test::Output' => '0', 'Test::Warnings' => '0', 'URI' => '0', 'URI::Escape' => '0', 'bytes' => '0', 'lib' => '0', 'vars' => '0' } } }; $x; }form_with_fields.html100644000765000024 314413126734347 20014 0ustar00olafstaff000000000000WWW-Mechanize-1.86/t
Like in PHP!
mech-dump000755000765000024 013126734347 15317 5ustar00olafstaff000000000000WWW-Mechanize-1.86/tmech-dump.t100644000765000024 321613126734347 17525 0ustar00olafstaff000000000000WWW-Mechanize-1.86/t/mech-dump#!perl -T use warnings; use strict; use Test::More; use File::Spec; use LWP; BEGIN { delete @ENV{ qw( IFS CDPATH ENV BASH_ENV PATH ) }; } plan skip_all => 'Not installing mech-dump' if -e File::Spec->catfile( qw( t SKIP-MECH-DUMP ) ); plan tests => 4; my $exe = File::Spec->catfile( qw( blib script mech-dump ) ); if ( $^O eq 'VMS' ) { $exe = qq[mcr $^X "-mblib" $exe]; } # Simply use a file: uri instead of the filename to make this test # more independent of what URI::* thinks. my $source = 'file:t/google.html'; my $perl; $perl = $1 if $^X =~ /^(.+)$/; my $command = "$perl -Mblib $exe --forms $source"; my $actual = `$command`; my $expected; if ( $LWP::VERSION < 5.800 ) { $expected = <<'EOF'; GET file:/target-page [bob-the-form] hl=en (hidden) ie=ISO-8859-1 (hidden) q= btnG=Google Search (submit) btnI=I'm Feeling Lucky (submit) EOF } else { $expected = <<'EOF'; GET file:/target-page [bob-the-form] hl=en (hidden readonly) ie=ISO-8859-1 (hidden readonly) q= (text) btnG=Google Search (submit) btnI=I'm Feeling Lucky (submit) EOF } my @actual = split /\s*\n/, $actual; my @expected = split /\s*\n/, $expected; # First line is platform-dependent, so handle it accordingly. shift @expected; my $first = shift @actual; like( $first, qr/^GET file:.*\/target-page \[bob-the-form\]/, 'First line matches' ); cmp_ok( @expected, '>', 0, 'Still some expected' ); cmp_ok( @actual, '>', 0, 'Still some actual' ); is_deeply( \@actual, \@expected, 'Rest of the lines match' ); pod-coverage.t100644000765000024 33413126734347 20012 0ustar00olafstaff000000000000WWW-Mechanize-1.86/xt/author#!perl # This file was automatically generated by Dist::Zilla::Plugin::PodCoverageTests. use Test::Pod::Coverage 1.08; use Pod::Coverage::TrustPod; all_pod_coverage_ok({ coverage_class => 'Pod::Coverage::TrustPod' }); Mechanize000755000765000024 013126734347 16332 5ustar00olafstaff000000000000WWW-Mechanize-1.86/lib/WWWFAQ.pod100644000765000024 4016113126734347 17627 0ustar00olafstaff000000000000WWW-Mechanize-1.86/lib/WWW/Mechanize# PODNAME: WWW::Mechanize::FAQ # ABSTRACT: Frequently Asked Questions about WWW::Mechanize __END__ =pod =encoding UTF-8 =head1 NAME WWW::Mechanize::FAQ - Frequently Asked Questions about WWW::Mechanize =head1 VERSION version 1.86 =head1 How to get help with WWW::Mechanize If your question isn't answered here in the FAQ, please turn to the communities at: =over =item * L =item * The libwww-perl mailing list at L =back =head1 JavaScript =head2 I have this web page that has JavaScript on it, and my Mech program doesn't work. That's because WWW::Mechanize doesn't operate on the JavaScript. It only understands the HTML parts of the page. =head2 I thought Mech was supposed to work like a web browser. It does pretty much, but it doesn't support JavaScript. I added some basic attempts at picking up URLs in C calls and return them in C<< $mech->links >>. They work sometimes. Since Javascript is completely visible to the client, it cannot be used to prevent a scraper from following links. But it can make life difficult. If you want to scrape specific pages, then a solution is always possible. One typical use of Javascript is to perform argument checking before posting to the server. The URL you want is probably just buried in the Javascript function. Do a regular expression match on C<< $mech->content() >> to find the link that you want and C<< $mech->get >> it directly (this assumes that you know what you are looking for in advance). In more difficult cases, the Javascript is used for URL mangling to satisfy the needs of some middleware. In this case you need to figure out what the Javascript is doing (why are these URLs always really long?). There is probably some function with one or more arguments which calculates the new URL. Step one: using your favorite browser, get the before and after URLs and save them to files. Edit each file, converting the argument separators ('?', '&' or ';') into newlines. Now it is easy to use diff or comm to find out what Javascript did to the URL. Step 2 - find the function call which created the URL - you will need to parse and interpret its argument list. The Javascript Debugger in the Firebug extension for Firefox helps with the analysis. At this point, it is fairly trivial to write your own function which emulates the Javascript for the pages you want to process. Here's another approach that answers the question, "It works in Firefox, but why not Mech?" Everything the web server knows about the client is present in the HTTP request. If two requests are identical, the results should be identical. So the real question is "What is different between the mech request and the Firefox request?" The Firefox extension "Tamper Data" is an effective tool for examining the headers of the requests to the server. Compare that with what LWP is sending. Once the two are identical, the action of the server should be the same as well. I say "should", because this is an oversimplification - some values are naturally unique, e.g. a SessionID, but if a SessionID is present, that is probably sufficient, even though the value will be different between the LWP request and the Firefox request. The server could use the session to store information which is troublesome, but that's not the first place to look (and highly unlikely to be relevant when you are requesting the login page of your site). Generally the problem is to be found in missing or incorrect POSTDATA arguments, Cookies, User-Agents, Accepts, etc. If you are using mech, then redirects and cookies should not be a problem, but are listed here for completeness. If you are missing headers, C<< $mech->add_header >> can be used to add the headers that you need. =head2 Which modules work like Mechanize and have JavaScript support? In no particular order: L, L, L, L, L =head1 How do I do X? =head2 Can I do [such-and-such] with WWW::Mechanize? If it's possible with LWP::UserAgent, then yes. WWW::Mechanize is a subclass of L, so all the wondrous magic of that class is inherited. =head2 How do I use WWW::Mechanize through a proxy server? See the docs in L on how to use the proxy. Short version: $mech->proxy(['http', 'ftp'], 'http://proxy.example.com:8000/'); or get the specs from the environment: $mech->env_proxy(); # Environment set like so: gopher_proxy=http://proxy.my.place/ wais_proxy=http://proxy.my.place/ no_proxy="localhost,my.domain" export gopher_proxy wais_proxy no_proxy =head2 How can I see what fields are on the forms? Use the mech-dump utility, optionally installed with Mechanize. $ mech-dump --forms http://search.cpan.org Dumping forms GET http://search.cpan.org/search query= mode=all (option) [*all|module|dist|author] =CPAN Search (submit) =head2 How do I get Mech to handle authentication? use MIME::Base64; my $agent = WWW::Mechanize->new(); my @args = ( Authorization => "Basic " . MIME::Base64::encode( USER . ':' . PASS ) ); $agent->credentials( ADDRESS, REALM, USER, PASS ); $agent->get( URL, @args ); If you want to use the credentials for all future requests, you can also use the L C method instead of the extra arguments to C $mech->default_header( Authorization => 'Basic ' . encode_base64( USER . ':' . PASSWORD ) ); =head2 How can I get WWW::Mechanize to execute this JavaScript? You can't. JavaScript is entirely client-based, and WWW::Mechanize is a client that doesn't understand JavaScript. See the top part of this FAQ. =head2 How do I check a checkbox that doesn't have a value defined? Set it to the value of "on". $mech->field( my_checkbox => 'on' ); =head2 How do I handle frames? You don't deal with them as frames, per se, but as links. Extract them with my @frame_links = $mech->find_link( tag => "frame" ); =head2 How do I get a list of HTTP headers and their values? All L methods work on a L object which is returned by the I, I, I, I, I, and I methods. my $mech = WWW::Mechanize->new( autocheck => 1 ); $mech->get( 'http://my.site.com' ); my $response = $mech->response(); for my $key ( $response->header_field_names() ) { print $key, " : ", $response->header( $key ), "\n"; } =head2 How do I enable keep-alive? Since L is a subclass of L, you can use the same mechanism to enable keep-alive: use LWP::ConnCache; ... $mech->conn_cache(LWP::ConnCache->new); =head2 How can I change/specify the action parameter of an HTML form? You can access the action of the form by utilizing the L object returned from one of the specifying form methods. Using C<< $mech->form_number($number) >>: my $mech = WWW::mechanize->new; $mech->get('http://someurlhere.com'); # Access the form using its Zero-Based Index by DOM order $mech->form_number(0)->action('http://newAction'); #ABS URL Using C<< $mech->form_name($number) >>: my $mech = WWW::mechanize->new; $mech->get('http://someurlhere.com'); #Access the form using its Zero-Based Index by DOM order $mech->form_name('trgForm')->action('http://newAction'); #ABS URL =head2 How do I save an image? How do I save a large tarball? An image is just content. You get the image and save it. $mech->get( 'photo.jpg' ); $mech->save_content( '/path/to/my/directory/photo.jpg' ); You can also save any content directly to disk using the C<:content_file> flag to C, which is part of L. $mech->get( 'http://www.cpan.org/src/stable.tar.gz', ':content_file' => 'stable.tar.gz' ); =head2 How do I pick a specific value from a C<<