HTML-Copy-1.31/ 0000755 0000765 0000024 00000000000 12162705071 012461 5 ustar tkurita staff HTML-Copy-1.31/bin/ 0000755 0000765 0000024 00000000000 12162705071 013231 5 ustar tkurita staff HTML-Copy-1.31/bin/htmlcopy 0000755 0000765 0000024 00000003703 12161047502 015016 0 ustar tkurita staff #!/usr/bin/env perl -w
use strict;
use File::Basename;
use File::Spec;
use HTML::Copy;
use Getopt::Long;
use Pod::Usage;
our $VERSION = '1.31';
{
my $man = 0;
my $help = 0;
GetOptions('help|?' => \$help, man => \$man) or pod2usage(2);
pod2usage(-exitstatus => 0, -verbose => 1) if $help;
pod2usage(-exitstatus => 0, -verbose => 2) if $man;
if (@ARGV > 2) {
pod2usage(-message => 'Too many arguments.',
-exitstatus => 1, -verbose => 1)
}
if (@ARGV < 1) {
pod2usage(-message => 'Required arguments is not given.',
-exitstatus => 1, -verbose => 1)
}
my ($source_path, $target_path) = @ARGV;
my $in;
if ($source_path eq '-' ) {
open $in , " -";
} else {
$in = $source_path;
}
my $p = HTML::Copy->new($in);
#$p->set_encode_suspects(qw/euc-jp shiftjis 7bit-jis/);
unless ($target_path) {
open $target_path, ">-";
}
$p->copy_to($target_path);
}
1;
__END__
=head1 NAME
htmlcopy -- Copy a HTML file without breaking links.
=head1 SYNOPSIS
htmlcopy [OPTION] {SOURCE | -} [DESTINATION]
=head1 DESCRIPTION
htmlcopy a source HTML file into DESTINATION. If the HTML file have links to images, other HTML files, javascripts and cascading style sheets, htmlcopy changing link path in the HTML file to keep the link destination.
When DESTINATION is omitted, the modified HTML is written in the standard output. Also it is assumed that output location is the current working directory.
SOURCE and DESTINATION should be cleanuped pathes. For example, a verbose path like '/aa/bb/../cc' may cause converting links wrongly. This is a limitaion of the URI module's rel method. To cleanup pathes, Cwd::realpath is useful.
=head1 OPTIONS
=over 4
=item -h, --help
Print a brief help message and exits.
=item -m, --man
Prints the manual page and exits.
=back
=head1 AUTHOR
Tetsuro KURITA
=cut
HTML-Copy-1.31/Changes 0000644 0000765 0000024 00000002736 12162701253 013762 0 ustar tkurita staff Revision history for Perl extension HTML::Copy.
1.31 2013-06-27
* Remove devendencies on Cwd package.
* Broken links are not unescaped.
* Ignore template toolikit's variables in links.
* Fixed spelling mistakes in the document.
* Thanks to gregor herrmann.
* Fixed failing tests with Perl 5.18.
* Thanks to gregor hermman.
1.3 2008-02-20
* HTML::Copy can accept file handles instead file pathes.
* htmlcopy can use standard input and output.
* The working in MS Windows platform is expected again.
* Thanks to Taro Nishino.
1.24 2008-01-24
* The test may success in MS Windows platform.
1.23 2008-01-16
* Add error handling routine when a souce file can't be opened.
1.22 2007-08-10
* add HTTP::Headers to Makefile.PL as a prerequired module(PREREQ_PM).
1.21 2007-08-09
* copy_to and parse_to can be accept a path of directory.
* automatically make missing directory
1.2 2007-05-31
* support XHTML
* process with taking account of tag
* use URI module instead of File::Spec to convert link path.
* HTML::Copy may work on non-unix platform.
1.13 2007-03-22
* fix a problem that processing a link including an anchor is not correct.
1.12 2007-02-22
* fix error in POD
* use Class::Accessor
* fix invalid warnning when a file does not exist
1.11 2007-02-19
* blush up POD
* blush up code
* add use 5.008
1.1
* First version to update CPAN
HTML-Copy-1.31/lib/ 0000755 0000765 0000024 00000000000 12162705071 013227 5 ustar tkurita staff HTML-Copy-1.31/lib/HTML/ 0000755 0000765 0000024 00000000000 12162705071 013773 5 ustar tkurita staff HTML-Copy-1.31/lib/HTML/Copy.pm 0000644 0000765 0000024 00000033450 12161047452 015251 0 ustar tkurita staff package HTML::Copy;
use 5.008;
use strict;
use warnings;
use File::Spec;
use File::Basename;
use File::Path;
use utf8;
use Encode;
use Encode::Guess;
use Carp;
use HTML::Parser 3.40;
use HTML::HeadParser;
use URI::file;
use base qw(HTML::Parser Class::Accessor::Fast);
__PACKAGE__->mk_accessors(qw(link_attributes
has_base));
#use Data::Dumper;
our @default_link_attributes = ('src', 'href', 'background', 'csref', 'livesrc', 'user');
# 'livesrc', 'user' and 'csref' are uesed in Adobe GoLive
=head1 NAME
HTML::Copy - copy a HTML file without breaking links.
=head1 VERSION
Version 1.31
=cut
our $VERSION = '1.31';
=head1 SYMPOSIS
use HTML::Copy;
HTML::Copy->htmlcopy($source_path, $destination_path);
# or
$p = HTML::Copy->new($source_path);
$p->copy_to($destination_path);
# or
open my $in, "<", $source_path;
$p = HTML::Copy->new($in)
$p->source_path($source_path); # can be omitted,
# when $source_path is in cwd.
$p->destination_path($destination_path) # can be omitted,
# when $source_path is in cwd.
open my $out, ">", $source_path;
$p->copy_to($out);
=head1 DESCRIPTION
This module is to copy a HTML file without beaking links in the file. This module is a sub class of HTML::Parser.
=head1 REQUIRED MODULES
=over 2
=item L
=back
=head1 CLASS METHODS
=head2 htmlcopy
HTML::Copy->htmlcopy($source_path, $destination_path);
Parse contents of $source_path, change links and write into $destination_path.
=cut
sub htmlcopy($$$) {
my ($class, $source_path, $destination_path) = @_;
my $p = $class->new($source_path);
return $p->copy_to($destination_path);
}
=head2 parse_file
$html_text = HTML::Copy->parse_file($source_path,
$destination_path);
Parse contents of $source_path and change links to copy into $destination_path. But don't make $destination_path. Just return modified HTML. The encoding of strings is converted into utf8.
=cut
sub parse_file($$$) {
my ($class, $source, $destination) = @_;
my $p = $class->new($source);
return $p->parse_to($destination);
}
=head1 CONSTRUCTOR METHODS
=head2 new
$p = HTML::Copy->new($source);
Make an instance of this module with specifying a source of HTML.
The argument $source can be a file path or a file handle. When a file handle is passed, you may need to indicate a file path of the passed file handle by the method L<"source_path">. If calling L<"source_path"> is omitted, it is assumed that the location of the file handle is the current working directory.
=cut
sub new {
my $class = shift @_;
my $self = $class->SUPER::new();
if (@_ > 1) {
my %args = @_;
my @keys = keys %args;
@$self{@keys} = @args{@keys};
} else {
my $file = shift @_;
my $ref = ref($file);
if ($ref =~ /^Path::Class::File/) {
$self->source_path($file);
} elsif (! $ref && (ref(\$file) ne 'GLOB')) {
$self->source_path($file);
} else {
$self->source_handle($file);
}
}
$self->link_attributes(\@default_link_attributes);
$self->has_base(0);
$self->attr_encoded(1);
return $self;
}
=head1 INSTANCE METHODS
=head2 copy_to
$p->copy_to($destination)
Parse contents of $source given in new method, change links and write into $destination.
The argument $destination can be a file path or a file handle. When $destination is a file handle, you may need to indicate the location of the file handle by a method L<"destination_path">. L<"destination_path"> must be called before calling L<"copy_to">. When calling L<"destination_path"> is omitted, it is assumed that the locaiton of the file handle is the current working directory.
=cut
sub copy_to {
my ($self, $destination) = @_;
my $io_layer = $self->io_layer();
my $fh;
if (!ref($destination) && (ref(\$destination) ne "GLOB")) {
$destination = $self->set_destination($destination);
open $fh, ">$io_layer", $destination
or croak "can't open $destination.";
} else {
$fh = $destination;
binmode($fh, $io_layer);
}
$self->{'output_handle'} = $fh;
$self->SUPER::parse($self->{'source_html'});
$self->eof;
close $fh;
$self->source_handle(undef);
return $self->destination_path;
}
=head2 parse_to
$p->parse_to($destination_path)
Parse contents of $source_path given in new method, change links and return HTML contents to wirte $destination_path. Unlike copy_to, $destination_path will not created and just return modified HTML. The encoding of strings is converted into utf8.
=cut
sub parse_to {
my ($self, $destination_path) = @_;
$destination_path = $self->destination_path($destination_path);
my $output = '';
open my $fh, ">", \$output;
$self->copy_to($fh);
return Encode::decode($self->encoding, $output);
}
=head1 ACCESSOR METHODS
=head2 source_path
$p->source_path
$p->source_path($path)
Get and set a source location. Usually source location is specified with the L<"new"> method. When a file handle is passed to L<"new"> and the location of the file handle is not the current working directory, you need to use this method.
=cut
sub source_path {
my $self = shift @_;
if (@_) {
my $path = shift @_;
$self->{'source_path'} = $path;
$self->source_uri(URI::file->new_abs($path));
}
return $self->{'source_path'};
}
=head2 destination_path
$p->destination_path
$p->destination_path($path)
Get and set a destination location. Usually destination location is specified with the L<"copy_to">. When a file handle is passed to L<"copy_to"> and the location of the file handle is not the current working directory, you need to use this method before L<"copy_to">.
=cut
sub destination_path {
my $self = shift @_;
if (@_) {
my $path = shift @_;
$self->{'destination_path'} = $path;
$self->destination_uri(URI::file->new_abs($path));
}
return $self->{'destination_path'};
}
=head2 enchoding
$p->encoding;
Get an encoding of a source HTML.
=cut
sub encoding {
my ($self) = @_;
if ($self->{'encoding'}) {
return $self->{'encoding'};
}
my $in = $self->source_handle;
my $data = do {local $/; <$in>;};
my $p = HTML::HeadParser->new;
$p->utf8_mode(1);
$p->parse($data);
my $content_type = $p->header('content-type');
my $encoding = '';
if ($content_type) {
if ($content_type =~ /charset\s*=(.+)/) {
$encoding = $1;
}
}
unless ($encoding) {
my $decoder;
if (my @suspects = $self->encode_suspects) {
$decoder = Encode::Guess->guess($data, @suspects);
}
else {
$decoder = Encode::Guess->guess($data);
}
ref($decoder) or
die("Can't guess encoding of ".$self->source_path);
$encoding = $decoder->name;
}
$self->{'source_html'} = Encode::decode($encoding, $data);
$self->{'encoding'} = $encoding;
return $encoding;
}
=head2 io_layer
$p->io_layer;
$p->io_layer(':utf8');
Get and set PerlIO layer to read the source path and to write the destination path. Usually it was automatically determined by $source_path's charset tag. If charset is not specified, Encode::Guess module will be used.
=cut
sub io_layer {
my $self = shift @_;
if (@_) {
$self->{'io_layer'} = shift @_;
}
else {
unless ($self->{'io_layer'}) {
$self->{'io_layer'} = $self->check_io_layer();
}
}
return $self->{'io_layer'};
}
=head2 encode_suspects
@suspects = $p->encode_sustects;
$p->encode_suspects(qw/shiftjis euc-jp/);
Add suspects of text encoding to guess the text encoding of the source HTML. If the source HTML have charset tag, it is not required to add suspects.
=cut
sub encode_suspects {
my $self = shift @_;
if (@_) {
my @suspects = @_;
$self->{'EncodeSuspects'} = \@suspects;
}
if (my $suspects_ref = $self->{'EncodeSuspects'}) {
return @$suspects_ref;
}
else {
return ();
}
}
=head2 source_html
$p->source_html;
Obtain source HTML's contents
=cut
sub source_html {
my ($self) = @_;
$self->io_layer;
return $self->{'source_html'};
}
=head1 NOTE
Cleanuped pathes should be given to HTML::Copy and it's instances. For example, a verbose path like '/aa/bb/../cc' may cause converting links wrongly. This is a limitaion of the URI module's rel method. To cleanup pathes, Cwd::realpath is useful.
=head1 AUTHOR
Tetsuro KURITA
=cut
##== overriding methods of HTML::Parser
sub declaration { $_[0]->output("") }
sub process { $_[0]->output($_[2]) }
sub end { $_[0]->output($_[2]) }
sub text { $_[0]->output($_[1]) }
sub comment {
my ($self, $comment) = @_;
if ($comment =~ /InstanceBegin template="([^"]+)"/) {
my $uri = URI->new($1);
my $newlink = $self->change_link($uri);
$comment = " InstanceBegin template=\"$newlink\" ";
}
$self->output("");
}
sub process_link {
my ($self, $link_path)= @_;
return undef if ($link_path =~ /^\$/);
return undef if ($link_path =~ /^\[%.*%\]$/);
my $uri = URI->new($link_path);
return undef if ($uri->scheme);
return $self->change_link($uri);
}
sub start {
my ($self, $tag, $attr_dict, $attr_names, $tag_text) = @_;
unless ($self->has_base) {
if ($tag eq 'base') {
$self->has_base(1);
}
my $is_changed = 0;
foreach my $an_attr (@{$self->link_attributes}) {
if (exists($attr_dict->{$an_attr})){
my $newlink = $self->process_link($attr_dict->{$an_attr});
next unless ($newlink);
$attr_dict->{$an_attr} = $newlink;
$is_changed = 1;
}
}
if ($tag eq 'param') {
if ($attr_dict->{'name'} eq 'src') {
my $newlink = $self->process_link($attr_dict->{'value'});
if ($newlink) {
$attr_dict->{'value'} = $newlink;
$is_changed = 1;
}
}
}
if ($is_changed) {
my $attrs_text = $self->build_attributes($attr_dict, $attr_names);
$tag_text = "<$tag $attrs_text>";
}
}
$self->output($tag_text);
}
##== private functions
sub complete_destination_path {
my ($self, $dir) = @_;
my $source_path = $self->source_path
or croak "Can't resolve a file name of the destination, because a source path is not given.";
my $filename = basename($source_path)
or croak "Can't resolve a file name of the destination, because given source path is a directory.";
return File::Spec->catfile($dir, $filename);
}
sub set_destination {
my ($self, $destination_path) = @_;
if (-d $destination_path) {
$destination_path = $self->complete_destination_path($destination_path);
} else {
my ($name, $dir) = fileparse($destination_path);
unless ($name) {
$destination_path = $self->complete_destination_path($destination_path);
}
mkpath($dir);
}
return $self->destination_path($destination_path);
}
sub check_io_layer {
my ($self) = @_;
my $encoding = $self->encoding;
return '' unless ($encoding);
my $io_layer = '';
if (grep {/$encoding/} ('utf8', 'utf-8', 'UTF-8') ) {
$io_layer = ":utf8";
}
else {
$io_layer = ":encoding($encoding)";
}
return $io_layer;
}
sub build_attributes {
my ($self, $attr_dict, $attr_names) = @_;
my @attrs = ();
foreach my $attr_name (@{$attr_names}) {
if ($attr_name eq '/') {
push @attrs, '/';
} else {
my $attr_value = $attr_dict->{$attr_name};
push @attrs, "$attr_name=\"$attr_value\"";
}
}
return join(' ', @attrs);
}
sub change_link {
my ($self, $uri) = @_;
my $result_uri;
my $abs_uri = $uri->abs( $self->source_uri );
my $abs_path = $abs_uri->file;
if (-e $abs_path) {
$result_uri = $abs_uri->rel($self->destination_uri);
} else {
warn("$abs_path is not found.\nThe link to this path is not changed.\n");
return "";
}
return $result_uri->as_string;
}
sub output {
my ($self, $out_text) = @_;
print {$self->{'output_handle'}} $out_text;
}
sub source_handle {
my $self = shift @_;
if (@_) {
$self->{'source_handle'} = shift @_;
} elsif (!$self->{'source_handle'}) {
my $path = $self->source_path or croak "source_path is undefined.";
open my $in, "<", $path or croak "Can't open $path.";
$self->{'source_handle'} = $in;
}
return $self->{'source_handle'}
}
sub source_uri {
my $self = shift @_;
if (@_) {
$self->{'source_uri'} = shift @_;
} elsif (!$self->{'source_uri'}) {
$self->{'source_uri'} = do {
if (my $path = $self->source_path) {
URI::file->new_abs($path);
} else {
URI::file->cwd;
}
}
}
return $self->{'source_uri'}
}
sub destination_uri {
my $self = shift @_;
if (@_) {
$self->{'destination_uri'} = shift @_;
} elsif (!$self->{'destination_uri'}) {
$self->{'destination_uri'} = do {
if (my $path = $self->destination_path) {
URI::file->new_abs($path);
} else {
URI::file->cwd;
}
}
}
return $self->{'destination_uri'};
}
1;
HTML-Copy-1.31/Makefile.PL 0000755 0000765 0000024 00000001072 12161047304 014433 0 ustar tkurita staff #!/usr/bin/env perl -w
use 5.008;
use strict;
use ExtUtils::MakeMaker;
my @programs_to_install = qw(htmlcopy);
WriteMakefile(
'NAME' => 'HTML::Copy',
'VERSION_FROM' => 'lib/HTML/Copy.pm',
'EXE_FILES' => [ map {"bin/$_"} @programs_to_install ],
'PREREQ_PM' => {
'Test::More' => 0,
'HTML::Parser' => 3.40,
'HTTP::Headers' => 0,
'Class::Accessor' => 0,
'URI' => 0
},
'dist' => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', },
'clean' => { FILES => 'HTML-Copy-*' },
);
HTML-Copy-1.31/MANIFEST 0000644 0000765 0000024 00000000247 10564726613 013626 0 ustar tkurita staff Changes
README
MANIFEST
Makefile.PL
lib/HTML/Copy.pm
bin/htmlcopy
t/00-load.t
t/parse.t
META.yml Module meta-data (added by MakeMaker)
HTML-Copy-1.31/META.yml 0000644 0000765 0000024 00000001111 12162705071 013724 0 ustar tkurita staff --- #YAML:1.0
name: HTML-Copy
version: 1.31
abstract: ~
author: []
license: unknown
distribution_type: module
configure_requires:
ExtUtils::MakeMaker: 0
build_requires:
ExtUtils::MakeMaker: 0
requires:
Class::Accessor: 0
HTML::Parser: 3.4
HTTP::Headers: 0
Test::More: 0
URI: 0
no_index:
directory:
- t
- inc
generated_by: ExtUtils::MakeMaker version 6.56
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
version: 1.4
HTML-Copy-1.31/README 0000644 0000765 0000024 00000001505 10627760173 013352 0 ustar tkurita staff HTML-Copy
======================
HTML::Copy copy a HTML file without breaking links in the file. HTML::Copy will be useful to mainten web site and to handle HTML templates.
This package provide a perl module "HTML::Copy" and a command line tool "htmlcopy".
== INSTALLATION
To install this module type the following:
perl Makefile.PL
make
make test
make install
== DEPENDENCIES
This module requires these other modules and libraries:
HTML::Parser
Class::Accessor
URI
== COPYRIGHT
Put the correct copyright and licence information here.
Copyright (C) 2007 by Tetsuro KURITA
mailto:tkurita@mac.com
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.3 or,
at your option, any later version of Perl 5 you may have available.
HTML-Copy-1.31/t/ 0000755 0000765 0000024 00000000000 12162705071 012724 5 ustar tkurita staff HTML-Copy-1.31/t/00-load.t 0000644 0000765 0000024 00000000226 10565615170 014252 0 ustar tkurita staff #!perl -T
use 5.008;
use Test::More tests => 1;
BEGIN {
use_ok( 'HTML::Copy' );
}
diag( "Testing HTML::Copy $HTML::Copy::VERSION, Perl $], $^X" );
HTML-Copy-1.31/t/parse.t 0000644 0000765 0000024 00000020120 12161045706 014217 0 ustar tkurita staff #escapeChars {return}
#use lib '../lib';
use strict;
use warnings;
use HTML::Copy;
use utf8;
use File::Spec::Functions;
#use Data::Dumper;
use Encode qw(encode_utf8 decode_utf8);
use Test::More tests => 16;
sub read_and_unlink {
my ($path, $htmlcopy) = @_;
open(my $in, "<".$htmlcopy->io_layer(), $path)
or die "Can't open $path.";
my $contents = do {local $/; <$in>};
close $in;
unlink($path);
return $contents;
}
##== prepare linked HTML file
my $linked_html = <
EOT
my $linked_file_name = "linked$$.html";
open(my $linked_fh, ">", $linked_file_name)
or die "Can't open $linked_file_name.";
print $linked_fh $linked_html;
close $linked_fh;
##== HTML data without charsets
my $source_html_nocharset = <
ああ
EOT
my $result_html_nocharset = <
ああ
EOT
##== write test data
my $sub_dir_name = "sub$$";
mkdir($sub_dir_name);
my $src_file_name = "file$$.html";
my $destination = catfile($sub_dir_name, $src_file_name);
##== Test code with no charsets HTML
open(my $src_fh, ">:utf8", $src_file_name)
or die "Can't open $src_file_name.";
print $src_fh $source_html_nocharset;
close $src_fh;
##=== parse_to UTF-8
my $p = HTML::Copy->new($src_file_name);
my $copy_html = $p->parse_to($destination);
ok($copy_html eq $result_html_nocharset, "parse_to no charset UTF-8");
##=== copty_to UTF8
$p->copy_to($destination);
open(my $in, "<".$p->io_layer(), $destination)
or die "Can't open $destination.";
$copy_html = read_and_unlink($destination, $p);
ok($copy_html eq $result_html_nocharset, "copy_to no charset UTF-8");
##=== write data with shift_jis
open($src_fh, ">:encoding(shiftjis)", $src_file_name)
or die "Can't open $src_file_name.";
print $src_fh $source_html_nocharset;
close $src_fh;
##=== parse_to shift_jis
$p = HTML::Copy->new($src_file_name);
$copy_html = do {
$p->encode_suspects("shiftjis");
$p->parse_to(catfile($sub_dir_name, $src_file_name));
};
ok($copy_html eq $result_html_nocharset, "parse_to no charset shift_jis");
##=== copy_to shift_jis
$copy_html = do {
$p->copy_to($destination);
open(my $in, "<".$p->io_layer, $destination)
or die "Can't open $destination.";
read_and_unlink($destination, $p);
};
ok($copy_html eq $result_html_nocharset, "copy_to no charset shift_jis");
##== HTML with charset uft-8
my $src_html_utf8 = encode_utf8(<
ああ
EOT
my $result_html_utf8 = encode_utf8(<
ああ
EOT
##== Test code with charset utf-8
open($src_fh, ">:utf8", $src_file_name)
or die "Can't open $src_file_name.";
print $src_fh $src_html_utf8;
close $src_fh;
##=== parse_to
$p = HTML::Copy->new($src_file_name);
$copy_html = $p->parse_to($destination);
ok($copy_html eq $result_html_utf8, "parse_to charset UTF-8");
##=== copy_to
$p->copy_to($destination);
$copy_html = read_and_unlink($destination, $p);
ok($copy_html eq $result_html_utf8, "copy_to charset UTF-8");
##=== copy_to gving a file handle
$copy_html = do {
open $in, "<", \$src_html_utf8;
my $outdata ='';;
my $p = HTML::Copy->new($in);
open my $out, ">", $destination;
$p->destination_path($destination);
$p->copy_to($out);
close $out;
read_and_unlink($destination, $p);
};
ok($copy_html eq decode_utf8($result_html_utf8), "copy_to giving a file handle");
##=== copy_to gving file handles for input and output
$copy_html = do {
open my $in, "<", \$src_html_utf8;
my $outdata;
my $p = HTML::Copy->new($in);
open my $out, ">".$p->io_layer(), \$outdata;
$p->destination_path($destination);
$p->copy_to($out);
Encode::decode($p->encoding, $outdata);
};
ok($copy_html eq decode_utf8($result_html_utf8), "copy_to giving file handles for input and output");
##=== parse_to giving a file handle
$copy_html = do {
open my $in, "<", \$src_html_utf8;
my $p = HTML::Copy->new($in);
$p->parse_to($destination);
};
ok($copy_html eq decode_utf8($result_html_utf8), "copy_to giving file handles for input and output");
##=== copy_to with directory destination
$copy_html = do {
my $p = HTML::Copy->new($src_file_name);
my $destination = $p->copy_to($sub_dir_name);
read_and_unlink($destination, $p);
};
ok($copy_html eq $result_html_utf8, "copy_to with a directory destination");
##== HTML with charset shift_jis
my $src_html_shiftjis = <
ああ
EOT
my $result_html_shiftjis = <
ああ
EOT
##== Test code with charset shift_jis
open($src_fh, ">:encoding(shiftjis)", $src_file_name)
or die "Can't open $src_file_name.";
print $src_fh $src_html_shiftjis;
close $src_fh;
##=== parse_to
$p = HTML::Copy->new($src_file_name);
$p->encode_suspects("shiftjis");
$copy_html = $p->parse_to($destination);
ok($copy_html eq $result_html_shiftjis, "parse_to no charset shift_jis");
##=== copy_to
$p->copy_to($destination);
$copy_html = read_and_unlink($destination, $p);
ok($copy_html eq $result_html_shiftjis, "copy_to no charset shift_jis");
##== class_methods
$copy_html = HTML::Copy->parse_file($src_file_name, $destination);
ok($copy_html eq $result_html_shiftjis, "parse_file");
HTML::Copy->htmlcopy($src_file_name, $destination);
open($in, "<".$p->io_layer, $destination)
or die "Can't open $destination.";
{local $/; $copy_html = <$in>};
close $in;
ok($copy_html eq $result_html_shiftjis, "htmlcopy");
unlink($destination);
##== Test with base url
my $src_html_base = <
ああ
EOT
##== Test code with base url
open($src_fh, ">:utf8", $src_file_name)
or die "Can't open $destination.";
print $src_fh $src_html_base;
close $src_fh;
##=== parse_to
$p = HTML::Copy->new($src_file_name);
$copy_html = $p->parse_to($destination);
ok($copy_html eq $src_html_base, "parse_to HTML with base URL");
##=== copy_to
$p->copy_to($destination);
open($in, "<".$p->io_layer, $destination)
or die "Can't open $destination.";
{local $/; $copy_html = <$in>};
close $in;
ok($copy_html eq $src_html_base, "copy_to HTML with base URL");
unlink($destination);
unlink($linked_file_name, $src_file_name, $destination);
rmdir($sub_dir_name);