Unicode-MapUTF8-1.14000755001750001750 013734104216 15047 5ustar00snowharesnowhare000000000000Unicode-MapUTF8-1.14/Build.PL000444001750001750 426113734104216 16503 0ustar00snowharesnowhare000000000000
# Build.PL - Module::Build installer for Unicode::MapUTF8.
#
# Before the Build script is generated, one of the POD files under pod/ is
# selected from the LANG environment variable (Japanese UTF-8, Japanese
# EUC-JP, otherwise English) and copied to lib/Unicode/MapUTF8.pod so that
# the installed documentation matches the user's locale.
use strict;
use warnings;

use Module::Build;
use File::Copy qw(copy);
use File::Spec;

# LANG may be unset (e.g. under cron); fall back to English.
my $lang = defined($ENV{'LANG'}) ? $ENV{'LANG'} : 'en';

my $target_pod = File::Spec->catfile('lib', 'Unicode', 'MapUTF8.pod');

# The dots are escaped so that only genuine "language.encoding" locale
# names select a translation.
my $source_name;
if ($lang =~ m/^(?:ja|ja_JP|ja_JP\.utf-8|ja_JP\.utf8|ja\.utf8|ja\.utf-8)$/i) {
    $source_name = 'MapUTF8.ja_JP.utf8.pod';
}
elsif ($lang =~ m/^(?:ja_JP\.eucjp|ja_JP\.euc|ja_euc|ja_eucjp)$/i) {
    $source_name = 'MapUTF8.ja_JP.eucjp.pod';
}
else {
    $source_name = 'MapUTF8.en.pod';
}
my $source_pod = File::Spec->catfile('pod', $source_name);

# The archive ships its members read-only (mode 0444), so an existing target
# has to be made writable before it can be overwritten.
if (-e $target_pod && !-w $target_pod) {
    chmod 0644, $target_pod;
}

# A failed copy is reported but is not fatal: the build can still proceed
# with whatever POD is already present at the target path.
copy($source_pod, $target_pod)
    or warn "Could not copy '$source_pod' to '$target_pod': $!\n";

# Runtime and test prerequisites are identical.
my %prereqs = (
    'perl'            => '5.006',
    'warnings'        => 0,
    'Carp'            => 0,
    'File::Copy'      => 0,
    'Unicode::Map'    => 0,
    'Unicode::String' => 0,
    'Unicode::Map8'   => 0,
    'Jcode'           => 0,
);

my $build = Module::Build->new(
    module_name    => 'Unicode::MapUTF8',
    dist_author    => 'Jerilyn Franz ',
    dist_abstract  => 'Conversions to and from arbitrary character sets and UTF8',
    license        => 'mit',
    requires       => { %prereqs },
    build_requires => { },
    test_requires  => { %prereqs },
    meta_merge     => {
        'meta-spec' => { version => 2 },
        resources   => {
            bugtracker => {
                web => 'https://github.com/JerilynFranz/perl-Unicode-MapUTF8/issues',
            },
            homepage   => 'https://github.com/JerilynFranz/perl-Unicode-MapUTF8',
            repository => {
                type => 'git',
                url  => 'https://github.com/JerilynFranz/perl-Unicode-MapUTF8.git',
                web  => 'https://github.com/JerilynFranz/perl-Unicode-MapUTF8',
            },
        },
    },
)->create_build_script;
Unicode-MapUTF8-1.14/Changes000444001750001750 457013734104216 16505 0ustar00snowharesnowhare000000000000Unicode::MapUTF8 - Conversions to and from arbitrary character sets and UTF8 1.14 2020.09.27 Fixing pod 
breakage in EUC-JP pod 1.13 2020.09.27 Fixing MANIFEST.SKIP error 1.12 2020.09.27 Build tool updates. Maintainer updates. POD error fixes. Relicensed under MIT license. 1.11 2005.10.10 Documentation changes. Addition of Build.PL support. Added various build tests, LICENSE, Artistic_License.txt, GPL_License.txt. Split documentation into separate .pod file. Added Japanese translation of POD. 1.10 2005.05.22 - Fixed bug in conversion of ISO-2022-JP to UTF-8. Problem and fix found by Masahiro HONMA . Similar bugs in conversions of shift_jis and euc-jp to UTF-8 corrected as well. 1.09 2001.08.22 - Fixed multiple typo occurrences of 'uft' where 'utf' was meant in code. Problem affected utf16 and utf7 encodings. Problem found by devon smith 1.08 2000.11.06 Added 'utf8_charset_alias' function to allow for runtime setting of character set aliases. Added several alternate names for 'sjis' (shiftjis, shift-jis, shift_jis, s-jis, and s_jis). Corrected 'croak' messages for 'from_utf8' functions to appropriate function name. Corrected fatal problem in jcode-unicode internals. Problem and fix found by Brian Wisti . 1.07 2000.11.01 Added 'croak' to use Carp declaration to fix error messages. Problem and fix found by . 1.06 2000.10.30 Fix to handle change in stringification of overloaded objects between Perl 5.005 and 5.6. Problem noticed by Brian Wisti . 
1.05 2000.10.23 Error in conversions from UTF8 to multibyte encodings corrected 1.04 2000.10.23 Additional diagnostic error messages added for internal errors 1.03 2000.10.22 Bug fix for load time Unicode::Map encoding detection 1.02 2000.10.22 Bug fix to 'from_utf8' method and load time detection of Unicode::Map8 supported character set encodings 1.01 2000.10.02 Initial public release Unicode-MapUTF8-1.14/LICENSE000444001750001750 205613734104216 16214 0ustar00snowharesnowhare000000000000MIT License Copyright (c) 2020 Jerilyn Franz Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
Unicode-MapUTF8-1.14/MANIFEST000444001750001750 43113734104216 16313 0ustar00snowharesnowhare000000000000Build.PL Changes LICENSE MANIFEST MANIFEST.SKIP Makefile.PL README lib/Unicode/MapUTF8.pm lib/Unicode/MapUTF8.pod pod/MapUTF8.ja_JP.utf8.pod pod/MapUTF8.ja_JP.eucjp.pod pod/MapUTF8.en.pod t/01_unicode_maputf8.t t/97_distribution.t t/98_pod_coverage.t t/99_pod.t META.yml META.json Unicode-MapUTF8-1.14/MANIFEST.SKIP000444001750001750 246013734104216 17104 0ustar00snowharesnowhare000000000000 #!start included /usr/share/perl/5.26/ExtUtils/MANIFEST.SKIP # Avoid version control files. \bRCS\b \bCVS\b \bSCCS\b ,v$ \B\.svn\b \B\.git\b \B\.gitignore\b \b_darcs\b \B\.cvsignore$ # Avoid VMS specific MakeMaker generated files \bDescrip.MMS$ \bDESCRIP.MMS$ \bdescrip.mms$ # Avoid Makemaker generated and utility files. \bMANIFEST\.bak \bMakefile$ \bblib/ \bMakeMaker-\d \bpm_to_blib\.ts$ \bpm_to_blib$ \bblibdirs\.ts$ # 6.18 through 6.25 generated this \b_eumm/ # 7.05_05 and above # Avoid Module::Build generated and utility files. \bBuild$ \b_build/ \bBuild.bat$ \bBuild.COM$ \bBUILD.COM$ \bbuild.com$ # and Module::Build::Tiny generated files \b_build_params$ # Avoid temp and backup files. ~$ \.old$ \#$ \b\.# \.bak$ \.tmp$ \.# \.rej$ \..*\.sw.?$ # Avoid OS-specific files/dirs # Mac OSX metadata \B\.DS_Store # Mac OSX SMB mount metadata files \B\._ # Avoid Devel::Cover and Devel::CoverX::Covered files. \bcover_db\b \bcovered\b # Avoid prove files \B\.prove$ # Avoid MYMETA files ^MYMETA\. #!end included /usr/share/perl/5.26/ExtUtils/MANIFEST.SKIP # Avoid configuration metadata file ^MYMETA\. # Avoid Module::Build generated and utility files. 
\bBuild$ \bBuild.bat$ \b_build \bBuild.COM$ \bBUILD.COM$ \bbuild.com$ # Avoid archives of this distribution \bUnicode-MapUTF8-[\d\.\_]+ Unicode-MapUTF8-1.14/META.json000444001750001750 356213734104216 16633 0ustar00snowharesnowhare000000000000{ "abstract" : "Conversions to and from arbitrary character sets and UTF8", "author" : [ "Jerilyn Franz " ], "dynamic_config" : 1, "generated_by" : "Module::Build version 0.4224", "license" : [ "mit" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "Unicode-MapUTF8", "prereqs" : { "configure" : { "requires" : { "Module::Build" : "0.42" } }, "runtime" : { "requires" : { "Carp" : "0", "File::Copy" : "0", "Jcode" : "0", "Unicode::Map" : "0", "Unicode::Map8" : "0", "Unicode::String" : "0", "perl" : "5.006", "warnings" : "0" } }, "test" : { "requires" : { "Carp" : "0", "File::Copy" : "0", "Jcode" : "0", "Unicode::Map" : "0", "Unicode::Map8" : "0", "Unicode::String" : "0", "perl" : "5.006", "warnings" : "0" } } }, "provides" : { "Unicode::MapUTF8" : { "file" : "lib/Unicode/MapUTF8.pm", "version" : "1.14" } }, "release_status" : "stable", "resources" : { "bugtracker" : { "web" : "https://github.com/JerilynFranz/perl-Unicode-MapUTF8/issues" }, "homepage" : "https://github.com/JerilynFranz/perl-Unicode-MapUTF8", "license" : [ "http://www.opensource.org/licenses/mit-license.php" ], "repository" : { "type" : "git", "url" : "https://github.com/JerilynFranz/perl-Unicode-MapUTF8.git", "web" : "https://github.com/JerilynFranz/perl-Unicode-MapUTF8" } }, "version" : "1.14", "x_serialization_backend" : "JSON::PP version 2.27400_02" } Unicode-MapUTF8-1.14/META.yml000444001750001750 216313734104216 16457 0ustar00snowharesnowhare000000000000--- abstract: 'Conversions to and from arbitrary character sets and UTF8' author: - 'Jerilyn Franz ' build_requires: Carp: '0' File::Copy: '0' Jcode: '0' Unicode::Map: '0' Unicode::Map8: '0' Unicode::String: '0' perl: '5.006' warnings: '0' 
configure_requires: Module::Build: '0.42' dynamic_config: 1 generated_by: 'Module::Build version 0.4224, CPAN::Meta::Converter version 2.150010' license: mit meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: Unicode-MapUTF8 provides: Unicode::MapUTF8: file: lib/Unicode/MapUTF8.pm version: '1.14' requires: Carp: '0' File::Copy: '0' Jcode: '0' Unicode::Map: '0' Unicode::Map8: '0' Unicode::String: '0' perl: '5.006' warnings: '0' resources: bugtracker: https://github.com/JerilynFranz/perl-Unicode-MapUTF8/issues homepage: https://github.com/JerilynFranz/perl-Unicode-MapUTF8 license: http://www.opensource.org/licenses/mit-license.php repository: https://github.com/JerilynFranz/perl-Unicode-MapUTF8.git version: '1.14' x_serialization_backend: 'CPAN::Meta::YAML version 0.018' Unicode-MapUTF8-1.14/Makefile.PL000444001750001750 422613734104216 17162 0ustar00snowharesnowhare000000000000
# Makefile.PL - ExtUtils::MakeMaker installer for Unicode::MapUTF8.
#
# Before the Makefile is written, one of the POD files under pod/ is selected
# from the LANG environment variable (Japanese UTF-8, Japanese EUC-JP,
# otherwise English) and copied to lib/Unicode/MapUTF8.pod.
#
# See lib/ExtUtils/MakeMaker.pm for details of how to influence
# the contents of the Makefile that is written.
use strict;
use warnings;

use ExtUtils::MakeMaker;
use File::Spec;
use File::Copy qw (copy);

# LANG may be unset (e.g. under cron); fall back to English.
my $lang = defined($ENV{'LANG'}) ? $ENV{'LANG'} : 'en';

my $target_pod = File::Spec->catfile('lib', 'Unicode', 'MapUTF8.pod');

# The dots are escaped so that only genuine "language.encoding" locale
# names select a translation.
my $source_name;
if ($lang =~ m/^(?:ja|ja_JP|ja_JP\.utf-8|ja_JP\.utf8|ja\.utf8|ja\.utf-8)$/i) {
    $source_name = 'MapUTF8.ja_JP.utf8.pod';
}
elsif ($lang =~ m/^(?:ja_JP\.eucjp|ja_JP\.euc|ja_euc|ja_eucjp)$/i) {
    $source_name = 'MapUTF8.ja_JP.eucjp.pod';
}
else {
    $source_name = 'MapUTF8.en.pod';
}
my $source_pod = File::Spec->catfile('pod', $source_name);

# The archive ships its members read-only (mode 0444), so an existing target
# has to be made writable before it can be overwritten.
if (-e $target_pod && !-w $target_pod) {
    chmod 0644, $target_pod;
}

# A failed copy is reported but is not fatal: the build can still proceed
# with whatever POD is already present at the target path.
copy($source_pod, $target_pod)
    or warn "Could not copy '$source_pod' to '$target_pod': $!\n";

# Developer releases of MakeMaker carry an underscore in $VERSION
# (e.g. "6.57_05"); strip it so the numeric comparison below is clean.
(my $mm_version = $ExtUtils::MakeMaker::VERSION) =~ tr/_//d;

WriteMakefile(
    NAME             => 'Unicode::MapUTF8',
    VERSION_FROM     => 'lib/Unicode/MapUTF8.pm',
    MIN_PERL_VERSION => '5.006',
    # LICENSE is only passed to MakeMaker 6.3002 and later.
    ($mm_version >= 6.3002 ? ('LICENSE' => 'mit') : ()),
    'linkext'        => { },    # no link needed
    PREREQ_PM        => {
        'Carp'            => 0,
        'File::Copy'      => 0,
        'Unicode::Map'    => 0,
        'Unicode::String' => 0,
        'Unicode::Map8'   => 0,
        'Jcode'           => 0,
    },
    META_MERGE       => {
        'meta-spec' => { version => 2 },
        resources   => {
            bugtracker => {
                web => 'https://github.com/JerilynFranz/perl-Unicode-MapUTF8/issues',
            },
            homepage   => 'https://github.com/JerilynFranz/perl-Unicode-MapUTF8',
            repository => {
                type => 'git',
                url  => 'https://github.com/JerilynFranz/perl-Unicode-MapUTF8.git',
                web  => 'https://github.com/JerilynFranz/perl-Unicode-MapUTF8',
            },
        },
    },
    ($] >= 5.005 ?    ## Add these new keywords supported since 5.005
        (ABSTRACT_FROM => 'lib/Unicode/MapUTF8.pod',    # retrieve abstract from module
         AUTHOR        => 'Jerilyn Franz ') : ()),
    PL_FILES         => {},
);
Unicode-MapUTF8-1.14/README000444001750001750 156613734104216 16074 0ustar00snowharesnowhare000000000000Unicode::MapUTF8 - Conversions to and from arbitrary character sets and UTF8 Provides an adapter layer between core routines for converting to and from UTF8 and other encodings. In essence, a way to give multiple existing Unicode modules a single common interface so you don't have to know the underlying implementations to do simple UTF8 to-from other character set string conversions. As such, it wraps the Unicode::String, Unicode::Map8, Unicode::Map and Jcode modules in a standardized and simple API. Mainly intended for use with Perl 5.6 and 5.0 since starting with Perl 5.8 the Encode modules are the preferred way of handling character set encodings. To install: perl Makefile.PL make make test make install Alternatively, if you have Module::Build installed, perl Build.PL ./Build ./Build test ./Build install See 'perldoc Unicode::MapUTF8' for the documentation. 
Unicode-MapUTF8-1.14/lib000755001750001750 013734104216 15615 5ustar00snowharesnowhare000000000000Unicode-MapUTF8-1.14/lib/Unicode000755001750001750 013734104216 17203 5ustar00snowharesnowhare000000000000Unicode-MapUTF8-1.14/lib/Unicode/MapUTF8.pm000444001750001750 4623713734104216 21116 0ustar00snowharesnowhare000000000000
package Unicode::MapUTF8;

# Common front end for converting strings between UTF8 and other character
# set encodings.  The actual conversions are delegated to Unicode::String,
# Unicode::Map8, Unicode::Map and Jcode; this module selects the converter
# for a charset name (or a runtime alias of one) and calls it.

use strict;
use warnings;

use Carp qw(confess croak carp);
use Unicode::String;
use Unicode::Map;
use Unicode::Map8;
use Jcode;

our ($VERSION, @EXPORT, @EXPORT_OK, %EXPORT_TAGS, @ISA);
use subs qw (utf8_supported_charset to_utf8 from_utf8 utf8_charset_alias _init_charsets);

require Exporter;

BEGIN {
    @ISA         = qw(Exporter);
    @EXPORT      = qw ();
    @EXPORT_OK   = qw (utf8_supported_charset to_utf8 from_utf8 utf8_charset_alias);
    %EXPORT_TAGS = ();    # Exporter reads the hash %EXPORT_TAGS
    $VERSION = "1.14";
}

############################
# File level package globals (class variables)

# charset name (as registered) => converter family
#   ('string', 'jcode', 'map8' or 'unicode-map')
my $_Supported_Charsets;

# lower-cased charset name => charset name as registered
my $_Charset_Names;

# lower-cased alias => lower-cased charset name
my $_Charset_Aliases;

_init_charsets;

######################################################################
#
# utf8_charset_alias();
# utf8_charset_alias($alias);
# utf8_charset_alias({ $alias => $charset, ... });
# utf8_charset_alias($alias => $charset, ...);
#
# With no arguments, returns a hash reference holding a copy of the alias
# table.  With one non-hash argument, returns the (lower-cased) charset the
# alias points to, or undef when no such alias exists.  With a hash
# reference or an even-sized list, sets (or, for undef values, clears) the
# given aliases and returns nothing.  Croaks on any other argument shape.
#
sub utf8_charset_alias {
    # Hand back a shallow copy so callers cannot modify the live table.
    if (@_ == 0) {
        return { %$_Charset_Aliases };
    }

    my @parms_list = @_;

    if (@parms_list == 1) {
        if (ref($parms_list[0]) eq 'HASH') {
            _set_utf8_charset_alias($parms_list[0]);
            return;
        }
        my $lc_charset = lc($parms_list[0]);
        return $_Charset_Aliases->{$lc_charset};
    }

    if ((@parms_list % 2) == 0) {
        _set_utf8_charset_alias({ @parms_list });
        return;
    }

    croak('[' . localtime(time) . '] ' . __PACKAGE__ . "::utf8_charset_alias() - invalid parameters passed\n");
}

######################################################################
# Sets (or clears ;-) ) a runtime character set alias.
#
# Takes a hash reference of alias => charset pairs.  A pair whose value is
# undef removes that alias.  Croaks when a target charset is not one of the
# base charset names, and warns (carp) when an alias shadows a base charset
# name.
sub _set_utf8_charset_alias {
    my ($parms) = @_;

    foreach my $alias (keys %$parms) {
        my $lc_alias = lc($alias);
        my $charset  = $parms->{$alias};

        # An undefined target clears the alias.
        if (! defined $charset) {
            delete $_Charset_Aliases->{$lc_alias};
            next;
        }

        my $lc_charset = lc($charset);
        if (! exists($_Charset_Names->{$lc_charset})) {
            croak('[' . localtime(time) . '] ' . __PACKAGE__ . "::utf8_charset_alias() - attempted to set alias '$alias' to point to unknown charset encoding of '$charset'\n");
        }
        if (exists($_Charset_Names->{$lc_alias})) {
            carp('[' . localtime(time) . '] [warning] ' . __PACKAGE__ . "::utf8_charset_alias() - Aliased base defined charset name '$alias' to '$charset'.");
        }
        $_Charset_Aliases->{$lc_alias} = $lc_charset;
    }
    return;
}

######################################################################
#
# utf8_supported_charset($charset_name);
#
# Returns 1 when the named charset (or alias) is known, 0 otherwise.
# Called in list context with no arguments, returns the sorted list of all
# registered charset names and aliases.  Croaks when no charset name is
# given in any other situation.
#
sub utf8_supported_charset {
    if (@_ == 0 && wantarray) {
        my %all_charsets = (%$_Supported_Charsets, %$_Charset_Aliases);
        my @charsets = sort keys %all_charsets;
        return @charsets;
    }

    my $charset = shift;
    if (not defined $charset) {
        croak('[' . localtime(time) . '] ' . __PACKAGE__ . "::utf8_supported_charset() - no character set specified\n");
    }

    my $lc_charset = lc($charset);
    return 1 if (exists($_Charset_Names->{$lc_charset}));
    return 1 if (exists($_Charset_Aliases->{$lc_charset}));
    return 0;
}

######################################################################
#
# _resolve_conversion_request($function_name, @args);
#
# Shared argument handling for to_utf8() and from_utf8().  Accepts either a
# single reference or an even-sized list of '-string' / '-charset' pairs,
# resolves the charset (through the alias table if needed) and returns
# ($string, $charset, $true_charset, $converter).  An undefined string is
# replaced by ''.  $function_name is only used to label the croak messages.
#
sub _resolve_conversion_request {
    my ($function_name, @parm_list) = @_;
    my $who = __PACKAGE__ . "::${function_name}()";

    my $parms;
    if ((@parm_list >= 2) && ((@parm_list % 2) == 0)) {
        $parms = { @parm_list };
    }
    elsif (@parm_list == 1) {
        $parms = $parm_list[0];
        if (! ref($parms)) {
            croak('[' . localtime(time) . '] ' . $who . " - invalid parameters passed\n");
        }
    }
    else {
        croak('[' . localtime(time) . '] ' . $who . " - bad parameters passed\n");
    }

    if (! (exists $parms->{-string})) {
        croak('[' . localtime(time) . '] ' . $who . " - missing '-string' parameter\n");
    }
    my $string  = $parms->{-string};
    my $charset = $parms->{-charset};
    if (! defined($charset)) {
        croak('[' . localtime(time) . '] ' . $who . " - missing '-charset' parameter value\n");
    }

    # An alias takes precedence over a base charset of the same name.
    my $lc_charset    = lc($charset);
    my $alias_charset = $_Charset_Aliases->{$lc_charset};
    my $true_charset  = defined($alias_charset)
                      ? $_Charset_Names->{$alias_charset}
                      : $_Charset_Names->{$lc_charset};
    if (! defined $true_charset) {
        croak('[' . localtime(time) . '] ' . $who . " - character set '$charset' is not supported\n");
    }

    $string = '' if (! defined($string));
    my $converter = $_Supported_Charsets->{$true_charset};

    return ($string, $charset, $true_charset, $converter);
}

######################################################################
#
# to_utf8({ -string => $string, -charset => $source_charset });
#
# Returns the string converted to UTF8 from the specified source charset.
# Croaks on malformed parameters or an unsupported charset.
#
sub to_utf8 {
    my ($string, $charset, $true_charset, $converter)
        = _resolve_conversion_request('to_utf8', @_);

    if ($converter eq 'map8') {
        return _unicode_map8_to_utf8($string, $true_charset);
    }
    elsif ($converter eq 'unicode-map') {
        return _unicode_map_to_utf8($string, $true_charset);
    }
    elsif ($converter eq 'string') {
        return _unicode_string_to_utf8($string, $true_charset);
    }
    elsif ($converter eq 'jcode') {
        return _jcode_to_utf8($string, $true_charset);
    }

    croak('[' . localtime(time) . '] ' . __PACKAGE__ . "::to_utf8() - charset '$charset' is not supported\n");
}

######################################################################
#
# from_utf8({ -string => $string, -charset => $target_charset });
#
# Returns the string converted from UTF8 to the specified target charset.
# Croaks on malformed parameters or an unsupported charset.
#
sub from_utf8 {
    my ($string, $charset, $true_charset, $converter)
        = _resolve_conversion_request('from_utf8', @_);

    my $result;
    if ($converter eq 'map8') {
        $result = _unicode_map8_from_utf8($string, $true_charset);
    }
    elsif ($converter eq 'unicode-map') {
        $result = _unicode_map_from_utf8($string, $true_charset);
    }
    elsif ($converter eq 'string') {
        $result = _unicode_string_from_utf8($string, $true_charset);
    }
    elsif ($converter eq 'jcode') {
        $result = _jcode_from_utf8($string, $true_charset);
    }
    else {
        croak('[' . localtime(time) . '] ' . __PACKAGE__ . "::from_utf8() - charset '$charset' is not supported\n");
    }
    return $result;
}

######################################################################
#
# _unicode_map_from_utf8($string,$target_charset);
#
# Returns the string converted from UTF8 to the specified target multibyte charset.
# The string is first converted to UCS2, which is then handed to
# Unicode::Map.
#
sub _unicode_map_from_utf8 {
    my ($string, $target_charset) = @_;

    if (! defined $target_charset) {
        croak('[' . localtime(time) . '] ' . __PACKAGE__ . '::_unicode_map_from_utf8() - (line ' . __LINE__ . ") No target character set specified\n");
    }

    my $ucs2   = from_utf8({ -string => $string, -charset => 'ucs2' });
    my $target = Unicode::Map->new($target_charset);
    if (! defined $target) {
        confess('[' . localtime(time) . '] ' . __PACKAGE__ . '::_unicode_map_from_utf8() - (line ' . __LINE__ . ") failed to instantiate Unicode::Map object for charset '$target_charset': $!\n");
    }

    my $result = $target->from_unicode($ucs2);
    return $result;
}

######################################################################
#
# _unicode_map_to_utf8($string,$source_charset);
#
# Returns the string converted from the specified source multibyte charset
# to UTF8.  Unicode::Map produces UCS2, which is then converted to UTF8.
#
sub _unicode_map_to_utf8 {
    my ($string, $source_charset) = @_;

    if (! defined $source_charset) {
        croak('[' . localtime(time) . '] ' . __PACKAGE__ . '::_unicode_map_to_utf8() - (line ' . __LINE__ . ") No source character set specified\n");
    }

    my $source = Unicode::Map->new($source_charset);
    if (! defined $source) {
        confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_map_to_utf8() - (line " . __LINE__ . ") failed to instantiate a Unicode::Map object: $!\n");
    }

    my $ucs2   = $source->to_unicode($string);
    my $result = to_utf8({ -string => $ucs2, -charset => 'ucs2' });
    return $result;
}

######################################################################
#
# _unicode_map8_from_utf8($string,$target_charset);
#
# Returns the string converted from UTF8 to the specified target 8bit charset.
#
sub _unicode_map8_from_utf8 {
    my ($string, $target_charset) = @_;

    if (! defined $target_charset) {
        croak('[' . localtime(time) . '] ' . __PACKAGE__ . '::_unicode_map8_from_utf8() - (line ' . __LINE__ . ") No target character set specified\n");
    }

    my $u = Unicode::String::utf8($string);
    if (! defined $u) {
        confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_map8_from_utf8() - (line " . __LINE__ . ") failed to instantiate Unicode::String::utf8 object: $!\n");
    }

    # A leading 0xFFFE means the data is byte-swapped; swap it back.
    my $ordering = $u->ord;
    $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE));
    my $ucs2_string = $u->ucs2;

    my $target = Unicode::Map8->new($target_charset);
    if (! defined $target) {
        confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_map8_from_utf8() - (line " . __LINE__ . ") failed to instantiate Unicode::Map8 object for character set '$target_charset': $!\n");
    }

    my $result = $target->to8($ucs2_string);
    return $result;
}

######################################################################
#
# _unicode_map8_to_utf8($string,$source_charset);
#
# Returns the string converted from the specified source 8bit charset to
# UTF8.
#
sub _unicode_map8_to_utf8 {
    my ($string, $source_charset) = @_;

    my $source = Unicode::Map8->new($source_charset);
    if (! defined $source) {
        confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_map8_to_utf8() - (line " . __LINE__ . ") failed to instantiate a Unicode::Map8 object for character set '$source_charset': $!\n");
    }

    my $ucs2_string = $source->tou($string);
    if (! defined $ucs2_string) {
        confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_map8_to_utf8() - (line " . __LINE__ . ") failed to instantiate a Unicode::String::utf16 object: $!\n");
    }

    my $utf8_string = $ucs2_string->utf8;
    return $utf8_string;
}

######################################################################
#
# _unicode_string_from_utf8($string,$target_charset);
#
# Returns the string converted from UTF8 to the specified unicode encoding
# ('utf8', 'ucs2', 'ucs4', 'utf16' or 'utf7').  Croaks for anything else.
#
sub _unicode_string_from_utf8 {
    my ($string, $target_charset) = @_;

    $target_charset = lc($target_charset);

    # UTF8 to UTF8 is a pass-through.
    return $string if ($target_charset eq 'utf8');

    # For the remaining encodings the Unicode::String accessor has the same
    # name as the charset.
    my %is_accessor = map { ($_ => 1) } qw(ucs2 ucs4 utf16 utf7);
    if (! $is_accessor{$target_charset}) {
        croak('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_string_from_utf8() - charset '$target_charset' is not supported\n");
    }

    my $u = Unicode::String::utf8($string);
    # A leading 0xFFFE means the data is byte-swapped; swap it back.
    my $ordering = $u->ord;
    $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE));

    my $final = $u->$target_charset();
    return $final;
}

######################################################################
#
# _unicode_string_to_utf8($string,$source_charset);
#
# Returns the string converted from the specified unicode encoding to UTF8.
#
# Supported source encodings are 'utf8', 'ucs2', 'ucs4', 'utf16' and
# 'utf7'.  Croaks for anything else.
#
sub _unicode_string_to_utf8 {
    my ($string, $source_charset) = @_;

    $source_charset = lc($source_charset);

    # UTF8 to UTF8 is a pass-through.
    return $string if ($source_charset eq 'utf8');

    # charset => [ constructor name used in diagnostics, constructor ].
    # 'ucs2' input is read with the utf16 constructor.
    my %decoder_for = (
        'ucs2'  => [ 'utf16', \&Unicode::String::utf16 ],
        'ucs4'  => [ 'ucs4',  \&Unicode::String::ucs4  ],
        'utf16' => [ 'utf16', \&Unicode::String::utf16 ],
        'utf7'  => [ 'utf7',  \&Unicode::String::utf7  ],
    );

    my $decoder = $decoder_for{$source_charset};
    if (! defined $decoder) {
        croak('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_string_to_utf8() - charset '$source_charset' is not supported\n");
    }

    my ($constructor_name, $constructor) = @$decoder;
    my $u = $constructor->($string);
    if (! defined $u) {
        confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_string_to_utf8() - (line " . __LINE__ . ") failed to instantiate a Unicode::String::$constructor_name object: $!\n");
    }

    # A leading 0xFFFE means the data is byte-swapped; swap it back.
    my $ordering = $u->ord;
    $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE));

    my $final = $u->utf8;
    return $final;
}

######################################################################
#
# _jcode_from_utf8($string,$target_charset);
#
# Returns the string converted from UTF8 to the specified Jcode encoding.
#
# Every Shift-JIS spelling registered in _init_charsets (sjis, s-jis, s_jis,
# shiftjis, shift-jis, shift_jis) is accepted, matching _jcode_to_utf8.
#
sub _jcode_from_utf8 {
    my ($string, $target_charset) = @_;

    my $j = Jcode->new($string, 'utf8');

    $target_charset = lc($target_charset);

    my $final;
    if ($target_charset =~ m/^iso[-_]2022[-_]jp$/) {
        $final = $j->iso_2022_jp;
    }
    elsif ($target_charset =~ m/^(?:s[-_]?jis|shift[-_]?jis)$/) {
        $final = $j->sjis;
    }
    elsif ($target_charset eq 'euc-jp') {
        $final = $j->euc;
    }
    elsif ($target_charset eq 'jis') {
        $final = $j->jis;
    }
    else {
        croak('[' . localtime(time) . '] ' . __PACKAGE__ . "::_jcode_from_utf8() - charset '$target_charset' is not supported\n");
    }
    return $final;
}

######################################################################
#
# _jcode_to_utf8($string,$source_charset);
#
# Returns the string converted from the specified Jcode encoding to UTF8.
#
sub _jcode_to_utf8 {
    my ($string, $source_charset) = @_;

    $source_charset = lc($source_charset);

    my $final;
    if ($source_charset =~ m/^iso[-_]2022[-_]jp$/) {
        # ISO-2022-JP input is read as JIS and passed through h2z before
        # being converted.
        my $j = Jcode->new($string, 'jis')->h2z;
        $final = $j->utf8;
    }
    elsif ($source_charset =~ m/^(s[-_]?jis|shift[-_]?jis)$/) {
        my $j = Jcode->new($string, 'sjis');
        $final = $j->utf8;
    }
    elsif ($source_charset eq 'euc-jp') {
        my $j = Jcode->new($string, 'euc');
        $final = $j->utf8;
    }
    elsif ($source_charset eq 'jis') {
        my $j = Jcode->new($string, 'jis');
        $final = $j->utf8;
    }
    else {
        croak('[' . localtime(time) . '] ' . __PACKAGE__ . "::_jcode_to_utf8() - charset '$source_charset' is not supported\n");
    }
    return $final;
}

#######################################################################
#
# Character set handlers maps
#
# Builds the three file-level tables: the (empty) alias table, the
# charset => converter family table, and the lower-cased name lookup table.
# Built-in Unicode::String and Jcode charsets are registered first, then
# Unicode::Map8 charsets, then Unicode::Map charsets; a later source never
# overrides a name (compared case-insensitively) that is already registered.
#
sub _init_charsets {
    $_Charset_Aliases = {};

    $_Supported_Charsets = {
        'utf8'        => 'string',
        'ucs2'        => 'string',
        'ucs4'        => 'string',
        'utf7'        => 'string',
        'utf16'       => 'string',
        'sjis'        => 'jcode',
        's-jis'       => 'jcode',
        's_jis'       => 'jcode',
        'shiftjis'    => 'jcode',
        'shift-jis'   => 'jcode',
        'shift_jis'   => 'jcode',
        'iso-2022-jp' => 'jcode',
        'iso_2022_jp' => 'jcode',
        'jis'         => 'jcode',
        'euc-jp'      => 'jcode',
    };

    $_Charset_Names = { map { ( lc($_) => $_ ) } keys %$_Supported_Charsets };

    # All the Unicode::Map8 charsets
    foreach my $id (_list_unicode_map8_charsets()) {
        my $lc_id = lc($id);
        next if (exists($_Charset_Names->{$lc_id}));
        $_Supported_Charsets->{$id} = 'map8';
        $_Charset_Names->{$lc_id}   = $id;
    }

    # Add any charsets not already listed from Unicode::Map
    my $unicode_map = Unicode::Map->new;
    foreach my $id ($unicode_map->ids) {
        my $lc_id = lc($id);
        next if (exists($_Charset_Names->{$lc_id}));
        $_Supported_Charsets->{$id} = 'unicode-map';
        $_Charset_Names->{$lc_id}   = $id;
    }

    return;
}

######################################################################
#
# Code taken and modified from the 'usr/bin/umap' code distributed
# with Unicode::Map8. It wouldn't be necessary if Unicode::Map8
# had a direct method for this....
#
# Returns the sorted list of charset names and alias names that
# Unicode::Map8 can handle: the fixed Unicode encodings listed below, every
# map file in $Unicode::Map8::MAPS_DIR that Unicode::Map8->new() accepts,
# and every entry of %Unicode::Map8::ALIASES that points at one of those.
# An unreadable maps directory is not an error; only the fixed entries (and
# their aliases) are returned in that case.
#
sub _list_unicode_map8_charsets {
    # charset name => { alias name => 1, ... }
    my %set = (
        ucs4 => {},
        ucs2 => { utf16 => 1 },
        utf7 => {},
        utf8 => {},
    );

    my $maps_dir = $Unicode::Map8::MAPS_DIR;
    if (opendir(my $dir_handle, $maps_dir)) {
        my @files = grep { !/^\.\.?$/ } readdir($dir_handle);
        closedir($dir_handle);

        foreach my $file (@files) {
            next unless -f "$maps_dir/$file";
            # The charset name is the file name without its map extension.
            (my $name = $file) =~ s/\.(?:bin|txt)$//;
            # Only register names Unicode::Map8 can actually load.
            if (Unicode::Map8->new($name)) {
                $set{$name} = {};
            }
        }
    }

    # Attach each Unicode::Map8 alias to the charset it points at.
    foreach my $alias (keys %Unicode::Map8::ALIASES) {
        my $charset = $Unicode::Map8::ALIASES{$alias};
        if (exists $set{$charset}) {
            $set{$charset}{$alias} = 1;
        }
    }

    # Flatten charset names and their aliases into one de-duplicated list.
    my %merged_set = ();
    foreach my $encoding (keys %set) {
        $merged_set{$encoding} = 1;
        foreach my $alias (keys %{ $set{$encoding} }) {
            $merged_set{$alias} = 1;
        }
    }

    my @final_charsets = sort keys %merged_set;
    return @final_charsets;
}

######################################################################
1;
 Unicode-MapUTF8-1.14/lib/Unicode/MapUTF8.pod000444001750001750 1762213734104216 21260 0ustar00snowharesnowhare000000000000

=head1 NAME

Unicode::MapUTF8 - Conversions to and from arbitrary character sets and UTF8

=head1 SYNOPSIS

 use Unicode::MapUTF8 qw(to_utf8 from_utf8 utf8_supported_charset);

 # Convert a string in 'ISO-8859-1' to 'UTF8'
 my $output = to_utf8({ -string => 'An example', -charset => 'ISO-8859-1' });

 # Convert a string in 'UTF8' encoding to encoding 'ISO-8859-1'
 my $other = from_utf8({ -string => 'Other text', -charset => 'ISO-8859-1' });

 # List available character set encodings
 my @character_sets = utf8_supported_charset;

 # Add a character set alias
 utf8_charset_alias({ 'ms-japanese' => 'sjis' });

 # Convert between two arbitrary (but largely compatible) charset encodings
 # (SJIS to EUC-JP)
 my $utf8_string = to_utf8({ -string =>$sjis_string, -charset => 'sjis'});
 my $euc_jp_string = from_utf8({ -string => $utf8_string, -charset => 'euc-jp' });

 # Verify that a specific character set is supported
 if (utf8_supported_charset('ISO-8859-1')) {
     # Yes
 }

=head1 DESCRIPTION

Provides an adapter layer between core 
routines for converting to and from UTF8 and other encodings. In essence, a way to give multiple existing Unicode modules a single common interface so you don't have to know the underlying implementations to do simple UTF8 to-from other character set encoding conversions. As such, it wraps the Unicode::String, Unicode::Map8, Unicode::Map and Jcode modules in a standardized and simple API. This also provides general character set conversion operation based on UTF8 - it is possible to convert between any two compatible and supported character sets via a simple two step chaining of conversions. As with most things Perlish - if you give it a few big chunks of text to chew on instead of lots of small ones it will handle many more characters per second. By design, it can be easily extended to encompass any new charset encoding conversion modules that arrive on the scene. This module is intended to provide good Unicode support to versions of Perl prior to 5.8. If you are using Perl 5.8.0 or later, you probably want to be using the Encode module instead. This module B<does> work with Perl 5.8, but Encode is the preferred method in that environment. =head1 CHANGES 1.14 2020.09.27 Fixing POD breakage in EUC-JP version of POD 1.13 2020.09.27 Fixing MANIFEST.SKIP error 1.12 2020.09.27 Build tool updates. Maintainer updates. POD error fixes. Relicensed under MIT license. 1.11 2005.10.10 Documentation changes. Addition of Build.PL support. Added various build tests, LICENSE, Artistic_License.txt, GPL_License.txt. Split documentation into separate .pod file. Added Japanese translation of POD. 1.10 2005.05.22 - Fixed bug in conversion of ISO-2022-JP to UTF-8. Problem and fix found by Masahiro HONMA . Similar bugs in conversions of shift_jis and euc-jp to UTF-8 corrected as well. 1.09 2001.08.22 - Fixed multiple typo occurrences of 'uft' where 'utf' was meant in code. Problem affected utf16 and utf7 encodings. 
Problem found by devon smith 1.08 2000.11.06 Added 'utf8_charset_alias' function to allow for runtime setting of character set aliases. Added several alternate names for 'sjis' (shiftjis, shift-jis, shift_jis, s-jis, and s_jis). Corrected 'croak' messages for 'from_utf8' functions to appropriate function name. Corrected fatal problem in jcode-unicode internals. Problem and fix found by Brian Wisti . 1.07 2000.11.01 Added 'croak' to use Carp declaration to fix error messages. Problem and fix found by . 1.06 2000.10.30 Fix to handle change in stringification of overloaded objects between Perl 5.005 and 5.6. Problem noticed by Brian Wisti . 1.05 2000.10.23 Error in conversions from UTF8 to multibyte encodings corrected 1.04 2000.10.23 Additional diagnostic error messages added for internal errors 1.03 2000.10.22 Bug fix for load time Unicode::Map encoding detection 1.02 2000.10.22 Bug fix to 'from_utf8' method and load time detection of Unicode::Map8 supported character set encodings 1.01 2000.10.02 Initial public release =head1 FUNCTIONS =over 4 =item utf8_charset_alias({ $alias => $charset }); Used for runtime assignment of character set aliases. Called with no parameters, returns a hash of defined aliases and the character sets they map to. Example: my $aliases = utf8_charset_alias; my @alias_names = keys %$aliases; If called with ONE parameter, returns the name of the 'real' charset if the alias is defined. Returns undef if it is not found in the aliases. Example: if (! utf8_charset_alias('VISCII')) { # No alias for this } If called with a list of 'alias' => 'charset' pairs, defines those aliases for use. Example: utf8_charset_alias({ 'japanese' => 'sjis', 'japan' => 'sjis' }); Note: It will croak if a passed pair does not map to a character set defined in the predefined set of character encoding. It is NOT allowed to alias something to another alias. Multiple character set aliases can be set with a single call. 
To clear an alias, pass a character set mapping of undef. Example: utf8_charset_alias({ 'japanese' => undef }); While an alias is set, the 'utf8_supported_charset' function will return the alias as if it were a predefined charset. Overriding a base defined character encoding with an alias will generate a warning message to STDERR. =back =over 4 =item utf8_supported_charset($charset_name); Returns true if the named charset is supported (including user defined aliases). Returns false if it is not. Example: if (! utf8_supported_charset('VISCII')) { # No support yet } If called in a list context with no parameters, it will return a list of all supported character set names (including user defined aliases). Example: my @charsets = utf8_supported_charset; =back =over 4 =item to_utf8({ -string => $string, -charset => $source_charset }); Returns the string converted to UTF8 from the specified source charset. =back =over 4 =item from_utf8({ -string => $string, -charset => $target_charset}); Returns the string converted from UTF8 to the specified target charset. =back =head1 VERSION 1.14 2020.09.27 =head1 TODO Regression tests for Jcode, 2-byte encodings and encoding aliases =head1 SEE ALSO L<Unicode::String> L<Unicode::Map8> L<Unicode::Map> L<Jcode> L<Encode> =head1 COPYRIGHT Copyright 2000-2020, Jerilyn Franz. All rights reserved. =head1 AUTHOR Jerilyn Franz =head1 LICENSE MIT License Copyright (c) 2020 Jerilyn Franz Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. =cut Unicode-MapUTF8-1.14/pod000755001750001750 013734104216 15631 5ustar00snowharesnowhare000000000000Unicode-MapUTF8-1.14/pod/MapUTF8.en.pod000444001750001750 1762213734104216 20307 0ustar00snowharesnowhare000000000000 =head1 NAME Unicode::MapUTF8 - Conversions to and from arbitrary character sets and UTF8 =head1 SYNOPSIS use Unicode::MapUTF8 qw(to_utf8 from_utf8 utf8_supported_charset); # Convert a string in 'ISO-8859-1' to 'UTF8' my $output = to_utf8({ -string => 'An example', -charset => 'ISO-8859-1' }); # Convert a string in 'UTF8' encoding to encoding 'ISO-8859-1' my $other = from_utf8({ -string => 'Other text', -charset => 'ISO-8859-1' }); # List available character set encodings my @character_sets = utf8_supported_charset; # Add a character set alias utf8_charset_alias({ 'ms-japanese' => 'sjis' }); # Convert between two arbitrary (but largely compatible) charset encodings # (SJIS to EUC-JP) my $utf8_string = to_utf8({ -string =>$sjis_string, -charset => 'sjis'}); my $euc_jp_string = from_utf8({ -string => $utf8_string, -charset => 'euc-jp' }); # Verify that a specific character set is supported if (utf8_supported_charset('ISO-8859-1')) { # Yes } =head1 DESCRIPTION Provides an adapter layer between core routines for converting to and from UTF8 and other encodings. In essence, a way to give multiple existing Unicode modules a single common interface so you don't have to know the underlying implementations to do simple UTF8 to-from other character set encoding conversions. 
As such, it wraps the Unicode::String, Unicode::Map8, Unicode::Map and Jcode modules in a standardized and simple API. This also provides general character set conversion operation based on UTF8 - it is possible to convert between any two compatible and supported character sets via a simple two step chaining of conversions. As with most things Perlish - if you give it a few big chunks of text to chew on instead of lots of small ones it will handle many more characters per second. By design, it can be easily extended to encompass any new charset encoding conversion modules that arrive on the scene. This module is intended to provide good Unicode support to versions of Perl prior to 5.8. If you are using Perl 5.8.0 or later, you probably want to be using the Encode module instead. This module B<does> work with Perl 5.8, but Encode is the preferred method in that environment. =head1 CHANGES 1.14 2020.09.27 Fixing POD breakage in EUC-JP version of POD 1.13 2020.09.27 Fixing MANIFEST.SKIP error 1.12 2020.09.27 Build tool updates. Maintainer updates. POD error fixes. Relicensed under MIT license. 1.11 2005.10.10 Documentation changes. Addition of Build.PL support. Added various build tests, LICENSE, Artistic_License.txt, GPL_License.txt. Split documentation into separate .pod file. Added Japanese translation of POD. 1.10 2005.05.22 - Fixed bug in conversion of ISO-2022-JP to UTF-8. Problem and fix found by Masahiro HONMA. Similar bugs in conversions of shift_jis and euc-jp to UTF-8 corrected as well. 1.09 2001.08.22 - Fixed multiple typo occurrences of 'uft' where 'utf' was meant in code. Problem affected utf16 and utf7 encodings. Problem found by devon smith 1.08 2000.11.06 Added 'utf8_charset_alias' function to allow for runtime setting of character set aliases. Added several alternate names for 'sjis' (shiftjis, shift-jis, shift_jis, s-jis, and s_jis). Corrected 'croak' messages for 'from_utf8' functions to appropriate function name. 
Corrected fatal problem in jcode-unicode internals. Problem and fix found by Brian Wisti . 1.07 2000.11.01 Added 'croak' to use Carp declaration to fix error messages. Problem and fix found by . 1.06 2000.10.30 Fix to handle change in stringification of overloaded objects between Perl 5.005 and 5.6. Problem noticed by Brian Wisti . 1.05 2000.10.23 Error in conversions from UTF8 to multibyte encodings corrected 1.04 2000.10.23 Additional diagnostic error messages added for internal errors 1.03 2000.10.22 Bug fix for load time Unicode::Map encoding detection 1.02 2000.10.22 Bug fix to 'from_utf8' method and load time detection of Unicode::Map8 supported character set encodings 1.01 2000.10.02 Initial public release =head1 FUNCTIONS =over 4 =item utf8_charset_alias({ $alias => $charset }); Used for runtime assignment of character set aliases. Called with no parameters, returns a hash of defined aliases and the character sets they map to. Example: my $aliases = utf8_charset_alias; my @alias_names = keys %$aliases; If called with ONE parameter, returns the name of the 'real' charset if the alias is defined. Returns undef if it is not found in the aliases. Example: if (! utf8_charset_alias('VISCII')) { # No alias for this } If called with a list of 'alias' => 'charset' pairs, defines those aliases for use. Example: utf8_charset_alias({ 'japanese' => 'sjis', 'japan' => 'sjis' }); Note: It will croak if a passed pair does not map to a character set defined in the predefined set of character encoding. It is NOT allowed to alias something to another alias. Multiple character set aliases can be set with a single call. To clear an alias, pass a character set mapping of undef. Example: utf8_charset_alias({ 'japanese' => undef }); While an alias is set, the 'utf8_supported_charset' function will return the alias as if it were a predefined charset. Overriding a base defined character encoding with an alias will generate a warning message to STDERR. 
=back =over 4 =item utf8_supported_charset($charset_name); Returns true if the named charset is supported (including user defined aliases). Returns false if it is not. Example: if (! utf8_supported_charset('VISCII')) { # No support yet } If called in a list context with no parameters, it will return a list of all supported character set names (including user defined aliases). Example: my @charsets = utf8_supported_charset; =back =over 4 =item to_utf8({ -string => $string, -charset => $source_charset }); Returns the string converted to UTF8 from the specified source charset. =back =over 4 =item from_utf8({ -string => $string, -charset => $target_charset}); Returns the string converted from UTF8 to the specified target charset. =back =head1 VERSION 1.14 2020.09.27 =head1 TODO Regression tests for Jcode, 2-byte encodings and encoding aliases =head1 SEE ALSO L<Unicode::String> L<Unicode::Map8> L<Unicode::Map> L<Jcode> L<Encode> =head1 COPYRIGHT Copyright 2000-2020, Jerilyn Franz. All rights reserved. =head1 AUTHOR Jerilyn Franz =head1 LICENSE MIT License Copyright (c) 2020 Jerilyn Franz Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. =cut Unicode-MapUTF8-1.14/pod/MapUTF8.ja_JP.eucjp.pod000444001750001750 1725113734104216 21773 0ustar00snowharesnowhare000000000000=encoding euc-jp =head1 名前 Unicode::MapUTF8 - 任意の文字セットから/へのUTF8の変換 =head1 概要 use Unicode::MapUTF8 qw(to_utf8 from_utf8 utf8_supported_charset); # 'ISO-8859-1'の文字列を 'UTF8'に変換する my $output = to_utf8({ -string => 'An example', -charset => 'ISO-8859-1' }); # 'UTF8'エンコーディングの文字列をエンコーディング 'ISO-8859-1'へ変換する my $other = from_utf8({ -string => 'Other text', -charset => 'ISO-8859-1' }); # 利用できる文字セットエンコーディングのリスト my @character_sets = utf8_supported_charset; # 文字セット別名の追加 utf8_charset_alias({ 'ms-japanese' => 'sjis' }); # 2つの任意の(しかし大きく互換性のある)文字セットエンコーディングでの変換 # (SJIS to EUC-JP) my $utf8_string = to_utf8({ -string =>$sjis_string, -charset => 'sjis'}); my $euc_jp_string = from_utf8({ -string => $utf8_string, -charset => 'euc-jp' }); # 特定の文字セットがサポートされているかを確認 if (utf8_supported_charset('ISO-8859-1')) { # Yes } =head1 説明 UTF8とその他のエンコーディングから/への変換のための中核となるルーチンの間のアダプタ層を提供します。本質的には、複数存在する Unicodeモジュールへの1つの共通のインターフェース、下敷きになっている実装を知ることなく、簡単にUTF8から/へ他の文字セットエンコーディング変換を簡単におこないます。そのため、これはUnicode::String、Unicode::Map8、Unicode::Map、Jcodeモジュールを標準化された簡単なAPIの中に包みます。 またこれはUTF-8をベースに一般的な文字セット変換も提供します−これは2段階の変換をつなげることにより、2つの互換性があり、サポートされている文字セットで可能です。 ほとんどのことがPerl的なので−かみ砕く対象として数多くの小さなものの代わりに2、3の大きな固まりを与えると、1秒間により多くの文字を扱います。 設計では、登場するいかなる新しい文字セット・エンコーディング変換モジュールも取り込むように簡単に拡張することができます。 =head1 変更点 (原文のまま) 1.14 2020.09.27 Fixing POD breakage in EUC-JP version of POD 1.13 2020.09.27 Fixing MANIFEST.SKIP error 1.12 2020.09.27 Build tool updates. Maintainer updates. POD error fixes. Relicensed under MIT license. 1.11 2005.10.10 Documentation changes. Addition of Build.PL support. 
Added various build tests, LICENSE, Artistic_License.txt, GPL_License.txt. Split documentation into separate .pod file. Added Japanese translation of POD. 1.10 2005.05.22 - Fixed bug in conversion of ISO-2022-JP to UTF-8. Problem and fix found by Masahiro HONMA. Similar bugs in conversions of shift_jis and euc-jp to UTF-8 corrected as well. 1.09 2001.08.22 - Fixed multiple typo occurrences of 'uft' where 'utf' was meant in code. Problem affected utf16 and utf7 encodings. Problem found by devon smith 1.08 2000.11.06 Added 'utf8_charset_alias' function to allow for runtime setting of character set aliases. Added several alternate names for 'sjis' (shiftjis, shift-jis, shift_jis, s-jis, and s_jis). Corrected 'croak' messages for 'from_utf8' functions to appropriate function name. Corrected fatal problem in jcode-unicode internals. Problem and fix found by Brian Wisti. 1.07 2000.11.01 Added 'croak' to use Carp declaration to fix error messages. Problem and fix found by . 1.06 2000.10.30 Fix to handle change in stringification of overloaded objects between Perl 5.005 and 5.6. Problem noticed by Brian Wisti. 1.05 2000.10.23 Error in conversions from UTF8 to multibyte encodings corrected 1.04 2000.10.23 Additional diagnostic error messages added for internal errors 1.03 2000.10.22 Bug fix for load time Unicode::Map encoding detection 1.02 2000.10.22 Bug fix to 'from_utf8' method and load time detection of Unicode::Map8 supported character set encodings 1.01 2000.10.02 Initial public release =head1 関数 =over =item utf8_charset_alias({ $alias => $charset }); 文字セット別名の実行時の代入に使われます。 引数なしで呼ばれると、定義されている別名と、それにマップされる文字セットのハッシュを返します。 例: my $aliases = utf8_charset_alias; my @alias_names = keys %$aliases; 1つのパラメータ付きで呼ばれると、もしその別名が定義されていれば、'本当の'文字セットの名前を返します。それが別名に見つからなければundefを返します。 例: if (! 
utf8_charset_alias('VISCII')) { # No alias for this } もし'alias' => 'charset'の組のリストで呼ばれれば、それらの別名が使えるように定義します。 例: utf8_charset_alias({ 'japanese' => 'sjis', 'japan' => 'sjis' }); 注意:渡された組が、予め定義されている文字セット・エンコーディングの集合に定義されている文字セットにマップされなければ、croakします。これは他の別名への別名を許してはいません。 複数の文字セットを1回の呼出しで設定することができます。 別名をクリーンするためには、undefの文字セットマッピングを渡します。 例: utf8_charset_alias({ 'japanese' => undef }); 別名が設定されている間、もし予め定義されている文字セットであれば、'utf8_supported_charset' 関数は別名を返します。 基本の定義された文字エンコーディングを別名でオーバーライドすると、標準エラー(STDERR)への警告メッセージを出します。 =back =over =item utf8_supported_charset($charset_name); (ユーザが定義した別名も含めて)名づけられた文字セットがサポートされていればtrueを返します。

そうでなければfalseを返します。 例: if (! utf8_supported_charset('VISCII')) { # まだサポートされていません } パラメータなしで、リスト・コンテキストで呼ばれると、(ユーザが定義した別名も含めて)サポートされているすべての文字セット名のリストを返します。 例: my @charsets = utf8_supported_charset; =back =over =item to_utf8({ -string => $string, -charset => $source_charset }); 指定された元の文字セット(source charset)からUTF8に変換された文字列を返します。 =back =over =item from_utf8({ -string => $string, -charset => $target_charset}); UTF8から指定されたターゲットの文字セット(target charset)に変換された文字列を返します。 =back =head1 バージョン 1.14 - 2020.09.27 =head1 やるべきこと Jcode、2バイト・エンコーディングそしてエンコーディング別名のための回帰テスト =head1 参考資料 L<Unicode::String> L<Unicode::Map8> L<Unicode::Map> L<Jcode> =head1 著作権 Copyright 2000-2020, Jerilyn Franz. All rights reserved. =head1 作者 Jerilyn Franz =head1 クレジット 川合孝典 "Kawai,Takanori" - 邦訳 =head1 ライセンス MIT License Copyright (c) 2020 Jerilyn Franz Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
=cut Unicode-MapUTF8-1.14/pod/MapUTF8.ja_JP.utf8.pod000444001750001750 2155413734104216 21554 0ustar00snowharesnowhare000000000000=encoding utf8 =head1 名前 Unicode::MapUTF8 - 任意の文字セットから/へのUTF8の変換 =head1 概要 use Unicode::MapUTF8 qw(to_utf8 from_utf8 utf8_supported_charset); # 'ISO-8859-1'の文字列を 'UTF8'に変換する my $output = to_utf8({ -string => 'An example', -charset => 'ISO-8859-1' }); # 'UTF8'エンコーディングの文字列をエンコーディング 'ISO-8859-1'へ変換する my $other = from_utf8({ -string => 'Other text', -charset => 'ISO-8859-1' }); # 利用できる文字セットエンコーディングのリスト my @character_sets = utf8_supported_charset; # 文字セット別名の追加 utf8_charset_alias({ 'ms-japanese' => 'sjis' }); # 2つの任意の(しかし大きく互換性のある)文字セットエンコーディングでの変換 # (SJIS to EUC-JP) my $utf8_string = to_utf8({ -string =>$sjis_string, -charset => 'sjis'}); my $euc_jp_string = from_utf8({ -string => $utf8_string, -charset => 'euc-jp' }); # 特定の文字セットがサポートされているかを確認 if (utf8_supported_charset('ISO-8859-1')) { # Yes } =head1 説明 UTF8とその他のエンコーディングから/への変換のための中核となるルーチンの間のアダプタ層を提供します。本質的には、複数存在する Unicodeモジュールへの1つの共通のインターフェース、下敷きになっている実装を知ることなく、簡単にUTF8から/へ他の文字セットエンコーディング変換を簡単におこないます。そのため、これはUnicode::String、Unicode::Map8、Unicode::Map、Jcodeモジュールを標準化された簡単なAPIの中に包みます。 またこれはUTF-8をベースに一般的な文字セット変換も提供します−これは2段階の変換をつなげることにより、2つの互換性があり、サポートされている文字セットで可能です。 ほとんどのことがPerl的なので−かみ砕く対象として数多くの小さなものの代わりに2、3の大きな固まりを与えると、1秒間により多くの文字を扱います。 設計では、登場するいかなる新しい文字セット・エンコーディング変換モジュールも取り込むように簡単に拡張することができます。 =head1 変更点 (原文のまま) 1.14 2020.09.27 Fixing POD breakage in EUC-JP version of POD 1.13 2020.09.27 Fixing MANIFEST.SKIP error 1.12 2020.09.27 Build tool updates. Maintainer updates. POD error fixes. Relicensed under MIT license. 1.11 2005.10.10 Documentation changes. Addition of Build.PL support. Added various build tests, LICENSE, Artistic_License.txt, GPL_License.txt. Split documentation into separate .pod file. Added Japanese translation of POD. 1.10 2005.05.22 - Fixed bug in conversion of ISO-2022-JP to UTF-8. Problem and fix found by Masahiro HONMA. Similar bugs in conversions of shift_jis and euc-jp to UTF-8 corrected as well. 1.09 2001.08.22 - Fixed multiple typo occurrences of 'uft' where 'utf' was meant in code. Problem affected utf16 and utf7 encodings. Problem found by devon smith 1.08 2000.11.06 Added 'utf8_charset_alias' function to allow for runtime setting of character set aliases. 
Added several alternate names for 'sjis' (shiftjis, shift-jis, shift_jis, s-jis, and s_jis). Corrected 'croak' messages for 'from_utf8' functions to appropriate function name. Corrected fatal problem in jcode-unicode internals. Problem and fix found by Brian Wisti. 1.07 2000.11.01 Added 'croak' to use Carp declaration to fix error messages. Problem and fix found by . 1.06 2000.10.30 Fix to handle change in stringification of overloaded objects between Perl 5.005 and 5.6. Problem noticed by Brian Wisti. 1.05 2000.10.23 Error in conversions from UTF8 to multibyte encodings corrected 1.04 2000.10.23 Additional diagnostic error messages added for internal errors 1.03 2000.10.22 Bug fix for load time Unicode::Map encoding detection 1.02 2000.10.22 Bug fix to 'from_utf8' method and load time detection of Unicode::Map8 supported character set encodings 1.01 2000.10.02 Initial public release =head1 関数 =over =item utf8_charset_alias({ $alias => $charset }); 文字セット別名の実行時の代入に使われます。 引数なしで呼ばれると、定義されている別名と、それにマップされる文字セットのハッシュを返します。 例: my $aliases = utf8_charset_alias; my @alias_names = keys %$aliases; 1つのパラメータ付きで呼ばれると、もしその別名が定義されていれば、'本当の'文字セットの名前を返します。それが別名に見つからなければundefを返します。 例: if (! utf8_charset_alias('VISCII')) { # No alias for this } もし'alias' => 'charset'の組のリストで呼ばれれば、それらの別名が使えるように定義します。 例: utf8_charset_alias({ 'japanese' => 'sjis', 'japan' => 'sjis' }); 注意:渡された組が、予め定義されている文字セット・エンコーディングの集合に定義されている文字セットにマップされなければ、croakします。これは他の別名への別名を許してはいません。 複数の文字セットを1回の呼出しで設定することができます。 別名をクリーンするためには、undefの文字セットマッピングを渡します。 例: utf8_charset_alias({ 'japanese' => undef }); 別名が設定されている間、もし予め定義されている文字セットであれば、'utf8_supported_charset' 関数は別名を返します。 基本の定義された文字エンコーディングを別名でオーバーライドすると、標準エラー(STDERR)への警告メッセージを出します。 =back =over =item utf8_supported_charset($charset_name); (ユーザが定義した別名も含めて)名づけられた文字セットがサポートされていればtrueを返します。

そうでなければfalseを返します。 例: if (! utf8_supported_charset('VISCII')) { # まだサポートされていません } パラメータなしで、リスト・コンテキストで呼ばれると、(ユーザが定義した別名も含めて)サポートされているすべての文字セット名のリストを返します。 例: my @charsets = utf8_supported_charset; =back =over =item to_utf8({ -string => $string, -charset => $source_charset }); 指定された元の文字セット(source charset)からUTF8に変換された文字列を返します。 =back =over =item from_utf8({ -string => $string, -charset => $target_charset}); UTF8から指定されたターゲットの文字セット(target charset)に変換された文字列を返します。 =back =head1 バージョン 1.14 - 2020.09.27 =head1 やるべきこと Jcode、2バイト・エンコーディングそしてエンコーディング別名のための回帰テスト =head1 参考資料 L<Unicode::String> L<Unicode::Map8> L<Unicode::Map> L<Jcode> =head1 著作権 Copyright 2000-2020, Jerilyn Franz. All rights reserved. =head1 作者 Jerilyn Franz =head1 クレジット 川合孝典 "Kawai,Takanori" - 邦訳 =head1 ライセンス MIT License Copyright (c) 2020 Jerilyn Franz Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
=cut Unicode-MapUTF8-1.14/t000755001750001750 013734104216 15312 5ustar00snowharesnowhare000000000000Unicode-MapUTF8-1.14/t/01_unicode_maputf8.t000444001750001750 2146513734104216 21256 0ustar00snowharesnowhare000000000000
#!/usr/bin/perl -w

use strict;
use lib ('./blib','../blib','../lib','./lib');
#use bytes;

use Unicode::MapUTF8 qw(utf8_supported_charset to_utf8 from_utf8 utf8_charset_alias);

# General info for writing test modules:
#
# When running as 'make test' the default
# working directory is the one _above_ the
# 't/' directory.

# Numbers of the tests to run, in order.
my @do_tests=(1..5);

# Dispatch table: test number => code reference plus the label that is
# printed on STDERR next to the result.
my $test_subs = {
    1 => { -code => \&test1, -desc => ' eight-bit ' },
    2 => { -code => \&test2, -desc => ' unicode ' },
    3 => { -code => \&test3, -desc => ' multi-byte ' },
    4 => { -code => \&test4, -desc => ' jcode ' },
    5 => { -code => \&test5, -desc => ' charset aliases ' },
#   6 => { -code => \&big5_with_embedded_ascii, -desc => ' big5 embedded ascii ' },
};

my @charsets = utf8_supported_charset;

# Test plan line ("first..last") on STDOUT; per-test detail goes to STDERR.
print $do_tests[0],'..',$do_tests[$#do_tests],"\n";
print STDERR "\n";
my $n_failures = 0;
foreach my $test (@do_tests) {
    my $sub  = $test_subs->{$test}->{-code};
    my $desc = $test_subs->{$test}->{-desc};

    # Each test sub returns '' on success or a description of the failure.
    # An exception thrown by the test is treated as its failure message.
    my $failure = '';
    eval { $failure = $sub->(); };
    if ($@) {
        $failure = $@;
    }

    if ($failure ne '') {
        chomp $failure;
        print "not ok $test\n";
        print STDERR " $desc - $failure\n";
        $n_failures++;
    }
    else {
        print "ok $test\n";
        print STDERR " $desc - ok\n";
    }
}
print "END\n";

# Report failures through the exit status as well as the "not ok" lines.
exit($n_failures ? 1 : 0);

########################################
# Eight bit conversions                #
########################################
# Converts a plain string and an empty string between ISO-8859-1 and UTF8.
# Returns '' on success, otherwise the failure text from test_general().
sub test1 {
    my $charset       = 'ISO-8859-1';
    my $source_string = 'Hello World';
    my $utf8_string   = 'Hello World';
    my $result = test_general({
        -charset => $charset,
        -source  => $source_string,
        -utf8    => $utf8_string,
    });
    return $result if ($result ne '');

    # The empty string must convert cleanly as well.
    $source_string = '';
    $utf8_string   = '';
    $result = test_general({
        -charset => $charset,
        -source  => $source_string,
        -utf8    => $utf8_string,
    });
    return $result if ($result ne '');

    return '';
}
########################################
# Unicode conversions                  #
########################################
# Converts a big-endian two-byte-per-character string and an empty string
# between UCS2 and UTF8. Returns '' on success, otherwise the failure text.
sub test2 {
    my $charset       = 'UCS2';
    my $source_string = "\x00H\x00e\x00l\x00l\x00o\x00 \x00W\x00o\x00r\x00l\x00d";
    my $utf8_string   = 'Hello World';
    my $result = test_general({
        -charset => $charset,
        -source  => $source_string,
        -utf8    => $utf8_string,
    });
    return $result if ($result ne '');

    $source_string = '';
    $utf8_string   = '';
    $result = test_general({
        -charset => $charset,
        -source  => $source_string,
        -utf8    => $utf8_string,
    });
    return $result if ($result ne '');

    return '';
}

########################################
# Multibyte conversions                #
########################################
# Placeholder: always reports success.
sub test3 {
    return '';
}

########################################
# Japanese (Jcode) conversions         #
########################################
# Converts an ASCII-only string and an empty string between euc-jp and UTF8.
# Returns '' on success, otherwise the failure text.
sub test4 {
    my $charset       = 'euc-jp';
    my $source_string = "Hello World";
    my $utf8_string   = 'Hello World';
    my $result = test_general({
        -charset => $charset,
        -source  => $source_string,
        -utf8    => $utf8_string,
    });
    return $result if ($result ne '');

    $source_string = '';
    $utf8_string   = '';
    $result = test_general({
        -charset => $charset,
        -source  => $source_string,
        -utf8    => $utf8_string,
    });
    return $result if ($result ne '');

    return '';
}

########################################
# Charset aliases                      #
########################################
# Defines an alias for ISO-8859-1, checks that it resolves and converts,
# removes it again and checks that conversions through the removed alias
# are rejected. Returns '' on success, otherwise the failure text.
sub test5 {
    my $charset = 'ISO-8859-1';
    my $alias   = 'latin-1_sort_of';

    eval { utf8_charset_alias({ $alias => $charset }); };
    if ($@) { return "$@" }

    eval {
        my $aliased = utf8_charset_alias($alias);
        if ((! defined $aliased) || (lc($charset) ne lc($aliased))) {
            # Avoid interpolating undef into the message.
            my $shown = defined($aliased) ? $aliased : 'undef';
            die("Alias crosscheck for '$alias' -> '$charset' returned a *different* charset of '$shown'");
        }
    };
    if ($@) { return "Failed to alias character set '$charset' to '$alias': $@" }

    # From here on, convert through the alias instead of the real name.
    $charset = $alias;
    my $source_string = 'Hello World';
    my $utf8_string   = 'Hello World';
    my $result = test_general({
        -charset => $charset,
        -source  => $source_string,
        -utf8    => $utf8_string,
    });
    return $result if ($result ne '');

    $source_string = '';
    $utf8_string   = '';
    $result = test_general({
        -charset => $charset,
        -source  => $source_string,
        -utf8    => $utf8_string,
    });
    return $result if ($result ne '');

    eval { utf8_charset_alias({ $alias => undef }); };
    if ($@) { return "$@" }

    # With the alias removed, the conversion must no longer succeed.
    # test_general() traps conversion errors itself and reports them as a
    # non-empty string, so either a thrown error or a non-empty result
    # counts as the expected rejection; only a clean '' is a failure here.
    $source_string = 'Hello World';
    $utf8_string   = 'Hello World';
    my $stale_result = eval {
        test_general({
            -charset => $charset,
            -source  => $source_string,
            -utf8    => $utf8_string,
        });
    };
    if ((not $@) and defined($stale_result) and ($stale_result eq '')) {
        return "Failed to catch use of non-aliased charset";
    }

    return '';
}

########################################
# Test Big5 with embedded ASCII        #
########################################
# Converts two Big5 byte strings that mix double-byte characters with
# ASCII bytes; the expected UTF8 is produced from the matching UCS2 string.
# Returns '' on success, otherwise all failure texts joined together.
sub big5_with_embedded_ascii {
    my $charset = 'big5';

    my @errors = ();
    {
        my $source_string = "\xa5\x40\xa5\x41\x30";
        my $utf8_string = to_utf8({ -charset => "ucs2", -string => "\x4e\x16\x4e\x15\x00\x30"});
        my $result = test_general({
            -charset => $charset,
            -source  => $source_string,
            -utf8    => $utf8_string,
        });
        push(@errors,$result) if ($result ne '');
    }
    {
        my $source_string = "\xa5\x40\xa5\x41\x30\xa5\x30\x41\xa5\x40";
        my $utf8_string = to_utf8({ -charset => "ucs2", -string => "\x4e\x16\x4e\x15\x00\x30\x00\x41\x4e\x16"});
        my $result = test_general({
            -charset => $charset,
            -source  => $source_string,
            -utf8    => $utf8_string,
        });
        push(@errors,$result) if ($result ne '');
    }
    if (0 < @errors) {
        return join('',@errors);
    }

    return '';
}

########################################
# Generalized test framework           #
########################################
# test_general({ -charset => $name, -source => $bytes, -utf8 => $expected })
#
# Runs three checks for one charset:
#   1. to_utf8() of -source must equal -utf8,
#   2. from_utf8() of -utf8 must equal -source,
#   3. -source pushed through from_utf8() and back through to_utf8()
#      must come back unchanged.
# Errors thrown by the conversion functions are trapped here.
# Returns '' when all checks pass, otherwise a description of the first
# failing check.
sub test_general {
    my ($parms) = shift;

    my $source_charset = $parms->{-charset};
    my $source_string  = $parms->{-source};
    my $utf8_string    = $parms->{-utf8};

    eval {
        my $result_string = to_utf8({ -string => $source_string, -charset => $source_charset });
        if ($utf8_string ne $result_string) {
            die ('(line ' . __LINE__ . ") conversion from '$source_charset' to UTF8 resulted in unexpected output.\nExpected '" . hexout($utf8_string) . "' but got '" . hexout($result_string) . "'\n");
        }
    };
    if ($@) { return "Failed to convert '$source_charset' text to UTF8:\n$@" }

    eval {
        my $result_string = from_utf8({ '-string' => $utf8_string, '-charset' => $source_charset, });
        if ($source_string ne $result_string) {
            die ("conversion from UTF8 to '$source_charset' resulted in unexpected output.\nExpected '" . hexout($source_string) . "' but got '" . hexout($result_string) . "'\n");
        }
    };
    if ($@) { return "Failed to convert UTF8 text to '$source_charset': $@" }

    eval {
        my $result_string = from_utf8({ -string => $source_string, -charset => $source_charset, });
        if ($source_string ne to_utf8({ -string => $result_string, -charset => $source_charset })) {
            die ("input and output strings differed");
        }
    };
    if ($@) { return "Round trip conversion of '$source_charset' to UTF8 failed: $@" }

    return '';
}

# Returns $string with every character in the range 0x00-0xff replaced by
# its two lowercase hex digits; other characters are left untouched.
sub hexout {
    my ($string) = @_;

    $string =~ s/([\x00-\xff])/unpack("H2",$1)/egos;

    return $string;
}
Unicode-MapUTF8-1.14/t/97_distribution.t000444001750001750 62013734104216 20650 0ustar00snowharesnowhare000000000000
use strict;
use lib ('./blib','../blib', './lib', '../lib');

# Skip everything when Test::More cannot be loaded.
eval { require Test::More; };
if ($@) {
    $|++;
    print "1..0 # Skipped: Test::More required for testing distribution\n";
    exit;
}

# Test::Distribution is optional; without it the whole file is skipped.
eval { require Test::Distribution; };
if ($@) {
    Test::More::plan( skip_all => 'Test::Distribution not installed' );
}
Test::Distribution->import('only' => [qw(prereq sig description)]);
Unicode-MapUTF8-1.14/t/98_pod_coverage.t000444001750001750 110313734104216 20604 0ustar00snowharesnowhare000000000000
use strict;
use lib ('./blib','../blib', './lib', '../lib');
# Skip everything when Test::More cannot be loaded.
eval { require Test::More; };
if ($@) {
    $|++;
    print "1..0 # Skipped: Test::More required for testing POD coverage\n";
    exit;
}

# Test::Pod::Coverage 1.06 or later is needed; otherwise skip the file.
eval { require Test::Pod::Coverage; };
if ($@ or (not defined $Test::Pod::Coverage::VERSION) or ($Test::Pod::Coverage::VERSION < 1.06)) {
    Test::More::plan (skip_all => "Test::Pod::Coverage 1.06 required for testing POD coverage");
    exit;
}

Test::More::plan (tests => 1);
# 'DEBUG' is passed as also_private, so it is not required to be documented.
Test::Pod::Coverage::pod_coverage_ok( 'Unicode::MapUTF8', { also_private => ['DEBUG'] });
Unicode-MapUTF8-1.14/t/99_pod.t000444001750001750 57413734104216 16725 0ustar00snowharesnowhare000000000000
use strict;
use warnings;

# Skip everything when Test::More cannot be loaded.
eval { require Test::More; };
if ($@) {
    $|++;
    print "1..0 # Skipped: Test::More required for testing POD. Skipping.\n";
    exit;
}

# Test::Pod 1.00 or later is needed; otherwise skip the file.
eval { require Test::Pod; };
if ($@ or (not defined $Test::Pod::VERSION) or ($Test::Pod::VERSION < 1.00)) {
    Test::More::plan (skip_all => "Test::Pod 1.00 required for testing POD");
    exit;
}

Test::Pod::all_pod_files_ok();