XML-Encoding-2.09/0000755000104000244210000000000012342041010015661 5ustar AdministratorsDomain Users
XML-Encoding-2.09/bin/0000755000104000244210000000000012342041007016437 5ustar AdministratorsDomain Users
XML-Encoding-2.09/bin/compile_encoding0000755000104000244210000001465612342041007021677 0ustar AdministratorsDomain Users
#!perl -w
#
# compile_encoding
#
# Version 1.x Copyright 1998 Clark Cooper
# Changes in Version 2.00 onwards Copyright (C) 2007 Steve Hay
# All rights reserved.
#
# This program is free software; you may redistribute it and/or
# modify it under the same terms as Perl itself.

use 5.008001;

my $Usage=<<'End_of_Usage;';
Usage is:
    compile_encoding [-h] [-o output_file] input_file

Compiles the input XML encmap file into a binary encoding file usable
by XML::Parser.

    -h              Print this message.

    -o output_file  Put compiled binary into given output file. By default,
                    a file that has the same basename as the input file, but
                    with an extension of .enc is the output.
End_of_Usage;

# A Pfxmap is a restricted hash describing the bytes that may follow one
# prefix:
#   min, max  lowest / highest byte value mapped under this prefix
#   map       array ref indexed by byte value; an element is either a
#             Unicode scalar or a nested Pfxmap (for a longer prefix)
#   explen    nesting depth at which characters are defined under a first
#             byte (set by range_set through $pfxlenref)
package Pfxmap;
use fields qw(min max map explen);

# Pfxmap->new(key => value, ...): create a Pfxmap and set the given fields.
sub new {
    my $class = shift;

    no strict 'refs';
    my $pfxmap = fields::new($class);

    while (@_) {
        my $key = shift;
        $pfxmap->{$key} = shift;
    }

    $pfxmap;
}

package main;

use XML::Encoding;

use integer;
use strict;

################################################################
# See the encoding.h file in the top level XML::Encoding directory
# to see the format of generated file

my $magic = 0xfeebface;
my $namelength = 40;

my $ofile;

# Consume leading options from @ARGV.
while (defined($ARGV[0]) and $ARGV[0] =~ /^-/) {
    my $opt = shift;

    if ($opt eq '-o') {
        $ofile = shift;
    }
    elsif ($opt eq '-h') {
        print $Usage;
        exit;
    }
    else {
        die "Unrecognized option: $opt\n$Usage";
    }
}

my $infile = shift;

die "Encmap XML file not provided\n$Usage"
    unless defined($infile);

# Default output name: input basename with any .xml suffix replaced by .enc
unless (defined($ofile)) {
    my $base = $infile;

    $base =~ s!^.*/!!;

    if ($base =~ /(.*)\.xml$/i) {
        $base = $1;
    }

    $ofile = $base . '.enc';
}

# Do initializations

my @firstbyte;
$#firstbyte = 255;

my $pfxcount = 0;       # number of push_prefix calls seen
my $totcount = 0;       # total map slots needed by all closed prefixes
my @stack = ();         # enclosing Pfxmaps of the prefix being parsed
my $pfxlenref;          # ref to explen of the current top-level prefix

my $currmap = Pfxmap->new(min => 255, max => 0, map => \@firstbyte);

my $p = XML::Encoding->new(ErrorContext  => 2,
                           ExpatRequired => 1,
                           PushPrefixFcn => \&push_prefix,
                           PopPrefixFcn  => \&pop_prefix,
                           RangeSetFcn   => \&range_set);

my $name = $p->parsefile($infile);

die "Encoding name too long (> $namelength)\n"
    if length($name) > $namelength;

my @prefixes;
my $maplen = 0;
my $pflen = 0;

if ($pfxcount) {
    # Multi-byte encoding: the top-level map becomes PrefixMap 0 and gets
    # its own copy of the first-byte entries (filled in below).
    push(@prefixes, $currmap);
    $currmap->{map} = [];
    $maplen = $totcount + $currmap->{max} - $currmap->{min} + 1;
    $pflen = $pfxcount + 1;
}

for my $i (0 .. 255) {
    if (defined($firstbyte[$i])) {
        if ($pfxcount) {
            $currmap->{map}->[$i] = $firstbyte[$i];

            # In the header, a prefix byte is stored as -(sequence length)
            $firstbyte[$i] = - ($firstbyte[$i]->{explen} + 1)
                if ref($firstbyte[$i]);
        }
    }
    else {
        # Unmapped bytes: ASCII maps to itself, anything else is invalid
        $firstbyte[$i] = $i < 128 ? $i : -1;
    }
}

open(my $enc, '>', $ofile)
    or die "Couldn't open $ofile for writing:\n$!\n";
binmode($enc);

#Note the use of network order packings

print {$enc} pack("Na${namelength}nnN256",
                  $magic, $name, $pflen, $maplen, @firstbyte);

my @map = ();
my $head = 0;

# Breadth-first walk over the prefix maps. Nested prefixes are appended to
# @prefixes as they are met, so $head + @prefixes is the index the nested
# map will have in the written PrefixMap vector.
while (@prefixes) {
    my $pfxmap = shift @prefixes;
    $head++;

    my $len = $pfxmap->{max} - $pfxmap->{min} + 1;
    my $mapstart = @map;
    my $ispfx = '';
    vec($ispfx, 255, 1) = 0;
    my $ischar = '';
    vec($ischar, 255, 1) = 0;

    for my $i ($pfxmap->{min} .. $pfxmap->{max}) {
        my $entry = $pfxmap->{map}->[$i];

        if (defined($entry)) {
            if (ref($entry)) {
                my $pfxent = $entry;
                $entry = $head + @prefixes;
                push(@prefixes, $pfxent);
                vec($ispfx, $i, 1) = 1;
            }
            else {
                vec($ischar, $i, 1) = 1;
            }
        }
        else {
            $entry = 0xFFFF;
        }

        push(@map, $entry);
    }

    print {$enc} pack('CCn', $pfxmap->{min}, $len, $mapstart),
                 $ispfx, $ischar;
}

if (@map) {
    my $packlist = 'n' . int(@map);
    print {$enc} pack($packlist, @map);
}

# Buffered write errors only surface at close, so check it.
close($enc)
    or die "Couldn't close $ofile:\n$!\n";

################
## End main
################

# PushPrefixFcn callback: enter the prefix element for byte value $byte.
# Returns nothing on success or an error message string on failure.
sub push_prefix {
    my ($byte) = @_;

    return "Prefix too long"
        if (@stack >= 3);

    return "Different lengths for same first byte"
        if (defined($pfxlenref)
            and defined($$pfxlenref)
            and $$pfxlenref < @stack);

    my $pfxmap = $currmap->{map}->[$byte];

    if (defined($pfxmap)) {
        return "Prefix already mapped to a character"
            unless ref($pfxmap);

        # Remove what we've already added in for this prefix so we don't
        # count it twice
        $totcount -= $pfxmap->{max} - $pfxmap->{min} + 1;
    }
    else {
        $pfxmap = Pfxmap->new(min => 255, max => 0, map => []);
        $currmap->{map}->[$byte] = $pfxmap;
    }

    # A first-byte prefix: remember where its expected length is recorded
    unless (@stack) {
        $pfxlenref = \$pfxmap->{explen};
    }

    $currmap->{min} = $byte
        if $byte < $currmap->{min};

    $currmap->{max} = $byte
        if $byte > $currmap->{max};

    $pfxcount++;
    push(@stack, $currmap);
    $currmap = $pfxmap;
    return;
} # End push_prefix

# PopPrefixFcn callback: leave the current prefix element.
# Returns nothing on success or an error message string on failure.
sub pop_prefix {
    return "Attempt to pop un-pushed prefix"
        unless (@stack);

    my $count = $currmap->{max} - $currmap->{min} + 1;

    return "Empty prefix not allowed"
        unless $count > 0;

    $totcount += $count;
    $currmap = pop(@stack);
    $pfxlenref = undef unless @stack;
    return;
} # End pop_prefix

# RangeSetFcn callback: map $len consecutive bytes starting at $byte to
# consecutive Unicode scalars starting at $uni, under the current prefix.
# Returns nothing on success or an error message string on failure.
sub range_set {
    my ($byte, $uni, $len) = @_;
    my $limit = $byte + $len;   # one past the last byte of the range

    return "Range too long"
        if $limit > 256;

    if (defined($pfxlenref)) {
        if (defined($$pfxlenref)) {
            return "Different for same 1st byte"
                unless $$pfxlenref == @stack;
        }
        else {
            $$pfxlenref = @stack;
        }
    }

    for (my $i = $byte; $i < $limit; $i++, $uni++) {
        return "Byte already mapped"
            if defined($currmap->{map}->[$i]);
        $currmap->{map}->[$i] = $uni;
    }

    $currmap->{min} = $byte
        if $byte < $currmap->{min};

    # Only ever raise max. Comparing $limit (exclusive) against max would
    # lower max by one when a range ends just below an already-mapped top
    # byte, dropping that byte from the written map.
    $currmap->{max} = $limit - 1
        if $limit - 1 > $currmap->{max};

    return;
} # End range_set

__END__

=head1 NAME

compile_encoding - compile XML encmap into a binary encoded file for XML::Parser

=head1 SYNOPSIS

B<compile_encoding> [B<-h>] [B<-o> I<output_file>] I<input_file>

=head1 DESCRIPTION

B<compile_encoding> compiles an input XML encmap file into a binary encoded file usable by
L<XML::Parser>.

=head1 OPTIONS

=over 4

=item B<-o> I<output_file>

Put compiled binary into given output file. By default, a file that has the same basename as the input file, but with an extension of F<.enc> is output.

=item B<-h>

Print usage information.

=back

=head1 SEE ALSO

L<make_encmap>, L<XML::Encoding>, L<XML::Parser>

=head1 AUTHORS

This manual page was written by Daniel Leidert E<lt>daniel.leidert@wgdd.deE<gt> for the Debian project (but may be used by others).

=cut

# Tell Emacs that this is really a perl script
# Local Variables:
# mode:perl
# End:
XML-Encoding-2.09/bin/make_encmap0000755000104000244210000000742112342041007020631 0ustar AdministratorsDomain Users
#!perl -w
#
# make_encmap
#
# Copyright 1998 Clark Cooper
# Changes in Version 2.00 onwards Copyright (C) 2009 Steve Hay
# All rights reserved.
#
# This program is free software; you may redistribute it and/or
# modify it under the same terms as Perl itself.
#

use 5.008001;

use strict;

my $name = shift;
my $file = shift;

# ASCII characters that may map to a different Unicode value without a
# warning being issued.
my $except_str = '$@\^`{}~';
my %Exceptions;

foreach (unpack('c*', $except_str)) {
    $Exceptions{$_} = 1;
}

die "Usage is:\n\tmake_encmap name file\n"
    unless (defined($name) and defined($file));

open(my $map_fh, '<', $file) or die "Couldn't open $file: $!";

# @byte1 is indexed by the first byte of the source encoding. An element is
# either the Unicode value of a single-byte character or an array ref
# (indexed by the second byte) for two-byte characters.
my @byte1;
my $minpos = 256;

while (my $line = <$map_fh>) {
    next if $line =~ /^\#/;

    next unless $line =~ /0x([\da-f]{2,4})\s+0x([\da-f]{4})\s*\#\s*(.*)\s*$/i;

    my ($from, $to) = ($1, $2);

    my $flen = length($from);
    die "Bad line at $., from must be either 2 or 4 digits:\n$line"
        if $flen == 3;

    my $toval = hex($to);
    my $f1 = substr($from, 0, 2);
    my $f1val = hex($f1);

    if ($flen == 2) {
        if ($f1val < 128) {
            # Identity mappings of ASCII are implied; don't emit them
            next if $f1val == $toval;
            warn "The byte '0x$f1' mapped to 0x$to\n"
                unless defined($Exceptions{$f1val});
        }

        my $existing = $byte1[$f1val];

        if (defined($existing)) {
            # Don't compare an array ref numerically against a code point
            die "The byte '0x$f1' overlaps a two byte definition."
                if ref($existing);

            die "Multiple mappings for 0x$f1: 0x$to & "
                . sprintf("0x%x", $existing)
                if ($existing != $toval);
        }
        else {
            $byte1[$f1val] = $toval;
            $minpos = $f1val if $f1val < $minpos;
        }
    }
    else {
        my $b1 = $byte1[$f1val];

        if (defined($b1)) {
            die "The 1st byte of '$from' overlaps a single byte definition."
                unless ref($b1);
        }
        else {
            $b1 = $byte1[$f1val] = [];
            $minpos = $f1val if $f1val < $minpos;
        }

        my $f2 = substr($from, 2, 2);
        my $f2val = hex($f2);

        $b1->[$f2val] = $toval;
    }
}

close($map_fh);

die "Minpos never set" unless $minpos < 256;

# The XML markup below was reconstructed from the element and attribute
# names that XML::Encoding parses (encmap/name, prefix/byte, ch/byte+uni,
# range/byte+len+uni; numbers as decimal or 'x' followed by hex digits).
# NOTE(review): compile_encoding creates its parser with ExpatRequired => 1,
# so the root element needs expat='yes' before the map can be compiled;
# confirm whether this script is expected to emit that attribute itself.
print "<encmap name='$name'>\n";

process_byte(2, $minpos, \@byte1);

print "</encmap>\n";

####
## End main
####

# Print one mapping element for the bytes $start .. $lim-1, which map to
# consecutive Unicode values starting at $val. A single byte is written as
# a <ch> element, a longer run as a <range>. $pre is the indentation.
sub emit {
    my ($pre, $start, $lim, $val) = @_;

    my $len = $lim - $start;

    if ($len == 1) {
        printf("$pre<ch byte='x%02x' uni='x%04x'/>\n", $start, $val);
    }
    else {
        printf("$pre<range byte='x%02x' len='%d' uni='x%04x'/>\n",
               $start, $len, $val);
    }
} # End emit

# Walk the array @$aref from index $minpos, coalescing runs of consecutive
# byte -> consecutive Unicode mappings into ranges and recursing into
# nested arrays as <prefix> elements. $lead is the indentation width.
sub process_byte {
    my ($lead, $minpos, $aref) = @_;

    my $rngstrt;        # first byte of the run being collected, if any
    my $rngval;         # Unicode value that $rngstrt maps to
    my $prefix = ' ' x $lead;
    my $end = @{$aref}; # one past the last index

    for my $i ($minpos .. $end - 1) {
        my $v = $aref->[$i];

        if (defined($v)) {
            if (ref($v)) {
                emit($prefix, $rngstrt, $i, $rngval)
                    if defined($rngstrt);
                $rngstrt = undef;

                printf "$prefix<prefix byte='x%02x'>\n", $i;
                process_byte($lead + 2, 0, $v);
                print "$prefix</prefix>\n";
            }
            else {
                # Still inside the current run?
                next if (defined($rngstrt)
                         and ($v - $rngval == $i - $rngstrt));

                emit($prefix, $rngstrt, $i, $rngval)
                    if defined($rngstrt);

                $rngstrt = $i;
                $rngval = $v;
            }
        }
        else {
            emit($prefix, $rngstrt, $i, $rngval)
                if defined($rngstrt);
            $rngstrt = undef;
        }
    }

    # Flush a run that extends to the end of the array
    emit($prefix, $rngstrt, $end, $rngval)
        if defined($rngstrt);
} # End process_byte

__END__

=head1 NAME

make_encmap - create an XML representation from a Unicode mapping file

=head1 SYNOPSIS

B<make_encmap> I<name> I<file>

=head1 DESCRIPTION

B<make_encmap> creates an XML encmap file with a given name from a Unicode mapping file, received e.g. from F<ftp://ftp.unicode.org>. The result by default is output to F<stdout>.

=head1 OPTIONS

There are no options you can use.

=head1 EXAMPLES

The following example shows the usage of B<make_encmap> for the ISO/IEC 8859-15 table.

B< wget ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT make_encmap 8859-15 8859-15.TXT E<gt> 8859-15.encmap >

=head1 SEE ALSO

L<compile_encoding>, L<XML::Encoding>

=head1 AUTHORS

This manual page was written by Daniel Leidert E<lt>daniel.leidert@wgdd.deE<gt> for the Debian project (but may be used by others).
=cut XML-Encoding-2.09/Changes0000644000104000244210000000655512342041007017175 0ustar AdministratorsDomain UsersRevision history for Perl extension XML::Encoding. 2.09 30 May 2014 - Uploaded source code to GitHub and included repository URLs in metadata. - Included META.json file in addition to META.yml. - Set minimum required ExtUtils::MakeMaker version to 6.64 to ensure that all parameters used are supported, to save jumping through hoops to support earlier versions. (This should not be a problem since ExtUtils::MakeMaker 6.64 is easily installed into Perl 5.8.1 and above, that being the whole point of the new choice of minimum supported Perl version.) - Set minimum required Perl version to 5.8.1. This is in line with the minimum requirement of the "Perl Toolchain". 2.08 20 Sep 2010 - Changed test script to use Test::More, renamed it to test.t and moved it into the t/ folder. (Fixes [cpan #61369].) - Moved the perl module into the lib/ folder. 2.07 29 Jan 2009 - Minor correction to POD from Daniel Leidert . 2.06 20 Jan 2009 - Fixed make_encmap to avoid spurious output when there is an error with the input file. (Fixes [cpan #41854].) 2.05 11 Sep 2008 - Changed the shebang line in the two scripts to be less specific. Thanks to Daniel Leidert for the suggestion. 2.04 06 Sep 2008 - Incorporated a patch from Daniel Leidert which updates the maps iso-8859-2.xml to iso-8859-9.xml and adds the maps iso-8859-1.xml, iso-8859-10.xml, iso-8859-11.xml, iso-8859-13.xml to iso-8859-16.xml and windows-1251.xml. - Updated the map windows-1250.xml and added the maps windows-1252.xml to windows-1258.xml using data from ftp://ftp.unicode.org/Public/MAPPINGS. 2.03 08 Jul 2008 - Improved POD formatting of compile_encoding and make_encmap manual pages, courtesy of a patch from Daniel Leidert . 2.02 29 Jun 2008 - Added manual pages supplied by Daniel Leidert for compile_encoding and make_encmap. 2.01 24 Jul 2007 - Fixed crash at end of test script when using perl-5.6.x. 
(The crash was actually due to a bug in perl that is fixed as of perl-5.8.0 at least, so the test script is simply modified to not tickle that bug.) - Clarified the copyright notices. Only the *changes* in version 2.x are my copyright. 2.00 18 Jul 2007 - Fixed bin/compile_encoding to run with perl-5.9.x by using restricted hashes instead of pseudo-hashes (which have been removed from perl-5.9.x) - Fixed accidental variable interpolation in Encoding.pm - Fixed case of $ExtUtils::MakeMaker::VERSION in Makefile.PL so that ABSTRACT_FROM and AUTHOR get set when intended - Added LICENSE to Makefile.PL 1.01 - Added PREREQ_PM to Makefile.PL - Added x-euc-jp-jisx0221 and x-euc-jp-unicode from Yoshida Masato to maps - With the advice of MURATA Makoto , removed the Shift_JIS encoding and replaced it with 4 variations he provided. He also provided an explanatory message. 1.00 Sun Dec 13 13:43:31 EST 1998 - original version; created by h2xs 1.18 XML-Encoding-2.09/encoding.h0000644000104000244210000000616012342041007017631 0ustar AdministratorsDomain Users/***************************************************************** ** encoding.h ** ** Copyright 1998 Clark Cooper ** All rights reserved. ** ** This program is free software; you can redistribute it and/or ** modify it under the same terms as Perl itself. 
*/

#ifndef ENCODING_H
#define ENCODING_H 1

/* Magic number expected in the magic field of an Encmap_Header. */
#define ENCMAP_MAGIC 0xfeebface

/* One entry of the PrefixMap vector of a multiple byte encoding. */
typedef struct prefixmap {
  unsigned char min;            /* 1st byte mapped for this prefix */
  unsigned char len;            /* number of bytes mapped; 0 => 256 */
  unsigned short bmap_start;    /* starting index of this prefix's map in
                                   the overall map */
  unsigned char ispfx[32];      /* bitvector: byte is a prefix */
  unsigned char ischar[32];     /* bitvector: byte is a character */
} PrefixMap;

/*
** NOTE(review): this header does not describe Encinfo. The field names
** suggest it is the in-memory counterpart of Encmap_Header plus the two
** sections that follow it in the file - confirm against Expat.xs.
*/
typedef struct encinf {
  unsigned short prefixes_size;
  unsigned short bytemap_size;
  int firstmap[256];
  PrefixMap *prefixes;
  unsigned short *bytemap;
} Encinfo;

/* Header of an encoding map file; all shorts and ints in network order. */
typedef struct encmaphdr {
  unsigned int magic;           /* should match ENCMAP_MAGIC */
  char name[40];                /* name of the encoding */
  unsigned short pfsize;        /* number of PrefixMaps */
  unsigned short bmsize;        /* number of shorts in the overall map */
  int map[256];                 /* per 1st byte: Unicode scalar, -n for an
                                   n-byte sequence, or -1 if unmapped */
} Encmap_Header;

/*================================================================
** Structure of Encoding map binary encoding
**
** Note that all shorts and ints are in network order,
** so when packing or unpacking with perl, use 'n' and 'N' respectively.
** In C, use the htonl family of functions.
**
** The basic structure is:
**
**      _______________________
**      |Header (including map expat needs for 1st byte)
**      |PrefixMap * pfsize
**      |   This section isn't included for single-byte encodings.
**      |   For multiple byte encodings, when a byte represents a prefix
**      |   then it indexes into this vector instead of mapping to a
**      |   Unicode character. The PrefixMap type is declared above. The
**      |   ispfx and ischar fields are bitvectors indicating whether
**      |   the byte being mapped is a prefix or character respectively.
**      |   If neither is set, then the character is not mapped to Unicode.
**      |
**      |   The min field is the 1st byte mapped for this prefix; the
**      |   len field is the number of bytes mapped; and bmap_start is
**      |   the starting index of the map for this prefix in the overall
**      |   map (next section).
**      |unsigned short * bmsize
**      |   This section also is omitted for single-byte encodings.
**      |   Each short is either a Unicode scalar or an index into the
**      |   PrefixMap vector.
**
** The header for these files is declared above as the Encmap_Header type.
** The magic field is a magic number which should match the ENCMAP_MAGIC
** macro above.
The next 40 bytes stores IANA registered name for the ** encoding. The pfsize field holds the number of PrefixMaps, which should ** be zero for single byte encodings. The bmsize field holds the number of ** shorts used for the overall map. ** ** The map field contains either the Unicode scalar encoded by the 1st byte ** or -n where n is the number of bytes that such a 1st byte implies (Expat ** requires that the number of bytes to encode a character is indicated by ** the 1st byte) or -1 if the byte doesn't map to any Unicode character. ** ** If the encoding is a multiple byte encoding, then there will be PrefixMap ** and character map sections. The 1st PrefixMap (index 0), covers a range ** of bytes that includes all 1st byte prefixes. ** ** Look at convert_to_unicode in Expat.xs to see how this data structure ** is used. */ #endif /* ndef ENCODING_H */ XML-Encoding-2.09/lib/0000755000104000244210000000000012342041004016432 5ustar AdministratorsDomain UsersXML-Encoding-2.09/lib/XML/0000755000104000244210000000000012342041007017075 5ustar AdministratorsDomain UsersXML-Encoding-2.09/lib/XML/Encoding.pm0000644000104000244210000001656012342041007021171 0ustar AdministratorsDomain Users################################################################ # XML::Encoding # # Version 1.x Copyright 1998 Clark Cooper # Changes in Version 2.00 onwards Copyright (C) 2007-2010 Steve Hay # All rights reserved. # # This program is free software; you can redistribute it and/or # modify it under the same terms as Perl itself. 
#
# See pod documentation at the end of the file
#

package XML::Encoding;

use 5.008001;

use XML::Parser;

use strict;

our @ISA = qw(XML::Parser);
our $VERSION = '2.09';

# Constructor. Takes the same options as XML::Parser->new plus
# ExpatRequired, PushPrefixFcn, PopPrefixFcn and RangeSetFcn, and installs
# this module's Start, End and Final handlers.
sub new {
    my $class = shift;

    my $self = $class->SUPER::new(@_);

    # Maybe require setting of PushPrefixFcn, PopPrefixFcn, and RangeSetFcn

    $self->setHandlers(Start => \&start,
                       End   => \&end,
                       Final => \&fini);

    return $self;
}

# Start handler: validates the attributes of encmap, prefix, ch and range
# elements and forwards them to the user callbacks. Any other element is
# skipped, together with its content, after a warning.
sub start {
    my ($exp, $el, %attr) = @_;

    return if $exp->{EN_Skip};

    $exp->xpcroak("Root element must be encmap")
        if ($exp->depth == 0 and $el ne 'encmap');

    my $xpmode = $exp->{EN_ExpatMode};

    if ($el eq 'ch' or $el eq 'range') {
        my $byte = $attr{byte};
        $exp->xpcroak("Missing required byte attribute")
            unless defined($byte);

        $byte = cnvnumatt($exp, $byte, 'byte');
        $exp->xpcroak("byte attribute > 255") if $byte > 255;

        my $uni = $attr{uni};
        $exp->xpcroak("Missing required uni attribute")
            unless defined($uni);

        $uni = cnvnumatt($exp, $uni, 'uni');
        $exp->xpcroak("uni attribute > 0xFFFF") if $uni > 0xFFFF;

        # A ch element is a range of length 1
        my $len = 1;

        if ($el eq 'range') {
            $len = $attr{len};
            $exp->xpcroak("Missing required len attribute")
                unless defined($len);

            $len = cnvnumatt($exp, $len, 'len');
            $exp->xpcroak("Len plus byte > 256") if ($len + $byte) > 256;
        }

        check_range($exp, $byte, $len, $uni)
            if ($xpmode
                and $byte < 128
                and $byte != $uni
                and not $exp->in_element('prefix'));

        my $range_set_fcn = $exp->{RangeSetFcn};
        if (defined $range_set_fcn) {
            my $result = $range_set_fcn->($byte, $uni, $len);
            $exp->xpcroak($result) if ($xpmode and $result);
        }
    }
    elsif ($el eq 'prefix') {
        $exp->xpcroak("prefix nested too deep")
            if ($xpmode and $exp->within_element('prefix') >= 3);

        my $byte = $attr{byte};
        $exp->xpcroak("Missing required byte attribute")
            unless defined($byte);

        $byte = cnvnumatt($exp, $byte, 'byte');
        $exp->xpcroak("byte attribute > 255") if $byte > 255;

        my $push_pfx_fcn = $exp->{PushPrefixFcn};
        if (defined $push_pfx_fcn) {
            my $result = $push_pfx_fcn->($byte);
            $exp->xpcroak($result) if ($xpmode and $result);
        }
    }
    elsif ($el eq 'encmap') {
        my $name = $attr{name};
        $exp->xpcroak("Missing required name attribute")
            unless defined($name);

        $exp->{EN_Name} = $name;

        my $expat = $attr{expat};
        if (defined($expat)) {
            $exp->xpcroak("Value of expat attribute should be yes or no")
                unless ($expat eq 'yes' or $expat eq 'no');
            $exp->{EN_ExpatMode} = $expat eq 'yes';
        }
        else {
            $exp->{EN_ExpatMode} = 0;
        }

        $exp->xpcroak("Not an expat mode encmap")
            if ($exp->{ExpatRequired} and ! $exp->{EN_ExpatMode});
    }
    else {
        my $depth = $exp->depth;

        # xpcroak is a method taking only the message; the parser object
        # must not be passed again as an argument.
        $exp->xpcroak("Root element isn't encmap")
            unless $depth;

        $exp->xpcarp("Skipping unrecognized element '$el'\n");

        # Remember where skipping started so that end() can switch it off
        $exp->{EN_Skip} = $depth;
    }
} # End start

# End handler: ends a skipped subtree, or calls PopPrefixFcn when a prefix
# element closes.
sub end {
    my ($exp, $el) = @_;

    if ($exp->{EN_Skip}) {
        $exp->{EN_Skip} = 0
            if $exp->{EN_Skip} == $exp->depth;
    }
    elsif ($el eq 'prefix') {
        my $xpmode = $exp->{EN_ExpatMode};

        my $pop_pfx_fcn = $exp->{PopPrefixFcn};
        if (defined $pop_pfx_fcn) {
            my $result = $pop_pfx_fcn->();
            $exp->xpcroak($result) if ($xpmode and $result);
        }
    }
} # End end

# Final handler: its value, the name attribute of the encmap element, is
# what the parse methods return.
sub fini {
    my ($exp) = @_;

    $exp->{EN_Name};
} # End fini

# Croak unless the range of $len bytes starting at $start consists only of
# characters that expat allows to be remapped. Called from start() only for
# ranges outside any prefix whose first byte is < 128 and differs from $uni.
sub check_range {
    my ($exp, $start, $len, $uni) = @_;

    # The following characters are exceptions to the expat rule that characters
    # in the ascii set (ordinal values < 128) must have the same value in the
    # source encoding: $@\^`{}~'
    # The ordinal values for these are 36,92,94,96,123,125,126,39
    # Any len >= 3 implies you have to be hitting some non-special
    # For 2 just check start == 125 ('}')
    # For 1 check individually.

    if ($len == 1) {
        return if chr($start) =~ /[\$@\\^`{}~']/;
    }
    elsif ($len == 2 and $start == 125) {
        return;
    }

    $exp->xpcroak("Sets ascii character to non-ascii value");
}

# Convert the attribute value $str to a number. Accepted forms are decimal
# digits, or 'x' followed by hex digits. $name is the attribute name used
# in the error message.
sub cnvnumatt {
    my ($exp, $str, $name) = @_;

    $exp->xpcroak("$name attribute is not a decimal or hex value")
        unless ($str =~ /^(?:(\d+)|x([0-9a-f]+))$/i);

    if (defined($1)) {
        return $str + 0;
    }
    else {
        return hex($2);
    }
} # End cnvnumatt

1;

__END__

=head1 NAME

XML::Encoding - A perl module for parsing XML encoding maps.
=head1 SYNOPSIS use XML::Encoding; my $em_parser = new XML::Encoding(ErrorContext => 2, ExpatRequired => 1, PushPrefixFcn => \&push_prefix, PopPrefixFcn => \&pop_prefix, RangeSetFcn => \&range_set); my $encmap_name = $em_parser->parsefile($ARGV[0]); =head1 DESCRIPTION This module, which is built as a subclass of XML::Parser, provides a parser for encoding map files, which are XML files. The file maps/encmap.dtd in the distribution describes the structure of these files. Calling a parse method returns the name of the encoding map (obtained from the name attribute of the root element). The contents of the map are processed through the callback functions push_prefix, pop_prefix, and range_set. =head1 METHODS This module provides no additional methods to those provided by XML::Parser, but it does take the following additional options. =over 4 =item * ExpatRequired When this has a true value, then an error occurs unless the encmap "expat" attribute is set to "yes". Whether or not the ExpatRequired option is given, the parser enters expat mode if this attribute is set. In expat mode, the parser checks if the encoding violates expat restrictions. =item * PushPrefixFcn The corresponding value should be a code reference to be called when a prefix element starts. The single argument to the callback is an integer which is the byte value of the prefix. An undef value should be returned if successful. If in expat mode, a defined value causes an error and is used as the message string. =item * PopPrefixFcn The corresponding value should be a code reference to be called when a prefix element ends. No arguments are passed to this function. An undef value should be returned if successful. If in expat mode, a defined value causes an error and is used as the message string. =item * RangeSetFcn The corresponding value should be a code reference to be called when a "range" or "ch" element is seen. 
The 3 arguments passed to this function are: (byte, unicode_scalar, length) The byte is the starting byte of a range or the byte being mapped by a "ch" element. The unicode_scalar is the Unicode value that this byte (with the current prefix) maps to. The length of the range is the last argument. This will be 1 for the "ch" element. An undef value should be returned if successful. If in expat mode, a defined value causes an error and is used as the message string. =back =head1 AUTHOR Clark Cooper > Steve Hay > is now maintaining XML::Encoding as of version 2.00 =head1 SEE ALSO XML::Parser =cut XML-Encoding-2.09/Makefile.PL0000644000104000244210000000477612342041007017657 0ustar AdministratorsDomain Users#!perl #=============================================================================== # # Makefile.PL # # DESCRIPTION # Makefile creation script. # # COPYRIGHT # Copyright (C) 2014 Steve Hay. All rights reserved. # # LICENCE # You may distribute under the terms of either the GNU General Public License # or the Artistic License, as specified in the LICENCE file. 
#
#===============================================================================

use 5.008001;

use strict;
use warnings;

use ExtUtils::MakeMaker 6.64;
use ExtUtils::MakeMaker qw(WriteMakefile);

#===============================================================================
# MAIN PROGRAM
#===============================================================================

MAIN: {
    # The module file supplies both the abstract and the version.
    my $module_file = 'lib/XML/Encoding.pm';
    my $min_perl    = '5.008001';

    my %meta_merge = (
        'meta-spec' => { version => 2 },
        resources   => {
            repository => {
                type => 'git',
                url  => 'https://github.com/steve-m-hay/XML-Encoding.git'
            }
        }
    );

    my %configure_requires = (
        'ExtUtils::MakeMaker' => '6.64',
        'perl'                => $min_perl,
        'strict'              => '0',
        'warnings'            => '0'
    );

    my %test_requires = (
        'Test::More' => '0',
        'perl'       => $min_perl,
        'strict'     => '0',
        'warnings'   => '0'
    );

    my %prereq_pm = (
        'XML::Parser' => '2.18',
        'fields'      => '0',
        'integer'     => '0',
        'perl'        => $min_perl,
        'strict'      => '0',
        'vars'        => '0'
    );

    # Shell steps run on the distribution tree before it is archived:
    # normalise directory and file modes, then make the scripts executable.
    my @preop_steps = (
        'find $(DISTVNAME) -type d -print|xargs chmod 0755',
        'find $(DISTVNAME) -type f -print|xargs chmod 0644',
        'chmod 0755 $(DISTVNAME)/bin/*'
    );

    my %dist = (
        PREOP   => join(' && ', @preop_steps),
        TO_UNIX => 'find $(DISTVNAME) -type f -print|xargs dos2unix'
    );

    WriteMakefile(
        NAME               => 'XML::Encoding',
        ABSTRACT_FROM      => $module_file,
        AUTHOR             => 'Clark Cooper , Steve Hay ',
        LICENSE            => 'perl_5',
        VERSION_FROM       => $module_file,
        META_MERGE         => \%meta_merge,
        MIN_PERL_VERSION   => $min_perl,
        CONFIGURE_REQUIRES => \%configure_requires,
        TEST_REQUIRES      => \%test_requires,
        PREREQ_PM          => \%prereq_pm,
        EXE_FILES          => [ 'bin/compile_encoding', 'bin/make_encmap' ],
        dist               => \%dist
    );
}

#===============================================================================
XML-Encoding-2.09/MANIFEST0000644000104000244210000000677312342041007017035 0ustar AdministratorsDomain Users
Changes Change log
MANIFEST This file
Makefile.PL ExtUtils::MakeMaker script
README A brief overview of the distribution
bin/compile_encoding script to make foo.enc from foo.xml
bin/make_encmap script to generate foo.xml
encoding.h Header file describing foo.enc format
lib/XML/Encoding.pm XML::Encoding module
maps/IANA-assigned-character-sets The name describes it
maps/Japanese_Encodings.msg Message about Japanese encodings
maps/README A word about how maps were generated
maps/big5.xml Big5 encoding map
maps/encmap.dtd Doctype declaration for encmap
maps/iso-8859-1.xml ISO-8859-1 encoding map
maps/iso-8859-2.xml ISO-8859-2 encoding map
maps/iso-8859-3.xml ISO-8859-3 encoding map
maps/iso-8859-4.xml ISO-8859-4 encoding map
maps/iso-8859-5.xml ISO-8859-5 encoding map
maps/iso-8859-6.xml ISO-8859-6 encoding map
maps/iso-8859-7.xml ISO-8859-7 encoding map
maps/iso-8859-8.xml ISO-8859-8 encoding map
maps/iso-8859-9.xml ISO-8859-9 encoding map
maps/iso-8859-10.xml ISO-8859-10 encoding map
maps/iso-8859-11.xml ISO-8859-11 encoding map
maps/iso-8859-13.xml ISO-8859-13 encoding map
maps/iso-8859-14.xml ISO-8859-14 encoding map
maps/iso-8859-15.xml ISO-8859-15 encoding map
maps/iso-8859-16.xml ISO-8859-16 encoding map
maps/kana-and-kanji.ent external entity used by x-sjis-*.xml
maps/oem.ent external entity used by x-sjis-*.xml
maps/windows-1250.xml windows-1250 encoding map
maps/windows-1251.xml windows-1251 encoding map
maps/windows-1252.xml windows-1252 encoding map
maps/windows-1253.xml windows-1253 encoding map
maps/windows-1254.xml windows-1254 encoding map
maps/windows-1255.xml windows-1255 encoding map
maps/windows-1256.xml windows-1256 encoding map
maps/windows-1257.xml
windows-1257 encoding map maps/windows-1258.xml windows-1258 encoding map maps/x-euc-jp-jisx0221.xml x-euc-jp-jisx0221 encoding map maps/x-euc-jp-part1.ent external entity used by x-euc-jp-*.xml maps/x-euc-jp-part2.ent external entity used by x-euc-jp-*.xml maps/x-euc-jp-part3.ent external entity used by x-euc-jp-*.xml maps/x-euc-jp-part4.ent external entity used by x-euc-jp-*.xml maps/x-euc-jp-unicode.xml x-euc-jp-unicode encoding map maps/x-sjis-cp932.xml x-sjis-cp932 encoding map maps/x-sjis-jdk117.xml x-sjis-jdk117 encoding map maps/x-sjis-jisx0221.xml x-sjis-jisx0221 encoding map maps/x-sjis-unicode.xml x-sjis-unicode encoding map t/test.t Test script META.yml Module YAML meta-data (added by MakeMaker) META.json Module JSON meta-data (added by MakeMaker) XML-Encoding-2.09/maps/0000755000104000244210000000000012342041010016621 5ustar AdministratorsDomain UsersXML-Encoding-2.09/maps/big5.xml0000644000104000244210000150540512342041007020211 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/encmap.dtd0000644000104000244210000000730712342041007020576 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/IANA-assigned-character-sets0000644000104000244210000012627712342041007024002 0ustar AdministratorsDomain Users=================================================================== CHARACTER SETS These are the official names for character sets that may be used in the Internet and may be referred to in Internet documentation. These names are expressed in ANSI_X3.4-1968 which is commonly called US-ASCII or simply ASCII. The character set most commonly use in the Internet and used especially in protocol standards is US-ASCII, this is strongly encouraged. The use of the name US-ASCII is also encouraged. The character set names may be up to 40 characters taken from the printable characters of US-ASCII. However, no distinction is made between use of upper and lower case letters. The MIBenum value is a unique value for use in MIBs to identify coded character sets. 
The value space for MIBenum values has been divided into three regions. The first region (3-999) consists of coded character sets that have been standardized by some standard setting organization. This region is intended for standards that do not have subset implementations. The second region (1000-1999) is for the Unicode and ISO/IEC 10646 coded character sets together with a specification of a (set of) sub-repertoires that may occur. The third region (>1999) is intended for vendor specific coded character sets. Assigned MIB enum Numbers ------------------------- 0 Reserved 1 Reserved 3-106 Set By Standards Organizations 1000-1010 Unicode / 10646 2000-2088 Vendor 2250-2258 Vendor The aliases that start with "cs" have been added for use with the Printer MIB (see RFC 1759) and contain the standard numbers along with suggestive names in order to facilitate applications that want to display the names in user interfaces. The "cs" stands for character set and is provided for applications that need a lower case first letter but want to use mixed case thereafter that cannot contain any special characters, such as underbar ("_") and dash ("-"). If the character set is from an ISO standard, its cs alias is the ISO standard number or name. If the character set is not from an ISO standard, but is registered with ISO (ECMA is the current ISO Registration Authority), the ISO Registry number is specified as ISOnnn followed by letters suggestive of the name or standards number of the code set. When a national or international standard is revised, the year of revision is added to the cs alias of the new character set entry in the IANA Registry in order to distinguish the revised character set from the original character set. 
Character Set Reference ------------- --------- Name: ANSI_X3.4-1968 [RFC1345,KXS2] MIBenum: 3 Source: ECMA registry Alias: iso-ir-6 Alias: ANSI_X3.4-1986 Alias: ISO_646.irv:1991 Alias: ASCII Alias: ISO646-US Alias: US-ASCII (preferred MIME name) Alias: us Alias: IBM367 Alias: cp367 Alias: csASCII Name: ISO-10646-UCS-2 MIBenum: 1000 Source: the 2-octet Basic Multilingual Plane, aka Unicode this needs to specify network byte order: the standard does not specify (it is a 16-bit integer space) Alias: csUnicode Name: ISO-10646-UCS-4 MIBenum: 1001 Source: the full code space. (same comment about byte order, these are 31-bit numbers. Alias: csUCS4 Name: ISO-10646-UTF-1 MIBenum: 27 Source: Universal Transfer Format (1), this is the multibyte encoding, that subsets ASCII-7. It does not have byte ordering issues. Alias: csISO10646UTF1 Name: ISO_646.basic:1983 [RFC1345,KXS2] MIBenum: 28 Source: ECMA registry Alias: ref Alias: csISO646basic1983 Name: INVARIANT [RFC1345,KXS2] MIBenum: 29 Alias: csINVARIANT Name: ISO_646.irv:1983 [RFC1345,KXS2] MIBenum: 30 Source: ECMA registry Alias: iso-ir-2 Alias: irv Alias: csISO2IntlRefVersion Name: BS_4730 [RFC1345,KXS2] MIBenum: 20 Source: ECMA registry Alias: iso-ir-4 Alias: ISO646-GB Alias: gb Alias: uk Alias: csISO4UnitedKingdom Name: NATS-SEFI [RFC1345,KXS2] MIBenum: 31 Source: ECMA registry Alias: iso-ir-8-1 Alias: csNATSSEFI Name: NATS-SEFI-ADD [RFC1345,KXS2] MIBenum: 32 Source: ECMA registry Alias: iso-ir-8-2 Alias: csNATSSEFIADD Name: NATS-DANO [RFC1345,KXS2] MIBenum: 33 Source: ECMA registry Alias: iso-ir-9-1 Alias: csNATSDANO Name: NATS-DANO-ADD [RFC1345,KXS2] MIBenum: 34 Source: ECMA registry Alias: iso-ir-9-2 Alias: csNATSDANOADD Name: SEN_850200_B [RFC1345,KXS2] MIBenum: 35 Source: ECMA registry Alias: iso-ir-10 Alias: FI Alias: ISO646-FI Alias: ISO646-SE Alias: se Alias: csISO10Swedish Name: SEN_850200_C [RFC1345,KXS2] MIBenum: 21 Source: ECMA registry Alias: iso-ir-11 Alias: ISO646-SE2 Alias: se2 Alias: 
csISO11SwedishForNames Name: KS_C_5601-1987 [RFC1345,KXS2] MIBenum: 36 Source: ECMA registry Alias: iso-ir-149 Alias: KS_C_5601-1989 Alias: KSC_5601 Alias: korean Alias: csKSC56011987 Name: ISO-2022-KR (preferred MIME name) [RFC1557,Choi] MIBenum: 37 Source: RFC-1557 (see also KS_C_5601-1987) Alias: csISO2022KR Name: EUC-KR (preferred MIME name) [RFC1557,Choi] MIBenum: 38 Source: RFC-1557 (see also KS_C_5861-1992) Alias: csEUCKR Name: ISO-2022-JP (preferred MIME name) [RFC1468,Murai] MIBenum: 39 Source: RFC-1468 (see also RFC-2237) Alias: csISO2022JP Name: ISO-2022-JP-2 (preferred MIME name) [RFC1554,Ohta] MIBenum: 40 Source: RFC-1554 Alias: csISO2022JP2 Name: ISO-2022-CN [RFC1922] MIBenum: 104 Source: RFC-1922 Name: ISO-2022-CN-EXT [RFC1922] MIBenum: 105 Source: RFC-1922 Name: JIS_C6220-1969-jp [RFC1345,KXS2] MIBenum: 41 Source: ECMA registry Alias: JIS_C6220-1969 Alias: iso-ir-13 Alias: katakana Alias: x0201-7 Alias: csISO13JISC6220jp Name: JIS_C6220-1969-ro [RFC1345,KXS2] MIBenum: 42 Source: ECMA registry Alias: iso-ir-14 Alias: jp Alias: ISO646-JP Alias: csISO14JISC6220ro Name: IT [RFC1345,KXS2] MIBenum: 22 Source: ECMA registry Alias: iso-ir-15 Alias: ISO646-IT Alias: csISO15Italian Name: PT [RFC1345,KXS2] MIBenum: 43 Source: ECMA registry Alias: iso-ir-16 Alias: ISO646-PT Alias: csISO16Portuguese Name: ES [RFC1345,KXS2] MIBenum: 23 Source: ECMA registry Alias: iso-ir-17 Alias: ISO646-ES Alias: csISO17Spanish Name: greek7-old [RFC1345,KXS2] MIBenum: 44 Source: ECMA registry Alias: iso-ir-18 Alias: csISO18Greek7Old Name: latin-greek [RFC1345,KXS2] MIBenum: 45 Source: ECMA registry Alias: iso-ir-19 Alias: csISO19LatinGreek Name: DIN_66003 [RFC1345,KXS2] MIBenum: 24 Source: ECMA registry Alias: iso-ir-21 Alias: de Alias: ISO646-DE Alias: csISO21German Name: NF_Z_62-010_(1973) [RFC1345,KXS2] MIBenum: 46 Source: ECMA registry Alias: iso-ir-25 Alias: ISO646-FR1 Alias: csISO25French Name: Latin-greek-1 [RFC1345,KXS2] MIBenum: 47 Source: ECMA registry Alias: iso-ir-27 
Alias: csISO27LatinGreek1 Name: ISO_5427 [RFC1345,KXS2] MIBenum: 48 Source: ECMA registry Alias: iso-ir-37 Alias: csISO5427Cyrillic Name: JIS_C6226-1978 [RFC1345,KXS2] MIBenum: 49 Source: ECMA registry Alias: iso-ir-42 Alias: csISO42JISC62261978 Name: BS_viewdata [RFC1345,KXS2] MIBenum: 50 Source: ECMA registry Alias: iso-ir-47 Alias: csISO47BSViewdata Name: INIS [RFC1345,KXS2] MIBenum: 51 Source: ECMA registry Alias: iso-ir-49 Alias: csISO49INIS Name: INIS-8 [RFC1345,KXS2] MIBenum: 52 Source: ECMA registry Alias: iso-ir-50 Alias: csISO50INIS8 Name: INIS-cyrillic [RFC1345,KXS2] MIBenum: 53 Source: ECMA registry Alias: iso-ir-51 Alias: csISO51INISCyrillic Name: ISO_5427:1981 [RFC1345,KXS2] MIBenum: 54 Source: ECMA registry Alias: iso-ir-54 Alias: ISO5427Cyrillic1981 Name: ISO_5428:1980 [RFC1345,KXS2] MIBenum: 55 Source: ECMA registry Alias: iso-ir-55 Alias: csISO5428Greek Name: GB_1988-80 [RFC1345,KXS2] MIBenum: 56 Source: ECMA registry Alias: iso-ir-57 Alias: cn Alias: ISO646-CN Alias: csISO57GB1988 Name: GB_2312-80 [RFC1345,KXS2] MIBenum: 57 Source: ECMA registry Alias: iso-ir-58 Alias: chinese Alias: csISO58GB231280 Name: NS_4551-1 [RFC1345,KXS2] MIBenum: 25 Source: ECMA registry Alias: iso-ir-60 Alias: ISO646-NO Alias: no Alias: csISO60DanishNorwegian Alias: csISO60Norwegian1 Name: NS_4551-2 [RFC1345,KXS2] MIBenum: 58 Source: ECMA registry Alias: ISO646-NO2 Alias: iso-ir-61 Alias: no2 Alias: csISO61Norwegian2 Name: NF_Z_62-010 [RFC1345,KXS2] MIBenum: 26 Source: ECMA registry Alias: iso-ir-69 Alias: ISO646-FR Alias: fr Alias: csISO69French Name: videotex-suppl [RFC1345,KXS2] MIBenum: 59 Source: ECMA registry Alias: iso-ir-70 Alias: csISO70VideotexSupp1 Name: PT2 [RFC1345,KXS2] MIBenum: 60 Source: ECMA registry Alias: iso-ir-84 Alias: ISO646-PT2 Alias: csISO84Portuguese2 Name: ES2 [RFC1345,KXS2] MIBenum: 61 Source: ECMA registry Alias: iso-ir-85 Alias: ISO646-ES2 Alias: csISO85Spanish2 Name: MSZ_7795.3 [RFC1345,KXS2] MIBenum: 62 Source: ECMA registry Alias: 
iso-ir-86 Alias: ISO646-HU Alias: hu Alias: csISO86Hungarian Name: JIS_C6226-1983 [RFC1345,KXS2] MIBenum: 63 Source: ECMA registry Alias: iso-ir-87 Alias: x0208 Alias: JIS_X0208-1983 Alias: csISO87JISX0208 Name: greek7 [RFC1345,KXS2] MIBenum: 64 Source: ECMA registry Alias: iso-ir-88 Alias: csISO88Greek7 Name: ASMO_449 [RFC1345,KXS2] MIBenum: 65 Source: ECMA registry Alias: ISO_9036 Alias: arabic7 Alias: iso-ir-89 Alias: csISO89ASMO449 Name: iso-ir-90 [RFC1345,KXS2] MIBenum: 66 Source: ECMA registry Alias: csISO90 Name: JIS_C6229-1984-a [RFC1345,KXS2] MIBenum: 67 Source: ECMA registry Alias: iso-ir-91 Alias: jp-ocr-a Alias: csISO91JISC62291984a Name: JIS_C6229-1984-b [RFC1345,KXS2] MIBenum: 68 Source: ECMA registry Alias: iso-ir-92 Alias: ISO646-JP-OCR-B Alias: jp-ocr-b Alias: csISO92JISC62991984b Name: JIS_C6229-1984-b-add [RFC1345,KXS2] MIBenum: 69 Source: ECMA registry Alias: iso-ir-93 Alias: jp-ocr-b-add Alias: csISO93JIS62291984badd Name: JIS_C6229-1984-hand [RFC1345,KXS2] MIBenum: 70 Source: ECMA registry Alias: iso-ir-94 Alias: jp-ocr-hand Alias: csISO94JIS62291984hand Name: JIS_C6229-1984-hand-add [RFC1345,KXS2] MIBenum: 71 Source: ECMA registry Alias: iso-ir-95 Alias: jp-ocr-hand-add Alias: csISO95JIS62291984handadd Name: JIS_C6229-1984-kana [RFC1345,KXS2] MIBenum: 72 Source: ECMA registry Alias: iso-ir-96 Alias: csISO96JISC62291984kana Name: ISO_2033-1983 [RFC1345,KXS2] MIBenum: 73 Source: ECMA registry Alias: iso-ir-98 Alias: e13b Alias: csISO2033 Name: ANSI_X3.110-1983 [RFC1345,KXS2] MIBenum: 74 Source: ECMA registry Alias: iso-ir-99 Alias: CSA_T500-1983 Alias: NAPLPS Alias: csISO99NAPLPS Name: ISO_8859-1:1987 [RFC1345,KXS2] MIBenum: 4 Source: ECMA registry Alias: iso-ir-100 Alias: ISO_8859-1 Alias: ISO-8859-1 (preferred MIME name) Alias: latin1 Alias: l1 Alias: IBM819 Alias: CP819 Alias: csISOLatin1 Name: ISO_8859-2:1987 [RFC1345,KXS2] MIBenum: 5 Source: ECMA registry Alias: iso-ir-101 Alias: ISO_8859-2 Alias: ISO-8859-2 (preferred MIME name) Alias: 
latin2 Alias: l2 Alias: csISOLatin2 Name: T.61-7bit [RFC1345,KXS2] MIBenum: 75 Source: ECMA registry Alias: iso-ir-102 Alias: csISO102T617bit Name: T.61-8bit [RFC1345,KXS2] MIBenum: 76 Alias: T.61 Source: ECMA registry Alias: iso-ir-103 Alias: csISO103T618bit Name: ISO_8859-3:1988 [RFC1345,KXS2] MIBenum: 6 Source: ECMA registry Alias: iso-ir-109 Alias: ISO_8859-3 Alias: ISO-8859-3 (preferred MIME name) Alias: latin3 Alias: l3 Alias: csISOLatin3 Name: ISO_8859-4:1988 [RFC1345,KXS2] MIBenum: 7 Source: ECMA registry Alias: iso-ir-110 Alias: ISO_8859-4 Alias: ISO-8859-4 (preferred MIME name) Alias: latin4 Alias: l4 Alias: csISOLatin4 Name: ECMA-cyrillic [RFC1345,KXS2] MIBenum: 77 Source: ECMA registry Alias: iso-ir-111 Alias: csISO111ECMACyrillic Name: CSA_Z243.4-1985-1 [RFC1345,KXS2] MIBenum: 78 Source: ECMA registry Alias: iso-ir-121 Alias: ISO646-CA Alias: csa7-1 Alias: ca Alias: csISO121Canadian1 Name: CSA_Z243.4-1985-2 [RFC1345,KXS2] MIBenum: 79 Source: ECMA registry Alias: iso-ir-122 Alias: ISO646-CA2 Alias: csa7-2 Alias: csISO122Canadian2 Name: CSA_Z243.4-1985-gr [RFC1345,KXS2] MIBenum: 80 Source: ECMA registry Alias: iso-ir-123 Alias: csISO123CSAZ24341985gr Name: ISO_8859-6:1987 [RFC1345,KXS2] MIBenum: 9 Source: ECMA registry Alias: iso-ir-127 Alias: ISO_8859-6 Alias: ISO-8859-6 (preferred MIME name) Alias: ECMA-114 Alias: ASMO-708 Alias: arabic Alias: csISOLatinArabic Name: ISO_8859-6-E [RFC1556,IANA] MIBenum: 81 Source: RFC-1556 Alias: csISO88596E Name: ISO_8859-6-I [RFC1556,IANA] MIBenum: 82 Source: RFC-1556 Alias: csISO88596I Name: ISO_8859-7:1987 [RFC1947,RFC1345,KXS2] MIBenum: 10 Source: ECMA registry Alias: iso-ir-126 Alias: ISO_8859-7 Alias: ISO-8859-7 (preferred MIME name) Alias: ELOT_928 Alias: ECMA-118 Alias: greek Alias: greek8 Alias: csISOLatinGreek Name: T.101-G2 [RFC1345,KXS2] MIBenum: 83 Source: ECMA registry Alias: iso-ir-128 Alias: csISO128T101G2 Name: ISO_8859-8:1988 [RFC1345,KXS2] MIBenum: 11 Source: ECMA registry Alias: iso-ir-138 Alias: 
ISO_8859-8 Alias: ISO-8859-8 (preferred MIME name) Alias: hebrew Alias: csISOLatinHebrew Name: ISO_8859-8-E [RFC1556,Nussbacher] MIBenum: 84 Source: RFC-1556 Alias: csISO88598E Name: ISO_8859-8-I [RFC1556,Nussbacher] MIBenum: 85 Source: RFC-1556 Alias: csISO88598I Name: CSN_369103 [RFC1345,KXS2] MIBenum: 86 Source: ECMA registry Alias: iso-ir-139 Alias: csISO139CSN369103 Name: JUS_I.B1.002 [RFC1345,KXS2] MIBenum: 87 Source: ECMA registry Alias: iso-ir-141 Alias: ISO646-YU Alias: js Alias: yu Alias: csISO141JUSIB1002 Name: ISO_6937-2-add [RFC1345,KXS2] MIBenum: 14 Source: ECMA registry and ISO 6937-2:1983 Alias: iso-ir-142 Alias: csISOTextComm Name: IEC_P27-1 [RFC1345,KXS2] MIBenum: 88 Source: ECMA registry Alias: iso-ir-143 Alias: csISO143IECP271 Name: ISO_8859-5:1988 [RFC1345,KXS2] MIBenum: 8 Source: ECMA registry Alias: iso-ir-144 Alias: ISO_8859-5 Alias: ISO-8859-5 (preferred MIME name) Alias: cyrillic Alias: csISOLatinCyrillic Name: JUS_I.B1.003-serb [RFC1345,KXS2] MIBenum: 89 Source: ECMA registry Alias: iso-ir-146 Alias: serbian Alias: csISO146Serbian Name: JUS_I.B1.003-mac [RFC1345,KXS2] MIBenum: 90 Source: ECMA registry Alias: macedonian Alias: iso-ir-147 Alias: csISO147Macedonian Name: ISO_8859-9:1989 [RFC1345,KXS2] MIBenum: 12 Source: ECMA registry Alias: iso-ir-148 Alias: ISO_8859-9 Alias: ISO-8859-9 (preferred MIME name) Alias: latin5 Alias: l5 Alias: csISOLatin5 Name: greek-ccitt [RFC1345,KXS2] MIBenum: 91 Source: ECMA registry Alias: iso-ir-150 Alias: csISO150 Alias: csISO150GreekCCITT Name: NC_NC00-10:81 [RFC1345,KXS2] MIBenum: 92 Source: ECMA registry Alias: cuba Alias: iso-ir-151 Alias: ISO646-CU Alias: csISO151Cuba Name: ISO_6937-2-25 [RFC1345,KXS2] MIBenum: 93 Source: ECMA registry Alias: iso-ir-152 Alias: csISO6937Add Name: GOST_19768-74 [RFC1345,KXS2] MIBenum: 94 Source: ECMA registry Alias: ST_SEV_358-88 Alias: iso-ir-153 Alias: csISO153GOST1976874 Name: ISO_8859-supp [RFC1345,KXS2] MIBenum: 95 Source: ECMA registry Alias: iso-ir-154 Alias: 
latin1-2-5 Alias: csISO8859Supp Name: ISO_10367-box [RFC1345,KXS2] MIBenum: 96 Source: ECMA registry Alias: iso-ir-155 Alias: csISO10367Box Name: latin6 [RFC1345,KXS2] MIBenum: 13 Source: ECMA registry Alias: iso-ir-157 Alias: l6 Alias: ISO_8859-10:1992 Alias: csISOLatin6 Name: latin-lap [RFC1345,KXS2] MIBenum: 97 Source: ECMA registry Alias: lap Alias: iso-ir-158 Alias: csISO158Lap Name: JIS_X0212-1990 [RFC1345,KXS2] MIBenum: 98 Source: ECMA registry Alias: x0212 Alias: iso-ir-159 Alias: csISO159JISX02121990 Name: DS_2089 [RFC1345,KXS2] MIBenum: 99 Source: Danish Standard, DS 2089, February 1974 Alias: DS2089 Alias: ISO646-DK Alias: dk Alias: csISO646Danish Name: us-dk [RFC1345,KXS2] MIBenum: 100 Alias: csUSDK Name: dk-us [RFC1345,KXS2] MIBenum: 101 Alias: csDKUS Name: JIS_X0201 [RFC1345,KXS2] MIBenum: 15 Source: JIS X 0201-1976. One byte only, this is equivalent to JIS/Roman (similar to ASCII) plus eight-bit half-width Katakana Alias: X0201 Alias: csHalfWidthKatakana Name: KSC5636 [RFC1345,KXS2] MIBenum: 102 Alias: ISO646-KR Alias: csKSC5636 Name: DEC-MCS [RFC1345,KXS2] MIBenum: 2008 Source: VAX/VMS User's Manual, Order Number: AI-Y517A-TE, April 1986. Alias: dec Alias: csDECMCS Name: hp-roman8 [HP-PCL5,RFC1345,KXS2] MIBenum: 2004 Source: LaserJet IIP Printer User's Manual, HP part no 33471-90901, Hewlett-Packard, June 1989. 
Alias: roman8 Alias: r8 Alias: csHPRoman8 Name: macintosh [RFC1345,KXS2] MIBenum: 2027 Source: The Unicode Standard ver1.0, ISBN 0-201-56788-1, Oct 1991 Alias: mac Alias: csMacintosh Name: IBM037 [RFC1345,KXS2] MIBenum: 2028 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: cp037 Alias: ebcdic-cp-us Alias: ebcdic-cp-ca Alias: ebcdic-cp-wt Alias: ebcdic-cp-nl Alias: csIBM037 Name: IBM038 [RFC1345,KXS2] MIBenum: 2029 Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990 Alias: EBCDIC-INT Alias: cp038 Alias: csIBM038 Name: IBM273 [RFC1345,KXS2] MIBenum: 2030 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: CP273 Alias: csIBM273 Name: IBM274 [RFC1345,KXS2] MIBenum: 2031 Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990 Alias: EBCDIC-BE Alias: CP274 Alias: csIBM274 Name: IBM275 [RFC1345,KXS2] MIBenum: 2032 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: EBCDIC-BR Alias: cp275 Alias: csIBM275 Name: IBM277 [RFC1345,KXS2] MIBenum: 2033 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: EBCDIC-CP-DK Alias: EBCDIC-CP-NO Alias: csIBM277 Name: IBM278 [RFC1345,KXS2] MIBenum: 2034 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: CP278 Alias: ebcdic-cp-fi Alias: ebcdic-cp-se Alias: csIBM278 Name: IBM280 [RFC1345,KXS2] MIBenum: 2035 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: CP280 Alias: ebcdic-cp-it Alias: csIBM280 Name: IBM281 [RFC1345,KXS2] MIBenum: 2036 Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990 Alias: EBCDIC-JP-E Alias: cp281 Alias: csIBM281 Name: IBM284 [RFC1345,KXS2] MIBenum: 2037 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: CP284 Alias: ebcdic-cp-es Alias: csIBM284 Name: IBM285 [RFC1345,KXS2] MIBenum: 2038 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: CP285 Alias: ebcdic-cp-gb Alias: csIBM285 Name: IBM290 [RFC1345,KXS2] MIBenum: 2039 Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990 Alias: cp290 Alias: EBCDIC-JP-kana Alias: csIBM290 Name: IBM297 [RFC1345,KXS2] 
MIBenum: 2040 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: cp297 Alias: ebcdic-cp-fr Alias: csIBM297 Name: IBM420 [RFC1345,KXS2] MIBenum: 2041 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990, IBM NLS RM p 11-11 Alias: cp420 Alias: ebcdic-cp-ar1 Alias: csIBM420 Name: IBM423 [RFC1345,KXS2] MIBenum: 2042 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: cp423 Alias: ebcdic-cp-gr Alias: csIBM423 Name: IBM424 [RFC1345,KXS2] MIBenum: 2043 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: cp424 Alias: ebcdic-cp-he Alias: csIBM424 Name: IBM437 [RFC1345,KXS2] MIBenum: 2011 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: cp437 Alias: 437 Alias: csPC8CodePage437 Name: IBM500 [RFC1345,KXS2] MIBenum: 2044 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: CP500 Alias: ebcdic-cp-be Alias: ebcdic-cp-ch Alias: csIBM500 Name: IBM775 [HP-PCL5] MIBenum: 2087 Source: HP PCL 5 Comparison Guide (P/N 5021-0329) pp B-13, 1996 Alias: cp775 Alias: csPC775Baltic Name: IBM850 [RFC1345,KXS2] MIBenum: 2009 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: cp850 Alias: 850 Alias: csPC850Multilingual Name: IBM851 [RFC1345,KXS2] MIBenum: 2045 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: cp851 Alias: 851 Alias: csIBM851 Name: IBM852 [RFC1345,KXS2] MIBenum: 2010 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: cp852 Alias: 852 Alias: csPCp852 Name: IBM855 [RFC1345,KXS2] MIBenum: 2046 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: cp855 Alias: 855 Alias: csIBM855 Name: IBM857 [RFC1345,KXS2] MIBenum: 2047 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: cp857 Alias: 857 Alias: csIBM857 Name: IBM860 [RFC1345,KXS2] MIBenum: 2048 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: cp860 Alias: 860 Alias: csIBM860 Name: IBM861 [RFC1345,KXS2] MIBenum: 2049 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: cp861 Alias: 861 Alias: cp-is Alias: csIBM861 Name: IBM862 [RFC1345,KXS2] MIBenum: 2013 Source: IBM NLS RM Vol2 
SE09-8002-01, March 1990 Alias: cp862 Alias: 862 Alias: csPC862LatinHebrew Name: IBM863 [RFC1345,KXS2] MIBenum: 2050 Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991 Alias: cp863 Alias: 863 Alias: csIBM863 Name: IBM864 [RFC1345,KXS2] MIBenum: 2051 Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991 Alias: cp864 Alias: csIBM864 Name: IBM865 [RFC1345,KXS2] MIBenum: 2052 Source: IBM DOS 3.3 Ref (Abridged), 94X9575 (Feb 1987) Alias: cp865 Alias: 865 Alias: csIBM865 Name: IBM866 [Pond] MIBenum: 2086 Source: IBM NLDG Volume 2 (SE09-8002-03) August 1994 Alias: cp866 Alias: 866 Alias: csIBM866 Name: IBM868 [RFC1345,KXS2] MIBenum: 2053 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: CP868 Alias: cp-ar Alias: csIBM868 Name: IBM869 [RFC1345,KXS2] MIBenum: 2054 Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991 Alias: cp869 Alias: 869 Alias: cp-gr Alias: csIBM869 Name: IBM870 [RFC1345,KXS2] MIBenum: 2055 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: CP870 Alias: ebcdic-cp-roece Alias: ebcdic-cp-yu Alias: csIBM870 Name: IBM871 [RFC1345,KXS2] MIBenum: 2056 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: CP871 Alias: ebcdic-cp-is Alias: csIBM871 Name: IBM880 [RFC1345,KXS2] MIBenum: 2057 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: cp880 Alias: EBCDIC-Cyrillic Alias: csIBM880 Name: IBM891 [RFC1345,KXS2] MIBenum: 2058 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: cp891 Alias: csIBM891 Name: IBM903 [RFC1345,KXS2] MIBenum: 2059 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: cp903 Alias: csIBM903 Name: IBM904 [RFC1345,KXS2] MIBenum: 2060 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: cp904 Alias: 904 Alias: csIBBM904 Name: IBM905 [RFC1345,KXS2] MIBenum: 2061 Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990 Alias: CP905 Alias: ebcdic-cp-tr Alias: csIBM905 Name: IBM918 [RFC1345,KXS2] MIBenum: 2062 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: CP918 Alias: 
ebcdic-cp-ar2 Alias: csIBM918 Name: IBM1026 [RFC1345,KXS2] MIBenum: 2063 Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 Alias: CP1026 Alias: csIBM1026 Name: EBCDIC-AT-DE [RFC1345,KXS2] MIBenum: 2064 Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 Alias: csIBMEBCDICATDE Name: EBCDIC-AT-DE-A [RFC1345,KXS2] MIBenum: 2065 Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 Alias: csEBCDICATDEA Name: EBCDIC-CA-FR [RFC1345,KXS2] MIBenum: 2066 Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 Alias: csEBCDICCAFR Name: EBCDIC-DK-NO [RFC1345,KXS2] MIBenum: 2067 Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 Alias: csEBCDICDKNO Name: EBCDIC-DK-NO-A [RFC1345,KXS2] MIBenum: 2068 Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 Alias: csEBCDICDKNOA Name: EBCDIC-FI-SE [RFC1345,KXS2] MIBenum: 2069 Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 Alias: csEBCDICFISE Name: EBCDIC-FI-SE-A [RFC1345,KXS2] MIBenum: 2070 Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 Alias: csEBCDICFISEA Name: EBCDIC-FR [RFC1345,KXS2] MIBenum: 2071 Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 Alias: csEBCDICFR Name: EBCDIC-IT [RFC1345,KXS2] MIBenum: 2072 Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 Alias: csEBCDICIT Name: EBCDIC-PT [RFC1345,KXS2] MIBenum: 2073 Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 Alias: csEBCDICPT Name: EBCDIC-ES [RFC1345,KXS2] MIBenum: 2074 Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 Alias: csEBCDICES Name: EBCDIC-ES-A [RFC1345,KXS2] MIBenum: 2075 Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 Alias: csEBCDICESA Name: EBCDIC-ES-S [RFC1345,KXS2] MIBenum: 2076 Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 Alias: csEBCDICESS Name: EBCDIC-UK [RFC1345,KXS2] MIBenum: 2077 Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 Alias: csEBCDICUK Name: EBCDIC-US [RFC1345,KXS2] MIBenum: 2078 Source: IBM 
3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 Alias: csEBCDICUS Name: UNKNOWN-8BIT [RFC1428] MIBenum: 2079 Alias: csUnknown8BiT Name: MNEMONIC [RFC1345,KXS2] MIBenum: 2080 Source: RFC 1345, also known as "mnemonic+ascii+38" Alias: csMnemonic Name: MNEM [RFC1345,KXS2] MIBenum: 2081 Source: RFC 1345, also known as "mnemonic+ascii+8200" Alias: csMnem Name: VISCII [RFC1456] MIBenum: 2082 Source: RFC 1456 Alias: csVISCII Name: VIQR [RFC1456] MIBenum: 2083 Source: RFC 1456 Alias: csVIQR Name: KOI8-R (preferred MIME name) [RFC1489] MIBenum: 2084 Source: RFC 1489, based on GOST-19768-74, ISO-6937/8, INIS-Cyrillic, ISO-5427. Alias: csKOI8R Name: KOI8-U [RFC2319] MIBenum: 2088 Source: RFC 2319 Name: UNICODE-1-1 [RFC1641] MIBenum: 1010 Source: RFC 1641 Alias: csUnicode11 Name: UNICODE-1-1-UTF-7 [RFC1642] MIBenum: 103 Source: RFC 1642 Alias: csUnicode11UTF7 Name: UTF-7 [RFC2152] MIBenum: 104 Source: RFC 2152 Alias: NONE Name: UTF-8 [RFC2279] MIBenum: 106 Source: RFC 2279 Alias: Name: JIS_Encoding MIBenum: 16 Source: JIS X 0202-1991. Uses ISO 2022 escape sequences to shift code sets as documented in JIS X 0202-1991. Alias: csJISEncoding Name: Shift_JIS (preferred MIME name) MIBenum: 17 Source: A Microsoft code that extends csHalfWidthKatakana to include kanji by adding a second byte when the value of the first byte is in the ranges 81-9F or E0-EF. Alias: MS_Kanji Alias: csShiftJIS Name: Extended_UNIX_Code_Packed_Format_for_Japanese MIBenum: 18 Source: Standardized by OSF, UNIX International, and UNIX Systems Laboratories Pacific. 
Uses ISO 2022 rules to select code set 0: US-ASCII (a single 7-bit byte set) code set 1: JIS X0208-1990 (a double 8-bit byte set) restricted to A0-FF in both bytes code set 2: Half Width Katakana (a single 7-bit byte set) requiring SS2 as the character prefix code set 3: JIS X0212-1990 (a double 7-bit byte set) restricted to A0-FF in both bytes requiring SS3 as the character prefix Alias: csEUCPkdFmtJapanese Alias: EUC-JP (preferred MIME name) Name: Extended_UNIX_Code_Fixed_Width_for_Japanese MIBenum: 19 Source: Used in Japan. Each character is 2 octets. code set 0: US-ASCII (a single 7-bit byte set) 1st byte = 00 2nd byte = 20-7E code set 1: JIS X0208-1990 (a double 7-bit byte set) restricted to A0-FF in both bytes code set 2: Half Width Katakana (a single 7-bit byte set) 1st byte = 00 2nd byte = A0-FF code set 3: JIS X0212-1990 (a double 7-bit byte set) restricted to A0-FF in the first byte and 21-7E in the second byte Alias: csEUCFixWidJapanese Name: ISO-10646-UCS-Basic MIBenum: 1002 Source: ASCII subset of Unicode. Basic Latin = collection 1 See ISO 10646, Appendix A Alias: csUnicodeASCII Name: ISO-10646-Unicode-Latin1 MIBenum: 1003 Source: ISO Latin-1 subset of Unicode. Basic Latin and Latin-1 Supplement = collections 1 and 2. See ISO 10646, Appendix A. See RFC 1815. Alias: csUnicodeLatin1 Alias: ISO-10646 Name: ISO-10646-J-1 Source: ISO 10646 Japanese, see RFC 1815. Name: ISO-8859-1 MIBenum: 1004 Source: IBM Latin-1 SAA Core Coded Character Set. 
Extended ISO 8859-1 Presentation Set, GCSGID: 2039 Alias: csUnicodeIBM2039 Name: ISO-Unicode-IBM-1261 MIBenum: 1005 Source: IBM Latin-2, -3, -5, Extended Presentation Set, GCSGID: 1261 Alias: csUnicodeIBM1261 Name: ISO-Unicode-IBM-1268 MIBenum: 1006 Source: IBM Latin-4 Extended Presentation Set, GCSGID: 1268 Alias: csUnidoceIBM1268 Name: ISO-Unicode-IBM-1276 MIBenum: 1007 Source: IBM Cyrillic Greek Extended Presentation Set, GCSGID: 1276 Alias: csUnicodeIBM1276 Name: ISO-Unicode-IBM-1264 MIBenum: 1008 Source: IBM Arabic Presentation Set, GCSGID: 1264 Alias: csUnicodeIBM1264 Name: ISO-Unicode-IBM-1265 MIBenum: 1009 Source: IBM Hebrew Presentation Set, GCSGID: 1265 Alias: csUnicodeIBM1265 Name: ISO-8859-1-Windows-3.0-Latin-1 [HP-PCL5] MIBenum: 2000 Source: Extended ISO 8859-1 Latin-1 for Windows 3.0. PCL Symbol Set id: 9U Alias: csWindows30Latin1 Name: ISO-8859-1-Windows-3.1-Latin-1 [HP-PCL5] MIBenum: 2001 Source: Extended ISO 8859-1 Latin-1 for Windows 3.1. PCL Symbol Set id: 19U Alias: csWindows31Latin1 Name: ISO-8859-2-Windows-Latin-2 [HP-PCL5] MIBenum: 2002 Source: Extended ISO 8859-2. Latin-2 for Windows 3.1. PCL Symbol Set id: 9E Alias: csWindows31Latin2 Name: ISO-8859-9-Windows-Latin-5 [HP-PCL5] MIBenum: 2003 Source: Extended ISO 8859-9. Latin-5 for Windows 3.1 PCL Symbol Set id: 5T Alias: csWindows31Latin5 Name: Adobe-Standard-Encoding [Adobe] MIBenum: 2005 Source: PostScript Language Reference Manual PCL Symbol Set id: 10J Alias: csAdobeStandardEncoding Name: Ventura-US [HP-PCL5] MIBenum: 2006 Source: Ventura US. ASCII plus characters typically used in publishing, like pilcrow, copyright, registered, trade mark, section, dagger, and double dagger in the range A0 (hex) to FF (hex). PCL Symbol Set id: 14J Alias: csVenturaUS Name: Ventura-International [HP-PCL5] MIBenum: 2007 Source: Ventura International. ASCII plus coded characters similar to Roman8. 
PCL Symbol Set id: 13J Alias: csVenturaInternational Name: PC8-Danish-Norwegian [HP-PCL5] MIBenum: 2012 Source: PC Danish Norwegian 8-bit PC set for Danish Norwegian PCL Symbol Set id: 11U Alias: csPC8DanishNorwegian Name: PC8-Turkish [HP-PCL5] MIBenum: 2014 Source: PC Latin Turkish. PCL Symbol Set id: 9T Alias: csPC8Turkish Name: IBM-Symbols [IBM-CIDT] MIBenum: 2015 Source: Presentation Set, CPGID: 259 Alias: csIBMSymbols Name: IBM-Thai [IBM-CIDT] MIBenum: 2016 Source: Presentation Set, CPGID: 838 Alias: csIBMThai Name: HP-Legal [HP-PCL5] MIBenum: 2017 Source: PCL 5 Comparison Guide, Hewlett-Packard, HP part number 5961-0510, October 1992 PCL Symbol Set id: 1U Alias: csHPLegal Name: HP-Pi-font [HP-PCL5] MIBenum: 2018 Source: PCL 5 Comparison Guide, Hewlett-Packard, HP part number 5961-0510, October 1992 PCL Symbol Set id: 15U Alias: csHPPiFont Name: HP-Math8 [HP-PCL5] MIBenum: 2019 Source: PCL 5 Comparison Guide, Hewlett-Packard, HP part number 5961-0510, October 1992 PCL Symbol Set id: 8M Alias: csHPMath8 Name: Adobe-Symbol-Encoding [Adobe] MIBenum: 2020 Source: PostScript Language Reference Manual PCL Symbol Set id: 5M Alias: csHPPSMath Name: HP-DeskTop [HP-PCL5] MIBenum: 2021 Source: PCL 5 Comparison Guide, Hewlett-Packard, HP part number 5961-0510, October 1992 PCL Symbol Set id: 7J Alias: csHPDesktop Name: Ventura-Math [HP-PCL5] MIBenum: 2022 Source: PCL 5 Comparison Guide, Hewlett-Packard, HP part number 5961-0510, October 1992 PCL Symbol Set id: 6M Alias: csVenturaMath Name: Microsoft-Publishing [HP-PCL5] MIBenum: 2023 Source: PCL 5 Comparison Guide, Hewlett-Packard, HP part number 5961-0510, October 1992 PCL Symbol Set id: 6J Alias: csMicrosoftPublishing Name: Windows-31J MIBenum: 2024 Source: Windows Japanese. A further extension of csShiftJIS to include several OEM-specific kanji extensions. Like csShiftJIS, it adds a second byte when the value of the first byte is in the ranges 81-9F or E0-EF. 
PCL Symbol Set id: 19K Alias: csWindows31J Name: GB2312 (preferred MIME name) MIBenum: 2025 Source: Chinese for People's Republic of China (PRC) mixed one byte, two byte set: 20-7E = one byte ASCII A1-FE = two byte PRC Kanji See GB 2312-80 PCL Symbol Set Id: 18C Alias: csGB2312 Name: Big5 (preferred MIME name) MIBenum: 2026 Source: Chinese for Taiwan Multi-byte set. PCL Symbol Set Id: 18T Alias: csBig5 Name: windows-1250 MIBenum: 2250 Source: Microsoft (see ../character-set-info/windows-1250) [Lazhintseva] Alias: Name: windows-1251 MIBenum: 2251 Source: Microsoft (see ../character-set-info/windows-1251) [Lazhintseva] Alias: Name: windows-1253 MIBenum: 2253 Source: Microsoft (see ../character-set-info/windows-1253) [Lazhintseva] Alias: Name: windows-1254 MIBenum: 2254 Source: Microsoft (see ../character-set-info/windows-1254) [Lazhintseva] Alias: Name: windows-1255 MIBenum: 2255 Source: Microsoft (see ../character-set-info/windows-1255) [Lazhintseva] Alias: Name: windows-1256 MIBenum: 2256 Source: Microsoft (see ../character-set-info/windows-1256) [Lazhintseva] Alias: Name: windows-1257 MIBenum: 2257 Source: Microsoft (see ../character-set-info/windows-1257) [Lazhintseva] Alias: Name: windows-1258 MIBenum: 2258 Source: Microsoft (see ../character-set-info/windows-1258) [Lazhintseva] Alias: Name: TIS-620 MIBenum: 2259 Source: Thai Industrial Standards Institute (TISI) [Tantsetthi] Name: HZ-GB-2312 MIBenum: 2085 Source: RFC 1842, RFC 1843 [RFC1842, RFC1843] REFERENCES [RFC1345] Simonsen, K., "Character Mnemonics & Character Sets", RFC 1345, Rationel Almen Planlaegning, Rationel Almen Planlaegning, June 1992. [RFC1428] Vaudreuil, G., "Transition of Internet Mail from Just-Send-8 to 8bit-SMTP/MIME", RFC1428, CNRI, February 1993. [RFC1456] Vietnamese Standardization Working Group, "Conventions for Encoding the Vietnamese Language VISCII: VIetnamese Standard Code for Information Interchange VIQR: VIetnamese Quoted-Readable Specification Revision 1.1", RFC 1456, May 1993. 
[RFC1468] Murai, J., Crispin, M., and E. van der Poel, "Japanese Character Encoding for Internet Messages", RFC 1468, Keio University, Panda Programming, June 1993. [RFC1489] Chernov, A., "Registration of a Cyrillic Character Set", RFC1489, RELCOM Development Team, July 1993. [RFC1554] Ohta, M., and K. Handa, "ISO-2022-JP-2: Multilingual Extension of ISO-2022-JP", RFC1554, Tokyo Institute of Technology, ETL, December 1993. [RFC1556] Nussbacher, H., "Handling of Bi-directional Texts in MIME", RFC1556, Israeli Inter-University, December 1993. [RFC1557] Choi, U., Chon, K., and H. Park, "Korean Character Encoding for Internet Messages", KAIST, Solvit Chosun Media, December 1993. [RFC1641] Goldsmith, D., and M. Davis, "Using Unicode with MIME", RFC1641, Taligent, Inc., July 1994. [RFC1642] Goldsmith, D., and M. Davis, "UTF-7", RFC1642, Taligent, Inc., July 1994. [RFC1815] Ohta, M., "Character Sets ISO-10646 and ISO-10646-J-1", RFC 1815, Tokyo Institute of Technology, July 1995. [Adobe] Adobe Systems Incorporated, PostScript Language Reference Manual, second edition, Addison-Wesley Publishing Company, Inc., 1990. [HP-PCL5] Hewlett-Packard Company, "HP PCL 5 Comparison Guide", (P/N 5021-0329) pp B-13, 1996. [IBM-CIDT] IBM Corporation, "ABOUT TYPE: IBM's Technical Reference for Core Interchange Digitized Type", Publication number S544-3708-01 [RFC1842] Wei, Y., J. Li, and Y. Jiang, "ASCII Printable Characters-Based Chinese Character Encoding for Internet Messages", RFC 1842, Harvard University, Rice University, University of Maryland, August 1995. [RFC1843] Lee, F., "HZ - A Data Format for Exchanging Files of Arbitrarily Mixed Chinese and ASCII Characters", RFC 1843, Stanford University, August 1995. [RFC2152] Goldsmith, D., M. Davis, "UTF-7: A Mail-Safe Transformation Format of Unicode", RFC 2152, Apple Computer, Inc., Taligent Inc., May 1997. [RFC2279] Yergeau, F., "UTF-8, A Transformation Format of ISO 10646", RFC 2279, Alis Technologies, January, 1998. 
PEOPLE [KXS2] Keld Simonsen [Choi] Woohyong Choi [Lazhintseva] Katya Lazhintseva, , May 1996. [Murai] Jun Murai [Ohta] Masataka Ohta, , July 1995. [Nussbacher] Hank Nussbacher [Pond] Rick Pond March 1997. [Tantsetthi] Trin Tantsetthi , September 1998. [] XML-Encoding-2.09/maps/iso-8859-1.xml0000644000104000244210000000013512342041007020713 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/iso-8859-10.xml0000644000104000244210000000416512342041007021002 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/iso-8859-11.xml0000644000104000244210000000026312342041007020776 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/iso-8859-13.xml0000644000104000244210000000474012342041007021004 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/iso-8859-14.xml0000644000104000244210000000255112342041007021003 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/iso-8859-15.xml0000644000104000244210000000105112342041007020776 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/iso-8859-16.xml0000644000104000244210000000372512342041007021011 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/iso-8859-2.xml0000644000104000244210000000540112342041007020715 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/iso-8859-3.xml0000644000104000244210000000324712342041007020724 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/iso-8859-4.xml0000644000104000244210000000444512342041007020726 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/iso-8859-5.xml0000644000104000244210000000054412342041007020723 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/iso-8859-6.xml0000644000104000244210000000051512342041007020722 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/iso-8859-7.xml0000644000104000244210000000130512342041007020721 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/iso-8859-8.xml0000644000104000244210000000061512342041007020725 0ustar AdministratorsDomain Users 
XML-Encoding-2.09/maps/iso-8859-9.xml0000644000104000244210000000066612342041007020734 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/Japanese_Encodings.msg0000644000104000244210000001132512342041007023060 0ustar AdministratorsDomain UsersMapping files for Japanese encodings 1998 12/25 Fuji Xerox Information Systems MURATA Makoto 1. Overview This version of XML::Parser and XML::Encoding does not come with map files for the charset "Shift_JIS" and the charset "euc-jp". Unfortunately, each of these charsets has more than one mapping. None of these mappings are considered as authoritative. Therefore, we have come to believe that it is dangerous to provide map files for these charsets. Rather, we introduce several private charsets and map files for these private charsets. If IANA, Unicode Consortium, and JIS eventually reach a consensus, we will be able to provide map files for "Shift_JIS" and "euc-jp". 2. Different mappings from existing charsets to Unicode 1) Different mappings in JIS X0221 and Unicode The mapping between JIS X0208:1990 and Unicode 1.1 and the mapping between JIS X0212:1990 and Unicode 1.1 are published by the Unicode Consortium. They are available at ftp://ftp.unicode.org/Public/MAPPINGS/EASTASIA/JIS/JIS0208.TXT and ftp://ftp.unicode.org/Public/MAPPINGS/EASTASIA/JIS/JIS0212.TXT, respectively. These mapping files have a note as below: # The kanji mappings are a normative part of ISO/IEC 10646. The # non-kanji mappings are provisional, pending definition of # official mappings by Japanese standards bodies. Unfortunately, the non-kanji mappings in the Japanese standard for ISO 10646/1, namely JIS X 0221:1995, are different from the Unicode Consortium mapping since 0x213D of JIS X 0208 is mapped to U+2014 (em dash) rather than U+2015 (horizontal bar). Furthermore, JIS X 0221 clearly says that the mapping is informational and non-normative. As a result, some companies (e.g., Microsoft and Apple) have introduced slightly different mappings.
Therefore, neither the Unicode consortium mapping nor the JIS X 0221 mapping is considered authoritative. 2) Shift-JIS This charset is especially problematic, since its definition has been unclear since its inception. The current registration of the charset "Shift_JIS" is as below: >Name: Shift_JIS (preferred MIME name) >MIBenum: 17 >Source: A Microsoft code that extends csHalfWidthKatakana to include > kanji by adding a second byte when the value of the first > byte is in the ranges 81-9F or E0-EF. >Alias: MS_Kanji >Alias: csShiftJIS First, this does not refer to the mapping "Shift-JIS to Unicode" published by the Unicode consortium (available at ftp://ftp.unicode.org/Public/MAPPINGS/EASTASIA/JIS/SHIFTJIS.TXT). Second, "kanji" in this registration can be interpreted in different ways. Does this "kanji" refer to JIS X0208:1978, JIS X0208:1983, or JIS X0208:1990(== JIS X0208:1997)? These three standards are *incompatible* with each other. Moreover, we can even argue that "kanji" refers to JIS X0212 or ideographic characters in other countries. Third, each company has extended Shift JIS. For example, Microsoft introduced OEM extensions (NEC extensions and IBM extensions). Fourth, Shift JIS uses JIS X0201, which is almost upper-compatible with US-ASCII but is not quite. 5C and 7E of JIS X 0201 are different from backslash and tilde, respectively. However, many programming languages (e.g., Java) ignore this difference and assume that 5C and 7E of Shift JIS are backslash and tilde. 3. Proposed charsets and mappings As a tentative solution, we introduce two private charsets for EUC-JP and four private charsets for Shift JIS. 1) EUC-JP We have two charsets, namely "x-eucjp-unicode" and "x-eucjp-jisx0221". Their difference is only one code point. The mapping for the former is based on the Unicode Consortium mapping, while the latter is based on the JIS X0221 mapping.
2) Shift JIS We have four charsets, namely x-sjis-unicode, x-sjis-jisx0221, x-sjis-jdk117, and x-sjis-cp932. The mapping for the charset x-sjis-unicode is the one published by the Unicode consortium. The mapping for x-sjis-jisx0221 is almost equivalent to x-sjis-unicode, but 0x213D of JIS X 0208 is mapped to U+2014 (em dash) rather than U+2015. The charset x-sjis-jdk117 is again almost equivalent to x-sjis-unicode, but 0x5C and 0x7E of JIS X0201 are mapped to backslash and tilde. The charset x-sjis-cp932 is used by Microsoft Windows, and its mapping is published from the Unicode Consortium (available at: ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.txt). The coded character set for this charset includes NEC-extensions and IBM-extensions. 0x5C and 0x7E of JIS X0201 are mapped to backslash and tilde; 0x213D is mapped to U+2015; and 0x2140, 0x2141, 0x2142, and 0x215E of JIS X 0208 are mapped to compatibility characters. Makoto Fuji Xerox Information Systems Tel: +81-44-812-7230 Fax: +81-44-812-7231 E-mail: murata@apsdc.ksp.fujixerox.co.jp XML-Encoding-2.09/maps/kana-and-kanji.ent0000644000104000244210000063755412342041007022127 0ustar AdministratorsDomain Users &X815C; &X815F; &X8160; &X8161; &X817C; &X8191-2; &X81CA; XML-Encoding-2.09/maps/oem.ent0000644000104000244210000006205112342041007020123 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/README0000644000104000244210000000410012342041007017502 0ustar AdministratorsDomain UsersThis directory contains encoding maps for some selected encodings. These maps were generated by a perl script, make_encmap, from mapping information available from the Internet at ftp://ftp.unicode.org/Public/MAPPINGS. If you edit the generated XML file to add the "expat='yes'" to the encmap start tag, then you can use the compile_encoding script to check whether the map meets expat requirements (and also create the corresponding binary encmap file.) 
The file encmap.dtd is the document type declaration for these files and contains information about the semantics. This should give you sufficient information to build your own encoding map. I can't vouch for the validity of the DTD, since I haven't processed it. It is provided for informational purposes only. As mentioned in the DTD, there are some restrictions on what kinds of encodings can be loaded due to restrictions that the expat library places on us for efficiency reasons. One of those restrictions is that the encoding must represent the ASCII set of characters with a single byte and that byte must be equal to the equivalent Unicode scalar value with the exception of a few punctuation characters. This distribution contains four contributed encodings from MURATA Makoto that are variations on the encoding commonly called Shift_JIS: x-sjis-cp932.xml x-sjis-jdk117.xml x-sjis-jisx0221.xml x-sjis-unicode.xml (This is the same encoding as the shift_jis.xml that was distributed with this module in version 1.00) Please read his message (Japanese_Encodings.msg) about why these are here and why I've removed the shift_jis.xml encoding. We also have two contributed encodings that are variations of the EUC-JP encoding from Yoshida Masato : x-euc-jp-jisx0221.xml x-euc-jp-unicode.xml The comments that MURATA Makoto made in his message apply to these encodings too. I've taken the liberty of breaking out common sections for these two as external entities. So the fault of the uninformative names of these four external entities is mine.
Clark Cooper December 26, 1998 XML-Encoding-2.09/maps/windows-1250.xml0000644000104000244210000000632712342041007021440 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/windows-1251.xml0000644000104000244210000000335612342041007021440 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/windows-1252.xml0000644000104000244210000000152312342041007021433 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/windows-1253.xml0000644000104000244210000000203512342041007021433 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/windows-1254.xml0000644000104000244210000000215712342041007021441 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/windows-1255.xml0000644000104000244210000000205112342041007021433 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/windows-1256.xml0000644000104000244210000000375512342041007021450 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/windows-1257.xml0000644000104000244210000000565012342041007021445 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/windows-1258.xml0000644000104000244210000000314012342041007021436 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/x-euc-jp-jisx0221.xml0000644000104000244210000000102012342041007022252 0ustar AdministratorsDomain Users ]> &part1; &part2; &part3; &part4; XML-Encoding-2.09/maps/x-euc-jp-part1.ent0000644000104000244210000046020112342041007022017 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/x-euc-jp-part2.ent0000644000104000244210000000132612342041007022017 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/x-euc-jp-part3.ent0000644000104000244210000000325212342041007022020 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/x-euc-jp-part4.ent0000644000104000244210000063523712342041010022031 0ustar AdministratorsDomain Users XML-Encoding-2.09/maps/x-euc-jp-unicode.xml0000644000104000244210000000076212342041010022424 0ustar AdministratorsDomain Users ]> &part1; &part2; &part3; &part4; 
XML-Encoding-2.09/maps/x-sjis-cp932.xml0000644000104000244210000000155112342041010021420 0ustar AdministratorsDomain Users"> "> "> "> "> "> "> "> "> ]> &X5C; &X7E; &kana-and-kanji; &oem; XML-Encoding-2.09/maps/x-sjis-jdk117.xml0000644000104000244210000000117312342041010021561 0ustar AdministratorsDomain Users"> "> "> "> "> "> "> "> "> ]> &X5C; &X7E; &kana-and-kanji; XML-Encoding-2.09/maps/x-sjis-jisx0221.xml0000644000104000244210000000120712342041010022040 0ustar AdministratorsDomain Users"> "> "> "> "> "> "> "> "> ]> ¥-sign; &overline; &kana-and-kanji; XML-Encoding-2.09/maps/x-sjis-unicode.xml0000644000104000244210000000117412342041010022207 0ustar AdministratorsDomain Users"> "> "> "> "> "> "> "> "> ]> &X5C; &X7E; &kana-and-kanji; XML-Encoding-2.09/META.json0000644000104000244210000000273112342041010017305 0ustar AdministratorsDomain Users{ "abstract" : "A perl module for parsing XML encoding maps.", "author" : [ "Clark Cooper , Steve Hay " ], "dynamic_config" : 1, "generated_by" : "ExtUtils::MakeMaker version 6.98, CPAN::Meta::Converter version 2.140640", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "XML-Encoding", "no_index" : { "directory" : [ "t", "inc" ] }, "prereqs" : { "build" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "configure" : { "requires" : { "ExtUtils::MakeMaker" : "6.64", "perl" : "5.008001", "strict" : "0", "warnings" : "0" } }, "runtime" : { "requires" : { "XML::Parser" : "2.18", "fields" : "0", "integer" : "0", "perl" : "5.008001", "strict" : "0", "vars" : "0" } }, "test" : { "requires" : { "Test::More" : "0", "perl" : "5.008001", "strict" : "0", "warnings" : "0" } } }, "release_status" : "stable", "resources" : { "repository" : { "type" : "git", "url" : "https://github.com/steve-m-hay/XML-Encoding.git" } }, "version" : "2.09" } XML-Encoding-2.09/META.yml0000644000104000244210000000146012342041010017133 0ustar AdministratorsDomain Users--- abstract: 'A 
perl module for parsing XML encoding maps.' author: - 'Clark Cooper , Steve Hay ' build_requires: ExtUtils::MakeMaker: '0' Test::More: '0' perl: '5.008001' strict: '0' warnings: '0' configure_requires: ExtUtils::MakeMaker: '6.64' perl: '5.008001' strict: '0' warnings: '0' dynamic_config: 1 generated_by: 'ExtUtils::MakeMaker version 6.98, CPAN::Meta::Converter version 2.140640' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: XML-Encoding no_index: directory: - t - inc requires: XML::Parser: '2.18' fields: '0' integer: '0' perl: '5.008001' strict: '0' vars: '0' resources: repository: https://github.com/steve-m-hay/XML-Encoding.git version: '2.09' XML-Encoding-2.09/README0000644000104000244210000000160312342041010016541 0ustar AdministratorsDomain Users XML::Encoding Version 1.x Copyright (c) 1998 Clark Cooper Changes in Version 2.00 onwards Copyright (C) 2007-2010 Steve Hay All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This module, built as a subclass of XML::Parser, parses encoding map XML files. Included in the distribution is the compile_encoding script that compiles these to the binary form used by XML::Parser in order to parse scripts in the given encoding. Also included is a script that generates encoding map XML files from the type of mapping files obtained at ftp://ftp.unicode.org/Public/MAPPINGS. Several encoding maps are included in the maps subdirectory. There is nothing to build, but to test and install you need to do the following: perl Makefile.PL make test make install Clark Cooper December 12, 1998 XML-Encoding-2.09/t/0000755000104000244210000000000012342041010016124 5ustar AdministratorsDomain UsersXML-Encoding-2.09/t/test.t0000644000104000244210000000253612342041010017276 0ustar AdministratorsDomain Users# Before `make install' is performed this script should be runnable with # `make test'. 
# After `make install' it should work as `perl t/test.t'.

use 5.008001;

use strict;
use warnings;

use Test::More tests => 15;

BEGIN { use_ok('XML::Encoding'); }

# State recorded by the parser callbacks below.
my @prefixes = ();    # prefix bytes, in the order pushpfx() received them
my $pops     = 0;     # number of times poppfx() was called
my @rnginfo  = ();    # flattened argument lists handed to range()

# PushPrefixFcn callback: remember the prefix byte it was given.
# Returns an explicit undef, as the original callback did.
# NOTE(review): how XML::Encoding interprets callback return values is not
# visible in this file - presumably undef means "no error"; confirm.
sub pushpfx {
    my ($byte) = @_;
    push @prefixes, $byte;
    return undef;
}

# PopPrefixFcn callback: count the call. Returns an explicit undef.
sub poppfx {
    $pops++;
    return undef;
}

# RangeSetFcn callback: append every argument, in order, to @rnginfo.
# Returns an explicit undef.
sub range {
    push @rnginfo, @_;
    return undef;
}

# NOTE(review): the heredoc body is empty in this view of the file; the XML
# encmap markup it should contain appears to have been stripped. Restore it
# from the original distribution before relying on this test.
my $doc =<<'End_of_doc;';
End_of_doc;

# Values range() is expected to receive, flattened in call order and compared
# element by element against @rnginfo below.
my @exprng = (0xa0, 0x3000, 6, 0x41, 0x0753, 1, 0x50, 0x0400, 32);

my $p = XML::Encoding->new(
    PushPrefixFcn => \&pushpfx,
    PopPrefixFcn  => \&poppfx,
    RangeSetFcn   => \&range,
);

my $name = $p->parse($doc);

is($name, 'foo', 'parse() returns the encoding name');
is($prefixes[0], 0x81, 'first pushed prefix byte is 0x81');
is($pops, scalar @prefixes, 'pop count equals push count');
cmp_ok(scalar @rnginfo, '<=', scalar @exprng,
    'range callback received no more values than expected');

for my $i (0 .. $#exprng) {
    is($rnginfo[$i], $exprng[$i], "range callback value $i");
}

# Rewrite the length attribute from 32 to 200 so that the next parse is
# expected to fail with "Len plus byte > 256".
$doc =~ s/='32'/='200'/;

# Don't use an eval {} here to trap the parse() error
# because it causes a crash under perl-5.6.x
{
    local $SIG{__DIE__} = sub {
        my $err = $_[0];
        ok($err and $err =~ /^Len plus byte > 256/,
            'parse() dies with "Len plus byte > 256"');
        # Leave from inside the handler: this is the 15th and final planned
        # test, so the ok(0) below must never run on the success path.
        exit;
    };

    $p->parse($doc);
}

# Only reached when the second parse() did not die.
ok(0, 'parse() should have died on the oversized range');