libtext-wrapi18n-perl-0.06.orig/0040755000175000017500000000000007676264164016042 5ustar kubotakubotalibtext-wrapi18n-perl-0.06.orig/Changes0100644000175000017500000000064307676262442017332 0ustar kubotakubotaRevision history for Perl extension Text::WrapI18N. 0.06 Wed Jun 25 17:28:20 2003 - Clearification of the license. 0.05 Sun Jun 22 17:32:30 2003 - Added PREREQ_PM for Makefile.PL . 0.04 Sun Jun 22 11:54:32 2003 - A bugfix when given string is ended by \n. 0.03 Sun Jun 22 09:19:40 2003 - Added test. 0.02 Fri Jun 20 22:59:20 2003 - Added EXPORT_TAGS. 0.01 Thu Jun 19 22:03:11 2003 - initial version. libtext-wrapi18n-perl-0.06.orig/Makefile.PL0100644000175000017500000000030007675264602017776 0ustar kubotakubotause ExtUtils::MakeMaker; WriteMakefile( 'NAME' => 'Text::WrapI18N', 'VERSION_FROM' => 'WrapI18N.pm', # finds $VERSION 'PREREQ_PM' => { 'Text::CharWidth' => '0.02', } ); libtext-wrapi18n-perl-0.06.orig/README0100644000175000017500000000315307676263705016721 0ustar kubotakubotaText::WrapI18N version 0.06 =========================== This is a module which intends to substitute Text::Wrap, which supports internationalized texts including: - multibyte encodings such as UTF-8, EUC-JP, EUC-KR, GB2312, and Big5, - fullwidth characters like east Asian characters which appear in UTF-8, EUC-JP, EUC-KR, GB2312, Big5, and so on, - combining characters like diacritical marks which appear in UTF-8, ISO-8859-11 (aka TIS-620), and so on, and - languages which don't use whitespaces between words, like Chinese and Japanese. The text is to be given in locale encoding, not always in UTF-8. (Of course locale encoding is UTF-8 in UTF-8 locales.) $Text::WrapI18N::columns is available like $Text::Wrap::columns. Also, $Text::WrapI18N::separator is available. However, $Text::WrapI18N::huge, $Text::WrapI18N::break, $Text::WrapI18N::tabstop, and $Text::WrapI18N::unexpand are not available yet. This module is originally written for "debconf", a standard configuration system of Debian. Prerequisites ------------- This needs Text::CharWidth module. Build and Install ----------------- Please use standard way to build and install this module: perl Makefile.PL make make install COPYRIGHT AND LICENCE --------------------- Copyright (C) 2003 Tomohiro KUBOTA This library is free software; you can redistribute it and/or modify it under the terms of either: a) the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version, or b) the "Artistic License" which comes with Perl. libtext-wrapi18n-perl-0.06.orig/MANIFEST0100644000175000017500000000007007675173034017157 0ustar kubotakubotaChanges Makefile.PL MANIFEST README WrapI18N.pm t/use.t libtext-wrapi18n-perl-0.06.orig/t/0040755000175000017500000000000007675173554016306 5ustar kubotakubotalibtext-wrapi18n-perl-0.06.orig/t/use.t0100644000175000017500000000034407675173554017265 0ustar kubotakubota#!/usr/bin/perl -w use strict; use Test::Simple tests => 2; use Text::WrapI18N qw(:all); $columns = 9; ok(wrap("", "", "abcdefg") eq "abcdefg"); ok(wrap("# ", "! ", "abcdefg hijklmn") eq "# abcdefg\n! hijklmn"); exit; __END__ libtext-wrapi18n-perl-0.06.orig/WrapI18N.pm0100644000175000017500000001434107676263761017713 0ustar kubotakubotapackage Text::WrapI18N; require Exporter; use strict; use warnings; our @ISA = qw(Exporter); our @EXPORT = qw(wrap); our @EXPORT_OK = qw($columns $separator); our %EXPORT_TAGS = ('all' => [ @EXPORT, @EXPORT_OK ]); our $VERSION = '0.06'; use vars qw($columns $break $tabstop $separator $huge $unexpand $charmap); use Text::CharWidth qw(mbswidth mblen); BEGIN { $columns = 76; # $break, $separator, $huge, and $unexpand are not supported yet. $break = '\s'; $tabstop = 8; $separator = "\n"; $huge = 'wrap'; $unexpand = 1; undef $charmap; } sub wrap { my $top1=shift; my $top2=shift; my $text=shift; $text = $top1 . $text; # $out already-formatted text for output including current line # $len visible width of the current line without the current word # $word the current word which might be sent to the next line # $wlen visible width of the current word # $c the current character # $b whether to allow line-breaking after the current character # $cont_lf true when LF (line feed) characters appear continuously # $w visible width of the current character my $out = ''; my $len = 0; my $word = ''; my $wlen = 0; my $cont_lf = 0; my ($c, $w, $b); $text =~ s/\n+$/\n/; while(1) { if (length($text) == 0) { return $out . $word; } ($c, $text, $w, $b) = _extract($text); if ($c eq "\n") { $out .= $word . $separator; if (length($text) == 0) {return $out;} $len = 0; $text = $top2 . $text; $word = '' ; $wlen = 0; next; } elsif ($w == -1) { # all control characters other than LF are ignored next; } # when the current line have enough room # for the curren character if ($len + $wlen + $w <= $columns) { if ($c eq ' ' || $b) { $out .= $word . $c; $len += $wlen + $w; $word = ''; $wlen = 0; } else { $word .= $c; $wlen += $w; } next; } # when the current line overflows with the # current character if ($c eq ' ') { # the line ends by space $out .= $word . $separator; $len = 0; $text = $top2 . $text; $word = ''; $wlen = 0; } elsif ($wlen + $w <= $columns) { # the current word is sent to next line $out .= $separator; $len = 0; $text = $top2 . $word . $c . $text; $word = ''; $wlen = 0; } else { # the current word is too long to fit a line $out .= $word . $separator; $len = 0; $text = $top2 . $c . $text; $word = ''; $wlen = 0; } } } # Extract one character from the beginning from the given string. # Supports multibyte encodings such as UTF-8, EUC-JP, EUC-KR, # GB2312, and Big5. # # return value: (character, rest string, width, line breakable) # character: a character. This may consist from multiple bytes. # rest string: given string without the extracted character. # width: number of columns which the character occupies on screen. # line breakable: true if the character allows line break after it. sub _extract { my $string=shift; my ($l, $c, $r, $w, $b, $u); if (length($string) == 0) { return ('', '', 0, 0); } $l = mblen($string); if ($l == 0 || $l == -1) { return ('?', substr($string,1), 1, 0); } $c = substr($string, 0, $l); $r = substr($string, $l); $w = mbswidth($c); if (!defined($charmap)) { $charmap = `/usr/bin/locale charmap`; } if ($charmap =~ /UTF.8/i) { # UTF-8 if ($l == 3) { # U+0800 - U+FFFF $u = (ord(substr($c,0,1))&0x0f) * 0x1000 + (ord(substr($c,1,1))&0x3f) * 0x40 + (ord(substr($c,2,1))&0x3f); $b = _isCJ($u); } elsif ($l == 4) { # U+10000 - U+10FFFF $u = (ord(substr($c,0,1))&7) * 0x40000 + (ord(substr($c,1,1))&0x3f) * 0x1000 + (ord(substr($c,2,1))&0x3f) * 0x40 + (ord(substr($c,3,1))&0x3f); $b = _isCJ($u); } else { $b = 0; } } elsif ($charmap =~ /(^EUC)|(^GB)|(^BIG)/i) { # East Asian legacy encodings # (EUC-JP, EUC-KR, GB2312, Big5, Big5HKSCS, and so on) if (ord(substr($c,0,1)) >= 0x80) {$b = 1;} else {$b = 0;} } else { $b = 0; } return ($c, $r, $w, $b); } # Returns 1 for Chinese and Japanese characters. This means that # these characters allow line wrapping after this character even # without whitespaces because these languages don't use whitespaces # between words. # # Character must be given in UCS-4 codepoint value. sub _isCJ { my $u=shift; if ($u >= 0x3000 && $u <= 0x312f) { if ($u == 0x300a || $u == 0x300c || $u == 0x300e || $u == 0x3010 || $u == 0x3014 || $u == 0x3016 || $u == 0x3018 || $u == 0x301a) {return 0;} return 1; } # CJK punctuations, Hiragana, Katakana, Bopomofo if ($u >= 0x31a0 && $u <= 0x31bf) {return 1;} # Bopomofo if ($u >= 0x31f0 && $u <= 0x31ff) {return 1;} # Katakana extension if ($u >= 0x3400 && $u <= 0x9fff) {return 1;} # Han Ideogram if ($u >= 0xf900 && $u <= 0xfaff) {return 1;} # Han Ideogram if ($u >= 0x20000 && $u <= 0x2ffff) {return 1;} # Han Ideogram return 0; } 1; __END__ =head1 NAME Text::WrapI18N - Line wrapping module with support for multibyte, fullwidth, and combining characters and languages without whitespaces between words =head1 SYNOPSIS use Text::WrapI18N qw(wrap $columns); wrap(firstheader, nextheader, texts); =head1 DESCRIPTION This module intends to be a better Text::Wrap module. This module is needed to support multibyte character encodings such as UTF-8, EUC-JP, EUC-KR, GB2312, and Big5. This module also supports characters with irregular widths, such as combining characters (which occupy zero columns on terminal, like diacritical marks in UTF-8) and fullwidth characters (which occupy two columns on terminal, like most of east Asian characters). Also, minimal handling of languages which doesn't use whitespaces between words (like Chinese and Japanese) is supported. Like Text::Wrap, hyphenation and "kinsoku" processing are not supported, to keep simplicity. I is the main subroutine of Text::WrapI18N module to execute the line wrapping. Input parameters and output data emulate Text::Wrap. The texts have to be written in locale encoding. =head1 SEE ALSO locale(5), utf-8(7), charsets(7) =head1 AUTHOR Tomohiro KUBOTA, Ekubota@debian.orgE =head1 COPYRIGHT AND LICENSE Copyright 2003 by Tomohiro KUBOTA This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut