#!/usr/bin/perl -WT
# (C) Stephan Beyer, GPL
# reformat -h or see the pod (at the bottom of the file, or 'man reformat')

use strict;
use warnings;

# default options
# Note: booleans are true if ($boolean % 2), i.e. repeating a switch
#       toggles it off again
our $width   = 72;          # -w
our $justify = 0;           # -j (boolean)
our $lmargin = "";          # -l (kept as the string of spaces to print)
our $allpar  = 0;           # -p (boolean)

# default stuff
our $version = "20040319";
our $newline = 0;           # 0: next output starts a fresh line (margin due)
our $lines   = 0;           # number of output lines written so far

# Usage: usage (INTEGER) -> prints the help text to stdout and exits
#                           with INTEGER as exit status
sub usage {
    my ($status) = @_;
    # show the margin as a number; $lmargin itself is only blanks
    my $margin = length($lmargin);
    # NOTE(review): the first banner line was reconstructed from a damaged
    # copy of this file (name, version and author address are taken from
    # the rest of the file) - confirm the wording against upstream.
    print <<"STOP";
reformat $version - (C) Stephan Beyer <s-beyer\@gmx.net>, GPL
http://www.s-beyer.de/

Usage: $0 [options] [<inputfile] [>outputfile]

"reformat" only handles with stdin and stdout (and sometimes stderr).
Pipe as much as you want :)

Options (alphabetically):
 -h         Displays this help.
 -j         Switches hyphenless justification. [$justify]
 -l margin  Sets a left margin (numeric). [$margin]
 -p         Also reformat lines beginning with a whitespace. [$allpar]
 -w width   Sets a width (numeric) for the text. [$width]

Have fun!
STOP
    exit($status);
}

# Usage: _numeric_argument (OPTION, VALUE) -> VALUE as integer
# Prints an error plus the help text and exits with status 1 if VALUE is
# missing or not a non-negative integer.
sub _numeric_argument {
    my ($option, $value) = @_;
    if (defined($value) and $value =~ m/^(\d+)$/) {
        return int($1);     # taking the capture also untaints it under -T
    }
    print STDERR "Option ".$option." needs a numeric argument.\n";
    usage(1);
}

# Usage: parse_options (ARRAY OF STRINGS) -> sets the option globals
# Anything that is not a known switch prints the help text and exits
# (status 0 for -h, 1 otherwise).
sub parse_options {
    my @args = @_;
    while (@args) {
        my $opt = shift(@args);
        if ($opt eq "-j") {
            $justify++;
        } elsif ($opt eq "-p") {
            $allpar++;
        } elsif ($opt eq "-l") {
            $lmargin = " " x _numeric_argument($opt, shift(@args));
        } elsif ($opt eq "-w") {
            $width = _numeric_argument($opt, shift(@args));
        } elsif ($opt eq "-h") {
            usage(0);
        } else {
            print STDERR "Unknown option: ".$opt."\n";
            usage(1);
        }
    }
    return;
}

# Usage: prepare (STRING) -> STRING with CR, LF and TAB turned into blanks,
#                            runs of blanks squeezed to one and the blank
#                            at either end removed
sub prepare {
    my ($ret) = @_;
    $ret =~ s/[\r\n\t]/ /g;
    $ret =~ s/ +/ /g;
    $ret =~ s/^ //;
    $ret =~ s/ $//;
    return $ret;
}

# Usage: round (FLOAT) -> rounded to integer
# (a fractional part of 0.5 or more rounds towards int()+1)
sub round {
    my ($num) = @_;
    if ($num - int($num) >= 0.5) {
        return int($num) + 1;
    }
    return int($num);
}

# Usage: splen (STRING) -> returns length of STRING + 1 (added space)
sub splen {
    return length($_[0]) + 1;
}

# Usage: mindiff (INTEGER, ARRAY OF INTEGERS)
#   -> returns integer value (of the array) of
#      smallest absolute difference between INTEGER and the ARRAY
#      (the first one wins on a tie; undef for an empty array)
sub mindiff {
    my ($num, @candidates) = @_;
    my $min = -1;
    my $ret;
    for my $candidate (@candidates) {
        my $diff = abs($candidate - $num);
        if (($min < 0) or ($diff < $min)) {
            $min = $diff;
            $ret = $candidate;
        }
    }
    return $ret;
}

# Usage: newline; -> registers a new line
sub newline {
    print "\n";
    $newline = 0;
    $lines++;
    return;
}

# Usage: _landmarks (INTEGER, ARRAY OF WORDS) -> ARRAY OF INTEGERS
# INTEGER is the number of blanks missing to fill the line made of WORDS.
# Returns one position per missing blank; a position is the column offset
# right behind a word (counted like splen) where an extra blank goes.
# Returns the empty list if nothing has to or can be filled.
sub _landmarks {
    my ($fill, @line) = @_;

    # find available spaces: the gap behind every word but the last one
    my @spaces;
    my $pos = 0;
    for my $word (@line[0 .. $#line - 1]) {
        $pos += splen($word);
        push(@spaces, $pos);
    }
    # a one-word line has no gap to widen
    return () unless (@spaces and $fill > 0);

    # LandMark Factor * {1..fill} = set of landmarks
    #--> TODO not very correct because the length changes when adding a space
    my $lmf = $width / ($fill + 1);
    my @landmarks;
    for my $n (1 .. $fill) {
        push(@landmarks, mindiff(round($lmf * $n), @spaces));
    }
    return @landmarks;
}

# Usage: _print_line (ARRAY REF OF INTEGERS, ARRAY OF WORDS)
# Prints the margin (if still due), the words separated by one blank, an
# extra blank for every landmark that sits behind a word, and a newline.
sub _print_line {
    my ($landmarks, @line) = @_;
    if ($newline == 0) {
        print $lmargin;
        $newline = 1;
    }
    my $pos  = 0;   # offset behind the word just printed
    my $next = 0;   # index of the next landmark to place
    for my $n (0 .. $#line) {
        print $line[$n];
        $pos += splen($line[$n]);
        while (($next < @$landmarks) and ($landmarks->[$next] == $pos)) {
            $next++;
            print " ";
        }
        if ($n < $#line) {
            print " ";
        } else {
            newline();
        }
    }
    return;
}

# almost MAIN:
# Usage: reformat (STRING) -> reformat string
# Prints STRING as one paragraph wrapped at $width columns (plus $lmargin).
# With -j every line but the last is filled up to $width. An empty
# paragraph prints nothing. A word longer than $width aborts the program
# with exit status 2.
sub reformat {
    my ($str) = @_;
    my @words = split(/ /, prepare($str));
    # nothing to print - in particular no margin without a line
    return unless @words;

    my $lc   = $width + 1;  # length count: room left (+1, the first word
                            # of a line has no leading blank)
    my $last = 0;           # index of the first word of the current line

    for my $i (0 .. $#words) {
        # is our mission impossible?
        if (length($words[$i]) > $width) {
            print STDERR ("ERROR: word is too long to fit in width ($width).\n");
            print STDERR ("(\"".$words[$i]."\")\n");
            print STDERR ("Aborting.\n");
            exit(2);
        }

        # substract current word
        $lc -= splen($words[$i]);
        next if ($lc >= 0);

        # word $i does not fit any more: put it back and flush the line;
        # $lc is then the number of blanks needed to fill the line
        $lc += splen($words[$i]);
        my @line      = @words[$last .. $i - 1];
        my @landmarks = ($justify % 2) ? _landmarks($lc, @line) : ();
        _print_line(\@landmarks, @line);

        $last = $i;
        $lc   = $width - length($words[$i]);
    }

    # end of paragraph - print as usual (word1 word2 word3.\n)
    _print_line([], @words[$last .. $#words]);
    return;
}

# Usage: unformatted (STRING) -> prints STRING untouched, except that its
#                                line ending (LF or CR LF) becomes one
#                                registered newline
sub unformatted {
    my ($line) = @_;        # work on a copy, @_ aliases the caller's data
    $line =~ s/[\r\n]+\z//;
    print $line;
    newline();
    return;
}

############
### MAIN ###
############

# Usage: main (ARRAY OF STRINGS) -> exit status
# Parses the options, then copies stdin to stdout paragraph by paragraph.
sub main {
    parse_options(@_);

    my $cache = "";
    while (my $line = <STDIN>) {
        if ($allpar % 2) { # -p set
            $line =~ s/^[\t ]*//;
        }
        if ($line =~ m/^[\r\n\t ]/) { # ignore those lines
            # reformat cache
            reformat($cache);
            # print ignored line
            unformatted($line);
            # clear cache
            $cache = "";
        } else {
            $cache .= $line;
        }
    }
    reformat($cache);
    return 0;
}

# run only when started as a program, not when loaded by another file
exit(main(@ARGV)) unless caller;

1;

=head1 NAME

reformat - tool to simple format plain ascii texts

=head1 SYNOPSIS

B<reformat> [B<-h>] [B<-j>] [B<-l> I<margin>] [B<-p>] [B<-w> I<width>]

=head1 DESCRIPTION

B<reformat> is a simple tool to reformat plain texts.
reformat reads from F<stdin> and writes to F<stdout>.
Available options are:

=over

=item B<-h>

prints usage information

=item B<-j>

switch justify mode: Each line of a paragraph will have the same width
(see B<-w> option). To reach this, spaces (' ') will be added between
words. Default: disabled

=item B<-l> I<margin>

Sets the left margin to I<margin>. The margin is produced by I<margin>
spaces (' '), no tabs will be used. Default: 0

=item B<-p>

Accept lines beginning with a whitespace as usual paragraphs, too.

=item B<-w> I<width>

Sets the paragraph width to I<width>. No reformatted line will be longer
than I<width> (plus defined margins) then. Default: 72

=back

=head1 LIMITATIONS

=over

=item B<reformat> isn't an intelligent program.

It just reads a whole paragraph into a buffer and then reformats it.
The end of a paragraph is indicated by an empty line (which may also
contain spaces or tabs) or by a line beginning with whitespace (if you
don't use the B<-p> option). Lines beginning with whitespace are lines to
keep untouched. Nothing happens with them, unless you use the B<-p> option
(as just mentioned).

=item B<reformat> doesn't look for hyphenation and hyphens at all.

It won't start a new line when it reaches a hyphen. B<reformat> works
word-by-word.

=item B<reformat> doesn't detect 'small paragraphs' (paragraphs without an
empty line).

=item Check for errors!

If B<reformat> detects a word with a length greater than the specified
width, it will abort.

=item B<reformat> has problems with control characters.

Some text documents contain the B<^L> character (0x0c), for example.

=back

=head1 TODO

Planned features are:

=over

=item Fix justification (B<-j>) problems.

=for comment This item originally ended in a cross-reference ("see L<...>") whose target was lost when the file was extracted; restore it from the upstream source.

=item Add an option to declare a string that indicates "don't reformat"
in the text.

Would be nice when reformatting emails, so that the quotes ('> '-lines)
are not touched.

=item Add an option (e.g. B<-i>) to keep indenting.

=back

=head1 EXAMPLES

=over

=item reformat < foo > bar

Reads text from F<foo>, reformats and writes to F<bar>.

=item reformat -l 15 -j -w 50 < foo

Nice justified, centered text from file F<foo> on an 80x25 terminal.

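=back

=head1 SEE ALSO

=for comment The link target of this section was lost when the file was extracted (only a bare "L" remained); restore it from the upstream source.

=head1 AUTHOR AND COPYRIGHT

(C) Stephan Beyer E<lt>s-beyer@gmx.netE<gt>, 2003-2004, GPL

=cut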