#!/usr/bin/perl # # A script for converting Baen's WebScription books into an HTML format # that works better with the Rocket eBook (and looks better too). # # Name: baen2rocket # Author: Wayne Davison . # Version: 4.0.2 # # Feel free to use and distribute this script however you like. # # Type "baen2rocket -h" to see a list of options. # # See http://www.baen.com/ws_faq.htm for more information on a WebScription. # # See http://www.rocket-ebook.com/ for information on the Rocket eBook. # Some not yet standard characters that the Rocket eBook understands: # # Open/close single quote: \x91 \x92 (‘ ’) # Open/close double quote: \x93 \x94 (“ ”) # Single/double dash: \x96 \x97 (– —) # elipses: \x85 (…) use Getopt::Std; $opt_h = $opt_s = 0; # Remove single-use warnings $OPTS = 'eghjlps'; &usage if !getopts($OPTS) || $opt_h || !@ARGV; if ($opt_p) { if ($opt_j) { $PARASTART = "

"; } else { $PARASTART = "

"; } $PARAEND = "

"; $INDENT = ""; } else { if ($opt_j) { $PARASTART = "
"; $PARAEND = "
"; } else { $PARASTART = "
"; $PARAEND = ""; } $INDENT = "\xA0\xA0\xA0\xA0\xA0"; # Each of these is a   } if ($opt_s) { $STAR_LINE = '

* * *

'; } elsif ($opt_p) { $STAR_LINE = "


"; } else { $STAR_LINE = "

"; } if ($opt_g) { $useImageMagick = 1; my $sep = $^O =~ /mswin32/i? ';' : ':'; foreach (split(/$sep/o, $ENV{PATH})) { if (-x "$_/ppmtogif") { $useImageMagick = 0; last; } } if ($useImageMagick) { require 'Image/Magick.pm'; import Image::Magick; } } if (grep(/[*?]/, @ARGV)) { foreach (@ARGV) { push(@files, glob $_); } } else { @files = @ARGV; } foreach (@files) { next unless /^\d\d\d\d\d\d/; s/(_+c_|_+\d+|_toc)?\.(htm|jpg|gif|png)$//i; $prefixes{$_} = 1; } undef $/; foreach $prefix (sort keys %prefixes) { next unless -f "$prefix.htm"; &processPrefix; } print "\n"; exit; sub processPrefix { print "\nProcessing $prefix*.htm:\n"; $copyright_file = "${prefix}__c_.htm"; $copyright_file = "${prefix}_c_.htm" unless -f $copyright_file; ($copyright_prefix) = $copyright_file =~ /(.+_c_)/; # If no up-to-date $prefix_toc.htm file exists, create one. $mtime = (stat("$prefix.htm"))[9]; if (!$mtime) { print "Didn't find $prefix.htm -- skipping\n"; return; } $toc_mtime = (stat("${prefix}_toc.htm"))[9]; if ($toc_mtime && $toc_mtime < $mtime) { if (!open(IN, "${prefix}_toc.htm")) { print "Unable to read ${prefix}_toc.htm -- skipping\n"; return; } $_ = ; close(IN); $toc_mtime = 0 if /; close(IN); my $unreleased_chapters = 0; if (m%var lastPg = .*//(,.*)%) { my $chapter_para_nums = $1; $unreleased_chapters = $chapter_para_nums =~ tr/,//; } ($title) = m%(.*?)%is; $title =~ s/\s+/ /g; $title =~ /(.*?) by (.*\S) *-+ *Baen.*/; $book_title = $1; $author = $2; # Now we run through all the files to output their chapter headings. ($underscores) = $copyright_file =~ /(_+)c_/; $first = "$prefix${underscores}_0.htm"; # $first = "$prefix${underscores}_p.htm" unless -f $first; $first = "$prefix${underscores}_1.htm" unless -f $first; open(OUT, ">${prefix}_toc.htm") or die "Unable to write ${prefix}_toc.htm\n"; print OUT < $book_title

Back | Next

$book_title

Table of Contents

EOT my $at_top = 1; my $next = $first; while (defined($next) && $next !~ /_c_\./) { my $fn = $next; if (!open(IN, $fn)) { print "Unable to open $fn\n"; unlink("${prefix}_toc.htm"); return; } $_ = ; close(IN); ($next) = m%Next%; &cleanPage; if (($part,$chapt) = m%

(.*?)

.{0,384}?]*>(.*?)%is) { $part =~ s/^\s*
\s*//i; print OUT '
' unless $at_top; print OUT qq|$part

\n|; } elsif (($chapt) = m%

Preface

.*?]*>(.*?)%is and $chapt !~ /\bpostscript\b/i) { $chapt = "$chapt (w/Preface)"; } else { ($chapt) = m%

(.*?)

%is or ($chapt) = m%

(chapter .*?)

%is or ($chapt) = m%

(.*?)

%is or ($chapt) = m%

(.*?)

%is; } $chapt =~ s/<.*?>/ /g; print OUT qq|$chapt
\n|; $at_top = 0; } if ($unreleased_chapters) { my $s = $unreleased_chapters == 1? '' : 's'; print OUT "
Plus $unreleased_chapters unreleased chapter$s
\n"; } print OUT <

Back | Next EOT close(OUT); } if (!open(IN, "${prefix}_toc.htm")) { print "Unable to open ${prefix}_toc.htm -- skipping\n"; return; } $_ = ; close(IN); ($book_title) = m%(.*?)%is; $book_title =~ s/<.*?>/ /g; $book_title =~ s/\s+/ /g; $book_title =~ s/^\s+//; $book_title =~ s/\s+$//; ($author) = /; close(IN); ($next) = m%Next%; &cleanPage; &tweakPunctuation unless $opt_l; s%%%ig; if (/]*src="?(\d+X*_+m[_1])\.jpg/) { my($map,$suf,$cnt,$max,$postfix); $map = $1; if ($map =~ /_m1$/) { ($max) = /.*]*src="?\d+X*_+m(\d)\.jpg/s; } else { $max = 1; } for ($cnt = 1; $cnt <= $max; $cnt++) { $map =~ s/_m\d$/_m$cnt/; $postfix = $max > 1? " #$cnt" : ''; &makeMapGifs("$map.jpg","$map.gif") if $opt_g; $suf = -f "$map.gif"? '.gif' : '.jpg'; s%]*src="?$map\.jpg[^>]*>%\n%; $menumark_map_html .= qq|Map$postfix
\n|; if (-f "big$map$suf") { $_ .= qq|


\n|; $menumark_map_html .= qq|Big Map$postfix
\n|; } } } open(OUT, ">$safetitle.htm") or die "Unable to write $safetitle.htm\n"; print OUT < $book_title


EOT $toc, qq|
|, $_, "


\n"; # OK! We're finally ready to run through the files, reformat # them, and append the text onto our single HTML file. while (defined($next) && $next !~ /_c_\./) { $fn = $next; ($fn_prefix) = $fn =~ /([^.]+)/; open(IN, $fn) or die "Unable to open $fn\n"; $_ = ; close(IN); ($next) = m%Next%; next if $fn =~ /_toc\./; &cleanPage; &tweakPunctuation unless $opt_l; &changeParagraphs; print OUT qq|\n|, $_; } close(OUT); # The "menumark*" file allows us to generate links in the "Go To" menu. open(OUT, ">menumark_$safetitle.htm") or die "Unable to write menumark_$safetitle.htm\n"; print OUT < MenuMark

Contents
Book Details
$menumark_map_html

EOT close(OUT); } sub cleanPage { s%^[ \t]+%%mg; s%.*?%%igs; s%<\?xml [^>]*>\n?%%; s%]*>\n?%%; s%\r%%g; s%]*src *= *"?$prefix\.jpg[^>]*>(\s*
)?%%i; s%(<[^>]*\S) +/?>%$1>%g; s%]+>%%ig; s%%%ig; s%.*]*)?>%%is; s%.*%%is; s%]+>(.*?)%$1%ig; s%]+>(Back|Next|Framed|Contents)( *\| *)?%%ig; s%(]*)`/$1\x0F/; 1 while s/(<[^>]*)"/$1\x0E/; 1 while s/(<[^>]*)'/$1\x0D/; 1 while s/(<[^>]*)&/$1\x0C/; 1 while s/(<[^>]*)-/$1\x0B/; # Make regex matching easier and the final file size smaller. s/"?/"/g; s/…?/\x85/g; s/‘?/\x91/g; s/’?/\x92/g; s/“?/\x93/g; s/”?/\x94/g; s/—?/\x97/g; s/ ?/\xA0/g; # We make our life easier by putting each paragraph on a single line. s%\n+([^<])% $1%g; # Now, transform the blah ASCII into more interesting punctuation. s%`%\x91%g; s%'%\x92%g; s%\.\s*\.\s*\.%\x85%gs; s%\x85(\s*[.!?]+["\x92\x94]+)%\x85$1%igs; s%[-\x96]{2,}%\x97%g; s%[-\x96\x97]([."\x92\x94]+)%\x97$1%ig; s%%%ig; s%"([^"\x93\x94\n]+)"%\x93$1\x94%g; s%(^|[\xA0\s])((?:<[^>]*>)*)"%$1$2\x93%gm; s%(\x93[^\x94"\n]+)["\x93]%$1\x94%g; s%\s*["\x94]((?:<[^>]*>)*;?)([\s)\xA0]|$)%\x94$1$2%gm; # Unmangle any obfuscated HTML tags. 1 while s/(<[^>]*)\x0F/$1`/; 1 while s/(<[^>]*)\x0E/$1"/; 1 while s/(<[^>]*)\x0D/$1'/; 1 while s/(<[^>]*)\x0C/$1&/; 1 while s/(<[^>]*)\x0B/$1-/; if ($opt_e) { # Turn enhanced single-character punctuation into HTML entities. s/\x85/…/g; s/\x91/‘/g; s/\x92/’/g; s/\x93/“/g; s/\x94/”/g; s/\x97/—/g; s/\xA0/ /g; } } sub changeParagraphs { if (!s%<(?:hr|/table)>.*%
\n\n%is) { s%.*?]*>.*?\n?%%s; s%(
\s*)?]*>.*%
\n\n%s; } s%.*]*>%%is; s%(]+)?>%$1P>%ig; s%

%

%g; # Prepare for trickery below s%
\n*(.*?[^\n])\n*
%

$1

%ig; s%\n?

]*>\s*\xA0*(\*+\s*)*(

)?\s*\n%\n%igs; s%

]*>%$PARASTART$INDENT%igs; s%(

.*?%$PARAEND%g; # No 'i' on purpose (see above) s%\s*\n($PARASTART)%$1\n%igo; s%(\n?){2,}%\n%g; s%(\n?)%$1%ig; s%(\n?<(h[1-6]|blockquote))%$1%ig; s%()\n?
\n?%$1\n%ig; s%$PARASTART\s*$INDENT(\s+<\1>%
%g; # This is for a bogus end of paragraph before a period. s%(\w)$PARASTART\s*$INDENT\.%$1.%igo; # This is for a bogus quote placement in early Minds, Machines, and Evol. s%\n\x93$PARASTART$INDENT%\x94$PARASTART\n$INDENT%igo; s%()
%$1%ig; s%

\s*$PARASTART\s*(


)%

$1%igso; s%

%%ig; s%(\n?
)?%$STAR_LINE%g; s%^%

%; s/\xA0/ /g if $opt_e; }

# makeCoverGif($prefix)
#
# Convert the cover image "$prefix.jpg" into "$prefix.gif", scaled to a
# 312x472 geometry.
#
# Uses Image::Magick when $useImageMagick is set; otherwise shells out to a
# djpeg + netpbm pipeline that produces a 1-bit dithered image.
#
# Returns: true on success.  The Image::Magick branch always returns 1 (any
# Read/Write problems are only reported through warn); the netpbm branch
# returns whether the shell pipeline exited with status 0.
sub makeCoverGif {
    my($prefix) = @_;

    if ($useImageMagick) {
        my $image = Image::Magick->new;
        my $w = $image->Read("$prefix.jpg");
        warn $w if $w;
        $image->Scale(geometry=>'312x472');
        $image->Set(magick=>'GIF');
        $w = $image->Write("$prefix.gif");
        warn $w if $w;
        undef $image;
        # NOTE(review): success is reported unconditionally here, even when a
        # warning was issued above; the original author flagged this with ##??.
        1; ##??
    } else {
        system "djpeg -grayscale $prefix.jpg |"
             . "pnmscale -xysize 312 472 |"
             . "pgmtopbm -fs |"
             . "pnmdepth 1 |"
             . "ppmtogif >$prefix.gif";
        $? == 0;
    }
}

# _runGifPipeline($cmd, $out)
#
# Private helper for makeMapGifs: run the shell pipeline $cmd with its output
# redirected into the file $out.
#
# The shell creates (or truncates) $out before the pipeline even starts, so a
# failed conversion would otherwise leave an empty or partial file behind that
# later "-f" tests would mistake for a usable image.  On failure the file is
# therefore removed.
#
# Returns: 1 if the pipeline exited with status 0 and $out is non-empty,
# 0 otherwise.
sub _runGifPipeline {
    my($cmd, $out) = @_;

    system "$cmd >$out";
    my $ok = ($? == 0 && -s $out) ? 1 : 0;
    unlink($out) unless $ok;
    $ok;
}

# makeMapGifs($jpg, $gif)
#
# Convert the map image $jpg into two GIF files: $gif at the original size and
# "big$gif" enlarged by a factor of two.
#
# Uses Image::Magick when $useImageMagick is set; otherwise shells out to
# djpeg + netpbm pipelines.
#
# Returns: true on success.  The Image::Magick branch always returns 1 (any
# Read/Write problems are only reported through warn).  The netpbm branch
# returns true only if BOTH conversions succeeded, and removes the output of
# any conversion that failed so callers that probe with "-f" fall back to the
# JPEG instead of picking up a broken GIF.
sub makeMapGifs {
    my($jpg,$gif) = @_;

    if ($useImageMagick) {
        my $image = Image::Magick->new;
        my $w = $image->Read($jpg);
        warn $w if $w;
        $image->Set(magick=>'GIF');
        $w = $image->Write($gif);
        warn $w if $w;

        # Empty the image list and re-read the JPEG so that the enlarged
        # version is made from the original pixels.
        undef @$image;
        $w = $image->Read($jpg);
        warn $w if $w;
        my($width, $height) = $image->Get('width', 'height');
        # $image->Zoom(width=>$width*2, height=>$height*2,filter=>'Gaussian',blur=>0.1);
        $image->Scale(width=>$width*2, height=>$height*2);
        $image->Set(magick=>'GIF');
        $w = $image->Write("big$gif");
        warn $w if $w;
        undef $image;
        # NOTE(review): success is reported unconditionally here, even when a
        # warning was issued above; the original author flagged this with ##??.
        1; ##??
    } else {
        # Make the regular sized map image.
        my $small_ok = &_runGifPipeline(
            "djpeg -grayscale $jpg |"
          . "pgmenhance |"
          . "pgmtopbm -fs |"
          . "pnmdepth 1 |"
          . "ppmtogif", $gif);

        # Make the large map image.  This is attempted even if the regular
        # one failed, exactly as before.
        my $big_ok = &_runGifPipeline(
            "djpeg -grayscale $jpg |"
          . "pnmenlarge 2 |"
          . "pnmsmooth |"
          . "pgmenhance |"
          . "pgmtopbm -fs |"
          . "pnmdepth 1 |"
          . "ppmtogif", "big$gif");

        # Previously only the status of the second pipeline was returned.
        $small_ok && $big_ok;
    }
}

sub usage { die <