#!/bin/sh
#
# mommsen.sh
#
# This shell script converts HTML source of Theodor Mommsen's
# 'Roman History' into printable TeX documents.
#
# Author: Thomas Spahni
# Version: 1.0
# Date: 2008-04-18
# Copyright: public domain
#
# Download all chapters found at
# http://gutenberg.spiegel.de/?id=19&autorid=422&autor_vorname=+Theodor&autor_nachname=Mommsen&cHash=b31bbae2c6
# (printable version) into subdirectories named 'Buch_1', 'Buch_2' ... 'Buch_8'
# and then run this script from the parent directory.
#
# Usage:
#   prompt> ./mommsen.sh
#   prompt> ./mommsen.sh clean
#
# Requires:
#   TeX with Babel hyphenation for 'german'
#   i.e. your /etc/texmf/web2c/fmtutil.cnf should contain a line like:
#   tex tex language.dat -translate-file=cp227.tcx bplain.ini

# Path of the TeX binary.
TEXPROG="/usr/bin/tex"

# Adjustable values for TeX:
# Tolerance limits how much a line may be stretched
#   200   is the TeX default
#   310   works well, but fails for Greek words, which cannot be
#         hyphenated automatically. For this reason tolerance is
#         set to 5000 for footnotes in book.sty
#   10000 disables the limit
TOLERANCE=310

# Book 8 has an appendix with 10 charts. Including them makes
# a huge .ps file. If you don't want this, disable inclusion here.
CHARTS=yes
# CHARTS=no

# Generate PDF as well?
# Charts (if enabled) will be ignored for PDF documents
#PDF=yes
PDF=yes
PDFTEXPROG="/usr/bin/pdftex"

################################################################
## End of user configuration
################################################################

# Directories (and output basenames) of the books to be processed.
BOOKS="Buch_1 Buch_2 Buch_3 Buch_4 Buch_5 Buch_8"

# "clean" mode: remove every generated file and editor backups, then quit.
# POSIX test(1) only specifies '=' for string equality, so '==' is avoided.
if test "$1" = "clean" ; then
  for TARGET in $BOOKS ; do
    rm -f "$TARGET.tex" "$TARGET.dvi" "$TARGET.ps" "$TARGET.pdf" "$TARGET.log"
  done
  rm -f *~
  exit 0
fi

# Function: conversion of greek letters and encoded signs
#
# Arguments: $1 - text to convert
# Outputs:   the converted text on stdout
#
# Greek letters become TeX math-mode expressions ($\alpha $, $A$, ...);
# the 's/\$\$//g' rule then joins directly adjacent math groups.
# Accented latin letters become plain TeX accent macros (\= macron,
# \u breve), and a few typographic signs are replaced by TeX equivalents.
# printf is used instead of echo so that backslashes in $1 are passed
# through unchanged whatever shell /bin/sh is.
# NOTE(review): 'Ή' is mapped to lowercase \eta while 'Ά', 'Έ' and 'Ί'
# keep their capital letter - confirm whether that is intended.
# NOTE(review): 'č' -> \=c and 'ň' -> \~n do not produce a caron (\v);
# left unchanged, confirm against the source texts.
greek() {
  printf '%s\n' "$1" | sed \
    -e 's/Α/$A$/g' -e 's/α/$\\alpha $/g' \
    -e 's/Β/$B$/g' -e 's/β/$\\beta $/g' \
    -e 's/Γ/$\\Gamma $/g' -e 's/γ/$\\gamma $/g' \
    -e 's/Δ/$\\Delta $/g' -e 's/δ/$\\delta $/g' \
    -e 's/Ε/$E$/g' -e 's/ε/$\\epsilon $/g' \
    -e 's/Ζ/$Z$/g' -e 's/ζ/$\\zeta $/g' \
    -e 's/Η/$H$/g' -e 's/η/$\\eta $/g' -e 's/Ή/$\\eta $/g' \
    -e 's/Θ/$\\Theta $/g' -e 's/θ/$\\theta $/g' \
    -e 's/Ι/$I$/g' -e 's/ι/$\\iota $/g' \
    -e 's/Κ/$K$/g' -e 's/κ/$\\kappa $/g' \
    -e 's/Λ/$\\Lambda $/g' -e 's/λ/$\\lambda $/g' \
    -e 's/Μ/$M$/g' -e 's/μ/$\\mu $/g' \
    -e 's/Ν/$N$/g' -e 's/ν/$\\nu $/g' \
    -e 's/Ξ/$\\Xi $/g' -e 's/ξ/$\\xi $/g' \
    -e 's/Ο/$O$/g' -e 's/ο/$o$/g' \
    -e 's/Π/$\\Pi $/g' -e 's/π/$\\pi $/g' \
    -e 's/Ρ/$P$/g' -e 's/ρ/$\\rho $/g' \
    -e 's/Σ/$\\Sigma $/g' -e 's/ς/$\\varsigma $/g' \
    -e 's/σ/$\\sigma $/g' \
    -e 's/Τ/$T$/g' -e 's/τ/$\\tau $/g' \
    -e 's/Υ/$Y$/g' -e 's/υ/$\\upsilon $/g' \
    -e 's/Φ/$\\Phi $/g' -e 's/φ/$\\phi $/g' \
    -e 's/Χ/$X$/g' -e 's/χ/$\\chi $/g' \
    -e 's/Ψ/$\\Psi $/g' -e 's/ψ/$\\psi $/g' \
    -e 's/Ω/$\\Omega $/g' -e 's/ω/$\\omega $/g' \
    -e 's/ύ/$\\upsilon $/g' \
    -e 's/ϑ/$\\vartheta $/g' \
    -e 's/ϒ/$\\Upsilon $/g' \
    -e 's/ϖ/$\\varpi $/g' \
    -e 's/ά/$\\alpha $/g' \
    -e 's/έ/$\\varepsilon $/g' \
    -e 's/ή/$\\eta $/g' \
    -e 's/ί/$\\iota $/g' \
    -e 's/ό/$o$/g' \
    -e 's/ύ/$\\upsilon $/g' \
    -e 's/ώ/$\\omega $/g' \
    -e 's/\$\$//g' \
    -e 's/ā/\\=a/g' \
    -e 's/ă/\\u{a}/g' \
    -e 's/č/\\=c/g' \
    -e 's/Đ/-\\kern-5pt D/g' \
    -e 's/ē/\\=e/g' \
    -e 's/ĕ/\\u{e}/g' \
    -e 's/ī/\\={\\i}/g' \
    -e 's/ĭ/\\u{\\i}/g' \
    -e 's/ň/\\~n/g' \
    -e 's/ō/\\=o/g' \
    -e 's/ŏ/\\u{o}/g' \
    -e 's/ū/\\=u/g' \
    -e 's/ŭ/\\u{u}/g' \
    -e "s/΄/$'$/g" \
    -e "s/Ά/$'A$/g" \
    -e "s/Έ/$'E$/g" \
    -e "s/Ί/$'I$/g" \
    -e 's/⅜/$3\\over 8$/g' \
    -e 's/І/{\\tt |}/g' \
    -e 's/–/--/g' \
    -e "s/“/''/g" \
    -e "s/”/''/g" \
    -e 's/‘/`/g' \
    -e 's/†/\\dag{}/g'
}

# A literal tab character, generated with printf so that it cannot be
# turned into a space by an editor.
# NOTE(review): assumes, from the variable name, that a tab was intended.
TAB="$(printf '\t')"

# rename some files for easier handling
# (prefaces get a "00." prefix, single-digit chapter numbers get a
# leading zero and blanks are removed from chapter file names)
for BUCH in $BOOKS ; do
  #echo $BUCH
  if test -f "$BUCH/Vorrede zu der zweiten Auflage.html" ; then
    mv "$BUCH/Vorrede zu der zweiten Auflage.html" \
      "$BUCH/00.Vorrede-2.Aufl.html"
  fi
  if test -f "$BUCH/Einleitung.html" ; then
    mv "$BUCH/Einleitung.html" "$BUCH/00.Einleitung.html"
  fi
  for KAPITEL in "$BUCH"/*Kapitel.html ; do
    # an unmatched glob stays literal; skip it
    test -f "$KAPITEL" || continue
    NEWNAME=$(basename "$KAPITEL" | sed \
      -e "s/^[1-9]\\./0&/" \
      -e "s/ //g")
    if test "$KAPITEL" != "$BUCH/$NEWNAME" ; then
      mv "$KAPITEL" "$BUCH/$NEWNAME"
    fi
  done
done

# define header for TeX-files
HEADER="\\input book.sty
\\input epsf.tex
\\def\\subtitel#1{\\vskip 0.3in \\goodbreak\\noindent {\\bf #1}\\bigskip}
\\hfuzz 1pt
\\tolerance $TOLERANCE
\\overfullrule=0pt"

FOOTER='\vfill\eject'

# Translate all texts
for BUCH in $BOOKS ; do
  echo "-----------------------------------------------------------------------"
  echo "$BUCH"
  # Header for a Book
  case "$BUCH" in
    Buch_1)
      TBAND="Erster Band"; TBUCH="Erstes Buch"
      TITEL="\\centerline{Bis zur Abschaffung des römischen Königtums}"
      ;;
    Buch_2)
      TBAND="Erster Band"; TBUCH="Zweites Buch"
      TITEL="\\centerline{Von der Abschaffung des römischen Königtums}\\centerline{bis zur Einigung Italiens}"
      ;;
    Buch_3)
      TBAND="Erster Band"; TBUCH="Drittes Buch"
      TITEL="\\centerline{Von der Einigung Italiens bis auf die}\\centerline{Unterwerfung Karthagos und der Griechischen Staaten}"
      ;;
    Buch_4)
      TBAND="Zweiter Band"; TBUCH="Viertes Buch"
      TITEL="\\centerline{Die Revolution}"
      ;;
    Buch_5)
      TBAND="Dritter Band"; TBUCH="Fünftes Buch"
      TITEL="\\centerline{Die Begründung der Militärmonarchie}"
      ;;
    Buch_8)
      TBAND="Fünfter Band"; TBUCH="Achtes Buch"
      TITEL="\\centerline{Länder und Leute von Caesar bis Diocletian}"
      ;;
  esac
  OUTFILE="$BUCH.tex"
  DVIFILE="$BUCH.dvi"
  PSFILE="$BUCH.ps"
  # Add title for each book
  # (the first redirection truncates the file, as the original '>' did;
  # printf keeps the TeX backslashes intact under any /bin/sh)
  {
    printf '%s\n' "$HEADER"
    printf '%s\n' '\centerline{\cmhkf Theodor Mommsen} \vskip 0.2in'
    printf '%s\n' '\hfil\epsfbox{t.mommsen.ps}\hfil\par\vskip 0.5in'
    printf '%s\n' '\centerline{\cmbf Römische Geschichte} \medskip'
    printf '%s\n' "\\centerline{\\cmrf $TBAND} \\medskip"
    printf '%s\n' "\\centerline{\\cmrf $TBUCH} \\bigskip"
    printf '%s\n' "{\\cmrkf $TITEL}"
  } > "$OUTFILE"
  # Process chapters for this book
  for HTMLFILE in "$BUCH"/*.html ; do
    # an unmatched glob stays literal; skip it
    test -f "$HTMLFILE" || continue
    # Preprocessing of the source
    # use to correct coding errors in the source
    # NOTE(review): several patterns below are empty ('s///') or look
    # truncated; the HTML tags they matched seem to have been stripped
    # from this copy of the script. Restore them from an intact copy.
    HTMLCORR=$(sed \
      -e 's///g' \
      -e 's/<\/sub>/<\/sup>/g' \
      -e 's///' \
      -e 's/Lektorat: Bild des Stammbaums fehlt!!!<\/span>/(Bild fehlt)/' \
      -e 's/2b2-267/262-267/g' \
      -e 's/Mithradates den Claudius im Jahre 41/Mithradates, den Claudius im Jahre 41/' \
      -e 's/der, \.fast/der, fast/' \
      -e 's/2s<\/i> Denar/2\/3<\/i> Denar/g' \
      -e 's/Ge lehrte aller Art/Gelehrte aller Art/' \
      < "$HTMLFILE")
    # preprocessing of footnotes
    # extract footnotes one per line
    # some are missing '-------' at the end
    # ex.: vorbereitet haben.
2 Die Verteidigung # no tables in fn # mask '&' for subsequent insertion with sed FN=$(echo "$HTMLCORR" | tr -d '\012' | sed \ -e 's/
------*<\/p>
/ÿ /ÿ ------*<\/p>/ÿ/g' | tr 'ÿ' '\012' | sed \
-e '/^ //g' \
-e 's/<\/p><\/td>/$\\\\quad$/g' \
-e 's/<\/tr>//g' \
-e 's/<\/table> / / ------*<\\/p> *$FNUM/ ------*<\/p>//' \
-e "sþ * *$FNUM<\\/sup>þ$FNTEXT<\\/span>þ")
HTMLRAW="$HTMLTMP"
done
fi
HTML=$(echo "$HTMLRAW" | tr 'ÿ' '\012')
TXTPRE=$(echo "$HTML" | sed \
-e 's/{/$\\lbrace$/g' \
-e 's/}/$\\rbrace$/g' \
-e "s/^ *//" \
-e "s/ /ÿ&/g" \
-e "s/ÿ]*>//' \
-e 's/
/
/g' \
-e 's/]*>
/' \
-e 's/<\/p>$//' \
-e 's/<\/p>
/g' \
-e 's/ */ /g' \
-e 's/^ *//' \
-e 's/&/\\\&/g')
#if test $HTMLFILE == "Buch_3/12.Kapitel.html" ; then
#echo "$FN" ; exit
#fi
# List of numbers for all footnotes
FNUMBERS=$(echo "$HTMLCORR" | tr -d '\012' | sed \
-e 's/[1-9][0-9]*<\/sup>/ÿ&ÿ/g' | tr 'ÿ' '\012' | sed \
-e '/^[1-9][0-9]*<\/sup>/ !d' \
-e 's///' \
-e 's/<\/sup>//' | sort -n -r -u)
HTMLRAW=$(echo "$HTMLCORR" | sed -e 's/$/ÿ/' | tr -d '\012')
if test -n "$FNUMBERS" ; then
for FNUM in $FNUMBERS ; do
FNTEXT=$(echo "$FN" | sed -e "$FNUM !d")
# remove text of footnote
# insert footnote at the right place
HTMLTMP=$(echo "$HTMLRAW" | sed \
-e "s/
$TAB*/
/g" \
-e 's/;-&/; - \&/g' \
-e "s/<[^\\/]/ÿ&/g" \
-e "s/ÿ