#!/bin/bash
#
# totex - convert (almost) any plain ASCII text file to a plain-TeX source.
#
# The conversion
#    - prepends a TeX header (fonts, page geometry, macro definitions)
#    - appends the closing TeX commands
#    - escapes / converts characters that are special to TeX
#
# Usage: totex [-tgeuopxh] <file-to-convert>
#
# where: -t  means to start TeX as well
#         g  means: document in German
#         e  means: document in English
#         u  means: document in US-English
#         o  means: OriginalTeX (all functions of german.tex and
#                   umlaut.tex are switched off)
#         p  means: enable/disable page numbers
#         x  means: plain TeX (german.tex and umlaut.tex are
#                   not required)
#         h  means: display help
#
# Output is to a new file 'file-to-convert.tex'
#
#
# Format of the input file:
#
# totex is designed to accept a wide variety of input files. However, input
# must be a text file with no special formatting characters added.
# There is a very limited range of options to influence the final
# format through the input file:
#    - a dot `.' as the first character on a line (optionally
#      preceded by blanks) forces a new alinea (a new paragraph inside
#      the current item) while continuing with the same item{}
#    - embracing a ~word~ with a pair of tilde characters
#      or with two *asterisks* will set it in italic
#
#####################################################
#
# Part for User Configuration
#
#####################################################
#
# values set in inches:
# --------------------
PAGE_HEIGHT=8.9
PAGE_WIDTH=6.5
#
# values set in pt
# ----------------
# what is the basic line spacing?
LINE_HEIGHT=15
# how much additional spacing for each paragraph?
PARA_SKIP=7
# additional spacing for new alineas in numbered items
ALIN_SKIP=2
# spacing that replaces ALIN_SKIP in front of a centered line
# when that line follows a blank line
MORE_SKIP=15
# amount of indentation for each new paragraph
PARA_INDENT=20
# how much additional space before each title?
# (this one is emitted as "\vskip $TITLESKIP in", i.e. it is in inches)
TITLESKIP=0.2
#
# select language default (overridden by command line options)
# uncomment one of these:
#LANGUAGE=USenglish
#LANGUAGE=english
LANGUAGE=german
#
# Do you want the pages numbered at the bottom?
# (default to be overridden by command line option)
PAGENUMBERS=yes
#PAGENUMBERS=no
#
# if you have problems with lines beginning like "30. November" and
# these are falsely treated as numbered items then set to "no".
# normally "yes"
NUMBERED_ITEMS=yes
#
# should the script start a new line when it finds that the first word of
# the line is followed by a colon [:] (it's nice for chatroom listings!)
COLON_NEWLINE=yes
#
# select your fonts. Remember: double backslash is needed to make one
# backslash (the values pass through sed before they reach the TeX file)
NORMAL_FONT='cmr10 scaled \\magstep 1'
BOLD___FONT='cmb10 scaled \\magstep 1'
TITLE__FONT='cmb10 scaled \\magstep 1'
EMPHA__FONT='cmti10 scaled \\magstep 1'
#####################################################
# ( End of user configuration )
#####################################################

VERSION="Ver. 1.1"
OPTLETTERS="tgeuopxh"

# TeX fragments used by the sed passes further down.  They are spliced into
# sed replacement text, hence every TeX backslash is written doubled.
LINEMODE='\\obeylines\\parskip = 0 pt\\par\\noindent'
TITLFONT='\\cmb'
TITLETEX="\\\\vskip $TITLESKIP in\\\\goodbreak$TITLFONT"
# Bare names inside $(( )) evaluate to 0 when unset instead of producing
# an arithmetic syntax error.
ALINEA_SKIP=$((LINE_HEIGHT + ALIN_SKIP))
NEW_ALINEA="\\\\hfill\\\\break\\\\vbox to $ALINEA_SKIP pt{ }\\\\indent"
STAR_FILL='\\hfill\\break\\line{\\leaders\\hbox to 1em{\\hss*\\hss}\\hfill}'
EQU__FILL='\\leaders\\hbox to 1 em{\\hss =\\hss}\\hfill'
PENALTY='\\widowpenalty=1000 \\clubpenalty=1000 \\tolerance=10000'
HOR_LINE='{\\par\\medskip\\hrule\\par\\medskip}'
CTRSKIP="\\\\par\\\\vskip $ALIN_SKIP "
MORSKIP="\\\\par\\\\vskip $MORE_SKIP "
CENTERED=${CTRSKIP}"pt\\\\centerline{"

#######################################
# Validate one command line word as an option word.
# Globals:   OPTLETTERS (read)
# Arguments: $1 - the word to check
# Outputs:   "XX<word>" on stdout when the word is '-' followed by one or
#            more letters that ALL belong to OPTLETTERS; nothing otherwise.
#            (The XX prefix is the marker the rest of the script expects.)
#######################################
option_word() {
  case "$1" in
    -) ;;                      # a bare dash carries no option letter
    -*[!$OPTLETTERS]*) ;;      # at least one letter is not a known option
    -*) printf 'XX%s\n' "$1" ;;
  esac
}

# Check for valid options.
# SOMETHI is non-empty when the first argument looks like an option at all,
# OPTIONS is non-empty only when every letter in it is valid; any difference
# between the two therefore means an illegal option was given.
SOMETHI=""
case "$1" in
  -*) SOMETHI="XX$1" ;;
esac
OPTIONS=$(option_word "$1")
if [ "$SOMETHI" != "$OPTIONS" ]; then
  printf 'Illegal option %s\n' "$1" >&2
  exit 1
fi

# The first Argument is a valid option; the second must be
# the name of the file to process.
if [ -n "$SOMETHI" ]; then
  ARGUMENT="$2"
else
  ARGUMENT="$1"
fi

#######################################
# Test whether an option letter was given on the command line.
# Globals:   OPTIONS (read) - validated option word, e.g. "XX-tg"
# Arguments: $1 - single option letter
# Returns:   0 when the letter occurs in OPTIONS, 1 otherwise
#######################################
has_option() {
  case "$OPTIONS" in
    *"$1"*) return 0 ;;
  esac
  return 1
}

# A literal newline, used to put line breaks into sed replacement text.
NL=$'\n'

# Defaults: no comment prefix, English style quotes.
CMT=""
QTONCHAR="\`\`"
QTOFFCHAR="\'\'"

if has_option t; then
  TEXFLAG="yes"
else
  TEXFLAG="no"
fi

if has_option g; then
  LANGUAGE="german"
  QTONCHAR="\"\`"
  QTOFFCHAR="\"\'"
fi

if has_option e; then
  LANGUAGE="english"
fi

if has_option u; then
  LANGUAGE="USenglish"
fi

# These values end up in the text of a sed "i\" command, which removes one
# level of backslashes; four backslashes here give one in the TeX file.
if has_option o; then
  SELORIGINAL="\\\\originalTeX"
else
  SELORIGINAL="%\\\\originalTeX"
fi

if has_option x; then
  # plain TeX: English quotes, and every language related header line is
  # reduced to an empty TeX comment
  QTONCHAR="\`\`"
  QTOFFCHAR="\'\'"
  CMT="%"
  SELORIGINAL="%"
  IN_UML="%"
  IN_GER="%"
  IN_SEL="%"
else
  IN_UML="$CMT\\\\input umlaut.tex"
  IN_GER="$CMT\\\\input german.tex"
  IN_SEL="$CMT\\\\selectlanguage{$LANGUAGE}"
fi

if has_option h; then
  cat <<EOF
totex Shell Script $VERSION
Convert ASCII-text to TeX format
Usage: totex [-$OPTLETTERS] <file-to-convert>
where: -t means: start TeX as well
        g means: document in German
        e means: document in English
        u means: document in US-English
        o means: OriginalTeX (all functions of german.tex
                 are switched off)
        x means: plain TeX (no special characters in source text)
                 overrides options -geuo
                 german.tex and umlaut.tex are not required
        p means: enable/disable page numbers
        h means: display this help screen
EOF
  exit 1
fi

if [ -z "$ARGUMENT" ]; then
  cat <<EOF
totex Shell Script $VERSION
Convert ASCII-text to TeX format
Usage: totex [-$OPTLETTERS] <file-to-convert>
       totex -h for help
EOF
  exit 1
fi

if ! [ -f "$ARGUMENT" ]; then
  echo "Input file $ARGUMENT not found" >&2
  exit 1
fi

# NOTE(review): the scratch file lives next to the input under a fixed
# name; an existing "<input>.tmp" is overwritten and removed at the end.
TMPFILE="$ARGUMENT".tmp
# foo.txt -> foo.tex, anything else -> <name>.tex
OUTFILE="${ARGUMENT%.txt}.tex"

echo "Converting $ARGUMENT to $OUTFILE"

# -p toggles whatever default was configured.
if has_option p; then
  if [ "$PAGENUMBERS" = yes ]; then
    PAGENUMBERS="no"
  else
    PAGENUMBERS="yes"
  fi
fi

if [ "$PAGENUMBERS" = yes ]; then
  PAGETEX='%\\nopagenumbers'
  echo "pagenumbers enabled"
else
  PAGETEX='\\nopagenumbers'
fi

if [ "$COLON_NEWLINE" = yes ]; then
  START_NEWLINE='\\par\\noindent'
else
  START_NEWLINE=""
fi

if [ "$NUMBERED_ITEMS" = yes ]; then
  NIS_SED='s/^[ ]*[1-9]\. /\\par\\vskip '"$ALIN_SKIP"' pt\\item{&} /'
  NIM_SED='s/^[ ]*[1-9][0-9]\. /\\par\\vskip '"$ALIN_SKIP"' pt\\item{&} /'
else
  NIS_SED=""
  NIM_SED=""
fi

# Pass 1: character level conversion
#  - replace all $ by \$
#  - preserve curled braces in the text
#  - deal with quotes (mark them with \qton / \qtoff)
#  - replace special characters % & ~ # ^ < > « » ¢
#  - turn rows of _ or - into a horizontal rule (on a line of its own),
#    rows of dots into \dots, rows of = into \equfill
#  - a leading dot becomes \newalinea, ~word~ and *word* become italic
# tr first maps byte 0322 to an apostrophe and byte 0251 to byte 0242.
# NOTE(review): the tr codes and the «, », ¢ literals look like they assume
# a single-byte (Latin-1) input encoding - confirm before feeding UTF-8.
tr '\322\251' '\047\242' < "$ARGUMENT" | sed \
  -e "s/\\$/\\\\$/g" \
  -e "s/{/\$\\\\{\$/g" \
  -e "s/}/\$\\\\}\$/g" \
  -e "s/^\"/&{\\\\qton}/" \
  -e "s/[ ][ ]*[(]*\"/&{\\\\qton}/g" \
  -e "s/=\"/&{\\\\qton}/g" \
  -e "s/^(\"/&{\\\\qton}/" \
  -e "s/\"[\\.,!;)]*[ ][ ]*/{\\\\qtoff}&/g" \
  -e "s/\">/{\\\\qtoff}>/g" \
  -e "s/\"[\\.,!;)>]*$/{\\\\qtoff}&/g" \
  -e "s/%/\\\\%/g" \
  -e "s/&/\\\\&/g" \
  -e "s/\(~\)\([A-Z,a-z][A-Z,a-z]*\)\(~\)/{\\\\italic \2}/g" \
  -e "s/~/\\\\~{}/g" \
  -e "s/#/\\\\#/g" \
  -e "s/\\^'/'/g" \
  -e "s/\\^/\\\\^{}/g" \
  -e "s/</\$<\$/g" \
  -e "s/>/\$>\$/g" \
  -e "s/«/\\\\flqq{}/g" \
  -e "s/»/\\\\frqq{}/g" \
  -e "s/¢/\\\\copyright{}/g" \
  -e "s/^[ ]*___*[ ]*$/\\${NL}$HOR_LINE/" \
  -e "s/^ *[-_] [-_] [-_][-_ ]*$/\\${NL}$HOR_LINE/" \
  -e "s/_/\\\\_{}/g" \
  -e "s/^[ ]*---*[ ]*$/\\${NL}$HOR_LINE/" \
  -e "s/\\.\\.\\.\\.*[ ]*/{\\\\dots} /g" \
  -e "s/\\.\\.\\.[ ][ ]*/{\\\\dots} /g" \
  -e "s/\\.\\.[ ][ ]*/{\\\\dots} /g" \
  -e "s/====*/\\\\equfill /g" \
  -e "s/^[ ]*\\./\\\\newalinea{}/" \
  -e "s/\(\\*\)\([A-Za-z][a-z]*\)\(\\*\)/{\\\\italic \2}/g" \
  > "$TMPFILE"

#
# Pass 2: this is all the stuff to be included at the top
# of the TeX - file.  The -e fragments are joined by newlines and together
# form the text of one "1 i\" command; the final fragment ends in an empty
# line so that the header is followed by a blank line.
sed \
  -e "1 i\\" \
  -e "% TeX file generated by totex $VERSION\\" \
  -e "\\\\font\\\\cm=$NORMAL_FONT\\" \
  -e "\\\\font\\\\cmb=$TITLE__FONT\\" \
  -e "\\\\font\\\\bold=$BOLD___FONT\\" \
  -e "\\\\font\\\\italic=$EMPHA__FONT\\" \
  -e "\\\\vsize = $PAGE_HEIGHT in\\" \
  -e "\\\\hsize = $PAGE_WIDTH in\\" \
  -e "\\\\baselineskip = $LINE_HEIGHT pt\\" \
  -e "\\\\parskip = $PARA_SKIP pt\\" \
  -e "\\\\parindent = $PARA_INDENT pt\\" \
  -e "$PAGETEX\\" \
  -e "\\\\raggedbottom\\" \
  -e "$PENALTY\\" \
  -e "$IN_UML\\" \
  -e "$IN_GER\\" \
  -e "$IN_SEL\\" \
  -e "$SELORIGINAL\\" \
  -e "\\\\def\\\\qton{$QTONCHAR}\\" \
  -e "\\\\def\\\\qtoff{$QTOFFCHAR}\\" \
  -e "\\\\def\\\\newalinea{$NEW_ALINEA}\\" \
  -e "\\\\def\\\\equfill{$EQU__FILL}\\" \
  -e "\\\\cm\\${NL}" \
  < "$TMPFILE" > "$OUTFILE"

# Pass 3: where is the beginning of a new line?
# Isolate single lines from EMail (header fields, quoted lines);
# the last expressions try to catch alineas indicated with * or -,
# numbered items and all-capitals lines (which get centered).
# Finally the closing TeX commands are appended after the last line.
sed \
  -e "s/^[ ]*From:.*$/{$LINEMODE &\\\\par}/g" \
  -e "s/^[ ]*From .*@.*$/{$LINEMODE &\\\\par}/g" \
  -e "s/^[ ]*To:.*$/{$LINEMODE &\\\\par}/g" \
  -e "s/^[ ]*Date:.*$/{$LINEMODE &\\\\par}/g" \
  -e "s/^[ ]*Subject:.*$/{$LINEMODE &\\\\par}/g" \
  -e "s/^[ ]*Host:.*$/{$LINEMODE &\\\\par}/g" \
  -e "s/^[ ]*Newsgroups:.*$/{\\\\par\\\\noindent &}/g" \
  -e "s/^[ ]*[A-Za-z0-9][-A-Za-z0-9]*:[ ]/{$START_NEWLINE &}/" \
  -e "s/^[ ]*[A-Za-z0-9][A-Z ]*:[ ]/{$START_NEWLINE &}/" \
  -e "s/^[ ]*[0-9]*\\.[0-9][0-9]*[ -]/\\\\par\\\\noindent &/" \
  -e "s/^ *[+] .*/{$LINEMODE &\\\\par}/" \
  -e "s/^\\\\[%#].*/{$LINEMODE &\\\\par}/" \
  -e "s/^[ ]*([a-z]) /\\\\par &/" \
  -e "s/^[ ]*([1-9])/\\\\par &/" \
  -e "s/^[ ]*([1-9][0-9])/\\\\par &/" \
  -e "s/^\\$>\\$.*$/{$LINEMODE &\\\\par}/g" \
  -e "s/^ *\\$<\\$.*$/{$LINEMODE &\\\\par}/g" \
  -e "s/\"{\\\\qton}/{\\\\qton}/g" \
  -e "s/{\\\\qtoff}\"/{\\\\qtoff}/g" \
  -e "s/^[ ]*\\*\\**[ ]*$/$STAR_FILL/" \
  -e "s/^[ ]*\\*/ \\\\item{\\$\\\\bullet\\$} /" \
  -e "s/^[ ]*-[ ][ ]*/ \\\\item{-} /" \
  -e "s/^[ ]*[A-Z0-9,()][A-Z0-9,() ][A-Z0-9,'?!\\.() -]*$/$CENTERED&}/" \
  -e "s/\\\\centerline{[ ]*/\\\\centerline{/g" \
  -e "s/\\[LINK\\]//g" \
  -e "s/\\[INLINE\\]//g" \
  -e "s/^[ , ]*$//" \
  -e "$ a\\" \
  -e "\\\\vfill\\" \
  -e "\\\\eject\\" \
  -e "\\\\end" \
  < "$OUTFILE" > "$TMPFILE"

#
# Pass 4: in this section the whole file is taken to the hold buffer
# as *one single line* to look for titles and new paragraphs.
# Numbered items after a blank line are tagged with @@@, candidate title
# lines (surrounded by blank lines) with @+@; the tags are consumed and
# removed by pass 5.  Nothing is printed until the last line (sed -n).
sed -n \
  -e "1,$ H" \
  -e "$ g" \
  -e "s/\\n\\n[ ]*[1-9][0-9]\\./&@@@/g" \
  -e "s/\\n\\n[ ]*[1-9]\\./&@@@/g" \
  -e "s/\\n\\n[ ]*[A-ZÄÖÜ][A-Za-zÄ-ü0-9 '@+=,-]*/&@+@/g" \
  -e "s/[A-ZÄÖÜ][A-Za-zÄ-ü0-9 '@+=,-]*\\n\\n/@+@&/g" \
  -e "s/\\n\\n\\\\newalinea/\ \\\\newalinea/g" \
  -e "s/\\n\\n$CTRSKIP/\\${NL}\\${NL}$MORSKIP/g" \
  -e "$ p" \
  < "$TMPFILE" > "$OUTFILE"

# Pass 5: turn the tagged lines into titles and items, handle roman
# numeral and lettered enumerations, then strip the helper tags.
sed \
  -e "s/@+@..*@+@$/{$TITLETEX &}/" \
  -e "s/^[ ]*(ii*) */\\\\par &/" \
  -e "s/^[ ]*(i*vi*) */\\\\par &/" \
  -e "s/^[ ]*(i*xi*) */\\\\par &/" \
  -e "s/^[XVI][XVI]*\\. .*$/{$TITLETEX &}/" \
  -e "s/^[ ]*[a-z]\\. */\\\\par\\\\item{&} /" \
  -e "s/^[ ]*[1-9][0-9]\\.@@@/\\\\par\\\\vskip $ALIN_SKIP pt\\\\item{&} /" \
  -e "s/^[ ]*[1-9]\\.@@@/\\\\par\\\\vskip $ALIN_SKIP pt\\\\item{&} /" \
  -e "$NIS_SED" \
  -e "$NIM_SED" \
  -e "s/@+@//g" \
  -e "s/@@@//g" \
  -e "s/\\\\item{[ ]*/\\\\item{/" \
  -e "s/\\\\item{[0-9]*\\.[ ]*}/&@+@/" \
  -e "s/[ ]*}@+@/}/" \
  -e "s/\\\\par [ ]*/\\\\par /g" \
  < "$OUTFILE" > "$TMPFILE"

cp -- "$TMPFILE" "$OUTFILE"

if [ "$TEXFLAG" = "yes" ]; then
  tex "$OUTFILE"
fi

rm -- "$TMPFILE"
echo "done"
exit 0