#!/bin/sh
# Conversion of an MS Word file (tested on the Czech Clean Air Act draft)
# to an HTML form, then made smaller and cleaner.
#
# Usage: <this-script> WORD_FILE HTML_FILE
#   $1  input Word file; the intermediate files "$1.tmp" and "$1.tmp2" and
#       the tidy error log "$1.errf" are written under the same name prefix
#   $2  resulting HTML file
#
# External tools used: wvHtml, tcs, sed, cstocs, tidy, vlnka2.

if [ "$#" -lt 2 ]; then
  printf 'Usage: %s WORD_FILE HTML_FILE\n' "${0##*/}" >&2
  exit 2
fi

src=$1
out=$2
tmp_a=$src.tmp
tmp_b=$src.tmp2

# All expansions below are quoted so that file names containing spaces or
# glob characters are passed through as single arguments.

wvHtml "$src" "$tmp_b"

# UTF-8 is a universal encoding, but somewhat space-inefficient and hard to
# read while editing, hence the conversion to ISO-8859-2 (il2):
tcs -f utf -t 8859-2 "$tmp_b" >"$tmp_a"

# HTML entities to TeX notation, corrected charset information, removal of
# useless attributes, and approximation of various double quotes:
#   - "&Xcaron;" becomes the TeX sequence \v{X}
#   - "=UTF-8" becomes "=iso-8859-2"
#   - text from "line-height: " through the last double quote on the line
#     is reduced to that double quote
#   - text from cols=" through the last ">" on the line is reduced to ">"
#   - typographic double quotes become ",,", "``" and "''"
# NOTE(review): the empty "s///" is kept exactly as found.  With an empty
# pattern sed reuses the previously used regular expression, so after the
# global caron substitution it should match nothing; the original pattern
# was presumably lost at some point - confirm against an earlier copy.
sed \
  -e 's/&\(.\)caron;/\\v{\1}/g' \
  -e 's///' \
  -e 's/=UTF-8/=iso-8859-2/' \
  -e 's/line-height: \(.*\)"/"/' \
  -e 's/cols="\(.*\)>/>/' \
  -e 's/„/,,/g' \
  -e 's/“/``/g' \
  -e "s/”/\'\'/g" \
  "$tmp_a" >"$tmp_b"

# Convert TeX accents such as \v{s} to their il2 characters
# (cstocs is a part of CSTeX):
cstocs tex il2 "$tmp_b" >"$tmp_a"

# tidy is an indispensable tool for getting an editable HTML
# from a wild-looking source; its diagnostics go to "$src.errf".
# NOTE(review): "-o" directly followed by "-f" only makes sense if this tidy
# treats "-o" as a stand-alone switch; releases where "-o" means
# "-output <file>" would take "-f" as the output name - confirm against the
# installed tidy version.
tidy -raw -c -o -f "$src.errf" "$tmp_a" >"$tmp_b"
rm "$tmp_a"

# Just to get rid of a long free space to the right of the text of the law:
sed s/44.31mm/0mm/ "$tmp_b" >"$out"
rm "$tmp_b"

# Prevent line breaks after one-letter prepositions and conjunctions.
# The trailing "h" argument is passed on unchanged; its meaning is defined
# by vlnka2 and is not visible from this script.
vlnka2 "$out" h