#!/bin/zsh

# filtre1 -- filter run for menu choice 1 ("Prise en compte des abréviations
# et capitales, minimisation des variantes graphiques").
#
# Takes no arguments. For every name printed by `ls TMPMS` (relative to the
# current directory) the file is piped through a perl pass and then a long
# sed script; the result is written to TMPMS/<name>.bis, which then replaces
# the original file.
function filtre1()
{
for i in `ls TMPMS`; do
    # Commented-out earlier form of the preprocessing command.
    #perl -pe 'BEGIN { "\x0a" } s/~\x0a//g; s/\x0a/ /g; s/^$/\\\//g; s/\[.*?\]//g' TMPMS/$i| tr '\n' ' '
# perl pass, rule by rule: drop [...] spans (non-greedy); delete every §;
# put a § on empty lines; delete a leading "@@...@" span; turn each newline
# into a space; delete "~ "; delete bytes \xe6 and \xa7; byte \xa3 -> "#";
# byte \xb0 -> "°"; delete byte \xc2.
# NOTE(review): if this script is stored as UTF-8, the \xa7 / \xc2 deletions
# look like they also strip the bytes of the § and ° inserted just before --
# confirm the intended encoding.
#
# sed pass. Rules run top to bottom, each one on the output of the previous
# ones, so their order matters:
#   - collapse "§§" (twice) and turn " ~" into "~";
#   - capitals A..Z (there is no W rule) -> "^" followed by the lowercase letter;
#   - spelling rules: ae/oe -> e, consonant clusters (dc -> cc ... cq -> dq),
#     y -> i, ph -> f, h removed, ti + vowel -> ci + vowel;
#   - double spaces collapsed (two passes);
#   - abbreviation signs replaced by their expansion, the supplied letters
#     being introduced by "#" (written \# in the script);
#   - æ / byte \xe6 -> "@", £ / byte \xa3 -> "#".
# NOTE(review): in "s/^h//g" the "^" is a regex anchor, so the rule removes an
# h at the start of the line, not the literal "^h" produced by the H rule;
# the following "s/h//g" then removes every h -- confirm which was intended.
# NOTE(review): "s/nm/mm/g" runs before "s/qnm-/.../" and "s/n-/no-/g" runs
# before "s/n-c/.../", so those later rules appear unable to match -- confirm.
perl -pe 's/\[.*?\]//g; s/§//g; s/^$/§/g; s/^@@.*@//g; s/\x0a/ /g; s/~ //g; s/\xe6//g; s/\xa7//g; s/\xa3/#/g; s/\xb0/°/g; s/\xc2//g' TMPMS/$i | sed "s/§§/§/g;
s/§§/§/g; 
s/ ~/~/g
s/\xb0/°/g;
s/A/^a/g;
s/B/^b/g;
s/C/^c/g;
s/D/^d/g;
s/E/^e/g;
s/F/^f/g;
s/G/^g/g;
s/H/^h/g;
s/I/^i/g;
s/J/^j/g;
s/K/^k/g;
s/L/^l/g;
s/M/^m/g;
s/N/^n/g;
s/O/^o/g;
s/P/^p/g;
s/Q/^q/g;
s/R/^r/g;
s/S/^s/g;
s/T/^t/g;
s/U/^u/g;
s/V/^v/g;
s/X/^x/g;
s/Y/^y/g;
s/Z/^z/g;
s/ae/e/g; 
s/oe/e/g;
s/dc/cc/g; 
s/df/ff/g; 
s/dg/gg/g; 
s/dl/ll/g; 
s/dm/mm/g; 
s/dn/nn/g; 
s/dp/pp/g; 
s/dr/rr/g; 
s/ds/ss/g; 
s/dt/tt/g; 
s/nm/mm/g; 
s/nb/mb/g; 
s/np/mp/g; 
s/nr/rr/g; 
s/bp/pp/g; 
s/cq/dq/g; 
s/y/i/g; 
s/ph/f/g;
s/h°/h#oc/g; 
s/^h//g;
s/h//g;
s/tia/cia/g; 
s/tie/cie/g; 
s/tii/cii/g; 
s/tio/cio/g; 
s/tiu/ciu/g; 
s/  / /g; 
s/  / /g; 
s/q-/q\#ue/g; 
s/2/\#ur/g; 
s/p|/p\#ro/g; 
s/p-/p\#rae/g;
s/\xc3\xa6/\#\xc3\xa6/g;  
s/'/\#us/g; 
s/q\"/q\#ua/g; 
s/p\"/p\#ra/g; 
s/\"/\#er/g; 
s/\&/\#et/g; 
s/q+/q\#ue/g; 
s/b+/b\#us/g; 
s/s+/s\#ed/g; 
s/ s- / s\#unt /g; 
s/ c- / c\#u- /g; 
s/c-/co-/g; 
s/qm-/q\#uo\#niam/g; 
s/qnm-/q\#uoniam/g; 
s/n-/no-/g; 
s/p_/p\#er/g; 
s/\xc3\xa6/\#\xc3\xa6/g; 
s/q\`/q\#ui/g; 
s/aut-/aut\#em/g; 
s/au-/au\#tem/g; 
s/t-c/t\#unc/g; 
s/n-c/n\#unc/g; 
s/m-/m\#e-/g; 
s/q°/q\#uo/g; 
s/ %/ \#est/g; 
s/p\`/p\#ri/g; 
s/t\`/t\#ri/g; 
s/o=i/o\#mni/g; 
s/o-i/o\#mni/g; 
s/e=e/e\#sse/g; 
s/r-t/r\#unt/g; 
s/s-t/s\#unt/g; 
s/s- /s\#unt /g; 
s/=/\#er/g; 
s/ e- / \#est /g; 
s/q</q\#uia/g; 
s/7-/e\#t\#iam/g; 
s/7/e\#t/g; 
s/t°/t\#ro/g; 
s/u°/u\#ero/g; 
s/v°/u\#ero/g; 
s/v\xb0/u\#ero/g; 
s/r%/r\#um/g; 
s/r+/r\#um/g; 
s/uer-/uer\#u-/g; 
s/ver-/uer\#u-/g; 
s/9/c\#on/g; 
s/it-/it\#e-/g;
s/\æ/@/g; 
s/\xe6/@/g; 
s/£/\#/g; 
s/\xa3/\#/g"|cat > TMPMS/$i.bis
# Swap the filtered copy in place of the original file.
rm -Rf TMPMS/$i
mv TMPMS/$i.bis TMPMS/$i
# Append to the rewritten file. zsh's builtin echo interprets "\n" by
# default, so this should add an extra newline on top of echo's own.
echo "\n" >> TMPMS/$i
done
}

# filtre2 -- filter run for menu choice 2 ("Prise en compte des abréviations
# et capitales ainsi que des variantes graphiques").
#
# Takes no arguments. For every name printed by `ls TMPMS` the file is piped
# through a perl pass and a sed script, written to TMPMS/<name>.bis, and that
# copy then replaces the original. Unlike filtre1, the sed script contains no
# spelling rules (ae/oe, consonant clusters, y, ph, h, ti + vowel).
function filtre2()
{
for i in `ls TMPMS`; do
    # Commented-out earlier form of the preprocessing command.
    #perl -pe 'BEGIN { "\x0a" } s/~\x0a//g; s/\x0a/ /g; s/^$/\\\//g; s/\[.*?\]//g' TMPMS/$i| tr '\n' ' '
# perl pass, rule by rule: drop [...] spans (non-greedy); delete every §;
# put a § on empty lines; delete a leading "@...@" span; turn each newline
# into a space; delete "~ "; delete bytes \xe6 and \xa7; byte \xa3 -> "#";
# byte \xb0 -> "°"; delete byte \xc2.
# NOTE(review): the leading-span pattern here is "^@.*@" whereas filtre1 uses
# "^@@.*@" -- confirm whether the difference is intentional.
#
# sed pass (rules run top to bottom, each on the output of the previous ones):
#   - collapse "§§" (twice) and turn " ~" into "~";
#   - capitals A..Z (there is no W rule) -> "^" followed by the lowercase letter;
#   - double spaces collapsed (two passes);
#   - abbreviation signs replaced by their expansion, the supplied letters
#     being introduced by "#" (written \# in the script);
#   - æ / byte \xe6 -> "@", £ / byte \xa3 -> "#".
# NOTE(review): compared with filtre1, " e- " becomes " e#st " (filtre1:
# " #est "), "r%" / "r+" become "#rum" (filtre1: "r#um"), and there is no
# "m-" rule -- confirm whether these differences are intended.
# NOTE(review): "s/n-/no-/g" runs before "s/n-c/.../", so the latter appears
# unable to match -- confirm.
perl -pe 's/\[.*?\]//g; s/§//g; s/^$/§/g; s/^@.*@//g; s/\x0a/ /g; s/~ //g; s/\xe6//g; s/\xa7//g; s/\xa3/#/g; s/\xb0/°/g; s/\xc2//g' TMPMS/$i | sed "s/§§/§/g;
s/§§/§/g; 
s/ ~/~/g
s/\xb0/°/g;
s/A/^a/g;
s/B/^b/g;
s/C/^c/g;
s/D/^d/g;
s/E/^e/g;
s/F/^f/g;
s/G/^g/g;
s/H/^h/g;
s/I/^i/g;
s/J/^j/g;
s/K/^k/g;
s/L/^l/g;
s/M/^m/g;
s/N/^n/g;
s/O/^o/g;
s/P/^p/g;
s/Q/^q/g;
s/R/^r/g;
s/S/^s/g;
s/T/^t/g;
s/U/^u/g;
s/V/^v/g;
s/X/^x/g;
s/Y/^y/g;
s/Z/^z/g;
s/  / /g; 
s/  / /g; 
s/h°/h#oc/g; 
s/q-/q\#ue/g; 
s/2/\#ur/g; 
s/p|/p\#ro/g; 
s/p-/p\#rae/g; 
s/'/\#us/g; 
s/q\"/q\#ua/g; 
s/p\"/p\#ra/g; 
s/\"/\#er/g; 
s/\&/\#et/g; 
s/q+/q\#ue/g; 
s/b+/b\#us/g; 
s/s+/s\#ed/g; 
s/ s- / s\#unt /g; 
s/ c- / c\#u- /g; 
s/c-/co-/g; 
s/qm-/q\#uo\#niam/g; 
s/qnm-/q\#uoniam/g; 
s/n-/no-/g; 
s/p_/p\#er/g; 
s/\xc3\xa6/\#\xc3\xa6/g; 
s/q\`/q\#ui/g; 
s/aut-/aut\#em/g; 
s/au-/au\#tem/g; 
s/t-c/t\#unc/g; 
s/n-c/n\#unc/g; 
s/q°/q\#uo/g; 
s/ %/ \#est/g; 
s/p\`/p\#ri/g; 
s/t\`/t\#ri/g; 
s/o=i/o\#mni/g; 
s/o-i/o\#mni/g; 
s/e=e/e\#sse/g; 
s/r-t/r\#unt/g; 
s/s-t/s\#unt/g; 
s/s- /s\#unt /g; 
s/=/\#er/g; 
s/ e- / e\#st /g; 
s/q</q\#uia/g; 
s/7-/e\#t\#iam/g; 
s/7/e\#t/g; 
s/t°/t\#ro/g; 
s/u°/u\#ero/g; 
s/v°/u\#ero/g; 
s/v\xb0/u\#ero/g; 
s/r%/\#rum/g; 
s/r+/\#rum/g; 
s/uer-/uer\#u-/g; 
s/ver-/uer\#u-/g; 
s/9/c\#on/g; 
s/it-/it\#e-/g;
s/\æ/@/g; 
s/\xe6/@/g; 
s/£/\#/g; 
s/\xa3/\#/g"|cat > TMPMS/$i.bis
# Swap the filtered copy in place of the original file.
rm -Rf TMPMS/$i
mv TMPMS/$i.bis TMPMS/$i
# Append to the rewritten file. zsh's builtin echo interprets "\n" by
# default, so this should add an extra newline on top of echo's own.
echo "\n" >> TMPMS/$i
done
}

# filtre3 -- filter run for menu choice 3 ("Suppression des marques
# d'abréviations et de capitales, minimisation des variantes graphiques").
#
# Takes no arguments. For every name printed by `ls TMPMS` the file is piped
# through a perl pass and a sed script, written to TMPMS/<name>.bis, and that
# copy then replaces the original. Capitals are simply lowercased,
# abbreviations are expanded without a marker, and the script ends by
# deleting every "#" and "^".
function filtre3()
{
for i in `ls TMPMS`; do
    # Commented-out earlier form of the preprocessing command.
    #perl -pe 'BEGIN { "\x0a" } s/~\x0a//g; s/\x0a/ /g; s/^$/\\\//g; s/\[.*?\]//g' TMPMS/$i| tr '\n' ' '
# perl pass, rule by rule: drop [...] spans (non-greedy); delete every §;
# put a § on empty lines; delete a leading "@...@" span; turn each newline
# into a space; delete "~ "; delete bytes \xe6, \xa7 and \xa3;
# byte \xb0 -> "°"; delete byte \xc2.
#
# sed pass (rules run top to bottom, each on the output of the previous ones):
#   - collapse "§§" (twice) and turn " ~" into "~";
#   - capitals A..Z (there is no W rule) -> lowercase letter;
#   - spelling rules: ae/oe -> e, consonant clusters (dc -> cc ... cq -> dq),
#     y -> i, ph -> f, h removed, ti + vowel -> ci + vowel;
#   - double spaces collapsed (two passes);
#   - abbreviation signs replaced by their plain expansion;
#   - æ / byte \xe6 -> "@"; £, byte \xa3, "#" and "^" deleted.
# NOTE(review): "s/n-c/nunc\#/g" is the only expansion here that still
# writes a "#"; the final "s/#//g" removes it again. In addition
# "s/n-/no-/g" runs before it and "s/nm/mm/g" runs before "s/qnm-/.../",
# so both later rules appear unable to match -- confirm.
perl -pe 's/\[.*?\]//g; s/§//g; s/^$/§/g; s/^@.*@//g; s/\x0a/ /g; s/~ //g; s/\xe6//g; s/\xa7//g; s/\xa3//g; s/\xb0/°/g; s/\xc2//g' TMPMS/$i | sed "s/§§/§/g;
s/§§/§/g; 
s/ ~/~/g
s/\xb0/°/g;
s/A/a/g;
s/B/b/g;
s/C/c/g;
s/D/d/g;
s/E/e/g;
s/F/f/g;
s/G/g/g;
s/H/h/g;
s/I/i/g;
s/J/j/g;
s/K/k/g;
s/L/l/g;
s/M/m/g;
s/N/n/g;
s/O/o/g;
s/P/p/g;
s/Q/q/g;
s/R/r/g;
s/S/s/g;
s/T/t/g;
s/U/u/g;
s/V/v/g;
s/X/x/g;
s/Y/y/g;
s/Z/z/g;
s/ae/e/g; 
s/oe/e/g;
s/dc/cc/g; 
s/df/ff/g; 
s/dg/gg/g; 
s/dl/ll/g; 
s/dm/mm/g; 
s/dn/nn/g; 
s/dp/pp/g; 
s/dr/rr/g; 
s/ds/ss/g; 
s/dt/tt/g; 
s/nm/mm/g; 
s/nb/mb/g; 
s/np/mp/g; 
s/nr/rr/g; 
s/bp/pp/g; 
s/cq/dq/g; 
s/y/i/g; 
s/ph/f/g;
s/h°/hoc/g; 
s/h//g;
s/tia/cia/g; 
s/tie/cie/g; 
s/tii/cii/g; 
s/tio/cio/g; 
s/tiu/ciu/g; 
s/  / /g; 
s/  / /g; 
s/q-/que/g; 
s/2/ur/g; 
s/p|/pro/g; 
s/p-/prae/g; 
s/'/us/g; 
s/q\"/qua/g; 
s/p\"/pra/g; 
s/\"/er/g; 
s/\&/et/g; 
s/q+/que/g; 
s/b+/bus/g; 
s/s+/sed/g; 
s/ s- / sunt /g; 
s/ c- / cu- /g; 
s/c-/co-/g; 
s/qm-/quoniam/g; 
s/qnm-/quoniam/g; 
s/n-/no-/g; 
s/p_/per/g; 
s/q\`/qui/g; 
s/aut-/autem/g; 
s/au-/autem/g; 
s/t-c/tunc/g; 
s/n-c/nunc\#/g; 
s/m-/me-/g; 
s/q°/quo/g; 
s/ %/ est/g; 
s/p\`/pri/g; 
s/t\`/tri/g; 
s/o=i/omni/g; 
s/o-i/omni/g; 
s/e=e/esse/g; 
s/r-t/runt/g; 
s/s-t/sunt/g; 
s/s- /sunt /g; 
s/=/er/g; 
s/ e- / est /g; 
s/q</quia/g; 
s/7-/etiam/g; 
s/7/et/g; 
s/t°/tro/g; 
s/u°/uero/g; 
s/v°/uero/g; 
s/v\xb0/uero/g; 
s/r%/rum/g; 
s/r+/rum/g; 
s/uer-/ueru-/g; 
s/ver-/ueru-/g; 
s/9/con/g; 
s/it-/ite-/g;
s/\æ/@/g; 
s/\xe6/@/g; 
s/£//g; 
s/\xa3//g;
s/#//g;
s/\^//g"|cat > TMPMS/$i.bis
# Swap the filtered copy in place of the original file.
rm -Rf TMPMS/$i
mv TMPMS/$i.bis TMPMS/$i
# Append to the rewritten file. zsh's builtin echo interprets "\n" by
# default, so this should add an extra newline on top of echo's own.
echo "\n" >> TMPMS/$i
done
}

# filtre4 -- filter run for menu choice 4 ("Suppression des marques
# d'abréviations et de capitales, prise en compte des variantes graphiques").
#
# Takes no arguments. For every name printed by `ls TMPMS` the file is piped
# through a perl pass and a sed script, written to TMPMS/<name>.bis, and that
# copy then replaces the original. Capitals are lowercased, abbreviations are
# expanded without a marker, every "#" and "^" is deleted at the end, and --
# unlike filtre3 -- there are no spelling rules (ae/oe, consonant clusters,
# y, ph, h, ti + vowel).
function filtre4()
{
for i in `ls TMPMS`; do
    # Commented-out earlier form of the preprocessing command.
    #perl -pe 'BEGIN { "\x0a" } s/~\x0a//g; s/\x0a/ /g; s/^$/\\\//g; s/\[.*?\]//g' TMPMS/$i| tr '\n' ' '
# perl pass, rule by rule: drop [...] spans (non-greedy); delete every §;
# put a § on empty lines; delete a leading "@...@" span; turn each newline
# into a space; delete "~ "; delete bytes \xe6, \xa7 and \xa3;
# byte \xb0 -> "°"; delete byte \xc2.
#
# sed pass (rules run top to bottom, each on the output of the previous ones):
#   - collapse "§§" (twice) and turn " ~" into "~";
#   - capitals A..Z (there is no W rule) -> lowercase letter;
#   - "h°" -> "hoc", double spaces collapsed (two passes);
#   - abbreviation signs replaced by their plain expansion;
#   - æ / byte \xe6 -> "@"; £, byte \xa3, "#" and "^" deleted.
# NOTE(review): "s/n-c/nunc\#/g" still writes a "#" that the final "s/#//g"
# removes, and "s/n-/no-/g" runs before it, so the rule appears unable to
# match -- confirm.
perl -pe 's/\[.*?\]//g; s/§//g; s/^$/§/g; s/^@.*@//g; s/\x0a/ /g; s/~ //g; s/\xe6//g; s/\xa7//g; s/\xa3//g; s/\xb0/°/g; s/\xc2//g' TMPMS/$i | sed "s/§§/§/g;
s/§§/§/g; 
s/ ~/~/g
s/\xb0/°/g;
s/A/a/g;
s/B/b/g;
s/C/c/g;
s/D/d/g;
s/E/e/g;
s/F/f/g;
s/G/g/g;
s/H/h/g;
s/I/i/g;
s/J/j/g;
s/K/k/g;
s/L/l/g;
s/M/m/g;
s/N/n/g;
s/O/o/g;
s/P/p/g;
s/Q/q/g;
s/R/r/g;
s/S/s/g;
s/T/t/g;
s/U/u/g;
s/V/v/g;
s/X/x/g;
s/Y/y/g;
s/Z/z/g;
s/h°/hoc/g; 
s/  / /g; 
s/  / /g; 
s/q-/que/g; 
s/2/ur/g; 
s/p|/pro/g; 
s/p-/prae/g; 
s/'/us/g; 
s/q\"/qua/g; 
s/p\"/pra/g; 
s/\"/er/g; 
s/\&/et/g; 
s/q+/que/g; 
s/b+/bus/g; 
s/s+/sed/g; 
s/ s- / sunt /g; 
s/ c- / cu- /g; 
s/c-/co-/g; 
s/qm-/quoniam/g; 
s/qnm-/quoniam/g; 
s/n-/no-/g; 
s/p_/per/g; 
s/q\`/qui/g; 
s/aut-/autem/g; 
s/au-/autem/g; 
s/t-c/tunc/g; 
s/n-c/nunc\#/g; 
s/m-/me-/g; 
s/q°/quo/g; 
s/ %/ est/g; 
s/p\`/pri/g; 
s/t\`/tri/g; 
s/o=i/omni/g; 
s/o-i/omni/g; 
s/e=e/esse/g; 
s/r-t/runt/g; 
s/s-t/sunt/g; 
s/s- /sunt /g; 
s/=/er/g; 
s/ e- / est /g; 
s/q</quia/g; 
s/7-/etiam/g; 
s/7/et/g; 
s/t°/tro/g; 
s/u°/uero/g; 
s/v°/uero/g; 
s/v\xb0/uero/g; 
s/r%/rum/g; 
s/r+/rum/g; 
s/uer-/ueru-/g; 
s/ver-/ueru-/g; 
s/9/con/g; 
s/it-/ite-/g;
s/\æ/@/g; 
s/\xe6/@/g; 
s/£//g; 
s/\xa3//g;
s/#//g;
s/\^//g"|cat > TMPMS/$i.bis
# Swap the filtered copy in place of the original file.
rm -Rf TMPMS/$i
mv TMPMS/$i.bis TMPMS/$i
# Append to the rewritten file. zsh's builtin echo interprets "\n" by
# default, so this should add an extra newline on top of echo's own.
echo "\n" >> TMPMS/$i
done
}


# ---------------------------------------------------------------------------
# Main script.
#
# Usage: <script> DIRECTORY
#
#   1. copies DIRECTORY to ./TMPMS (working copy, removed afterwards);
#   2. asks which filter (1-4) to apply and the name of the output file;
#   3. runs the chosen filtreN on the working copy (any other answer leaves
#      the texts unfiltered, as before);
#   4. runs ./distances on the texts, writes its output to "intree" with the
#      "TMPMS/" prefix stripped, and calls drawtree;
#   5. renames the resulting "plotfile" to the requested name, opens it with
#      gv and stores a time-stamped copy in the archive directory.
# ---------------------------------------------------------------------------

# Without a source directory nothing below can work; stop before prompting.
if [[ -z "$1" ]]; then
    echo "Usage: $0 repertoire_des_textes" >&2
    exit 1
fi

# Abort if the working copy cannot be created instead of carrying on with
# missing (or stale) data.
cp -R -- "$1" ./TMPMS || exit 1

echo -e "\n Le programme établit des « coûts » de substitution différents en fonction des caractères comparés; ainsi, le remplacement d'un « n » par un « m » ne « coûte » pas aussi cher que le remplacement, par exemple, d'un « m » par un « r » (voir le code de « distances.ml » pour obtenir la table des substitutions). Par défaut, la présence d'abréviations (signalées par les combinaisons indiquées dans le code, par - après voyelle pour marquer une nasale ou simplement par # avec le développement de l'abréviation) est très légèrement prise en compte, de même que celle de capitales. Il est possible de ne pas tenir compte de ces éléments, et de minimiser les variantes graphiques portant sur les combinaisons de caractères les plus fréquentes (assimilations, etc.), en choisissant parmi les possibilités suivantes:
\n\t 1. Prise en compte des abréviations et capitales, minimisation des variantes graphiques.\n\t 2. Prise en compte des abréviations et capitales ainsi que des variantes graphiques.\n\t 3. Suppression des marques d'abréviations et de capitales, minimisation des variantes graphiques.\n\t 4. Suppression des marques d'abréviations et de capitales, prise en compte des variantes graphiques."
# -r: take the answers literally (no backslash processing).
read -r reponse
echo -e "\n\nDans quel fichier voulez-vous exporter l'arbre (format ps)?"
read -r fichier

# Dispatch on the menu choice; anything else applies no filter.
case "$reponse" in
    1) filtre1 ;;
    2) filtre2 ;;
    3) filtre3 ;;
    4) filtre4 ;;
esac

echo -e "\nCalcul de la matrice des distances sur l'ensemble des textes que vous avez fournis. S'il y en a plus de 15, allez prendre un café...\n"
# Strip the working-directory prefix from the output of ./distances.
./distances TMPMS/* | sed "s/TMPMS\///g" > intree
# A copy of the font file is placed in the current directory before
# drawtree runs, and removed again below.
cp ~/bin/fontfile .
drawtree

rm -Rf TMPMS
rm -Rf fontfile
mv -- plotfile "$fichier"
gv "$fichier"

# Archive prefix YYMMDDhhmmss from a single date call: the four separate
# calls could straddle a second/midnight boundary, and %X is locale
# dependent (a 12-hour locale yields a space that split the cp arguments).
temps=$(date +%y%m%d%H%M%S)
cp -- "$fichier" ~/RECHERCHE/MARTIANUS-VIII/COLLATIONS/ARCHIVE-ARBRES/"${temps}${fichier}"
