diff options
| author | Matthieu Pignolet <matthieu@puffer.fish> | 2025-05-18 19:01:45 +0400 |
|---|---|---|
| committer | Matthieu Pignolet <matthieu@puffer.fish> | 2025-05-18 19:01:45 +0400 |
| commit | bf476d486b5362805a34bafbf277d3377a7957a0 (patch) | |
| tree | 48b2bd1abe91b99af126a3c369e3cc3dbb3778bb | |
| parent | a66ec73159f71d62097e5634acb956d1428d2a42 (diff) | |
feat: adding a compare script to compare aline-rs with the nltk implementation
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | compare/.gitignore | 1 |
| -rwxr-xr-x | compare/compare.sh | 6 |
| -rw-r--r-- | compare/reference.py | 94 |
3 files changed, 101 insertions, 0 deletions
```diff
diff --git a/compare/.gitignore b/compare/.gitignore
new file mode 100644
index 0000000..12782d5
--- /dev/null
+++ b/compare/.gitignore
@@ -0,0 +1 @@
+*.aline
\ No newline at end of file
diff --git a/compare/compare.sh b/compare/compare.sh
new file mode 100755
index 0000000..53a4112
--- /dev/null
+++ b/compare/compare.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+python ./reference.py > reference.aline
+cargo run > aline-rs.aline
+
+git diff --no-index reference.aline aline-rs.aline > difference.aline
diff --git a/compare/reference.py b/compare/reference.py
new file mode 100644
index 0000000..99b2d30
--- /dev/null
+++ b/compare/reference.py
@@ -0,0 +1,94 @@
+from nltk.metrics.aline import *
+
+def demo():
+    """
+    A demonstration of the result of aligning phonetic sequences
+    used in Kondrak's (2002) dissertation.
+    """
+    data = [pair.split(",") for pair in cognate_data.split("\n")]
+    for pair in data:
+        alignment = align(pair[0], pair[1])[0]
+        alignment = [f"({a[0]}, {a[1]})" for a in alignment]
+        alignment = " ".join(alignment)
+        print(f"{pair[0]} ~ {pair[1]} : {alignment}")
+
+
+cognate_data = """jo,ʒə
+tu,ty
+nosotros,nu
+kjen,ki
+ke,kwa
+todos,tu
+una,ən
+dos,dø
+tres,trwa
+ombre,om
+arbol,arbrə
+pluma,plym
+kabeθa,kap
+boka,buʃ
+pje,pje
+koraθon,kœr
+ber,vwar
+benir,vənir
+deθir,dir
+pobre,povrə
+ðis,dIzes
+ðæt,das
+wat,vas
+nat,nixt
+loŋ,laŋ
+mæn,man
+fleʃ,flajʃ
+bləd,blyt
+feðər,fEdər
+hær,hAr
+ir,Or
+aj,awgə
+nowz,nAzə
+mawθ,munt
+təŋ,tsuŋə
+fut,fys
+nij,knI
+hænd,hant
+hart,herts
+livər,lEbər
+ænd,ante
+æt,ad
+blow,flAre
+ir,awris
+ijt,edere
+fiʃ,piʃkis
+flow,fluere
+staɾ,stella
+ful,plenus
+græs,gramen
+hart,kordis
+horn,korny
+aj,ego
+nij,genU
+məðər,mAter
+mawntən,mons
+nejm,nomen
+njuw,nowus
+wən,unus
+rawnd,rotundus
+sow,suere
+sit,sedere
+θrij,tres
+tuwθ,dentis
+θin,tenwis
+kinwawa,kenuaʔ
+nina,nenah
+napewa,napɛw
+wapimini,wapemen
+namesa,namɛʔs
+okimawa,okemaw
+ʃiʃipa,seʔsep
+ahkohkwa,ahkɛh
+pematesiweni,pematesewen
+asenja,aʔsɛn"""
+
+
+if __name__ == "__main__":
+    demo()
\ No newline at end of file
```
