summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matthieu Pignolet <matthieu@puffer.fish> 2025-05-18 19:01:45 +0400
committer Matthieu Pignolet <matthieu@puffer.fish> 2025-05-18 19:01:45 +0400
commit bf476d486b5362805a34bafbf277d3377a7957a0 (patch)
tree 48b2bd1abe91b99af126a3c369e3cc3dbb3778bb
parent a66ec73159f71d62097e5634acb956d1428d2a42 (diff)
feat: adding a compare script to compare aline-rs with the nltk implementation
-rw-r--r-- compare/.gitignore | 1
-rwxr-xr-x compare/compare.sh | 6
-rw-r--r-- compare/reference.py | 94
3 files changed, 101 insertions, 0 deletions
diff --git a/compare/.gitignore b/compare/.gitignore
new file mode 100644
index 0000000..12782d5
--- /dev/null
+++ b/compare/.gitignore
@@ -0,0 +1 @@
+*.aline
\ No newline at end of file
diff --git a/compare/compare.sh b/compare/compare.sh
new file mode 100755
index 0000000..53a4112
--- /dev/null
+++ b/compare/compare.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+# Diff the NLTK reference output against aline-rs; run from this script's directory.
+cd "$(dirname "$0")" || exit 1
+python ./reference.py > reference.aline
+cargo run > aline-rs.aline
+git diff --no-index reference.aline aline-rs.aline > difference.aline
diff --git a/compare/reference.py b/compare/reference.py
new file mode 100644
index 0000000..99b2d30
--- /dev/null
+++ b/compare/reference.py
@@ -0,0 +1,94 @@
+from nltk.metrics.aline import *
+
+def demo():
+    """
+    Print, for each comma-separated word pair in ``cognate_data`` (phonetic
+    sequences from Kondrak's 2002 dissertation), its first ALINE alignment.
+    """
+    data = [pair.split(",") for pair in cognate_data.split("\n")]
+    for pair in data:
+        # Only the first alignment returned by align() is printed.
+        alignment = align(pair[0], pair[1])[0]
+        alignment = " ".join(f"({a[0]}, {a[1]})" for a in alignment)
+        print(f"{pair[0]} ~ {pair[1]} : {alignment}")
+
+# One "word_a,word_b" pair per line; demo() splits on newlines, then commas.
+cognate_data = """jo,ʒə
+tu,ty
+nosotros,nu
+kjen,ki
+ke,kwa
+todos,tu
+una,ən
+dos,dø
+tres,trwa
+ombre,om
+arbol,arbrə
+pluma,plym
+kabeθa,kap
+boka,buʃ
+pje,pje
+koraθon,kœr
+ber,vwar
+benir,vənir
+deθir,dir
+pobre,povrə
+ðis,dIzes
+ðæt,das
+wat,vas
+nat,nixt
+loŋ,laŋ
+mæn,man
+fleʃ,flajʃ
+bləd,blyt
+feðər,fEdər
+hær,hAr
+ir,Or
+aj,awgə
+nowz,nAzə
+mawθ,munt
+təŋ,tsuŋə
+fut,fys
+nij,knI
+hænd,hant
+hart,herts
+livər,lEbər
+ænd,ante
+æt,ad
+blow,flAre
+ir,awris
+ijt,edere
+fiʃ,piʃkis
+flow,fluere
+staɾ,stella
+ful,plenus
+græs,gramen
+hart,kordis
+horn,korny
+aj,ego
+nij,genU
+məðər,mAter
+mawntən,mons
+nejm,nomen
+njuw,nowus
+wən,unus
+rawnd,rotundus
+sow,suere
+sit,sedere
+θrij,tres
+tuwθ,dentis
+θin,tenwis
+kinwawa,kenuaʔ
+nina,nenah
+napewa,napɛw
+wapimini,wapemen
+namesa,namɛʔs
+okimawa,okemaw
+ʃiʃipa,seʔsep
+ahkohkwa,ahkɛh
+pematesiweni,pematesewen
+asenja,aʔsɛn"""
+
+
+if __name__ == "__main__":
+    demo()  # print every alignment when executed as a script
\ No newline at end of file