diff options
| author | Matthieu Pignolet <matthieu@puffer.fish> | 2025-05-18 18:58:51 +0400 |
|---|---|---|
| committer | Matthieu Pignolet <matthieu@puffer.fish> | 2025-05-18 18:58:51 +0400 |
| commit | 5918da5103683fa33efdb5d47639fdde6bdd50ab (patch) | |
| tree | f7c4d48a438c48f69c27410794c49cb4c678bdcb | |
| parent | 08c9b6b9335c3fe7e41ff951e60f30ac8cbba94b (diff) | |
feat: deleting old files
| -rw-r--r-- | bin/aline-demo/.gitignore | 1 | ||||
| -rw-r--r-- | bin/aline-demo/Cargo.lock | 117 | ||||
| -rw-r--r-- | bin/aline-demo/Cargo.toml | 7 | ||||
| -rw-r--r-- | bin/aline-demo/src/main.rs | 102 | ||||
| -rw-r--r-- | src/constants.rs | 33 | ||||
| -rw-r--r-- | src/extract.json | 1009 | ||||
| -rwxr-xr-x | src/extract.py | 20 | ||||
| -rw-r--r-- | src/lib.rs | 236 |
8 files changed, 0 insertions, 1525 deletions
diff --git a/bin/aline-demo/.gitignore b/bin/aline-demo/.gitignore deleted file mode 100644 index 9f97022..0000000 --- a/bin/aline-demo/.gitignore +++ /dev/null @@ -1 +0,0 @@ -target/
\ No newline at end of file diff --git a/bin/aline-demo/Cargo.lock b/bin/aline-demo/Cargo.lock deleted file mode 100644 index 787cfb2..0000000 --- a/bin/aline-demo/Cargo.lock +++ /dev/null @@ -1,117 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 4 - -[[package]] -name = "aline" -version = "0.1.0" -dependencies = [ - "array2d", - "once_cell", - "serde", - "serde_json", -] - -[[package]] -name = "aline-demo" -version = "0.1.0" -dependencies = [ - "aline", -] - -[[package]] -name = "array2d" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8b39cb2c1bf5a7c0dd097aa95ab859cf87dab5a4328900f5388942dc1889f74" - -[[package]] -name = "itoa" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" - -[[package]] -name = "memchr" -version = "2.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" - -[[package]] -name = "once_cell" -version = "1.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" - -[[package]] -name = "proc-macro2" -version = "1.0.95" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "quote" -version = "1.0.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "ryu" -version = "1.0.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" - -[[package]] -name = "serde" -version = "1.0.219" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.219" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.140" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" -dependencies = [ - "itoa", - "memchr", - "ryu", - "serde", -] - -[[package]] -name = "syn" -version = "2.0.101" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "unicode-ident" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" diff --git a/bin/aline-demo/Cargo.toml b/bin/aline-demo/Cargo.toml deleted file mode 100644 index 29aaab5..0000000 --- a/bin/aline-demo/Cargo.toml +++ /dev/null @@ -1,7 +0,0 @@ -[package] -name = "aline-demo" -version = "0.1.0" -edition = "2024" - -[dependencies] -aline = { path = "../../" }
\ No newline at end of file diff --git a/bin/aline-demo/src/main.rs b/bin/aline-demo/src/main.rs deleted file mode 100644 index 619c0eb..0000000 --- a/bin/aline-demo/src/main.rs +++ /dev/null @@ -1,102 +0,0 @@ -use aline; - -fn main() { - let mut data: Vec<(String, String)> = vec![]; - for line in COGNATE_DATE.split("\n") { - let mut pair = line.split(","); - - let a = pair.next().unwrap().to_string(); - let b = pair.next().unwrap().to_string(); - - data.push((a, b)); - } - - - for (a, b) in data.into_iter() { - let alignment = &aline::align(a.clone(), b.clone(), None)[0]; - - print!("{} ~ {} : ", a, b); - for alignment in alignment { - let alignment = &alignment; - print!("{} {}", alignment.0, alignment.1); - } - print!("\n"); - } -} - - -const COGNATE_DATE: &str = r"jo,ʒə -tu,ty -nosotros,nu -kjen,ki -ke,kwa -todos,tu -una,ən -dos,dø -tres,trwa -ombre,om -arbol,arbrə -pluma,plym -kabeθa,kap -boka,buʃ -pje,pje -koraθon,kœr -ber,vwar -benir,vənir -deθir,dir -pobre,povrə -ðis,dIzes -ðæt,das -wat,vas -nat,nixt -loŋ,laŋ -mæn,man -fleʃ,flajʃ -bləd,blyt -feðər,fEdər -hær,hAr -ir,Or -aj,awgə -nowz,nAzə -mawθ,munt -təŋ,tsuŋə -fut,fys -nij,knI -hænd,hant -hart,herts -livər,lEbər -ænd,ante -æt,ad -blow,flAre -ir,awris -ijt,edere -fiʃ,piʃkis -flow,fluere -staɾ,stella -ful,plenus -græs,gramen -hart,kordis -horn,korny -aj,ego -nij,genU -məðər,mAter -mawntən,mons -nejm,nomen -njuw,nowus -wən,unus -rawnd,rotundus -sow,suere -sit,sedere -θrij,tres -tuwθ,dentis -θin,tenwis -kinwawa,kenuaʔ -nina,nenah -napewa,napɛw -wapimini,wapemen -namesa,namɛʔs -okimawa,okemaw -ʃiʃipa,seʔsep -ahkohkwa,ahkɛh -pematesiweni,pematesewen -asenja,aʔsɛn";
\ No newline at end of file diff --git a/src/constants.rs b/src/constants.rs deleted file mode 100644 index e18dc3d..0000000 --- a/src/constants.rs +++ /dev/null @@ -1,33 +0,0 @@ - - -use std::collections::{HashMap, HashSet}; - -use once_cell::sync::Lazy; -use serde::{Serialize, Deserialize}; - -#[derive(Debug, Serialize, Deserialize)] -pub(crate) struct Extracted { - #[serde(rename = "C_skip")] - pub cskip: f32, - #[serde(rename = "C_sub")] - pub csub: f32, - #[serde(rename = "C_exp")] - pub cexp: f32, - #[serde(rename = "C_vwl")] - pub cvwl: f32, - pub consonants: HashSet<String>, - #[serde(rename = "R_c")] - pub rc: HashSet<String>, - #[serde(rename = "R_v")] - pub rv: HashSet<String>, - pub similarity_matrix: HashMap<String, f32>, - pub salience: HashMap<String, f32>, - pub feature_matrix: HashMap<String, HashMap<String, String>>, -} - - -const EXTRACTED_JSON: &str = include_str!("extract.json"); -pub static EXTRACTED: Lazy<Extracted> = Lazy::new(|| { - serde_json::from_str(EXTRACTED_JSON).unwrap() -}); - diff --git a/src/extract.json b/src/extract.json deleted file mode 100644 index 7100910..0000000 --- a/src/extract.json +++ /dev/null @@ -1,1009 +0,0 @@ -{ - "C_skip": 10, - "C_sub": 35, - "C_exp": 45, - "C_vwl": 5, - "consonants": [ - "B", - "N", - "R", - "b", - "c", - "d", - "f", - "g", - "h", - "j", - "k", - "l", - "m", - "n", - "p", - "q", - "r", - "s", - "t", - "v", - "x", - "z", - "ç", - "ð", - "ħ", - "ŋ", - "ɖ", - "ɟ", - "ɢ", - "ɣ", - "ɦ", - "ɬ", - "ɮ", - "ɰ", - "ɱ", - "ɲ", - "ɳ", - "ɴ", - "ɸ", - "ɹ", - "ɻ", - "ɽ", - "ɾ", - "ʀ", - "ʁ", - "ʂ", - "ʃ", - "ʈ", - "ʋ", - "ʐ ", - "ʒ", - "ʔ", - "ʕ", - "ʙ", - "ʝ", - "β", - "θ", - "χ", - "ʐ", - "w" - ], - "R_c": [ - "aspirated", - "lateral", - "manner", - "nasal", - "place", - "retroflex", - "syllabic", - "voice" - ], - "R_v": [ - "back", - "lateral", - "long", - "manner", - "nasal", - "place", - "retroflex", - "round", - "syllabic", - "voice" - ], - "similarity_matrix": { - "bilabial": 1.0, - "labiodental": 0.95, - "dental": 0.9, - "alveolar": 0.85, - "retroflex": 0.8, - "palato-alveolar": 0.75, - "palatal": 0.7, - "velar": 0.6, - "uvular": 0.5, - "pharyngeal": 0.3, - "glottal": 0.1, - "labiovelar": 1.0, - "vowel": -1.0, - "stop": 1.0, - "affricate": 0.9, - "fricative": 0.85, - "trill": 0.7, - "tap": 0.65, - "approximant": 0.6, - "high vowel": 0.4, - "mid vowel": 0.2, - "low vowel": 0.0, - "vowel2": 0.5, - "high": 1.0, - "mid": 0.5, - "low": 0.0, - "front": 1.0, - "central": 0.5, - "back": 0.0, - "plus": 1.0, - "minus": 0.0 - }, - "salience": { - "syllabic": 5, - "place": 40, - "manner": 50, - "voice": 5, - "nasal": 20, - "retroflex": 10, - "lateral": 10, - "aspirated": 5, - "long": 0, - "high": 3, - "back": 2, - "round": 2 - }, - "feature_matrix": { - "p": { - "place": "bilabial", - "manner": "stop", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "b": { - "place": "bilabial", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "t": { - "place": "alveolar", - "manner": "stop", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "d": { - "place": "alveolar", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ʈ": { - "place": "retroflex", - "manner": "stop", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "plus", - "lateral": "minus", - "aspirated": "minus" - }, - "ɖ": { - "place": "retroflex", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "plus", - "lateral": "minus", - "aspirated": "minus" - }, - "c": { - "place": "palatal", - "manner": "stop", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ɟ": { - "place": "palatal", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "k": { - "place": "velar", - "manner": "stop", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "g": { - "place": "velar", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "q": { - "place": "uvular", - "manner": "stop", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ɢ": { - "place": "uvular", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ʔ": { - "place": "glottal", - "manner": "stop", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "m": { - "place": "bilabial", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "plus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ɱ": { - "place": "labiodental", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "plus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "n": { - "place": "alveolar", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "plus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ɳ": { - "place": "retroflex", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "plus", - "retroflex": "plus", - "lateral": "minus", - "aspirated": "minus" - }, - "ɲ": { - "place": "palatal", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "plus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ŋ": { - "place": "velar", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "plus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ɴ": { - "place": "uvular", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "plus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "N": { - "place": "uvular", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "plus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ʙ": { - "place": "bilabial", - "manner": "trill", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "B": { - "place": "bilabial", - "manner": "trill", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "r": { - "place": "alveolar", - "manner": "trill", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "plus", - "lateral": "minus", - "aspirated": "minus" - }, - "ʀ": { - "place": "uvular", - "manner": "trill", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "R": { - "place": "uvular", - "manner": "trill", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ɾ": { - "place": "alveolar", - "manner": "tap", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ɽ": { - "place": "retroflex", - "manner": "tap", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "plus", - "lateral": "minus", - "aspirated": "minus" - }, - "ɸ": { - "place": "bilabial", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "β": { - "place": "bilabial", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "f": { - "place": "labiodental", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "v": { - "place": "labiodental", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "θ": { - "place": "dental", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ð": { - "place": "dental", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "s": { - "place": "alveolar", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "z": { - "place": "alveolar", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ʃ": { - "place": "palato-alveolar", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ʒ": { - "place": "palato-alveolar", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ʂ": { - "place": "retroflex", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "plus", - "lateral": "minus", - "aspirated": "minus" - }, - "ʐ": { - "place": "retroflex", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "plus", - "lateral": "minus", - "aspirated": "minus" - }, - "ç": { - "place": "palatal", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ʝ": { - "place": "palatal", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "x": { - "place": "velar", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ɣ": { - "place": "velar", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "χ": { - "place": "uvular", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ʁ": { - "place": "uvular", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ħ": { - "place": "pharyngeal", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ʕ": { - "place": "pharyngeal", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "h": { - "place": "glottal", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ɦ": { - "place": "glottal", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ɬ": { - "place": "alveolar", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "plus", - "aspirated": "minus" - }, - "ɮ": { - "place": "alveolar", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "plus", - "aspirated": "minus" - }, - "ʋ": { - "place": "labiodental", - "manner": "approximant", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ɹ": { - "place": "alveolar", - "manner": "approximant", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ɻ": { - "place": "retroflex", - "manner": "approximant", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "plus", - "lateral": "minus", - "aspirated": "minus" - }, - "j": { - "place": "palatal", - "manner": "approximant", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "ɰ": { - "place": "velar", - "manner": "approximant", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "l": { - "place": "alveolar", - "manner": "approximant", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "plus", - "aspirated": "minus" - }, - "w": { - "place": "labiovelar", - "manner": "approximant", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus" - }, - "i": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "high", - "back": "front", - "round": "minus", - "long": "minus", - "aspirated": "minus" - }, - "y": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "high", - "back": "front", - "round": "plus", - "long": "minus", - "aspirated": "minus" - }, - "e": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "mid", - "back": "front", - "round": "minus", - "long": "minus", - "aspirated": "minus" - }, - "E": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "mid", - "back": "front", - "round": "minus", - "long": "plus", - "aspirated": "minus" - }, - "ø": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "mid", - "back": "front", - "round": "plus", - "long": "minus", - "aspirated": "minus" - }, - "ɛ": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "mid", - "back": "front", - "round": "minus", - "long": "minus", - "aspirated": "minus" - }, - "œ": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "mid", - "back": "front", - "round": "plus", - "long": "minus", - "aspirated": "minus" - }, - "æ": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "low", - "back": "front", - "round": "minus", - "long": "minus", - "aspirated": "minus" - }, - "a": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "low", - "back": "front", - "round": "minus", - "long": "minus", - "aspirated": "minus" - }, - "A": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "low", - "back": "front", - "round": "minus", - "long": "plus", - "aspirated": "minus" - }, - "ɨ": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "high", - "back": "central", - "round": "minus", - "long": "minus", - "aspirated": "minus" - }, - "ʉ": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "high", - "back": "central", - "round": "plus", - "long": "minus", - "aspirated": "minus" - }, - "ə": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "mid", - "back": "central", - "round": "minus", - "long": "minus", - "aspirated": "minus" - }, - "u": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "high", - "back": "back", - "round": "plus", - "long": "minus", - "aspirated": "minus" - }, - "U": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "high", - "back": "back", - "round": "plus", - "long": "plus", - "aspirated": "minus" - }, - "o": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "mid", - "back": "back", - "round": "plus", - "long": "minus", - "aspirated": "minus" - }, - "O": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "mid", - "back": "back", - "round": "plus", - "long": "plus", - "aspirated": "minus" - }, - "ɔ": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "mid", - "back": "back", - "round": "plus", - "long": "minus", - "aspirated": "minus" - }, - "ɒ": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "low", - "back": "back", - "round": "minus", - "long": "minus", - "aspirated": "minus" - }, - "I": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "high", - "back": "front", - "round": "minus", - "long": "plus", - "aspirated": "minus" - } - } -} diff --git a/src/extract.py b/src/extract.py deleted file mode 100755 index b2faba5..0000000 --- a/src/extract.py +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env python3 - -from nltk.metrics.aline import * -import json - -extract = { - 'C_skip': C_skip, - 'C_sub': C_sub, - 'C_exp': C_exp, - 'C_vwl': C_vwl, - - 'consonants': consonants, - 'R_c': R_c, - 'R_v': R_v, - 'similarity_matrix': similarity_matrix, - 'salience': salience, - 'feature_matrix': feature_matrix, -} - -print(json.dumps(extract, indent=2, ensure_ascii=False)) diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index de0db50..0000000 --- a/src/lib.rs +++ /dev/null @@ -1,236 +0,0 @@ -use array2d::Array2D; -use core::f32; -use std::collections::{HashMap, HashSet}; -mod constants; - -use constants::EXTRACTED; - -/// Compute the alignment of two phonetic strings. -/// -/// (Kondrak 2002: 51) -pub fn align(str1: String, str2: String, epsilon: Option<f32>) -> Vec<Vec<(String, String)>> { - let epsilon = epsilon - .or(Some(0f32)) - .expect("the default value of epsilon should be 0"); - - assert!( - 0f32 <= epsilon && epsilon <= 1f32, - "Epsilon must be between 0.0 and 1.0." - ); - - let m = str1.len(); - let n = str2.len(); - - let mut S = Array2D::filled_with(0f32, m + 1, n + 1); - - for i in 1..m + 1 { - for j in 1..n + 1 { - let mut edit: [f32; 5] = [ - S[(i - 1, j)] + sigma_skip(&str1[i - 1..i - 1]), - S[(i, j - 1)] + sigma_skip(&str2[j - 1..j - 1]), - S[(i - 1, j - 1)] + sigma_sub(&str1[i - 1..i - 1], &str2[j - 1..j - 1]), - f32::MIN, - f32::MIN, - ]; - - if i > 1 { - edit[3] = S[(i - 2, j - 1)] + sigma_exp(&str2[j - 1..j - 1], &str1[i - 2..i]); - } - - if j > 1 { - edit[4] = S[(i - 1, j - 2)] + sigma_exp(&str1[i - 1..i - 1], &str2[j - 2..j]); - } - - S[(i, j)] = edit - .into_iter() - .max_by(|x, y| x.abs().partial_cmp(&y.abs()).expect("a S row was empty")) - .expect("the S matrix is empty"); - } - } - - // T = (1 - epsilon) * np.amax(S) - let T = (1f32 - epsilon) - * S.rows_iter() - .map(|r| { - r.max_by(|x, y| x.abs().partial_cmp(&y.abs()).expect("a S row is empty")) - .expect("a S row is empty") - }) - .max_by(|x, y| x.abs().partial_cmp(&y.abs()).expect("S is empty")) - .expect("S is empty"); - - let mut alignments: Vec<Vec<(String, String)>> = vec![]; - for i in 1..m + 1 { - for j in 1..n + 1 { - if S[(i, j)] >= T { - let mut al = vec![]; - _retrieve(i, j, 0f32, &S, T, &str1, &str2, &mut al); - alignments.push(al); - } - } - } - - alignments -} - -/// Retrieve the path through the similarity matrix S starting at (i, j). -fn _retrieve( - i: usize, - j: usize, - s: f32, - S: &Array2D<f32>, - T: f32, - str1: &str, - str2: &str, - out: &mut Vec<(String, String)>, -) { - if S[(i, j)] == 0f32 { - return; - } - - if j > 1 && S[(i - 1, j - 2)] + sigma_exp(&str1[i - 1..i - 1], &str2[j - 2..j]) + s >= T { - out.insert( - 0, - (str1[i - 1..i - 1].to_string(), str2[j - 2..j].to_string()), - ); - - _retrieve( - i - 1, - j - 2, - s + sigma_exp(&str1[i - 1..i - 1], &str2[j - 2..j]), - S, - T, - str1, - str2, - out, - ); - } else if i > 1 && S[(i - 2, j - 1)] + sigma_exp(&str2[j - 1..j - 1], &str1[i - 2..i]) + s >= T - { - out.insert( - 0, - (str1[i - 2..i].to_string(), str2[j - 1..j - 1].to_string()), - ); - - _retrieve( - i - 2, - j - 1, - s + sigma_exp(&str2[j - 1..j - 1], &str1[i - 2..i]), - S, - T, - str1, - str2, - out, - ); - } else if S[(i, j - 1)] + sigma_skip(&str2[j - 1..j - 1]) + s >= T { - out.insert(0, ("-".to_string(), str2[j - 1..j - 1].to_string())); - - _retrieve( - i, - j - 1, - s + sigma_skip(&str2[j - 1..j - 1]), - S, - T, - str1, - str2, - out, - ); - } else if S[(i - 1, j)] + sigma_skip(&str1[i - 1..i - 1]) + s >= T { - out.insert(0, (str1[i - 1..i - 1].to_string(), "-".to_string())); - _retrieve( - i - 1, - j, - s + sigma_skip(&str1[i - 1..i - 1]), - S, - T, - str1, - str2, - out, - ); - } else if S[(i - 1, j - 1)] + sigma_sub(&str1[i - 1..i - 1], &str2[j - 1..j - 1]) + s >= T { - out.insert( - 0, - ( - str1[i - 1..i - 1].to_string(), - str2[j - 1..j - 1].to_string(), - ), - ); - _retrieve( - i - 1, - j - 1, - s + sigma_sub(&str1[i - 1..i - 1], &str2[j - 1..j - 1]), - S, - T, - str1, - str2, - out, - ); - } -} - -/// Returns score of an indel of P. -/// -/// (Kondrak 2002: 54) -fn sigma_skip(_p: &str) -> f32 { - return EXTRACTED.cskip; -} - -/// Returns score of a substitution of P with Q. -/// -/// (Kondrak 2002: 54) -fn sigma_sub(p: &str, q: &str) -> f32 { - return EXTRACTED.csub - delta(p, q) - V(p) - V(q); -} - -/// Returns score of an expansion/compression. -/// -/// (Kondrak 2002: 54) -fn sigma_exp(p: &str, q: &str) -> f32 { - let q1 = &q[0..0]; - let q2 = &q[1..1]; - - return EXTRACTED.cexp - delta(p, q1) - delta(p, q2) - V(p) - f32::max(V(&q1), V(&q2)); -} - -/// Return weighted sum of difference between P and Q. -/// -/// (Kondrak 2002: 54) -fn delta(p: &str, q: &str) -> f32 { - let features = R(p, q); - let mut total = 0f32; - - for f in features { - total += diff(p, q, f) * EXTRACTED.salience[f]; - } - - return total; -} -/// Returns difference between phonetic segments P and Q for feature F. -/// -/// (Kondrak 2002: 52, 54) -fn diff(p: &str, q: &str, f: &str) -> f32 { - let p_features: &HashMap<String, String> = &EXTRACTED.feature_matrix[p]; - let q_features: &HashMap<String, String> = &EXTRACTED.feature_matrix[q]; - - return (EXTRACTED.similarity_matrix[&p_features[f]] - - EXTRACTED.similarity_matrix[&q_features[f]]) - .abs(); -} - -/// Return relevant features for segment comparison. -/// -/// (Kondrak 2002: 54) -fn R(p: &str, q: &str) -> &'static HashSet<std::string::String> { - if EXTRACTED.consonants.contains(p) || EXTRACTED.consonants.contains(q) { - return &EXTRACTED.rc; - } - return &EXTRACTED.rc; -} - -/// Return vowel weight if P is vowel. -/// -/// (Kondrak 2002: 54) -fn V(p: &str) -> f32 { - if EXTRACTED.consonants.contains(p) { - return 0f32; - } - return EXTRACTED.cvwl; -} |
