summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthieu Pignolet <matthieu@puffer.fish>2025-05-18 18:58:51 +0400
committerMatthieu Pignolet <matthieu@puffer.fish>2025-05-18 18:58:51 +0400
commit5918da5103683fa33efdb5d47639fdde6bdd50ab (patch)
treef7c4d48a438c48f69c27410794c49cb4c678bdcb
parent08c9b6b9335c3fe7e41ff951e60f30ac8cbba94b (diff)
feat: deleting old files
-rw-r--r--bin/aline-demo/.gitignore1
-rw-r--r--bin/aline-demo/Cargo.lock117
-rw-r--r--bin/aline-demo/Cargo.toml7
-rw-r--r--bin/aline-demo/src/main.rs102
-rw-r--r--src/constants.rs33
-rw-r--r--src/extract.json1009
-rwxr-xr-xsrc/extract.py20
-rw-r--r--src/lib.rs236
8 files changed, 0 insertions, 1525 deletions
diff --git a/bin/aline-demo/.gitignore b/bin/aline-demo/.gitignore
deleted file mode 100644
index 9f97022..0000000
--- a/bin/aline-demo/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-target/ \ No newline at end of file
diff --git a/bin/aline-demo/Cargo.lock b/bin/aline-demo/Cargo.lock
deleted file mode 100644
index 787cfb2..0000000
--- a/bin/aline-demo/Cargo.lock
+++ /dev/null
@@ -1,117 +0,0 @@
-# This file is automatically @generated by Cargo.
-# It is not intended for manual editing.
-version = 4
-
-[[package]]
-name = "aline"
-version = "0.1.0"
-dependencies = [
- "array2d",
- "once_cell",
- "serde",
- "serde_json",
-]
-
-[[package]]
-name = "aline-demo"
-version = "0.1.0"
-dependencies = [
- "aline",
-]
-
-[[package]]
-name = "array2d"
-version = "0.3.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d8b39cb2c1bf5a7c0dd097aa95ab859cf87dab5a4328900f5388942dc1889f74"
-
-[[package]]
-name = "itoa"
-version = "1.0.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
-
-[[package]]
-name = "memchr"
-version = "2.7.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
-
-[[package]]
-name = "once_cell"
-version = "1.21.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
-
-[[package]]
-name = "proc-macro2"
-version = "1.0.95"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
-dependencies = [
- "unicode-ident",
-]
-
-[[package]]
-name = "quote"
-version = "1.0.40"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
-dependencies = [
- "proc-macro2",
-]
-
-[[package]]
-name = "ryu"
-version = "1.0.20"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
-
-[[package]]
-name = "serde"
-version = "1.0.219"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
-dependencies = [
- "serde_derive",
-]
-
-[[package]]
-name = "serde_derive"
-version = "1.0.219"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "serde_json"
-version = "1.0.140"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
-dependencies = [
- "itoa",
- "memchr",
- "ryu",
- "serde",
-]
-
-[[package]]
-name = "syn"
-version = "2.0.101"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf"
-dependencies = [
- "proc-macro2",
- "quote",
- "unicode-ident",
-]
-
-[[package]]
-name = "unicode-ident"
-version = "1.0.18"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
diff --git a/bin/aline-demo/Cargo.toml b/bin/aline-demo/Cargo.toml
deleted file mode 100644
index 29aaab5..0000000
--- a/bin/aline-demo/Cargo.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[package]
-name = "aline-demo"
-version = "0.1.0"
-edition = "2024"
-
-[dependencies]
-aline = { path = "../../" } \ No newline at end of file
diff --git a/bin/aline-demo/src/main.rs b/bin/aline-demo/src/main.rs
deleted file mode 100644
index 619c0eb..0000000
--- a/bin/aline-demo/src/main.rs
+++ /dev/null
@@ -1,102 +0,0 @@
-use aline;
-
-fn main() {
- let mut data: Vec<(String, String)> = vec![];
- for line in COGNATE_DATE.split("\n") {
- let mut pair = line.split(",");
-
- let a = pair.next().unwrap().to_string();
- let b = pair.next().unwrap().to_string();
-
- data.push((a, b));
- }
-
-
- for (a, b) in data.into_iter() {
- let alignment = &aline::align(a.clone(), b.clone(), None)[0];
-
- print!("{} ~ {} : ", a, b);
- for alignment in alignment {
- let alignment = &alignment;
- print!("{} {}", alignment.0, alignment.1);
- }
- print!("\n");
- }
-}
-
-
-const COGNATE_DATE: &str = r"jo,ʒə
-tu,ty
-nosotros,nu
-kjen,ki
-ke,kwa
-todos,tu
-una,ən
-dos,dø
-tres,trwa
-ombre,om
-arbol,arbrə
-pluma,plym
-kabeθa,kap
-boka,buʃ
-pje,pje
-koraθon,kœr
-ber,vwar
-benir,vənir
-deθir,dir
-pobre,povrə
-ðis,dIzes
-ðæt,das
-wat,vas
-nat,nixt
-loŋ,laŋ
-mæn,man
-fleʃ,flajʃ
-bləd,blyt
-feðər,fEdər
-hær,hAr
-ir,Or
-aj,awgə
-nowz,nAzə
-mawθ,munt
-təŋ,tsuŋə
-fut,fys
-nij,knI
-hænd,hant
-hart,herts
-livər,lEbər
-ænd,ante
-æt,ad
-blow,flAre
-ir,awris
-ijt,edere
-fiʃ,piʃkis
-flow,fluere
-staɾ,stella
-ful,plenus
-græs,gramen
-hart,kordis
-horn,korny
-aj,ego
-nij,genU
-məðər,mAter
-mawntən,mons
-nejm,nomen
-njuw,nowus
-wən,unus
-rawnd,rotundus
-sow,suere
-sit,sedere
-θrij,tres
-tuwθ,dentis
-θin,tenwis
-kinwawa,kenuaʔ
-nina,nenah
-napewa,napɛw
-wapimini,wapemen
-namesa,namɛʔs
-okimawa,okemaw
-ʃiʃipa,seʔsep
-ahkohkwa,ahkɛh
-pematesiweni,pematesewen
-asenja,aʔsɛn"; \ No newline at end of file
diff --git a/src/constants.rs b/src/constants.rs
deleted file mode 100644
index e18dc3d..0000000
--- a/src/constants.rs
+++ /dev/null
@@ -1,33 +0,0 @@
-
-
-use std::collections::{HashMap, HashSet};
-
-use once_cell::sync::Lazy;
-use serde::{Serialize, Deserialize};
-
-#[derive(Debug, Serialize, Deserialize)]
-pub(crate) struct Extracted {
- #[serde(rename = "C_skip")]
- pub cskip: f32,
- #[serde(rename = "C_sub")]
- pub csub: f32,
- #[serde(rename = "C_exp")]
- pub cexp: f32,
- #[serde(rename = "C_vwl")]
- pub cvwl: f32,
- pub consonants: HashSet<String>,
- #[serde(rename = "R_c")]
- pub rc: HashSet<String>,
- #[serde(rename = "R_v")]
- pub rv: HashSet<String>,
- pub similarity_matrix: HashMap<String, f32>,
- pub salience: HashMap<String, f32>,
- pub feature_matrix: HashMap<String, HashMap<String, String>>,
-}
-
-
-const EXTRACTED_JSON: &str = include_str!("extract.json");
-pub static EXTRACTED: Lazy<Extracted> = Lazy::new(|| {
- serde_json::from_str(EXTRACTED_JSON).unwrap()
-});
-
diff --git a/src/extract.json b/src/extract.json
deleted file mode 100644
index 7100910..0000000
--- a/src/extract.json
+++ /dev/null
@@ -1,1009 +0,0 @@
-{
- "C_skip": 10,
- "C_sub": 35,
- "C_exp": 45,
- "C_vwl": 5,
- "consonants": [
- "B",
- "N",
- "R",
- "b",
- "c",
- "d",
- "f",
- "g",
- "h",
- "j",
- "k",
- "l",
- "m",
- "n",
- "p",
- "q",
- "r",
- "s",
- "t",
- "v",
- "x",
- "z",
- "ç",
- "ð",
- "ħ",
- "ŋ",
- "ɖ",
- "ɟ",
- "ɢ",
- "ɣ",
- "ɦ",
- "ɬ",
- "ɮ",
- "ɰ",
- "ɱ",
- "ɲ",
- "ɳ",
- "ɴ",
- "ɸ",
- "ɹ",
- "ɻ",
- "ɽ",
- "ɾ",
- "ʀ",
- "ʁ",
- "ʂ",
- "ʃ",
- "ʈ",
- "ʋ",
- "ʐ ",
- "ʒ",
- "ʔ",
- "ʕ",
- "ʙ",
- "ʝ",
- "β",
- "θ",
- "χ",
- "ʐ",
- "w"
- ],
- "R_c": [
- "aspirated",
- "lateral",
- "manner",
- "nasal",
- "place",
- "retroflex",
- "syllabic",
- "voice"
- ],
- "R_v": [
- "back",
- "lateral",
- "long",
- "manner",
- "nasal",
- "place",
- "retroflex",
- "round",
- "syllabic",
- "voice"
- ],
- "similarity_matrix": {
- "bilabial": 1.0,
- "labiodental": 0.95,
- "dental": 0.9,
- "alveolar": 0.85,
- "retroflex": 0.8,
- "palato-alveolar": 0.75,
- "palatal": 0.7,
- "velar": 0.6,
- "uvular": 0.5,
- "pharyngeal": 0.3,
- "glottal": 0.1,
- "labiovelar": 1.0,
- "vowel": -1.0,
- "stop": 1.0,
- "affricate": 0.9,
- "fricative": 0.85,
- "trill": 0.7,
- "tap": 0.65,
- "approximant": 0.6,
- "high vowel": 0.4,
- "mid vowel": 0.2,
- "low vowel": 0.0,
- "vowel2": 0.5,
- "high": 1.0,
- "mid": 0.5,
- "low": 0.0,
- "front": 1.0,
- "central": 0.5,
- "back": 0.0,
- "plus": 1.0,
- "minus": 0.0
- },
- "salience": {
- "syllabic": 5,
- "place": 40,
- "manner": 50,
- "voice": 5,
- "nasal": 20,
- "retroflex": 10,
- "lateral": 10,
- "aspirated": 5,
- "long": 0,
- "high": 3,
- "back": 2,
- "round": 2
- },
- "feature_matrix": {
- "p": {
- "place": "bilabial",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "minus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "b": {
- "place": "bilabial",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "t": {
- "place": "alveolar",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "minus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "d": {
- "place": "alveolar",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ʈ": {
- "place": "retroflex",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "minus",
- "nasal": "minus",
- "retroflex": "plus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ɖ": {
- "place": "retroflex",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "plus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "c": {
- "place": "palatal",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "minus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ɟ": {
- "place": "palatal",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "k": {
- "place": "velar",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "minus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "g": {
- "place": "velar",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "q": {
- "place": "uvular",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "minus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ɢ": {
- "place": "uvular",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ʔ": {
- "place": "glottal",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "minus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "m": {
- "place": "bilabial",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "plus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ɱ": {
- "place": "labiodental",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "plus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "n": {
- "place": "alveolar",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "plus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ɳ": {
- "place": "retroflex",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "plus",
- "retroflex": "plus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ɲ": {
- "place": "palatal",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "plus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ŋ": {
- "place": "velar",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "plus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ɴ": {
- "place": "uvular",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "plus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "N": {
- "place": "uvular",
- "manner": "stop",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "plus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ʙ": {
- "place": "bilabial",
- "manner": "trill",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "B": {
- "place": "bilabial",
- "manner": "trill",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "r": {
- "place": "alveolar",
- "manner": "trill",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "plus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ʀ": {
- "place": "uvular",
- "manner": "trill",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "R": {
- "place": "uvular",
- "manner": "trill",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ɾ": {
- "place": "alveolar",
- "manner": "tap",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ɽ": {
- "place": "retroflex",
- "manner": "tap",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "plus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ɸ": {
- "place": "bilabial",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "minus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "β": {
- "place": "bilabial",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "f": {
- "place": "labiodental",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "minus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "v": {
- "place": "labiodental",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "θ": {
- "place": "dental",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "minus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ð": {
- "place": "dental",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "s": {
- "place": "alveolar",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "minus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "z": {
- "place": "alveolar",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ʃ": {
- "place": "palato-alveolar",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "minus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ʒ": {
- "place": "palato-alveolar",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ʂ": {
- "place": "retroflex",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "minus",
- "nasal": "minus",
- "retroflex": "plus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ʐ": {
- "place": "retroflex",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "plus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ç": {
- "place": "palatal",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "minus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ʝ": {
- "place": "palatal",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "x": {
- "place": "velar",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "minus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ɣ": {
- "place": "velar",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "χ": {
- "place": "uvular",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "minus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ʁ": {
- "place": "uvular",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ħ": {
- "place": "pharyngeal",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "minus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ʕ": {
- "place": "pharyngeal",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "h": {
- "place": "glottal",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "minus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ɦ": {
- "place": "glottal",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ɬ": {
- "place": "alveolar",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "minus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "plus",
- "aspirated": "minus"
- },
- "ɮ": {
- "place": "alveolar",
- "manner": "fricative",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "plus",
- "aspirated": "minus"
- },
- "ʋ": {
- "place": "labiodental",
- "manner": "approximant",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ɹ": {
- "place": "alveolar",
- "manner": "approximant",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ɻ": {
- "place": "retroflex",
- "manner": "approximant",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "plus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "j": {
- "place": "palatal",
- "manner": "approximant",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "ɰ": {
- "place": "velar",
- "manner": "approximant",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "l": {
- "place": "alveolar",
- "manner": "approximant",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "plus",
- "aspirated": "minus"
- },
- "w": {
- "place": "labiovelar",
- "manner": "approximant",
- "syllabic": "minus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "aspirated": "minus"
- },
- "i": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "high",
- "back": "front",
- "round": "minus",
- "long": "minus",
- "aspirated": "minus"
- },
- "y": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "high",
- "back": "front",
- "round": "plus",
- "long": "minus",
- "aspirated": "minus"
- },
- "e": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "mid",
- "back": "front",
- "round": "minus",
- "long": "minus",
- "aspirated": "minus"
- },
- "E": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "mid",
- "back": "front",
- "round": "minus",
- "long": "plus",
- "aspirated": "minus"
- },
- "ø": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "mid",
- "back": "front",
- "round": "plus",
- "long": "minus",
- "aspirated": "minus"
- },
- "ɛ": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "mid",
- "back": "front",
- "round": "minus",
- "long": "minus",
- "aspirated": "minus"
- },
- "œ": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "mid",
- "back": "front",
- "round": "plus",
- "long": "minus",
- "aspirated": "minus"
- },
- "æ": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "low",
- "back": "front",
- "round": "minus",
- "long": "minus",
- "aspirated": "minus"
- },
- "a": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "low",
- "back": "front",
- "round": "minus",
- "long": "minus",
- "aspirated": "minus"
- },
- "A": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "low",
- "back": "front",
- "round": "minus",
- "long": "plus",
- "aspirated": "minus"
- },
- "ɨ": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "high",
- "back": "central",
- "round": "minus",
- "long": "minus",
- "aspirated": "minus"
- },
- "ʉ": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "high",
- "back": "central",
- "round": "plus",
- "long": "minus",
- "aspirated": "minus"
- },
- "ə": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "mid",
- "back": "central",
- "round": "minus",
- "long": "minus",
- "aspirated": "minus"
- },
- "u": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "high",
- "back": "back",
- "round": "plus",
- "long": "minus",
- "aspirated": "minus"
- },
- "U": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "high",
- "back": "back",
- "round": "plus",
- "long": "plus",
- "aspirated": "minus"
- },
- "o": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "mid",
- "back": "back",
- "round": "plus",
- "long": "minus",
- "aspirated": "minus"
- },
- "O": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "mid",
- "back": "back",
- "round": "plus",
- "long": "plus",
- "aspirated": "minus"
- },
- "ɔ": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "mid",
- "back": "back",
- "round": "plus",
- "long": "minus",
- "aspirated": "minus"
- },
- "ɒ": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "low",
- "back": "back",
- "round": "minus",
- "long": "minus",
- "aspirated": "minus"
- },
- "I": {
- "place": "vowel",
- "manner": "vowel2",
- "syllabic": "plus",
- "voice": "plus",
- "nasal": "minus",
- "retroflex": "minus",
- "lateral": "minus",
- "high": "high",
- "back": "front",
- "round": "minus",
- "long": "plus",
- "aspirated": "minus"
- }
- }
-}
diff --git a/src/extract.py b/src/extract.py
deleted file mode 100755
index b2faba5..0000000
--- a/src/extract.py
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env python3
-
-from nltk.metrics.aline import *
-import json
-
-extract = {
- 'C_skip': C_skip,
- 'C_sub': C_sub,
- 'C_exp': C_exp,
- 'C_vwl': C_vwl,
-
- 'consonants': consonants,
- 'R_c': R_c,
- 'R_v': R_v,
- 'similarity_matrix': similarity_matrix,
- 'salience': salience,
- 'feature_matrix': feature_matrix,
-}
-
-print(json.dumps(extract, indent=2, ensure_ascii=False))
diff --git a/src/lib.rs b/src/lib.rs
deleted file mode 100644
index de0db50..0000000
--- a/src/lib.rs
+++ /dev/null
@@ -1,236 +0,0 @@
-use array2d::Array2D;
-use core::f32;
-use std::collections::{HashMap, HashSet};
-mod constants;
-
-use constants::EXTRACTED;
-
-/// Compute the alignment of two phonetic strings.
-///
-/// (Kondrak 2002: 51)
-pub fn align(str1: String, str2: String, epsilon: Option<f32>) -> Vec<Vec<(String, String)>> {
- let epsilon = epsilon
- .or(Some(0f32))
- .expect("the default value of epsilon should be 0");
-
- assert!(
- 0f32 <= epsilon && epsilon <= 1f32,
- "Epsilon must be between 0.0 and 1.0."
- );
-
- let m = str1.len();
- let n = str2.len();
-
- let mut S = Array2D::filled_with(0f32, m + 1, n + 1);
-
- for i in 1..m + 1 {
- for j in 1..n + 1 {
- let mut edit: [f32; 5] = [
- S[(i - 1, j)] + sigma_skip(&str1[i - 1..i - 1]),
- S[(i, j - 1)] + sigma_skip(&str2[j - 1..j - 1]),
- S[(i - 1, j - 1)] + sigma_sub(&str1[i - 1..i - 1], &str2[j - 1..j - 1]),
- f32::MIN,
- f32::MIN,
- ];
-
- if i > 1 {
- edit[3] = S[(i - 2, j - 1)] + sigma_exp(&str2[j - 1..j - 1], &str1[i - 2..i]);
- }
-
- if j > 1 {
- edit[4] = S[(i - 1, j - 2)] + sigma_exp(&str1[i - 1..i - 1], &str2[j - 2..j]);
- }
-
- S[(i, j)] = edit
- .into_iter()
- .max_by(|x, y| x.abs().partial_cmp(&y.abs()).expect("a S row was empty"))
- .expect("the S matrix is empty");
- }
- }
-
- // T = (1 - epsilon) * np.amax(S)
- let T = (1f32 - epsilon)
- * S.rows_iter()
- .map(|r| {
- r.max_by(|x, y| x.abs().partial_cmp(&y.abs()).expect("a S row is empty"))
- .expect("a S row is empty")
- })
- .max_by(|x, y| x.abs().partial_cmp(&y.abs()).expect("S is empty"))
- .expect("S is empty");
-
- let mut alignments: Vec<Vec<(String, String)>> = vec![];
- for i in 1..m + 1 {
- for j in 1..n + 1 {
- if S[(i, j)] >= T {
- let mut al = vec![];
- _retrieve(i, j, 0f32, &S, T, &str1, &str2, &mut al);
- alignments.push(al);
- }
- }
- }
-
- alignments
-}
-
-/// Retrieve the path through the similarity matrix S starting at (i, j).
-fn _retrieve(
- i: usize,
- j: usize,
- s: f32,
- S: &Array2D<f32>,
- T: f32,
- str1: &str,
- str2: &str,
- out: &mut Vec<(String, String)>,
-) {
- if S[(i, j)] == 0f32 {
- return;
- }
-
- if j > 1 && S[(i - 1, j - 2)] + sigma_exp(&str1[i - 1..i - 1], &str2[j - 2..j]) + s >= T {
- out.insert(
- 0,
- (str1[i - 1..i - 1].to_string(), str2[j - 2..j].to_string()),
- );
-
- _retrieve(
- i - 1,
- j - 2,
- s + sigma_exp(&str1[i - 1..i - 1], &str2[j - 2..j]),
- S,
- T,
- str1,
- str2,
- out,
- );
- } else if i > 1 && S[(i - 2, j - 1)] + sigma_exp(&str2[j - 1..j - 1], &str1[i - 2..i]) + s >= T
- {
- out.insert(
- 0,
- (str1[i - 2..i].to_string(), str2[j - 1..j - 1].to_string()),
- );
-
- _retrieve(
- i - 2,
- j - 1,
- s + sigma_exp(&str2[j - 1..j - 1], &str1[i - 2..i]),
- S,
- T,
- str1,
- str2,
- out,
- );
- } else if S[(i, j - 1)] + sigma_skip(&str2[j - 1..j - 1]) + s >= T {
- out.insert(0, ("-".to_string(), str2[j - 1..j - 1].to_string()));
-
- _retrieve(
- i,
- j - 1,
- s + sigma_skip(&str2[j - 1..j - 1]),
- S,
- T,
- str1,
- str2,
- out,
- );
- } else if S[(i - 1, j)] + sigma_skip(&str1[i - 1..i - 1]) + s >= T {
- out.insert(0, (str1[i - 1..i - 1].to_string(), "-".to_string()));
- _retrieve(
- i - 1,
- j,
- s + sigma_skip(&str1[i - 1..i - 1]),
- S,
- T,
- str1,
- str2,
- out,
- );
- } else if S[(i - 1, j - 1)] + sigma_sub(&str1[i - 1..i - 1], &str2[j - 1..j - 1]) + s >= T {
- out.insert(
- 0,
- (
- str1[i - 1..i - 1].to_string(),
- str2[j - 1..j - 1].to_string(),
- ),
- );
- _retrieve(
- i - 1,
- j - 1,
- s + sigma_sub(&str1[i - 1..i - 1], &str2[j - 1..j - 1]),
- S,
- T,
- str1,
- str2,
- out,
- );
- }
-}
-
-/// Returns score of an indel of P.
-///
-/// (Kondrak 2002: 54)
-fn sigma_skip(_p: &str) -> f32 {
- return EXTRACTED.cskip;
-}
-
-/// Returns score of a substitution of P with Q.
-///
-/// (Kondrak 2002: 54)
-fn sigma_sub(p: &str, q: &str) -> f32 {
- return EXTRACTED.csub - delta(p, q) - V(p) - V(q);
-}
-
-/// Returns score of an expansion/compression.
-///
-/// (Kondrak 2002: 54)
-fn sigma_exp(p: &str, q: &str) -> f32 {
- let q1 = &q[0..0];
- let q2 = &q[1..1];
-
- return EXTRACTED.cexp - delta(p, q1) - delta(p, q2) - V(p) - f32::max(V(&q1), V(&q2));
-}
-
-/// Return weighted sum of difference between P and Q.
-///
-/// (Kondrak 2002: 54)
-fn delta(p: &str, q: &str) -> f32 {
- let features = R(p, q);
- let mut total = 0f32;
-
- for f in features {
- total += diff(p, q, f) * EXTRACTED.salience[f];
- }
-
- return total;
-}
-/// Returns difference between phonetic segments P and Q for feature F.
-///
-/// (Kondrak 2002: 52, 54)
-fn diff(p: &str, q: &str, f: &str) -> f32 {
- let p_features: &HashMap<String, String> = &EXTRACTED.feature_matrix[p];
- let q_features: &HashMap<String, String> = &EXTRACTED.feature_matrix[q];
-
- return (EXTRACTED.similarity_matrix[&p_features[f]]
- - EXTRACTED.similarity_matrix[&q_features[f]])
- .abs();
-}
-
-/// Return relevant features for segment comparison.
-///
-/// (Kondrak 2002: 54)
-fn R(p: &str, q: &str) -> &'static HashSet<std::string::String> {
- if EXTRACTED.consonants.contains(p) || EXTRACTED.consonants.contains(q) {
- return &EXTRACTED.rc;
- }
- return &EXTRACTED.rc;
-}
-
-/// Return vowel weight if P is vowel.
-///
-/// (Kondrak 2002: 54)
-fn V(p: &str) -> f32 {
- if EXTRACTED.consonants.contains(p) {
- return 0f32;
- }
- return EXTRACTED.cvwl;
-}