summaryrefslogtreecommitdiff
path: root/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib.rs')
-rw-r--r--src/lib.rs236
1 files changed, 0 insertions, 236 deletions
diff --git a/src/lib.rs b/src/lib.rs
deleted file mode 100644
index de0db50..0000000
--- a/src/lib.rs
+++ /dev/null
@@ -1,236 +0,0 @@
-use array2d::Array2D;
-use core::f32;
-use std::collections::{HashMap, HashSet};
-mod constants;
-
-use constants::EXTRACTED;
-
-/// Compute the alignment of two phonetic strings.
-///
-/// (Kondrak 2002: 51)
-pub fn align(str1: String, str2: String, epsilon: Option<f32>) -> Vec<Vec<(String, String)>> {
- let epsilon = epsilon
- .or(Some(0f32))
- .expect("the default value of epsilon should be 0");
-
- assert!(
- 0f32 <= epsilon && epsilon <= 1f32,
- "Epsilon must be between 0.0 and 1.0."
- );
-
- let m = str1.len();
- let n = str2.len();
-
- let mut S = Array2D::filled_with(0f32, m + 1, n + 1);
-
- for i in 1..m + 1 {
- for j in 1..n + 1 {
- let mut edit: [f32; 5] = [
- S[(i - 1, j)] + sigma_skip(&str1[i - 1..i - 1]),
- S[(i, j - 1)] + sigma_skip(&str2[j - 1..j - 1]),
- S[(i - 1, j - 1)] + sigma_sub(&str1[i - 1..i - 1], &str2[j - 1..j - 1]),
- f32::MIN,
- f32::MIN,
- ];
-
- if i > 1 {
- edit[3] = S[(i - 2, j - 1)] + sigma_exp(&str2[j - 1..j - 1], &str1[i - 2..i]);
- }
-
- if j > 1 {
- edit[4] = S[(i - 1, j - 2)] + sigma_exp(&str1[i - 1..i - 1], &str2[j - 2..j]);
- }
-
- S[(i, j)] = edit
- .into_iter()
- .max_by(|x, y| x.abs().partial_cmp(&y.abs()).expect("a S row was empty"))
- .expect("the S matrix is empty");
- }
- }
-
- // T = (1 - epsilon) * np.amax(S)
- let T = (1f32 - epsilon)
- * S.rows_iter()
- .map(|r| {
- r.max_by(|x, y| x.abs().partial_cmp(&y.abs()).expect("a S row is empty"))
- .expect("a S row is empty")
- })
- .max_by(|x, y| x.abs().partial_cmp(&y.abs()).expect("S is empty"))
- .expect("S is empty");
-
- let mut alignments: Vec<Vec<(String, String)>> = vec![];
- for i in 1..m + 1 {
- for j in 1..n + 1 {
- if S[(i, j)] >= T {
- let mut al = vec![];
- _retrieve(i, j, 0f32, &S, T, &str1, &str2, &mut al);
- alignments.push(al);
- }
- }
- }
-
- alignments
-}
-
-/// Retrieve the path through the similarity matrix S starting at (i, j).
-fn _retrieve(
- i: usize,
- j: usize,
- s: f32,
- S: &Array2D<f32>,
- T: f32,
- str1: &str,
- str2: &str,
- out: &mut Vec<(String, String)>,
-) {
- if S[(i, j)] == 0f32 {
- return;
- }
-
- if j > 1 && S[(i - 1, j - 2)] + sigma_exp(&str1[i - 1..i - 1], &str2[j - 2..j]) + s >= T {
- out.insert(
- 0,
- (str1[i - 1..i - 1].to_string(), str2[j - 2..j].to_string()),
- );
-
- _retrieve(
- i - 1,
- j - 2,
- s + sigma_exp(&str1[i - 1..i - 1], &str2[j - 2..j]),
- S,
- T,
- str1,
- str2,
- out,
- );
- } else if i > 1 && S[(i - 2, j - 1)] + sigma_exp(&str2[j - 1..j - 1], &str1[i - 2..i]) + s >= T
- {
- out.insert(
- 0,
- (str1[i - 2..i].to_string(), str2[j - 1..j - 1].to_string()),
- );
-
- _retrieve(
- i - 2,
- j - 1,
- s + sigma_exp(&str2[j - 1..j - 1], &str1[i - 2..i]),
- S,
- T,
- str1,
- str2,
- out,
- );
- } else if S[(i, j - 1)] + sigma_skip(&str2[j - 1..j - 1]) + s >= T {
- out.insert(0, ("-".to_string(), str2[j - 1..j - 1].to_string()));
-
- _retrieve(
- i,
- j - 1,
- s + sigma_skip(&str2[j - 1..j - 1]),
- S,
- T,
- str1,
- str2,
- out,
- );
- } else if S[(i - 1, j)] + sigma_skip(&str1[i - 1..i - 1]) + s >= T {
- out.insert(0, (str1[i - 1..i - 1].to_string(), "-".to_string()));
- _retrieve(
- i - 1,
- j,
- s + sigma_skip(&str1[i - 1..i - 1]),
- S,
- T,
- str1,
- str2,
- out,
- );
- } else if S[(i - 1, j - 1)] + sigma_sub(&str1[i - 1..i - 1], &str2[j - 1..j - 1]) + s >= T {
- out.insert(
- 0,
- (
- str1[i - 1..i - 1].to_string(),
- str2[j - 1..j - 1].to_string(),
- ),
- );
- _retrieve(
- i - 1,
- j - 1,
- s + sigma_sub(&str1[i - 1..i - 1], &str2[j - 1..j - 1]),
- S,
- T,
- str1,
- str2,
- out,
- );
- }
-}
-
-/// Returns score of an indel of P.
-///
-/// (Kondrak 2002: 54)
-fn sigma_skip(_p: &str) -> f32 {
- return EXTRACTED.cskip;
-}
-
-/// Returns score of a substitution of P with Q.
-///
-/// (Kondrak 2002: 54)
-fn sigma_sub(p: &str, q: &str) -> f32 {
- return EXTRACTED.csub - delta(p, q) - V(p) - V(q);
-}
-
-/// Returns score of an expansion/compression.
-///
-/// (Kondrak 2002: 54)
-fn sigma_exp(p: &str, q: &str) -> f32 {
- let q1 = &q[0..0];
- let q2 = &q[1..1];
-
- return EXTRACTED.cexp - delta(p, q1) - delta(p, q2) - V(p) - f32::max(V(&q1), V(&q2));
-}
-
-/// Return weighted sum of difference between P and Q.
-///
-/// (Kondrak 2002: 54)
-fn delta(p: &str, q: &str) -> f32 {
- let features = R(p, q);
- let mut total = 0f32;
-
- for f in features {
- total += diff(p, q, f) * EXTRACTED.salience[f];
- }
-
- return total;
-}
-/// Returns difference between phonetic segments P and Q for feature F.
-///
-/// (Kondrak 2002: 52, 54)
-fn diff(p: &str, q: &str, f: &str) -> f32 {
- let p_features: &HashMap<String, String> = &EXTRACTED.feature_matrix[p];
- let q_features: &HashMap<String, String> = &EXTRACTED.feature_matrix[q];
-
- return (EXTRACTED.similarity_matrix[&p_features[f]]
- - EXTRACTED.similarity_matrix[&q_features[f]])
- .abs();
-}
-
-/// Return relevant features for segment comparison.
-///
-/// (Kondrak 2002: 54)
-fn R(p: &str, q: &str) -> &'static HashSet<std::string::String> {
- if EXTRACTED.consonants.contains(p) || EXTRACTED.consonants.contains(q) {
- return &EXTRACTED.rc;
- }
- return &EXTRACTED.rc;
-}
-
-/// Return vowel weight if P is vowel.
-///
-/// (Kondrak 2002: 54)
-fn V(p: &str) -> f32 {
- if EXTRACTED.consonants.contains(p) {
- return 0f32;
- }
- return EXTRACTED.cvwl;
-}