summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Matthieu Pignolet <matthieu@puffer.fish> 2025-05-19 08:00:56 +0400
committer Matthieu Pignolet <matthieu@puffer.fish> 2025-05-19 08:00:56 +0400
commit 7ba6c10f983353b9ada243a4d7b8b3e88255e240 (patch)
tree 4cf62c59e77d53adfbdb5fd5df90c7cb467478ae
parent 86c882e0d834082d65cc6a87113817b65439f0e1 (diff)
feat: tidy up project and export more functions
-rw-r--r-- src/lib.rs 25
1 files changed, 7 insertions, 18 deletions
diff --git a/src/lib.rs b/src/lib.rs
index e79e040..a917e4d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,6 +1,3 @@
-// ALINE phonetic sequence alignment in Rust
-// Port of NLTK's ALINE module (Greg Kondrak, 2002)
-
/// ALINE
/// https://webdocs.cs.ualberta.ca/~kondrak/
/// Copyright 2002 by Grzegorz Kondrak.
@@ -129,8 +126,6 @@ fn retrieve<'a>(
}
if j > 1 && (s[i - 1][j - 2] + sigma_exp(str1[i - 1], str2[j - 2], str2[j - 1]) + score) >= t {
- // j > 1 and S[i - 1, j - 2] + sigma_exp(str1[i - 1], str2[j - 2 : j]) + s >= T
-
let key = str2[j - 2..j].join("");
out.insert(0, (str1[i - 1].to_string(), key));
@@ -147,7 +142,6 @@ fn retrieve<'a>(
} else if i > 1
&& (s[i - 2][j - 1] + sigma_exp(str2[j - 1], str1[i - 2], str1[i - 1]) + score) >= t
{
- // i > 1 and S[i - 2, j - 1] + sigma_exp(str2[j - 1], str1[i - 2 : i]) + s >= T
let key = str1[i - 2..i].join("");
out.insert(0, (key, str2[j - 1].to_string()));
@@ -162,17 +156,12 @@ fn retrieve<'a>(
out,
);
} else if (s[i][j - 1] + sigma_skip() + score) >= t {
- // S[i, j - 1] + sigma_skip(str2[j - 1]) + s >= T
-
out.insert(0, ("-".to_string(), str2[j - 1].to_string()));
retrieve(i, j - 1, score + sigma_skip(), s, t, str1, str2, out);
} else if (s[i - 1][j] + sigma_skip() + score) >= t {
- // S[i - 1, j] + sigma_skip(str1[i - 1]) + s >= T
-
out.insert(0, (str1[i - 1].to_string(), "-".to_string()));
retrieve(i - 1, j, score + sigma_skip(), s, t, str1, str2, out);
} else if (s[i - 1][j - 1] + sigma_sub(str1[i - 1], str2[j - 1]) + score) >= t {
- // S[i - 1, j - 1] + sigma_sub(str1[i - 1], str2[j - 1]) + s >= T
out.insert(0, (str1[i - 1].to_string(), str2[j - 1].to_string()));
retrieve(
@@ -194,7 +183,7 @@ fn retrieve<'a>(
///
/// (Kondrak 2002: 54)
#[inline]
-fn sigma_skip() -> f64 {
+pub fn sigma_skip() -> f64 {
EXTRACTED.cskip
}
@@ -202,7 +191,7 @@ fn sigma_skip() -> f64 {
///
/// (Kondrak 2002: 54)
#[inline]
-fn sigma_sub(p: &str, q: &str) -> f64 {
+pub fn sigma_sub(p: &str, q: &str) -> f64 {
EXTRACTED.csub - delta(p, q) - v(p) - v(q)
}
@@ -210,7 +199,7 @@ fn sigma_sub(p: &str, q: &str) -> f64 {
///
/// (Kondrak 2002: 54)
#[inline]
-fn sigma_exp(p: &str, q1: &str, q2: &str) -> f64 {
+pub fn sigma_exp(p: &str, q1: &str, q2: &str) -> f64 {
EXTRACTED.cexp - delta(p, q1) - delta(p, q2) - v(p) - f64::max(v(q1), v(q2))
}
@@ -218,7 +207,7 @@ fn sigma_exp(p: &str, q1: &str, q2: &str) -> f64 {
///
/// (Kondrak 2002: 54)
#[inline]
-fn delta(p: &str, q: &str) -> f64 {
+pub fn delta(p: &str, q: &str) -> f64 {
let features = r(p, q);
features
.iter()
@@ -230,7 +219,7 @@ fn delta(p: &str, q: &str) -> f64 {
///
/// (Kondrak 2002: 52, 54)
#[inline]
-fn diff(p: &str, q: &str, f: &str) -> f64 {
+pub fn diff(p: &str, q: &str, f: &str) -> f64 {
let p_features = &EXTRACTED.feature_matrix[&p.to_string()][f];
let q_features = &EXTRACTED.feature_matrix[&q.to_string()][f];
let p_similarity = *EXTRACTED
@@ -248,7 +237,7 @@ fn diff(p: &str, q: &str, f: &str) -> f64 {
///
/// (Kondrak 2002: 54)
#[inline]
-fn r<'a>(p: &str, q: &str) -> &'static HashSet<String> {
+pub fn r<'a>(p: &str, q: &str) -> &'static HashSet<String> {
if EXTRACTED.consonants.contains(&p.to_string())
|| EXTRACTED.consonants.contains(&q.to_string())
{
@@ -262,7 +251,7 @@ fn r<'a>(p: &str, q: &str) -> &'static HashSet<String> {
///
/// (Kondrak 2002: 54)
#[inline]
-fn v(p: &str) -> f64 {
+pub fn v(p: &str) -> f64 {
if !EXTRACTED.consonants.contains(&p.to_string()) {
EXTRACTED.cvwl
} else {