summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Matthieu Pignolet <matthieu@matthieu-dev.xyz> 2025-01-21 14:17:26 +0400
committer: Matthieu Pignolet <matthieu@matthieu-dev.xyz> 2025-01-21 14:17:26 +0400
commit: 3d88aa6b6244c73720e5067ffeb0e77d6edc8f8b (patch)
tree: 68ca1e0bed6de6e46dfe11c03608dbd3f2c48e08
parent: c4c27a43bd32a3dcd9fe6c984b2b6b6521c5def9 (diff)
Remove the fuzzy (Levenshtein) matching logic and replace it with proper syllable-based logic
-rw-r--r-- autofeur_db/Cargo.lock 16
-rw-r--r-- autofeur_db/Cargo.toml 2
-rw-r--r-- autofeur_db/src/inference.rs 34
3 files changed, 26 insertions, 26 deletions
diff --git a/autofeur_db/Cargo.lock b/autofeur_db/Cargo.lock
index 1a40894..d2cfdd1 100644
--- a/autofeur_db/Cargo.lock
+++ b/autofeur_db/Cargo.lock
@@ -1,6 +1,6 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
-version = 3
+version = 4
[[package]]
name = "adler"
@@ -57,9 +57,9 @@ dependencies = [
"bincode",
"csv",
"hyper",
+ "hypher",
"itertools",
"kdam",
- "levenshtein",
"querystring",
"rand",
"reqwest",
@@ -543,6 +543,12 @@ dependencies = [
]
[[package]]
+name = "hypher"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b24ad5637230df201ab1034d593f1d09bf7f2a9274f2e8897638078579f4265"
+
+[[package]]
name = "idna"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -645,12 +651,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
-name = "levenshtein"
-version = "1.0.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "db13adb97ab515a3691f56e4dbab09283d0b86cb45abd991d8634a9d6f501760"
-
-[[package]]
name = "libc"
version = "0.2.139"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/autofeur_db/Cargo.toml b/autofeur_db/Cargo.toml
index c305fb3..ac0fa13 100644
--- a/autofeur_db/Cargo.toml
+++ b/autofeur_db/Cargo.toml
@@ -23,7 +23,7 @@ kdam = { version = "0.3", features = ["gradient", "template"] }
anyhow = "1.0.68"
itertools = "0.10.5"
querystring = "1.1.0"
-levenshtein = "1.0.5"
+hypher = { version = "0.1", features = ["english", "french"] }
[[bin]]
name = "generate"
diff --git a/autofeur_db/src/inference.rs b/autofeur_db/src/inference.rs
index b8f2f87..49192f3 100644
--- a/autofeur_db/src/inference.rs
+++ b/autofeur_db/src/inference.rs
@@ -1,9 +1,8 @@
-use std::{collections::VecDeque, env, ops::Add};
+use std::{env, ops::Add};
use anyhow::anyhow;
+use hypher::hyphenate;
use itertools::Itertools;
-use levenshtein::levenshtein;
-use unicode_segmentation::UnicodeSegmentation;
use crate::save::Save;
@@ -38,26 +37,27 @@ impl Save<'_> {
println!("Matching {} by adding {}", word, completion);
- // we finally just need to compute the end of the word which matches the sound
- let mut found = None;
+ let mut completed_syllabes: Vec<&str> = hyphenate(word, hypher::Lang::French).into_iter().collect_vec();
+ let source_word_syllabes: Vec<&str> = hyphenate(prefix, hypher::Lang::French).into_iter().collect_vec();
- let mut characters: VecDeque<&str> = word.graphemes(true).collect();
- while let Some(_) = characters.pop_front() {
- let sub: String = characters.iter().join("");
- let inference = call_inference_service(&sub).await?;
+ // input: test
+ // output found: testames
+ // out syl: tes - tames
+ // output expect: tames
+ // we just need to remove the prefix
- if levenshtein(&inference, &completion) < 5 {
- found = Some(sub);
- break;
+
+ let mut i = 0;
+ for (index, syl) in source_word_syllabes.iter().enumerate() {
+ if *source_word_syllabes[index] == **syl {
+ i = index
} else {
- if found.is_none() {
- found = Some(sub);
- }
- println!("did not match a={}, b={}", inference, completion)
+ break;
}
}
- let found = found.ok_or_else(|| anyhow!("no prefix could be matched"))?;
+ completed_syllabes.drain(0..i); // we finally just need to compute the end of the word which matches the sound
+ let found = completed_syllabes.join("");
println!("{} is equivalent to {}", completion, found);
Ok(format!("{} ({})", found, word))