diff options
Diffstat (limited to 'autofeur_db')
| -rw-r--r-- | autofeur_db/Cargo.lock | 16 | ||||
| -rw-r--r-- | autofeur_db/Cargo.toml | 2 | ||||
| -rw-r--r-- | autofeur_db/src/inference.rs | 34 | 
3 files changed, 26 insertions, 26 deletions
diff --git a/autofeur_db/Cargo.lock b/autofeur_db/Cargo.lock index 1a40894..d2cfdd1 100644 --- a/autofeur_db/Cargo.lock +++ b/autofeur_db/Cargo.lock @@ -1,6 +1,6 @@  # This file is automatically @generated by Cargo.  # It is not intended for manual editing. -version = 3 +version = 4  [[package]]  name = "adler" @@ -57,9 +57,9 @@ dependencies = [   "bincode",   "csv",   "hyper", + "hypher",   "itertools",   "kdam", - "levenshtein",   "querystring",   "rand",   "reqwest", @@ -543,6 +543,12 @@ dependencies = [  ]  [[package]] +name = "hypher" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b24ad5637230df201ab1034d593f1d09bf7f2a9274f2e8897638078579f4265" + +[[package]]  name = "idna"  version = "0.3.0"  source = "registry+https://github.com/rust-lang/crates.io-index" @@ -645,12 +651,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"  checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"  [[package]] -name = "levenshtein" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db13adb97ab515a3691f56e4dbab09283d0b86cb45abd991d8634a9d6f501760" - -[[package]]  name = "libc"  version = "0.2.139"  source = "registry+https://github.com/rust-lang/crates.io-index" diff --git a/autofeur_db/Cargo.toml b/autofeur_db/Cargo.toml index c305fb3..ac0fa13 100644 --- a/autofeur_db/Cargo.toml +++ b/autofeur_db/Cargo.toml @@ -23,7 +23,7 @@ kdam = { version = "0.3", features = ["gradient", "template"] }  anyhow = "1.0.68"  itertools = "0.10.5"  querystring = "1.1.0" -levenshtein = "1.0.5" +hypher = { version = "0.1", features = ["english", "french"] }  [[bin]]  name = "generate" diff --git a/autofeur_db/src/inference.rs b/autofeur_db/src/inference.rs index b8f2f87..49192f3 100644 --- a/autofeur_db/src/inference.rs +++ b/autofeur_db/src/inference.rs @@ -1,9 +1,8 @@ -use std::{collections::VecDeque, env, ops::Add}; +use std::{env, ops::Add};  use anyhow::anyhow; +use hypher::hyphenate;  use itertools::Itertools; -use levenshtein::levenshtein; -use unicode_segmentation::UnicodeSegmentation;  use crate::save::Save; @@ -38,26 +37,27 @@ impl Save<'_> {          println!("Matching {} by adding {}", word, completion); -        // we finally just need to compute the end of the word which matches the sound -        let mut found = None; +        let mut completed_syllabes: Vec<&str> = hyphenate(word, hypher::Lang::French).into_iter().collect_vec(); +        let source_word_syllabes: Vec<&str> = hyphenate(prefix, hypher::Lang::French).into_iter().collect_vec(); -        let mut characters: VecDeque<&str> = word.graphemes(true).collect(); -        while let Some(_) = characters.pop_front() { -            let sub: String = characters.iter().join(""); -            let inference = call_inference_service(&sub).await?; +        // input:           test +        // output found:    testames +        // out syl:         tes - tames +        // output expect:   tames +        // we just need to remove the prefix -            if levenshtein(&inference, &completion) < 5 { -                found = Some(sub); -                break; + +        let mut i = 0; +        for (index, syl) in source_word_syllabes.iter().enumerate() { +            if *source_word_syllabes[index] == **syl { +                i = index              } else { -                if found.is_none() { -                    found = Some(sub); -                } -                println!("did not match a={}, b={}", inference, completion) +                break;              }          } -        let found = found.ok_or_else(|| anyhow!("no prefix could be matched"))?; +        completed_syllabes.drain(0..i);        // we finally just need to compute the end of the word which matches the sound +        let found = completed_syllabes.join("");          println!("{} is equivalent to {}", completion, found);          Ok(format!("{} ({})", found, word))  | 
