summaryrefslogtreecommitdiff
path: root/autofeur_db/src/bin/generate.rs
blob: f4996c6b0d44c3b3b2e283418d39e3195399173d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
use std::fs;

use autofeur::save::Save;
use kdam::tqdm;

/// Cleans one raw pronunciation entry from the dictionary.
///
/// Surrounding whitespace is trimmed, then every `/`, `ʼ`, `ː`, space and `.`
/// character is removed. All stripped patterns are single characters, so one
/// filtering pass yields the same result as chaining one `replace` per
/// pattern, without the intermediate allocations.
fn normalize_pronunciation(raw: &str) -> String {
    raw.trim()
        .chars()
        .filter(|c| !matches!(c, '/' | 'ʼ' | 'ː' | ' ' | '.'))
        .collect()
}

/// Builds `assets/db.bin` from `./assets/dictionary.csv`.
///
/// Each CSV record is read as a word (column 0) and a comma-separated list of
/// pronunciations (column 1). Every cleaned pronunciation is registered in
/// `save.reverse_index` (pronunciation -> word) and then inserted into
/// `save.trie`; the resulting save is serialized with `bincode`.
///
/// # Panics
///
/// Panics if the dictionary cannot be opened or parsed, if a record lacks one
/// of the two expected columns, or if the save cannot be serialized or
/// written to disk.
// NOTE(review): nothing in this function awaits, so the tokio runtime looks
// unnecessary here — kept to leave the entry point unchanged.
#[tokio::main]
async fn main() {
    let mut save = Save::default();

    // Read the word/pronunciation pairs from the dictionary CSV.
    let mut vocabulary = csv::Reader::from_path("./assets/dictionary.csv")
        .expect("failed to open ./assets/dictionary.csv");
    let mut phonems = vec![];

    // Reduce all the records into the save index; this also gathers every
    // pronunciation present in the CSV so the trie can be filled afterwards.
    // `total` is a hard-coded size hint for the progress bar.
    for record in tqdm!(
        vocabulary.records(),
        total = 245178,
        colour = "gradient(#5A56E0,#EE6FF8)"
    ) {
        let record = record.expect("failed to parse a record of dictionary.csv");
        let word = record
            .get(0)
            .expect("dictionary record is missing the word column");
        let pronunciations = record
            .get(1)
            .expect("dictionary record is missing the pronunciation column");

        for phoneme in pronunciations.split(',').map(normalize_pronunciation) {
            save.reverse_index.insert(phoneme.clone(), word.to_string());
            phonems.push(phoneme);
        }
    }

    // Fill the trie in a second pass so it gets its own progress bar.
    for phoneme in tqdm!(phonems.iter()) {
        save.trie.insert(&phoneme);
    }

    let encoded = bincode::serialize(&save).expect("failed to serialize the save");
    fs::write("assets/db.bin", encoded).expect("failed to write assets/db.bin");

    println!("Generated to assets/db.bin");
}