summaryrefslogtreecommitdiff
path: root/bin/index/src/main.rs
blob: 79780c971c76757c08c1f6307bf91b4cdeedef79 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
use std::io::{stdin, stdout};

use db::{
    save::Save,
    types::{GraphemeString, PhonemeString},
};

fn main() {
    let mut save = Save::default();

    // Read from the
    let mut vocabulary = csv::ReaderBuilder::new()
        .has_headers(false)
        .double_quote(false)
        .escape(Some(b'\\'))
        .flexible(true)
        .comment(Some(b'#'))
        .from_reader(stdin());

    let mut phonems = vec![];

    // Reduce all the records into the save index
    // this is used to get all the phonemes represented in the csv
    for record in vocabulary.records() {
        let record = record.unwrap();
        let word = GraphemeString(record.get(0).unwrap().to_string());
        let mut pron: Vec<PhonemeString> = record
            .get(1)
            .unwrap()
            .split(',')
            .map(|a| PhonemeString(a.to_string().trim().to_string()))
            .collect();
        for a in &pron {
            save.reverse_index.insert(a.clone(), word.clone());
        }
        phonems.append(&mut pron);
    }

    for phoneme in phonems.iter() {
        save.trie.insert(&phoneme);
    }

    let mut stdout = stdout();
    bincode::encode_into_std_write(
        bincode::serde::Compat(&save),
        &mut stdout,
        bincode::config::standard(),
    )
    .unwrap();
}