blob: 79780c971c76757c08c1f6307bf91b4cdeedef79 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
|
use std::io::{stdin, stdout};
use db::{
save::Save,
types::{GraphemeString, PhonemeString},
};
fn main() {
let mut save = Save::default();
// Read from the
let mut vocabulary = csv::ReaderBuilder::new()
.has_headers(false)
.double_quote(false)
.escape(Some(b'\\'))
.flexible(true)
.comment(Some(b'#'))
.from_reader(stdin());
let mut phonems = vec![];
// Reduce all the records into the save index
// this is used to get all the phonemes represented in the csv
for record in vocabulary.records() {
let record = record.unwrap();
let word = GraphemeString(record.get(0).unwrap().to_string());
let mut pron: Vec<PhonemeString> = record
.get(1)
.unwrap()
.split(',')
.map(|a| PhonemeString(a.to_string().trim().to_string()))
.collect();
for a in &pron {
save.reverse_index.insert(a.clone(), word.clone());
}
phonems.append(&mut pron);
}
for phoneme in phonems.iter() {
save.trie.insert(&phoneme);
}
let mut stdout = stdout();
bincode::encode_into_std_write(
bincode::serde::Compat(&save),
&mut stdout,
bincode::config::standard(),
)
.unwrap();
}
|