]> git.puffer.fish Git - matthieu/gru.git/commitdiff
autofeur v3 - custom db
authorMatthieuCoder <matthieu@matthieu-dev.xyz>
Fri, 20 Jan 2023 08:59:08 +0000 (12:59 +0400)
committerMatthieuCoder <matthieu@matthieu-dev.xyz>
Fri, 20 Jan 2023 08:59:08 +0000 (12:59 +0400)
29 files changed:
README.md
autofeur_db/.dockerignore [new file with mode: 0644]
autofeur_db/.gitignore [new file with mode: 0644]
autofeur_db/Cargo.lock [new file with mode: 0644]
autofeur_db/Cargo.toml [new file with mode: 0644]
autofeur_db/Dockerfile [new file with mode: 0644]
autofeur_db/src/bin/generate.rs [new file with mode: 0644]
autofeur_db/src/bin/server.rs [new file with mode: 0644]
autofeur_db/src/french_ipa.rs [new file with mode: 0644]
autofeur_db/src/inference.rs [new file with mode: 0644]
autofeur_db/src/lib.rs [new file with mode: 0644]
autofeur_db/src/save.rs [new file with mode: 0644]
autofeur_db/src/trie.rs [new file with mode: 0644]
autofeur_nova/.dockerignore [new file with mode: 0644]
autofeur_nova/.gitignore
autofeur_nova/Dockerfile
autofeur_nova/config/default.example.yml [new file with mode: 0644]
autofeur_nova/data.json [deleted file]
autofeur_nova/package.json
autofeur_nova/src/algo.mts [deleted file]
autofeur_nova/src/index.mts
autofeur_nova/src/phonemizelib.mts [deleted file]
autofeur_nova/src/preprocess.mts [deleted file]
autofeur_nova/yarn.lock
deep_phonemizer/.dockerignore
deep_phonemizer/.gitignore
deep_phonemizer/Dockerfile
deep_phonemizer/app.py
docker-compose.yaml

index a097517c587bac5983662d0a37746eefad2786a8..fff0c8832a0bc742351b183f11e160fa5562a482 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1,3 +1,74 @@
 # Autofeur
 
-Bot that reponds with end of words
\ No newline at end of file
+Bot that responds with the end of words
+
+## Structure
+
+Autofeur is composed of a few components that make up this bot
+
+|Name|Description|
+|-|-|
+|autofeur_nova|This is the component for handling Discord events; it uses the [nova framework](https://github.com/discordnova/nova) under the hood and is developed with TypeScript|
+|deep_phonemizer|This is the component that transforms a grapheme into a phoneme using [DeepPhonemizer](https://github.com/as-ideas/DeepPhonemizer)|
+|autofeur_db|This is the component used for completing the end of words; it's a DB specialized in this specific task|
+
+## Running Autofeur
+
+### Getting trained models
+
+You'll need two files to get running with Autofeur: a trained `DeepPhonemizer` model and an IPA dictionary file.
+
+You can get the `DeepPhonemizer` model on the project [github page](https://github.com/as-ideas/DeepPhonemizer#pretrained-models) or follow the instructions there to create your own datasets.
+
+You can get the IPA Dictionary on this [github page](https://github.com/open-dict-data/ipa-dict) or use your own, it's simply a CSV file with two columns, one for the word and another for the phonemized word.
+
+### Starting `deep_phonemizer`
+
+To run it inside docker, we recommend
+`docker-compose up deep-phonemizer`
+If you want to use bare metal, run the following commands.
+You'll need to move your trained model into the `deep_phonemizer/assets/model.pt` file.
+
+```sh
+# Go into the folder
+cd deep_phonemizer
+# Create a Virtual environment with dependencies
+python3 -m venv ./venv
+source ./venv/bin/activate
+pip install -r requirements.txt
+
+# Run the flask application
+flask run
+```
+
+### Starting `autofeur_db`
+
+#### Generating the database
+The autofeur DB needs to be pre-computed in order to deliver excellent performance.
+First of all, you need to place your dictionary file at `autofeur_db/assets/dictionary.csv`.
+Only then you can start generating the DB.
+```sh
+cd autofeur_db
+cargo run --release --bin generate
+```
+
+This will output a `autofeur_db/assets/db.bin` which will be used for the db to complete words.
+
+### Starting the service
+To start `autofeur_db` you can simply use the docker-container `docker-compose up autofeur_db`
+or use the bare-metal command
+```sh
+cd autofeur_db
+cargo run --release --bin server
+```
+
+### Starting the nova components
+You'll need nova to use this bot, however setup is quite easy and only requires a configuration file
+you can find on the [project's GitHub page](https://github.com/discordnova/nova) or use this project's example config file located in `autofeur_nova/config/default.example.yml`.
+Your config file will need to be named `autofeur_nova/config/default.yml`.
+
+To start nova, you can either use the `all-in-one` binary or the full blown docker compose services
+to get started using the all in one binary, simply execute `yarn nova` in the `autofeur_nova/` folder. Or you can simply execute `docker-compose up nats redis cache gateway0 rest ratelimiter webhook` to start all nova components.
+
+### Starting `autofeur_nova`
+This component requires basically no configuration, as it is configured in docker using environment variables and the defaults work using localhost; you can refer to the component README for the available configuration environment variables. Start it with `yarn start` or `docker-compose up autofeur_nova`.
diff --git a/autofeur_db/.dockerignore b/autofeur_db/.dockerignore
new file mode 100644 (file)
index 0000000..8797006
--- /dev/null
@@ -0,0 +1,2 @@
+target/
+assets/
diff --git a/autofeur_db/.gitignore b/autofeur_db/.gitignore
new file mode 100644 (file)
index 0000000..7e0b272
--- /dev/null
@@ -0,0 +1,2 @@
+target/
+assets/
\ No newline at end of file
diff --git a/autofeur_db/Cargo.lock b/autofeur_db/Cargo.lock
new file mode 100644 (file)
index 0000000..1a40894
--- /dev/null
@@ -0,0 +1,1692 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "adler"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
+
+[[package]]
+name = "alloc-no-stdlib"
+version = "2.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3"
+
+[[package]]
+name = "alloc-stdlib"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece"
+dependencies = [
+ "alloc-no-stdlib",
+]
+
+[[package]]
+name = "anyhow"
+version = "1.0.68"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2cb2f989d18dd141ab8ae82f64d1a8cdd37e0840f73a406896cf5e99502fab61"
+
+[[package]]
+name = "async-compression"
+version = "0.3.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "942c7cd7ae39e91bde4820d74132e9862e62c2f386c3aa90ccf55949f5bad63a"
+dependencies = [
+ "brotli",
+ "flate2",
+ "futures-core",
+ "memchr",
+ "pin-project-lite",
+ "tokio",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+
+[[package]]
+name = "autofeur"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "bincode",
+ "csv",
+ "hyper",
+ "itertools",
+ "kdam",
+ "levenshtein",
+ "querystring",
+ "rand",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "tokio",
+ "tower",
+ "tower-http",
+ "trie-rs",
+ "unicode-segmentation",
+]
+
+[[package]]
+name = "base64"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
+
+[[package]]
+name = "base64"
+version = "0.21.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a"
+
+[[package]]
+name = "bincode"
+version = "1.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "brotli"
+version = "3.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+ "brotli-decompressor",
+]
+
+[[package]]
+name = "brotli-decompressor"
+version = "2.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b6561fd3f895a11e8f72af2cb7d22e08366bebc2b6b57f7744c4bda27034744"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+]
+
+[[package]]
+name = "bstr"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223"
+dependencies = [
+ "lazy_static",
+ "memchr",
+ "regex-automata",
+ "serde",
+]
+
+[[package]]
+name = "bumpalo"
+version = "3.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535"
+
+[[package]]
+name = "byteorder"
+version = "1.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
+
+[[package]]
+name = "bytes"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dfb24e866b15a1af2a1b663f10c6b6b8f397a84aadb828f12e5b289ec23a3a3c"
+
+[[package]]
+name = "cc"
+version = "1.0.78"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a20104e2335ce8a659d6dd92a51a767a0c062599c73b343fd152cb401e828c3d"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "colorgrad"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6a5f405d474b9d05e0a093d3120e77e9bf26461b57a84b40aa2a221ac5617fb6"
+dependencies = [
+ "csscolorparser",
+]
+
+[[package]]
+name = "core-foundation"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
+[[package]]
+name = "core-foundation-sys"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"
+
+[[package]]
+name = "crc32fast"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "crossbeam-channel"
+version = "0.5.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
+dependencies = [
+ "cfg-if",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc"
+dependencies = [
+ "cfg-if",
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a"
+dependencies = [
+ "autocfg",
+ "cfg-if",
+ "crossbeam-utils",
+ "memoffset",
+ "scopeguard",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "csscolorparser"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eb2a7d3066da2de787b7f032c736763eb7ae5d355f81a68bab2675a96008b0bf"
+dependencies = [
+ "phf",
+]
+
+[[package]]
+name = "csv"
+version = "1.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1"
+dependencies = [
+ "bstr",
+ "csv-core",
+ "itoa 0.4.8",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "csv-core"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "either"
+version = "1.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797"
+
+[[package]]
+name = "encoding_rs"
+version = "0.8.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9852635589dc9f9ea1b6fe9f05b50ef208c85c834a562f0c6abb1c475736ec2b"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "errno"
+version = "0.2.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1"
+dependencies = [
+ "errno-dragonfly",
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "errno-dragonfly"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
+dependencies = [
+ "cc",
+ "libc",
+]
+
+[[package]]
+name = "fastrand"
+version = "1.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499"
+dependencies = [
+ "instant",
+]
+
+[[package]]
+name = "fid-rs"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c28658c0c3420305705adde833a0d2d614207507d013a5f25707553fb2ae2cd"
+dependencies = [
+ "rayon",
+]
+
+[[package]]
+name = "flate2"
+version = "1.0.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+]
+
+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+
+[[package]]
+name = "foreign-types"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
+dependencies = [
+ "foreign-types-shared",
+]
+
+[[package]]
+name = "foreign-types-shared"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
+
+[[package]]
+name = "form_urlencoded"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8"
+dependencies = [
+ "percent-encoding",
+]
+
+[[package]]
+name = "formatx"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e94c620058a2b0d38357c78968094788d66016c4cc24bb0efd2e246d9781391c"
+
+[[package]]
+name = "futures-channel"
+version = "0.3.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52ba265a92256105f45b719605a571ffe2d1f0fea3807304b522c1d778f79eed"
+dependencies = [
+ "futures-core",
+]
+
+[[package]]
+name = "futures-core"
+version = "0.3.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "04909a7a7e4633ae6c4a9ab280aeb86da1236243a77b694a49eacd659a4bd3ac"
+
+[[package]]
+name = "futures-sink"
+version = "0.3.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "39c15cf1a4aa79df40f1bb462fb39676d0ad9e366c2a33b590d7c66f4f81fcf9"
+
+[[package]]
+name = "futures-task"
+version = "0.3.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2ffb393ac5d9a6eaa9d3fdf37ae2776656b706e200c8e16b1bdb227f5198e6ea"
+
+[[package]]
+name = "futures-util"
+version = "0.3.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "197676987abd2f9cadff84926f410af1c183608d36641465df73ae8211dc65d6"
+dependencies = [
+ "futures-core",
+ "futures-task",
+ "pin-project-lite",
+ "pin-utils",
+ "slab",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.2.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi",
+]
+
+[[package]]
+name = "h2"
+version = "0.3.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5f9f29bc9dda355256b2916cf526ab02ce0aeaaaf2bad60d65ef3f12f11dd0f4"
+dependencies = [
+ "bytes",
+ "fnv",
+ "futures-core",
+ "futures-sink",
+ "futures-util",
+ "http",
+ "indexmap",
+ "slab",
+ "tokio",
+ "tokio-util",
+ "tracing",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
+
+[[package]]
+name = "hdrhistogram"
+version = "7.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f19b9f54f7c7f55e31401bb647626ce0cf0f67b0004982ce815b3ee72a02aa8"
+dependencies = [
+ "byteorder",
+ "num-traits",
+]
+
+[[package]]
+name = "hermit-abi"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "http"
+version = "0.2.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399"
+dependencies = [
+ "bytes",
+ "fnv",
+ "itoa 1.0.5",
+]
+
+[[package]]
+name = "http-body"
+version = "0.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1"
+dependencies = [
+ "bytes",
+ "http",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "http-range-header"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0bfe8eed0a9285ef776bb792479ea3834e8b94e13d615c2f66d03dd50a435a29"
+
+[[package]]
+name = "httparse"
+version = "1.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904"
+
+[[package]]
+name = "httpdate"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"
+
+[[package]]
+name = "hyper"
+version = "0.14.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "034711faac9d2166cb1baf1a2fb0b60b1f277f8492fd72176c17f3515e1abd3c"
+dependencies = [
+ "bytes",
+ "futures-channel",
+ "futures-core",
+ "futures-util",
+ "h2",
+ "http",
+ "http-body",
+ "httparse",
+ "httpdate",
+ "itoa 1.0.5",
+ "pin-project-lite",
+ "socket2",
+ "tokio",
+ "tower-service",
+ "tracing",
+ "want",
+]
+
+[[package]]
+name = "hyper-tls"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905"
+dependencies = [
+ "bytes",
+ "hyper",
+ "native-tls",
+ "tokio",
+ "tokio-native-tls",
+]
+
+[[package]]
+name = "idna"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6"
+dependencies = [
+ "unicode-bidi",
+ "unicode-normalization",
+]
+
+[[package]]
+name = "indexmap"
+version = "1.9.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399"
+dependencies = [
+ "autocfg",
+ "hashbrown",
+]
+
+[[package]]
+name = "instant"
+version = "0.1.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "io-lifetimes"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7d6c6f8c91b4b9ed43484ad1a938e393caf35960fce7f82a040497207bd8e9e"
+dependencies = [
+ "libc",
+ "windows-sys",
+]
+
+[[package]]
+name = "ipnet"
+version = "2.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146"
+
+[[package]]
+name = "iri-string"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f0f7638c1e223529f1bfdc48c8b133b9e0b434094d1d28473161ee48b235f78"
+dependencies = [
+ "nom",
+]
+
+[[package]]
+name = "itertools"
+version = "0.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itoa"
+version = "0.4.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
+
+[[package]]
+name = "itoa"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440"
+
+[[package]]
+name = "js-sys"
+version = "0.3.60"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47"
+dependencies = [
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "kdam"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "333c30a42347fd0f02d948465848f50116854855c9fc7e77e98f3dd31fc73190"
+dependencies = [
+ "colorgrad",
+ "formatx",
+ "terminal_size",
+ "unicode-segmentation",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+
+[[package]]
+name = "levenshtein"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db13adb97ab515a3691f56e4dbab09283d0b86cb45abd991d8634a9d6f501760"
+
+[[package]]
+name = "libc"
+version = "0.2.139"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79"
+
+[[package]]
+name = "linux-raw-sys"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4"
+
+[[package]]
+name = "lock_api"
+version = "0.4.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df"
+dependencies = [
+ "autocfg",
+ "scopeguard",
+]
+
+[[package]]
+name = "log"
+version = "0.4.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "louds-rs"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e16a91fb20f74b6d9a758a0103a2884af525a2fa34fbfe19f4b3c5482a4a54e9"
+dependencies = [
+ "fid-rs",
+]
+
+[[package]]
+name = "memchr"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
+
+[[package]]
+name = "memoffset"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "mime"
+version = "0.3.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d"
+
+[[package]]
+name = "mime_guess"
+version = "2.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef"
+dependencies = [
+ "mime",
+ "unicase",
+]
+
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
+[[package]]
+name = "miniz_oxide"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa"
+dependencies = [
+ "adler",
+]
+
+[[package]]
+name = "mio"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de"
+dependencies = [
+ "libc",
+ "log",
+ "wasi",
+ "windows-sys",
+]
+
+[[package]]
+name = "native-tls"
+version = "0.2.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e"
+dependencies = [
+ "lazy_static",
+ "libc",
+ "log",
+ "openssl",
+ "openssl-probe",
+ "openssl-sys",
+ "schannel",
+ "security-framework",
+ "security-framework-sys",
+ "tempfile",
+]
+
+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "num_cpus"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b"
+dependencies = [
+ "hermit-abi",
+ "libc",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66"
+
+[[package]]
+name = "openssl"
+version = "0.10.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b102428fd03bc5edf97f62620f7298614c45cedf287c271e7ed450bbaf83f2e1"
+dependencies = [
+ "bitflags",
+ "cfg-if",
+ "foreign-types",
+ "libc",
+ "once_cell",
+ "openssl-macros",
+ "openssl-sys",
+]
+
+[[package]]
+name = "openssl-macros"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b501e44f11665960c7e7fcf062c7d96a14ade4aa98116c004b2e37b5be7d736c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "openssl-probe"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
+
+[[package]]
+name = "openssl-sys"
+version = "0.9.80"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23bbbf7854cd45b83958ebe919f0e8e516793727652e27fda10a8384cfc790b7"
+dependencies = [
+ "autocfg",
+ "cc",
+ "libc",
+ "pkg-config",
+ "vcpkg",
+]
+
+[[package]]
+name = "parking_lot"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
+dependencies = [
+ "lock_api",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "parking_lot_core"
+version = "0.9.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba1ef8814b5c993410bb3adfad7a5ed269563e4a2f90c41f5d85be7fb47133bf"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "smallvec",
+ "windows-sys",
+]
+
+[[package]]
+name = "percent-encoding"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
+
+[[package]]
+name = "phf"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c"
+dependencies = [
+ "phf_macros",
+ "phf_shared",
+]
+
+[[package]]
+name = "phf_generator"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf"
+dependencies = [
+ "phf_shared",
+ "rand",
+]
+
+[[package]]
+name = "phf_macros"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "92aacdc5f16768709a569e913f7451034034178b05bdc8acda226659a3dccc66"
+dependencies = [
+ "phf_generator",
+ "phf_shared",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676"
+dependencies = [
+ "siphasher",
+]
+
+[[package]]
+name = "pin-project"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ad29a609b6bcd67fee905812e544992d216af9d755757c05ed2d0e15a74c6ecc"
+dependencies = [
+ "pin-project-internal",
+]
+
+[[package]]
+name = "pin-project-internal"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "pin-project-lite"
+version = "0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
+
+[[package]]
+name = "pin-utils"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
+
+[[package]]
+name = "pkg-config"
+version = "0.3.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.50"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "querystring"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9318ead08c799aad12a55a3e78b82e0b6167271ffd1f627b758891282f739187"
+
+[[package]]
+name = "quote"
+version = "1.0.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rand"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
+dependencies = [
+ "libc",
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom",
+]
+
+[[package]]
+name = "rayon"
+version = "1.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cac410af5d00ab6884528b4ab69d1e8e146e8d471201800fa1b4524126de6ad3"
+dependencies = [
+ "crossbeam-channel",
+ "crossbeam-deque",
+ "crossbeam-utils",
+ "num_cpus",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
+dependencies = [
+ "bitflags",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
+
+[[package]]
+name = "remove_dir_all"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "reqwest"
+version = "0.11.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "21eed90ec8570952d53b772ecf8f206aa1ec9a3d76b2521c56c42973f2d91ee9"
+dependencies = [
+ "base64 0.21.0",
+ "bytes",
+ "encoding_rs",
+ "futures-core",
+ "futures-util",
+ "h2",
+ "http",
+ "http-body",
+ "hyper",
+ "hyper-tls",
+ "ipnet",
+ "js-sys",
+ "log",
+ "mime",
+ "native-tls",
+ "once_cell",
+ "percent-encoding",
+ "pin-project-lite",
+ "serde",
+ "serde_json",
+ "serde_urlencoded",
+ "tokio",
+ "tokio-native-tls",
+ "tower-service",
+ "url",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "web-sys",
+ "winreg",
+]
+
+[[package]]
+name = "rustix"
+version = "0.36.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d4fdebc4b395b7fbb9ab11e462e20ed9051e7b16e42d24042c776eca0ac81b03"
+dependencies = [
+ "bitflags",
+ "errno",
+ "io-lifetimes",
+ "libc",
+ "linux-raw-sys",
+ "windows-sys",
+]
+
+[[package]]
+name = "ryu"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde"
+
+[[package]]
+name = "schannel"
+version = "0.1.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "713cfb06c7059f3588fb8044c0fad1d09e3c01d225e25b9220dbfdcf16dbb1b3"
+dependencies = [
+ "windows-sys",
+]
+
+[[package]]
+name = "scopeguard"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
+
+[[package]]
+name = "security-framework"
+version = "2.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "645926f31b250a2dca3c232496c2d898d91036e45ca0e97e0e2390c54e11be36"
+dependencies = [
+ "bitflags",
+ "core-foundation",
+ "core-foundation-sys",
+ "libc",
+ "security-framework-sys",
+]
+
+[[package]]
+name = "security-framework-sys"
+version = "2.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "31c9bb296072e961fcbd8853511dd39c2d8be2deb1e17c6860b1d30732b323b4"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
+[[package]]
+name = "serde"
+version = "1.0.152"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.152"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.91"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883"
+dependencies = [
+ "itoa 1.0.5",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "serde_urlencoded"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
+dependencies = [
+ "form_urlencoded",
+ "itoa 1.0.5",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "signal-hook-registry"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "siphasher"
+version = "0.3.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
+
+[[package]]
+name = "slab"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "smallvec"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
+
+[[package]]
+name = "socket2"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd"
+dependencies = [
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "syn"
+version = "1.0.107"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "tempfile"
+version = "3.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4"
+dependencies = [
+ "cfg-if",
+ "fastrand",
+ "libc",
+ "redox_syscall",
+ "remove_dir_all",
+ "winapi",
+]
+
+[[package]]
+name = "terminal_size"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb20089a8ba2b69debd491f8d2d023761cbf196e999218c591fa1e7e15a21907"
+dependencies = [
+ "rustix",
+ "windows-sys",
+]
+
+[[package]]
+name = "tinyvec"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50"
+dependencies = [
+ "tinyvec_macros",
+]
+
+[[package]]
+name = "tinyvec_macros"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
+
+[[package]]
+name = "tokio"
+version = "1.24.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "597a12a59981d9e3c38d216785b0c37399f6e415e8d0712047620f189371b0bb"
+dependencies = [
+ "autocfg",
+ "bytes",
+ "libc",
+ "memchr",
+ "mio",
+ "num_cpus",
+ "parking_lot",
+ "pin-project-lite",
+ "signal-hook-registry",
+ "socket2",
+ "tokio-macros",
+ "windows-sys",
+]
+
+[[package]]
+name = "tokio-macros"
+version = "1.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "tokio-native-tls"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7d995660bd2b7f8c1568414c1126076c13fbb725c40112dc0120b78eb9b717b"
+dependencies = [
+ "native-tls",
+ "tokio",
+]
+
+[[package]]
+name = "tokio-util"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0bb2e075f03b3d66d8d8785356224ba688d2906a371015e225beeb65ca92c740"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "futures-sink",
+ "pin-project-lite",
+ "tokio",
+ "tracing",
+]
+
+[[package]]
+name = "tower"
+version = "0.4.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
+dependencies = [
+ "futures-core",
+ "futures-util",
+ "hdrhistogram",
+ "indexmap",
+ "pin-project",
+ "pin-project-lite",
+ "rand",
+ "slab",
+ "tokio",
+ "tokio-util",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "tower-http"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f873044bf02dd1e8239e9c1293ea39dad76dc594ec16185d0a1bf31d8dc8d858"
+dependencies = [
+ "async-compression",
+ "base64 0.13.1",
+ "bitflags",
+ "bytes",
+ "futures-core",
+ "futures-util",
+ "http",
+ "http-body",
+ "http-range-header",
+ "httpdate",
+ "iri-string",
+ "mime",
+ "mime_guess",
+ "percent-encoding",
+ "pin-project-lite",
+ "tokio",
+ "tokio-util",
+ "tower",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+ "uuid",
+]
+
+[[package]]
+name = "tower-layer"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0"
+
+[[package]]
+name = "tower-service"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52"
+
+[[package]]
+name = "tracing"
+version = "0.1.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8"
+dependencies = [
+ "cfg-if",
+ "log",
+ "pin-project-lite",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-core"
+version = "0.1.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a"
+dependencies = [
+ "once_cell",
+]
+
+[[package]]
+name = "trie-rs"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5096c019d49566aff57593a06e401c7f588da84e9a575d0ed2ac0913f51928c0"
+dependencies = [
+ "louds-rs",
+]
+
+[[package]]
+name = "try-lock"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed"
+
+[[package]]
+name = "unicase"
+version = "2.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6"
+dependencies = [
+ "version_check",
+]
+
+[[package]]
+name = "unicode-bidi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0046be40136ef78dc325e0edefccf84ccddacd0afcc1ca54103fa3c61bbdab1d"
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc"
+
+[[package]]
+name = "unicode-normalization"
+version = "0.1.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921"
+dependencies = [
+ "tinyvec",
+]
+
+[[package]]
+name = "unicode-segmentation"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fdbf052a0783de01e944a6ce7a8cb939e295b1e7be835a1112c3b9a7f047a5a"
+
+[[package]]
+name = "url"
+version = "2.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643"
+dependencies = [
+ "form_urlencoded",
+ "idna",
+ "percent-encoding",
+]
+
+[[package]]
+name = "uuid"
+version = "1.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "422ee0de9031b5b948b97a8fc04e3aa35230001a722ddd27943e0be31564ce4c"
+dependencies = [
+ "getrandom",
+]
+
+[[package]]
+name = "vcpkg"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+
+[[package]]
+name = "version_check"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
+
+[[package]]
+name = "want"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0"
+dependencies = [
+ "log",
+ "try-lock",
+]
+
+[[package]]
+name = "wasi"
+version = "0.11.0+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.83"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268"
+dependencies = [
+ "cfg-if",
+ "wasm-bindgen-macro",
+]
+
+[[package]]
+name = "wasm-bindgen-backend"
+version = "0.2.83"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142"
+dependencies = [
+ "bumpalo",
+ "log",
+ "once_cell",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-futures"
+version = "0.4.33"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23639446165ca5a5de86ae1d8896b737ae80319560fbaa4c2887b7da6e7ebd7d"
+dependencies = [
+ "cfg-if",
+ "js-sys",
+ "wasm-bindgen",
+ "web-sys",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.83"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.83"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-backend",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.83"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f"
+
+[[package]]
+name = "web-sys"
+version = "0.3.60"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bcda906d8be16e728fd5adc5b729afad4e444e106ab28cd1c7256e54fa61510f"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+
+[[package]]
+name = "windows-sys"
+version = "0.42.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7"
+dependencies = [
+ "windows_aarch64_gnullvm",
+ "windows_aarch64_msvc",
+ "windows_i686_gnu",
+ "windows_i686_msvc",
+ "windows_x86_64_gnu",
+ "windows_x86_64_gnullvm",
+ "windows_x86_64_msvc",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.42.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.42.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.42.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.42.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.42.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.42.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.42.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd"
+
+[[package]]
+name = "winreg"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d"
+dependencies = [
+ "winapi",
+]
diff --git a/autofeur_db/Cargo.toml b/autofeur_db/Cargo.toml
new file mode 100644 (file)
index 0000000..c305fb3
--- /dev/null
@@ -0,0 +1,32 @@
+[package]
+name = "autofeur"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+trie-rs = "0.1.1"
+csv = "1.1.6"
+unicode-segmentation = "1.10.0"
+rand = "0.8.5"
+serde = { version = "1.0.152", features = ["derive"] }
+serde_json = "1.0.91"
+bincode = "1.3.3"
+
+tower = { version = "0.4", features = ["full"] }
+tower-http = { version = "0.3.5", features = ["full"] }
+hyper = { version = "0.14.23", features = ["server"] }
+tokio = { version = "1.24.2", features = ["full"] }
+reqwest = "0.11.14"
+kdam = { version = "0.3", features = ["gradient", "template"] }
+anyhow = "1.0.68"
+itertools = "0.10.5"
+querystring = "1.1.0"
+levenshtein = "1.0.5"
+
+[[bin]]
+name = "generate"
+
+[[bin]]
+name = "server"
\ No newline at end of file
diff --git a/autofeur_db/Dockerfile b/autofeur_db/Dockerfile
new file mode 100644 (file)
index 0000000..7c1e2f9
--- /dev/null
@@ -0,0 +1,21 @@
+FROM lukemathwalker/cargo-chef:latest-rust-1 AS chef
+WORKDIR /app
+
+# Planner stage: produces recipe.json from the project sources.
+FROM chef AS planner
+COPY . .
+RUN cargo chef prepare --recipe-path recipe.json
+
+FROM chef AS builder
+COPY --from=planner /app/recipe.json recipe.json
+# Build dependencies - this is the caching Docker layer!
+RUN cargo chef cook --release --recipe-path recipe.json
+# Build application
+COPY . .
+RUN cargo build --release --bin server
+
+# We do not need the Rust toolchain to run the binary!
+# NOTE(review): the builder image tracks "latest" while the runtime is pinned to
+# buster; confirm the runtime glibc/OpenSSL are not older than the builder's.
+# NOTE(review): the server reads assets/db.bin relative to /app, and that file is
+# not copied into this stage - presumably mounted at run time; confirm.
+FROM debian:buster-slim AS runtime
+WORKDIR /app
+# Skip recommended packages and drop the apt lists so they do not bloat the image.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends ca-certificates libssl-dev \
+    && rm -rf /var/lib/apt/lists/*
+COPY --from=builder /app/target/release/server /usr/local/bin/server
+ENTRYPOINT ["/usr/local/bin/server"]
diff --git a/autofeur_db/src/bin/generate.rs b/autofeur_db/src/bin/generate.rs
new file mode 100644 (file)
index 0000000..8db8809
--- /dev/null
@@ -0,0 +1,60 @@
+use std::fs;
+
+use autofeur::french_ipa::parse_word;
+use autofeur::save::Save;
+use kdam::tqdm;
+
+#[tokio::main]
+/// Generates the DB file foe easy usage.
+async fn main() {
+    let mut save = Save::default();
+
+    // Read from the
+    let mut vocabulary = csv::Reader::from_path("./assets/dictionary.csv").unwrap();
+    let mut phonems = vec![];
+
+    // Reduce all the records into the save index
+    // this is used to get all the phonemes represented in the csv
+    for record in tqdm!(
+        vocabulary.records(),
+        total = 245178,
+        colour = "gradient(#5A56E0,#EE6FF8)"
+    ) {
+        let record = record.unwrap();
+        let word = record.get(0).unwrap().to_string();
+        let mut pron: Vec<String> = record
+            .get(1)
+            .unwrap()
+            .split(',')
+            .map(|a| {
+                a.to_string()
+                    .trim()
+                    .replace("/", "")
+                    .replace("ʼ", "")
+                    .replace("ː", "")
+                    .replace(" ", "")
+                    .replace(".", "")
+            })
+            .collect();
+        for a in &pron {
+            save.reverse_index.insert(a.clone(), word.clone());
+        }
+        phonems.append(&mut pron);
+    }
+
+    let mut invalid = 0;
+    for phoneme in tqdm!(phonems.iter()) {
+        match parse_word(&phoneme) {
+            Some(a) => save.trie.insert(a),
+            None => {
+                invalid += 1;
+            }
+        }
+    }
+
+    println!("Invalid items count: {}", invalid);
+
+    fs::write("assets/db.bin", bincode::serialize(&save).unwrap()).unwrap();
+
+    println!("Generated to assets/db.bin");
+}
diff --git a/autofeur_db/src/bin/server.rs b/autofeur_db/src/bin/server.rs
new file mode 100644 (file)
index 0000000..376b4e1
--- /dev/null
@@ -0,0 +1,55 @@
+use anyhow::anyhow;
+use autofeur::save::Save;
+use hyper::http::{Request, Response};
+use hyper::{server::Server, Body};
+use std::collections::HashMap;
+use std::{fs, net::SocketAddr, sync::Arc};
+use tower::{make::Shared, ServiceBuilder};
+use tower_http::add_extension::AddExtensionLayer;
+
+fn parse_query(query: &str) -> HashMap<String, String> {
+    query
+        .split('&')
+        .filter_map(|s| {
+            s.split_once('=')
+                .and_then(|t| Some((t.0.to_owned(), t.1.to_owned())))
+        })
+        .collect()
+}
+
+async fn handler(request: Request<Body>) -> Result<Response<Body>, anyhow::Error> {
+    let save: &Arc<Save> = request.extensions().get().unwrap();
+    let query = request
+        .uri()
+        .query()
+        .ok_or_else(|| anyhow!("query does not exists"))?;
+    let data = parse_query(query)
+        .get("grapheme")
+        .ok_or_else(|| anyhow!("grapheme argument is not specified"))?
+        .clone();
+
+    let infered = save
+        .inference(&data)
+        .await
+        .or_else(|_| Err(anyhow!("cannot find data")))?;
+
+    Ok(Response::builder().body(Body::from(infered)).unwrap())
+}
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
+    let checkpoint: Save = bincode::deserialize(&fs::read("assets/db.bin").unwrap()).unwrap();
+    let service = ServiceBuilder::new()
+        .layer(AddExtensionLayer::new(Arc::new(checkpoint)))
+        // Wrap a `Service` in our middleware stack
+        .service_fn(handler);
+
+    // And run our service using `hyper`
+    let addr = SocketAddr::from(([0, 0, 0, 0], 3000));
+    Server::bind(&addr)
+        .http1_only(true)
+        .serve(Shared::new(service))
+        .await
+        .expect("server error");
+    Ok(())
+}
diff --git a/autofeur_db/src/french_ipa.rs b/autofeur_db/src/french_ipa.rs
new file mode 100644 (file)
index 0000000..b758779
--- /dev/null
@@ -0,0 +1,129 @@
+use std::hash::Hash;
+
+use unicode_segmentation::UnicodeSegmentation;
+
+/// Expands to an `if`/`else` chain that compares `$ident` with each literal in
+/// turn and evaluates to `Some(Self(index))` for the first literal that is equal,
+/// where `index` is the position of that literal in the list (starting from the
+/// `$idx` given by the caller). Evaluates to `None` when no literal matches.
+///
+/// Must be expanded inside an `impl` block because it refers to `Self`.
+macro_rules! ipa_element_to_number {
+    // End of the list: nothing matched.
+    (@step $_idx:expr, $ident:ident,) => {
+        None
+    };
+
+    // Test the head literal, then recurse on the tail with the next index.
+    (@step $idx:expr, $ident:ident, $head:literal, $($tail:literal,)*) => {
+        if $ident == $head {
+            Some(Self($idx))
+        }
+        else {
+            ipa_element_to_number!(@step $idx + 1usize, $ident, $($tail,)*)
+        }
+    };
+}
+/// Inverse of `ipa_element_to_number!`: expands to an `if`/`else` chain that
+/// compares `$ident` with each position and evaluates to the literal stored at
+/// that position.
+///
+/// NOTE(review): when `$ident` matches no position the expansion evaluates to the
+/// string literal `"unreachable!()"`; it does not panic. This looks like it was
+/// meant to be the `unreachable!()` macro call - confirm before relying on it.
+macro_rules! ipa_number_to_ipa {
+    // End of the list: no position matched.
+    (@step $_idx:expr, $ident:ident,) => {
+        "unreachable!()"
+    };
+
+    // Test the current position, then recurse on the tail with the next index.
+    (@step $idx:expr, $ident:ident, $head:literal, $($tail:literal,)*) => {
+        if $ident == $idx {
+            $head
+        }
+        else {
+            ipa_number_to_ipa!(@step $idx + 1usize, $ident, $($tail,)*)
+        }
+    };
+}
+
+/// Discards the token tree `$_t` and expands to `$sub`; used by `count_tts!` to
+/// turn every counted token into the same expression.
+macro_rules! replace_expr {
+    ($_t:tt $sub:expr) => {
+        $sub
+    };
+}
+
+/// Expands to `0usize + 1usize + ...` with one `1usize` per token tree received,
+/// i.e. a constant expression equal to the number of token trees.
+///
+/// Every token tree is counted, separators included: a `,` between two items is
+/// itself one token tree, so items must be passed without separators to obtain
+/// the number of items.
+macro_rules! count_tts {
+    ($($tts:tt)*) => {0usize $(+ replace_expr!($tts 1usize))*};
+}
+
+macro_rules! ipa_map {
+    ($name:ident, $($l:literal),*) => {
+        use serde::{Deserialize, Serialize};
+        #[derive(Eq, Hash, PartialEq, Debug, Copy, Clone, Serialize, Deserialize)]
+        pub struct $name(pub usize);
+
+        impl $name {
+            pub const SIZE: usize = count_tts!($($l,)*);
+
+            pub fn from_char(ch: &str) -> Option<$name> {
+                ipa_element_to_number!(@step 0usize, ch, $($l,)*)
+            }
+            pub fn to_char(self) -> &'static str {
+                let num = self.0;
+                ipa_number_to_ipa!(@step 0usize, num, $($l,)*)
+            }
+        }
+    };
+}
+
+// Alphabet used for the French pronunciations. The position of each literal is
+// the numeric value wrapped by `FrenchIPAChar`, and that value is what gets
+// serialised, so reordering or removing an entry changes the meaning of any
+// previously generated database.
+// The nasal vowels are written as a base letter followed by a combining tilde;
+// `parse_word` splits its input into grapheme clusters before looking them up.
+ipa_map!(
+    FrenchIPAChar,
+    "a",
+    "ɑ",
+    "ɑ̃",
+    "e",
+    "ɛ",
+    "ɛ̃",
+    "ə",
+    "i",
+    "o",
+    "ɔ",
+    "ɔ̃",
+    "œ",
+    "œ̃",
+    "ø",
+    "u",
+    "y",
+    "j",
+    "ɥ",
+    "w",
+    "b",
+    "d",
+    "f",
+    "g",
+    "k",
+    "l",
+    "m",
+    "n",
+    "ɲ",
+    "ŋ",
+    "p",
+    "ʁ",
+    "s",
+    "ʃ",
+    "t",
+    "v",
+    "z",
+    "ʒ",
+    // NOTE(review): this entry appears to duplicate the "g" listed earlier; if so
+    // `from_char` can never return this position - confirm before removing it,
+    // since removal would shift the positions of the entries below.
+    "g",
+    "ɡ",
+    "ɪ",
+    "ʊ",
+    "x",
+    "r"
+);
+
+pub type FrenchIPAWord = Vec<FrenchIPAChar>;
+
+pub fn parse_word(str: &str) -> Option<FrenchIPAWord> {
+    let mut word = FrenchIPAWord::default();
+    let graphemes: Vec<&str> = str.graphemes(true).collect();
+    for (_, grapheme) in graphemes.iter().enumerate() {
+        let a = FrenchIPAChar::from_char(grapheme);
+
+        word.push(match a {
+            None => {
+                println!("invalid char: {}", grapheme);
+                return None;
+            }
+            Some(a) => a,
+        })
+    }
+
+    Some(word)
+}
diff --git a/autofeur_db/src/inference.rs b/autofeur_db/src/inference.rs
new file mode 100644 (file)
index 0000000..8b833a0
--- /dev/null
@@ -0,0 +1,61 @@
+use std::{collections::VecDeque, env, ops::Add};
+
+use anyhow::anyhow;
+use itertools::Itertools;
+use levenshtein::levenshtein;
+use unicode_segmentation::UnicodeSegmentation;
+
+use crate::{french_ipa::parse_word, save::Save};
+
+async fn call_inference_service(word: &str) -> anyhow::Result<String> {
+    let server: Result<String, anyhow::Error> =
+        env::var("PHONEMIZER").or_else(|_| Ok("".to_string()));
+    Ok(
+        reqwest::get(format!("{}?grapheme={}", server.unwrap(), word))
+            .await?
+            .text()
+            .await?,
+    )
+}
+
+impl Save {
+    pub async fn inference(&self, prefix: &str) -> anyhow::Result<String> {
+        let phonemes = call_inference_service(prefix).await?;
+        let ipa_phonemes =
+            parse_word(&phonemes).ok_or_else(|| anyhow!("failed to parse the word"))?;
+
+        let completion = self
+            .trie
+            .random_starting_with(ipa_phonemes)
+            .ok_or_else(|| anyhow!("no matches"))?;
+
+        let infered = phonemes.add(&completion);
+        let word = self
+            .reverse_index
+            .get(&infered)
+            .ok_or_else(|| anyhow!("matched values is not in dictionary"))?;
+
+        println!("Matching {} by adding {}", word, completion);
+
+        // we finally just need to compute the end of the word which matches the sound
+        let mut found = None;
+
+        let mut characters: VecDeque<&str> = word.graphemes(true).collect();
+        while let Some(_) = characters.pop_front() {
+            let sub: String = characters.iter().join("");
+            let inference = call_inference_service(&sub).await?;
+
+            if levenshtein(&inference, &completion) < 2 {
+                found = Some(sub);
+                break;
+            } else {
+                println!("did not match a={}, b={}", inference, completion)
+            }
+        }
+
+        let found = found.ok_or_else(|| anyhow!("no prefix could be matched"))?;
+        println!("{} is equivalent to {}", completion, found);
+
+        Ok(format!("{} ({})", found, word))
+    }
+}
diff --git a/autofeur_db/src/lib.rs b/autofeur_db/src/lib.rs
new file mode 100644 (file)
index 0000000..9c8ba4b
--- /dev/null
@@ -0,0 +1,4 @@
+pub mod trie;
+pub mod french_ipa;
+pub mod save;
+pub mod inference;
diff --git a/autofeur_db/src/save.rs b/autofeur_db/src/save.rs
new file mode 100644 (file)
index 0000000..c64a430
--- /dev/null
@@ -0,0 +1,11 @@
+use std::collections::HashMap;
+
+use serde::{Deserialize, Serialize};
+
+use crate::trie::Trie;
+
+/// Data persisted to and loaded from the database file: the phoneme trie and
+/// the index used to map a pronunciation back to a written word.
+#[derive(Debug, Deserialize, Serialize, Default)]
+pub struct Save {
+    /// Trie of the pronunciations, stored as sequences of IPA symbols.
+    pub trie: Trie,
+    /// Maps a pronunciation string to a written word; a pronunciation has at
+    /// most one word since the map holds a single value per key.
+    pub reverse_index: HashMap<String, String>
+}
\ No newline at end of file
diff --git a/autofeur_db/src/trie.rs b/autofeur_db/src/trie.rs
new file mode 100644 (file)
index 0000000..43a1be8
--- /dev/null
@@ -0,0 +1,169 @@
+use std::collections::HashMap;
+
+use rand::{thread_rng, Rng};
+use serde::{Deserialize, Serialize};
+
+use crate::french_ipa::{FrenchIPAChar, FrenchIPAWord};
+
+#[derive(Debug, Serialize, Deserialize, Default)]
+pub struct TrieNode {
+    value: Option<FrenchIPAChar>,
+    is_final: bool,
+    child_nodes: HashMap<FrenchIPAChar, TrieNode>,
+    child_count: u64,
+}
+
+impl TrieNode {
+    // Create new node
+    pub fn new(c: FrenchIPAChar, is_final: bool) -> TrieNode {
+        TrieNode {
+            value: Option::Some(c),
+            is_final,
+            child_nodes: HashMap::with_capacity(FrenchIPAChar::SIZE),
+            child_count: 0,
+        }
+    }
+
+    pub fn new_root() -> TrieNode {
+        TrieNode {
+            value: Option::None,
+            is_final: false,
+            child_nodes: HashMap::with_capacity(FrenchIPAChar::SIZE),
+            child_count: 0,
+        }
+    }
+}
+
+/// Trie of phoneme sequences; every lookup and insertion starts from `root_node`.
+#[derive(Debug, Serialize, Deserialize, Default)]
+pub struct Trie {
+    /// Entry node of the trie (built with `TrieNode::new_root` by `Trie::new`).
+    root_node: Box<TrieNode>,
+}
+
+impl Trie {
+    // Create a TrieStruct
+    pub fn new() -> Trie {
+        Trie {
+            root_node: Box::new(TrieNode::new_root()),
+        }
+    }
+
+    // Insert a string
+    pub fn insert(&mut self, char_list: FrenchIPAWord) {
+        let mut current_node: &mut TrieNode = self.root_node.as_mut();
+        let mut last_match = 0;
+
+        // Find the minimum existing math
+        for letter_counter in 0..char_list.len() {
+            if current_node
+                .child_nodes
+                .contains_key(&char_list[letter_counter])
+            {
+                current_node = current_node
+                    .child_nodes
+                    .get_mut(&char_list[letter_counter])
+                    .unwrap();
+                // we mark the node as containing our children.
+                current_node.child_count += 1;
+            } else {
+                last_match = letter_counter;
+                break;
+            }
+            last_match = letter_counter + 1;
+        }
+
+        // if we found an already exsting node
+        if last_match == char_list.len() {
+            current_node.is_final = true;
+        } else {
+            for new_counter in last_match..char_list.len() {
+                let key = char_list[new_counter];
+                current_node
+                    .child_nodes
+                    .insert(key, TrieNode::new(char_list[new_counter], false));
+                current_node = current_node.child_nodes.get_mut(&key).unwrap();
+                current_node.child_count += 1;
+            }
+            current_node.is_final = true;
+        }
+    }
+
+    // Find a string
+    pub fn random_starting_with(&self, prefix: FrenchIPAWord) -> Option<String> {
+        let mut current_node: &TrieNode = self.root_node.as_ref();
+        let mut str = String::new();
+        let mut i = prefix.len();
+        // Descend as far as possible into the tree
+        for counter in prefix {
+            if let Some(node) = current_node.child_nodes.get(&counter) {
+                current_node = node;
+                if let Some(value) = current_node.value {
+                    str += value.to_char();
+                    i -= 1;
+                }
+            } else {
+                // couldn't descend fully into the tree
+                return None;
+            }
+        }
+
+        println!("Found common root node {}", str);
+
+        // Ignore the 0-len matches
+        if i == 0 && current_node.child_nodes.len() == 0 {
+            println!("removing 0-len match");
+            return None;
+        }
+        str = String::new();
+
+        // now that we have the node we descend by respecting the probabilities
+        while current_node.child_nodes.len() != 0 && current_node.child_count > 0 {
+            println!("Descending into node {}", str);
+            let max = current_node.child_count;
+            let random_number = thread_rng().gen_range(0..max);
+            let mut increment = 0;
+
+            let mut did_change = false;
+            // find node corresponding to the node
+            for (_, node) in &current_node.child_nodes {
+                if node.child_count + increment >= random_number {
+                    println!("changing node");
+                    current_node = node;
+                    did_change = true;
+                    break;
+                } else {
+                    println!(
+                        "didn't change node: {}<{}",
+                        node.child_count + increment,
+                        random_number
+                    )
+                }
+                increment += node.child_count;
+            }
+            if did_change {
+                if let Some(value) = current_node.value {
+                    println!("added {}", value.to_char());
+                    str += value.to_char();
+                }
+            } else {
+                println!(
+                    "WARNING: DIDNT CHANGE NODE child_count={}",
+                    current_node.child_count
+                )
+            }
+            // if this node is a final node, we have a probability of using it
+            if current_node.is_final && current_node.child_count > 0 {
+                let random_number = thread_rng().gen_range(0..current_node.child_count);
+                if random_number == 0 {
+                    break;
+                }
+            }
+        }
+
+        if str == "" {
+            return None;
+        }
+
+        // selected word
+        Some(str)
+    }
+}
diff --git a/autofeur_nova/.dockerignore b/autofeur_nova/.dockerignore
new file mode 100644 (file)
index 0000000..cbc73d0
--- /dev/null
@@ -0,0 +1,3 @@
+config/
+dist/
+node_modules/
index bfe03e6e6f8eea6413c163486f4dc5f57240a764..b5017339f90a14bb9f7865cd3192434ba1c87c2f 100644 (file)
@@ -1,4 +1,5 @@
 bin/
 node_modules/
 dist/
-config/
\ No newline at end of file
+config/*
+!config/default.example.yml
index 7da8c1f0718f1aa92ae287afbd33520fb433e39d..d18b2012cea9638fcf1c2e6df8c38c666485fee8 100644 (file)
@@ -2,28 +2,18 @@ FROM node as builder
 
 # Create app directory
 WORKDIR /usr/src/app
-
 # Install app dependencies
 COPY package*.json ./
-
 RUN npm i
-
 COPY . .
-
-RUN npm run build
+RUN npm run build || true
 
 FROM node:slim
-
 # Create app directory
 WORKDIR /usr/src/app
-
 # Install app dependencies
 COPY package*.json ./
-COPY data.json .
-
 RUN npm i --omit=dev --production
-
 COPY --from=builder /usr/src/app/dist ./dist
 
-EXPOSE 8080
-CMD [ "node", "dist/index.mjs" ]
\ No newline at end of file
+CMD [ "node", "dist/index.mjs" ]
diff --git a/autofeur_nova/config/default.example.yml b/autofeur_nova/config/default.example.yml
new file mode 100644 (file)
index 0000000..4dc36b1
--- /dev/null
@@ -0,0 +1,59 @@
+# Example configuration: docker-compose.yaml mounts ./autofeur_nova/config/default.yml,
+# so copy this file there and fill in the keys marked below (they are empty until then).
+gateway:
+  token: # You need to fill this!
+  intents: 3276799
+  # NOTE(review): docker-compose.yaml sets NOVA__GATEWAY__SHARD on the gateway
+  # container; presumably that overrides this key (confirm).
+  shard: 0
+  shard_total: 1
+
+# Port 8090 is the one autofeur_nova's default REST endpoint points at
+# (http://localhost:8090/api).
+rest:
+  discord:
+    token: # You need to fill this!
+  server:
+    listening_adress: 0.0.0.0:8090
+  ratelimiter_address: localhost # You need to change this to your ratelimit server address!
+  # Same port as `ratelimiter.server.listening_adress` below.
+  ratelimiter_port: 8092
+
+webhook:
+  discord:
+    public_key: # You need to fill this
+  server:
+    listening_adress: 0.0.0.0:8091
+
+cache:
+  toggles:
+    - channels_cache
+    - guilds_cache
+    - guild_schedules_cache
+    - stage_instances_cache
+    - integrations_cache
+    - members_cache
+    - bans_cache
+    - reactions_cache
+    - messages_cache
+    - threads_cache
+    - invites_cache
+    - roles_cache
+    - automoderation_cache
+    - voice_states_cache
+
+ratelimiter:
+  server:
+    listening_adress: 0.0.0.0:8092
+
+# Prometheus monitoring configuration
+monitoring:
+  enabled: false
+  address: 0.0.0.0
+  port: 9000
+
+# Nats broker configuration
+# (`nats` is the name of the broker service in docker-compose.yaml)
+nats:
+  host: nats
+
+# NOTE(review): `redis` is listed as a docker-compose dependency; presumably the
+# host in this URL is that service (confirm).
+redis:
+  url: redis://redis
+
+# opentelemetry:
+#   metrics:
+#     endpoint: http://otelcol:4317
+#   traces:
+#     endpoint: http://otelcol:4317
diff --git a/autofeur_nova/data.json b/autofeur_nova/data.json
deleted file mode 100644 (file)
index 53533d5..0000000
+++ /dev/null
@@ -1 +0,0 @@
-[{"phoneme":"ta mɛʁ","word":"ta mère","partials":{"ɛʁ":"ère","mɛʁ":"mère"," mɛʁ":" mère","a mɛʁ":"a mère","ta mɛʁ":"ta mère"}},{"phoneme":"tapi","word":"tapis","partials":{"pi":"pis","api":"apis","tapi":"tapis"}},{"phoneme":"tape","word":"taper","partials":{"pɛʁ":"per","ape":"aper","tape":"taper"}},{"phoneme":"taʁe","word":"taré","partials":{"aʁe":"aré","taʁe":"taré"}},{"phoneme":"tabase","word":"tabasser","partials":{"sɛʁ":"ser","se":"sser","ase":"asser","bɑse":"basser","abase":"abasser","tabase":"tabasser"}},{"phoneme":"tabuʁɛ","word":"tabouret","partials":{"ʁɛ":"ret","yʁɛ":"uret","uʁɛ":"ouret","buʁɛ":"bouret","abuʁɛ":"abouret","tabuʁɛ":"tabouret"}},{"phoneme":"ʁiɡɔl","word":"rigole","partials":{"ɔl":"ole","ɡɔl":"gole","iɡɔl":"igole","ʁiɡɔl":"rigole"}},{"phoneme":"amɛn","word":"amène","partials":{"ɛn":"ène","mɛn":"mène","amɛn":"amène"}},{"phoneme":"atʃum","word":"atchoum","partials":{"um":"houm","ʃum":"choum","tʃum":"tchoum","atʃum":"atchoum"}},{"phoneme":"abʁakadabʁa","word":"abracadabra","partials":{"bʁa":"bra","abʁa":"abra","dabʁa":"dabra","adabʁa":"adabra","kadabʁa":"cadabra","akadabʁa":"acadabra","ʁakadabʁa":"racadabra","bʁakadabʁa":"bracadabra","abʁakadabʁa":"abracadabra"}},{"phoneme":"abɛj","word":"abeille","partials":{"l":"lle","ilj":"ille","ɛj":"eille","bɛj":"beille","abɛj":"abeille"}},{"phoneme":"alibaba","word":"alibaba","partials":{"aba":"aba","baba":"baba","ibaba":"ibaba","libaba":"libaba","alibaba":"alibaba"}},{"phoneme":"aʁnak","word":"arnaque","partials":{"kə":"que","ak":"aque","nak":"naque","ʁnak":"rnaque","aʁnak":"arnaque"}},{"phoneme":"mɛzɔ̃","word":"maison","partials":{"sɔ̃":"son","izɔ̃":"ison","ɛzɔ̃":"aison","mɛzɔ̃":"maison"}},{"phoneme":"nɔ̃bʁil","word":"nombril","partials":{"ʁil":"ril","bʁil":"bril","mbʁil":"mbril","ɔ̃bʁil":"ombril","nɔ̃bʁil":"nombril"}},{"phoneme":"lapɛ̃","word":"lapin","partials":{"pɛ̃":"pin","apɛ̃":"apin","lapɛ̃":"lapin"}},{"phoneme":"wistiti","word":"ouistiti","partials":{"iti":"iti","titi":"titi","stiti":"stiti","istit
i":"istiti","ɥistiti":"uistiti","wistiti":"ouistiti"}},{"phoneme":"wifi","word":"wifi","partials":{"ifi":"ifi","wifi":"wifi"}},{"phoneme":"wiski","word":"wisky","partials":{"ski":"sky","iski":"isky","wiski":"wisky"}},{"phoneme":"ʁənaʁ","word":"renard","partials":{"aʁ":"ard","naʁ":"nard","ɑ̃naʁ":"enard","ʁənaʁ":"renard"}},{"phoneme":"ʁəkɛ̃","word":"requin","partials":{"wɛ̃":"uin","kɛ̃":"quin","əkɛ̃":"equin","ʁəkɛ̃":"requin"}},{"phoneme":"ʁəpɑ","word":"repas","partials":{"pa":"pas","epɑ":"epas","ʁəpɑ":"repas"}},{"phoneme":"ʁətaʁ","word":"retard","partials":{"aʁ":"ard","taʁ":"tard","etaʁ":"etard","ʁətaʁ":"retard"}},{"phoneme":"kwafœʁ","word":"coiffeur","partials":{"œʁ":"eur","fœʁ":"ffeur","ifœʁ":"iffeur","wafœʁ":"oiffeur","kwafœʁ":"coiffeur"}},{"phoneme":"kwaføz","word":"coiffeuse","partials":{"yz":"use","øz":"euse","føz":"ffeuse","iføz":"iffeuse","waføz":"oiffeuse","kwaføz":"coiffeuse"}},{"phoneme":"kiʁiku","word":"kirikou","partials":{"ku":"kou","iku":"ikou","ʁiku":"rikou","iʁiku":"irikou","kiʁiku":"kirikou"}},{"phoneme":"kiʁi","word":"kiri","partials":{"iʁi":"iri","kiʁi":"kiri"}},{"phoneme":"wɛstɛʁn","word":"western","partials":{"ɛʁn":"ern","tɛʁ":"tern","stɛʁn":"stern","ɛstɛʁ":"estern","wɛstɛʁn":"western"}},{"phoneme":"œ̃ dø","word":"un deux","partials":{"ø":"eux","dø":"deux"," dø":" deux","ɛn dø":"n deux","œ̃ dø":"un deux"}},{"phoneme":"dø tʁwa","word":"deux trois","partials":{"wa":"ois","ʁwa":"rois","tʁwa":"trois"," tʁwa":" trois","iks tʁwa":"x trois","yks tʁwa":"ux trois","ø tʁwa":"eux trois","dø tʁwa":"deux 
trois"}},{"phoneme":"jɔplɛ","word":"yoplait","partials":{"ɛ":"ait","lɛ":"lait","plɛ":"plait","ɔplɛ":"oplait","jɔplɛ":"yoplait"}},{"phoneme":"avalɑ̃ʃ","word":"avalanche","partials":{"ʃə":"che","ɛnʃ":"nche","ɑ̃ʃ":"anche","lɑ̃ʃ":"lanche","alɑ̃ʃ":"alanche","valɑ̃ʃ":"valanche","avalɑ̃ʃ":"avalanche"}},{"phoneme":"mwazisyʁ","word":"moisissure","partials":{"yʁ":"ure","syʁ":"ssure","isyʁ":"issure","sisyʁ":"sissure","izisyʁ":"isissure","wazisyʁ":"oisissure","mwazisyʁ":"moisissure"}},{"phoneme":"mwasɔ̃","word":"moisson","partials":{"sɔ̃":"sson","isɔ̃":"isson","wasɔ̃":"oisson","mwasɔ̃":"moisson"}},{"phoneme":"mwano","word":"moineau","partials":{"o":"eau","no":"neau","ino":"ineau","wano":"oineau","mwano":"moineau"}},{"phoneme":"ekɔl","word":"école","partials":{"ɔl":"ole","kɔl":"cole","ekɔl":"école"}},{"phoneme":"kɔmɑ̃tɛʁ","word":"commentaire","partials":{"iʁ":"ire","ɛʁ":"aire","tɛʁ":"taire","ɛntɛʁ":"ntaire","ɑ̃tɛʁ":"entaire","mɑ̃tɛʁ":"mmentaire","ɔmɑ̃tɛʁ":"ommentaire","kɔmɑ̃tɛʁ":"commentaire"}},{"phoneme":"kɑ̃tifikatœʁ","word":"quantificateur","partials":{"œʁ":"eur","tœʁ":"teur","atœʁ":"ateur","katœʁ":"cateur","ikatœʁ":"icateur","fikatœʁ":"ficateur","ifikatœʁ":"ificateur","tifikatœʁ":"tificateur","̃tifikatœʁ":"ntificateur","ɑ̃tifikatœʁ":"antificateur","ɥɑ̃tifikatœʁ":"uantificateur","kɑ̃tifikatœʁ":"quantificateur"}},{"phoneme":"kɔmɑ̃dɑ̃","word":"commandant","partials":{"ɑ̃":"ant","dɑn":"dant","ndɑ̃":"ndant","ɑ̃dɑ̃":"andant","mɑ̃dɑ̃":"mmandant","ɔmɑ̃dɑ̃":"ommandant","kɔmɑ̃dɑ̃":"commandant"}},{"phoneme":"klɛʁ kazal","word":"claire chazal","partials":{"zal":"zal","azal":"hazal","kazal":"chazal"," kazal":" chazal","ø kazal":"e chazal","ʁ kazal":"re chazal","iʁ kazal":"ire chazal","ɛʁ kazal":"aire chazal","lɛʁ kazal":"laire chazal","klɛʁ kazal":"claire 
chazal"}},{"phoneme":"tɔʁnad","word":"tornade","partials":{"ad":"ade","nad":"nade","ʁnad":"rnade","ɔʁnad":"ornade","tɔʁnad":"tornade"}},{"phoneme":"bɔt","word":"bottes","partials":{"te":"tes","t":"ttes","ɔt":"ottes","bɔt":"bottes"}},{"phoneme":"bɔ̃swaʁ paʁijis","word":"bonsoir pariiiss","partials":{"is":"iiss","jis":"iiiss","ʁijis":"riiiss","aʁijis":"ariiiss","paʁijis":"pariiiss"," paʁijis":" pariiiss","ɛʁ paʁijis":"r pariiiss","iʁ paʁijis":"ir pariiiss","waʁ paʁijis":"oir pariiiss","swar paʁijis":"soir pariiiss","nswaʁ paʁijis":"nsoir pariiiss","ɔ̃swaʁ paʁijis":"onsoir pariiiss","bɔ̃swaʁ paʁijis":"bonsoir pariiiss"}},{"phoneme":"kuʁtwa","word":"courtois","partials":{"wa":"ois","twa":"tois","ʁtwa":"rtois","yʁtwa":"urtois","uʁtwa":"ourtois","kuʁtwa":"courtois"}},{"phoneme":"faktœʁ","word":"facteur","partials":{"œʁ":"eur","tœʁ":"teur","ktœʁ":"cteur","aktœʁ":"acteur","faktœʁ":"facteur"}},{"phoneme":"ʒeʁaʁ","word":"gérard","partials":{"aʁ":"ard","ʁaʁ":"rard","eʁaʁ":"érard","ʒeʁaʁ":"gérard"}},{"phoneme":"kwadʁilatɛʁ","word":"quoidrilatère","partials":{"ɛʁ":"ère","tɛʁ":"tère","atɛʁ":"atère","latɛʁ":"latère","ilatɛʁ":"ilatère","ʁilatɛʁ":"rilatère","dʁilatɛʁ":"drilatère","idʁilatɛʁ":"idrilatère","wadʁilatɛʁ":"oidrilatère","ywadʁilatɛʁ":"uoidrilatère","kwadʁilatɛʁ":"quoidrilatère"}},{"phoneme":"pəp","word":"pepe","partials":{"ɛp":"epe","pəp":"pepe"}},{"phoneme":"sœʁfœʁ","word":"surfeur","partials":{"œʁ":"eur","fœʁ":"feur","ʁfœʁ":"rfeur","œʁfœʁ":"urfeur","sœʁfœʁ":"surfeur"}},{"phoneme":"twalɛt","word":"toilettes","partials":{"te":"tes","t":"ttes","ɛt":"ettes","lɛt":"lettes","ilɛt":"ilettes","walɛt":"oilettes","twalɛt":"toilettes"}},{"phoneme":"ləbʁɔ̃ ʒam","word":"lebron james","partials":{"me":"mes","am":"ames","ʒam":"james"," ʒam":" james","ɛn ʒam":"n james","ɔ̃ ʒam":"on james","ʁɔ̃ ʒam":"ron james","bʁɔ̃ ʒam":"bron james","ebʁɔ̃ ʒam":"ebron james","ləbʁɔ̃ ʒam":"lebron james"}},{"phoneme":"sɛst də la mɛʁd","word":"c'est de la 
merde","partials":{"ʁd":"rde","ɛʁd":"erde","mɛʁd":"merde"," mɛʁd":" merde","a mɛʁd":"a merde","la mɛʁd":"la merde"," la mɛʁd":" la merde","ø la mɛʁd":"e la merde","də la mɛʁd":"de la merde"," də la mɛʁd":" de la merde","te də la mɛʁd":"t de la merde","st də la mɛʁd":"st de la merde","ɛ də la mɛʁd":"'est de la merde","sɛst də la mɛʁd":"c'est de la merde"}},{"phoneme":"tʁwa kat","word":"trois quatre","partials":{"tʁ":"tre","atʁ":"atre","yatʁ":"uatre","kat":"quatre"," kat":" quatre","ɛs kat":"s quatre","i kat":"is quatre","wa kat":"ois quatre","ʁwa kat":"rois quatre","tʁwa kat":"trois quatre"}},{"phoneme":"kat sɛ̃","word":"quatre cinq","partials":{"ɛ̃ky":"inq","sɛ̃":"cinq"," sɛ̃":" cinq","ø sɛ̃":"e cinq","ʁ sɛ̃":"re cinq","tʁ sɛ̃":"tre cinq","atʁ sɛ̃":"atre cinq","yatʁ sɛ̃":"uatre cinq","kat sɛ̃":"quatre cinq"}},{"phoneme":"sɛ̃ si","word":"cinq six","partials":{"si":"six"," si":" six","ky si":"q six","ɛneky si":"nq six","ɛ̃ky si":"inq six","sɛ̃ si":"cinq six"}},{"phoneme":"si sɛ","word":"six sept","partials":{"ɛp":"ept","sɛ":"sept"," sɛ":" sept","iks sɛ":"ix sept","si sɛ":"six sept"}}]
\ No newline at end of file
index d254b9723b9c05a9b263bdd2053ee760693771f2..b40f60cbba27bb8271b4338eb4f3cab6d4d4a8f8 100644 (file)
@@ -8,8 +8,8 @@
   "type": "module",
   "license": "Apache-2.0",
   "dependencies": {
-    "@discordnova/nova-cli": "0.0.4",
-    "@discordnova/nova-js": "^0.0.4",
+    "@discordnova/nova-cli": "0.0.5",
+    "@discordnova/nova-js": "0.0.5",
     "source-map-support": "^0.5.21",
     "tslib": "^2.4.1",
     "undici": "^5.15.0"
@@ -17,7 +17,7 @@
   "devDependencies": {
     "@types/node": "^18.11.18",
     "discord-api-types": "^0.37.25",
-    "typescript": "^4.9.4",
+    "typescript": "^5.0.0-dev.20230120",
     "xo": "^0.53.1"
   },
   "scripts": {
diff --git a/autofeur_nova/src/algo.mts b/autofeur_nova/src/algo.mts
deleted file mode 100644 (file)
index f4532d2..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-import { readFileSync } from "fs";
-import { request } from "undici";
-import { phonemize } from "./phonemizelib.mjs";
-
-let data: {
-  word: string;
-  phoneme: string;
-  partials: string[];
-}[] = JSON.parse(readFileSync("./data.json").toString("utf8"));
-
-const cutWord = (sentence: string) => {
-  let lastWord = sentence.split(" ").slice(-1)[0].replace(/(\?)/g, "");
-  return phonemize(lastWord);
-};
-
-export const match = async (sentence: string) => {
-  let scores: { complete: string; score: number }[] = [];
-  let sentenceWord = await cutWord(sentence);
-  console.debug("handling word phoneme = ", sentenceWord);
-
-  for (const { phoneme, word, partials } of data) {
-    console.debug("\ttesting with word = ", word, phoneme);
-
-    
-    for (let i = 1; i < phoneme.length; i++) {
-      // add n last characters from the phoneme
-      let add = phoneme.slice(phoneme.length - i, phoneme.length);
-      console.debug(
-        "\t\ttesting match with = ",
-        add,
-        " add = ",
-        sentenceWord + add
-      );
-
-      // we matched a phoneme
-      if (phoneme == sentenceWord + add) {
-        let score = 1 / (i / phoneme.length);
-
-        // next, we need to find the completion of the word
-        // this is relatively easy since we only need to 
-        let complete = partials[add];
-
-        if (!complete) {
-          // cannot find the comlpetion count.
-          // default to index
-          console.log("couldn't find corresponding cut", add);
-          complete = word;
-          continue;
-        }
-
-        console.log("\t\tmatched with score = ", score, " complete = ", complete);
-
-        // need to change to the cut-ed version.
-        scores.push({ score, complete });
-        break;
-      }
-    }
-  }
-
-  let resp = scores.sort((a, b) => b.score - a.score);
-  return resp[0]?.complete;
-};
-
-
-match("quoi");
\ No newline at end of file
index b808d4d829b76d618f633c7547f7d817b1655406..1c0dfbdf47f60778ca38380b9342dfdf88bbf1d4 100644 (file)
@@ -4,31 +4,70 @@ import {
   RESTPostAPIChannelMessageJSONBody,
   Routes,
 } from "discord-api-types/v10";
-import { match } from "./algo.mjs";
 import { Client } from "@discordnova/nova-js/src/lib/client.js";
+import { request } from "undici";
 
+// `autofeur_db` service
+export const DB = process.env.DB || "http://localhost:3000";
+// nats broker for connecting to nova
 export const NATS = process.env.NATS || "localhost:4222";
+// rest endpoint for connecting to nova
 export const REST = process.env.REST || "http://localhost:8090/api";
 
-(async () => {
-  const emitter = new Client({
-    transport: {
-      additionalEvents: [],
-      nats: {
-        servers: [NATS],
-      },
-      queue: "nova-worker-common",
-    },
-    rest: {
-      api: REST,
+/**
+ * Completes a grapheme using the `autofeur_db` service.
+ * @param grapheme Grapheme to complete
+ * @returns Completed grapheme
+ * @throws Error when the service answers with a non-2xx status code
+ */
+export const completeWord = async (grapheme: string) => {
+  const { statusCode, body } = await request(
+    `${DB}?grapheme=${encodeURIComponent(grapheme)}`
+  );
+  // Read the body even on errors: it carries the error text and must be consumed.
+  const text = await body.text();
+  // An error answer is not a completion; do not hand it back as one.
+  if (statusCode < 200 || statusCode >= 300) {
+    throw new Error(`autofeur_db answered ${statusCode}: ${text}`);
+  }
+  return text;
+};
+
+/**
+ * Cleans a sentence for usage with this program, strips unwanted chars
+ * @param sentence Raw discord sentence
+ * @returns The last space-separated word, keeping only the characters
+ * A-Z, a-z, \xC0-\xFF and whitespace
+ */
+const cutWord = (sentence: string) => {
+  const lastWord = sentence
+    .split(" ")
+    .slice(-1)[0]
+    // Inside a character class only the leading `^` negates; the extra `^`
+    // the class used to contain were literal carets, so "^" was never stripped.
+    // A whitespace directly before a stripped character is removed with it.
+    .replaceAll(/\s?[^\x41-\x5A\x61-\x7A\xC0-\xFF\s]/g, "");
+  return lastWord;
+};
+
+/**
+ * Nova client for receiving events
+ */
+const emitter = new Client({
+  transport: {
+    additionalEvents: [],
+    nats: {
+      servers: [NATS],
     },
-  });
+    queue: "autofeur_nova",
+  },
+  rest: {
+    api: REST,
+  },
+});
+
+/**
+ * Handle the message creation event: complete the last word of non-bot messages and post the result in the channel
+ */
+emitter.on(
+  "messageCreate",
+  async (message: GatewayMessageCreateDispatch["d"]) => {
+    // we shall not respond to bots
+    if (message.author.bot) return;
+    try {
+      // Get the completed word found by the db.
+      let response = await completeWord(cutWord(message.content));
 
-  emitter.on(
-    "messageCreate",
-    async (message: GatewayMessageCreateDispatch["d"]) => {
-      let response = await match(message.content);
-      if (response) {
+      // Ignore if there is no completion (empty response)
+      if (response) {
+        // Respond to the message.
         await emitter.rest.post(Routes.channelMessages(message.channel_id), {
           body: {
             content: response,
@@ -36,9 +75,9 @@ export const REST = process.env.REST || "http://localhost:8090/api";
           } as RESTPostAPIChannelMessageJSONBody,
         });
       }
-    }
-  );
+    } catch (e) { console.debug("autofeur: could not answer message", e); }
+  }
+);
 
-  // We connect ourselves to the nova nats broker.
-  await emitter.start();
-})();
+// Start the service (listening for events.)
+(async () => await emitter.start())();
diff --git a/autofeur_nova/src/phonemizelib.mts b/autofeur_nova/src/phonemizelib.mts
deleted file mode 100644 (file)
index e8547ba..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-import { request } from "undici";
-
-export const PHONEMIZER = process.env.PHONEMIZER || "http://localhost:5000";
-export const phonemize = (grapheme: string) =>
-  request(`${PHONEMIZER}?grapheme=${encodeURIComponent(grapheme)}`).then((x) =>
-    x.body.text()
-  );
diff --git a/autofeur_nova/src/preprocess.mts b/autofeur_nova/src/preprocess.mts
deleted file mode 100644 (file)
index 7671006..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-import { writeFileSync } from "fs";
-import { request } from "undici";
-import { phonemize } from "./phonemizelib.mjs";
-
-let jsonData: {
-  word: string;
-  phoneme: string;
-  partials: Record<string, string>;
-}[] = [];
-
-let words: string[] = [
-  "ta mère",
-  "tapis",
-  "taper",
-  "taré",
-  "tabasser",
-  "tabouret",
-  "rigole",
-  "amène",
-  "atchoum",
-  "abracadabra",
-  "abeille",
-  "alibaba",
-  "arnaque",
-  "maison",
-  "nombril",
-  "lapin",
-  "ouistiti",
-  "wifi",
-  "wisky",
-  "renard",
-  "requin",
-  "repas",
-  "retard",
-  "coiffeur",
-  "coiffeuse",
-  "kirikou",
-  "kiri",
-  "western",
-  "un deux",
-  "deux trois",
-  "yoplait",
-  "avalanche",
-  "moisissure",
-  "moisson",
-  "moineau",
-  "école",
-  "commentaire",
-  "quantificateur",
-  "commandant",
-  "claire chazal",
-  "tornade",
-  "bottes",
-  "bonsoir pariiiss",
-  "courtois",
-  "facteur",
-  "gérard",
-  "quoidrilatère",
-  "pepe",
-  "surfeur",
-  "toilettes",
-  "lebron james",
-  "c'est de la merde",
-  "trois quatre",
-  "quatre cinq",
-  "cinq six",
-  "six sept",
-];
-
-(async () => {
-  for (const word of words) {
-    let phoneme = await phonemize(word);
-    let partials: Record<string, string> = {};
-
-    for (let i = 3; i <= word.length; i++) {
-      // add n last characters from the phoneme
-      let add = word.slice(word.length - i, word.length);
-      partials[await phonemize(add)] = add;
-    }
-
-    jsonData.push({ phoneme, word, partials });
-  }
-
-  writeFileSync("./data.json", JSON.stringify(jsonData));
-})();
index 32f1a900c70f49c2be533af704e5ecfc4394f887..b6b82ec5206248e247168f494a0b553bac3261a9 100644 (file)
     tslib "^2.4.1"
     ws "^8.11.0"
 
-"@discordnova/nova-cli@0.0.4":
-  version "0.0.4"
-  resolved "https://registry.yarnpkg.com/@discordnova/nova-cli/-/nova-cli-0.0.4.tgz#66583e349f14c8fafc6f4e9fd184d7bb481304c8"
-  integrity sha512-n+1+Nzc8tTgfT6f6+0E5ELfrj6b5vP73H5FQlnb7gbMFDkCJeHuDlZqxJuy9uax6bIQuMF+uJyan2lTdRF6Z7g==
+"@discordnova/nova-cli@0.0.5":
+  version "0.0.5"
+  resolved "https://registry.yarnpkg.com/@discordnova/nova-cli/-/nova-cli-0.0.5.tgz#9ad013bb25e3aa91795654cfa0ba8bbfb7f3b2fe"
+  integrity sha512-ielyAYo6cTxLT8CyEgDTGxOv9A3gRl3IQCvoETjyDdJrZGd4CJFwBTOB5Dl1tq8wYohsaxEywLb3UnKyTHsYaA==
   dependencies:
     undici "^5.15.0"
 
-"@discordnova/nova-js@^0.0.4":
-  version "0.0.4"
-  resolved "https://registry.yarnpkg.com/@discordnova/nova-js/-/nova-js-0.0.4.tgz#10f530d06f0ccd920491cb4881e2cc4d82e29ea1"
-  integrity sha512-6t23zVkHBzw4jFYkYYkhUbIFBGsQo1CL4xnvuq3oruCpEhVVt0jkkc7RhQB6EspfIiLFuhyfMjRdlKJ1YEpsQg==
+"@discordnova/nova-js@0.0.5":
+  version "0.0.5"
+  resolved "https://registry.yarnpkg.com/@discordnova/nova-js/-/nova-js-0.0.5.tgz#753e126696e789fdd1fda43b08a6a493fff4fc27"
+  integrity sha512-ok1G2czehvptn6ICZYUP5CSqPuRzvI8b+rNFsdEQOEn1G2hLRFvYTn21QzirFcEO5y1yOa9zOr7VnQ3o0HH2Cw==
   dependencies:
     "@discordjs/core" "^0.3.0"
     "@discordjs/rest" "^1.5.0"
@@ -2734,11 +2734,16 @@ typed-array-length@^1.0.4:
     for-each "^0.3.3"
     is-typed-array "^1.1.9"
 
-typescript@^4.9.3, typescript@^4.9.4:
+typescript@^4.9.3:
   version "4.9.4"
   resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.9.4.tgz#a2a3d2756c079abda241d75f149df9d561091e78"
   integrity sha512-Uz+dTXYzxXXbsFpM86Wh3dKCxrQqUcVMxwU54orwlJjOpO3ao8L7j5lH+dWfTwgCwIuM9GQ2kvVotzYJMXTBZg==
 
+typescript@^5.0.0-dev.20230120:
+  version "5.0.0-dev.20230120"
+  resolved "https://registry.yarnpkg.com/typescript/-/typescript-5.0.0-dev.20230120.tgz#8b8872448cfb88296c7dc530dc4a60e138230c75"
+  integrity sha512-vgmInMYmRogavAvGVDd+gnrckZJPFbfBp2l7ubTRtZ4CDw9YeUu3121tclQy+0FlmzvhfPUvbJ/ZWEqjQTKGbw==
+
 unbox-primitive@^1.0.2:
   version "1.0.2"
   resolved "https://registry.yarnpkg.com/unbox-primitive/-/unbox-primitive-1.0.2.tgz#29032021057d5e6cdbd08c5129c226dff8ed6f9e"
index d75edeaec0ee8b547be47de1ceacdec0a414aeed..76e84c7812e127e50099b5c26c435aa757a06968 100644 (file)
@@ -1,2 +1,3 @@
 venv
-__pycache__
\ No newline at end of file
+__pycache__
+assets/
\ No newline at end of file
index 9769ae9e8938329cd9e9f70285eebe273da6f2d3..a7cae7e86add94dedd0aedb26860e348bd2ad317 100644 (file)
@@ -1,3 +1,3 @@
 venv/
 __pycache__/
-latin_ipa_forward.pt
+assets/
\ No newline at end of file
index ba226a2339446ed314f9ac294fb969965379d578..9c9248194a3b43d7fd5126db131f4d619254d14d 100644 (file)
@@ -1,8 +1,7 @@
-
 FROM python:3.7.3-slim
 COPY requirements.txt /
 RUN apt-get update && apt-get install -y build-essential
 RUN pip3 install -r /requirements.txt
 COPY . /app
 WORKDIR /app
-ENTRYPOINT gunicorn app:app -w 2 --threads 2 -b 0.0.0.0:8000
\ No newline at end of file
+ENTRYPOINT gunicorn app:app -w 8 --threads 8 -b 0.0.0.0:8000
\ No newline at end of file
index 542ff0d090948c49331218ec577593283e97ee57..ab5626ce1cb157a39a22255fdde07b38befe213e 100644 (file)
@@ -2,10 +2,18 @@ from dp.phonemizer import Phonemizer
 from flask import Flask
 from flask import request
 
-phonemizer = Phonemizer.from_checkpoint('latin_ipa_forward.pt')
+phonemizer = Phonemizer.from_checkpoint('assets/model.pt')
 app = Flask(__name__, instance_relative_config=True)
 
 @app.route('/')
 def handle():
-    searchword = request.args.get('grapheme', '')
-    return phonemizer(searchword, lang = 'fr')
+    """
+    Translate the `grapheme` query parameter into phonemes.
+
+    The optional `language` query parameter is passed to the phonemizer as
+    `lang` and defaults to 'fr'. Answers 400 when `grapheme` is missing.
+    """
+    grapheme = request.args.get('grapheme')
+    if grapheme is None:
+        return "You are missing the 'grapheme' parameter", 400
+    lang = request.args.get('language')
+    if lang is None:
+        lang = 'fr'
+    return phonemizer(grapheme, lang = lang), 200
index 3deb8e8cb79fc0017239d3e7b43ea9b72232c6d0..f0f8d41f24ca7ac0c9ce3c3b39f595e8f606c92b 100644 (file)
@@ -4,16 +4,27 @@ services:
     build: autofeur_nova
     restart: always
     depends_on:
-      - deep_phonemizer
+      - autofeur_db
       - nats
     environment:
       - NATS=nats
       - REST=http://rest:8090/api
+      - DB=http://autofeur_db:3000/
+  
+  autofeur_db:
+    build: autofeur_db
+    restart: always
+    depends_on:
+      - deep_phonemizer
+    environment:
       - PHONEMIZER=http://deep_phonemizer:8000/
+    volumes:
+      - ./autofeur_db/assets/db.bin:/app/assets/db.bin
   deep_phonemizer:
     build: deep_phonemizer
     restart: always
-
+    volumes:
+      - ./deep_phonemizer/assets/model.pt:/app/assets/model.pt
   nats:
     image: nats
     restart: always
@@ -31,9 +42,11 @@ services:
       - nats
       - redis
   
-  gateway:
+  gateway0:
     image: ghcr.io/discordnova/nova/gateway
     restart: always
+    environment:
+      - NOVA__GATEWAY__SHARD=0
     volumes:
       - ./autofeur_nova/config/default.yml:/config/default.yml
     depends_on: