diff options
| author | Matthieu Pignolet <m@mpgn.dev> | 2025-05-05 19:06:18 +0400 |
|---|---|---|
| committer | Matthieu Pignolet <m@mpgn.dev> | 2025-05-05 19:06:18 +0400 |
| commit | c4d22da11b779d92faca44ac2a6b11cefc0e3d77 (patch) | |
| tree | ad7240b525c6ef7c03cef9c00242bffdfa3448ac | |
| parent | fe305988e05f8a59a80459577712b84da00a6e69 (diff) | |
feat: add extract script for constants
| -rw-r--r-- | src/extract.json | 1009 | ||||
| -rwxr-xr-x | src/extract.py | 20 |
2 files changed, 1029 insertions, 0 deletions
diff --git a/src/extract.json b/src/extract.json new file mode 100644 index 0000000..4e952e1 --- /dev/null +++ b/src/extract.json @@ -0,0 +1,1009 @@ +{ + "C_skip": 10, + "C_sub": 35, + "C_exp": 45, + "C_vwl": 5, + "consonants": [ + "B", + "N", + "R", + "b", + "c", + "d", + "f", + "g", + "h", + "j", + "k", + "l", + "m", + "n", + "p", + "q", + "r", + "s", + "t", + "v", + "x", + "z", + "\u00e7", + "\u00f0", + "\u0127", + "\u014b", + "\u0256", + "\u025f", + "\u0262", + "\u0263", + "\u0266", + "\u026c", + "\u026e", + "\u0270", + "\u0271", + "\u0272", + "\u0273", + "\u0274", + "\u0278", + "\u0279", + "\u027b", + "\u027d", + "\u027e", + "\u0280", + "\u0281", + "\u0282", + "\u0283", + "\u0288", + "\u028b", + "\u0290 ", + "\u0292", + "\u0294", + "\u0295", + "\u0299", + "\u029d", + "\u03b2", + "\u03b8", + "\u03c7", + "\u0290", + "w" + ], + "R_c": [ + "aspirated", + "lateral", + "manner", + "nasal", + "place", + "retroflex", + "syllabic", + "voice" + ], + "R_v": [ + "back", + "lateral", + "long", + "manner", + "nasal", + "place", + "retroflex", + "round", + "syllabic", + "voice" + ], + "similarity_matrix": { + "bilabial": 1.0, + "labiodental": 0.95, + "dental": 0.9, + "alveolar": 0.85, + "retroflex": 0.8, + "palato-alveolar": 0.75, + "palatal": 0.7, + "velar": 0.6, + "uvular": 0.5, + "pharyngeal": 0.3, + "glottal": 0.1, + "labiovelar": 1.0, + "vowel": -1.0, + "stop": 1.0, + "affricate": 0.9, + "fricative": 0.85, + "trill": 0.7, + "tap": 0.65, + "approximant": 0.6, + "high vowel": 0.4, + "mid vowel": 0.2, + "low vowel": 0.0, + "vowel2": 0.5, + "high": 1.0, + "mid": 0.5, + "low": 0.0, + "front": 1.0, + "central": 0.5, + "back": 0.0, + "plus": 1.0, + "minus": 0.0 + }, + "salience": { + "syllabic": 5, + "place": 40, + "manner": 50, + "voice": 5, + "nasal": 20, + "retroflex": 10, + "lateral": 10, + "aspirated": 5, + "long": 0, + "high": 3, + "back": 2, + "round": 2 + }, + "feature_matrix": { + "p": { + "place": "bilabial", + "manner": "stop", + "syllabic": "minus", + "voice": "minus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "b": { + "place": "bilabial", + "manner": "stop", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "t": { + "place": "alveolar", + "manner": "stop", + "syllabic": "minus", + "voice": "minus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "d": { + "place": "alveolar", + "manner": "stop", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0288": { + "place": "retroflex", + "manner": "stop", + "syllabic": "minus", + "voice": "minus", + "nasal": "minus", + "retroflex": "plus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0256": { + "place": "retroflex", + "manner": "stop", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "plus", + "lateral": "minus", + "aspirated": "minus" + }, + "c": { + "place": "palatal", + "manner": "stop", + "syllabic": "minus", + "voice": "minus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u025f": { + "place": "palatal", + "manner": "stop", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "k": { + "place": "velar", + "manner": "stop", + "syllabic": "minus", + "voice": "minus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "g": { + "place": "velar", + "manner": "stop", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "q": { + "place": "uvular", + "manner": "stop", + "syllabic": "minus", + "voice": "minus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0262": { + "place": "uvular", + "manner": "stop", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0294": { + "place": "glottal", + "manner": "stop", + "syllabic": "minus", + "voice": "minus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "m": { + "place": "bilabial", + "manner": "stop", + "syllabic": "minus", + "voice": "plus", + "nasal": "plus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0271": { + "place": "labiodental", + "manner": "stop", + "syllabic": "minus", + "voice": "plus", + "nasal": "plus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "n": { + "place": "alveolar", + "manner": "stop", + "syllabic": "minus", + "voice": "plus", + "nasal": "plus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0273": { + "place": "retroflex", + "manner": "stop", + "syllabic": "minus", + "voice": "plus", + "nasal": "plus", + "retroflex": "plus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0272": { + "place": "palatal", + "manner": "stop", + "syllabic": "minus", + "voice": "plus", + "nasal": "plus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u014b": { + "place": "velar", + "manner": "stop", + "syllabic": "minus", + "voice": "plus", + "nasal": "plus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0274": { + "place": "uvular", + "manner": "stop", + "syllabic": "minus", + "voice": "plus", + "nasal": "plus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "N": { + "place": "uvular", + "manner": "stop", + "syllabic": "minus", + "voice": "plus", + "nasal": "plus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0299": { + "place": "bilabial", + "manner": "trill", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "B": { + "place": "bilabial", + "manner": "trill", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "r": { + "place": "alveolar", + "manner": "trill", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "plus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0280": { + "place": "uvular", + "manner": "trill", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "R": { + "place": "uvular", + "manner": "trill", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u027e": { + "place": "alveolar", + "manner": "tap", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u027d": { + "place": "retroflex", + "manner": "tap", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "plus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0278": { + "place": "bilabial", + "manner": "fricative", + "syllabic": "minus", + "voice": "minus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u03b2": { + "place": "bilabial", + "manner": "fricative", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "f": { + "place": "labiodental", + "manner": "fricative", + "syllabic": "minus", + "voice": "minus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "v": { + "place": "labiodental", + "manner": "fricative", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u03b8": { + "place": "dental", + "manner": "fricative", + "syllabic": "minus", + "voice": "minus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u00f0": { + "place": "dental", + "manner": "fricative", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "s": { + "place": "alveolar", + "manner": "fricative", + "syllabic": "minus", + "voice": "minus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "z": { + "place": "alveolar", + "manner": "fricative", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0283": { + "place": "palato-alveolar", + "manner": "fricative", + "syllabic": "minus", + "voice": "minus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0292": { + "place": "palato-alveolar", + "manner": "fricative", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0282": { + "place": "retroflex", + "manner": "fricative", + "syllabic": "minus", + "voice": "minus", + "nasal": "minus", + "retroflex": "plus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0290": { + "place": "retroflex", + "manner": "fricative", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "plus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u00e7": { + "place": "palatal", + "manner": "fricative", + "syllabic": "minus", + "voice": "minus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u029d": { + "place": "palatal", + "manner": "fricative", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "x": { + "place": "velar", + "manner": "fricative", + "syllabic": "minus", + "voice": "minus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0263": { + "place": "velar", + "manner": "fricative", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u03c7": { + "place": "uvular", + "manner": "fricative", + "syllabic": "minus", + "voice": "minus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0281": { + "place": "uvular", + "manner": "fricative", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0127": { + "place": "pharyngeal", + "manner": "fricative", + "syllabic": "minus", + "voice": "minus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0295": { + "place": "pharyngeal", + "manner": "fricative", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "h": { + "place": "glottal", + "manner": "fricative", + "syllabic": "minus", + "voice": "minus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0266": { + "place": "glottal", + "manner": "fricative", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u026c": { + "place": "alveolar", + "manner": "fricative", + "syllabic": "minus", + "voice": "minus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "plus", + "aspirated": "minus" + }, + "\u026e": { + "place": "alveolar", + "manner": "fricative", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "plus", + "aspirated": "minus" + }, + "\u028b": { + "place": "labiodental", + "manner": "approximant", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0279": { + "place": "alveolar", + "manner": "approximant", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u027b": { + "place": "retroflex", + "manner": "approximant", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "plus", + "lateral": "minus", + "aspirated": "minus" + }, + "j": { + "place": "palatal", + "manner": "approximant", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "\u0270": { + "place": "velar", + "manner": "approximant", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "l": { + "place": "alveolar", + "manner": "approximant", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "plus", + "aspirated": "minus" + }, + "w": { + "place": "labiovelar", + "manner": "approximant", + "syllabic": "minus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "aspirated": "minus" + }, + "i": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "high", + "back": "front", + "round": "minus", + "long": "minus", + "aspirated": "minus" + }, + "y": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "high", + "back": "front", + "round": "plus", + "long": "minus", + "aspirated": "minus" + }, + "e": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "mid", + "back": "front", + "round": "minus", + "long": "minus", + "aspirated": "minus" + }, + "E": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "mid", + "back": "front", + "round": "minus", + "long": "plus", + "aspirated": "minus" + }, + "\u00f8": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "mid", + "back": "front", + "round": "plus", + "long": "minus", + "aspirated": "minus" + }, + "\u025b": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "mid", + "back": "front", + "round": "minus", + "long": "minus", + "aspirated": "minus" + }, + "\u0153": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "mid", + "back": "front", + "round": "plus", + "long": "minus", + "aspirated": "minus" + }, + "\u00e6": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "low", + "back": "front", + "round": "minus", + "long": "minus", + "aspirated": "minus" + }, + "a": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "low", + "back": "front", + "round": "minus", + "long": "minus", + "aspirated": "minus" + }, + "A": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "low", + "back": "front", + "round": "minus", + "long": "plus", + "aspirated": "minus" + }, + "\u0268": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "high", + "back": "central", + "round": "minus", + "long": "minus", + "aspirated": "minus" + }, + "\u0289": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "high", + "back": "central", + "round": "plus", + "long": "minus", + "aspirated": "minus" + }, + "\u0259": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "mid", + "back": "central", + "round": "minus", + "long": "minus", + "aspirated": "minus" + }, + "u": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "high", + "back": "back", + "round": "plus", + "long": "minus", + "aspirated": "minus" + }, + "U": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "high", + "back": "back", + "round": "plus", + "long": "plus", + "aspirated": "minus" + }, + "o": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "mid", + "back": "back", + "round": "plus", + "long": "minus", + "aspirated": "minus" + }, + "O": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "mid", + "back": "back", + "round": "plus", + "long": "plus", + "aspirated": "minus" + }, + "\u0254": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "mid", + "back": "back", + "round": "plus", + "long": "minus", + "aspirated": "minus" + }, + "\u0252": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "low", + "back": "back", + "round": "minus", + "long": "minus", + "aspirated": "minus" + }, + "I": { + "place": "vowel", + "manner": "vowel2", + "syllabic": "plus", + "voice": "plus", + "nasal": "minus", + "retroflex": "minus", + "lateral": "minus", + "high": "high", + "back": "front", + "round": "minus", + "long": "plus", + "aspirated": "minus" + } + } +} diff --git a/src/extract.py b/src/extract.py new file mode 100755 index 0000000..c874348 --- /dev/null +++ b/src/extract.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 + +from nltk.metrics.aline import * +import json + +extract = { + 'C_skip': C_skip, + 'C_sub': C_sub, + 'C_exp': C_exp, + 'C_vwl': C_vwl, + + 'consonants': consonants, + 'R_c': R_c, + 'R_v': R_v, + 'similarity_matrix': similarity_matrix, + 'salience': salience, + 'feature_matrix': feature_matrix, +} + +print(json.dumps(extract, indent=2)) |
