diff options
| author | Matthieu Pignolet <matthieu@puffer.fish> | 2025-05-18 22:56:25 +0400 |
|---|---|---|
| committer | Matthieu Pignolet <matthieu@puffer.fish> | 2025-05-18 22:56:25 +0400 |
| commit | 2d92468b5ee98bd624d2dee20bcf19eb8b8c5e16 (patch) | |
| tree | 59850e0a19b65c8c86e59eb50961c30e0abce6f8 | |
| parent | 1f82b7df50418b624cc88baaec0d039774f4748e (diff) | |
feat: add documentation strings
| -rw-r--r-- | aline/src/lib.rs | 45 |
1 file changed, 45 insertions, 0 deletions
diff --git a/aline/src/lib.rs b/aline/src/lib.rs index fde55b1..e79e040 100644 --- a/aline/src/lib.rs +++ b/aline/src/lib.rs @@ -1,6 +1,48 @@ // ALINE phonetic sequence alignment in Rust // Port of NLTK's ALINE module (Greg Kondrak, 2002) +/// ALINE +/// https://webdocs.cs.ualberta.ca/~kondrak/ +/// Copyright 2002 by Grzegorz Kondrak. +/// +/// ALINE is an algorithm for aligning phonetic sequences, described in [1]. +/// This module is a port of Kondrak's (2002) ALINE. It provides functions for +/// phonetic sequence alignment and similarity analysis. These are useful in +/// historical linguistics, sociolinguistics and synchronic phonology. +/// +/// ALINE has parameters that can be tuned for desired output. These parameters are: +/// - C_skip, C_sub, C_exp, C_vwl +/// - Salience weights +/// - Segmental features +/// +/// In this implementation, some parameters have been changed from their default +/// values as described in [1], in order to replicate published results. All changes +/// are noted in comments. +/// +/// # Get optimal alignment of two phonetic sequences +/// +/// ``` +/// use aline::align; +/// +/// let alignment = align("θin", "tenwis", 0.0); +/// +/// assert_eq!( +/// alignment, +/// vec![ +/// vec![ +/// ("θ", "t"), +/// ("i", "e"), +/// ("n", "n") +/// ].iter() +/// .map(|(a, b)| (a.to_string(), b.to_string())) +/// .collect::<Vec<(String, String)>>() +/// ] +/// ); +/// ``` +/// +/// [1] G. Kondrak. Algorithms for Language Reconstruction. PhD dissertation, +/// University of Toronto. + use std::{collections::HashSet, f64}; use constants::EXTRACTED; @@ -10,6 +52,9 @@ mod constants; #[cfg(test)] mod test; +/// Compute the alignment of two phonetic strings. +/// +/// (Kondrak 2002: 51) pub fn align(str1: &str, str2: &str, epsilon: f64) -> Vec<Vec<(String, String)>> { assert!( (0.0..=1.0).contains(&epsilon), |
