summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matthieu Pignolet <matthieu@puffer.fish> 2025-05-18 22:56:25 +0400
committer Matthieu Pignolet <matthieu@puffer.fish> 2025-05-18 22:56:25 +0400
commit 2d92468b5ee98bd624d2dee20bcf19eb8b8c5e16 (patch)
tree 59850e0a19b65c8c86e59eb50961c30e0abce6f8
parent 1f82b7df50418b624cc88baaec0d039774f4748e (diff)
feat: add documentation strings
-rw-r--r-- aline/src/lib.rs 45
1 files changed, 45 insertions, 0 deletions
diff --git a/aline/src/lib.rs b/aline/src/lib.rs
index fde55b1..e79e040 100644
--- a/aline/src/lib.rs
+++ b/aline/src/lib.rs
@@ -1,6 +1,48 @@
// ALINE phonetic sequence alignment in Rust
// Port of NLTK's ALINE module (Greg Kondrak, 2002)
+/// ALINE
+/// <https://webdocs.cs.ualberta.ca/~kondrak/>
+/// Copyright 2002 by Grzegorz Kondrak.
+///
+/// ALINE is an algorithm for aligning phonetic sequences, described in [1].
+/// This module is a port of Kondrak's (2002) ALINE. It provides functions for
+/// phonetic sequence alignment and similarity analysis. These are useful in
+/// historical linguistics, sociolinguistics and synchronic phonology.
+///
+/// ALINE has parameters that can be tuned for desired output. These parameters are:
+/// - C_skip, C_sub, C_exp, C_vwl
+/// - Salience weights
+/// - Segmental features
+///
+/// In this implementation, some parameters have been changed from their default
+/// values as described in [1], in order to replicate published results. All changes
+/// are noted in comments.
+///
+/// # Get optimal alignment of two phonetic sequences
+///
+/// ```
+/// use aline::align;
+///
+/// let alignment = align("θin", "tenwis", 0.0);
+///
+/// assert_eq!(
+///     alignment,
+///     vec![
+///         vec![
+///             ("θ", "t"),
+///             ("i", "e"),
+///             ("n", "n")
+///         ].iter()
+///         .map(|(a, b)| (a.to_string(), b.to_string()))
+///         .collect::<Vec<(String, String)>>()
+///     ]
+/// );
+/// ```
+///
+/// [1] G. Kondrak (2002). Algorithms for Language Reconstruction. PhD dissertation,
+/// University of Toronto.
+
use std::{collections::HashSet, f64};
use constants::EXTRACTED;
@@ -10,6 +52,9 @@ mod constants;
#[cfg(test)]
mod test;
+/// Compute the alignment of two phonetic strings.
+///
+/// (Kondrak 2002: 51)
pub fn align(str1: &str, str2: &str, epsilon: f64) -> Vec<Vec<(String, String)>> {
assert!(
(0.0..=1.0).contains(&epsilon),