2023-10-11 16:02:27 -05:00
|
|
|
use icu::{
|
|
|
|
collator::{Collator, CollatorOptions, Strength},
|
|
|
|
segmenter::GraphemeClusterSegmenter,
|
|
|
|
};
|
|
|
|
use itertools::Itertools;
|
2023-10-11 13:32:05 -05:00
|
|
|
use std::collections::HashSet;
|
|
|
|
|
2023-10-11 16:02:27 -05:00
|
|
|
pub fn anagrams_for<'a>(word: &str, possible_anagrams: &'a [&str]) -> HashSet<&'a str> {
|
|
|
|
let mut options = CollatorOptions::new();
|
|
|
|
// Ignore case sensitivity, accents, etc.
|
|
|
|
options.strength = Some(Strength::Primary);
|
|
|
|
// Strictly speaking an anagram only makes sense within a given locale,
|
|
|
|
// but we don't know it, so leave it as undefined.
|
|
|
|
let collator = Collator::try_new(Default::default(), options).unwrap();
|
|
|
|
let word_anagram = first_anagram(&word, &collator);
|
|
|
|
possible_anagrams
|
|
|
|
.iter()
|
|
|
|
.copied()
|
|
|
|
.filter(|possibility| {
|
|
|
|
let possibility_anagram = first_anagram(&possibility, &collator);
|
|
|
|
let is_anagram = collator.compare(&word_anagram, &possibility_anagram).is_eq();
|
|
|
|
let is_same = collator.compare(&possibility, &word).is_eq();
|
|
|
|
is_anagram && !is_same
|
|
|
|
})
|
|
|
|
.collect()
|
|
|
|
}
|
|
|
|
|
|
|
|
// We define the first anagram of a string as the concatenation of
|
|
|
|
// sorting all of its grapheme clusters. To do this fully correctly,
|
|
|
|
// we would want to know what language we should use when considering
|
|
|
|
// whether two grapheme clusters are "equal".
|
|
|
|
fn first_anagram(word: &str, collator: &Collator) -> String {
|
|
|
|
let segmenter = GraphemeClusterSegmenter::new();
|
|
|
|
let mut graphemes: Vec<&str> = segmenter
|
|
|
|
.segment_str(&word)
|
|
|
|
.tuple_windows()
|
|
|
|
.map(|(i, j)| &word[i..j])
|
|
|
|
.collect();
|
|
|
|
graphemes.sort_by(|a, b| collator.compare(a, b));
|
|
|
|
graphemes.into_iter().collect()
|
2023-10-11 13:32:05 -05:00
|
|
|
}
|