use icu::{ collator::{AlternateHandling, Collator, CollatorOptions, MaxVariable, Strength}, segmenter::GraphemeClusterSegmenter, }; use itertools::Itertools; use std::collections::HashSet; // We define an anagram as any permutation of the graphemes of a string, ignoring // case, accents/diacritical marks, punctuation, spaces, etc. There are some // language-specific instances where this fails, but it covers a lot of cases. pub fn anagrams_for<'a>(word: &str, possible_anagrams: &'a [&str]) -> HashSet<&'a str> { let mut options = CollatorOptions::new(); // Ignore case sensitivity, accents, punctuation, spaces, etc. options.strength = Some(Strength::Primary); options.alternate_handling = Some(AlternateHandling::Shifted); options.max_variable = Some(MaxVariable::Punctuation); // Strictly speaking an anagram only makes sense within a given locale, // but we don't know it, so leave it as undefined. let collator = Collator::try_new(Default::default(), options).unwrap(); let word_permutation = first_grapheme_permutation(&word, &collator); possible_anagrams .iter() .copied() .filter(|possibility| { let possibility_permutation = first_grapheme_permutation(&possibility, &collator); // If two strings are anagrams of each other, they will have the same first // permutation. let is_anagram = collator .compare(&word_permutation, &possibility_permutation) .is_eq(); let is_same = collator.compare(&possibility, &word).is_eq(); is_anagram && !is_same }) .collect() } fn first_grapheme_permutation(word: &str, collator: &Collator) -> String { let segmenter = GraphemeClusterSegmenter::new(); segmenter .segment_str(&word) .tuple_windows() .map(|(i, j)| &word[i..j]) .sorted_by(|a, b| collator.compare(a, b)) .collect() }