use icu::{ collator::{Collator, CollatorOptions, Strength}, segmenter::GraphemeClusterSegmenter, }; use itertools::Itertools; use std::collections::HashSet; pub fn anagrams_for<'a>(word: &str, possible_anagrams: &'a [&str]) -> HashSet<&'a str> { let mut options = CollatorOptions::new(); // Ignore case sensitivity, accents, etc. options.strength = Some(Strength::Primary); // Strictly speaking an anagram only makes sense within a given locale, // but we don't know it, so leave it as undefined. let collator = Collator::try_new(Default::default(), options).unwrap(); let word_anagram = first_anagram(&word, &collator); possible_anagrams .iter() .copied() .filter(|possibility| { let possibility_anagram = first_anagram(&possibility, &collator); let is_anagram = collator.compare(&word_anagram, &possibility_anagram).is_eq(); let is_same = collator.compare(&possibility, &word).is_eq(); is_anagram && !is_same }) .collect() } // We define the first anagram of a string as the concatenation of // sorting all of its grapheme clusters. To do this fully correctly, // we would want to know what language we should use when considering // whether two grapheme clusters are "equal". fn first_anagram(word: &str, collator: &Collator) -> String { let segmenter = GraphemeClusterSegmenter::new(); let mut graphemes: Vec<&str> = segmenter .segment_str(&word) .tuple_windows() .map(|(i, j)| &word[i..j]) .collect(); graphemes.sort_by(|a, b| collator.compare(a, b)); graphemes.into_iter().collect() }