use icu::{
    collator::{AlternateHandling, Collator, CollatorOptions, MaxVariable, Strength},
    segmenter::GraphemeClusterSegmenter,
};
use itertools::Itertools;
use std::collections::HashSet;

/// Returns the entries of `possible_anagrams` that are anagrams of `word`.
///
/// We define an anagram as any permutation of the graphemes of a string, ignoring
/// case, accents/diacritical marks, punctuation, spaces, etc. There are some
/// language-specific instances where this fails, but it covers a lot of cases.
///
/// A candidate that the collator considers equal to `word` itself (for example
/// one that differs only in case) is not reported as an anagram.
///
/// # Panics
///
/// Panics if the collator cannot be constructed.
pub fn anagrams_for<'a>(word: &str, possible_anagrams: &'a [&str]) -> HashSet<&'a str> {
    let mut options = CollatorOptions::new();
    // Ignore case sensitivity, accents, punctuation, spaces, etc.
    options.strength = Some(Strength::Primary);
    options.alternate_handling = Some(AlternateHandling::Shifted);
    options.max_variable = Some(MaxVariable::Punctuation);
    // Strictly speaking an anagram only makes sense within a given locale,
    // but we don't know it, so leave it as undefined.
    let collator = Collator::try_new(Default::default(), options)
        .expect("a collator for the default locale should always be constructible");
    let word_permutation = first_grapheme_permutation(word, &collator);
    possible_anagrams
        .iter()
        .copied()
        .filter(|&candidate| {
            // A word is never an anagram of itself. Test this first: it is a
            // single comparison and lets us skip segmenting and sorting the
            // candidate when it is going to be rejected anyway.
            if collator.compare(candidate, word).is_eq() {
                return false;
            }
            // If two strings are anagrams of each other, they will have the same
            // first permutation.
            let candidate_permutation = first_grapheme_permutation(candidate, &collator);
            collator
                .compare(&word_permutation, &candidate_permutation)
                .is_eq()
        })
        .collect()
}

/// Builds the "first permutation" of `word`: its graphemes rearranged into the
/// order defined by `collator` and glued back together into a single string.
///
/// Graphemes that the collator treats as equal keep their original relative
/// order, because the sort used here is stable.
fn first_grapheme_permutation(word: &str, collator: &Collator) -> String {
    let segmenter = GraphemeClusterSegmenter::new();
    // Consecutive values produced by the segmenter are used as the start and end
    // byte offsets of one grapheme slice of `word`.
    let mut graphemes: Vec<&str> = segmenter
        .segment_str(word)
        .tuple_windows()
        .map(|(start, end)| &word[start..end])
        .collect();
    graphemes.sort_by(|lhs, rhs| collator.compare(lhs, rhs));
    graphemes.concat()
}