exercism-solutions/rust/anagram/src/lib.rs

46 lines
1.9 KiB
Rust
Raw Normal View History

2023-10-11 16:02:27 -05:00
use icu::{
2023-10-13 13:41:57 -05:00
collator::{AlternateHandling, Collator, CollatorOptions, MaxVariable, Strength},
2023-10-11 16:02:27 -05:00
segmenter::GraphemeClusterSegmenter,
};
use itertools::Itertools;
2023-10-11 13:32:05 -05:00
use std::collections::HashSet;
2023-10-13 13:41:57 -05:00
// We define an anagram as any permutation of the graphemes of a string, ignoring
// case, accents/diacritical marks, punctuation, spaces, etc. There are some
// language-specific instances where this fails, but it covers a lot of cases.
2023-10-11 16:02:27 -05:00
pub fn anagrams_for<'a>(word: &str, possible_anagrams: &'a [&str]) -> HashSet<&'a str> {
let mut options = CollatorOptions::new();
2023-10-13 13:41:57 -05:00
// Ignore case sensitivity, accents, punctuation, spaces, etc.
2023-10-11 16:02:27 -05:00
options.strength = Some(Strength::Primary);
2023-10-13 13:41:57 -05:00
options.alternate_handling = Some(AlternateHandling::Shifted);
options.max_variable = Some(MaxVariable::Punctuation);
2023-10-11 16:02:27 -05:00
// Strictly speaking an anagram only makes sense within a given locale,
// but we don't know it, so leave it as undefined.
let collator = Collator::try_new(Default::default(), options).unwrap();
2023-10-13 13:41:57 -05:00
let word_permutation = first_grapheme_permutation(&word, &collator);
2023-10-11 16:02:27 -05:00
possible_anagrams
.iter()
.copied()
.filter(|possibility| {
2023-10-13 13:41:57 -05:00
let possibility_permutation = first_grapheme_permutation(&possibility, &collator);
// If two strings are anagrams of each other, they will have the same first
// permutation.
let is_anagram = collator
.compare(&word_permutation, &possibility_permutation)
.is_eq();
2023-10-11 16:02:27 -05:00
let is_same = collator.compare(&possibility, &word).is_eq();
is_anagram && !is_same
})
.collect()
}
2023-10-13 13:41:57 -05:00
fn first_grapheme_permutation(word: &str, collator: &Collator) -> String {
2023-10-11 16:02:27 -05:00
let segmenter = GraphemeClusterSegmenter::new();
2023-10-13 13:41:57 -05:00
segmenter
2023-10-11 16:02:27 -05:00
.segment_str(&word)
.tuple_windows()
.map(|(i, j)| &word[i..j])
2023-10-13 13:41:57 -05:00
.sorted_by(|a, b| collator.compare(a, b))
.collect()
2023-10-11 13:32:05 -05:00
}