Solve rust/anagram
This commit is contained in:
parent
a675ae7b81
commit
a2adfa0e9d
3 changed files with 42 additions and 15 deletions
|
@ -2,3 +2,7 @@
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
name = "anagram"
|
name = "anagram"
|
||||||
version = "0.0.0"
|
version = "0.0.0"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
icu = "1.3.2"
|
||||||
|
itertools = "0.11.0"
|
||||||
|
|
|
@ -1,5 +1,41 @@
|
||||||
|
use icu::{
|
||||||
|
collator::{Collator, CollatorOptions, Strength},
|
||||||
|
segmenter::GraphemeClusterSegmenter,
|
||||||
|
};
|
||||||
|
use itertools::Itertools;
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
|
||||||
pub fn anagrams_for<'a>(word: &str, possible_anagrams: &[&str]) -> HashSet<&'a str> {
|
pub fn anagrams_for<'a>(word: &str, possible_anagrams: &'a [&str]) -> HashSet<&'a str> {
|
||||||
todo!("For the '{word}' word find anagrams among the following words: {possible_anagrams:?}");
|
let mut options = CollatorOptions::new();
|
||||||
|
// Primary strength ignores differences in case, accents, etc.
|
||||||
|
options.strength = Some(Strength::Primary);
|
||||||
|
// Strictly speaking an anagram only makes sense within a given locale,
|
||||||
|
// but we don't know which locale applies, so leave it undetermined.
|
||||||
|
let collator = Collator::try_new(Default::default(), options).unwrap();
|
||||||
|
let word_anagram = first_anagram(&word, &collator);
|
||||||
|
possible_anagrams
|
||||||
|
.iter()
|
||||||
|
.copied()
|
||||||
|
.filter(|possibility| {
|
||||||
|
let possibility_anagram = first_anagram(&possibility, &collator);
|
||||||
|
let is_anagram = collator.compare(&word_anagram, &possibility_anagram).is_eq();
|
||||||
|
let is_same = collator.compare(&possibility, &word).is_eq();
|
||||||
|
is_anagram && !is_same
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
// We define the first anagram of a string as the string obtained by
|
||||||
|
// sorting all of its grapheme clusters. To do this fully correctly,
|
||||||
|
// we would want to know what language we should use when considering
|
||||||
|
// whether two grapheme clusters are "equal".
|
||||||
|
fn first_anagram(word: &str, collator: &Collator) -> String {
|
||||||
|
let segmenter = GraphemeClusterSegmenter::new();
|
||||||
|
let mut graphemes: Vec<&str> = segmenter
|
||||||
|
.segment_str(&word)
|
||||||
|
.tuple_windows()
|
||||||
|
.map(|(i, j)| &word[i..j])
|
||||||
|
.collect();
|
||||||
|
graphemes.sort_by(|a, b| collator.compare(a, b));
|
||||||
|
graphemes.into_iter().collect()
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,7 +20,6 @@ fn no_matches() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[ignore]
|
|
||||||
fn detect_simple_anagram() {
|
fn detect_simple_anagram() {
|
||||||
let word = "ant";
|
let word = "ant";
|
||||||
|
|
||||||
|
@ -32,7 +31,6 @@ fn detect_simple_anagram() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[ignore]
|
|
||||||
fn does_not_confuse_different_duplicates() {
|
fn does_not_confuse_different_duplicates() {
|
||||||
let word = "galea";
|
let word = "galea";
|
||||||
|
|
||||||
|
@ -44,7 +42,6 @@ fn does_not_confuse_different_duplicates() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[ignore]
|
|
||||||
fn eliminate_anagram_subsets() {
|
fn eliminate_anagram_subsets() {
|
||||||
let word = "good";
|
let word = "good";
|
||||||
|
|
||||||
|
@ -56,7 +53,6 @@ fn eliminate_anagram_subsets() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[ignore]
|
|
||||||
fn detect_anagram() {
|
fn detect_anagram() {
|
||||||
let word = "listen";
|
let word = "listen";
|
||||||
|
|
||||||
|
@ -68,7 +64,6 @@ fn detect_anagram() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[ignore]
|
|
||||||
fn multiple_anagrams() {
|
fn multiple_anagrams() {
|
||||||
let word = "allergy";
|
let word = "allergy";
|
||||||
|
|
||||||
|
@ -87,7 +82,6 @@ fn multiple_anagrams() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[ignore]
|
|
||||||
fn case_insensitive_anagrams() {
|
fn case_insensitive_anagrams() {
|
||||||
let word = "Orchestra";
|
let word = "Orchestra";
|
||||||
|
|
||||||
|
@ -99,7 +93,6 @@ fn case_insensitive_anagrams() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[ignore]
|
|
||||||
fn unicode_anagrams() {
|
fn unicode_anagrams() {
|
||||||
let word = "ΑΒΓ";
|
let word = "ΑΒΓ";
|
||||||
|
|
||||||
|
@ -112,7 +105,6 @@ fn unicode_anagrams() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[ignore]
|
|
||||||
fn misleading_unicode_anagrams() {
|
fn misleading_unicode_anagrams() {
|
||||||
// Despite what a human might think these words contain different letters, the input uses Greek
|
// Despite what a human might think these words contain different letters, the input uses Greek
|
||||||
// A and B while the list of potential anagrams uses Latin A and B.
|
// A and B while the list of potential anagrams uses Latin A and B.
|
||||||
|
@ -126,7 +118,6 @@ fn misleading_unicode_anagrams() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[ignore]
|
|
||||||
fn does_not_detect_a_word_as_its_own_anagram() {
|
fn does_not_detect_a_word_as_its_own_anagram() {
|
||||||
let word = "banana";
|
let word = "banana";
|
||||||
|
|
||||||
|
@ -138,7 +129,6 @@ fn does_not_detect_a_word_as_its_own_anagram() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[ignore]
|
|
||||||
fn does_not_detect_a_differently_cased_word_as_its_own_anagram() {
|
fn does_not_detect_a_differently_cased_word_as_its_own_anagram() {
|
||||||
let word = "banana";
|
let word = "banana";
|
||||||
|
|
||||||
|
@ -150,7 +140,6 @@ fn does_not_detect_a_differently_cased_word_as_its_own_anagram() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[ignore]
|
|
||||||
fn does_not_detect_a_differently_cased_unicode_word_as_its_own_anagram() {
|
fn does_not_detect_a_differently_cased_unicode_word_as_its_own_anagram() {
|
||||||
let word = "ΑΒΓ";
|
let word = "ΑΒΓ";
|
||||||
|
|
||||||
|
@ -162,7 +151,6 @@ fn does_not_detect_a_differently_cased_unicode_word_as_its_own_anagram() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[ignore]
|
|
||||||
fn same_bytes_different_chars() {
|
fn same_bytes_different_chars() {
|
||||||
let word = "a⬂"; // 61 E2 AC 82
|
let word = "a⬂"; // 61 E2 AC 82
|
||||||
|
|
||||||
|
@ -174,7 +162,6 @@ fn same_bytes_different_chars() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[ignore]
|
|
||||||
fn different_words_but_same_ascii_sum() {
|
fn different_words_but_same_ascii_sum() {
|
||||||
let word = "bc";
|
let word = "bc";
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue