From 22f6f09577e5833dc16b729955a9a4dd5d8d5689 Mon Sep 17 00:00:00 2001 From: Avery Winters Date: Sun, 17 Dec 2023 21:17:17 -0600 Subject: [PATCH] Solve parallel letter frequency --- .../.exercism/config.json | 42 ++++++ .../.exercism/metadata.json | 1 + rust/parallel-letter-frequency/.gitignore | 8 ++ rust/parallel-letter-frequency/Cargo.toml | 7 + rust/parallel-letter-frequency/HELP.md | 86 ++++++++++++ rust/parallel-letter-frequency/README.md | 74 +++++++++++ .../benches/benchmark.rs | 102 +++++++++++++++ rust/parallel-letter-frequency/src/lib.rs | 49 +++++++ .../tests/parallel-letter-frequency.rs | 122 ++++++++++++++++++ 9 files changed, 491 insertions(+) create mode 100644 rust/parallel-letter-frequency/.exercism/config.json create mode 100644 rust/parallel-letter-frequency/.exercism/metadata.json create mode 100644 rust/parallel-letter-frequency/.gitignore create mode 100644 rust/parallel-letter-frequency/Cargo.toml create mode 100644 rust/parallel-letter-frequency/HELP.md create mode 100644 rust/parallel-letter-frequency/README.md create mode 100644 rust/parallel-letter-frequency/benches/benchmark.rs create mode 100644 rust/parallel-letter-frequency/src/lib.rs create mode 100644 rust/parallel-letter-frequency/tests/parallel-letter-frequency.rs diff --git a/rust/parallel-letter-frequency/.exercism/config.json b/rust/parallel-letter-frequency/.exercism/config.json new file mode 100644 index 0000000..8a69050 --- /dev/null +++ b/rust/parallel-letter-frequency/.exercism/config.json @@ -0,0 +1,42 @@ +{ + "authors": [ + "EduardoBautista" + ], + "contributors": [ + "andrewclarkson", + "ashleygwilliams", + "ccouzens", + "ClashTheBunny", + "coriolinus", + "cwhakes", + "EduardoBautista", + "efx", + "ErikSchierboom", + "etrepum", + "glennpratt", + "IanWhitney", + "kytrinyx", + "lutostag", + "mkantor", + "nfiles", + "petertseng", + "rofrol", + "sjwarner-bp", + "stringparser", + "xakon", + "ZapAnton" + ], + "files": { + "solution": [ + "src/lib.rs", + "Cargo.toml" + ], + "test": [ + "tests/parallel-letter-frequency.rs" + ], + "example": [ + ".meta/example.rs" + ] + }, + "blurb": "Count the frequency of letters in texts using parallel computation." +} diff --git a/rust/parallel-letter-frequency/.exercism/metadata.json b/rust/parallel-letter-frequency/.exercism/metadata.json new file mode 100644 index 0000000..db21d08 --- /dev/null +++ b/rust/parallel-letter-frequency/.exercism/metadata.json @@ -0,0 +1 @@ +{"track":"rust","exercise":"parallel-letter-frequency","id":"eec61e4632aa4a81860dd7cfa6a5349f","url":"https://exercism.org/tracks/rust/exercises/parallel-letter-frequency","handle":"averywinters","is_requester":true,"auto_approve":false} \ No newline at end of file diff --git a/rust/parallel-letter-frequency/.gitignore b/rust/parallel-letter-frequency/.gitignore new file mode 100644 index 0000000..db7f315 --- /dev/null +++ b/rust/parallel-letter-frequency/.gitignore @@ -0,0 +1,8 @@ +# Generated by Cargo +# will have compiled files and executables +/target/ +**/*.rs.bk + +# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries +# More information here http://doc.crates.io/guide.html#cargotoml-vs-cargolock +Cargo.lock diff --git a/rust/parallel-letter-frequency/Cargo.toml b/rust/parallel-letter-frequency/Cargo.toml new file mode 100644 index 0000000..91768b8 --- /dev/null +++ b/rust/parallel-letter-frequency/Cargo.toml @@ -0,0 +1,7 @@ +[package] +edition = "2021" +name = "parallel-letter-frequency" +version = "0.0.0" + +[dependencies] +rayon = "1.8" \ No newline at end of file diff --git a/rust/parallel-letter-frequency/HELP.md b/rust/parallel-letter-frequency/HELP.md new file mode 100644 index 0000000..67add76 --- /dev/null +++ b/rust/parallel-letter-frequency/HELP.md @@ -0,0 +1,86 @@ +# Help + +## Running the tests + +Execute the tests with: + +```bash +$ cargo test +``` + +All but the first test have been ignored. After you get the first test to +pass, open the tests source file which is located in the `tests` directory +and remove the `#[ignore]` flag from the next test and get the tests to pass +again. Each separate test is a function with `#[test]` flag above it. +Continue, until you pass every test. + +If you wish to run _only ignored_ tests without editing the tests source file, use: + +```bash +$ cargo test -- --ignored +``` + +If you are using Rust 1.51 or later, you can run _all_ tests with + +```bash +$ cargo test -- --include-ignored +``` + +To run a specific test, for example `some_test`, you can use: + +```bash +$ cargo test some_test +``` + +If the specific test is ignored, use: + +```bash +$ cargo test some_test -- --ignored +``` + +To learn more about Rust tests refer to the online [test documentation][rust-tests]. + +[rust-tests]: https://doc.rust-lang.org/book/ch11-02-running-tests.html + +## Submitting your solution + +You can submit your solution using the `exercism submit src/lib.rs Cargo.toml` command. +This command will upload your solution to the Exercism website and print the solution page's URL. + +It's possible to submit an incomplete solution which allows you to: + +- See how others have completed the exercise +- Request help from a mentor + +## Need to get help? + +If you'd like help solving the exercise, check the following pages: + +- The [Rust track's documentation](https://exercism.org/docs/tracks/rust) +- The [Rust track's programming category on the forum](https://forum.exercism.org/c/programming/rust) +- [Exercism's programming category on the forum](https://forum.exercism.org/c/programming/5) +- The [Frequently Asked Questions](https://exercism.org/docs/using/faqs) + +Should those resources not suffice, you could submit your (incomplete) solution to request mentoring. + +## Rust Installation + +Refer to the [exercism help page][help-page] for Rust installation and learning +resources. + +## Submitting the solution + +Generally you should submit all files in which you implemented your solution (`src/lib.rs` in most cases). If you are using any external crates, please consider submitting the `Cargo.toml` file. This will make the review process faster and clearer. + +## Feedback, Issues, Pull Requests + +The GitHub [track repository][github] is the home for all of the Rust exercises. If you have feedback about an exercise, or want to help implement new exercises, head over there and create an issue. Members of the rust track team are happy to help! + +If you want to know more about Exercism, take a look at the [contribution guide]. + +## Submitting Incomplete Solutions +It's possible to submit an incomplete solution so you can see how others have completed the exercise. + +[help-page]: https://exercism.org/tracks/rust/learning +[github]: https://github.com/exercism/rust +[contribution guide]: https://exercism.org/docs/community/contributors \ No newline at end of file diff --git a/rust/parallel-letter-frequency/README.md b/rust/parallel-letter-frequency/README.md new file mode 100644 index 0000000..1f5f6ba --- /dev/null +++ b/rust/parallel-letter-frequency/README.md @@ -0,0 +1,74 @@ +# Parallel Letter Frequency + +Welcome to Parallel Letter Frequency on Exercism's Rust Track. +If you need help running the tests or submitting your code, check out `HELP.md`. + +## Instructions + +Count the frequency of letters in texts using parallel computation. + +Parallelism is about doing things in parallel that can also be done sequentially. +A common example is counting the frequency of letters. +Create a function that returns the total frequency of each letter in a list of texts and that employs parallelism. + +Learn more about concurrency in Rust here: + +- [Concurrency](https://doc.rust-lang.org/book/ch16-00-concurrency.html) + +## Bonus + +This exercise also includes a benchmark, with a sequential implementation as a +baseline. You can compare your solution to the benchmark. Observe the +effect different size inputs have on the performance of each. Can you +surpass the benchmark using concurrent programming techniques? + +As of this writing, test::Bencher is unstable and only available on +*nightly* Rust. Run the benchmarks with Cargo: + +``` +cargo bench +``` + +If you are using rustup.rs: + +``` +rustup run nightly cargo bench +``` + +- [Benchmark tests](https://doc.rust-lang.org/stable/unstable-book/library-features/test.html) + +Learn more about nightly Rust: + +- [Nightly Rust](https://doc.rust-lang.org/book/appendix-07-nightly-rust.html) +- [Installing Rust nightly](https://rust-lang.github.io/rustup/concepts/channels.html#working-with-nightly-rust) + +## Source + +### Created by + +- @EduardoBautista + +### Contributed to by + +- @andrewclarkson +- @ashleygwilliams +- @ccouzens +- @ClashTheBunny +- @coriolinus +- @cwhakes +- @EduardoBautista +- @efx +- @ErikSchierboom +- @etrepum +- @glennpratt +- @IanWhitney +- @kytrinyx +- @lutostag +- @mkantor +- @nfiles +- @petertseng +- @rofrol +- @sjwarner-bp +- @stringparser +- @xakon +- @ZapAnton \ No newline at end of file diff --git a/rust/parallel-letter-frequency/benches/benchmark.rs b/rust/parallel-letter-frequency/benches/benchmark.rs new file mode 100644 index 0000000..0e5bbee --- /dev/null +++ b/rust/parallel-letter-frequency/benches/benchmark.rs @@ -0,0 +1,102 @@ +#![feature(test)] +extern crate parallel_letter_frequency; +extern crate test; + +use std::collections::HashMap; +use test::Bencher; + +#[bench] +fn bench_tiny_parallel(b: &mut Bencher) { + let tiny = &["a"]; + b.iter(|| parallel_letter_frequency::frequency(tiny, 3)); +} + +#[bench] +fn bench_tiny_sequential(b: &mut Bencher) { + let tiny = &["a"]; + b.iter(|| frequency(tiny)); +} + +#[bench] +fn bench_small_parallel(b: &mut Bencher) { + let texts = all_texts(1); + b.iter(|| parallel_letter_frequency::frequency(&texts, 3)); +} + +#[bench] +fn bench_small_sequential(b: &mut Bencher) { + let texts = all_texts(1); + b.iter(|| frequency(&texts)); +} + +#[bench] +fn bench_large_parallel(b: &mut Bencher) { + let texts = all_texts(30); + b.iter(|| parallel_letter_frequency::frequency(&texts, 3)); +} + +#[bench] +fn bench_large_sequential(b: &mut Bencher) { + let texts = all_texts(30); + b.iter(|| frequency(&texts)); +} + +/// Simple sequential char frequency. Can it be beat? +pub fn frequency(texts: &[&str]) -> HashMap { + let mut map = HashMap::new(); + + for line in texts { + for chr in line.chars().filter(|c| c.is_alphabetic()) { + if let Some(c) = chr.to_lowercase().next() { + (*map.entry(c).or_insert(0)) += 1; + } + } + } + + map +} + +fn all_texts(repeat: usize) -> Vec<&'static str> { + [ODE_AN_DIE_FREUDE, WILHELMUS, STAR_SPANGLED_BANNER] + .iter() + .cycle() + .take(3 * repeat) + .flat_map(|anthem| anthem.iter().cloned()) + .collect() +} + +// Poem by Friedrich Schiller. The corresponding music is the European Anthem. +pub const ODE_AN_DIE_FREUDE: [&str; 8] = [ + "Freude schöner Götterfunken", + "Tochter aus Elysium,", + "Wir betreten feuertrunken,", + "Himmlische, dein Heiligtum!", + "Deine Zauber binden wieder", + "Was die Mode streng geteilt;", + "Alle Menschen werden Brüder,", + "Wo dein sanfter Flügel weilt.", +]; + +// Dutch national anthem +pub const WILHELMUS: [&str; 8] = [ + "Wilhelmus van Nassouwe", + "ben ik, van Duitsen bloed,", + "den vaderland getrouwe", + "blijf ik tot in den dood.", + "Een Prinse van Oranje", + "ben ik, vrij, onverveerd,", + "den Koning van Hispanje", + "heb ik altijd geëerd.", +]; + +// American national anthem +pub const STAR_SPANGLED_BANNER: [&str; 8] = [ + "O say can you see by the dawn's early light,", + "What so proudly we hailed at the twilight's last gleaming,", + "Whose broad stripes and bright stars through the perilous fight,", + "O'er the ramparts we watched, were so gallantly streaming?", + "And the rockets' red glare, the bombs bursting in air,", + "Gave proof through the night that our flag was still there;", + "O say does that star-spangled banner yet wave,", + "O'er the land of the free and the home of the brave?", +]; diff --git a/rust/parallel-letter-frequency/src/lib.rs b/rust/parallel-letter-frequency/src/lib.rs new file mode 100644 index 0000000..c87a41b --- /dev/null +++ b/rust/parallel-letter-frequency/src/lib.rs @@ -0,0 +1,49 @@ +use ::{ + rayon::{prelude::*, ThreadPoolBuilder}, + std::collections::HashMap, +}; + +pub fn frequency(input: &[&str], worker_count: usize) -> HashMap { + ThreadPoolBuilder::new() + .num_threads(worker_count) + .build() + .expect("Failed to build thread pool.") + .install(|| parallel_frequency(input)) +} + +fn parallel_frequency(input: &[&str]) -> HashMap { + input + .par_iter() + .flat_map(|input| input.par_chars()) + .fold(|| HashMap::new(), merge_char) + .reduce(|| HashMap::new(), merge_maps) +} + +fn merge_char(freq: HashMap, c: char) -> HashMap { + let counts = c + .to_lowercase() + .filter(|c| c.is_alphabetic()) + .map(|c| (c, 1)); + merge_counts(freq, counts) +} + +fn merge_maps(left: HashMap, right: HashMap) -> HashMap { + let (mut from, into) = if left.len() > right.len() { + (right, left) + } else { + (left, right) + }; + merge_counts(into, from.drain()) +} + +fn merge_counts( + mut into: HashMap, + counts: impl Iterator, +) -> HashMap { + for (c, count) in counts { + into.entry(c) + .and_modify(|into_count| *into_count += count) + .or_insert(count); + } + into +} diff --git a/rust/parallel-letter-frequency/tests/parallel-letter-frequency.rs b/rust/parallel-letter-frequency/tests/parallel-letter-frequency.rs new file mode 100644 index 0000000..18411b3 --- /dev/null +++ b/rust/parallel-letter-frequency/tests/parallel-letter-frequency.rs @@ -0,0 +1,122 @@ +use std::collections::HashMap; + +use parallel_letter_frequency as frequency; + +// Poem by Friedrich Schiller. The corresponding music is the European Anthem. +const ODE_AN_DIE_FREUDE: [&str; 8] = [ + "Freude schöner Götterfunken", + "Tochter aus Elysium,", + "Wir betreten feuertrunken,", + "Himmlische, dein Heiligtum!", + "Deine Zauber binden wieder", + "Was die Mode streng geteilt;", + "Alle Menschen werden Brüder,", + "Wo dein sanfter Flügel weilt.", +]; + +// Dutch national anthem +const WILHELMUS: [&str; 8] = [ + "Wilhelmus van Nassouwe", + "ben ik, van Duitsen bloed,", + "den vaderland getrouwe", + "blijf ik tot in den dood.", + "Een Prinse van Oranje", + "ben ik, vrij, onverveerd,", + "den Koning van Hispanje", + "heb ik altijd geëerd.", +]; + +// American national anthem +const STAR_SPANGLED_BANNER: [&str; 8] = [ + "O say can you see by the dawn's early light,", + "What so proudly we hailed at the twilight's last gleaming,", + "Whose broad stripes and bright stars through the perilous fight,", + "O'er the ramparts we watched, were so gallantly streaming?", + "And the rockets' red glare, the bombs bursting in air,", + "Gave proof through the night that our flag was still there;", + "O say does that star-spangled banner yet wave,", + "O'er the land of the free and the home of the brave?", +]; + +#[test] +fn no_texts() { + assert_eq!(frequency::frequency(&[], 4), HashMap::new()); +} + +#[test] +fn one_letter() { + let mut hm = HashMap::new(); + hm.insert('a', 1); + assert_eq!(frequency::frequency(&["a"], 4), hm); +} + +#[test] +fn case_insensitivity() { + let mut hm = HashMap::new(); + hm.insert('a', 2); + assert_eq!(frequency::frequency(&["aA"], 4), hm); +} + +#[test] +fn many_empty_lines() { + let v = vec![""; 1000]; + assert_eq!(frequency::frequency(&v[..], 4), HashMap::new()); +} + +#[test] +fn many_times_same_text() { + let v = vec!["abc"; 1000]; + let mut hm = HashMap::new(); + hm.insert('a', 1000); + hm.insert('b', 1000); + hm.insert('c', 1000); + assert_eq!(frequency::frequency(&v[..], 4), hm); +} + +#[test] +fn punctuation_doesnt_count() { + assert!(!frequency::frequency(&WILHELMUS, 4).contains_key(&',')); +} + +#[test] +fn numbers_dont_count() { + assert!(!frequency::frequency(&["Testing, 1, 2, 3"], 4).contains_key(&'1')); +} + +#[test] +fn all_three_anthems_1_worker() { + let mut v = Vec::new(); + for anthem in [ODE_AN_DIE_FREUDE, WILHELMUS, STAR_SPANGLED_BANNER].iter() { + for line in anthem.iter() { + v.push(*line); + } + } + let freqs = frequency::frequency(&v[..], 1); + assert_eq!(freqs.get(&'a'), Some(&49)); + assert_eq!(freqs.get(&'t'), Some(&56)); + assert_eq!(freqs.get(&'ü'), Some(&2)); +} + +#[test] +fn all_three_anthems_3_workers() { + let mut v = Vec::new(); + for anthem in [ODE_AN_DIE_FREUDE, WILHELMUS, STAR_SPANGLED_BANNER].iter() { + for line in anthem.iter() { + v.push(*line); + } + } + let freqs = frequency::frequency(&v[..], 3); + assert_eq!(freqs.get(&'a'), Some(&49)); + assert_eq!(freqs.get(&'t'), Some(&56)); + assert_eq!(freqs.get(&'ü'), Some(&2)); +} + +#[test] +fn non_integer_multiple_of_threads() { + let v = vec!["abc"; 999]; + let mut hm = HashMap::new(); + hm.insert('a', 999); + hm.insert('b', 999); + hm.insert('c', 999); + assert_eq!(frequency::frequency(&v[..], 4), hm); +}