From 1e16c79d5b75d2d96761e90fbc9f9984081c14c2 Mon Sep 17 00:00:00 2001 From: Avery Winters Date: Mon, 18 Mar 2024 15:10:20 -0500 Subject: [PATCH] Various optimizations --- Cargo.lock | 16 ++++++ calculate/Cargo.toml | 1 + calculate/src/optimized.rs | 104 ++++++++++++++++++++++++------------- 3 files changed, 85 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8b2305e..f737a2d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -81,6 +81,7 @@ version = "0.1.0" dependencies = [ "ahash", "anyhow", + "bumpalo-herd", "clap", "hashbrown", "indicatif", @@ -98,6 +99,21 @@ dependencies = [ "indicatif", ] +[[package]] +name = "bumpalo" +version = "3.15.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" + +[[package]] +name = "bumpalo-herd" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c51ab7ee02d3459317cc16fec045966c9120733a10229541acaf3cc81b137c95" +dependencies = [ + "bumpalo", +] + [[package]] name = "cfg-if" version = "1.0.0" diff --git a/calculate/Cargo.toml b/calculate/Cargo.toml index 39665c7..1989c15 100644 --- a/calculate/Cargo.toml +++ b/calculate/Cargo.toml @@ -9,6 +9,7 @@ edition = "2021" clap = { version = "4.5.2", features = ["derive"] } # This is the latest version that actually uses AES on arm64 ahash = "=0.8.8" +bumpalo-herd = "0.1.2" anyhow = "1.0.80" indicatif = "0.17.8" memchr = "2.7.1" diff --git a/calculate/src/optimized.rs b/calculate/src/optimized.rs index 2805729..79ab840 100644 --- a/calculate/src/optimized.rs +++ b/calculate/src/optimized.rs @@ -1,9 +1,11 @@ use { crate::Args, + ahash::RandomState, anyhow::{Context, Result}, + bumpalo_herd::{Herd, Member}, hashbrown::HashMap, indicatif::{ProgressBar, ProgressStyle}, - memchr::{memchr, memchr2_iter}, + memchr::memchr, memmap2::{Advice, MmapOptions}, std::{ fs::File, @@ -40,6 +42,7 @@ pub(crate) fn run(args: Args) -> Result<()> { let 
chunks = PARALLELISM_FACTOR * std::thread::available_parallelism()?.get(); let target_chunk_size = input.len() / chunks; + let city_herd = Herd::new(); let data = std::thread::scope(|s| -> Result<_> { let mut results = Vec::with_capacity(chunks); let mut start = 0; @@ -52,10 +55,12 @@ pub(crate) fn run(args: Args) -> Result<()> { }; let input = &input[start..end]; let pb = pb.clone(); - results.push(s.spawn(move || chunk(input, pb))); + let city_bumper = city_herd.get(); + results.push(s.spawn(move || chunk(input, pb, city_bumper))); start = end; } - let mut data: HashMap<&[u8], Data> = HashMap::with_capacity(UNIQUE_CITY_COUNT); + let mut data: HashMap<&[u8], Data, RandomState> = + HashMap::with_capacity_and_hasher(UNIQUE_CITY_COUNT, RandomState::default()); for res in results { let chunk_data = res.join().unwrap()?; for ( @@ -71,8 +76,12 @@ pub(crate) fn run(args: Args) -> Result<()> { let entry = data.entry(city); entry .and_modify(|data| { - data.min = data.min.min(min); - data.max = data.max.max(max); + if min < data.min { + data.min = min; + } + if max > data.max { + data.max = max; + } data.sum += sum; data.count += count; }) @@ -125,41 +134,64 @@ fn parse_temp(input: &[u8], sep_pos: usize, nl_pos: usize) -> i64 { sign * (a * 100 + b * 10 + c) } -fn chunk(input: &[u8], pb: ProgressBar) -> Result<HashMap<&[u8], Data>> { - let mut data: HashMap<&[u8], Data> = HashMap::with_capacity(UNIQUE_CITY_COUNT); +fn find_sep(b: &[u8]) -> Option<usize> { + memchr(b';', b) +} - let mut start = 0; - let mut last_pb_update: usize = 0; - let mut iter = memchr2_iter(b'\n', b';', input); - while let Some(sep_pos) = iter.next() { - if unsafe { *input.get_unchecked(sep_pos) } != b';' { - start = sep_pos + 1; +fn find_nl(b: &[u8]) -> Option<usize> { + memchr(b'\n', b) +} + +fn chunk<'input, 'bump>( + mut input: &'input [u8], + pb: ProgressBar, + city_bumper: Member<'bump>, +) -> Result<HashMap<&'bump [u8], Data, RandomState>> { + let mut data: HashMap<&'bump [u8], Data, RandomState> = + HashMap::with_capacity_and_hasher(UNIQUE_CITY_COUNT, 
RandomState::default()); + + let mut pb_since_last_inc: usize = 0; + loop { + if pb_since_last_inc >= 10_000_000 { + pb.inc(pb_since_last_inc as u64); + pb_since_last_inc = 0; + } + let pos_sep = find_sep(input); + let pos_nl = find_nl(input); + let Some(pos_sep) = pos_sep else { + break; + }; + let pos_nl = pos_nl.unwrap_or(input.len()); + if pos_nl < pos_sep { + input = &input[pos_nl + 1..]; + pb_since_last_inc += pos_nl + 1; continue; } - let nl_pos = unsafe { iter.next().unwrap_unchecked() }; - let city = unsafe { input.get_unchecked(start..sep_pos) }; - let temperature = parse_temp(input, sep_pos, nl_pos); - start = nl_pos + 1; - let entry = data.entry(city); - entry - .and_modify(|data| { - data.min = data.min.min(temperature); - data.max = data.max.max(temperature); - data.sum += temperature; - data.count += 1; - }) - .or_insert_with(|| Data { - min: temperature, - max: temperature, - sum: temperature, - count: 1, - }); - let last_pb_update_delta = nl_pos - last_pb_update; - if last_pb_update_delta >= 10_000_000 { - pb.inc(last_pb_update_delta as u64); - last_pb_update = nl_pos; + let city = unsafe { input.get_unchecked(..pos_sep) }; + let temperature = parse_temp(input, pos_sep, pos_nl); + input = &input[pos_nl + 1..]; + pb_since_last_inc += pos_nl + 1; + + let (_key, data) = data.raw_entry_mut().from_key(city).or_insert_with(|| { + ( + city_bumper.alloc_slice_copy(city), + Data { + min: i64::MAX, + max: i64::MIN, + sum: 0, + count: 0, + }, + ) + }); + if temperature < data.min { + data.min = temperature; } + if temperature > data.max { + data.max = temperature; + } + data.sum += temperature; + data.count += 1; } - pb.inc((input.len() - last_pb_update) as u64); + pb.inc(pb_since_last_inc as u64); Ok(data) }