Various optimizations

This commit is contained in:
Avery Winters 2024-03-18 15:10:20 -05:00
parent a33cdba173
commit 1e16c79d5b
Signed by: avery
SSH key fingerprint: SHA256:eesvLB5MMqHLZrAMFt6kEhqJWnASMLcET6Sgmw0FqZI
3 changed files with 85 additions and 36 deletions

16
Cargo.lock generated
View file

@ -81,6 +81,7 @@ version = "0.1.0"
dependencies = [ dependencies = [
"ahash", "ahash",
"anyhow", "anyhow",
"bumpalo-herd",
"clap", "clap",
"hashbrown", "hashbrown",
"indicatif", "indicatif",
@ -98,6 +99,21 @@ dependencies = [
"indicatif", "indicatif",
] ]
[[package]]
name = "bumpalo"
version = "3.15.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa"
[[package]]
name = "bumpalo-herd"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c51ab7ee02d3459317cc16fec045966c9120733a10229541acaf3cc81b137c95"
dependencies = [
"bumpalo",
]
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "1.0.0" version = "1.0.0"

View file

@ -9,6 +9,7 @@ edition = "2021"
clap = { version = "4.5.2", features = ["derive"] } clap = { version = "4.5.2", features = ["derive"] }
# This is the latest version that actually uses AES on arm64 # This is the latest version that actually uses AES on arm64
ahash = "=0.8.8" ahash = "=0.8.8"
bumpalo-herd = "0.1.2"
anyhow = "1.0.80" anyhow = "1.0.80"
indicatif = "0.17.8" indicatif = "0.17.8"
memchr = "2.7.1" memchr = "2.7.1"

View file

@ -1,9 +1,11 @@
use { use {
crate::Args, crate::Args,
ahash::RandomState,
anyhow::{Context, Result}, anyhow::{Context, Result},
bumpalo_herd::{Herd, Member},
hashbrown::HashMap, hashbrown::HashMap,
indicatif::{ProgressBar, ProgressStyle}, indicatif::{ProgressBar, ProgressStyle},
memchr::{memchr, memchr2_iter}, memchr::memchr,
memmap2::{Advice, MmapOptions}, memmap2::{Advice, MmapOptions},
std::{ std::{
fs::File, fs::File,
@ -40,6 +42,7 @@ pub(crate) fn run(args: Args) -> Result<()> {
let chunks = PARALLELISM_FACTOR * std::thread::available_parallelism()?.get(); let chunks = PARALLELISM_FACTOR * std::thread::available_parallelism()?.get();
let target_chunk_size = input.len() / chunks; let target_chunk_size = input.len() / chunks;
let city_herd = Herd::new();
let data = std::thread::scope(|s| -> Result<_> { let data = std::thread::scope(|s| -> Result<_> {
let mut results = Vec::with_capacity(chunks); let mut results = Vec::with_capacity(chunks);
let mut start = 0; let mut start = 0;
@ -52,10 +55,12 @@ pub(crate) fn run(args: Args) -> Result<()> {
}; };
let input = &input[start..end]; let input = &input[start..end];
let pb = pb.clone(); let pb = pb.clone();
results.push(s.spawn(move || chunk(input, pb))); let city_bumper = city_herd.get();
results.push(s.spawn(move || chunk(input, pb, city_bumper)));
start = end; start = end;
} }
let mut data: HashMap<&[u8], Data> = HashMap::with_capacity(UNIQUE_CITY_COUNT); let mut data: HashMap<&[u8], Data, RandomState> =
HashMap::with_capacity_and_hasher(UNIQUE_CITY_COUNT, RandomState::default());
for res in results { for res in results {
let chunk_data = res.join().unwrap()?; let chunk_data = res.join().unwrap()?;
for ( for (
@ -71,8 +76,12 @@ pub(crate) fn run(args: Args) -> Result<()> {
let entry = data.entry(city); let entry = data.entry(city);
entry entry
.and_modify(|data| { .and_modify(|data| {
data.min = data.min.min(min); if min < data.min {
data.max = data.max.max(max); data.min = min;
}
if max > data.max {
data.max = max;
}
data.sum += sum; data.sum += sum;
data.count += count; data.count += count;
}) })
@ -125,41 +134,64 @@ fn parse_temp(input: &[u8], sep_pos: usize, nl_pos: usize) -> i64 {
sign * (a * 100 + b * 10 + c) sign * (a * 100 + b * 10 + c)
} }
fn chunk(input: &[u8], pb: ProgressBar) -> Result<HashMap<&[u8], Data>> { fn find_sep(b: &[u8]) -> Option<usize> {
let mut data: HashMap<&[u8], Data> = HashMap::with_capacity(UNIQUE_CITY_COUNT); memchr(b';', b)
}
let mut start = 0; fn find_nl(b: &[u8]) -> Option<usize> {
let mut last_pb_update: usize = 0; memchr(b'\n', b)
let mut iter = memchr2_iter(b'\n', b';', input); }
while let Some(sep_pos) = iter.next() {
if unsafe { *input.get_unchecked(sep_pos) } != b';' { fn chunk<'input, 'bump>(
start = sep_pos + 1; mut input: &'input [u8],
pb: ProgressBar,
city_bumper: Member<'bump>,
) -> Result<HashMap<&'bump [u8], Data, RandomState>> {
let mut data: HashMap<&'bump [u8], Data, RandomState> =
HashMap::with_capacity_and_hasher(UNIQUE_CITY_COUNT, RandomState::default());
let mut pb_since_last_inc: usize = 0;
loop {
if pb_since_last_inc >= 10_000_000 {
pb.inc(pb_since_last_inc as u64);
pb_since_last_inc = 0;
}
let pos_sep = find_sep(input);
let pos_nl = find_nl(input);
let Some(pos_sep) = pos_sep else {
break;
};
let pos_nl = pos_nl.unwrap_or(input.len());
if pos_nl < pos_sep {
input = &input[pos_nl + 1..];
pb_since_last_inc += pos_nl + 1;
continue; continue;
} }
let nl_pos = unsafe { iter.next().unwrap_unchecked() }; let city = unsafe { input.get_unchecked(..pos_sep) };
let city = unsafe { input.get_unchecked(start..sep_pos) }; let temperature = parse_temp(input, pos_sep, pos_nl);
let temperature = parse_temp(input, sep_pos, nl_pos); input = &input[pos_nl + 1..];
start = nl_pos + 1; pb_since_last_inc += pos_nl + 1;
let entry = data.entry(city);
entry let (_key, data) = data.raw_entry_mut().from_key(city).or_insert_with(|| {
.and_modify(|data| { (
data.min = data.min.min(temperature); city_bumper.alloc_slice_copy(city),
data.max = data.max.max(temperature); Data {
data.sum += temperature; min: i64::MAX,
data.count += 1; max: i64::MIN,
}) sum: 0,
.or_insert_with(|| Data { count: 0,
min: temperature, },
max: temperature, )
sum: temperature, });
count: 1, if temperature < data.min {
}); data.min = temperature;
let last_pb_update_delta = nl_pos - last_pb_update;
if last_pb_update_delta >= 10_000_000 {
pb.inc(last_pb_update_delta as u64);
last_pb_update = nl_pos;
} }
if temperature > data.max {
data.max = temperature;
}
data.sum += temperature;
data.count += 1;
} }
pb.inc((input.len() - last_pb_update) as u64); pb.inc(pb_since_last_inc as u64);
Ok(data) Ok(data)
} }