Manual SIMD for locating separators and newlines

This commit is contained in:
Avery Winters 2024-03-18 17:04:52 -05:00
parent 1e16c79d5b
commit d5330e58f2
Signed by: avery
SSH key fingerprint: SHA256:eesvLB5MMqHLZrAMFt6kEhqJWnASMLcET6Sgmw0FqZI
2 changed files with 88 additions and 1 deletions

View file

@ -1,3 +1,5 @@
#![feature(portable_simd)]
use ::{
anyhow::Result,
clap::{Parser, ValueEnum},

View file

@ -10,6 +10,7 @@ use {
std::{
fs::File,
io::{BufWriter, Write},
simd::prelude::*,
},
};
@ -33,7 +34,12 @@ pub(crate) fn run(args: Args) -> Result<()> {
input.advise(Advice::Sequential)?;
input.advise(Advice::WillNeed)?;
let input = &input[..];
let mut input = &input[..];
// Skip every leading line that starts with `#`: each pass drops everything up to and
// including the next newline. A `#` line with no terminating newline is reported as
// the error "bad input".
while input.first() == Some(&b'#') {
let pos_nl = memchr(b'\n', input).context("bad input")?;
// Resume just past the newline so the next `first()` sees the start of the next line.
input = &input[pos_nl + 1..];
}
let style: ProgressStyle = ProgressStyle::with_template(
"[{elapsed}/{duration}] [{bar}] {percent}% ({binary_bytes_per_sec})",
)
@ -134,6 +140,9 @@ fn parse_temp(input: &[u8], sep_pos: usize, nl_pos: usize) -> i64 {
sign * (a * 100 + b * 10 + c)
}
/// Number of `u8` lanes in one SIMD vector, i.e. how many input bytes are compared at once.
const LANES: usize = 32;
/// A SIMD vector of `LANES` bytes.
type Chunk = Simd<u8, LANES>;
/// Locates the `;` separator in `b` by delegating to `memchr`.
///
/// Returns whatever `memchr` reports for the needle `b';'`: the position of the
/// separator, or `None` when `b` contains no `;`.
fn find_sep(b: &[u8]) -> Option<usize> {
    const SEPARATOR: u8 = b';';
    memchr(SEPARATOR, b)
}
@ -150,7 +159,82 @@ fn chunk<'input, 'bump>(
let mut data: HashMap<&'bump [u8], Data, RandomState> =
HashMap::with_capacity_and_hasher(UNIQUE_CITY_COUNT, RandomState::default());
let sep = Chunk::splat(b';');
let nl = Chunk::splat(b'\n');
let mut pb_since_last_inc: usize = 0;
// SIMD fast path: consume one record per iteration by scanning `LANES` bytes at a
// time for the `;` separator and the terminating newline. It only runs while more than
// `LANES` bytes remain; any `break 'outer` leaves `input` pointing at the start of the
// unprocessed record so the code following this loop can pick it up.
'outer: while input.len() > LANES {
// Batch progress-bar updates: only report once at least 10_000_000 bytes have
// accumulated since the previous report.
if pb_since_last_inc >= 10_000_000 {
pb.inc(pb_since_last_inc as u64);
pb_since_last_inc = 0;
}
// Compare the first `LANES` bytes against both needles. In each bitmask, bit `k` is
// set when lane `k` matched.
let chunk = Chunk::from_slice(input);
let mut mask_sep = sep.simd_eq(chunk).to_bitmask();
let mut mask_nl = nl.simd_eq(chunk).to_bitmask();
// Index (in units of `LANES` bytes) of the chunk that contains the separator.
let mut i_sep = 0;
if mask_sep == 0 {
// No separator in the first chunk: walk forward one full chunk at a time.
loop {
i_sep += 1;
let chunk_input = &input[i_sep * LANES..];
// Not enough bytes left for a full vector load; give up on the SIMD path.
if chunk_input.len() < LANES {
break 'outer;
}
let chunk = Chunk::from_slice(chunk_input);
mask_sep = sep.simd_eq(chunk).to_bitmask();
// Refresh the newline mask as well, so the newline search below starts from
// the chunk that holds the separator rather than from a stale earlier chunk.
mask_nl = nl.simd_eq(chunk).to_bitmask();
if mask_sep != 0 {
break;
}
}
}
// The newline search begins in the separator's chunk.
// NOTE(review): this takes the first newline in that chunk, so it assumes no newline
// precedes the separator within the record — confirm input is always `city;temp\n`.
let mut i_nl = i_sep;
if mask_nl == 0 {
// Newline is not in the separator's chunk: keep scanning subsequent full chunks.
loop {
i_nl += 1;
let chunk_input = &input[i_nl * LANES..];
if chunk_input.len() < LANES {
break 'outer;
}
let chunk = Chunk::from_slice(chunk_input);
mask_nl = nl.simd_eq(chunk).to_bitmask();
if mask_nl != 0 {
break;
}
}
}
// Both masks are non-zero here; the lowest set bit is the first matching lane.
let offset_sep = mask_sep.trailing_zeros();
let offset_nl = mask_nl.trailing_zeros();
// Convert (chunk index, lane offset) into byte positions relative to `input`.
let pos_sep = LANES * i_sep + offset_sep as usize;
let pos_nl = LANES * i_nl + offset_nl as usize;
// SAFETY: `pos_sep` lies inside a chunk that was loaded in full from `input`
// (chunk `i_sep` had at least `LANES` bytes and `offset_sep < LANES`), so
// `pos_sep < input.len()` and the range `..pos_sep` is in bounds.
let city = unsafe { input.get_unchecked(..pos_sep) };
let temperature = parse_temp(input, pos_sep, pos_nl);
// Advance past the newline to the start of the next record.
input = &input[pos_nl + 1..];
pb_since_last_inc += pos_nl + 1;
// Look the city up by its borrowed bytes; the key is copied via `city_bumper` only
// when the entry is missing, and a new entry starts with sentinel min/max values so
// the comparisons below always take effect for the first sample.
let (_key, data) = data.raw_entry_mut().from_key(city).or_insert_with(|| {
(
city_bumper.alloc_slice_copy(city),
Data {
min: i64::MAX,
max: i64::MIN,
sum: 0,
count: 0,
},
)
});
// Fold this measurement into the running statistics for the city.
if temperature < data.min {
data.min = temperature;
}
if temperature > data.max {
data.max = temperature;
}
data.sum += temperature;
data.count += 1;
}
loop {
if pb_since_last_inc >= 10_000_000 {
pb.inc(pb_since_last_inc as u64);
@ -169,6 +253,7 @@ fn chunk<'input, 'bump>(
}
let city = unsafe { input.get_unchecked(..pos_sep) };
let temperature = parse_temp(input, pos_sep, pos_nl);
input = &input[pos_nl + 1..];
pb_since_last_inc += pos_nl + 1;