Manual SIMD for locating separators and newlines
This commit is contained in:
parent
1e16c79d5b
commit
d5330e58f2
2 changed files with 88 additions and 1 deletions
|
@ -1,3 +1,5 @@
|
|||
#![feature(portable_simd)]
|
||||
|
||||
use ::{
|
||||
anyhow::Result,
|
||||
clap::{Parser, ValueEnum},
|
||||
|
|
|
@ -10,6 +10,7 @@ use {
|
|||
std::{
|
||||
fs::File,
|
||||
io::{BufWriter, Write},
|
||||
simd::prelude::*,
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -33,7 +34,12 @@ pub(crate) fn run(args: Args) -> Result<()> {
|
|||
input.advise(Advice::Sequential)?;
|
||||
input.advise(Advice::WillNeed)?;
|
||||
|
||||
let input = &input[..];
|
||||
let mut input = &input[..];
|
||||
while input.first() == Some(&b'#') {
|
||||
let pos_nl = memchr(b'\n', input).context("bad input")?;
|
||||
input = &input[pos_nl + 1..];
|
||||
}
|
||||
|
||||
let style: ProgressStyle = ProgressStyle::with_template(
|
||||
"[{elapsed}/{duration}] [{bar}] {percent}% ({binary_bytes_per_sec})",
|
||||
)
|
||||
|
@ -134,6 +140,9 @@ fn parse_temp(input: &[u8], sep_pos: usize, nl_pos: usize) -> i64 {
|
|||
sign * (a * 100 + b * 10 + c)
|
||||
}
|
||||
|
||||
/// Number of `u8` lanes in one `Chunk`; the SIMD scan also walks the input in
/// steps of this many bytes.
const LANES: usize = 32;
|
||||
/// A SIMD vector of `LANES` bytes, used to compare one window of input against
/// a splatted `;` or `\n` byte in a single operation.
type Chunk = Simd<u8, LANES>;
|
||||
|
||||
fn find_sep(b: &[u8]) -> Option<usize> {
|
||||
memchr(b';', b)
|
||||
}
|
||||
|
@ -150,7 +159,82 @@ fn chunk<'input, 'bump>(
|
|||
let mut data: HashMap<&'bump [u8], Data, RandomState> =
|
||||
HashMap::with_capacity_and_hasher(UNIQUE_CITY_COUNT, RandomState::default());
|
||||
|
||||
let sep = Chunk::splat(b';');
|
||||
let nl = Chunk::splat(b'\n');
|
||||
let mut pb_since_last_inc: usize = 0;
|
||||
'outer: while input.len() > LANES {
|
||||
if pb_since_last_inc >= 10_000_000 {
|
||||
pb.inc(pb_since_last_inc as u64);
|
||||
pb_since_last_inc = 0;
|
||||
}
|
||||
let chunk = Chunk::from_slice(input);
|
||||
let mut mask_sep = sep.simd_eq(chunk).to_bitmask();
|
||||
let mut mask_nl = nl.simd_eq(chunk).to_bitmask();
|
||||
|
||||
let mut i_sep = 0;
|
||||
if mask_sep == 0 {
|
||||
loop {
|
||||
i_sep += 1;
|
||||
let chunk_input = &input[i_sep * LANES..];
|
||||
if chunk_input.len() < LANES {
|
||||
break 'outer;
|
||||
}
|
||||
let chunk = Chunk::from_slice(chunk_input);
|
||||
mask_sep = sep.simd_eq(chunk).to_bitmask();
|
||||
mask_nl = nl.simd_eq(chunk).to_bitmask();
|
||||
if mask_sep != 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut i_nl = i_sep;
|
||||
if mask_nl == 0 {
|
||||
loop {
|
||||
i_nl += 1;
|
||||
let chunk_input = &input[i_nl * LANES..];
|
||||
if chunk_input.len() < LANES {
|
||||
break 'outer;
|
||||
}
|
||||
let chunk = Chunk::from_slice(chunk_input);
|
||||
mask_nl = nl.simd_eq(chunk).to_bitmask();
|
||||
if mask_nl != 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let offset_sep = mask_sep.trailing_zeros();
|
||||
let offset_nl = mask_nl.trailing_zeros();
|
||||
let pos_sep = LANES * i_sep + offset_sep as usize;
|
||||
let pos_nl = LANES * i_nl + offset_nl as usize;
|
||||
|
||||
let city = unsafe { input.get_unchecked(..pos_sep) };
|
||||
let temperature = parse_temp(input, pos_sep, pos_nl);
|
||||
|
||||
input = &input[pos_nl + 1..];
|
||||
pb_since_last_inc += pos_nl + 1;
|
||||
|
||||
let (_key, data) = data.raw_entry_mut().from_key(city).or_insert_with(|| {
|
||||
(
|
||||
city_bumper.alloc_slice_copy(city),
|
||||
Data {
|
||||
min: i64::MAX,
|
||||
max: i64::MIN,
|
||||
sum: 0,
|
||||
count: 0,
|
||||
},
|
||||
)
|
||||
});
|
||||
if temperature < data.min {
|
||||
data.min = temperature;
|
||||
}
|
||||
if temperature > data.max {
|
||||
data.max = temperature;
|
||||
}
|
||||
data.sum += temperature;
|
||||
data.count += 1;
|
||||
}
|
||||
loop {
|
||||
if pb_since_last_inc >= 10_000_000 {
|
||||
pb.inc(pb_since_last_inc as u64);
|
||||
|
@ -169,6 +253,7 @@ fn chunk<'input, 'bump>(
|
|||
}
|
||||
let city = unsafe { input.get_unchecked(..pos_sep) };
|
||||
let temperature = parse_temp(input, pos_sep, pos_nl);
|
||||
|
||||
input = &input[pos_nl + 1..];
|
||||
pb_since_last_inc += pos_nl + 1;
|
||||
|
||||
|
|
Loading…
Reference in a new issue