Manual SIMD for locating separators and newlines
This commit is contained in:
parent
1e16c79d5b
commit
d5330e58f2
2 changed files with 88 additions and 1 deletions
|
@ -1,3 +1,5 @@
|
||||||
|
#![feature(portable_simd)]
|
||||||
|
|
||||||
use ::{
|
use ::{
|
||||||
anyhow::Result,
|
anyhow::Result,
|
||||||
clap::{Parser, ValueEnum},
|
clap::{Parser, ValueEnum},
|
||||||
|
|
|
@ -10,6 +10,7 @@ use {
|
||||||
std::{
|
std::{
|
||||||
fs::File,
|
fs::File,
|
||||||
io::{BufWriter, Write},
|
io::{BufWriter, Write},
|
||||||
|
simd::prelude::*,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -33,7 +34,12 @@ pub(crate) fn run(args: Args) -> Result<()> {
|
||||||
input.advise(Advice::Sequential)?;
|
input.advise(Advice::Sequential)?;
|
||||||
input.advise(Advice::WillNeed)?;
|
input.advise(Advice::WillNeed)?;
|
||||||
|
|
||||||
let input = &input[..];
|
let mut input = &input[..];
|
||||||
|
while input.first() == Some(&b'#') {
|
||||||
|
let pos_nl = memchr(b'\n', input).context("bad input")?;
|
||||||
|
input = &input[pos_nl + 1..];
|
||||||
|
}
|
||||||
|
|
||||||
let style: ProgressStyle = ProgressStyle::with_template(
|
let style: ProgressStyle = ProgressStyle::with_template(
|
||||||
"[{elapsed}/{duration}] [{bar}] {percent}% ({binary_bytes_per_sec})",
|
"[{elapsed}/{duration}] [{bar}] {percent}% ({binary_bytes_per_sec})",
|
||||||
)
|
)
|
||||||
|
@ -134,6 +140,9 @@ fn parse_temp(input: &[u8], sep_pos: usize, nl_pos: usize) -> i64 {
|
||||||
sign * (a * 100 + b * 10 + c)
|
sign * (a * 100 + b * 10 + c)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Number of `u8` lanes in one SIMD vector (32 bytes = 256 bits). The
/// separator/newline search in `chunk` reads the input `LANES` bytes at a time.
const LANES: usize = 32;
|
||||||
|
/// One `LANES`-byte SIMD vector. It is loaded with `Chunk::from_slice` and
/// compared lane-wise against a splatted `;` or `\n` to build match bitmasks.
type Chunk = Simd<u8, LANES>;
|
||||||
|
|
||||||
/// Looks for the field separator `;` in `b` by delegating to `memchr`.
///
/// Returns whatever `memchr(b';', b)` reports: `Some(index)` of a `;` byte, or
/// `None` when there is none.
// NOTE(review): presumably this is `memchr::memchr` (first match wins); the
// import is not visible in this hunk, so confirm against the file header.
fn find_sep(b: &[u8]) -> Option<usize> {
|
fn find_sep(b: &[u8]) -> Option<usize> {
|
||||||
memchr(b';', b)
|
memchr(b';', b)
|
||||||
}
|
}
|
||||||
|
@ -150,7 +159,82 @@ fn chunk<'input, 'bump>(
|
||||||
let mut data: HashMap<&'bump [u8], Data, RandomState> =
|
let mut data: HashMap<&'bump [u8], Data, RandomState> =
|
||||||
HashMap::with_capacity_and_hasher(UNIQUE_CITY_COUNT, RandomState::default());
|
HashMap::with_capacity_and_hasher(UNIQUE_CITY_COUNT, RandomState::default());
|
||||||
|
|
||||||
|
let sep = Chunk::splat(b';');
|
||||||
|
let nl = Chunk::splat(b'\n');
|
||||||
let mut pb_since_last_inc: usize = 0;
|
let mut pb_since_last_inc: usize = 0;
|
||||||
|
'outer: while input.len() > LANES {
|
||||||
|
if pb_since_last_inc >= 10_000_000 {
|
||||||
|
pb.inc(pb_since_last_inc as u64);
|
||||||
|
pb_since_last_inc = 0;
|
||||||
|
}
|
||||||
|
let chunk = Chunk::from_slice(input);
|
||||||
|
let mut mask_sep = sep.simd_eq(chunk).to_bitmask();
|
||||||
|
let mut mask_nl = nl.simd_eq(chunk).to_bitmask();
|
||||||
|
|
||||||
|
let mut i_sep = 0;
|
||||||
|
if mask_sep == 0 {
|
||||||
|
loop {
|
||||||
|
i_sep += 1;
|
||||||
|
let chunk_input = &input[i_sep * LANES..];
|
||||||
|
if chunk_input.len() < LANES {
|
||||||
|
break 'outer;
|
||||||
|
}
|
||||||
|
let chunk = Chunk::from_slice(chunk_input);
|
||||||
|
mask_sep = sep.simd_eq(chunk).to_bitmask();
|
||||||
|
mask_nl = nl.simd_eq(chunk).to_bitmask();
|
||||||
|
if mask_sep != 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut i_nl = i_sep;
|
||||||
|
if mask_nl == 0 {
|
||||||
|
loop {
|
||||||
|
i_nl += 1;
|
||||||
|
let chunk_input = &input[i_nl * LANES..];
|
||||||
|
if chunk_input.len() < LANES {
|
||||||
|
break 'outer;
|
||||||
|
}
|
||||||
|
let chunk = Chunk::from_slice(chunk_input);
|
||||||
|
mask_nl = nl.simd_eq(chunk).to_bitmask();
|
||||||
|
if mask_nl != 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let offset_sep = mask_sep.trailing_zeros();
|
||||||
|
let offset_nl = mask_nl.trailing_zeros();
|
||||||
|
let pos_sep = LANES * i_sep + offset_sep as usize;
|
||||||
|
let pos_nl = LANES * i_nl + offset_nl as usize;
|
||||||
|
|
||||||
|
let city = unsafe { input.get_unchecked(..pos_sep) };
|
||||||
|
let temperature = parse_temp(input, pos_sep, pos_nl);
|
||||||
|
|
||||||
|
input = &input[pos_nl + 1..];
|
||||||
|
pb_since_last_inc += pos_nl + 1;
|
||||||
|
|
||||||
|
let (_key, data) = data.raw_entry_mut().from_key(city).or_insert_with(|| {
|
||||||
|
(
|
||||||
|
city_bumper.alloc_slice_copy(city),
|
||||||
|
Data {
|
||||||
|
min: i64::MAX,
|
||||||
|
max: i64::MIN,
|
||||||
|
sum: 0,
|
||||||
|
count: 0,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
});
|
||||||
|
if temperature < data.min {
|
||||||
|
data.min = temperature;
|
||||||
|
}
|
||||||
|
if temperature > data.max {
|
||||||
|
data.max = temperature;
|
||||||
|
}
|
||||||
|
data.sum += temperature;
|
||||||
|
data.count += 1;
|
||||||
|
}
|
||||||
loop {
|
loop {
|
||||||
if pb_since_last_inc >= 10_000_000 {
|
if pb_since_last_inc >= 10_000_000 {
|
||||||
pb.inc(pb_since_last_inc as u64);
|
pb.inc(pb_since_last_inc as u64);
|
||||||
|
@ -169,6 +253,7 @@ fn chunk<'input, 'bump>(
|
||||||
}
|
}
|
||||||
let city = unsafe { input.get_unchecked(..pos_sep) };
|
let city = unsafe { input.get_unchecked(..pos_sep) };
|
||||||
let temperature = parse_temp(input, pos_sep, pos_nl);
|
let temperature = parse_temp(input, pos_sep, pos_nl);
|
||||||
|
|
||||||
input = &input[pos_nl + 1..];
|
input = &input[pos_nl + 1..];
|
||||||
pb_since_last_inc += pos_nl + 1;
|
pb_since_last_inc += pos_nl + 1;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue