Branchless temperature parsing

This commit is contained in:
Avery Winters 2024-03-18 11:39:32 -05:00
parent 9d597c8e39
commit e934cec398
Signed by: avery
SSH key fingerprint: SHA256:eesvLB5MMqHLZrAMFt6kEhqJWnASMLcET6Sgmw0FqZI

View file

@ -116,15 +116,12 @@ pub(crate) fn run(args: Args) -> Result<()> {
Ok(())
}
fn parse_temp(temp: &[u8]) -> i64 {
let dec = |b| (b - b'0') as i64;
let (sign, a, b, c) = match temp {
[b'-', a, b, b'.', c] => (-1, dec(a), dec(b), dec(c)),
[b'-', b, b'.', c] => (-1, 0, dec(b), dec(c)),
[a, b, b'.', c] => (1, dec(a), dec(b), dec(c)),
[b, b'.', c] => (1, 0, dec(b), dec(c)),
_ => panic!("bad input"),
};
/// Parses the temperature field of one `city;temperature\n` record without branching
/// on the field's shape.
///
/// * `input` - the buffer containing the record.
/// * `sep_pos` - index of the `;` that precedes the temperature.
/// * `nl_pos` - index of the `\n` that terminates the record.
///
/// The field is expected to look like `-?\d?\d\.\d` (an optional minus sign, one or two
/// integer digits, a dot and exactly one fractional digit). The result is the value in
/// tenths, e.g. `-12.3` yields `-123`.
///
/// The bytes are read with unchecked indexing, so the caller must guarantee that
/// `sep_pos + 4 <= nl_pos <= input.len()`, which holds for every well-formed record.
///
/// # Panics
///
/// In builds with debug assertions enabled, panics if that index precondition is violated.
fn parse_temp(input: &[u8], sep_pos: usize, nl_pos: usize) -> i64 {
    // The shortest valid field is `d.d`, so at least three bytes must sit between the
    // separator and the newline. Compiled out in release builds, so the hot path stays
    // check-free.
    debug_assert!(
        sep_pos + 4 <= nl_pos && nl_pos <= input.len(),
        "temperature field out of bounds"
    );

    // SAFETY: the caller guarantees `sep_pos + 4 <= nl_pos <= input.len()` (checked above in
    // debug builds). Hence `sep_pos + 1`, `nl_pos - 4`, `nl_pos - 3` and `nl_pos - 1` are all
    // strictly less than `input.len()`, and none of the subtractions underflows.
    let (first_byte, tens_byte, ones_byte, tenths_byte) = unsafe {
        (
            *input.get_unchecked(sep_pos + 1),
            *input.get_unchecked(nl_pos - 4),
            *input.get_unchecked(nl_pos - 3),
            *input.get_unchecked(nl_pos - 1),
        )
    };

    // Maps "leading byte is not '-'" to +1 and "is '-'" to -1 arithmetically.
    let sign = 2 * i64::from(first_byte != b'-') - 1;

    // For short fields the byte four before the newline is `;` or `-` rather than a digit;
    // multiplying by the digit test zeroes it out. `wrapping_sub` keeps the subtraction
    // from overflowing for those non-digit bytes.
    let tens = i64::from(tens_byte.is_ascii_digit()) * i64::from(tens_byte.wrapping_sub(b'0'));
    let ones = i64::from(ones_byte - b'0');
    let tenths = i64::from(tenths_byte - b'0');

    sign * (tens * 100 + ones * 10 + tenths)
}
@ -134,16 +131,15 @@ fn chunk(input: &[u8], pb: ProgressBar) -> Result<HashMap<&[u8], Data>> {
let mut start = 0;
let mut last_pb_update: usize = 0;
let mut iter = memchr2_iter(b'\n', b';', input);
while let Some(split) = iter.next() {
if input[split] != b';' {
start = split + 1;
while let Some(sep_pos) = iter.next() {
if unsafe { *input.get_unchecked(sep_pos) } != b';' {
start = sep_pos + 1;
continue;
}
let pos = iter.next().context("bad input")?;
let city = &input[start..split];
let temperature = &input[split + 1..pos];
start = pos + 1;
let temperature = parse_temp(temperature);
let nl_pos = unsafe { iter.next().unwrap_unchecked() };
let city = unsafe { input.get_unchecked(start..sep_pos) };
let temperature = parse_temp(input, sep_pos, nl_pos);
start = nl_pos + 1;
let entry = data.entry(city);
entry
.and_modify(|data| {
@ -158,12 +154,12 @@ fn chunk(input: &[u8], pb: ProgressBar) -> Result<HashMap<&[u8], Data>> {
sum: temperature,
count: 1,
});
let last_pb_update_delta = pos - last_pb_update;
let last_pb_update_delta = nl_pos - last_pb_update;
if last_pb_update_delta >= 10_000_000 {
pb.inc(last_pb_update_delta.try_into()?);
last_pb_update = pos;
pb.inc(last_pb_update_delta as u64);
last_pb_update = nl_pos;
}
}
pb.inc((input.len() - last_pb_update).try_into()?);
pb.inc((input.len() - last_pb_update) as u64);
Ok(data)
}