Changes based on sampling-profiler results

This commit is contained in:
Avery Winters 2024-06-14 11:00:41 -05:00
parent 0c6d2b076f
commit 34068f029b
Signed by: avery
SSH key fingerprint: SHA256:eesvLB5MMqHLZrAMFt6kEhqJWnASMLcET6Sgmw0FqZI
4 changed files with 14 additions and 10 deletions

8
Cargo.lock generated
View file

@ -4,9 +4,9 @@ version = 3
[[package]] [[package]]
name = "ahash" name = "ahash"
version = "0.8.11" version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" checksum = "42cd52102d3df161c77a887b608d7a4897d7cc112886a9537b738a887a03aaff"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"getrandom", "getrandom",
@ -101,9 +101,9 @@ dependencies = [
[[package]] [[package]]
name = "bumpalo" name = "bumpalo"
version = "3.15.4" version = "3.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
[[package]] [[package]]
name = "bumpalo-herd" name = "bumpalo-herd"

View file

@ -4,11 +4,13 @@ version = "0.1.0"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[profile.release]
debug = true
[dependencies] [dependencies]
clap = { version = "4.5.2", features = ["derive"] } clap = { version = "4.5.2", features = ["derive"] }
# This is the latest version that actually uses AES on arm64 # This is the latest version that actually uses AES on arm64
ahash = "0.8.11" ahash = "=0.8.8"
bumpalo-herd = "0.1.2" bumpalo-herd = "0.1.2"
anyhow = "1.0.80" anyhow = "1.0.80"
indicatif = "0.17.8" indicatif = "0.17.8"

View file

@ -24,15 +24,15 @@ struct Data {
count: u64, count: u64,
} }
const PARALLELISM_FACTOR: usize = 2; const PARALLELISM_FACTOR: usize = 1;
const BUFFER_SIZE: usize = 1 << 30; const BUFFER_SIZE: usize = 1 << 30;
const UNIQUE_CITY_COUNT: usize = 10_000; const UNIQUE_CITY_COUNT: usize = 10_000;
pub(crate) fn run(args: Args) -> Result<()> { pub(crate) fn run(args: Args) -> Result<()> {
let input = File::open(args.input)?; let input_file = File::open(args.input)?;
let mut mmap_options = MmapOptions::new(); let mut mmap_options = MmapOptions::new();
mmap_options.populate(); mmap_options.populate();
let input = unsafe { mmap_options.map(&input)? }; let input = unsafe { mmap_options.map(&input_file)? };
input.advise(Advice::Sequential)?; input.advise(Advice::Sequential)?;
input.advise(Advice::WillNeed)?; input.advise(Advice::WillNeed)?;
@ -164,8 +164,9 @@ fn chunk<'input, 'bump>(
let sep = Chunk::splat(b';'); let sep = Chunk::splat(b';');
let nl = Chunk::splat(b'\n'); let nl = Chunk::splat(b'\n');
let mut pb_since_last_inc: usize = 0; let mut pb_since_last_inc: usize = 0;
const PB_INC_THRESHOLD: usize = 100_000_000;
'outer: while input.len() > LANES { 'outer: while input.len() > LANES {
if pb_since_last_inc >= 10_000_000 { if pb_since_last_inc >= PB_INC_THRESHOLD {
pb.inc(pb_since_last_inc as u64); pb.inc(pb_since_last_inc as u64);
pb_since_last_inc = 0; pb_since_last_inc = 0;
} }
@ -238,7 +239,7 @@ fn chunk<'input, 'bump>(
data.count += 1; data.count += 1;
} }
loop { loop {
if pb_since_last_inc >= 10_000_000 { if pb_since_last_inc >= PB_INC_THRESHOLD {
pb.inc(pb_since_last_inc as u64); pb.inc(pb_since_last_inc as u64);
pb_since_last_inc = 0; pb_since_last_inc = 0;
} }

View file

@ -41,6 +41,7 @@
{package = pkgs.clangStdenv;} {package = pkgs.clangStdenv;}
{package = pkgs.fenix.default.toolchain; } {package = pkgs.fenix.default.toolchain; }
{package = pkgs.cargo-flamegraph;} {package = pkgs.cargo-flamegraph;}
{package = pkgs.samply;}
{package = pkgs.nil;} {package = pkgs.nil;}
{package = pkgs.rust-analyzer-nightly;} {package = pkgs.rust-analyzer-nightly;}
]; ];