Initial optimized implementation
This commit is contained in:
parent
9add76b415
commit
9233682c5a
8 changed files with 316 additions and 10 deletions
90
Cargo.lock
generated
90
Cargo.lock
generated
|
@ -2,6 +2,24 @@
|
|||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "ahash"
|
||||
version = "0.8.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"once_cell",
|
||||
"version_check",
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "allocator-api2"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
|
||||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "0.6.13"
|
||||
|
@ -59,6 +77,15 @@ checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1"
|
|||
[[package]]
|
||||
name = "bilrow-calculate"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
"fast-float",
|
||||
"hashbrown",
|
||||
"indicatif",
|
||||
"memchr",
|
||||
"memmap2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bilrow-generate"
|
||||
|
@ -141,12 +168,28 @@ version = "0.3.6"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
|
||||
|
||||
[[package]]
|
||||
name = "fast-float"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c"
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5"
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.14.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"allocator-api2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.4.1"
|
||||
|
@ -187,12 +230,33 @@ version = "0.2.153"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
|
||||
|
||||
[[package]]
|
||||
name = "memmap2"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "number_prefix"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
|
||||
|
||||
[[package]]
|
||||
name = "portable-atomic"
|
||||
version = "1.6.0"
|
||||
|
@ -252,6 +316,12 @@ version = "0.2.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.52.0"
|
||||
|
@ -317,3 +387,23 @@ name = "windows_x86_64_msvc"
|
|||
version = "0.52.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy"
|
||||
version = "0.7.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be"
|
||||
dependencies = [
|
||||
"zerocopy-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy-derive"
|
||||
version = "0.7.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
[workspace]
|
||||
resolver = "2"
|
||||
members = ["calculate", "generate"]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
|
@ -6,3 +6,10 @@ edition = "2021"
|
|||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
clap = { version = "4.5.2", features = ["derive"] }
|
||||
anyhow = "1.0.80"
|
||||
indicatif = "0.17.8"
|
||||
memchr = "2.7.1"
|
||||
memmap2 = "0.9.4"
|
||||
hashbrown = "0.14.3"
|
||||
fast-float = "0.2.0"
|
||||
|
|
|
@ -1,3 +1,34 @@
|
|||
fn main() {
|
||||
println!("Hello, world!");
|
||||
use ::{
|
||||
anyhow::Result,
|
||||
clap::{Parser, ValueEnum},
|
||||
std::path::PathBuf,
|
||||
};
|
||||
|
||||
mod optimized;
|
||||
mod reference;
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let args = Args::parse();
|
||||
match args.mode {
|
||||
Mode::Reference => reference::run(args),
|
||||
Mode::Optimized => optimized::run(args),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, ValueEnum)]
|
||||
enum Mode {
|
||||
Reference,
|
||||
Optimized,
|
||||
}
|
||||
|
||||
/// Program to generate input files for the one billion row challenge.
|
||||
#[derive(Parser)]
|
||||
#[command(version, about)]
|
||||
struct Args {
|
||||
#[arg(value_enum, short, long, default_value_t = Mode::Reference)]
|
||||
mode: Mode,
|
||||
|
||||
/// The file path to read the input from
|
||||
#[arg()]
|
||||
input: PathBuf,
|
||||
}
|
||||
|
|
97
calculate/src/optimized.rs
Normal file
97
calculate/src/optimized.rs
Normal file
|
@ -0,0 +1,97 @@
|
|||
use {
|
||||
crate::Args,
|
||||
anyhow::{Context, Result},
|
||||
fast_float::parse,
|
||||
hashbrown::HashMap,
|
||||
indicatif::{ProgressBar, ProgressStyle},
|
||||
memchr::{memchr, memchr_iter},
|
||||
memmap2::{Advice, MmapOptions},
|
||||
std::{
|
||||
fs::File,
|
||||
io::{BufWriter, Write},
|
||||
},
|
||||
};
|
||||
|
||||
struct Data {
|
||||
min: f64,
|
||||
max: f64,
|
||||
sum: f64,
|
||||
count: u64,
|
||||
}
|
||||
|
||||
const BUFFER_SIZE: usize = 1 << 30;
|
||||
const UNIQUE_CITY_COUNT: usize = 10_000;
|
||||
|
||||
pub(crate) fn run(args: Args) -> Result<()> {
|
||||
let input = File::open(args.input)?;
|
||||
let mut mmap_options = MmapOptions::new();
|
||||
mmap_options.populate();
|
||||
let input = unsafe { mmap_options.map(&input)? };
|
||||
input.advise(Advice::Sequential)?;
|
||||
input.advise(Advice::WillNeed)?;
|
||||
let input = &input[..];
|
||||
let style: ProgressStyle = ProgressStyle::with_template(
|
||||
"[{elapsed}/{duration}] [{bar}] {percent}% ({binary_bytes_per_sec})",
|
||||
)
|
||||
.expect("bad progress bar style");
|
||||
let pb = ProgressBar::new(input.len().try_into()?).with_style(style);
|
||||
|
||||
let mut data: HashMap<&[u8], Data> = HashMap::with_capacity(UNIQUE_CITY_COUNT);
|
||||
|
||||
let mut start = 0;
|
||||
let mut last_pb_update: usize = 0;
|
||||
for pos in memchr_iter(b'\n', input) {
|
||||
let line = &input[start..pos];
|
||||
start = pos + 1;
|
||||
if line.first() == Some(&b'#') {
|
||||
continue;
|
||||
}
|
||||
let split = memchr(b';', line).context("bad input")?;
|
||||
let city = &line[..split];
|
||||
let temperature = &line[split + 1..];
|
||||
let temperature = parse(temperature)?;
|
||||
|
||||
let entry = data.entry(city);
|
||||
entry
|
||||
.and_modify(|data| {
|
||||
data.min = data.min.min(temperature);
|
||||
data.max = data.max.max(temperature);
|
||||
data.sum += temperature;
|
||||
data.count += 1;
|
||||
})
|
||||
.or_insert_with(|| Data {
|
||||
min: temperature,
|
||||
max: temperature,
|
||||
sum: temperature,
|
||||
count: 1,
|
||||
});
|
||||
if pos - last_pb_update >= 10_000_000 {
|
||||
pb.set_position(pos.try_into()?);
|
||||
last_pb_update = pos;
|
||||
}
|
||||
}
|
||||
pb.finish();
|
||||
let mut data: Vec<_> = data.iter().collect();
|
||||
data.sort_unstable_by_key(|(&key, _)| key);
|
||||
let output = std::io::stdout();
|
||||
let output = output.lock();
|
||||
let mut output = BufWriter::with_capacity(BUFFER_SIZE, output);
|
||||
for (
|
||||
city,
|
||||
Data {
|
||||
min,
|
||||
max,
|
||||
sum,
|
||||
count,
|
||||
},
|
||||
) in data
|
||||
{
|
||||
writeln!(
|
||||
output,
|
||||
"{};{min:.1};{:.1};{max:.1}",
|
||||
std::str::from_utf8(city)?,
|
||||
sum / *count as f64
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
81
calculate/src/reference.rs
Normal file
81
calculate/src/reference.rs
Normal file
|
@ -0,0 +1,81 @@
|
|||
use {
|
||||
crate::Args,
|
||||
::{
|
||||
anyhow::{Context, Result},
|
||||
indicatif::{ProgressBar, ProgressStyle},
|
||||
std::{
|
||||
collections::BTreeMap,
|
||||
fs::File,
|
||||
io::{BufRead, BufReader, BufWriter, Write},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const BUFFER_SIZE: usize = 1 << 30;
|
||||
|
||||
pub(crate) fn run(args: Args) -> Result<()> {
|
||||
let input = File::open(args.input)?;
|
||||
let len = input.metadata()?.len();
|
||||
let input = BufReader::with_capacity(BUFFER_SIZE, input);
|
||||
|
||||
let style: ProgressStyle = ProgressStyle::with_template(
|
||||
"[{elapsed}/{duration}] [{bar}] {percent}% ({binary_bytes_per_sec})",
|
||||
)
|
||||
.expect("bad progress bar style");
|
||||
let pb = ProgressBar::new(len).with_style(style);
|
||||
let input = pb.wrap_read(input);
|
||||
|
||||
struct StationData {
|
||||
min: f64,
|
||||
max: f64,
|
||||
sum: f64,
|
||||
count: u64,
|
||||
}
|
||||
|
||||
let mut stations: BTreeMap<Box<str>, StationData> = BTreeMap::new();
|
||||
|
||||
for (i, line) in input.lines().enumerate() {
|
||||
let line = line?;
|
||||
if line.starts_with('#') {
|
||||
continue;
|
||||
}
|
||||
let (city, temperature) = line
|
||||
.split_once(';')
|
||||
.with_context(|| format!("line {i} malformed"))?;
|
||||
let temperature = temperature.parse::<f64>()?;
|
||||
if let Some(data) = stations.get_mut(city) {
|
||||
data.min = data.min.min(temperature);
|
||||
data.max = data.max.max(temperature);
|
||||
data.sum += temperature;
|
||||
data.count += 1;
|
||||
} else {
|
||||
stations.insert(
|
||||
city.into(),
|
||||
StationData {
|
||||
min: temperature,
|
||||
max: temperature,
|
||||
sum: temperature,
|
||||
count: 1,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
pb.finish();
|
||||
|
||||
let output = std::io::stdout();
|
||||
let output = output.lock();
|
||||
let mut output = BufWriter::with_capacity(BUFFER_SIZE, output);
|
||||
for (
|
||||
city,
|
||||
StationData {
|
||||
min,
|
||||
max,
|
||||
sum,
|
||||
count,
|
||||
},
|
||||
) in stations
|
||||
{
|
||||
writeln!(output, "{city};{min:.1};{:.1};{max:.1}", sum / count as f64)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
|
@ -32,6 +32,7 @@
|
|||
commands = [
|
||||
{package = pkgs.clangStdenv;}
|
||||
{package = pkgs.cargo;}
|
||||
{package = pkgs.cargo-flamegraph;}
|
||||
{package = pkgs.nil;}
|
||||
{package = pkgs.rustc;}
|
||||
{package = pkgs.rustfmt;}
|
||||
|
|
|
@ -18,7 +18,7 @@ mod cities;
|
|||
|
||||
fn main() -> Result<()> {
|
||||
let args = Args::parse();
|
||||
let output = File::create(&args.output_path)?;
|
||||
let output = File::create(&args.output)?;
|
||||
let mut output = BufWriter::with_capacity(args.buffer, output);
|
||||
let mut rng = args
|
||||
.seed
|
||||
|
@ -28,7 +28,7 @@ fn main() -> Result<()> {
|
|||
writeln!(
|
||||
output,
|
||||
"# Generated with `bilrow-generate --cities {} --entries {} --buffer {} --seed {} {:?}`",
|
||||
args.cities, args.entries, args.buffer, seed, args.output_path
|
||||
args.cities, args.entries, args.buffer, seed, args.output
|
||||
)?;
|
||||
// Fork the RNG so we can change the order we use them in later if we want without
|
||||
// affecting reproducibility.
|
||||
|
@ -39,15 +39,11 @@ fn main() -> Result<()> {
|
|||
ProgressStyle::with_template("[{elapsed}/{duration}] [{bar}] {percent}% ({per_sec})")
|
||||
.expect("bad progress bar style");
|
||||
let pb = ProgressBar::new(args.entries.get()).with_style(style);
|
||||
const PB_UPDATE_INTERVAL: u64 = 1 << 20;
|
||||
for i in 0..args.entries.get() {
|
||||
for _ in pb.wrap_iter(0..args.entries.get()) {
|
||||
let city = cities_rng.choice(&chosen_cities).expect("no chosen cities");
|
||||
// Map 0..1 to -99.9..99.9
|
||||
let temperature = 99.9 * (2.0 * temperature_rng.f64() - 1.0);
|
||||
writeln!(output, "{city};{temperature:.1}")?;
|
||||
if i % PB_UPDATE_INTERVAL == 0 {
|
||||
pb.inc(PB_UPDATE_INTERVAL);
|
||||
}
|
||||
}
|
||||
pb.finish();
|
||||
Ok(())
|
||||
|
@ -74,5 +70,5 @@ struct Args {
|
|||
|
||||
/// The file path to write the output to
|
||||
#[arg()]
|
||||
output_path: PathBuf,
|
||||
output: PathBuf,
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue