Initial optimized implementation
This commit is contained in:
parent
9add76b415
commit
9233682c5a
8 changed files with 316 additions and 10 deletions
90
Cargo.lock
generated
90
Cargo.lock
generated
|
@ -2,6 +2,24 @@
|
||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
version = 3
|
version = 3
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ahash"
|
||||||
|
version = "0.8.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"once_cell",
|
||||||
|
"version_check",
|
||||||
|
"zerocopy",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "allocator-api2"
|
||||||
|
version = "0.2.16"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "anstream"
|
name = "anstream"
|
||||||
version = "0.6.13"
|
version = "0.6.13"
|
||||||
|
@ -59,6 +77,15 @@ checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1"
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bilrow-calculate"
|
name = "bilrow-calculate"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"clap",
|
||||||
|
"fast-float",
|
||||||
|
"hashbrown",
|
||||||
|
"indicatif",
|
||||||
|
"memchr",
|
||||||
|
"memmap2",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bilrow-generate"
|
name = "bilrow-generate"
|
||||||
|
@ -141,12 +168,28 @@ version = "0.3.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
|
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fast-float"
|
||||||
|
version = "0.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fastrand"
|
name = "fastrand"
|
||||||
version = "2.0.1"
|
version = "2.0.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5"
|
checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hashbrown"
|
||||||
|
version = "0.14.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
|
||||||
|
dependencies = [
|
||||||
|
"ahash",
|
||||||
|
"allocator-api2",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "heck"
|
name = "heck"
|
||||||
version = "0.4.1"
|
version = "0.4.1"
|
||||||
|
@ -187,12 +230,33 @@ version = "0.2.153"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
|
checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memchr"
|
||||||
|
version = "2.7.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memmap2"
|
||||||
|
version = "0.9.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "number_prefix"
|
name = "number_prefix"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
|
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "once_cell"
|
||||||
|
version = "1.19.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "portable-atomic"
|
name = "portable-atomic"
|
||||||
version = "1.6.0"
|
version = "1.6.0"
|
||||||
|
@ -252,6 +316,12 @@ version = "0.2.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
|
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "version_check"
|
||||||
|
version = "0.9.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-sys"
|
name = "windows-sys"
|
||||||
version = "0.52.0"
|
version = "0.52.0"
|
||||||
|
@ -317,3 +387,23 @@ name = "windows_x86_64_msvc"
|
||||||
version = "0.52.4"
|
version = "0.52.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"
|
checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerocopy"
|
||||||
|
version = "0.7.32"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be"
|
||||||
|
dependencies = [
|
||||||
|
"zerocopy-derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerocopy-derive"
|
||||||
|
version = "0.7.32"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
[workspace]
|
[workspace]
|
||||||
resolver = "2"
|
resolver = "2"
|
||||||
members = ["calculate", "generate"]
|
members = ["calculate", "generate"]
|
||||||
|
|
||||||
|
[profile.release]
|
||||||
|
debug = true
|
|
@ -6,3 +6,10 @@ edition = "2021"
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
clap = { version = "4.5.2", features = ["derive"] }
|
||||||
|
anyhow = "1.0.80"
|
||||||
|
indicatif = "0.17.8"
|
||||||
|
memchr = "2.7.1"
|
||||||
|
memmap2 = "0.9.4"
|
||||||
|
hashbrown = "0.14.3"
|
||||||
|
fast-float = "0.2.0"
|
||||||
|
|
|
@ -1,3 +1,34 @@
|
||||||
fn main() {
|
use ::{
|
||||||
println!("Hello, world!");
|
anyhow::Result,
|
||||||
|
clap::{Parser, ValueEnum},
|
||||||
|
std::path::PathBuf,
|
||||||
|
};
|
||||||
|
|
||||||
|
mod optimized;
|
||||||
|
mod reference;
|
||||||
|
|
||||||
|
fn main() -> Result<()> {
|
||||||
|
let args = Args::parse();
|
||||||
|
match args.mode {
|
||||||
|
Mode::Reference => reference::run(args),
|
||||||
|
Mode::Optimized => optimized::run(args),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Selects which implementation `main` dispatches to.
#[derive(Clone, ValueEnum)]
enum Mode {
    // Dispatches to `reference::run`; this is the default value of `--mode`.
    Reference,
    // Dispatches to `optimized::run`.
    Optimized,
}
|
||||||
|
|
||||||
|
/// Program to calculate per-city temperature statistics for the one billion row challenge.
|
||||||
|
#[derive(Parser)]
|
||||||
|
#[command(version, about)]
|
||||||
|
struct Args {
|
||||||
|
#[arg(value_enum, short, long, default_value_t = Mode::Reference)]
|
||||||
|
mode: Mode,
|
||||||
|
|
||||||
|
/// The file path to read the input from
|
||||||
|
#[arg()]
|
||||||
|
input: PathBuf,
|
||||||
}
|
}
|
||||||
|
|
97
calculate/src/optimized.rs
Normal file
97
calculate/src/optimized.rs
Normal file
|
@ -0,0 +1,97 @@
|
||||||
|
use {
|
||||||
|
crate::Args,
|
||||||
|
anyhow::{Context, Result},
|
||||||
|
fast_float::parse,
|
||||||
|
hashbrown::HashMap,
|
||||||
|
indicatif::{ProgressBar, ProgressStyle},
|
||||||
|
memchr::{memchr, memchr_iter},
|
||||||
|
memmap2::{Advice, MmapOptions},
|
||||||
|
std::{
|
||||||
|
fs::File,
|
||||||
|
io::{BufWriter, Write},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Running temperature statistics accumulated for a single city.
struct Data {
    /// Lowest temperature seen so far.
    min: f64,
    /// Highest temperature seen so far.
    max: f64,
    /// Sum of all temperatures seen so far; divided by `count` when printing.
    sum: f64,
    /// Number of measurements folded into this entry.
    count: u64,
}
|
||||||
|
|
||||||
|
/// Capacity in bytes (1 GiB) of the `BufWriter` that wraps stdout.
const BUFFER_SIZE: usize = 1 << 30;
/// Initial capacity of the per-city map, so it starts with room for this many cities.
const UNIQUE_CITY_COUNT: usize = 10_000;
|
||||||
|
|
||||||
|
pub(crate) fn run(args: Args) -> Result<()> {
|
||||||
|
let input = File::open(args.input)?;
|
||||||
|
let mut mmap_options = MmapOptions::new();
|
||||||
|
mmap_options.populate();
|
||||||
|
let input = unsafe { mmap_options.map(&input)? };
|
||||||
|
input.advise(Advice::Sequential)?;
|
||||||
|
input.advise(Advice::WillNeed)?;
|
||||||
|
let input = &input[..];
|
||||||
|
let style: ProgressStyle = ProgressStyle::with_template(
|
||||||
|
"[{elapsed}/{duration}] [{bar}] {percent}% ({binary_bytes_per_sec})",
|
||||||
|
)
|
||||||
|
.expect("bad progress bar style");
|
||||||
|
let pb = ProgressBar::new(input.len().try_into()?).with_style(style);
|
||||||
|
|
||||||
|
let mut data: HashMap<&[u8], Data> = HashMap::with_capacity(UNIQUE_CITY_COUNT);
|
||||||
|
|
||||||
|
let mut start = 0;
|
||||||
|
let mut last_pb_update: usize = 0;
|
||||||
|
for pos in memchr_iter(b'\n', input) {
|
||||||
|
let line = &input[start..pos];
|
||||||
|
start = pos + 1;
|
||||||
|
if line.first() == Some(&b'#') {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let split = memchr(b';', line).context("bad input")?;
|
||||||
|
let city = &line[..split];
|
||||||
|
let temperature = &line[split + 1..];
|
||||||
|
let temperature = parse(temperature)?;
|
||||||
|
|
||||||
|
let entry = data.entry(city);
|
||||||
|
entry
|
||||||
|
.and_modify(|data| {
|
||||||
|
data.min = data.min.min(temperature);
|
||||||
|
data.max = data.max.max(temperature);
|
||||||
|
data.sum += temperature;
|
||||||
|
data.count += 1;
|
||||||
|
})
|
||||||
|
.or_insert_with(|| Data {
|
||||||
|
min: temperature,
|
||||||
|
max: temperature,
|
||||||
|
sum: temperature,
|
||||||
|
count: 1,
|
||||||
|
});
|
||||||
|
if pos - last_pb_update >= 10_000_000 {
|
||||||
|
pb.set_position(pos.try_into()?);
|
||||||
|
last_pb_update = pos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pb.finish();
|
||||||
|
let mut data: Vec<_> = data.iter().collect();
|
||||||
|
data.sort_unstable_by_key(|(&key, _)| key);
|
||||||
|
let output = std::io::stdout();
|
||||||
|
let output = output.lock();
|
||||||
|
let mut output = BufWriter::with_capacity(BUFFER_SIZE, output);
|
||||||
|
for (
|
||||||
|
city,
|
||||||
|
Data {
|
||||||
|
min,
|
||||||
|
max,
|
||||||
|
sum,
|
||||||
|
count,
|
||||||
|
},
|
||||||
|
) in data
|
||||||
|
{
|
||||||
|
writeln!(
|
||||||
|
output,
|
||||||
|
"{};{min:.1};{:.1};{max:.1}",
|
||||||
|
std::str::from_utf8(city)?,
|
||||||
|
sum / *count as f64
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
81
calculate/src/reference.rs
Normal file
81
calculate/src/reference.rs
Normal file
|
@ -0,0 +1,81 @@
|
||||||
|
use {
|
||||||
|
crate::Args,
|
||||||
|
::{
|
||||||
|
anyhow::{Context, Result},
|
||||||
|
indicatif::{ProgressBar, ProgressStyle},
|
||||||
|
std::{
|
||||||
|
collections::BTreeMap,
|
||||||
|
fs::File,
|
||||||
|
io::{BufRead, BufReader, BufWriter, Write},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Capacity in bytes (1 GiB) used for both the input `BufReader` and the stdout `BufWriter`.
const BUFFER_SIZE: usize = 1 << 30;
|
||||||
|
|
||||||
|
pub(crate) fn run(args: Args) -> Result<()> {
|
||||||
|
let input = File::open(args.input)?;
|
||||||
|
let len = input.metadata()?.len();
|
||||||
|
let input = BufReader::with_capacity(BUFFER_SIZE, input);
|
||||||
|
|
||||||
|
let style: ProgressStyle = ProgressStyle::with_template(
|
||||||
|
"[{elapsed}/{duration}] [{bar}] {percent}% ({binary_bytes_per_sec})",
|
||||||
|
)
|
||||||
|
.expect("bad progress bar style");
|
||||||
|
let pb = ProgressBar::new(len).with_style(style);
|
||||||
|
let input = pb.wrap_read(input);
|
||||||
|
|
||||||
|
struct StationData {
|
||||||
|
min: f64,
|
||||||
|
max: f64,
|
||||||
|
sum: f64,
|
||||||
|
count: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut stations: BTreeMap<Box<str>, StationData> = BTreeMap::new();
|
||||||
|
|
||||||
|
for (i, line) in input.lines().enumerate() {
|
||||||
|
let line = line?;
|
||||||
|
if line.starts_with('#') {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let (city, temperature) = line
|
||||||
|
.split_once(';')
|
||||||
|
.with_context(|| format!("line {i} malformed"))?;
|
||||||
|
let temperature = temperature.parse::<f64>()?;
|
||||||
|
if let Some(data) = stations.get_mut(city) {
|
||||||
|
data.min = data.min.min(temperature);
|
||||||
|
data.max = data.max.max(temperature);
|
||||||
|
data.sum += temperature;
|
||||||
|
data.count += 1;
|
||||||
|
} else {
|
||||||
|
stations.insert(
|
||||||
|
city.into(),
|
||||||
|
StationData {
|
||||||
|
min: temperature,
|
||||||
|
max: temperature,
|
||||||
|
sum: temperature,
|
||||||
|
count: 1,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pb.finish();
|
||||||
|
|
||||||
|
let output = std::io::stdout();
|
||||||
|
let output = output.lock();
|
||||||
|
let mut output = BufWriter::with_capacity(BUFFER_SIZE, output);
|
||||||
|
for (
|
||||||
|
city,
|
||||||
|
StationData {
|
||||||
|
min,
|
||||||
|
max,
|
||||||
|
sum,
|
||||||
|
count,
|
||||||
|
},
|
||||||
|
) in stations
|
||||||
|
{
|
||||||
|
writeln!(output, "{city};{min:.1};{:.1};{max:.1}", sum / count as f64)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
|
@ -32,6 +32,7 @@
|
||||||
commands = [
|
commands = [
|
||||||
{package = pkgs.clangStdenv;}
|
{package = pkgs.clangStdenv;}
|
||||||
{package = pkgs.cargo;}
|
{package = pkgs.cargo;}
|
||||||
|
{package = pkgs.cargo-flamegraph;}
|
||||||
{package = pkgs.nil;}
|
{package = pkgs.nil;}
|
||||||
{package = pkgs.rustc;}
|
{package = pkgs.rustc;}
|
||||||
{package = pkgs.rustfmt;}
|
{package = pkgs.rustfmt;}
|
||||||
|
|
|
@ -18,7 +18,7 @@ mod cities;
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
let args = Args::parse();
|
let args = Args::parse();
|
||||||
let output = File::create(&args.output_path)?;
|
let output = File::create(&args.output)?;
|
||||||
let mut output = BufWriter::with_capacity(args.buffer, output);
|
let mut output = BufWriter::with_capacity(args.buffer, output);
|
||||||
let mut rng = args
|
let mut rng = args
|
||||||
.seed
|
.seed
|
||||||
|
@ -28,7 +28,7 @@ fn main() -> Result<()> {
|
||||||
writeln!(
|
writeln!(
|
||||||
output,
|
output,
|
||||||
"# Generated with `bilrow-generate --cities {} --entries {} --buffer {} --seed {} {:?}`",
|
"# Generated with `bilrow-generate --cities {} --entries {} --buffer {} --seed {} {:?}`",
|
||||||
args.cities, args.entries, args.buffer, seed, args.output_path
|
args.cities, args.entries, args.buffer, seed, args.output
|
||||||
)?;
|
)?;
|
||||||
// Fork the RNG so we can change the order we use them in later if we want without
|
// Fork the RNG so we can change the order we use them in later if we want without
|
||||||
// affecting reproducibility.
|
// affecting reproducibility.
|
||||||
|
@ -39,15 +39,11 @@ fn main() -> Result<()> {
|
||||||
ProgressStyle::with_template("[{elapsed}/{duration}] [{bar}] {percent}% ({per_sec})")
|
ProgressStyle::with_template("[{elapsed}/{duration}] [{bar}] {percent}% ({per_sec})")
|
||||||
.expect("bad progress bar style");
|
.expect("bad progress bar style");
|
||||||
let pb = ProgressBar::new(args.entries.get()).with_style(style);
|
let pb = ProgressBar::new(args.entries.get()).with_style(style);
|
||||||
const PB_UPDATE_INTERVAL: u64 = 1 << 20;
|
for _ in pb.wrap_iter(0..args.entries.get()) {
|
||||||
for i in 0..args.entries.get() {
|
|
||||||
let city = cities_rng.choice(&chosen_cities).expect("no chosen cities");
|
let city = cities_rng.choice(&chosen_cities).expect("no chosen cities");
|
||||||
// Map 0..1 to -99.9..99.9
|
// Map 0..1 to -99.9..99.9
|
||||||
let temperature = 99.9 * (2.0 * temperature_rng.f64() - 1.0);
|
let temperature = 99.9 * (2.0 * temperature_rng.f64() - 1.0);
|
||||||
writeln!(output, "{city};{temperature:.1}")?;
|
writeln!(output, "{city};{temperature:.1}")?;
|
||||||
if i % PB_UPDATE_INTERVAL == 0 {
|
|
||||||
pb.inc(PB_UPDATE_INTERVAL);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
pb.finish();
|
pb.finish();
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -74,5 +70,5 @@ struct Args {
|
||||||
|
|
||||||
/// The file path to write the output to
|
/// The file path to write the output to
|
||||||
#[arg()]
|
#[arg()]
|
||||||
output_path: PathBuf,
|
output: PathBuf,
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue