Initial generator

This commit is contained in:
Avery Winters 2024-03-12 13:22:11 -05:00
commit 9add76b415
Signed by: avery
SSH key fingerprint: SHA256:eesvLB5MMqHLZrAMFt6kEhqJWnASMLcET6Sgmw0FqZI
11 changed files with 25540 additions and 0 deletions

2
.envrc Normal file
View file

@ -0,0 +1,2 @@
#!/usr/bin/env bash
use flake

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
target/
data/

319
Cargo.lock generated Normal file
View file

@ -0,0 +1,319 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "anstream"
version = "0.6.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc"
[[package]]
name = "anstyle-parse"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7"
dependencies = [
"anstyle",
"windows-sys",
]
[[package]]
name = "anyhow"
version = "1.0.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1"
[[package]]
name = "bilrow-calculate"
version = "0.1.0"
[[package]]
name = "bilrow-generate"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"fastrand",
"indicatif",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clap"
version = "4.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b230ab84b0ffdf890d5a10abdbc8b83ae1c4918275daea1ab8801f71536b2651"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce"
[[package]]
name = "colorchoice"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "console"
version = "0.15.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb"
dependencies = [
"encode_unicode",
"lazy_static",
"libc",
"unicode-width",
"windows-sys",
]
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "fastrand"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5"
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "indicatif"
version = "0.17.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3"
dependencies = [
"console",
"instant",
"number_prefix",
"portable-atomic",
"unicode-width",
]
[[package]]
name = "instant"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
dependencies = [
"cfg-if",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.153"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
[[package]]
name = "number_prefix"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
[[package]]
name = "portable-atomic"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0"
[[package]]
name = "proc-macro2"
version = "1.0.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
dependencies = [
"proc-macro2",
]
[[package]]
name = "strsim"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01"
[[package]]
name = "syn"
version = "2.0.52"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "unicode-width"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85"
[[package]]
name = "utf8parse"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675"
[[package]]
name = "windows_i686_gnu"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3"
[[package]]
name = "windows_i686_msvc"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"

3
Cargo.toml Normal file
View file

@ -0,0 +1,3 @@
[workspace]
resolver = "2"
members = ["calculate", "generate"]

8
calculate/Cargo.toml Normal file
View file

@ -0,0 +1,8 @@
[package]
name = "bilrow-calculate"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

3
calculate/src/main.rs Normal file
View file

@ -0,0 +1,3 @@
fn main() {
println!("Hello, world!");
}

88
flake.lock Normal file
View file

@ -0,0 +1,88 @@
{
"nodes": {
"devshell": {
"inputs": {
"flake-utils": [
"flake-utils"
],
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1710156081,
"narHash": "sha256-4PMY6aumJi5dLFjBzF5O4flKXmadMNq3AGUHKYfchh0=",
"owner": "numtide",
"repo": "devshell",
"rev": "bc68b058dc7e6d4d6befc4ec6c60082b6e844b7d",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "devshell",
"type": "github"
}
},
"flake-utils": {
"inputs": {
"systems": [
"systems"
]
},
"locked": {
"lastModified": 1710146030,
"narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1709961763,
"narHash": "sha256-6H95HGJHhEZtyYA3rIQpvamMKAGoa8Yh2rFV29QnuGw=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "3030f185ba6a4bf4f18b87f345f104e6a6961f34",
"type": "github"
},
"original": {
"owner": "nixos",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"devshell": "devshell",
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs",
"systems": "systems"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

42
flake.nix Normal file
View file

@ -0,0 +1,42 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
{
inputs = {
devshell = {
url = "github:numtide/devshell";
inputs.nixpkgs.follows = "nixpkgs";
inputs.flake-utils.follows = "flake-utils";
};
flake-utils = {
url = "github:numtide/flake-utils";
inputs.systems.follows = "systems";
};
nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable";
systems.url = "github:nix-systems/default";
};
outputs = {
devshell,
flake-utils,
nixpkgs,
...
}: (flake-utils.lib.eachDefaultSystem (system: let
pkgs = import nixpkgs {
inherit system;
overlays = [devshell.overlays.default];
};
in {
formatter = pkgs.alejandra;
devShells.default = pkgs.devshell.mkShell {
commands = [
{package = pkgs.clangStdenv;}
{package = pkgs.cargo;}
{package = pkgs.nil;}
{package = pkgs.rustc;}
{package = pkgs.rustfmt;}
{package = pkgs.rust-analyzer;}
];
};
}));
}

12
generate/Cargo.toml Normal file
View file

@ -0,0 +1,12 @@
[package]
name = "bilrow-generate"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
clap = { version = "4.5.2", features = ["derive"] }
anyhow = "1.0.80"
fastrand = "2.0.1"
indicatif = "0.17.8"

24983
generate/src/cities.rs Normal file

File diff suppressed because it is too large Load diff

78
generate/src/main.rs Normal file
View file

@ -0,0 +1,78 @@
use {
crate::cities::CITIES,
::{
anyhow::Result,
clap::Parser,
fastrand::Rng,
indicatif::{ProgressBar, ProgressStyle},
std::{
fs::File,
io::{BufWriter, Write},
num::{NonZeroU64, NonZeroUsize},
path::PathBuf,
},
},
};
mod cities;
fn main() -> Result<()> {
let args = Args::parse();
let output = File::create(&args.output_path)?;
let mut output = BufWriter::with_capacity(args.buffer, output);
let mut rng = args
.seed
.map(|seed| Rng::with_seed(seed))
.unwrap_or_else(|| Rng::new());
let seed = rng.get_seed();
writeln!(
output,
"# Generated with `bilrow-generate --cities {} --entries {} --buffer {} --seed {} {:?}`",
args.cities, args.entries, args.buffer, seed, args.output_path
)?;
// Fork the RNG so we can change the order we use them in later if we want without
// affecting reproducibility.
let mut cities_rng = rng.fork();
let mut temperature_rng = rng.fork();
let chosen_cities = cities_rng.choose_multiple(CITIES.iter(), args.cities.get());
let style: ProgressStyle =
ProgressStyle::with_template("[{elapsed}/{duration}] [{bar}] {percent}% ({per_sec})")
.expect("bad progress bar style");
let pb = ProgressBar::new(args.entries.get()).with_style(style);
const PB_UPDATE_INTERVAL: u64 = 1 << 20;
for i in 0..args.entries.get() {
let city = cities_rng.choice(&chosen_cities).expect("no chosen cities");
// Map 0..1 to -99.9..99.9
let temperature = 99.9 * (2.0 * temperature_rng.f64() - 1.0);
writeln!(output, "{city};{temperature:.1}")?;
if i % PB_UPDATE_INTERVAL == 0 {
pb.inc(PB_UPDATE_INTERVAL);
}
}
pb.finish();
Ok(())
}
/// Program to generate input files for the one billion row challenge.
#[derive(Parser)]
#[command(version, about)]
struct Args {
/// Number of cities to sample entries from
#[arg(short, long, default_value_t = NonZeroUsize::new(10_000).unwrap())]
cities: NonZeroUsize,
/// Number of entries to generate
#[arg(short, long, default_value_t = NonZeroU64::new(1_000_000_000).unwrap())]
entries: NonZeroU64,
/// A token that allows reproducing an output file
#[arg(short, long)]
seed: Option<u64>,
#[arg(short, long, default_value_t = 8_048_576)]
buffer: usize,
/// The file path to write the output to
#[arg()]
output_path: PathBuf,
}