Benchmark a custom string normalisation function #139

Merged
wojtek merged 4 commits from 138---benchmark-a-custom-string-normalisation-function into main 2024-02-19 20:56:04 +01:00
5 changed files with 156 additions and 94 deletions

48
Cargo.lock generated
View File

@ -235,17 +235,6 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa" checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa"
[[package]]
name = "getrandom"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]] [[package]]
name = "gimli" name = "gimli"
version = "0.28.1" version = "0.28.1"
@ -423,7 +412,6 @@ dependencies = [
"mockall", "mockall",
"once_cell", "once_cell",
"openssh", "openssh",
"rand",
"ratatui", "ratatui",
"serde", "serde",
"serde_json", "serde_json",
@ -544,12 +532,6 @@ version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58"
[[package]]
name = "ppv-lite86"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]] [[package]]
name = "predicates" name = "predicates"
version = "3.1.0" version = "3.1.0"
@ -618,36 +600,6 @@ dependencies = [
"proc-macro2", "proc-macro2",
] ]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]] [[package]]
name = "ratatui" name = "ratatui"
version = "0.26.0" version = "0.26.0"

View File

@ -24,7 +24,6 @@ version_check = "0.9.4"
[dev-dependencies] [dev-dependencies]
mockall = "0.12.1" mockall = "0.12.1"
once_cell = "1.19.0" once_cell = "1.19.0"
rand = "0.8.5"
tempfile = "3.10.0" tempfile = "3.10.0"
[features] [features]

View File

@ -0,0 +1,144 @@
// Date: 2024-02-19
// NOTE(review): the date above presumably records when this list was captured — confirm.
/// Fixed list of 141 artist names, stored as static string slices.
///
/// The benchmarks cycle over this array to obtain their input strings, so the
/// data is identical from run to run.
///
/// Most entries are plain ASCII, but a few contain non-ASCII characters
/// (e.g. "Æther Realm", "Аркона", "Månegarm") or punctuation
/// (e.g. "Oomph!", "P.O.D.").
///
/// The array length is part of the type: adding or removing an entry requires
/// updating the `141` in the declaration.
pub const ARTISTS: [&str; 141] = [
"Abadden",
"Acid Drinkers",
"Adema",
"Æther Realm",
"Alestorm",
"Alex Rivers",
"Alien Weaponry",
"Allegaeon",
"Alter Bridge",
"Amon Amarth",
"Amorphis",
"Apocalyptica",
"Arch Enemy",
"Аркона",
"Artas",
"As I Lay Dying",
"Avenged Sevenfold",
"Aversions Crown",
"Aviators",
"Azarath",
"Baaba Kulka",
"Battle Beast",
"Beast in Black",
"Behemoth",
"Black Sabbath",
"Blind Guardian",
"Blind Guardian Twilight Orchestra",
"Bloodbath",
"Bloodbound",
"Brothers of Metal",
"Carnation",
"Cellar Darling",
"Children of Bodom",
"Chimaira",
"Crystalic",
"Dark Tranquillity",
"Dethklok",
"DevilDriver",
"Dismember",
"Disturbed",
"The Dreadnoughts",
"Dynazty",
"Edguy",
"Eluveitie",
"Eminem",
"Enforcer",
"Ensiferum",
"Epica",
"Era",
"Evile",
"Ex Deo",
"Exit Eden",
"Faithful Darkness",
"Fear Factory",
"Fit for an Autopsy",
"Five Finger Death Punch",
"Fleshgod Apocalypse",
"Flotsam and Jetsam",
"Frontside",
"Furyon",
"Godsmack",
"Grand Magus",
"Grave Digger",
"Graveworm",
"Guns N Roses",
"Haggard",
"Hate",
"Havukruunu",
"Heaven Shall Burn",
"Heavens Basement",
"Heavy Load",
"Hermh",
"Immortal",
"In Flames",
"Insomnium",
"Iron Maiden",
"Kalmah",
"Kataklysm",
"Kontrust",
"Korn",
"Korpiklaani",
"The Last Hangmen",
"Level 70 Elite Tauren Chieftain",
"Linkin Park",
"Lost Dreams",
"Man Must Die",
"Me and That Man",
"Mercyful Fate",
"Metallica",
"Michael Jackson",
"Miracle of Sound",
"Misery Index",
"Mudvayne",
"Månegarm",
"Nickelback",
"Nightwish",
"Nile",
"Nine Treasures",
"Obscura",
"The Offspring",
"Oomph!",
"P.O.D.",
"Paddy and the Rats",
"Paul Stanley",
"Persefone",
"Peyton Parrish",
"Powerwolf",
"Primitai",
"Primordial",
"ProPain",
"Rammstein",
"Red Hot Chili Peppers",
"Revocation",
"Rob Zombie",
"Sabaton",
"Savatage",
"Scars on Broadway",
"Scorpions",
"Silent Descent",
"Slayer",
"Slipknot",
"Soilwork",
"Sonic Syndicate",
"Soulfallen",
"Spiritfall",
"Stratovarius",
"Sylosis",
"System of a Down",
"Tarot",
"Timecry",
"Trivium",
"Tuomas Holopainen",
"VNV Nation",
"Vader",
"Vicious Crusade",
"The Wages of Sin",
"Whitechapel",
"Within Temptation",
"Woe of Tyrants",
"Wovenwar",
"Xandria",
];

View File

@ -333,3 +333,7 @@ mod tests {
app.unwrap_critical(); app.unwrap_critical();
} }
} }
// Declares the `benchmod` submodule. Both attributes must hold for it to be
// compiled: the custom `nightly` cfg flag and a test build.
// NOTE(review): `nightly` is not a built-in cfg; presumably the build script
// sets it when a nightly toolchain is detected — confirm.
#[cfg(nightly)]
#[cfg(test)]
mod benchmod;

View File

@ -18,8 +18,8 @@ use crate::tui::{
// //
// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018, // U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018,
// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign // U+2019, U+201C, U+201D, U+2026, U+2212 minus sign
static SPECIAL: [char; 11] = ['‐', '‒', '–', '—', '―', '‘', '’', '“', '”', '…', '−']; const SPECIAL: [char; 11] = ['‐', '‒', '–', '—', '―', '‘', '’', '“', '”', '…', '−'];
static REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"]; const REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"];
static AC: Lazy<AhoCorasick> = static AC: Lazy<AhoCorasick> =
Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap()); Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap());
@ -467,60 +467,23 @@ mod tests {
#[cfg(test)] #[cfg(test)]
mod benches { mod benches {
// The purpose of these benches was to evaluate the benefit of AhoCorasick over std solutions. // The purpose of these benches was to evaluate the benefit of AhoCorasick over std solutions.
use rand::Rng;
use test::Bencher; use test::Bencher;
use crate::tui::lib::MockIMusicHoard; use crate::tui::{app::machine::benchmod::ARTISTS, lib::MockIMusicHoard};
use super::*; use super::*;
type Search = AppMachine<MockIMusicHoard, AppSearch>; type Search = AppMachine<MockIMusicHoard, AppSearch>;
fn random_utf8_string(len: usize) -> String {
rand::thread_rng()
.sample_iter::<char, _>(&rand::distributions::Standard)
.take(len)
.collect()
}
fn random_alpanumeric_string(len: usize) -> String {
rand::thread_rng()
.sample_iter(&rand::distributions::Alphanumeric)
.take(len)
.map(char::from)
.collect()
}
fn generate_sample(f: fn(usize) -> String) -> Vec<String> {
(0..1000).map(|_| f(10)).collect()
}
#[bench] #[bench]
fn is_char_sensitive_alphanumeric(b: &mut Bencher) { fn is_char_sensitive(b: &mut Bencher) {
let strings = generate_sample(random_alpanumeric_string); let mut iter = ARTISTS.iter().cycle();
let mut iter = strings.iter().cycle();
b.iter(|| test::black_box(Search::is_char_sensitive(&iter.next().unwrap()))) b.iter(|| test::black_box(Search::is_char_sensitive(&iter.next().unwrap())))
} }
#[bench] #[bench]
fn is_char_sensitive_utf8(b: &mut Bencher) { fn normalize_search(b: &mut Bencher) {
let strings = generate_sample(random_utf8_string); let mut iter = ARTISTS.iter().cycle();
let mut iter = strings.iter().cycle(); b.iter(|| test::black_box(Search::normalize_search(&iter.next().unwrap(), true, true)))
b.iter(|| test::black_box(Search::is_char_sensitive(&iter.next().unwrap())))
}
#[bench]
fn normalize_search_alphanumeric(b: &mut Bencher) {
let strings = generate_sample(random_alpanumeric_string);
let mut iter = strings.iter().cycle();
b.iter(|| test::black_box(Search::normalize_search(&iter.next().unwrap(), false, true)))
}
#[bench]
fn normalize_search_utf8(b: &mut Bencher) {
let strings = generate_sample(random_utf8_string);
let mut iter = strings.iter().cycle();
b.iter(|| test::black_box(Search::normalize_search(&iter.next().unwrap(), false, true)))
} }
} }