From 0f352c5a9ab9762333a8e0aa9af096b83df91f4e Mon Sep 17 00:00:00 2001 From: Wojciech Kozlowski Date: Fri, 3 Jan 2025 15:20:58 +0100 Subject: [PATCH] Move srting utilities to core --- .../machine => core/collection}/benchmod.rs | 33 ++++++-- src/core/collection/mod.rs | 5 ++ src/core/collection/string.rs | 62 ++++++++++++++ src/lib.rs | 3 + src/main.rs | 4 - src/tui/app/machine/mod.rs | 4 - src/tui/app/machine/search_state.rs | 83 ++----------------- 7 files changed, 102 insertions(+), 92 deletions(-) rename src/{tui/app/machine => core/collection}/benchmod.rs (83%) create mode 100644 src/core/collection/string.rs diff --git a/src/tui/app/machine/benchmod.rs b/src/core/collection/benchmod.rs similarity index 83% rename from src/tui/app/machine/benchmod.rs rename to src/core/collection/benchmod.rs index 6d45a3a..f401e77 100644 --- a/src/tui/app/machine/benchmod.rs +++ b/src/core/collection/benchmod.rs @@ -1,5 +1,7 @@ -// Date: 2024-02-19 -pub const ARTISTS: [&str; 141] = [ +// 1. `beet ls -a -f '$albumartist'`. +// 2. `M-x delete-duplicate-lines`. +// Date: 2025-01-03 +pub const ARTISTS: [&str; 156] = [ "Abadden", "Acid Drinkers", "Adema", @@ -16,6 +18,7 @@ pub const ARTISTS: [&str; 141] = [ "Аркона", "Artas", "As I Lay Dying", + "At the Gates", "Avenged Sevenfold", "Aversions Crown", "Aviators", @@ -30,6 +33,7 @@ pub const ARTISTS: [&str; 141] = [ "Bloodbath", "Bloodbound", "Brothers of Metal", + "Carcass", "Carnation", "Cellar Darling", "Children of Bodom", @@ -44,7 +48,6 @@ pub const ARTISTS: [&str; 141] = [ "Dynazty", "Edguy", "Eluveitie", - "Eminem", "Enforcer", "Ensiferum", "Epica", @@ -72,10 +75,12 @@ pub const ARTISTS: [&str; 141] = [ "Heaven’s Basement", "Heavy Load", "Hermh", + "Ignea", "Immortal", "In Flames", "Insomnium", "Iron Maiden", + "Judas Priest", "Kalmah", "Kataklysm", "Kontrust", @@ -86,14 +91,16 @@ pub const ARTISTS: [&str; 141] = [ "Linkin Park", "Lost Dreams", "Man Must Die", + "Månegarm", "Me and That Man", + "Megaton Sword", "Mercyful Fate", + "Metal Church", "Metallica", - "Michael Jackson", "Miracle of Sound", "Misery Index", + "Mortal Sin", "Mudvayne", - "Månegarm", "Nickelback", "Nightwish", "Nile", @@ -103,8 +110,8 @@ pub const ARTISTS: [&str; 141] = [ "Oomph!", "P.O.D.", "Paddy and the Rats", + "Pain", "Paul Stanley", - "Persefone", "Peyton Parrish", "Powerwolf", "Primitai", @@ -113,9 +120,14 @@ pub const ARTISTS: [&str; 141] = [ "Rammstein", "Red Hot Chili Peppers", "Revocation", + "Ride the Sky", "Rob Zombie", "Sabaton", + "Saltatio Mortis", + "Saltatio Mortis & Lara Loft", + "Satan", "Savatage", + "Scar Symmetry", "Scars on Broadway", "Scorpions", "Silent Descent", @@ -132,13 +144,18 @@ pub const ARTISTS: [&str; 141] = [ "Timecry", "Trivium", "Tuomas Holopainen", - "VNV Nation", + "Turisas", "Vader", + "Vesania", "Vicious Crusade", - "The Wages of Sin", + "Vintersorg", + "VNV Nation", + "W.A.S.P.", "Whitechapel", "Within Temptation", "Woe of Tyrants", "Wovenwar", "Xandria", + "Jonathan Young", + "Jonathan Young, Peyton Parrish & Colm R. McGuinness", ]; diff --git a/src/core/collection/mod.rs b/src/core/collection/mod.rs index 3dbc419..542f018 100644 --- a/src/core/collection/mod.rs +++ b/src/core/collection/mod.rs @@ -4,6 +4,7 @@ pub mod album; pub mod artist; pub mod merge; pub mod musicbrainz; +pub mod string; pub mod track; use std::fmt::{self, Display}; @@ -40,3 +41,7 @@ impl From for Error { Error::MbidError(err.to_string()) } } + +#[cfg(nightly)] +#[cfg(test)] +mod benchmod; diff --git a/src/core/collection/string.rs b/src/core/collection/string.rs new file mode 100644 index 0000000..f2cb6e5 --- /dev/null +++ b/src/core/collection/string.rs @@ -0,0 +1,62 @@ +use aho_corasick::AhoCorasick; +use once_cell::sync::Lazy; + +// Unlikely that this covers all possible strings, but it should at least cover strings +// relevant for music (at least in English). The list of characters handled is based on +// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters. +// +// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018, +// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign +const SPECIAL: [char; 11] = ['‐', '‒', '–', '—', '―', '‘', '’', '“', '”', '…', '−']; +const REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"]; +static AC: Lazy = + Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap()); + +pub fn is_case_sensitive(string: &str) -> bool { + string + .chars() + .any(|ch| ch.is_alphabetic() && ch.is_uppercase()) +} + +pub fn is_char_sensitive(string: &str) -> bool { + // Benchmarking reveals that using AhoCorasick is slower. At a guess, this is likely due to + // a high constant cost of AhoCorasick and the otherwise simple nature of the task. + string.chars().any(|ch| SPECIAL.contains(&ch)) +} + +pub fn normalize_string(string: &str, lowercase: bool, asciify: bool) -> String { + if asciify { + if lowercase { + AC.replace_all(&string.to_lowercase(), &REPLACE) + } else { + AC.replace_all(string, &REPLACE) + } + } else if lowercase { + string.to_lowercase() + } else { + string.to_owned() + } +} + +#[cfg(nightly)] +#[cfg(test)] +mod benches { + // The purpose of these benches was to evaluate the benefit of AhoCorasick over std solutions. + use test::Bencher; + + use crate::core::collection::benchmod::ARTISTS; + + use super::*; + + #[bench] + fn bench_is_char_sensitive(b: &mut Bencher) { + let mut iter = ARTISTS.iter().cycle(); + b.iter(|| test::black_box(is_char_sensitive(&iter.next().unwrap()))) + } + + #[bench] + fn bench_normalize_string(b: &mut Bencher) { + let mut iter = ARTISTS.iter().cycle(); + b.iter(|| test::black_box(normalize_string(&iter.next().unwrap(), true, true))) + } +} diff --git a/src/lib.rs b/src/lib.rs index 0213dc6..6e8d317 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,7 @@ //! MusicHoard - a music collection manager. +#![cfg_attr(nightly, feature(test))] +#[cfg(nightly)] +extern crate test; mod core; pub mod external; diff --git a/src/main.rs b/src/main.rs index 4577ea6..cfbf750 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,3 @@ -#![cfg_attr(nightly, feature(test))] -#[cfg(nightly)] -extern crate test; - mod tui; use std::{ffi::OsString, fs::OpenOptions, io, path::PathBuf, thread}; diff --git a/src/tui/app/machine/mod.rs b/src/tui/app/machine/mod.rs index 19f930c..2d0322f 100644 --- a/src/tui/app/machine/mod.rs +++ b/src/tui/app/machine/mod.rs @@ -601,7 +601,3 @@ mod tests { app.unwrap_critical(); } } - -#[cfg(nightly)] -#[cfg(test)] -mod benchmod; diff --git a/src/tui/app/machine/search_state.rs b/src/tui/app/machine/search_state.rs index ce6d519..6fe26b6 100644 --- a/src/tui/app/machine/search_state.rs +++ b/src/tui/app/machine/search_state.rs @@ -1,7 +1,4 @@ -use aho_corasick::AhoCorasick; -use once_cell::sync::Lazy; - -use musichoard::collection::{album::Album, artist::Artist, track::Track}; +use musichoard::collection::{album::Album, artist::Artist, string, track::Track}; use crate::tui::app::{ machine::{App, AppInner, AppMachine}, @@ -9,17 +6,6 @@ use crate::tui::app::{ AppPublicState, AppState, Category, IAppInteractSearch, }; -// Unlikely that this covers all possible strings, but it should at least cover strings -// relevant for music (at least in English). The list of characters handled is based on -// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters. -// -// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018, -// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign -const SPECIAL: [char; 11] = ['‐', '‒', '–', '—', '―', '‘', '’', '“', '”', '…', '−']; -const REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"]; -static AC: Lazy = - Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap()); - pub struct SearchState { string: String, orig: ListSelection, @@ -114,10 +100,6 @@ trait IAppInteractSearchPrivate { fn predicate_albums(case_sens: bool, char_sens: bool, search: &str, probe: &Album) -> bool; fn predicate_tracks(case_sens: bool, char_sens: bool, search: &str, probe: &Track) -> bool; fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool; - - fn is_case_sensitive(artist_name: &str) -> bool; - fn is_char_sensitive(artist_name: &str) -> bool; - fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String; } impl IAppInteractSearchPrivate for AppMachine { @@ -160,9 +142,9 @@ impl IAppInteractSearchPrivate for AppMachine { where P: FnMut(bool, bool, &str, &T) -> bool, { - let case_sens = Self::is_case_sensitive(name); - let char_sens = Self::is_char_sensitive(name); - let search = Self::normalize_search(name, !case_sens, !char_sens); + let case_sens = string::is_case_sensitive(name); + let char_sens = string::is_char_sensitive(name); + let search = string::normalize_string(name, !case_sens, !char_sens); let mut index = st.index; if next && ((index + 1) < st.list.len()) { @@ -177,12 +159,12 @@ impl IAppInteractSearchPrivate for AppMachine { } fn predicate_artists(case_sens: bool, char_sens: bool, search: &str, probe: &Artist) -> bool { - let name = Self::normalize_search(&probe.meta.id.name, !case_sens, !char_sens); + let name = string::normalize_string(&probe.meta.id.name, !case_sens, !char_sens); let mut result = name.starts_with(search); if let Some(ref probe_sort) = probe.meta.sort { if !result { - let name = Self::normalize_search(probe_sort, !case_sens, !char_sens); + let name = string::normalize_string(probe_sort, !case_sens, !char_sens); result = name.starts_with(search); } } @@ -199,33 +181,7 @@ impl IAppInteractSearchPrivate for AppMachine { } fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool { - Self::normalize_search(title, !case_sens, !char_sens).starts_with(search) - } - - fn is_case_sensitive(artist_name: &str) -> bool { - artist_name - .chars() - .any(|ch| ch.is_alphabetic() && ch.is_uppercase()) - } - - fn is_char_sensitive(artist_name: &str) -> bool { - // Benchmarking reveals that using AhoCorasick is slower. At a guess, this is likely due to - // a high constant cost of AhoCorasick and the otherwise simple nature of the task. - artist_name.chars().any(|ch| SPECIAL.contains(&ch)) - } - - fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String { - if asciify { - if lowercase { - AC.replace_all(&search.to_lowercase(), &REPLACE) - } else { - AC.replace_all(search, &REPLACE) - } - } else if lowercase { - search.to_lowercase() - } else { - search.to_owned() - } + string::normalize_string(title, !case_sens, !char_sens).starts_with(search) } } @@ -544,28 +500,3 @@ mod tests { assert_eq!(browse.inner.selection.selected(), None); } } - -#[cfg(nightly)] -#[cfg(test)] -mod benches { - // The purpose of these benches was to evaluate the benefit of AhoCorasick over std solutions. - use test::Bencher; - - use crate::tui::{app::machine::benchmod::ARTISTS, lib::MockIMusicHoard}; - - use super::*; - - type Search = AppMachine; - - #[bench] - fn is_char_sensitive(b: &mut Bencher) { - let mut iter = ARTISTS.iter().cycle(); - b.iter(|| test::black_box(Search::is_char_sensitive(&iter.next().unwrap()))) - } - - #[bench] - fn normalize_search(b: &mut Bencher) { - let mut iter = ARTISTS.iter().cycle(); - b.iter(|| test::black_box(Search::normalize_search(&iter.next().unwrap(), true, true))) - } -}