Move string utilities to core

This commit is contained in:
Wojciech Kozlowski 2025-01-03 15:20:58 +01:00
parent 0f658f8730
commit 0f352c5a9a
7 changed files with 102 additions and 92 deletions

View File

@ -1,5 +1,7 @@
// Date: 2024-02-19 // 1. `beet ls -a -f '$albumartist'`.
pub const ARTISTS: [&str; 141] = [ // 2. `M-x delete-duplicate-lines`.
// Date: 2025-01-03
pub const ARTISTS: [&str; 156] = [
"Abadden", "Abadden",
"Acid Drinkers", "Acid Drinkers",
"Adema", "Adema",
@ -16,6 +18,7 @@ pub const ARTISTS: [&str; 141] = [
"Аркона", "Аркона",
"Artas", "Artas",
"As I Lay Dying", "As I Lay Dying",
"At the Gates",
"Avenged Sevenfold", "Avenged Sevenfold",
"Aversions Crown", "Aversions Crown",
"Aviators", "Aviators",
@ -30,6 +33,7 @@ pub const ARTISTS: [&str; 141] = [
"Bloodbath", "Bloodbath",
"Bloodbound", "Bloodbound",
"Brothers of Metal", "Brothers of Metal",
"Carcass",
"Carnation", "Carnation",
"Cellar Darling", "Cellar Darling",
"Children of Bodom", "Children of Bodom",
@ -44,7 +48,6 @@ pub const ARTISTS: [&str; 141] = [
"Dynazty", "Dynazty",
"Edguy", "Edguy",
"Eluveitie", "Eluveitie",
"Eminem",
"Enforcer", "Enforcer",
"Ensiferum", "Ensiferum",
"Epica", "Epica",
@ -72,10 +75,12 @@ pub const ARTISTS: [&str; 141] = [
"Heavens Basement", "Heavens Basement",
"Heavy Load", "Heavy Load",
"Hermh", "Hermh",
"Ignea",
"Immortal", "Immortal",
"In Flames", "In Flames",
"Insomnium", "Insomnium",
"Iron Maiden", "Iron Maiden",
"Judas Priest",
"Kalmah", "Kalmah",
"Kataklysm", "Kataklysm",
"Kontrust", "Kontrust",
@ -86,14 +91,16 @@ pub const ARTISTS: [&str; 141] = [
"Linkin Park", "Linkin Park",
"Lost Dreams", "Lost Dreams",
"Man Must Die", "Man Must Die",
"Månegarm",
"Me and That Man", "Me and That Man",
"Megaton Sword",
"Mercyful Fate", "Mercyful Fate",
"Metal Church",
"Metallica", "Metallica",
"Michael Jackson",
"Miracle of Sound", "Miracle of Sound",
"Misery Index", "Misery Index",
"Mortal Sin",
"Mudvayne", "Mudvayne",
"Månegarm",
"Nickelback", "Nickelback",
"Nightwish", "Nightwish",
"Nile", "Nile",
@ -103,8 +110,8 @@ pub const ARTISTS: [&str; 141] = [
"Oomph!", "Oomph!",
"P.O.D.", "P.O.D.",
"Paddy and the Rats", "Paddy and the Rats",
"Pain",
"Paul Stanley", "Paul Stanley",
"Persefone",
"Peyton Parrish", "Peyton Parrish",
"Powerwolf", "Powerwolf",
"Primitai", "Primitai",
@ -113,9 +120,14 @@ pub const ARTISTS: [&str; 141] = [
"Rammstein", "Rammstein",
"Red Hot Chili Peppers", "Red Hot Chili Peppers",
"Revocation", "Revocation",
"Ride the Sky",
"Rob Zombie", "Rob Zombie",
"Sabaton", "Sabaton",
"Saltatio Mortis",
"Saltatio Mortis & Lara Loft",
"Satan",
"Savatage", "Savatage",
"Scar Symmetry",
"Scars on Broadway", "Scars on Broadway",
"Scorpions", "Scorpions",
"Silent Descent", "Silent Descent",
@ -132,13 +144,18 @@ pub const ARTISTS: [&str; 141] = [
"Timecry", "Timecry",
"Trivium", "Trivium",
"Tuomas Holopainen", "Tuomas Holopainen",
"VNV Nation", "Turisas",
"Vader", "Vader",
"Vesania",
"Vicious Crusade", "Vicious Crusade",
"The Wages of Sin", "Vintersorg",
"VNV Nation",
"W.A.S.P.",
"Whitechapel", "Whitechapel",
"Within Temptation", "Within Temptation",
"Woe of Tyrants", "Woe of Tyrants",
"Wovenwar", "Wovenwar",
"Xandria", "Xandria",
"Jonathan Young",
"Jonathan Young, Peyton Parrish & Colm R. McGuinness",
]; ];

View File

@ -4,6 +4,7 @@ pub mod album;
pub mod artist; pub mod artist;
pub mod merge; pub mod merge;
pub mod musicbrainz; pub mod musicbrainz;
pub mod string;
pub mod track; pub mod track;
use std::fmt::{self, Display}; use std::fmt::{self, Display};
@ -40,3 +41,7 @@ impl From<uuid::Error> for Error {
Error::MbidError(err.to_string()) Error::MbidError(err.to_string())
} }
} }
#[cfg(nightly)]
#[cfg(test)]
mod benchmod;

View File

@ -0,0 +1,62 @@
use aho_corasick::AhoCorasick;
use once_cell::sync::Lazy;
// Unlikely that this covers all possible strings, but it should at least cover strings
// relevant for music (at least in English). The list of characters handled is based on
// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters.
//
// Each code point is spelled as a `\u{...}` escape rather than as a literal character. Several
// of these characters are visually indistinguishable from one another (or from ASCII `-`), and
// literal characters are easily dropped or altered when the file passes through tools that are
// not Unicode-clean, which would leave an invalid empty `''` literal behind.
const SPECIAL: [char; 11] = [
    '\u{2010}', // hyphen
    '\u{2012}', // figure dash
    '\u{2013}', // en dash
    '\u{2014}', // em dash
    '\u{2015}', // horizontal bar
    '\u{2018}', // left single quotation mark
    '\u{2019}', // right single quotation mark
    '\u{201C}', // left double quotation mark
    '\u{201D}', // right double quotation mark
    '\u{2026}', // horizontal ellipsis
    '\u{2212}', // minus sign
];
// ASCII replacement for the entry of `SPECIAL` at the same index. The two arrays must stay the
// same length and in the same order.
const REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"];
// Matcher built on first use from the characters in `SPECIAL`, each converted to its own
// one-character pattern string. The patterns are supplied in `SPECIAL` order, which is the order
// `REPLACE` is written in. The `unwrap` means a failure to build the matcher panics at first use.
static AC: Lazy<AhoCorasick> =
Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap());
/// Returns `true` if `string` contains at least one character that is both alphabetic and
/// uppercase, and `false` otherwise (including for the empty string).
pub fn is_case_sensitive(string: &str) -> bool {
    for ch in string.chars() {
        if ch.is_alphabetic() && ch.is_uppercase() {
            return true;
        }
    }
    false
}
/// Returns `true` if any character of `string` is one of the characters listed in `SPECIAL`.
pub fn is_char_sensitive(string: &str) -> bool {
    // A plain scan is used on purpose: benchmarking showed AhoCorasick to be slower here,
    // presumably because of its high constant cost relative to such a simple task.
    for ch in string.chars() {
        if SPECIAL.contains(&ch) {
            return true;
        }
    }
    false
}
/// Produces a normalized copy of `string`.
///
/// * `lowercase` - when set, the text is lowercased.
/// * `asciify` - when set, every character matched by `AC` is substituted with the corresponding
///   entry of `REPLACE`.
///
/// When both flags are set, lowercasing is applied first and the substitution runs on the
/// lowercased text. When neither is set, an unchanged owned copy is returned.
pub fn normalize_string(string: &str, lowercase: bool, asciify: bool) -> String {
    match (asciify, lowercase) {
        (true, true) => AC.replace_all(&string.to_lowercase(), &REPLACE),
        (true, false) => AC.replace_all(string, &REPLACE),
        (false, true) => string.to_lowercase(),
        (false, false) => string.to_owned(),
    }
}
#[cfg(nightly)]
#[cfg(test)]
mod benches {
    // These benches exist to evaluate the benefit of AhoCorasick over std solutions.
    use test::Bencher;

    use crate::core::collection::benchmod::ARTISTS;

    use super::*;

    // Runs `is_char_sensitive` over the `ARTISTS` names, cycling through them endlessly.
    #[bench]
    fn bench_is_char_sensitive(b: &mut Bencher) {
        let mut names = ARTISTS.iter().cycle();
        b.iter(|| {
            let name = names.next().unwrap();
            test::black_box(is_char_sensitive(name))
        })
    }

    // Runs `normalize_string` with both lowercasing and asciification enabled over the same
    // endlessly cycled `ARTISTS` names.
    #[bench]
    fn bench_normalize_string(b: &mut Bencher) {
        let mut names = ARTISTS.iter().cycle();
        b.iter(|| {
            let name = names.next().unwrap();
            test::black_box(normalize_string(name, true, true))
        })
    }
}

View File

@ -1,4 +1,7 @@
//! MusicHoard - a music collection manager. //! MusicHoard - a music collection manager.
#![cfg_attr(nightly, feature(test))]
#[cfg(nightly)]
extern crate test;
mod core; mod core;
pub mod external; pub mod external;

View File

@ -1,7 +1,3 @@
#![cfg_attr(nightly, feature(test))]
#[cfg(nightly)]
extern crate test;
mod tui; mod tui;
use std::{ffi::OsString, fs::OpenOptions, io, path::PathBuf, thread}; use std::{ffi::OsString, fs::OpenOptions, io, path::PathBuf, thread};

View File

@ -601,7 +601,3 @@ mod tests {
app.unwrap_critical(); app.unwrap_critical();
} }
} }
#[cfg(nightly)]
#[cfg(test)]
mod benchmod;

View File

@ -1,7 +1,4 @@
use aho_corasick::AhoCorasick; use musichoard::collection::{album::Album, artist::Artist, string, track::Track};
use once_cell::sync::Lazy;
use musichoard::collection::{album::Album, artist::Artist, track::Track};
use crate::tui::app::{ use crate::tui::app::{
machine::{App, AppInner, AppMachine}, machine::{App, AppInner, AppMachine},
@ -9,17 +6,6 @@ use crate::tui::app::{
AppPublicState, AppState, Category, IAppInteractSearch, AppPublicState, AppState, Category, IAppInteractSearch,
}; };
// Unlikely that this covers all possible strings, but it should at least cover strings
// relevant for music (at least in English). The list of characters handled is based on
// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters.
//
// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018,
// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign
const SPECIAL: [char; 11] = ['', '', '', '—', '―', '', '', '“', '”', '…', ''];
const REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"];
static AC: Lazy<AhoCorasick> =
Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap());
pub struct SearchState { pub struct SearchState {
string: String, string: String,
orig: ListSelection, orig: ListSelection,
@ -114,10 +100,6 @@ trait IAppInteractSearchPrivate {
fn predicate_albums(case_sens: bool, char_sens: bool, search: &str, probe: &Album) -> bool; fn predicate_albums(case_sens: bool, char_sens: bool, search: &str, probe: &Album) -> bool;
fn predicate_tracks(case_sens: bool, char_sens: bool, search: &str, probe: &Track) -> bool; fn predicate_tracks(case_sens: bool, char_sens: bool, search: &str, probe: &Track) -> bool;
fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool; fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool;
fn is_case_sensitive(artist_name: &str) -> bool;
fn is_char_sensitive(artist_name: &str) -> bool;
fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String;
} }
impl IAppInteractSearchPrivate for AppMachine<SearchState> { impl IAppInteractSearchPrivate for AppMachine<SearchState> {
@ -160,9 +142,9 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
where where
P: FnMut(bool, bool, &str, &T) -> bool, P: FnMut(bool, bool, &str, &T) -> bool,
{ {
let case_sens = Self::is_case_sensitive(name); let case_sens = string::is_case_sensitive(name);
let char_sens = Self::is_char_sensitive(name); let char_sens = string::is_char_sensitive(name);
let search = Self::normalize_search(name, !case_sens, !char_sens); let search = string::normalize_string(name, !case_sens, !char_sens);
let mut index = st.index; let mut index = st.index;
if next && ((index + 1) < st.list.len()) { if next && ((index + 1) < st.list.len()) {
@ -177,12 +159,12 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
} }
fn predicate_artists(case_sens: bool, char_sens: bool, search: &str, probe: &Artist) -> bool { fn predicate_artists(case_sens: bool, char_sens: bool, search: &str, probe: &Artist) -> bool {
let name = Self::normalize_search(&probe.meta.id.name, !case_sens, !char_sens); let name = string::normalize_string(&probe.meta.id.name, !case_sens, !char_sens);
let mut result = name.starts_with(search); let mut result = name.starts_with(search);
if let Some(ref probe_sort) = probe.meta.sort { if let Some(ref probe_sort) = probe.meta.sort {
if !result { if !result {
let name = Self::normalize_search(probe_sort, !case_sens, !char_sens); let name = string::normalize_string(probe_sort, !case_sens, !char_sens);
result = name.starts_with(search); result = name.starts_with(search);
} }
} }
@ -199,33 +181,7 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
} }
fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool { fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool {
Self::normalize_search(title, !case_sens, !char_sens).starts_with(search) string::normalize_string(title, !case_sens, !char_sens).starts_with(search)
}
fn is_case_sensitive(artist_name: &str) -> bool {
artist_name
.chars()
.any(|ch| ch.is_alphabetic() && ch.is_uppercase())
}
fn is_char_sensitive(artist_name: &str) -> bool {
// Benchmarking reveals that using AhoCorasick is slower. At a guess, this is likely due to
// a high constant cost of AhoCorasick and the otherwise simple nature of the task.
artist_name.chars().any(|ch| SPECIAL.contains(&ch))
}
fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String {
if asciify {
if lowercase {
AC.replace_all(&search.to_lowercase(), &REPLACE)
} else {
AC.replace_all(search, &REPLACE)
}
} else if lowercase {
search.to_lowercase()
} else {
search.to_owned()
}
} }
} }
@ -544,28 +500,3 @@ mod tests {
assert_eq!(browse.inner.selection.selected(), None); assert_eq!(browse.inner.selection.selected(), None);
} }
} }
#[cfg(nightly)]
#[cfg(test)]
mod benches {
// The purpose of these benches was to evaluate the benefit of AhoCorasick over std solutions.
use test::Bencher;
use crate::tui::{app::machine::benchmod::ARTISTS, lib::MockIMusicHoard};
use super::*;
type Search = AppMachine<MockIMusicHoard, SearchState>;
#[bench]
fn is_char_sensitive(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(Search::is_char_sensitive(&iter.next().unwrap())))
}
#[bench]
fn normalize_search(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(Search::normalize_search(&iter.next().unwrap(), true, true)))
}
}