Move srting utilities to core

This commit is contained in:
Wojciech Kozlowski 2025-01-03 15:20:58 +01:00
parent 0f658f8730
commit 0f352c5a9a
7 changed files with 102 additions and 92 deletions

View File

@ -1,5 +1,7 @@
// Date: 2024-02-19
pub const ARTISTS: [&str; 141] = [
// 1. `beet ls -a -f '$albumartist'`.
// 2. `M-x delete-duplicate-lines`.
// Date: 2025-01-03
pub const ARTISTS: [&str; 156] = [
"Abadden",
"Acid Drinkers",
"Adema",
@ -16,6 +18,7 @@ pub const ARTISTS: [&str; 141] = [
"Аркона",
"Artas",
"As I Lay Dying",
"At the Gates",
"Avenged Sevenfold",
"Aversions Crown",
"Aviators",
@ -30,6 +33,7 @@ pub const ARTISTS: [&str; 141] = [
"Bloodbath",
"Bloodbound",
"Brothers of Metal",
"Carcass",
"Carnation",
"Cellar Darling",
"Children of Bodom",
@ -44,7 +48,6 @@ pub const ARTISTS: [&str; 141] = [
"Dynazty",
"Edguy",
"Eluveitie",
"Eminem",
"Enforcer",
"Ensiferum",
"Epica",
@ -72,10 +75,12 @@ pub const ARTISTS: [&str; 141] = [
"Heavens Basement",
"Heavy Load",
"Hermh",
"Ignea",
"Immortal",
"In Flames",
"Insomnium",
"Iron Maiden",
"Judas Priest",
"Kalmah",
"Kataklysm",
"Kontrust",
@ -86,14 +91,16 @@ pub const ARTISTS: [&str; 141] = [
"Linkin Park",
"Lost Dreams",
"Man Must Die",
"Månegarm",
"Me and That Man",
"Megaton Sword",
"Mercyful Fate",
"Metal Church",
"Metallica",
"Michael Jackson",
"Miracle of Sound",
"Misery Index",
"Mortal Sin",
"Mudvayne",
"Månegarm",
"Nickelback",
"Nightwish",
"Nile",
@ -103,8 +110,8 @@ pub const ARTISTS: [&str; 141] = [
"Oomph!",
"P.O.D.",
"Paddy and the Rats",
"Pain",
"Paul Stanley",
"Persefone",
"Peyton Parrish",
"Powerwolf",
"Primitai",
@ -113,9 +120,14 @@ pub const ARTISTS: [&str; 141] = [
"Rammstein",
"Red Hot Chili Peppers",
"Revocation",
"Ride the Sky",
"Rob Zombie",
"Sabaton",
"Saltatio Mortis",
"Saltatio Mortis & Lara Loft",
"Satan",
"Savatage",
"Scar Symmetry",
"Scars on Broadway",
"Scorpions",
"Silent Descent",
@ -132,13 +144,18 @@ pub const ARTISTS: [&str; 141] = [
"Timecry",
"Trivium",
"Tuomas Holopainen",
"VNV Nation",
"Turisas",
"Vader",
"Vesania",
"Vicious Crusade",
"The Wages of Sin",
"Vintersorg",
"VNV Nation",
"W.A.S.P.",
"Whitechapel",
"Within Temptation",
"Woe of Tyrants",
"Wovenwar",
"Xandria",
"Jonathan Young",
"Jonathan Young, Peyton Parrish & Colm R. McGuinness",
];

View File

@ -4,6 +4,7 @@ pub mod album;
pub mod artist;
pub mod merge;
pub mod musicbrainz;
pub mod string;
pub mod track;
use std::fmt::{self, Display};
@ -40,3 +41,7 @@ impl From<uuid::Error> for Error {
Error::MbidError(err.to_string())
}
}
#[cfg(nightly)]
#[cfg(test)]
mod benchmod;

View File

@ -0,0 +1,62 @@
use aho_corasick::AhoCorasick;
use once_cell::sync::Lazy;
// Unlikely that this covers all possible strings, but it should at least cover strings
// relevant for music (at least in English). The list of characters handled is based on
// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters.
//
// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018,
// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign
const SPECIAL: [char; 11] = ['', '', '', '—', '―', '', '', '“', '”', '…', ''];
const REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"];
static AC: Lazy<AhoCorasick> =
Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap());
pub fn is_case_sensitive(string: &str) -> bool {
string
.chars()
.any(|ch| ch.is_alphabetic() && ch.is_uppercase())
}
pub fn is_char_sensitive(string: &str) -> bool {
// Benchmarking reveals that using AhoCorasick is slower. At a guess, this is likely due to
// a high constant cost of AhoCorasick and the otherwise simple nature of the task.
string.chars().any(|ch| SPECIAL.contains(&ch))
}
pub fn normalize_string(string: &str, lowercase: bool, asciify: bool) -> String {
if asciify {
if lowercase {
AC.replace_all(&string.to_lowercase(), &REPLACE)
} else {
AC.replace_all(string, &REPLACE)
}
} else if lowercase {
string.to_lowercase()
} else {
string.to_owned()
}
}
#[cfg(nightly)]
#[cfg(test)]
mod benches {
// The purpose of these benches was to evaluate the benefit of AhoCorasick over std solutions.
use test::Bencher;
use crate::core::collection::benchmod::ARTISTS;
use super::*;
#[bench]
fn bench_is_char_sensitive(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(is_char_sensitive(&iter.next().unwrap())))
}
#[bench]
fn bench_normalize_string(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(normalize_string(&iter.next().unwrap(), true, true)))
}
}

View File

@ -1,4 +1,7 @@
//! MusicHoard - a music collection manager.
#![cfg_attr(nightly, feature(test))]
#[cfg(nightly)]
extern crate test;
mod core;
pub mod external;

View File

@ -1,7 +1,3 @@
#![cfg_attr(nightly, feature(test))]
#[cfg(nightly)]
extern crate test;
mod tui;
use std::{ffi::OsString, fs::OpenOptions, io, path::PathBuf, thread};

View File

@ -601,7 +601,3 @@ mod tests {
app.unwrap_critical();
}
}
#[cfg(nightly)]
#[cfg(test)]
mod benchmod;

View File

@ -1,7 +1,4 @@
use aho_corasick::AhoCorasick;
use once_cell::sync::Lazy;
use musichoard::collection::{album::Album, artist::Artist, track::Track};
use musichoard::collection::{album::Album, artist::Artist, string, track::Track};
use crate::tui::app::{
machine::{App, AppInner, AppMachine},
@ -9,17 +6,6 @@ use crate::tui::app::{
AppPublicState, AppState, Category, IAppInteractSearch,
};
// Unlikely that this covers all possible strings, but it should at least cover strings
// relevant for music (at least in English). The list of characters handled is based on
// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters.
//
// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018,
// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign
const SPECIAL: [char; 11] = ['', '', '', '—', '―', '', '', '“', '”', '…', ''];
const REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"];
static AC: Lazy<AhoCorasick> =
Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap());
pub struct SearchState {
string: String,
orig: ListSelection,
@ -114,10 +100,6 @@ trait IAppInteractSearchPrivate {
fn predicate_albums(case_sens: bool, char_sens: bool, search: &str, probe: &Album) -> bool;
fn predicate_tracks(case_sens: bool, char_sens: bool, search: &str, probe: &Track) -> bool;
fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool;
fn is_case_sensitive(artist_name: &str) -> bool;
fn is_char_sensitive(artist_name: &str) -> bool;
fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String;
}
impl IAppInteractSearchPrivate for AppMachine<SearchState> {
@ -160,9 +142,9 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
where
P: FnMut(bool, bool, &str, &T) -> bool,
{
let case_sens = Self::is_case_sensitive(name);
let char_sens = Self::is_char_sensitive(name);
let search = Self::normalize_search(name, !case_sens, !char_sens);
let case_sens = string::is_case_sensitive(name);
let char_sens = string::is_char_sensitive(name);
let search = string::normalize_string(name, !case_sens, !char_sens);
let mut index = st.index;
if next && ((index + 1) < st.list.len()) {
@ -177,12 +159,12 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
}
fn predicate_artists(case_sens: bool, char_sens: bool, search: &str, probe: &Artist) -> bool {
let name = Self::normalize_search(&probe.meta.id.name, !case_sens, !char_sens);
let name = string::normalize_string(&probe.meta.id.name, !case_sens, !char_sens);
let mut result = name.starts_with(search);
if let Some(ref probe_sort) = probe.meta.sort {
if !result {
let name = Self::normalize_search(probe_sort, !case_sens, !char_sens);
let name = string::normalize_string(probe_sort, !case_sens, !char_sens);
result = name.starts_with(search);
}
}
@ -199,33 +181,7 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
}
fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool {
Self::normalize_search(title, !case_sens, !char_sens).starts_with(search)
}
fn is_case_sensitive(artist_name: &str) -> bool {
artist_name
.chars()
.any(|ch| ch.is_alphabetic() && ch.is_uppercase())
}
fn is_char_sensitive(artist_name: &str) -> bool {
// Benchmarking reveals that using AhoCorasick is slower. At a guess, this is likely due to
// a high constant cost of AhoCorasick and the otherwise simple nature of the task.
artist_name.chars().any(|ch| SPECIAL.contains(&ch))
}
fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String {
if asciify {
if lowercase {
AC.replace_all(&search.to_lowercase(), &REPLACE)
} else {
AC.replace_all(search, &REPLACE)
}
} else if lowercase {
search.to_lowercase()
} else {
search.to_owned()
}
string::normalize_string(title, !case_sens, !char_sens).starts_with(search)
}
}
@ -544,28 +500,3 @@ mod tests {
assert_eq!(browse.inner.selection.selected(), None);
}
}
#[cfg(nightly)]
#[cfg(test)]
mod benches {
// The purpose of these benches was to evaluate the benefit of AhoCorasick over std solutions.
use test::Bencher;
use crate::tui::{app::machine::benchmod::ARTISTS, lib::MockIMusicHoard};
use super::*;
type Search = AppMachine<MockIMusicHoard, SearchState>;
#[bench]
fn is_char_sensitive(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(Search::is_char_sensitive(&iter.next().unwrap())))
}
#[bench]
fn normalize_search(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(Search::normalize_search(&iter.next().unwrap(), true, true)))
}
}