Move srting utilities to core
This commit is contained in:
parent
0f658f8730
commit
0f352c5a9a
@ -1,5 +1,7 @@
|
||||
// Date: 2024-02-19
|
||||
pub const ARTISTS: [&str; 141] = [
|
||||
// 1. `beet ls -a -f '$albumartist'`.
|
||||
// 2. `M-x delete-duplicate-lines`.
|
||||
// Date: 2025-01-03
|
||||
pub const ARTISTS: [&str; 156] = [
|
||||
"Abadden",
|
||||
"Acid Drinkers",
|
||||
"Adema",
|
||||
@ -16,6 +18,7 @@ pub const ARTISTS: [&str; 141] = [
|
||||
"Аркона",
|
||||
"Artas",
|
||||
"As I Lay Dying",
|
||||
"At the Gates",
|
||||
"Avenged Sevenfold",
|
||||
"Aversions Crown",
|
||||
"Aviators",
|
||||
@ -30,6 +33,7 @@ pub const ARTISTS: [&str; 141] = [
|
||||
"Bloodbath",
|
||||
"Bloodbound",
|
||||
"Brothers of Metal",
|
||||
"Carcass",
|
||||
"Carnation",
|
||||
"Cellar Darling",
|
||||
"Children of Bodom",
|
||||
@ -44,7 +48,6 @@ pub const ARTISTS: [&str; 141] = [
|
||||
"Dynazty",
|
||||
"Edguy",
|
||||
"Eluveitie",
|
||||
"Eminem",
|
||||
"Enforcer",
|
||||
"Ensiferum",
|
||||
"Epica",
|
||||
@ -72,10 +75,12 @@ pub const ARTISTS: [&str; 141] = [
|
||||
"Heaven’s Basement",
|
||||
"Heavy Load",
|
||||
"Hermh",
|
||||
"Ignea",
|
||||
"Immortal",
|
||||
"In Flames",
|
||||
"Insomnium",
|
||||
"Iron Maiden",
|
||||
"Judas Priest",
|
||||
"Kalmah",
|
||||
"Kataklysm",
|
||||
"Kontrust",
|
||||
@ -86,14 +91,16 @@ pub const ARTISTS: [&str; 141] = [
|
||||
"Linkin Park",
|
||||
"Lost Dreams",
|
||||
"Man Must Die",
|
||||
"Månegarm",
|
||||
"Me and That Man",
|
||||
"Megaton Sword",
|
||||
"Mercyful Fate",
|
||||
"Metal Church",
|
||||
"Metallica",
|
||||
"Michael Jackson",
|
||||
"Miracle of Sound",
|
||||
"Misery Index",
|
||||
"Mortal Sin",
|
||||
"Mudvayne",
|
||||
"Månegarm",
|
||||
"Nickelback",
|
||||
"Nightwish",
|
||||
"Nile",
|
||||
@ -103,8 +110,8 @@ pub const ARTISTS: [&str; 141] = [
|
||||
"Oomph!",
|
||||
"P.O.D.",
|
||||
"Paddy and the Rats",
|
||||
"Pain",
|
||||
"Paul Stanley",
|
||||
"Persefone",
|
||||
"Peyton Parrish",
|
||||
"Powerwolf",
|
||||
"Primitai",
|
||||
@ -113,9 +120,14 @@ pub const ARTISTS: [&str; 141] = [
|
||||
"Rammstein",
|
||||
"Red Hot Chili Peppers",
|
||||
"Revocation",
|
||||
"Ride the Sky",
|
||||
"Rob Zombie",
|
||||
"Sabaton",
|
||||
"Saltatio Mortis",
|
||||
"Saltatio Mortis & Lara Loft",
|
||||
"Satan",
|
||||
"Savatage",
|
||||
"Scar Symmetry",
|
||||
"Scars on Broadway",
|
||||
"Scorpions",
|
||||
"Silent Descent",
|
||||
@ -132,13 +144,18 @@ pub const ARTISTS: [&str; 141] = [
|
||||
"Timecry",
|
||||
"Trivium",
|
||||
"Tuomas Holopainen",
|
||||
"VNV Nation",
|
||||
"Turisas",
|
||||
"Vader",
|
||||
"Vesania",
|
||||
"Vicious Crusade",
|
||||
"The Wages of Sin",
|
||||
"Vintersorg",
|
||||
"VNV Nation",
|
||||
"W.A.S.P.",
|
||||
"Whitechapel",
|
||||
"Within Temptation",
|
||||
"Woe of Tyrants",
|
||||
"Wovenwar",
|
||||
"Xandria",
|
||||
"Jonathan Young",
|
||||
"Jonathan Young, Peyton Parrish & Colm R. McGuinness",
|
||||
];
|
@ -4,6 +4,7 @@ pub mod album;
|
||||
pub mod artist;
|
||||
pub mod merge;
|
||||
pub mod musicbrainz;
|
||||
pub mod string;
|
||||
pub mod track;
|
||||
|
||||
use std::fmt::{self, Display};
|
||||
@ -40,3 +41,7 @@ impl From<uuid::Error> for Error {
|
||||
Error::MbidError(err.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(nightly)]
|
||||
#[cfg(test)]
|
||||
mod benchmod;
|
||||
|
62
src/core/collection/string.rs
Normal file
62
src/core/collection/string.rs
Normal file
@ -0,0 +1,62 @@
|
||||
use aho_corasick::AhoCorasick;
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
// Unlikely that this covers all possible strings, but it should at least cover strings
|
||||
// relevant for music (at least in English). The list of characters handled is based on
|
||||
// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters.
|
||||
//
|
||||
// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018,
|
||||
// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign
|
||||
const SPECIAL: [char; 11] = ['‐', '‒', '–', '—', '―', '‘', '’', '“', '”', '…', '−'];
|
||||
const REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"];
|
||||
static AC: Lazy<AhoCorasick> =
|
||||
Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap());
|
||||
|
||||
pub fn is_case_sensitive(string: &str) -> bool {
|
||||
string
|
||||
.chars()
|
||||
.any(|ch| ch.is_alphabetic() && ch.is_uppercase())
|
||||
}
|
||||
|
||||
pub fn is_char_sensitive(string: &str) -> bool {
|
||||
// Benchmarking reveals that using AhoCorasick is slower. At a guess, this is likely due to
|
||||
// a high constant cost of AhoCorasick and the otherwise simple nature of the task.
|
||||
string.chars().any(|ch| SPECIAL.contains(&ch))
|
||||
}
|
||||
|
||||
pub fn normalize_string(string: &str, lowercase: bool, asciify: bool) -> String {
|
||||
if asciify {
|
||||
if lowercase {
|
||||
AC.replace_all(&string.to_lowercase(), &REPLACE)
|
||||
} else {
|
||||
AC.replace_all(string, &REPLACE)
|
||||
}
|
||||
} else if lowercase {
|
||||
string.to_lowercase()
|
||||
} else {
|
||||
string.to_owned()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(nightly)]
|
||||
#[cfg(test)]
|
||||
mod benches {
|
||||
// The purpose of these benches was to evaluate the benefit of AhoCorasick over std solutions.
|
||||
use test::Bencher;
|
||||
|
||||
use crate::core::collection::benchmod::ARTISTS;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[bench]
|
||||
fn bench_is_char_sensitive(b: &mut Bencher) {
|
||||
let mut iter = ARTISTS.iter().cycle();
|
||||
b.iter(|| test::black_box(is_char_sensitive(&iter.next().unwrap())))
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_normalize_string(b: &mut Bencher) {
|
||||
let mut iter = ARTISTS.iter().cycle();
|
||||
b.iter(|| test::black_box(normalize_string(&iter.next().unwrap(), true, true)))
|
||||
}
|
||||
}
|
@ -1,4 +1,7 @@
|
||||
//! MusicHoard - a music collection manager.
|
||||
#![cfg_attr(nightly, feature(test))]
|
||||
#[cfg(nightly)]
|
||||
extern crate test;
|
||||
|
||||
mod core;
|
||||
pub mod external;
|
||||
|
@ -1,7 +1,3 @@
|
||||
#![cfg_attr(nightly, feature(test))]
|
||||
#[cfg(nightly)]
|
||||
extern crate test;
|
||||
|
||||
mod tui;
|
||||
|
||||
use std::{ffi::OsString, fs::OpenOptions, io, path::PathBuf, thread};
|
||||
|
@ -601,7 +601,3 @@ mod tests {
|
||||
app.unwrap_critical();
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(nightly)]
|
||||
#[cfg(test)]
|
||||
mod benchmod;
|
||||
|
@ -1,7 +1,4 @@
|
||||
use aho_corasick::AhoCorasick;
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
use musichoard::collection::{album::Album, artist::Artist, track::Track};
|
||||
use musichoard::collection::{album::Album, artist::Artist, string, track::Track};
|
||||
|
||||
use crate::tui::app::{
|
||||
machine::{App, AppInner, AppMachine},
|
||||
@ -9,17 +6,6 @@ use crate::tui::app::{
|
||||
AppPublicState, AppState, Category, IAppInteractSearch,
|
||||
};
|
||||
|
||||
// Unlikely that this covers all possible strings, but it should at least cover strings
|
||||
// relevant for music (at least in English). The list of characters handled is based on
|
||||
// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters.
|
||||
//
|
||||
// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018,
|
||||
// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign
|
||||
const SPECIAL: [char; 11] = ['‐', '‒', '–', '—', '―', '‘', '’', '“', '”', '…', '−'];
|
||||
const REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"];
|
||||
static AC: Lazy<AhoCorasick> =
|
||||
Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap());
|
||||
|
||||
pub struct SearchState {
|
||||
string: String,
|
||||
orig: ListSelection,
|
||||
@ -114,10 +100,6 @@ trait IAppInteractSearchPrivate {
|
||||
fn predicate_albums(case_sens: bool, char_sens: bool, search: &str, probe: &Album) -> bool;
|
||||
fn predicate_tracks(case_sens: bool, char_sens: bool, search: &str, probe: &Track) -> bool;
|
||||
fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool;
|
||||
|
||||
fn is_case_sensitive(artist_name: &str) -> bool;
|
||||
fn is_char_sensitive(artist_name: &str) -> bool;
|
||||
fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String;
|
||||
}
|
||||
|
||||
impl IAppInteractSearchPrivate for AppMachine<SearchState> {
|
||||
@ -160,9 +142,9 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
|
||||
where
|
||||
P: FnMut(bool, bool, &str, &T) -> bool,
|
||||
{
|
||||
let case_sens = Self::is_case_sensitive(name);
|
||||
let char_sens = Self::is_char_sensitive(name);
|
||||
let search = Self::normalize_search(name, !case_sens, !char_sens);
|
||||
let case_sens = string::is_case_sensitive(name);
|
||||
let char_sens = string::is_char_sensitive(name);
|
||||
let search = string::normalize_string(name, !case_sens, !char_sens);
|
||||
|
||||
let mut index = st.index;
|
||||
if next && ((index + 1) < st.list.len()) {
|
||||
@ -177,12 +159,12 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
|
||||
}
|
||||
|
||||
fn predicate_artists(case_sens: bool, char_sens: bool, search: &str, probe: &Artist) -> bool {
|
||||
let name = Self::normalize_search(&probe.meta.id.name, !case_sens, !char_sens);
|
||||
let name = string::normalize_string(&probe.meta.id.name, !case_sens, !char_sens);
|
||||
let mut result = name.starts_with(search);
|
||||
|
||||
if let Some(ref probe_sort) = probe.meta.sort {
|
||||
if !result {
|
||||
let name = Self::normalize_search(probe_sort, !case_sens, !char_sens);
|
||||
let name = string::normalize_string(probe_sort, !case_sens, !char_sens);
|
||||
result = name.starts_with(search);
|
||||
}
|
||||
}
|
||||
@ -199,33 +181,7 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
|
||||
}
|
||||
|
||||
fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool {
|
||||
Self::normalize_search(title, !case_sens, !char_sens).starts_with(search)
|
||||
}
|
||||
|
||||
fn is_case_sensitive(artist_name: &str) -> bool {
|
||||
artist_name
|
||||
.chars()
|
||||
.any(|ch| ch.is_alphabetic() && ch.is_uppercase())
|
||||
}
|
||||
|
||||
fn is_char_sensitive(artist_name: &str) -> bool {
|
||||
// Benchmarking reveals that using AhoCorasick is slower. At a guess, this is likely due to
|
||||
// a high constant cost of AhoCorasick and the otherwise simple nature of the task.
|
||||
artist_name.chars().any(|ch| SPECIAL.contains(&ch))
|
||||
}
|
||||
|
||||
fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String {
|
||||
if asciify {
|
||||
if lowercase {
|
||||
AC.replace_all(&search.to_lowercase(), &REPLACE)
|
||||
} else {
|
||||
AC.replace_all(search, &REPLACE)
|
||||
}
|
||||
} else if lowercase {
|
||||
search.to_lowercase()
|
||||
} else {
|
||||
search.to_owned()
|
||||
}
|
||||
string::normalize_string(title, !case_sens, !char_sens).starts_with(search)
|
||||
}
|
||||
}
|
||||
|
||||
@ -544,28 +500,3 @@ mod tests {
|
||||
assert_eq!(browse.inner.selection.selected(), None);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(nightly)]
|
||||
#[cfg(test)]
|
||||
mod benches {
|
||||
// The purpose of these benches was to evaluate the benefit of AhoCorasick over std solutions.
|
||||
use test::Bencher;
|
||||
|
||||
use crate::tui::{app::machine::benchmod::ARTISTS, lib::MockIMusicHoard};
|
||||
|
||||
use super::*;
|
||||
|
||||
type Search = AppMachine<MockIMusicHoard, SearchState>;
|
||||
|
||||
#[bench]
|
||||
fn is_char_sensitive(b: &mut Bencher) {
|
||||
let mut iter = ARTISTS.iter().cycle();
|
||||
b.iter(|| test::black_box(Search::is_char_sensitive(&iter.next().unwrap())))
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn normalize_search(b: &mut Bencher) {
|
||||
let mut iter = ARTISTS.iter().cycle();
|
||||
b.iter(|| test::black_box(Search::normalize_search(&iter.next().unwrap(), true, true)))
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user