User normalised strings
Some checks failed
Cargo CI / Build and Test (pull_request) Failing after 30s
Cargo CI / Lint (pull_request) Failing after 28s

This commit is contained in:
Wojciech Kozlowski 2025-01-03 15:49:09 +01:00
parent 0f352c5a9a
commit 711e2ed2ee
5 changed files with 49 additions and 25 deletions

View File

@ -8,6 +8,7 @@ use crate::core::collection::{
album::{Album, AlbumLibId}, album::{Album, AlbumLibId},
merge::{Merge, MergeCollections}, merge::{Merge, MergeCollections},
musicbrainz::{MbArtistRef, MbRefOption}, musicbrainz::{MbArtistRef, MbRefOption},
string::{self, NormalString},
}; };
/// An artist. /// An artist.
@ -63,9 +64,9 @@ impl Merge for Artist {
#[derive(Debug, Default)] #[derive(Debug, Default)]
struct MergeAlbums { struct MergeAlbums {
primary_by_lib_id: HashMap<u32, Album>, primary_by_lib_id: HashMap<u32, Album>,
primary_by_singleton: HashMap<String, Album>, primary_by_singleton: HashMap<NormalString, Album>,
primary_by_title: HashMap<String, Vec<Album>>, primary_by_title: HashMap<NormalString, Vec<Album>>,
secondary_by_title: HashMap<String, Vec<Album>>, secondary_by_title: HashMap<NormalString, Vec<Album>>,
merged: Vec<Album>, merged: Vec<Album>,
} }
@ -90,7 +91,7 @@ impl MergeAlbums {
} }
AlbumLibId::Singleton => assert!(cache AlbumLibId::Singleton => assert!(cache
.primary_by_singleton .primary_by_singleton
.insert(album.meta.id.title.clone(), album) .insert(string::normalize_string(&album.meta.id.title), album)
.is_none()), .is_none()),
_ => unreachable!("primary collection should always have a lib_id"), _ => unreachable!("primary collection should always have a lib_id"),
} }
@ -104,25 +105,25 @@ impl MergeAlbums {
} }
for (_, primary_album) in self.primary_by_lib_id.drain() { for (_, primary_album) in self.primary_by_lib_id.drain() {
self.primary_by_title self.primary_by_title
.entry(primary_album.meta.id.title.clone()) .entry(string::normalize_string(&primary_album.meta.id.title))
.or_default() .or_default()
.push(primary_album); .push(primary_album);
} }
for (title, primary_album) in self.primary_by_singleton.drain() { for (title, primary_album) in self.primary_by_singleton.drain() {
self.primary_by_title self.primary_by_title
.entry(title) .entry(title.clone())
.or_default() .or_default()
.push(primary_album); .push(primary_album);
} }
} }
fn merge_album_by_lib_id(&mut self, mut secondary_album: Album) { fn merge_album_by_lib_id(&mut self, mut secondary_album: Album) {
let title = &secondary_album.meta.id.title; let title = string::normalize_string(&secondary_album.meta.id.title);
let id_opt = secondary_album.meta.id.lib_id; let id_opt = secondary_album.meta.id.lib_id;
let find = match id_opt { let find = match id_opt {
AlbumLibId::Value(value) => self.primary_by_lib_id.remove(&value), AlbumLibId::Value(value) => self.primary_by_lib_id.remove(&value),
AlbumLibId::Singleton => self.primary_by_singleton.remove(title), AlbumLibId::Singleton => self.primary_by_singleton.remove(&title),
AlbumLibId::None => None, AlbumLibId::None => None,
}; };
@ -134,7 +135,7 @@ impl MergeAlbums {
secondary_album.meta.id.lib_id = AlbumLibId::None; secondary_album.meta.id.lib_id = AlbumLibId::None;
self.secondary_by_title self.secondary_by_title
.entry(secondary_album.meta.id.title.clone()) .entry(string::normalize_string(&secondary_album.meta.id.title))
.or_default() .or_default()
.push(secondary_album); .push(secondary_album);
} }

View File

@ -1,5 +1,7 @@
use std::{cmp::Ordering, collections::HashMap, hash::Hash, iter::Peekable}; use std::{cmp::Ordering, collections::HashMap, hash::Hash, iter::Peekable};
use crate::core::collection::string::NormalString;
/// A trait for merging two objects. The merge is asymmetric with the left argument considered to be /// A trait for merging two objects. The merge is asymmetric with the left argument considered to be
/// the primary whose properties are to be kept in case of collisions. /// the primary whose properties are to be kept in case of collisions.
pub trait Merge { pub trait Merge {
@ -83,10 +85,10 @@ where
pub struct MergeCollections; pub struct MergeCollections;
impl MergeCollections { impl MergeCollections {
pub fn merge_by_name<T, It>(mut primary: HashMap<String, Vec<T>>, secondary: It) -> Vec<T> pub fn merge_by_name<T, It>(mut primary: HashMap<NormalString, Vec<T>>, secondary: It) -> Vec<T>
where where
T: Merge, T: Merge,
It: IntoIterator<Item = (String, Vec<T>)>, It: IntoIterator<Item = (NormalString, Vec<T>)>,
{ {
let mut merged = vec![]; let mut merged = vec![];
for (title, mut secondary_items) in secondary.into_iter() { for (title, mut secondary_items) in secondary.into_iter() {

View File

@ -24,8 +24,13 @@ pub fn is_char_sensitive(string: &str) -> bool {
string.chars().any(|ch| SPECIAL.contains(&ch)) string.chars().any(|ch| SPECIAL.contains(&ch))
} }
pub fn normalize_string(string: &str, lowercase: bool, asciify: bool) -> String { #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
if asciify { pub struct NormalString {
pub string: String,
}
pub fn normalize_string_with(string: &str, lowercase: bool, asciify: bool) -> NormalString {
let string = if asciify {
if lowercase { if lowercase {
AC.replace_all(&string.to_lowercase(), &REPLACE) AC.replace_all(&string.to_lowercase(), &REPLACE)
} else { } else {
@ -35,6 +40,13 @@ pub fn normalize_string(string: &str, lowercase: bool, asciify: bool) -> String
string.to_lowercase() string.to_lowercase()
} else { } else {
string.to_owned() string.to_owned()
};
NormalString { string }
}
pub fn normalize_string(string: &str) -> NormalString {
NormalString {
string: AC.replace_all(&string.to_lowercase(), &REPLACE),
} }
} }
@ -54,9 +66,15 @@ mod benches {
b.iter(|| test::black_box(is_char_sensitive(&iter.next().unwrap()))) b.iter(|| test::black_box(is_char_sensitive(&iter.next().unwrap())))
} }
#[bench]
fn bench_normalize_string_with(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(normalize_string_with(&iter.next().unwrap(), true, true)))
}
#[bench] #[bench]
fn bench_normalize_string(b: &mut Bencher) { fn bench_normalize_string(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle(); let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(normalize_string(&iter.next().unwrap(), true, true))) b.iter(|| test::black_box(normalize_string(&iter.next().unwrap())))
} }
} }

View File

@ -5,6 +5,7 @@ use crate::core::{
album::{Album, AlbumId}, album::{Album, AlbumId},
artist::{Artist, ArtistId}, artist::{Artist, ArtistId},
merge::MergeCollections, merge::MergeCollections,
string::{self, NormalString},
Collection, Collection,
}, },
musichoard::{Error, MusicHoard}, musichoard::{Error, MusicHoard},
@ -59,19 +60,19 @@ impl<Database, Library> IMusicHoardBasePrivate for MusicHoard<Database, Library>
} }
fn merge_collections(&self) -> Collection { fn merge_collections(&self) -> Collection {
let mut primary = HashMap::<String, Vec<Artist>>::new(); let mut primary = HashMap::<NormalString, Vec<Artist>>::new();
let mut secondary = HashMap::<String, Vec<Artist>>::new(); let mut secondary = HashMap::<NormalString, Vec<Artist>>::new();
for artist in self.library_cache.iter().cloned() { for artist in self.library_cache.iter().cloned() {
primary primary
.entry(artist.meta.id.name.clone()) .entry(string::normalize_string(&artist.meta.id.name))
.or_default() .or_default()
.push(artist); .push(artist);
} }
for artist in self.database_cache.iter().cloned() { for artist in self.database_cache.iter().cloned() {
secondary secondary
.entry(artist.meta.id.name.clone()) .entry(string::normalize_string(&artist.meta.id.name))
.or_default() .or_default()
.push(artist); .push(artist);
} }

View File

@ -144,7 +144,7 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
{ {
let case_sens = string::is_case_sensitive(name); let case_sens = string::is_case_sensitive(name);
let char_sens = string::is_char_sensitive(name); let char_sens = string::is_char_sensitive(name);
let search = string::normalize_string(name, !case_sens, !char_sens); let search = string::normalize_string_with(name, !case_sens, !char_sens);
let mut index = st.index; let mut index = st.index;
if next && ((index + 1) < st.list.len()) { if next && ((index + 1) < st.list.len()) {
@ -154,18 +154,18 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
let slice = &st.list[index..]; let slice = &st.list[index..];
slice slice
.iter() .iter()
.position(|probe| pred(case_sens, char_sens, &search, probe)) .position(|probe| pred(case_sens, char_sens, &search.string, probe))
.map(|slice_index| index + slice_index) .map(|slice_index| index + slice_index)
} }
fn predicate_artists(case_sens: bool, char_sens: bool, search: &str, probe: &Artist) -> bool { fn predicate_artists(case_sens: bool, char_sens: bool, search: &str, probe: &Artist) -> bool {
let name = string::normalize_string(&probe.meta.id.name, !case_sens, !char_sens); let name = string::normalize_string_with(&probe.meta.id.name, !case_sens, !char_sens);
let mut result = name.starts_with(search); let mut result = name.string.starts_with(search);
if let Some(ref probe_sort) = probe.meta.sort { if let Some(ref probe_sort) = probe.meta.sort {
if !result { if !result {
let name = string::normalize_string(probe_sort, !case_sens, !char_sens); let name = string::normalize_string_with(probe_sort, !case_sens, !char_sens);
result = name.starts_with(search); result = name.string.starts_with(search);
} }
} }
@ -181,7 +181,9 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
} }
fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool { fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool {
string::normalize_string(title, !case_sens, !char_sens).starts_with(search) string::normalize_string_with(title, !case_sens, !char_sens)
.string
.starts_with(search)
} }
} }