User normalised strings
Some checks failed
Cargo CI / Build and Test (pull_request) Failing after 30s
Cargo CI / Lint (pull_request) Failing after 28s

This commit is contained in:
Wojciech Kozlowski 2025-01-03 15:49:09 +01:00
parent 0f352c5a9a
commit 711e2ed2ee
5 changed files with 49 additions and 25 deletions

View File

@ -8,6 +8,7 @@ use crate::core::collection::{
album::{Album, AlbumLibId},
merge::{Merge, MergeCollections},
musicbrainz::{MbArtistRef, MbRefOption},
string::{self, NormalString},
};
/// An artist.
@ -63,9 +64,9 @@ impl Merge for Artist {
#[derive(Debug, Default)]
struct MergeAlbums {
primary_by_lib_id: HashMap<u32, Album>,
primary_by_singleton: HashMap<String, Album>,
primary_by_title: HashMap<String, Vec<Album>>,
secondary_by_title: HashMap<String, Vec<Album>>,
primary_by_singleton: HashMap<NormalString, Album>,
primary_by_title: HashMap<NormalString, Vec<Album>>,
secondary_by_title: HashMap<NormalString, Vec<Album>>,
merged: Vec<Album>,
}
@ -90,7 +91,7 @@ impl MergeAlbums {
}
AlbumLibId::Singleton => assert!(cache
.primary_by_singleton
.insert(album.meta.id.title.clone(), album)
.insert(string::normalize_string(&album.meta.id.title), album)
.is_none()),
_ => unreachable!("primary collection should always have a lib_id"),
}
@ -104,25 +105,25 @@ impl MergeAlbums {
}
for (_, primary_album) in self.primary_by_lib_id.drain() {
self.primary_by_title
.entry(primary_album.meta.id.title.clone())
.entry(string::normalize_string(&primary_album.meta.id.title))
.or_default()
.push(primary_album);
}
for (title, primary_album) in self.primary_by_singleton.drain() {
self.primary_by_title
.entry(title)
.entry(title.clone())
.or_default()
.push(primary_album);
}
}
fn merge_album_by_lib_id(&mut self, mut secondary_album: Album) {
let title = &secondary_album.meta.id.title;
let title = string::normalize_string(&secondary_album.meta.id.title);
let id_opt = secondary_album.meta.id.lib_id;
let find = match id_opt {
AlbumLibId::Value(value) => self.primary_by_lib_id.remove(&value),
AlbumLibId::Singleton => self.primary_by_singleton.remove(title),
AlbumLibId::Singleton => self.primary_by_singleton.remove(&title),
AlbumLibId::None => None,
};
@ -134,7 +135,7 @@ impl MergeAlbums {
secondary_album.meta.id.lib_id = AlbumLibId::None;
self.secondary_by_title
.entry(secondary_album.meta.id.title.clone())
.entry(string::normalize_string(&secondary_album.meta.id.title))
.or_default()
.push(secondary_album);
}

View File

@ -1,5 +1,7 @@
use std::{cmp::Ordering, collections::HashMap, hash::Hash, iter::Peekable};
use crate::core::collection::string::NormalString;
/// A trait for merging two objects. The merge is asymmetric with the left argument considered to be
/// the primary whose properties are to be kept in case of collisions.
pub trait Merge {
@ -83,10 +85,10 @@ where
pub struct MergeCollections;
impl MergeCollections {
pub fn merge_by_name<T, It>(mut primary: HashMap<String, Vec<T>>, secondary: It) -> Vec<T>
pub fn merge_by_name<T, It>(mut primary: HashMap<NormalString, Vec<T>>, secondary: It) -> Vec<T>
where
T: Merge,
It: IntoIterator<Item = (String, Vec<T>)>,
It: IntoIterator<Item = (NormalString, Vec<T>)>,
{
let mut merged = vec![];
for (title, mut secondary_items) in secondary.into_iter() {

View File

@ -24,8 +24,13 @@ pub fn is_char_sensitive(string: &str) -> bool {
string.chars().any(|ch| SPECIAL.contains(&ch))
}
pub fn normalize_string(string: &str, lowercase: bool, asciify: bool) -> String {
if asciify {
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct NormalString {
pub string: String,
}
pub fn normalize_string_with(string: &str, lowercase: bool, asciify: bool) -> NormalString {
let string = if asciify {
if lowercase {
AC.replace_all(&string.to_lowercase(), &REPLACE)
} else {
@ -35,6 +40,13 @@ pub fn normalize_string(string: &str, lowercase: bool, asciify: bool) -> String
string.to_lowercase()
} else {
string.to_owned()
};
NormalString { string }
}
pub fn normalize_string(string: &str) -> NormalString {
NormalString {
string: AC.replace_all(&string.to_lowercase(), &REPLACE),
}
}
@ -54,9 +66,15 @@ mod benches {
b.iter(|| test::black_box(is_char_sensitive(&iter.next().unwrap())))
}
#[bench]
fn bench_normalize_string_with(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(normalize_string_with(&iter.next().unwrap(), true, true)))
}
#[bench]
fn bench_normalize_string(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(normalize_string(&iter.next().unwrap(), true, true)))
b.iter(|| test::black_box(normalize_string(&iter.next().unwrap())))
}
}

View File

@ -5,6 +5,7 @@ use crate::core::{
album::{Album, AlbumId},
artist::{Artist, ArtistId},
merge::MergeCollections,
string::{self, NormalString},
Collection,
},
musichoard::{Error, MusicHoard},
@ -59,19 +60,19 @@ impl<Database, Library> IMusicHoardBasePrivate for MusicHoard<Database, Library>
}
fn merge_collections(&self) -> Collection {
let mut primary = HashMap::<String, Vec<Artist>>::new();
let mut secondary = HashMap::<String, Vec<Artist>>::new();
let mut primary = HashMap::<NormalString, Vec<Artist>>::new();
let mut secondary = HashMap::<NormalString, Vec<Artist>>::new();
for artist in self.library_cache.iter().cloned() {
primary
.entry(artist.meta.id.name.clone())
.entry(string::normalize_string(&artist.meta.id.name))
.or_default()
.push(artist);
}
for artist in self.database_cache.iter().cloned() {
secondary
.entry(artist.meta.id.name.clone())
.entry(string::normalize_string(&artist.meta.id.name))
.or_default()
.push(artist);
}

View File

@ -144,7 +144,7 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
{
let case_sens = string::is_case_sensitive(name);
let char_sens = string::is_char_sensitive(name);
let search = string::normalize_string(name, !case_sens, !char_sens);
let search = string::normalize_string_with(name, !case_sens, !char_sens);
let mut index = st.index;
if next && ((index + 1) < st.list.len()) {
@ -154,18 +154,18 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
let slice = &st.list[index..];
slice
.iter()
.position(|probe| pred(case_sens, char_sens, &search, probe))
.position(|probe| pred(case_sens, char_sens, &search.string, probe))
.map(|slice_index| index + slice_index)
}
fn predicate_artists(case_sens: bool, char_sens: bool, search: &str, probe: &Artist) -> bool {
let name = string::normalize_string(&probe.meta.id.name, !case_sens, !char_sens);
let mut result = name.starts_with(search);
let name = string::normalize_string_with(&probe.meta.id.name, !case_sens, !char_sens);
let mut result = name.string.starts_with(search);
if let Some(ref probe_sort) = probe.meta.sort {
if !result {
let name = string::normalize_string(probe_sort, !case_sens, !char_sens);
result = name.starts_with(search);
let name = string::normalize_string_with(probe_sort, !case_sens, !char_sens);
result = name.string.starts_with(search);
}
}
@ -181,7 +181,9 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
}
fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool {
string::normalize_string(title, !case_sens, !char_sens).starts_with(search)
string::normalize_string_with(title, !case_sens, !char_sens)
.string
.starts_with(search)
}
}