diff --git a/.gitea/workflows/gitea-ci.yaml b/.gitea/workflows/gitea-ci.yaml index 4df9bd6..fceaad3 100644 --- a/.gitea/workflows/gitea-ci.yaml +++ b/.gitea/workflows/gitea-ci.yaml @@ -38,7 +38,7 @@ jobs: --ignore "tests/*" --ignore "src/main.rs" --ignore "src/bin/musichoard-edit.rs" - --excl-line "^#\[derive|unimplemented\!\(\)" + --excl-line "^#\[derive|unimplemented\!\(|unreachable\!\(" --excl-start "GRCOV_EXCL_START|mod tests \{" --excl-stop "GRCOV_EXCL_STOP" --output-path ./target/debug/coverage/ diff --git a/Cargo.toml b/Cargo.toml index 4582b62..70a9760 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,9 +6,9 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -aho-corasick = { version = "1.1.2", optional = true } +aho-corasick = { version = "1.1.2" } crossterm = { version = "0.28.1", optional = true} -once_cell = { version = "1.19.0", optional = true} +once_cell = { version = "1.19.0" } openssh = { version = "0.10.3", features = ["native-mux"], default-features = false, optional = true} paste = { version = "1.0.15", optional = true } ratatui = { version = "0.28.1", optional = true} @@ -37,7 +37,7 @@ database-json = ["serde", "serde_json"] library-beets = [] library-beets-ssh = ["openssh", "tokio"] musicbrainz = ["paste", "reqwest", "serde", "serde_json"] -tui = ["aho-corasick", "crossterm", "once_cell", "ratatui", "tui-input"] +tui = ["crossterm", "ratatui", "tui-input"] [[bin]] name = "musichoard" diff --git a/README.md b/README.md index c2aeb62..7e1bc40 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ grcov codecov/debug/profraw \ --ignore "tests/*" \ --ignore "src/main.rs" \ --ignore "src/bin/musichoard-edit.rs" \ - --excl-line "^#\[derive|unimplemented\!\(\)" \ + --excl-line "^#\[derive|unimplemented\!\(|unreachable\!\(" \ --excl-start "GRCOV_EXCL_START|mod tests \{" \ --excl-stop "GRCOV_EXCL_STOP" \ --output-path ./codecov/debug/coverage/ diff --git a/src/core/collection/album.rs b/src/core/collection/album.rs index cd53346..f27192a 100644 --- a/src/core/collection/album.rs +++ b/src/core/collection/album.rs @@ -1,11 +1,13 @@ use std::mem; use crate::core::collection::{ - merge::{Merge, MergeName, MergeSorted}, + merge::{Merge, MergeSorted}, musicbrainz::{MbAlbumRef, MbRefOption}, track::{Track, TrackFormat}, }; +use super::string; + /// An album is a collection of tracks that were released together. #[derive(Clone, Debug, PartialEq, Eq)] pub struct Album { @@ -54,12 +56,6 @@ impl AlbumLibId { /// Unique database identifier. Use MBID for this purpose. pub type AlbumMbRef = MbRefOption; -impl MergeName for Album { - fn name(&self) -> &str { - &self.meta.id.title - } -} - // There are crates for handling dates, but we don't need much complexity beyond year-month-day. /// The album's release date. #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] @@ -320,7 +316,8 @@ impl AlbumId { } pub fn compatible(&self, other: &AlbumId) -> bool { - let titles_compatible = self.title == other.title; + let titles_compatible = + string::normalize_string(&self.title) == string::normalize_string(&other.title); let lib_id_compatible = self.lib_id.is_none() || other.lib_id.is_none() || (self.lib_id == other.lib_id); let mb_ref_compatible = diff --git a/src/core/collection/artist.rs b/src/core/collection/artist.rs index b83240c..d67ca94 100644 --- a/src/core/collection/artist.rs +++ b/src/core/collection/artist.rs @@ -1,12 +1,14 @@ use std::{ collections::HashMap, fmt::{self, Debug, Display}, + mem, }; use crate::core::collection::{ album::{Album, AlbumLibId}, - merge::{Merge, MergeCollections, MergeName}, + merge::{Merge, MergeCollections, NormalMap}, musicbrainz::{MbArtistRef, MbRefOption}, + string::{self, NormalString}, }; /// An artist. @@ -30,12 +32,6 @@ pub struct ArtistInfo { pub properties: HashMap>, } -impl MergeName for Artist { - fn name(&self) -> &str { - &self.meta.id.name - } -} - /// The artist identifier. #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct ArtistId { @@ -46,49 +42,6 @@ pub struct ArtistId { /// Unique database identifier. Use MBID for this purpose. pub type ArtistMbRef = MbRefOption; -impl Artist { - /// Create new [`Artist`] with the given [`ArtistId`]. - pub fn new>(id: Id) -> Self { - Artist { - meta: ArtistMeta::new(id), - albums: vec![], - } - } - - fn merge_album_by_lib_id( - primary_albums: &mut [Album], - mut secondary_album: Album, - ) -> Option { - let id_opt = secondary_album.meta.id.lib_id; - if let id @ AlbumLibId::Value(_) | id @ AlbumLibId::Singleton = id_opt { - let mut iter = primary_albums.iter_mut(); - if let Some(ref mut primary_album) = iter.find(|a| a.meta.id.lib_id == id) { - primary_album.merge_in_place(secondary_album); - return None; - } - secondary_album.meta.id.lib_id = AlbumLibId::None; - } - Some(secondary_album) - } - - fn merge_albums_by_lib_id( - primary_albums: &mut [Album], - secondary_albums: Vec, - ) -> HashMap> { - let mut secondary_without_id = HashMap::>::new(); - for secondary_album in secondary_albums.into_iter() { - let unmerged = Artist::merge_album_by_lib_id(primary_albums, secondary_album); - if let Some(secondary_album) = unmerged { - secondary_without_id - .entry(secondary_album.meta.id.title.clone()) - .or_default() - .push(secondary_album); - } - } - secondary_without_id - } -} - impl PartialOrd for Artist { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) @@ -104,10 +57,94 @@ impl Ord for Artist { impl Merge for Artist { fn merge_in_place(&mut self, other: Self) { self.meta.merge_in_place(other.meta); + self.albums = MergeAlbums::merge_albums(mem::take(&mut self.albums), other.albums); + } +} - let other_without_id = Artist::merge_albums_by_lib_id(&mut self.albums, other.albums); - MergeCollections::merge_by_name(&mut self.albums, other_without_id); - self.albums.sort_unstable(); +#[derive(Debug, Default)] +struct MergeAlbums { + primary_by_lib_id: HashMap, + primary_by_singleton: HashMap, + primary_by_title: NormalMap, + secondary_by_title: NormalMap, + merged: Vec, +} + +impl MergeAlbums { + fn merge_albums(primary_albums: Vec, secondary_albums: Vec) -> Vec { + let mut cache = MergeAlbums::new(primary_albums); + cache.merge_albums_by_lib_id(secondary_albums); + cache.merged.extend(MergeCollections::merge_by_name( + cache.primary_by_title, + cache.secondary_by_title, + )); + cache.merged.sort_unstable(); + cache.merged + } + + fn new(primary_albums: Vec) -> Self { + let mut cache = MergeAlbums::default(); + for album in primary_albums.into_iter() { + match album.meta.id.lib_id { + AlbumLibId::Value(value) => { + assert!(cache.primary_by_lib_id.insert(value, album).is_none()) + } + AlbumLibId::Singleton => assert!(cache + .primary_by_singleton + .insert(string::normalize_string(&album.meta.id.title), album) + .is_none()), + _ => unreachable!("primary collection should always have a lib_id"), + } + } + cache + } + + fn merge_albums_by_lib_id(&mut self, secondary_albums: Vec) { + for secondary_album in secondary_albums.into_iter() { + self.merge_album_by_lib_id(secondary_album); + } + for (_, primary_album) in self.primary_by_lib_id.drain() { + self.primary_by_title.insert( + string::normalize_string(&primary_album.meta.id.title), + primary_album, + ); + } + for (title, primary_album) in self.primary_by_singleton.drain() { + self.primary_by_title.insert(title, primary_album); + } + } + + fn merge_album_by_lib_id(&mut self, mut secondary_album: Album) { + let title = string::normalize_string(&secondary_album.meta.id.title); + let id_opt = secondary_album.meta.id.lib_id; + + let find = match id_opt { + AlbumLibId::Value(value) => self.primary_by_lib_id.remove(&value), + AlbumLibId::Singleton => self.primary_by_singleton.remove(&title), + AlbumLibId::None => None, + }; + + if let Some(mut primary_album) = find { + primary_album.merge_in_place(secondary_album); + self.merged.push(primary_album); + return; + } + + secondary_album.meta.id.lib_id = AlbumLibId::None; + self.secondary_by_title.insert( + string::normalize_string(&secondary_album.meta.id.title), + secondary_album, + ); + } +} + +impl Artist { + /// Create new [`Artist`] with the given [`ArtistId`]. + pub fn new>(id: Id) -> Self { + Artist { + meta: ArtistMeta::new(id), + albums: vec![], + } } } @@ -248,7 +285,8 @@ impl ArtistId { } pub fn compatible(&self, other: &ArtistId) -> bool { - let names_compatible = self.name == other.name; + let names_compatible = + string::normalize_string(&self.name) == string::normalize_string(&other.name); let mb_ref_compatible = self.mb_ref.is_none() || other.mb_ref.is_none() || (self.mb_ref == other.mb_ref); names_compatible && mb_ref_compatible @@ -263,7 +301,13 @@ impl Display for ArtistId { #[cfg(test)] mod tests { - use crate::core::testmod::FULL_COLLECTION; + use crate::{ + collection::{ + album::{AlbumId, AlbumMbRef}, + musicbrainz::MbAlbumRef, + }, + core::testmod::FULL_COLLECTION, + }; use super::*; @@ -533,4 +577,119 @@ mod tests { let merged = left.clone().merge(right); assert_eq!(expected, merged); } + + #[test] + #[should_panic(expected = "multiple secondaries unsupported")] + fn merge_two_db_albums_to_one_lib_album() { + let mut left = Artist::new(ArtistId::new("Artist")); + let mut right = left.clone(); + + let album = Album::new(AlbumId::new("Album")); + + left.albums.push(album.clone()); + left.albums[0].meta.id.lib_id = AlbumLibId::Value(1); + + right.albums.push(album.clone()); + right.albums.push(album.clone()); + + left.merge(right); + } + + #[test] + #[should_panic(expected = "multiple primaries unsupported")] + fn merge_one_db_album_to_two_lib_albums() { + let mut left = Artist::new(ArtistId::new("Artist")); + let mut right = left.clone(); + + let album = Album::new(AlbumId::new("Album")); + + left.albums.push(album.clone()); + left.albums.push(album.clone()); + left.albums[0].meta.id.lib_id = AlbumLibId::Value(1); + left.albums[1].meta.id.lib_id = AlbumLibId::Value(2); + + right.albums.push(album.clone()); + + left.merge(right); + } + + #[test] + fn merge_normalized_album_titles() { + let mut left = Artist::new(ArtistId::new("Artist")); + let mut right = left.clone(); + + left.albums + .push(Album::new(AlbumId::new("Album‐Title ‘Title’"))); + left.albums[0].meta.id.lib_id = AlbumLibId::Value(1); + + right + .albums + .push(Album::new(AlbumId::new("alBum—tiTle 'title’"))); + right + .albums + .push(Album::new(AlbumId::new("Album‐Title “Title”"))); + + // The first album will be merged, the second will be added. + let mut expected = left.clone(); + expected.albums.push(right.albums.last().unwrap().clone()); + expected.albums.sort_unstable(); + + let merged = left.merge(right); + assert_eq!(expected, merged); + } + + #[test] + fn merge_multiple_singletons() { + let mut left = Artist::new(ArtistId::new("Artist")); + let mut right = left.clone(); + + left.albums.push(Album::new(AlbumId::new("Singleton 1"))); + left.albums.push(Album::new(AlbumId::new("Singleton 2"))); + left.albums[0].meta.id.lib_id = AlbumLibId::Singleton; + left.albums[1].meta.id.lib_id = AlbumLibId::Singleton; + + right.albums.push(Album::new(AlbumId::new("Singleton 1"))); + right.albums.push(Album::new(AlbumId::new("Singleton 2"))); + right.albums.push(Album::new(AlbumId::new("Singleton 3"))); + right.albums[0].meta.id.lib_id = AlbumLibId::Singleton; + right.albums[2].meta.id.lib_id = AlbumLibId::Singleton; + + // Expect first two albums to merge (including lib id) and the third to be added. However, + // the third one will lose its lib id due to no match in the primary. + let mut expected = left.clone(); + expected.albums.push(right.albums.last().unwrap().clone()); + expected.albums.last_mut().unwrap().meta.id.lib_id = AlbumLibId::None; + + let merged = left.merge(right); + assert_eq!(expected, merged); + } + + #[test] + fn merge_two_db_albums_to_one_lib_album_with_ids() { + let mut left = Artist::new(ArtistId::new("Artist")); + let mut right = left.clone(); + + let album = Album::new(AlbumId::new("Album")); + + left.albums.push(album.clone()); + left.albums[0].meta.id.lib_id = AlbumLibId::Value(1); + + // In this test, one DB album has a lib id that matches that of the lib album. This is the + // one that we expect to merge while the second album should just be added. + right.albums.push(album.clone()); + right.albums[0].meta.id.lib_id = AlbumLibId::Value(1); + right.albums[0].meta.id.mb_ref = AlbumMbRef::Some( + MbAlbumRef::from_uuid_str("00000000-0000-0000-0000-000000000000").unwrap(), + ); + + right.albums.push(album.clone()); + right.albums[1].meta.id.mb_ref = AlbumMbRef::Some( + MbAlbumRef::from_uuid_str("11111111-1111-1111-1111-111111111111").unwrap(), + ); + + let expected = right.clone(); + + let merged = left.merge(right); + assert_eq!(expected, merged); + } } diff --git a/src/tui/app/machine/benchmod.rs b/src/core/collection/benchmod.rs similarity index 83% rename from src/tui/app/machine/benchmod.rs rename to src/core/collection/benchmod.rs index 6d45a3a..f401e77 100644 --- a/src/tui/app/machine/benchmod.rs +++ b/src/core/collection/benchmod.rs @@ -1,5 +1,7 @@ -// Date: 2024-02-19 -pub const ARTISTS: [&str; 141] = [ +// 1. `beet ls -a -f '$albumartist'`. +// 2. `M-x delete-duplicate-lines`. +// Date: 2025-01-03 +pub const ARTISTS: [&str; 156] = [ "Abadden", "Acid Drinkers", "Adema", @@ -16,6 +18,7 @@ pub const ARTISTS: [&str; 141] = [ "Аркона", "Artas", "As I Lay Dying", + "At the Gates", "Avenged Sevenfold", "Aversions Crown", "Aviators", @@ -30,6 +33,7 @@ pub const ARTISTS: [&str; 141] = [ "Bloodbath", "Bloodbound", "Brothers of Metal", + "Carcass", "Carnation", "Cellar Darling", "Children of Bodom", @@ -44,7 +48,6 @@ pub const ARTISTS: [&str; 141] = [ "Dynazty", "Edguy", "Eluveitie", - "Eminem", "Enforcer", "Ensiferum", "Epica", @@ -72,10 +75,12 @@ pub const ARTISTS: [&str; 141] = [ "Heaven’s Basement", "Heavy Load", "Hermh", + "Ignea", "Immortal", "In Flames", "Insomnium", "Iron Maiden", + "Judas Priest", "Kalmah", "Kataklysm", "Kontrust", @@ -86,14 +91,16 @@ pub const ARTISTS: [&str; 141] = [ "Linkin Park", "Lost Dreams", "Man Must Die", + "Månegarm", "Me and That Man", + "Megaton Sword", "Mercyful Fate", + "Metal Church", "Metallica", - "Michael Jackson", "Miracle of Sound", "Misery Index", + "Mortal Sin", "Mudvayne", - "Månegarm", "Nickelback", "Nightwish", "Nile", @@ -103,8 +110,8 @@ pub const ARTISTS: [&str; 141] = [ "Oomph!", "P.O.D.", "Paddy and the Rats", + "Pain", "Paul Stanley", - "Persefone", "Peyton Parrish", "Powerwolf", "Primitai", @@ -113,9 +120,14 @@ pub const ARTISTS: [&str; 141] = [ "Rammstein", "Red Hot Chili Peppers", "Revocation", + "Ride the Sky", "Rob Zombie", "Sabaton", + "Saltatio Mortis", + "Saltatio Mortis & Lara Loft", + "Satan", "Savatage", + "Scar Symmetry", "Scars on Broadway", "Scorpions", "Silent Descent", @@ -132,13 +144,18 @@ pub const ARTISTS: [&str; 141] = [ "Timecry", "Trivium", "Tuomas Holopainen", - "VNV Nation", + "Turisas", "Vader", + "Vesania", "Vicious Crusade", - "The Wages of Sin", + "Vintersorg", + "VNV Nation", + "W.A.S.P.", "Whitechapel", "Within Temptation", "Woe of Tyrants", "Wovenwar", "Xandria", + "Jonathan Young", + "Jonathan Young, Peyton Parrish & Colm R. McGuinness", ]; diff --git a/src/core/collection/merge.rs b/src/core/collection/merge.rs index bbfc0fe..796045a 100644 --- a/src/core/collection/merge.rs +++ b/src/core/collection/merge.rs @@ -1,4 +1,6 @@ -use std::{cmp::Ordering, collections::HashMap, hash::Hash, iter::Peekable, marker::PhantomData}; +use std::{cmp::Ordering, collections::HashMap, hash::Hash, iter::Peekable}; + +use crate::core::collection::string::NormalString; /// A trait for merging two objects. The merge is asymmetric with the left argument considered to be /// the primary whose properties are to be kept in case of collisions. @@ -80,30 +82,49 @@ where } } -pub trait MergeName { - fn name(&self) -> &str; -} +#[derive(Debug)] +pub struct NormalMap(HashMap>); -pub struct MergeCollections { - _t: PhantomData, - _it: PhantomData, -} - -impl MergeCollections -where - T: MergeName + Merge, - IT: IntoIterator)>, -{ - pub fn merge_by_name(primary_items: &mut Vec, secondary: IT) { - for (name, mut secondary_items) in secondary.into_iter() { - match primary_items.iter_mut().find(|item| item.name() == name) { - Some(ref mut primary_item) => { - // We do not support merging multiple DB items with same name yet. - assert_eq!(secondary_items.len(), 1); - primary_item.merge_in_place(secondary_items.pop().unwrap()); - } - None => primary_items.extend(secondary_items), - } - } +impl Default for NormalMap { + fn default() -> Self { + NormalMap(HashMap::default()) + } +} + +impl NormalMap { + pub fn new() -> Self { + NormalMap(HashMap::new()) + } + + pub fn insert(&mut self, key: NormalString, item: T) { + self.0.entry(key).or_default().push(item); + } + + fn remove(&mut self, key: &NormalString) -> Option> { + self.0.remove(key) + } +} + +pub struct MergeCollections; + +impl MergeCollections { + pub fn merge_by_name(mut primary: NormalMap, secondary: NormalMap) -> Vec { + let mut merged = vec![]; + for (title, mut secondary_items) in secondary.0.into_iter() { + match primary.remove(&title) { + Some(mut primary_items) => { + // We do not support merging multiple items with same name yet. Support will be + // added once encountered in the wild. + assert_eq!(primary_items.len(), 1, "multiple primaries unsupported"); + assert_eq!(secondary_items.len(), 1, "multiple secondaries unsupported"); + let mut primary_item = primary_items.pop().unwrap(); + primary_item.merge_in_place(secondary_items.pop().unwrap()); + merged.push(primary_item); + } + None => merged.extend(secondary_items), + } + } + merged.extend(primary.0.into_values().flatten()); + merged } } diff --git a/src/core/collection/mod.rs b/src/core/collection/mod.rs index 3dbc419..542f018 100644 --- a/src/core/collection/mod.rs +++ b/src/core/collection/mod.rs @@ -4,6 +4,7 @@ pub mod album; pub mod artist; pub mod merge; pub mod musicbrainz; +pub mod string; pub mod track; use std::fmt::{self, Display}; @@ -40,3 +41,7 @@ impl From for Error { Error::MbidError(err.to_string()) } } + +#[cfg(nightly)] +#[cfg(test)] +mod benchmod; diff --git a/src/core/collection/string.rs b/src/core/collection/string.rs new file mode 100644 index 0000000..7617c0f --- /dev/null +++ b/src/core/collection/string.rs @@ -0,0 +1,81 @@ +use aho_corasick::AhoCorasick; +use once_cell::sync::Lazy; + +// Unlikely that this covers all possible strings, but it should at least cover strings +// relevant for music (at least in English). The list of characters handled is based on +// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters. +// +// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018, +// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign +const SPECIAL: [char; 11] = ['‐', '‒', '–', '—', '―', '‘', '’', '“', '”', '…', '−']; +const REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"]; +static AC: Lazy = + Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap()); + +pub fn is_case_sensitive(string: &str) -> bool { + string + .chars() + .any(|ch| ch.is_alphabetic() && ch.is_uppercase()) +} + +pub fn is_char_sensitive(string: &str) -> bool { + // Benchmarking reveals that using AhoCorasick is slower. At a guess, this is likely due to + // a high constant cost of AhoCorasick and the otherwise simple nature of the task. + string.chars().any(|ch| SPECIAL.contains(&ch)) +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct NormalString { + pub string: String, +} + +pub fn normalize_string_with(string: &str, lowercase: bool, asciify: bool) -> NormalString { + let string = if asciify { + if lowercase { + AC.replace_all(&string.to_lowercase(), &REPLACE) + } else { + AC.replace_all(string, &REPLACE) + } + } else if lowercase { + string.to_lowercase() + } else { + string.to_owned() + }; + NormalString { string } +} + +pub fn normalize_string(string: &str) -> NormalString { + NormalString { + string: AC.replace_all(&string.to_lowercase(), &REPLACE), + } +} + +#[cfg(nightly)] +#[cfg(test)] +mod benches { + // The purpose of these benches was to evaluate the benefit of AhoCorasick over std solutions. + // The winners are left to allow comparison with future alternatives. + use test::Bencher; + + use crate::core::collection::benchmod::ARTISTS; + + use super::*; + + #[bench] + fn bench_is_char_sensitive(b: &mut Bencher) { + let mut iter = ARTISTS.iter().cycle(); + b.iter(|| test::black_box(is_char_sensitive(&iter.next().unwrap()))) + } + + #[bench] + fn bench_normalize_string_with(b: &mut Bencher) { + let mut iter = ARTISTS.iter().cycle(); + b.iter(|| test::black_box(normalize_string_with(&iter.next().unwrap(), true, true))) + } + + #[bench] + fn bench_normalize_string(b: &mut Bencher) { + let mut iter = ARTISTS.iter().cycle(); + b.iter(|| test::black_box(normalize_string(&iter.next().unwrap()))) + } +} diff --git a/src/core/musichoard/base.rs b/src/core/musichoard/base.rs index 50c9340..f247eb0 100644 --- a/src/core/musichoard/base.rs +++ b/src/core/musichoard/base.rs @@ -1,11 +1,9 @@ -use std::collections::HashMap; - use crate::core::{ collection::{ album::{Album, AlbumId}, artist::{Artist, ArtistId}, - merge::MergeCollections, - Collection, + merge::{MergeCollections, NormalMap}, + string, Collection, }, musichoard::{Error, MusicHoard}, }; @@ -59,20 +57,21 @@ impl IMusicHoardBasePrivate for MusicHoard } fn merge_collections(&self) -> Collection { - let mut primary = self.library_cache.clone(); - let mut secondary = HashMap::>::new(); + let mut primary = NormalMap::::new(); + let mut secondary = NormalMap::::new(); - for artist in self.database_cache.iter().cloned() { - secondary - .entry(artist.meta.id.name.clone()) - .or_default() - .push(artist); + for artist in self.library_cache.iter().cloned() { + primary.insert(string::normalize_string(&artist.meta.id.name), artist); } - MergeCollections::merge_by_name(&mut primary, secondary); - primary.sort_unstable(); + for artist in self.database_cache.iter().cloned() { + secondary.insert(string::normalize_string(&artist.meta.id.name), artist); + } - primary + let mut collection = MergeCollections::merge_by_name(primary, secondary); + collection.sort_unstable(); + + collection } fn get_artist<'a>(collection: &'a Collection, artist_id: &ArtistId) -> Option<&'a Artist> { @@ -224,4 +223,71 @@ mod tests { mh.collection = mh.merge_collections(); assert_eq!(expected, mh.collection); } + + #[test] + #[should_panic(expected = "multiple secondaries unsupported")] + fn merge_two_db_artists_to_one_lib_artist() { + let mut left = Collection::new(); + let mut right = Collection::new(); + + let artist = Artist::new(ArtistId::new("Artist")); + + left.push(artist.clone()); + right.push(artist.clone()); + right.push(artist.clone()); + + let mut mh = MusicHoard { + library_cache: left.clone(), + database_cache: right.clone(), + ..Default::default() + }; + + mh.collection = mh.merge_collections(); + } + + #[test] + #[should_panic(expected = "multiple primaries unsupported")] + fn merge_one_db_artist_to_two_lib_artists() { + let mut left = Collection::new(); + let mut right = Collection::new(); + + let artist = Artist::new(ArtistId::new("Artist")); + + left.push(artist.clone()); + left.push(artist.clone()); + right.push(artist.clone()); + + let mut mh = MusicHoard { + library_cache: left.clone(), + database_cache: right.clone(), + ..Default::default() + }; + + mh.collection = mh.merge_collections(); + } + + #[test] + fn merge_normalized_artist_names() { + let mut left = Collection::new(); + let mut right = Collection::new(); + + left.push(Artist::new(ArtistId::new("Artist‐Name ‘Name’"))); + + right.push(Artist::new(ArtistId::new("arTist—naMe 'name’"))); + right.push(Artist::new(ArtistId::new("Artist‐Name “Name”"))); + + // The first artist will be merged, the second will be added. + let mut expected = left.clone(); + expected.push(right.last().unwrap().clone()); + expected.sort_unstable(); + + let mut mh = MusicHoard { + library_cache: left.clone(), + database_cache: right.clone(), + ..Default::default() + }; + + mh.collection = mh.merge_collections(); + assert_eq!(expected, mh.collection); + } } diff --git a/src/lib.rs b/src/lib.rs index 0213dc6..6e8d317 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,7 @@ //! MusicHoard - a music collection manager. +#![cfg_attr(nightly, feature(test))] +#[cfg(nightly)] +extern crate test; mod core; pub mod external; diff --git a/src/main.rs b/src/main.rs index 4577ea6..cfbf750 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,3 @@ -#![cfg_attr(nightly, feature(test))] -#[cfg(nightly)] -extern crate test; - mod tui; use std::{ffi::OsString, fs::OpenOptions, io, path::PathBuf, thread}; diff --git a/src/tui/app/machine/mod.rs b/src/tui/app/machine/mod.rs index 19f930c..2d0322f 100644 --- a/src/tui/app/machine/mod.rs +++ b/src/tui/app/machine/mod.rs @@ -601,7 +601,3 @@ mod tests { app.unwrap_critical(); } } - -#[cfg(nightly)] -#[cfg(test)] -mod benchmod; diff --git a/src/tui/app/machine/search_state.rs b/src/tui/app/machine/search_state.rs index ce6d519..736bcec 100644 --- a/src/tui/app/machine/search_state.rs +++ b/src/tui/app/machine/search_state.rs @@ -1,7 +1,4 @@ -use aho_corasick::AhoCorasick; -use once_cell::sync::Lazy; - -use musichoard::collection::{album::Album, artist::Artist, track::Track}; +use musichoard::collection::{album::Album, artist::Artist, string, track::Track}; use crate::tui::app::{ machine::{App, AppInner, AppMachine}, @@ -9,17 +6,6 @@ use crate::tui::app::{ AppPublicState, AppState, Category, IAppInteractSearch, }; -// Unlikely that this covers all possible strings, but it should at least cover strings -// relevant for music (at least in English). The list of characters handled is based on -// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters. -// -// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018, -// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign -const SPECIAL: [char; 11] = ['‐', '‒', '–', '—', '―', '‘', '’', '“', '”', '…', '−']; -const REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"]; -static AC: Lazy = - Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap()); - pub struct SearchState { string: String, orig: ListSelection, @@ -114,10 +100,6 @@ trait IAppInteractSearchPrivate { fn predicate_albums(case_sens: bool, char_sens: bool, search: &str, probe: &Album) -> bool; fn predicate_tracks(case_sens: bool, char_sens: bool, search: &str, probe: &Track) -> bool; fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool; - - fn is_case_sensitive(artist_name: &str) -> bool; - fn is_char_sensitive(artist_name: &str) -> bool; - fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String; } impl IAppInteractSearchPrivate for AppMachine { @@ -160,9 +142,9 @@ impl IAppInteractSearchPrivate for AppMachine { where P: FnMut(bool, bool, &str, &T) -> bool, { - let case_sens = Self::is_case_sensitive(name); - let char_sens = Self::is_char_sensitive(name); - let search = Self::normalize_search(name, !case_sens, !char_sens); + let case_sens = string::is_case_sensitive(name); + let char_sens = string::is_char_sensitive(name); + let search = string::normalize_string_with(name, !case_sens, !char_sens); let mut index = st.index; if next && ((index + 1) < st.list.len()) { @@ -172,18 +154,18 @@ impl IAppInteractSearchPrivate for AppMachine { let slice = &st.list[index..]; slice .iter() - .position(|probe| pred(case_sens, char_sens, &search, probe)) + .position(|probe| pred(case_sens, char_sens, &search.string, probe)) .map(|slice_index| index + slice_index) } fn predicate_artists(case_sens: bool, char_sens: bool, search: &str, probe: &Artist) -> bool { - let name = Self::normalize_search(&probe.meta.id.name, !case_sens, !char_sens); - let mut result = name.starts_with(search); + let name = string::normalize_string_with(&probe.meta.id.name, !case_sens, !char_sens); + let mut result = name.string.starts_with(search); if let Some(ref probe_sort) = probe.meta.sort { if !result { - let name = Self::normalize_search(probe_sort, !case_sens, !char_sens); - result = name.starts_with(search); + let name = string::normalize_string_with(probe_sort, !case_sens, !char_sens); + result = name.string.starts_with(search); } } @@ -199,33 +181,9 @@ impl IAppInteractSearchPrivate for AppMachine { } fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool { - Self::normalize_search(title, !case_sens, !char_sens).starts_with(search) - } - - fn is_case_sensitive(artist_name: &str) -> bool { - artist_name - .chars() - .any(|ch| ch.is_alphabetic() && ch.is_uppercase()) - } - - fn is_char_sensitive(artist_name: &str) -> bool { - // Benchmarking reveals that using AhoCorasick is slower. At a guess, this is likely due to - // a high constant cost of AhoCorasick and the otherwise simple nature of the task. - artist_name.chars().any(|ch| SPECIAL.contains(&ch)) - } - - fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String { - if asciify { - if lowercase { - AC.replace_all(&search.to_lowercase(), &REPLACE) - } else { - AC.replace_all(search, &REPLACE) - } - } else if lowercase { - search.to_lowercase() - } else { - search.to_owned() - } + string::normalize_string_with(title, !case_sens, !char_sens) + .string + .starts_with(search) } } @@ -544,28 +502,3 @@ mod tests { assert_eq!(browse.inner.selection.selected(), None); } } - -#[cfg(nightly)] -#[cfg(test)] -mod benches { - // The purpose of these benches was to evaluate the benefit of AhoCorasick over std solutions. - use test::Bencher; - - use crate::tui::{app::machine::benchmod::ARTISTS, lib::MockIMusicHoard}; - - use super::*; - - type Search = AppMachine; - - #[bench] - fn is_char_sensitive(b: &mut Bencher) { - let mut iter = ARTISTS.iter().cycle(); - b.iter(|| test::black_box(Search::is_char_sensitive(&iter.next().unwrap()))) - } - - #[bench] - fn normalize_search(b: &mut Bencher) { - let mut iter = ARTISTS.iter().cycle(); - b.iter(|| test::black_box(Search::normalize_search(&iter.next().unwrap(), true, true))) - } -}