Startup merge fails when the database has two albums with the same title as an album in the library (#246)
All checks were successful
Cargo CI / Build and Test (push) Successful in 2m5s
Cargo CI / Lint (push) Successful in 1m8s

Closes #245

Reviewed-on: #246
This commit is contained in:
Wojciech Kozlowski 2025-01-03 17:46:55 +01:00
parent 2b468260cc
commit 90c381c165
14 changed files with 476 additions and 202 deletions

View File

@ -38,7 +38,7 @@ jobs:
--ignore "tests/*" --ignore "tests/*"
--ignore "src/main.rs" --ignore "src/main.rs"
--ignore "src/bin/musichoard-edit.rs" --ignore "src/bin/musichoard-edit.rs"
--excl-line "^#\[derive|unimplemented\!\(\)" --excl-line "^#\[derive|unimplemented\!\(|unreachable\!\("
--excl-start "GRCOV_EXCL_START|mod tests \{" --excl-start "GRCOV_EXCL_START|mod tests \{"
--excl-stop "GRCOV_EXCL_STOP" --excl-stop "GRCOV_EXCL_STOP"
--output-path ./target/debug/coverage/ --output-path ./target/debug/coverage/

View File

@ -6,9 +6,9 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
aho-corasick = { version = "1.1.2", optional = true } aho-corasick = { version = "1.1.2" }
crossterm = { version = "0.28.1", optional = true} crossterm = { version = "0.28.1", optional = true}
once_cell = { version = "1.19.0", optional = true} once_cell = { version = "1.19.0" }
openssh = { version = "0.10.3", features = ["native-mux"], default-features = false, optional = true} openssh = { version = "0.10.3", features = ["native-mux"], default-features = false, optional = true}
paste = { version = "1.0.15", optional = true } paste = { version = "1.0.15", optional = true }
ratatui = { version = "0.28.1", optional = true} ratatui = { version = "0.28.1", optional = true}
@ -37,7 +37,7 @@ database-json = ["serde", "serde_json"]
library-beets = [] library-beets = []
library-beets-ssh = ["openssh", "tokio"] library-beets-ssh = ["openssh", "tokio"]
musicbrainz = ["paste", "reqwest", "serde", "serde_json"] musicbrainz = ["paste", "reqwest", "serde", "serde_json"]
tui = ["aho-corasick", "crossterm", "once_cell", "ratatui", "tui-input"] tui = ["crossterm", "ratatui", "tui-input"]
[[bin]] [[bin]]
name = "musichoard" name = "musichoard"

View File

@ -50,7 +50,7 @@ grcov codecov/debug/profraw \
--ignore "tests/*" \ --ignore "tests/*" \
--ignore "src/main.rs" \ --ignore "src/main.rs" \
--ignore "src/bin/musichoard-edit.rs" \ --ignore "src/bin/musichoard-edit.rs" \
--excl-line "^#\[derive|unimplemented\!\(\)" \ --excl-line "^#\[derive|unimplemented\!\(|unreachable\!\(" \
--excl-start "GRCOV_EXCL_START|mod tests \{" \ --excl-start "GRCOV_EXCL_START|mod tests \{" \
--excl-stop "GRCOV_EXCL_STOP" \ --excl-stop "GRCOV_EXCL_STOP" \
--output-path ./codecov/debug/coverage/ --output-path ./codecov/debug/coverage/

View File

@ -1,11 +1,13 @@
use std::mem; use std::mem;
use crate::core::collection::{ use crate::core::collection::{
merge::{Merge, MergeName, MergeSorted}, merge::{Merge, MergeSorted},
musicbrainz::{MbAlbumRef, MbRefOption}, musicbrainz::{MbAlbumRef, MbRefOption},
track::{Track, TrackFormat}, track::{Track, TrackFormat},
}; };
use super::string;
/// An album is a collection of tracks that were released together. /// An album is a collection of tracks that were released together.
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub struct Album { pub struct Album {
@ -54,12 +56,6 @@ impl AlbumLibId {
/// Unique database identifier. Use MBID for this purpose. /// Unique database identifier. Use MBID for this purpose.
pub type AlbumMbRef = MbRefOption<MbAlbumRef>; pub type AlbumMbRef = MbRefOption<MbAlbumRef>;
impl MergeName for Album {
fn name(&self) -> &str {
&self.meta.id.title
}
}
// There are crates for handling dates, but we don't need much complexity beyond year-month-day. // There are crates for handling dates, but we don't need much complexity beyond year-month-day.
/// The album's release date. /// The album's release date.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
@ -320,7 +316,8 @@ impl AlbumId {
} }
pub fn compatible(&self, other: &AlbumId) -> bool { pub fn compatible(&self, other: &AlbumId) -> bool {
let titles_compatible = self.title == other.title; let titles_compatible =
string::normalize_string(&self.title) == string::normalize_string(&other.title);
let lib_id_compatible = let lib_id_compatible =
self.lib_id.is_none() || other.lib_id.is_none() || (self.lib_id == other.lib_id); self.lib_id.is_none() || other.lib_id.is_none() || (self.lib_id == other.lib_id);
let mb_ref_compatible = let mb_ref_compatible =

View File

@ -1,12 +1,14 @@
use std::{ use std::{
collections::HashMap, collections::HashMap,
fmt::{self, Debug, Display}, fmt::{self, Debug, Display},
mem,
}; };
use crate::core::collection::{ use crate::core::collection::{
album::{Album, AlbumLibId}, album::{Album, AlbumLibId},
merge::{Merge, MergeCollections, MergeName}, merge::{Merge, MergeCollections, NormalMap},
musicbrainz::{MbArtistRef, MbRefOption}, musicbrainz::{MbArtistRef, MbRefOption},
string::{self, NormalString},
}; };
/// An artist. /// An artist.
@ -30,12 +32,6 @@ pub struct ArtistInfo {
pub properties: HashMap<String, Vec<String>>, pub properties: HashMap<String, Vec<String>>,
} }
impl MergeName for Artist {
fn name(&self) -> &str {
&self.meta.id.name
}
}
/// The artist identifier. /// The artist identifier.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ArtistId { pub struct ArtistId {
@ -46,49 +42,6 @@ pub struct ArtistId {
/// Unique database identifier. Use MBID for this purpose. /// Unique database identifier. Use MBID for this purpose.
pub type ArtistMbRef = MbRefOption<MbArtistRef>; pub type ArtistMbRef = MbRefOption<MbArtistRef>;
impl Artist {
/// Create new [`Artist`] with the given [`ArtistId`].
pub fn new<Id: Into<ArtistId>>(id: Id) -> Self {
Artist {
meta: ArtistMeta::new(id),
albums: vec![],
}
}
fn merge_album_by_lib_id(
primary_albums: &mut [Album],
mut secondary_album: Album,
) -> Option<Album> {
let id_opt = secondary_album.meta.id.lib_id;
if let id @ AlbumLibId::Value(_) | id @ AlbumLibId::Singleton = id_opt {
let mut iter = primary_albums.iter_mut();
if let Some(ref mut primary_album) = iter.find(|a| a.meta.id.lib_id == id) {
primary_album.merge_in_place(secondary_album);
return None;
}
secondary_album.meta.id.lib_id = AlbumLibId::None;
}
Some(secondary_album)
}
fn merge_albums_by_lib_id(
primary_albums: &mut [Album],
secondary_albums: Vec<Album>,
) -> HashMap<String, Vec<Album>> {
let mut secondary_without_id = HashMap::<String, Vec<Album>>::new();
for secondary_album in secondary_albums.into_iter() {
let unmerged = Artist::merge_album_by_lib_id(primary_albums, secondary_album);
if let Some(secondary_album) = unmerged {
secondary_without_id
.entry(secondary_album.meta.id.title.clone())
.or_default()
.push(secondary_album);
}
}
secondary_without_id
}
}
impl PartialOrd for Artist { impl PartialOrd for Artist {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other)) Some(self.cmp(other))
@ -104,10 +57,94 @@ impl Ord for Artist {
impl Merge for Artist { impl Merge for Artist {
fn merge_in_place(&mut self, other: Self) { fn merge_in_place(&mut self, other: Self) {
self.meta.merge_in_place(other.meta); self.meta.merge_in_place(other.meta);
self.albums = MergeAlbums::merge_albums(mem::take(&mut self.albums), other.albums);
}
}
let other_without_id = Artist::merge_albums_by_lib_id(&mut self.albums, other.albums); #[derive(Debug, Default)]
MergeCollections::merge_by_name(&mut self.albums, other_without_id); struct MergeAlbums {
self.albums.sort_unstable(); primary_by_lib_id: HashMap<u32, Album>,
primary_by_singleton: HashMap<NormalString, Album>,
primary_by_title: NormalMap<Album>,
secondary_by_title: NormalMap<Album>,
merged: Vec<Album>,
}
impl MergeAlbums {
fn merge_albums(primary_albums: Vec<Album>, secondary_albums: Vec<Album>) -> Vec<Album> {
let mut cache = MergeAlbums::new(primary_albums);
cache.merge_albums_by_lib_id(secondary_albums);
cache.merged.extend(MergeCollections::merge_by_name(
cache.primary_by_title,
cache.secondary_by_title,
));
cache.merged.sort_unstable();
cache.merged
}
fn new(primary_albums: Vec<Album>) -> Self {
let mut cache = MergeAlbums::default();
for album in primary_albums.into_iter() {
match album.meta.id.lib_id {
AlbumLibId::Value(value) => {
assert!(cache.primary_by_lib_id.insert(value, album).is_none())
}
AlbumLibId::Singleton => assert!(cache
.primary_by_singleton
.insert(string::normalize_string(&album.meta.id.title), album)
.is_none()),
_ => unreachable!("primary collection should always have a lib_id"),
}
}
cache
}
fn merge_albums_by_lib_id(&mut self, secondary_albums: Vec<Album>) {
for secondary_album in secondary_albums.into_iter() {
self.merge_album_by_lib_id(secondary_album);
}
for (_, primary_album) in self.primary_by_lib_id.drain() {
self.primary_by_title.insert(
string::normalize_string(&primary_album.meta.id.title),
primary_album,
);
}
for (title, primary_album) in self.primary_by_singleton.drain() {
self.primary_by_title.insert(title, primary_album);
}
}
fn merge_album_by_lib_id(&mut self, mut secondary_album: Album) {
let title = string::normalize_string(&secondary_album.meta.id.title);
let id_opt = secondary_album.meta.id.lib_id;
let find = match id_opt {
AlbumLibId::Value(value) => self.primary_by_lib_id.remove(&value),
AlbumLibId::Singleton => self.primary_by_singleton.remove(&title),
AlbumLibId::None => None,
};
if let Some(mut primary_album) = find {
primary_album.merge_in_place(secondary_album);
self.merged.push(primary_album);
return;
}
secondary_album.meta.id.lib_id = AlbumLibId::None;
self.secondary_by_title.insert(
string::normalize_string(&secondary_album.meta.id.title),
secondary_album,
);
}
}
impl Artist {
/// Create new [`Artist`] with the given [`ArtistId`].
pub fn new<Id: Into<ArtistId>>(id: Id) -> Self {
Artist {
meta: ArtistMeta::new(id),
albums: vec![],
}
} }
} }
@ -248,7 +285,8 @@ impl ArtistId {
} }
pub fn compatible(&self, other: &ArtistId) -> bool { pub fn compatible(&self, other: &ArtistId) -> bool {
let names_compatible = self.name == other.name; let names_compatible =
string::normalize_string(&self.name) == string::normalize_string(&other.name);
let mb_ref_compatible = let mb_ref_compatible =
self.mb_ref.is_none() || other.mb_ref.is_none() || (self.mb_ref == other.mb_ref); self.mb_ref.is_none() || other.mb_ref.is_none() || (self.mb_ref == other.mb_ref);
names_compatible && mb_ref_compatible names_compatible && mb_ref_compatible
@ -263,7 +301,13 @@ impl Display for ArtistId {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::core::testmod::FULL_COLLECTION; use crate::{
collection::{
album::{AlbumId, AlbumMbRef},
musicbrainz::MbAlbumRef,
},
core::testmod::FULL_COLLECTION,
};
use super::*; use super::*;
@ -533,4 +577,119 @@ mod tests {
let merged = left.clone().merge(right); let merged = left.clone().merge(right);
assert_eq!(expected, merged); assert_eq!(expected, merged);
} }
#[test]
#[should_panic(expected = "multiple secondaries unsupported")]
fn merge_two_db_albums_to_one_lib_album() {
let mut left = Artist::new(ArtistId::new("Artist"));
let mut right = left.clone();
let album = Album::new(AlbumId::new("Album"));
left.albums.push(album.clone());
left.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
right.albums.push(album.clone());
right.albums.push(album.clone());
left.merge(right);
}
#[test]
#[should_panic(expected = "multiple primaries unsupported")]
fn merge_one_db_album_to_two_lib_albums() {
let mut left = Artist::new(ArtistId::new("Artist"));
let mut right = left.clone();
let album = Album::new(AlbumId::new("Album"));
left.albums.push(album.clone());
left.albums.push(album.clone());
left.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
left.albums[1].meta.id.lib_id = AlbumLibId::Value(2);
right.albums.push(album.clone());
left.merge(right);
}
#[test]
fn merge_normalized_album_titles() {
let mut left = Artist::new(ArtistId::new("Artist"));
let mut right = left.clone();
left.albums
.push(Album::new(AlbumId::new("AlbumTitle Title")));
left.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
right
.albums
.push(Album::new(AlbumId::new("alBum—tiTle 'title")));
right
.albums
.push(Album::new(AlbumId::new("AlbumTitle “Title”")));
// The first album will be merged, the second will be added.
let mut expected = left.clone();
expected.albums.push(right.albums.last().unwrap().clone());
expected.albums.sort_unstable();
let merged = left.merge(right);
assert_eq!(expected, merged);
}
#[test]
fn merge_multiple_singletons() {
let mut left = Artist::new(ArtistId::new("Artist"));
let mut right = left.clone();
left.albums.push(Album::new(AlbumId::new("Singleton 1")));
left.albums.push(Album::new(AlbumId::new("Singleton 2")));
left.albums[0].meta.id.lib_id = AlbumLibId::Singleton;
left.albums[1].meta.id.lib_id = AlbumLibId::Singleton;
right.albums.push(Album::new(AlbumId::new("Singleton 1")));
right.albums.push(Album::new(AlbumId::new("Singleton 2")));
right.albums.push(Album::new(AlbumId::new("Singleton 3")));
right.albums[0].meta.id.lib_id = AlbumLibId::Singleton;
right.albums[2].meta.id.lib_id = AlbumLibId::Singleton;
// Expect first two albums to merge (including lib id) and the third to be added. However,
// the third one will lose its lib id due to no match in the primary.
let mut expected = left.clone();
expected.albums.push(right.albums.last().unwrap().clone());
expected.albums.last_mut().unwrap().meta.id.lib_id = AlbumLibId::None;
let merged = left.merge(right);
assert_eq!(expected, merged);
}
#[test]
fn merge_two_db_albums_to_one_lib_album_with_ids() {
let mut left = Artist::new(ArtistId::new("Artist"));
let mut right = left.clone();
let album = Album::new(AlbumId::new("Album"));
left.albums.push(album.clone());
left.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
// In this test, one DB album has a lib id that matches that of the lib album. This is the
// one that we expect to merge while the second album should just be added.
right.albums.push(album.clone());
right.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
right.albums[0].meta.id.mb_ref = AlbumMbRef::Some(
MbAlbumRef::from_uuid_str("00000000-0000-0000-0000-000000000000").unwrap(),
);
right.albums.push(album.clone());
right.albums[1].meta.id.mb_ref = AlbumMbRef::Some(
MbAlbumRef::from_uuid_str("11111111-1111-1111-1111-111111111111").unwrap(),
);
let expected = right.clone();
let merged = left.merge(right);
assert_eq!(expected, merged);
}
} }

View File

@ -1,5 +1,7 @@
// Date: 2024-02-19 // 1. `beet ls -a -f '$albumartist'`.
pub const ARTISTS: [&str; 141] = [ // 2. `M-x delete-duplicate-lines`.
// Date: 2025-01-03
pub const ARTISTS: [&str; 156] = [
"Abadden", "Abadden",
"Acid Drinkers", "Acid Drinkers",
"Adema", "Adema",
@ -16,6 +18,7 @@ pub const ARTISTS: [&str; 141] = [
"Аркона", "Аркона",
"Artas", "Artas",
"As I Lay Dying", "As I Lay Dying",
"At the Gates",
"Avenged Sevenfold", "Avenged Sevenfold",
"Aversions Crown", "Aversions Crown",
"Aviators", "Aviators",
@ -30,6 +33,7 @@ pub const ARTISTS: [&str; 141] = [
"Bloodbath", "Bloodbath",
"Bloodbound", "Bloodbound",
"Brothers of Metal", "Brothers of Metal",
"Carcass",
"Carnation", "Carnation",
"Cellar Darling", "Cellar Darling",
"Children of Bodom", "Children of Bodom",
@ -44,7 +48,6 @@ pub const ARTISTS: [&str; 141] = [
"Dynazty", "Dynazty",
"Edguy", "Edguy",
"Eluveitie", "Eluveitie",
"Eminem",
"Enforcer", "Enforcer",
"Ensiferum", "Ensiferum",
"Epica", "Epica",
@ -72,10 +75,12 @@ pub const ARTISTS: [&str; 141] = [
"Heavens Basement", "Heavens Basement",
"Heavy Load", "Heavy Load",
"Hermh", "Hermh",
"Ignea",
"Immortal", "Immortal",
"In Flames", "In Flames",
"Insomnium", "Insomnium",
"Iron Maiden", "Iron Maiden",
"Judas Priest",
"Kalmah", "Kalmah",
"Kataklysm", "Kataklysm",
"Kontrust", "Kontrust",
@ -86,14 +91,16 @@ pub const ARTISTS: [&str; 141] = [
"Linkin Park", "Linkin Park",
"Lost Dreams", "Lost Dreams",
"Man Must Die", "Man Must Die",
"Månegarm",
"Me and That Man", "Me and That Man",
"Megaton Sword",
"Mercyful Fate", "Mercyful Fate",
"Metal Church",
"Metallica", "Metallica",
"Michael Jackson",
"Miracle of Sound", "Miracle of Sound",
"Misery Index", "Misery Index",
"Mortal Sin",
"Mudvayne", "Mudvayne",
"Månegarm",
"Nickelback", "Nickelback",
"Nightwish", "Nightwish",
"Nile", "Nile",
@ -103,8 +110,8 @@ pub const ARTISTS: [&str; 141] = [
"Oomph!", "Oomph!",
"P.O.D.", "P.O.D.",
"Paddy and the Rats", "Paddy and the Rats",
"Pain",
"Paul Stanley", "Paul Stanley",
"Persefone",
"Peyton Parrish", "Peyton Parrish",
"Powerwolf", "Powerwolf",
"Primitai", "Primitai",
@ -113,9 +120,14 @@ pub const ARTISTS: [&str; 141] = [
"Rammstein", "Rammstein",
"Red Hot Chili Peppers", "Red Hot Chili Peppers",
"Revocation", "Revocation",
"Ride the Sky",
"Rob Zombie", "Rob Zombie",
"Sabaton", "Sabaton",
"Saltatio Mortis",
"Saltatio Mortis & Lara Loft",
"Satan",
"Savatage", "Savatage",
"Scar Symmetry",
"Scars on Broadway", "Scars on Broadway",
"Scorpions", "Scorpions",
"Silent Descent", "Silent Descent",
@ -132,13 +144,18 @@ pub const ARTISTS: [&str; 141] = [
"Timecry", "Timecry",
"Trivium", "Trivium",
"Tuomas Holopainen", "Tuomas Holopainen",
"VNV Nation", "Turisas",
"Vader", "Vader",
"Vesania",
"Vicious Crusade", "Vicious Crusade",
"The Wages of Sin", "Vintersorg",
"VNV Nation",
"W.A.S.P.",
"Whitechapel", "Whitechapel",
"Within Temptation", "Within Temptation",
"Woe of Tyrants", "Woe of Tyrants",
"Wovenwar", "Wovenwar",
"Xandria", "Xandria",
"Jonathan Young",
"Jonathan Young, Peyton Parrish & Colm R. McGuinness",
]; ];

View File

@ -1,4 +1,6 @@
use std::{cmp::Ordering, collections::HashMap, hash::Hash, iter::Peekable, marker::PhantomData}; use std::{cmp::Ordering, collections::HashMap, hash::Hash, iter::Peekable};
use crate::core::collection::string::NormalString;
/// A trait for merging two objects. The merge is asymmetric with the left argument considered to be /// A trait for merging two objects. The merge is asymmetric with the left argument considered to be
/// the primary whose properties are to be kept in case of collisions. /// the primary whose properties are to be kept in case of collisions.
@ -80,30 +82,49 @@ where
} }
} }
pub trait MergeName { #[derive(Debug)]
fn name(&self) -> &str; pub struct NormalMap<T>(HashMap<NormalString, Vec<T>>);
impl<T> Default for NormalMap<T> {
fn default() -> Self {
NormalMap(HashMap::default())
}
} }
pub struct MergeCollections<T, IT> { impl<T> NormalMap<T> {
_t: PhantomData<T>, pub fn new() -> Self {
_it: PhantomData<IT>, NormalMap(HashMap::new())
}
pub fn insert(&mut self, key: NormalString, item: T) {
self.0.entry(key).or_default().push(item);
}
fn remove(&mut self, key: &NormalString) -> Option<Vec<T>> {
self.0.remove(key)
}
} }
impl<T, IT> MergeCollections<T, IT> pub struct MergeCollections;
where
T: MergeName + Merge, impl MergeCollections {
IT: IntoIterator<Item = (String, Vec<T>)>, pub fn merge_by_name<T: Merge>(mut primary: NormalMap<T>, secondary: NormalMap<T>) -> Vec<T> {
{ let mut merged = vec![];
pub fn merge_by_name(primary_items: &mut Vec<T>, secondary: IT) { for (title, mut secondary_items) in secondary.0.into_iter() {
for (name, mut secondary_items) in secondary.into_iter() { match primary.remove(&title) {
match primary_items.iter_mut().find(|item| item.name() == name) { Some(mut primary_items) => {
Some(ref mut primary_item) => { // We do not support merging multiple items with same name yet. Support will be
// We do not support merging multiple DB items with same name yet. // added once encountered in the wild.
assert_eq!(secondary_items.len(), 1); assert_eq!(primary_items.len(), 1, "multiple primaries unsupported");
assert_eq!(secondary_items.len(), 1, "multiple secondaries unsupported");
let mut primary_item = primary_items.pop().unwrap();
primary_item.merge_in_place(secondary_items.pop().unwrap()); primary_item.merge_in_place(secondary_items.pop().unwrap());
merged.push(primary_item);
} }
None => primary_items.extend(secondary_items), None => merged.extend(secondary_items),
} }
} }
merged.extend(primary.0.into_values().flatten());
merged
} }
} }

View File

@ -4,6 +4,7 @@ pub mod album;
pub mod artist; pub mod artist;
pub mod merge; pub mod merge;
pub mod musicbrainz; pub mod musicbrainz;
pub mod string;
pub mod track; pub mod track;
use std::fmt::{self, Display}; use std::fmt::{self, Display};
@ -40,3 +41,7 @@ impl From<uuid::Error> for Error {
Error::MbidError(err.to_string()) Error::MbidError(err.to_string())
} }
} }
#[cfg(nightly)]
#[cfg(test)]
mod benchmod;

View File

@ -0,0 +1,81 @@
use aho_corasick::AhoCorasick;
use once_cell::sync::Lazy;
// Unlikely that this covers all possible strings, but it should at least cover strings
// relevant for music (at least in English). The list of characters handled is based on
// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters.
//
// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018,
// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign
const SPECIAL: [char; 11] = ['', '', '', '—', '―', '', '', '“', '”', '…', ''];
const REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"];
static AC: Lazy<AhoCorasick> =
Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap());
pub fn is_case_sensitive(string: &str) -> bool {
string
.chars()
.any(|ch| ch.is_alphabetic() && ch.is_uppercase())
}
pub fn is_char_sensitive(string: &str) -> bool {
// Benchmarking reveals that using AhoCorasick is slower. At a guess, this is likely due to
// a high constant cost of AhoCorasick and the otherwise simple nature of the task.
string.chars().any(|ch| SPECIAL.contains(&ch))
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct NormalString {
pub string: String,
}
pub fn normalize_string_with(string: &str, lowercase: bool, asciify: bool) -> NormalString {
let string = if asciify {
if lowercase {
AC.replace_all(&string.to_lowercase(), &REPLACE)
} else {
AC.replace_all(string, &REPLACE)
}
} else if lowercase {
string.to_lowercase()
} else {
string.to_owned()
};
NormalString { string }
}
pub fn normalize_string(string: &str) -> NormalString {
NormalString {
string: AC.replace_all(&string.to_lowercase(), &REPLACE),
}
}
#[cfg(nightly)]
#[cfg(test)]
mod benches {
// The purpose of these benches was to evaluate the benefit of AhoCorasick over std solutions.
// The winners are left to allow comparison with future alternatives.
use test::Bencher;
use crate::core::collection::benchmod::ARTISTS;
use super::*;
#[bench]
fn bench_is_char_sensitive(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(is_char_sensitive(&iter.next().unwrap())))
}
#[bench]
fn bench_normalize_string_with(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(normalize_string_with(&iter.next().unwrap(), true, true)))
}
#[bench]
fn bench_normalize_string(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(normalize_string(&iter.next().unwrap())))
}
}

View File

@ -1,11 +1,9 @@
use std::collections::HashMap;
use crate::core::{ use crate::core::{
collection::{ collection::{
album::{Album, AlbumId}, album::{Album, AlbumId},
artist::{Artist, ArtistId}, artist::{Artist, ArtistId},
merge::MergeCollections, merge::{MergeCollections, NormalMap},
Collection, string, Collection,
}, },
musichoard::{Error, MusicHoard}, musichoard::{Error, MusicHoard},
}; };
@ -59,20 +57,21 @@ impl<Database, Library> IMusicHoardBasePrivate for MusicHoard<Database, Library>
} }
fn merge_collections(&self) -> Collection { fn merge_collections(&self) -> Collection {
let mut primary = self.library_cache.clone(); let mut primary = NormalMap::<Artist>::new();
let mut secondary = HashMap::<String, Vec<Artist>>::new(); let mut secondary = NormalMap::<Artist>::new();
for artist in self.database_cache.iter().cloned() { for artist in self.library_cache.iter().cloned() {
secondary primary.insert(string::normalize_string(&artist.meta.id.name), artist);
.entry(artist.meta.id.name.clone())
.or_default()
.push(artist);
} }
MergeCollections::merge_by_name(&mut primary, secondary); for artist in self.database_cache.iter().cloned() {
primary.sort_unstable(); secondary.insert(string::normalize_string(&artist.meta.id.name), artist);
}
primary let mut collection = MergeCollections::merge_by_name(primary, secondary);
collection.sort_unstable();
collection
} }
fn get_artist<'a>(collection: &'a Collection, artist_id: &ArtistId) -> Option<&'a Artist> { fn get_artist<'a>(collection: &'a Collection, artist_id: &ArtistId) -> Option<&'a Artist> {
@ -224,4 +223,71 @@ mod tests {
mh.collection = mh.merge_collections(); mh.collection = mh.merge_collections();
assert_eq!(expected, mh.collection); assert_eq!(expected, mh.collection);
} }
#[test]
#[should_panic(expected = "multiple secondaries unsupported")]
fn merge_two_db_artists_to_one_lib_artist() {
let mut left = Collection::new();
let mut right = Collection::new();
let artist = Artist::new(ArtistId::new("Artist"));
left.push(artist.clone());
right.push(artist.clone());
right.push(artist.clone());
let mut mh = MusicHoard {
library_cache: left.clone(),
database_cache: right.clone(),
..Default::default()
};
mh.collection = mh.merge_collections();
}
#[test]
#[should_panic(expected = "multiple primaries unsupported")]
fn merge_one_db_artist_to_two_lib_artists() {
let mut left = Collection::new();
let mut right = Collection::new();
let artist = Artist::new(ArtistId::new("Artist"));
left.push(artist.clone());
left.push(artist.clone());
right.push(artist.clone());
let mut mh = MusicHoard {
library_cache: left.clone(),
database_cache: right.clone(),
..Default::default()
};
mh.collection = mh.merge_collections();
}
#[test]
fn merge_normalized_artist_names() {
let mut left = Collection::new();
let mut right = Collection::new();
left.push(Artist::new(ArtistId::new("ArtistName Name")));
right.push(Artist::new(ArtistId::new("arTist—naMe 'name")));
right.push(Artist::new(ArtistId::new("ArtistName “Name”")));
// The first artist will be merged, the second will be added.
let mut expected = left.clone();
expected.push(right.last().unwrap().clone());
expected.sort_unstable();
let mut mh = MusicHoard {
library_cache: left.clone(),
database_cache: right.clone(),
..Default::default()
};
mh.collection = mh.merge_collections();
assert_eq!(expected, mh.collection);
}
} }

View File

@ -1,4 +1,7 @@
//! MusicHoard - a music collection manager. //! MusicHoard - a music collection manager.
#![cfg_attr(nightly, feature(test))]
#[cfg(nightly)]
extern crate test;
mod core; mod core;
pub mod external; pub mod external;

View File

@ -1,7 +1,3 @@
#![cfg_attr(nightly, feature(test))]
#[cfg(nightly)]
extern crate test;
mod tui; mod tui;
use std::{ffi::OsString, fs::OpenOptions, io, path::PathBuf, thread}; use std::{ffi::OsString, fs::OpenOptions, io, path::PathBuf, thread};

View File

@ -601,7 +601,3 @@ mod tests {
app.unwrap_critical(); app.unwrap_critical();
} }
} }
#[cfg(nightly)]
#[cfg(test)]
mod benchmod;

View File

@ -1,7 +1,4 @@
use aho_corasick::AhoCorasick; use musichoard::collection::{album::Album, artist::Artist, string, track::Track};
use once_cell::sync::Lazy;
use musichoard::collection::{album::Album, artist::Artist, track::Track};
use crate::tui::app::{ use crate::tui::app::{
machine::{App, AppInner, AppMachine}, machine::{App, AppInner, AppMachine},
@ -9,17 +6,6 @@ use crate::tui::app::{
AppPublicState, AppState, Category, IAppInteractSearch, AppPublicState, AppState, Category, IAppInteractSearch,
}; };
// Unlikely that this covers all possible strings, but it should at least cover strings
// relevant for music (at least in English). The list of characters handled is based on
// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters.
//
// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018,
// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign
const SPECIAL: [char; 11] = ['', '', '', '—', '―', '', '', '“', '”', '…', ''];
const REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"];
static AC: Lazy<AhoCorasick> =
Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap());
pub struct SearchState { pub struct SearchState {
string: String, string: String,
orig: ListSelection, orig: ListSelection,
@ -114,10 +100,6 @@ trait IAppInteractSearchPrivate {
fn predicate_albums(case_sens: bool, char_sens: bool, search: &str, probe: &Album) -> bool; fn predicate_albums(case_sens: bool, char_sens: bool, search: &str, probe: &Album) -> bool;
fn predicate_tracks(case_sens: bool, char_sens: bool, search: &str, probe: &Track) -> bool; fn predicate_tracks(case_sens: bool, char_sens: bool, search: &str, probe: &Track) -> bool;
fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool; fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool;
fn is_case_sensitive(artist_name: &str) -> bool;
fn is_char_sensitive(artist_name: &str) -> bool;
fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String;
} }
impl IAppInteractSearchPrivate for AppMachine<SearchState> { impl IAppInteractSearchPrivate for AppMachine<SearchState> {
@ -160,9 +142,9 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
where where
P: FnMut(bool, bool, &str, &T) -> bool, P: FnMut(bool, bool, &str, &T) -> bool,
{ {
let case_sens = Self::is_case_sensitive(name); let case_sens = string::is_case_sensitive(name);
let char_sens = Self::is_char_sensitive(name); let char_sens = string::is_char_sensitive(name);
let search = Self::normalize_search(name, !case_sens, !char_sens); let search = string::normalize_string_with(name, !case_sens, !char_sens);
let mut index = st.index; let mut index = st.index;
if next && ((index + 1) < st.list.len()) { if next && ((index + 1) < st.list.len()) {
@ -172,18 +154,18 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
let slice = &st.list[index..]; let slice = &st.list[index..];
slice slice
.iter() .iter()
.position(|probe| pred(case_sens, char_sens, &search, probe)) .position(|probe| pred(case_sens, char_sens, &search.string, probe))
.map(|slice_index| index + slice_index) .map(|slice_index| index + slice_index)
} }
fn predicate_artists(case_sens: bool, char_sens: bool, search: &str, probe: &Artist) -> bool { fn predicate_artists(case_sens: bool, char_sens: bool, search: &str, probe: &Artist) -> bool {
let name = Self::normalize_search(&probe.meta.id.name, !case_sens, !char_sens); let name = string::normalize_string_with(&probe.meta.id.name, !case_sens, !char_sens);
let mut result = name.starts_with(search); let mut result = name.string.starts_with(search);
if let Some(ref probe_sort) = probe.meta.sort { if let Some(ref probe_sort) = probe.meta.sort {
if !result { if !result {
let name = Self::normalize_search(probe_sort, !case_sens, !char_sens); let name = string::normalize_string_with(probe_sort, !case_sens, !char_sens);
result = name.starts_with(search); result = name.string.starts_with(search);
} }
} }
@ -199,33 +181,9 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
} }
fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool { fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool {
Self::normalize_search(title, !case_sens, !char_sens).starts_with(search) string::normalize_string_with(title, !case_sens, !char_sens)
} .string
.starts_with(search)
fn is_case_sensitive(artist_name: &str) -> bool {
artist_name
.chars()
.any(|ch| ch.is_alphabetic() && ch.is_uppercase())
}
fn is_char_sensitive(artist_name: &str) -> bool {
// Benchmarking reveals that using AhoCorasick is slower. At a guess, this is likely due to
// a high constant cost of AhoCorasick and the otherwise simple nature of the task.
artist_name.chars().any(|ch| SPECIAL.contains(&ch))
}
fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String {
if asciify {
if lowercase {
AC.replace_all(&search.to_lowercase(), &REPLACE)
} else {
AC.replace_all(search, &REPLACE)
}
} else if lowercase {
search.to_lowercase()
} else {
search.to_owned()
}
} }
} }
@ -544,28 +502,3 @@ mod tests {
assert_eq!(browse.inner.selection.selected(), None); assert_eq!(browse.inner.selection.selected(), None);
} }
} }
#[cfg(nightly)]
#[cfg(test)]
mod benches {
// The purpose of these benches was to evaluate the benefit of AhoCorasick over std solutions.
use test::Bencher;
use crate::tui::{app::machine::benchmod::ARTISTS, lib::MockIMusicHoard};
use super::*;
type Search = AppMachine<MockIMusicHoard, SearchState>;
#[bench]
fn is_char_sensitive(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(Search::is_char_sensitive(&iter.next().unwrap())))
}
#[bench]
fn normalize_search(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(Search::normalize_search(&iter.next().unwrap(), true, true)))
}
}