Startup merge fails when the database has two albums with the same title as an album in the library #246

Merged
wojtek merged 11 commits from 245---startup-merge-fails-when-the-database-has-two-albums-with-the-same-title-as-an-album-in-the-library into main 2025-01-03 17:46:55 +01:00
4 changed files with 124 additions and 84 deletions
Showing only changes of commit 0f658f8730 - Show all commits

View File

@ -1,7 +1,7 @@
use std::mem; use std::mem;
use crate::core::collection::{ use crate::core::collection::{
merge::{Merge, MergeName, MergeSorted}, merge::{Merge, MergeSorted},
musicbrainz::{MbAlbumRef, MbRefOption}, musicbrainz::{MbAlbumRef, MbRefOption},
track::{Track, TrackFormat}, track::{Track, TrackFormat},
}; };
@ -54,12 +54,6 @@ impl AlbumLibId {
/// Unique database identifier. Use MBID for this purpose. /// Unique database identifier. Use MBID for this purpose.
pub type AlbumMbRef = MbRefOption<MbAlbumRef>; pub type AlbumMbRef = MbRefOption<MbAlbumRef>;
impl MergeName for Album {
fn name(&self) -> &str {
&self.meta.id.title
}
}
// There are crates for handling dates, but we don't need much complexity beyond year-month-day. // There are crates for handling dates, but we don't need much complexity beyond year-month-day.
/// The album's release date. /// The album's release date.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]

View File

@ -1,11 +1,12 @@
use std::{ use std::{
collections::HashMap, collections::HashMap,
fmt::{self, Debug, Display}, fmt::{self, Debug, Display},
mem,
}; };
use crate::core::collection::{ use crate::core::collection::{
album::{Album, AlbumLibId}, album::{Album, AlbumLibId},
merge::{Merge, MergeCollections, MergeName}, merge::{Merge, MergeCollections},
musicbrainz::{MbArtistRef, MbRefOption}, musicbrainz::{MbArtistRef, MbRefOption},
}; };
@ -30,12 +31,6 @@ pub struct ArtistInfo {
pub properties: HashMap<String, Vec<String>>, pub properties: HashMap<String, Vec<String>>,
} }
impl MergeName for Artist {
fn name(&self) -> &str {
&self.meta.id.name
}
}
/// The artist identifier. /// The artist identifier.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ArtistId { pub struct ArtistId {
@ -46,49 +41,6 @@ pub struct ArtistId {
/// Unique database identifier. Use MBID for this purpose. /// Unique database identifier. Use MBID for this purpose.
pub type ArtistMbRef = MbRefOption<MbArtistRef>; pub type ArtistMbRef = MbRefOption<MbArtistRef>;
impl Artist {
/// Create new [`Artist`] with the given [`ArtistId`].
pub fn new<Id: Into<ArtistId>>(id: Id) -> Self {
Artist {
meta: ArtistMeta::new(id),
albums: vec![],
}
}
fn merge_album_by_lib_id(
primary_albums: &mut [Album],
mut secondary_album: Album,
) -> Option<Album> {
let id_opt = secondary_album.meta.id.lib_id;
if let id @ AlbumLibId::Value(_) | id @ AlbumLibId::Singleton = id_opt {
let mut iter = primary_albums.iter_mut();
if let Some(ref mut primary_album) = iter.find(|a| a.meta.id.lib_id == id) {
primary_album.merge_in_place(secondary_album);
return None;
}
secondary_album.meta.id.lib_id = AlbumLibId::None;
}
Some(secondary_album)
}
fn merge_albums_by_lib_id(
primary_albums: &mut [Album],
secondary_albums: Vec<Album>,
) -> HashMap<String, Vec<Album>> {
let mut secondary_without_id = HashMap::<String, Vec<Album>>::new();
for secondary_album in secondary_albums.into_iter() {
let unmerged = Artist::merge_album_by_lib_id(primary_albums, secondary_album);
if let Some(secondary_album) = unmerged {
secondary_without_id
.entry(secondary_album.meta.id.title.clone())
.or_default()
.push(secondary_album);
}
}
secondary_without_id
}
}
impl PartialOrd for Artist { impl PartialOrd for Artist {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other)) Some(self.cmp(other))
@ -104,10 +56,97 @@ impl Ord for Artist {
impl Merge for Artist { impl Merge for Artist {
fn merge_in_place(&mut self, other: Self) { fn merge_in_place(&mut self, other: Self) {
self.meta.merge_in_place(other.meta); self.meta.merge_in_place(other.meta);
self.albums = MergeAlbums::merge_albums(mem::take(&mut self.albums), other.albums);
}
}
let other_without_id = Artist::merge_albums_by_lib_id(&mut self.albums, other.albums); #[derive(Debug, Default)]
MergeCollections::merge_by_name(&mut self.albums, other_without_id); struct MergeAlbums {
self.albums.sort_unstable(); primary_by_lib_id: HashMap<u32, Album>,
primary_by_singleton: HashMap<String, Album>,
primary_by_title: HashMap<String, Vec<Album>>,
secondary_by_title: HashMap<String, Vec<Album>>,
merged: Vec<Album>,
}
impl MergeAlbums {
fn merge_albums(primary_albums: Vec<Album>, secondary_albums: Vec<Album>) -> Vec<Album> {
let mut cache = MergeAlbums::new(primary_albums);
cache.merge_albums_by_lib_id(secondary_albums);
cache.merged.extend(MergeCollections::merge_by_name(
cache.primary_by_title,
cache.secondary_by_title,
));
cache.merged.sort_unstable();
cache.merged
}
fn new(primary_albums: Vec<Album>) -> Self {
let mut cache = MergeAlbums::default();
for album in primary_albums.into_iter() {
match album.meta.id.lib_id {
AlbumLibId::Value(value) => {
assert!(cache.primary_by_lib_id.insert(value, album).is_none())
}
AlbumLibId::Singleton => assert!(cache
.primary_by_singleton
.insert(album.meta.id.title.clone(), album)
.is_none()),
_ => unreachable!("primary collection should always have a lib_id"),
}
}
cache
}
fn merge_albums_by_lib_id(&mut self, secondary_albums: Vec<Album>) {
for secondary_album in secondary_albums.into_iter() {
self.merge_album_by_lib_id(secondary_album);
}
for (_, primary_album) in self.primary_by_lib_id.drain() {
self.primary_by_title
.entry(primary_album.meta.id.title.clone())
.or_default()
.push(primary_album);
}
for (title, primary_album) in self.primary_by_singleton.drain() {
self.primary_by_title
.entry(title)
.or_default()
.push(primary_album);
}
}
fn merge_album_by_lib_id(&mut self, mut secondary_album: Album) {
let title = &secondary_album.meta.id.title;
let id_opt = secondary_album.meta.id.lib_id;
let find = match id_opt {
AlbumLibId::Value(value) => self.primary_by_lib_id.remove(&value),
AlbumLibId::Singleton => self.primary_by_singleton.remove(title),
AlbumLibId::None => None,
};
if let Some(mut primary_album) = find {
primary_album.merge_in_place(secondary_album);
self.merged.push(primary_album);
return;
}
secondary_album.meta.id.lib_id = AlbumLibId::None;
self.secondary_by_title
.entry(secondary_album.meta.id.title.clone())
.or_default()
.push(secondary_album);
}
}
impl Artist {
/// Create new [`Artist`] with the given [`ArtistId`].
pub fn new<Id: Into<ArtistId>>(id: Id) -> Self {
Artist {
meta: ArtistMeta::new(id),
albums: vec![],
}
} }
} }

View File

@ -1,4 +1,4 @@
use std::{cmp::Ordering, collections::HashMap, hash::Hash, iter::Peekable, marker::PhantomData}; use std::{cmp::Ordering, collections::HashMap, hash::Hash, iter::Peekable};
/// A trait for merging two objects. The merge is asymmetric with the left argument considered to be /// A trait for merging two objects. The merge is asymmetric with the left argument considered to be
/// the primary whose properties are to be kept in case of collisions. /// the primary whose properties are to be kept in case of collisions.
@ -80,30 +80,30 @@ where
} }
} }
pub trait MergeName { pub struct MergeCollections;
fn name(&self) -> &str;
}
pub struct MergeCollections<T, IT> { impl MergeCollections {
_t: PhantomData<T>, pub fn merge_by_name<T, It>(mut primary: HashMap<String, Vec<T>>, secondary: It) -> Vec<T>
_it: PhantomData<IT>,
}
impl<T, IT> MergeCollections<T, IT>
where where
T: MergeName + Merge, T: Merge,
IT: IntoIterator<Item = (String, Vec<T>)>, It: IntoIterator<Item = (String, Vec<T>)>,
{ {
pub fn merge_by_name(primary_items: &mut Vec<T>, secondary: IT) { let mut merged = vec![];
for (name, mut secondary_items) in secondary.into_iter() { for (title, mut secondary_items) in secondary.into_iter() {
match primary_items.iter_mut().find(|item| item.name() == name) { match primary.remove(&title) {
Some(ref mut primary_item) => { Some(mut primary_items) => {
// We do not support merging multiple DB items with same name yet. // We do not support merging multiple items with same name yet. Support will be
// added once encountered in the wild.
assert_eq!(primary_items.len(), 1);
assert_eq!(secondary_items.len(), 1); assert_eq!(secondary_items.len(), 1);
let mut primary_item = primary_items.pop().unwrap();
primary_item.merge_in_place(secondary_items.pop().unwrap()); primary_item.merge_in_place(secondary_items.pop().unwrap());
merged.push(primary_item);
} }
None => primary_items.extend(secondary_items), None => merged.extend(secondary_items),
} }
} }
merged.extend(primary.into_values().flatten());
merged
} }
} }

View File

@ -59,9 +59,16 @@ impl<Database, Library> IMusicHoardBasePrivate for MusicHoard<Database, Library>
} }
fn merge_collections(&self) -> Collection { fn merge_collections(&self) -> Collection {
let mut primary = self.library_cache.clone(); let mut primary = HashMap::<String, Vec<Artist>>::new();
let mut secondary = HashMap::<String, Vec<Artist>>::new(); let mut secondary = HashMap::<String, Vec<Artist>>::new();
for artist in self.library_cache.iter().cloned() {
primary
.entry(artist.meta.id.name.clone())
.or_default()
.push(artist);
}
for artist in self.database_cache.iter().cloned() { for artist in self.database_cache.iter().cloned() {
secondary secondary
.entry(artist.meta.id.name.clone()) .entry(artist.meta.id.name.clone())
@ -69,10 +76,10 @@ impl<Database, Library> IMusicHoardBasePrivate for MusicHoard<Database, Library>
.push(artist); .push(artist);
} }
MergeCollections::merge_by_name(&mut primary, secondary); let mut collection = MergeCollections::merge_by_name(primary, secondary);
primary.sort_unstable(); collection.sort_unstable();
primary collection
} }
fn get_artist<'a>(collection: &'a Collection, artist_id: &ArtistId) -> Option<&'a Artist> { fn get_artist<'a>(collection: &'a Collection, artist_id: &ArtistId) -> Option<&'a Artist> {