From d876b75d1473e983d886a84a44c1cf399e266872 Mon Sep 17 00:00:00 2001 From: Wojciech Kozlowski Date: Sun, 14 Jan 2024 15:46:33 +0100 Subject: [PATCH] Artists with a _sort field show up twice (#109) Closes #108 Reviewed-on: https://git.thenineworlds.net/wojtek/musichoard/pulls/109 --- README.md | 4 +- src/lib.rs | 261 +++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 184 insertions(+), 81 deletions(-) diff --git a/README.md b/README.md index f0d1207..29444e9 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,9 @@ cargo install grcov ```sh env CARGO_TARGET_DIR=codecov \ - cargo clean + rm -rf ./codecov/debug/{coverage,profraw} +env CARGO_TARGET_DIR=codecov \ + cargo clean -p musichoard env RUSTFLAGS="-C instrument-coverage" \ LLVM_PROFILE_FILE="codecov/debug/profraw/musichoard-%p-%m.profraw" \ CARGO_TARGET_DIR=codecov \ diff --git a/src/lib.rs b/src/lib.rs index a782173..e876af7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,7 +5,7 @@ pub mod library; use std::{ cmp::Ordering, - collections::{HashMap, HashSet}, + collections::HashMap, fmt::{self, Debug, Display}, iter::Peekable, mem, @@ -287,9 +287,8 @@ impl Ord for Track { } impl Merge for Track { - fn merge(self, other: Self) -> Self { + fn merge_in_place(&mut self, other: Self) { assert_eq!(self.id, other.id); - self } } @@ -320,10 +319,10 @@ impl Ord for Album { } impl Merge for Album { - fn merge(mut self, other: Self) -> Self { + fn merge_in_place(&mut self, other: Self) { assert_eq!(self.id, other.id); - self.tracks = MergeSorted::new(self.tracks.into_iter(), other.tracks.into_iter()).collect(); - self + let tracks = mem::take(&mut self.tracks); + self.tracks = MergeSorted::new(tracks.into_iter(), other.tracks.into_iter()).collect(); } } @@ -361,13 +360,11 @@ pub struct ArtistProperties { } impl Merge for ArtistProperties { - fn merge(mut self, other: Self) -> Self { - self.musicbrainz = Self::merge_opts(self.musicbrainz, other.musicbrainz); - self.musicbutler = Self::merge_vecs(self.musicbutler, other.musicbutler); - self.bandcamp = Self::merge_vecs(self.bandcamp, other.bandcamp); - self.qobuz = Self::merge_opts(self.qobuz, other.qobuz); - - self + fn merge_in_place(&mut self, other: Self) { + self.musicbrainz = self.musicbrainz.take().or(other.musicbrainz); + Self::merge_vecs(&mut self.musicbutler, other.musicbutler); + Self::merge_vecs(&mut self.bandcamp, other.bandcamp); + self.qobuz = self.qobuz.take().or(other.qobuz); } } @@ -566,12 +563,12 @@ impl Ord for Artist { } impl Merge for Artist { - fn merge(mut self, other: Self) -> Self { + fn merge_in_place(&mut self, other: Self) { assert_eq!(self.id, other.id); - self.sort = Self::merge_opts(self.sort, other.sort); - self.properties = self.properties.merge(other.properties); - self.albums = MergeSorted::new(self.albums.into_iter(), other.albums.into_iter()).collect(); - self + self.sort = self.sort.take().or(other.sort); + self.properties.merge_in_place(other.properties); + let albums = mem::take(&mut self.albums); + self.albums = MergeSorted::new(albums.into_iter(), other.albums.into_iter()).collect(); } } @@ -579,20 +576,20 @@ impl Merge for Artist { pub type Collection = Vec; trait Merge { - fn merge(self, other: Self) -> Self; + fn merge_in_place(&mut self, other: Self); - fn merge_opts(this: Option, other: Option) -> Option { - match &this { - Some(_) => this, - None => other, - } + fn merge(mut self, other: Self) -> Self + where + Self: Sized, + { + self.merge_in_place(other); + self } - fn merge_vecs(mut this: Vec, mut other: Vec) -> Vec { + fn merge_vecs(this: &mut Vec, mut other: Vec) { this.append(&mut other); this.sort_unstable(); this.dedup(); - this } } @@ -815,10 +812,8 @@ impl MusicHoard { let artist_id: ArtistId = artist_id.into(); if self.get_artist(&artist_id).is_none() { - let new_artist = vec![Artist::new(artist_id)]; - - let collection = mem::take(&mut self.collection); - self.collection = Self::merge(collection, new_artist); + self.collection.push(Artist::new(artist_id)); + Self::sort_artists(&mut self.collection); } } @@ -860,8 +855,16 @@ impl MusicHoard { music_hoard_unique_url_dispatch!(qobuz); fn sort(collection: &mut [Artist]) { + Self::sort_artists(collection); + Self::sort_albums_and_tracks(collection.iter_mut()); + } + + fn sort_artists(collection: &mut [Artist]) { collection.sort_unstable(); - for artist in collection.iter_mut() { + } + + fn sort_albums_and_tracks<'a, COL: Iterator>(collection: COL) { + for artist in collection { artist.albums.sort_unstable(); for album in artist.albums.iter_mut() { album.tracks.sort_unstable(); @@ -869,13 +872,38 @@ impl MusicHoard { } } - fn merge(primary: Vec, secondary: Vec) -> Vec { - MergeSorted::new(primary.into_iter(), secondary.into_iter()).collect() + fn merge_with_primary(&mut self, primary: HashMap) { + let collection = mem::take(&mut self.collection); + self.collection = Self::merge_collections(primary, collection); } - fn items_to_artists(items: Vec) -> Result, Error> { - let mut artists: Vec = vec![]; - let mut album_ids = HashMap::>::new(); + fn merge_with_secondary>(&mut self, secondary: SEC) { + let primary_map: HashMap = self + .collection + .drain(..) + .map(|a| (a.id.clone(), a)) + .collect(); + self.collection = Self::merge_collections(primary_map, secondary); + } + + fn merge_collections>( + mut primary: HashMap, + secondary: SEC, + ) -> Collection { + for secondary_artist in secondary.into_iter() { + if let Some(ref mut primary_artist) = primary.get_mut(&secondary_artist.id) { + primary_artist.merge_in_place(secondary_artist); + } else { + primary.insert(secondary_artist.id.clone(), secondary_artist); + } + } + let mut collection: Collection = primary.into_values().collect(); + Self::sort_artists(&mut collection); + collection + } + + fn items_to_artists(items: Vec) -> Result, Error> { + let mut collection = HashMap::::new(); for item in items.into_iter() { let artist_id = ArtistId { @@ -901,19 +929,14 @@ impl MusicHoard { }, }; - let artist = if album_ids.contains_key(&artist_id) { - // Assume results are in some order which means they will likely be grouped by - // artist. Therefore, we look from the back since the last inserted artist is most - // likely the one we are looking for. - artists - .iter_mut() - .rev() - .find(|a| a.id == artist_id) - .unwrap() - } else { - album_ids.insert(artist_id.clone(), HashSet::::new()); - artists.push(Artist::new(artist_id.clone())); - artists.last_mut().unwrap() + // There are usually many entries per artist. Therefore, we avoid simply calling + // .entry(artist_id.clone()).or_insert_with(..), because of the clone. The flipside is + // that insertions will thus do an additional lookup. + let artist = match collection.get_mut(&artist_id) { + Some(artist) => artist, + None => collection + .entry(artist_id.clone()) + .or_insert_with(|| Artist::new(artist_id)), }; if artist.sort.is_some() { @@ -929,30 +952,23 @@ impl MusicHoard { artist.sort = artist_sort; } - if album_ids[&artist_id].contains(&album_id) { - // Assume results are in some order which means they will likely be grouped by - // album. Therefore, we look from the back since the last inserted album is most - // likely the one we are looking for. - let album = artist - .albums - .iter_mut() - .rev() - .find(|a| a.id == album_id) - .unwrap(); - album.tracks.push(track); - } else { - album_ids - .get_mut(&artist_id) - .unwrap() - .insert(album_id.clone()); - artist.albums.push(Album { + // Do a linear search as few artists have more than a handful of albums. Search from the + // back as the original items vector is usually already sorted. + match artist + .albums + .iter_mut() + .rev() + .find(|album| album.id == album_id) + { + Some(album) => album.tracks.push(track), + None => artist.albums.push(Album { id: album_id, tracks: vec![track], - }); + }), } } - Ok(artists) + Ok(collection) } fn get_artist(&self, artist_id: &ArtistId) -> Option<&Artist> { @@ -975,11 +991,9 @@ impl MusicHoard { pub fn rescan_library(&mut self) -> Result<(), Error> { let items = self.library.list(&Query::new())?; let mut library_collection = Self::items_to_artists(items)?; - Self::sort(&mut library_collection); - - let collection = mem::take(&mut self.collection); - self.collection = Self::merge(library_collection, collection); + Self::sort_albums_and_tracks(library_collection.values_mut()); + self.merge_with_primary(library_collection); Ok(()) } } @@ -988,11 +1002,9 @@ impl MusicHoard { /// Load the database and merge with the in-memory collection. pub fn load_from_database(&mut self) -> Result<(), Error> { let mut database_collection = self.database.load()?; - Self::sort(&mut database_collection); - - let collection = mem::take(&mut self.collection); - self.collection = Self::merge(collection, database_collection); + Self::sort_albums_and_tracks(database_collection.iter_mut()); + self.merge_with_secondary(database_collection); Ok(()) } @@ -1893,7 +1905,11 @@ mod tests { expected.tracks.append(&mut right.tracks.clone()); expected.tracks.sort_unstable(); - let merged = left.clone().merge(right); + let merged = left.clone().merge(right.clone()); + assert_eq!(expected, merged); + + // Non-overlapping merge should be commutative. + let merged = right.clone().merge(left.clone()); assert_eq!(expected, merged); } @@ -1919,13 +1935,18 @@ mod tests { let left = COLLECTION[0].to_owned(); let mut right = COLLECTION[1].to_owned(); right.id = left.id.clone(); + right.properties = ArtistProperties::default(); let mut expected = left.clone(); expected.properties = expected.properties.merge(right.clone().properties); expected.albums.append(&mut right.albums.clone()); expected.albums.sort_unstable(); - let merged = left.clone().merge(right); + let merged = left.clone().merge(right.clone()); + assert_eq!(expected, merged); + + // Non-overlapping merge should be commutative. + let merged = right.clone().merge(left.clone()); assert_eq!(expected, merged); } @@ -1957,7 +1978,24 @@ mod tests { let mut expected = COLLECTION.to_owned(); expected.sort_unstable(); - let merged = MusicHoard::::merge(left.clone(), right); + let merged = MusicHoard::::merge_collections( + left.clone() + .into_iter() + .map(|a| (a.id.clone(), a)) + .collect(), + right.clone(), + ); + assert_eq!(expected, merged); + + // The merge is completely non-overlapping so it should be commutative. + let merged = MusicHoard::::merge_collections( + right + .clone() + .into_iter() + .map(|a| (a.id.clone(), a)) + .collect(), + left.clone(), + ); assert_eq!(expected, merged); } @@ -1971,7 +2009,70 @@ mod tests { let mut expected = COLLECTION.to_owned(); expected.sort_unstable(); - let merged = MusicHoard::::merge(left.clone(), right); + let merged = MusicHoard::::merge_collections( + left.clone() + .into_iter() + .map(|a| (a.id.clone(), a)) + .collect(), + right.clone(), + ); + assert_eq!(expected, merged); + + // The merge does not overwrite any data so it should be commutative. + let merged = MusicHoard::::merge_collections( + right + .clone() + .into_iter() + .map(|a| (a.id.clone(), a)) + .collect(), + left.clone(), + ); + assert_eq!(expected, merged); + } + + #[test] + fn merge_collection_incompatible_sorting() { + // It may be that the same artist in one collection has a "sort" field defined while the + // same artist in the other collection does not. This means that the two collections are not + // sorted consistently. If the merge assumes they are sorted consistently this will lead to + // the same artist appearing twice in the final list. This should not be the case. + + // We will mimic this situation by taking the last artist from COLLECTION and giving it a + // sorting name that would place it in the beginning. + let left = COLLECTION.to_owned(); + let mut right: Vec = vec![left.last().unwrap().clone()]; + + assert!(right.first().unwrap() > left.first().unwrap()); + let artist_sort = Some(ArtistId::new("album_artist 0")); + right[0].sort = artist_sort.clone(); + assert!(right.first().unwrap() < left.first().unwrap()); + + // The result of the merge should be the same list of artists, but with the last artist now + // in first place. + let mut expected = left.to_owned(); + expected.last_mut().as_mut().unwrap().sort = artist_sort.clone(); + expected.rotate_right(1); + + let merged = MusicHoard::::merge_collections( + left.clone() + .into_iter() + .map(|a| (a.id.clone(), a)) + .collect(), + right.clone(), + ); + assert_eq!(expected.len(), merged.len()); + assert_eq!(expected, merged); + + // The merge overwrites the sort data, but no data is erased so it should be commutative. + let merged = MusicHoard::::merge_collections( + right + .clone() + .into_iter() + .map(|a| (a.id.clone(), a)) + .collect(), + left.clone(), + ); + assert_eq!(expected.len(), merged.len()); assert_eq!(expected, merged); }