Artists with a _sort field show up twice #109

Merged
wojtek merged 8 commits from 108---artists-with-a-_sort-field-show-up-twice into main 2024-01-14 15:46:34 +01:00
2 changed files with 184 additions and 81 deletions

View File

@ -19,7 +19,9 @@ cargo install grcov
```sh
env CARGO_TARGET_DIR=codecov \
cargo clean
rm -rf ./codecov/debug/{coverage,profraw}
env CARGO_TARGET_DIR=codecov \
cargo clean -p musichoard
env RUSTFLAGS="-C instrument-coverage" \
LLVM_PROFILE_FILE="codecov/debug/profraw/musichoard-%p-%m.profraw" \
CARGO_TARGET_DIR=codecov \

View File

@ -5,7 +5,7 @@ pub mod library;
use std::{
cmp::Ordering,
collections::{HashMap, HashSet},
collections::HashMap,
fmt::{self, Debug, Display},
iter::Peekable,
mem,
@ -287,9 +287,8 @@ impl Ord for Track {
}
impl Merge for Track {
fn merge(self, other: Self) -> Self {
fn merge_in_place(&mut self, other: Self) {
assert_eq!(self.id, other.id);
self
}
}
@ -320,10 +319,10 @@ impl Ord for Album {
}
impl Merge for Album {
fn merge(mut self, other: Self) -> Self {
fn merge_in_place(&mut self, other: Self) {
assert_eq!(self.id, other.id);
self.tracks = MergeSorted::new(self.tracks.into_iter(), other.tracks.into_iter()).collect();
self
let tracks = mem::take(&mut self.tracks);
self.tracks = MergeSorted::new(tracks.into_iter(), other.tracks.into_iter()).collect();
}
}
@ -361,13 +360,11 @@ pub struct ArtistProperties {
}
impl Merge for ArtistProperties {
fn merge(mut self, other: Self) -> Self {
self.musicbrainz = Self::merge_opts(self.musicbrainz, other.musicbrainz);
self.musicbutler = Self::merge_vecs(self.musicbutler, other.musicbutler);
self.bandcamp = Self::merge_vecs(self.bandcamp, other.bandcamp);
self.qobuz = Self::merge_opts(self.qobuz, other.qobuz);
self
fn merge_in_place(&mut self, other: Self) {
self.musicbrainz = self.musicbrainz.take().or(other.musicbrainz);
Self::merge_vecs(&mut self.musicbutler, other.musicbutler);
Self::merge_vecs(&mut self.bandcamp, other.bandcamp);
self.qobuz = self.qobuz.take().or(other.qobuz);
}
}
@ -566,12 +563,12 @@ impl Ord for Artist {
}
impl Merge for Artist {
fn merge(mut self, other: Self) -> Self {
fn merge_in_place(&mut self, other: Self) {
assert_eq!(self.id, other.id);
self.sort = Self::merge_opts(self.sort, other.sort);
self.properties = self.properties.merge(other.properties);
self.albums = MergeSorted::new(self.albums.into_iter(), other.albums.into_iter()).collect();
self
self.sort = self.sort.take().or(other.sort);
self.properties.merge_in_place(other.properties);
let albums = mem::take(&mut self.albums);
self.albums = MergeSorted::new(albums.into_iter(), other.albums.into_iter()).collect();
}
}
@ -579,20 +576,20 @@ impl Merge for Artist {
pub type Collection = Vec<Artist>;
trait Merge {
fn merge(self, other: Self) -> Self;
fn merge_in_place(&mut self, other: Self);
fn merge_opts<T>(this: Option<T>, other: Option<T>) -> Option<T> {
match &this {
Some(_) => this,
None => other,
}
fn merge(mut self, other: Self) -> Self
where
Self: Sized,
{
self.merge_in_place(other);
self
}
fn merge_vecs<T: Ord + Eq>(mut this: Vec<T>, mut other: Vec<T>) -> Vec<T> {
fn merge_vecs<T: Ord + Eq>(this: &mut Vec<T>, mut other: Vec<T>) {
this.append(&mut other);
this.sort_unstable();
this.dedup();
this
}
}
@ -815,10 +812,8 @@ impl<LIB, DB> MusicHoard<LIB, DB> {
let artist_id: ArtistId = artist_id.into();
if self.get_artist(&artist_id).is_none() {
let new_artist = vec![Artist::new(artist_id)];
let collection = mem::take(&mut self.collection);
self.collection = Self::merge(collection, new_artist);
self.collection.push(Artist::new(artist_id));
Self::sort_artists(&mut self.collection);
}
}
@ -860,8 +855,16 @@ impl<LIB, DB> MusicHoard<LIB, DB> {
music_hoard_unique_url_dispatch!(qobuz);
fn sort(collection: &mut [Artist]) {
Self::sort_artists(collection);
Self::sort_albums_and_tracks(collection.iter_mut());
}
fn sort_artists(collection: &mut [Artist]) {
collection.sort_unstable();
for artist in collection.iter_mut() {
}
fn sort_albums_and_tracks<'a, COL: Iterator<Item = &'a mut Artist>>(collection: COL) {
for artist in collection {
artist.albums.sort_unstable();
for album in artist.albums.iter_mut() {
album.tracks.sort_unstable();
@ -869,13 +872,38 @@ impl<LIB, DB> MusicHoard<LIB, DB> {
}
}
fn merge(primary: Vec<Artist>, secondary: Vec<Artist>) -> Vec<Artist> {
MergeSorted::new(primary.into_iter(), secondary.into_iter()).collect()
fn merge_with_primary(&mut self, primary: HashMap<ArtistId, Artist>) {
let collection = mem::take(&mut self.collection);
self.collection = Self::merge_collections(primary, collection);
}
fn items_to_artists(items: Vec<Item>) -> Result<Vec<Artist>, Error> {
let mut artists: Vec<Artist> = vec![];
let mut album_ids = HashMap::<ArtistId, HashSet<AlbumId>>::new();
fn merge_with_secondary<SEC: IntoIterator<Item = Artist>>(&mut self, secondary: SEC) {
let primary_map: HashMap<ArtistId, Artist> = self
.collection
.drain(..)
.map(|a| (a.id.clone(), a))
.collect();
self.collection = Self::merge_collections(primary_map, secondary);
}
fn merge_collections<SEC: IntoIterator<Item = Artist>>(
mut primary: HashMap<ArtistId, Artist>,
secondary: SEC,
) -> Collection {
for secondary_artist in secondary.into_iter() {
if let Some(ref mut primary_artist) = primary.get_mut(&secondary_artist.id) {
primary_artist.merge_in_place(secondary_artist);
} else {
primary.insert(secondary_artist.id.clone(), secondary_artist);
}
}
let mut collection: Collection = primary.into_values().collect();
Self::sort_artists(&mut collection);
collection
}
fn items_to_artists(items: Vec<Item>) -> Result<HashMap<ArtistId, Artist>, Error> {
let mut collection = HashMap::<ArtistId, Artist>::new();
for item in items.into_iter() {
let artist_id = ArtistId {
@ -901,19 +929,14 @@ impl<LIB, DB> MusicHoard<LIB, DB> {
},
};
let artist = if album_ids.contains_key(&artist_id) {
// Assume results are in some order which means they will likely be grouped by
// artist. Therefore, we look from the back since the last inserted artist is most
// likely the one we are looking for.
artists
.iter_mut()
.rev()
.find(|a| a.id == artist_id)
.unwrap()
} else {
album_ids.insert(artist_id.clone(), HashSet::<AlbumId>::new());
artists.push(Artist::new(artist_id.clone()));
artists.last_mut().unwrap()
// There are usually many entries per artist. Therefore, we avoid simply calling
// .entry(artist_id.clone()).or_insert_with(..), because of the clone. The flipside is
// that insertions will thus do an additional lookup.
let artist = match collection.get_mut(&artist_id) {
Some(artist) => artist,
None => collection
.entry(artist_id.clone())
.or_insert_with(|| Artist::new(artist_id)),
};
if artist.sort.is_some() {
@ -929,30 +952,23 @@ impl<LIB, DB> MusicHoard<LIB, DB> {
artist.sort = artist_sort;
}
if album_ids[&artist_id].contains(&album_id) {
// Assume results are in some order which means they will likely be grouped by
// album. Therefore, we look from the back since the last inserted album is most
// likely the one we are looking for.
let album = artist
.albums
.iter_mut()
.rev()
.find(|a| a.id == album_id)
.unwrap();
album.tracks.push(track);
} else {
album_ids
.get_mut(&artist_id)
.unwrap()
.insert(album_id.clone());
artist.albums.push(Album {
// Do a linear search as few artists have more than a handful of albums. Search from the
// back as the original items vector is usually already sorted.
match artist
.albums
.iter_mut()
.rev()
.find(|album| album.id == album_id)
{
Some(album) => album.tracks.push(track),
None => artist.albums.push(Album {
id: album_id,
tracks: vec![track],
});
}),
}
}
Ok(artists)
Ok(collection)
}
fn get_artist(&self, artist_id: &ArtistId) -> Option<&Artist> {
@ -975,11 +991,9 @@ impl<LIB: ILibrary, DB> MusicHoard<LIB, DB> {
pub fn rescan_library(&mut self) -> Result<(), Error> {
let items = self.library.list(&Query::new())?;
let mut library_collection = Self::items_to_artists(items)?;
Self::sort(&mut library_collection);
let collection = mem::take(&mut self.collection);
self.collection = Self::merge(library_collection, collection);
Self::sort_albums_and_tracks(library_collection.values_mut());
self.merge_with_primary(library_collection);
Ok(())
}
}
@ -988,11 +1002,9 @@ impl<LIB, DB: IDatabase> MusicHoard<LIB, DB> {
/// Load the database and merge with the in-memory collection.
pub fn load_from_database(&mut self) -> Result<(), Error> {
let mut database_collection = self.database.load()?;
Self::sort(&mut database_collection);
let collection = mem::take(&mut self.collection);
self.collection = Self::merge(collection, database_collection);
Self::sort_albums_and_tracks(database_collection.iter_mut());
self.merge_with_secondary(database_collection);
Ok(())
}
@ -1893,7 +1905,11 @@ mod tests {
expected.tracks.append(&mut right.tracks.clone());
expected.tracks.sort_unstable();
let merged = left.clone().merge(right);
let merged = left.clone().merge(right.clone());
assert_eq!(expected, merged);
// Non-overlapping merge should be commutative.
let merged = right.clone().merge(left.clone());
assert_eq!(expected, merged);
}
@ -1919,13 +1935,18 @@ mod tests {
let left = COLLECTION[0].to_owned();
let mut right = COLLECTION[1].to_owned();
right.id = left.id.clone();
right.properties = ArtistProperties::default();
let mut expected = left.clone();
expected.properties = expected.properties.merge(right.clone().properties);
expected.albums.append(&mut right.albums.clone());
expected.albums.sort_unstable();
let merged = left.clone().merge(right);
let merged = left.clone().merge(right.clone());
assert_eq!(expected, merged);
// Non-overlapping merge should be commutative.
let merged = right.clone().merge(left.clone());
assert_eq!(expected, merged);
}
@ -1957,7 +1978,24 @@ mod tests {
let mut expected = COLLECTION.to_owned();
expected.sort_unstable();
let merged = MusicHoard::<NoLibrary, NoDatabase>::merge(left.clone(), right);
let merged = MusicHoard::<NoLibrary, NoDatabase>::merge_collections(
left.clone()
.into_iter()
.map(|a| (a.id.clone(), a))
.collect(),
right.clone(),
);
assert_eq!(expected, merged);
// The merge is completely non-overlapping so it should be commutative.
let merged = MusicHoard::<NoLibrary, NoDatabase>::merge_collections(
right
.clone()
.into_iter()
.map(|a| (a.id.clone(), a))
.collect(),
left.clone(),
);
assert_eq!(expected, merged);
}
@ -1971,7 +2009,70 @@ mod tests {
let mut expected = COLLECTION.to_owned();
expected.sort_unstable();
let merged = MusicHoard::<NoLibrary, NoDatabase>::merge(left.clone(), right);
let merged = MusicHoard::<NoLibrary, NoDatabase>::merge_collections(
left.clone()
.into_iter()
.map(|a| (a.id.clone(), a))
.collect(),
right.clone(),
);
assert_eq!(expected, merged);
// The merge does not overwrite any data so it should be commutative.
let merged = MusicHoard::<NoLibrary, NoDatabase>::merge_collections(
right
.clone()
.into_iter()
.map(|a| (a.id.clone(), a))
.collect(),
left.clone(),
);
assert_eq!(expected, merged);
}
#[test]
fn merge_collection_incompatible_sorting() {
// It may be that the same artist in one collection has a "sort" field defined while the
// same artist in the other collection does not. This means that the two collections are not
// sorted consistently. If the merge assumes they are sorted consistently this will lead to
// the same artist appearing twice in the final list. This should not be the case.
// We will mimic this situation by taking the last artist from COLLECTION and giving it a
// sorting name that would place it in the beginning.
let left = COLLECTION.to_owned();
let mut right: Vec<Artist> = vec![left.last().unwrap().clone()];
assert!(right.first().unwrap() > left.first().unwrap());
let artist_sort = Some(ArtistId::new("album_artist 0"));
right[0].sort = artist_sort.clone();
assert!(right.first().unwrap() < left.first().unwrap());
// The result of the merge should be the same list of artists, but with the last artist now
// in first place.
let mut expected = left.to_owned();
expected.last_mut().as_mut().unwrap().sort = artist_sort.clone();
expected.rotate_right(1);
let merged = MusicHoard::<NoLibrary, NoDatabase>::merge_collections(
left.clone()
.into_iter()
.map(|a| (a.id.clone(), a))
.collect(),
right.clone(),
);
assert_eq!(expected.len(), merged.len());
assert_eq!(expected, merged);
// The merge overwrites the sort data, but no data is erased so it should be commutative.
let merged = MusicHoard::<NoLibrary, NoDatabase>::merge_collections(
right
.clone()
.into_iter()
.map(|a| (a.id.clone(), a))
.collect(),
left.clone(),
);
assert_eq!(expected.len(), merged.len());
assert_eq!(expected, merged);
}