Startup merge fails when the database has two albums with the same title as an album in the library #246
@ -38,7 +38,7 @@ jobs:
|
||||
--ignore "tests/*"
|
||||
--ignore "src/main.rs"
|
||||
--ignore "src/bin/musichoard-edit.rs"
|
||||
--excl-line "^#\[derive|unimplemented\!\(\)"
|
||||
--excl-line "^#\[derive|unimplemented\!\(|unreachable\!\("
|
||||
--excl-start "GRCOV_EXCL_START|mod tests \{"
|
||||
--excl-stop "GRCOV_EXCL_STOP"
|
||||
--output-path ./target/debug/coverage/
|
||||
|
@ -6,9 +6,9 @@ edition = "2021"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
aho-corasick = { version = "1.1.2", optional = true }
|
||||
aho-corasick = { version = "1.1.2" }
|
||||
crossterm = { version = "0.28.1", optional = true}
|
||||
once_cell = { version = "1.19.0", optional = true}
|
||||
once_cell = { version = "1.19.0" }
|
||||
openssh = { version = "0.10.3", features = ["native-mux"], default-features = false, optional = true}
|
||||
paste = { version = "1.0.15", optional = true }
|
||||
ratatui = { version = "0.28.1", optional = true}
|
||||
@ -37,7 +37,7 @@ database-json = ["serde", "serde_json"]
|
||||
library-beets = []
|
||||
library-beets-ssh = ["openssh", "tokio"]
|
||||
musicbrainz = ["paste", "reqwest", "serde", "serde_json"]
|
||||
tui = ["aho-corasick", "crossterm", "once_cell", "ratatui", "tui-input"]
|
||||
tui = ["crossterm", "ratatui", "tui-input"]
|
||||
|
||||
[[bin]]
|
||||
name = "musichoard"
|
||||
|
@ -50,7 +50,7 @@ grcov codecov/debug/profraw \
|
||||
--ignore "tests/*" \
|
||||
--ignore "src/main.rs" \
|
||||
--ignore "src/bin/musichoard-edit.rs" \
|
||||
--excl-line "^#\[derive|unimplemented\!\(\)" \
|
||||
--excl-line "^#\[derive|unimplemented\!\(|unreachable\!\(" \
|
||||
--excl-start "GRCOV_EXCL_START|mod tests \{" \
|
||||
--excl-stop "GRCOV_EXCL_STOP" \
|
||||
--output-path ./codecov/debug/coverage/
|
||||
|
@ -1,11 +1,13 @@
|
||||
use std::mem;
|
||||
|
||||
use crate::core::collection::{
|
||||
merge::{Merge, MergeName, MergeSorted},
|
||||
merge::{Merge, MergeSorted},
|
||||
musicbrainz::{MbAlbumRef, MbRefOption},
|
||||
track::{Track, TrackFormat},
|
||||
};
|
||||
|
||||
use super::string;
|
||||
|
||||
/// An album is a collection of tracks that were released together.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct Album {
|
||||
@ -54,12 +56,6 @@ impl AlbumLibId {
|
||||
/// Unique database identifier. Use MBID for this purpose.
|
||||
pub type AlbumMbRef = MbRefOption<MbAlbumRef>;
|
||||
|
||||
impl MergeName for Album {
|
||||
fn name(&self) -> &str {
|
||||
&self.meta.id.title
|
||||
}
|
||||
}
|
||||
|
||||
// There are crates for handling dates, but we don't need much complexity beyond year-month-day.
|
||||
/// The album's release date.
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
|
||||
@ -320,7 +316,8 @@ impl AlbumId {
|
||||
}
|
||||
|
||||
pub fn compatible(&self, other: &AlbumId) -> bool {
|
||||
let titles_compatible = self.title == other.title;
|
||||
let titles_compatible =
|
||||
string::normalize_string(&self.title) == string::normalize_string(&other.title);
|
||||
let lib_id_compatible =
|
||||
self.lib_id.is_none() || other.lib_id.is_none() || (self.lib_id == other.lib_id);
|
||||
let mb_ref_compatible =
|
||||
|
@ -1,12 +1,14 @@
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
fmt::{self, Debug, Display},
|
||||
mem,
|
||||
};
|
||||
|
||||
use crate::core::collection::{
|
||||
album::{Album, AlbumLibId},
|
||||
merge::{Merge, MergeCollections, MergeName},
|
||||
merge::{Merge, MergeCollections, NormalMap},
|
||||
musicbrainz::{MbArtistRef, MbRefOption},
|
||||
string::{self, NormalString},
|
||||
};
|
||||
|
||||
/// An artist.
|
||||
@ -30,12 +32,6 @@ pub struct ArtistInfo {
|
||||
pub properties: HashMap<String, Vec<String>>,
|
||||
}
|
||||
|
||||
impl MergeName for Artist {
|
||||
fn name(&self) -> &str {
|
||||
&self.meta.id.name
|
||||
}
|
||||
}
|
||||
|
||||
/// The artist identifier.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct ArtistId {
|
||||
@ -46,49 +42,6 @@ pub struct ArtistId {
|
||||
/// Unique database identifier. Use MBID for this purpose.
|
||||
pub type ArtistMbRef = MbRefOption<MbArtistRef>;
|
||||
|
||||
impl Artist {
|
||||
/// Create new [`Artist`] with the given [`ArtistId`].
|
||||
pub fn new<Id: Into<ArtistId>>(id: Id) -> Self {
|
||||
Artist {
|
||||
meta: ArtistMeta::new(id),
|
||||
albums: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
fn merge_album_by_lib_id(
|
||||
primary_albums: &mut [Album],
|
||||
mut secondary_album: Album,
|
||||
) -> Option<Album> {
|
||||
let id_opt = secondary_album.meta.id.lib_id;
|
||||
if let id @ AlbumLibId::Value(_) | id @ AlbumLibId::Singleton = id_opt {
|
||||
let mut iter = primary_albums.iter_mut();
|
||||
if let Some(ref mut primary_album) = iter.find(|a| a.meta.id.lib_id == id) {
|
||||
primary_album.merge_in_place(secondary_album);
|
||||
return None;
|
||||
}
|
||||
secondary_album.meta.id.lib_id = AlbumLibId::None;
|
||||
}
|
||||
Some(secondary_album)
|
||||
}
|
||||
|
||||
fn merge_albums_by_lib_id(
|
||||
primary_albums: &mut [Album],
|
||||
secondary_albums: Vec<Album>,
|
||||
) -> HashMap<String, Vec<Album>> {
|
||||
let mut secondary_without_id = HashMap::<String, Vec<Album>>::new();
|
||||
for secondary_album in secondary_albums.into_iter() {
|
||||
let unmerged = Artist::merge_album_by_lib_id(primary_albums, secondary_album);
|
||||
if let Some(secondary_album) = unmerged {
|
||||
secondary_without_id
|
||||
.entry(secondary_album.meta.id.title.clone())
|
||||
.or_default()
|
||||
.push(secondary_album);
|
||||
}
|
||||
}
|
||||
secondary_without_id
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for Artist {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
Some(self.cmp(other))
|
||||
@ -104,10 +57,94 @@ impl Ord for Artist {
|
||||
impl Merge for Artist {
|
||||
fn merge_in_place(&mut self, other: Self) {
|
||||
self.meta.merge_in_place(other.meta);
|
||||
self.albums = MergeAlbums::merge_albums(mem::take(&mut self.albums), other.albums);
|
||||
}
|
||||
}
|
||||
|
||||
let other_without_id = Artist::merge_albums_by_lib_id(&mut self.albums, other.albums);
|
||||
MergeCollections::merge_by_name(&mut self.albums, other_without_id);
|
||||
self.albums.sort_unstable();
|
||||
#[derive(Debug, Default)]
|
||||
struct MergeAlbums {
|
||||
primary_by_lib_id: HashMap<u32, Album>,
|
||||
primary_by_singleton: HashMap<NormalString, Album>,
|
||||
primary_by_title: NormalMap<Album>,
|
||||
secondary_by_title: NormalMap<Album>,
|
||||
merged: Vec<Album>,
|
||||
}
|
||||
|
||||
impl MergeAlbums {
|
||||
fn merge_albums(primary_albums: Vec<Album>, secondary_albums: Vec<Album>) -> Vec<Album> {
|
||||
let mut cache = MergeAlbums::new(primary_albums);
|
||||
cache.merge_albums_by_lib_id(secondary_albums);
|
||||
cache.merged.extend(MergeCollections::merge_by_name(
|
||||
cache.primary_by_title,
|
||||
cache.secondary_by_title,
|
||||
));
|
||||
cache.merged.sort_unstable();
|
||||
cache.merged
|
||||
}
|
||||
|
||||
fn new(primary_albums: Vec<Album>) -> Self {
|
||||
let mut cache = MergeAlbums::default();
|
||||
for album in primary_albums.into_iter() {
|
||||
match album.meta.id.lib_id {
|
||||
AlbumLibId::Value(value) => {
|
||||
assert!(cache.primary_by_lib_id.insert(value, album).is_none())
|
||||
}
|
||||
AlbumLibId::Singleton => assert!(cache
|
||||
.primary_by_singleton
|
||||
.insert(string::normalize_string(&album.meta.id.title), album)
|
||||
.is_none()),
|
||||
_ => unreachable!("primary collection should always have a lib_id"),
|
||||
}
|
||||
}
|
||||
cache
|
||||
}
|
||||
|
||||
fn merge_albums_by_lib_id(&mut self, secondary_albums: Vec<Album>) {
|
||||
for secondary_album in secondary_albums.into_iter() {
|
||||
self.merge_album_by_lib_id(secondary_album);
|
||||
}
|
||||
for (_, primary_album) in self.primary_by_lib_id.drain() {
|
||||
self.primary_by_title.insert(
|
||||
string::normalize_string(&primary_album.meta.id.title),
|
||||
primary_album,
|
||||
);
|
||||
}
|
||||
for (title, primary_album) in self.primary_by_singleton.drain() {
|
||||
self.primary_by_title.insert(title, primary_album);
|
||||
}
|
||||
}
|
||||
|
||||
fn merge_album_by_lib_id(&mut self, mut secondary_album: Album) {
|
||||
let title = string::normalize_string(&secondary_album.meta.id.title);
|
||||
let id_opt = secondary_album.meta.id.lib_id;
|
||||
|
||||
let find = match id_opt {
|
||||
AlbumLibId::Value(value) => self.primary_by_lib_id.remove(&value),
|
||||
AlbumLibId::Singleton => self.primary_by_singleton.remove(&title),
|
||||
AlbumLibId::None => None,
|
||||
};
|
||||
|
||||
if let Some(mut primary_album) = find {
|
||||
primary_album.merge_in_place(secondary_album);
|
||||
self.merged.push(primary_album);
|
||||
return;
|
||||
}
|
||||
|
||||
secondary_album.meta.id.lib_id = AlbumLibId::None;
|
||||
self.secondary_by_title.insert(
|
||||
string::normalize_string(&secondary_album.meta.id.title),
|
||||
secondary_album,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
impl Artist {
|
||||
/// Create new [`Artist`] with the given [`ArtistId`].
|
||||
pub fn new<Id: Into<ArtistId>>(id: Id) -> Self {
|
||||
Artist {
|
||||
meta: ArtistMeta::new(id),
|
||||
albums: vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -248,7 +285,8 @@ impl ArtistId {
|
||||
}
|
||||
|
||||
pub fn compatible(&self, other: &ArtistId) -> bool {
|
||||
let names_compatible = self.name == other.name;
|
||||
let names_compatible =
|
||||
string::normalize_string(&self.name) == string::normalize_string(&other.name);
|
||||
let mb_ref_compatible =
|
||||
self.mb_ref.is_none() || other.mb_ref.is_none() || (self.mb_ref == other.mb_ref);
|
||||
names_compatible && mb_ref_compatible
|
||||
@ -263,7 +301,13 @@ impl Display for ArtistId {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::core::testmod::FULL_COLLECTION;
|
||||
use crate::{
|
||||
collection::{
|
||||
album::{AlbumId, AlbumMbRef},
|
||||
musicbrainz::MbAlbumRef,
|
||||
},
|
||||
core::testmod::FULL_COLLECTION,
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
@ -533,4 +577,119 @@ mod tests {
|
||||
let merged = left.clone().merge(right);
|
||||
assert_eq!(expected, merged);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(expected = "multiple secondaries unsupported")]
|
||||
fn merge_two_db_albums_to_one_lib_album() {
|
||||
let mut left = Artist::new(ArtistId::new("Artist"));
|
||||
let mut right = left.clone();
|
||||
|
||||
let album = Album::new(AlbumId::new("Album"));
|
||||
|
||||
left.albums.push(album.clone());
|
||||
left.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
|
||||
|
||||
right.albums.push(album.clone());
|
||||
right.albums.push(album.clone());
|
||||
|
||||
left.merge(right);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(expected = "multiple primaries unsupported")]
|
||||
fn merge_one_db_album_to_two_lib_albums() {
|
||||
let mut left = Artist::new(ArtistId::new("Artist"));
|
||||
let mut right = left.clone();
|
||||
|
||||
let album = Album::new(AlbumId::new("Album"));
|
||||
|
||||
left.albums.push(album.clone());
|
||||
left.albums.push(album.clone());
|
||||
left.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
|
||||
left.albums[1].meta.id.lib_id = AlbumLibId::Value(2);
|
||||
|
||||
right.albums.push(album.clone());
|
||||
|
||||
left.merge(right);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn merge_normalized_album_titles() {
|
||||
let mut left = Artist::new(ArtistId::new("Artist"));
|
||||
let mut right = left.clone();
|
||||
|
||||
left.albums
|
||||
.push(Album::new(AlbumId::new("Album‐Title ‘Title’")));
|
||||
left.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
|
||||
|
||||
right
|
||||
.albums
|
||||
.push(Album::new(AlbumId::new("alBum—tiTle 'title’")));
|
||||
right
|
||||
.albums
|
||||
.push(Album::new(AlbumId::new("Album‐Title “Title”")));
|
||||
|
||||
// The first album will be merged, the second will be added.
|
||||
let mut expected = left.clone();
|
||||
expected.albums.push(right.albums.last().unwrap().clone());
|
||||
expected.albums.sort_unstable();
|
||||
|
||||
let merged = left.merge(right);
|
||||
assert_eq!(expected, merged);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn merge_multiple_singletons() {
|
||||
let mut left = Artist::new(ArtistId::new("Artist"));
|
||||
let mut right = left.clone();
|
||||
|
||||
left.albums.push(Album::new(AlbumId::new("Singleton 1")));
|
||||
left.albums.push(Album::new(AlbumId::new("Singleton 2")));
|
||||
left.albums[0].meta.id.lib_id = AlbumLibId::Singleton;
|
||||
left.albums[1].meta.id.lib_id = AlbumLibId::Singleton;
|
||||
|
||||
right.albums.push(Album::new(AlbumId::new("Singleton 1")));
|
||||
right.albums.push(Album::new(AlbumId::new("Singleton 2")));
|
||||
right.albums.push(Album::new(AlbumId::new("Singleton 3")));
|
||||
right.albums[0].meta.id.lib_id = AlbumLibId::Singleton;
|
||||
right.albums[2].meta.id.lib_id = AlbumLibId::Singleton;
|
||||
|
||||
// Expect first two albums to merge (including lib id) and the third to be added. However,
|
||||
// the third one will lose its lib id due to no match in the primary.
|
||||
let mut expected = left.clone();
|
||||
expected.albums.push(right.albums.last().unwrap().clone());
|
||||
expected.albums.last_mut().unwrap().meta.id.lib_id = AlbumLibId::None;
|
||||
|
||||
let merged = left.merge(right);
|
||||
assert_eq!(expected, merged);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn merge_two_db_albums_to_one_lib_album_with_ids() {
|
||||
let mut left = Artist::new(ArtistId::new("Artist"));
|
||||
let mut right = left.clone();
|
||||
|
||||
let album = Album::new(AlbumId::new("Album"));
|
||||
|
||||
left.albums.push(album.clone());
|
||||
left.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
|
||||
|
||||
// In this test, one DB album has a lib id that matches that of the lib album. This is the
|
||||
// one that we expect to merge while the second album should just be added.
|
||||
right.albums.push(album.clone());
|
||||
right.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
|
||||
right.albums[0].meta.id.mb_ref = AlbumMbRef::Some(
|
||||
MbAlbumRef::from_uuid_str("00000000-0000-0000-0000-000000000000").unwrap(),
|
||||
);
|
||||
|
||||
right.albums.push(album.clone());
|
||||
right.albums[1].meta.id.mb_ref = AlbumMbRef::Some(
|
||||
MbAlbumRef::from_uuid_str("11111111-1111-1111-1111-111111111111").unwrap(),
|
||||
);
|
||||
|
||||
let expected = right.clone();
|
||||
|
||||
let merged = left.merge(right);
|
||||
assert_eq!(expected, merged);
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,7 @@
|
||||
// Date: 2024-02-19
|
||||
pub const ARTISTS: [&str; 141] = [
|
||||
// 1. `beet ls -a -f '$albumartist'`.
|
||||
// 2. `M-x delete-duplicate-lines`.
|
||||
// Date: 2025-01-03
|
||||
pub const ARTISTS: [&str; 156] = [
|
||||
"Abadden",
|
||||
"Acid Drinkers",
|
||||
"Adema",
|
||||
@ -16,6 +18,7 @@ pub const ARTISTS: [&str; 141] = [
|
||||
"Аркона",
|
||||
"Artas",
|
||||
"As I Lay Dying",
|
||||
"At the Gates",
|
||||
"Avenged Sevenfold",
|
||||
"Aversions Crown",
|
||||
"Aviators",
|
||||
@ -30,6 +33,7 @@ pub const ARTISTS: [&str; 141] = [
|
||||
"Bloodbath",
|
||||
"Bloodbound",
|
||||
"Brothers of Metal",
|
||||
"Carcass",
|
||||
"Carnation",
|
||||
"Cellar Darling",
|
||||
"Children of Bodom",
|
||||
@ -44,7 +48,6 @@ pub const ARTISTS: [&str; 141] = [
|
||||
"Dynazty",
|
||||
"Edguy",
|
||||
"Eluveitie",
|
||||
"Eminem",
|
||||
"Enforcer",
|
||||
"Ensiferum",
|
||||
"Epica",
|
||||
@ -72,10 +75,12 @@ pub const ARTISTS: [&str; 141] = [
|
||||
"Heaven’s Basement",
|
||||
"Heavy Load",
|
||||
"Hermh",
|
||||
"Ignea",
|
||||
"Immortal",
|
||||
"In Flames",
|
||||
"Insomnium",
|
||||
"Iron Maiden",
|
||||
"Judas Priest",
|
||||
"Kalmah",
|
||||
"Kataklysm",
|
||||
"Kontrust",
|
||||
@ -86,14 +91,16 @@ pub const ARTISTS: [&str; 141] = [
|
||||
"Linkin Park",
|
||||
"Lost Dreams",
|
||||
"Man Must Die",
|
||||
"Månegarm",
|
||||
"Me and That Man",
|
||||
"Megaton Sword",
|
||||
"Mercyful Fate",
|
||||
"Metal Church",
|
||||
"Metallica",
|
||||
"Michael Jackson",
|
||||
"Miracle of Sound",
|
||||
"Misery Index",
|
||||
"Mortal Sin",
|
||||
"Mudvayne",
|
||||
"Månegarm",
|
||||
"Nickelback",
|
||||
"Nightwish",
|
||||
"Nile",
|
||||
@ -103,8 +110,8 @@ pub const ARTISTS: [&str; 141] = [
|
||||
"Oomph!",
|
||||
"P.O.D.",
|
||||
"Paddy and the Rats",
|
||||
"Pain",
|
||||
"Paul Stanley",
|
||||
"Persefone",
|
||||
"Peyton Parrish",
|
||||
"Powerwolf",
|
||||
"Primitai",
|
||||
@ -113,9 +120,14 @@ pub const ARTISTS: [&str; 141] = [
|
||||
"Rammstein",
|
||||
"Red Hot Chili Peppers",
|
||||
"Revocation",
|
||||
"Ride the Sky",
|
||||
"Rob Zombie",
|
||||
"Sabaton",
|
||||
"Saltatio Mortis",
|
||||
"Saltatio Mortis & Lara Loft",
|
||||
"Satan",
|
||||
"Savatage",
|
||||
"Scar Symmetry",
|
||||
"Scars on Broadway",
|
||||
"Scorpions",
|
||||
"Silent Descent",
|
||||
@ -132,13 +144,18 @@ pub const ARTISTS: [&str; 141] = [
|
||||
"Timecry",
|
||||
"Trivium",
|
||||
"Tuomas Holopainen",
|
||||
"VNV Nation",
|
||||
"Turisas",
|
||||
"Vader",
|
||||
"Vesania",
|
||||
"Vicious Crusade",
|
||||
"The Wages of Sin",
|
||||
"Vintersorg",
|
||||
"VNV Nation",
|
||||
"W.A.S.P.",
|
||||
"Whitechapel",
|
||||
"Within Temptation",
|
||||
"Woe of Tyrants",
|
||||
"Wovenwar",
|
||||
"Xandria",
|
||||
"Jonathan Young",
|
||||
"Jonathan Young, Peyton Parrish & Colm R. McGuinness",
|
||||
];
|
@ -1,4 +1,6 @@
|
||||
use std::{cmp::Ordering, collections::HashMap, hash::Hash, iter::Peekable, marker::PhantomData};
|
||||
use std::{cmp::Ordering, collections::HashMap, hash::Hash, iter::Peekable};
|
||||
|
||||
use crate::core::collection::string::NormalString;
|
||||
|
||||
/// A trait for merging two objects. The merge is asymmetric with the left argument considered to be
|
||||
/// the primary whose properties are to be kept in case of collisions.
|
||||
@ -80,30 +82,49 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
pub trait MergeName {
|
||||
fn name(&self) -> &str;
|
||||
#[derive(Debug)]
|
||||
pub struct NormalMap<T>(HashMap<NormalString, Vec<T>>);
|
||||
|
||||
impl<T> Default for NormalMap<T> {
|
||||
fn default() -> Self {
|
||||
NormalMap(HashMap::default())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MergeCollections<T, IT> {
|
||||
_t: PhantomData<T>,
|
||||
_it: PhantomData<IT>,
|
||||
impl<T> NormalMap<T> {
|
||||
pub fn new() -> Self {
|
||||
NormalMap(HashMap::new())
|
||||
}
|
||||
|
||||
impl<T, IT> MergeCollections<T, IT>
|
||||
where
|
||||
T: MergeName + Merge,
|
||||
IT: IntoIterator<Item = (String, Vec<T>)>,
|
||||
{
|
||||
pub fn merge_by_name(primary_items: &mut Vec<T>, secondary: IT) {
|
||||
for (name, mut secondary_items) in secondary.into_iter() {
|
||||
match primary_items.iter_mut().find(|item| item.name() == name) {
|
||||
Some(ref mut primary_item) => {
|
||||
// We do not support merging multiple DB items with same name yet.
|
||||
assert_eq!(secondary_items.len(), 1);
|
||||
pub fn insert(&mut self, key: NormalString, item: T) {
|
||||
self.0.entry(key).or_default().push(item);
|
||||
}
|
||||
|
||||
fn remove(&mut self, key: &NormalString) -> Option<Vec<T>> {
|
||||
self.0.remove(key)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MergeCollections;
|
||||
|
||||
impl MergeCollections {
|
||||
pub fn merge_by_name<T: Merge>(mut primary: NormalMap<T>, secondary: NormalMap<T>) -> Vec<T> {
|
||||
let mut merged = vec![];
|
||||
for (title, mut secondary_items) in secondary.0.into_iter() {
|
||||
match primary.remove(&title) {
|
||||
Some(mut primary_items) => {
|
||||
// We do not support merging multiple items with same name yet. Support will be
|
||||
// added once encountered in the wild.
|
||||
assert_eq!(primary_items.len(), 1, "multiple primaries unsupported");
|
||||
assert_eq!(secondary_items.len(), 1, "multiple secondaries unsupported");
|
||||
let mut primary_item = primary_items.pop().unwrap();
|
||||
primary_item.merge_in_place(secondary_items.pop().unwrap());
|
||||
merged.push(primary_item);
|
||||
}
|
||||
None => primary_items.extend(secondary_items),
|
||||
None => merged.extend(secondary_items),
|
||||
}
|
||||
}
|
||||
merged.extend(primary.0.into_values().flatten());
|
||||
merged
|
||||
}
|
||||
}
|
||||
|
@ -4,6 +4,7 @@ pub mod album;
|
||||
pub mod artist;
|
||||
pub mod merge;
|
||||
pub mod musicbrainz;
|
||||
pub mod string;
|
||||
pub mod track;
|
||||
|
||||
use std::fmt::{self, Display};
|
||||
@ -40,3 +41,7 @@ impl From<uuid::Error> for Error {
|
||||
Error::MbidError(err.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(nightly)]
|
||||
#[cfg(test)]
|
||||
mod benchmod;
|
||||
|
81
src/core/collection/string.rs
Normal file
81
src/core/collection/string.rs
Normal file
@ -0,0 +1,81 @@
|
||||
use aho_corasick::AhoCorasick;
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
// Unlikely that this covers all possible strings, but it should at least cover strings
|
||||
// relevant for music (at least in English). The list of characters handled is based on
|
||||
// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters.
|
||||
//
|
||||
// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018,
|
||||
// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign
|
||||
const SPECIAL: [char; 11] = ['‐', '‒', '–', '—', '―', '‘', '’', '“', '”', '…', '−'];
|
||||
const REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"];
|
||||
static AC: Lazy<AhoCorasick> =
|
||||
Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap());
|
||||
|
||||
pub fn is_case_sensitive(string: &str) -> bool {
|
||||
string
|
||||
.chars()
|
||||
.any(|ch| ch.is_alphabetic() && ch.is_uppercase())
|
||||
}
|
||||
|
||||
pub fn is_char_sensitive(string: &str) -> bool {
|
||||
// Benchmarking reveals that using AhoCorasick is slower. At a guess, this is likely due to
|
||||
// a high constant cost of AhoCorasick and the otherwise simple nature of the task.
|
||||
string.chars().any(|ch| SPECIAL.contains(&ch))
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct NormalString {
|
||||
pub string: String,
|
||||
}
|
||||
|
||||
pub fn normalize_string_with(string: &str, lowercase: bool, asciify: bool) -> NormalString {
|
||||
let string = if asciify {
|
||||
if lowercase {
|
||||
AC.replace_all(&string.to_lowercase(), &REPLACE)
|
||||
} else {
|
||||
AC.replace_all(string, &REPLACE)
|
||||
}
|
||||
} else if lowercase {
|
||||
string.to_lowercase()
|
||||
} else {
|
||||
string.to_owned()
|
||||
};
|
||||
NormalString { string }
|
||||
}
|
||||
|
||||
pub fn normalize_string(string: &str) -> NormalString {
|
||||
NormalString {
|
||||
string: AC.replace_all(&string.to_lowercase(), &REPLACE),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(nightly)]
|
||||
#[cfg(test)]
|
||||
mod benches {
|
||||
// The purpose of these benches was to evaluate the benefit of AhoCorasick over std solutions.
|
||||
// The winners are left to allow comparison with future alternatives.
|
||||
use test::Bencher;
|
||||
|
||||
use crate::core::collection::benchmod::ARTISTS;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[bench]
|
||||
fn bench_is_char_sensitive(b: &mut Bencher) {
|
||||
let mut iter = ARTISTS.iter().cycle();
|
||||
b.iter(|| test::black_box(is_char_sensitive(&iter.next().unwrap())))
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_normalize_string_with(b: &mut Bencher) {
|
||||
let mut iter = ARTISTS.iter().cycle();
|
||||
b.iter(|| test::black_box(normalize_string_with(&iter.next().unwrap(), true, true)))
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_normalize_string(b: &mut Bencher) {
|
||||
let mut iter = ARTISTS.iter().cycle();
|
||||
b.iter(|| test::black_box(normalize_string(&iter.next().unwrap())))
|
||||
}
|
||||
}
|
@ -1,11 +1,9 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::core::{
|
||||
collection::{
|
||||
album::{Album, AlbumId},
|
||||
artist::{Artist, ArtistId},
|
||||
merge::MergeCollections,
|
||||
Collection,
|
||||
merge::{MergeCollections, NormalMap},
|
||||
string, Collection,
|
||||
},
|
||||
musichoard::{Error, MusicHoard},
|
||||
};
|
||||
@ -59,20 +57,21 @@ impl<Database, Library> IMusicHoardBasePrivate for MusicHoard<Database, Library>
|
||||
}
|
||||
|
||||
fn merge_collections(&self) -> Collection {
|
||||
let mut primary = self.library_cache.clone();
|
||||
let mut secondary = HashMap::<String, Vec<Artist>>::new();
|
||||
let mut primary = NormalMap::<Artist>::new();
|
||||
let mut secondary = NormalMap::<Artist>::new();
|
||||
|
||||
for artist in self.database_cache.iter().cloned() {
|
||||
secondary
|
||||
.entry(artist.meta.id.name.clone())
|
||||
.or_default()
|
||||
.push(artist);
|
||||
for artist in self.library_cache.iter().cloned() {
|
||||
primary.insert(string::normalize_string(&artist.meta.id.name), artist);
|
||||
}
|
||||
|
||||
MergeCollections::merge_by_name(&mut primary, secondary);
|
||||
primary.sort_unstable();
|
||||
for artist in self.database_cache.iter().cloned() {
|
||||
secondary.insert(string::normalize_string(&artist.meta.id.name), artist);
|
||||
}
|
||||
|
||||
primary
|
||||
let mut collection = MergeCollections::merge_by_name(primary, secondary);
|
||||
collection.sort_unstable();
|
||||
|
||||
collection
|
||||
}
|
||||
|
||||
fn get_artist<'a>(collection: &'a Collection, artist_id: &ArtistId) -> Option<&'a Artist> {
|
||||
@ -224,4 +223,71 @@ mod tests {
|
||||
mh.collection = mh.merge_collections();
|
||||
assert_eq!(expected, mh.collection);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(expected = "multiple secondaries unsupported")]
|
||||
fn merge_two_db_artists_to_one_lib_artist() {
|
||||
let mut left = Collection::new();
|
||||
let mut right = Collection::new();
|
||||
|
||||
let artist = Artist::new(ArtistId::new("Artist"));
|
||||
|
||||
left.push(artist.clone());
|
||||
right.push(artist.clone());
|
||||
right.push(artist.clone());
|
||||
|
||||
let mut mh = MusicHoard {
|
||||
library_cache: left.clone(),
|
||||
database_cache: right.clone(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
mh.collection = mh.merge_collections();
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(expected = "multiple primaries unsupported")]
|
||||
fn merge_one_db_artist_to_two_lib_artists() {
|
||||
let mut left = Collection::new();
|
||||
let mut right = Collection::new();
|
||||
|
||||
let artist = Artist::new(ArtistId::new("Artist"));
|
||||
|
||||
left.push(artist.clone());
|
||||
left.push(artist.clone());
|
||||
right.push(artist.clone());
|
||||
|
||||
let mut mh = MusicHoard {
|
||||
library_cache: left.clone(),
|
||||
database_cache: right.clone(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
mh.collection = mh.merge_collections();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn merge_normalized_artist_names() {
|
||||
let mut left = Collection::new();
|
||||
let mut right = Collection::new();
|
||||
|
||||
left.push(Artist::new(ArtistId::new("Artist‐Name ‘Name’")));
|
||||
|
||||
right.push(Artist::new(ArtistId::new("arTist—naMe 'name’")));
|
||||
right.push(Artist::new(ArtistId::new("Artist‐Name “Name”")));
|
||||
|
||||
// The first artist will be merged, the second will be added.
|
||||
let mut expected = left.clone();
|
||||
expected.push(right.last().unwrap().clone());
|
||||
expected.sort_unstable();
|
||||
|
||||
let mut mh = MusicHoard {
|
||||
library_cache: left.clone(),
|
||||
database_cache: right.clone(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
mh.collection = mh.merge_collections();
|
||||
assert_eq!(expected, mh.collection);
|
||||
}
|
||||
}
|
||||
|
@ -1,4 +1,7 @@
|
||||
//! MusicHoard - a music collection manager.
|
||||
#![cfg_attr(nightly, feature(test))]
|
||||
#[cfg(nightly)]
|
||||
extern crate test;
|
||||
|
||||
mod core;
|
||||
pub mod external;
|
||||
|
@ -1,7 +1,3 @@
|
||||
#![cfg_attr(nightly, feature(test))]
|
||||
#[cfg(nightly)]
|
||||
extern crate test;
|
||||
|
||||
mod tui;
|
||||
|
||||
use std::{ffi::OsString, fs::OpenOptions, io, path::PathBuf, thread};
|
||||
|
@ -601,7 +601,3 @@ mod tests {
|
||||
app.unwrap_critical();
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(nightly)]
|
||||
#[cfg(test)]
|
||||
mod benchmod;
|
||||
|
@ -1,7 +1,4 @@
|
||||
use aho_corasick::AhoCorasick;
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
use musichoard::collection::{album::Album, artist::Artist, track::Track};
|
||||
use musichoard::collection::{album::Album, artist::Artist, string, track::Track};
|
||||
|
||||
use crate::tui::app::{
|
||||
machine::{App, AppInner, AppMachine},
|
||||
@ -9,17 +6,6 @@ use crate::tui::app::{
|
||||
AppPublicState, AppState, Category, IAppInteractSearch,
|
||||
};
|
||||
|
||||
// Unlikely that this covers all possible strings, but it should at least cover strings
|
||||
// relevant for music (at least in English). The list of characters handled is based on
|
||||
// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters.
|
||||
//
|
||||
// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018,
|
||||
// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign
|
||||
const SPECIAL: [char; 11] = ['‐', '‒', '–', '—', '―', '‘', '’', '“', '”', '…', '−'];
|
||||
const REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"];
|
||||
static AC: Lazy<AhoCorasick> =
|
||||
Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap());
|
||||
|
||||
pub struct SearchState {
|
||||
string: String,
|
||||
orig: ListSelection,
|
||||
@ -114,10 +100,6 @@ trait IAppInteractSearchPrivate {
|
||||
fn predicate_albums(case_sens: bool, char_sens: bool, search: &str, probe: &Album) -> bool;
|
||||
fn predicate_tracks(case_sens: bool, char_sens: bool, search: &str, probe: &Track) -> bool;
|
||||
fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool;
|
||||
|
||||
fn is_case_sensitive(artist_name: &str) -> bool;
|
||||
fn is_char_sensitive(artist_name: &str) -> bool;
|
||||
fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String;
|
||||
}
|
||||
|
||||
impl IAppInteractSearchPrivate for AppMachine<SearchState> {
|
||||
@ -160,9 +142,9 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
|
||||
where
|
||||
P: FnMut(bool, bool, &str, &T) -> bool,
|
||||
{
|
||||
let case_sens = Self::is_case_sensitive(name);
|
||||
let char_sens = Self::is_char_sensitive(name);
|
||||
let search = Self::normalize_search(name, !case_sens, !char_sens);
|
||||
let case_sens = string::is_case_sensitive(name);
|
||||
let char_sens = string::is_char_sensitive(name);
|
||||
let search = string::normalize_string_with(name, !case_sens, !char_sens);
|
||||
|
||||
let mut index = st.index;
|
||||
if next && ((index + 1) < st.list.len()) {
|
||||
@ -172,18 +154,18 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
|
||||
let slice = &st.list[index..];
|
||||
slice
|
||||
.iter()
|
||||
.position(|probe| pred(case_sens, char_sens, &search, probe))
|
||||
.position(|probe| pred(case_sens, char_sens, &search.string, probe))
|
||||
.map(|slice_index| index + slice_index)
|
||||
}
|
||||
|
||||
fn predicate_artists(case_sens: bool, char_sens: bool, search: &str, probe: &Artist) -> bool {
|
||||
let name = Self::normalize_search(&probe.meta.id.name, !case_sens, !char_sens);
|
||||
let mut result = name.starts_with(search);
|
||||
let name = string::normalize_string_with(&probe.meta.id.name, !case_sens, !char_sens);
|
||||
let mut result = name.string.starts_with(search);
|
||||
|
||||
if let Some(ref probe_sort) = probe.meta.sort {
|
||||
if !result {
|
||||
let name = Self::normalize_search(probe_sort, !case_sens, !char_sens);
|
||||
result = name.starts_with(search);
|
||||
let name = string::normalize_string_with(probe_sort, !case_sens, !char_sens);
|
||||
result = name.string.starts_with(search);
|
||||
}
|
||||
}
|
||||
|
||||
@ -199,33 +181,9 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
|
||||
}
|
||||
|
||||
fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool {
|
||||
Self::normalize_search(title, !case_sens, !char_sens).starts_with(search)
|
||||
}
|
||||
|
||||
fn is_case_sensitive(artist_name: &str) -> bool {
|
||||
artist_name
|
||||
.chars()
|
||||
.any(|ch| ch.is_alphabetic() && ch.is_uppercase())
|
||||
}
|
||||
|
||||
fn is_char_sensitive(artist_name: &str) -> bool {
|
||||
// Benchmarking reveals that using AhoCorasick is slower. At a guess, this is likely due to
|
||||
// a high constant cost of AhoCorasick and the otherwise simple nature of the task.
|
||||
artist_name.chars().any(|ch| SPECIAL.contains(&ch))
|
||||
}
|
||||
|
||||
fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String {
|
||||
if asciify {
|
||||
if lowercase {
|
||||
AC.replace_all(&search.to_lowercase(), &REPLACE)
|
||||
} else {
|
||||
AC.replace_all(search, &REPLACE)
|
||||
}
|
||||
} else if lowercase {
|
||||
search.to_lowercase()
|
||||
} else {
|
||||
search.to_owned()
|
||||
}
|
||||
string::normalize_string_with(title, !case_sens, !char_sens)
|
||||
.string
|
||||
.starts_with(search)
|
||||
}
|
||||
}
|
||||
|
||||
@ -544,28 +502,3 @@ mod tests {
|
||||
assert_eq!(browse.inner.selection.selected(), None);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(nightly)]
|
||||
#[cfg(test)]
|
||||
mod benches {
|
||||
// The purpose of these benches was to evaluate the benefit of AhoCorasick over std solutions.
|
||||
use test::Bencher;
|
||||
|
||||
use crate::tui::{app::machine::benchmod::ARTISTS, lib::MockIMusicHoard};
|
||||
|
||||
use super::*;
|
||||
|
||||
type Search = AppMachine<MockIMusicHoard, SearchState>;
|
||||
|
||||
#[bench]
|
||||
fn is_char_sensitive(b: &mut Bencher) {
|
||||
let mut iter = ARTISTS.iter().cycle();
|
||||
b.iter(|| test::black_box(Search::is_char_sensitive(&iter.next().unwrap())))
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn normalize_search(b: &mut Bencher) {
|
||||
let mut iter = ARTISTS.iter().cycle();
|
||||
b.iter(|| test::black_box(Search::normalize_search(&iter.next().unwrap(), true, true)))
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user