Startup merge fails when the database has two albums with the same title as an album in the library #246

Merged
wojtek merged 11 commits from 245---startup-merge-fails-when-the-database-has-two-albums-with-the-same-title-as-an-album-in-the-library into main 2025-01-03 17:46:55 +01:00
14 changed files with 476 additions and 202 deletions

View File

@ -38,7 +38,7 @@ jobs:
--ignore "tests/*"
--ignore "src/main.rs"
--ignore "src/bin/musichoard-edit.rs"
--excl-line "^#\[derive|unimplemented\!\(\)"
--excl-line "^#\[derive|unimplemented\!\(|unreachable\!\("
--excl-start "GRCOV_EXCL_START|mod tests \{"
--excl-stop "GRCOV_EXCL_STOP"
--output-path ./target/debug/coverage/

View File

@ -6,9 +6,9 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
aho-corasick = { version = "1.1.2", optional = true }
aho-corasick = { version = "1.1.2" }
crossterm = { version = "0.28.1", optional = true}
once_cell = { version = "1.19.0", optional = true}
once_cell = { version = "1.19.0" }
openssh = { version = "0.10.3", features = ["native-mux"], default-features = false, optional = true}
paste = { version = "1.0.15", optional = true }
ratatui = { version = "0.28.1", optional = true}
@ -37,7 +37,7 @@ database-json = ["serde", "serde_json"]
library-beets = []
library-beets-ssh = ["openssh", "tokio"]
musicbrainz = ["paste", "reqwest", "serde", "serde_json"]
tui = ["aho-corasick", "crossterm", "once_cell", "ratatui", "tui-input"]
tui = ["crossterm", "ratatui", "tui-input"]
[[bin]]
name = "musichoard"

View File

@ -50,7 +50,7 @@ grcov codecov/debug/profraw \
--ignore "tests/*" \
--ignore "src/main.rs" \
--ignore "src/bin/musichoard-edit.rs" \
--excl-line "^#\[derive|unimplemented\!\(\)" \
--excl-line "^#\[derive|unimplemented\!\(|unreachable\!\(" \
--excl-start "GRCOV_EXCL_START|mod tests \{" \
--excl-stop "GRCOV_EXCL_STOP" \
--output-path ./codecov/debug/coverage/

View File

@ -1,11 +1,13 @@
use std::mem;
use crate::core::collection::{
merge::{Merge, MergeName, MergeSorted},
merge::{Merge, MergeSorted},
musicbrainz::{MbAlbumRef, MbRefOption},
track::{Track, TrackFormat},
};
use super::string;
/// An album is a collection of tracks that were released together.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Album {
@ -54,12 +56,6 @@ impl AlbumLibId {
/// Unique database identifier. Use MBID for this purpose.
pub type AlbumMbRef = MbRefOption<MbAlbumRef>;
impl MergeName for Album {
fn name(&self) -> &str {
&self.meta.id.title
}
}
// There are crates for handling dates, but we don't need much complexity beyond year-month-day.
/// The album's release date.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
@ -320,7 +316,8 @@ impl AlbumId {
}
pub fn compatible(&self, other: &AlbumId) -> bool {
let titles_compatible = self.title == other.title;
let titles_compatible =
string::normalize_string(&self.title) == string::normalize_string(&other.title);
let lib_id_compatible =
self.lib_id.is_none() || other.lib_id.is_none() || (self.lib_id == other.lib_id);
let mb_ref_compatible =

View File

@ -1,12 +1,14 @@
use std::{
collections::HashMap,
fmt::{self, Debug, Display},
mem,
};
use crate::core::collection::{
album::{Album, AlbumLibId},
merge::{Merge, MergeCollections, MergeName},
merge::{Merge, MergeCollections, NormalMap},
musicbrainz::{MbArtistRef, MbRefOption},
string::{self, NormalString},
};
/// An artist.
@ -30,12 +32,6 @@ pub struct ArtistInfo {
pub properties: HashMap<String, Vec<String>>,
}
impl MergeName for Artist {
fn name(&self) -> &str {
&self.meta.id.name
}
}
/// The artist identifier.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ArtistId {
@ -46,49 +42,6 @@ pub struct ArtistId {
/// Unique database identifier. Use MBID for this purpose.
pub type ArtistMbRef = MbRefOption<MbArtistRef>;
impl Artist {
/// Create new [`Artist`] with the given [`ArtistId`].
pub fn new<Id: Into<ArtistId>>(id: Id) -> Self {
Artist {
meta: ArtistMeta::new(id),
albums: vec![],
}
}
fn merge_album_by_lib_id(
primary_albums: &mut [Album],
mut secondary_album: Album,
) -> Option<Album> {
let id_opt = secondary_album.meta.id.lib_id;
if let id @ AlbumLibId::Value(_) | id @ AlbumLibId::Singleton = id_opt {
let mut iter = primary_albums.iter_mut();
if let Some(ref mut primary_album) = iter.find(|a| a.meta.id.lib_id == id) {
primary_album.merge_in_place(secondary_album);
return None;
}
secondary_album.meta.id.lib_id = AlbumLibId::None;
}
Some(secondary_album)
}
fn merge_albums_by_lib_id(
primary_albums: &mut [Album],
secondary_albums: Vec<Album>,
) -> HashMap<String, Vec<Album>> {
let mut secondary_without_id = HashMap::<String, Vec<Album>>::new();
for secondary_album in secondary_albums.into_iter() {
let unmerged = Artist::merge_album_by_lib_id(primary_albums, secondary_album);
if let Some(secondary_album) = unmerged {
secondary_without_id
.entry(secondary_album.meta.id.title.clone())
.or_default()
.push(secondary_album);
}
}
secondary_without_id
}
}
impl PartialOrd for Artist {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
@ -104,10 +57,94 @@ impl Ord for Artist {
impl Merge for Artist {
fn merge_in_place(&mut self, other: Self) {
self.meta.merge_in_place(other.meta);
self.albums = MergeAlbums::merge_albums(mem::take(&mut self.albums), other.albums);
}
}
let other_without_id = Artist::merge_albums_by_lib_id(&mut self.albums, other.albums);
MergeCollections::merge_by_name(&mut self.albums, other_without_id);
self.albums.sort_unstable();
#[derive(Debug, Default)]
struct MergeAlbums {
primary_by_lib_id: HashMap<u32, Album>,
primary_by_singleton: HashMap<NormalString, Album>,
primary_by_title: NormalMap<Album>,
secondary_by_title: NormalMap<Album>,
merged: Vec<Album>,
}
impl MergeAlbums {
fn merge_albums(primary_albums: Vec<Album>, secondary_albums: Vec<Album>) -> Vec<Album> {
let mut cache = MergeAlbums::new(primary_albums);
cache.merge_albums_by_lib_id(secondary_albums);
cache.merged.extend(MergeCollections::merge_by_name(
cache.primary_by_title,
cache.secondary_by_title,
));
cache.merged.sort_unstable();
cache.merged
}
fn new(primary_albums: Vec<Album>) -> Self {
let mut cache = MergeAlbums::default();
for album in primary_albums.into_iter() {
match album.meta.id.lib_id {
AlbumLibId::Value(value) => {
assert!(cache.primary_by_lib_id.insert(value, album).is_none())
}
AlbumLibId::Singleton => assert!(cache
.primary_by_singleton
.insert(string::normalize_string(&album.meta.id.title), album)
.is_none()),
_ => unreachable!("primary collection should always have a lib_id"),
}
}
cache
}
fn merge_albums_by_lib_id(&mut self, secondary_albums: Vec<Album>) {
for secondary_album in secondary_albums.into_iter() {
self.merge_album_by_lib_id(secondary_album);
}
for (_, primary_album) in self.primary_by_lib_id.drain() {
self.primary_by_title.insert(
string::normalize_string(&primary_album.meta.id.title),
primary_album,
);
}
for (title, primary_album) in self.primary_by_singleton.drain() {
self.primary_by_title.insert(title, primary_album);
}
}
fn merge_album_by_lib_id(&mut self, mut secondary_album: Album) {
let title = string::normalize_string(&secondary_album.meta.id.title);
let id_opt = secondary_album.meta.id.lib_id;
let find = match id_opt {
AlbumLibId::Value(value) => self.primary_by_lib_id.remove(&value),
AlbumLibId::Singleton => self.primary_by_singleton.remove(&title),
AlbumLibId::None => None,
};
if let Some(mut primary_album) = find {
primary_album.merge_in_place(secondary_album);
self.merged.push(primary_album);
return;
}
secondary_album.meta.id.lib_id = AlbumLibId::None;
self.secondary_by_title.insert(
string::normalize_string(&secondary_album.meta.id.title),
secondary_album,
);
}
}
impl Artist {
/// Create new [`Artist`] with the given [`ArtistId`].
pub fn new<Id: Into<ArtistId>>(id: Id) -> Self {
Artist {
meta: ArtistMeta::new(id),
albums: vec![],
}
}
}
@ -248,7 +285,8 @@ impl ArtistId {
}
pub fn compatible(&self, other: &ArtistId) -> bool {
let names_compatible = self.name == other.name;
let names_compatible =
string::normalize_string(&self.name) == string::normalize_string(&other.name);
let mb_ref_compatible =
self.mb_ref.is_none() || other.mb_ref.is_none() || (self.mb_ref == other.mb_ref);
names_compatible && mb_ref_compatible
@ -263,7 +301,13 @@ impl Display for ArtistId {
#[cfg(test)]
mod tests {
use crate::core::testmod::FULL_COLLECTION;
use crate::{
collection::{
album::{AlbumId, AlbumMbRef},
musicbrainz::MbAlbumRef,
},
core::testmod::FULL_COLLECTION,
};
use super::*;
@ -533,4 +577,119 @@ mod tests {
let merged = left.clone().merge(right);
assert_eq!(expected, merged);
}
#[test]
#[should_panic(expected = "multiple secondaries unsupported")]
fn merge_two_db_albums_to_one_lib_album() {
let mut left = Artist::new(ArtistId::new("Artist"));
let mut right = left.clone();
let album = Album::new(AlbumId::new("Album"));
left.albums.push(album.clone());
left.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
right.albums.push(album.clone());
right.albums.push(album.clone());
left.merge(right);
}
#[test]
#[should_panic(expected = "multiple primaries unsupported")]
fn merge_one_db_album_to_two_lib_albums() {
let mut left = Artist::new(ArtistId::new("Artist"));
let mut right = left.clone();
let album = Album::new(AlbumId::new("Album"));
left.albums.push(album.clone());
left.albums.push(album.clone());
left.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
left.albums[1].meta.id.lib_id = AlbumLibId::Value(2);
right.albums.push(album.clone());
left.merge(right);
}
#[test]
fn merge_normalized_album_titles() {
let mut left = Artist::new(ArtistId::new("Artist"));
let mut right = left.clone();
left.albums
.push(Album::new(AlbumId::new("AlbumTitle Title")));
left.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
right
.albums
.push(Album::new(AlbumId::new("alBum—tiTle 'title")));
right
.albums
.push(Album::new(AlbumId::new("AlbumTitle “Title”")));
// The first album will be merged, the second will be added.
let mut expected = left.clone();
expected.albums.push(right.albums.last().unwrap().clone());
expected.albums.sort_unstable();
let merged = left.merge(right);
assert_eq!(expected, merged);
}
#[test]
fn merge_multiple_singletons() {
let mut left = Artist::new(ArtistId::new("Artist"));
let mut right = left.clone();
left.albums.push(Album::new(AlbumId::new("Singleton 1")));
left.albums.push(Album::new(AlbumId::new("Singleton 2")));
left.albums[0].meta.id.lib_id = AlbumLibId::Singleton;
left.albums[1].meta.id.lib_id = AlbumLibId::Singleton;
right.albums.push(Album::new(AlbumId::new("Singleton 1")));
right.albums.push(Album::new(AlbumId::new("Singleton 2")));
right.albums.push(Album::new(AlbumId::new("Singleton 3")));
right.albums[0].meta.id.lib_id = AlbumLibId::Singleton;
right.albums[2].meta.id.lib_id = AlbumLibId::Singleton;
// Expect first two albums to merge (including lib id) and the third to be added. However,
// the third one will lose its lib id due to no match in the primary.
let mut expected = left.clone();
expected.albums.push(right.albums.last().unwrap().clone());
expected.albums.last_mut().unwrap().meta.id.lib_id = AlbumLibId::None;
let merged = left.merge(right);
assert_eq!(expected, merged);
}
#[test]
fn merge_two_db_albums_to_one_lib_album_with_ids() {
let mut left = Artist::new(ArtistId::new("Artist"));
let mut right = left.clone();
let album = Album::new(AlbumId::new("Album"));
left.albums.push(album.clone());
left.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
// In this test, one DB album has a lib id that matches that of the lib album. This is the
// one that we expect to merge while the second album should just be added.
right.albums.push(album.clone());
right.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
right.albums[0].meta.id.mb_ref = AlbumMbRef::Some(
MbAlbumRef::from_uuid_str("00000000-0000-0000-0000-000000000000").unwrap(),
);
right.albums.push(album.clone());
right.albums[1].meta.id.mb_ref = AlbumMbRef::Some(
MbAlbumRef::from_uuid_str("11111111-1111-1111-1111-111111111111").unwrap(),
);
let expected = right.clone();
let merged = left.merge(right);
assert_eq!(expected, merged);
}
}

View File

@ -1,5 +1,7 @@
// Date: 2024-02-19
pub const ARTISTS: [&str; 141] = [
// 1. `beet ls -a -f '$albumartist'`.
// 2. `M-x delete-duplicate-lines`.
// Date: 2025-01-03
pub const ARTISTS: [&str; 156] = [
"Abadden",
"Acid Drinkers",
"Adema",
@ -16,6 +18,7 @@ pub const ARTISTS: [&str; 141] = [
"Аркона",
"Artas",
"As I Lay Dying",
"At the Gates",
"Avenged Sevenfold",
"Aversions Crown",
"Aviators",
@ -30,6 +33,7 @@ pub const ARTISTS: [&str; 141] = [
"Bloodbath",
"Bloodbound",
"Brothers of Metal",
"Carcass",
"Carnation",
"Cellar Darling",
"Children of Bodom",
@ -44,7 +48,6 @@ pub const ARTISTS: [&str; 141] = [
"Dynazty",
"Edguy",
"Eluveitie",
"Eminem",
"Enforcer",
"Ensiferum",
"Epica",
@ -72,10 +75,12 @@ pub const ARTISTS: [&str; 141] = [
"Heavens Basement",
"Heavy Load",
"Hermh",
"Ignea",
"Immortal",
"In Flames",
"Insomnium",
"Iron Maiden",
"Judas Priest",
"Kalmah",
"Kataklysm",
"Kontrust",
@ -86,14 +91,16 @@ pub const ARTISTS: [&str; 141] = [
"Linkin Park",
"Lost Dreams",
"Man Must Die",
"Månegarm",
"Me and That Man",
"Megaton Sword",
"Mercyful Fate",
"Metal Church",
"Metallica",
"Michael Jackson",
"Miracle of Sound",
"Misery Index",
"Mortal Sin",
"Mudvayne",
"Månegarm",
"Nickelback",
"Nightwish",
"Nile",
@ -103,8 +110,8 @@ pub const ARTISTS: [&str; 141] = [
"Oomph!",
"P.O.D.",
"Paddy and the Rats",
"Pain",
"Paul Stanley",
"Persefone",
"Peyton Parrish",
"Powerwolf",
"Primitai",
@ -113,9 +120,14 @@ pub const ARTISTS: [&str; 141] = [
"Rammstein",
"Red Hot Chili Peppers",
"Revocation",
"Ride the Sky",
"Rob Zombie",
"Sabaton",
"Saltatio Mortis",
"Saltatio Mortis & Lara Loft",
"Satan",
"Savatage",
"Scar Symmetry",
"Scars on Broadway",
"Scorpions",
"Silent Descent",
@ -132,13 +144,18 @@ pub const ARTISTS: [&str; 141] = [
"Timecry",
"Trivium",
"Tuomas Holopainen",
"VNV Nation",
"Turisas",
"Vader",
"Vesania",
"Vicious Crusade",
"The Wages of Sin",
"Vintersorg",
"VNV Nation",
"W.A.S.P.",
"Whitechapel",
"Within Temptation",
"Woe of Tyrants",
"Wovenwar",
"Xandria",
"Jonathan Young",
"Jonathan Young, Peyton Parrish & Colm R. McGuinness",
];

View File

@ -1,4 +1,6 @@
use std::{cmp::Ordering, collections::HashMap, hash::Hash, iter::Peekable, marker::PhantomData};
use std::{cmp::Ordering, collections::HashMap, hash::Hash, iter::Peekable};
use crate::core::collection::string::NormalString;
/// A trait for merging two objects. The merge is asymmetric with the left argument considered to be
/// the primary whose properties are to be kept in case of collisions.
@ -80,30 +82,49 @@ where
}
}
pub trait MergeName {
fn name(&self) -> &str;
}
#[derive(Debug)]
pub struct NormalMap<T>(HashMap<NormalString, Vec<T>>);
pub struct MergeCollections<T, IT> {
_t: PhantomData<T>,
_it: PhantomData<IT>,
}
impl<T, IT> MergeCollections<T, IT>
where
T: MergeName + Merge,
IT: IntoIterator<Item = (String, Vec<T>)>,
{
pub fn merge_by_name(primary_items: &mut Vec<T>, secondary: IT) {
for (name, mut secondary_items) in secondary.into_iter() {
match primary_items.iter_mut().find(|item| item.name() == name) {
Some(ref mut primary_item) => {
// We do not support merging multiple DB items with same name yet.
assert_eq!(secondary_items.len(), 1);
primary_item.merge_in_place(secondary_items.pop().unwrap());
}
None => primary_items.extend(secondary_items),
}
}
impl<T> Default for NormalMap<T> {
fn default() -> Self {
NormalMap(HashMap::default())
}
}
impl<T> NormalMap<T> {
pub fn new() -> Self {
NormalMap(HashMap::new())
}
pub fn insert(&mut self, key: NormalString, item: T) {
self.0.entry(key).or_default().push(item);
}
fn remove(&mut self, key: &NormalString) -> Option<Vec<T>> {
self.0.remove(key)
}
}
pub struct MergeCollections;
impl MergeCollections {
pub fn merge_by_name<T: Merge>(mut primary: NormalMap<T>, secondary: NormalMap<T>) -> Vec<T> {
let mut merged = vec![];
for (title, mut secondary_items) in secondary.0.into_iter() {
match primary.remove(&title) {
Some(mut primary_items) => {
// We do not support merging multiple items with same name yet. Support will be
// added once encountered in the wild.
assert_eq!(primary_items.len(), 1, "multiple primaries unsupported");
assert_eq!(secondary_items.len(), 1, "multiple secondaries unsupported");
let mut primary_item = primary_items.pop().unwrap();
primary_item.merge_in_place(secondary_items.pop().unwrap());
merged.push(primary_item);
}
None => merged.extend(secondary_items),
}
}
merged.extend(primary.0.into_values().flatten());
merged
}
}

View File

@ -4,6 +4,7 @@ pub mod album;
pub mod artist;
pub mod merge;
pub mod musicbrainz;
pub mod string;
pub mod track;
use std::fmt::{self, Display};
@ -40,3 +41,7 @@ impl From<uuid::Error> for Error {
Error::MbidError(err.to_string())
}
}
#[cfg(nightly)]
#[cfg(test)]
mod benchmod;

View File

@ -0,0 +1,81 @@
use aho_corasick::AhoCorasick;
use once_cell::sync::Lazy;
// Unlikely that this covers all possible strings, but it should at least cover strings
// relevant for music (at least in English). The list of characters handled is based on
// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters.
//
// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018,
// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign
const SPECIAL: [char; 11] = ['', '', '', '—', '―', '', '', '“', '”', '…', ''];
const REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"];
static AC: Lazy<AhoCorasick> =
Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap());
pub fn is_case_sensitive(string: &str) -> bool {
string
.chars()
.any(|ch| ch.is_alphabetic() && ch.is_uppercase())
}
pub fn is_char_sensitive(string: &str) -> bool {
// Benchmarking reveals that using AhoCorasick is slower. At a guess, this is likely due to
// a high constant cost of AhoCorasick and the otherwise simple nature of the task.
string.chars().any(|ch| SPECIAL.contains(&ch))
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct NormalString {
pub string: String,
}
pub fn normalize_string_with(string: &str, lowercase: bool, asciify: bool) -> NormalString {
let string = if asciify {
if lowercase {
AC.replace_all(&string.to_lowercase(), &REPLACE)
} else {
AC.replace_all(string, &REPLACE)
}
} else if lowercase {
string.to_lowercase()
} else {
string.to_owned()
};
NormalString { string }
}
pub fn normalize_string(string: &str) -> NormalString {
NormalString {
string: AC.replace_all(&string.to_lowercase(), &REPLACE),
}
}
#[cfg(nightly)]
#[cfg(test)]
mod benches {
// The purpose of these benches was to evaluate the benefit of AhoCorasick over std solutions.
// The winners are left to allow comparison with future alternatives.
use test::Bencher;
use crate::core::collection::benchmod::ARTISTS;
use super::*;
#[bench]
fn bench_is_char_sensitive(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(is_char_sensitive(&iter.next().unwrap())))
}
#[bench]
fn bench_normalize_string_with(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(normalize_string_with(&iter.next().unwrap(), true, true)))
}
#[bench]
fn bench_normalize_string(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(normalize_string(&iter.next().unwrap())))
}
}

View File

@ -1,11 +1,9 @@
use std::collections::HashMap;
use crate::core::{
collection::{
album::{Album, AlbumId},
artist::{Artist, ArtistId},
merge::MergeCollections,
Collection,
merge::{MergeCollections, NormalMap},
string, Collection,
},
musichoard::{Error, MusicHoard},
};
@ -59,20 +57,21 @@ impl<Database, Library> IMusicHoardBasePrivate for MusicHoard<Database, Library>
}
fn merge_collections(&self) -> Collection {
let mut primary = self.library_cache.clone();
let mut secondary = HashMap::<String, Vec<Artist>>::new();
let mut primary = NormalMap::<Artist>::new();
let mut secondary = NormalMap::<Artist>::new();
for artist in self.database_cache.iter().cloned() {
secondary
.entry(artist.meta.id.name.clone())
.or_default()
.push(artist);
for artist in self.library_cache.iter().cloned() {
primary.insert(string::normalize_string(&artist.meta.id.name), artist);
}
MergeCollections::merge_by_name(&mut primary, secondary);
primary.sort_unstable();
for artist in self.database_cache.iter().cloned() {
secondary.insert(string::normalize_string(&artist.meta.id.name), artist);
}
primary
let mut collection = MergeCollections::merge_by_name(primary, secondary);
collection.sort_unstable();
collection
}
fn get_artist<'a>(collection: &'a Collection, artist_id: &ArtistId) -> Option<&'a Artist> {
@ -224,4 +223,71 @@ mod tests {
mh.collection = mh.merge_collections();
assert_eq!(expected, mh.collection);
}
#[test]
#[should_panic(expected = "multiple secondaries unsupported")]
fn merge_two_db_artists_to_one_lib_artist() {
let mut left = Collection::new();
let mut right = Collection::new();
let artist = Artist::new(ArtistId::new("Artist"));
left.push(artist.clone());
right.push(artist.clone());
right.push(artist.clone());
let mut mh = MusicHoard {
library_cache: left.clone(),
database_cache: right.clone(),
..Default::default()
};
mh.collection = mh.merge_collections();
}
#[test]
#[should_panic(expected = "multiple primaries unsupported")]
fn merge_one_db_artist_to_two_lib_artists() {
let mut left = Collection::new();
let mut right = Collection::new();
let artist = Artist::new(ArtistId::new("Artist"));
left.push(artist.clone());
left.push(artist.clone());
right.push(artist.clone());
let mut mh = MusicHoard {
library_cache: left.clone(),
database_cache: right.clone(),
..Default::default()
};
mh.collection = mh.merge_collections();
}
#[test]
fn merge_normalized_artist_names() {
let mut left = Collection::new();
let mut right = Collection::new();
left.push(Artist::new(ArtistId::new("ArtistName Name")));
right.push(Artist::new(ArtistId::new("arTist—naMe 'name")));
right.push(Artist::new(ArtistId::new("ArtistName “Name”")));
// The first artist will be merged, the second will be added.
let mut expected = left.clone();
expected.push(right.last().unwrap().clone());
expected.sort_unstable();
let mut mh = MusicHoard {
library_cache: left.clone(),
database_cache: right.clone(),
..Default::default()
};
mh.collection = mh.merge_collections();
assert_eq!(expected, mh.collection);
}
}

View File

@ -1,4 +1,7 @@
//! MusicHoard - a music collection manager.
#![cfg_attr(nightly, feature(test))]
#[cfg(nightly)]
extern crate test;
mod core;
pub mod external;

View File

@ -1,7 +1,3 @@
#![cfg_attr(nightly, feature(test))]
#[cfg(nightly)]
extern crate test;
mod tui;
use std::{ffi::OsString, fs::OpenOptions, io, path::PathBuf, thread};

View File

@ -601,7 +601,3 @@ mod tests {
app.unwrap_critical();
}
}
#[cfg(nightly)]
#[cfg(test)]
mod benchmod;

View File

@ -1,7 +1,4 @@
use aho_corasick::AhoCorasick;
use once_cell::sync::Lazy;
use musichoard::collection::{album::Album, artist::Artist, track::Track};
use musichoard::collection::{album::Album, artist::Artist, string, track::Track};
use crate::tui::app::{
machine::{App, AppInner, AppMachine},
@ -9,17 +6,6 @@ use crate::tui::app::{
AppPublicState, AppState, Category, IAppInteractSearch,
};
// Unlikely that this covers all possible strings, but it should at least cover strings
// relevant for music (at least in English). The list of characters handled is based on
// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters.
//
// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018,
// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign
const SPECIAL: [char; 11] = ['', '', '', '—', '―', '', '', '“', '”', '…', ''];
const REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"];
static AC: Lazy<AhoCorasick> =
Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap());
pub struct SearchState {
string: String,
orig: ListSelection,
@ -114,10 +100,6 @@ trait IAppInteractSearchPrivate {
fn predicate_albums(case_sens: bool, char_sens: bool, search: &str, probe: &Album) -> bool;
fn predicate_tracks(case_sens: bool, char_sens: bool, search: &str, probe: &Track) -> bool;
fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool;
fn is_case_sensitive(artist_name: &str) -> bool;
fn is_char_sensitive(artist_name: &str) -> bool;
fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String;
}
impl IAppInteractSearchPrivate for AppMachine<SearchState> {
@ -160,9 +142,9 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
where
P: FnMut(bool, bool, &str, &T) -> bool,
{
let case_sens = Self::is_case_sensitive(name);
let char_sens = Self::is_char_sensitive(name);
let search = Self::normalize_search(name, !case_sens, !char_sens);
let case_sens = string::is_case_sensitive(name);
let char_sens = string::is_char_sensitive(name);
let search = string::normalize_string_with(name, !case_sens, !char_sens);
let mut index = st.index;
if next && ((index + 1) < st.list.len()) {
@ -172,18 +154,18 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
let slice = &st.list[index..];
slice
.iter()
.position(|probe| pred(case_sens, char_sens, &search, probe))
.position(|probe| pred(case_sens, char_sens, &search.string, probe))
.map(|slice_index| index + slice_index)
}
fn predicate_artists(case_sens: bool, char_sens: bool, search: &str, probe: &Artist) -> bool {
let name = Self::normalize_search(&probe.meta.id.name, !case_sens, !char_sens);
let mut result = name.starts_with(search);
let name = string::normalize_string_with(&probe.meta.id.name, !case_sens, !char_sens);
let mut result = name.string.starts_with(search);
if let Some(ref probe_sort) = probe.meta.sort {
if !result {
let name = Self::normalize_search(probe_sort, !case_sens, !char_sens);
result = name.starts_with(search);
let name = string::normalize_string_with(probe_sort, !case_sens, !char_sens);
result = name.string.starts_with(search);
}
}
@ -199,33 +181,9 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
}
fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool {
Self::normalize_search(title, !case_sens, !char_sens).starts_with(search)
}
fn is_case_sensitive(artist_name: &str) -> bool {
artist_name
.chars()
.any(|ch| ch.is_alphabetic() && ch.is_uppercase())
}
fn is_char_sensitive(artist_name: &str) -> bool {
// Benchmarking reveals that using AhoCorasick is slower. At a guess, this is likely due to
// a high constant cost of AhoCorasick and the otherwise simple nature of the task.
artist_name.chars().any(|ch| SPECIAL.contains(&ch))
}
fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String {
if asciify {
if lowercase {
AC.replace_all(&search.to_lowercase(), &REPLACE)
} else {
AC.replace_all(search, &REPLACE)
}
} else if lowercase {
search.to_lowercase()
} else {
search.to_owned()
}
string::normalize_string_with(title, !case_sens, !char_sens)
.string
.starts_with(search)
}
}
@ -544,28 +502,3 @@ mod tests {
assert_eq!(browse.inner.selection.selected(), None);
}
}
#[cfg(nightly)]
#[cfg(test)]
mod benches {
// The purpose of these benches was to evaluate the benefit of AhoCorasick over std solutions.
use test::Bencher;
use crate::tui::{app::machine::benchmod::ARTISTS, lib::MockIMusicHoard};
use super::*;
type Search = AppMachine<MockIMusicHoard, SearchState>;
#[bench]
fn is_char_sensitive(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(Search::is_char_sensitive(&iter.next().unwrap())))
}
#[bench]
fn normalize_search(b: &mut Bencher) {
let mut iter = ARTISTS.iter().cycle();
b.iter(|| test::black_box(Search::normalize_search(&iter.next().unwrap(), true, true)))
}
}