2025-01-03 17:46:55 +01:00
14 changed files with 476 additions and 202 deletions
--- a/.gitea/workflows/gitea-ci.yaml
+++ b/.gitea/workflows/gitea-ci.yaml
@ -38,7 +38,7 @@ jobs:
          --ignore "tests/*"
          --ignore "src/main.rs"
          --ignore "src/bin/musichoard-edit.rs"
-          --excl-line "^#\[derive|unimplemented\!\(\)"
+          --excl-line "^#\[derive|unimplemented\!\(|unreachable\!\("
          --excl-start "GRCOV_EXCL_START|mod tests \{"
          --excl-stop "GRCOV_EXCL_STOP"
          --output-path ./target/debug/coverage/
--- a/Cargo.toml
+++ b/Cargo.toml
@ -6,9 +6,9 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
-aho-corasick = { version = "1.1.2", optional = true }
+aho-corasick = { version = "1.1.2" }
 crossterm = { version = "0.28.1", optional = true}
-once_cell = { version = "1.19.0", optional = true}
+once_cell = { version = "1.19.0" }
 openssh = { version = "0.10.3", features = ["native-mux"], default-features = false, optional = true}
 paste = { version = "1.0.15", optional = true }
 ratatui = { version = "0.28.1", optional = true}
@ -37,7 +37,7 @@ database-json = ["serde", "serde_json"]
 library-beets = []
 library-beets-ssh = ["openssh", "tokio"]
 musicbrainz = ["paste", "reqwest", "serde", "serde_json"]
-tui = ["aho-corasick", "crossterm", "once_cell", "ratatui", "tui-input"]
+tui = ["crossterm", "ratatui", "tui-input"]

 [[bin]]
 name = "musichoard"
--- a/README.md
+++ b/README.md
@ -50,7 +50,7 @@ grcov codecov/debug/profraw \
      --ignore "tests/*" \
      --ignore "src/main.rs" \
      --ignore "src/bin/musichoard-edit.rs" \
-      --excl-line "^#\[derive|unimplemented\!\(\)" \
+      --excl-line "^#\[derive|unimplemented\!\(|unreachable\!\(" \
      --excl-start "GRCOV_EXCL_START|mod tests \{" \
      --excl-stop "GRCOV_EXCL_STOP" \
      --output-path ./codecov/debug/coverage/
--- a/src/core/collection/album.rs
+++ b/src/core/collection/album.rs
@ -1,11 +1,13 @@
 use std::mem;

 use crate::core::collection::{
-    merge::{Merge, MergeName, MergeSorted},
+    merge::{Merge, MergeSorted},
    musicbrainz::{MbAlbumRef, MbRefOption},
    track::{Track, TrackFormat},
 };

+use super::string;
+
 /// An album is a collection of tracks that were released together.
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct Album {
@ -54,12 +56,6 @@ impl AlbumLibId {
 /// Unique database identifier. Use MBID for this purpose.
 pub type AlbumMbRef = MbRefOption<MbAlbumRef>;

-impl MergeName for Album {
-    fn name(&self) -> &str {
-        &self.meta.id.title
-    }
-}
-
 // There are crates for handling dates, but we don't need much complexity beyond year-month-day.
 /// The album's release date.
 #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
@ -320,7 +316,8 @@ impl AlbumId {
    }

    pub fn compatible(&self, other: &AlbumId) -> bool {
-        let titles_compatible = self.title == other.title;
+        let titles_compatible =
+            string::normalize_string(&self.title) == string::normalize_string(&other.title);
        let lib_id_compatible =
            self.lib_id.is_none() || other.lib_id.is_none() || (self.lib_id == other.lib_id);
        let mb_ref_compatible =
--- a/src/core/collection/artist.rs
+++ b/src/core/collection/artist.rs
@ -1,12 +1,14 @@
 use std::{
    collections::HashMap,
    fmt::{self, Debug, Display},
+    mem,
 };

 use crate::core::collection::{
    album::{Album, AlbumLibId},
-    merge::{Merge, MergeCollections, MergeName},
+    merge::{Merge, MergeCollections, NormalMap},
    musicbrainz::{MbArtistRef, MbRefOption},
+    string::{self, NormalString},
 };

 /// An artist.
@ -30,12 +32,6 @@ pub struct ArtistInfo {
    pub properties: HashMap<String, Vec<String>>,
 }

-impl MergeName for Artist {
-    fn name(&self) -> &str {
-        &self.meta.id.name
-    }
-}
-
 /// The artist identifier.
 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct ArtistId {
@ -46,49 +42,6 @@ pub struct ArtistId {
 /// Unique database identifier. Use MBID for this purpose.
 pub type ArtistMbRef = MbRefOption<MbArtistRef>;

-impl Artist {
-    /// Create new [`Artist`] with the given [`ArtistId`].
-    pub fn new<Id: Into<ArtistId>>(id: Id) -> Self {
-        Artist {
-            meta: ArtistMeta::new(id),
-            albums: vec![],
-        }
-    }
-
-    fn merge_album_by_lib_id(
-        primary_albums: &mut [Album],
-        mut secondary_album: Album,
-    ) -> Option<Album> {
-        let id_opt = secondary_album.meta.id.lib_id;
-        if let id @ AlbumLibId::Value(_) | id @ AlbumLibId::Singleton = id_opt {
-            let mut iter = primary_albums.iter_mut();
-            if let Some(ref mut primary_album) = iter.find(|a| a.meta.id.lib_id == id) {
-                primary_album.merge_in_place(secondary_album);
-                return None;
-            }
-            secondary_album.meta.id.lib_id = AlbumLibId::None;
-        }
-        Some(secondary_album)
-    }
-
-    fn merge_albums_by_lib_id(
-        primary_albums: &mut [Album],
-        secondary_albums: Vec<Album>,
-    ) -> HashMap<String, Vec<Album>> {
-        let mut secondary_without_id = HashMap::<String, Vec<Album>>::new();
-        for secondary_album in secondary_albums.into_iter() {
-            let unmerged = Artist::merge_album_by_lib_id(primary_albums, secondary_album);
-            if let Some(secondary_album) = unmerged {
-                secondary_without_id
-                    .entry(secondary_album.meta.id.title.clone())
-                    .or_default()
-                    .push(secondary_album);
-            }
-        }
-        secondary_without_id
-    }
-}
-
 impl PartialOrd for Artist {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
@ -104,10 +57,94 @@ impl Ord for Artist {
 impl Merge for Artist {
    fn merge_in_place(&mut self, other: Self) {
        self.meta.merge_in_place(other.meta);
+        self.albums = MergeAlbums::merge_albums(mem::take(&mut self.albums), other.albums);
+    }
+}

-        let other_without_id = Artist::merge_albums_by_lib_id(&mut self.albums, other.albums);
-        MergeCollections::merge_by_name(&mut self.albums, other_without_id);
-        self.albums.sort_unstable();
+#[derive(Debug, Default)]
+struct MergeAlbums {
+    primary_by_lib_id: HashMap<u32, Album>,
+    primary_by_singleton: HashMap<NormalString, Album>,
+    primary_by_title: NormalMap<Album>,
+    secondary_by_title: NormalMap<Album>,
+    merged: Vec<Album>,
+}
+
+impl MergeAlbums {
+    fn merge_albums(primary_albums: Vec<Album>, secondary_albums: Vec<Album>) -> Vec<Album> {
+        let mut cache = MergeAlbums::new(primary_albums);
+        cache.merge_albums_by_lib_id(secondary_albums);
+        cache.merged.extend(MergeCollections::merge_by_name(
+            cache.primary_by_title,
+            cache.secondary_by_title,
+        ));
+        cache.merged.sort_unstable();
+        cache.merged
+    }
+
+    fn new(primary_albums: Vec<Album>) -> Self {
+        let mut cache = MergeAlbums::default();
+        for album in primary_albums.into_iter() {
+            match album.meta.id.lib_id {
+                AlbumLibId::Value(value) => {
+                    assert!(cache.primary_by_lib_id.insert(value, album).is_none())
+                }
+                AlbumLibId::Singleton => assert!(cache
+                    .primary_by_singleton
+                    .insert(string::normalize_string(&album.meta.id.title), album)
+                    .is_none()),
+                _ => unreachable!("primary collection should always have a lib_id"),
+            }
+        }
+        cache
+    }
+
+    fn merge_albums_by_lib_id(&mut self, secondary_albums: Vec<Album>) {
+        for secondary_album in secondary_albums.into_iter() {
+            self.merge_album_by_lib_id(secondary_album);
+        }
+        for (_, primary_album) in self.primary_by_lib_id.drain() {
+            self.primary_by_title.insert(
+                string::normalize_string(&primary_album.meta.id.title),
+                primary_album,
+            );
+        }
+        for (title, primary_album) in self.primary_by_singleton.drain() {
+            self.primary_by_title.insert(title, primary_album);
+        }
+    }
+
+    fn merge_album_by_lib_id(&mut self, mut secondary_album: Album) {
+        let title = string::normalize_string(&secondary_album.meta.id.title);
+        let id_opt = secondary_album.meta.id.lib_id;
+
+        let find = match id_opt {
+            AlbumLibId::Value(value) => self.primary_by_lib_id.remove(&value),
+            AlbumLibId::Singleton => self.primary_by_singleton.remove(&title),
+            AlbumLibId::None => None,
+        };
+
+        if let Some(mut primary_album) = find {
+            primary_album.merge_in_place(secondary_album);
+            self.merged.push(primary_album);
+            return;
+        }
+
+        secondary_album.meta.id.lib_id = AlbumLibId::None;
+        self.secondary_by_title.insert(
+            string::normalize_string(&secondary_album.meta.id.title),
+            secondary_album,
+        );
+    }
+}
+
+impl Artist {
+    /// Create new [`Artist`] with the given [`ArtistId`].
+    pub fn new<Id: Into<ArtistId>>(id: Id) -> Self {
+        Artist {
+            meta: ArtistMeta::new(id),
+            albums: vec![],
+        }
    }
 }

@ -248,7 +285,8 @@ impl ArtistId {
    }

    pub fn compatible(&self, other: &ArtistId) -> bool {
-        let names_compatible = self.name == other.name;
+        let names_compatible =
+            string::normalize_string(&self.name) == string::normalize_string(&other.name);
        let mb_ref_compatible =
            self.mb_ref.is_none() || other.mb_ref.is_none() || (self.mb_ref == other.mb_ref);
        names_compatible && mb_ref_compatible
@ -263,7 +301,13 @@ impl Display for ArtistId {

 #[cfg(test)]
 mod tests {
-    use crate::core::testmod::FULL_COLLECTION;
+    use crate::{
+        collection::{
+            album::{AlbumId, AlbumMbRef},
+            musicbrainz::MbAlbumRef,
+        },
+        core::testmod::FULL_COLLECTION,
+    };

    use super::*;

@ -533,4 +577,119 @@ mod tests {
        let merged = left.clone().merge(right);
        assert_eq!(expected, merged);
    }
+
+    #[test]
+    #[should_panic(expected = "multiple secondaries unsupported")]
+    fn merge_two_db_albums_to_one_lib_album() {
+        let mut left = Artist::new(ArtistId::new("Artist"));
+        let mut right = left.clone();
+
+        let album = Album::new(AlbumId::new("Album"));
+
+        left.albums.push(album.clone());
+        left.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
+
+        right.albums.push(album.clone());
+        right.albums.push(album.clone());
+
+        left.merge(right);
+    }
+
+    #[test]
+    #[should_panic(expected = "multiple primaries unsupported")]
+    fn merge_one_db_album_to_two_lib_albums() {
+        let mut left = Artist::new(ArtistId::new("Artist"));
+        let mut right = left.clone();
+
+        let album = Album::new(AlbumId::new("Album"));
+
+        left.albums.push(album.clone());
+        left.albums.push(album.clone());
+        left.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
+        left.albums[1].meta.id.lib_id = AlbumLibId::Value(2);
+
+        right.albums.push(album.clone());
+
+        left.merge(right);
+    }
+
+    #[test]
+    fn merge_normalized_album_titles() {
+        let mut left = Artist::new(ArtistId::new("Artist"));
+        let mut right = left.clone();
+
+        left.albums
+            .push(Album::new(AlbumId::new("Album‐Title ‘Title’")));
+        left.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
+
+        right
+            .albums
+            .push(Album::new(AlbumId::new("alBum—tiTle 'title’")));
+        right
+            .albums
+            .push(Album::new(AlbumId::new("Album‐Title “Title”")));
+
+        // The first album will be merged, the second will be added.
+        let mut expected = left.clone();
+        expected.albums.push(right.albums.last().unwrap().clone());
+        expected.albums.sort_unstable();
+
+        let merged = left.merge(right);
+        assert_eq!(expected, merged);
+    }
+
+    #[test]
+    fn merge_multiple_singletons() {
+        let mut left = Artist::new(ArtistId::new("Artist"));
+        let mut right = left.clone();
+
+        left.albums.push(Album::new(AlbumId::new("Singleton 1")));
+        left.albums.push(Album::new(AlbumId::new("Singleton 2")));
+        left.albums[0].meta.id.lib_id = AlbumLibId::Singleton;
+        left.albums[1].meta.id.lib_id = AlbumLibId::Singleton;
+
+        right.albums.push(Album::new(AlbumId::new("Singleton 1")));
+        right.albums.push(Album::new(AlbumId::new("Singleton 2")));
+        right.albums.push(Album::new(AlbumId::new("Singleton 3")));
+        right.albums[0].meta.id.lib_id = AlbumLibId::Singleton;
+        right.albums[2].meta.id.lib_id = AlbumLibId::Singleton;
+
+        // Expect first two albums to merge (including lib id) and the third to be added. However,
+        // the third one will lose its lib id due to no match in the primary.
+        let mut expected = left.clone();
+        expected.albums.push(right.albums.last().unwrap().clone());
+        expected.albums.last_mut().unwrap().meta.id.lib_id = AlbumLibId::None;
+
+        let merged = left.merge(right);
+        assert_eq!(expected, merged);
+    }
+
+    #[test]
+    fn merge_two_db_albums_to_one_lib_album_with_ids() {
+        let mut left = Artist::new(ArtistId::new("Artist"));
+        let mut right = left.clone();
+
+        let album = Album::new(AlbumId::new("Album"));
+
+        left.albums.push(album.clone());
+        left.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
+
+        // In this test, one DB album has a lib id that matches that of the lib album. This is the
+        // one that we expect to merge while the second album should just be added.
+        right.albums.push(album.clone());
+        right.albums[0].meta.id.lib_id = AlbumLibId::Value(1);
+        right.albums[0].meta.id.mb_ref = AlbumMbRef::Some(
+            MbAlbumRef::from_uuid_str("00000000-0000-0000-0000-000000000000").unwrap(),
+        );
+
+        right.albums.push(album.clone());
+        right.albums[1].meta.id.mb_ref = AlbumMbRef::Some(
+            MbAlbumRef::from_uuid_str("11111111-1111-1111-1111-111111111111").unwrap(),
+        );
+
+        let expected = right.clone();
+
+        let merged = left.merge(right);
+        assert_eq!(expected, merged);
+    }
 }
--- a/src/core/collection/benchmod.rs
+++ b/src/core/collection/benchmod.rs
@ -1,5 +1,7 @@
-// Date: 2024-02-19
-pub const ARTISTS: [&str; 141] = [
+// 1. `beet ls -a -f '$albumartist'`.
+// 2. `M-x delete-duplicate-lines`.
+// Date: 2025-01-03
+pub const ARTISTS: [&str; 156] = [
    "Abadden",
    "Acid Drinkers",
    "Adema",
@ -16,6 +18,7 @@ pub const ARTISTS: [&str; 141] = [
    "Аркона",
    "Artas",
    "As I Lay Dying",
+    "At the Gates",
    "Avenged Sevenfold",
    "Aversions Crown",
    "Aviators",
@ -30,6 +33,7 @@ pub const ARTISTS: [&str; 141] = [
    "Bloodbath",
    "Bloodbound",
    "Brothers of Metal",
+    "Carcass",
    "Carnation",
    "Cellar Darling",
    "Children of Bodom",
@ -44,7 +48,6 @@ pub const ARTISTS: [&str; 141] = [
    "Dynazty",
    "Edguy",
    "Eluveitie",
-    "Eminem",
    "Enforcer",
    "Ensiferum",
    "Epica",
@ -72,10 +75,12 @@ pub const ARTISTS: [&str; 141] = [
    "Heaven’s Basement",
    "Heavy Load",
    "Hermh",
+    "Ignea",
    "Immortal",
    "In Flames",
    "Insomnium",
    "Iron Maiden",
+    "Judas Priest",
    "Kalmah",
    "Kataklysm",
    "Kontrust",
@ -86,14 +91,16 @@ pub const ARTISTS: [&str; 141] = [
    "Linkin Park",
    "Lost Dreams",
    "Man Must Die",
+    "Månegarm",
    "Me and That Man",
+    "Megaton Sword",
    "Mercyful Fate",
+    "Metal Church",
    "Metallica",
-    "Michael Jackson",
    "Miracle of Sound",
    "Misery Index",
+    "Mortal Sin",
    "Mudvayne",
-    "Månegarm",
    "Nickelback",
    "Nightwish",
    "Nile",
@ -103,8 +110,8 @@ pub const ARTISTS: [&str; 141] = [
    "Oomph!",
    "P.O.D.",
    "Paddy and the Rats",
+    "Pain",
    "Paul Stanley",
-    "Persefone",
    "Peyton Parrish",
    "Powerwolf",
    "Primitai",
@ -113,9 +120,14 @@ pub const ARTISTS: [&str; 141] = [
    "Rammstein",
    "Red Hot Chili Peppers",
    "Revocation",
+    "Ride the Sky",
    "Rob Zombie",
    "Sabaton",
+    "Saltatio Mortis",
+    "Saltatio Mortis & Lara Loft",
+    "Satan",
    "Savatage",
+    "Scar Symmetry",
    "Scars on Broadway",
    "Scorpions",
    "Silent Descent",
@ -132,13 +144,18 @@ pub const ARTISTS: [&str; 141] = [
    "Timecry",
    "Trivium",
    "Tuomas Holopainen",
-    "VNV Nation",
+    "Turisas",
    "Vader",
+    "Vesania",
    "Vicious Crusade",
-    "The Wages of Sin",
+    "Vintersorg",
+    "VNV Nation",
+    "W.A.S.P.",
    "Whitechapel",
    "Within Temptation",
    "Woe of Tyrants",
    "Wovenwar",
    "Xandria",
+    "Jonathan Young",
+    "Jonathan Young, Peyton Parrish & Colm R. McGuinness",
 ];
--- a/src/core/collection/merge.rs
+++ b/src/core/collection/merge.rs
@ -1,4 +1,6 @@
-use std::{cmp::Ordering, collections::HashMap, hash::Hash, iter::Peekable, marker::PhantomData};
+use std::{cmp::Ordering, collections::HashMap, hash::Hash, iter::Peekable};
+
+use crate::core::collection::string::NormalString;

 /// A trait for merging two objects. The merge is asymmetric with the left argument considered to be
 /// the primary whose properties are to be kept in case of collisions.
@ -80,30 +82,49 @@ where
    }
 }

-pub trait MergeName {
-    fn name(&self) -> &str;
-}
+#[derive(Debug)]
+pub struct NormalMap<T>(HashMap<NormalString, Vec<T>>);

-pub struct MergeCollections<T, IT> {
-    _t: PhantomData<T>,
-    _it: PhantomData<IT>,
-}
-
-impl<T, IT> MergeCollections<T, IT>
-where
-    T: MergeName + Merge,
-    IT: IntoIterator<Item = (String, Vec<T>)>,
-{
-    pub fn merge_by_name(primary_items: &mut Vec<T>, secondary: IT) {
-        for (name, mut secondary_items) in secondary.into_iter() {
-            match primary_items.iter_mut().find(|item| item.name() == name) {
-                Some(ref mut primary_item) => {
-                    // We do not support merging multiple DB items with same name yet.
-                    assert_eq!(secondary_items.len(), 1);
-                    primary_item.merge_in_place(secondary_items.pop().unwrap());
-                }
-                None => primary_items.extend(secondary_items),
-            }
-        }
+impl<T> Default for NormalMap<T> {
+    fn default() -> Self {
+        NormalMap(HashMap::default())
+    }
+}
+
+impl<T> NormalMap<T> {
+    pub fn new() -> Self {
+        NormalMap(HashMap::new())
+    }
+
+    pub fn insert(&mut self, key: NormalString, item: T) {
+        self.0.entry(key).or_default().push(item);
+    }
+
+    fn remove(&mut self, key: &NormalString) -> Option<Vec<T>> {
+        self.0.remove(key)
+    }
+}
+
+pub struct MergeCollections;
+
+impl MergeCollections {
+    pub fn merge_by_name<T: Merge>(mut primary: NormalMap<T>, secondary: NormalMap<T>) -> Vec<T> {
+        let mut merged = vec![];
+        for (title, mut secondary_items) in secondary.0.into_iter() {
+            match primary.remove(&title) {
+                Some(mut primary_items) => {
+                    // We do not support merging multiple items with same name yet. Support will be
+                    // added once encountered in the wild.
+                    assert_eq!(primary_items.len(), 1, "multiple primaries unsupported");
+                    assert_eq!(secondary_items.len(), 1, "multiple secondaries unsupported");
+                    let mut primary_item = primary_items.pop().unwrap();
+                    primary_item.merge_in_place(secondary_items.pop().unwrap());
+                    merged.push(primary_item);
+                }
+                None => merged.extend(secondary_items),
+            }
+        }
+        merged.extend(primary.0.into_values().flatten());
+        merged
    }
 }
--- a/src/core/collection/mod.rs
+++ b/src/core/collection/mod.rs
@ -4,6 +4,7 @@ pub mod album;
 pub mod artist;
 pub mod merge;
 pub mod musicbrainz;
+pub mod string;
 pub mod track;

 use std::fmt::{self, Display};
@ -40,3 +41,7 @@ impl From<uuid::Error> for Error {
        Error::MbidError(err.to_string())
    }
 }
+
+#[cfg(nightly)]
+#[cfg(test)]
+mod benchmod;
--- a/src/core/collection/string.rs
+++ b/src/core/collection/string.rs
@ -0,0 +1,81 @@
+use aho_corasick::AhoCorasick;
+use once_cell::sync::Lazy;
+
+// Unlikely that this covers all possible strings, but it should at least cover strings
+// relevant for music (at least in English). The list of characters handled is based on
+// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters.
+//
+// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018,
+// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign
+const SPECIAL: [char; 11] = ['‐', '‒', '–', '—', '―', '‘', '’', '“', '”', '…', '−'];
+const REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"];
+static AC: Lazy<AhoCorasick> =
+    Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap());
+
+pub fn is_case_sensitive(string: &str) -> bool {
+    string
+        .chars()
+        .any(|ch| ch.is_alphabetic() && ch.is_uppercase())
+}
+
+pub fn is_char_sensitive(string: &str) -> bool {
+    // Benchmarking reveals that using AhoCorasick is slower. At a guess, this is likely due to
+    // a high constant cost of AhoCorasick and the otherwise simple nature of the task.
+    string.chars().any(|ch| SPECIAL.contains(&ch))
+}
+
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct NormalString {
+    pub string: String,
+}
+
+pub fn normalize_string_with(string: &str, lowercase: bool, asciify: bool) -> NormalString {
+    let string = if asciify {
+        if lowercase {
+            AC.replace_all(&string.to_lowercase(), &REPLACE)
+        } else {
+            AC.replace_all(string, &REPLACE)
+        }
+    } else if lowercase {
+        string.to_lowercase()
+    } else {
+        string.to_owned()
+    };
+    NormalString { string }
+}
+
+pub fn normalize_string(string: &str) -> NormalString {
+    NormalString {
+        string: AC.replace_all(&string.to_lowercase(), &REPLACE),
+    }
+}
+
+#[cfg(nightly)]
+#[cfg(test)]
+mod benches {
+    // The purpose of these benches was to evaluate the benefit of AhoCorasick over std solutions.
+    // The winners are left to allow comparison with future alternatives.
+    use test::Bencher;
+
+    use crate::core::collection::benchmod::ARTISTS;
+
+    use super::*;
+
+    #[bench]
+    fn bench_is_char_sensitive(b: &mut Bencher) {
+        let mut iter = ARTISTS.iter().cycle();
+        b.iter(|| test::black_box(is_char_sensitive(&iter.next().unwrap())))
+    }
+
+    #[bench]
+    fn bench_normalize_string_with(b: &mut Bencher) {
+        let mut iter = ARTISTS.iter().cycle();
+        b.iter(|| test::black_box(normalize_string_with(&iter.next().unwrap(), true, true)))
+    }
+
+    #[bench]
+    fn bench_normalize_string(b: &mut Bencher) {
+        let mut iter = ARTISTS.iter().cycle();
+        b.iter(|| test::black_box(normalize_string(&iter.next().unwrap())))
+    }
+}
--- a/src/core/musichoard/base.rs
+++ b/src/core/musichoard/base.rs
@ -1,11 +1,9 @@
-use std::collections::HashMap;
-
 use crate::core::{
    collection::{
        album::{Album, AlbumId},
        artist::{Artist, ArtistId},
-        merge::MergeCollections,
-        Collection,
+        merge::{MergeCollections, NormalMap},
+        string, Collection,
    },
    musichoard::{Error, MusicHoard},
 };
@ -59,20 +57,21 @@ impl<Database, Library> IMusicHoardBasePrivate for MusicHoard<Database, Library>
    }

    fn merge_collections(&self) -> Collection {
-        let mut primary = self.library_cache.clone();
-        let mut secondary = HashMap::<String, Vec<Artist>>::new();
+        let mut primary = NormalMap::<Artist>::new();
+        let mut secondary = NormalMap::<Artist>::new();

-        for artist in self.database_cache.iter().cloned() {
-            secondary
-                .entry(artist.meta.id.name.clone())
-                .or_default()
-                .push(artist);
+        for artist in self.library_cache.iter().cloned() {
+            primary.insert(string::normalize_string(&artist.meta.id.name), artist);
        }

-        MergeCollections::merge_by_name(&mut primary, secondary);
-        primary.sort_unstable();
+        for artist in self.database_cache.iter().cloned() {
+            secondary.insert(string::normalize_string(&artist.meta.id.name), artist);
+        }

-        primary
+        let mut collection = MergeCollections::merge_by_name(primary, secondary);
+        collection.sort_unstable();
+
+        collection
    }

    fn get_artist<'a>(collection: &'a Collection, artist_id: &ArtistId) -> Option<&'a Artist> {
@ -224,4 +223,71 @@ mod tests {
        mh.collection = mh.merge_collections();
        assert_eq!(expected, mh.collection);
    }
+
+    #[test]
+    #[should_panic(expected = "multiple secondaries unsupported")]
+    fn merge_two_db_artists_to_one_lib_artist() {
+        let mut left = Collection::new();
+        let mut right = Collection::new();
+
+        let artist = Artist::new(ArtistId::new("Artist"));
+
+        left.push(artist.clone());
+        right.push(artist.clone());
+        right.push(artist.clone());
+
+        let mut mh = MusicHoard {
+            library_cache: left.clone(),
+            database_cache: right.clone(),
+            ..Default::default()
+        };
+
+        mh.collection = mh.merge_collections();
+    }
+
+    #[test]
+    #[should_panic(expected = "multiple primaries unsupported")]
+    fn merge_one_db_artist_to_two_lib_artists() {
+        let mut left = Collection::new();
+        let mut right = Collection::new();
+
+        let artist = Artist::new(ArtistId::new("Artist"));
+
+        left.push(artist.clone());
+        left.push(artist.clone());
+        right.push(artist.clone());
+
+        let mut mh = MusicHoard {
+            library_cache: left.clone(),
+            database_cache: right.clone(),
+            ..Default::default()
+        };
+
+        mh.collection = mh.merge_collections();
+    }
+
+    #[test]
+    fn merge_normalized_artist_names() {
+        let mut left = Collection::new();
+        let mut right = Collection::new();
+
+        left.push(Artist::new(ArtistId::new("Artist‐Name ‘Name’")));
+
+        right.push(Artist::new(ArtistId::new("arTist—naMe 'name’")));
+        right.push(Artist::new(ArtistId::new("Artist‐Name “Name”")));
+
+        // The first artist will be merged, the second will be added.
+        let mut expected = left.clone();
+        expected.push(right.last().unwrap().clone());
+        expected.sort_unstable();
+
+        let mut mh = MusicHoard {
+            library_cache: left.clone(),
+            database_cache: right.clone(),
+            ..Default::default()
+        };
+
+        mh.collection = mh.merge_collections();
+        assert_eq!(expected, mh.collection);
+    }
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,4 +1,7 @@
 //! MusicHoard - a music collection manager.
+#![cfg_attr(nightly, feature(test))]
+#[cfg(nightly)]
+extern crate test;

 mod core;
 pub mod external;
--- a/src/main.rs
+++ b/src/main.rs
@ -1,7 +1,3 @@
-#![cfg_attr(nightly, feature(test))]
-#[cfg(nightly)]
-extern crate test;
-
 mod tui;

 use std::{ffi::OsString, fs::OpenOptions, io, path::PathBuf, thread};
--- a/src/tui/app/machine/mod.rs
+++ b/src/tui/app/machine/mod.rs
@ -601,7 +601,3 @@ mod tests {
        app.unwrap_critical();
    }
 }
-
-#[cfg(nightly)]
-#[cfg(test)]
-mod benchmod;
--- a/src/tui/app/machine/search_state.rs
+++ b/src/tui/app/machine/search_state.rs
@ -1,7 +1,4 @@
-use aho_corasick::AhoCorasick;
-use once_cell::sync::Lazy;
-
-use musichoard::collection::{album::Album, artist::Artist, track::Track};
+use musichoard::collection::{album::Album, artist::Artist, string, track::Track};

 use crate::tui::app::{
    machine::{App, AppInner, AppMachine},
@ -9,17 +6,6 @@ use crate::tui::app::{
    AppPublicState, AppState, Category, IAppInteractSearch,
 };

-// Unlikely that this covers all possible strings, but it should at least cover strings
-// relevant for music (at least in English). The list of characters handled is based on
-// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters.
-//
-// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018,
-// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign
-const SPECIAL: [char; 11] = ['‐', '‒', '–', '—', '―', '‘', '’', '“', '”', '…', '−'];
-const REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"];
-static AC: Lazy<AhoCorasick> =
-    Lazy::new(|| AhoCorasick::new(SPECIAL.map(|ch| ch.to_string())).unwrap());
-
 pub struct SearchState {
    string: String,
    orig: ListSelection,
@ -114,10 +100,6 @@ trait IAppInteractSearchPrivate {
    fn predicate_albums(case_sens: bool, char_sens: bool, search: &str, probe: &Album) -> bool;
    fn predicate_tracks(case_sens: bool, char_sens: bool, search: &str, probe: &Track) -> bool;
    fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool;
-
-    fn is_case_sensitive(artist_name: &str) -> bool;
-    fn is_char_sensitive(artist_name: &str) -> bool;
-    fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String;
 }

 impl IAppInteractSearchPrivate for AppMachine<SearchState> {
@ -160,9 +142,9 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
    where
        P: FnMut(bool, bool, &str, &T) -> bool,
    {
-        let case_sens = Self::is_case_sensitive(name);
-        let char_sens = Self::is_char_sensitive(name);
-        let search = Self::normalize_search(name, !case_sens, !char_sens);
+        let case_sens = string::is_case_sensitive(name);
+        let char_sens = string::is_char_sensitive(name);
+        let search = string::normalize_string_with(name, !case_sens, !char_sens);

        let mut index = st.index;
        if next && ((index + 1) < st.list.len()) {
@ -172,18 +154,18 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
        let slice = &st.list[index..];
        slice
            .iter()
-            .position(|probe| pred(case_sens, char_sens, &search, probe))
+            .position(|probe| pred(case_sens, char_sens, &search.string, probe))
            .map(|slice_index| index + slice_index)
    }

    fn predicate_artists(case_sens: bool, char_sens: bool, search: &str, probe: &Artist) -> bool {
-        let name = Self::normalize_search(&probe.meta.id.name, !case_sens, !char_sens);
-        let mut result = name.starts_with(search);
+        let name = string::normalize_string_with(&probe.meta.id.name, !case_sens, !char_sens);
+        let mut result = name.string.starts_with(search);

        if let Some(ref probe_sort) = probe.meta.sort {
            if !result {
-                let name = Self::normalize_search(probe_sort, !case_sens, !char_sens);
-                result = name.starts_with(search);
+                let name = string::normalize_string_with(probe_sort, !case_sens, !char_sens);
+                result = name.string.starts_with(search);
            }
        }

@ -199,33 +181,9 @@ impl IAppInteractSearchPrivate for AppMachine<SearchState> {
    }

    fn predicate_title(case_sens: bool, char_sens: bool, search: &str, title: &str) -> bool {
-        Self::normalize_search(title, !case_sens, !char_sens).starts_with(search)
-    }
-
-    fn is_case_sensitive(artist_name: &str) -> bool {
-        artist_name
-            .chars()
-            .any(|ch| ch.is_alphabetic() && ch.is_uppercase())
-    }
-
-    fn is_char_sensitive(artist_name: &str) -> bool {
-        // Benchmarking reveals that using AhoCorasick is slower. At a guess, this is likely due to
-        // a high constant cost of AhoCorasick and the otherwise simple nature of the task.
-        artist_name.chars().any(|ch| SPECIAL.contains(&ch))
-    }
-
-    fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String {
-        if asciify {
-            if lowercase {
-                AC.replace_all(&search.to_lowercase(), &REPLACE)
-            } else {
-                AC.replace_all(search, &REPLACE)
-            }
-        } else if lowercase {
-            search.to_lowercase()
-        } else {
-            search.to_owned()
-        }
+        string::normalize_string_with(title, !case_sens, !char_sens)
+            .string
+            .starts_with(search)
    }
 }

@ -544,28 +502,3 @@ mod tests {
        assert_eq!(browse.inner.selection.selected(), None);
    }
 }
-
-#[cfg(nightly)]
-#[cfg(test)]
-mod benches {
-    // The purpose of these benches was to evaluate the benefit of AhoCorasick over std solutions.
-    use test::Bencher;
-
-    use crate::tui::{app::machine::benchmod::ARTISTS, lib::MockIMusicHoard};
-
-    use super::*;
-
-    type Search = AppMachine<MockIMusicHoard, SearchState>;
-
-    #[bench]
-    fn is_char_sensitive(b: &mut Bencher) {
-        let mut iter = ARTISTS.iter().cycle();
-        b.iter(|| test::black_box(Search::is_char_sensitive(&iter.next().unwrap())))
-    }
-
-    #[bench]
-    fn normalize_search(b: &mut Bencher) {
-        let mut iter = ARTISTS.iter().cycle();
-        b.iter(|| test::black_box(Search::normalize_search(&iter.next().unwrap(), true, true)))
-    }
-}