diff --git a/Cargo.lock b/Cargo.lock index f662848..bc004e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -29,6 +29,15 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + [[package]] name = "allocator-api2" version = "0.2.16" @@ -398,6 +407,7 @@ dependencies = [ name = "musichoard" version = "0.1.0" dependencies = [ + "aho-corasick", "crossterm", "mockall", "once_cell", diff --git a/Cargo.toml b/Cargo.toml index 3e83004..4065a28 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,9 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +aho-corasick = { version = "1.1.2", optional = true } crossterm = { version = "0.27.0", optional = true} +once_cell = { version = "1.19.0", optional = true} openssh = { version = "0.10.3", features = ["native-mux"], default-features = false, optional = true} ratatui = { version = "0.26.0", optional = true} serde = { version = "1.0.196", features = ["derive"], optional = true } @@ -27,7 +29,7 @@ bin = ["structopt"] database-json = ["serde", "serde_json"] library-beets = [] ssh-library = ["openssh", "tokio"] -tui = ["crossterm", "ratatui"] +tui = ["aho-corasick", "crossterm", "once_cell", "ratatui"] [[bin]] name = "musichoard" diff --git a/src/tui/app/machine/search.rs b/src/tui/app/machine/search.rs index 783e816..92380a8 100644 --- a/src/tui/app/machine/search.rs +++ b/src/tui/app/machine/search.rs @@ -1,3 +1,6 @@ +use aho_corasick::AhoCorasick; +use once_cell::sync::Lazy; + use musichoard::collection::artist::Artist; use crate::tui::{ @@ -9,6 +12,16 @@ use crate::tui::{ lib::IMusicHoard, }; +// Unlikely that this covers all possible strings, but it should at least cover strings +// relevant for music (at least in English). 
The list of characters handled is based on +// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters. +// +// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018, +// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign +static PATTERNS: [&'static str; 11] = ["‐", "‒", "–", "—", "―", "‘", "’", "“", "”", "…", "−"]; +static REPLACE: [&'static str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"]; +static AC: Lazy<AhoCorasick> = Lazy::new(|| AhoCorasick::new(&PATTERNS).unwrap()); + pub struct AppSearch { string: String, orig: ListSelection, @@ -160,12 +173,9 @@ impl IAppInteractSearchPrivate for AppMachine { } fn is_char_sensitive(artist_name: &str) -> bool { - let special_chars: &[char] = &['‐', '‒', '–', '—', '―', '−', '‘', '’', '“', '”', '…']; - artist_name.chars().any(|ch| special_chars.contains(&ch)) + AC.find(artist_name).is_some() } - // FIXME: use aho_corasick for normalization - AhoCorasick does not implement PartialEq. It - // makes more sense to be places in app.rs as it would make ArtistSelection non-trivial. fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String { let normalized = if lowercase { search.to_lowercase() @@ -173,17 +183,8 @@ impl IAppInteractSearchPrivate for AppMachine { search.to_owned() }; - // Unlikely that this covers all possible strings, but it should at least cover strings - // relevant for music (at least in English). The list of characters handled is based on - // https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters. if asciify { - normalized - // U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, - // U+2015 horizontal bar, U+2212 minus sign - .replace(['‐', '‒', '–', '—', '―', '−'], "-") - .replace(['‘', '’'], "'") // U+2018, U+2019 - .replace(['“', '”'], "\"") // U+201C, U+201D - .replace('…', "...") // U+2026 + AC.replace_all(&normalized, &REPLACE) } else { normalized }