String operations with aho-corasick
This commit is contained in:
parent
c1e634b473
commit
1cafca9048
10
Cargo.lock
generated
10
Cargo.lock
generated
@ -29,6 +29,15 @@ dependencies = [
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "allocator-api2"
|
||||
version = "0.2.16"
|
||||
@ -398,6 +407,7 @@ dependencies = [
|
||||
name = "musichoard"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"crossterm",
|
||||
"mockall",
|
||||
"once_cell",
|
||||
|
@ -6,7 +6,9 @@ edition = "2021"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
aho-corasick = { version = "1.1.2", optional = true }
|
||||
crossterm = { version = "0.27.0", optional = true}
|
||||
once_cell = { version = "1.19.0", optional = true}
|
||||
openssh = { version = "0.10.3", features = ["native-mux"], default-features = false, optional = true}
|
||||
ratatui = { version = "0.26.0", optional = true}
|
||||
serde = { version = "1.0.196", features = ["derive"], optional = true }
|
||||
@ -27,7 +29,7 @@ bin = ["structopt"]
|
||||
database-json = ["serde", "serde_json"]
|
||||
library-beets = []
|
||||
ssh-library = ["openssh", "tokio"]
|
||||
tui = ["crossterm", "ratatui"]
|
||||
tui = ["aho-corasick", "crossterm", "once_cell", "ratatui"]
|
||||
|
||||
[[bin]]
|
||||
name = "musichoard"
|
||||
|
@ -1,3 +1,6 @@
|
||||
use aho_corasick::AhoCorasick;
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
use musichoard::collection::artist::Artist;
|
||||
|
||||
use crate::tui::{
|
||||
@ -9,6 +12,16 @@ use crate::tui::{
|
||||
lib::IMusicHoard,
|
||||
};
|
||||
|
||||
// Unlikely that this covers all possible strings, but it should at least cover strings
|
||||
// relevant for music (at least in English). The list of characters handled is based on
|
||||
// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters.
|
||||
//
|
||||
// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash, U+2015 horizontal bar, U+2018,
|
||||
// U+2019, U+201C, U+201D, U+2026, U+2212 minus sign
|
||||
/// Typographic punctuation (dashes, curly quotes, ellipsis, minus sign) searched for when
/// normalizing strings to ASCII.
///
/// The order is significant: the pattern at index `i` is replaced by the entry at index `i` of
/// `REPLACE`, so both arrays must be kept the same length and in the same order.
static PATTERNS: [&str; 11] = ["‐", "‒", "–", "—", "―", "‘", "’", "“", "”", "…", "−"];
|
||||
/// ASCII replacements for the entries of `PATTERNS`.
///
/// The entry at index `i` is substituted for the pattern at index `i` of `PATTERNS`, so both
/// arrays must be kept the same length and in the same order.
static REPLACE: [&str; 11] = ["-", "-", "-", "-", "-", "'", "'", "\"", "\"", "...", "-"];
|
||||
// Shared Aho-Corasick automaton over `PATTERNS`, constructed lazily on first use.
// The `unwrap` panics if the automaton cannot be built from `PATTERNS`.
static AC: Lazy<AhoCorasick> = Lazy::new(|| AhoCorasick::new(&PATTERNS).unwrap());
|
||||
|
||||
pub struct AppSearch {
|
||||
string: String,
|
||||
orig: ListSelection,
|
||||
@ -160,12 +173,9 @@ impl<MH: IMusicHoard> IAppInteractSearchPrivate for AppMachine<MH, AppSearch> {
|
||||
}
|
||||
|
||||
fn is_char_sensitive(artist_name: &str) -> bool {
|
||||
let special_chars: &[char] = &['‐', '‒', '–', '—', '―', '−', '‘', '’', '“', '”', '…'];
|
||||
artist_name.chars().any(|ch| special_chars.contains(&ch))
|
||||
AC.find(artist_name).is_some()
|
||||
}
|
||||
|
||||
// FIXME: use aho_corasick for normalization - AhoCorasick does not implement PartialEq. It
|
||||
// makes more sense to be placed in app.rs as it would make ArtistSelection non-trivial.
|
||||
fn normalize_search(search: &str, lowercase: bool, asciify: bool) -> String {
|
||||
let normalized = if lowercase {
|
||||
search.to_lowercase()
|
||||
@ -173,17 +183,8 @@ impl<MH: IMusicHoard> IAppInteractSearchPrivate for AppMachine<MH, AppSearch> {
|
||||
search.to_owned()
|
||||
};
|
||||
|
||||
// Unlikely that this covers all possible strings, but it should at least cover strings
|
||||
// relevant for music (at least in English). The list of characters handled is based on
|
||||
// https://wiki.musicbrainz.org/User:Yurim/Punctuation_and_Special_Characters.
|
||||
if asciify {
|
||||
normalized
|
||||
// U+2010 hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em dash,
|
||||
// U+2015 horizontal bar, U+2212 minus sign
|
||||
.replace(['‐', '‒', '–', '—', '―', '−'], "-")
|
||||
.replace(['‘', '’'], "'") // U+2018, U+2019
|
||||
.replace(['“', '”'], "\"") // U+201C, U+201D
|
||||
.replace('…', "...") // U+2026
|
||||
AC.replace_all(&normalized, &REPLACE)
|
||||
} else {
|
||||
normalized
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user