# qobuz-dl/qobuz_dl/core.py

# ----- Testing ------
import json

# --------------------
import logging
import os
import re
import string
import sys
import time
from typing import Tuple

import requests
from bs4 import BeautifulSoup as bso
from mutagen.flac import FLAC
from mutagen.mp3 import EasyMP3
from pathvalidate import sanitize_filename

import qobuz_dl.spoofbuz as spoofbuz
from qobuz_dl import downloader, qopy
from qobuz_dl.color import CYAN, OFF, RED, YELLOW, DF, RESET
from qobuz_dl.exceptions import NonStreamable
from qobuz_dl.db import create_db, handle_download_id

# Base url of the Qobuz web player; search_by_type builds result urls as
# WEB_URL + "<type>/<id>".
WEB_URL = "https://play.qobuz.com/"
# CSS selectors applied to a last.fm playlist page (see download_lastfm_pl)
# to collect the artist and title cells.
ARTISTS_SELECTOR = "td.chartlist-artist > a"
TITLE_SELECTOR = "td.chartlist-name > a"
# File extensions make_m3u treats as audio when building a playlist file.
EXTENSIONS = (".mp3", ".flac")
# Quality id -> label; initialize_client logs the label of the chosen quality.
QUALITIES = {5: "5 - MP3", 6: "6 - FLAC", 7: "7 - 24B<96kHz", 27: "27 - 24B>96kHz"}

# Module-level logger used by everything in this file.
logger = logging.getLogger(__name__)


class PartialFormatter(string.Formatter):
    """``string.Formatter`` that renders placeholders instead of raising.

    Fields that cannot be looked up, or whose value is falsy, are rendered as
    ``missing``. Fields whose format spec is rejected with ``ValueError`` are
    rendered as ``bad_fmt`` (the error is re-raised when ``bad_fmt`` is falsy).
    """

    def __init__(self, missing="n/a", bad_fmt="n/a"):
        self.missing = missing
        self.bad_fmt = bad_fmt

    def get_field(self, field_name, args, kwargs):
        """Resolve ``field_name``; failed lookups give ``(None, field_name)``."""
        try:
            return super().get_field(field_name, args, kwargs)
        except (KeyError, AttributeError):
            return None, field_name

    def format_field(self, value, spec):
        """Format ``value`` with ``spec``, falling back to the placeholders."""
        if value:
            try:
                return super().format_field(value, spec)
            except ValueError:
                if not self.bad_fmt:
                    raise
                return self.bad_fmt
        return self.missing


class QobuzDL:
    def __init__(
        self,
        directory="Qobuz Downloads",
        quality=6,
        embed_art=False,
        lucky_limit=1,
        lucky_type="album",
        interactive_limit=20,
        ignore_singles_eps=False,
        no_m3u_for_playlists=False,
        quality_fallback=True,
        cover_og_quality=False,
        no_cover=False,
        downloads_db=None,
        folder_format="{artist} - {album} ({year}) [{bit_depth}B-{sampling_rate}kHz]",
        track_format="{tracknumber}. {tracktitle}",
        smart_discography=False,
    ):
        """Store the download settings and prepare the output directory.

        ``directory`` is normalized and created right away through
        ``create_dir``. ``downloads_db`` is handed to ``create_db`` when it is
        truthy; otherwise no database is used. Every other argument is kept
        as an attribute of the same name.
        """
        # Filesystem / database setup (these two have side effects).
        self.directory = self.create_dir(directory)
        self.downloads_db = create_db(downloads_db) if downloads_db else None

        # Quality and artwork.
        self.quality = quality
        self.quality_fallback = quality_fallback
        self.embed_art = embed_art
        self.cover_og_quality = cover_og_quality
        self.no_cover = no_cover

        # Search modes.
        self.lucky_limit = lucky_limit
        self.lucky_type = lucky_type
        self.interactive_limit = interactive_limit

        # What gets downloaded and how it is laid out on disk.
        self.ignore_singles_eps = ignore_singles_eps
        self.smart_discography = smart_discography
        self.no_m3u_for_playlists = no_m3u_for_playlists
        self.folder_format = folder_format
        self.track_format = track_format

    def initialize_client(self, email, pwd, app_id, secrets):
        """Create the ``qopy`` client and log the label of the chosen quality."""
        self.client = qopy.Client(email, pwd, app_id, secrets)
        quality_label = QUALITIES[int(self.quality)]
        logger.info(f"{YELLOW}Set quality: {quality_label}\n")

    def get_tokens(self):
        """Read the app id and the non-empty secrets from a ``spoofbuz`` spoofer."""
        spoofer = spoofbuz.Spoofer()
        self.app_id = spoofer.getAppId()
        # Empty fields are dropped; only truthy secrets are kept.
        self.secrets = list(filter(None, spoofer.getSecrets().values()))

    def create_dir(self, directory=None):
        """Normalize ``directory``, create it (with parents) and return the path."""
        normalized = os.path.normpath(directory)
        # exist_ok: an already existing directory is not an error.
        os.makedirs(normalized, exist_ok=True)
        return normalized

    def get_url_info(self, url: str) -> Tuple[str, str]:
        """Returns the type of the url and the id.

        Compatible with urls of the form:
            https://www.qobuz.com/us-en/{type}/{name}/{id}
            https://open.qobuz.com/{type}/{id}
            https://play.qobuz.com/{type}/{id}
            /us-en/{type}/-/{id}

        Args:
            url: The url (or url path) to inspect.

        Returns:
            A ``(type, id)`` tuple, where type is one of album, artist,
            track, playlist or label.

        Raises:
            IndexError: If no type/id pair can be found in ``url``. This is
                one of the errors ``handle_url`` reports as an invalid url,
                whereas the previous ``AttributeError`` on a failed match
                escaped that handler.
        """

        match = re.search(
            r"(?:https:\/\/(?:w{3}|open|play)\.qobuz\.com)?(?:\/[a-z]{2}-[a-z]{2})"
            r"?\/(album|artist|track|playlist|label)(?:\/[-\w\d]+)?\/([\w\d]+)",
            url,
        )
        if match is None:
            raise IndexError(f"Could not find a type and an id in url: {url!r}")
        return match.groups()

    def download_from_id(self, item_id, album=True, alt_path=None):
        """Download one album or track unless the database already lists it.

        Args:
            item_id: Id of the release to download.
            album: Passed through to the downloader; True for albums, False
                for single tracks.
            alt_path: Target directory; ``self.directory`` is used when falsy.

        Request errors and ``NonStreamable`` are logged and swallowed so a
        failing release does not stop the caller. The id is recorded in the
        database only after the download call returns.
        """
        already_downloaded = handle_download_id(
            self.downloads_db, item_id, add_id=False
        )
        if already_downloaded:
            logger.info(
                f"{OFF}This release ID ({item_id}) was already downloaded "
                "according to the local database.\nUse the '--no-db' flag "
                "to bypass this."
            )
            return

        target_dir = alt_path or self.directory
        try:
            downloader.download_id_by_type(
                self.client,
                item_id,
                target_dir,
                str(self.quality),
                album,
                self.embed_art,
                self.ignore_singles_eps,
                self.quality_fallback,
                self.cover_og_quality,
                self.no_cover,
                folder_format=self.folder_format,
                track_format=self.track_format,
            )
            handle_download_id(self.downloads_db, item_id, add_id=True)
        except (requests.exceptions.RequestException, NonStreamable) as err:
            logger.error(f"{RED}Error getting release: {err}. Skipping...")

    def handle_url(self, url):
        """Download the release(s) a Qobuz url points to.

        Album and track urls are downloaded directly. Playlist, artist and
        label urls are expanded through the client into a list of items that
        are downloaded into a sub-folder of ``self.directory`` named after
        the playlist/artist/label; playlists additionally get an .m3u file.

        The smart discography filter reads album metadata of a single
        artist, so it is applied only to artist urls and only when
        ``self.smart_discography`` is set. Every other collection uses the
        plain item list returned by the client.

        Args:
            url: A url in one of the forms accepted by ``get_url_info``.
        """
        possibles = {
            "playlist": {
                "func": self.client.get_plist_meta,
                "iterable_key": "tracks",
            },
            "artist": {
                "func": self.client.get_artist_meta,
                "iterable_key": "albums",
            },
            "label": {
                "func": self.client.get_label_meta,
                "iterable_key": "albums",
            },
            "album": {"album": True, "func": None, "iterable_key": None},
            "track": {"album": False, "func": None, "iterable_key": None},
        }
        try:
            url_type, item_id = self.get_url_info(url)
            type_dict = possibles[url_type]
        except (KeyError, IndexError):
            logger.info(
                f'{RED}Invalid url: "{url}". Use urls from ' "https://play.qobuz.com!"
            )
            return

        # Single releases need no expansion.
        if not type_dict["func"]:
            self.download_from_id(item_id, type_dict["album"])
            return

        content = list(type_dict["func"](item_id))
        content_name = content[0]["name"]
        logger.info(
            f"{YELLOW}Downloading all the music from {content_name} "
            f"({url_type})!"
        )
        new_path = self.create_dir(
            os.path.join(self.directory, sanitize_filename(content_name))
        )

        if self.smart_discography and url_type == "artist":
            items = self.smart_discography_filter(content, True, True)
        else:
            # NOTE(review): only the first element of `content` is used, as in
            # the original item extraction - confirm whether the client can
            # yield more than one page.
            items = content[0][type_dict["iterable_key"]]["items"]

        logger.info(f"{YELLOW}{len(items)} downloads in queue")
        items_are_albums = type_dict["iterable_key"] == "albums"
        for item in items:
            self.download_from_id(item["id"], items_are_albums, new_path)
        if url_type == "playlist":
            self.make_m3u(new_path)

    def download_list_of_urls(self, urls):
        """Send every entry of ``urls`` to the handler that matches it.

        Entries containing "last.fm" are treated as last.fm playlists,
        entries naming an existing file as text files of urls, and anything
        else as a Qobuz url. Nothing happens (besides a log line) when
        ``urls`` is empty or not a list.
        """
        if not (urls and isinstance(urls, list)):
            logger.info(f"{OFF}Nothing to download")
            return

        for entry in urls:
            if "last.fm" in entry:
                handler = self.download_lastfm_pl
            elif os.path.isfile(entry):
                handler = self.download_from_txt_file
            else:
                handler = self.handle_url
            handler(entry)

    def download_from_txt_file(self, txt_file):
        """Download every url listed in a text file, one url per line.

        Surrounding whitespace (including ``\\r`` from Windows line endings)
        is stripped. Blank lines and lines starting with ``#`` are ignored,
        so they are never handed on as urls.

        Args:
            txt_file: Path of the text file to read.
        """
        with open(txt_file, "r") as txt:
            try:
                stripped_lines = (line.strip() for line in txt)
                urls = [
                    line
                    for line in stripped_lines
                    if line and not line.startswith("#")
                ]
            except (OSError, UnicodeError) as e:
                logger.error(f"{RED}Invalid text file: {e}")
                return

        # The file is closed at this point; downloads can take a long time.
        logger.info(
            f"{YELLOW}qobuz-dl will download {len(urls)}"
            f" urls from file: {txt_file}"
        )
        self.download_list_of_urls(urls)

    def lucky_mode(self, query, download=True):
        """Search ``self.lucky_type`` items and optionally download the hits.

        Args:
            query: Search text; must be at least 3 characters long.
            download: When true, the found urls are downloaded as well.

        Returns:
            Whatever ``search_by_type`` returned in lucky mode, or None when
            the query is too short.
        """
        if len(query) < 3:
            logger.info(f"{RED}Your search query is too short or invalid")
            return

        announcement = (
            f'{YELLOW}Searching {self.lucky_type}s for "{query}".\n'
            f"{YELLOW}qobuz-dl will attempt to download the first "
            f"{self.lucky_limit} results."
        )
        logger.info(announcement)

        found = self.search_by_type(query, self.lucky_type, self.lucky_limit, True)
        if not download:
            return found

        self.download_list_of_urls(found)
        return found

    def format_duration(self, duration):
        """Render ``duration`` (seconds) as an ``HH:MM:SS`` string."""
        broken_down = time.gmtime(duration)
        return time.strftime("%H:%M:%S", broken_down)

    def search_by_type(self, query, item_type, limit=10, lucky=False):
        """Search Qobuz for items of one type and describe the results.

        Args:
            query: Search text; must be at least 3 characters long.
            item_type: One of "album", "artist", "track" or "playlist".
            limit: Passed to the client's search call.
            lucky: When true, only the result urls are returned.

        Returns:
            A list with one entry per result: the url string when ``lucky``
            is true, otherwise a dict with a display ``"text"`` and the
            ``"url"``. None is returned when the query is too short or when
            a ``KeyError``/``IndexError`` occurs (unknown ``item_type`` or a
            key missing from the response).
        """
        if len(query) < 3:
            logger.info(f"{RED}Your search query is too short or invalid")
            return

        possibles = {
            "album": {
                "func": self.client.search_albums,
                "album": True,
                "key": "albums",
                "format": "{artist[name]} - {title}",
                "requires_extra": True,
            },
            "artist": {
                "func": self.client.search_artists,
                "album": True,
                "key": "artists",
                "format": "{name} - ({albums_count} releases)",
                "requires_extra": False,
            },
            "track": {
                "func": self.client.search_tracks,
                "album": False,
                "key": "tracks",
                "format": "{performer[name]} - {title}",
                "requires_extra": True,
            },
            "playlist": {
                "func": self.client.search_playlists,
                "album": False,
                "key": "playlists",
                "format": "{name} - ({tracks_count} releases)",
                "requires_extra": False,
            },
        }

        try:
            mode_dict = possibles[item_type]
            results = mode_dict["func"](query, limit)
            iterable = results[mode_dict["key"]]["items"]
            # One formatter is enough for every result.
            fmt = PartialFormatter()
            item_list = []
            for i in iterable:
                text = fmt.format(mode_dict["format"], **i)
                if mode_dict["requires_extra"]:
                    # Albums and tracks also show duration and quality tier.
                    text = "{} - {} [{}]".format(
                        text,
                        self.format_duration(i["duration"]),
                        "HI-RES" if i["hires_streamable"] else "LOSSLESS",
                    )

                url = "{}{}/{}".format(WEB_URL, item_type, i.get("id", ""))
                item_list.append({"text": text, "url": url} if not lucky else url)
            return item_list
        except (KeyError, IndexError):
            logger.info(f"{RED}Invalid type: {item_type}")
            return

    def interactive(self, download=True):
        """Run the terminal search-and-select session.

        The user picks an item type, searches as many times as wanted while
        queueing results, then picks a quality (stored in ``self.quality``).

        Args:
            download: When true, the queued urls are downloaded at the end.

        Returns:
            The list of queued urls, or None when the session is interrupted
            with Ctrl + c.
        """
        try:
            from pick import pick
        except (ImportError, ModuleNotFoundError):
            if os.name != "nt":
                raise
            sys.exit(
                "Please install curses with "
                '"pip3 install windows-curses" to continue'
            )

        quality_choices = [
            {"q_string": "320", "q": 5},
            {"q_string": "Lossless", "q": 6},
            {"q_string": "Hi-res =< 96kHz", "q": 7},
            {"q_string": "Hi-Res > 96 kHz", "q": 27},
        ]

        def result_label(option):
            return option.get("text")

        def quality_label(option):
            return option.get("q_string")

        try:
            type_labels = ["Albums", "Tracks", "Artists", "Playlists"]
            chosen_label = pick(type_labels, "I'll search for:\n[press Intro]")[0]
            # "Albums" -> "album": drop the plural "s" and lowercase.
            selected_type = chosen_label[:-1].lower()
            logger.info(f"{YELLOW}Ok, we'll search for {selected_type}s{RESET}")

            queued_urls = []
            while True:
                query = input(f"{CYAN}Enter your search: [Ctrl + c to quit]\n-{DF} ")
                logger.info(f"{YELLOW}Searching...{RESET}")
                options = self.search_by_type(
                    query, selected_type, self.interactive_limit
                )
                if not options:
                    logger.info(f"{OFF}Nothing found{RESET}")
                    continue

                title = (
                    f'*** RESULTS FOR "{query.title()}" ***\n\n'
                    "Select [space] the item(s) you want to download "
                    "(one or more)\nPress Ctrl + c to quit\n"
                    "Don't select anything to try another search"
                )
                selected_items = pick(
                    options,
                    title,
                    multiselect=True,
                    min_selection_count=0,
                    options_map_func=result_label,
                )
                if not len(selected_items) > 0:
                    logger.info(f"{YELLOW}Ok, try again...{RESET}")
                    continue

                # Each selection is an (option, index) pair.
                for selection in selected_items:
                    queued_urls.append(selection[0]["url"])

                keep_searching = pick(
                    ["Yes", "No"],
                    "Items were added to queue to be downloaded. "
                    "Keep searching?",
                )
                if keep_searching[0][0] == "N":
                    break

            if not queued_urls:
                return

            desc = (
                "Select [intro] the quality (the quality will "
                "be automatically\ndowngraded if the selected "
                "is not found)"
            )
            picked_quality = pick(
                quality_choices,
                desc,
                default_index=1,
                options_map_func=quality_label,
            )
            self.quality = picked_quality[0]["q"]

            if download:
                self.download_list_of_urls(queued_urls)

            return queued_urls
        except KeyboardInterrupt:
            logger.info(f"{YELLOW}Bye")
            return

    def download_lastfm_pl(self, playlist_url):
        """Download the tracks of a last.fm playlist page from Qobuz.

        The page is scraped for artist/title pairs, each pair is searched on
        Qobuz as a track, and the first hit is downloaded into a folder named
        after the page's first ``h1``. An .m3u file is written at the end
        (see ``make_m3u``).

        Tracks for which the search returns nothing are skipped instead of
        aborting the whole playlist.

        Args:
            playlist_url: Url of the last.fm playlist page.
        """
        # Apparently, last fm API doesn't have a playlist endpoint. If you
        # find out that it has, please fix this!
        try:
            r = requests.get(playlist_url, timeout=10)
        except requests.exceptions.RequestException as e:
            logger.error(f"{RED}Playlist download failed: {e}")
            return
        soup = bso(r.content, "html.parser")
        artists = [artist.text for artist in soup.select(ARTISTS_SELECTOR)]
        titles = [title.text for title in soup.select(TITLE_SELECTOR)]

        # Only trust the scrape when every artist lines up with a title.
        track_list = []
        if artists and len(artists) == len(titles):
            track_list = [
                artist + " " + title for artist, title in zip(artists, titles)
            ]

        if not track_list:
            logger.info(f"{OFF}Nothing found")
            return

        pl_title = sanitize_filename(soup.select_one("h1").text)
        pl_directory = os.path.join(self.directory, pl_title)
        logger.info(
            f"{YELLOW}Downloading playlist: {pl_title} " f"({len(track_list)} tracks)"
        )

        for query in track_list:
            results = self.search_by_type(query, "track", 1, lucky=True)
            if not results:
                # search_by_type gives None or an empty list when there is
                # no usable result; move on to the next track.
                logger.info(f"{OFF}No match found for: {query}")
                continue
            track_id = self.get_url_info(results[0])[1]
            if track_id:
                self.download_from_id(track_id, False, pl_directory)

        self.make_m3u(pl_directory)

    def make_m3u(self, pl_directory):
        """Write ``<folder name>.m3u`` inside ``pl_directory``.

        Every file below ``pl_directory`` whose extension is in
        ``EXTENSIONS`` gets an ``#EXTINF`` entry built from its tags. Entry
        paths are relative to ``pl_directory`` (where the playlist file
        lives), whatever the nesting depth of the audio file. Files whose
        tags cannot be read are left out. Nothing is written when
        ``self.no_m3u_for_playlists`` is set or when no entry was produced.

        Args:
            pl_directory: Root folder of the downloaded playlist.
        """
        if self.no_m3u_for_playlists:
            return

        track_list = ["#EXTM3U"]
        rel_folder = os.path.basename(os.path.normpath(pl_directory))
        pl_name = rel_folder + ".m3u"
        for local, dirs, files in os.walk(pl_directory):
            # Sorting in place makes os.walk visit sub-folders in order.
            dirs.sort()
            for file_ in sorted(files):
                extension = os.path.splitext(file_)[-1]
                if extension not in EXTENSIONS:
                    continue

                audio_file = os.path.abspath(os.path.join(local, file_))
                audio_rel_file = os.path.relpath(audio_file, pl_directory)
                try:
                    # Choose the tag reader from the extension, not from a
                    # substring of the whole path.
                    pl_item = (
                        EasyMP3(audio_file)
                        if extension == ".mp3"
                        else FLAC(audio_file)
                    )
                    title = pl_item["TITLE"][0]
                    artist = pl_item["ARTIST"][0]
                    length = int(pl_item.info.length)
                    index = "#EXTINF:{}, {} - {}\n{}".format(
                        length, artist, title, audio_rel_file
                    )
                except Exception:
                    # Best effort: an unreadable or untagged file is skipped,
                    # but KeyboardInterrupt/SystemExit are not swallowed.
                    continue
                track_list.append(index)

        if len(track_list) > 1:
            with open(os.path.join(pl_directory, pl_name), "w") as pl:
                pl.write("\n\n".join(track_list))

    def smart_discography_filter(
        self, contents: list, save_space=False, remove_extras=False
    ) -> list:
        """When downloading some artists' discography, there can be a lot
        of duplicate albums that needlessly use 10's of GB of bandwidth. This
        filters the duplicates.

        Example (Stevie Wonder):
            * ...
            * Songs In The Key of Life [24/192]
            * Songs In The Key of Life [24/96]
            * Songs In The Key of Life [16/44.1]
            * ...

        This function should choose either [24/96] or [24/192].
        It also skips deluxe albums in favor of the originals, picks remasters
        in favor of originals, and removes albums by other artists that just
        feature the requested artist.
        """

        def print_album(a: dict):
            print(
                f"{album['title']} - {album['version']} ({album['maximum_bit_depth']}/{album['maximum_sampling_rate']})"
            )

        def remastered(s: str) -> bool:
            """Case insensitive match to check whether
            an album is remastered.
            """
            if s is None:
                return False
            return re.match(r"(?i)(re)?master(ed)?", s) is not None

        def extra(album: dict) -> bool:
            assert hasattr(album, "__getitem__"), "param must be dict-like"
            if 'version' not in album:
                return False
            return (
                re.findall(
                    r"(?i)(anniversary|deluxe|live|collector|demo)",
                    f"{album['title']} {album['version']}",
                )
                != []
            )

        # remove all albums by other artists
        artist = contents[0]["name"]
        items = [item["albums"]["items"] for item in contents][0]
        artist_f = []  # artist filtered
        for item in items:
            if item["artist"]["name"] == artist:
                artist_f.append(item)

        # use dicts to group duplicate titles together
        titles_f = dict()
        for item in artist_f:
            if (t := item["title"]) not in titles_f:
                titles_f[t] = []
            titles_f[t].append(item)

        # pick desired quality out of duplicates
        # remasters are given preferred status
        quality_f = []
        for albums in titles_f.values():
            # no duplicates for title
            if len(albums) == 1:
                quality_f.append(albums[0])
                continue

            # desired bit depth and sampling rate
            bit_depth = max(a["maximum_bit_depth"] for a in albums)
            # having sampling rate > 44.1kHz is a waste of space
            # https://en.wikipedia.org/wiki/Nyquist–Shannon_sampling_theorem
            # https://en.wikipedia.org/wiki/44,100_Hz#Human_hearing_and_signal_processing
            cmp_func = min if save_space else max
            sampling_rate = cmp_func(
                a["maximum_sampling_rate"]
                for a in albums
                if a["maximum_bit_depth"] == bit_depth
            )
            has_remaster = bool([a for a in albums if remastered(a["version"])])

            # check if album has desired bit depth and sampling rate
            # if there is a remaster in `item`, check if the album is a remaster
            for album in albums:
                if (
                    album["maximum_bit_depth"] == bit_depth
                    and album["maximum_sampling_rate"] == sampling_rate
                ):
                    if not has_remaster:
                        quality_f.append(album)
                    elif remastered(album["version"]):
                        quality_f.append(album)

        if remove_extras:
            final = []
            # this filters those huge albums with outtakes, live performances etc.
            for album in quality_f:
                if not extra(album):
                    final.append(album)
        else:
            final = quality_f

        return final
        # key = lambda a: a["title"]
        # final.sort(key=key)
        # for album in final:
        #     print_album(album)