From 9bffdebe2516389ba2a5be8657ed197b2c3f49ee Mon Sep 17 00:00:00 2001 From: cloudwithax Date: Sat, 4 Oct 2025 00:00:57 -0400 Subject: [PATCH] 2.10.0 --- pomice/__init__.py | 2 +- pomice/applemusic/client.py | 145 ++++++++++++++++++++---- pomice/enums.py | 26 +++-- pomice/pool.py | 33 ++++-- pomice/spotify/client.py | 215 +++++++++++++++++++++++++++++++----- 5 files changed, 359 insertions(+), 62 deletions(-) diff --git a/pomice/__init__.py b/pomice/__init__.py index 9dfe84e..a2ae70c 100644 --- a/pomice/__init__.py +++ b/pomice/__init__.py @@ -20,7 +20,7 @@ if not discord.version_info.major >= 2: "using 'pip install discord.py'", ) -__version__ = "2.9.2" +__version__ = "2.10.0" __title__ = "pomice" __author__ = "cloudwithax" __license__ = "GPL-3.0" diff --git a/pomice/applemusic/client.py b/pomice/applemusic/client.py index 1a82a52..ecda11a 100644 --- a/pomice/applemusic/client.py +++ b/pomice/applemusic/client.py @@ -1,11 +1,14 @@ from __future__ import annotations +import asyncio import base64 import logging import re from datetime import datetime +from typing import AsyncGenerator from typing import Dict from typing import List +from typing import Optional from typing import Union import aiohttp @@ -17,10 +20,10 @@ from .objects import * __all__ = ("Client",) AM_URL_REGEX = re.compile( - r"https?://music.apple.com/(?P[a-zA-Z]{2})/(?Palbum|playlist|song|artist)/(?P.+)/(?P[^?]+)", + r"https?://music\.apple\.com/(?P[a-zA-Z]{2})/(?Palbum|playlist|song|artist)/(?P.+?)/(?P[^/?]+?)(?:/)?(?:\?.*)?$", ) AM_SINGLE_IN_ALBUM_REGEX = re.compile( - r"https?://music.apple.com/(?P[a-zA-Z]{2})/(?Palbum|playlist|song|artist)/(?P.+)/(?P.+)(\?i=)(?P.+)", + r"https?://music\.apple\.com/(?P[a-zA-Z]{2})/(?Palbum|playlist|song|artist)/(?P.+)/(?P[^/?]+)(\?i=)(?P[^&]+)(?:&.*)?$", ) AM_SCRIPT_REGEX = re.compile(r' None: + def __init__(self, *, playlist_concurrency: int = 6) -> None: self.expiry: datetime = datetime(1970, 1, 1) self.token: str = "" self.headers: Dict[str, str] = {} self.session: aiohttp.ClientSession = None # type: ignore self._log = logging.getLogger(__name__) + # Concurrency knob for parallel playlist page retrieval + self._playlist_concurrency = max(1, playlist_concurrency) async def _set_session(self, session: aiohttp.ClientSession) -> None: self.session = session @@ -167,25 +172,127 @@ class Client: "This playlist is empty and therefore cannot be queued.", ) - _next = track_data.get("next") - if _next: - next_page_url = AM_BASE_URL + _next - - while next_page_url is not None: - resp = await self.session.get(next_page_url, headers=self.headers) + # Apple Music uses cursor pagination with 'next'. We'll fetch subsequent pages + # concurrently by first collecting cursors in rolling waves. + next_cursor = track_data.get("next") + semaphore = asyncio.Semaphore(self._playlist_concurrency) + async def fetch_page(url: str) -> List[Song]: + async with semaphore: + resp = await self.session.get(url, headers=self.headers) if resp.status != 200: - raise AppleMusicRequestException( - f"Error while fetching results: {resp.status} {resp.reason}", - ) + if self._log: + self._log.warning( + f"Apple Music page fetch failed {resp.status} {resp.reason} for {url}", + ) + return [] + pj: dict = await resp.json(loads=json.loads) + songs = [Song(track) for track in pj.get("data", [])] + # Return songs; we will look for pj.get('next') in streaming iterator variant + return songs, pj.get("next") # type: ignore - next_data: dict = await resp.json(loads=json.loads) - album_tracks.extend(Song(track) for track in next_data["data"]) + # We'll implement a wave-based approach similar to Spotify but need to follow cursors. + # Because we cannot know all cursors upfront, we'll iteratively fetch waves. + waves: List[List[Song]] = [] + cursors: List[str] = [] + if next_cursor: + cursors.append(next_cursor) - _next = next_data.get("next") - if _next: - next_page_url = AM_BASE_URL + _next - else: - next_page_url = None + # Limit total waves to avoid infinite loops in malformed responses + max_waves = 50 + wave_size = self._playlist_concurrency * 2 + wave_counter = 0 + while cursors and wave_counter < max_waves: + current = cursors[:wave_size] + cursors = cursors[wave_size:] + tasks = [ + fetch_page(AM_BASE_URL + cursor) for cursor in current # type: ignore[arg-type] + ] + results = await asyncio.gather(*tasks, return_exceptions=True) + for res in results: + if isinstance(res, tuple): # (songs, next) + songs, nxt = res + if songs: + waves.append(songs) + if nxt: + cursors.append(nxt) + wave_counter += 1 + + for w in waves: + album_tracks.extend(w) return Playlist(data, album_tracks) + + async def iter_playlist_tracks( + self, + *, + query: str, + batch_size: int = 100, + ) -> AsyncGenerator[List[Song], None]: + """Stream Apple Music playlist tracks in batches. + + Parameters + ---------- + query: str + Apple Music playlist URL. + batch_size: int + Logical grouping size for yielded batches. + """ + if not self.token or datetime.utcnow() > self.expiry: + await self.request_token() + + result = AM_URL_REGEX.match(query) + if not result or result.group("type") != "playlist": + raise InvalidAppleMusicURL("Provided query is not a valid Apple Music playlist URL.") + + country = result.group("country") + playlist_id = result.group("id") + request_url = AM_REQ_URL.format(country=country, type="playlist", id=playlist_id) + resp = await self.session.get(request_url, headers=self.headers) + if resp.status != 200: + raise AppleMusicRequestException( + f"Error while fetching results: {resp.status} {resp.reason}", + ) + data: dict = await resp.json(loads=json.loads) + playlist_data = data["data"][0] + track_data: dict = playlist_data["relationships"]["tracks"] + + first_page_tracks = [Song(track) for track in track_data["data"]] + for i in range(0, len(first_page_tracks), batch_size): + yield first_page_tracks[i : i + batch_size] + + next_cursor = track_data.get("next") + semaphore = asyncio.Semaphore(self._playlist_concurrency) + + async def fetch(cursor: str) -> tuple[List[Song], Optional[str]]: + url = AM_BASE_URL + cursor + async with semaphore: + r = await self.session.get(url, headers=self.headers) + if r.status != 200: + if self._log: + self._log.warning( + f"Skipping Apple Music page due to {r.status} {r.reason}", + ) + return [], None + pj: dict = await r.json(loads=json.loads) + songs = [Song(track) for track in pj.get("data", [])] + return songs, pj.get("next") + + # Rolling waves of fetches following cursor chain + max_waves = 50 + wave_size = self._playlist_concurrency * 2 + waves = 0 + cursors: List[str] = [] + if next_cursor: + cursors.append(next_cursor) + while cursors and waves < max_waves: + current = cursors[:wave_size] + cursors = cursors[wave_size:] + results = await asyncio.gather(*[fetch(c) for c in current]) + for songs, nxt in results: + if songs: + for j in range(0, len(songs), batch_size): + yield songs[j : j + batch_size] + if nxt: + cursors.append(nxt) + waves += 1 diff --git a/pomice/enums.py b/pomice/enums.py index 513b0dc..2098f3a 100644 --- a/pomice/enums.py +++ b/pomice/enums.py @@ -34,6 +34,11 @@ class SearchType(Enum): ytsearch = "ytsearch" ytmsearch = "ytmsearch" scsearch = "scsearch" + other = "other" + + @classmethod + def _missing_(cls, value: object) -> "SearchType": # type: ignore[override] + return cls.other def __str__(self) -> str: return self.value @@ -68,7 +73,7 @@ class TrackType(Enum): OTHER = "other" @classmethod - def _missing_(cls, _: object) -> "TrackType": + def _missing_(cls, value: object) -> "TrackType": # type: ignore[override] return cls.OTHER def __str__(self) -> str: @@ -98,7 +103,7 @@ class PlaylistType(Enum): OTHER = "other" @classmethod - def _missing_(cls, _: object) -> "PlaylistType": + def _missing_(cls, value: object) -> "PlaylistType": # type: ignore[override] return cls.OTHER def __str__(self) -> str: @@ -213,8 +218,12 @@ class URLRegex: """ + # Spotify share links can include query parameters like ?si=XXXX, a trailing slash, + # or an intl locale segment (e.g. /intl-en/). Broaden the regex so we still capture + # the type and id while ignoring extra parameters. This prevents the URL from being + # treated as a generic Lavalink identifier and ensures internal Spotify handling runs. SPOTIFY_URL = re.compile( - r"https?://open.spotify.com/(?Palbum|playlist|track|artist)/(?P[a-zA-Z0-9]+)", + r"https?://open\.spotify\.com/(?:intl-[a-zA-Z-]+/)?(?Palbum|playlist|track|artist)/(?P[a-zA-Z0-9]+)(?:/)?(?:\?.*)?$", ) DISCORD_MP3_URL = re.compile( @@ -235,14 +244,17 @@ class URLRegex: r"(?P