diff --git a/scripts/media/soundcloud-dl.ts b/scripts/media/soundcloud-dl.ts index 26168e0..0065ce6 100644 --- a/scripts/media/soundcloud-dl.ts +++ b/scripts/media/soundcloud-dl.ts @@ -1,12 +1,12 @@ import { mkdir, rm, writeFile } from 'node:fs/promises' import { join } from 'node:path' import { ffetchAddons } from '@fuman/fetch' -import { assert, asyncPool, base64 } from '@fuman/utils' +import { assert, asyncPool, base64, sleep } from '@fuman/utils' import { load } from 'cheerio' import Spinnies from 'spinnies' import { ProxyAgent } from 'undici' import { z } from 'zod' -import { $, question } from 'zx' +import { $, ProcessOutput, question } from 'zx' import { downloadFile, ffetch as ffetchBase } from '../../utils/fetch.ts' import { sanitizeFilename } from '../../utils/fs.ts' import { chunks, getEnv } from '../../utils/misc.ts' @@ -23,12 +23,6 @@ const ffetchApi = ffetchBase.extend({ addons: [ ffetchAddons.rateLimitHandler(), ], - rateLimit: { - isRejected(res) { - return res.status === 429 - }, - defaultWaitTime: 10_000, - }, headers: { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36', 'Authorization': `OAuth ${getEnv('SOUNDCLOUD_TOKEN')}`, @@ -39,10 +33,6 @@ const ffetchHtml = ffetchBase.extend({ headers: { Cookie: `oauth_token=${getEnv('SOUNDCLOUD_TOKEN')}`, }, - extra: { - // @ts-expect-error lol fixme - dispatcher: new ProxyAgent('http://127.0.0.1:7891'), - }, }) const ScTrack = z.object({ @@ -50,6 +40,7 @@ const ScTrack = z.object({ kind: z.literal('track'), title: z.string(), duration: z.number(), + description: z.string().nullable(), permalink_url: z.string(), artwork_url: z.string().transform(s => s.replace('-large.jpg', '-t500x500.jpg')).nullable(), media: z.object({ @@ -77,8 +68,8 @@ const ScPlaylist = z.object({ title: z.string(), duration: z.number(), permalink_url: z.string(), - genre: z.string(), - description: z.string().nullable(), + genre: z.string().nullish(), + description: z.string().nullish(), track_count: z.number(), user: z.object({ username: z.string(), @@ -89,10 +80,17 @@ const ScPlaylist = z.object({ id: z.number(), kind: z.literal('track'), }), - ])), + ])).default(() => []), }) type ScPlaylist = z.infer +const ScLike = z.object({ + created_at: z.string(), + kind: z.literal('like'), + track: ScTrack.optional(), + playlist: ScPlaylist.optional(), +}) + function extractHydrationData(html: string) { const $ = load(html) const script = $('script:contains(window.__sc_hydration = )') @@ -128,6 +126,8 @@ async function fetchTracksById(trackIds: number[]) { async function downloadTrack(track: ScTrack, opts: { /* download destination (filename without extension) */ destination: string + onRateLimit?: (waitTime: number) => void + onCdnRateLimit?: () => void }) { const artworkPath = join('assets', `sc-tmp-${track.id}.jpg`) const artworkBytes = track.artwork_url ? new Uint8Array(await ffetchHtml(track.artwork_url).arrayBuffer()) : null @@ -150,6 +150,16 @@ async function downloadTrack(track: ScTrack, opts: { query: { track_authorization: track.track_authorization, }, + rateLimit: { + isRejected(res) { + return res.status === 429 + }, + defaultWaitTime: 60_000, + maxRetries: 10, + onRateLimitExceeded(res, waitTime) { + opts.onRateLimit?.(waitTime) + }, + }, }).parsedJson(z.object({ url: z.string(), })) @@ -207,15 +217,35 @@ async function downloadTrack(track: ScTrack, opts: { `title=${track.title}`, '-metadata', `artist=${track.user.username}`, + '-metadata', + `comment=${track.description ?? ''}`, filename, ) - await $`ffmpeg ${params}`.quiet(true) + while (true) { + try { + await $`ffmpeg ${params}`.quiet(true) + break + } catch (e) { + if (!(e instanceof ProcessOutput)) { + throw e + } + if (e.stderr.includes('429 Too Many Requests')) { + opts.onCdnRateLimit?.() + await sleep(10_000) + continue + } + + throw e + } + } await rm(artworkPath, { force: true }) } -async function downloadPlaylist(playlist: ScPlaylist) { +async function downloadPlaylist(playlist: ScPlaylist, params: { + destination?: string +} = {}) { const tracks: ScTrack[] = [] const tracksToFetch = new Set() const trackIdToPosition = new Map() @@ -246,7 +276,7 @@ async function downloadPlaylist(playlist: ScPlaylist) { spinnies.succeed('fetching') } - const destDir = join('assets/soundcloud-dl', sanitizeFilename(`${playlist.user.username} - ${playlist.title}`)) + const destDir = params.destination ?? join('assets/soundcloud-dl', sanitizeFilename(`${playlist.user.username} - ${playlist.title}`)) await mkdir(destDir, { recursive: true }) const posPadSize = Math.ceil(Math.log10(tracks.length)) @@ -258,6 +288,12 @@ async function downloadPlaylist(playlist: ScPlaylist) { spinnies.add(`${track.id}`, { text: filename }) await downloadTrack(track, { destination: join(destDir, filename), + onRateLimit: (wait) => { + spinnies.update(`${track.id}`, { text: `[rate limit ${Math.floor(wait / 1000)}s] ${filename}` }) + }, + onCdnRateLimit: () => { + spinnies.update(`${track.id}`, { text: `[cdn rate limit] ${filename}` }) + }, }) spinnies.remove(`${track.id}`) @@ -267,6 +303,87 @@ async function downloadPlaylist(playlist: ScPlaylist) { spinnies.stopAll() } +async function downloadLikes(username: string) { + const spinnies = new Spinnies() + spinnies.add('collect', { text: 'collecting likes...' }) + + const userPage = await ffetchHtml(`/${username}`).text() + const hydrationData = extractHydrationData(userPage) + const user = hydrationData.find(it => it.hydratable === 'user') + if (!user) throw new Error('no user found') + const userData = z.object({ + likes_count: z.number(), + playlist_likes_count: z.number(), + id: z.number(), + }).parse(user.data) + + const tracks: ScTrack[] = [] + const playlists: ScPlaylist[] = [] + const updateSpinner = () => { + const percent = Math.floor((tracks.length + playlists.length) / (userData.likes_count + userData.playlist_likes_count) * 100) + spinnies.update('collect', { + text: `[${percent}%] collecting liked tracks: ${tracks.length}/${userData.likes_count}, playlists: ${playlists.length}/${userData.playlist_likes_count}`, + }) + } + updateSpinner() + + let offset = '0' + while (true) { + const res = await ffetchApi(`/users/${userData.id}/likes`, { + query: { + limit: 100, + offset, + linked_partitioning: '1', + }, + }).parsedJson(z.object({ + collection: z.array(ScLike), + next_href: z.string().nullable(), + })) + + for (const like of res.collection) { + if (like.track) { + tracks.push(like.track) + } else if (like.playlist) { + playlists.push(like.playlist) + } else { + console.warn('unknown like type:', like.created_at) + } + } + + updateSpinner() + + if (!res.next_href) break + offset = new URL(res.next_href).searchParams.get('offset')! + } + + spinnies.succeed('collect', { text: `collected ${tracks.length} tracks and ${playlists.length} playlists` }) + + const baseDir = join('assets/soundcloud-dl', `${sanitizeFilename(username)}-likes`) + await mkdir(baseDir, { recursive: true }) + + await asyncPool(tracks, async (track) => { + const filename = `${track.user.username} - ${track.title}` + spinnies.add(`${track.id}`, { text: filename }) + await downloadTrack(track, { + destination: join(baseDir, sanitizeFilename(filename)), + onRateLimit: (wait) => { + spinnies.update(`${track.id}`, { text: `[rate limit ${Math.floor(wait / 1000)}s] ${filename}` }) + }, + onCdnRateLimit: () => { + spinnies.update(`${track.id}`, { text: `[cdn rate limit] ${filename}` }) + }, + }) + spinnies.remove(`${track.id}`) + }) + + for (const playlist of playlists) { + console.log('\uDB83\uDCB8 %s', playlist.title) + await downloadPlaylist(playlist, { + destination: join(baseDir, sanitizeFilename(`${playlist.user.username} - ${playlist.title}`)), + }) + } +} + const url = process.argv[2] ?? await question('url > ') if (!url.startsWith('https://soundcloud.com/')) { console.error('url must start with https://soundcloud.com/') @@ -275,9 +392,11 @@ if (!url.startsWith('https://soundcloud.com/')) { if (url.match(/^https:\/\/soundcloud.com\/[a-z0-9-]+\/sets\//i)) { await downloadPlaylist(await fetchPlaylistByUrl(url)) +} else if (url.match(/^https:\/\/soundcloud.com\/[a-z0-9-]+\/likes/i)) { + await downloadLikes(url.match(/^https:\/\/soundcloud.com\/([a-z0-9-]+)\/likes/i)![1]) } else { const track = await fetchTrackByUrl(url) - const filename = `${track.user.username}-${track.title}` + const filename = `${track.user.username} - ${track.title}` console.log('downloading track:', filename) await downloadTrack(track, { destination: join('assets/soundcloud-dl', sanitizeFilename(filename)), diff --git a/utils/fetch.ts b/utils/fetch.ts index 6a8de33..bd33dee 100644 --- a/utils/fetch.ts +++ b/utils/fetch.ts @@ -6,6 +6,7 @@ import { ffetchZodAdapter } from '@fuman/fetch/zod' import { webReadableToFuman, write } from '@fuman/io' import { nodeWritableToFuman } from '@fuman/node' import { type CheerioAPI, load } from 'cheerio' +import { ProxyAgent } from 'undici' const cheerioAddon: FfetchAddon Promise }> = { response: { @@ -23,6 +24,9 @@ export const ffetch = ffetchBase.extend({ cheerioAddon, toughCookieAddon(), ], + extra: { + dispatcher: process.env.http_proxy ? new ProxyAgent(process.env.http_proxy) : undefined, + } as any, }) export async function downloadStream(stream: ReadableStream, path: string) {