chore: update public repo
parent 090a502ece
commit 67a6238632
2 changed files with 142 additions and 19 deletions
@@ -1,12 +1,12 @@
import { mkdir, rm, writeFile } from 'node:fs/promises'
import { join } from 'node:path'
import { ffetchAddons } from '@fuman/fetch'
import { assert, asyncPool, base64 } from '@fuman/utils'
import { assert, asyncPool, base64, sleep } from '@fuman/utils'
import { load } from 'cheerio'
import Spinnies from 'spinnies'
import { ProxyAgent } from 'undici'
import { z } from 'zod'
import { $, question } from 'zx'
import { $, ProcessOutput, question } from 'zx'
import { downloadFile, ffetch as ffetchBase } from '../../utils/fetch.ts'
import { sanitizeFilename } from '../../utils/fs.ts'
import { chunks, getEnv } from '../../utils/misc.ts'
@@ -23,12 +23,6 @@ const ffetchApi = ffetchBase.extend({
addons: [
ffetchAddons.rateLimitHandler(),
],
rateLimit: {
isRejected(res) {
return res.status === 429
},
defaultWaitTime: 10_000,
},
headers: {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
'Authorization': `OAuth ${getEnv('SOUNDCLOUD_TOKEN')}`,
@@ -39,10 +33,6 @@ const ffetchHtml = ffetchBase.extend({
headers: {
Cookie: `oauth_token=${getEnv('SOUNDCLOUD_TOKEN')}`,
},
extra: {
// @ts-expect-error lol fixme
dispatcher: new ProxyAgent('http://127.0.0.1:7891'),
},
})

const ScTrack = z.object({
@@ -50,6 +40,7 @@ const ScTrack = z.object({
kind: z.literal('track'),
title: z.string(),
duration: z.number(),
description: z.string().nullable(),
permalink_url: z.string(),
artwork_url: z.string().transform(s => s.replace('-large.jpg', '-t500x500.jpg')).nullable(),
media: z.object({
@@ -77,8 +68,8 @@ const ScPlaylist = z.object({
title: z.string(),
duration: z.number(),
permalink_url: z.string(),
genre: z.string(),
description: z.string().nullable(),
genre: z.string().nullish(),
description: z.string().nullish(),
track_count: z.number(),
user: z.object({
username: z.string(),
@@ -89,10 +80,17 @@ const ScPlaylist = z.object({
id: z.number(),
kind: z.literal('track'),
}),
])),
])).default(() => []),
})
type ScPlaylist = z.infer<typeof ScPlaylist>

const ScLike = z.object({
created_at: z.string(),
kind: z.literal('like'),
track: ScTrack.optional(),
playlist: ScPlaylist.optional(),
})

function extractHydrationData(html: string) {
const $ = load(html)
const script = $('script:contains(window.__sc_hydration = )')
@@ -128,6 +126,8 @@ async function fetchTracksById(trackIds: number[]) {
async function downloadTrack(track: ScTrack, opts: {
/* download destination (filename without extension) */
destination: string
onRateLimit?: (waitTime: number) => void
onCdnRateLimit?: () => void
}) {
const artworkPath = join('assets', `sc-tmp-${track.id}.jpg`)
const artworkBytes = track.artwork_url ? new Uint8Array(await ffetchHtml(track.artwork_url).arrayBuffer()) : null
@@ -150,6 +150,16 @@ async function downloadTrack(track: ScTrack, opts: {
query: {
track_authorization: track.track_authorization,
},
rateLimit: {
isRejected(res) {
return res.status === 429
},
defaultWaitTime: 60_000,
maxRetries: 10,
onRateLimitExceeded(res, waitTime) {
opts.onRateLimit?.(waitTime)
},
},
}).parsedJson(z.object({
url: z.string(),
}))
@@ -207,15 +217,35 @@ async function downloadTrack(track: ScTrack, opts: {
`title=${track.title}`,
'-metadata',
`artist=${track.user.username}`,
'-metadata',
`comment=${track.description ?? ''}`,
filename,
)

await $`ffmpeg ${params}`.quiet(true)
while (true) {
try {
await $`ffmpeg ${params}`.quiet(true)
break
} catch (e) {
if (!(e instanceof ProcessOutput)) {
throw e
}
if (e.stderr.includes('429 Too Many Requests')) {
opts.onCdnRateLimit?.()
await sleep(10_000)
continue
}

throw e
}
}

await rm(artworkPath, { force: true })
}

async function downloadPlaylist(playlist: ScPlaylist) {
async function downloadPlaylist(playlist: ScPlaylist, params: {
destination?: string
} = {}) {
const tracks: ScTrack[] = []
const tracksToFetch = new Set<number>()
const trackIdToPosition = new Map<number, number>()
@@ -246,7 +276,7 @@ async function downloadPlaylist(playlist: ScPlaylist) {
spinnies.succeed('fetching')
}

const destDir = join('assets/soundcloud-dl', sanitizeFilename(`${playlist.user.username} - ${playlist.title}`))
const destDir = params.destination ?? join('assets/soundcloud-dl', sanitizeFilename(`${playlist.user.username} - ${playlist.title}`))
await mkdir(destDir, { recursive: true })

const posPadSize = Math.ceil(Math.log10(tracks.length))
@@ -258,6 +288,12 @@ async function downloadPlaylist(playlist: ScPlaylist) {
spinnies.add(`${track.id}`, { text: filename })
await downloadTrack(track, {
destination: join(destDir, filename),
onRateLimit: (wait) => {
spinnies.update(`${track.id}`, { text: `[rate limit ${Math.floor(wait / 1000)}s] ${filename}` })
},
onCdnRateLimit: () => {
spinnies.update(`${track.id}`, { text: `[cdn rate limit] ${filename}` })
},
})

spinnies.remove(`${track.id}`)
@@ -267,6 +303,87 @@ async function downloadPlaylist(playlist: ScPlaylist) {
spinnies.stopAll()
}

async function downloadLikes(username: string) {
const spinnies = new Spinnies()
spinnies.add('collect', { text: 'collecting likes...' })

const userPage = await ffetchHtml(`/${username}`).text()
const hydrationData = extractHydrationData(userPage)
const user = hydrationData.find(it => it.hydratable === 'user')
if (!user) throw new Error('no user found')
const userData = z.object({
likes_count: z.number(),
playlist_likes_count: z.number(),
id: z.number(),
}).parse(user.data)

const tracks: ScTrack[] = []
const playlists: ScPlaylist[] = []
const updateSpinner = () => {
const percent = Math.floor((tracks.length + playlists.length) / (userData.likes_count + userData.playlist_likes_count) * 100)
spinnies.update('collect', {
text: `[${percent}%] collecting liked tracks: ${tracks.length}/${userData.likes_count}, playlists: ${playlists.length}/${userData.playlist_likes_count}`,
})
}
updateSpinner()

let offset = '0'
while (true) {
const res = await ffetchApi(`/users/${userData.id}/likes`, {
query: {
limit: 100,
offset,
linked_partitioning: '1',
},
}).parsedJson(z.object({
collection: z.array(ScLike),
next_href: z.string().nullable(),
}))

for (const like of res.collection) {
if (like.track) {
tracks.push(like.track)
} else if (like.playlist) {
playlists.push(like.playlist)
} else {
console.warn('unknown like type:', like.created_at)
}
}

updateSpinner()

if (!res.next_href) break
offset = new URL(res.next_href).searchParams.get('offset')!
}

spinnies.succeed('collect', { text: `collected ${tracks.length} tracks and ${playlists.length} playlists` })

const baseDir = join('assets/soundcloud-dl', `${sanitizeFilename(username)}-likes`)
await mkdir(baseDir, { recursive: true })

await asyncPool(tracks, async (track) => {
const filename = `${track.user.username} - ${track.title}`
spinnies.add(`${track.id}`, { text: filename })
await downloadTrack(track, {
destination: join(baseDir, sanitizeFilename(filename)),
onRateLimit: (wait) => {
spinnies.update(`${track.id}`, { text: `[rate limit ${Math.floor(wait / 1000)}s] ${filename}` })
},
onCdnRateLimit: () => {
spinnies.update(`${track.id}`, { text: `[cdn rate limit] ${filename}` })
},
})
spinnies.remove(`${track.id}`)
})

for (const playlist of playlists) {
console.log('\uDB83\uDCB8 %s', playlist.title)
await downloadPlaylist(playlist, {
destination: join(baseDir, sanitizeFilename(`${playlist.user.username} - ${playlist.title}`)),
})
}
}

const url = process.argv[2] ?? await question('url > ')
if (!url.startsWith('https://soundcloud.com/')) {
console.error('url must start with https://soundcloud.com/')
@@ -275,9 +392,11 @@ if (!url.startsWith('https://soundcloud.com/')) {

if (url.match(/^https:\/\/soundcloud.com\/[a-z0-9-]+\/sets\//i)) {
await downloadPlaylist(await fetchPlaylistByUrl(url))
} else if (url.match(/^https:\/\/soundcloud.com\/[a-z0-9-]+\/likes/i)) {
await downloadLikes(url.match(/^https:\/\/soundcloud.com\/([a-z0-9-]+)\/likes/i)![1])
} else {
const track = await fetchTrackByUrl(url)
const filename = `${track.user.username}-${track.title}`
const filename = `${track.user.username} - ${track.title}`
console.log('downloading track:', filename)
await downloadTrack(track, {
destination: join('assets/soundcloud-dl', sanitizeFilename(filename)),

@@ -6,6 +6,7 @@ import { ffetchZodAdapter } from '@fuman/fetch/zod'
import { webReadableToFuman, write } from '@fuman/io'
import { nodeWritableToFuman } from '@fuman/node'
import { type CheerioAPI, load } from 'cheerio'
import { ProxyAgent } from 'undici'

const cheerioAddon: FfetchAddon<object, { cheerio: () => Promise<CheerioAPI> }> = {
response: {
@@ -23,6 +24,9 @@ export const ffetch = ffetchBase.extend({
cheerioAddon,
toughCookieAddon(),
],
extra: {
dispatcher: process.env.http_proxy ? new ProxyAgent(process.env.http_proxy) : undefined,
} as any,
})

export async function downloadStream(stream: ReadableStream, path: string) {