chore: update public repo

This commit is contained in:
nano :3 2025-01-18 07:55:22 +00:00
parent 090a502ece
commit 67a6238632
Signed by: desu-bot
SSH key fingerprint: SHA256:I2g/pbGhrslSQ4yRCMEvP3GKI29uNQ01z5xVb7kz/OI
2 changed files with 142 additions and 19 deletions

View file

@ -1,12 +1,12 @@
import { mkdir, rm, writeFile } from 'node:fs/promises' import { mkdir, rm, writeFile } from 'node:fs/promises'
import { join } from 'node:path' import { join } from 'node:path'
import { ffetchAddons } from '@fuman/fetch' import { ffetchAddons } from '@fuman/fetch'
import { assert, asyncPool, base64 } from '@fuman/utils' import { assert, asyncPool, base64, sleep } from '@fuman/utils'
import { load } from 'cheerio' import { load } from 'cheerio'
import Spinnies from 'spinnies' import Spinnies from 'spinnies'
import { ProxyAgent } from 'undici' import { ProxyAgent } from 'undici'
import { z } from 'zod' import { z } from 'zod'
import { $, question } from 'zx' import { $, ProcessOutput, question } from 'zx'
import { downloadFile, ffetch as ffetchBase } from '../../utils/fetch.ts' import { downloadFile, ffetch as ffetchBase } from '../../utils/fetch.ts'
import { sanitizeFilename } from '../../utils/fs.ts' import { sanitizeFilename } from '../../utils/fs.ts'
import { chunks, getEnv } from '../../utils/misc.ts' import { chunks, getEnv } from '../../utils/misc.ts'
@ -23,12 +23,6 @@ const ffetchApi = ffetchBase.extend({
addons: [ addons: [
ffetchAddons.rateLimitHandler(), ffetchAddons.rateLimitHandler(),
], ],
rateLimit: {
isRejected(res) {
return res.status === 429
},
defaultWaitTime: 10_000,
},
headers: { headers: {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36', 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
'Authorization': `OAuth ${getEnv('SOUNDCLOUD_TOKEN')}`, 'Authorization': `OAuth ${getEnv('SOUNDCLOUD_TOKEN')}`,
@ -39,10 +33,6 @@ const ffetchHtml = ffetchBase.extend({
headers: { headers: {
Cookie: `oauth_token=${getEnv('SOUNDCLOUD_TOKEN')}`, Cookie: `oauth_token=${getEnv('SOUNDCLOUD_TOKEN')}`,
}, },
extra: {
// @ts-expect-error lol fixme
dispatcher: new ProxyAgent('http://127.0.0.1:7891'),
},
}) })
const ScTrack = z.object({ const ScTrack = z.object({
@ -50,6 +40,7 @@ const ScTrack = z.object({
kind: z.literal('track'), kind: z.literal('track'),
title: z.string(), title: z.string(),
duration: z.number(), duration: z.number(),
description: z.string().nullable(),
permalink_url: z.string(), permalink_url: z.string(),
artwork_url: z.string().transform(s => s.replace('-large.jpg', '-t500x500.jpg')).nullable(), artwork_url: z.string().transform(s => s.replace('-large.jpg', '-t500x500.jpg')).nullable(),
media: z.object({ media: z.object({
@ -77,8 +68,8 @@ const ScPlaylist = z.object({
title: z.string(), title: z.string(),
duration: z.number(), duration: z.number(),
permalink_url: z.string(), permalink_url: z.string(),
genre: z.string(), genre: z.string().nullish(),
description: z.string().nullable(), description: z.string().nullish(),
track_count: z.number(), track_count: z.number(),
user: z.object({ user: z.object({
username: z.string(), username: z.string(),
@ -89,10 +80,17 @@ const ScPlaylist = z.object({
id: z.number(), id: z.number(),
kind: z.literal('track'), kind: z.literal('track'),
}), }),
])), ])).default(() => []),
}) })
type ScPlaylist = z.infer<typeof ScPlaylist> type ScPlaylist = z.infer<typeof ScPlaylist>
/**
 * Schema for a single entry of a likes collection.
 *
 * Both `track` and `playlist` are optional, so an entry that carries neither
 * still parses and can be handled by the consumer.
 */
const ScLike = z.object({
    created_at: z.string(),
    kind: z.literal('like'),
    // payload of the like: the liked track or the liked playlist, when present
    track: z.optional(ScTrack),
    playlist: z.optional(ScPlaylist),
})
function extractHydrationData(html: string) { function extractHydrationData(html: string) {
const $ = load(html) const $ = load(html)
const script = $('script:contains(window.__sc_hydration = )') const script = $('script:contains(window.__sc_hydration = )')
@ -128,6 +126,8 @@ async function fetchTracksById(trackIds: number[]) {
async function downloadTrack(track: ScTrack, opts: { async function downloadTrack(track: ScTrack, opts: {
/* download destination (filename without extension) */ /* download destination (filename without extension) */
destination: string destination: string
onRateLimit?: (waitTime: number) => void
onCdnRateLimit?: () => void
}) { }) {
const artworkPath = join('assets', `sc-tmp-${track.id}.jpg`) const artworkPath = join('assets', `sc-tmp-${track.id}.jpg`)
const artworkBytes = track.artwork_url ? new Uint8Array(await ffetchHtml(track.artwork_url).arrayBuffer()) : null const artworkBytes = track.artwork_url ? new Uint8Array(await ffetchHtml(track.artwork_url).arrayBuffer()) : null
@ -150,6 +150,16 @@ async function downloadTrack(track: ScTrack, opts: {
query: { query: {
track_authorization: track.track_authorization, track_authorization: track.track_authorization,
}, },
rateLimit: {
isRejected(res) {
return res.status === 429
},
defaultWaitTime: 60_000,
maxRetries: 10,
onRateLimitExceeded(res, waitTime) {
opts.onRateLimit?.(waitTime)
},
},
}).parsedJson(z.object({ }).parsedJson(z.object({
url: z.string(), url: z.string(),
})) }))
@ -207,15 +217,35 @@ async function downloadTrack(track: ScTrack, opts: {
`title=${track.title}`, `title=${track.title}`,
'-metadata', '-metadata',
`artist=${track.user.username}`, `artist=${track.user.username}`,
'-metadata',
`comment=${track.description ?? ''}`,
filename, filename,
) )
while (true) {
try {
await $`ffmpeg ${params}`.quiet(true) await $`ffmpeg ${params}`.quiet(true)
break
} catch (e) {
if (!(e instanceof ProcessOutput)) {
throw e
}
if (e.stderr.includes('429 Too Many Requests')) {
opts.onCdnRateLimit?.()
await sleep(10_000)
continue
}
throw e
}
}
await rm(artworkPath, { force: true }) await rm(artworkPath, { force: true })
} }
async function downloadPlaylist(playlist: ScPlaylist) { async function downloadPlaylist(playlist: ScPlaylist, params: {
destination?: string
} = {}) {
const tracks: ScTrack[] = [] const tracks: ScTrack[] = []
const tracksToFetch = new Set<number>() const tracksToFetch = new Set<number>()
const trackIdToPosition = new Map<number, number>() const trackIdToPosition = new Map<number, number>()
@ -246,7 +276,7 @@ async function downloadPlaylist(playlist: ScPlaylist) {
spinnies.succeed('fetching') spinnies.succeed('fetching')
} }
const destDir = join('assets/soundcloud-dl', sanitizeFilename(`${playlist.user.username} - ${playlist.title}`)) const destDir = params.destination ?? join('assets/soundcloud-dl', sanitizeFilename(`${playlist.user.username} - ${playlist.title}`))
await mkdir(destDir, { recursive: true }) await mkdir(destDir, { recursive: true })
const posPadSize = Math.ceil(Math.log10(tracks.length)) const posPadSize = Math.ceil(Math.log10(tracks.length))
@ -258,6 +288,12 @@ async function downloadPlaylist(playlist: ScPlaylist) {
spinnies.add(`${track.id}`, { text: filename }) spinnies.add(`${track.id}`, { text: filename })
await downloadTrack(track, { await downloadTrack(track, {
destination: join(destDir, filename), destination: join(destDir, filename),
onRateLimit: (wait) => {
spinnies.update(`${track.id}`, { text: `[rate limit ${Math.floor(wait / 1000)}s] ${filename}` })
},
onCdnRateLimit: () => {
spinnies.update(`${track.id}`, { text: `[cdn rate limit] ${filename}` })
},
}) })
spinnies.remove(`${track.id}`) spinnies.remove(`${track.id}`)
@ -267,6 +303,87 @@ async function downloadPlaylist(playlist: ScPlaylist) {
spinnies.stopAll() spinnies.stopAll()
} }
/**
 * Downloads the tracks and playlists liked by a user.
 *
 * Steps:
 * 1. Fetches the user's page and reads the `user` entry of its hydration data
 *    to get the numeric user id and the like counters.
 * 2. Pages through `/users/{id}/likes` (100 entries per request), following
 *    `next_href` until it is absent, and sorts each entry into tracks or playlists.
 * 3. Downloads the collected tracks through `asyncPool` into
 *    `assets/soundcloud-dl/<username>-likes`, then downloads each playlist
 *    one after another into its own sub-directory of that folder.
 *
 * @param username  User handle as it appears in the profile URL path
 * @throws Error when the page has no `user` hydration entry, or when the API
 *   returns a `next_href` that cannot be used to advance pagination
 */
async function downloadLikes(username: string) {
    const spinnies = new Spinnies()
    spinnies.add('collect', { text: 'collecting likes...' })

    const userPage = await ffetchHtml(`/${username}`).text()
    const hydrationData = extractHydrationData(userPage)
    const user = hydrationData.find(it => it.hydratable === 'user')
    if (!user) throw new Error('no user found')

    const userData = z.object({
        likes_count: z.number(),
        playlist_likes_count: z.number(),
        id: z.number(),
    }).parse(user.data)

    const tracks: ScTrack[] = []
    const playlists: ScPlaylist[] = []

    // the counters do not change while collecting, so compute the total once
    const totalLikes = userData.likes_count + userData.playlist_likes_count

    const updateSpinner = () => {
        // a user without any likes would otherwise produce 0 / 0 = NaN
        const percent = totalLikes > 0
            ? Math.floor((tracks.length + playlists.length) / totalLikes * 100)
            : 100
        spinnies.update('collect', {
            text: `[${percent}%] collecting liked tracks: ${tracks.length}/${userData.likes_count}, playlists: ${playlists.length}/${userData.playlist_likes_count}`,
        })
    }
    updateSpinner()

    let offset = '0'
    while (true) {
        const res = await ffetchApi(`/users/${userData.id}/likes`, {
            query: {
                limit: 100,
                offset,
                linked_partitioning: '1',
            },
        }).parsedJson(z.object({
            collection: z.array(ScLike),
            next_href: z.string().nullable(),
        }))

        for (const like of res.collection) {
            if (like.track) {
                tracks.push(like.track)
            } else if (like.playlist) {
                playlists.push(like.playlist)
            } else {
                console.warn('unknown like type:', like.created_at)
            }
        }

        updateSpinner()

        if (!res.next_href) break

        // the cursor for the next page is carried in the `offset` query parameter of next_href
        const nextOffset = new URL(res.next_href).searchParams.get('offset')
        if (nextOffset === null || nextOffset === offset) {
            // without a new cursor we would either send a bogus offset or re-fetch the same page forever
            throw new Error(`cannot continue likes pagination, unexpected next_href: ${res.next_href}`)
        }
        offset = nextOffset
    }

    spinnies.succeed('collect', { text: `collected ${tracks.length} tracks and ${playlists.length} playlists` })

    const baseDir = join('assets/soundcloud-dl', `${sanitizeFilename(username)}-likes`)
    await mkdir(baseDir, { recursive: true })

    await asyncPool(tracks, async (track) => {
        const filename = `${track.user.username} - ${track.title}`
        const spinnerId = `${track.id}`
        spinnies.add(spinnerId, { text: filename })

        await downloadTrack(track, {
            destination: join(baseDir, sanitizeFilename(filename)),
            onRateLimit: (wait) => {
                spinnies.update(spinnerId, { text: `[rate limit ${Math.floor(wait / 1000)}s] ${filename}` })
            },
            onCdnRateLimit: () => {
                spinnies.update(spinnerId, { text: `[cdn rate limit] ${filename}` })
            },
        })

        spinnies.remove(spinnerId)
    })

    // playlists are processed sequentially, each into its own directory under baseDir
    for (const playlist of playlists) {
        console.log('\uDB83\uDCB8 %s', playlist.title)
        await downloadPlaylist(playlist, {
            destination: join(baseDir, sanitizeFilename(`${playlist.user.username} - ${playlist.title}`)),
        })
    }
}
const url = process.argv[2] ?? await question('url > ') const url = process.argv[2] ?? await question('url > ')
if (!url.startsWith('https://soundcloud.com/')) { if (!url.startsWith('https://soundcloud.com/')) {
console.error('url must start with https://soundcloud.com/') console.error('url must start with https://soundcloud.com/')
@ -275,6 +392,8 @@ if (!url.startsWith('https://soundcloud.com/')) {
if (url.match(/^https:\/\/soundcloud.com\/[a-z0-9-]+\/sets\//i)) { if (url.match(/^https:\/\/soundcloud.com\/[a-z0-9-]+\/sets\//i)) {
await downloadPlaylist(await fetchPlaylistByUrl(url)) await downloadPlaylist(await fetchPlaylistByUrl(url))
} else if (url.match(/^https:\/\/soundcloud.com\/[a-z0-9-]+\/likes/i)) {
await downloadLikes(url.match(/^https:\/\/soundcloud.com\/([a-z0-9-]+)\/likes/i)![1])
} else { } else {
const track = await fetchTrackByUrl(url) const track = await fetchTrackByUrl(url)
const filename = `${track.user.username} - ${track.title}` const filename = `${track.user.username} - ${track.title}`

View file

@ -6,6 +6,7 @@ import { ffetchZodAdapter } from '@fuman/fetch/zod'
import { webReadableToFuman, write } from '@fuman/io' import { webReadableToFuman, write } from '@fuman/io'
import { nodeWritableToFuman } from '@fuman/node' import { nodeWritableToFuman } from '@fuman/node'
import { type CheerioAPI, load } from 'cheerio' import { type CheerioAPI, load } from 'cheerio'
import { ProxyAgent } from 'undici'
const cheerioAddon: FfetchAddon<object, { cheerio: () => Promise<CheerioAPI> }> = { const cheerioAddon: FfetchAddon<object, { cheerio: () => Promise<CheerioAPI> }> = {
response: { response: {
@ -23,6 +24,9 @@ export const ffetch = ffetchBase.extend({
cheerioAddon, cheerioAddon,
toughCookieAddon(), toughCookieAddon(),
], ],
extra: {
dispatcher: process.env.http_proxy ? new ProxyAgent(process.env.http_proxy) : undefined,
} as any,
}) })
export async function downloadStream(stream: ReadableStream, path: string) { export async function downloadStream(stream: ReadableStream, path: string) {