660 lines
21 KiB
JavaScript
660 lines
21 KiB
JavaScript
// Downloads latest .tl schemas from TDesktop repo,
|
|
// fetches documentation from https://core.telegram.org/schema
|
|
// and builds a single .json file from all of that,
|
|
// while also changing default types (they suck) to ts-like
|
|
// disclaimer: code sucks because tl itself sucks :shrug:
|
|
|
|
const fetch = require('node-fetch')
|
|
const fs = require('fs')
|
|
const path = require('path')
|
|
const cheerio = require('cheerio')
|
|
const { applyDescriptionsFile } = require('./process-descriptions-yaml')
|
|
const yaml = require('js-yaml')
|
|
const { snakeToCamel, signedInt32ToUnsigned } = require('./common')
|
|
const { asyncPool } = require('eager-async-pool')
|
|
const { mergeSchemas, stringifyType } = require('./merge-schemas')
|
|
const CRC32 = require('crc-32')
|
|
|
|
const SingleRegex = /^(.+?)(?:#([0-f]{1,8}))?(?: \?)?(?: {(.+?:.+?)})? ((?:.+? )*)= (.+);$/
|
|
|
|
const transformIgnoreNamespace = (fn, s) => {
|
|
if (s.includes('.')) {
|
|
let [namespace, name] = s.split('.')
|
|
return namespace + '.' + fn(name)
|
|
}
|
|
return fn(s)
|
|
}
|
|
const normalizeGenerics = (s) => {
|
|
if (!s.includes(' ')) return s
|
|
|
|
let [base, ...args] = s.split(' ')
|
|
let ret = base
|
|
let depth = 0
|
|
args.forEach((arg) => {
|
|
depth += 1
|
|
ret += '<' + arg
|
|
})
|
|
while (depth--) ret += '>'
|
|
return ret
|
|
}
|
|
|
|
// all result types must be different to allow differentiating wire format
|
|
// (we could make int128 = Buffer, int256 = Buffer, but we would have no way to determine which is which after building json)
|
|
const _types = {
|
|
int: 'number',
|
|
long: 'Long',
|
|
int128: 'Int128',
|
|
int256: 'Int256',
|
|
double: 'Double',
|
|
string: 'string',
|
|
bytes: 'Buffer',
|
|
boolFalse: 'false',
|
|
boolTrue: 'true',
|
|
bool: 'boolean',
|
|
Bool: 'boolean',
|
|
true: 'true',
|
|
null: 'null',
|
|
Type: 'any',
|
|
// this will be removed by generate-reader/writer script and replaced with flags calculation
|
|
'#': '$FlagsBitField',
|
|
}
|
|
|
|
// override Long to RawLong type for some mtproto types
|
|
const overrideLongToRawLong = Object.entries({
|
|
mt_bind_auth_key_inner: [
|
|
'nonce',
|
|
'temp_auth_key_id',
|
|
'perm_auth_key_id',
|
|
'temp_session_id',
|
|
],
|
|
mt_bad_server_salt: ['new_server_salt'],
|
|
mt_future_salt: ['salt'],
|
|
mt_destroy_session_ok: ['session_id'],
|
|
mt_destroy_session_none: ['session_id'],
|
|
mt_destroy_session: ['session_id'],
|
|
mt_new_session_created: ['server_salt'],
|
|
}).flatMap(([obj, args]) => args.map((it) => `${obj}#${it}`))
|
|
|
|
function getJSType(typ, argName) {
|
|
if (typ[0] === '!') typ = typ.substr(1)
|
|
if (typ === 'long' && overrideLongToRawLong.includes(argName))
|
|
return 'RawLong'
|
|
if (typ in _types) return _types[typ]
|
|
let m = typ.match(/^[Vv]ector[< ](.+?)[> ]$/)
|
|
if (m) {
|
|
return getJSType(m[1], argName) + '[]'
|
|
}
|
|
return normalizeGenerics(typ)
|
|
}
|
|
|
|
async function convertTlToJson(tlText, tlType, silent = false) {
|
|
if (tlType === 'api') {
|
|
tlText =
|
|
fs.readFileSync(path.join(__dirname, '_prepend.tl'), 'utf8')
|
|
+ '\n---$start-main---\n'
|
|
+ tlText
|
|
+ '\n---$start-append---\n'
|
|
+ fs.readFileSync(path.join(__dirname, 'append.tl'), 'utf8')
|
|
}
|
|
|
|
let lines = tlText.split('\n')
|
|
let pos = 0
|
|
let line = lines[0].trim()
|
|
|
|
const padSize = (lines.length + '').length
|
|
const pad = (i) => {
|
|
const len = (i + '').length
|
|
if (len < padSize) {
|
|
let pre = ''
|
|
for (let i = 0; i < padSize - len; i++) {
|
|
pre += ' '
|
|
}
|
|
return pre + i
|
|
} else return i
|
|
}
|
|
|
|
const state = {
|
|
comment: '',
|
|
annotations: null,
|
|
type: 'class',
|
|
extends: null,
|
|
blankLines: 0,
|
|
stop: false,
|
|
part: 'prepend',
|
|
source: {}
|
|
}
|
|
|
|
const unions = {}
|
|
|
|
let nextLine = () => {
|
|
state.stop = pos === lines.length - 1
|
|
if (state.stop) return
|
|
|
|
line = lines[++pos].trim()
|
|
if (line === '') {
|
|
state.blankLines++
|
|
} else {
|
|
state.blankLines = 0
|
|
}
|
|
if (line && line.startsWith('---functions---')) {
|
|
state.type = 'method'
|
|
return nextLine()
|
|
}
|
|
if (line && line.startsWith('---types---')) {
|
|
state.type = 'class'
|
|
return nextLine()
|
|
}
|
|
if (line && line.startsWith('---$start-')) {
|
|
state.part = line.split('$start-')[1].split('-')[0]
|
|
state.type = 'class'
|
|
return nextLine()
|
|
}
|
|
if (!silent)
|
|
process.stdout.write(
|
|
`[${pad(pos)}/${lines.length}] Processing ${tlType}.tl..\r`
|
|
)
|
|
}
|
|
|
|
const ret = {}
|
|
|
|
function getNamespace(name) {
|
|
if (!ret[name]) {
|
|
ret[name] = {
|
|
classes: [],
|
|
methods: [],
|
|
unions: [],
|
|
}
|
|
}
|
|
return ret[name]
|
|
}
|
|
|
|
if (!silent)
|
|
process.stdout.write(
|
|
`[${pad(pos)}/${lines.length}] Processing ${tlType}.tl..\r`
|
|
)
|
|
|
|
while (!state.stop) {
|
|
if (line === '' || line.startsWith('//')) {
|
|
// skip empty lines and comments
|
|
nextLine()
|
|
continue
|
|
}
|
|
|
|
const match = SingleRegex.exec(line)
|
|
if (!match) {
|
|
console.warn(`Regex failed on:\n${line}`)
|
|
} else {
|
|
let [, fullName, typeId, generics, args, type] = match
|
|
if (fullName in _types || fullName === 'vector') {
|
|
// vector is parsed manually
|
|
nextLine()
|
|
continue
|
|
}
|
|
|
|
if (!typeId) {
|
|
typeId =
|
|
signedInt32ToUnsigned(
|
|
CRC32.str(
|
|
// normalize
|
|
line
|
|
.replace(
|
|
/[{};]|[a-zA-Z0-9_]+:flags\.[0-9]+\?true/g,
|
|
''
|
|
)
|
|
.replace(/[<>]/g, ' ')
|
|
.replace(/ +/g, ' ')
|
|
.trim()
|
|
)
|
|
).toString(16)
|
|
}
|
|
|
|
args = args.trim()
|
|
args =
|
|
args && !args.match(/\[ [a-z]+ ]/i)
|
|
? args.split(' ').map((j) => j.split(':'))
|
|
: []
|
|
|
|
let [namespace, name] = fullName.split('.')
|
|
if (!name) {
|
|
name = namespace
|
|
namespace = '$root'
|
|
}
|
|
|
|
if (state.type === 'class') {
|
|
if (!unions[type]) unions[type] = []
|
|
unions[type].push(
|
|
namespace === '$root' ? name : namespace + '.' + name
|
|
)
|
|
}
|
|
|
|
let r = {
|
|
name: state.type === 'class' ? name : snakeToCamel(name),
|
|
id: parseInt(typeId, 16),
|
|
[state.type === 'class' ? 'type' : 'returns']: getJSType(type),
|
|
arguments: [],
|
|
}
|
|
if (generics) {
|
|
r.generics = generics.split(',').map((it) => {
|
|
let [name, superClass] = it.split(':')
|
|
return { name, super: getJSType(superClass) }
|
|
})
|
|
}
|
|
if (args.length) {
|
|
r.arguments = args.map(([name, typ]) => {
|
|
let [predicate, type] = typ.split('?')
|
|
if (!type) {
|
|
return {
|
|
name: snakeToCamel(name),
|
|
type: getJSType(
|
|
typ,
|
|
tlType === 'mtproto'
|
|
? `mt_${fullName}#${name}`
|
|
: ''
|
|
),
|
|
}
|
|
}
|
|
return {
|
|
name: snakeToCamel(name),
|
|
type: getJSType(
|
|
type,
|
|
tlType === 'mtproto'
|
|
? `mt_${fullName}#${name}`
|
|
: ''
|
|
),
|
|
optional: true,
|
|
predicate,
|
|
}
|
|
})
|
|
}
|
|
|
|
// check for overrides/conflicts
|
|
if (fullName in state.source) {
|
|
// this object was already met
|
|
const source = state.source[fullName]
|
|
const collection = getNamespace(namespace)[
|
|
state.type === 'class' ? 'classes' : 'methods'
|
|
]
|
|
const oldIdx = collection.findIndex(
|
|
(it) => it.name === r.name
|
|
)
|
|
|
|
if (source === state.part) {
|
|
console.log(
|
|
'warn: %s was met >1 times in %s part, was overridden',
|
|
fullName,
|
|
source
|
|
)
|
|
collection.splice(oldIdx, 1)
|
|
} else {
|
|
const former = collection[oldIdx]
|
|
|
|
const latter = r
|
|
|
|
if (former.id === latter.id) {
|
|
console.log(
|
|
'warn: %s was met in %s, then in %s, was overridden',
|
|
fullName,
|
|
source,
|
|
state.part
|
|
)
|
|
collection.splice(oldIdx, 1)
|
|
} else {
|
|
const rl = require('readline').createInterface({
|
|
input: process.stdin,
|
|
output: process.stdout,
|
|
})
|
|
|
|
const input = (q) =>
|
|
new Promise((res) => rl.question(q, res))
|
|
|
|
console.log(
|
|
`!! Conflict on %s !! First met in %s, then in %s.`,
|
|
fullName,
|
|
source,
|
|
state.part
|
|
)
|
|
console.log(
|
|
'Option A (%s): %s',
|
|
source,
|
|
stringifyType(former, namespace)
|
|
)
|
|
console.log(
|
|
'Option B (%s): %s',
|
|
state.part,
|
|
stringifyType(latter, namespace)
|
|
)
|
|
|
|
let keep
|
|
while (true) {
|
|
keep = await input('Which to keep? [A/B] > ')
|
|
keep = keep.toUpperCase()
|
|
|
|
if (keep !== 'A' && keep !== 'B') {
|
|
console.log('Invalid input! Please type A or B')
|
|
continue
|
|
}
|
|
|
|
break
|
|
}
|
|
|
|
rl.close()
|
|
|
|
if (keep === 'A') {
|
|
nextLine()
|
|
continue
|
|
} else {
|
|
collection.splice(oldIdx, 1)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
state.source[fullName] = state.part
|
|
getNamespace(namespace)[state.type === 'class' ? 'classes' : 'methods'].push(r)
|
|
}
|
|
nextLine()
|
|
}
|
|
|
|
Object.entries(unions).forEach(([type, subtypes]) => {
|
|
let [namespace, name] = type.split('.')
|
|
if (!name) {
|
|
name = namespace
|
|
namespace = '$root'
|
|
}
|
|
|
|
getNamespace(namespace).unions.push({
|
|
type: name,
|
|
subtypes,
|
|
})
|
|
})
|
|
|
|
if (!silent)
|
|
console.log(`[${lines.length}/${lines.length}] Processed ${tlType}.tl`)
|
|
|
|
return ret
|
|
}
|
|
|
|
async function addDocumentation(obj) {
|
|
console.log('[i] Parsing documentation entries')
|
|
// structure: { type: 'class' | 'method' | 'type', name: string, target: object }
|
|
let tasks = []
|
|
|
|
Object.entries(obj).forEach(([namespace, content]) => {
|
|
if (namespace === '$root') namespace = ''
|
|
else namespace += '.'
|
|
content.classes.forEach((cls) =>
|
|
tasks.push({
|
|
type: 'class',
|
|
name: namespace + cls.name,
|
|
target: cls,
|
|
})
|
|
)
|
|
content.methods.forEach((cls) =>
|
|
tasks.push({
|
|
type: 'method',
|
|
name: namespace + cls.name,
|
|
target: cls,
|
|
})
|
|
)
|
|
content.unions.forEach((cls) =>
|
|
tasks.push({
|
|
type: 'union',
|
|
name: namespace + cls.type,
|
|
target: cls,
|
|
})
|
|
)
|
|
})
|
|
|
|
async function parseDocumentation(task) {
|
|
const { type, name, target } = task
|
|
let path = {
|
|
class: 'constructor',
|
|
method: 'method',
|
|
union: 'type',
|
|
}[type]
|
|
|
|
const url = `https://core.telegram.org/${path}/${name}`
|
|
|
|
function normalizeLinks(el) {
|
|
el.find('a').each((i, it) => {
|
|
it = $(it)
|
|
it.attr('href', new URL(it.attr('href'), url).href)
|
|
let href = it.attr('href')
|
|
let m
|
|
if (
|
|
(m = href.match(
|
|
/\/(constructor|method|union)\/([^#?]+)(?:\?|#|$)/
|
|
))
|
|
) {
|
|
let [, type, name] = m
|
|
if (type === 'method')
|
|
name = transformIgnoreNamespace(snakeToCamel, name)
|
|
it.replaceWith(`{@link ${name}}`)
|
|
}
|
|
})
|
|
}
|
|
|
|
let html = await fetch(url, {
|
|
headers: {
|
|
'User-Agent':
|
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) ' +
|
|
'Chrome/87.0.4280.88 Safari/537.36',
|
|
},
|
|
}).then((i) => i.text())
|
|
let $ = cheerio.load(html)
|
|
normalizeLinks($('#dev_page_content'))
|
|
|
|
if ($('#dev_page_content').text().includes('has not been saved')) return
|
|
|
|
target.description = $('#dev_page_content')
|
|
.find('p')
|
|
.first()
|
|
.html()
|
|
.trim()
|
|
let parametersTable = $("h3:contains('Parameters')").next()
|
|
parametersTable.find('tr').each((idx, el) => {
|
|
el = $(el)
|
|
let cols = el.find('td')
|
|
if (!cols.length) return // <thead>
|
|
let name = snakeToCamel(cols.first().text().trim())
|
|
let description = cols.last().html().trim()
|
|
|
|
target.arguments.forEach((arg) => {
|
|
if (arg.name === name) arg.description = description
|
|
})
|
|
})
|
|
|
|
if (type === 'method') {
|
|
let errorsTable = $("h3:contains('Possible errors')").next()
|
|
errorsTable.find('tr').each((idx, el) => {
|
|
el = $(el)
|
|
let cols = el.find('td')
|
|
if (!cols.length) return // <thead>
|
|
let code = parseInt($(cols[0]).text())
|
|
let name = $(cols[1]).text()
|
|
let description = $(cols[2]).text()
|
|
|
|
if (!target.throws) target.throws = []
|
|
target.throws.push({ code, name, description })
|
|
})
|
|
|
|
let botsCanUse = !!$("h3:contains('Bots can use this method')")
|
|
.length
|
|
let onlyBotsCanUse =
|
|
botsCanUse &&
|
|
(!!target.description.match(/[,;]( for)? bots only$/) ||
|
|
(target.throws &&
|
|
target.throws.some(
|
|
(it) => it.code === 'USER_BOT_REQUIRED'
|
|
)))
|
|
|
|
target.available = onlyBotsCanUse
|
|
? 'bot'
|
|
: botsCanUse
|
|
? 'both'
|
|
: 'user'
|
|
}
|
|
}
|
|
|
|
let count = 0
|
|
for await (let { idx, error } of asyncPool(parseDocumentation, tasks, {
|
|
limit: 5,
|
|
})) {
|
|
if (error) {
|
|
if (error instanceof fetch.FetchError) {
|
|
console.error(
|
|
'Network error %s while downloading docs for %s %s, retrying',
|
|
error.message,
|
|
tasks[idx].type,
|
|
tasks[idx].name
|
|
)
|
|
tasks.push(tasks[idx])
|
|
} else {
|
|
console.error(
|
|
'Error while downloading docs for %o: %s',
|
|
tasks[idx],
|
|
error
|
|
)
|
|
}
|
|
}
|
|
if (++count % 50 === 0)
|
|
process.stdout.write(`Downloading documentation: ${count} so far\r`)
|
|
}
|
|
}
|
|
|
|
// converts telegram's json to tl
|
|
function convertJsonToTl(json) {
|
|
// their json schema uses signed integers for ids, we use unsigned, so we need to convert them
|
|
|
|
const lines = []
|
|
const objectToLine = (cls) => {
|
|
let line = `${cls.predicate || cls.method}#${signedInt32ToUnsigned(
|
|
parseInt(cls.id)
|
|
).toString(16)}${cls.params
|
|
.map((p) => ` ${p.name}:${p.type}`)
|
|
.join('')} = ${cls.type};`
|
|
lines.push(line)
|
|
}
|
|
|
|
// i honestly have no idea why http_wait is a function in schema.
|
|
// it can't be a function.
|
|
// there's literally no way.
|
|
// and it is a type in tgdesktop schema.
|
|
// (see https://t.me/teispam/998, in russian)
|
|
// durov why
|
|
let httpWait = json.methods.find((it) => it.method === 'http_wait')
|
|
json.methods = json.methods.filter((it) => it.method !== 'http_wait')
|
|
json.constructors.push(httpWait)
|
|
|
|
json.constructors.filter(Boolean).forEach(objectToLine)
|
|
lines.push('---functions---')
|
|
json.methods.filter(Boolean).forEach(objectToLine)
|
|
return lines.join('\n')
|
|
}
|
|
|
|
async function main() {
|
|
const descriptionsYaml = yaml.load(
|
|
await fs.promises.readFile(path.join(__dirname, '../descriptions.yaml'))
|
|
)
|
|
|
|
console.log('[i] Fetching mtproto.tl')
|
|
// using this instead of one in tgdesktop repo because tgdesktop one uses strings instead of bytes in many places
|
|
// idk why, i don't wanna know why, maybe some memes with strings in c++ or smth...
|
|
// seems like in api.tl there's no such thing (hopefully?)
|
|
//
|
|
// and also tl-schema inside the docs is outdated, unlike json (wtf???)
|
|
// so we basically convert their json to tl, just to convert it back to json immediately after that
|
|
// thank you durov
|
|
let mtprotoTl = await fetch('https://core.telegram.org/schema/mtproto-json')
|
|
.then((i) => i.json())
|
|
.then((json) => convertJsonToTl(json))
|
|
let ret = {}
|
|
|
|
ret.mtproto = await convertTlToJson(mtprotoTl, 'mtproto')
|
|
|
|
console.log('[i] Fetching api.tl from tdesktop')
|
|
const apiTlDesktop = await fetch(
|
|
'https://raw.githubusercontent.com/telegramdesktop/tdesktop/dev/Telegram/Resources/tl/api.tl'
|
|
).then((i) => i.text())
|
|
const apiDesktopLayer = parseInt(
|
|
apiTlDesktop.match(/^\/\/ LAYER (\d+)/m)[1]
|
|
)
|
|
|
|
console.log('[i] Fetching telegram_api.tl from TDLib')
|
|
const apiTlTdlib = await fetch(
|
|
'https://raw.githubusercontent.com/tdlib/td/master/td/generate/scheme/telegram_api.tl'
|
|
).then((i) => i.text())
|
|
const apiTdlibLayer = await fetch(
|
|
'https://raw.githubusercontent.com/tdlib/td/master/td/telegram/Version.h'
|
|
)
|
|
.then((r) => r.text())
|
|
.then((res) =>
|
|
parseInt(res.match(/^constexpr int32 MTPROTO_LAYER = (\d+)/m)[1])
|
|
)
|
|
|
|
console.log(
|
|
'[i] tdesktop has layer %d, tdlib has %d',
|
|
apiDesktopLayer,
|
|
apiTdlibLayer
|
|
)
|
|
|
|
if (Math.abs(apiDesktopLayer - apiTdlibLayer) > 2) {
|
|
console.log('[i] Too different layers, using newer one')
|
|
|
|
const newer =
|
|
apiDesktopLayer > apiTdlibLayer ? apiTlDesktop : apiTlTdlib
|
|
const newerLayer =
|
|
apiDesktopLayer > apiTdlibLayer ? apiDesktopLayer : apiTdlibLayer
|
|
|
|
ret.apiLayer = newerLayer + ''
|
|
ret.api = await convertTlToJson(newer, 'api')
|
|
} else {
|
|
console.log('[i] Merging schemas...')
|
|
|
|
const first = await convertTlToJson(apiTlTdlib, 'api')
|
|
const second = await convertTlToJson(apiTlDesktop, 'api')
|
|
|
|
const onConflict =
|
|
apiDesktopLayer === apiTdlibLayer
|
|
? null // manual conflict resolving
|
|
: apiTdlibLayer > apiDesktopLayer // use ctor from newer schema
|
|
? 'A'
|
|
: 'B'
|
|
|
|
await mergeSchemas(first, second, onConflict)
|
|
|
|
ret.apiLayer = apiTdlibLayer + ''
|
|
ret.api = first
|
|
}
|
|
|
|
await addDocumentation(ret.api)
|
|
|
|
await applyDescriptionsFile(ret, descriptionsYaml)
|
|
|
|
await fs.promises.writeFile(
|
|
path.join(__dirname, '../raw-schema.json'),
|
|
JSON.stringify(ret, 4)
|
|
)
|
|
|
|
// update version in README.md
|
|
let readmeMd = await fs.promises.readFile(
|
|
path.join(__dirname, '../README.md'),
|
|
'utf-8'
|
|
)
|
|
readmeMd = readmeMd.replace(
|
|
/^Generated from TL layer \*\*\d+\*\* \(last updated on \d+\.\d+\.\d+\)\.$/m,
|
|
`Generated from TL layer **${
|
|
ret.apiLayer
|
|
}** (last updated on ${new Date().toLocaleDateString('ru')}).`
|
|
)
|
|
await fs.promises.writeFile(path.join(__dirname, '../README.md'), readmeMd)
|
|
}
|
|
|
|
module.exports = {
|
|
convertTlToJson,
|
|
convertJsonToTl,
|
|
}
|
|
|
|
if (require.main === module) {
|
|
main().catch(console.error)
|
|
}
|