feat(tl): scrape schema from weba/k sources

This commit is contained in:
alina 🌸 2024-03-07 05:46:04 +03:00
parent 0e8f160871
commit ddc8f1dad7
Signed by: teidesu
SSH key fingerprint: SHA256:uNeCpw6aTSU4aIObXLvHfLkDa82HWH9EiOj9AXOIRpI
8 changed files with 516 additions and 11 deletions

View file

@ -12,6 +12,22 @@
"docs": "typedoc",
"build": "pnpm run -w build-package tl-utils"
},
"exports": {
".": "./src/index.ts",
"./json.js": "./src/json/index.ts"
},
"distOnlyFields": {
"exports": {
".": {
"import": "./dist/esm/index.js",
"require": "./dist/cjs/index.js"
},
"./json.js": {
"import": "./dist/esm/json/index.js",
"require": "./dist/cjs/json/index.js"
}
}
},
"dependencies": {
"crc-32": "1.2.0"
},

View file

@ -9,5 +9,7 @@ import { TlEntry } from './types.js'
* @param entry TL entry
*/
export function computeConstructorIdFromEntry(entry: TlEntry): number {
    // Serialize the entry, passing `true` as the second argument exactly as before;
    // the resulting string is what gets hashed.
    const str = writeTlEntryToString(entry, true)

    // `>>> 0` reinterprets the CRC32 result as an unsigned 32-bit integer,
    // which is how constructor IDs are represented in `TlEntry.id`.
    return CRC32.str(str) >>> 0
}

View file

@ -0,0 +1,232 @@
import { describe, expect, it } from 'vitest'
import { computeConstructorIdFromEntry } from '../ctor-id.js'
import { TlEntry } from '../types.js'
import { parseTlEntriesFromJson } from './from-json.js'
// Test suite for the JSON -> TlEntry[] converter.
describe('parseTlEntriesFromJson', () => {
// Shared helper: parses `json` with the given `params`, compares the result to `expected`,
// and additionally checks that every parsed entry's `id` equals the ID recomputed from
// the entry itself via `computeConstructorIdFromEntry`.
const test = (json: object, expected: TlEntry[], params?: Parameters<typeof parseTlEntriesFromJson>[1]) => {
const entries = parseTlEntriesFromJson(json, params)
expect(entries).toEqual(expected)
for (const entry of entries) {
expect(entry.id).to.equal(computeConstructorIdFromEntry(entry), `ID for ${entry.name}`)
}
}
// Constructors without parameters. The JSON ids are signed decimal strings, while the
// expected ids are their unsigned 32-bit equivalents (e.g. -1132882121 -> 3162085175).
it('parses simple constructors', () => {
test(
{
constructors: [
{
id: '-1132882121',
predicate: 'boolFalse',
params: [],
type: 'Bool',
},
{
id: '-1720552011',
predicate: 'boolTrue',
params: [],
type: 'Bool',
},
],
methods: [],
},
[
{
arguments: [],
id: 3162085175,
kind: 'class',
name: 'boolFalse',
type: 'Bool',
},
{
arguments: [],
id: 2574415285,
kind: 'class',
name: 'boolTrue',
type: 'Bool',
},
],
// the parser drops constructors whose predicate or type is in TL_PRIMITIVES unless this is set
// (presumably `Bool` is one of them - confirm against TL_PRIMITIVES)
{ keepPrimitives: true },
)
})
// Plain (unmodified) argument types are passed through as-is.
it('parses simple arguments', () => {
test(
{
constructors: [
{
id: '-122978821',
predicate: 'inputMediaContact',
params: [
{ name: 'phone_number', type: 'string' },
{ name: 'first_name', type: 'string' },
{ name: 'last_name', type: 'string' },
{ name: 'vcard', type: 'string' },
],
type: 'InputMedia',
},
],
methods: [],
},
[
{
arguments: [
{ name: 'phone_number', type: 'string' },
{ name: 'first_name', type: 'string' },
{ name: 'last_name', type: 'string' },
{ name: 'vcard', type: 'string' },
],
id: 4171988475,
kind: 'class',
name: 'inputMediaContact',
type: 'InputMedia',
},
],
)
})
// `flags.N?Type` arguments: the part before `?` is expected in `typeModifiers.predicate`,
// the part after it becomes the argument type.
it('parses predicated arguments', () => {
test(
{
constructors: [
{
id: '-1110593856',
predicate: 'inputChatUploadedPhoto',
params: [
{ name: 'flags', type: '#' },
{ name: 'file', type: 'flags.0?InputFile' },
{ name: 'video', type: 'flags.1?InputFile' },
{ name: 'video_start_ts', type: 'flags.2?double' },
{ name: 'video_emoji_markup', type: 'flags.3?VideoSize' },
],
type: 'InputChatPhoto',
},
],
methods: [],
},
[
{
arguments: [
{
name: 'flags',
type: '#',
typeModifiers: undefined,
},
{
name: 'file',
type: 'InputFile',
typeModifiers: {
predicate: 'flags.0',
},
},
{
name: 'video',
type: 'InputFile',
typeModifiers: {
predicate: 'flags.1',
},
},
{
name: 'video_start_ts',
type: 'double',
typeModifiers: {
predicate: 'flags.2',
},
},
{
name: 'video_emoji_markup',
type: 'VideoSize',
typeModifiers: {
predicate: 'flags.3',
},
},
],
id: 3184373440,
kind: 'class',
name: 'inputChatUploadedPhoto',
type: 'InputChatPhoto',
},
],
)
})
// `Vector<T>` is expected to be unwrapped to `T` with `typeModifiers.isVector`, both for
// arguments and (because `parseMethodTypes` is enabled) for method return types.
it('parses vector arguments', () => {
test(
{
constructors: [],
methods: [
{
id: '1779249670',
method: 'account.unregisterDevice',
params: [
{ name: 'token_type', type: 'int' },
{ name: 'token', type: 'string' },
{ name: 'other_uids', type: 'Vector<long>' },
],
type: 'Bool',
},
{
id: '227648840',
method: 'users.getUsers',
params: [
{
name: 'id',
type: 'Vector<InputUser>',
},
],
type: 'Vector<User>',
},
],
},
[
{
arguments: [
{
name: 'token_type',
type: 'int',
typeModifiers: undefined,
},
{
name: 'token',
type: 'string',
typeModifiers: undefined,
},
{
name: 'other_uids',
type: 'long',
typeModifiers: {
isVector: true,
},
},
],
id: 1779249670,
kind: 'method',
name: 'account.unregisterDevice',
type: 'Bool',
},
{
arguments: [
{
name: 'id',
type: 'InputUser',
typeModifiers: {
isVector: true,
},
},
],
id: 227648840,
kind: 'method',
name: 'users.getUsers',
type: 'User',
typeModifiers: {
isVector: true,
},
},
],
{ parseMethodTypes: true },
)
})
})

View file

@ -0,0 +1,99 @@
import { TL_PRIMITIVES, TlArgument, TlEntry } from '../types.js'
import { parseArgumentType } from '../utils.js'
import { parseTlSchemaFromJson, TlParamJson } from './types.js'
/**
 * Convert JSON schema parameters into TL arguments.
 *
 * Each parameter's type string is split by `parseArgumentType` into the type
 * and its modifiers; `typeModifiers` is left `undefined` when there are none.
 */
function paramsToArguments(params: TlParamJson[]): TlArgument[] {
    const toArgument = (param: TlParamJson): TlArgument => {
        const [type, modifiers] = parseArgumentType(param.type)
        const hasModifiers = Object.keys(modifiers).length > 0

        if (hasModifiers) {
            return { name: param.name, type, typeModifiers: modifiers }
        }

        return { name: param.name, type, typeModifiers: undefined }
    }

    return params.map((param) => toArgument(param))
}
/**
 * Convert a TL schema in JSON form (an object with `constructors` and
 * `methods` arrays) into a flat list of TL entries.
 *
 * Constructors are emitted first (as `kind: 'class'`), followed by methods
 * (as `kind: 'method'`), each in the order they appear in the input.
 *
 * @param json  Schema object; validated with `parseTlSchemaFromJson`
 * @param params  Conversion options
 * @returns  Parsed entries
 * @throws Error  If the schema is malformed or an `id` is not an integer
 */
export function parseTlEntriesFromJson(
    json: object,
    params?: {
        /**
         * Prefix prepended to the name of every produced entry
         * (the entries' `type` is left untouched)
         */
        prefix?: string

        /**
         * Whether to parse typeModifiers for method return types
         */
        parseMethodTypes?: boolean

        /**
         * Whether to keep primitives, i.e. constructors whose
         * predicate or type is listed in `TL_PRIMITIVES`
         */
        keepPrimitives?: boolean
    },
): TlEntry[] {
    const { parseMethodTypes, keepPrimitives, prefix = '' } = params ?? {}
    const schema = parseTlSchemaFromJson(json)

    const ret: TlEntry[] = []
    // Maps rather than plain objects: schema-provided names are arbitrary strings, and
    // `name in {}` is also true for inherited keys such as `constructor` or `toString`.
    const entries = new Map<string, TlEntry>()
    const unions = new Map<string, TlEntry[]>()

    // Converts the textual id into an unsigned 32-bit number. Without the check a
    // non-numeric id would silently turn into 0 (`NaN >>> 0`).
    const parseId = (raw: string, owner: string): number => {
        const id = Number(raw)

        if (raw.trim() === '' || !Number.isInteger(id)) {
            throw new Error(`Invalid id ${JSON.stringify(raw)} for ${owner}`)
        }

        return id >>> 0
    }

    schema.constructors.forEach((c) => {
        if (!keepPrimitives && (c.predicate in TL_PRIMITIVES || c.type in TL_PRIMITIVES)) return

        const entry: TlEntry = {
            id: parseId(c.id, c.predicate),
            kind: 'class',
            name: prefix + c.predicate,
            type: c.type,
            arguments: paramsToArguments(c.params),
        }

        entries.set(entry.name, entry)
        ret.push(entry)

        // group constructors by the type they construct
        const union = unions.get(c.type)

        if (union) {
            union.push(entry)
        } else {
            unions.set(c.type, [entry])
        }
    })

    schema.methods.forEach((m) => {
        const entry: TlEntry = {
            id: parseId(m.id, m.method),
            kind: 'method',
            name: prefix + m.method,
            type: m.type,
            arguments: paramsToArguments(m.params),
        }

        if (parseMethodTypes) {
            const [type, modifiers] = parseArgumentType(entry.type)
            entry.type = type

            if (Object.keys(modifiers).length) {
                entry.typeModifiers = modifiers
            }

            // since constructors were all already processed, we can put return type ctor id here
            const union = unions.get(type)
            const named = entries.get(type)

            if (union !== undefined && union.length === 1) {
                // the return type has exactly one constructor
                if (!entry.typeModifiers) entry.typeModifiers = {}
                entry.typeModifiers.constructorId = union[0].id
            } else if (named !== undefined) {
                // the return type names an already registered entry directly
                if (!entry.typeModifiers) entry.typeModifiers = {}
                entry.typeModifiers.isBareType = true
                entry.typeModifiers.constructorId = named.id
            }
        }

        entries.set(entry.name, entry)
        ret.push(entry)
    })

    return ret
}

View file

@ -0,0 +1,2 @@
export * from './from-json.js'
export * from './types.js'

View file

@ -0,0 +1,107 @@
/**
 * Assert that a value is a non-null object (arrays included).
 *
 * The parameter is `unknown` so that unvalidated data (e.g. the result of
 * `JSON.parse`) can be passed without a cast and is actually narrowed.
 *
 * @param obj  Value to check
 * @throws Error  If `obj` is `null` or `typeof obj` is not `'object'`
 */
function assertObject(obj: unknown): asserts obj is object {
    if (typeof obj !== 'object' || obj === null) {
        throw new Error('Expected object')
    }
}
/** Maps the `typeof` names supported by {@link assertFieldType} to the corresponding types. */
interface TypeofToType {
    string: string
    number: number
    boolean: boolean
    object: object
}

/**
 * Assert that `obj[field]` has the given `typeof` type.
 *
 * `null` is reported as `'null'` rather than `'object'`, so a `null` field
 * never satisfies the `'object'` check.
 *
 * @param obj  Object holding the field
 * @param field  Name of the field to check
 * @param type  Expected `typeof` name
 * @throws Error  If the field is missing or has a different type
 */
function assertFieldType<Field extends string, Type extends keyof TypeofToType>(
    obj: object,
    field: Field,
    type: Type,
): asserts obj is { [K in Field]: TypeofToType[Type] } {
    const value: unknown = (obj as Record<string, unknown>)[field]
    // `typeof null` is 'object', which would make the assertion above a lie
    const actual: string = value === null ? 'null' : typeof value

    if (actual !== type) {
        throw new Error(`Expected field ${field} to be of type ${type} (got ${actual})`)
    }
}
/** A single parameter of a constructor or method in the JSON schema. */
export interface TlParamJson {
// parameter name
name: string
// parameter type exactly as written in the schema (may carry modifiers,
// e.g. `flags.0?InputFile` or `Vector<long>`)
type: string
}
/**
 * Validate that `obj` is a JSON schema parameter, i.e. an object
 * whose `name` and `type` fields are both strings.
 *
 * @param obj  Value to validate
 * @returns  The very same object, typed as {@link TlParamJson}
 * @throws Error  If `obj` is not an object or one of the fields is not a string
 */
export function parseTlParamFromJson(obj: object): TlParamJson {
assertObject(obj)
assertFieldType(obj, 'name', 'string')
assertFieldType(obj, 'type', 'string')
// the assertions above narrowed `obj`, so no cast is needed
return obj
}
/**
 * Assert that `obj.params` is an array in which every element
 * passes `parseTlParamFromJson`.
 *
 * @throws Error  If `params` is missing, is not an array, or holds an invalid element
 */
function assertFieldParams(obj: object): asserts obj is { params: TlParamJson[] } {
    assertFieldType(obj, 'params', 'object')

    const { params } = obj

    if (Array.isArray(params)) {
        // validation only - the call throws on the first malformed parameter
        params.forEach((param: object) => {
            parseTlParamFromJson(param)
        })

        return
    }

    throw new Error('Expected field params to be an array')
}
/** A constructor as described in the JSON schema. */
export interface TlConstructorJson {
// constructor ID as a numeric string (may be negative, e.g. `'-1132882121'`)
id: string
// name of the type this constructor belongs to
type: string
// constructor name
predicate: string
// constructor parameters
params: TlParamJson[]
}
/**
 * Validate that `obj` is a JSON schema constructor: string `id`, `type`
 * and `predicate` fields plus a valid `params` array.
 *
 * @param obj  Value to validate
 * @returns  The very same object, typed as {@link TlConstructorJson}
 * @throws Error  If `obj` is not an object or any field is missing or malformed
 */
export function parseTlConstructorFromJson(obj: object): TlConstructorJson {
assertObject(obj)
assertFieldType(obj, 'id', 'string')
assertFieldType(obj, 'type', 'string')
assertFieldType(obj, 'predicate', 'string')
assertFieldParams(obj)
// the assertions above narrowed `obj`, so no cast is needed
return obj
}
/** A method as described in the JSON schema. */
export interface TlMethodJson {
// method ID as a numeric string
id: string
// return type exactly as written in the schema (e.g. `Vector<User>`)
type: string
// method name (e.g. `users.getUsers`)
method: string
// method parameters
params: TlParamJson[]
}
/**
 * Validate that `obj` is a JSON schema method: string `id`, `type`
 * and `method` fields plus a valid `params` array.
 *
 * @param obj  Value to validate
 * @returns  The very same object, typed as {@link TlMethodJson}
 * @throws Error  If `obj` is not an object or any field is missing or malformed
 */
export function parseTlMethodFromJson(obj: object): TlMethodJson {
assertObject(obj)
assertFieldType(obj, 'id', 'string')
assertFieldType(obj, 'type', 'string')
assertFieldType(obj, 'method', 'string')
assertFieldParams(obj)
// the assertions above narrowed `obj`, so no cast is needed
return obj
}
/** A TL schema in JSON form. */
export interface TlSchemaJson {
// all constructors of the schema
constructors: TlConstructorJson[]
// all methods of the schema
methods: TlMethodJson[]
}
/**
 * Validate that `obj` is a JSON TL schema: an object with `constructors`
 * and `methods` arrays whose elements are all well-formed.
 *
 * @param obj  Value to validate
 * @returns  The very same object, typed as {@link TlSchemaJson}
 * @throws Error  On the first structural problem found
 */
export function parseTlSchemaFromJson(obj: object): TlSchemaJson {
    assertObject(obj)
    assertFieldType(obj, 'constructors', 'object')
    assertFieldType(obj, 'methods', 'object')

    const { constructors, methods } = obj

    if (!Array.isArray(constructors)) {
        throw new Error('Expected field constructors to be an array')
    }

    if (!Array.isArray(methods)) {
        throw new Error('Expected field methods to be an array')
    }

    // validation only - each call throws on the first malformed element
    constructors.forEach((ctor: object) => {
        parseTlConstructorFromJson(ctor)
    })
    methods.forEach((method: object) => {
        parseTlMethodFromJson(method)
    })

    return obj as TlSchemaJson
}

View file

@ -20,6 +20,9 @@ export const TDESKTOP_SCHEMA =
// URL of the `layer.tl` file in the Telegram Desktop repository (`dev` branch)
export const TDESKTOP_LAYER =
'https://raw.githubusercontent.com/telegramdesktop/tdesktop/dev/Telegram/SourceFiles/mtproto/scheme/layer.tl'
// URL of the `telegram_api.tl` schema in the TDLib repository (`master` branch)
export const TDLIB_SCHEMA = 'https://raw.githubusercontent.com/tdlib/td/master/td/generate/scheme/telegram_api.tl'
// URL of the JSON schema in the tweb repository (`master` branch)
// NOTE(review): consumers presumably expect top-level `layer` and `API` fields - confirm
export const WEBK_SCHEMA = 'https://raw.githubusercontent.com/morethanwords/tweb/master/src/scripts/in/schema.json'
// URL of the `.tl` schema in the telegram-tt repository (`master` branch)
export const WEBA_SCHEMA = 'https://raw.githubusercontent.com/Ajaxy/telegram-tt/master/src/lib/gramjs/tl/static/api.tl'
// URL of a JavaScript file in the telegram-tt repository (`master` branch)
// NOTE(review): presumably the layer number is read from a `const LAYER = <n>;` line in it - confirm
export const WEBA_LAYER = 'https://raw.githubusercontent.com/Ajaxy/telegram-tt/master/src/lib/gramjs/tl/AllTLObjects.js'
export const ESM_PRELUDE = `// This file is auto-generated. Do not edit.
"use strict";

View file

@ -20,6 +20,7 @@ import {
TlFullSchema,
writeTlEntryToString,
} from '@mtcute/tl-utils'
import { parseTlEntriesFromJson } from '@mtcute/tl-utils/json.js'
import {
__dirname,
@ -31,6 +32,9 @@ import {
TDESKTOP_LAYER,
TDESKTOP_SCHEMA,
TDLIB_SCHEMA,
WEBA_LAYER,
WEBA_SCHEMA,
WEBK_SCHEMA,
} from './constants.js'
import { applyDocumentation, fetchDocumentation, getCachedDocumentation } from './documentation.js'
import { packTlSchema, TlPackedSchema, unpackTlSchema } from './schema.js'
@ -101,6 +105,44 @@ async function fetchCoreSchema(domain = CORE_DOMAIN, name = 'Core'): Promise<Sch
}
}
/**
 * Download the WebK JSON schema and convert it to a full TL schema.
 *
 * The downloaded document is expected to be an object with a numeric `layer`
 * and an `API` object holding the schema itself. Methods involving the generic
 * `X`/`!X` types are dropped, because the JSON format carries no information
 * about generics.
 *
 * @throws Error  If the downloaded document does not have the expected shape
 */
async function fetchWebkSchema(): Promise<Schema> {
    const raw = await fetchRetry(WEBK_SCHEMA)
    const json: unknown = JSON.parse(raw)

    // Validate instead of blindly casting: otherwise a changed upstream format
    // would surface as `layer: undefined` far away from the actual cause.
    if (typeof json !== 'object' || json === null) {
        throw new Error('WebK schema: expected a JSON object')
    }

    const { layer, API: api } = json as { layer?: unknown; API?: unknown }

    if (typeof layer !== 'number') {
        throw new Error('WebK schema: layer number not found')
    }

    if (typeof api !== 'object' || api === null) {
        throw new Error('WebK schema: API section not found')
    }

    const entries = parseTlEntriesFromJson(api, { parseMethodTypes: true }).filter((it) => {
        if (it.kind !== 'method') return true

        // json schema doesn't provide info about generics, remove these
        return it.type !== 'X' && !it.arguments.some((arg) => arg.type === '!X')
    })

    return {
        name: 'WebK',
        layer,
        content: parseFullTlSchema(entries),
    }
}
/**
 * Download the WebA `.tl` schema together with the file declaring its layer
 * number, and convert the schema to a full TL schema.
 *
 * @throws Error  If the layer declaration cannot be found
 */
async function fetchWebaSchema(): Promise<Schema> {
    const schemaRequest = fetchRetry(WEBA_SCHEMA)
    const layerRequest = fetchRetry(WEBA_LAYER)
    const [tl, layerSource] = await Promise.all([schemaRequest, layerRequest])

    // the layer is declared on a line of its own, e.g. `const LAYER = 174;`
    const layerMatch = /^const LAYER = (\d+);$/m.exec(layerSource)

    if (layerMatch === null) {
        throw new Error('Layer number not found')
    }

    const layer = parseInt(layerMatch[1], 10)

    return { name: 'WebA', layer, content: tlToFullSchema(tl) }
}
/**
 * Promise-based wrapper around `rl.question`.
 *
 * @param rl  Readline interface used to ask the question
 * @param q  Text of the question
 * @returns  Promise resolved with the answer passed to the `question` callback
 */
function input(rl: readline.Interface, q: string): Promise<string> {
    return new Promise<string>((resolve) => {
        rl.question(q, (answer) => {
            resolve(answer)
        })
    })
}
@ -182,18 +224,20 @@ async function overrideInt53(schema: TlFullSchema): Promise<void> {
async function main() {
console.log('Loading schemas...')
const schemas: Schema[] = [
await fetchTdlibSchema(),
await fetchTdesktopSchema(),
await fetchCoreSchema(),
await fetchCoreSchema(COREFORK_DOMAIN, 'Corefork'),
await fetchCoreSchema(BLOGFORK_DOMAIN, 'Blogfork'),
{
const schemas: Schema[] = await Promise.all([
fetchTdlibSchema(),
fetchTdesktopSchema(),
fetchCoreSchema(),
fetchCoreSchema(COREFORK_DOMAIN, 'Corefork'),
fetchCoreSchema(BLOGFORK_DOMAIN, 'Blogfork'),
fetchWebkSchema(),
fetchWebaSchema(),
readFile(join(__dirname, '../data/custom.tl'), 'utf8').then((tl) => ({
name: 'Custom',
layer: 0, // handled manually
content: tlToFullSchema(await readFile(join(__dirname, '../data/custom.tl'), 'utf8')),
},
]
content: tlToFullSchema(tl),
})),
])
console.log('Available schemas:')
schemas.forEach((schema) =>