import Long from 'long'

import type { InputText, MessageEntity, TextWithEntities, tl } from '@mtcute/client'

// `tg://user?id=123` with an optional hexadecimal `&hash=...` access hash
const MENTION_REGEX = /^tg:\/\/user\?id=(\d+)(?:&hash=(-?[0-9a-fA-F]+)(?:&|$)|&|$)/
// `tg://emoji?id=123` (custom emoji document id)
const EMOJI_REGEX = /^tg:\/\/emoji\?id=(-?\d+)/

const TAG_BOLD = '**'
const TAG_ITALIC = '__'
const TAG_UNDERLINE = '--'
const TAG_STRIKE = '~~'
const TAG_SPOILER = '||'
const TAG_CODE = '`'
const TAG_PRE = '```'

// every character that has a special meaning for the parser below
const TO_BE_ESCAPED = /[*_\-~`[\\\]|]/g

/**
 * Escape a string to be safely used in Markdown.
 *
 * > **Note**: this function is in most cases not needed, as `md` function
 * > handles all `string`s passed to it automatically as plain text.
 *
 * @param str  Plain text
 * @returns  `str` with a backslash inserted before every special character
 */
function escape(str: string): string {
    return str.replace(TO_BE_ESCAPED, (s) => '\\' + s)
}

/**
 * Add Markdown formatting to the text given the plain text and entities contained in it.
 *
 * Entity types that have no Markdown representation here are skipped,
 * as are entities that start past the end of the text.
 *
 * @param input  Plain string (which is only escaped) or text with entities
 * @returns  Markdown source that `md` parses back into the same text
 */
function unparse(input: InputText): string {
    if (typeof input === 'string') return escape(input)

    const rawText = input.text
    const entities = input.entities ?? []

    // positions (in the *original* text) of the characters that got a backslash
    const escaped: number[] = []
    let text = rawText.replace(TO_BE_ESCAPED, (s, pos: number) => {
        escaped.push(pos)

        return '\\' + s
    })
    const hasEscaped = escaped.length > 0

    type InsertLater = [number, string]
    const insert: InsertLater[] = []

    for (const entity of entities) {
        let start = entity.offset
        let end = start + entity.length

        // entity offsets refer to the original text, so clamp against it
        // (not against the already escaped text, which may be longer)
        if (start > rawText.length) continue
        if (start < 0) start = 0
        if (end > rawText.length) end = rawText.length
        // malformed entity lying entirely before the text
        if (end < start) continue

        if (hasEscaped) {
            // shift both offsets by the number of backslashes inserted before them
            let escapedPos = 0

            while (escapedPos < escaped.length && escaped[escapedPos] < start) {
                escapedPos += 1
            }
            start += escapedPos

            // strictly less: a character at `end` is outside of the entity,
            // so its backslash must stay after the closing tag
            while (escapedPos < escaped.length && escaped[escapedPos] < end) {
                escapedPos += 1
            }
            end += escapedPos
        }

        let startTag: string
        let endTag: string

        switch (entity._) {
            case 'messageEntityBold':
                startTag = endTag = TAG_BOLD
                break
            case 'messageEntityItalic':
                startTag = endTag = TAG_ITALIC
                break
            case 'messageEntityUnderline':
                startTag = endTag = TAG_UNDERLINE
                break
            case 'messageEntityStrike':
                startTag = endTag = TAG_STRIKE
                break
            case 'messageEntitySpoiler':
                startTag = endTag = TAG_SPOILER
                break
            case 'messageEntityCode':
                startTag = endTag = TAG_CODE
                break
            case 'messageEntityPre':
                startTag = TAG_PRE

                if (entity.language) {
                    startTag += entity.language
                }

                startTag += '\n'
                endTag = '\n' + TAG_PRE
                break
            case 'messageEntityTextUrl':
                startTag = '['
                endTag = `](${entity.url})`
                break
            case 'messageEntityMentionName':
                startTag = '['
                endTag = `](tg://user?id=${entity.userId})`
                break
            case 'messageEntityCustomEmoji':
                startTag = '['
                endTag = `](tg://emoji?id=${entity.documentId.toString()})`
                break
            default:
                continue
        }

        insert.push([start, startTag])
        insert.push([end, endTag])
    }

    // sort by offset desc, so that inserting a tag never moves a pending offset
    insert.sort((a, b) => b[0] - a[0])

    for (const [offset, tag] of insert) {
        text = text.slice(0, offset) + tag + text.slice(offset)
    }

    return text
}

/**
 * Parse Markdown into plain text and entities.
 *
 * Works both as a template tag (`strings` is a `TemplateStringsArray`,
 * `sub` are the interpolated values) and as a plain function taking a string.
 *
 * Interpolated strings and numbers are appended as plain text without being parsed,
 * interpolated texts with entities keep their entities (shifted to the new position),
 * and falsy values (including `0` and `''`) as well as booleans are skipped.
 *
 * @throws Error  If a link or a pre block is malformed, or if some entity is left unterminated
 */
function parse(
    strings: TemplateStringsArray | string,
    ...sub: (InputText | MessageEntity | boolean | number | undefined | null)[]
): TextWithEntities {
    const entities: tl.TypeMessageEntity[] = []
    let result = ''

    // entities that were opened but not closed yet, keyed by their kind
    const stacks: Record<string, tl.Mutable<tl.TypeMessageEntity>[]> = {}

    // parser state is shared between `feed` calls,
    // so an entity may span multiple template chunks
    let insideCode = false
    let insidePre = false
    let insideLink = false

    function feed(text: string) {
        const len = text.length
        let pos = 0

        while (pos < len) {
            const c = text[pos]

            if (c === '\\') {
                // a trailing backslash has nothing to escape, keep it as is
                result += pos + 1 < len ? text[pos + 1] : c
                pos += 2
                continue
            }

            if (insideCode) {
                if (c === '`') {
                    // we can be certain that we're inside code
                    const ent = stacks.code.pop()!
                    ent.length = result.length - ent.offset
                    entities.push(ent)
                    insideCode = false
                    pos += 1
                } else {
                    pos += 1
                    result += c
                }
                continue
            }

            if (insidePre) {
                if (c === '`' || (c === '\n' && text[pos + 1] === '`')) {
                    if (c === '\n') pos += 1

                    if (text[pos + 1] === '`' && text[pos + 2] === '`') {
                        // we can be certain that we're inside pre
                        const ent = stacks.pre.pop()!
                        ent.length = result.length - ent.offset
                        entities.push(ent)
                        insidePre = false
                        pos += 3
                        continue
                        // closed with single or double backtick
                        // i.e. not closed actually! this is totally valid md:
                        // ```javascript
                        // const a = ``;
                        // ```

                        // compensate the `pos` change we made earlier
                    } else if (c === '\n') {
                        pos -= 1
                    }
                }

                pos += 1
                result += c
                continue
            }

            if (insideLink && c === ']') {
                // we can be certain that we're inside link
                const ent = stacks.link.pop()!

                if (text[pos + 1] !== '(') {
                    // [link text]
                    // not a link after all, put the brackets back into the text
                    result = `${result.slice(0, ent.offset)}[${result.slice(ent.offset)}]`
                    pos += 1
                    insideLink = false
                    continue
                }

                pos += 2
                let url = ''

                while (pos < text.length && text[pos] !== ')') {
                    url += text[pos++]
                }

                pos += 1 // )

                if (pos > text.length) {
                    throw new Error('Malformed LINK entity, expected )')
                }

                // a link with an empty url produces no entity, only its text
                if (url.length) {
                    const offset = ent.offset
                    const length = result.length - offset

                    const mention = MENTION_REGEX.exec(url)
                    const emoji = mention ? null : EMOJI_REGEX.exec(url)

                    if (mention) {
                        const userId = parseInt(mention[1], 10)
                        const accessHash = mention[2]

                        if (accessHash) {
                            entities.push({
                                _: 'inputMessageEntityMentionName',
                                offset,
                                length,
                                userId: {
                                    _: 'inputUser',
                                    userId,
                                    accessHash: Long.fromString(accessHash, false, 16),
                                },
                            })
                        } else {
                            entities.push({
                                _: 'messageEntityMentionName',
                                offset,
                                length,
                                userId,
                            })
                        }
                    } else if (emoji) {
                        entities.push({
                            _: 'messageEntityCustomEmoji',
                            offset,
                            length,
                            documentId: Long.fromString(emoji[1]),
                        })
                    } else {
                        // protocol-relative url
                        if (url.match(/^\/\//)) url = 'http:' + url

                        entities.push({
                            _: 'messageEntityTextUrl',
                            offset,
                            length,
                            url,
                        })
                    }
                }

                insideLink = false
                continue
            }

            if (c === '[' && !insideLink) {
                pos += 1
                insideLink = true
                if (!('link' in stacks)) stacks.link = []
                // placeholder that only remembers where the link text starts,
                // the actual entity is built once the url part is parsed
                stacks.link.push({
                    _: 'messageEntityTextUrl',
                    offset: result.length,
                    length: 0,
                    url: '',
                })
                continue
            }

            if (c === '`') {
                const isPre = text[pos + 1] === '`' && text[pos + 2] === '`'

                if (isPre) {
                    pos += 3
                    let language = ''

                    // everything up to the end of the line is the language
                    while (pos < text.length && text[pos] !== '\n') {
                        language += text[pos++]
                    }

                    // newline
                    pos += 1

                    if (pos > text.length) {
                        throw new Error('Malformed PRE entity, expected LF after ```')
                    }

                    if (!('pre' in stacks)) stacks.pre = []
                    stacks.pre.push({
                        _: 'messageEntityPre',
                        offset: result.length,
                        length: 0,
                        language,
                    })
                    insidePre = true
                } else {
                    pos += 1
                    if (!('code' in stacks)) stacks.code = []
                    stacks.code.push({
                        _: 'messageEntityCode',
                        offset: result.length,
                        length: 0,
                    })
                    insideCode = true
                }

                continue
            }

            if (c === text[pos + 1]) {
                // maybe (?) start or end of an entity
                let type: 'Italic' | 'Bold' | 'Underline' | 'Strike' | 'Spoiler' | null = null

                switch (c) {
                    case '_':
                        type = 'Italic'
                        break
                    case '*':
                        type = 'Bold'
                        break
                    case '-':
                        type = 'Underline'
                        break
                    case '~':
                        type = 'Strike'
                        break
                    case '|':
                        type = 'Spoiler'
                        break
                }

                if (type) {
                    if (!(type in stacks)) stacks[type] = []

                    // the same tag both opens and closes an entity,
                    // so it closes one whenever one of that kind is open
                    const isBegin = stacks[type].length === 0

                    if (isBegin) {
                        stacks[type].push({
                            _: `messageEntity${type}`,
                            offset: result.length,
                            length: 0,
                        })
                    } else {
                        // valid because isBegin is false
                        const ent = stacks[type].pop()!
                        ent.length = result.length - ent.offset
                        entities.push(ent)
                    }

                    pos += 2
                    continue
                }
            }

            if (c === '\n') {
                // a newline at the very beginning of a chunk is dropped
                if (pos !== 0) {
                    result += '\n'
                }

                // skip the whitespace following the newline (e.g. indentation)
                const nonWhitespace = text.slice(pos + 1).search(/\S/)

                if (nonWhitespace !== -1) {
                    pos += nonWhitespace + 1
                } else {
                    // only whitespace is left in this chunk
                    pos = len
                    result = result.trimEnd()
                }
                continue
            }

            // nothing matched => normal character
            result += c
            pos += 1
        }
    }

    const chunks: readonly string[] = typeof strings === 'string' ? [strings] : strings

    sub.forEach((it, idx) => {
        feed(chunks[idx])

        // skips booleans and every falsy value (including `0` and `''`)
        if (typeof it === 'boolean' || !it) return

        if (typeof it === 'string' || typeof it === 'number') {
            // plain values are never parsed as Markdown
            result += it
        } else {
            // TextWithEntities or MessageEntity
            const text = it.text
            const innerEntities = 'raw' in it ? [it.raw] : it.entities

            const baseOffset = result.length
            result += text

            if (innerEntities) {
                for (const ent of innerEntities) {
                    entities.push({ ...ent, offset: ent.offset + baseOffset })
                }
            }
        }
    })

    feed(chunks[chunks.length - 1])

    for (const [name, stack] of Object.entries(stacks)) {
        if (stack.length) {
            throw new Error(`Unterminated ${name} entity`)
        }
    }

    return {
        text: result,
        entities,
    }
}

// typedoc doesn't support this yet, so we'll have to do it manually
// https://github.com/TypeStrong/typedoc/issues/2436
export const md: {
    /**
     * Tagged template based Markdown-to-entities parser function
     *
     * Additionally, `md` function has two static methods:
     * - `md.escape` - escape a string to be safely used in Markdown
     *   (should not be needed in most cases, as `md` function itself handles all `string`s
     *   passed to it automatically as plain text)
     * - `md.unparse` - add Markdown formatting to the text given the plain text and entities contained in it
     *
     * @example
     * ```typescript
     * const text = md`**${user.displayName}**`
     * ```
     */
    (
        strings: TemplateStringsArray,
        ...sub: (InputText | MessageEntity | boolean | number | undefined | null)[]
    ): TextWithEntities
    /**
     * A variant taking a plain JS string as input
     * and parsing it.
     *
     * Useful for cases when you already have a string
     * (e.g. from some server) and want to parse it.
     *
     * @example
     * ```typescript
     * const string = '**hello**'
     * const text = md(string)
     * ```
     */
    (string: string): TextWithEntities
    escape: typeof escape
    unparse: typeof unparse
} = Object.assign(parse, {
    escape,
    unparse,
})