import { Parser } from 'htmlparser2' import Long from 'long' import type { InputText, MessageEntity, TextWithEntities, tl } from '@mtcute/client' const MENTION_REGEX = /^tg:\/\/user\?id=(\d+)(?:&hash=(-?[0-9a-fA-F]+)(?:&|$)|&|$)/ /** * Escape a string to be safely used in HTML. * * > **Note**: this function is in most cases not needed, as `html` function * > handles all `string`s passed to it automatically as plain text. */ function escape(str: string, quote = false): string { str = str.replace(/&/g, '&').replace(//g, '>') if (quote) str = str.replace(/"/g, '"') return str } function parse( strings: TemplateStringsArray | string, ...sub: (InputText | MessageEntity | boolean | number | undefined | null)[] ): TextWithEntities { const stacks: Record[]> = {} const entities: tl.TypeMessageEntity[] = [] let plainText = '' let pendingText = '' function processPendingText(tagEnd = false) { if (!pendingText.length) return if (!stacks.pre?.length) { pendingText = pendingText.replace(/[^\S\u00A0]+/gs, ' ') if (tagEnd) pendingText = pendingText.trimEnd() if (!plainText.length || plainText.match(/\s$/)) { pendingText = pendingText.trimStart() } } for (const ents of Object.values(stacks)) { for (const ent of ents) { ent.length += pendingText.length } } plainText += pendingText pendingText = '' } const parser = new Parser({ onopentag(name, attribs) { name = name.toLowerCase() processPendingText() // ignore tags inside pre (except pre) if (name !== 'pre' && stacks.pre?.length) return let entity: tl.TypeMessageEntity switch (name) { case 'br': plainText += '\n' return case 'b': case 'strong': entity = { _: 'messageEntityBold', offset: plainText.length, length: 0, } break case 'i': case 'em': entity = { _: 'messageEntityItalic', offset: plainText.length, length: 0, } break case 'u': entity = { _: 'messageEntityUnderline', offset: plainText.length, length: 0, } break case 's': case 'del': case 'strike': entity = { _: 'messageEntityStrike', offset: plainText.length, length: 0, } break case 'blockquote': entity = { _: 'messageEntityBlockquote', offset: plainText.length, length: 0, } break case 'code': entity = { _: 'messageEntityCode', offset: plainText.length, length: 0, } break case 'pre': entity = { _: 'messageEntityPre', offset: plainText.length, length: 0, language: attribs.language ?? '', } break case 'spoiler': case 'tg-spoiler': entity = { _: 'messageEntitySpoiler', offset: plainText.length, length: 0, } break case 'emoji': case 'tg-emoji': { const id = attribs.id || attribs['emoji-id'] if (!id || !id.match(/^-?\d+$/)) return entity = { _: 'messageEntityCustomEmoji', offset: plainText.length, length: 0, documentId: Long.fromString(id), } break } case 'a': { let url = attribs.href if (!url) return const mention = MENTION_REGEX.exec(url) if (mention) { const id = parseInt(mention[1]) const accessHash = mention[2] if (accessHash) { entity = { _: 'inputMessageEntityMentionName', offset: plainText.length, length: 0, userId: { _: 'inputUser', userId: id, accessHash: Long.fromString(accessHash, false, 16), }, } } else { entity = { _: 'messageEntityMentionName', offset: plainText.length, length: 0, userId: id, } } } else { if (url.match(/^\/\//)) url = 'http:' + url entity = { _: 'messageEntityTextUrl', offset: plainText.length, length: 0, url, } } break } default: return } if (!(name in stacks)) { stacks[name] = [] } stacks[name].push(entity) }, onclosetag(name: string) { processPendingText(true) name = name.toLowerCase() // ignore tags inside pre (except pre) if (name !== 'pre' && stacks.pre?.length) return const entity = stacks[name]?.pop() if (!entity) return // unmatched close tag // ignore nested pre-s if (name !== 'pre' || !stacks.pre?.length) { entities.push(entity) } }, ontext(data) { pendingText += data }, }) if (typeof strings === 'string') strings = [strings] as unknown as TemplateStringsArray sub.forEach((it, idx) => { parser.write(strings[idx]) if (typeof it === 'boolean' || !it) return if (typeof it === 'string' || typeof it === 'number') { pendingText += it } else { // TextWithEntities or MessageEntity const text = it.text const innerEntities = 'raw' in it ? [it.raw] : it.entities processPendingText() const baseOffset = plainText.length pendingText += text if (innerEntities) { for (const ent of innerEntities) { entities.push({ ...ent, offset: ent.offset + baseOffset }) } } } }) parser.write(strings[strings.length - 1]) processPendingText(true) return { text: plainText.replace(/\u00A0/g, ' '), entities, } } /** Options passed to `html.unparse` */ export interface HtmlUnparseOptions { /** * Syntax highlighter to use when un-parsing `pre` tags with language */ syntaxHighlighter?: (code: string, language: string) => string } // internal function that uses recursion to correctly process nested & overlapping entities function _unparse( text: string, entities: ReadonlyArray, params: HtmlUnparseOptions, entitiesOffset = 0, offset = 0, length = text.length, ): string { if (!text) return text if (!entities.length || entities.length === entitiesOffset) { return escape(text) .replace(/\n/g, '
') .replace(/ {2,}/g, (match) => { return ' '.repeat(match.length) }) } const end = offset + length const html: string[] = [] let lastOffset = 0 for (let i = entitiesOffset; i < entities.length; i++) { const entity = entities[i] if (entity.offset >= end) break let entOffset = entity.offset let length = entity.length if (entOffset < 0) { length += entOffset entOffset = 0 } let relativeOffset = entOffset - offset if (relativeOffset > lastOffset) { // add missing plain text html.push(escape(text.substring(lastOffset, relativeOffset))) } else if (relativeOffset < lastOffset) { length -= lastOffset - relativeOffset relativeOffset = lastOffset } if (length <= 0 || relativeOffset >= end || relativeOffset < 0) { continue } let skip = false const substr = text.substr(relativeOffset, length) if (!substr) continue const type = entity._ let entityText if (type === 'messageEntityPre') { entityText = substr } else { entityText = _unparse(substr, entities, params, i + 1, offset + relativeOffset, length) } switch (type) { case 'messageEntityBold': case 'messageEntityItalic': case 'messageEntityUnderline': case 'messageEntityStrike': case 'messageEntityCode': case 'messageEntityBlockquote': case 'messageEntitySpoiler': { const tag = ( { messageEntityBold: 'b', messageEntityItalic: 'i', messageEntityUnderline: 'u', messageEntityStrike: 's', messageEntityCode: 'code', messageEntityBlockquote: 'blockquote', messageEntitySpoiler: 'spoiler', } as const )[type] html.push(`<${tag}>${entityText}`) } break case 'messageEntityPre': html.push( `${ params.syntaxHighlighter && entity.language ? params.syntaxHighlighter(entityText, entity.language) : entityText }`, ) break case 'messageEntityEmail': html.push(`${entityText}`) break case 'messageEntityUrl': html.push(`${entityText}`) break case 'messageEntityTextUrl': html.push(`${entityText}`) break case 'messageEntityMentionName': html.push(`${entityText}`) break default: skip = true break } lastOffset = relativeOffset + (skip ? 0 : length) } html.push(escape(text.substr(lastOffset))) return html.join('') } /** * Add HTML formatting to the text given the plain text and entities contained in it. */ function unparse(input: InputText, options?: HtmlUnparseOptions): string { if (typeof input === 'string') { return _unparse(input, [], options ?? {}) } return _unparse(input.text, input.entities ?? [], options ?? {}) } // typedoc doesn't support this yet, so we'll have to do it manually // https://github.com/TypeStrong/typedoc/issues/2436 export const html: { /** * Tagged template based HTML-to-entities parser function * * Additionally, `md` function has two static methods: * - `html.escape` - escape a string to be safely used in HTML * (should not be needed in most cases, as `html` function itself handles all `string`s * passed to it automatically as plain text) * - `html.unparse` - add HTML formatting to the text given the plain text and entities contained in it * * @example * ```typescript * const text = html`${user.displayName}` * ``` */ ( strings: TemplateStringsArray, ...sub: (InputText | MessageEntity | boolean | number | undefined | null)[] ): TextWithEntities /** * A variant taking a plain JS string as input * and parsing it. * * Useful for cases when you already have a string * (e.g. from some server) and want to parse it. * * @example * ```typescript * const string = 'hello' * const text = html(string) * ``` */ (string: string): TextWithEntities escape: typeof escape unparse: typeof unparse } = Object.assign(parse, { escape, unparse, })