2021-07-02 21:28:30 +03:00
|
|
|
import type { IMessageEntityParser, MessageEntity, RawString } from '@mtcute/client'
|
2021-04-08 12:19:38 +03:00
|
|
|
import { tl } from '@mtcute/tl'
|
|
|
|
import { Parser } from 'htmlparser2'
|
|
|
|
import bigInt from 'big-integer'
|
|
|
|
|
2021-04-25 13:34:06 +03:00
|
|
|
// Matches `tg://user?id=<digits>` links, optionally followed by
// `&hash=<hex, optionally with a leading minus>` and/or further `&`-separated parameters.
// Capture group 1 is the numeric user id; capture group 2 (if present) is the
// hexadecimal access hash.
const MENTION_REGEX = /^tg:\/\/user\?id=(\d+)(?:&hash=(-?[0-9a-fA-F]+)(?:&|$)|&|$)/
|
2021-04-08 12:19:38 +03:00
|
|
|
|
2021-07-02 20:20:29 +03:00
|
|
|
/**
 * Tagged template helper that assembles an HTML string while
 * escaping interpolated plain strings.
 *
 * Every substitution that is a `string` is passed through
 * {@link HtmlMessageEntityParser.escape} (without quote escaping);
 * any other substitution is concatenated as-is.
 *
 * @example
 * ```typescript
 * const escaped = html`<b>${user.displayName}</b>`
 * ```
 *
 * @param strings  Literal parts of the template
 * @param sub  Values interpolated between the literal parts
 * @returns  The literal parts joined with the (possibly escaped) substitutions
 */
export function html(strings: TemplateStringsArray, ...sub: (string | RawString)[]): string {
    const pieces: string[] = []

    for (let i = 0; i < sub.length; i++) {
        const value = sub[i]
        // only plain strings are escaped, everything else is trusted verbatim
        const rendered =
            typeof value === 'string'
                ? HtmlMessageEntityParser.escape(value)
                : value

        pieces.push(strings[i] + rendered)
    }

    // a template always has exactly one more literal part than substitutions
    pieces.push(strings[strings.length - 1])

    return pieces.join('')
}
|
|
|
|
|
2021-04-08 12:19:38 +03:00
|
|
|
export namespace HtmlMessageEntityParser {
    /**
     * Options accepted by the {@link HtmlMessageEntityParser} constructor
     */
    export interface Options {
        /**
         * Optional syntax highlighter applied by {@link HtmlMessageEntityParser.unparse}.
         * When omitted, the entity contents are emitted without highlighting.
         */
        syntaxHighlighter?: SyntaxHighlighter
    }

    /**
     * Syntax highlighter function used in {@link HtmlMessageEntityParser.unparse}
     *
     * Must be sync (this might change in the future) and must return valid HTML.
     *
     * @param code  Contents of the entity being rendered
     * @param language  Language attached to the entity
     * @returns  HTML to be placed inside the entity's tag
     */
    export type SyntaxHighlighter = (code: string, language: string) => string
}
|
|
|
|
|
|
|
|
/**
 * HTML MessageEntity parser.
 *
 * This class implements syntax very similar to one available
 * in the Bot API ([documented here](https://core.telegram.org/bots/api#html-style))
 * with some slight differences.
 */
export class HtmlMessageEntityParser implements IMessageEntityParser {
    /** Name of this parser */
    name = 'html'

    /** Highlighter applied to `code`, `pre` and `blockquote` contents in `unparse`, if provided */
    private readonly _syntaxHighlighter?: HtmlMessageEntityParser.SyntaxHighlighter

    /**
     * @param options  Parser options; only `syntaxHighlighter` is read
     */
    constructor(options?: HtmlMessageEntityParser.Options) {
        this._syntaxHighlighter = options?.syntaxHighlighter
    }

    /**
     * Escape the string so it can be safely used inside HTML
     *
     * `&`, `<` and `>` are always replaced with `&amp;`, `&lt;` and `&gt;`.
     *
     * @param str  String to be escaped
     * @param quote  Whether `"` (double quote) should be escaped as `&quot;`
     * @returns  The escaped string
     */
    static escape(str: string, quote = false): string {
        // `&` must be replaced first, otherwise the ampersands
        // introduced by the other replacements would be escaped again
        str = str
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
        if (quote) str = str.replace(/"/g, '&quot;')

        return str
    }

    /**
     * Parse HTML-formatted text into plain text and a list of entities.
     *
     * Unknown tags and `<a>` tags without `href` produce no entity, but
     * their text content is still appended to the plain text.
     * An entity is only emitted once its closing tag is encountered.
     *
     * @param text  HTML-formatted text
     * @returns  Tuple of the plain text and the entities, in the order their tags were closed
     */
    parse(text: string): [string, tl.TypeMessageEntity[]] {
        // Per-tag stacks of currently open entities. A Map is used instead of
        // a plain object so that tag names like `constructor` can never
        // resolve to members inherited from Object.prototype.
        const stacks = new Map<string, tl.Mutable<tl.TypeMessageEntity>[]>()
        const entities: tl.TypeMessageEntity[] = []
        let plainText = ''

        const parser = new Parser({
            onopentag(name, attribs) {
                name = name.toLowerCase()

                let entity: tl.TypeMessageEntity
                switch (name) {
                    case 'b':
                    case 'strong':
                        entity = {
                            _: 'messageEntityBold',
                            offset: plainText.length,
                            length: 0,
                        }
                        break
                    case 'i':
                    case 'em':
                        entity = {
                            _: 'messageEntityItalic',
                            offset: plainText.length,
                            length: 0,
                        }
                        break
                    case 'u':
                        entity = {
                            _: 'messageEntityUnderline',
                            offset: plainText.length,
                            length: 0,
                        }
                        break
                    case 's':
                    case 'del':
                    case 'strike':
                        entity = {
                            _: 'messageEntityStrike',
                            offset: plainText.length,
                            length: 0,
                        }
                        break
                    case 'blockquote':
                        entity = {
                            _: 'messageEntityBlockquote',
                            offset: plainText.length,
                            length: 0,
                        }
                        break
                    case 'code':
                        entity = {
                            _: 'messageEntityCode',
                            offset: plainText.length,
                            length: 0,
                        }
                        break
                    case 'pre':
                        entity = {
                            _: 'messageEntityPre',
                            offset: plainText.length,
                            length: 0,
                            language: attribs.language ?? '',
                        }
                        break
                    case 'a': {
                        let url = attribs.href
                        if (!url) return

                        const mention = MENTION_REGEX.exec(url)
                        if (mention) {
                            const userId = parseInt(mention[1], 10)
                            const accessHash = mention[2]

                            if (accessHash) {
                                // access hash is written in hexadecimal
                                entity = {
                                    _: 'inputMessageEntityMentionName',
                                    offset: plainText.length,
                                    length: 0,
                                    userId: {
                                        _: 'inputUser',
                                        userId,
                                        accessHash: bigInt(accessHash, 16),
                                    },
                                }
                            } else {
                                entity = {
                                    _: 'messageEntityMentionName',
                                    offset: plainText.length,
                                    length: 0,
                                    userId,
                                }
                            }
                        } else {
                            // protocol-relative links are given an explicit scheme
                            if (url.match(/^\/\//)) url = 'http:' + url

                            entity = {
                                _: 'messageEntityTextUrl',
                                offset: plainText.length,
                                length: 0,
                                url,
                            }
                        }
                        break
                    }
                    default:
                        return
                }

                let stack = stacks.get(name)
                if (!stack) {
                    stack = []
                    stacks.set(name, stack)
                }
                stack.push(entity)
            },
            ontext(data) {
                // every entity that is still open grows by the length of this text
                for (const ents of stacks.values()) {
                    for (const ent of ents) {
                        ent.length += data.length
                    }
                }

                plainText += data
            },
            onclosetag(name: string) {
                const entity = stacks.get(name.toLowerCase())?.pop()
                if (!entity) return // unmatched close tag
                entities.push(entity)
            },
        })

        parser.write(text)

        return [plainText, entities]
    }

    /**
     * Convert plain text with entities back into HTML.
     *
     * @param text  Plain text
     * @param entities  Entities describing the formatting of `text`
     * @returns  HTML representation of the text
     */
    unparse(text: string, entities: ReadonlyArray<MessageEntity>): string {
        return this._unparse(text, entities)
    }

    /**
     * Internal function that uses recursion to correctly process
     * nested & overlapping entities.
     *
     * @param text  Slice of the message text currently being rendered
     * @param entities  All entities of the message
     * @param entitiesOffset  Index in `entities` to start from
     * @param offset  Offset of `text` relative to the start of the whole message
     * @param length  Length of the slice
     * @returns  HTML for the given slice
     */
    private _unparse(
        text: string,
        entities: ReadonlyArray<MessageEntity>,
        entitiesOffset = 0,
        offset = 0,
        length = text.length
    ): string {
        if (!text) return text
        if (!entities.length || entities.length === entitiesOffset) {
            return HtmlMessageEntityParser.escape(text)
        }

        const end = offset + length

        const parts: string[] = []
        let lastOffset = 0

        for (let i = entitiesOffset; i < entities.length; i++) {
            const entity = entities[i]
            if (entity.offset >= end) break

            let entOffset = entity.offset
            let entLength = entity.length
            if (entOffset < 0) {
                // clamp entities that start before the text
                entLength += entOffset
                entOffset = 0
            }

            let relativeOffset = entOffset - offset
            if (relativeOffset > lastOffset) {
                // add missing plain text
                parts.push(
                    HtmlMessageEntityParser.escape(
                        text.substring(lastOffset, relativeOffset)
                    )
                )
            } else if (relativeOffset < lastOffset) {
                // entity overlaps what was already emitted: keep only the remainder
                entLength -= lastOffset - relativeOffset
                relativeOffset = lastOffset
            }

            // NOTE(review): `end` is an absolute offset while `relativeOffset` is
            // relative to `text`; comparison kept exactly as in the original code.
            if (entLength <= 0 || relativeOffset >= end || relativeOffset < 0)
                continue

            let skip = false

            const substr = text.substring(
                relativeOffset,
                relativeOffset + entLength
            )
            if (!substr) continue

            // render entities nested inside this one
            const entityText = this._unparse(
                substr,
                entities,
                i + 1,
                offset + relativeOffset,
                entLength
            )

            const type = entity.type
            switch (type) {
                case 'bold':
                case 'italic':
                case 'underline':
                case 'strikethrough':
                    // tag name is the first letter of the type: b, i, u, s
                    parts.push(`<${type[0]}>${entityText}</${type[0]}>`)
                    break
                case 'code':
                case 'pre':
                case 'blockquote': {
                    // the language comes from the message, so it must be
                    // escaped before being placed inside an attribute
                    const languageAttr =
                        type === 'pre' && entity.language
                            ? ` language="${HtmlMessageEntityParser.escape(
                                  entity.language,
                                  true
                              )}"`
                            : ''
                    const inner = this._syntaxHighlighter
                        ? this._syntaxHighlighter(entityText, entity.language!)
                        : entityText

                    parts.push(`<${type}${languageAttr}>${inner}</${type}>`)
                    break
                }
                case 'email':
                    // href is built from the raw text (not from rendered inner HTML)
                    // so that nested markup or quotes cannot break the attribute
                    parts.push(
                        `<a href="mailto:${HtmlMessageEntityParser.escape(
                            substr,
                            true
                        )}">${entityText}</a>`
                    )
                    break
                case 'url':
                    parts.push(
                        `<a href="${HtmlMessageEntityParser.escape(
                            substr,
                            true
                        )}">${entityText}</a>`
                    )
                    break
                case 'text_link':
                    parts.push(
                        `<a href="${HtmlMessageEntityParser.escape(
                            entity.url!,
                            true
                        )}">${entityText}</a>`
                    )
                    break
                case 'text_mention':
                    parts.push(
                        `<a href="tg://user?id=${entity.userId!}">${entityText}</a>`
                    )
                    break
                default:
                    // unsupported entity: its text is emitted later as plain text
                    skip = true
                    break
            }

            lastOffset = relativeOffset + (skip ? 0 : entLength)
        }

        // remaining plain text after the last entity
        parts.push(HtmlMessageEntityParser.escape(text.substring(lastOffset)))

        return parts.join('')
    }
}
|