471 lines
15 KiB
TypeScript
471 lines
15 KiB
TypeScript
import Long from 'long'
|
|
|
|
import type { InputText, MessageEntity, TextWithEntities, tl } from '@mtcute/core'
|
|
|
|
const MENTION_REGEX = /^tg:\/\/user\?id=(\d+)(?:&hash=(-?[0-9a-fA-F]+)(?:&|$)|&|$)/
|
|
const EMOJI_REGEX = /^tg:\/\/emoji\?id=(-?\d+)/
|
|
|
|
const TAG_BOLD = '**'
|
|
const TAG_ITALIC = '__'
|
|
const TAG_UNDERLINE = '--'
|
|
const TAG_STRIKE = '~~'
|
|
const TAG_SPOILER = '||'
|
|
const TAG_CODE = '`'
|
|
const TAG_PRE = '```'
|
|
|
|
const TO_BE_ESCAPED = /[*_\-~`[\\\]|]/g
|
|
|
|
/**
|
|
* Escape a string to be safely used in Markdown.
|
|
*
|
|
* > **Note**: this function is in most cases not needed, as `md` function
|
|
* > handles all `string`s passed to it automatically as plain text.
|
|
*/
|
|
function escape(str: string): string {
|
|
return str.replace(TO_BE_ESCAPED, (s) => '\\' + s)
|
|
}
|
|
|
|
/**
|
|
* Add Markdown formatting to the text given the plain text and entities contained in it.
|
|
*/
|
|
function unparse(input: InputText): string {
|
|
if (typeof input === 'string') return escape(input)
|
|
|
|
let text = input.text
|
|
const entities = input.entities ?? []
|
|
|
|
// keep track of positions of inserted escape symbols
|
|
const escaped: number[] = []
|
|
text = text.replace(TO_BE_ESCAPED, (s, pos: number) => {
|
|
escaped.push(pos)
|
|
|
|
return '\\' + s
|
|
})
|
|
const hasEscaped = escaped.length > 0
|
|
|
|
type InsertLater = [number, string]
|
|
const insert: InsertLater[] = []
|
|
|
|
for (const entity of entities) {
|
|
const type = entity._
|
|
|
|
let start = entity.offset
|
|
let end = start + entity.length
|
|
|
|
if (start > text.length) continue
|
|
if (start < 0) start = 0
|
|
if (end > text.length) end = text.length
|
|
|
|
if (hasEscaped) {
|
|
// determine number of escape chars since the beginning of the string
|
|
let escapedPos = 0
|
|
|
|
while (escapedPos < escaped.length && escaped[escapedPos] < start) {
|
|
escapedPos += 1
|
|
}
|
|
start += escapedPos
|
|
|
|
while (escapedPos < escaped.length && escaped[escapedPos] <= end) {
|
|
escapedPos += 1
|
|
}
|
|
end += escapedPos
|
|
}
|
|
|
|
let startTag
|
|
let endTag: string
|
|
|
|
switch (type) {
|
|
case 'messageEntityBold':
|
|
startTag = endTag = TAG_BOLD
|
|
break
|
|
case 'messageEntityItalic':
|
|
startTag = endTag = TAG_ITALIC
|
|
break
|
|
case 'messageEntityUnderline':
|
|
startTag = endTag = TAG_UNDERLINE
|
|
break
|
|
case 'messageEntityStrike':
|
|
startTag = endTag = TAG_STRIKE
|
|
break
|
|
case 'messageEntitySpoiler':
|
|
startTag = endTag = TAG_SPOILER
|
|
break
|
|
case 'messageEntityCode':
|
|
startTag = endTag = TAG_CODE
|
|
break
|
|
case 'messageEntityPre':
|
|
startTag = TAG_PRE
|
|
|
|
if (entity.language) {
|
|
startTag += entity.language
|
|
}
|
|
|
|
startTag += '\n'
|
|
endTag = '\n' + TAG_PRE
|
|
break
|
|
case 'messageEntityTextUrl':
|
|
startTag = '['
|
|
endTag = `](${entity.url})`
|
|
break
|
|
case 'messageEntityMentionName':
|
|
startTag = '['
|
|
endTag = `](tg://user?id=${entity.userId})`
|
|
break
|
|
case 'messageEntityCustomEmoji':
|
|
startTag = '['
|
|
endTag = `](tg://emoji?id=${entity.documentId.toString()})`
|
|
break
|
|
default:
|
|
continue
|
|
}
|
|
|
|
insert.push([start, startTag])
|
|
insert.push([end, endTag])
|
|
}
|
|
|
|
// sort by offset desc
|
|
insert.sort((a, b) => b[0] - a[0])
|
|
|
|
for (const [offset, tag] of insert) {
|
|
text = text.substr(0, offset) + tag + text.substr(offset)
|
|
}
|
|
|
|
return text
|
|
}
|
|
|
|
function parse(
|
|
strings: TemplateStringsArray | string,
|
|
...sub: (InputText | MessageEntity | boolean | number | undefined | null)[]
|
|
): TextWithEntities {
|
|
const entities: tl.TypeMessageEntity[] = []
|
|
let result = ''
|
|
|
|
const stacks: Record<string, tl.Mutable<tl.TypeMessageEntity>[]> = {}
|
|
|
|
let insideCode = false
|
|
let insidePre = false
|
|
let insideLink = false
|
|
|
|
function feed(text: string) {
|
|
const len = text.length
|
|
let pos = 0
|
|
|
|
while (pos < len) {
|
|
const c = text[pos]
|
|
|
|
if (c === '\\') {
|
|
result += text[pos + 1]
|
|
pos += 2
|
|
continue
|
|
}
|
|
|
|
if (insideCode) {
|
|
if (c === '`') {
|
|
// we can be certain that we're inside code
|
|
|
|
const ent = stacks.code.pop()!
|
|
ent.length = result.length - ent.offset
|
|
entities.push(ent)
|
|
insideCode = false
|
|
pos += 1
|
|
} else {
|
|
pos += 1
|
|
result += c
|
|
}
|
|
continue
|
|
}
|
|
|
|
if (insidePre) {
|
|
if (c === '`' || (c === '\n' && text[pos + 1] === '`')) {
|
|
if (c === '\n') pos += 1
|
|
|
|
if (text[pos + 1] === '`' && text[pos + 2] === '`') {
|
|
// we can be certain that we're inside pre
|
|
|
|
const ent = stacks.pre.pop()!
|
|
ent.length = result.length - ent.offset
|
|
entities.push(ent)
|
|
insidePre = false
|
|
pos += 3
|
|
continue
|
|
|
|
// closed with single or double backtick
|
|
// i.e. not closed actually! this is totally valid md:
|
|
// ```javascript
|
|
// const a = ``;
|
|
// ```
|
|
// compensate that `pos` change we made earliers
|
|
} else if (c === '\n') {
|
|
pos -= 1
|
|
}
|
|
}
|
|
|
|
pos += 1
|
|
result += c
|
|
continue
|
|
}
|
|
|
|
if (insideLink && c === ']') {
|
|
// we can be certain that we're inside link
|
|
|
|
const ent = stacks.link.pop()!
|
|
|
|
if (text[pos + 1] !== '(') {
|
|
// [link text]
|
|
// ignore this, and add opening [
|
|
result = `${result.substr(0, ent.offset)}[${result.substr(ent.offset)}]`
|
|
pos += 1
|
|
insideLink = false
|
|
continue
|
|
}
|
|
|
|
pos += 2
|
|
let url = ''
|
|
|
|
while (pos < text.length && text[pos] !== ')') {
|
|
url += text[pos++]
|
|
}
|
|
|
|
pos += 1 // )
|
|
|
|
if (pos > text.length) {
|
|
throw new Error('Malformed LINK entity, expected )')
|
|
}
|
|
|
|
if (url.length) {
|
|
ent.length = result.length - ent.offset
|
|
|
|
let m = url.match(MENTION_REGEX)
|
|
|
|
if (m) {
|
|
const userId = parseInt(m[1])
|
|
const accessHash = m[2]
|
|
|
|
if (accessHash) {
|
|
(ent as tl.Mutable<tl.RawInputMessageEntityMentionName>)._ =
|
|
'inputMessageEntityMentionName'
|
|
;(ent as tl.Mutable<tl.RawInputMessageEntityMentionName>).userId = {
|
|
_: 'inputUser',
|
|
userId,
|
|
accessHash: Long.fromString(accessHash, false, 16),
|
|
}
|
|
} else {
|
|
(ent as tl.Mutable<tl.RawMessageEntityMentionName>)._ = 'messageEntityMentionName'
|
|
;(ent as tl.Mutable<tl.RawMessageEntityMentionName>).userId = userId
|
|
}
|
|
} else if ((m = EMOJI_REGEX.exec(url))) {
|
|
(ent as tl.Mutable<tl.RawMessageEntityCustomEmoji>)._ = 'messageEntityCustomEmoji'
|
|
;(ent as tl.Mutable<tl.RawMessageEntityCustomEmoji>).documentId = Long.fromString(m[1])
|
|
} else {
|
|
if (url.match(/^\/\//)) url = 'http:' + url
|
|
;(ent as tl.Mutable<tl.RawMessageEntityTextUrl>)._ = 'messageEntityTextUrl'
|
|
;(ent as tl.Mutable<tl.RawMessageEntityTextUrl>).url = url
|
|
}
|
|
entities.push(ent)
|
|
}
|
|
|
|
insideLink = false
|
|
continue
|
|
}
|
|
|
|
if (c === '[' && !insideLink) {
|
|
pos += 1
|
|
insideLink = true
|
|
if (!('link' in stacks)) stacks.link = []
|
|
// eslint-disable-next-line @typescript-eslint/no-unsafe-argument
|
|
stacks.link.push({
|
|
offset: result.length,
|
|
length: 0,
|
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
} as any) // other fields are added after the second part
|
|
continue
|
|
}
|
|
|
|
if (c === '`') {
|
|
const isPre = text[pos + 1] === '`' && text[pos + 2] === '`'
|
|
|
|
if (isPre) {
|
|
pos += 3
|
|
let language = ''
|
|
|
|
while (pos < text.length && text[pos] !== '\n') {
|
|
language += text[pos++]
|
|
}
|
|
|
|
// newline
|
|
pos += 1
|
|
|
|
if (pos > text.length) {
|
|
throw new Error('Malformed PRE entity, expected LF after ```')
|
|
}
|
|
|
|
if (!('pre' in stacks)) stacks.pre = []
|
|
stacks.pre.push({
|
|
_: 'messageEntityPre',
|
|
offset: result.length,
|
|
length: 0,
|
|
language,
|
|
})
|
|
insidePre = true
|
|
} else {
|
|
pos += 1
|
|
if (!('code' in stacks)) stacks.code = []
|
|
stacks.code.push({
|
|
_: 'messageEntityCode',
|
|
offset: result.length,
|
|
length: 0,
|
|
})
|
|
insideCode = true
|
|
}
|
|
|
|
continue
|
|
}
|
|
|
|
if (c === text[pos + 1]) {
|
|
// maybe (?) start or end of an entity
|
|
let type: 'Italic' | 'Bold' | 'Underline' | 'Strike' | 'Spoiler' | null = null
|
|
|
|
switch (c) {
|
|
case '_':
|
|
type = 'Italic'
|
|
break
|
|
case '*':
|
|
type = 'Bold'
|
|
break
|
|
case '-':
|
|
type = 'Underline'
|
|
break
|
|
case '~':
|
|
type = 'Strike'
|
|
break
|
|
case '|':
|
|
type = 'Spoiler'
|
|
break
|
|
}
|
|
|
|
if (type) {
|
|
if (!(type in stacks)) stacks[type] = []
|
|
const isBegin = stacks[type].length === 0
|
|
|
|
if (isBegin) {
|
|
stacks[type].push({
|
|
_: `messageEntity${type}`,
|
|
offset: result.length,
|
|
length: 0,
|
|
})
|
|
} else {
|
|
// valid because isBegin is false
|
|
|
|
const ent = stacks[type].pop()!
|
|
ent.length = result.length - ent.offset
|
|
entities.push(ent)
|
|
}
|
|
|
|
pos += 2
|
|
continue
|
|
}
|
|
}
|
|
|
|
if (c === '\n') {
|
|
if (pos !== 0) {
|
|
result += '\n'
|
|
}
|
|
|
|
const nonWhitespace = text.slice(pos + 1).search(/\S/)
|
|
|
|
if (nonWhitespace !== -1) {
|
|
pos += nonWhitespace + 1
|
|
} else {
|
|
pos = len
|
|
result = result.trimEnd()
|
|
}
|
|
continue
|
|
}
|
|
|
|
// nothing matched => normal character
|
|
result += c
|
|
pos += 1
|
|
}
|
|
}
|
|
|
|
if (typeof strings === 'string') strings = [strings] as unknown as TemplateStringsArray
|
|
|
|
sub.forEach((it, idx) => {
|
|
feed(strings[idx])
|
|
|
|
if (typeof it === 'boolean' || !it) return
|
|
|
|
if (typeof it === 'string' || typeof it === 'number') {
|
|
result += it
|
|
} else {
|
|
// TextWithEntities or MessageEntity
|
|
const text = it.text
|
|
const innerEntities = 'raw' in it ? [it.raw] : it.entities
|
|
|
|
const baseOffset = result.length
|
|
result += text
|
|
|
|
if (innerEntities) {
|
|
for (const ent of innerEntities) {
|
|
entities.push({ ...ent, offset: ent.offset + baseOffset })
|
|
}
|
|
}
|
|
}
|
|
})
|
|
|
|
feed(strings[strings.length - 1])
|
|
|
|
for (const [name, stack] of Object.entries(stacks)) {
|
|
if (stack.length) {
|
|
throw new Error(`Unterminated ${name} entity`)
|
|
}
|
|
}
|
|
|
|
return {
|
|
text: result,
|
|
entities,
|
|
}
|
|
}
|
|
|
|
// typedoc doesn't support this yet, so we'll have to do it manually
|
|
// https://github.com/TypeStrong/typedoc/issues/2436
|
|
|
|
export const md: {
|
|
/**
|
|
* Tagged template based Markdown-to-entities parser function
|
|
*
|
|
* Additionally, `md` function has two static methods:
|
|
* - `md.escape` - escape a string to be safely used in Markdown
|
|
* (should not be needed in most cases, as `md` function itself handles all `string`s
|
|
* passed to it automatically as plain text)
|
|
* - `md.unparse` - add Markdown formatting to the text given the plain text and entities contained in it
|
|
*
|
|
* @example
|
|
* ```typescript
|
|
* const text = md`**${user.displayName}**`
|
|
* ```
|
|
*/
|
|
(
|
|
strings: TemplateStringsArray,
|
|
...sub: (InputText | MessageEntity | boolean | number | undefined | null)[]
|
|
): TextWithEntities
|
|
/**
|
|
* A variant taking a plain JS string as input
|
|
* and parsing it.
|
|
*
|
|
* Useful for cases when you already have a string
|
|
* (e.g. from some server) and want to parse it.
|
|
*
|
|
* @example
|
|
* ```typescript
|
|
* const string = '**hello**'
|
|
* const text = md(string)
|
|
* ```
|
|
*/
|
|
(string: string): TextWithEntities
|
|
escape: typeof escape
|
|
unparse: typeof unparse
|
|
} = Object.assign(parse, {
|
|
escape,
|
|
unparse,
|
|
})
|