feat(html): big rework, process html similar to browsers
This commit is contained in:
parent
d031388ea2
commit
28baf50958
3 changed files with 223 additions and 76 deletions
|
@ -5,7 +5,7 @@
|
||||||
This package implements formatting syntax based on HTML, similar to the one available in the Bot
|
This package implements formatting syntax based on HTML, similar to the one available in the Bot
|
||||||
API ([documented here](https://core.telegram.org/bots/api#html-style))
|
API ([documented here](https://core.telegram.org/bots/api#html-style))
|
||||||
|
|
||||||
> **NOTE**: The syntax implemented here is not entirely compatible with Bot API _HTML_.
|
> **NOTE**: The syntax implemented here is **incompatible** with Bot API _HTML_.
|
||||||
>
|
>
|
||||||
> Please read [Syntax](#syntax) below for a detailed explanation
|
> Please read [Syntax](#syntax) below for a detailed explanation
|
||||||
|
|
||||||
|
@ -20,7 +20,7 @@ tg.registerParseMode(new HtmlMessageEntityParser())
|
||||||
|
|
||||||
tg.sendText(
|
tg.sendText(
|
||||||
'me',
|
'me',
|
||||||
html`Hello, <b>me</b>! Updates from the feed:\n${await getUpdatesFromFeed()}`
|
html`Hello, <b>me</b>! Updates from the feed:<br>${await getUpdatesFromFeed()}`
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -30,34 +30,26 @@ tg.sendText(
|
||||||
supports nearly any HTML. However, since the text is still processed in a custom way for Telegram, the supported subset
|
supports nearly any HTML. However, since the text is still processed in a custom way for Telegram, the supported subset
|
||||||
of features is documented below:
|
of features is documented below:
|
||||||
|
|
||||||
## Line breaks
|
## Line breaks and spaces
|
||||||
|
|
||||||
Line breaks are preserved, `<br>` are ignored.
|
Line breaks are **not** preserved, `<br>` is used instead,
|
||||||
|
making the syntax very close to the one used when building web pages.
|
||||||
|
|
||||||
> ⚠️ Warning for **Prettier** users: be aware that Prettier
|
Multiple spaces and indents are collapsed; when you do need multiple spaces, use `&nbsp;` instead.
|
||||||
> formats tagged template literals with `html` as normal HTML and may add
|
|
||||||
> unwanted line breaks.
|
|
||||||
>
|
|
||||||
> Use `htm` instead (which is just an alias):
|
|
||||||
> ```typescript
|
|
||||||
> import { htm } from '@mtcute/html-parser'
|
|
||||||
>
|
|
||||||
> await msg.answerText(htm`Hello, <b>${msg.sender.username}</b>`)
|
|
||||||
> ```
|
|
||||||
|
|
||||||
## Inline entities
|
## Inline entities
|
||||||
|
|
||||||
Inline entities are entities that are in-line with other text. We support these entities:
|
Inline entities are entities that are in-line with other text. We support these entities:
|
||||||
|
|
||||||
| Name | Code | Result (visual)
|
| Name | Code | Result (visual) |
|
||||||
|---|---|---|
|
|------------------|-------------------------------------------|------------------------------|
|
||||||
| Bold | `<b>text</b>` | **text**
|
| Bold | `<b>text</b>` | **text** |
|
||||||
| Italic | `<b>text</b>` | _text_
|
| Italic | `<i>text</i>` | _text_ |
|
||||||
| Underline | `<u>text</u>` | <u>text</u>
|
| Underline | `<u>text</u>` | <u>text</u> |
|
||||||
| Strikethrough | `<s>text</s>` | ~~text~~
|
| Strikethrough | `<s>text</s>` | ~~text~~ |
|
||||||
| Monospace (code) | `<code>text</code>` | `text`
|
| Monospace (code) | `<code>text</code>` | `text` |
|
||||||
| Text link | `<a href="https://google.com">Google</a>` | [Google](https://google.com)
|
| Text link | `<a href="https://google.com">Google</a>` | [Google](https://google.com) |
|
||||||
| Text mention | `<a href="tg://user?id=1234567">Name</a>` | N/A
|
| Text mention | `<a href="tg://user?id=1234567">Name</a>` | N/A |
|
||||||
|
|
||||||
> **Note**: `<strong>`, `<em>`, `<ins>`, `<strike>`, `<del>` are not supported because they are redundant
|
> **Note**: `<strong>`, `<em>`, `<ins>`, `<strike>`, `<del>` are not supported because they are redundant
|
||||||
|
|
||||||
|
@ -82,10 +74,10 @@ Optionally, language for `<pre>` block can be specified like this:
|
||||||
> However, since syntax highlighting hasn't been implemented in
|
> However, since syntax highlighting hasn't been implemented in
|
||||||
> official Telegram clients, this doesn't really matter 🤷♀️
|
> official Telegram clients, this doesn't really matter 🤷♀️
|
||||||
|
|
||||||
| Code | Result (visual)
|
| Code | Result (visual) |
|
||||||
|---|---|
|
|-------------------------------------------------------------------------------------|------------------------------|
|
||||||
| <pre><pre>multiline\ntext</pre></pre> | <pre>multiline<br>text</pre>
|
| <pre>&lt;pre&gt;multiline\ntext&lt;/pre&gt;</pre> | <pre>multiline<br>text</pre> |
|
||||||
| <pre><pre language="javascript"><br> export default 42<br></pre></pre> | <pre>export default 42</pre>
|
| <pre>&lt;pre language="javascript"&gt;<br>  export default 42<br>&lt;/pre&gt;</pre> | <pre>export default 42</pre> |
|
||||||
|
|
||||||
## Nested and overlapped entities
|
## Nested and overlapped entities
|
||||||
|
|
||||||
|
@ -94,12 +86,11 @@ as expected!
|
||||||
|
|
||||||
Overlapping entities are supported in `unparse()`, though.
|
Overlapping entities are supported in `unparse()`, though.
|
||||||
|
|
||||||
| Code | Result (visual)
|
| Code | Result (visual) |
|
||||||
|---|---|
|
|---------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------|
|
||||||
| `<b>Welcome back, <i>User</i>!</b>` | **Welcome back, _User_!**
|
| `<b>Welcome back, <i>User</i>!</b>` | **Welcome back, _User_!** |
|
||||||
| `<b>bold <i>and</b> italic</i>` | **bold _and_** italic<br>⚠️ <i>word "italic" is not actually italic!</i>
|
| `<b>bold <i>and</b> italic</i>` | **bold _and_** italic<br>⚠️ <i>word "italic" is not actually italic!</i> |
|
||||||
| `<b>bold <i>and</i></b><i> italic</i>`<br>⚠️ <i>this is how <code>unparse()</code> handles overlapping entities</i> | **
|
| `<b>bold <i>and</i></b><i> italic</i>`<br>⚠️ <i>this is how <code>unparse()</code> handles overlapping entities</i> | **bold _and_** _italic_ |
|
||||||
bold _and_** _italic_
|
|
||||||
|
|
||||||
## Escaping
|
## Escaping
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,8 @@ import { tl } from '@mtcute/tl'
|
||||||
import { Parser } from 'htmlparser2'
|
import { Parser } from 'htmlparser2'
|
||||||
import Long from 'long'
|
import Long from 'long'
|
||||||
|
|
||||||
const MENTION_REGEX = /^tg:\/\/user\?id=(\d+)(?:&hash=(-?[0-9a-fA-F]+)(?:&|$)|&|$)/
|
const MENTION_REGEX =
|
||||||
|
/^tg:\/\/user\?id=(\d+)(?:&hash=(-?[0-9a-fA-F]+)(?:&|$)|&|$)/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tagged template based helper for escaping entities in HTML
|
* Tagged template based helper for escaping entities in HTML
|
||||||
|
@ -35,28 +36,6 @@ export function html(
|
||||||
return { value: str + strings[strings.length - 1], mode: 'html' }
|
return { value: str + strings[strings.length - 1], mode: 'html' }
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Alias for {@link html} for Prettier users.
|
|
||||||
*
|
|
||||||
* Prettier formats <code>html`...`</code> as normal HTML,
|
|
||||||
* thus may add unwanted line breaks.
|
|
||||||
*/
|
|
||||||
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
|
|
||||||
// @ts-ignore
|
|
||||||
export declare function htm(
|
|
||||||
strings: TemplateStringsArray,
|
|
||||||
...sub: (string | FormattedString)[]
|
|
||||||
): FormattedString
|
|
||||||
|
|
||||||
/** @internal */
|
|
||||||
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
|
|
||||||
// @ts-ignore
|
|
||||||
export const htm = html
|
|
||||||
|
|
||||||
// ts ignores above are a hack so the resulting d.ts contains `htm`
|
|
||||||
// as a function and not a variable, thus the ide would highlight
|
|
||||||
// it as such (the same way as `html`)
|
|
||||||
|
|
||||||
export namespace HtmlMessageEntityParser {
|
export namespace HtmlMessageEntityParser {
|
||||||
/**
|
/**
|
||||||
* Syntax highlighter function used in {@link HtmlMessageEntityParser.unparse}
|
* Syntax highlighter function used in {@link HtmlMessageEntityParser.unparse}
|
||||||
|
@ -106,13 +85,45 @@ export class HtmlMessageEntityParser implements IMessageEntityParser {
|
||||||
const stacks: Record<string, tl.Mutable<tl.TypeMessageEntity>[]> = {}
|
const stacks: Record<string, tl.Mutable<tl.TypeMessageEntity>[]> = {}
|
||||||
const entities: tl.TypeMessageEntity[] = []
|
const entities: tl.TypeMessageEntity[] = []
|
||||||
let plainText = ''
|
let plainText = ''
|
||||||
|
let pendingText = ''
|
||||||
|
|
||||||
|
function processPendingText(tagEnd = false) {
|
||||||
|
if (!pendingText.length) return
|
||||||
|
|
||||||
|
if (!stacks.pre?.length) {
|
||||||
|
pendingText = pendingText.replace(/[^\S\u00A0]+/gs, ' ')
|
||||||
|
|
||||||
|
if (tagEnd) pendingText = pendingText.trimEnd()
|
||||||
|
|
||||||
|
if (!plainText.length || plainText.match(/\s$/)) {
|
||||||
|
pendingText = pendingText.trimStart()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const ents of Object.values(stacks)) {
|
||||||
|
for (const ent of ents) {
|
||||||
|
ent.length += pendingText.length
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
plainText += pendingText
|
||||||
|
pendingText = ''
|
||||||
|
}
|
||||||
|
|
||||||
const parser = new Parser({
|
const parser = new Parser({
|
||||||
onopentag(name, attribs) {
|
onopentag(name, attribs) {
|
||||||
name = name.toLowerCase()
|
name = name.toLowerCase()
|
||||||
|
|
||||||
|
processPendingText()
|
||||||
|
|
||||||
|
// ignore tags inside pre (except pre)
|
||||||
|
if (name !== 'pre' && stacks.pre?.length) return
|
||||||
|
|
||||||
let entity: tl.TypeMessageEntity
|
let entity: tl.TypeMessageEntity
|
||||||
switch (name) {
|
switch (name) {
|
||||||
|
case 'br':
|
||||||
|
plainText += '\n'
|
||||||
|
return
|
||||||
case 'b':
|
case 'b':
|
||||||
case 'strong':
|
case 'strong':
|
||||||
entity = {
|
entity = {
|
||||||
|
@ -184,7 +195,11 @@ export class HtmlMessageEntityParser implements IMessageEntityParser {
|
||||||
userId: {
|
userId: {
|
||||||
_: 'inputUser',
|
_: 'inputUser',
|
||||||
userId: id,
|
userId: id,
|
||||||
accessHash: Long.fromString(accessHash, false, 16),
|
accessHash: Long.fromString(
|
||||||
|
accessHash,
|
||||||
|
false,
|
||||||
|
16
|
||||||
|
),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -216,25 +231,33 @@ export class HtmlMessageEntityParser implements IMessageEntityParser {
|
||||||
}
|
}
|
||||||
stacks[name].push(entity)
|
stacks[name].push(entity)
|
||||||
},
|
},
|
||||||
ontext(data) {
|
|
||||||
for (const ents of Object.values(stacks)) {
|
|
||||||
for (const ent of ents) {
|
|
||||||
ent.length += data.length
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
plainText += data
|
|
||||||
},
|
|
||||||
onclosetag(name: string) {
|
onclosetag(name: string) {
|
||||||
|
processPendingText(true)
|
||||||
|
|
||||||
|
name = name.toLowerCase()
|
||||||
|
|
||||||
|
// ignore tags inside pre (except pre)
|
||||||
|
if (name !== 'pre' && stacks.pre?.length) return
|
||||||
|
|
||||||
const entity = stacks[name]?.pop()
|
const entity = stacks[name]?.pop()
|
||||||
|
|
||||||
if (!entity) return // unmatched close tag
|
if (!entity) return // unmatched close tag
|
||||||
|
|
||||||
|
// ignore nested pre-s
|
||||||
|
if (name !== 'pre' || !stacks.pre.length) {
|
||||||
entities.push(entity)
|
entities.push(entity)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
ontext(data) {
|
||||||
|
pendingText += data
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
parser.write(text)
|
parser.write(text)
|
||||||
|
|
||||||
return [plainText, entities]
|
processPendingText(true)
|
||||||
|
|
||||||
|
return [plainText.replace(/\u00A0/g, ' '), entities]
|
||||||
}
|
}
|
||||||
|
|
||||||
unparse(text: string, entities: ReadonlyArray<MessageEntity>): string {
|
unparse(text: string, entities: ReadonlyArray<MessageEntity>): string {
|
||||||
|
|
|
@ -3,7 +3,7 @@ import { expect } from 'chai'
|
||||||
import { tl } from '@mtcute/tl'
|
import { tl } from '@mtcute/tl'
|
||||||
import { HtmlMessageEntityParser, html } from '../src'
|
import { HtmlMessageEntityParser, html } from '../src'
|
||||||
import { MessageEntity, FormattedString } from '@mtcute/client'
|
import { MessageEntity, FormattedString } from '@mtcute/client'
|
||||||
import bigInt from 'big-integer'
|
import Long from 'long'
|
||||||
|
|
||||||
const createEntity = <T extends tl.TypeMessageEntity['_']>(
|
const createEntity = <T extends tl.TypeMessageEntity['_']>(
|
||||||
type: T,
|
type: T,
|
||||||
|
@ -257,6 +257,22 @@ describe('HtmlMessageEntityParser', () => {
|
||||||
parser
|
parser
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
it('should replace newlines with <br>', () => {
|
||||||
|
test(
|
||||||
|
'plain\n\nplain',
|
||||||
|
[],
|
||||||
|
'plain<br><br>plain'
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should replace multiple spaces with ', () => {
|
||||||
|
test(
|
||||||
|
'plain plain',
|
||||||
|
[],
|
||||||
|
'plain plain'
|
||||||
|
)
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
describe('parse', () => {
|
describe('parse', () => {
|
||||||
|
@ -316,7 +332,7 @@ describe('HtmlMessageEntityParser', () => {
|
||||||
userId: {
|
userId: {
|
||||||
_: 'inputUser',
|
_: 'inputUser',
|
||||||
userId: 1234567,
|
userId: 1234567,
|
||||||
accessHash: bigInt('aabbccddaabbccdd', 16),
|
accessHash: Long.fromString('aabbccddaabbccdd', 16),
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
],
|
],
|
||||||
|
@ -337,6 +353,111 @@ describe('HtmlMessageEntityParser', () => {
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
it('should ignore other tags inside <pre>', () => {
|
||||||
|
test(
|
||||||
|
'<pre><b>bold</b> and not bold</pre>',
|
||||||
|
[createEntity('messageEntityPre', 0, 17, { language: '' })],
|
||||||
|
'bold and not bold'
|
||||||
|
)
|
||||||
|
test(
|
||||||
|
'<pre><pre>pre inside pre</pre> so cool</pre>',
|
||||||
|
[createEntity('messageEntityPre', 0, 22, { language: '' })],
|
||||||
|
'pre inside pre so cool'
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should ignore newlines and indentation', () => {
|
||||||
|
test(
|
||||||
|
'this is some text\n\nwith newlines',
|
||||||
|
[],
|
||||||
|
'this is some text with newlines'
|
||||||
|
)
|
||||||
|
test(
|
||||||
|
'<b>this is some text\n\nwith</b> newlines',
|
||||||
|
[createEntity('messageEntityBold', 0, 22)],
|
||||||
|
'this is some text with newlines'
|
||||||
|
)
|
||||||
|
test(
|
||||||
|
'<b>this is some text ending with\n\n</b> newlines',
|
||||||
|
[createEntity('messageEntityBold', 0, 29)],
|
||||||
|
'this is some text ending with newlines'
|
||||||
|
)
|
||||||
|
test(
|
||||||
|
`
|
||||||
|
this is some indented text
|
||||||
|
with newlines and
|
||||||
|
<b>
|
||||||
|
indented tags
|
||||||
|
</b> yeah <i>so cool
|
||||||
|
</i>
|
||||||
|
`,
|
||||||
|
[
|
||||||
|
createEntity('messageEntityBold', 45, 13),
|
||||||
|
createEntity('messageEntityItalic', 64, 7),
|
||||||
|
],
|
||||||
|
'this is some indented text with newlines and indented tags yeah so cool'
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should not ignore newlines and indentation in pre', () => {
|
||||||
|
test(
|
||||||
|
'<pre>this is some text\n\nwith newlines</pre>',
|
||||||
|
[createEntity('messageEntityPre', 0, 32, { language: '' })],
|
||||||
|
'this is some text\n\nwith newlines'
|
||||||
|
)
|
||||||
|
|
||||||
|
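// inside <pre> the template literal's indentation is kept verbatim, so the expected text has to repeat it on every line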
|
||||||
|
const indent = ' '
|
||||||
|
test(
|
||||||
|
`<pre>
|
||||||
|
this is some indented text
|
||||||
|
with newlines and
|
||||||
|
<b>
|
||||||
|
indented tags
|
||||||
|
</b> yeah <i>so cool
|
||||||
|
</i>
|
||||||
|
</pre>`,
|
||||||
|
[createEntity('messageEntityPre', 0, 203, { language: '' })],
|
||||||
|
'\n' +
|
||||||
|
indent +
|
||||||
|
'this is some indented text\n' +
|
||||||
|
indent +
|
||||||
|
'with newlines and\n' +
|
||||||
|
indent +
|
||||||
|
'\n' +
|
||||||
|
indent +
|
||||||
|
' indented tags\n' +
|
||||||
|
indent +
|
||||||
|
' yeah so cool\n' +
|
||||||
|
indent +
|
||||||
|
'\n' +
|
||||||
|
indent
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should handle <br>', () => {
|
||||||
|
test(
|
||||||
|
'this is some text<br><br>with actual newlines',
|
||||||
|
[],
|
||||||
|
'this is some text\n\nwith actual newlines'
|
||||||
|
)
|
||||||
|
test(
|
||||||
|
'<b>this is some text<br><br></b>with actual newlines',
|
||||||
|
// note that the <br> (i.e. \n) is not included in the entity
|
||||||
|
// this is expected, and the result is the same
|
||||||
|
[createEntity('messageEntityBold', 0, 17)],
|
||||||
|
'this is some text\n\nwith actual newlines'
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should handle ', () => {
|
||||||
|
test(
|
||||||
|
'one space, many spaces, and<br>a newline',
|
||||||
|
[],
|
||||||
|
'one space, many spaces, and\na newline'
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
it('should support entities on the edges', () => {
|
it('should support entities on the edges', () => {
|
||||||
test(
|
test(
|
||||||
'<b>Hello</b>, <b>world</b>',
|
'<b>Hello</b>, <b>world</b>',
|
||||||
|
@ -456,9 +577,15 @@ describe('HtmlMessageEntityParser', () => {
|
||||||
const unsafeString = '<&>'
|
const unsafeString = '<&>'
|
||||||
|
|
||||||
expect(html`${unsafeString}`.value).eq('<&>')
|
expect(html`${unsafeString}`.value).eq('<&>')
|
||||||
expect(html`${unsafeString} <b>text</b>`.value).eq('<&> <b>text</b>')
|
expect(html`${unsafeString} <b>text</b>`.value).eq(
|
||||||
expect(html`<b>text</b> ${unsafeString}`.value).eq('<b>text</b> <&>')
|
'<&> <b>text</b>'
|
||||||
expect(html`<b>${unsafeString}</b>`.value).eq('<b><&></b>')
|
)
|
||||||
|
expect(html`<b>text</b> ${unsafeString}`.value).eq(
|
||||||
|
'<b>text</b> <&>'
|
||||||
|
)
|
||||||
|
expect(html`<b>${unsafeString}</b>`.value).eq(
|
||||||
|
'<b><&></b>'
|
||||||
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
it('should skip with FormattedString', () => {
|
it('should skip with FormattedString', () => {
|
||||||
|
@ -467,10 +594,16 @@ describe('HtmlMessageEntityParser', () => {
|
||||||
|
|
||||||
expect(html`${unsafeString}`.value).eq('<&>')
|
expect(html`${unsafeString}`.value).eq('<&>')
|
||||||
expect(html`${unsafeString} ${unsafeString2}`.value).eq('<&> <&>')
|
expect(html`${unsafeString} ${unsafeString2}`.value).eq('<&> <&>')
|
||||||
expect(html`${unsafeString} <b>text</b>`.value).eq('<&> <b>text</b>')
|
expect(html`${unsafeString} <b>text</b>`.value).eq(
|
||||||
expect(html`<b>text</b> ${unsafeString}`.value).eq('<b>text</b> <&>')
|
'<&> <b>text</b>'
|
||||||
|
)
|
||||||
|
expect(html`<b>text</b> ${unsafeString}`.value).eq(
|
||||||
|
'<b>text</b> <&>'
|
||||||
|
)
|
||||||
expect(html`<b>${unsafeString}</b>`.value).eq('<b><&></b>')
|
expect(html`<b>${unsafeString}</b>`.value).eq('<b><&></b>')
|
||||||
expect(html`<b>${unsafeString} ${unsafeString2}</b>`.value).eq('<b><&> <&></b>')
|
expect(html`<b>${unsafeString} ${unsafeString2}</b>`.value).eq(
|
||||||
|
'<b><&> <&></b>'
|
||||||
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
it('should error with incompatible FormattedString', () => {
|
it('should error with incompatible FormattedString', () => {
|
||||||
|
|
Loading…
Reference in a new issue