feat(html): big rework, process html similar to browsers
This commit is contained in:
parent
d031388ea2
commit
28baf50958
3 changed files with 223 additions and 76 deletions
|
@ -5,7 +5,7 @@
|
|||
This package implements formatting syntax based on HTML, similar to the one available in the Bot
|
||||
API ([documented here](https://core.telegram.org/bots/api#html-style))
|
||||
|
||||
> **NOTE**: The syntax implemented here is not entirely compatible with Bot API _HTML_.
|
||||
> **NOTE**: The syntax implemented here is **incompatible** with Bot API _HTML_.
|
||||
>
|
||||
> Please read [Syntax](#syntax) below for a detailed explanation
|
||||
|
||||
|
@ -20,7 +20,7 @@ tg.registerParseMode(new HtmlMessageEntityParser())
|
|||
|
||||
tg.sendText(
|
||||
'me',
|
||||
html`Hello, <b>me</b>! Updates from the feed:\n${await getUpdatesFromFeed()}`
|
||||
html`Hello, <b>me</b>! Updates from the feed:<br>${await getUpdatesFromFeed()}`
|
||||
)
|
||||
```
|
||||
|
||||
|
@ -30,34 +30,26 @@ tg.sendText(
|
|||
supports nearly any HTML. However, since the text is still processed in a custom way for Telegram, the supported subset
|
||||
of features is documented below:
|
||||
|
||||
## Line breaks
|
||||
## Line breaks and spaces
|
||||
|
||||
Line breaks are preserved, `<br>` are ignored.
|
||||
Line breaks are **not** preserved, `<br>` is used instead,
|
||||
making the syntax very close to the one used when building web pages.
|
||||
|
||||
> ⚠️ Warning for **Prettier** users: be aware that Prettier
|
||||
> formats tagged template literals with `html` as normal HTML and may add
|
||||
> unwanted line breaks.
|
||||
>
|
||||
> Use `htm` instead (which is just an alias):
|
||||
> ```typescript
|
||||
> import { htm } from '@mtcute/html-parser'
|
||||
>
|
||||
> await msg.answerText(htm`Hello, <b>${msg.sender.username}</b>`)
|
||||
> ```
|
||||
Multiple spaces and indents are collapsed; when you do need multiple spaces, use `&nbsp;` instead.
|
||||
|
||||
## Inline entities
|
||||
|
||||
Inline entities are entities that are in-line with other text. We support these entities:
|
||||
|
||||
| Name | Code | Result (visual)
|
||||
|---|---|---|
|
||||
| Bold | `<b>text</b>` | **text**
|
||||
| Italic | `<i>text</i>` | _text_
|
||||
| Underline | `<u>text</u>` | <u>text</u>
|
||||
| Strikethrough | `<s>text</s>` | ~~text~~
|
||||
| Monospace (code) | `<code>text</code>` | `text`
|
||||
| Text link | `<a href="https://google.com">Google</a>` | [Google](https://google.com)
|
||||
| Text mention | `<a href="tg://user?id=1234567">Name</a>` | N/A
|
||||
| Name | Code | Result (visual) |
|
||||
|------------------|-------------------------------------------|------------------------------|
|
||||
| Bold | `<b>text</b>` | **text** |
|
||||
| Italic | `<i>text</i>` | _text_ |
|
||||
| Underline | `<u>text</u>` | <u>text</u> |
|
||||
| Strikethrough | `<s>text</s>` | ~~text~~ |
|
||||
| Monospace (code) | `<code>text</code>` | `text` |
|
||||
| Text link | `<a href="https://google.com">Google</a>` | [Google](https://google.com) |
|
||||
| Text mention | `<a href="tg://user?id=1234567">Name</a>` | N/A |
|
||||
|
||||
> **Note**: `<strong>`, `<em>`, `<ins>`, `<strike>`, `<del>` are not supported because they are redundant
|
||||
|
||||
|
@ -82,10 +74,10 @@ Optionally, language for `<pre>` block can be specified like this:
|
|||
> However, since syntax highlighting hasn't been implemented in
|
||||
> official Telegram clients, this doesn't really matter 🤷♀️
|
||||
|
||||
| Code | Result (visual)
|
||||
|---|---|
|
||||
| <pre>&lt;pre&gt;multiline\ntext&lt;/pre&gt;</pre> | <pre>multiline<br>text</pre>
|
||||
| <pre>&lt;pre language="javascript"&gt;<br> export default 42<br>&lt;/pre&gt;</pre> | <pre>export default 42</pre>
|
||||
| Code | Result (visual) |
|
||||
|-------------------------------------------------------------------------------------|------------------------------|
|
||||
| <pre>&lt;pre&gt;multiline\ntext&lt;/pre&gt;</pre> | <pre>multiline<br>text</pre> |
|
||||
| <pre>&lt;pre language="javascript"&gt;<br> export default 42<br>&lt;/pre&gt;</pre> | <pre>export default 42</pre> |
|
||||
|
||||
## Nested and overlapped entities
|
||||
|
||||
|
@ -94,12 +86,11 @@ as expected!
|
|||
|
||||
Overlapping entities are supported in `unparse()`, though.
|
||||
|
||||
| Code | Result (visual)
|
||||
|---|---|
|
||||
| `<b>Welcome back, <i>User</i>!</b>` | **Welcome back, _User_!**
|
||||
| `<b>bold <i>and</b> italic</i>` | **bold _and_** italic<br>⚠️ <i>word "italic" is not actually italic!</i>
|
||||
| `<b>bold <i>and</i></b><i> italic</i>`<br>⚠️ <i>this is how <code>unparse()</code> handles overlapping entities</i> | **
|
||||
bold _and_** _italic_
|
||||
| Code | Result (visual) |
|
||||
|---------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------|
|
||||
| `<b>Welcome back, <i>User</i>!</b>` | **Welcome back, _User_!** |
|
||||
| `<b>bold <i>and</b> italic</i>` | **bold _and_** italic<br>⚠️ <i>word "italic" is not actually italic!</i> |
|
||||
| `<b>bold <i>and</i></b><i> italic</i>`<br>⚠️ <i>this is how <code>unparse()</code> handles overlapping entities</i> | **bold _and_** _italic_ |
|
||||
|
||||
## Escaping
|
||||
|
||||
|
|
|
@ -7,7 +7,8 @@ import { tl } from '@mtcute/tl'
|
|||
import { Parser } from 'htmlparser2'
|
||||
import Long from 'long'
|
||||
|
||||
const MENTION_REGEX = /^tg:\/\/user\?id=(\d+)(?:&hash=(-?[0-9a-fA-F]+)(?:&|$)|&|$)/
|
||||
const MENTION_REGEX =
|
||||
/^tg:\/\/user\?id=(\d+)(?:&hash=(-?[0-9a-fA-F]+)(?:&|$)|&|$)/
|
||||
|
||||
/**
|
||||
* Tagged template based helper for escaping entities in HTML
|
||||
|
@ -35,28 +36,6 @@ export function html(
|
|||
return { value: str + strings[strings.length - 1], mode: 'html' }
|
||||
}
|
||||
|
||||
/**
|
||||
* Alias for {@link html} for Prettier users.
|
||||
*
|
||||
* Prettier formats <code>html`...`</code> as normal HTML,
|
||||
* thus may add unwanted line breaks.
|
||||
*/
|
||||
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
|
||||
// @ts-ignore
|
||||
export declare function htm(
|
||||
strings: TemplateStringsArray,
|
||||
...sub: (string | FormattedString)[]
|
||||
): FormattedString
|
||||
|
||||
/** @internal */
|
||||
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
|
||||
// @ts-ignore
|
||||
export const htm = html
|
||||
|
||||
// ts ignores above are a hack so the resulting d.ts contains `htm`
|
||||
// as a function and not a variable, thus the ide would highlight
|
||||
// it as such (the same way as `html`)
|
||||
|
||||
export namespace HtmlMessageEntityParser {
|
||||
/**
|
||||
* Syntax highlighter function used in {@link HtmlMessageEntityParser.unparse}
|
||||
|
@ -106,13 +85,45 @@ export class HtmlMessageEntityParser implements IMessageEntityParser {
|
|||
const stacks: Record<string, tl.Mutable<tl.TypeMessageEntity>[]> = {}
|
||||
const entities: tl.TypeMessageEntity[] = []
|
||||
let plainText = ''
|
||||
let pendingText = ''
|
||||
|
||||
function processPendingText(tagEnd = false) {
|
||||
if (!pendingText.length) return
|
||||
|
||||
if (!stacks.pre?.length) {
|
||||
pendingText = pendingText.replace(/[^\S\u00A0]+/gs, ' ')
|
||||
|
||||
if (tagEnd) pendingText = pendingText.trimEnd()
|
||||
|
||||
if (!plainText.length || plainText.match(/\s$/)) {
|
||||
pendingText = pendingText.trimStart()
|
||||
}
|
||||
}
|
||||
|
||||
for (const ents of Object.values(stacks)) {
|
||||
for (const ent of ents) {
|
||||
ent.length += pendingText.length
|
||||
}
|
||||
}
|
||||
|
||||
plainText += pendingText
|
||||
pendingText = ''
|
||||
}
|
||||
|
||||
const parser = new Parser({
|
||||
onopentag(name, attribs) {
|
||||
name = name.toLowerCase()
|
||||
|
||||
processPendingText()
|
||||
|
||||
// ignore tags inside pre (except pre)
|
||||
if (name !== 'pre' && stacks.pre?.length) return
|
||||
|
||||
let entity: tl.TypeMessageEntity
|
||||
switch (name) {
|
||||
case 'br':
|
||||
plainText += '\n'
|
||||
return
|
||||
case 'b':
|
||||
case 'strong':
|
||||
entity = {
|
||||
|
@ -184,7 +195,11 @@ export class HtmlMessageEntityParser implements IMessageEntityParser {
|
|||
userId: {
|
||||
_: 'inputUser',
|
||||
userId: id,
|
||||
accessHash: Long.fromString(accessHash, false, 16),
|
||||
accessHash: Long.fromString(
|
||||
accessHash,
|
||||
false,
|
||||
16
|
||||
),
|
||||
},
|
||||
}
|
||||
} else {
|
||||
|
@ -216,25 +231,33 @@ export class HtmlMessageEntityParser implements IMessageEntityParser {
|
|||
}
|
||||
stacks[name].push(entity)
|
||||
},
|
||||
ontext(data) {
|
||||
for (const ents of Object.values(stacks)) {
|
||||
for (const ent of ents) {
|
||||
ent.length += data.length
|
||||
}
|
||||
}
|
||||
|
||||
plainText += data
|
||||
},
|
||||
onclosetag(name: string) {
|
||||
processPendingText(true)
|
||||
|
||||
name = name.toLowerCase()
|
||||
|
||||
// ignore tags inside pre (except pre)
|
||||
if (name !== 'pre' && stacks.pre?.length) return
|
||||
|
||||
const entity = stacks[name]?.pop()
|
||||
|
||||
if (!entity) return // unmatched close tag
|
||||
entities.push(entity)
|
||||
|
||||
// ignore nested pre-s
|
||||
if (name !== 'pre' || !stacks.pre.length) {
|
||||
entities.push(entity)
|
||||
}
|
||||
},
|
||||
ontext(data) {
|
||||
pendingText += data
|
||||
},
|
||||
})
|
||||
|
||||
parser.write(text)
|
||||
|
||||
return [plainText, entities]
|
||||
processPendingText(true)
|
||||
|
||||
return [plainText.replace(/\u00A0/g, ' '), entities]
|
||||
}
|
||||
|
||||
unparse(text: string, entities: ReadonlyArray<MessageEntity>): string {
|
||||
|
|
|
@ -3,7 +3,7 @@ import { expect } from 'chai'
|
|||
import { tl } from '@mtcute/tl'
|
||||
import { HtmlMessageEntityParser, html } from '../src'
|
||||
import { MessageEntity, FormattedString } from '@mtcute/client'
|
||||
import bigInt from 'big-integer'
|
||||
import Long from 'long'
|
||||
|
||||
const createEntity = <T extends tl.TypeMessageEntity['_']>(
|
||||
type: T,
|
||||
|
@ -257,6 +257,22 @@ describe('HtmlMessageEntityParser', () => {
|
|||
parser
|
||||
)
|
||||
})
|
||||
|
||||
it('should replace newlines with <br>', () => {
|
||||
test(
|
||||
'plain\n\nplain',
|
||||
[],
|
||||
'plain<br><br>plain'
|
||||
)
|
||||
})
|
||||
|
||||
it('should replace multiple spaces with ', () => {
|
||||
test(
|
||||
'plain plain',
|
||||
[],
|
||||
'plain plain'
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
describe('parse', () => {
|
||||
|
@ -316,7 +332,7 @@ describe('HtmlMessageEntityParser', () => {
|
|||
userId: {
|
||||
_: 'inputUser',
|
||||
userId: 1234567,
|
||||
accessHash: bigInt('aabbccddaabbccdd', 16),
|
||||
accessHash: Long.fromString('aabbccddaabbccdd', 16),
|
||||
},
|
||||
}),
|
||||
],
|
||||
|
@ -337,6 +353,111 @@ describe('HtmlMessageEntityParser', () => {
|
|||
)
|
||||
})
|
||||
|
||||
it('should ignore other tags inside <pre>', () => {
|
||||
test(
|
||||
'<pre><b>bold</b> and not bold</pre>',
|
||||
[createEntity('messageEntityPre', 0, 17, { language: '' })],
|
||||
'bold and not bold'
|
||||
)
|
||||
test(
|
||||
'<pre><pre>pre inside pre</pre> so cool</pre>',
|
||||
[createEntity('messageEntityPre', 0, 22, { language: '' })],
|
||||
'pre inside pre so cool'
|
||||
)
|
||||
})
|
||||
|
||||
it('should ignore newlines and indentation', () => {
|
||||
test(
|
||||
'this is some text\n\nwith newlines',
|
||||
[],
|
||||
'this is some text with newlines'
|
||||
)
|
||||
test(
|
||||
'<b>this is some text\n\nwith</b> newlines',
|
||||
[createEntity('messageEntityBold', 0, 22)],
|
||||
'this is some text with newlines'
|
||||
)
|
||||
test(
|
||||
'<b>this is some text ending with\n\n</b> newlines',
|
||||
[createEntity('messageEntityBold', 0, 29)],
|
||||
'this is some text ending with newlines'
|
||||
)
|
||||
test(
|
||||
`
|
||||
this is some indented text
|
||||
with newlines and
|
||||
<b>
|
||||
indented tags
|
||||
</b> yeah <i>so cool
|
||||
</i>
|
||||
`,
|
||||
[
|
||||
createEntity('messageEntityBold', 45, 13),
|
||||
createEntity('messageEntityItalic', 64, 7),
|
||||
],
|
||||
'this is some indented text with newlines and indented tags yeah so cool'
|
||||
)
|
||||
})
|
||||
|
||||
it('should not ignore newlines and indentation in pre', () => {
|
||||
test(
|
||||
'<pre>this is some text\n\nwith newlines</pre>',
|
||||
[createEntity('messageEntityPre', 0, 32, { language: '' })],
|
||||
'this is some text\n\nwith newlines'
|
||||
)
|
||||
|
||||
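// the expected text is assembled with an explicit `indent` constant so that the whitespace kept inside <pre> is spelled out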
|
||||
const indent = ' '
|
||||
test(
|
||||
`<pre>
|
||||
this is some indented text
|
||||
with newlines and
|
||||
<b>
|
||||
indented tags
|
||||
</b> yeah <i>so cool
|
||||
</i>
|
||||
</pre>`,
|
||||
[createEntity('messageEntityPre', 0, 203, { language: '' })],
|
||||
'\n' +
|
||||
indent +
|
||||
'this is some indented text\n' +
|
||||
indent +
|
||||
'with newlines and\n' +
|
||||
indent +
|
||||
'\n' +
|
||||
indent +
|
||||
' indented tags\n' +
|
||||
indent +
|
||||
' yeah so cool\n' +
|
||||
indent +
|
||||
'\n' +
|
||||
indent
|
||||
)
|
||||
})
|
||||
|
||||
it('should handle <br>', () => {
|
||||
test(
|
||||
'this is some text<br><br>with actual newlines',
|
||||
[],
|
||||
'this is some text\n\nwith actual newlines'
|
||||
)
|
||||
test(
|
||||
'<b>this is some text<br><br></b>with actual newlines',
|
||||
// note that the <br> (i.e. \n) is not included in the entity
|
||||
// this is expected, and the result is the same
|
||||
[createEntity('messageEntityBold', 0, 17)],
|
||||
'this is some text\n\nwith actual newlines'
|
||||
)
|
||||
})
|
||||
|
||||
it('should handle ', () => {
|
||||
test(
|
||||
'one space, many spaces, and<br>a newline',
|
||||
[],
|
||||
'one space, many spaces, and\na newline'
|
||||
)
|
||||
})
|
||||
|
||||
it('should support entities on the edges', () => {
|
||||
test(
|
||||
'<b>Hello</b>, <b>world</b>',
|
||||
|
@ -456,9 +577,15 @@ describe('HtmlMessageEntityParser', () => {
|
|||
const unsafeString = '<&>'
|
||||
|
||||
expect(html`${unsafeString}`.value).eq('<&>')
|
||||
expect(html`${unsafeString} <b>text</b>`.value).eq('<&> <b>text</b>')
|
||||
expect(html`<b>text</b> ${unsafeString}`.value).eq('<b>text</b> <&>')
|
||||
expect(html`<b>${unsafeString}</b>`.value).eq('<b><&></b>')
|
||||
expect(html`${unsafeString} <b>text</b>`.value).eq(
|
||||
'<&> <b>text</b>'
|
||||
)
|
||||
expect(html`<b>text</b> ${unsafeString}`.value).eq(
|
||||
'<b>text</b> <&>'
|
||||
)
|
||||
expect(html`<b>${unsafeString}</b>`.value).eq(
|
||||
'<b><&></b>'
|
||||
)
|
||||
})
|
||||
|
||||
it('should skip with FormattedString', () => {
|
||||
|
@ -467,10 +594,16 @@ describe('HtmlMessageEntityParser', () => {
|
|||
|
||||
expect(html`${unsafeString}`.value).eq('<&>')
|
||||
expect(html`${unsafeString} ${unsafeString2}`.value).eq('<&> <&>')
|
||||
expect(html`${unsafeString} <b>text</b>`.value).eq('<&> <b>text</b>')
|
||||
expect(html`<b>text</b> ${unsafeString}`.value).eq('<b>text</b> <&>')
|
||||
expect(html`${unsafeString} <b>text</b>`.value).eq(
|
||||
'<&> <b>text</b>'
|
||||
)
|
||||
expect(html`<b>text</b> ${unsafeString}`.value).eq(
|
||||
'<b>text</b> <&>'
|
||||
)
|
||||
expect(html`<b>${unsafeString}</b>`.value).eq('<b><&></b>')
|
||||
expect(html`<b>${unsafeString} ${unsafeString2}</b>`.value).eq('<b><&> <&></b>')
|
||||
expect(html`<b>${unsafeString} ${unsafeString2}</b>`.value).eq(
|
||||
'<b><&> <&></b>'
|
||||
)
|
||||
})
|
||||
|
||||
it('should error with incompatible FormattedString', () => {
|
||||
|
|
Loading…
Reference in a new issue