` are not supported because they are redundant
@@ -82,10 +74,10 @@ Optionally, language for `` block can be specified like this:
> However, since syntax highlighting hasn't been implemented in
> official Telegram clients, this doesn't really matter 🤷♀️
-| Code | Result (visual)
-|---|---|
-| <pre>multiline\ntext</pre>
| multiline
text
-| <pre language="javascript">
export default 42
</pre>
| export default 42
+| Code | Result (visual) |
+|-------------------------------------------------------------------------------------|------------------------------|
+| <pre>multiline\ntext</pre>
| multiline
text
|
+| <pre language="javascript">
export default 42
</pre>
| export default 42
|
## Nested and overlapped entities
@@ -94,12 +86,11 @@ as expected!
Overlapping entities are supported in `unparse()`, though.
-| Code | Result (visual)
-|---|---|
-| `Welcome back, User!` | **Welcome back, _User_!**
-| `bold and italic` | **bold _and_** italic
⚠️ word "italic" is not actually italic!
-| `bold and italic`
⚠️ this is how unparse()
handles overlapping entities | **
-bold _and_** _italic_
+| Code | Result (visual) |
+|---------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------|
+| `Welcome back, User!` | **Welcome back, _User_!** |
+| `bold and italic` | **bold _and_** italic
⚠️ word "italic" is not actually italic! |
+| `bold and italic`
⚠️ this is how unparse()
handles overlapping entities | **bold _and_** _italic_ |
## Escaping
diff --git a/packages/html-parser/src/index.ts b/packages/html-parser/src/index.ts
index 5e7e2266..4111ebc2 100644
--- a/packages/html-parser/src/index.ts
+++ b/packages/html-parser/src/index.ts
@@ -7,7 +7,8 @@ import { tl } from '@mtcute/tl'
import { Parser } from 'htmlparser2'
import Long from 'long'
-const MENTION_REGEX = /^tg:\/\/user\?id=(\d+)(?:&hash=(-?[0-9a-fA-F]+)(?:&|$)|&|$)/
+const MENTION_REGEX =
+ /^tg:\/\/user\?id=(\d+)(?:&hash=(-?[0-9a-fA-F]+)(?:&|$)|&|$)/
/**
* Tagged template based helper for escaping entities in HTML
@@ -35,28 +36,6 @@ export function html(
return { value: str + strings[strings.length - 1], mode: 'html' }
}
-/**
- * Alias for {@link html} for Prettier users.
- *
- * Prettier formats html`...`
as normal HTML,
- * thus may add unwanted line breaks.
- */
-// eslint-disable-next-line @typescript-eslint/ban-ts-comment
-// @ts-ignore
-export declare function htm(
- strings: TemplateStringsArray,
- ...sub: (string | FormattedString)[]
-): FormattedString
-
-/** @internal */
-// eslint-disable-next-line @typescript-eslint/ban-ts-comment
-// @ts-ignore
-export const htm = html
-
-// ts ignores above are a hack so the resulting d.ts contains `htm`
-// as a function and not a variable, thus the ide would highlight
-// it as such (the same way as `html`)
-
export namespace HtmlMessageEntityParser {
/**
* Syntax highlighter function used in {@link HtmlMessageEntityParser.unparse}
@@ -106,13 +85,45 @@ export class HtmlMessageEntityParser implements IMessageEntityParser {
const stacks: Record[]> = {}
const entities: tl.TypeMessageEntity[] = []
let plainText = ''
+ let pendingText = ''
+
+ function processPendingText(tagEnd = false) { // flushes text buffered by ontext into plainText; tagEnd = called at a closing tag or at end of input
+ if (!pendingText.length) return
+ // outside <pre>, runs of whitespace collapse into one space; U+00A0 (&nbsp;) is significant and must survive
+ if (!stacks.pre?.length) {
+ pendingText = pendingText.replace(/[^\S\u00A0]+/g, ' ')
+ // trimEnd()/trimStart() and \s also match U+00A0, so trim with the same character class as above
+ if (tagEnd) pendingText = pendingText.replace(/[^\S\u00A0]+$/, '')
+
+ if (!plainText.length || /[^\S\u00A0]$/.test(plainText)) {
+ pendingText = pendingText.replace(/^[^\S\u00A0]+/, '')
+ }
+ }
+ // every entity still sitting on a stack is open, so it grows by the text being appended
+ for (const ents of Object.values(stacks)) {
+ for (const ent of ents) {
+ ent.length += pendingText.length
+ }
+ }
+
+ plainText += pendingText
+ pendingText = ''
+ }
const parser = new Parser({
onopentag(name, attribs) {
name = name.toLowerCase()
+ processPendingText()
+
+ // ignore tags inside pre (except pre)
+ if (name !== 'pre' && stacks.pre?.length) return
+
let entity: tl.TypeMessageEntity
switch (name) {
+ case 'br':
+ plainText += '\n'
+ return
case 'b':
case 'strong':
entity = {
@@ -184,7 +195,11 @@ export class HtmlMessageEntityParser implements IMessageEntityParser {
userId: {
_: 'inputUser',
userId: id,
- accessHash: Long.fromString(accessHash, false, 16),
+ accessHash: Long.fromString(
+ accessHash,
+ false,
+ 16
+ ),
},
}
} else {
@@ -216,25 +231,33 @@ export class HtmlMessageEntityParser implements IMessageEntityParser {
}
stacks[name].push(entity)
},
- ontext(data) {
- for (const ents of Object.values(stacks)) {
- for (const ent of ents) {
- ent.length += data.length
- }
- }
-
- plainText += data
- },
onclosetag(name: string) {
+ processPendingText(true)
+
+ name = name.toLowerCase()
+
+ // ignore tags inside pre (except pre)
+ if (name !== 'pre' && stacks.pre?.length) return
+
const entity = stacks[name]?.pop()
+
if (!entity) return // unmatched close tag
- entities.push(entity)
+
+ // ignore nested pre-s
+ if (name !== 'pre' || !stacks.pre.length) {
+ entities.push(entity)
+ }
+ },
+ ontext(data) {
+ pendingText += data
},
})
parser.write(text)
- return [plainText, entities]
+ processPendingText(true)
+
+ return [plainText.replace(/\u00A0/g, ' '), entities]
}
unparse(text: string, entities: ReadonlyArray): string {
diff --git a/packages/html-parser/tests/html-parser.spec.ts b/packages/html-parser/tests/html-parser.spec.ts
index ebde052c..89cd5fff 100644
--- a/packages/html-parser/tests/html-parser.spec.ts
+++ b/packages/html-parser/tests/html-parser.spec.ts
@@ -3,7 +3,7 @@ import { expect } from 'chai'
import { tl } from '@mtcute/tl'
import { HtmlMessageEntityParser, html } from '../src'
import { MessageEntity, FormattedString } from '@mtcute/client'
-import bigInt from 'big-integer'
+import Long from 'long'
const createEntity = (
type: T,
@@ -257,6 +257,22 @@ describe('HtmlMessageEntityParser', () => {
parser
)
})
+
+ it('should replace newlines with
', () => {
+ test(
+ 'plain\n\nplain',
+ [],
+ 'plain
plain'
+ )
+ })
+
+ it('should replace multiple spaces with ', () => {
+ test(
+ 'plain plain',
+ [],
+ 'plain plain'
+ )
+ })
})
describe('parse', () => {
@@ -316,7 +332,7 @@ describe('HtmlMessageEntityParser', () => {
userId: {
_: 'inputUser',
userId: 1234567,
- accessHash: bigInt('aabbccddaabbccdd', 16),
+ accessHash: Long.fromString('aabbccddaabbccdd', 16),
},
}),
],
@@ -337,6 +353,111 @@ describe('HtmlMessageEntityParser', () => {
)
})
+ it('should ignore other tags inside ', () => {
+ test(
+ 'bold and not bold
',
+ [createEntity('messageEntityPre', 0, 17, { language: '' })],
+ 'bold and not bold'
+ )
+ test(
+ 'pre inside pre
so cool
',
+ [createEntity('messageEntityPre', 0, 22, { language: '' })],
+ 'pre inside pre so cool'
+ )
+ })
+
+ it('should ignore newlines and indentation', () => {
+ test(
+ 'this is some text\n\nwith newlines',
+ [],
+ 'this is some text with newlines'
+ )
+ test(
+ 'this is some text\n\nwith newlines',
+ [createEntity('messageEntityBold', 0, 22)],
+ 'this is some text with newlines'
+ )
+ test(
+ 'this is some text ending with\n\n newlines',
+ [createEntity('messageEntityBold', 0, 29)],
+ 'this is some text ending with newlines'
+ )
+ test(
+ `
+ this is some indented text
+ with newlines and
+
+ indented tags
+ yeah so cool
+
+ `,
+ [
+ createEntity('messageEntityBold', 45, 13),
+ createEntity('messageEntityItalic', 64, 7),
+ ],
+ 'this is some indented text with newlines and indented tags yeah so cool'
+ )
+ })
+
+ it('should not ignore newlines and indentation in pre', () => {
+ test(
+ 'this is some text\n\nwith newlines
',
+ [createEntity('messageEntityPre', 0, 32, { language: '' })],
+ 'this is some text\n\nwith newlines'
+ )
+
+ // the expected string is assembled by hand because indentation is preserved verbatim inside pre
+ const indent = ' '
+ test(
+ `
+ this is some indented text
+ with newlines and
+
+ indented tags
+ yeah so cool
+
+
`,
+ [createEntity('messageEntityPre', 0, 203, { language: '' })],
+ '\n' +
+ indent +
+ 'this is some indented text\n' +
+ indent +
+ 'with newlines and\n' +
+ indent +
+ '\n' +
+ indent +
+ ' indented tags\n' +
+ indent +
+ ' yeah so cool\n' +
+ indent +
+ '\n' +
+ indent
+ )
+ })
+
+ it('should handle
', () => {
+ test(
+ 'this is some text
with actual newlines',
+ [],
+ 'this is some text\n\nwith actual newlines'
+ )
+ test(
+ 'this is some text
with actual newlines',
+ // note that the <br> (i.e. \n) is not included in the entity
+ // this is expected, and the result is the same
+ [createEntity('messageEntityBold', 0, 17)],
+ 'this is some text\n\nwith actual newlines'
+ )
+ })
+
+ it('should handle ', () => {
+ test(
+ 'one space, many spaces, and
a newline',
+ [],
+ 'one space, many spaces, and\na newline'
+ )
+ })
+
it('should support entities on the edges', () => {
test(
'Hello, world',
@@ -456,9 +577,15 @@ describe('HtmlMessageEntityParser', () => {
const unsafeString = '<&>'
expect(html`${unsafeString}`.value).eq('<&>')
- expect(html`${unsafeString} text`.value).eq('<&> text')
- expect(html`text ${unsafeString}`.value).eq('text <&>')
- expect(html`${unsafeString}`.value).eq('<&>')
+ expect(html`${unsafeString} text`.value).eq(
+ '<&> text'
+ )
+ expect(html`text ${unsafeString}`.value).eq(
+ 'text <&>'
+ )
+ expect(html`${unsafeString}`.value).eq(
+ '<&>'
+ )
})
it('should skip with FormattedString', () => {
@@ -467,10 +594,16 @@ describe('HtmlMessageEntityParser', () => {
expect(html`${unsafeString}`.value).eq('<&>')
expect(html`${unsafeString} ${unsafeString2}`.value).eq('<&> <&>')
- expect(html`${unsafeString} text`.value).eq('<&> text')
- expect(html`text ${unsafeString}`.value).eq('text <&>')
+ expect(html`${unsafeString} text`.value).eq(
+ '<&> text'
+ )
+ expect(html`text ${unsafeString}`.value).eq(
+ 'text <&>'
+ )
expect(html`${unsafeString}`.value).eq('<&>')
- expect(html`${unsafeString} ${unsafeString2}`.value).eq('<&> <&>')
+ expect(html`${unsafeString} ${unsafeString2}`.value).eq(
+ '<&> <&>'
+ )
})
it('should error with incompatible FormattedString', () => {