From 23a0e699428bae8af60998f336710c434d305938 Mon Sep 17 00:00:00 2001 From: Alina Sireneva Date: Wed, 1 Nov 2023 20:24:00 +0300 Subject: [PATCH] refactor: no more parse modes! --- package.json | 4 +- packages/client/src/client.ts | 163 +--- packages/client/src/methods/_imports.ts | 3 +- .../src/methods/bots/answer-inline-query.ts | 16 +- .../methods/files/normalize-input-media.ts | 12 +- .../methods/messages/edit-inline-message.ts | 34 +- .../src/methods/messages/edit-message.ts | 34 +- .../src/methods/messages/forward-messages.ts | 41 +- .../src/methods/messages/parse-entities.ts | 60 -- .../src/methods/messages/send-common.ts | 25 +- .../client/src/methods/messages/send-copy.ts | 40 +- .../src/methods/messages/send-media-group.ts | 9 +- .../client/src/methods/messages/send-media.ts | 23 +- .../client/src/methods/messages/send-quote.ts | 14 +- .../client/src/methods/messages/send-text.ts | 16 +- .../client/src/methods/misc/normalize-text.ts | 41 + .../client/src/methods/parse-modes/_state.ts | 21 - .../src/methods/parse-modes/parse-modes.ts | 89 --- .../client/src/methods/stories/edit-story.ts | 30 +- .../client/src/methods/stories/send-story.ts | 28 +- .../types/bots/input/input-inline-message.ts | 37 +- .../types/bots/input/input-inline-result.ts | 7 +- packages/client/src/types/index.ts | 1 - .../client/src/types/media/input-media.ts | 18 +- packages/client/src/types/misc/entities.ts | 14 +- packages/client/src/types/misc/index.ts | 1 + packages/client/src/types/parser.ts | 55 -- packages/html-parser/README.md | 42 +- packages/html-parser/src/index.ts | 746 +++++++++--------- .../html-parser/tests/html-parser.spec.ts | 199 +++-- packages/i18n/src/types.ts | 17 +- packages/i18n/src/utils.ts | 2 +- packages/i18n/tests/types.ts | 5 +- packages/markdown-parser/README.md | 54 +- packages/markdown-parser/src/index.ts | 346 ++++---- .../tests/markdown-parser.spec.ts | 145 ++-- packages/node/index.ts | 19 - 37 files changed, 1022 insertions(+), 1389 deletions(-) 
delete mode 100644 packages/client/src/methods/messages/parse-entities.ts create mode 100644 packages/client/src/methods/misc/normalize-text.ts delete mode 100644 packages/client/src/methods/parse-modes/_state.ts delete mode 100644 packages/client/src/methods/parse-modes/parse-modes.ts delete mode 100644 packages/client/src/types/parser.ts diff --git a/package.json b/package.json index 438ca399..395a3dea 100644 --- a/package.json +++ b/package.json @@ -12,8 +12,8 @@ "test:all:ci": "pnpm run -r test", "lint": "eslint .", "lint:ci": "NODE_OPTIONS=\"--max_old_space_size=8192\" eslint --config .eslintrc.ci.js .", - "lint:tsc": "pnpm -r --filter=!crypto --parallel exec tsc --build", - "lint:tsc:ci": "pnpm -r --filter=!crypto exec tsc --build", + "lint:tsc": "pnpm -r --parallel exec tsc --build", + "lint:tsc:ci": "pnpm -r exec tsc --build", "lint:dpdm": "dpdm -T --no-warning --no-tree --exit-code circular:1 packages/*", "lint:fix": "eslint --fix .", "format": "prettier --write \"packages/**/*.ts\"", diff --git a/packages/client/src/client.ts b/packages/client/src/client.ts index b26c8ff1..86a2b657 100644 --- a/packages/client/src/client.ts +++ b/packages/client/src/client.ts @@ -167,12 +167,6 @@ import { unpinAllMessages } from './methods/messages/unpin-all-messages.js' import { unpinMessage } from './methods/messages/unpin-message.js' import { initTakeoutSession } from './methods/misc/init-takeout-session.js' import { _normalizePrivacyRules } from './methods/misc/normalize-privacy-rules.js' -import { - getParseMode, - registerParseMode, - setDefaultParseMode, - unregisterParseMode, -} from './methods/parse-modes/parse-modes.js' import { changeCloudPassword } from './methods/password/change-cloud-password.js' import { enableCloudPassword } from './methods/password/enable-cloud-password.js' import { cancelPasswordEmail, resendPasswordEmail, verifyPasswordEmail } from './methods/password/password-email.js' @@ -271,11 +265,9 @@ import { Dialog, FileDownloadLocation, 
FileDownloadParameters, - FormattedString, ForumTopic, GameHighScore, HistoryReadUpdate, - IMessageEntityParser, InlineQuery, InputChatEventFilters, InputDialogFolder, @@ -288,6 +280,7 @@ import { InputReaction, InputStickerSet, InputStickerSetItem, + InputText, MaybeDynamic, Message, MessageEntity, @@ -852,18 +845,6 @@ export interface TelegramClient extends BaseTelegramClient { */ url: string } - - /** - * Parse mode to use when parsing inline message text. - * - * Passing `null` will explicitly disable formatting. - * - * **Note**: inline results themselves *can not* have markup - * entities, only the messages that are sent once a result is clicked. - * - * @default current default parse mode (if any). - */ - parseMode?: string | null }, ): Promise /** @@ -2232,8 +2213,7 @@ export interface TelegramClient extends BaseTelegramClient { */ _normalizeInputMedia( media: InputMediaLike, - params: { - parseMode?: string | null + params?: { progressCallback?: (uploaded: number, total: number) => void uploadPeer?: tl.TypeInputPeer }, @@ -2929,26 +2909,7 @@ export interface TelegramClient extends BaseTelegramClient { * * When `media` is passed, `media.caption` is used instead */ - text?: string | FormattedString - - /** - * Parse mode to use to parse entities before sending the message. - * - * - * Passing `null` will explicitly disable formatting. - * @default current default parse mode (if any). - */ - parseMode?: string | null - - /** - * List of formatting entities to use instead of parsing via a - * parse mode. 
- * - * **Note:** Passing this makes the method ignore {@link parseMode} - * - * When `media` is passed, `media.entities` is used instead - */ - entities?: tl.TypeMessageEntity[] + text?: InputText /** * New message media @@ -2998,26 +2959,7 @@ export interface TelegramClient extends BaseTelegramClient { * * When `media` is passed, `media.caption` is used instead */ - text?: string | FormattedString - - /** - * Parse mode to use to parse entities before sending the message. - * - * Passing `null` will explicitly disable formatting. - * - * @default current default parse mode (if any). - */ - parseMode?: string | null - - /** - * List of formatting entities to use instead of parsing via a - * parse mode. - * - * **Note:** Passing this makes the method ignore {@link parseMode} - * - * When `media` is passed, `media.entities` is used instead - */ - entities?: tl.TypeMessageEntity[] + text?: InputText /** * New message media @@ -3068,8 +3010,6 @@ export interface TelegramClient extends BaseTelegramClient { * Forward one or more messages by their IDs. * You can forward no more than 100 messages at once. * - * If a caption message was sent, it will be the first message in the resulting array. - * * **Available**: ✅ both users and bots * * @param toChatId Destination chat ID, username, phone, `"me"` or `"self"` @@ -3784,15 +3724,7 @@ export interface TelegramClient extends BaseTelegramClient { * Can be used, for example. when using File IDs * or when using existing InputMedia objects. */ - caption?: string | FormattedString - - /** - * Override entities for `media`. - * - * Can be used, for example. when using File IDs - * or when using existing InputMedia objects. - */ - entities?: tl.TypeMessageEntity[] + caption?: InputText /** * Function that will be called after some part has been uploaded. 
@@ -3883,7 +3815,7 @@ export interface TelegramClient extends BaseTelegramClient { */ sendText( chatId: InputPeerLike, - text: string | FormattedString, + text: InputText, params?: CommonSendParams & { /** * For bots: inline or reply markup or an instruction @@ -3891,14 +3823,6 @@ export interface TelegramClient extends BaseTelegramClient { */ replyMarkup?: ReplyMarkup - /** - * List of formatting entities to use instead of parsing via a - * parse mode. - * - * **Note:** Passing this makes the method ignore {@link parseMode} - */ - entities?: tl.TypeMessageEntity[] - /** * Whether to disable links preview in this message */ @@ -4027,47 +3951,6 @@ export interface TelegramClient extends BaseTelegramClient { * */ _normalizePrivacyRules(rules: InputPrivacyRule[]): Promise - /** - * Register a given {@link IMessageEntityParser} as a parse mode - * for messages. When this method is first called, given parse - * mode is also set as default. - * - * **Available**: ✅ both users and bots - * - * @param parseMode Parse mode to register - * @throws MtClientError When the parse mode with a given name is already registered. - */ - registerParseMode(parseMode: IMessageEntityParser): void - /** - * Unregister a parse mode by its name. - * Will silently fail if given parse mode does not exist. - * - * Also updates the default parse mode to the next one available, if any - * - * **Available**: ✅ both users and bots - * - * @param name Name of the parse mode to unregister - */ - unregisterParseMode(name: string): void - /** - * Get a {@link IMessageEntityParser} registered under a given name (or a default one). - * - * **Available**: ✅ both users and bots - * - * @param name Name of the parse mode which parser to get. 
- * @throws MtClientError When the provided parse mode is not registered - * @throws MtClientError When `name` is omitted and there is no default parse mode - */ - getParseMode(name?: string | null): IMessageEntityParser - /** - * Set a given parse mode as a default one. - * - * **Available**: ✅ both users and bots - * - * @param name Name of the parse mode - * @throws MtClientError When given parse mode is not registered. - */ - setDefaultParseMode(name: string): void /** * Change your 2FA password * **Available**: 👤 users only @@ -4422,20 +4305,7 @@ export interface TelegramClient extends BaseTelegramClient { /** * Override caption for {@link media} */ - caption?: string | FormattedString - - /** - * Override entities for {@link media} - */ - entities?: tl.TypeMessageEntity[] - - /** - * Parse mode to use to parse entities before sending the message. - * Passing `null` will explicitly disable formatting. - * - * @default current default parse mode (if any). - */ - parseMode?: string | null + caption?: InputText /** * Interactive elements to add to the story @@ -4806,20 +4676,7 @@ export interface TelegramClient extends BaseTelegramClient { /** * Override caption for {@link media} */ - caption?: string | FormattedString - - /** - * Override entities for {@link media} - */ - entities?: tl.TypeMessageEntity[] - - /** - * Parse mode to use to parse entities before sending the message. - * Passing `null` will explicitly disable formatting. - * - * @default current default parse mode (if any). 
- */ - parseMode?: string | null + caption?: InputText /** * Whether to automatically pin this story to the profile @@ -5422,10 +5279,6 @@ export class TelegramClient extends BaseTelegramClient { unpinMessage = unpinMessage.bind(null, this) initTakeoutSession = initTakeoutSession.bind(null, this) _normalizePrivacyRules = _normalizePrivacyRules.bind(null, this) - registerParseMode = registerParseMode.bind(null, this) - unregisterParseMode = unregisterParseMode.bind(null, this) - getParseMode = getParseMode.bind(null, this) - setDefaultParseMode = setDefaultParseMode.bind(null, this) changeCloudPassword = changeCloudPassword.bind(null, this) enableCloudPassword = enableCloudPassword.bind(null, this) verifyPasswordEmail = verifyPasswordEmail.bind(null, this) diff --git a/packages/client/src/methods/_imports.ts b/packages/client/src/methods/_imports.ts index 2419a32a..ff779334 100644 --- a/packages/client/src/methods/_imports.ts +++ b/packages/client/src/methods/_imports.ts @@ -38,11 +38,9 @@ import { Dialog, FileDownloadLocation, FileDownloadParameters, - FormattedString, ForumTopic, GameHighScore, HistoryReadUpdate, - IMessageEntityParser, InlineQuery, InputChatEventFilters, InputDialogFolder, @@ -55,6 +53,7 @@ import { InputReaction, InputStickerSet, InputStickerSetItem, + InputText, MaybeDynamic, Message, MessageEntity, diff --git a/packages/client/src/methods/bots/answer-inline-query.ts b/packages/client/src/methods/bots/answer-inline-query.ts index 363a0f08..2319d676 100644 --- a/packages/client/src/methods/bots/answer-inline-query.ts +++ b/packages/client/src/methods/bots/answer-inline-query.ts @@ -96,23 +96,11 @@ export async function answerInlineQuery( */ url: string } - - /** - * Parse mode to use when parsing inline message text. - * - * Passing `null` will explicitly disable formatting. - * - * **Note**: inline results themselves *can not* have markup - * entities, only the messages that are sent once a result is clicked. 
- * - * @default current default parse mode (if any). - */ - parseMode?: string | null }, ): Promise { - const { cacheTime = 300, gallery, private: priv, nextOffset, switchPm, switchWebview, parseMode } = params ?? {} + const { cacheTime = 300, gallery, private: priv, nextOffset, switchPm, switchWebview } = params ?? {} - const [defaultGallery, tlResults] = await BotInline._convertToTl(client, results, parseMode) + const [defaultGallery, tlResults] = await BotInline._convertToTl(client, results) await client.call({ _: 'messages.setInlineBotResults', diff --git a/packages/client/src/methods/files/normalize-input-media.ts b/packages/client/src/methods/files/normalize-input-media.ts index c52d841d..7b3c9078 100644 --- a/packages/client/src/methods/files/normalize-input-media.ts +++ b/packages/client/src/methods/files/normalize-input-media.ts @@ -8,7 +8,7 @@ import { InputMediaLike } from '../../types/media/input-media.js' import { extractFileName } from '../../utils/file-utils.js' import { normalizeDate } from '../../utils/misc-utils.js' import { encodeWaveform } from '../../utils/voice-utils.js' -import { _parseEntities } from '../messages/parse-entities.js' +import { _normalizeInputText } from '../misc/normalize-text.js' import { resolvePeer } from '../users/resolve-peer.js' import { _normalizeInputFile } from './normalize-input-file.js' import { uploadFile } from './upload-file.js' @@ -21,10 +21,9 @@ export async function _normalizeInputMedia( client: BaseTelegramClient, media: InputMediaLike, params: { - parseMode?: string | null progressCallback?: (uploaded: number, total: number) => void uploadPeer?: tl.TypeInputPeer - }, + } = {}, uploadMedia = false, ): Promise { // my condolences to those poor souls who are going to maintain this (myself included) @@ -165,12 +164,7 @@ export async function _normalizeInputMedia( }) if (media.solution) { - [solution, solutionEntities] = await _parseEntities( - client, - media.solution, - params.parseMode, - 
media.solutionEntities, - ) + [solution, solutionEntities] = await _normalizeInputText(client, media.solution) } } diff --git a/packages/client/src/methods/messages/edit-inline-message.ts b/packages/client/src/methods/messages/edit-inline-message.ts index ef7d17eb..e5870fa5 100644 --- a/packages/client/src/methods/messages/edit-inline-message.ts +++ b/packages/client/src/methods/messages/edit-inline-message.ts @@ -1,9 +1,9 @@ import { BaseTelegramClient, tl } from '@mtcute/core' -import { BotKeyboard, FormattedString, InputMediaLike, ReplyMarkup } from '../../types/index.js' +import { BotKeyboard, InputMediaLike, InputText, ReplyMarkup } from '../../types/index.js' import { normalizeInlineId } from '../../utils/inline-utils.js' import { _normalizeInputMedia } from '../files/normalize-input-media.js' -import { _parseEntities } from './parse-entities.js' +import { _normalizeInputText } from '../misc/normalize-text.js' /** * Edit sent inline message text, media and reply markup. @@ -27,26 +27,7 @@ export async function editInlineMessage( * * When `media` is passed, `media.caption` is used instead */ - text?: string | FormattedString - - /** - * Parse mode to use to parse entities before sending the message. - * - * - * Passing `null` will explicitly disable formatting. - * @default current default parse mode (if any). - */ - parseMode?: string | null - - /** - * List of formatting entities to use instead of parsing via a - * parse mode. - * - * **Note:** Passing this makes the method ignore {@link parseMode} - * - * When `media` is passed, `media.entities` is used instead - */ - entities?: tl.TypeMessageEntity[] + text?: InputText /** * New message media @@ -93,15 +74,10 @@ export async function editInlineMessage( // if there's no caption in input media (i.e. 
not present or undefined), // user wants to keep current caption, thus `content` needs to stay `undefined` if ('caption' in params.media && params.media.caption !== undefined) { - [content, entities] = await _parseEntities( - client, - params.media.caption, - params.parseMode, - params.media.entities, - ) + [content, entities] = await _normalizeInputText(client, params.media.caption) } } else if (params.text) { - [content, entities] = await _parseEntities(client, params.text, params.parseMode, params.entities) + [content, entities] = await _normalizeInputText(client, params.text) } let retries = 3 diff --git a/packages/client/src/methods/messages/edit-message.ts b/packages/client/src/methods/messages/edit-message.ts index d06d69e4..9ed80c1b 100644 --- a/packages/client/src/methods/messages/edit-message.ts +++ b/packages/client/src/methods/messages/edit-message.ts @@ -2,17 +2,17 @@ import { BaseTelegramClient, tl } from '@mtcute/core' import { BotKeyboard, - FormattedString, InputMediaLike, InputMessageId, + InputText, Message, normalizeInputMessageId, ReplyMarkup, } from '../../types/index.js' import { _normalizeInputMedia } from '../files/normalize-input-media.js' +import { _normalizeInputText } from '../misc/normalize-text.js' import { resolvePeer } from '../users/resolve-peer.js' import { _findMessageInUpdate } from './find-in-update.js' -import { _parseEntities } from './parse-entities.js' /** * Edit message text, media, reply markup and schedule date. @@ -29,26 +29,7 @@ export async function editMessage( * * When `media` is passed, `media.caption` is used instead */ - text?: string | FormattedString - - /** - * Parse mode to use to parse entities before sending the message. - * - * Passing `null` will explicitly disable formatting. - * - * @default current default parse mode (if any). - */ - parseMode?: string | null - - /** - * List of formatting entities to use instead of parsing via a - * parse mode. 
- * - * **Note:** Passing this makes the method ignore {@link parseMode} - * - * When `media` is passed, `media.entities` is used instead - */ - entities?: tl.TypeMessageEntity[] + text?: InputText /** * New message media @@ -106,17 +87,12 @@ export async function editMessage( // if there's no caption in input media (i.e. not present or undefined), // user wants to keep current caption, thus `content` needs to stay `undefined` if ('caption' in params.media && params.media.caption !== undefined) { - [content, entities] = await _parseEntities( - client, - params.media.caption, - params.parseMode, - params.media.entities, - ) + [content, entities] = await _normalizeInputText(client, params.media.caption) } } if (params.text) { - [content, entities] = await _parseEntities(client, params.text, params.parseMode, params.entities) + [content, entities] = await _normalizeInputText(client, params.text) } const res = await client.call({ diff --git a/packages/client/src/methods/messages/forward-messages.ts b/packages/client/src/methods/messages/forward-messages.ts index 10921831..797ca44c 100644 --- a/packages/client/src/methods/messages/forward-messages.ts +++ b/packages/client/src/methods/messages/forward-messages.ts @@ -1,7 +1,7 @@ -import { BaseTelegramClient, MtArgumentError, tl } from '@mtcute/core' +import { BaseTelegramClient, MtArgumentError } from '@mtcute/core' import { randomLong } from '@mtcute/core/utils.js' -import { FormattedString, InputMediaLike, InputPeerLike, Message, PeersIndex } from '../../types/index.js' +import { InputPeerLike, Message, PeersIndex } from '../../types/index.js' import { normalizeDate } from '../../utils/misc-utils.js' import { assertIsUpdatesGroup } from '../../utils/updates-utils.js' import { resolvePeer } from '../users/resolve-peer.js' @@ -11,41 +11,6 @@ export interface ForwardMessageOptions { /** Destination chat ID, username, phone, `"me"` or `"self"` */ toChatId: InputPeerLike - /** - * Optionally, a caption for your forwarded 
message(s). - * It will be sent as a separate message before the forwarded messages. - * - * You can either pass `caption` or `captionMedia`, passing both will - * result in an error - */ - caption?: string | FormattedString - - /** - * Optionally, a media caption for your forwarded message(s). - * It will be sent as a separate message before the forwarded messages. - * - * You can either pass `caption` or `captionMedia`, passing both will - * result in an error - */ - captionMedia?: InputMediaLike - - /** - * Parse mode to use to parse entities in caption. - * - * Passing `null` will explicitly disable formatting. - * - * @default current default parse mode (if any). - */ - parseMode?: string | null - - /** - * List of formatting entities in caption to use instead - * of parsing via a parse mode. - * - * **Note:** Passing this makes the method ignore {@link parseMode} - */ - entities?: tl.TypeMessageEntity[] - /** * Whether to forward silently (also applies to caption message). */ @@ -102,8 +67,6 @@ export interface ForwardMessageOptions { * Forward one or more messages by their IDs. * You can forward no more than 100 messages at once. * - * If a caption message was sent, it will be the first message in the resulting array. 
- * * @param toChatId Destination chat ID, username, phone, `"me"` or `"self"` * @param fromChatId Source chat ID, username, phone, `"me"` or `"self"` * @param messages Message IDs diff --git a/packages/client/src/methods/messages/parse-entities.ts b/packages/client/src/methods/messages/parse-entities.ts deleted file mode 100644 index 9e513585..00000000 --- a/packages/client/src/methods/messages/parse-entities.ts +++ /dev/null @@ -1,60 +0,0 @@ -/* eslint-disable @typescript-eslint/no-explicit-any */ -import { BaseTelegramClient, MtArgumentError, tl } from '@mtcute/core' - -import { FormattedString } from '../../types/index.js' -import { normalizeToInputUser } from '../../utils/peer-utils.js' -import { getParseModesState } from '../parse-modes/_state.js' -import { resolvePeer } from '../users/resolve-peer.js' - -const empty: [string, undefined] = ['', undefined] - -/** @internal */ -export async function _parseEntities( - client: BaseTelegramClient, - text?: string | FormattedString, - mode?: string | null, - entities?: tl.TypeMessageEntity[], -): Promise<[string, tl.TypeMessageEntity[] | undefined]> { - if (!text) { - return empty - } - - if (typeof text === 'object') { - mode = text.mode - text = text.value - } - - if (!entities) { - const parseModesState = getParseModesState(client) - - if (mode === undefined) { - mode = parseModesState.defaultParseMode - } - // either explicitly disabled or no available parser - if (!mode) return [text, []] - - const modeImpl = parseModesState.parseModes.get(mode) - - if (!modeImpl) { - throw new MtArgumentError(`Parse mode ${mode} is not registered.`) - } - - [text, entities] = modeImpl.parse(text) - } - - // replace mentionName entities with input ones - for (const ent of entities) { - if (ent._ === 'messageEntityMentionName') { - try { - const inputPeer = normalizeToInputUser(await resolvePeer(client, ent.userId), ent.userId) - - // not a user - if (!inputPeer) continue - (ent as any)._ = 'inputMessageEntityMentionName' - 
;(ent as any).userId = inputPeer - } catch (e) {} - } - } - - return [text, entities] -} diff --git a/packages/client/src/methods/messages/send-common.ts b/packages/client/src/methods/messages/send-common.ts index ba269fce..f95a03c2 100644 --- a/packages/client/src/methods/messages/send-common.ts +++ b/packages/client/src/methods/messages/send-common.ts @@ -2,8 +2,10 @@ import { BaseTelegramClient, getMarkedPeerId, MtArgumentError, tl } from '@mtcut import { MtMessageNotFoundError } from '../../types/errors.js' import { Message } from '../../types/messages/message.js' +import { TextWithEntities } from '../../types/misc/entities.js' import { InputPeerLike } from '../../types/peers/index.js' import { normalizeMessageId, normalizeToInputUser } from '../../utils/index.js' +import { _normalizeInputText } from '../misc/normalize-text.js' import { resolvePeer } from '../users/resolve-peer.js' import { _getDiscussionMessage } from './get-discussion-message.js' import { getMessages } from './get-messages.js' @@ -50,24 +52,9 @@ export interface CommonSendParams { /** * Quoted text. Must be exactly contained in the message - * being quoted to be accepted by the server + * being quoted to be accepted by the server (as well as entities) */ - quoteText?: string - - /** - * Entities contained in the quoted text. - * Must be exactly contained in the message - * being quoted to be accepted by the server - */ - quoteEntities?: tl.TypeMessageEntity[] - - /** - * Parse mode to use to parse entities before sending the message. - * Passing `null` will explicitly disable formatting. - * - * @default current default parse mode (if any). - */ - parseMode?: string | null + quote?: TextWithEntities /** * Whether to send this message silently. 
@@ -151,8 +138,8 @@ export async function _processCommonSendParameters( _: 'inputReplyToMessage', replyToMsgId: replyTo, replyToPeerId: replyToPeer, - quoteText: params.quoteText, - quoteEntities: params.quoteEntities, + quoteText: params.quote?.text, + quoteEntities: params.quote?.entities as tl.TypeMessageEntity[], } } else if (params.replyToStory) { tlReplyTo = { diff --git a/packages/client/src/methods/messages/send-copy.ts b/packages/client/src/methods/messages/send-copy.ts index 031d2b60..e7483be6 100644 --- a/packages/client/src/methods/messages/send-copy.ts +++ b/packages/client/src/methods/messages/send-copy.ts @@ -1,6 +1,6 @@ -import { BaseTelegramClient, getMarkedPeerId, MtArgumentError, tl } from '@mtcute/core' +import { BaseTelegramClient, getMarkedPeerId, MtArgumentError } from '@mtcute/core' -import { FormattedString, InputPeerLike, Message, MtMessageNotFoundError, ReplyMarkup } from '../../types/index.js' +import { InputPeerLike, InputText, Message, MtMessageNotFoundError, ReplyMarkup } from '../../types/index.js' import { resolvePeer } from '../users/resolve-peer.js' import { getMessages } from './get-messages.js' import { CommonSendParams } from './send-common.js' @@ -15,24 +15,7 @@ export interface SendCopyParams extends CommonSendParams { /** * New message caption (only used for media) */ - caption?: string | FormattedString - - /** - * Parse mode to use to parse `text` entities before sending - * the message. - * Passing `null` will explicitly disable formatting. - * - * @default current default parse mode (if any). - */ - parseMode?: string | null - - /** - * List of formatting entities to use instead of parsing via a - * parse mode. 
- * - * **Note:** Passing this makes the method ignore {@link parseMode} - */ - entities?: tl.TypeMessageEntity[] + caption?: InputText /** * For bots: inline or reply markup or an instruction @@ -83,15 +66,26 @@ export async function sendCopy( } if (msg.media && msg.media.type !== 'web_page' && msg.media.type !== 'invoice') { + let caption: InputText | undefined = params.caption + + if (!caption) { + if (msg.raw.entities?.length) { + caption = { + text: msg.raw.message, + entities: msg.raw.entities, + } + } else { + caption = msg.raw.message + } + } + return sendMedia( client, toChatId, { type: 'auto', file: msg.media.inputMedia, - caption: params.caption ?? msg.raw.message, - // we shouldn't use original entities if the user wants custom text - entities: params.entities ?? params.caption ? undefined : msg.raw.entities, + caption, }, rest, ) diff --git a/packages/client/src/methods/messages/send-media-group.ts b/packages/client/src/methods/messages/send-media-group.ts index 14592553..7ab3b082 100644 --- a/packages/client/src/methods/messages/send-media-group.ts +++ b/packages/client/src/methods/messages/send-media-group.ts @@ -7,9 +7,9 @@ import { InputPeerLike, PeersIndex } from '../../types/peers/index.js' import { normalizeDate } from '../../utils/misc-utils.js' import { assertIsUpdatesGroup } from '../../utils/updates-utils.js' import { _normalizeInputMedia } from '../files/normalize-input-media.js' +import { _normalizeInputText } from '../misc/normalize-text.js' import { resolvePeer } from '../users/resolve-peer.js' import { _getDiscussionMessage } from './get-discussion-message.js' -import { _parseEntities } from './parse-entities.js' import { _processCommonSendParameters, CommonSendParams } from './send-common.js' /** @@ -77,14 +77,11 @@ export async function sendMediaGroup( true, ) - const [message, entities] = await _parseEntities( + const [message, entities] = await _normalizeInputText( client, // some types dont have `caption` field, and ts warns us, - 
// but since it's JS, they'll just be `undefined` and properly - // handled by _parseEntities method + // but since it's JS, they'll just be `undefined` and properly handled by the method (media as Extract).caption, - params.parseMode, - (media as Extract).entities, ) multiMedia.push({ diff --git a/packages/client/src/methods/messages/send-media.ts b/packages/client/src/methods/messages/send-media.ts index 893a7d50..bac7ccc0 100644 --- a/packages/client/src/methods/messages/send-media.ts +++ b/packages/client/src/methods/messages/send-media.ts @@ -1,17 +1,17 @@ -import { BaseTelegramClient, tl } from '@mtcute/core' +import { BaseTelegramClient } from '@mtcute/core' import { randomLong } from '@mtcute/core/utils.js' import { BotKeyboard, ReplyMarkup } from '../../types/bots/keyboards.js' import { InputMediaLike } from '../../types/media/input-media.js' import { Message } from '../../types/messages/message.js' -import { FormattedString } from '../../types/parser.js' +import { InputText } from '../../types/misc/entities.js' import { InputPeerLike } from '../../types/peers/index.js' import { normalizeDate } from '../../utils/misc-utils.js' import { _normalizeInputMedia } from '../files/normalize-input-media.js' +import { _normalizeInputText } from '../misc/normalize-text.js' import { resolvePeer } from '../users/resolve-peer.js' import { _findMessageInUpdate } from './find-in-update.js' import { _getDiscussionMessage } from './get-discussion-message.js' -import { _parseEntities } from './parse-entities.js' import { _processCommonSendParameters, CommonSendParams } from './send-common.js' /** @@ -50,15 +50,7 @@ export async function sendMedia( * Can be used, for example. when using File IDs * or when using existing InputMedia objects. */ - caption?: string | FormattedString - - /** - * Override entities for `media`. - * - * Can be used, for example. when using File IDs - * or when using existing InputMedia objects. 
- */ - entities?: tl.TypeMessageEntity[] + caption?: InputText /** * Function that will be called after some part has been uploaded. @@ -82,14 +74,11 @@ export async function sendMedia( const inputMedia = await _normalizeInputMedia(client, media, params) - const [message, entities] = await _parseEntities( + const [message, entities] = await _normalizeInputText( client, // some types dont have `caption` field, and ts warns us, - // but since it's JS, they'll just be `undefined` and properly - // handled by _parseEntities method + // but since it's JS, they'll just be `undefined` and properly handled by the method params.caption || (media as Extract).caption, - params.parseMode, - params.entities || (media as Extract).entities, ) const replyMarkup = BotKeyboard._convertToTl(params.replyMarkup) diff --git a/packages/client/src/methods/messages/send-quote.ts b/packages/client/src/methods/messages/send-quote.ts index de1287ea..e0b1e9cc 100644 --- a/packages/client/src/methods/messages/send-quote.ts +++ b/packages/client/src/methods/messages/send-quote.ts @@ -1,6 +1,6 @@ import { BaseTelegramClient, MtArgumentError, tl } from '@mtcute/core' -import { InputPeerLike } from '../../index.js' +import { InputPeerLike, TextWithEntities } from '../../index.js' import { Message } from '../../types/messages/message.js' import { sendMedia } from './send-media.js' import { sendMediaGroup } from './send-media-group.js' @@ -22,7 +22,7 @@ export type QuoteParamsFrom = Omit, 'quoteText' | 'quoteEntiti end: number } -function extractQuote(message: Message, from: number, to: number): [string, tl.TypeMessageEntity[] | undefined] { +function extractQuote(message: Message, from: number, to: number): TextWithEntities { const { raw } = message if (raw._ === 'messageService') throw new MtArgumentError('Cannot quote service message') @@ -34,7 +34,7 @@ function extractQuote(message: Message, from: number, to: number): [string, tl.T if (from >= to) throw new MtArgumentError('Invalid quote range') 
- if (!raw.entities) return [text.slice(from, to), undefined] + if (!raw.entities) return { text: text.slice(from, to), entities: undefined } const entities: tl.TypeMessageEntity[] = [] @@ -51,7 +51,7 @@ function extractQuote(message: Message, from: number, to: number): [string, tl.T entities.push(newEnt) } - return [text.slice(from, to), entities] + return { text: text.slice(from, to), entities } } /** Send a text in reply to a given quote */ @@ -66,7 +66,7 @@ export function quoteWithText( const { toChatId = message.chat, start, end, text, ...params__ } = params const params_ = params__ as NonNullable[3]> params_.replyTo = message - ;[params_.quoteText, params_.quoteEntities] = extractQuote(message, params.start, params.end) + params_.quote = extractQuote(message, params.start, params.end) return sendText(client, toChatId, text, params_) } @@ -83,7 +83,7 @@ export function quoteWithMedia( const { toChatId = message.chat, start, end, media, ...params__ } = params const params_ = params__ as NonNullable[3]> params_.replyTo = message - ;[params_.quoteText, params_.quoteEntities] = extractQuote(message, params.start, params.end) + params_.quote = extractQuote(message, params.start, params.end) return sendMedia(client, toChatId, media, params_) } @@ -100,7 +100,7 @@ export function quoteWithMediaGroup( const { toChatId, start, end, medias, ...params__ } = params const params_ = params__ as NonNullable[3]> params_.replyTo = message - ;[params_.quoteText, params_.quoteEntities] = extractQuote(message, params.start, params.end) + params_.quote = extractQuote(message, params.start, params.end) return sendMediaGroup(client, message.chat.inputPeer, medias, params_) } diff --git a/packages/client/src/methods/messages/send-text.ts b/packages/client/src/methods/messages/send-text.ts index c7fd29b6..4fb5afa7 100644 --- a/packages/client/src/methods/messages/send-text.ts +++ b/packages/client/src/methods/messages/send-text.ts @@ -3,16 +3,16 @@ import { randomLong } from 
'@mtcute/core/utils.js' import { BotKeyboard, ReplyMarkup } from '../../types/bots/keyboards.js' import { Message } from '../../types/messages/message.js' -import { FormattedString } from '../../types/parser.js' +import { InputText } from '../../types/misc/entities.js' import { InputPeerLike, PeersIndex } from '../../types/peers/index.js' import { normalizeDate } from '../../utils/misc-utils.js' import { inputPeerToPeer } from '../../utils/peer-utils.js' import { createDummyUpdate } from '../../utils/updates-utils.js' import { getAuthState } from '../auth/_state.js' +import { _normalizeInputText } from '../misc/normalize-text.js' import { resolvePeer } from '../users/resolve-peer.js' import { _findMessageInUpdate } from './find-in-update.js' import { _getDiscussionMessage } from './get-discussion-message.js' -import { _parseEntities } from './parse-entities.js' import { _processCommonSendParameters, CommonSendParams } from './send-common.js' /** @@ -25,7 +25,7 @@ import { _processCommonSendParameters, CommonSendParams } from './send-common.js export async function sendText( client: BaseTelegramClient, chatId: InputPeerLike, - text: string | FormattedString, + text: InputText, params?: CommonSendParams & { /** * For bots: inline or reply markup or an instruction @@ -33,14 +33,6 @@ export async function sendText( */ replyMarkup?: ReplyMarkup - /** - * List of formatting entities to use instead of parsing via a - * parse mode. 
- * - * **Note:** Passing this makes the method ignore {@link parseMode} - */ - entities?: tl.TypeMessageEntity[] - /** * Whether to disable links preview in this message */ @@ -57,7 +49,7 @@ export async function sendText( ): Promise { if (!params) params = {} - const [message, entities] = await _parseEntities(client, text, params.parseMode, params.entities) + const [message, entities] = await _normalizeInputText(client, text) const replyMarkup = BotKeyboard._convertToTl(params.replyMarkup) const { peer, replyTo } = await _processCommonSendParameters(client, chatId, params) diff --git a/packages/client/src/methods/misc/normalize-text.ts b/packages/client/src/methods/misc/normalize-text.ts new file mode 100644 index 00000000..f04646fa --- /dev/null +++ b/packages/client/src/methods/misc/normalize-text.ts @@ -0,0 +1,41 @@ +import { BaseTelegramClient, tl } from '@mtcute/core' + +import { InputText } from '../../types/misc/entities.js' +import { normalizeToInputUser } from '../../utils/peer-utils.js' +import { resolvePeer } from '../users/resolve-peer.js' + +const empty: [string, undefined] = ['', undefined] + +/** @internal */ +export async function _normalizeInputText( + client: BaseTelegramClient, + input?: InputText, +): Promise<[string, tl.TypeMessageEntity[] | undefined]> { + if (!input) { + return empty + } + + if (typeof input === 'string') { + return [input, undefined] + } + + const { text, entities } = input + if (!entities) return [text, undefined] + + // replace mentionName entities with input ones + for (const ent of entities) { + if (ent._ === 'messageEntityMentionName') { + try { + const inputPeer = normalizeToInputUser(await resolvePeer(client, ent.userId), ent.userId) + + const ent_ = ent as unknown as tl.RawInputMessageEntityMentionName + ent_._ = 'inputMessageEntityMentionName' + ent_.userId = inputPeer + } catch (e) { + client.log.warn('Failed to resolve mention entity for %s: %s', ent.userId, e) + } + } + } + + return [text, entities] +} diff 
--git a/packages/client/src/methods/parse-modes/_state.ts b/packages/client/src/methods/parse-modes/_state.ts deleted file mode 100644 index 744b5faa..00000000 --- a/packages/client/src/methods/parse-modes/_state.ts +++ /dev/null @@ -1,21 +0,0 @@ -/* eslint-disable @typescript-eslint/no-unused-vars */ -import { BaseTelegramClient } from '@mtcute/core' - -import { IMessageEntityParser } from '../../types/index.js' - -const STATE_SYMBOL = Symbol('parseModesState') - -/** @internal */ -export interface ParseModesState { - parseModes: Map - defaultParseMode: string | null -} - -/** @internal */ -export function getParseModesState(client: BaseTelegramClient): ParseModesState { - // eslint-disable-next-line - return ((client as any)[STATE_SYMBOL] ??= { - parseModes: new Map(), - defaultParseMode: null, - } satisfies ParseModesState) -} diff --git a/packages/client/src/methods/parse-modes/parse-modes.ts b/packages/client/src/methods/parse-modes/parse-modes.ts deleted file mode 100644 index 101c70cb..00000000 --- a/packages/client/src/methods/parse-modes/parse-modes.ts +++ /dev/null @@ -1,89 +0,0 @@ -import { BaseTelegramClient, MtArgumentError } from '@mtcute/core' - -import { IMessageEntityParser } from '../../types/index.js' -import { getParseModesState } from './_state.js' - -/** - * Register a given {@link IMessageEntityParser} as a parse mode - * for messages. When this method is first called, given parse - * mode is also set as default. - * - * @param parseMode Parse mode to register - * @throws MtClientError When the parse mode with a given name is already registered. - */ -export function registerParseMode(client: BaseTelegramClient, parseMode: IMessageEntityParser): void { - const name = parseMode.name - - const state = getParseModesState(client) - - if (state.parseModes.has(name)) { - throw new MtArgumentError(`Parse mode ${name} is already registered. 
Unregister it first!`) - } - state.parseModes.set(name, parseMode) - - if (!state.defaultParseMode) { - state.defaultParseMode = name - } -} - -/** - * Unregister a parse mode by its name. - * Will silently fail if given parse mode does not exist. - * - * Also updates the default parse mode to the next one available, if any - * - * @param name Name of the parse mode to unregister - */ -export function unregisterParseMode(client: BaseTelegramClient, name: string): void { - const state = getParseModesState(client) - - state.parseModes.delete(name) - - if (state.defaultParseMode === name) { - const [first] = state.parseModes.keys() - state.defaultParseMode = first ?? null - } -} - -/** - * Get a {@link IMessageEntityParser} registered under a given name (or a default one). - * - * @param name Name of the parse mode which parser to get. - * @throws MtClientError When the provided parse mode is not registered - * @throws MtClientError When `name` is omitted and there is no default parse mode - */ -export function getParseMode(client: BaseTelegramClient, name?: string | null): IMessageEntityParser { - const state = getParseModesState(client) - - if (!name) { - if (!state.defaultParseMode) { - throw new MtArgumentError('There is no default parse mode') - } - - name = state.defaultParseMode - } - - const mode = state.parseModes.get(name) - - if (!mode) { - throw new MtArgumentError(`Parse mode ${name} is not registered.`) - } - - return mode -} - -/** - * Set a given parse mode as a default one. - * - * @param name Name of the parse mode - * @throws MtClientError When given parse mode is not registered. 
- */ -export function setDefaultParseMode(client: BaseTelegramClient, name: string): void { - const state = getParseModesState(client) - - if (!state.parseModes.has(name)) { - throw new MtArgumentError(`Parse mode ${name} is not registered.`) - } - - state.defaultParseMode = name -} diff --git a/packages/client/src/methods/stories/edit-story.ts b/packages/client/src/methods/stories/edit-story.ts index 819fe7e4..f3eee571 100644 --- a/packages/client/src/methods/stories/edit-story.ts +++ b/packages/client/src/methods/stories/edit-story.ts @@ -1,9 +1,9 @@ import { BaseTelegramClient, tl } from '@mtcute/core' -import { FormattedString, InputMediaLike, InputPeerLike, InputPrivacyRule, Story } from '../../types/index.js' +import { InputMediaLike, InputPeerLike, InputPrivacyRule, InputText, Story } from '../../types/index.js' import { _normalizeInputMedia } from '../files/normalize-input-media.js' -import { _parseEntities } from '../messages/parse-entities.js' import { _normalizePrivacyRules } from '../misc/normalize-privacy-rules.js' +import { _normalizeInputText } from '../misc/normalize-text.js' import { resolvePeer } from '../users/resolve-peer.js' import { _findStoryInUpdate } from './find-in-update.js' @@ -35,20 +35,7 @@ export async function editStory( /** * Override caption for {@link media} */ - caption?: string | FormattedString - - /** - * Override entities for {@link media} - */ - entities?: tl.TypeMessageEntity[] - - /** - * Parse mode to use to parse entities before sending the message. - * Passing `null` will explicitly disable formatting. - * - * @default current default parse mode (if any). 
- */ - parseMode?: string | null + caption?: InputText /** * Interactive elements to add to the story @@ -70,22 +57,17 @@ export async function editStory( let media: tl.TypeInputMedia | undefined = undefined if (params.media) { - media = await _normalizeInputMedia(client, params.media, params) + media = await _normalizeInputMedia(client, params.media) // if there's no caption in input media (i.e. not present or undefined), // user wants to keep current caption, thus `content` needs to stay `undefined` if ('caption' in params.media && params.media.caption !== undefined) { - [caption, entities] = await _parseEntities( - client, - params.media.caption, - params.parseMode, - params.media.entities, - ) + [caption, entities] = await _normalizeInputText(client, params.media.caption) } } if (params.caption) { - [caption, entities] = await _parseEntities(client, params.caption, params.parseMode, params.entities) + [caption, entities] = await _normalizeInputText(client, params.caption) } const privacyRules = params.privacyRules ? 
await _normalizePrivacyRules(client, params.privacyRules) : undefined diff --git a/packages/client/src/methods/stories/send-story.ts b/packages/client/src/methods/stories/send-story.ts index d715faa5..6adac18f 100644 --- a/packages/client/src/methods/stories/send-story.ts +++ b/packages/client/src/methods/stories/send-story.ts @@ -1,10 +1,10 @@ import { BaseTelegramClient, tl } from '@mtcute/core' import { randomLong } from '@mtcute/core/utils.js' -import { FormattedString, InputMediaLike, InputPeerLike, InputPrivacyRule, Story } from '../../types/index.js' +import { InputMediaLike, InputPeerLike, InputPrivacyRule, InputText, Story } from '../../types/index.js' import { _normalizeInputMedia } from '../files/normalize-input-media.js' -import { _parseEntities } from '../messages/parse-entities.js' import { _normalizePrivacyRules } from '../misc/normalize-privacy-rules.js' +import { _normalizeInputText } from '../misc/normalize-text.js' import { resolvePeer } from '../users/resolve-peer.js' import { _findStoryInUpdate } from './find-in-update.js' @@ -34,20 +34,7 @@ export async function sendStory( /** * Override caption for {@link media} */ - caption?: string | FormattedString - - /** - * Override entities for {@link media} - */ - entities?: tl.TypeMessageEntity[] - - /** - * Parse mode to use to parse entities before sending the message. - * Passing `null` will explicitly disable formatting. - * - * @default current default parse mode (if any). - */ - parseMode?: string | null + caption?: InputText /** * Whether to automatically pin this story to the profile @@ -89,19 +76,16 @@ export async function sendStory( } } - const inputMedia = await _normalizeInputMedia(client, media, params) + const inputMedia = await _normalizeInputMedia(client, media) const privacyRules = params.privacyRules ? 
await _normalizePrivacyRules(client, params.privacyRules) : [{ _: 'inputPrivacyValueAllowAll' } as const] - const [caption, entities] = await _parseEntities( + const [caption, entities] = await _normalizeInputText( client, // some types dont have `caption` field, and ts warns us, - // but since it's JS, they'll just be `undefined` and properly - // handled by _parseEntities method + // but since it's JS, they'll just be `undefined` and properly handled by the method params.caption || (media as Extract).caption, - params.parseMode, - params.entities || (media as Extract).entities, ) const res = await client.call({ diff --git a/packages/client/src/types/bots/input/input-inline-message.ts b/packages/client/src/types/bots/input/input-inline-message.ts index 0fd2a839..5913d480 100644 --- a/packages/client/src/types/bots/input/input-inline-message.ts +++ b/packages/client/src/types/bots/input/input-inline-message.ts @@ -1,6 +1,7 @@ import { assertNever, BaseTelegramClient, tl } from '@mtcute/core' -import { _parseEntities } from '../../../methods/messages/parse-entities.js' +import { _normalizeInputText } from '../../../methods/misc/normalize-text.js' +import { InputText } from '../../../types/misc/entities.js' import { InputMediaContact, InputMediaGeo, @@ -8,7 +9,6 @@ import { InputMediaVenue, InputMediaWebpage, } from '../../media/index.js' -import { FormattedString } from '../../parser.js' import { BotKeyboard, ReplyMarkup } from '../keyboards.js' /** @@ -20,13 +20,7 @@ export interface InputInlineMessageText { /** * Text of the message */ - text: string | FormattedString - - /** - * Text markup entities. - * If passed, parse mode is ignored - */ - entities?: tl.TypeMessageEntity[] + text: InputText /** * Message reply markup @@ -57,13 +51,7 @@ export interface InputInlineMessageMedia { /** * Caption for the media */ - text?: string | FormattedString - - /** - * Caption markup entities. 
- * If passed, parse mode is ignored - */ - entities?: tl.TypeMessageEntity[] + text?: InputText /** * Message reply markup @@ -135,13 +123,7 @@ export interface InputInlineMessageWebpage extends InputMediaWebpage { /** * Text of the message */ - text: string | FormattedString - - /** - * Text markup entities. - * If passed, parse mode is ignored - */ - entities?: tl.TypeMessageEntity[] + text: InputText /** * Message reply markup @@ -176,7 +158,7 @@ export namespace BotInlineMessage { * @param params */ export function text( - text: string | FormattedString, + text: InputText, params: Omit = {}, ): InputInlineMessageText { const ret = params as tl.Mutable @@ -266,11 +248,10 @@ export namespace BotInlineMessage { export async function _convertToTl( client: BaseTelegramClient, obj: InputInlineMessage, - parseMode?: string | null, ): Promise { switch (obj.type) { case 'text': { - const [message, entities] = await _parseEntities(client, obj.text, parseMode, obj.entities) + const [message, entities] = await _normalizeInputText(client, obj.text) return { _: 'inputBotInlineMessageText', @@ -281,7 +262,7 @@ export namespace BotInlineMessage { } } case 'media': { - const [message, entities] = await _parseEntities(client, obj.text, parseMode, obj.entities) + const [message, entities] = await _normalizeInputText(client, obj.text) return { _: 'inputBotInlineMessageMediaAuto', @@ -336,7 +317,7 @@ export namespace BotInlineMessage { replyMarkup: BotKeyboard._convertToTl(obj.replyMarkup), } case 'webpage': { - const [message, entities] = await _parseEntities(client, obj.text, parseMode, obj.entities) + const [message, entities] = await _normalizeInputText(client, obj.text) return { _: 'inputBotInlineMessageMediaWebPage', diff --git a/packages/client/src/types/bots/input/input-inline-result.ts b/packages/client/src/types/bots/input/input-inline-result.ts index 77cf7cb1..a4421fd4 100644 --- a/packages/client/src/types/bots/input/input-inline-result.ts +++ 
b/packages/client/src/types/bots/input/input-inline-result.ts @@ -725,7 +725,6 @@ export namespace BotInline { export async function _convertToTl( client: BaseTelegramClient, results: InputInlineResult[], - parseMode?: string | null, ): Promise<[boolean, tl.TypeInputBotInlineResult[]]> { const normalizeThumb = (obj: InputInlineResult, fallback?: string): tl.RawInputWebDocument | undefined => { if (obj.type !== 'voice' && obj.type !== 'audio' && obj.type !== 'sticker' && obj.type !== 'game') { @@ -760,7 +759,7 @@ export namespace BotInline { let sendMessage: tl.TypeInputBotInlineMessage if (obj.message) { - sendMessage = await BotInlineMessage._convertToTl(client, obj.message, parseMode) + sendMessage = await BotInlineMessage._convertToTl(client, obj.message) } else { let message = obj.title const entities: tl.TypeMessageEntity[] = [ @@ -817,7 +816,7 @@ export namespace BotInline { let sendMessage: tl.TypeInputBotInlineMessage if (obj.message) { - sendMessage = await BotInlineMessage._convertToTl(client, obj.message, parseMode) + sendMessage = await BotInlineMessage._convertToTl(client, obj.message) if (sendMessage._ !== 'inputBotInlineMessageGame') { throw new MtArgumentError('game inline result must contain a game inline message') @@ -850,7 +849,7 @@ export namespace BotInline { let sendMessage: tl.TypeInputBotInlineMessage if (obj.message) { - sendMessage = await BotInlineMessage._convertToTl(client, obj.message, parseMode) + sendMessage = await BotInlineMessage._convertToTl(client, obj.message) } else if (obj.type === 'venue') { if (obj.latitude && obj.longitude) { sendMessage = { diff --git a/packages/client/src/types/index.ts b/packages/client/src/types/index.ts index 46001bcf..98d793b1 100644 --- a/packages/client/src/types/index.ts +++ b/packages/client/src/types/index.ts @@ -7,7 +7,6 @@ export * from './files/index.js' export * from './media/index.js' export * from './messages/index.js' export * from './misc/index.js' -export * from './parser.js' export * 
from './peers/index.js' export * from './reactions/index.js' export * from './stories/index.js' diff --git a/packages/client/src/types/media/input-media.ts b/packages/client/src/types/media/input-media.ts index 3700416b..03343eb0 100644 --- a/packages/client/src/types/media/input-media.ts +++ b/packages/client/src/types/media/input-media.ts @@ -1,7 +1,7 @@ import { MaybeArray, tl } from '@mtcute/core' +import { InputText } from '../../types/misc/entities.js' import { InputFileLike } from '../files/index.js' -import { FormattedString } from '../parser.js' import { InputPeerLike } from '../peers/index.js' import { VenueSource } from './venue.js' @@ -9,13 +9,7 @@ export interface CaptionMixin { /** * Caption of the media */ - caption?: string | FormattedString - - /** - * Caption entities of the media. - * If passed, parse mode is ignored - */ - entities?: tl.TypeMessageEntity[] + caption?: InputText } export interface FileMixin { @@ -541,13 +535,7 @@ export interface InputMediaQuiz extends Omit { /** * Explanation of the quiz solution */ - solution?: string | FormattedString - - /** - * Format entities for `solution`. - * If used, parse mode is ignored. - */ - solutionEntities?: tl.TypeMessageEntity[] + solution?: InputText } /** diff --git a/packages/client/src/types/misc/entities.ts b/packages/client/src/types/misc/entities.ts index 07e60281..363a695a 100644 --- a/packages/client/src/types/misc/entities.ts +++ b/packages/client/src/types/misc/entities.ts @@ -1,11 +1,17 @@ import { tl } from '@mtcute/core' /** - * Interface describing some text with entities. - * - * Primarily used as a return type for parsers. + * Formatted text with entities */ export interface TextWithEntities { readonly text: string - readonly entities: tl.TypeMessageEntity[] + readonly entities?: tl.TypeMessageEntity[] } + +/** + * Type to be used as a parameter for methods that accept + * a formatted text with entities. 
+ * + * Can be either a plain string or an object with `text` and `entities` fields. + */ +export type InputText = string | TextWithEntities diff --git a/packages/client/src/types/misc/index.ts b/packages/client/src/types/misc/index.ts index 4e8a6d05..85728bfc 100644 --- a/packages/client/src/types/misc/index.ts +++ b/packages/client/src/types/misc/index.ts @@ -1,3 +1,4 @@ +export * from './entities.js' export * from './input-privacy-rule.js' export * from './sticker-set.js' export * from './takeout-session.js' diff --git a/packages/client/src/types/parser.ts b/packages/client/src/types/parser.ts deleted file mode 100644 index 298e02e8..00000000 --- a/packages/client/src/types/parser.ts +++ /dev/null @@ -1,55 +0,0 @@ -import { tl } from '@mtcute/core' - -/** - * Interface describing a message entity parser. - * - * mtcute comes with HTML parser inside `@mtcute/html-parser` - * and Markdown parser inside `@mtcute/markdown-parser`. - * - * You are also free to implement your own parser and register it with - * {@link TelegramClient.registerParseMode}. - */ -export interface IMessageEntityParser { - /** - * Parser name, which will be used when registering it. - */ - name: string - - /** - * Parse a string containing some text with formatting to plain text - * and message entities - * - * @param text Formatted text - * @returns A tuple containing plain text and a list of entities - */ - parse(text: string): [string, tl.TypeMessageEntity[]] - - /** - * Add formatting to the text given the plain text and the entities. - * - * > **Note**: `unparse(parse(text)) === text` is not always true! 
- * - * @param text Plain text - * @param entities Message entities that should be added to the text - */ - unparse(text: string, entities: ReadonlyArray): string -} - -/** - * Raw string that will not be escaped when passing - * to tagged template helpers (like `html` and `md`) - */ -export class FormattedString { - /** - * @param value Value that the string holds - * @param mode Name of the parse mode used - */ - constructor( - readonly value: string, - readonly mode?: T, - ) {} - - toString(): string { - return this.value - } -} diff --git a/packages/html-parser/README.md b/packages/html-parser/README.md index 07d1d918..ab9d0818 100644 --- a/packages/html-parser/README.md +++ b/packages/html-parser/README.md @@ -2,7 +2,6 @@ 📖 [API Reference](https://ref.mtcute.dev/modules/_mtcute_html_parser.html) - HTML entities parser for mtcute > **NOTE**: The syntax implemented here is **incompatible** with Bot API _HTML_. @@ -12,21 +11,20 @@ HTML entities parser for mtcute ## Features - Supports all entities that Telegram supports - Supports nested entities -- Proper newline handling (just like in real HTML) -- Automatic escaping of user input +- Proper newline/whitespace handling (just like in real HTML) +- [Interpolation](#interpolation)! ## Usage ```ts -import { TelegramClient } from '@mtcute/client' -import { HtmlMessageEntityParser, html } from '@mtcute/html-parser' - -const tg = new TelegramClient({ ... }) -tg.registerParseMode(new HtmlMessageEntityParser()) +import { html } from '@mtcute/html-parser' tg.sendText( 'me', - html`Hello, me! Updates from the feed:
${await getUpdatesFromFeed()}` + html` + Hello, me! Updates from the feed:
+ ${await getUpdatesFromFeed()} + ` ) ``` @@ -97,18 +95,20 @@ Overlapping entities are supported in `unparse()`, though. | `bold and italic` | **bold _and_** italic
⚠️ word "italic" is not actually italic! | | `bold and italic`
⚠️ this is how unparse() handles overlapping entities | **bold _and_** _italic_ | -## Escaping +## Interpolation -Escaping in this parser works exactly the same as in `htmlparser2`. +Being a tagged template literal, `html` supports interpolation. -This means that you can keep `<>&` symbols as-is in some cases. However, when dealing with user input, it is always -better to use [`HtmlMessageEntityParser.escape`](./classes/htmlmessageentityparser.html#escape) or, even better, -`html` helper: +You can interpolate one of the following: +- `string` - **will not** be parsed, and appended to plain text as-is + - In case you want the string to be parsed, use `html` as a simple function: html\`... ${html('**bold**')} ...\` +- `number` - will be converted to string and appended to plain text as-is +- `TextWithEntities` or `MessageEntity` - will add the text and its entities to the output. This is the type returned by `html` itself: + ```ts + const bold = html`**bold**` + const text = html`Hello, ${bold}!` + ``` +- falsy value (i.e. `null`, `undefined`, `false`) - will be ignored -```typescript -import { html } from '@mtcute/html-parser' - -const username = 'Boris <&>' -const text = html`Hi, ${username}!` -console.log(text) // Hi, Boris &lt;&amp;&gt;! -``` +Note that because of interpolation, you almost never need to think about escaping anything, +since the values are not even parsed as HTML, and are appended to the output as-is. 
\ No newline at end of file diff --git a/packages/html-parser/src/index.ts b/packages/html-parser/src/index.ts index d148d44e..07b86116 100644 --- a/packages/html-parser/src/index.ts +++ b/packages/html-parser/src/index.ts @@ -1,405 +1,435 @@ import { Parser } from 'htmlparser2' import Long from 'long' -import type { FormattedString, IMessageEntityParser, MessageEntity, tl } from '@mtcute/client' +import type { InputText, MessageEntity, TextWithEntities, tl } from '@mtcute/client' const MENTION_REGEX = /^tg:\/\/user\?id=(\d+)(?:&hash=(-?[0-9a-fA-F]+)(?:&|$)|&|$)/ /** - * Tagged template based helper for escaping entities in HTML + * Escape a string to be safely used in HTML. * - * @example - * ```typescript - * const escaped = html`${user.displayName}` - * ``` + * > **Note**: this function is in most cases not needed, as `html` function + * > handles all `string`s passed to it automatically as plain text. */ -export function html( - strings: TemplateStringsArray, - ...sub: (string | FormattedString<'html'> | MessageEntity | boolean | undefined | null)[] -): FormattedString<'html'> { - let str = '' - sub.forEach((it, idx) => { - if (typeof it === 'boolean' || !it) return +function escape(str: string, quote = false): string { + str = str.replace(/&/g, '&').replace(//g, '>') + if (quote) str = str.replace(/"/g, '"') - if (typeof it === 'string') { - it = HtmlMessageEntityParser.escape(it, Boolean(str.match(/=['"]$/))) - } else if ('raw' in it) { - it = new HtmlMessageEntityParser().unparse(it.text, [it.raw]) - } else { - if (it.mode && it.mode !== 'html') { - throw new Error(`Incompatible parse mode: ${it.mode}`) - } - it = it.value - } - - str += strings[idx] + it - }) - - return { value: str + strings[strings.length - 1], mode: 'html' } + return str } -/** - * Syntax highlighter function used in {@link HtmlMessageEntityParser.unparse} - * - * Must be sync (this might change in the future) and must return valid HTML. 
- */ -export type SyntaxHighlighter = (code: string, language: string) => string +function parse( + strings: TemplateStringsArray | string, + ...sub: (InputText | MessageEntity | boolean | number | undefined | null)[] +): TextWithEntities { + const stacks: Record[]> = {} + const entities: tl.TypeMessageEntity[] = [] + let plainText = '' + let pendingText = '' -export interface HtmlMessageEntityParserOptions { - syntaxHighlighter?: SyntaxHighlighter -} + function processPendingText(tagEnd = false) { + if (!pendingText.length) return -/** - * HTML MessageEntity parser. - * - * This class implements syntax very similar to one available - * in the Bot API ([documented here](https://core.telegram.org/bots/api#html-style)) - * with some slight differences. - */ -export class HtmlMessageEntityParser implements IMessageEntityParser { - name = 'html' + if (!stacks.pre?.length) { + pendingText = pendingText.replace(/[^\S\u00A0]+/gs, ' ') - private readonly _syntaxHighlighter?: SyntaxHighlighter + if (tagEnd) pendingText = pendingText.trimEnd() - constructor(options?: HtmlMessageEntityParserOptions) { - this._syntaxHighlighter = options?.syntaxHighlighter - } - - /** - * Escape the string so it can be safely used inside HTML - * - * @param str String to be escaped - * @param quote Whether `"` (double quote) should be escaped as `"` - */ - static escape(str: string, quote = false): string { - str = str.replace(/&/g, '&').replace(//g, '>') - if (quote) str = str.replace(/"/g, '"') - - return str - } - - parse(text: string): [string, tl.TypeMessageEntity[]] { - const stacks: Record[]> = {} - const entities: tl.TypeMessageEntity[] = [] - let plainText = '' - let pendingText = '' - - function processPendingText(tagEnd = false) { - if (!pendingText.length) return - - if (!stacks.pre?.length) { - pendingText = pendingText.replace(/[^\S\u00A0]+/gs, ' ') - - if (tagEnd) pendingText = pendingText.trimEnd() - - if (!plainText.length || plainText.match(/\s$/)) { - pendingText = 
pendingText.trimStart() - } + if (!plainText.length || plainText.match(/\s$/)) { + pendingText = pendingText.trimStart() } - - for (const ents of Object.values(stacks)) { - for (const ent of ents) { - ent.length += pendingText.length - } - } - - plainText += pendingText - pendingText = '' } - const parser = new Parser({ - onopentag(name, attribs) { - name = name.toLowerCase() + for (const ents of Object.values(stacks)) { + for (const ent of ents) { + ent.length += pendingText.length + } + } - processPendingText() + plainText += pendingText + pendingText = '' + } - // ignore tags inside pre (except pre) - if (name !== 'pre' && stacks.pre?.length) return + const parser = new Parser({ + onopentag(name, attribs) { + name = name.toLowerCase() - let entity: tl.TypeMessageEntity + processPendingText() - switch (name) { - case 'br': - plainText += '\n' + // ignore tags inside pre (except pre) + if (name !== 'pre' && stacks.pre?.length) return - return - case 'b': - case 'strong': - entity = { - _: 'messageEntityBold', - offset: plainText.length, - length: 0, - } - break - case 'i': - case 'em': - entity = { - _: 'messageEntityItalic', - offset: plainText.length, - length: 0, - } - break - case 'u': - entity = { - _: 'messageEntityUnderline', - offset: plainText.length, - length: 0, - } - break - case 's': - case 'del': - case 'strike': - entity = { - _: 'messageEntityStrike', - offset: plainText.length, - length: 0, - } - break - case 'blockquote': - entity = { - _: 'messageEntityBlockquote', - offset: plainText.length, - length: 0, - } - break - case 'code': - entity = { - _: 'messageEntityCode', - offset: plainText.length, - length: 0, - } - break - case 'pre': - entity = { - _: 'messageEntityPre', - offset: plainText.length, - length: 0, - language: attribs.language ?? 
'', - } - break - case 'spoiler': - case 'tg-spoiler': - entity = { - _: 'messageEntitySpoiler', - offset: plainText.length, - length: 0, - } - break + let entity: tl.TypeMessageEntity - case 'emoji': - case 'tg-emoji': { - const id = attribs.id || attribs['emoji-id'] - if (!id || !id.match(/^-?\d+$/)) return + switch (name) { + case 'br': + plainText += '\n' - entity = { - _: 'messageEntityCustomEmoji', - offset: plainText.length, - length: 0, - documentId: Long.fromString(id), - } - break + return + case 'b': + case 'strong': + entity = { + _: 'messageEntityBold', + offset: plainText.length, + length: 0, } - case 'a': { - let url = attribs.href - if (!url) return + break + case 'i': + case 'em': + entity = { + _: 'messageEntityItalic', + offset: plainText.length, + length: 0, + } + break + case 'u': + entity = { + _: 'messageEntityUnderline', + offset: plainText.length, + length: 0, + } + break + case 's': + case 'del': + case 'strike': + entity = { + _: 'messageEntityStrike', + offset: plainText.length, + length: 0, + } + break + case 'blockquote': + entity = { + _: 'messageEntityBlockquote', + offset: plainText.length, + length: 0, + } + break + case 'code': + entity = { + _: 'messageEntityCode', + offset: plainText.length, + length: 0, + } + break + case 'pre': + entity = { + _: 'messageEntityPre', + offset: plainText.length, + length: 0, + language: attribs.language ?? 
'', + } + break + case 'spoiler': + case 'tg-spoiler': + entity = { + _: 'messageEntitySpoiler', + offset: plainText.length, + length: 0, + } + break - const mention = MENTION_REGEX.exec(url) + case 'emoji': + case 'tg-emoji': { + const id = attribs.id || attribs['emoji-id'] + if (!id || !id.match(/^-?\d+$/)) return - if (mention) { - const id = parseInt(mention[1]) - const accessHash = mention[2] + entity = { + _: 'messageEntityCustomEmoji', + offset: plainText.length, + length: 0, + documentId: Long.fromString(id), + } + break + } + case 'a': { + let url = attribs.href + if (!url) return - if (accessHash) { - entity = { - _: 'inputMessageEntityMentionName', - offset: plainText.length, - length: 0, - userId: { - _: 'inputUser', - userId: id, - accessHash: Long.fromString(accessHash, false, 16), - }, - } - } else { - entity = { - _: 'messageEntityMentionName', - offset: plainText.length, - length: 0, - userId: id, - } - } - } else { - if (url.match(/^\/\//)) url = 'http:' + url + const mention = MENTION_REGEX.exec(url) + if (mention) { + const id = parseInt(mention[1]) + const accessHash = mention[2] + + if (accessHash) { entity = { - _: 'messageEntityTextUrl', + _: 'inputMessageEntityMentionName', offset: plainText.length, length: 0, - url, + userId: { + _: 'inputUser', + userId: id, + accessHash: Long.fromString(accessHash, false, 16), + }, + } + } else { + entity = { + _: 'messageEntityMentionName', + offset: plainText.length, + length: 0, + userId: id, } } - break - } - default: - return - } + } else { + if (url.match(/^\/\//)) url = 'http:' + url - if (!(name in stacks)) { - stacks[name] = [] - } - stacks[name].push(entity) - }, - onclosetag(name: string) { - processPendingText(true) - - name = name.toLowerCase() - - // ignore tags inside pre (except pre) - if (name !== 'pre' && stacks.pre?.length) return - - const entity = stacks[name]?.pop() - - if (!entity) return // unmatched close tag - - // ignore nested pre-s - if (name !== 'pre' || !stacks.pre?.length) 
{ - entities.push(entity) - } - }, - ontext(data) { - pendingText += data - }, - }) - - parser.write(text) - - processPendingText(true) - - return [plainText.replace(/\u00A0/g, ' '), entities] - } - - unparse(text: string, entities: ReadonlyArray): string { - return this._unparse(text, entities) - } - - // internal function that uses recursion to correctly process nested & overlapping entities - private _unparse( - text: string, - entities: ReadonlyArray, - entitiesOffset = 0, - offset = 0, - length = text.length, - ): string { - if (!text) return text - - if (!entities.length || entities.length === entitiesOffset) { - return HtmlMessageEntityParser.escape(text) - .replace(/\n/g, '
') - .replace(/ {2,}/g, (match) => { - return ' '.repeat(match.length) - }) - } - - const end = offset + length - - const html: string[] = [] - let lastOffset = 0 - - for (let i = entitiesOffset; i < entities.length; i++) { - const entity = entities[i] - if (entity.offset >= end) break - - let entOffset = entity.offset - let length = entity.length - - if (entOffset < 0) { - length += entOffset - entOffset = 0 - } - - let relativeOffset = entOffset - offset - - if (relativeOffset > lastOffset) { - // add missing plain text - html.push(HtmlMessageEntityParser.escape(text.substring(lastOffset, relativeOffset))) - } else if (relativeOffset < lastOffset) { - length -= lastOffset - relativeOffset - relativeOffset = lastOffset - } - - if (length <= 0 || relativeOffset >= end || relativeOffset < 0) { - continue - } - - let skip = false - - const substr = text.substr(relativeOffset, length) - if (!substr) continue - - const type = entity._ - - let entityText - - if (type === 'messageEntityPre') { - entityText = substr - } else { - entityText = this._unparse(substr, entities, i + 1, offset + relativeOffset, length) - } - - switch (type) { - case 'messageEntityBold': - case 'messageEntityItalic': - case 'messageEntityUnderline': - case 'messageEntityStrike': - case 'messageEntityCode': - case 'messageEntityBlockquote': - case 'messageEntitySpoiler': - { - const tag = ( - { - messageEntityBold: 'b', - messageEntityItalic: 'i', - messageEntityUnderline: 'u', - messageEntityStrike: 's', - messageEntityCode: 'code', - messageEntityBlockquote: 'blockquote', - messageEntitySpoiler: 'spoiler', - } as const - )[type] - html.push(`<${tag}>${entityText}`) + entity = { + _: 'messageEntityTextUrl', + offset: plainText.length, + length: 0, + url, + } } break - case 'messageEntityPre': - html.push( - `${ - this._syntaxHighlighter && entity.language ? 
- this._syntaxHighlighter(entityText, entity.language) : - entityText - }`, - ) - break - case 'messageEntityEmail': - html.push(`${entityText}`) - break - case 'messageEntityUrl': - html.push(`${entityText}`) - break - case 'messageEntityTextUrl': - html.push(`${entityText}`) - break - case 'messageEntityMentionName': - html.push(`${entityText}`) - break + } default: - skip = true - break + return } - lastOffset = relativeOffset + (skip ? 0 : length) + if (!(name in stacks)) { + stacks[name] = [] + } + stacks[name].push(entity) + }, + onclosetag(name: string) { + processPendingText(true) + + name = name.toLowerCase() + + // ignore tags inside pre (except pre) + if (name !== 'pre' && stacks.pre?.length) return + + const entity = stacks[name]?.pop() + + if (!entity) return // unmatched close tag + + // ignore nested pre-s + if (name !== 'pre' || !stacks.pre?.length) { + entities.push(entity) + } + }, + ontext(data) { + pendingText += data + }, + }) + + if (typeof strings === 'string') strings = [strings] as unknown as TemplateStringsArray + + sub.forEach((it, idx) => { + parser.write(strings[idx]) + + if (typeof it === 'boolean' || !it) return + + if (typeof it === 'string' || typeof it === 'number') { + pendingText += it + } else { + // TextWithEntities or MessageEntity + const text = it.text + const innerEntities = 'raw' in it ? 
[it.raw] : it.entities + + processPendingText() + const baseOffset = plainText.length + pendingText += text + + if (innerEntities) { + for (const ent of innerEntities) { + entities.push({ ...ent, offset: ent.offset + baseOffset }) + } + } } + }) - html.push(HtmlMessageEntityParser.escape(text.substr(lastOffset))) + parser.write(strings[strings.length - 1]) - return html.join('') + processPendingText(true) + + return { + text: plainText.replace(/\u00A0/g, ' '), + entities, } } + +/** Options passed to `html.unparse` */ +export interface HtmlUnparseOptions { + /** + * Syntax highlighter to use when un-parsing `pre` tags with language + */ + syntaxHighlighter?: (code: string, language: string) => string +} + +// internal function that uses recursion to correctly process nested & overlapping entities +function _unparse( + text: string, + entities: ReadonlyArray, + params: HtmlUnparseOptions, + entitiesOffset = 0, + offset = 0, + length = text.length, +): string { + if (!text) return text + + if (!entities.length || entities.length === entitiesOffset) { + return escape(text) + .replace(/\n/g, '
') + .replace(/ {2,}/g, (match) => { + return ' '.repeat(match.length) + }) + } + + const end = offset + length + + const html: string[] = [] + let lastOffset = 0 + + for (let i = entitiesOffset; i < entities.length; i++) { + const entity = entities[i] + if (entity.offset >= end) break + + let entOffset = entity.offset + let length = entity.length + + if (entOffset < 0) { + length += entOffset + entOffset = 0 + } + + let relativeOffset = entOffset - offset + + if (relativeOffset > lastOffset) { + // add missing plain text + html.push(escape(text.substring(lastOffset, relativeOffset))) + } else if (relativeOffset < lastOffset) { + length -= lastOffset - relativeOffset + relativeOffset = lastOffset + } + + if (length <= 0 || relativeOffset >= end || relativeOffset < 0) { + continue + } + + let skip = false + + const substr = text.substr(relativeOffset, length) + if (!substr) continue + + const type = entity._ + + let entityText + + if (type === 'messageEntityPre') { + entityText = substr + } else { + entityText = _unparse(substr, entities, params, i + 1, offset + relativeOffset, length) + } + + switch (type) { + case 'messageEntityBold': + case 'messageEntityItalic': + case 'messageEntityUnderline': + case 'messageEntityStrike': + case 'messageEntityCode': + case 'messageEntityBlockquote': + case 'messageEntitySpoiler': + { + const tag = ( + { + messageEntityBold: 'b', + messageEntityItalic: 'i', + messageEntityUnderline: 'u', + messageEntityStrike: 's', + messageEntityCode: 'code', + messageEntityBlockquote: 'blockquote', + messageEntitySpoiler: 'spoiler', + } as const + )[type] + html.push(`<${tag}>${entityText}`) + } + break + case 'messageEntityPre': + html.push( + `${ + params.syntaxHighlighter && entity.language ? 
+ params.syntaxHighlighter(entityText, entity.language) : + entityText + }`, + ) + break + case 'messageEntityEmail': + html.push(`${entityText}`) + break + case 'messageEntityUrl': + html.push(`${entityText}`) + break + case 'messageEntityTextUrl': + html.push(`${entityText}`) + break + case 'messageEntityMentionName': + html.push(`${entityText}`) + break + default: + skip = true + break + } + + lastOffset = relativeOffset + (skip ? 0 : length) + } + + html.push(escape(text.substr(lastOffset))) + + return html.join('') +} + +/** + * Add HTML formatting to the text given the plain text and entities contained in it. + */ +function unparse(input: InputText, options?: HtmlUnparseOptions): string { + if (typeof input === 'string') { + return _unparse(input, [], options ?? {}) + } + + return _unparse(input.text, input.entities ?? [], options ?? {}) +} + +// typedoc doesn't support this yet, so we'll have to do it manually +// https://github.com/TypeStrong/typedoc/issues/2436 + +export const html: { + /** + * Tagged template based HTML-to-entities parser function + * + * Additionally, `html` function has two static methods: + * - `html.escape` - escape a string to be safely used in HTML + * (should not be needed in most cases, as `html` function itself handles all `string`s + * passed to it automatically as plain text) + * - `html.unparse` - add HTML formatting to the text given the plain text and entities contained in it + * + * @example + * ```typescript + * const text = html`${user.displayName}` + * ``` + */ + ( + strings: TemplateStringsArray, + ...sub: (InputText | MessageEntity | boolean | number | undefined | null)[] + ): TextWithEntities + /** + * A variant taking a plain JS string as input + * and parsing it. + * + * Useful for cases when you already have a string + * (e.g. from some server) and want to parse it. 
+ * + * @example + * ```typescript + * const string = 'hello' + * const text = html(string) + * ``` + */ + (string: string): TextWithEntities + escape: typeof escape + unparse: typeof unparse +} = Object.assign(parse, { + escape, + unparse, +}) diff --git a/packages/html-parser/tests/html-parser.spec.ts b/packages/html-parser/tests/html-parser.spec.ts index 98d56132..2061927f 100644 --- a/packages/html-parser/tests/html-parser.spec.ts +++ b/packages/html-parser/tests/html-parser.spec.ts @@ -2,9 +2,12 @@ import { expect } from 'chai' import Long from 'long' import { describe, it } from 'mocha' -import { FormattedString, tl } from '@mtcute/client' +import { MessageEntity, TextWithEntities, tl } from '@mtcute/client' -import { html, HtmlMessageEntityParser } from '../src/index.js' +// prettier has "html" special-cased which breaks the formatting +// this is not an issue when using normally, since we properly handle newlines/spaces, +// but here we want to test everything as it is +import { html as htm, HtmlUnparseOptions } from '../src/index.js' const createEntity = ( type: T, @@ -21,11 +24,14 @@ const createEntity = ( } describe('HtmlMessageEntityParser', () => { - const parser = new HtmlMessageEntityParser() - describe('unparse', () => { - const test = (text: string, entities: tl.TypeMessageEntity[], expected: string, _parser = parser): void => { - expect(_parser.unparse(text, entities)).eq(expected) + const test = ( + text: string, + entities: tl.TypeMessageEntity[], + expected: string, + params?: HtmlUnparseOptions, + ): void => { + expect(htm.unparse({ text, entities }, params)).eq(expected) } it('should return the same text if there are no entities or text', () => { @@ -197,10 +203,6 @@ describe('HtmlMessageEntityParser', () => { }) it('should work with custom syntax highlighter', () => { - const parser = new HtmlMessageEntityParser({ - syntaxHighlighter: (code, lang) => `lang: ${lang}
${code}`, - }) - test( 'plain console.log("Hello, world!") some code plain', [ @@ -210,7 +212,9 @@ describe('HtmlMessageEntityParser', () => { createEntity('messageEntityPre', 35, 9, { language: '' }), ], 'plain
lang: javascript
console.log("Hello, world!")
some code
plain', - parser, + { + syntaxHighlighter: (code, lang) => `lang: ${lang}
${code}`, + }, ) }) @@ -226,15 +230,14 @@ describe('HtmlMessageEntityParser', () => { }) describe('parse', () => { - const test = (text: string, expectedEntities: tl.TypeMessageEntity[], expectedText: string): void => { - const [_text, entities] = parser.parse(text) - expect(_text).eql(expectedText) - expect(entities).eql(expectedEntities) + const test = (text: TextWithEntities, expectedEntities: tl.TypeMessageEntity[], expectedText: string): void => { + expect(text.text).eql(expectedText) + expect(text.entities ?? []).eql(expectedEntities) } it('should handle , , , tags', () => { test( - 'plain bold italic underline strikethrough plain', + htm`plain bold italic underline strikethrough plain`, [ createEntity('messageEntityBold', 6, 4), createEntity('messageEntityItalic', 11, 6), @@ -247,7 +250,7 @@ describe('HtmlMessageEntityParser', () => { it('should handle ,
, 
, tags', () => { test( - 'plain code
pre
blockquote
spoiler plain', + htm`plain code
pre
blockquote
spoiler plain`, [ createEntity('messageEntityCode', 6, 4), createEntity('messageEntityPre', 11, 3, { language: '' }), @@ -260,7 +263,7 @@ describe('HtmlMessageEntityParser', () => { it('should handle links and text mentions', () => { test( - 'plain https://google.com google @durov Pavel Durov plain', + htm`plain https://google.com google @durov Pavel Durov plain`, [ createEntity('messageEntityTextUrl', 25, 6, { url: 'https://google.com', @@ -273,7 +276,7 @@ describe('HtmlMessageEntityParser', () => { ) test( - 'user', + htm`user`, [ createEntity('inputMessageEntityMentionName', 0, 4, { userId: { @@ -289,7 +292,7 @@ describe('HtmlMessageEntityParser', () => { it('should handle language in
', () => {
             test(
-                'plain 
console.log("Hello, world!")
some code
plain', + htm`plain
console.log("Hello, world!")
some code
plain`, [ createEntity('messageEntityPre', 6, 28, { language: 'javascript', @@ -302,31 +305,31 @@ describe('HtmlMessageEntityParser', () => { it('should ignore other tags inside
', () => {
             test(
-                '
bold and not bold
', + htm`
bold and not bold
`, [createEntity('messageEntityPre', 0, 17, { language: '' })], 'bold and not bold', ) test( - '
pre inside pre
so cool
', + htm`
pre inside pre
so cool
`, [createEntity('messageEntityPre', 0, 22, { language: '' })], 'pre inside pre so cool', ) }) it('should ignore newlines and indentation', () => { - test('this is some text\n\nwith newlines', [], 'this is some text with newlines') + test(htm`this is some text\n\nwith newlines`, [], 'this is some text with newlines') test( - 'this is some text\n\nwith newlines', + htm`this is some text\n\nwith newlines`, [createEntity('messageEntityBold', 0, 22)], 'this is some text with newlines', ) test( - 'this is some text ending with\n\n newlines', + htm`this is some text ending with\n\n newlines`, [createEntity('messageEntityBold', 0, 29)], 'this is some text ending with newlines', ) test( - ` + htm` this is some indented text with newlines and @@ -341,7 +344,7 @@ describe('HtmlMessageEntityParser', () => { it('should not ignore newlines and indentation in pre', () => { test( - '
this is some text\n\nwith newlines
', + htm`
this is some text\n\nwith newlines
`, [createEntity('messageEntityPre', 0, 32, { language: '' })], 'this is some text\n\nwith newlines', ) @@ -349,7 +352,7 @@ describe('HtmlMessageEntityParser', () => { // fuck my life const indent = ' ' test( - `
+                htm`
                 this  is  some  indented  text
                 with    newlines     and
                 
@@ -376,9 +379,9 @@ describe('HtmlMessageEntityParser', () => {
         })
 
         it('should handle 
', () => { - test('this is some text

with actual newlines', [], 'this is some text\n\nwith actual newlines') + test(htm`this is some text

with actual newlines`, [], 'this is some text\n\nwith actual newlines') test( - 'this is some text

with actual newlines', + htm`this is some text

with actual newlines`, // note that the
(i.e. \n) is not included in the entity // this is expected, and the result is the same [createEntity('messageEntityBold', 0, 17)], @@ -388,7 +391,7 @@ describe('HtmlMessageEntityParser', () => { it('should handle  ', () => { test( - 'one space, many    spaces, and
a newline', + htm`one space, many    spaces, and
a newline`, [], 'one space, many spaces, and\na newline', ) @@ -396,19 +399,19 @@ describe('HtmlMessageEntityParser', () => { it('should support entities on the edges', () => { test( - 'Hello, world', + htm`Hello, world`, [createEntity('messageEntityBold', 0, 5), createEntity('messageEntityBold', 7, 5)], 'Hello, world', ) }) it('should return empty array if there are no entities', () => { - test('Hello, world', [], 'Hello, world') + test(htm`Hello, world`, [], 'Hello, world') }) it('should support entities followed by each other', () => { test( - 'plain Hello, world plain', + htm`plain Hello, world plain`, [createEntity('messageEntityBold', 6, 6), createEntity('messageEntityItalic', 12, 6)], 'plain Hello, world plain', ) @@ -416,7 +419,7 @@ describe('HtmlMessageEntityParser', () => { it('should support nested entities', () => { test( - 'Welcome to the gym zone!', + htm`Welcome to the gym zone!`, [createEntity('messageEntityBold', 15, 8), createEntity('messageEntityItalic', 0, 24)], 'Welcome to the gym zone!', ) @@ -424,22 +427,22 @@ describe('HtmlMessageEntityParser', () => { it('should support nested entities with the same edges', () => { test( - 'Welcome to the gym zone!', + htm`Welcome to the gym zone!`, [createEntity('messageEntityBold', 15, 9), createEntity('messageEntityItalic', 0, 24)], 'Welcome to the gym zone!', ) test( - 'Welcome to the gym zone!', + htm`Welcome to the gym zone!`, [createEntity('messageEntityItalic', 15, 9), createEntity('messageEntityBold', 0, 24)], 'Welcome to the gym zone!', ) test( - 'Welcome to the gym zone!', + htm`Welcome to the gym zone!`, [createEntity('messageEntityBold', 0, 7), createEntity('messageEntityItalic', 0, 24)], 'Welcome to the gym zone!', ) test( - 'Welcome to the gym zone!', + htm`Welcome to the gym zone!`, [createEntity('messageEntityBold', 0, 24), createEntity('messageEntityItalic', 0, 24)], 'Welcome to the gym zone!', ) @@ -447,7 +450,7 @@ describe('HtmlMessageEntityParser', () => { it('should properly handle 
emojis', () => { test( - "best flower: 🌸. don't you even doubt it.", + htm`best flower: 🌸. don't you even doubt it.`, [ createEntity('messageEntityItalic', 0, 11), createEntity('messageEntityBold', 13, 2), @@ -458,12 +461,12 @@ describe('HtmlMessageEntityParser', () => { }) it('should handle non-escaped special symbols', () => { - test('<&> < & > <&>', [createEntity('messageEntityBold', 4, 5)], '<&> < & > <&>') + test(htm`<&> < & > <&>`, [createEntity('messageEntityBold', 4, 5)], '<&> < & > <&>') }) it('should unescape special symbols', () => { test( - '<&> < & > <&> link', + htm`<&> < & > <&> link`, [ createEntity('messageEntityBold', 4, 5), createEntity('messageEntityTextUrl', 14, 4, { @@ -475,44 +478,96 @@ describe('HtmlMessageEntityParser', () => { }) it('should ignore other tags', () => { - test('', [], 'alert(1)') + test(htm``, [], 'alert(1)') }) it('should ignore empty urls', () => { - test('link link', [], 'link link') - }) - }) - - describe('template', () => { - it('should work as a tagged template literal', () => { - const unsafeString = '<&>' - - expect(html`${unsafeString}`.value).eq('<&>') - expect(html`${unsafeString} text`.value).eq('<&> text') - expect(html`text ${unsafeString}`.value).eq('text <&>') - expect(html`${unsafeString}`.value).eq('<&>') + test(htm`link link`, [], 'link link') }) - it('should skip with FormattedString', () => { - const unsafeString2 = '<&>' - const unsafeString = new FormattedString('<&>') + describe('template', () => { + it('should add plain strings as is', () => { + test( + htm`some text ${'not bold yea'} some more text`, + [], + 'some text not bold yea some more text', + ) + }) - expect(html`${unsafeString}`.value).eq('<&>') - expect(html`${unsafeString} ${unsafeString2}`.value).eq('<&> <&>') - expect(html`${unsafeString} text`.value).eq('<&> text') - expect(html`text ${unsafeString}`.value).eq('text <&>') - expect(html`${unsafeString}`.value).eq('<&>') - expect(html`${unsafeString} ${unsafeString2}`.value).eq('<&> 
<&>') - }) + it('should skip falsy values', () => { + test(htm`some text ${null} some ${false} more text`, [], 'some text some more text') + }) - it('should error with incompatible FormattedString', () => { - const unsafeString = new FormattedString('<&>', 'html') - const unsafeString2 = new FormattedString('<&>', 'some-other-mode') + it('should process entities', () => { + const inner = htm`bold` + test( + htm`some text ${inner} some more text`, + [createEntity('messageEntityBold', 10, 4)], + 'some text bold some more text', + ) + test( + htm`some text ${inner} some more ${inner} text`, + [createEntity('messageEntityBold', 10, 4), createEntity('messageEntityBold', 25, 4)], + 'some text bold some more bold text', + ) + }) - expect(() => html`${unsafeString}`.value).not.throw(Error) - // eslint-disable-next-line @typescript-eslint/ban-ts-comment - // @ts-expect-error - expect(() => html`${unsafeString2}`.value).throw(Error) + it('should process entities on edges', () => { + test( + htm`${htm`bold`} and ${htm`italic`}`, + [createEntity('messageEntityBold', 0, 4), createEntity('messageEntityItalic', 9, 6)], + 'bold and italic', + ) + }) + + it('should process nested entities', () => { + test( + htm`bold ${htm`bold italic`} more bold`, + [createEntity('messageEntityItalic', 5, 11), createEntity('messageEntityBold', 0, 26)], + 'bold bold italic more bold', + ) + test( + htm`bold ${htm`bold italic and some underline`} more bold`, + [ + createEntity('messageEntityItalic', 5, 11), + createEntity('messageEntityUnderline', 17, 18), + createEntity('messageEntityBold', 0, 45), + ], + 'bold bold italic and some underline more bold', + ) + test( + htm`${htm`bold italic underline`}`, + [ + createEntity('messageEntityUnderline', 12, 9), + createEntity('messageEntityItalic', 0, 21), + createEntity('messageEntityBold', 0, 21), + ], + 'bold italic underline', + ) + }) + + it('should process MessageEntity', () => { + test( + htm`bold ${new MessageEntity( + 
createEntity('messageEntityItalic', 0, 11), + 'bold italic', + )} more bold`, + [createEntity('messageEntityItalic', 5, 11), createEntity('messageEntityBold', 0, 26)], + 'bold bold italic more bold', + ) + }) + + it('should support simple function usage', () => { + // assuming we are receiving it e.g. from a server + const someHtml = 'bold' + + test(htm(someHtml), [createEntity('messageEntityBold', 0, 4)], 'bold') + test( + htm`text ${htm(someHtml)} more text`, + [createEntity('messageEntityBold', 5, 4)], + 'text bold more text', + ) + }) }) }) }) diff --git a/packages/i18n/src/types.ts b/packages/i18n/src/types.ts index 02ce4899..b2782a2d 100644 --- a/packages/i18n/src/types.ts +++ b/packages/i18n/src/types.ts @@ -1,6 +1,6 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ -import type { FormattedString } from '@mtcute/client' +import type { tl } from '@mtcute/client' type Values = T[keyof T] type SafeGet = T extends Record ? T[K] : never @@ -8,7 +8,18 @@ type SafeGet = T extends Record ? 
T[K] : never /** * Literal translated value, represented by (optionally formatted) string */ -export type I18nValueLiteral = string | FormattedString +export type I18nValueLiteral = + | string + | { + readonly text: string + readonly entities?: tl.TypeMessageEntity[] + } + +// ^ we're not using InputText from @mtcute/client because it's a type-only dependency +// and may not be available at runtime, and we don't want it to be `any` +// +// we check if this is assignable to InputText in tests, so it's fine + /** * Dynamic translated value, represented by a * function resolving to a literal one @@ -59,7 +70,7 @@ export type MtcuteI18nFunction = -) => string | FormattedString +) => I18nValueLiteral /** * Wrapper type for i18n object containing strings for a language diff --git a/packages/i18n/src/utils.ts b/packages/i18n/src/utils.ts index 7ad92400..40eaabe0 100644 --- a/packages/i18n/src/utils.ts +++ b/packages/i18n/src/utils.ts @@ -14,7 +14,7 @@ export function createI18nStringsIndex(strings: I18nStrings): Record 'World', welcome: (name: string) => `Welcome ${name}`, + test: someInputText, }, } diff --git a/packages/markdown-parser/README.md b/packages/markdown-parser/README.md index 7e5939fa..76c69b58 100644 --- a/packages/markdown-parser/README.md +++ b/packages/markdown-parser/README.md @@ -8,22 +8,23 @@ Markdown entities parser for mtcute > > Please read [Syntax](#syntax) below for a detailed explanation -> **Note**: -> It is generally recommended to use `@mtcute/html-parser` instead, -> as it is easier to use and is more readable in most cases +## Features +- Supports all entities that Telegram supports +- Supports nested and overlapping entities +- Supports dedentation +- [Interpolation](#interpolation)! ## Usage ```typescript -import { TelegramClient } from '@mtcute/client' -import { MarkdownMessageEntityParser, md } from '@mtcute/markdown-parser' - -const tg = new TelegramClient({ ... 
}) -tg.registerParseMode(new MarkdownMessageEntityParser()) +import { md } from '@mtcute/markdown-parser' tg.sendText( 'me', - md`Hello, **me**! Updates from the feed:\n${await getUpdatesFromFeed()}` + md` + Hello, **me**! Updates from the feed: + ${await getUpdatesFromFeed()} + ` ) ``` @@ -118,27 +119,20 @@ tags just as start/end markers, and not in terms of nesting. | `**Welcome back, __User__!**` | **Welcome back, _User_!** | `Welcome back, User!` | | `**bold __and** italic__` | **bold _and_** _italic_ | `bold and italic` | -## Escaping +## Interpolation -Often, you may want to escape the text in a way it is not processed as an entity. +Being a tagged template literal, `md` supports interpolation. -To escape any character, prepend it with ` \ ` (backslash). Escaped characters are added to output as-is. +You can interpolate one of the following: +- `string` - **will not** be parsed, and appended to plain text as-is + - In case you want the string to be parsed, use `md` as a simple function: md\`... ${md('**bold**')} ...\` +- `number` - will be converted to string and appended to plain text as-is +- `TextWithEntities` or `MessageEntity` - will add the text and its entities to the output. This is the type returned by `md` itself: + ```ts + const bold = md`**bold**` + const text = md`Hello, ${bold}!` + ``` +- falsy value (i.e. `null`, `undefined`, `false`) - will be ignored -Inline entities and links inside code entities (both inline and pre) are not processed, so you only need to escape -closing tags. - -> **Note**: backslash itself must be escaped like this: ` \\ ` (double backslash). -> -> This will look pretty bad in real code, so use escaping only when really needed, and use -> [`MarkdownMessageEntityParser.escape`](./classes/markdownmessageentityparser.html#escape) or `md` or -> other parse modes (like HTML one provided by [`@mtcute/html-parser`](../html-parser/index.html))) instead. 
- -> In theory, you could escape every single non-markup character, but why would you want to do that 😜 - -| Code | Result (visual) | Result (as HTML) | -|----------------------------------------|--------------------------------|---------------------------------------------------------| -| `\_\_not italic\_\_` | \_\_not italic\_\_ | `__not italic__` | -| `__italic \_ text__` | _italic \_ text_ | `italic _ text ` | -| \`__not italic__\` | `__not italic__` | `__not italic__` | -| C:\\\\Users\\\\Guest | C:\Users\Guest | `C:\Users\Guest` | -| \`var a = \\\`hello\\\`\` | var a = \`hello\` | <code>var a = \`hello\`</code> | +Because of interpolation, you almost never need to think about escaping anything, +since the values are not even parsed as Markdown, and are appended to the output as-is. \ No newline at end of file diff --git a/packages/markdown-parser/src/index.ts b/packages/markdown-parser/src/index.ts index ea7e4722..e97aa9eb 100644 --- a/packages/markdown-parser/src/index.ts +++ b/packages/markdown-parser/src/index.ts @@ -1,6 +1,6 @@ import Long from 'long' -import type { FormattedString, IMessageEntityParser, MessageEntity, tl } from '@mtcute/client' +import type { InputText, MessageEntity, TextWithEntities, tl } from '@mtcute/client' const MENTION_REGEX = /^tg:\/\/user\?id=(\d+)(?:&hash=(-?[0-9a-fA-F]+)(?:&|$)|&|$)/ const EMOJI_REGEX = /^tg:\/\/emoji\?id=(-?\d+)/ @@ -16,69 +16,138 @@ const TAG_PRE = '```' const TO_BE_ESCAPED = /[*_\-~`[\\\]|]/g /** - * Tagged template based helper for escaping entities in Markdown + * Escape a string to be safely used in Markdown. * - * @example - * ```typescript - * const escaped = md`**${user.displayName}**` - * ``` + * > **Note**: this function is in most cases not needed, as `md` function + * > handles all `string`s passed to it automatically as plain text. 
*/ -export function md( - strings: TemplateStringsArray, - ...sub: (string | FormattedString<'markdown'> | MessageEntity | boolean | undefined | null)[] -): FormattedString<'markdown'> { - let str = '' - sub.forEach((it, idx) => { - if (typeof it === 'boolean' || !it) return - - if (typeof it === 'string') it = MarkdownMessageEntityParser.escape(it) - else if ('raw' in it) { - it = new MarkdownMessageEntityParser().unparse(it.text, [it.raw]) - } else { - if (it.mode && it.mode !== 'markdown') { - throw new Error(`Incompatible parse mode: ${it.mode}`) - } - it = it.value - } - - str += strings[idx] + it - }) - - return { value: str + strings[strings.length - 1], mode: 'markdown' } +function escape(str: string): string { + return str.replace(TO_BE_ESCAPED, (s) => '\\' + s) } /** - * Markdown MessageEntity parser. - * - * This class is **not** compatible with the Bot API Markdown nor MarkdownV2, - * please read the [documentation](../) to learn about syntax. + * Add Markdown formatting to the text given the plain text and entities contained in it. */ -export class MarkdownMessageEntityParser implements IMessageEntityParser { - name = 'markdown' +function unparse(input: InputText): string { + if (typeof input === 'string') return escape(input) - /** - * - * @param str String to be escaped - */ + let text = input.text + const entities = input.entities ?? 
[] - /* istanbul ignore next */ - static escape(str: string): string { - // this code doesn't really need to be tested since it's just - // a simplified version of what is used in .unparse() - return str.replace(TO_BE_ESCAPED, (s) => '\\' + s) + // keep track of positions of inserted escape symbols + const escaped: number[] = [] + text = text.replace(TO_BE_ESCAPED, (s, pos: number) => { + escaped.push(pos) + + return '\\' + s + }) + const hasEscaped = escaped.length > 0 + + type InsertLater = [number, string] + const insert: InsertLater[] = [] + + for (const entity of entities) { + const type = entity._ + + let start = entity.offset + let end = start + entity.length + + if (start > text.length) continue + if (start < 0) start = 0 + if (end > text.length) end = text.length + + if (hasEscaped) { + // determine number of escape chars since the beginning of the string + let escapedPos = 0 + + while (escapedPos < escaped.length && escaped[escapedPos] < start) { + escapedPos += 1 + } + start += escapedPos + + while (escapedPos < escaped.length && escaped[escapedPos] <= end) { + escapedPos += 1 + } + end += escapedPos + } + + let startTag + let endTag: string + + switch (type) { + case 'messageEntityBold': + startTag = endTag = TAG_BOLD + break + case 'messageEntityItalic': + startTag = endTag = TAG_ITALIC + break + case 'messageEntityUnderline': + startTag = endTag = TAG_UNDERLINE + break + case 'messageEntityStrike': + startTag = endTag = TAG_STRIKE + break + case 'messageEntitySpoiler': + startTag = endTag = TAG_SPOILER + break + case 'messageEntityCode': + startTag = endTag = TAG_CODE + break + case 'messageEntityPre': + startTag = TAG_PRE + + if (entity.language) { + startTag += entity.language + } + + startTag += '\n' + endTag = '\n' + TAG_PRE + break + case 'messageEntityTextUrl': + startTag = '[' + endTag = `](${entity.url})` + break + case 'messageEntityMentionName': + startTag = '[' + endTag = `](tg://user?id=${entity.userId})` + break + case 
'messageEntityCustomEmoji': + startTag = '[' + endTag = `](tg://emoji?id=${entity.documentId.toString()})` + break + default: + continue + } + + insert.push([start, startTag]) + insert.push([end, endTag]) } - parse(text: string): [string, tl.TypeMessageEntity[]] { - const entities: tl.TypeMessageEntity[] = [] + // sort by offset desc + insert.sort((a, b) => b[0] - a[0]) + + for (const [offset, tag] of insert) { + text = text.substr(0, offset) + tag + text.substr(offset) + } + + return text +} + +function parse( + strings: TemplateStringsArray | string, + ...sub: (InputText | MessageEntity | boolean | number | undefined | null)[] +): TextWithEntities { + const entities: tl.TypeMessageEntity[] = [] + let result = '' + + const stacks: Record[]> = {} + + let insideCode = false + let insidePre = false + let insideLink = false + + function feed(text: string) { const len = text.length - let result = '' - - const stacks: Record[]> = {} - - let insideCode = false - let insidePre = false - let insideLink = false - let pos = 0 while (pos < len) { @@ -297,111 +366,106 @@ export class MarkdownMessageEntityParser implements IMessageEntityParser { } } + if (c === '\n') { + if (pos !== 0) { + result += '\n' + } + + const nonWhitespace = text.slice(pos + 1).search(/\S/) + + if (nonWhitespace !== -1) { + pos += nonWhitespace + 1 + } else { + pos = len + result = result.trimEnd() + } + continue + } + // nothing matched => normal character result += c pos += 1 } - - return [result, entities] } - unparse(text: string, entities: ReadonlyArray): string { - // keep track of positions of inserted escape symbols - const escaped: number[] = [] - text = text.replace(TO_BE_ESCAPED, (s, pos: number) => { - escaped.push(pos) + if (typeof strings === 'string') strings = [strings] as unknown as TemplateStringsArray - return '\\' + s - }) - const hasEscaped = escaped.length > 0 + sub.forEach((it, idx) => { + feed(strings[idx]) - type InsertLater = [number, string] - const insert: InsertLater[] = [] 
+ if (typeof it === 'boolean' || !it) return - for (const entity of entities) { - const type = entity._ + if (typeof it === 'string' || typeof it === 'number') { + result += it + } else { + // TextWithEntities or MessageEntity + const text = it.text + const innerEntities = 'raw' in it ? [it.raw] : it.entities - let start = entity.offset - let end = start + entity.length + const baseOffset = result.length + result += text - if (start > text.length) continue - if (start < 0) start = 0 - if (end > text.length) end = text.length - - if (hasEscaped) { - // determine number of escape chars since the beginning of the string - let escapedPos = 0 - - while (escapedPos < escaped.length && escaped[escapedPos] < start) { - escapedPos += 1 + if (innerEntities) { + for (const ent of innerEntities) { + entities.push({ ...ent, offset: ent.offset + baseOffset }) } - start += escapedPos - - while (escapedPos < escaped.length && escaped[escapedPos] <= end) { - escapedPos += 1 - } - end += escapedPos } - - let startTag - let endTag: string - - switch (type) { - case 'messageEntityBold': - startTag = endTag = TAG_BOLD - break - case 'messageEntityItalic': - startTag = endTag = TAG_ITALIC - break - case 'messageEntityUnderline': - startTag = endTag = TAG_UNDERLINE - break - case 'messageEntityStrike': - startTag = endTag = TAG_STRIKE - break - case 'messageEntitySpoiler': - startTag = endTag = TAG_SPOILER - break - case 'messageEntityCode': - startTag = endTag = TAG_CODE - break - case 'messageEntityPre': - startTag = TAG_PRE - - if (entity.language) { - startTag += entity.language - } - - startTag += '\n' - endTag = '\n' + TAG_PRE - break - case 'messageEntityTextUrl': - startTag = '[' - endTag = `](${entity.url})` - break - case 'messageEntityMentionName': - startTag = '[' - endTag = `](tg://user?id=${entity.userId})` - break - case 'messageEntityCustomEmoji': - startTag = '[' - endTag = `](tg://emoji?id=${entity.documentId.toString()})` - break - default: - continue - } - - 
insert.push([start, startTag]) - insert.push([end, endTag]) } + }) - // sort by offset desc - insert.sort((a, b) => b[0] - a[0]) + feed(strings[strings.length - 1]) - for (const [offset, tag] of insert) { - text = text.substr(0, offset) + tag + text.substr(offset) + for (const [name, stack] of Object.entries(stacks)) { + if (stack.length) { + throw new Error(`Unterminated ${name} entity`) } + } - return text + return { + text: result, + entities, } } + +// typedoc doesn't support this yet, so we'll have to do it manually +// https://github.com/TypeStrong/typedoc/issues/2436 + +export const md: { + /** + * Tagged template based Markdown-to-entities parser function + * + * Additionally, `md` function has two static methods: + * - `md.escape` - escape a string to be safely used in Markdown + * (should not be needed in most cases, as `md` function itself handles all `string`s + * passed to it automatically as plain text) + * - `md.unparse` - add Markdown formatting to the text given the plain text and entities contained in it + * + * @example + * ```typescript + * const text = md`**${user.displayName}**` + * ``` + */ + ( + strings: TemplateStringsArray, + ...sub: (InputText | MessageEntity | boolean | number | undefined | null)[] + ): TextWithEntities + /** + * A variant taking a plain JS string as input + * and parsing it. + * + * Useful for cases when you already have a string + * (e.g. from some server) and want to parse it. 
+ * + * @example + * ```typescript + * const string = '**hello**' + * const text = md(string) + * ``` + */ + (string: string): TextWithEntities + escape: typeof escape + unparse: typeof unparse +} = Object.assign(parse, { + escape, + unparse, +}) diff --git a/packages/markdown-parser/tests/markdown-parser.spec.ts b/packages/markdown-parser/tests/markdown-parser.spec.ts index c3ddd5e8..b9cf8cba 100644 --- a/packages/markdown-parser/tests/markdown-parser.spec.ts +++ b/packages/markdown-parser/tests/markdown-parser.spec.ts @@ -2,9 +2,10 @@ import { expect } from 'chai' import Long from 'long' import { describe, it } from 'mocha' -import { FormattedString, tl } from '@mtcute/client' +import { MessageEntity, TextWithEntities, tl } from '@mtcute/client' -import { MarkdownMessageEntityParser, md } from '../src/index.js' +// md is special cased in prettier, we don't want that here +import { md as md_ } from '../src/index.js' const createEntity = ( type: T, @@ -21,16 +22,9 @@ const createEntity = ( } describe('MarkdownMessageEntityParser', () => { - const parser = new MarkdownMessageEntityParser() - describe('unparse', () => { - const test = ( - text: string, - entities: tl.TypeMessageEntity[], - expected: string | string[], - _parser = parser, - ): void => { - const result = _parser.unparse(text, entities) + const test = (text: string, entities: tl.TypeMessageEntity[], expected: string | string[]): void => { + const result = md_.unparse({ text, entities }) if (Array.isArray(expected)) { expect(expected).to.include(result) @@ -240,9 +234,9 @@ describe('MarkdownMessageEntityParser', () => { if (!Array.isArray(texts)) texts = [texts] for (const text of texts) { - const [_text, entities] = parser.parse(text) - expect(_text).eql(expectedText) - expect(entities).eql(expectedEntities) + const res = md_(text) + expect(res.text).eql(expectedText) + expect(res.entities ?? 
[]).eql(expectedEntities) } } @@ -492,29 +486,8 @@ describe('MarkdownMessageEntityParser', () => { test('[link]() [link]', [], 'link [link]') }) - it('should ignore unclosed tags', () => { - test('plain ```\npre closed with single backtick`', [], 'plain pre closed with single backtick`') - test('plain ```\npre closed with single backtick\n`', [], 'plain pre closed with single backtick\n`') - - test('plain ```\npre closed with double backticks`', [], 'plain pre closed with double backticks`') - test('plain ```\npre closed with double backticks\n`', [], 'plain pre closed with double backticks\n`') - - test('plain __italic but unclosed', [], 'plain italic but unclosed') - test('plain __italic and **also bold but both unclosed', [], 'plain italic and also bold but both unclosed') - test( - 'plain __italic and **also bold but italic closed__', - [createEntity('messageEntityItalic', 6, 38)], - 'plain italic and also bold but italic closed', - ) - test( - 'plain __italic and **also bold but bold closed**', - [createEntity('messageEntityBold', 17, 25)], - 'plain italic and also bold but bold closed', - ) - }) - describe('malformed input', () => { - const testThrows = (input: string) => expect(() => parser.parse(input)).throws(Error) + const testThrows = (input: string) => expect(() => md_(input)).throws(Error) it('should throw an error on malformed links', () => { testThrows('plain [link](https://google.com but unclosed') @@ -524,37 +497,95 @@ describe('MarkdownMessageEntityParser', () => { testThrows('plain ```pre without linebreaks```') testThrows('plain ``` pre without linebreaks but with spaces instead ```') }) + + it('should throw an error on unterminated entity', () => { + testThrows('plain **bold but unclosed') + testThrows('plain **bold and __also italic but unclosed') + }) }) }) describe('template', () => { - it('should work as a tagged template literal', () => { - const unsafeString = '__[]__' + const test = (text: TextWithEntities, expectedEntities: 
tl.TypeMessageEntity[], expectedText: string): void => { + expect(text.text).eql(expectedText) + expect(text.entities ?? []).eql(expectedEntities) + } - expect(md`${unsafeString}`.value).eq('\\_\\_\\[\\]\\_\\_') - expect(md`${unsafeString} **text**`.value).eq('\\_\\_\\[\\]\\_\\_ **text**') - expect(md`**text** ${unsafeString}`.value).eq('**text** \\_\\_\\[\\]\\_\\_') - expect(md`**${unsafeString}**`.value).eq('**\\_\\_\\[\\]\\_\\_**') + it('should add plain strings as is', () => { + test(md_`${'**plain**'}`, [], '**plain**') }) - it('should skip with FormattedString', () => { - const unsafeString2 = '__[]__' - const unsafeString = new FormattedString('__[]__') - - expect(md`${unsafeString}`.value).eq('__[]__') - expect(md`${unsafeString} ${unsafeString2}`.value).eq('__[]__ \\_\\_\\[\\]\\_\\_') - expect(md`${unsafeString} **text**`.value).eq('__[]__ **text**') - expect(md`**text** ${unsafeString}`.value).eq('**text** __[]__') - expect(md`**${unsafeString} ${unsafeString2}**`.value).eq('**__[]__ \\_\\_\\[\\]\\_\\_**') + it('should skip falsy values', () => { + test(md_`some text ${null} more text ${false}`, [], 'some text more text ') }) - it('should error with incompatible FormattedString', () => { - const unsafeString = new FormattedString('<&>', 'markdown') - const unsafeString2 = new FormattedString('<&>', 'some-other-mode') + it('should properly dedent', () => { + test( + md_` + some text + **bold** + more text + `, + [createEntity('messageEntityBold', 10, 4)], + 'some text\nbold\nmore text', + ) + }) - expect(() => md`${unsafeString}`.value).not.throw(Error) - // @ts-expect-error this is intentional - expect(() => md`${unsafeString2}`.value).throw(Error) + it('should process entities', () => { + const inner = md_`**bold**` + + test( + md_`some text ${inner} some more text`, + [createEntity('messageEntityBold', 10, 4)], + 'some text bold some more text', + ) + test( + md_`some text ${inner} some more ${inner} text`, + [createEntity('messageEntityBold', 10, 4), 
createEntity('messageEntityBold', 25, 4)], + 'some text bold some more bold text', + ) + }) + + it('should process entities on edges', () => { + test( + md_`${md_`**bold**`} and ${md_`__italic__`}`, + [createEntity('messageEntityBold', 0, 4), createEntity('messageEntityItalic', 9, 6)], + 'bold and italic', + ) + }) + + it('should process nested entities', () => { + test( + md_`**bold ${md_`__bold italic__`} more bold**`, + [createEntity('messageEntityItalic', 5, 11), createEntity('messageEntityBold', 0, 26)], + 'bold bold italic more bold', + ) + test( + md_`**bold ${md_`__bold italic__ --and some underline--`} more bold**`, + [ + createEntity('messageEntityItalic', 5, 11), + createEntity('messageEntityUnderline', 17, 18), + createEntity('messageEntityBold', 0, 45), + ], + 'bold bold italic and some underline more bold', + ) + test( + md_`**${md_`__bold italic --underline--__`}**`, + [ + createEntity('messageEntityUnderline', 12, 9), + createEntity('messageEntityItalic', 0, 21), + createEntity('messageEntityBold', 0, 21), + ], + 'bold italic underline', + ) + }) + + it('should process MessageEntity', () => { + test( + md_`**bold ${new MessageEntity(createEntity('messageEntityItalic', 0, 11), 'bold italic')} more bold**`, + [createEntity('messageEntityItalic', 5, 11), createEntity('messageEntityBold', 0, 26)], + 'bold bold italic more bold', + ) }) }) }) diff --git a/packages/node/index.ts b/packages/node/index.ts index d8bd823e..a2dbc97f 100644 --- a/packages/node/index.ts +++ b/packages/node/index.ts @@ -2,8 +2,6 @@ import { createRequire } from 'module' import { createInterface, Interface as RlInterface } from 'readline' import { TelegramClient, TelegramClientOptions } from '@mtcute/client' -import { HtmlMessageEntityParser } from '@mtcute/html-parser' -import { MarkdownMessageEntityParser } from '@mtcute/markdown-parser' import { SqliteStorage } from '@mtcute/sqlite' export * from '@mtcute/client' @@ -23,16 +21,6 @@ try { } catch (e) {} export interface 
NodeTelegramClientOptions extends Omit { - /** - * Default parse mode to use. - * - * Both HTML and Markdown parse modes are - * registered automatically. - * - * @default `html` - */ - defaultParseMode?: 'html' | 'markdown' - /** * Storage to use. * @@ -66,13 +54,6 @@ export class NodeTelegramClient extends TelegramClient { new SqliteStorage(opts.storage) : opts.storage ?? new SqliteStorage('client.session'), }) - - this.registerParseMode(new HtmlMessageEntityParser()) - this.registerParseMode(new MarkdownMessageEntityParser()) - - if (opts.defaultParseMode) { - this.setDefaultParseMode(opts.defaultParseMode) - } } private _rl?: RlInterface