From 4c717ffc6398f439f77ba54c5c4cdb6ed84c0437 Mon Sep 17 00:00:00 2001 From: teidesu <86301490+teidesu@users.noreply.github.com> Date: Thu, 22 Jul 2021 01:46:31 +0300 Subject: [PATCH] fix(voice): proper handling of waveforms --- .../methods/files/normalize-input-media.ts | 6 +- .../client/src/types/media/input-media.ts | 8 ++- packages/client/src/types/media/voice.ts | 10 +++- packages/client/src/utils/voice-utils.ts | 56 +++++++++++++++++++ 4 files changed, 74 insertions(+), 6 deletions(-) create mode 100644 packages/client/src/utils/voice-utils.ts diff --git a/packages/client/src/methods/files/normalize-input-media.ts b/packages/client/src/methods/files/normalize-input-media.ts index 767f4919..73209580 100644 --- a/packages/client/src/methods/files/normalize-input-media.ts +++ b/packages/client/src/methods/files/normalize-input-media.ts @@ -16,6 +16,7 @@ import { extractFileName } from '../../utils/file-utils' import { assertTypeIs } from '../../utils/type-assertion' import bigInt from 'big-integer' import { normalizeDate } from '../../utils/misc-utils' +import { encodeWaveform } from '../../utils/voice-utils' /** * Normalize an {@link InputMediaLike} to `InputMedia`, @@ -381,7 +382,10 @@ export async function _normalizeInputMedia( duration: media.duration || 0, title: media.type === 'audio' ? media.title : undefined, performer: media.type === 'audio' ? media.performer : undefined, - waveform: media.type === 'voice' ? media.waveform : undefined, + waveform: + media.type === 'voice' && media.waveform + ? encodeWaveform(media.waveform) + : undefined, }) } diff --git a/packages/client/src/types/media/input-media.ts b/packages/client/src/types/media/input-media.ts index 377dd504..979af34b 100644 --- a/packages/client/src/types/media/input-media.ts +++ b/packages/client/src/types/media/input-media.ts @@ -114,11 +114,15 @@ export interface InputMediaVoice extends BaseInputMedia { duration?: number /** - * Waveform of the voice message + * Waveform of the voice message. 
/**
 * Decode a packed voice message waveform into individual samples.
 *
 * The packed form stores consecutive 5-bit unsigned values, starting
 * from the least significant bit of the first byte; a value may
 * straddle two adjacent bytes.
 *
 * Only complete 5-bit groups are decoded, so the result has
 * `floor(wf.length * 8 / 5)` items. Because of byte padding this may be
 * one item longer than the array originally passed to
 * {@link encodeWaveform} (the extra item is then `0`).
 *
 * @param wf  Packed waveform
 * @returns  Samples, each an integer in range [0, 31]
 */
export function decodeWaveform(wf: Buffer): number[] {
    const valuesCount = Math.floor((wf.length * 8) / 5)
    const result: number[] = []

    for (let i = 0, bitPos = 0; i < valuesCount; i++, bitPos += 5) {
        const byteIdx = bitPos >> 3
        const bitShift = bitPos & 7

        // A 5-bit group starting at `bitShift` may spill into the next
        // byte. The next byte is only read when it exists, so the very
        // last group never reads past the end of the buffer.
        const low = wf[byteIdx]
        const high = byteIdx + 1 < wf.length ? wf[byteIdx + 1] : 0

        result.push(((low | (high << 8)) >> bitShift) & 0b11111)
    }

    return result
}

/**
 * Pack waveform samples into the 5-bits-per-sample binary form.
 *
 * Samples are written consecutively, starting from the least
 * significant bit of the first byte. Only the lowest 5 bits of every
 * sample are used. Unused bits of the last byte are left as zero.
 *
 * @param wf  Samples, expected to be integers in range [0, 31]
 * @returns  Buffer of exactly `ceil(wf.length * 5 / 8)` bytes
 */
export function encodeWaveform(wf: number[]): Buffer {
    const bitsCount = wf.length * 5
    // NB: the whole sum must be divided before truncating. Written as
    // `~~(bitsCount + 7) / 8` the division happens last and the byte
    // count ends up fractional.
    const bytesCount = Math.floor((bitsCount + 7) / 8)
    const result = Buffer.alloc(bytesCount)

    for (let i = 0, bitPos = 0; i < wf.length; i++, bitPos += 5) {
        const byteIdx = bitPos >> 3
        const bitShift = bitPos & 7

        // At most 12 significant bits: 5 bits shifted left by up to 7.
        const value = (wf[i] & 0b11111) << bitShift

        result[byteIdx] |= value & 0xff

        // The high part is non-zero only when the sample crosses a byte
        // boundary, in which case the next byte is within `bytesCount`.
        const spill = value >> 8
        if (spill !== 0) {
            result[byteIdx + 1] |= spill
        }
    }

    return result
}