fix(voice): proper handling of waveforms

2021-07-22 01:46:31 +03:00 · 2021-07-22 01:46:31 +03:00 · 4c717ffc63
commit 4c717ffc63
parent 96d133dd2f
4 changed files with 74 additions and 6 deletions
--- a/packages/client/src/methods/files/normalize-input-media.ts
+++ b/packages/client/src/methods/files/normalize-input-media.ts
@ -16,6 +16,7 @@ import { extractFileName } from '../../utils/file-utils'
 import { assertTypeIs } from '../../utils/type-assertion'
 import bigInt from 'big-integer'
 import { normalizeDate } from '../../utils/misc-utils'
 import { encodeWaveform } from '../../utils/voice-utils'
 /**
 * Normalize an {@link InputMediaLike} to `InputMedia`,
@ -381,7 +382,10 @@ export async function _normalizeInputMedia(
            duration: media.duration || 0,
            title: media.type === 'audio' ? media.title : undefined,
            performer: media.type === 'audio' ? media.performer : undefined,
-            waveform: media.type === 'voice' ? media.waveform : undefined,
+            waveform:
                media.type === 'voice' && media.waveform
                    ? encodeWaveform(media.waveform)
                    : undefined,
        })
    }
--- a/packages/client/src/types/media/input-media.ts
+++ b/packages/client/src/types/media/input-media.ts
@ -114,11 +114,15 @@ export interface InputMediaVoice extends BaseInputMedia {
    duration?: number
    /**
-     * Waveform of the voice message
+     * Waveform of the voice message.
     *
     * Represented with integers in range [0, 31],
     * usually has length of 100.
     * Generated by the server if omitted.
     *
     * Only applicable to newly uploaded files.
     */
-    waveform?: Buffer
+    waveform?: number[]
 }
 /**
--- a/packages/client/src/types/media/voice.ts
+++ b/packages/client/src/types/media/voice.ts
@ -3,6 +3,7 @@ import { tl } from '@mtcute/tl'
 import { TelegramClient } from '../../client'
 import { makeInspectable } from '../utils'
 import { tdFileId } from '@mtcute/file-id'
 import { decodeWaveform } from '../../utils/voice-utils'
 /**
 * A voice note.
@ -35,10 +36,13 @@ export class Voice extends RawDocument {
    /**
     * Voice note's waveform
     *
     * Represented with integers in range [0, 31],
     * usually has length of 100
     */
-    get waveform(): Buffer {
+    get waveform(): number[] {
-        return this.attr.waveform!
+        return decodeWaveform(this.attr.waveform!)
    }
 }
-makeInspectable(Voice, ['fileSize', 'dcId'], ['inputMedia', 'inputDocument'])
+makeInspectable(Voice, ['fileSize', 'dcId'], ['inputMedia', 'inputDocument', 'waveform'])
--- a/packages/client/src/utils/voice-utils.ts
+++ b/packages/client/src/utils/voice-utils.ts
@ -0,0 +1,56 @@
 export function decodeWaveform(wf: Buffer): number[] {
    const bitsCount = wf.length * 8
    const valuesCount = ~~(bitsCount / 5)
    if (!valuesCount) return []
    const lastIdx = valuesCount - 1
    // Read each 5 bit of encoded5bit as 0-31 unsigned char.
    // We count the index of the byte in which the desired 5-bit sequence starts.
    // And then we read a uint16 starting from that byte to guarantee to get all of those 5 bits.
    //
    // BUT! if it is the last byte we have, we're not allowed to read a uint16 starting with it.
    // Because it will be an overflow (we'll access one byte after the available memory).
    // We see, that only the last 5 bits could start in the last available byte and be problematic.
    // So we read in a general way all the entries except the last one.
    const result: number[] = []
    for (let i = 0, j = 0; i < lastIdx; i++, j += 5) {
        const byteIdx = ~~(j / 8)
        const bitShift = j % 8
        result[i] = (wf.readUInt16LE(byteIdx) >> bitShift) & 0b11111
    }
    const lastByteIdx = ~~((lastIdx * 5) / 8)
    const lastBitShift = (lastIdx * 5) % 8
    const lastValue =
        lastByteIdx === wf.length - 1
            ? wf[lastByteIdx]
            : wf.readUInt16LE(lastByteIdx)
    result[lastIdx] = (lastValue >> lastBitShift) & 0b11111
    return result
 }
 export function encodeWaveform(wf: number[]): Buffer {
    const bitsCount = wf.length * 5
    const bytesCount = ~~(bitsCount + 7) / 8
    const result = Buffer.alloc(bytesCount + 1)
    // Write each 0-31 unsigned char as 5 bit to result.
    // We reserve one extra byte to be able to dereference any of required bytes
    // as a uint16 without overflowing, even the byte with index "bytesCount - 1".
    for (let i = 0, j = 0; i < wf.length; i++, j += 5) {
        const byteIdx = ~~(j / 8)
        const bitShift = j % 8
        const value = (wf[i] & 0b11111) << bitShift
        const old = result.readUInt16LE(byteIdx)
        result.writeUInt16LE(old | value, byteIdx)
    }
    return result.slice(0, bytesCount)
 }