fix(voice): proper handling of waveforms

2021-07-22 01:46:31 +03:00 · 2021-07-22 01:46:31 +03:00 · 4c717ffc63
commit 4c717ffc63
parent 96d133dd2f
4 changed files with 74 additions and 6 deletions
--- a/packages/client/src/methods/files/normalize-input-media.ts
+++ b/packages/client/src/methods/files/normalize-input-media.ts
@ -16,6 +16,7 @@ import { extractFileName } from '../../utils/file-utils'
 import { assertTypeIs } from '../../utils/type-assertion'
 import bigInt from 'big-integer'
 import { normalizeDate } from '../../utils/misc-utils'
+import { encodeWaveform } from '../../utils/voice-utils'

 /**
 * Normalize an {@link InputMediaLike} to `InputMedia`,
@ -381,7 +382,10 @@ export async function _normalizeInputMedia(
            duration: media.duration || 0,
            title: media.type === 'audio' ? media.title : undefined,
            performer: media.type === 'audio' ? media.performer : undefined,
-            waveform: media.type === 'voice' ? media.waveform : undefined,
+            waveform:
+                media.type === 'voice' && media.waveform
+                    ? encodeWaveform(media.waveform)
+                    : undefined,
        })
    }

--- a/packages/client/src/types/media/input-media.ts
+++ b/packages/client/src/types/media/input-media.ts
@ -114,11 +114,15 @@ export interface InputMediaVoice extends BaseInputMedia {
    duration?: number

    /**
-     * Waveform of the voice message
+     * Waveform of the voice message.
+     *
+     * Represented with integers in range [0, 31],
+     * usually has length of 100.
+     * Generated by the server if omitted.
     *
     * Only applicable to newly uploaded files.
     */
-    waveform?: Buffer
+    waveform?: number[]
 }

 /**
--- a/packages/client/src/types/media/voice.ts
+++ b/packages/client/src/types/media/voice.ts
@ -3,6 +3,7 @@ import { tl } from '@mtcute/tl'
 import { TelegramClient } from '../../client'
 import { makeInspectable } from '../utils'
 import { tdFileId } from '@mtcute/file-id'
+import { decodeWaveform } from '../../utils/voice-utils'

 /**
 * A voice note.
@ -35,10 +36,13 @@ export class Voice extends RawDocument {

    /**
     * Voice note's waveform
+     *
+     * Represented with integers in range [0, 31],
+     * usually has length of 100
     */
-    get waveform(): Buffer {
-        return this.attr.waveform!
+    get waveform(): number[] {
+        return decodeWaveform(this.attr.waveform!)
    }
 }

-makeInspectable(Voice, ['fileSize', 'dcId'], ['inputMedia', 'inputDocument'])
+makeInspectable(Voice, ['fileSize', 'dcId'], ['inputMedia', 'inputDocument', 'waveform'])
--- a/packages/client/src/utils/voice-utils.ts
+++ b/packages/client/src/utils/voice-utils.ts
@ -0,0 +1,56 @@
+/**
+ * Unpack a voice message waveform into its 5-bit samples.
+ *
+ * The buffer is treated as a little-endian bit stream in which every
+ * sample occupies 5 consecutive bits; trailing bits that do not form
+ * a whole sample are ignored.
+ *
+ * @param wf  Packed waveform
+ * @returns  Samples, each in range [0, 31] (empty for an empty buffer)
+ */
+export function decodeWaveform(wf: Buffer): number[] {
+    const total = ~~((wf.length * 8) / 5)
+    if (total === 0) return []
+
+    const tail = total - 1
+    const samples: number[] = []
+
+    // A sample may straddle two bytes, so each one is cut out of a 16-bit
+    // little-endian chunk that begins at the byte holding its first bit.
+    for (let n = 0; n < tail; n++) {
+        const bit = n * 5
+        const chunk = wf.readUInt16LE(~~(bit / 8))
+        samples.push((chunk >> (bit % 8)) & 0x1f)
+    }
+
+    // Only the final sample can begin in the very last byte, where a 16-bit
+    // read would run past the end of the buffer; read a single byte then.
+    const tailBit = tail * 5
+    const tailByte = ~~(tailBit / 8)
+    const atLastByte = tailByte === wf.length - 1
+    const tailChunk = atLastByte ? wf[tailByte] : wf.readUInt16LE(tailByte)
+    samples.push((tailChunk >> (tailBit % 8)) & 0x1f)
+    return samples
+}
+
+
+/**
+ * Pack waveform samples into a little-endian stream of 5-bit values.
+ * @param wf  Samples; only the low 5 bits of each entry are stored
+ * @returns  Buffer of `ceil(wf.length * 5 / 8)` bytes, padding bits zeroed
+ */
+export function encodeWaveform(wf: number[]): Buffer {
+    // Round the bit length up to whole bytes. The truncation has to wrap the
+    // entire division, otherwise the byte count comes out fractional.
+    const bytesCount = ~~((wf.length * 5 + 7) / 8)
+    // One spare byte lets every sample, even one starting in the last real
+    // byte, be merged through a 16-bit window; it is cut off on return.
+    const result = Buffer.alloc(bytesCount + 1)
+
+    for (let i = 0, j = 0; i < wf.length; i++, j += 5) {
+        const byteIdx = ~~(j / 8)
+        const value = (wf[i] & 0b11111) << (j % 8)
+        result.writeUInt16LE(result.readUInt16LE(byteIdx) | value, byteIdx)
+    }
+    return result.slice(0, bytesCount)
+}