From 19d675e0fbc58c76fd31c5cad3565d216fc3a011 Mon Sep 17 00:00:00 2001
From: teidesu <me@tei.su>
Date: Fri, 21 May 2021 23:24:52 +0300
Subject: [PATCH] feat(core): utf string length calculation optimization

only has effect on in-browser use when the Buffer is polyfilled, since with BrowserBuffer we had to write it twice.
---
 .../core/src/utils/binary/binary-writer.ts    | 70 ++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/packages/core/src/utils/binary/binary-writer.ts b/packages/core/src/utils/binary/binary-writer.ts
index 0db9d4d7..6c5be1f5 100644
--- a/packages/core/src/utils/binary/binary-writer.ts
+++ b/packages/core/src/utils/binary/binary-writer.ts
@@ -13,6 +13,74 @@ type SerializableObject = {
 
 const isNativeBigIntAvailable = typeof BigInt !== 'undefined' && 'writeBigInt64LE' in Buffer.prototype
 
+function utf8ByteLength (string: string): number {
+    let codePoint
+    const length = string.length
+    let leadSurrogate = null
+    let bytes = 0
+
+    for (let i = 0; i < length; ++i) {
+        codePoint = string.charCodeAt(i)
+
+        // is surrogate component
+        if (codePoint > 0xD7FF && codePoint < 0xE000) {
+            // last char was a lead
+            if (!leadSurrogate) {
+                // no lead yet
+                if (codePoint > 0xDBFF) {
+                    // unexpected trail
+                    bytes += 3
+                    continue
+                } else if (i + 1 === length) {
+                    // unpaired lead
+                    bytes += 3
+                    continue
+                }
+
+                // valid lead
+                leadSurrogate = codePoint
+
+                continue
+            }
+
+            // 2 leads in a row
+            if (codePoint < 0xDC00) {
+                bytes += 3
+                leadSurrogate = codePoint
+                continue
+            }
+
+            // valid surrogate pair
+            codePoint = (leadSurrogate - 0xD800 << 10 | codePoint - 0xDC00) + 0x10000
+        } else if (leadSurrogate) {
+            // valid bmp char, but last char was a lead
+            bytes += 3
+        }
+
+        leadSurrogate = null
+
+        // encode utf8
+        if (codePoint < 0x80) {
+            bytes += 1
+        } else if (codePoint < 0x800) {
+            bytes += 2
+        } else if (codePoint < 0x10000) {
+            bytes += 3
+        } else if (codePoint < 0x110000) {
+            bytes += 4
+        } else {
+            throw new Error('Invalid code point')
+        }
+    }
+
+    return bytes
+}
+
+// buffer package for the web detects size by writing the string to an array and checking size
+// that is slow.
+// see https://github.com/feross/buffer/blob/795bbb5bda1b39f1370ebd784bea6107b087e3a7/index.js#L527
+const utfLength = (Buffer.prototype as any)._isBuffer ? utf8ByteLength : Buffer.byteLength
+
 export class SerializationCounter implements ITlBinaryWriter {
     count = 0
     _objectMap = writerMap
@@ -82,7 +150,7 @@ export class SerializationCounter implements ITlBinaryWriter {
     }
 
     string(val: string): void {
-        const length = Buffer.byteLength(val, 'utf-8')
+        const length = utfLength(val)
         let padding
         if (length <= 253) {
             this.count += 1