feat: wasm! 🚀

alina 🌸 2023-11-04 06:44:18 +03:00
parent 4c42304a79
commit eec142f0e5
Signed by: teidesu
SSH key fingerprint: SHA256:uNeCpw6aTSU4aIObXLvHfLkDa82HWH9EiOj9AXOIRpI
80 changed files with 10231 additions and 535 deletions

View file

@@ -46,13 +46,13 @@
"dependencies": {
"@mtcute/tl": "workspace:^",
"@mtcute/tl-runtime": "workspace:^",
"@mtcute/wasm": "workspace:^",
"@types/events": "3.0.0",
"big-integer": "1.6.51",
"events": "3.2.0",
"long": "5.2.3"
},
"devDependencies": {
"@cryptography/aes": "^0.1.1",
"@types/ws": "8.5.4",
"node-forge": "1.3.1",
"ws": "8.13.0"

View file

@@ -361,6 +361,7 @@ export class BaseTelegramClient extends EventEmitter {
const promise = (this._connected = createControllablePromise())
await this.crypto.initialize?.()
await this._loadStorage()
const primaryDc = await this.storage.getDefaultDcs()
if (primaryDc !== null) this._defaultDcs = primaryDc

View file

@@ -5,7 +5,14 @@ import { TlBinaryReader, TlReaderMap } from '@mtcute/tl-runtime'
import { MtcuteError } from '../types/errors.js'
import { createAesIgeForMessage } from '../utils/crypto/mtproto.js'
import { buffersEqual, concatBuffers, dataViewFromBuffer, ICryptoProvider, Logger, randomBytes } from '../utils/index.js'
import {
buffersEqual,
concatBuffers,
dataViewFromBuffer,
ICryptoProvider,
Logger,
randomBytes,
} from '../utils/index.js'
export class AuthKey {
ready = false
@@ -55,7 +62,7 @@ export class AuthKey {
const messageKey = (await this._crypto.sha256(concatBuffers([this.clientSalt, buf]))).subarray(8, 24)
const ige = await createAesIgeForMessage(this._crypto, this.key, messageKey, true)
const encryptedData = await ige.encrypt(buf)
const encryptedData = ige.encrypt(buf)
return concatBuffers([this.id, messageKey, encryptedData])
}
@@ -78,7 +85,7 @@
}
const ige = await createAesIgeForMessage(this._crypto, this.key, messageKey, false)
const innerData = await ige.decrypt(encryptedData)
const innerData = ige.decrypt(encryptedData)
const msgKeySource = await this._crypto.sha256(concatBuffers([this.serverSalt, innerData]))
const expectedMessageKey = msgKeySource.subarray(8, 24)
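For context, the msg_key validation in the decrypt path above, condensed into a standalone sketch. It uses the helpers this file already imports (`concatBuffers`, `buffersEqual`, `ICryptoProvider`, `MtcuteError`); the function name and error message are illustrative, not the actual class method:

```ts
async function checkMessageKey(
    crypto: ICryptoProvider,
    serverSalt: Uint8Array,
    innerData: Uint8Array,
    receivedMessageKey: Uint8Array,
): Promise<void> {
    // msg_key must equal the middle 16 bytes of SHA-256(salt + plaintext)
    const msgKeySource = await crypto.sha256(concatBuffers([serverSalt, innerData]))
    const expectedMessageKey = msgKeySource.subarray(8, 24)

    if (!buffersEqual(receivedMessageKey, expectedMessageKey)) {
        throw new MtcuteError('message key mismatch') // illustrative message
    }
}
```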

View file

@@ -141,8 +141,8 @@ async function rsaPad(data: Uint8Array, crypto: ICryptoProvider, key: TlPublicKe
// we only need to reverse the data
dataWithHash.subarray(0, 192).reverse()
const aes = await crypto.createAesIge(aesKey, aesIv)
const encrypted = await aes.encrypt(dataWithHash)
const aes = crypto.createAesIge(aesKey, aesIv)
const encrypted = aes.encrypt(dataWithHash)
const encryptedHash = await crypto.sha256(encrypted)
xorBufferInPlace(aesKey, encryptedHash)
@@ -300,9 +300,9 @@ export async function doAuthorization(
// Step 3: complete DH exchange
const [key, iv] = await generateKeyAndIvFromNonce(crypto, resPq.serverNonce, newNonce)
const ige = await crypto.createAesIge(key, iv)
const ige = crypto.createAesIge(key, iv)
const plainTextAnswer = await ige.decrypt(serverDhParams.encryptedAnswer)
const plainTextAnswer = ige.decrypt(serverDhParams.encryptedAnswer)
const innerDataHash = plainTextAnswer.subarray(0, 20)
const serverDhInnerReader = new TlBinaryReader(readerMap, plainTextAnswer, 20)
const serverDhInner = serverDhInnerReader.object() as mtp.TlObject
@@ -379,7 +379,7 @@ export async function doAuthorization(
log.debug('sending client DH (timeOffset = %d)', timeOffset)
const clientDhEncrypted = await ige.encrypt(clientDhInnerWriter.uint8View)
const clientDhEncrypted = ige.encrypt(clientDhInnerWriter.uint8View)
await sendPlainMessage({
_: 'mt_set_client_DH_params',
nonce,

View file

@@ -3,15 +3,7 @@
import Long from 'long'
import { mtp, tl } from '@mtcute/tl'
import {
gzipDeflate,
gzipInflate,
TlBinaryReader,
TlBinaryWriter,
TlReaderMap,
TlSerializationCounter,
TlWriterMap,
} from '@mtcute/tl-runtime'
import { TlBinaryReader, TlBinaryWriter, TlReaderMap, TlSerializationCounter, TlWriterMap } from '@mtcute/tl-runtime'
import { MtArgumentError, MtcuteError, MtTimeoutError } from '../types/index.js'
import { createAesIgeForMessageOld } from '../utils/crypto/mtproto.js'
@@ -20,6 +12,7 @@ import {
ControllablePromise,
createControllablePromise,
EarlyTimer,
ICryptoProvider,
longFromBuffer,
randomBytes,
randomLong,
@@ -51,6 +44,12 @@ export interface SessionConnectionParams extends PersistentConnectionParams {
// destroy_auth_key#d1435160 = DestroyAuthKeyRes;
// const DESTROY_AUTH_KEY = Buffer.from('605134d1', 'hex')
// gzip_packed#3072cfa1 packed_data:string = Object;
const GZIP_PACKED_ID = 0x3072cfa1
// msg_container#73f1f8dc messages:vector<%Message> = MessageContainer;
const MSG_CONTAINER_ID = 0x73f1f8dc
// rpc_result#f35c6d01 req_msg_id:long result:Object = RpcResult;
const RPC_RESULT_ID = 0xf35c6d01
function makeNiceStack(error: tl.RpcError, stack: string, method?: string) {
error.stack = `RpcError (${error.code} ${error.text}): ${error.message}\n at ${method}\n${stack
@@ -80,6 +79,7 @@ export class SessionConnection extends PersistentConnection {
private _readerMap: TlReaderMap
private _writerMap: TlWriterMap
private _crypto: ICryptoProvider
constructor(
params: SessionConnectionParams,
@@ -90,6 +90,7 @@
this._readerMap = params.readerMap
this._writerMap = params.writerMap
this._crypto = params.crypto
this._handleRawMessage = this._handleRawMessage.bind(this)
}
@@ -265,7 +266,7 @@
this._session.authorizationPending = true
this.emit('auth-begin')
doAuthorization(this, this.params.crypto)
doAuthorization(this, this._crypto)
.then(async ([authKey, serverSalt, timeOffset]) => {
await this._session._authKey.setup(authKey)
this._session.serverSalt = serverSalt
@@ -312,7 +313,7 @@
this._isPfsBindingPending = true
}
doAuthorization(this, this.params.crypto, TEMP_AUTH_KEY_EXPIRY)
doAuthorization(this, this._crypto, TEMP_AUTH_KEY_EXPIRY)
.then(async ([tempAuthKey, tempServerSalt]) => {
if (!this._usePfs) {
this.log.info('pfs has been disabled while generating temp key')
@@ -357,16 +358,11 @@
writer.raw(randomBytes(8))
const msgWithPadding = writer.result()
const hash = await this.params.crypto.sha1(msgWithoutPadding)
const hash = await this._crypto.sha1(msgWithoutPadding)
const msgKey = hash.subarray(4, 20)
const ige = await createAesIgeForMessageOld(
this.params.crypto,
this._session._authKey.key,
msgKey,
true,
)
const encryptedData = await ige.encrypt(msgWithPadding)
const ige = await createAesIgeForMessageOld(this._crypto, this._session._authKey.key, msgKey, true)
const encryptedData = ige.encrypt(msgWithPadding)
const encryptedMessage = concatBuffers([this._session._authKey.id, msgKey, encryptedData])
const promise = createControllablePromise<mtp.RawMt_rpc_error | boolean>()
@@ -512,22 +508,17 @@
}
private _handleRawMessage(messageId: Long, seqNo: number, message: TlBinaryReader): void {
if (message.peekUint() === 0x3072cfa1) {
// gzip_packed
// we can't use message.gzip() because it may contain msg_container,
// so we parse it manually.
message.uint()
const objectId = message.uint()
if (objectId === GZIP_PACKED_ID) {
return this._handleRawMessage(
messageId,
seqNo,
new TlBinaryReader(this._readerMap, gzipInflate(message.bytes())),
new TlBinaryReader(this._readerMap, this._crypto.gunzip(message.bytes())),
)
}
if (message.peekUint() === 0x73f1f8dc) {
// msg_container
message.uint()
if (objectId === MSG_CONTAINER_ID) {
const count = message.uint()
for (let i = 0; i < count; i++) {
@@ -545,15 +536,12 @@
return
}
if (message.peekUint() === 0xf35c6d01) {
// rpc_result
message.uint()
if (objectId === RPC_RESULT_ID) {
return this._onRpcResult(messageId, message)
}
// we are safe.. i guess
this._handleMessage(messageId, message.object())
this._handleMessage(messageId, message.object(objectId))
}
private _handleMessage(messageId: Long, message_: unknown): void {
@@ -729,7 +717,22 @@
const rpc = msg.rpc
const customReader = this._readerMap._results![rpc.method]
const result: any = customReader ? customReader(message) : message.object()
let result: any
if (customReader) {
result = customReader(message)
} else {
const objectId = message.uint()
if (objectId === GZIP_PACKED_ID) {
const inner = this._crypto.gunzip(message.bytes())
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
result = TlBinaryReader.deserializeObject(this._readerMap, inner)
} else {
result = message.object(objectId)
}
}
// initConnection call was definitely received and
// processed by the server, so we no longer need to use it
@@ -1262,13 +1265,14 @@
// if it is less than 0.9, then try to compress the whole request
const middle = ~~((content.length - 1024) / 2)
const gzipped = gzipDeflate(content.subarray(middle, middle + 1024), 0.9)
const middlePart = content.subarray(middle, middle + 1024)
const gzipped = this._crypto.gzip(middlePart, Math.floor(middlePart.length * 0.9))
if (!gzipped) shouldGzip = false
}
if (shouldGzip) {
const gzipped = gzipDeflate(content, 0.9)
const gzipped = this._crypto.gzip(content, Math.floor(content.length * 0.9))
if (gzipped) {
this.log.debug('gzipped %s (%db -> %db)', method, content.length, gzipped.length)
@@ -1601,7 +1605,7 @@
// leave bytes for mtproto header (we'll write it later,
// since we need seqno and msg_id to be larger than the content)
writer.pos += 16
writer.uint(0x73f1f8dc) // msg_container
writer.uint(MSG_CONTAINER_ID)
writer.uint(messageCount)
}
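For reference, the compression probe from the hunk a bit further up, as a standalone sketch: a 1 KB slice from the middle of the payload is test-compressed first, and only if that probe fits a 90% budget is the whole payload compressed. The enclosing size check is assumed; `gzip` follows the `ICryptoProvider.gzip` contract of returning `null` when the budget is exceeded:

```ts
function tryGzip(
    content: Uint8Array,
    gzip: (data: Uint8Array, maxSize: number) => Uint8Array | null,
): Uint8Array | null {
    if (content.length > 1024) {
        // probe: if a middle slice doesn't compress to <90% of its
        // size, the full payload most likely won't either
        const middle = ~~((content.length - 1024) / 2)
        const middlePart = content.subarray(middle, middle + 1024)

        if (gzip(middlePart, Math.floor(middlePart.length * 0.9)) === null) return null
    }

    // compress the whole payload under the same 90% budget
    return gzip(content, Math.floor(content.length * 0.9))
}
```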

View file

@@ -1,5 +1,5 @@
import { concatBuffers, dataViewFromBuffer } from '../../utils/buffer-utils.js'
import { IEncryptionScheme, randomBytes } from '../../utils/index.js'
import { IAesCtr, randomBytes } from '../../utils/index.js'
import { IPacketCodec } from './abstract.js'
import { WrappedCodec } from './wrapped.js'
@@ -11,8 +8,8 @@ export interface MtProxyInfo {
}
export class ObfuscatedPacketCodec extends WrappedCodec implements IPacketCodec {
private _encryptor?: IEncryptionScheme
private _decryptor?: IEncryptionScheme
private _encryptor?: IAesCtr
private _decryptor?: IAesCtr
private _proxy?: MtProxyInfo
@@ -78,31 +78,31 @@ export class ObfuscatedPacketCodec extends WrappedCodec implements IPacketCodec
decryptKey = await this._crypto.sha256(concatBuffers([decryptKey, this._proxy.secret]))
}
this._encryptor = await this._crypto.createAesCtr(encryptKey, encryptIv, true)
this._decryptor = await this._crypto.createAesCtr(decryptKey, decryptIv, false)
this._encryptor = this._crypto.createAesCtr(encryptKey, encryptIv, true)
this._decryptor = this._crypto.createAesCtr(decryptKey, decryptIv, false)
const encrypted = await this._encryptor.encrypt(random)
const encrypted = this._encryptor.process(random)
random.set(encrypted.subarray(56, 64), 56)
return random
}
async encode(packet: Uint8Array): Promise<Uint8Array> {
return this._encryptor!.encrypt(await this._inner.encode(packet))
return this._encryptor!.process(await this._inner.encode(packet))
}
feed(data: Uint8Array): void {
const dec = this._decryptor!.decrypt(data)
const dec = this._decryptor!.process(data)
if (ArrayBuffer.isView(dec)) this._inner.feed(dec)
else {
dec.then((dec) => this._inner.feed(dec)).catch((err) => this.emit('error', err))
}
this._inner.feed(dec)
}
reset(): void {
this._inner.reset()
delete this._encryptor
delete this._decryptor
this._encryptor?.close?.()
this._decryptor?.close?.()
this._encryptor = undefined
this._decryptor = undefined
}
}

View file

@@ -1,11 +1,14 @@
import { MaybeAsync } from '../../types/index.js'
import { AesModeOfOperationIge } from './common.js'
import { factorizePQSync } from './factorization.js'
export interface IEncryptionScheme {
encrypt(data: Uint8Array): MaybeAsync<Uint8Array>
encrypt(data: Uint8Array): Uint8Array
decrypt(data: Uint8Array): Uint8Array
}
decrypt(data: Uint8Array): MaybeAsync<Uint8Array>
export interface IAesCtr {
process(data: Uint8Array): Uint8Array
close?(): void
}
export interface ICryptoProvider {
@@ -25,32 +28,20 @@
hmacSha256(data: Uint8Array, key: Uint8Array): MaybeAsync<Uint8Array>
// in telegram, iv is always either used only once, or is the same for all calls for the key
createAesCtr(key: Uint8Array, iv: Uint8Array, encrypt: boolean): MaybeAsync<IEncryptionScheme>
createAesCtr(key: Uint8Array, iv: Uint8Array, encrypt: boolean): IAesCtr
createAesIge(key: Uint8Array, iv: Uint8Array): MaybeAsync<IEncryptionScheme>
createAesEcb(key: Uint8Array): MaybeAsync<IEncryptionScheme>
createAesIge(key: Uint8Array, iv: Uint8Array): IEncryptionScheme
factorizePQ(pq: Uint8Array): MaybeAsync<[Uint8Array, Uint8Array]>
gzip(data: Uint8Array, maxSize: number): Uint8Array | null
gunzip(data: Uint8Array): Uint8Array
}
export abstract class BaseCryptoProvider {
createAesIge(key: Uint8Array, iv: Uint8Array): MaybeAsync<IEncryptionScheme> {
const ecb = this.createAesEcb(key)
if ('then' in ecb) {
return ecb.then((ecb) => new AesModeOfOperationIge(key, iv, ecb))
}
return new AesModeOfOperationIge(key, iv, ecb)
}
factorizePQ(pq: Uint8Array) {
return factorizePQSync(pq)
}
abstract createAesEcb(key: Uint8Array): MaybeAsync<IEncryptionScheme>
}
export type CryptoProviderFactory = () => ICryptoProvider
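A minimal usage sketch of the reworked interface, assuming the Node provider introduced later in this commit (the import paths are illustrative):

```ts
import { randomBytes } from 'crypto'

import { ICryptoProvider } from './abstract.js'
import { NodeCryptoProvider } from './node.js'

const provider: ICryptoProvider = new NodeCryptoProvider()
await provider.initialize?.() // loads the wasm module, if the provider needs one

const key = randomBytes(32)
const iv = randomBytes(16)

// IAesCtr is stateful: the keystream position carries over between
// process() calls; close() (where implemented) frees native resources
const enc = provider.createAesCtr(key, iv, true)
const dec = provider.createAesCtr(key, iv, false)

const ct = enc.process(new TextEncoder().encode('hello'))
console.log(new TextDecoder().decode(dec.process(ct))) // hello

enc.close?.()
dec.close?.()
```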

View file

@@ -1,67 +0,0 @@
import type { IEncryptionScheme } from './abstract.js'
import { xorBufferInPlace } from './utils.js'
/**
* AES mode of operation IGE implementation in JS
*/
export class AesModeOfOperationIge implements IEncryptionScheme {
private _key: Uint8Array
private _iv: Uint8Array
private _aes: IEncryptionScheme
constructor(key: Uint8Array, iv: Uint8Array, ecb: IEncryptionScheme) {
this._key = key
this._iv = iv
this._aes = ecb
}
async encrypt(data: Uint8Array): Promise<Uint8Array> {
if (data.length % 16 !== 0) {
throw new Error('invalid plaintext size (must be multiple of 16 bytes)')
}
const ciphertext = new Uint8Array(data.length)
let block = new Uint8Array(16)
let iv1 = this._iv.subarray(0, 16)
let iv2 = this._iv.subarray(16, 32)
for (let i = 0; i < data.length; i += 16) {
block.set(data.subarray(i, i + 16))
xorBufferInPlace(block, iv1)
block = await this._aes.encrypt(block)
xorBufferInPlace(block, iv2)
ciphertext.set(block, i)
iv1 = ciphertext.subarray(i, i + 16)
iv2 = data.subarray(i, i + 16)
}
return ciphertext
}
async decrypt(data: Uint8Array): Promise<Uint8Array> {
if (data.length % 16 !== 0) {
throw new Error('invalid ciphertext size (must be multiple of 16 bytes)')
}
const plaintext = new Uint8Array(data.length)
let block = new Uint8Array(16)
let iv1 = this._iv.subarray(16, 32)
let iv2 = this._iv.subarray(0, 16)
for (let i = 0; i < data.length; i += 16) {
block.set(data.subarray(i, i + 16))
xorBufferInPlace(block, iv1)
block = await this._aes.decrypt(block)
xorBufferInPlace(block, iv2)
plaintext.set(block, i)
iv1 = plaintext.subarray(i, i + 16)
iv2 = data.subarray(i, i + 16)
}
return plaintext
}
}

View file

@@ -1,64 +0,0 @@
// eslint-disable-next-line no-restricted-imports
import { createCipheriv, createDecipheriv, createHash, createHmac, pbkdf2 } from 'crypto'
import { MaybeAsync } from '../../types/index.js'
import { concatBuffers } from '../buffer-utils.js'
import { BaseCryptoProvider, ICryptoProvider, IEncryptionScheme } from './abstract.js'
export class NodeCryptoProvider extends BaseCryptoProvider implements ICryptoProvider {
createAesCtr(key: Uint8Array, iv: Uint8Array, encrypt: boolean): IEncryptionScheme {
const cipher = (encrypt ? createCipheriv : createDecipheriv)(`aes-${key.length * 8}-ctr`, key, iv)
const update = (data: Uint8Array) => cipher.update(data)
return {
encrypt: update,
decrypt: update,
}
}
createAesEcb(key: Uint8Array): IEncryptionScheme {
const methodName = `aes-${key.length * 8}-ecb`
return {
encrypt(data: Uint8Array) {
const cipher = createCipheriv(methodName, key, null)
cipher.setAutoPadding(false)
return concatBuffers([cipher.update(data), cipher.final()])
},
decrypt(data: Uint8Array) {
const cipher = createDecipheriv(methodName, key, null)
cipher.setAutoPadding(false)
return concatBuffers([cipher.update(data), cipher.final()])
},
}
}
pbkdf2(
password: Uint8Array,
salt: Uint8Array,
iterations: number,
keylen = 64,
algo = 'sha512',
): MaybeAsync<Uint8Array> {
return new Promise((resolve, reject) =>
pbkdf2(password, salt, iterations, keylen, algo, (err: Error | null, buf: Uint8Array) =>
err !== null ? reject(err) : resolve(buf),
),
)
}
sha1(data: Uint8Array): Uint8Array {
return createHash('sha1').update(data).digest()
}
sha256(data: Uint8Array): Uint8Array {
return createHash('sha256').update(data).digest()
}
hmacSha256(data: Uint8Array, key: Uint8Array): MaybeAsync<Uint8Array> {
return createHmac('sha256', key).update(data).digest()
}
}

View file

@@ -0,0 +1,93 @@
// eslint-disable-next-line no-restricted-imports
import { createCipheriv, createHash, createHmac, pbkdf2 } from 'crypto'
import { deflateSync, gunzipSync } from 'zlib'
import { ige256Decrypt, ige256Encrypt, initAsync, InitInput } from '@mtcute/wasm'
import { MaybeAsync } from '../../types/index.js'
import { BaseCryptoProvider, IAesCtr, ICryptoProvider, IEncryptionScheme } from './abstract.js'
export abstract class BaseNodeCryptoProvider extends BaseCryptoProvider {
createAesCtr(key: Uint8Array, iv: Uint8Array): IAesCtr {
const cipher = createCipheriv(`aes-${key.length * 8}-ctr`, key, iv)
const update = (data: Uint8Array) => cipher.update(data)
return {
process: update,
}
}
pbkdf2(
password: Uint8Array,
salt: Uint8Array,
iterations: number,
keylen = 64,
algo = 'sha512',
): MaybeAsync<Uint8Array> {
return new Promise((resolve, reject) =>
pbkdf2(password, salt, iterations, keylen, algo, (err: Error | null, buf: Uint8Array) =>
err !== null ? reject(err) : resolve(buf),
),
)
}
sha1(data: Uint8Array): Uint8Array {
return createHash('sha1').update(data).digest()
}
sha256(data: Uint8Array): Uint8Array {
return createHash('sha256').update(data).digest()
}
hmacSha256(data: Uint8Array, key: Uint8Array): Uint8Array {
return createHmac('sha256', key).update(data).digest()
}
gzip(data: Uint8Array, maxSize: number): Uint8Array | null {
// todo: test if the wasm impl is a better fit here
try {
// telegram accepts both zlib and gzip, but zlib is faster and has less overhead, so we use it here
return deflateSync(data, {
maxOutputLength: maxSize,
})
// hot path, avoid additional runtime checks
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (e: any) {
if (e.code === 'ERR_BUFFER_TOO_LARGE') {
return null
}
throw e
}
}
gunzip(data: Uint8Array): Uint8Array {
// todo: test if the wasm impl is a better fit here
return gunzipSync(data)
}
}
export class NodeCryptoProvider extends BaseNodeCryptoProvider implements ICryptoProvider {
private wasmInput?: InitInput
constructor(params?: { wasmInput?: InitInput }) {
super()
this.wasmInput = params?.wasmInput
}
initialize(): Promise<void> {
return initAsync(this.wasmInput)
}
createAesIge(key: Uint8Array, iv: Uint8Array): IEncryptionScheme {
return {
encrypt(data: Uint8Array): Uint8Array {
return ige256Encrypt(data, key, iv)
},
decrypt(data: Uint8Array): Uint8Array {
return ige256Decrypt(data, key, iv)
},
}
}
}
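Note the deliberate asymmetry above: outgoing payloads are compressed as zlib (Telegram accepts both formats, and zlib has less header overhead), while `gunzip()` handles the gzip-wrapped payloads the server sends, so the two are not a round-trip pair. A quick sketch of the `maxSize` contract (sizes are illustrative; no wasm init is needed for the zlib path):

```ts
import { randomBytes } from 'crypto'

const provider = new NodeCryptoProvider()

// highly compressible data easily fits a small budget
const zeros = new Uint8Array(4096)
console.log(provider.gzip(zeros, 512) !== null) // true

// incompressible data blows the budget, so null is returned
const noise = randomBytes(4096)
console.log(provider.gzip(noise, 1024)) // null
```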

View file

@@ -1,12 +1,17 @@
import {
createCtr256,
ctr256,
deflateMaxSize,
freeCtr256,
gunzip,
ige256Decrypt,
ige256Encrypt,
initAsync,
InitInput,
} from '@mtcute/wasm'
import { MaybeAsync } from '../../index.js'
import { BaseCryptoProvider, ICryptoProvider, IEncryptionScheme } from './abstract.js'
import AES_, { CTR } from '@cryptography/aes'
// fucking weird flex with es modules.
// i hate default imports please for the love of god never use them
type AES_ = typeof AES_.default
const AES = 'default' in AES_ ? AES_.default : AES_ as AES_
import { BaseCryptoProvider, IAesCtr, ICryptoProvider, IEncryptionScheme } from './abstract.js'
const ALGO_TO_SUBTLE: Record<string, string> = {
sha256: 'SHA-256',
@@ -14,23 +19,23 @@
sha512: 'SHA-512',
}
function wordsToBytes(words: Uint32Array): Uint8Array {
const o = new Uint8Array(words.byteLength)
export class WebCryptoProvider extends BaseCryptoProvider implements ICryptoProvider {
readonly subtle: SubtleCrypto
readonly wasmInput?: InitInput
const len = words.length * 4
constructor(params?: { wasmInput?: InitInput; subtle?: SubtleCrypto }) {
super()
this.wasmInput = params?.wasmInput
const subtle = params?.subtle ?? globalThis.crypto?.subtle
for (let i = 0; i < len; ++i) {
o[i] = ((words[i >>> 2] >>> (24 - (i % 4) * 8)) & 0xff)
if (!subtle) {
throw new Error('SubtleCrypto is not available')
}
this.subtle = subtle
}
return o
}
export class SubtleCryptoProvider extends BaseCryptoProvider implements ICryptoProvider {
constructor(
readonly subtle: SubtleCrypto,
) {
super()
initialize(): Promise<void> {
return initAsync(this.wasmInput)
}
sha1(data: Uint8Array): MaybeAsync<Uint8Array> {
@@ -78,21 +83,27 @@ export class SubtleCryptoProvider extends BaseCryptoProvider implements ICryptoP
return new Uint8Array(res)
}
createAesCtr(key: Uint8Array, iv: Uint8Array): IEncryptionScheme {
const aes = new CTR(key, iv)
createAesCtr(key: Uint8Array, iv: Uint8Array): IAesCtr {
const ctx = createCtr256(key, iv)
return {
encrypt: (data) => wordsToBytes(aes.encrypt(data)),
decrypt: (data) => wordsToBytes(aes.decrypt(data)),
process: (data) => ctr256(ctx, data),
close: () => freeCtr256(ctx),
}
}
createAesEcb(key: Uint8Array): IEncryptionScheme {
const aes = new AES(key)
createAesIge(key: Uint8Array, iv: Uint8Array): IEncryptionScheme {
return {
encrypt: (data) => wordsToBytes(aes.encrypt(data)),
decrypt: (data) => wordsToBytes(aes.decrypt(data)),
encrypt: (data) => ige256Encrypt(data, key, iv),
decrypt: (data) => ige256Decrypt(data, key, iv),
}
}
gzip(data: Uint8Array, maxSize: number): Uint8Array | null {
return deflateMaxSize(data, maxSize)
}
gunzip(data: Uint8Array): Uint8Array {
return gunzip(data)
}
}
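A sketch of constructing the renamed provider in a browser. `wasmInput` is forwarded to `@mtcute/wasm`'s `initAsync()`; whether a URL is accepted depends on what `InitInput` allows, so treat that parameter as illustrative:

```ts
const provider = new WebCryptoProvider({
    subtle: crypto.subtle, // optional: defaults to globalThis.crypto.subtle
    wasmInput: new URL('mtcute.wasm', import.meta.url), // illustrative
})

await provider.initialize() // compiles and instantiates the wasm module

const key = crypto.getRandomValues(new Uint8Array(32))
const iv = crypto.getRandomValues(new Uint8Array(32))
const ige = provider.createAesIge(key, iv) // synchronous, wasm-backed
```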

View file

@@ -1,4 +1,4 @@
import { NodeCryptoProvider } from '../crypto/node-crypto.js'
import { NodeCryptoProvider } from '../crypto/node.js'
/** @internal */
export const _defaultCryptoProviderFactory = () => new NodeCryptoProvider()

View file

@@ -1,5 +1,5 @@
import { MtUnsupportedError } from '../../index.js'
import { SubtleCryptoProvider } from '../crypto/subtle.js'
import { WebCryptoProvider } from '../crypto/web.js'
/** @internal */
export const _defaultCryptoProviderFactory = () => {
@@ -7,5 +7,5 @@ export const _defaultCryptoProviderFactory = () => {
throw new MtUnsupportedError('WebCrypto API is not available')
}
return new SubtleCryptoProvider(crypto.subtle)
return new WebCryptoProvider({ subtle: crypto.subtle })
}

View file

@@ -6,7 +6,7 @@ import { describe, it } from 'mocha'
import { TlReaderMap } from '@mtcute/tl-runtime'
import { AuthKey } from '../src/network/auth-key.js'
import { NodeCryptoProvider } from '../src/utils/crypto/node-crypto.js'
import { NodeCryptoProvider } from '../src/utils/crypto/node.js'
import { LogManager } from '../src/utils/index.js'
chai.use(spies)

View file

@@ -4,11 +4,13 @@ import { describe, it } from 'mocha'
import { hexDecodeToBuffer, hexEncode, utf8EncodeToBuffer } from '@mtcute/tl-runtime'
import { NodeCryptoProvider } from '../src/utils/crypto/node-crypto.js'
import { SubtleCryptoProvider } from '../src/utils/crypto/subtle.js'
import { NodeCryptoProvider } from '../src/utils/crypto/node.js'
import { WebCryptoProvider } from '../src/utils/crypto/web.js'
import { ICryptoProvider } from '../src/utils/index.js'
export function testCryptoProvider(c: ICryptoProvider): void {
before(() => c.initialize?.())
it('should calculate sha1', async () => {
expect(hexEncode(await c.sha1(utf8EncodeToBuffer('')))).to.eq('da39a3ee5e6b4b0d3255bfef95601890afd80709')
expect(hexEncode(await c.sha1(utf8EncodeToBuffer('hello')))).to.eq('aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d')
@@ -47,81 +49,51 @@ export function testCryptoProvider(c: ICryptoProvider): void {
)
})
it('should encrypt and decrypt aes-ctr', async () => {
let aes = await c.createAesCtr(
it('should encrypt and decrypt aes-ctr', () => {
let aes = c.createAesCtr(
hexDecodeToBuffer('d450aae0bf0060a4af1044886b42a13f7c506b35255d134a7e87ab3f23a9493b'),
hexDecodeToBuffer('0182de2bd789c295c3c6c875c5e9e190'),
true,
)
const data = hexDecodeToBuffer('7baae571e4c2f4cfadb1931d5923aca7')
expect(hexEncode(await aes.encrypt(data))).eq('df5647dbb70bc393f2fb05b72f42286f')
expect(hexEncode(await aes.encrypt(data))).eq('3917147082672516b3177150129bc579')
expect(hexEncode(await aes.encrypt(data))).eq('2a7a9089270a5de45d5e3dd399cac725')
expect(hexEncode(await aes.encrypt(data))).eq('56d085217771398ac13583de4d677dd8')
expect(hexEncode(await aes.encrypt(data))).eq('cc639b488126cf36e79c4515e8012b92')
expect(hexEncode(await aes.encrypt(data))).eq('01384d100646cd562cc5586ec3f8f8c4')
expect(hexEncode(aes.process(data))).eq('df5647dbb70bc393f2fb05b72f42286f')
expect(hexEncode(aes.process(data))).eq('3917147082672516b3177150129bc579')
expect(hexEncode(aes.process(data))).eq('2a7a9089270a5de45d5e3dd399cac725')
expect(hexEncode(aes.process(data))).eq('56d085217771398ac13583de4d677dd8')
expect(hexEncode(aes.process(data))).eq('cc639b488126cf36e79c4515e8012b92')
expect(hexEncode(aes.process(data))).eq('01384d100646cd562cc5586ec3f8f8c4')
aes = await c.createAesCtr(
aes.close?.()
aes = c.createAesCtr(
hexDecodeToBuffer('d450aae0bf0060a4af1044886b42a13f7c506b35255d134a7e87ab3f23a9493b'),
hexDecodeToBuffer('0182de2bd789c295c3c6c875c5e9e190'),
false,
)
expect(hexEncode(await aes.decrypt(hexDecodeToBuffer('df5647dbb70bc393f2fb05b72f42286f')))).eq(hexEncode(data))
expect(hexEncode(await aes.decrypt(hexDecodeToBuffer('3917147082672516b3177150129bc579')))).eq(hexEncode(data))
expect(hexEncode(await aes.decrypt(hexDecodeToBuffer('2a7a9089270a5de45d5e3dd399cac725')))).eq(hexEncode(data))
expect(hexEncode(await aes.decrypt(hexDecodeToBuffer('56d085217771398ac13583de4d677dd8')))).eq(hexEncode(data))
expect(hexEncode(await aes.decrypt(hexDecodeToBuffer('cc639b488126cf36e79c4515e8012b92')))).eq(hexEncode(data))
expect(hexEncode(await aes.decrypt(hexDecodeToBuffer('01384d100646cd562cc5586ec3f8f8c4')))).eq(hexEncode(data))
expect(hexEncode(aes.process(hexDecodeToBuffer('df5647dbb70bc393f2fb05b72f42286f')))).eq(hexEncode(data))
expect(hexEncode(aes.process(hexDecodeToBuffer('3917147082672516b3177150129bc579')))).eq(hexEncode(data))
expect(hexEncode(aes.process(hexDecodeToBuffer('2a7a9089270a5de45d5e3dd399cac725')))).eq(hexEncode(data))
expect(hexEncode(aes.process(hexDecodeToBuffer('56d085217771398ac13583de4d677dd8')))).eq(hexEncode(data))
expect(hexEncode(aes.process(hexDecodeToBuffer('cc639b488126cf36e79c4515e8012b92')))).eq(hexEncode(data))
expect(hexEncode(aes.process(hexDecodeToBuffer('01384d100646cd562cc5586ec3f8f8c4')))).eq(hexEncode(data))
aes.close?.()
})
it('should encrypt and decrypt aes-ecb', async () => {
let aes = await c.createAesEcb(
hexDecodeToBuffer('d450aae0bf0060a4af1044886b42a13f7c506b35255d134a7e87ab3f23a9493b'),
)
expect(hexEncode(await aes.encrypt(hexDecodeToBuffer('f71eed6018f1ef976d39c19f9d29fd29')))).eq(
'038ef30acb438b64159f484aec541fd2',
)
expect(hexEncode(await aes.encrypt(hexDecodeToBuffer('f71eed6018f1ef976d39c19f9d29fd29')))).eq(
'038ef30acb438b64159f484aec541fd2',
)
expect(hexEncode(await aes.encrypt(hexDecodeToBuffer('460af382084b7960d2e9f3bca4cdc25b')))).eq(
'29c3af710c3c56f7fbb97ca06af3b974',
)
aes = await c.createAesEcb(
hexDecodeToBuffer('d450aae0bf0060a4af1044886b42a13f7c506b35255d134a7e87ab3f23a9493b'),
)
expect(hexEncode(await aes.decrypt(hexDecodeToBuffer('038ef30acb438b64159f484aec541fd2')))).eq(
'f71eed6018f1ef976d39c19f9d29fd29',
)
expect(hexEncode(await aes.decrypt(hexDecodeToBuffer('038ef30acb438b64159f484aec541fd2')))).eq(
'f71eed6018f1ef976d39c19f9d29fd29',
)
expect(hexEncode(await aes.decrypt(hexDecodeToBuffer('29c3af710c3c56f7fbb97ca06af3b974')))).eq(
'460af382084b7960d2e9f3bca4cdc25b',
)
})
it('should encrypt and decrypt aes-ige', async () => {
const aes = await c.createAesIge(
it('should encrypt and decrypt aes-ige', () => {
const aes = c.createAesIge(
hexDecodeToBuffer('5468697320697320616E20696D706C655468697320697320616E20696D706C65'),
hexDecodeToBuffer('6D656E746174696F6E206F6620494745206D6F646520666F72204F70656E5353'),
)
expect(
hexEncode(
await aes.encrypt(
hexDecodeToBuffer('99706487a1cde613bc6de0b6f24b1c7aa448c8b9c3403e3467a8cad89340f53b'),
),
aes.encrypt(hexDecodeToBuffer('99706487a1cde613bc6de0b6f24b1c7aa448c8b9c3403e3467a8cad89340f53b')),
),
).to.eq('792ea8ae577b1a66cb3bd92679b8030ca54ee631976bd3a04547fdcb4639fa69')
expect(
hexEncode(
await aes.decrypt(
hexDecodeToBuffer('792ea8ae577b1a66cb3bd92679b8030ca54ee631976bd3a04547fdcb4639fa69'),
),
aes.decrypt(hexDecodeToBuffer('792ea8ae577b1a66cb3bd92679b8030ca54ee631976bd3a04547fdcb4639fa69')),
),
).to.eq('99706487a1cde613bc6de0b6f24b1c7aa448c8b9c3403e3467a8cad89340f53b')
})
@@ -137,12 +109,12 @@ describe('NodeCryptoProvider', () => {
testCryptoProvider(new NodeCryptoProvider())
})
describe('SubtleCryptoProvider', () => {
describe('WebCryptoProvider', () => {
if (typeof crypto.subtle === 'undefined') {
console.warn('Skipping SubtleCryptoProvider tests')
console.warn('Skipping WebCryptoProvider tests')
return
}
testCryptoProvider(new SubtleCryptoProvider(crypto.subtle))
testCryptoProvider(new WebCryptoProvider({ subtle: crypto.subtle }))
})

View file

@@ -1,7 +1,7 @@
import { expect } from 'chai'
import { describe, it } from 'mocha'
import { NodeCryptoProvider } from '../src/utils/crypto/node-crypto.js'
import { NodeCryptoProvider } from '../src/utils/crypto/node.js'
import { parsePublicKey } from '../src/utils/index.js'
const crypto = new NodeCryptoProvider()

View file

@@ -9,7 +9,7 @@ import {
createAesIgeForMessageOld,
generateKeyAndIvFromNonce,
} from '../src/utils/crypto/mtproto.js'
import { NodeCryptoProvider } from '../src/utils/crypto/node-crypto.js'
import { NodeCryptoProvider } from '../src/utils/crypto/node.js'
chai.use(spies)

View file

@@ -1,2 +1,2 @@
.vs
build
build

View file

@@ -1,4 +1,4 @@
import { NodeCryptoProvider } from '@mtcute/core/src/utils/crypto/node-crypto.js'
import { BaseNodeCryptoProvider } from '@mtcute/core/src/utils/crypto/node.js'
import { IEncryptionScheme } from '@mtcute/core/utils.js'
import { native } from './native.cjs'
@@ -13,7 +13,7 @@ const { ige256_decrypt, ige256_encrypt } = native
* Other modes are supported natively by OpenSSL, and
* they *are* faster than the custom ones.
*/
export class NodeNativeCryptoProvider extends NodeCryptoProvider {
export class NodeNativeCryptoProvider extends BaseNodeCryptoProvider {
createAesIge(key: Uint8Array, iv: Uint8Array): IEncryptionScheme {
return {
encrypt(data: Uint8Array): Uint8Array {

View file

@@ -1,5 +0,0 @@
# This is not a package
This is just a bunch of C files that are imported in `crypto-*` packages.
## Acknowledgements
This code is based on [pyrogram/tgcrypto](https://github.com/pyrogram/tgcrypto)

View file

@@ -1,36 +0,0 @@
#include "aes256.h"
void cbc256_encrypt(uint8_t* in, size_t length, uint8_t* key, uint8_t* iv, uint8_t* out) {
uint32_t expandedKey[EXPANDED_KEY_SIZE];
uint32_t i, j;
uint8_t* currentIv = iv;
aes256_set_encryption_key(key, expandedKey);
for (i = 0; i < length; i += AES_BLOCK_SIZE) {
for (j = 0; j < AES_BLOCK_SIZE; ++j)
out[i + j] = in[i + j] ^ currentIv[j];
aes256_encrypt(&out[i], &out[i], expandedKey);
currentIv = &out[i];
}
}
void cbc256_decrypt(uint8_t* in, size_t length, uint8_t* key, uint8_t* iv, uint8_t* out) {
uint32_t expandedKey[EXPANDED_KEY_SIZE];
uint32_t i, j;
uint8_t* currentIv = iv;
aes256_set_decryption_key(key, expandedKey);
for (i = 0; i < length; i += AES_BLOCK_SIZE) {
aes256_decrypt(&in[i], &out[i], expandedKey);
for (j = 0; j < AES_BLOCK_SIZE; ++j)
out[i + j] ^= currentIv[j];
currentIv = &in[i];
}
}

View file

@@ -1,17 +0,0 @@
#include <stdint.h>
#ifndef CBC256_H
#define CBC256_H
#ifdef __cplusplus
extern "C" {
#endif
void cbc256_encrypt(uint8_t* in, size_t length, uint8_t* key, uint8_t* iv, uint8_t* out);
void cbc256_decrypt(uint8_t* in, size_t length, uint8_t* key, uint8_t* iv, uint8_t* out);
#ifdef __cplusplus
}
#endif
#endif // CBC256_H

View file

@@ -1,32 +0,0 @@
#include "aes256.h"
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
void ctr256(uint8_t* in, uint32_t length, uint8_t* key, uint8_t* iv, uint8_t* counter, uint8_t* out) {
uint8_t chunk[AES_BLOCK_SIZE];
uint32_t expandedKey[EXPANDED_KEY_SIZE];
uint32_t i, j, k;
memcpy(out, in, length);
aes256_set_encryption_key(key, expandedKey);
aes256_encrypt(iv, chunk, expandedKey);
for (i = 0; i < length; i += AES_BLOCK_SIZE) {
for (j = 0; j < MIN(length - i, AES_BLOCK_SIZE); ++j) {
out[i + j] ^= chunk[(*counter)++];
if (*counter >= AES_BLOCK_SIZE)
*counter = 0;
if (*counter == 0) {
k = AES_BLOCK_SIZE;
while(k--)
if (++iv[k])
break;
aes256_encrypt(iv, chunk, expandedKey);
}
}
}
}

View file

@@ -1,8 +0,0 @@
#include <stdint.h>
#ifndef CTR256_H
#define CTR256_H
extern "C" uint8_t* ctr256(uint8_t* in, uint32_t length, uint8_t* key, uint8_t* iv, uint8_t* state, uint8_t* out);
#endif // CTR256_H

View file

@@ -17,9 +17,7 @@
"./cjs/encodings/hex.js": "./cjs/encodings/hex.web.js",
"./esm/encodings/hex.js": "./esm/encodings/hex.web.js",
"./cjs/encodings/utf8.js": "./cjs/encodings/utf8.web.js",
"./esm/encodings/utf8.js": "./esm/encodings/utf8.web.js",
"./cjs/encodings/gzip.js": "./cjs/encodings/gzip.web.js",
"./esm/encodings/gzip.js": "./esm/encodings/gzip.web.js"
"./esm/encodings/utf8.js": "./esm/encodings/utf8.web.js"
},
"distOnlyFields": {
"exports": {
@@ -31,10 +29,6 @@
},
"main": "src/index.ts",
"dependencies": {
"long": "5.2.3",
"pako": "2.1.0"
},
"devDependencies": {
"@types/pako": "2.0.0"
"long": "5.2.3"
}
}

View file

@@ -1,39 +0,0 @@
/* eslint-disable no-restricted-globals */
import { deflateSync, gunzipSync } from 'node:zlib'
/**
* Decompress a buffer with gzip.
* @param buf Buffer to decompress
*/
export function gzipInflate(buf: Uint8Array): Uint8Array {
return gunzipSync(buf)
}
/**
* Compress a buffer with gzip.
*
* @param buf Buffer to compress
* @param maxRatio
* Maximum compression ratio. If the resulting buffer is smaller than
* `buf.length * ratio`, `null` is returned.
*/
export function gzipDeflate(buf: ArrayBuffer, maxRatio?: number): Buffer | null {
if (maxRatio) {
try {
return deflateSync(buf, {
maxOutputLength: Math.floor(buf.byteLength * maxRatio),
})
// hot path, avoid additional runtime checks
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (e: any) {
if (e.code === 'ERR_BUFFER_TOO_LARGE') {
return null
}
throw e
}
}
return deflateSync(buf)
}

View file

@@ -1,40 +0,0 @@
import { Data, Deflate, inflate } from 'pako'
export function gzipInflate(buf: Uint8Array): Uint8Array {
return inflate(buf)
}
const ERROR_SIZE_LIMIT_REACHED = 'ERR_SIZE_LIMIT_REACHED'
class DeflateLimited extends Deflate {
constructor(readonly limit: number) {
super()
}
_size = 0
onData(chunk: Data) {
this._size += (chunk as Uint8Array).length
if (this._size > this.limit) {
// caught locally
// eslint-disable-next-line @typescript-eslint/no-throw-literal
throw ERROR_SIZE_LIMIT_REACHED
}
super.onData(chunk)
}
}
export function gzipDeflate(buf: Uint8Array, maxRatio?: number): Uint8Array | null {
const deflator = maxRatio ? new DeflateLimited(Math.floor(buf.length * maxRatio)) : new Deflate()
try {
deflator.push(buf, true)
} catch (e) {
if (e === ERROR_SIZE_LIMIT_REACHED) return null
throw e
}
return deflator.result
}

View file

@@ -1,4 +1,3 @@
export * from './base64.js'
export * from './gzip.js'
export * from './hex.js'
export * from './utf8.js'

View file

@@ -1,5 +1,4 @@
export * from './encodings/base64.js'
export * from './encodings/gzip.js'
export * from './encodings/hex.js'
export * from './encodings/utf8.js'
export * from './reader.js'

View file

@@ -1,6 +1,5 @@
import Long from 'long'
import { gzipInflate } from './encodings/gzip.js'
import { hexEncode } from './encodings/hex.js'
import { utf8Decode } from './encodings/utf8.js'
@@ -178,13 +177,10 @@ export class TlBinaryReader {
return utf8Decode(this.bytes())
}
object(): unknown {
const id = this.uint()
object(id = this.uint()): unknown {
if (id === 0x1cb5c415 /* vector */) {
return this.vector(this.object, true)
}
if (id === 0x3072cfa1 /* gzip_packed */) return this.gzip()
if (id === 0xbc799737 /* boolFalse */) return false
if (id === 0x997275b5 /* boolTrue */) return true
// unsure if it is actually used on the wire; it seems to only be used for boolean flags
@@ -209,10 +205,6 @@
return reader(this)
}
gzip(): unknown {
return new TlBinaryReader(this.objectsMap, gzipInflate(this.bytes())).object()
}
vector(reader = this.object, bare = false): unknown[] {
if (!bare) {
const uint = this.uint()

View file

@@ -3,7 +3,7 @@ import { writeFile } from 'fs/promises'
import { join } from 'path'
import readline from 'readline'
import { NodeCryptoProvider } from '@mtcute/core/src/utils/crypto/node-crypto.js'
import { NodeCryptoProvider } from '@mtcute/core/src/utils/crypto/node.js'
import { parsePublicKey } from '@mtcute/core/utils.js'
import { TlPublicKey } from '../binary/rsa-keys.js'

packages/wasm/.gitignore vendored Normal file
View file

@@ -0,0 +1 @@
build

packages/wasm/README.md Normal file
View file

@@ -0,0 +1,19 @@
# @mtcute/wasm
📖 [API Reference](https://ref.mtcute.dev/modules/_mtcute_wasm.html)
A WASM implementation of common algorithms used in Telegram, highly optimized for both size and speed.
## Features
- **Super lightweight**: Only 45 KB raw, 22 KB gzipped
- **Blazingly fast**: Up to 10x faster than pure JS implementations
- Implements AES IGE and Deflate (zlib compression + gunzip), which are not available in some environments (e.g. web)
## Acknowledgements
- Deflate is implemented through a modified version of [libdeflate](https://github.com/ebiggers/libdeflate), MIT license.
- Modified by [kamillaova](https://github.com/kamillaova) to support WASM and improve bundle size
- AES IGE code is mostly based on [tgcrypto](https://github.com/pyrogram/tgcrypto), LGPL-3.0 license.
- To comply with LGPL-3.0, the source code of the modified tgcrypto is available [here](./lib/crypto/) under LGPL-3.0 license.
## Benchmarks
See https://github.com/mtcute/benchmarks
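## Usage
A minimal sketch based on the exports used elsewhere in this commit (run under Node 18+ or a modern browser):
```ts
import { deflateMaxSize, gunzip, ige256Decrypt, ige256Encrypt, initAsync } from '@mtcute/wasm'

await initAsync() // must complete before calling any other export

const key = new Uint8Array(32) // use random bytes in practice
const iv = new Uint8Array(32) // IGE uses a 32-byte IV
const data = new Uint8Array(64) // length must be a multiple of 16

const encrypted = ige256Encrypt(data, key, iv)
const decrypted = ige256Decrypt(encrypted, key, iv)

// deflateMaxSize compresses to zlib format and returns null if the
// result would not fit into the given size budget; gunzip() inflates
// gzip-wrapped data (e.g. gzip_packed payloads from Telegram)
const packed = deflateMaxSize(data, 48)
```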

View file

@@ -0,0 +1,20 @@
module.exports = ({ path: { join }, fs, outDir, packageDir, transformFile }) => ({
esmOnlyDirectives: true,
final() {
const fixWasmPath = (path) => {
transformFile(join(outDir, path), (data) => data.replace('../lib/mtcute.wasm', '../mtcute.wasm'))
}
fixWasmPath('cjs/init.js')
fixWasmPath('esm/init.js')
fs.cpSync(join(packageDir, 'lib/mtcute.wasm'), join(outDir, 'mtcute.wasm'))
},
})

View file

@@ -0,0 +1,14 @@
FROM alpine:3.18.4 AS build
WORKDIR /src
RUN apk add --no-cache lld make clang16 binaryen
COPY crypto /src/crypto
COPY libdeflate /src/libdeflate
COPY *.h *.c Makefile /src/
RUN ZLIB_COMPRESSION_API=1 GZIP_DECOMPRESSION_API=1 IGE_API=1 CTR_API=1 make
FROM scratch AS binaries
COPY --from=build /src/mtcute.wasm /

View file

@@ -0,0 +1,75 @@
.PHONY: all clean
DEFAULT_API ?= 0
DEFLATE_COMPRESSION_API ?= $(DEFAULT_API)
DEFLATE_DECOMPRESSION_API ?= $(DEFAULT_API)
GZIP_COMPRESSION_API ?= $(DEFAULT_API)
GZIP_DECOMPRESSION_API ?= $(DEFAULT_API)
ZLIB_COMPRESSION_API ?= $(DEFAULT_API)
ZLIB_DECOMPRESSION_API ?= $(DEFAULT_API)
CRC32_API ?= $(DEFAULT_API)
ADLER32_API ?= $(DEFAULT_API)
IGE_API ?= $(DEFAULT_API)
CTR_API ?= $(DEFAULT_API)
CRC32 ?= 0
LOGGING ?= 0
_DEFLATE_COMPRESSION := 1
_DEFLATE_DECOMPRESSION := 1
_ADLER32 := $(findstring 1, $(ZLIB_COMPRESSION_API)$(ZLIB_DECOMPRESSION_API))
_AES := $(findstring 1, $(IGE_API)$(CTR_API))
SOURCES = utils.c \
$(if $(filter 1, $(_DEFLATE_COMPRESSION)), libdeflate/deflate_compress.c) \
$(if $(filter 1, $(_DEFLATE_DECOMPRESSION)), libdeflate/deflate_decompress.c) \
$(if $(filter 1, $(GZIP_COMPRESSION_API)), libdeflate/gzip_compress.c) \
$(if $(filter 1, $(GZIP_DECOMPRESSION_API)), libdeflate/gzip_decompress.c) \
$(if $(filter 1, $(ZLIB_COMPRESSION_API)), libdeflate/zlib_compress.c) \
$(if $(filter 1, $(ZLIB_DECOMPRESSION_API)), libdeflate/zlib_decompress.c) \
$(if $(filter 1, $(CRC32)), libdeflate/crc32.c) \
$(if $(filter 1, $(_ADLER32)), libdeflate/adler32.c) \
$(if $(filter 1, $(_AES)), crypto/aes256.c) \
$(if $(filter 1, $(IGE_API)), crypto/ige256.c) \
$(if $(filter 1, $(CTR_API)), crypto/ctr256.c)
CC := clang
CFLAGS_WASM := \
-target wasm32-unknown-unknown \
-nostdlib -ffreestanding -DFREESTANDING \
$(if $(filter 1, $(LOGGING)), -DLOGGING) \
-mbulk-memory \
-Wl,--no-entry,--export-dynamic,--lto-O3
CFLAGS := $(CFLAGS_WASM) \
-O3 \
-Qn \
-DNDEBUG \
-mno-exception-handling \
-fdelete-null-pointer-checks \
-fno-stack-protector \
-flto=full \
-fdata-sections \
-ffunction-sections \
-Wl,--gc-sections \
-fno-inline \
-fno-unroll-loops
ifneq ($(OS),Windows_NT)
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Darwin)
export PATH := /opt/homebrew/opt/llvm/bin/:$(PATH)
endif
endif
OUT := mtcute.wasm
$(OUT): $(SOURCES)
$(CC) $(CFLAGS) -I . -o $@ $^
clean:
rm -f $(OUT)
all: $(OUT)

View file

@@ -0,0 +1,686 @@
/*
* common_defs.h
*
* Copyright 2016 Eric Biggers
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef COMMON_DEFS_H
#define COMMON_DEFS_H
#include "libdeflate.h"
#include <stdbool.h>
#include <stddef.h> /* for size_t */
#include <stdint.h>
#ifdef _MSC_VER
# include <intrin.h> /* for _BitScan*() and other intrinsics */
# include <stdlib.h> /* for _byteswap_*() */
/* Disable MSVC warnings that are expected. */
/* /W2 */
# pragma warning(disable : 4146) /* unary minus on unsigned type */
/* /W3 */
# pragma warning(disable : 4018) /* signed/unsigned mismatch */
# pragma warning(disable : 4244) /* possible loss of data */
# pragma warning(disable : 4267) /* possible loss of precision */
# pragma warning(disable : 4310) /* cast truncates constant value */
/* /W4 */
# pragma warning(disable : 4100) /* unreferenced formal parameter */
# pragma warning(disable : 4127) /* conditional expression is constant */
# pragma warning(disable : 4189) /* local variable initialized but not referenced */
# pragma warning(disable : 4232) /* nonstandard extension used */
# pragma warning(disable : 4245) /* conversion from 'int' to 'unsigned int' */
# pragma warning(disable : 4295) /* array too small to include terminating null */
#endif
/* ========================================================================== */
/* Target architecture */
/* ========================================================================== */
/* If possible, define a compiler-independent ARCH_* macro. */
#undef ARCH_X86_64
#undef ARCH_X86_32
#undef ARCH_ARM64
#undef ARCH_ARM32
#ifdef _MSC_VER
# if defined(_M_X64)
# define ARCH_X86_64
# elif defined(_M_IX86)
# define ARCH_X86_32
# elif defined(_M_ARM64)
# define ARCH_ARM64
# elif defined(_M_ARM)
# define ARCH_ARM32
# endif
#else
# if defined(__x86_64__)
# define ARCH_X86_64
# elif defined(__i386__)
# define ARCH_X86_32
# elif defined(__aarch64__)
# define ARCH_ARM64
# elif defined(__arm__)
# define ARCH_ARM32
# endif
#endif
/* ========================================================================== */
/* Type definitions */
/* ========================================================================== */
/* Fixed-width integer types */
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int8_t s8;
typedef int16_t s16;
typedef int32_t s32;
typedef int64_t s64;
/* ssize_t, if not available in <sys/types.h> */
#ifdef _MSC_VER
# ifdef _WIN64
typedef long long ssize_t;
# else
typedef long ssize_t;
# endif
#endif
/*
* Word type of the target architecture. Use 'size_t' instead of
* 'unsigned long' to account for platforms such as Windows that use 32-bit
* 'unsigned long' on 64-bit architectures.
*/
typedef size_t machine_word_t;
/* Number of bytes in a word */
#define WORDBYTES ((int)sizeof(machine_word_t))
/* Number of bits in a word */
#define WORDBITS (8 * WORDBYTES)
/* ========================================================================== */
/* Optional compiler features */
/* ========================================================================== */
/* Compiler version checks. Only use when absolutely necessary. */
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
# define GCC_PREREQ(major, minor) \
(__GNUC__ > (major) || \
(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#else
# define GCC_PREREQ(major, minor) 0
#endif
#ifdef __clang__
# ifdef __apple_build_version__
# define CLANG_PREREQ(major, minor, apple_version) \
(__apple_build_version__ >= (apple_version))
# else
# define CLANG_PREREQ(major, minor, apple_version) \
(__clang_major__ > (major) || \
(__clang_major__ == (major) && __clang_minor__ >= (minor)))
# endif
#else
# define CLANG_PREREQ(major, minor, apple_version) 0
#endif
/*
* Macros to check for compiler support for attributes and builtins. clang
* implements these macros, but gcc doesn't, so generally any use of one of
* these macros must also be combined with a gcc version check.
*/
#ifndef __has_attribute
# define __has_attribute(attribute) 0
#endif
#ifndef __has_builtin
# define __has_builtin(builtin) 0
#endif
/*
* restrict - hint that writes only occur through the given pointer.
*
* Don't use MSVC's __restrict, since it has nonstandard behavior.
* Standard restrict is okay, if it is supported.
*/
#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L)
# if defined(__GNUC__) || defined(__clang__)
# define restrict __restrict__
# else
# define restrict
# endif
#endif /* else assume 'restrict' is usable as-is */
/* likely(expr) - hint that an expression is usually true */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
# define likely(expr) __builtin_expect(!!(expr), 1)
#else
# define likely(expr) (expr)
#endif
/* unlikely(expr) - hint that an expression is usually false */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
# define unlikely(expr) __builtin_expect(!!(expr), 0)
#else
# define unlikely(expr) (expr)
#endif
/* prefetchr(addr) - prefetch into L1 cache for read */
#undef prefetchr
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
# define prefetchr(addr) __builtin_prefetch((addr), 0)
#elif defined(_MSC_VER)
# if defined(ARCH_X86_32) || defined(ARCH_X86_64)
# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
# elif defined(ARCH_ARM64)
# define prefetchr(addr) __prefetch2((addr), 0x00 /* prfop=PLDL1KEEP */)
# elif defined(ARCH_ARM32)
# define prefetchr(addr) __prefetch(addr)
# endif
#endif
#ifndef prefetchr
# define prefetchr(addr)
#endif
/* prefetchw(addr) - prefetch into L1 cache for write */
#undef prefetchw
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
# define prefetchw(addr) __builtin_prefetch((addr), 1)
#elif defined(_MSC_VER)
# if defined(ARCH_X86_32) || defined(ARCH_X86_64)
# define prefetchw(addr) _m_prefetchw(addr)
# elif defined(ARCH_ARM64)
# define prefetchw(addr) __prefetch2((addr), 0x10 /* prfop=PSTL1KEEP */)
# elif defined(ARCH_ARM32)
# define prefetchw(addr) __prefetchw(addr)
# endif
#endif
#ifndef prefetchw
# define prefetchw(addr)
#endif
/*
* _aligned_attribute(n) - declare that the annotated variable, or variables of
* the annotated type, must be aligned on n-byte boundaries.
*/
#undef _aligned_attribute
#if defined(__GNUC__) || __has_attribute(aligned)
# define _aligned_attribute(n) __attribute__((aligned(n)))
#elif defined(_MSC_VER)
# define _aligned_attribute(n) __declspec(align(n))
#endif
/*
* _target_attribute(attrs) - override the compilation target for a function.
*
* This accepts one or more comma-separated suffixes to the -m prefix jointly
* forming the name of a machine-dependent option. On gcc-like compilers, this
* enables codegen for the given targets, including arbitrary compiler-generated
* code as well as the corresponding intrinsics. On other compilers this macro
* expands to nothing, though MSVC allows intrinsics to be used anywhere anyway.
*/
#if GCC_PREREQ(4, 4) || __has_attribute(target)
# define _target_attribute(attrs) __attribute__((target(attrs)))
# define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 1
#else
# define _target_attribute(attrs)
# define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 0
#endif
/* ========================================================================== */
/* Miscellaneous macros */
/* ========================================================================== */
#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
#define MIN(a, b) ((a) <= (b) ? (a) : (b))
#define MAX(a, b) ((a) >= (b) ? (a) : (b))
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
/* ========================================================================== */
/* Endianness handling */
/* ========================================================================== */
/*
* CPU_IS_LITTLE_ENDIAN() - 1 if the CPU is little endian, or 0 if it is big
* endian. When possible this is a compile-time macro that can be used in
* preprocessor conditionals. As a fallback, a generic method is used that
* can't be used in preprocessor conditionals but should still be optimized out.
*/
#if defined(__BYTE_ORDER__) /* gcc v4.6+ and clang */
# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#elif defined(_MSC_VER)
# define CPU_IS_LITTLE_ENDIAN() true
#else
static bool CPU_IS_LITTLE_ENDIAN(void)
{
union {
u32 w;
u8 b;
} u;
u.w = 1;
return u.b;
}
#endif
/* bswap16(v) - swap the bytes of a 16-bit integer */
static u16 bswap16(u16 v)
{
#if GCC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16)
return __builtin_bswap16(v);
#elif defined(_MSC_VER)
return _byteswap_ushort(v);
#else
return (v << 8) | (v >> 8);
#endif
}
/* bswap32(v) - swap the bytes of a 32-bit integer */
static u32 bswap32(u32 v)
{
#if GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap32)
return __builtin_bswap32(v);
#elif defined(_MSC_VER)
return _byteswap_ulong(v);
#else
return ((v & 0x000000FF) << 24) |
((v & 0x0000FF00) << 8) |
((v & 0x00FF0000) >> 8) |
((v & 0xFF000000) >> 24);
#endif
}
/* bswap64(v) - swap the bytes of a 64-bit integer */
static u64 bswap64(u64 v)
{
#if GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap64)
return __builtin_bswap64(v);
#elif defined(_MSC_VER)
return _byteswap_uint64(v);
#else
return ((v & 0x00000000000000FF) << 56) |
((v & 0x000000000000FF00) << 40) |
((v & 0x0000000000FF0000) << 24) |
((v & 0x00000000FF000000) << 8) |
((v & 0x000000FF00000000) >> 8) |
((v & 0x0000FF0000000000) >> 24) |
((v & 0x00FF000000000000) >> 40) |
((v & 0xFF00000000000000) >> 56);
#endif
}
#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
/* ========================================================================== */
/* Unaligned memory accesses */
/* ========================================================================== */
/*
* UNALIGNED_ACCESS_IS_FAST() - 1 if unaligned memory accesses can be performed
* efficiently on the target platform, otherwise 0.
*/
#if (defined(__GNUC__) || defined(__clang__)) && \
(defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
/*
* For all compilation purposes, WebAssembly behaves like any other CPU
* instruction set. Even though WebAssembly engine might be running on
* top of different actual CPU architectures, the WebAssembly spec
* itself permits unaligned access and it will be fast on most of those
* platforms, and simulated at the engine level on others, so it's
* worth treating it as a CPU architecture with fast unaligned access.
*/ defined(__wasm__))
# define UNALIGNED_ACCESS_IS_FAST 1
#elif defined(_MSC_VER)
# define UNALIGNED_ACCESS_IS_FAST 1
#else
# define UNALIGNED_ACCESS_IS_FAST 0
#endif
/*
* Implementing unaligned memory accesses using memcpy() is portable, and it
* usually gets optimized appropriately by modern compilers. I.e., each
* memcpy() of 1, 2, 4, or WORDBYTES bytes gets compiled to a load or store
* instruction, not to an actual function call.
*
* We no longer use the "packed struct" approach to unaligned accesses, as that
* is nonstandard, has unclear semantics, and doesn't receive enough testing
* (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94994).
*
* arm32 with __ARM_FEATURE_UNALIGNED in gcc 5 and earlier is a known exception
* where memcpy() generates inefficient code
* (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67366). However, we no longer
* consider that one case important enough to maintain different code for.
* If you run into it, please just use a newer version of gcc (or use clang).
*/
/* Unaligned loads and stores without endianness conversion */
#define DEFINE_UNALIGNED_TYPE(type) \
static type \
load_##type##_unaligned(const void *p) \
{ \
type v; \
\
__builtin_memcpy(&v, p, sizeof(v)); \
return v; \
} \
\
static void \
store_##type##_unaligned(type v, void *p) \
{ \
__builtin_memcpy(p, &v, sizeof(v)); \
}
DEFINE_UNALIGNED_TYPE(u16)
DEFINE_UNALIGNED_TYPE(u32)
DEFINE_UNALIGNED_TYPE(u64)
DEFINE_UNALIGNED_TYPE(machine_word_t)
#define load_word_unaligned load_machine_word_t_unaligned
#define store_word_unaligned store_machine_word_t_unaligned
/* Unaligned loads with endianness conversion */
static u16
get_unaligned_le16(const u8 *p)
{
if (UNALIGNED_ACCESS_IS_FAST)
return le16_bswap(load_u16_unaligned(p));
else
return ((u16)p[1] << 8) | p[0];
}
static u16
get_unaligned_be16(const u8 *p)
{
if (UNALIGNED_ACCESS_IS_FAST)
return be16_bswap(load_u16_unaligned(p));
else
return ((u16)p[0] << 8) | p[1];
}
static u32
get_unaligned_le32(const u8 *p)
{
if (UNALIGNED_ACCESS_IS_FAST)
return le32_bswap(load_u32_unaligned(p));
else
return ((u32)p[3] << 24) | ((u32)p[2] << 16) |
((u32)p[1] << 8) | p[0];
}
static u32
get_unaligned_be32(const u8 *p)
{
if (UNALIGNED_ACCESS_IS_FAST)
return be32_bswap(load_u32_unaligned(p));
else
return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
((u32)p[2] << 8) | p[3];
}
static u64
get_unaligned_le64(const u8 *p)
{
if (UNALIGNED_ACCESS_IS_FAST)
return le64_bswap(load_u64_unaligned(p));
else
return ((u64)p[7] << 56) | ((u64)p[6] << 48) |
((u64)p[5] << 40) | ((u64)p[4] << 32) |
((u64)p[3] << 24) | ((u64)p[2] << 16) |
((u64)p[1] << 8) | p[0];
}
static machine_word_t
get_unaligned_leword(const u8 *p)
{
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
if (WORDBITS == 32)
return get_unaligned_le32(p);
else
return get_unaligned_le64(p);
}
/* Unaligned stores with endianness conversion */
static void
put_unaligned_le16(u16 v, u8 *p)
{
if (UNALIGNED_ACCESS_IS_FAST) {
store_u16_unaligned(le16_bswap(v), p);
} else {
p[0] = (u8)(v >> 0);
p[1] = (u8)(v >> 8);
}
}
static void
put_unaligned_be16(u16 v, u8 *p)
{
if (UNALIGNED_ACCESS_IS_FAST) {
store_u16_unaligned(be16_bswap(v), p);
} else {
p[0] = (u8)(v >> 8);
p[1] = (u8)(v >> 0);
}
}
static void
put_unaligned_le32(u32 v, u8 *p)
{
if (UNALIGNED_ACCESS_IS_FAST) {
store_u32_unaligned(le32_bswap(v), p);
} else {
p[0] = (u8)(v >> 0);
p[1] = (u8)(v >> 8);
p[2] = (u8)(v >> 16);
p[3] = (u8)(v >> 24);
}
}
static void
put_unaligned_be32(u32 v, u8 *p)
{
if (UNALIGNED_ACCESS_IS_FAST) {
store_u32_unaligned(be32_bswap(v), p);
} else {
p[0] = (u8)(v >> 24);
p[1] = (u8)(v >> 16);
p[2] = (u8)(v >> 8);
p[3] = (u8)(v >> 0);
}
}
static void
put_unaligned_le64(u64 v, u8 *p)
{
if (UNALIGNED_ACCESS_IS_FAST) {
store_u64_unaligned(le64_bswap(v), p);
} else {
p[0] = (u8)(v >> 0);
p[1] = (u8)(v >> 8);
p[2] = (u8)(v >> 16);
p[3] = (u8)(v >> 24);
p[4] = (u8)(v >> 32);
p[5] = (u8)(v >> 40);
p[6] = (u8)(v >> 48);
p[7] = (u8)(v >> 56);
}
}
static void
put_unaligned_leword(machine_word_t v, u8 *p)
{
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
if (WORDBITS == 32)
put_unaligned_le32(v, p);
else
put_unaligned_le64(v, p);
}
/* ========================================================================== */
/* Bit manipulation functions */
/* ========================================================================== */
/*
* Bit Scan Reverse (BSR) - find the 0-based index (relative to the least
* significant end) of the *most* significant 1 bit in the input value. The
* input value must be nonzero!
*/
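/* Example: bsr32(0x00000010) == 4 (bit 4 is the highest set bit); bsr32(1) == 0. */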
static unsigned
bsr32(u32 v)
{
#if defined(__GNUC__) || __has_builtin(__builtin_clz)
return 31 - __builtin_clz(v);
#elif defined(_MSC_VER)
unsigned long i;
_BitScanReverse(&i, v);
return i;
#else
unsigned i = 0;
while ((v >>= 1) != 0)
i++;
return i;
#endif
}
static unsigned
bsr64(u64 v)
{
#if defined(__GNUC__) || __has_builtin(__builtin_clzll)
return 63 - __builtin_clzll(v);
#elif defined(_MSC_VER) && defined(_WIN64)
unsigned long i;
_BitScanReverse64(&i, v);
return i;
#else
unsigned i = 0;
while ((v >>= 1) != 0)
i++;
return i;
#endif
}
static unsigned
bsrw(machine_word_t v)
{
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
if (WORDBITS == 32)
return bsr32(v);
else
return bsr64(v);
}
/*
* Bit Scan Forward (BSF) - find the 0-based index (relative to the least
* significant end) of the *least* significant 1 bit in the input value. The
* input value must be nonzero!
*/
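/* Example: bsf32(0x00000018) == 3 (bit 3 is the lowest set bit); bsf32(1) == 0. */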
static unsigned
bsf32(u32 v)
{
#if defined(__GNUC__) || __has_builtin(__builtin_ctz)
return __builtin_ctz(v);
#elif defined(_MSC_VER)
unsigned long i;
_BitScanForward(&i, v);
return i;
#else
unsigned i = 0;
for (; (v & 1) == 0; v >>= 1)
i++;
return i;
#endif
}
static unsigned
bsf64(u64 v)
{
#if defined(__GNUC__) || __has_builtin(__builtin_ctzll)
return __builtin_ctzll(v);
#elif defined(_MSC_VER) && defined(_WIN64)
unsigned long i;
_BitScanForward64(&i, v);
return i;
#else
unsigned i = 0;
for (; (v & 1) == 0; v >>= 1)
i++;
return i;
#endif
}
static unsigned
bsfw(machine_word_t v)
{
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
if (WORDBITS == 32)
return bsf32(v);
else
return bsf64(v);
}
/*
* rbit32(v): reverse the bits in a 32-bit integer. This doesn't have a
* fallback implementation; use '#ifdef rbit32' to check if this is available.
*/
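/* Example (on targets where it exists): rbit32(0x00000001) == 0x80000000. */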
#undef rbit32
#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
(__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
static u32
rbit32(u32 v)
{
__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
return v;
}
#define rbit32 rbit32
#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
static u32
rbit32(u32 v)
{
__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
return v;
}
#define rbit32 rbit32
#endif
#endif /* COMMON_DEFS_H */

View file

@@ -0,0 +1,165 @@
GNU LESSER GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
This version of the GNU Lesser General Public License incorporates
the terms and conditions of version 3 of the GNU General Public
License, supplemented by the additional permissions listed below.
0. Additional Definitions.
As used herein, "this License" refers to version 3 of the GNU Lesser
General Public License, and the "GNU GPL" refers to version 3 of the GNU
General Public License.
"The Library" refers to a covered work governed by this License,
other than an Application or a Combined Work as defined below.
An "Application" is any work that makes use of an interface provided
by the Library, but which is not otherwise based on the Library.
Defining a subclass of a class defined by the Library is deemed a mode
of using an interface provided by the Library.
A "Combined Work" is a work produced by combining or linking an
Application with the Library. The particular version of the Library
with which the Combined Work was made is also called the "Linked
Version".
The "Minimal Corresponding Source" for a Combined Work means the
Corresponding Source for the Combined Work, excluding any source code
for portions of the Combined Work that, considered in isolation, are
based on the Application, and not on the Linked Version.
The "Corresponding Application Code" for a Combined Work means the
object code and/or source code for the Application, including any data
and utility programs needed for reproducing the Combined Work from the
Application, but excluding the System Libraries of the Combined Work.
1. Exception to Section 3 of the GNU GPL.
You may convey a covered work under sections 3 and 4 of this License
without being bound by section 3 of the GNU GPL.
2. Conveying Modified Versions.
If you modify a copy of the Library, and, in your modifications, a
facility refers to a function or data to be supplied by an Application
that uses the facility (other than as an argument passed when the
facility is invoked), then you may convey a copy of the modified
version:
a) under this License, provided that you make a good faith effort to
ensure that, in the event an Application does not supply the
function or data, the facility still operates, and performs
whatever part of its purpose remains meaningful, or
b) under the GNU GPL, with none of the additional permissions of
this License applicable to that copy.
3. Object Code Incorporating Material from Library Header Files.
The object code form of an Application may incorporate material from
a header file that is part of the Library. You may convey such object
code under terms of your choice, provided that, if the incorporated
material is not limited to numerical parameters, data structure
layouts and accessors, or small macros, inline functions and templates
(ten or fewer lines in length), you do both of the following:
a) Give prominent notice with each copy of the object code that the
Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the object code with a copy of the GNU GPL and this license
document.
4. Combined Works.
You may convey a Combined Work under terms of your choice that,
taken together, effectively do not restrict modification of the
portions of the Library contained in the Combined Work and reverse
engineering for debugging such modifications, if you also do each of
the following:
a) Give prominent notice with each copy of the Combined Work that
the Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the Combined Work with a copy of the GNU GPL and this license
document.
c) For a Combined Work that displays copyright notices during
execution, include the copyright notice for the Library among
these notices, as well as a reference directing the user to the
copies of the GNU GPL and this license document.
d) Do one of the following:
0) Convey the Minimal Corresponding Source under the terms of this
License, and the Corresponding Application Code in a form
suitable for, and under terms that permit, the user to
recombine or relink the Application with a modified version of
the Linked Version to produce a modified Combined Work, in the
manner specified by section 6 of the GNU GPL for conveying
Corresponding Source.
1) Use a suitable shared library mechanism for linking with the
Library. A suitable mechanism is one that (a) uses at run time
a copy of the Library already present on the user's computer
system, and (b) will operate properly with a modified version
of the Library that is interface-compatible with the Linked
Version.
e) Provide Installation Information, but only if you would otherwise
be required to provide such information under section 6 of the
GNU GPL, and only to the extent that such information is
necessary to install and execute a modified version of the
Combined Work produced by recombining or relinking the
Application with a modified version of the Linked Version. (If
you use option 4d0, the Installation Information must accompany
the Minimal Corresponding Source and Corresponding Application
Code. If you use option 4d1, you must provide the Installation
Information in the manner specified by section 6 of the GNU GPL
for conveying Corresponding Source.)
5. Combined Libraries.
You may place library facilities that are a work based on the
Library side by side in a single library together with other library
facilities that are not Applications and are not covered by this
License, and convey such a combined library under terms of your
choice, if you do both of the following:
a) Accompany the combined library with a copy of the same work based
on the Library, uncombined with any other library facilities,
conveyed under the terms of this License.
b) Give prominent notice with the combined library that part of it
is a work based on the Library, and explaining where to find the
accompanying uncombined form of the same work.
6. Revised Versions of the GNU Lesser General Public License.
The Free Software Foundation may publish revised and/or new versions
of the GNU Lesser General Public License from time to time. Such new
versions will be similar in spirit to the present version, but may
differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the
Library as you received it specifies that a certain numbered version
of the GNU Lesser General Public License "or any later version"
applies to it, you have the option of following the terms and
conditions either of that published version or of any later version
published by the Free Software Foundation. If the Library as you
received it does not specify a version number of the GNU Lesser
General Public License, you may choose any version of the GNU Lesser
General Public License ever published by the Free Software Foundation.
If the Library as you received it specifies that a proxy can decide
whether future versions of the GNU Lesser General Public License shall
apply, that proxy's public statement of acceptance of any version is
permanent authorization for you to choose that version for the
Library.

View file

@@ -1,12 +1,11 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "lib_common.h"
#ifndef AES256_H
#define AES256_H
#define AES_BLOCK_SIZE 16
#define EXPANDED_KEY_SIZE 60
#define AES_EXPORT __attribute__((visibility("default")))
#ifdef __cplusplus
extern "C" {

View file

@@ -0,0 +1,55 @@
#include "aes256.h"
struct ctr256_ctx {
uint32_t expandedKey[EXPANDED_KEY_SIZE];
uint8_t* iv;
uint8_t state;
};
AES_EXPORT struct ctr256_ctx* ctr256_alloc(uint8_t* key, uint8_t* iv) {
struct ctr256_ctx *state = (struct ctr256_ctx *) __malloc(sizeof(struct ctr256_ctx));
aes256_set_encryption_key(key, state->expandedKey);
__free(key);
state->iv = iv;
state->state = 0;
return state;
}
AES_EXPORT void ctr256_free(struct ctr256_ctx* ctx) {
__free(ctx->iv);
__free(ctx);
}
AES_EXPORT void ctr256(struct ctr256_ctx* ctx, uint8_t* in, uint32_t length, uint8_t *out) {
uint8_t chunk[AES_BLOCK_SIZE];
uint32_t* expandedKey = ctx->expandedKey;
uint8_t* iv = ctx->iv;
uint8_t state = ctx->state;
uint32_t i, j, k;
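/* (Re)generate the keystream block for the current counter value; the first
 * `state` bytes of it may already have been consumed by a previous call. */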
aes256_encrypt(iv, chunk, expandedKey);
for (i = 0; i < length; i += AES_BLOCK_SIZE) {
for (j = 0; j < MIN(length - i, AES_BLOCK_SIZE); ++j) {
out[i + j] = in[i + j] ^ chunk[state++];
if (state >= AES_BLOCK_SIZE)
state = 0;
if (state == 0) {
k = AES_BLOCK_SIZE;
while(k--)
if (++iv[k])
break;
aes256_encrypt(iv, chunk, expandedKey);
}
}
}
__free(in);
ctx->state = state;
}
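/*
 * Usage sketch (an illustration, not part of the exported API above; buffer
 * ownership follows the __free() calls visible in this file -- the context
 * takes ownership of `key`, `iv` and each `in` buffer):
 *
 *   struct ctr256_ctx *ctx = ctr256_alloc(key, iv);
 *   ctr256(ctx, in, length, out);   // CTR encrypt == CTR decrypt
 *   ctr256_free(ctx);               // frees the iv and the context
 */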

View file

@@ -0,0 +1,6 @@
#ifndef CTR256_H
#define CTR256_H
#include <stdint.h>
struct ctr256_ctx;
struct ctr256_ctx *ctr256_alloc(uint8_t *key, uint8_t *iv);
void ctr256_free(struct ctr256_ctx *ctx);
void ctr256(struct ctr256_ctx *ctx, uint8_t *in, uint32_t length, uint8_t *out);
#endif

View file

@@ -1,6 +1,6 @@
#include "aes256.h"
void ige256_encrypt(uint8_t* in, uint32_t length, uint8_t* key, uint8_t* iv, uint8_t* out) {
AES_EXPORT void ige256_encrypt(uint8_t* in, uint32_t length, uint8_t* key, uint8_t* iv, uint8_t* out) {
uint32_t expandedKey[EXPANDED_KEY_SIZE];
uint32_t i, j;
@@ -29,7 +29,7 @@ void ige256_encrypt(uint8_t* in, uint32_t length, uint8_t* key, uint8_t* iv, uin
}
}
void ige256_decrypt(uint8_t* in, uint32_t length, uint8_t* key, uint8_t* iv, uint8_t* out) {
AES_EXPORT void ige256_decrypt(uint8_t* in, uint32_t length, uint8_t* key, uint8_t* iv, uint8_t* out) {
uint32_t expandedKey[EXPANDED_KEY_SIZE];
uint32_t i, j;

View file

@@ -0,0 +1,62 @@
/*
* lib_common.h - internal header included by all library code
*/
#ifndef LIB_LIB_COMMON_H
#define LIB_LIB_COMMON_H
#ifdef LIBDEFLATE_H
/*
* When building the library, LIBDEFLATEAPI needs to be defined properly before
* including libdeflate.h.
*/
# error "lib_common.h must always be included before libdeflate.h"
#endif
#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
#elif defined(__GNUC__)
# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
#else
# define LIBDEFLATE_EXPORT_SYM
#endif
/*
* On i386, gcc assumes that the stack is 16-byte aligned at function entry.
* However, some compilers (e.g. MSVC) and programming languages (e.g. Delphi)
* only guarantee 4-byte alignment when calling functions. This is mainly an
* issue on Windows, but it has been seen on Linux too. Work around this ABI
* incompatibility by realigning the stack pointer when entering libdeflate.
* This prevents crashes in SSE/AVX code.
*/
#if defined(__GNUC__) && defined(__i386__)
# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
#else
# define LIBDEFLATE_ALIGN_STACK
#endif
#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
#include "common_defs.h"
extern void* __malloc(size_t size);
extern void __free(void* ptr);
void *libdeflate_aligned_malloc(size_t alignment, size_t size);
void libdeflate_aligned_free(void *ptr);
#define ASSERT(expr) (void)(expr)
#define CONCAT_IMPL(a, b) a##b
#define CONCAT(a, b) CONCAT_IMPL(a, b)
#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
#ifdef LOGGING
void __debug(char* str);
#define DEBUG(str) __debug(str)
#else
#define DEBUG(str)
#endif
#endif /* LIB_LIB_COMMON_H */

View file

@@ -0,0 +1,245 @@
/*
* libdeflate.h - public header for libdeflate
*/
#ifndef LIBDEFLATE_H
#define LIBDEFLATE_H
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#define LIBDEFLATE_VERSION_MAJOR 1
#define LIBDEFLATE_VERSION_MINOR 19
#define LIBDEFLATE_VERSION_STRING "1.19"
/*
* Users of libdeflate.dll on Windows can define LIBDEFLATE_DLL to cause
* __declspec(dllimport) to be used. This should be done when it's easy to do.
* Otherwise it's fine to skip it, since it is a very minor performance
* optimization that is irrelevant for most use cases of libdeflate.
*/
#ifndef LIBDEFLATEAPI
# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
# define LIBDEFLATEAPI __declspec(dllimport)
# else
# define LIBDEFLATEAPI
# endif
#endif
/* ========================================================================== */
/* Compression */
/* ========================================================================== */
struct libdeflate_compressor;
struct libdeflate_options;
/*
* libdeflate_alloc_compressor() allocates a new compressor that supports
* DEFLATE, zlib, and gzip compression. 'compression_level' is the compression
* level on a zlib-like scale but with a higher maximum value (1 = fastest, 6 =
* medium/default, 9 = slow, 12 = slowest). Level 0 is also supported and means
* "no compression", specifically "create a valid stream, but only emit
* uncompressed blocks" (this will expand the data slightly).
*
* The return value is a pointer to the new compressor, or NULL if out of memory
* or if the compression level is invalid (i.e. outside the range [0, 12]).
*
 * Note: for compression, the sliding window size is fixed at compile time to
 * 32768 bytes, the largest size the DEFLATE format permits. It cannot be
 * changed at runtime.
*
* A single compressor is not safe to use by multiple threads concurrently.
* However, different threads may use different compressors concurrently.
*/
LIBDEFLATEAPI struct libdeflate_compressor *
libdeflate_alloc_compressor(int compression_level);
/*
* Like libdeflate_alloc_compressor(), but adds the 'options' argument.
*/
//LIBDEFLATEAPI struct libdeflate_compressor *
//libdeflate_alloc_compressor_ex(int compression_level,
// const struct libdeflate_options *options);
LIBDEFLATEAPI size_t
libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
const void *in, size_t in_nbytes,
void *out, size_t out_nbytes_avail);
//LIBDEFLATEAPI size_t
//libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
// size_t in_nbytes);
/*
* libdeflate_free_compressor() frees a compressor that was allocated with
* libdeflate_alloc_compressor(). If a NULL pointer is passed in, no action is
* taken.
*/
LIBDEFLATEAPI void
libdeflate_free_compressor(struct libdeflate_compressor *compressor);
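/*
 * Usage sketch (hedged; 'in', 'in_size', 'out' and 'out_avail' are the
 * caller's buffers, not names defined by this header):
 *
 *	struct libdeflate_compressor *c = libdeflate_alloc_compressor(6);
 *	size_t n = libdeflate_gzip_compress(c, in, in_size, out, out_avail);
 *	// n == 0 means the output did not fit in 'out_nbytes_avail' bytes
 *	libdeflate_free_compressor(c);
 */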
/* ========================================================================== */
/* Decompression */
/* ========================================================================== */
struct libdeflate_decompressor;
struct libdeflate_options;
/*
* libdeflate_alloc_decompressor() allocates a new decompressor that can be used
* for DEFLATE, zlib, and gzip decompression. The return value is a pointer to
* the new decompressor, or NULL if out of memory.
*
* This function takes no parameters, and the returned decompressor is valid for
* decompressing data that was compressed at any compression level and with any
* sliding window size.
*
* A single decompressor is not safe to use by multiple threads concurrently.
* However, different threads may use different decompressors concurrently.
*/
LIBDEFLATEAPI struct libdeflate_decompressor *
libdeflate_alloc_decompressor(void);
/*
* Like libdeflate_alloc_decompressor(), but adds the 'options' argument.
*/
//LIBDEFLATEAPI struct libdeflate_decompressor *
//libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
/*
* Result of a call to libdeflate_deflate_decompress(),
* libdeflate_zlib_decompress(), or libdeflate_gzip_decompress().
*/
enum libdeflate_result {
/* Decompression was successful. */
LIBDEFLATE_SUCCESS = 0,
/* Decompression failed because the compressed data was invalid,
* corrupt, or otherwise unsupported. */
LIBDEFLATE_BAD_DATA = 1,
/* A NULL 'actual_out_nbytes_ret' was provided, but the data would have
* decompressed to fewer than 'out_nbytes_avail' bytes. */
LIBDEFLATE_SHORT_OUTPUT = 2,
/* The data would have decompressed to more than 'out_nbytes_avail'
* bytes. */
LIBDEFLATE_INSUFFICIENT_SPACE = 3,
};
/*
* libdeflate_deflate_decompress() decompresses a DEFLATE stream from the buffer
* 'in' with compressed size up to 'in_nbytes' bytes. The uncompressed data is
* written to 'out', a buffer with size 'out_nbytes_avail' bytes. If
* decompression succeeds, then 0 (LIBDEFLATE_SUCCESS) is returned. Otherwise,
* a nonzero result code such as LIBDEFLATE_BAD_DATA is returned, and the
* contents of the output buffer are undefined.
*
* Decompression stops at the end of the DEFLATE stream (as indicated by the
* BFINAL flag), even if it is actually shorter than 'in_nbytes' bytes.
*
* libdeflate_deflate_decompress() can be used in cases where the actual
* uncompressed size is known (recommended) or unknown (not recommended):
*
* - If the actual uncompressed size is known, then pass the actual
* uncompressed size as 'out_nbytes_avail' and pass NULL for
* 'actual_out_nbytes_ret'. This makes libdeflate_deflate_decompress() fail
* with LIBDEFLATE_SHORT_OUTPUT if the data decompressed to fewer than the
* specified number of bytes.
*
* - If the actual uncompressed size is unknown, then provide a non-NULL
* 'actual_out_nbytes_ret' and provide a buffer with some size
* 'out_nbytes_avail' that you think is large enough to hold all the
* uncompressed data. In this case, if the data decompresses to less than
* or equal to 'out_nbytes_avail' bytes, then
* libdeflate_deflate_decompress() will write the actual uncompressed size
* to *actual_out_nbytes_ret and return 0 (LIBDEFLATE_SUCCESS). Otherwise,
* it will return LIBDEFLATE_INSUFFICIENT_SPACE if the provided buffer was
* not large enough but no other problems were encountered, or another
* nonzero result code if decompression failed for another reason.
*/
//LIBDEFLATEAPI enum libdeflate_result
//libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
// const void *in, size_t in_nbytes,
// void *out, size_t out_nbytes_avail,
// size_t *actual_out_nbytes_ret);
/*
* Like libdeflate_deflate_decompress(), but adds the 'actual_in_nbytes_ret'
* argument. If decompression succeeds and 'actual_in_nbytes_ret' is not NULL,
* then the actual compressed size of the DEFLATE stream (aligned to the next
* byte boundary) is written to *actual_in_nbytes_ret.
*/
enum libdeflate_result
libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
const void *in, size_t in_nbytes,
void *out, size_t out_nbytes_avail,
size_t *actual_in_nbytes_ret,
size_t *actual_out_nbytes_ret);
/*
* Like libdeflate_deflate_decompress(), but assumes the gzip wrapper format
* instead of raw DEFLATE.
*
* If multiple gzip-compressed members are concatenated, then only the first
* will be decompressed. Use libdeflate_gzip_decompress_ex() if you need
* multi-member support.
*/
LIBDEFLATEAPI enum libdeflate_result
libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
const void *in, size_t in_nbytes,
void *out, size_t out_nbytes_avail);
/*
* Like libdeflate_gzip_decompress(), but adds the 'actual_in_nbytes_ret'
* argument. If 'actual_in_nbytes_ret' is not NULL and the decompression
* succeeds (indicating that the first gzip-compressed member in the input
* buffer was decompressed), then the actual number of input bytes consumed is
* written to *actual_in_nbytes_ret.
*/
//LIBDEFLATEAPI enum libdeflate_result
//libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
// const void *in, size_t in_nbytes,
// void *out, size_t out_nbytes_avail,
// size_t *actual_in_nbytes_ret,
// size_t *actual_out_nbytes_ret);
/*
* libdeflate_free_decompressor() frees a decompressor that was allocated with
* libdeflate_alloc_decompressor(). If a NULL pointer is passed in, no action
* is taken.
*/
LIBDEFLATEAPI void
libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
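/*
 * Usage sketch (hedged; mirrors the compression example, with the caller
 * providing the buffers and knowing the uncompressed size up front):
 *
 *	struct libdeflate_decompressor *d = libdeflate_alloc_decompressor();
 *	enum libdeflate_result r =
 *		libdeflate_gzip_decompress(d, in, in_size, out, out_size);
 *	// r == LIBDEFLATE_SUCCESS (0) on success; see enum libdeflate_result
 *	libdeflate_free_decompressor(d);
 */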
/*
* Advanced options. This is the options structure that
* libdeflate_alloc_compressor_ex() and libdeflate_alloc_decompressor_ex()
* require. Most users won't need this and should just use the non-"_ex"
* functions instead. If you do need this, it should be initialized like this:
*
* struct libdeflate_options options;
*
* __builtin_memset(&options, 0, sizeof(options));
* options.sizeof_options = sizeof(options);
* // Then set the fields that you need to override the defaults for.
*/
struct libdeflate_options {
/*
* This field must be set to the struct size. This field exists for
* extensibility, so that fields can be appended to this struct in
* future versions of libdeflate while still supporting old binaries.
*/
size_t sizeof_options;
};
#ifdef __cplusplus
}
#endif
#endif /* LIBDEFLATE_H */

View file

@@ -0,0 +1,21 @@
Copyright 2016 Eric Biggers
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation files
(the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of the Software,
and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@@ -0,0 +1,123 @@
/*
* adler32.c - Adler-32 checksum algorithm
*
* Copyright 2016 Eric Biggers
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "lib_common.h"
/* The Adler-32 divisor, or "base", value */
#define DIVISOR 65521
/*
* MAX_CHUNK_LEN is the most bytes that can be processed without the possibility
* of s2 overflowing when it is represented as an unsigned 32-bit integer. This
* value was computed using the following Python script:
*
* divisor = 65521
* count = 0
* s1 = divisor - 1
* s2 = divisor - 1
* while True:
* s1 += 0xFF
* s2 += s1
* if s2 > 0xFFFFFFFF:
* break
* count += 1
* print(count)
*
* Note that to get the correct worst-case value, we must assume that every byte
* has value 0xFF and that s1 and s2 started with the highest possible values
* modulo the divisor.
*/
#define MAX_CHUNK_LEN 5552
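/*
 * Worked example of the checksum itself: for the input "abc", starting from
 * s1 = 1, s2 = 0:
 *
 *	'a' (97): s1 = 98,  s2 = 98
 *	'b' (98): s1 = 196, s2 = 294
 *	'c' (99): s1 = 295, s2 = 589
 *
 * giving (s2 << 16) | s1 = 0x024D0127.
 */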
static u32
adler32_generic(u32 adler, const u8 *p, size_t len)
{
u32 s1 = adler & 0xFFFF;
u32 s2 = adler >> 16;
const u8 * const end = p + len;
while (p != end) {
size_t chunk_len = MIN(end - p, MAX_CHUNK_LEN);
const u8 *chunk_end = p + chunk_len;
size_t num_unrolled_iterations = chunk_len / 4;
while (num_unrolled_iterations--) {
s1 += *p++;
s2 += s1;
s1 += *p++;
s2 += s1;
s1 += *p++;
s2 += s1;
s1 += *p++;
s2 += s1;
}
while (p != chunk_end) {
s1 += *p++;
s2 += s1;
}
s1 %= DIVISOR;
s2 %= DIVISOR;
}
return (s2 << 16) | s1;
}
/* Include architecture-specific implementation(s) if available. */
#undef DEFAULT_IMPL
#undef arch_select_adler32_func
typedef u32 (*adler32_func_t)(u32 adler, const u8 *p, size_t len);
#define DEFAULT_IMPL adler32_generic
#ifdef arch_select_adler32_func
static u32 dispatch_adler32(u32 adler, const u8 *p, size_t len);
static volatile adler32_func_t adler32_impl = dispatch_adler32;
/* Choose the best implementation at runtime. */
static u32 dispatch_adler32(u32 adler, const u8 *p, size_t len)
{
adler32_func_t f = arch_select_adler32_func();
if (f == NULL)
f = DEFAULT_IMPL;
adler32_impl = f;
return f(adler, p, len);
}
#else
/* The best implementation is statically known, so call it directly. */
#define adler32_impl DEFAULT_IMPL
#endif
u32
libdeflate_adler32(u32 adler, const void *buffer, size_t len)
{
if (buffer == NULL) /* Return initial value. */
return 1;
return adler32_impl(adler, buffer, len);
}
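/*
 * Usage sketch: checksums can be computed incrementally, since the state is
 * carried in the 32-bit 'adler' value ('buf1'/'buf2' are the caller's data):
 *
 *	u32 a = libdeflate_adler32(0, NULL, 0);	// initial value, always 1
 *	a = libdeflate_adler32(a, buf1, len1);
 *	a = libdeflate_adler32(a, buf2, len2);
 */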

View file

@@ -0,0 +1,8 @@
#ifndef LIB_DEFLATE_ADLER32_H
#define LIB_DEFLATE_ADLER32_H
#include "lib_common.h"
u32 libdeflate_adler32(u32 adler, const void *buffer, size_t len);
#endif /* LIB_DEFLATE_ADLER32_H */

View file

@@ -0,0 +1,342 @@
/*
* bt_matchfinder.h - Lempel-Ziv matchfinding with a hash table of binary trees
*
* Copyright 2016 Eric Biggers
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* ----------------------------------------------------------------------------
*
* This is a Binary Trees (bt) based matchfinder.
*
* The main data structure is a hash table where each hash bucket contains a
* binary tree of sequences whose first 4 bytes share the same hash code. Each
* sequence is identified by its starting position in the input buffer. Each
* binary tree is always sorted such that each left child represents a sequence
* lexicographically lesser than its parent and each right child represents a
* sequence lexicographically greater than its parent.
*
* The algorithm processes the input buffer sequentially. At each byte
* position, the hash code of the first 4 bytes of the sequence beginning at
* that position (the sequence being matched against) is computed. This
* identifies the hash bucket to use for that position. Then, a new binary tree
* node is created to represent the current sequence. Then, in a single tree
* traversal, the hash bucket's binary tree is searched for matches and is
* re-rooted at the new node.
*
* Compared to the simpler algorithm that uses linked lists instead of binary
* trees (see hc_matchfinder.h), the binary tree version gains more information
* at each node visitation. Ideally, the binary tree version will examine only
* 'log(n)' nodes to find the same matches that the linked list version will
* find by examining 'n' nodes. In addition, the binary tree version can
* examine fewer bytes at each node by taking advantage of the common prefixes
* that result from the sort order, whereas the linked list version may have to
* examine up to the full length of the match at each node.
*
* However, it is not always best to use the binary tree version. It requires
* nearly twice as much memory as the linked list version, and it takes time to
* keep the binary trees sorted, even at positions where the compressor does not
* need matches. Generally, when doing fast compression on small buffers,
* binary trees are the wrong approach. They are best suited for thorough
* compression and/or large buffers.
*
* ----------------------------------------------------------------------------
*/
#ifndef LIB_BT_MATCHFINDER_H
#define LIB_BT_MATCHFINDER_H
#include "matchfinder_common.h"
#define BT_MATCHFINDER_HASH3_ORDER 16
#define BT_MATCHFINDER_HASH3_WAYS 2
#define BT_MATCHFINDER_HASH4_ORDER 16
#define BT_MATCHFINDER_TOTAL_HASH_SIZE \
(((1UL << BT_MATCHFINDER_HASH3_ORDER) * BT_MATCHFINDER_HASH3_WAYS + \
(1UL << BT_MATCHFINDER_HASH4_ORDER)) * sizeof(mf_pos_t))
/* Representation of a match found by the bt_matchfinder */
struct lz_match {
/* The number of bytes matched. */
u16 length;
/* The offset back from the current position that was matched. */
u16 offset;
};
struct MATCHFINDER_ALIGNED bt_matchfinder {
/* The hash table for finding length 3 matches */
mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER][BT_MATCHFINDER_HASH3_WAYS];
/* The hash table which contains the roots of the binary trees for
* finding length 4+ matches */
mf_pos_t hash4_tab[1UL << BT_MATCHFINDER_HASH4_ORDER];
/* The child node references for the binary trees. The left and right
* children of the node for the sequence with position 'pos' are
* 'child_tab[pos * 2]' and 'child_tab[pos * 2 + 1]', respectively. */
mf_pos_t child_tab[2UL * MATCHFINDER_WINDOW_SIZE];
};
/* Prepare the matchfinder for a new input buffer. */
static void
bt_matchfinder_init(struct bt_matchfinder *mf)
{
STATIC_ASSERT(BT_MATCHFINDER_TOTAL_HASH_SIZE %
MATCHFINDER_SIZE_ALIGNMENT == 0);
matchfinder_init((mf_pos_t *)mf, BT_MATCHFINDER_TOTAL_HASH_SIZE);
}
static void
bt_matchfinder_slide_window(struct bt_matchfinder *mf)
{
STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
}
static mf_pos_t *
bt_left_child(struct bt_matchfinder *mf, s32 node)
{
return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 0];
}
static mf_pos_t *
bt_right_child(struct bt_matchfinder *mf, s32 node)
{
return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 1];
}
/* The minimum permissible value of 'max_len' for bt_matchfinder_get_matches()
* and bt_matchfinder_skip_byte(). There must be sufficiently many bytes
* remaining to load a 32-bit integer from the *next* position. */
#define BT_MATCHFINDER_REQUIRED_NBYTES 5
/* Advance the binary tree matchfinder by one byte, optionally recording
* matches. @record_matches should be a compile-time constant. */
static struct lz_match *
bt_matchfinder_advance_one_byte(struct bt_matchfinder * const mf,
const u8 * const in_base,
const ptrdiff_t cur_pos,
const u32 max_len,
const u32 nice_len,
const u32 max_search_depth,
u32 * const next_hashes,
struct lz_match *lz_matchptr,
const bool record_matches)
{
const u8 *in_next = in_base + cur_pos;
u32 depth_remaining = max_search_depth;
const s32 cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
u32 next_hashseq;
u32 hash3;
u32 hash4;
s32 cur_node;
#if BT_MATCHFINDER_HASH3_WAYS >= 2
s32 cur_node_2;
#endif
const u8 *matchptr;
mf_pos_t *pending_lt_ptr, *pending_gt_ptr;
u32 best_lt_len, best_gt_len;
u32 len;
u32 best_len = 3;
STATIC_ASSERT(BT_MATCHFINDER_HASH3_WAYS >= 1 &&
BT_MATCHFINDER_HASH3_WAYS <= 2);
next_hashseq = get_unaligned_le32(in_next + 1);
hash3 = next_hashes[0];
hash4 = next_hashes[1];
next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, BT_MATCHFINDER_HASH3_ORDER);
next_hashes[1] = lz_hash(next_hashseq, BT_MATCHFINDER_HASH4_ORDER);
prefetchw(&mf->hash3_tab[next_hashes[0]]);
prefetchw(&mf->hash4_tab[next_hashes[1]]);
cur_node = mf->hash3_tab[hash3][0];
mf->hash3_tab[hash3][0] = cur_pos;
#if BT_MATCHFINDER_HASH3_WAYS >= 2
cur_node_2 = mf->hash3_tab[hash3][1];
mf->hash3_tab[hash3][1] = cur_node;
#endif
if (record_matches && cur_node > cutoff) {
u32 seq3 = load_u24_unaligned(in_next);
if (seq3 == load_u24_unaligned(&in_base[cur_node])) {
lz_matchptr->length = 3;
lz_matchptr->offset = in_next - &in_base[cur_node];
lz_matchptr++;
}
#if BT_MATCHFINDER_HASH3_WAYS >= 2
else if (cur_node_2 > cutoff &&
seq3 == load_u24_unaligned(&in_base[cur_node_2]))
{
lz_matchptr->length = 3;
lz_matchptr->offset = in_next - &in_base[cur_node_2];
lz_matchptr++;
}
#endif
}
cur_node = mf->hash4_tab[hash4];
mf->hash4_tab[hash4] = cur_pos;
pending_lt_ptr = bt_left_child(mf, cur_pos);
pending_gt_ptr = bt_right_child(mf, cur_pos);
if (cur_node <= cutoff) {
*pending_lt_ptr = MATCHFINDER_INITVAL;
*pending_gt_ptr = MATCHFINDER_INITVAL;
return lz_matchptr;
}
best_lt_len = 0;
best_gt_len = 0;
len = 0;
for (;;) {
matchptr = &in_base[cur_node];
if (matchptr[len] == in_next[len]) {
len = lz_extend(in_next, matchptr, len + 1, max_len);
if (!record_matches || len > best_len) {
if (record_matches) {
best_len = len;
lz_matchptr->length = len;
lz_matchptr->offset = in_next - matchptr;
lz_matchptr++;
}
if (len >= nice_len) {
*pending_lt_ptr = *bt_left_child(mf, cur_node);
*pending_gt_ptr = *bt_right_child(mf, cur_node);
return lz_matchptr;
}
}
}
if (matchptr[len] < in_next[len]) {
*pending_lt_ptr = cur_node;
pending_lt_ptr = bt_right_child(mf, cur_node);
cur_node = *pending_lt_ptr;
best_lt_len = len;
if (best_gt_len < len)
len = best_gt_len;
} else {
*pending_gt_ptr = cur_node;
pending_gt_ptr = bt_left_child(mf, cur_node);
cur_node = *pending_gt_ptr;
best_gt_len = len;
if (best_lt_len < len)
len = best_lt_len;
}
if (cur_node <= cutoff || !--depth_remaining) {
*pending_lt_ptr = MATCHFINDER_INITVAL;
*pending_gt_ptr = MATCHFINDER_INITVAL;
return lz_matchptr;
}
}
}
/*
* Retrieve a list of matches with the current position.
*
* @mf
* The matchfinder structure.
* @in_base
* Pointer to the next byte in the input buffer to process _at the last
* time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
* @cur_pos
* The current position in the input buffer relative to @in_base (the
* position of the sequence being matched against).
* @max_len
* The maximum permissible match length at this position. Must be >=
* BT_MATCHFINDER_REQUIRED_NBYTES.
* @nice_len
* Stop searching if a match of at least this length is found.
* Must be <= @max_len.
* @max_search_depth
* Limit on the number of potential matches to consider. Must be >= 1.
* @next_hashes
* The precomputed hash codes for the sequence beginning at @in_next.
* These will be used and then updated with the precomputed hashcodes for
* the sequence beginning at @in_next + 1.
* @lz_matchptr
* An array in which this function will record the matches. The recorded
* matches will be sorted by strictly increasing length and (non-strictly)
* increasing offset. The maximum number of matches that may be found is
* 'nice_len - 2'.
*
* The return value is a pointer to the next available slot in the @lz_matchptr
* array. (If no matches were found, this will be the same as @lz_matchptr.)
*/
static struct lz_match *
bt_matchfinder_get_matches(struct bt_matchfinder *mf,
const u8 *in_base,
ptrdiff_t cur_pos,
u32 max_len,
u32 nice_len,
u32 max_search_depth,
u32 next_hashes[2],
struct lz_match *lz_matchptr)
{
return bt_matchfinder_advance_one_byte(mf,
in_base,
cur_pos,
max_len,
nice_len,
max_search_depth,
next_hashes,
lz_matchptr,
true);
}
/*
* Advance the matchfinder, but don't record any matches.
*
* This is very similar to bt_matchfinder_get_matches() because both functions
* must do hashing and tree re-rooting.
*/
static void
bt_matchfinder_skip_byte(struct bt_matchfinder *mf,
const u8 *in_base,
ptrdiff_t cur_pos,
u32 nice_len,
u32 max_search_depth,
u32 next_hashes[2])
{
bt_matchfinder_advance_one_byte(mf,
in_base,
cur_pos,
nice_len,
nice_len,
max_search_depth,
next_hashes,
NULL,
false);
}
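/*
 * Usage sketch (hedged; a simplified compressor driver loop -- 'in',
 * 'in_len', the tuning parameters, and the size of 'matches' are the
 * caller's responsibility; 'matches' needs at least 'nice_len - 2' slots):
 *
 *	struct bt_matchfinder *mf = ...;	// MATCHFINDER_ALIGNED storage
 *	u32 next_hashes[2] = { 0, 0 };
 *	struct lz_match matches[258];
 *	struct lz_match *end;
 *	bt_matchfinder_init(mf);
 *	for (ptrdiff_t pos = 0; in_len - pos >= BT_MATCHFINDER_REQUIRED_NBYTES; pos++) {
 *		end = bt_matchfinder_get_matches(mf, in, pos, max_len,
 *				nice_len, max_search_depth, next_hashes, matches);
 *		// matches[0 .. end - matches) are sorted by increasing length
 *	}
 */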
#endif /* LIB_BT_MATCHFINDER_H */

View file

@@ -0,0 +1,777 @@
/*
* decompress_template.h
*
* Copyright 2016 Eric Biggers
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* This is the actual DEFLATE decompression routine, lifted out of
* deflate_decompress.c so that it can be compiled multiple times with different
* target instruction sets.
*/
#ifndef ATTRIBUTES
# define ATTRIBUTES
#endif
#ifndef EXTRACT_VARBITS
# define EXTRACT_VARBITS(word, count) ((word) & BITMASK(count))
#endif
#ifndef EXTRACT_VARBITS8
# define EXTRACT_VARBITS8(word, count) ((word) & BITMASK((u8)(count)))
#endif
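/* E.g. EXTRACT_VARBITS(0x1FF, 4) == 0xF: keep only the low 'count' bits. */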
static enum libdeflate_result ATTRIBUTES
FUNCNAME(struct libdeflate_decompressor * restrict d,
const void * restrict in, size_t in_nbytes,
void * restrict out, size_t out_nbytes_avail,
size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret)
{
u8 *out_next = out;
u8 * const out_end = out_next + out_nbytes_avail;
u8 * const out_fastloop_end =
out_end - MIN(out_nbytes_avail, FASTLOOP_MAX_BYTES_WRITTEN);
/* Input bitstream state; see deflate_decompress.c for documentation */
const u8 *in_next = in;
const u8 * const in_end = in_next + in_nbytes;
const u8 * const in_fastloop_end =
in_end - MIN(in_nbytes, FASTLOOP_MAX_BYTES_READ);
bitbuf_t bitbuf = 0;
bitbuf_t saved_bitbuf;
u32 bitsleft = 0;
size_t overread_count = 0;
bool is_final_block;
unsigned block_type;
unsigned num_litlen_syms;
unsigned num_offset_syms;
bitbuf_t litlen_tablemask;
u32 entry;
next_block:
/* Starting to read the next block */
;
STATIC_ASSERT(CAN_CONSUME(1 + 2 + 5 + 5 + 4 + 3));
REFILL_BITS();
/* BFINAL: 1 bit */
is_final_block = bitbuf & BITMASK(1);
/* BTYPE: 2 bits */
block_type = (bitbuf >> 1) & BITMASK(2);
if (block_type == DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN) {
/* Dynamic Huffman block */
/* The order in which precode lengths are stored */
static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = {
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
};
unsigned num_explicit_precode_lens;
unsigned i;
/* Read the codeword length counts. */
STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 257 + BITMASK(5));
num_litlen_syms = 257 + ((bitbuf >> 3) & BITMASK(5));
STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 1 + BITMASK(5));
num_offset_syms = 1 + ((bitbuf >> 8) & BITMASK(5));
STATIC_ASSERT(DEFLATE_NUM_PRECODE_SYMS == 4 + BITMASK(4));
num_explicit_precode_lens = 4 + ((bitbuf >> 13) & BITMASK(4));
d->static_codes_loaded = false;
/*
* Read the precode codeword lengths.
*
* A 64-bit bitbuffer is just one bit too small to hold the
* maximum number of precode lens, so to minimize branches we
* merge one len with the previous fields.
*/
STATIC_ASSERT(DEFLATE_MAX_PRE_CODEWORD_LEN == (1 << 3) - 1);
if (CAN_CONSUME(3 * (DEFLATE_NUM_PRECODE_SYMS - 1))) {
d->u.precode_lens[deflate_precode_lens_permutation[0]] =
(bitbuf >> 17) & BITMASK(3);
bitbuf >>= 20;
bitsleft -= 20;
REFILL_BITS();
i = 1;
do {
d->u.precode_lens[deflate_precode_lens_permutation[i]] =
bitbuf & BITMASK(3);
bitbuf >>= 3;
bitsleft -= 3;
} while (++i < num_explicit_precode_lens);
} else {
bitbuf >>= 17;
bitsleft -= 17;
i = 0;
do {
if ((u8)bitsleft < 3)
REFILL_BITS();
d->u.precode_lens[deflate_precode_lens_permutation[i]] =
bitbuf & BITMASK(3);
bitbuf >>= 3;
bitsleft -= 3;
} while (++i < num_explicit_precode_lens);
}
for (; i < DEFLATE_NUM_PRECODE_SYMS; i++)
d->u.precode_lens[deflate_precode_lens_permutation[i]] = 0;
/* Build the decode table for the precode. */
SAFETY_CHECK(build_precode_decode_table(d));
/* Decode the litlen and offset codeword lengths. */
i = 0;
do {
unsigned presym;
u8 rep_val;
unsigned rep_count;
if ((u8)bitsleft < DEFLATE_MAX_PRE_CODEWORD_LEN + 7)
REFILL_BITS();
/*
* The code below assumes that the precode decode table
* doesn't have any subtables.
*/
STATIC_ASSERT(PRECODE_TABLEBITS == DEFLATE_MAX_PRE_CODEWORD_LEN);
/* Decode the next precode symbol. */
entry = d->u.l.precode_decode_table[
bitbuf & BITMASK(DEFLATE_MAX_PRE_CODEWORD_LEN)];
bitbuf >>= (u8)entry;
bitsleft -= entry; /* optimization: subtract full entry */
presym = entry >> 16;
if (presym < 16) {
/* Explicit codeword length */
d->u.l.lens[i++] = presym;
continue;
}
/* Run-length encoded codeword lengths */
/*
* Note: we don't need to immediately verify that the
* repeat count doesn't overflow the number of elements,
* since we've sized the lens array to have enough extra
* space to allow for the worst-case overrun (138 zeroes
* when only 1 length was remaining).
*
* In the case of the small repeat counts (presyms 16
* and 17), it is fastest to always write the maximum
* number of entries. That gets rid of branches that
* would otherwise be required.
*
* It is not just because of the numerical order that
* our checks go in the order 'presym < 16', 'presym ==
* 16', and 'presym == 17'. For typical data this is
* ordered from most frequent to least frequent case.
*/
STATIC_ASSERT(DEFLATE_MAX_LENS_OVERRUN == 138 - 1);
if (presym == 16) {
/* Repeat the previous length 3 - 6 times. */
SAFETY_CHECK(i != 0);
rep_val = d->u.l.lens[i - 1];
STATIC_ASSERT(3 + BITMASK(2) == 6);
rep_count = 3 + (bitbuf & BITMASK(2));
bitbuf >>= 2;
bitsleft -= 2;
d->u.l.lens[i + 0] = rep_val;
d->u.l.lens[i + 1] = rep_val;
d->u.l.lens[i + 2] = rep_val;
d->u.l.lens[i + 3] = rep_val;
d->u.l.lens[i + 4] = rep_val;
d->u.l.lens[i + 5] = rep_val;
i += rep_count;
} else if (presym == 17) {
/* Repeat zero 3 - 10 times. */
STATIC_ASSERT(3 + BITMASK(3) == 10);
rep_count = 3 + (bitbuf & BITMASK(3));
bitbuf >>= 3;
bitsleft -= 3;
d->u.l.lens[i + 0] = 0;
d->u.l.lens[i + 1] = 0;
d->u.l.lens[i + 2] = 0;
d->u.l.lens[i + 3] = 0;
d->u.l.lens[i + 4] = 0;
d->u.l.lens[i + 5] = 0;
d->u.l.lens[i + 6] = 0;
d->u.l.lens[i + 7] = 0;
d->u.l.lens[i + 8] = 0;
d->u.l.lens[i + 9] = 0;
i += rep_count;
} else {
/* Repeat zero 11 - 138 times. */
STATIC_ASSERT(11 + BITMASK(7) == 138);
rep_count = 11 + (bitbuf & BITMASK(7));
bitbuf >>= 7;
bitsleft -= 7;
__builtin_memset(&d->u.l.lens[i], 0,
rep_count * sizeof(d->u.l.lens[i]));
i += rep_count;
}
} while (i < num_litlen_syms + num_offset_syms);
/* Unnecessary, but check this for consistency with zlib. */
SAFETY_CHECK(i == num_litlen_syms + num_offset_syms);
} else if (block_type == DEFLATE_BLOCKTYPE_UNCOMPRESSED) {
u16 len, nlen;
/*
* Uncompressed block: copy 'len' bytes literally from the input
* buffer to the output buffer.
*/
bitsleft -= 3; /* for BTYPE and BFINAL */
/*
* Align the bitstream to the next byte boundary. This means
* the next byte boundary as if we were reading a byte at a
* time. Therefore, we have to rewind 'in_next' by any bytes
* that have been refilled but not actually consumed yet (not
* counting overread bytes, which don't increment 'in_next').
*/
bitsleft = (u8)bitsleft;
SAFETY_CHECK(overread_count <= (bitsleft >> 3));
in_next -= (bitsleft >> 3) - overread_count;
overread_count = 0;
bitbuf = 0;
bitsleft = 0;
SAFETY_CHECK(in_end - in_next >= 4);
len = get_unaligned_le16(in_next);
nlen = get_unaligned_le16(in_next + 2);
in_next += 4;
SAFETY_CHECK(len == (u16)~nlen);
if (unlikely(len > out_end - out_next))
return LIBDEFLATE_INSUFFICIENT_SPACE;
SAFETY_CHECK(len <= in_end - in_next);
__builtin_memcpy(out_next, in_next, len);
in_next += len;
out_next += len;
goto block_done;
} else {
unsigned i;
SAFETY_CHECK(block_type == DEFLATE_BLOCKTYPE_STATIC_HUFFMAN);
/*
* Static Huffman block: build the decode tables for the static
* codes. Skip doing so if the tables are already set up from
* an earlier static block; this speeds up decompression of
* degenerate input of many empty or very short static blocks.
*
* Afterwards, the remainder is the same as decompressing a
* dynamic Huffman block.
*/
bitbuf >>= 3; /* for BTYPE and BFINAL */
bitsleft -= 3;
if (d->static_codes_loaded)
goto have_decode_tables;
d->static_codes_loaded = true;
STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 288);
STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 32);
for (i = 0; i < 144; i++)
d->u.l.lens[i] = 8;
for (; i < 256; i++)
d->u.l.lens[i] = 9;
for (; i < 280; i++)
d->u.l.lens[i] = 7;
for (; i < 288; i++)
d->u.l.lens[i] = 8;
for (; i < 288 + 32; i++)
d->u.l.lens[i] = 5;
num_litlen_syms = 288;
num_offset_syms = 32;
}
/* Decompressing a Huffman block (either dynamic or static) */
SAFETY_CHECK(build_offset_decode_table(d, num_litlen_syms, num_offset_syms));
SAFETY_CHECK(build_litlen_decode_table(d, num_litlen_syms, num_offset_syms));
have_decode_tables:
litlen_tablemask = BITMASK(d->litlen_tablebits);
/*
* This is the "fastloop" for decoding literals and matches. It does
* bounds checks on in_next and out_next in the loop conditions so that
* additional bounds checks aren't needed inside the loop body.
*
* To reduce latency, the bitbuffer is refilled and the next litlen
* decode table entry is preloaded before each loop iteration.
*/
if (in_next >= in_fastloop_end || out_next >= out_fastloop_end)
goto generic_loop;
REFILL_BITS_IN_FASTLOOP();
entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
do {
u32 length, offset, lit;
const u8 *src;
u8 *dst;
/*
* Consume the bits for the litlen decode table entry. Save the
* original bitbuf for later, in case the extra match length
* bits need to be extracted from it.
*/
saved_bitbuf = bitbuf;
bitbuf >>= (u8)entry;
bitsleft -= entry; /* optimization: subtract full entry */
/*
* Begin by checking for a "fast" literal, i.e. a literal that
* doesn't need a subtable.
*/
if (entry & HUFFDEC_LITERAL) {
/*
* On 64-bit platforms, we decode up to 2 extra fast
* literals in addition to the primary item, as this
* increases performance and still leaves enough bits
* remaining for what follows. We could actually do 3,
* assuming LITLEN_TABLEBITS=11, but that actually
* decreases performance slightly (perhaps by messing
* with the branch prediction of the conditional refill
* that happens later while decoding the match offset).
*
* Note: the definitions of FASTLOOP_MAX_BYTES_WRITTEN
* and FASTLOOP_MAX_BYTES_READ need to be updated if the
* number of extra literals decoded here is changed.
*/
if (/* enough bits for 2 fast literals + length + offset preload? */
CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS +
LENGTH_MAXBITS,
OFFSET_TABLEBITS) &&
/* enough bits for 2 fast literals + slow literal + litlen preload? */
CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS +
DEFLATE_MAX_LITLEN_CODEWORD_LEN,
LITLEN_TABLEBITS)) {
/* 1st extra fast literal */
lit = entry >> 16;
entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
saved_bitbuf = bitbuf;
bitbuf >>= (u8)entry;
bitsleft -= entry;
*out_next++ = lit;
if (entry & HUFFDEC_LITERAL) {
/* 2nd extra fast literal */
lit = entry >> 16;
entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
saved_bitbuf = bitbuf;
bitbuf >>= (u8)entry;
bitsleft -= entry;
*out_next++ = lit;
if (entry & HUFFDEC_LITERAL) {
/*
* Another fast literal, but
* this one is in lieu of the
* primary item, so it doesn't
* count as one of the extras.
*/
lit = entry >> 16;
entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
REFILL_BITS_IN_FASTLOOP();
*out_next++ = lit;
continue;
}
}
} else {
/*
* Decode a literal. While doing so, preload
* the next litlen decode table entry and refill
* the bitbuffer. To reduce latency, we've
* arranged for there to be enough "preloadable"
* bits remaining to do the table preload
* independently of the refill.
*/
STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD(
LITLEN_TABLEBITS, LITLEN_TABLEBITS));
lit = entry >> 16;
entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
REFILL_BITS_IN_FASTLOOP();
*out_next++ = lit;
continue;
}
}
/*
* It's not a literal entry, so it can be a length entry, a
* subtable pointer entry, or an end-of-block entry. Detect the
* two unlikely cases by testing the HUFFDEC_EXCEPTIONAL flag.
*/
if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
/* Subtable pointer or end-of-block entry */
if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
goto block_done;
/*
* A subtable is required. Load and consume the
* subtable entry. The subtable entry can be of any
* type: literal, length, or end-of-block.
*/
entry = d->u.litlen_decode_table[(entry >> 16) +
EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
saved_bitbuf = bitbuf;
bitbuf >>= (u8)entry;
bitsleft -= entry;
/*
* 32-bit platforms that use the byte-at-a-time refill
* method have to do a refill here for there to always
* be enough bits to decode a literal that requires a
* subtable, then preload the next litlen decode table
* entry; or to decode a match length that requires a
* subtable, then preload the offset decode table entry.
*/
if (!CAN_CONSUME_AND_THEN_PRELOAD(DEFLATE_MAX_LITLEN_CODEWORD_LEN,
LITLEN_TABLEBITS) ||
!CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXBITS,
OFFSET_TABLEBITS))
REFILL_BITS_IN_FASTLOOP();
if (entry & HUFFDEC_LITERAL) {
/* Decode a literal that required a subtable. */
lit = entry >> 16;
entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
REFILL_BITS_IN_FASTLOOP();
*out_next++ = lit;
continue;
}
if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
goto block_done;
/* Else, it's a length that required a subtable. */
}
/*
* Decode the match length: the length base value associated
* with the litlen symbol (which we extract from the decode
* table entry), plus the extra length bits. We don't need to
* consume the extra length bits here, as they were included in
* the bits consumed by the entry earlier. We also don't need
* to check for too-long matches here, as this is inside the
* fastloop where it's already been verified that the output
* buffer has enough space remaining to copy a max-length match.
*/
length = entry >> 16;
length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
/*
* Decode the match offset. There are enough "preloadable" bits
* remaining to preload the offset decode table entry, but a
* refill might be needed before consuming it.
*/
STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXFASTBITS,
OFFSET_TABLEBITS));
entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)];
if (CAN_CONSUME_AND_THEN_PRELOAD(OFFSET_MAXBITS,
LITLEN_TABLEBITS)) {
/*
* Decoding a match offset on a 64-bit platform. We may
* need to refill once, but then we can decode the whole
* offset and preload the next litlen table entry.
*/
if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
/* Offset codeword requires a subtable */
if (unlikely((u8)bitsleft < OFFSET_MAXBITS +
LITLEN_TABLEBITS - PRELOAD_SLACK))
REFILL_BITS_IN_FASTLOOP();
bitbuf >>= OFFSET_TABLEBITS;
bitsleft -= OFFSET_TABLEBITS;
entry = d->offset_decode_table[(entry >> 16) +
EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
} else if (unlikely((u8)bitsleft < OFFSET_MAXFASTBITS +
LITLEN_TABLEBITS - PRELOAD_SLACK))
REFILL_BITS_IN_FASTLOOP();
} else {
/* Decoding a match offset on a 32-bit platform */
REFILL_BITS_IN_FASTLOOP();
if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
/* Offset codeword requires a subtable */
bitbuf >>= OFFSET_TABLEBITS;
bitsleft -= OFFSET_TABLEBITS;
entry = d->offset_decode_table[(entry >> 16) +
EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
REFILL_BITS_IN_FASTLOOP();
/* No further refill needed before extra bits */
STATIC_ASSERT(CAN_CONSUME(
OFFSET_MAXBITS - OFFSET_TABLEBITS));
} else {
/* No refill needed before extra bits */
STATIC_ASSERT(CAN_CONSUME(OFFSET_MAXFASTBITS));
}
}
saved_bitbuf = bitbuf;
bitbuf >>= (u8)entry;
bitsleft -= entry; /* optimization: subtract full entry */
offset = entry >> 16;
offset += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
/* Validate the match offset; needed even in the fastloop. */
SAFETY_CHECK(offset <= out_next - (const u8 *)out);
src = out_next - offset;
dst = out_next;
out_next += length;
/*
* Before starting to issue the instructions to copy the match,
* refill the bitbuffer and preload the litlen decode table
* entry for the next loop iteration. This can increase
* performance by allowing the latency of the match copy to
* overlap with these other operations. To further reduce
* latency, we've arranged for there to be enough bits remaining
* to do the table preload independently of the refill, except
* on 32-bit platforms using the byte-at-a-time refill method.
*/
if (!CAN_CONSUME_AND_THEN_PRELOAD(
MAX(OFFSET_MAXBITS - OFFSET_TABLEBITS,
OFFSET_MAXFASTBITS),
LITLEN_TABLEBITS) &&
unlikely((u8)bitsleft < LITLEN_TABLEBITS - PRELOAD_SLACK))
REFILL_BITS_IN_FASTLOOP();
entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
REFILL_BITS_IN_FASTLOOP();
/*
* Copy the match. On most CPUs the fastest method is a
* word-at-a-time copy, unconditionally copying about 5 words
* since this is enough for most matches without being too much.
*
* The normal word-at-a-time copy works for offset >= WORDBYTES,
* which is most cases. The case of offset == 1 is also common
* and is worth optimizing for, since it is just RLE encoding of
* the previous byte, which is the result of compressing long
* runs of the same byte.
*
* Writing past the match 'length' is allowed here, since it's
* been ensured there is enough output space left for a slight
* overrun. FASTLOOP_MAX_BYTES_WRITTEN needs to be updated if
* the maximum possible overrun here is changed.
*/
if (UNALIGNED_ACCESS_IS_FAST && offset >= WORDBYTES) {
store_word_unaligned(load_word_unaligned(src), dst);
src += WORDBYTES;
dst += WORDBYTES;
store_word_unaligned(load_word_unaligned(src), dst);
src += WORDBYTES;
dst += WORDBYTES;
store_word_unaligned(load_word_unaligned(src), dst);
src += WORDBYTES;
dst += WORDBYTES;
store_word_unaligned(load_word_unaligned(src), dst);
src += WORDBYTES;
dst += WORDBYTES;
store_word_unaligned(load_word_unaligned(src), dst);
src += WORDBYTES;
dst += WORDBYTES;
while (dst < out_next) {
store_word_unaligned(load_word_unaligned(src), dst);
src += WORDBYTES;
dst += WORDBYTES;
store_word_unaligned(load_word_unaligned(src), dst);
src += WORDBYTES;
dst += WORDBYTES;
store_word_unaligned(load_word_unaligned(src), dst);
src += WORDBYTES;
dst += WORDBYTES;
store_word_unaligned(load_word_unaligned(src), dst);
src += WORDBYTES;
dst += WORDBYTES;
store_word_unaligned(load_word_unaligned(src), dst);
src += WORDBYTES;
dst += WORDBYTES;
}
} else if (UNALIGNED_ACCESS_IS_FAST && offset == 1) {
machine_word_t v;
/*
* This part tends to get auto-vectorized, so keep it
* copying a multiple of 16 bytes at a time.
*/
v = (machine_word_t)0x0101010101010101 * src[0];
store_word_unaligned(v, dst);
dst += WORDBYTES;
store_word_unaligned(v, dst);
dst += WORDBYTES;
store_word_unaligned(v, dst);
dst += WORDBYTES;
store_word_unaligned(v, dst);
dst += WORDBYTES;
while (dst < out_next) {
store_word_unaligned(v, dst);
dst += WORDBYTES;
store_word_unaligned(v, dst);
dst += WORDBYTES;
store_word_unaligned(v, dst);
dst += WORDBYTES;
store_word_unaligned(v, dst);
dst += WORDBYTES;
}
} else if (UNALIGNED_ACCESS_IS_FAST) {
store_word_unaligned(load_word_unaligned(src), dst);
src += offset;
dst += offset;
store_word_unaligned(load_word_unaligned(src), dst);
src += offset;
dst += offset;
do {
store_word_unaligned(load_word_unaligned(src), dst);
src += offset;
dst += offset;
store_word_unaligned(load_word_unaligned(src), dst);
src += offset;
dst += offset;
} while (dst < out_next);
} else {
*dst++ = *src++;
*dst++ = *src++;
do {
*dst++ = *src++;
} while (dst < out_next);
}
} while (in_next < in_fastloop_end && out_next < out_fastloop_end);
/*
* This is the generic loop for decoding literals and matches. This
* handles cases where in_next and out_next are close to the end of
* their respective buffers. Usually this loop isn't performance-
* critical, as most time is spent in the fastloop above instead. We
* therefore omit some optimizations here in favor of smaller code.
*/
generic_loop:
for (;;) {
u32 length, offset;
const u8 *src;
u8 *dst;
REFILL_BITS();
entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
saved_bitbuf = bitbuf;
bitbuf >>= (u8)entry;
bitsleft -= entry;
if (unlikely(entry & HUFFDEC_SUBTABLE_POINTER)) {
entry = d->u.litlen_decode_table[(entry >> 16) +
EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
saved_bitbuf = bitbuf;
bitbuf >>= (u8)entry;
bitsleft -= entry;
}
length = entry >> 16;
if (entry & HUFFDEC_LITERAL) {
if (unlikely(out_next == out_end))
return LIBDEFLATE_INSUFFICIENT_SPACE;
*out_next++ = length;
continue;
}
if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
goto block_done;
length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
if (unlikely(length > out_end - out_next))
return LIBDEFLATE_INSUFFICIENT_SPACE;
if (!CAN_CONSUME(LENGTH_MAXBITS + OFFSET_MAXBITS))
REFILL_BITS();
entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)];
if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
bitbuf >>= OFFSET_TABLEBITS;
bitsleft -= OFFSET_TABLEBITS;
entry = d->offset_decode_table[(entry >> 16) +
EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
if (!CAN_CONSUME(OFFSET_MAXBITS))
REFILL_BITS();
}
offset = entry >> 16;
offset += EXTRACT_VARBITS8(bitbuf, entry) >> (u8)(entry >> 8);
bitbuf >>= (u8)entry;
bitsleft -= entry;
SAFETY_CHECK(offset <= out_next - (const u8 *)out);
src = out_next - offset;
dst = out_next;
out_next += length;
STATIC_ASSERT(DEFLATE_MIN_MATCH_LEN == 3);
*dst++ = *src++;
*dst++ = *src++;
do {
*dst++ = *src++;
} while (dst < out_next);
}
block_done:
/* Finished decoding a block */
if (!is_final_block)
goto next_block;
/* That was the last block. */
bitsleft = (u8)bitsleft;
/*
* If any of the implicit appended zero bytes were consumed (not just
* refilled) before hitting end of stream, then the data is bad.
*/
SAFETY_CHECK(overread_count <= (bitsleft >> 3));
/* Optionally return the actual number of bytes consumed. */
if (actual_in_nbytes_ret) {
/* Don't count bytes that were refilled but not consumed. */
in_next -= (bitsleft >> 3) - overread_count;
*actual_in_nbytes_ret = in_next - (u8 *)in;
}
/* Optionally return the actual number of bytes written. */
if (actual_out_nbytes_ret) {
*actual_out_nbytes_ret = out_next - (u8 *)out;
} else {
if (out_next != out_end)
return LIBDEFLATE_SHORT_OUTPUT;
}
return LIBDEFLATE_SUCCESS;
}
#undef FUNCNAME
#undef ATTRIBUTES
#undef EXTRACT_VARBITS
#undef EXTRACT_VARBITS8
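/*
 * Illustrative sketch (not from libdeflate): how the packed decode-table
 * entries above are consumed. The field layout is reconstructed from the
 * accesses visible in this file -- base value in bits 16..31, codeword
 * length in bits 8..15, total bits to consume (codeword + extra bits) in
 * the low byte -- and should be read as an approximation, not the exact
 * format the table builder emits.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* A length codeword: base 11, 7-bit code followed by 2 extra bits. */
	uint32_t entry = (11u << 16) | (7u << 8) | 9u;
	uint64_t bitbuf = 0x1D5;	/* next 9 bits of the input stream */
	uint64_t saved_bitbuf = bitbuf;
	uint32_t length;

	bitbuf >>= (uint8_t)entry;	/* consume code and extra bits at once */
	length = entry >> 16;
	/* EXTRACT_VARBITS8 equivalent: mask off the consumed bits, then
	 * shift out the codeword, leaving just the extra bits (0b11 = 3). */
	length += (uint32_t)((saved_bitbuf & ((1u << (uint8_t)entry) - 1)) >>
			     (uint8_t)(entry >> 8));
	printf("decoded length = %u\n", length);	/* 11 + 3 = 14 */
	return 0;
}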

File diff suppressed because it is too large


@ -0,0 +1,20 @@
#ifndef LIB_DEFLATE_COMPRESS_H
#define LIB_DEFLATE_COMPRESS_H
#include "lib_common.h"
/*
* DEFLATE compression is private to deflate_compress.c, but we do need to be
* able to query the compression level for zlib and gzip header generation.
*/
struct libdeflate_compressor;
unsigned int libdeflate_get_compression_level(struct libdeflate_compressor *c);
size_t libdeflate_deflate_compress(struct libdeflate_compressor *c,
const void *in, size_t in_nbytes,
void *out, size_t out_nbytes_avail);
size_t libdeflate_deflate_compress_bound(struct libdeflate_compressor *c, size_t in_nbytes);
#endif /* LIB_DEFLATE_COMPRESS_H */


@ -0,0 +1,56 @@
/*
* deflate_constants.h - constants for the DEFLATE compression format
*/
#ifndef LIB_DEFLATE_CONSTANTS_H
#define LIB_DEFLATE_CONSTANTS_H
/* Valid block types */
#define DEFLATE_BLOCKTYPE_UNCOMPRESSED 0
#define DEFLATE_BLOCKTYPE_STATIC_HUFFMAN 1
#define DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN 2
/* Minimum and maximum supported match lengths (in bytes) */
#define DEFLATE_MIN_MATCH_LEN 3
#define DEFLATE_MAX_MATCH_LEN 258
/* Maximum supported match offset (in bytes) */
#define DEFLATE_MAX_MATCH_OFFSET 32768
/* log2 of DEFLATE_MAX_MATCH_OFFSET */
#define DEFLATE_WINDOW_ORDER 15
/* Number of symbols in each Huffman code. Note: for the literal/length
* and offset codes, these are actually the maximum values; a given block
* might use fewer symbols. */
#define DEFLATE_NUM_PRECODE_SYMS 19
#define DEFLATE_NUM_LITLEN_SYMS 288
#define DEFLATE_NUM_OFFSET_SYMS 32
/* The maximum number of symbols across all codes */
#define DEFLATE_MAX_NUM_SYMS 288
/* Division of symbols in the literal/length code */
#define DEFLATE_NUM_LITERALS 256
#define DEFLATE_END_OF_BLOCK 256
#define DEFLATE_FIRST_LEN_SYM 257
/* Maximum codeword length, in bits, within each Huffman code */
#define DEFLATE_MAX_PRE_CODEWORD_LEN 7
#define DEFLATE_MAX_LITLEN_CODEWORD_LEN 15
#define DEFLATE_MAX_OFFSET_CODEWORD_LEN 15
/* The maximum codeword length across all codes */
#define DEFLATE_MAX_CODEWORD_LEN 15
/* Maximum possible overrun when decoding codeword lengths */
#define DEFLATE_MAX_LENS_OVERRUN 137
/*
* Maximum number of extra bits that may be required to represent a match
* length or offset.
*/
#define DEFLATE_MAX_EXTRA_LENGTH_BITS 5
#define DEFLATE_MAX_EXTRA_OFFSET_BITS 13
#endif /* LIB_DEFLATE_CONSTANTS_H */

File diff suppressed because it is too large


@ -0,0 +1,14 @@
#ifndef LIB_DEFLATE_DECOMPRESS_H
#define LIB_DEFLATE_DECOMPRESS_H
#include "lib_common.h"
enum libdeflate_result
libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *d,
const void *in, size_t in_nbytes,
void *out, size_t out_nbytes_avail,
size_t *actual_in_nbytes_ret,
size_t *actual_out_nbytes_ret);
#endif /* LIB_DEFLATE_DECOMPRESS_H */


@ -0,0 +1,45 @@
/*
* gzip_constants.h - constants for the gzip wrapper format
*/
#ifndef LIB_GZIP_CONSTANTS_H
#define LIB_GZIP_CONSTANTS_H
#define GZIP_MIN_HEADER_SIZE 10
#define GZIP_FOOTER_SIZE 8
#define GZIP_MIN_OVERHEAD (GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE)
#define GZIP_ID1 0x1F
#define GZIP_ID2 0x8B
#define GZIP_CM_DEFLATE 8
#define GZIP_FTEXT 0x01
#define GZIP_FHCRC 0x02
#define GZIP_FEXTRA 0x04
#define GZIP_FNAME 0x08
#define GZIP_FCOMMENT 0x10
#define GZIP_FRESERVED 0xE0
#define GZIP_MTIME_UNAVAILABLE 0
#define GZIP_XFL_SLOWEST_COMPRESSION 0x02
#define GZIP_XFL_FASTEST_COMPRESSION 0x04
#define GZIP_OS_FAT 0
#define GZIP_OS_AMIGA 1
#define GZIP_OS_VMS 2
#define GZIP_OS_UNIX 3
#define GZIP_OS_VM_CMS 4
#define GZIP_OS_ATARI_TOS 5
#define GZIP_OS_HPFS 6
#define GZIP_OS_MACINTOSH 7
#define GZIP_OS_Z_SYSTEM 8
#define GZIP_OS_CP_M 9
#define GZIP_OS_TOPS_20 10
#define GZIP_OS_NTFS 11
#define GZIP_OS_QDOS 12
#define GZIP_OS_RISCOS 13
#define GZIP_OS_UNKNOWN 255
#endif /* LIB_GZIP_CONSTANTS_H */
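/*
 * Illustration (not part of the header): the smallest possible gzip header,
 * GZIP_MIN_HEADER_SIZE = 10 bytes, with no optional FLG fields set:
 *
 *   1f 8b 08 00 00 00 00 00 00 03
 *   |  |  |  |  '--MTIME--' |  '- OS = GZIP_OS_UNIX
 *   |  |  |  '- FLG = 0     '- XFL = 0
 *   |  |  '- CM = GZIP_CM_DEFLATE
 *   '- ID1, ID2
 */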


@ -0,0 +1,160 @@
/*
* gzip_decompress.c - decompress with a gzip wrapper
*
* Copyright 2016 Eric Biggers
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "lib_common.h"
#include "gzip_constants.h"
#ifdef CRC32
#include "crc32.h"
#endif
enum libdeflate_result
libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *d,
const void *in, size_t in_nbytes,
void *out, size_t out_nbytes_avail,
size_t *actual_in_nbytes_ret,
size_t *actual_out_nbytes_ret)
{
const u8 *in_next = in;
const u8 * const in_end = in_next + in_nbytes;
u8 flg;
size_t actual_in_nbytes;
size_t actual_out_nbytes;
enum libdeflate_result result;
if (in_nbytes < GZIP_MIN_OVERHEAD)
return LIBDEFLATE_BAD_DATA;
/* ID1 */
if (*in_next++ != GZIP_ID1)
return LIBDEFLATE_BAD_DATA;
/* ID2 */
if (*in_next++ != GZIP_ID2)
return LIBDEFLATE_BAD_DATA;
/* CM */
if (*in_next++ != GZIP_CM_DEFLATE)
return LIBDEFLATE_BAD_DATA;
flg = *in_next++;
/* MTIME */
in_next += 4;
/* XFL */
in_next += 1;
/* OS */
in_next += 1;
if (flg & GZIP_FRESERVED)
return LIBDEFLATE_BAD_DATA;
/* Extra field */
if (flg & GZIP_FEXTRA) {
u16 xlen = get_unaligned_le16(in_next);
in_next += 2;
if (in_end - in_next < (u32)xlen + GZIP_FOOTER_SIZE)
return LIBDEFLATE_BAD_DATA;
in_next += xlen;
}
/* Original file name (zero terminated) */
if (flg & GZIP_FNAME) {
while (*in_next++ != 0 && in_next != in_end)
;
if (in_end - in_next < GZIP_FOOTER_SIZE)
return LIBDEFLATE_BAD_DATA;
}
/* File comment (zero terminated) */
if (flg & GZIP_FCOMMENT) {
while (*in_next++ != 0 && in_next != in_end)
;
if (in_end - in_next < GZIP_FOOTER_SIZE)
return LIBDEFLATE_BAD_DATA;
}
/* CRC16 for gzip header */
if (flg & GZIP_FHCRC) {
in_next += 2;
if (in_end - in_next < GZIP_FOOTER_SIZE)
return LIBDEFLATE_BAD_DATA;
}
/* Compressed data */
result = libdeflate_deflate_decompress_ex(d, in_next,
in_end - GZIP_FOOTER_SIZE - in_next,
out, out_nbytes_avail,
&actual_in_nbytes,
actual_out_nbytes_ret);
if (result != LIBDEFLATE_SUCCESS)
return result;
if (actual_out_nbytes_ret)
actual_out_nbytes = *actual_out_nbytes_ret;
else
actual_out_nbytes = out_nbytes_avail;
in_next += actual_in_nbytes;
/* CRC32 */
#ifdef CRC32
	// This library is only used for MTProto, where the payload is already
	// guaranteed to be correct by the protocol itself, so the CRC32 check
	// is normally compiled out. Omitting the CRC32 implementation saves
	// around 8 KB of code size.
if (libdeflate_crc32(0, out, actual_out_nbytes) !=
get_unaligned_le32(in_next))
return LIBDEFLATE_BAD_DATA;
#endif
in_next += 4;
/* ISIZE */
if ((u32)actual_out_nbytes != get_unaligned_le32(in_next))
return LIBDEFLATE_BAD_DATA;
in_next += 4;
if (actual_in_nbytes_ret)
*actual_in_nbytes_ret = in_next - (u8 *)in;
return LIBDEFLATE_SUCCESS;
}
/* Reads ISIZE from the gzip footer: the uncompressed size mod 2^32. */
LIBDEFLATEAPI int32_t
libdeflate_gzip_get_output_size(const void* in, size_t in_nbytes) {
return get_unaligned_le32((u8*)in + in_nbytes - 4);
}
LIBDEFLATEAPI enum libdeflate_result
libdeflate_gzip_decompress(struct libdeflate_decompressor *d,
const void *in, size_t in_nbytes,
void *out, size_t out_nbytes_avail)
{
	// we're using `libdeflate_gzip_get_output_size` to allocate exactly the
	// right amount of memory for the output buffer, so this is redundant
size_t actual_out_nbytes_ret;
return libdeflate_gzip_decompress_ex(d, in, in_nbytes,
out, out_nbytes_avail,
NULL, &actual_out_nbytes_ret);
}
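/*
 * Usage sketch (not part of this file): how a caller combines the two
 * helpers above, mirroring what the TypeScript wrapper does. It assumes a
 * hosted environment with <stdlib.h>; in the actual WASM build the JS side
 * allocates via __malloc instead. Sizing the output buffer from ISIZE is
 * only sound here because MTProto authenticates the payload, as noted above.
 */
#include <stdlib.h>

static void *gunzip_sketch(struct libdeflate_decompressor *d,
			   const void *in, size_t in_nbytes,
			   size_t *out_nbytes)
{
	int32_t size = libdeflate_gzip_get_output_size(in, in_nbytes);
	void *out = malloc(size);

	if (!out)
		return NULL;
	if (libdeflate_gzip_decompress(d, in, in_nbytes, out, size) !=
	    LIBDEFLATE_SUCCESS) {
		free(out);
		return NULL;
	}
	*out_nbytes = (size_t)size;
	return out;
}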


@ -0,0 +1,401 @@
/*
* hc_matchfinder.h - Lempel-Ziv matchfinding with a hash table of linked lists
*
* Copyright 2016 Eric Biggers
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* ---------------------------------------------------------------------------
*
* Algorithm
*
* This is a Hash Chains (hc) based matchfinder.
*
* The main data structure is a hash table where each hash bucket contains a
* linked list (or "chain") of sequences whose first 4 bytes share the same hash
* code. Each sequence is identified by its starting position in the input
* buffer.
*
* The algorithm processes the input buffer sequentially. At each byte
* position, the hash code of the first 4 bytes of the sequence beginning at
* that position (the sequence being matched against) is computed. This
* identifies the hash bucket to use for that position. Then, this hash
* bucket's linked list is searched for matches. Then, a new linked list node
* is created to represent the current sequence and is prepended to the list.
*
* This algorithm has several useful properties:
*
* - It only finds true Lempel-Ziv matches; i.e., those where the matching
* sequence occurs prior to the sequence being matched against.
*
* - The sequences in each linked list are always sorted by decreasing starting
* position. Therefore, the closest (smallest offset) matches are found
* first, which in many compression formats tend to be the cheapest to encode.
*
* - Although fast running time is not guaranteed due to the possibility of the
* lists getting very long, the worst degenerate behavior can be easily
* prevented by capping the number of nodes searched at each position.
*
* - If the compressor decides not to search for matches at a certain position,
* then that position can be quickly inserted without searching the list.
*
* - The algorithm is adaptable to sliding windows: just store the positions
* relative to a "base" value that is updated from time to time, and stop
* searching each list when the sequences get too far away.
*
* ----------------------------------------------------------------------------
*
* Optimizations
*
* The main hash table and chains handle length 4+ matches. Length 3 matches
* are handled by a separate hash table with no chains. This works well for
* typical "greedy" or "lazy"-style compressors, where length 3 matches are
* often only helpful if they have small offsets. Instead of searching a full
* chain for length 3+ matches, the algorithm just checks for one close length 3
* match, then focuses on finding length 4+ matches.
*
* The longest_match() and skip_bytes() functions are inlined into the
* compressors that use them. This isn't just about saving the overhead of a
* function call. These functions are intended to be called from the inner
* loops of compressors, where giving the compiler more control over register
* allocation is very helpful. There is also significant benefit to be gained
* from allowing the CPU to predict branches independently at each call site.
* For example, "lazy"-style compressors can be written with two calls to
* longest_match(), each of which starts with a different 'best_len' and
* therefore has significantly different performance characteristics.
*
* Although any hash function can be used, a multiplicative hash is fast and
* works well.
*
* On some processors, it is significantly faster to extend matches by whole
* words (32 or 64 bits) instead of by individual bytes. For this to be the
* case, the processor must implement unaligned memory accesses efficiently and
* must have either a fast "find first set bit" instruction or a fast "find last
* set bit" instruction, depending on the processor's endianness.
*
* The code uses one loop for finding the first match and one loop for finding a
* longer match. Each of these loops is tuned for its respective task and in
* combination are faster than a single generalized loop that handles both
* tasks.
*
* The code also uses a tight inner loop that only compares the last and first
* bytes of a potential match. It is only when these bytes match that a full
* match extension is attempted.
*
* ----------------------------------------------------------------------------
*/
#ifndef LIB_HC_MATCHFINDER_H
#define LIB_HC_MATCHFINDER_H
#include "matchfinder_common.h"
#define HC_MATCHFINDER_HASH3_ORDER 15
#define HC_MATCHFINDER_HASH4_ORDER 16
#define HC_MATCHFINDER_TOTAL_HASH_SIZE \
(((1UL << HC_MATCHFINDER_HASH3_ORDER) + \
(1UL << HC_MATCHFINDER_HASH4_ORDER)) * sizeof(mf_pos_t))
struct MATCHFINDER_ALIGNED hc_matchfinder {
/* The hash table for finding length 3 matches */
mf_pos_t hash3_tab[1UL << HC_MATCHFINDER_HASH3_ORDER];
/* The hash table which contains the first nodes of the linked lists for
* finding length 4+ matches */
mf_pos_t hash4_tab[1UL << HC_MATCHFINDER_HASH4_ORDER];
/* The "next node" references for the linked lists. The "next node" of
* the node for the sequence with position 'pos' is 'next_tab[pos]'. */
mf_pos_t next_tab[MATCHFINDER_WINDOW_SIZE];
};
/* Prepare the matchfinder for a new input buffer. */
static void
hc_matchfinder_init(struct hc_matchfinder *mf)
{
STATIC_ASSERT(HC_MATCHFINDER_TOTAL_HASH_SIZE %
MATCHFINDER_SIZE_ALIGNMENT == 0);
matchfinder_init((mf_pos_t *)mf, HC_MATCHFINDER_TOTAL_HASH_SIZE);
}
static void
hc_matchfinder_slide_window(struct hc_matchfinder *mf)
{
STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
}
/*
* Find the longest match longer than 'best_len' bytes.
*
* @mf
* The matchfinder structure.
* @in_base_p
* Location of a pointer which points to the place in the input data the
* matchfinder currently stores positions relative to. This may be updated
* by this function.
* @in_next
* Pointer to the next position in the input buffer, i.e. the sequence
* being matched against.
* @best_len
* Require a match longer than this length.
* @max_len
* The maximum permissible match length at this position.
* @nice_len
* Stop searching if a match of at least this length is found.
* Must be <= @max_len.
* @max_search_depth
* Limit on the number of potential matches to consider. Must be >= 1.
* @next_hashes
* The precomputed hash codes for the sequence beginning at @in_next.
 *	These will be used and then updated with the precomputed hash codes for
* the sequence beginning at @in_next + 1.
* @offset_ret
* If a match is found, its offset is returned in this location.
*
* Return the length of the match found, or 'best_len' if no match longer than
* 'best_len' was found.
*/
static u32
hc_matchfinder_longest_match(struct hc_matchfinder * const mf,
const u8 ** const in_base_p,
const u8 * const in_next,
u32 best_len,
const u32 max_len,
const u32 nice_len,
const u32 max_search_depth,
u32 * const next_hashes,
u32 * const offset_ret)
{
u32 depth_remaining = max_search_depth;
const u8 *best_matchptr = in_next;
mf_pos_t cur_node3, cur_node4;
u32 hash3, hash4;
u32 next_hashseq;
u32 seq4;
const u8 *matchptr;
u32 len;
u32 cur_pos = in_next - *in_base_p;
const u8 *in_base;
mf_pos_t cutoff;
if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
hc_matchfinder_slide_window(mf);
*in_base_p += MATCHFINDER_WINDOW_SIZE;
cur_pos = 0;
}
in_base = *in_base_p;
cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
if (unlikely(max_len < 5)) /* can we read 4 bytes from 'in_next + 1'? */
goto out;
/* Get the precomputed hash codes. */
hash3 = next_hashes[0];
hash4 = next_hashes[1];
/* From the hash buckets, get the first node of each linked list. */
cur_node3 = mf->hash3_tab[hash3];
cur_node4 = mf->hash4_tab[hash4];
/* Update for length 3 matches. This replaces the singleton node in the
* 'hash3' bucket with the node for the current sequence. */
mf->hash3_tab[hash3] = cur_pos;
/* Update for length 4 matches. This prepends the node for the current
* sequence to the linked list in the 'hash4' bucket. */
mf->hash4_tab[hash4] = cur_pos;
mf->next_tab[cur_pos] = cur_node4;
/* Compute the next hash codes. */
next_hashseq = get_unaligned_le32(in_next + 1);
next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER);
next_hashes[1] = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER);
prefetchw(&mf->hash3_tab[next_hashes[0]]);
prefetchw(&mf->hash4_tab[next_hashes[1]]);
if (best_len < 4) { /* No match of length >= 4 found yet? */
/* Check for a length 3 match if needed. */
if (cur_node3 <= cutoff)
goto out;
seq4 = load_u32_unaligned(in_next);
if (best_len < 3) {
matchptr = &in_base[cur_node3];
if (load_u24_unaligned(matchptr) == loaded_u32_to_u24(seq4)) {
best_len = 3;
best_matchptr = matchptr;
}
}
/* Check for a length 4 match. */
if (cur_node4 <= cutoff)
goto out;
for (;;) {
/* No length 4 match found yet. Check the first 4 bytes. */
matchptr = &in_base[cur_node4];
if (load_u32_unaligned(matchptr) == seq4)
break;
/* The first 4 bytes did not match. Keep trying. */
cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
if (cur_node4 <= cutoff || !--depth_remaining)
goto out;
}
/* Found a match of length >= 4. Extend it to its full length. */
best_matchptr = matchptr;
best_len = lz_extend(in_next, best_matchptr, 4, max_len);
if (best_len >= nice_len)
goto out;
cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
if (cur_node4 <= cutoff || !--depth_remaining)
goto out;
} else {
if (cur_node4 <= cutoff || best_len >= nice_len)
goto out;
}
/* Check for matches of length >= 5. */
for (;;) {
for (;;) {
matchptr = &in_base[cur_node4];
/* Already found a length 4 match. Try for a longer
* match; start by checking either the last 4 bytes and
* the first 4 bytes, or the last byte. (The last byte,
* the one which would extend the match length by 1, is
* the most important.) */
#if UNALIGNED_ACCESS_IS_FAST
if ((load_u32_unaligned(matchptr + best_len - 3) ==
load_u32_unaligned(in_next + best_len - 3)) &&
(load_u32_unaligned(matchptr) ==
load_u32_unaligned(in_next)))
#else
if (matchptr[best_len] == in_next[best_len])
#endif
break;
/* Continue to the next node in the list. */
cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
if (cur_node4 <= cutoff || !--depth_remaining)
goto out;
}
#if UNALIGNED_ACCESS_IS_FAST
len = 4;
#else
len = 0;
#endif
len = lz_extend(in_next, matchptr, len, max_len);
if (len > best_len) {
/* This is the new longest match. */
best_len = len;
best_matchptr = matchptr;
if (best_len >= nice_len)
goto out;
}
/* Continue to the next node in the list. */
cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
if (cur_node4 <= cutoff || !--depth_remaining)
goto out;
}
out:
*offset_ret = in_next - best_matchptr;
return best_len;
}
/*
* Advance the matchfinder, but don't search for matches.
*
* @mf
* The matchfinder structure.
* @in_base_p
* Location of a pointer which points to the place in the input data the
* matchfinder currently stores positions relative to. This may be updated
* by this function.
* @in_next
* Pointer to the next position in the input buffer.
* @in_end
* Pointer to the end of the input buffer.
* @count
* The number of bytes to advance. Must be > 0.
* @next_hashes
* The precomputed hash codes for the sequence beginning at @in_next.
 *	These will be used and then updated with the precomputed hash codes for
* the sequence beginning at @in_next + @count.
*/
static void
hc_matchfinder_skip_bytes(struct hc_matchfinder * const mf,
const u8 ** const in_base_p,
const u8 *in_next,
const u8 * const in_end,
const u32 count,
u32 * const next_hashes)
{
u32 cur_pos;
u32 hash3, hash4;
u32 next_hashseq;
u32 remaining = count;
if (unlikely(count + 5 > in_end - in_next))
return;
cur_pos = in_next - *in_base_p;
hash3 = next_hashes[0];
hash4 = next_hashes[1];
do {
if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
hc_matchfinder_slide_window(mf);
*in_base_p += MATCHFINDER_WINDOW_SIZE;
cur_pos = 0;
}
mf->hash3_tab[hash3] = cur_pos;
mf->next_tab[cur_pos] = mf->hash4_tab[hash4];
mf->hash4_tab[hash4] = cur_pos;
next_hashseq = get_unaligned_le32(++in_next);
hash3 = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER);
hash4 = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER);
cur_pos++;
} while (--remaining);
prefetchw(&mf->hash3_tab[hash3]);
prefetchw(&mf->hash4_tab[hash4]);
next_hashes[0] = hash3;
next_hashes[1] = hash4;
}
#endif /* LIB_HC_MATCHFINDER_H */
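/*
 * Toy model (not part of libdeflate): the hash-chains scheme described at
 * the top of hc_matchfinder.h, shrunk to a few lines. The real matchfinder
 * differs in its hashing, sizes, and use of window-relative positions, but
 * the head-array + next-array mechanics are the same.
 */
#include <stdio.h>
#include <string.h>

#define TOY_BUCKETS 16

int main(void)
{
	const char *in = "abcabcabxabc";
	int n = (int)strlen(in);
	int head[TOY_BUCKETS];	/* newest position per bucket, -1 = empty */
	int next[64];		/* next-older position with the same hash */
	int pos, h, cand;

	memset(head, -1, sizeof(head));
	for (pos = 0; pos + 1 < n; pos++) {
		h = (in[pos] + in[pos + 1]) % TOY_BUCKETS;
		/* Walk the chain: candidates come out newest-first, i.e.
		 * smallest offset first, exactly as described above. */
		for (cand = head[h]; cand >= 0; cand = next[cand])
			if (in[cand] == in[pos] && in[cand + 1] == in[pos + 1])
				printf("pos %d: match at %d (offset %d)\n",
				       pos, cand, pos - cand);
		/* Prepend the current position to its chain. */
		next[pos] = head[h];
		head[h] = pos;
	}
	return 0;
}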


@ -0,0 +1,234 @@
/*
* ht_matchfinder.h - Lempel-Ziv matchfinding with a hash table
*
* Copyright 2022 Eric Biggers
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* ---------------------------------------------------------------------------
*
* This is a Hash Table (ht) matchfinder.
*
* This is a variant of the Hash Chains (hc) matchfinder that is optimized for
* very fast compression. The ht_matchfinder stores the hash chains inline in
* the hash table, whereas the hc_matchfinder stores them in a separate array.
* Storing the hash chains inline is the faster method when max_search_depth
* (the maximum chain length) is very small. It is not appropriate when
* max_search_depth is larger, as then it uses too much memory.
*
* Due to its focus on speed, the ht_matchfinder doesn't support length 3
* matches. It also doesn't allow max_search_depth to vary at runtime; it is
* fixed at build time as HT_MATCHFINDER_BUCKET_SIZE.
*
* See hc_matchfinder.h for more information.
*/
#ifndef LIB_HT_MATCHFINDER_H
#define LIB_HT_MATCHFINDER_H
#include "matchfinder_common.h"
#define HT_MATCHFINDER_HASH_ORDER 15
#define HT_MATCHFINDER_BUCKET_SIZE 2
#define HT_MATCHFINDER_MIN_MATCH_LEN 4
/* Minimum value of max_len for ht_matchfinder_longest_match() */
#define HT_MATCHFINDER_REQUIRED_NBYTES 5
struct MATCHFINDER_ALIGNED ht_matchfinder {
mf_pos_t hash_tab[1UL << HT_MATCHFINDER_HASH_ORDER]
[HT_MATCHFINDER_BUCKET_SIZE];
};
static void
ht_matchfinder_init(struct ht_matchfinder *mf)
{
STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
matchfinder_init((mf_pos_t *)mf, sizeof(*mf));
}
static void
ht_matchfinder_slide_window(struct ht_matchfinder *mf)
{
matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
}
/* Note: max_len must be >= HT_MATCHFINDER_REQUIRED_NBYTES */
static u32
ht_matchfinder_longest_match(struct ht_matchfinder * const mf,
const u8 ** const in_base_p,
const u8 * const in_next,
const u32 max_len,
const u32 nice_len,
u32 * const next_hash,
u32 * const offset_ret)
{
u32 best_len = 0;
const u8 *best_matchptr = in_next;
u32 cur_pos = in_next - *in_base_p;
const u8 *in_base;
mf_pos_t cutoff;
u32 hash;
u32 seq;
mf_pos_t cur_node;
const u8 *matchptr;
#if HT_MATCHFINDER_BUCKET_SIZE > 1
mf_pos_t to_insert;
u32 len;
#endif
#if HT_MATCHFINDER_BUCKET_SIZE > 2
int i;
#endif
/* This is assumed throughout this function. */
STATIC_ASSERT(HT_MATCHFINDER_MIN_MATCH_LEN == 4);
if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
ht_matchfinder_slide_window(mf);
*in_base_p += MATCHFINDER_WINDOW_SIZE;
cur_pos = 0;
}
in_base = *in_base_p;
cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
hash = *next_hash;
STATIC_ASSERT(HT_MATCHFINDER_REQUIRED_NBYTES == 5);
*next_hash = lz_hash(get_unaligned_le32(in_next + 1),
HT_MATCHFINDER_HASH_ORDER);
seq = load_u32_unaligned(in_next);
prefetchw(&mf->hash_tab[*next_hash]);
#if HT_MATCHFINDER_BUCKET_SIZE == 1
/* Hand-unrolled version for BUCKET_SIZE == 1 */
cur_node = mf->hash_tab[hash][0];
mf->hash_tab[hash][0] = cur_pos;
if (cur_node <= cutoff)
goto out;
matchptr = &in_base[cur_node];
if (load_u32_unaligned(matchptr) == seq) {
best_len = lz_extend(in_next, matchptr, 4, max_len);
best_matchptr = matchptr;
}
#elif HT_MATCHFINDER_BUCKET_SIZE == 2
/*
* Hand-unrolled version for BUCKET_SIZE == 2. The logic here also
* differs slightly in that it copies the first entry to the second even
* if nice_len is reached on the first, as this can be slightly faster.
*/
cur_node = mf->hash_tab[hash][0];
mf->hash_tab[hash][0] = cur_pos;
if (cur_node <= cutoff)
goto out;
matchptr = &in_base[cur_node];
to_insert = cur_node;
cur_node = mf->hash_tab[hash][1];
mf->hash_tab[hash][1] = to_insert;
if (load_u32_unaligned(matchptr) == seq) {
best_len = lz_extend(in_next, matchptr, 4, max_len);
best_matchptr = matchptr;
if (cur_node <= cutoff || best_len >= nice_len)
goto out;
matchptr = &in_base[cur_node];
if (load_u32_unaligned(matchptr) == seq &&
load_u32_unaligned(matchptr + best_len - 3) ==
load_u32_unaligned(in_next + best_len - 3)) {
len = lz_extend(in_next, matchptr, 4, max_len);
if (len > best_len) {
best_len = len;
best_matchptr = matchptr;
}
}
} else {
if (cur_node <= cutoff)
goto out;
matchptr = &in_base[cur_node];
if (load_u32_unaligned(matchptr) == seq) {
best_len = lz_extend(in_next, matchptr, 4, max_len);
best_matchptr = matchptr;
}
}
#else
/* Generic version for HT_MATCHFINDER_BUCKET_SIZE > 2 */
to_insert = cur_pos;
for (i = 0; i < HT_MATCHFINDER_BUCKET_SIZE; i++) {
cur_node = mf->hash_tab[hash][i];
mf->hash_tab[hash][i] = to_insert;
if (cur_node <= cutoff)
goto out;
matchptr = &in_base[cur_node];
if (load_u32_unaligned(matchptr) == seq) {
len = lz_extend(in_next, matchptr, 4, max_len);
if (len > best_len) {
best_len = len;
best_matchptr = matchptr;
if (best_len >= nice_len)
goto out;
}
}
to_insert = cur_node;
}
#endif
out:
*offset_ret = in_next - best_matchptr;
return best_len;
}
static void
ht_matchfinder_skip_bytes(struct ht_matchfinder * const mf,
const u8 ** const in_base_p,
const u8 *in_next,
const u8 * const in_end,
const u32 count,
u32 * const next_hash)
{
s32 cur_pos = in_next - *in_base_p;
u32 hash;
u32 remaining = count;
int i;
if (unlikely(count + HT_MATCHFINDER_REQUIRED_NBYTES > in_end - in_next))
return;
if (cur_pos + count - 1 >= MATCHFINDER_WINDOW_SIZE) {
ht_matchfinder_slide_window(mf);
*in_base_p += MATCHFINDER_WINDOW_SIZE;
cur_pos -= MATCHFINDER_WINDOW_SIZE;
}
hash = *next_hash;
do {
for (i = HT_MATCHFINDER_BUCKET_SIZE - 1; i > 0; i--)
mf->hash_tab[hash][i] = mf->hash_tab[hash][i - 1];
mf->hash_tab[hash][0] = cur_pos;
hash = lz_hash(get_unaligned_le32(++in_next),
HT_MATCHFINDER_HASH_ORDER);
cur_pos++;
} while (--remaining);
prefetchw(&mf->hash_tab[hash]);
*next_hash = hash;
}
#endif /* LIB_HT_MATCHFINDER_H */
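/*
 * Toy model (not part of libdeflate): the inline-bucket idea above. Each
 * bucket directly holds the BUCKET_SIZE most recent positions, so a chain
 * walk becomes a fixed-length scan and an insert is a shift-down -- cheap
 * when the search depth is as small as it is here (2).
 */
static void toy_ht_insert(int bucket[HT_MATCHFINDER_BUCKET_SIZE], int pos)
{
	int i;

	for (i = HT_MATCHFINDER_BUCKET_SIZE - 1; i > 0; i--)
		bucket[i] = bucket[i - 1];	/* oldest entry falls off */
	bucket[0] = pos;			/* newest entry in front */
}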


@ -0,0 +1,194 @@
/*
* matchfinder_common.h - common code for Lempel-Ziv matchfinding
*/
#ifndef LIB_MATCHFINDER_COMMON_H
#define LIB_MATCHFINDER_COMMON_H
#include "lib_common.h"
#ifndef MATCHFINDER_WINDOW_ORDER
# error "MATCHFINDER_WINDOW_ORDER must be defined!"
#endif
/*
* Given a 32-bit value that was loaded with the platform's native endianness,
* return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24
* bits contain the first 3 bytes, arranged in octets in a platform-dependent
* order, at the memory location from which the input 32-bit value was loaded.
*/
static u32
loaded_u32_to_u24(u32 v)
{
if (CPU_IS_LITTLE_ENDIAN())
return v & 0xFFFFFF;
else
return v >> 8;
}
/*
* Load the next 3 bytes from @p into the 24 low-order bits of a 32-bit value.
* The order in which the 3 bytes will be arranged as octets in the 24 bits is
* platform-dependent. At least 4 bytes (not 3) must be available at @p.
*/
static u32
load_u24_unaligned(const u8 *p)
{
#if UNALIGNED_ACCESS_IS_FAST
return loaded_u32_to_u24(load_u32_unaligned(p));
#else
if (CPU_IS_LITTLE_ENDIAN())
return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
else
return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16);
#endif
}
#define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)
typedef s16 mf_pos_t;
#define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)
/*
* Required alignment of the matchfinder buffer pointer and size. The values
* here come from the AVX-2 implementation, which is the worst case.
*/
#define MATCHFINDER_MEM_ALIGNMENT 32
#define MATCHFINDER_SIZE_ALIGNMENT 128
#undef matchfinder_init
#undef matchfinder_rebase
#ifdef _aligned_attribute
# define MATCHFINDER_ALIGNED _aligned_attribute(MATCHFINDER_MEM_ALIGNMENT)
#else
# define MATCHFINDER_ALIGNED
#endif
/*
* Initialize the hash table portion of the matchfinder.
*
* Essentially, this is an optimized memset().
*
* 'data' must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and
* 'size' must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
*/
#ifndef matchfinder_init
static void
matchfinder_init(mf_pos_t *data, size_t size)
{
size_t num_entries = size / sizeof(*data);
size_t i;
for (i = 0; i < num_entries; i++)
data[i] = MATCHFINDER_INITVAL;
}
#endif
/*
* Slide the matchfinder by MATCHFINDER_WINDOW_SIZE bytes.
*
* This must be called just after each MATCHFINDER_WINDOW_SIZE bytes have been
* run through the matchfinder.
*
* This subtracts MATCHFINDER_WINDOW_SIZE bytes from each entry in the given
* array, making the entries be relative to the current position rather than the
* position MATCHFINDER_WINDOW_SIZE bytes prior. To avoid integer underflows,
* entries that would become less than -MATCHFINDER_WINDOW_SIZE stay at
* -MATCHFINDER_WINDOW_SIZE, keeping them permanently out of bounds.
*
* The given array must contain all matchfinder data that is position-relative:
* the hash table(s) as well as any hash chain or binary tree links. Its
* address must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and its size
* must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
*/
#ifndef matchfinder_rebase
static void
matchfinder_rebase(mf_pos_t *data, size_t size)
{
size_t num_entries = size / sizeof(*data);
size_t i;
if (MATCHFINDER_WINDOW_SIZE == 32768) {
/*
* Branchless version for 32768-byte windows. Clear all bits if
* the value was already negative, then set the sign bit. This
* is equivalent to subtracting 32768 with signed saturation.
*/
for (i = 0; i < num_entries; i++)
data[i] = 0x8000 | (data[i] & ~(data[i] >> 15));
} else {
for (i = 0; i < num_entries; i++) {
if (data[i] >= 0)
data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
else
data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
}
}
}
#endif
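/*
 * Worked example (not from the source): for the branchless path above with
 * mf_pos_t x = 100, x >> 15 == 0, so 0x8000 | (100 & ~0) == 0x8064 == -32668
 * == 100 - 32768; for x = -5, x >> 15 == -1, so 0x8000 | (-5 & 0) == 0x8000
 * == -32768, i.e. the subtraction saturates exactly like the branchy
 * fallback in matchfinder_rebase().
 */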
/*
* The hash function: given a sequence prefix held in the low-order bits of a
* 32-bit value, multiply by a carefully-chosen large constant. Discard any
* bits of the product that don't fit in a 32-bit value, but take the
* next-highest @num_bits bits of the product as the hash value, as those have
* the most randomness.
*/
static u32
lz_hash(u32 seq, unsigned num_bits)
{
return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
}
/*
* Return the number of bytes at @matchptr that match the bytes at @strptr, up
* to a maximum of @max_len. Initially, @start_len bytes are matched.
*/
static unsigned
lz_extend(const u8 * const strptr, const u8 * const matchptr,
const unsigned start_len, const unsigned max_len)
{
unsigned len = start_len;
machine_word_t v_word;
if (UNALIGNED_ACCESS_IS_FAST) {
if (likely(max_len - len >= 4 * WORDBYTES)) {
#define COMPARE_WORD_STEP \
v_word = load_word_unaligned(&matchptr[len]) ^ \
load_word_unaligned(&strptr[len]); \
if (v_word != 0) \
goto word_differs; \
len += WORDBYTES; \
COMPARE_WORD_STEP
COMPARE_WORD_STEP
COMPARE_WORD_STEP
COMPARE_WORD_STEP
#undef COMPARE_WORD_STEP
}
while (len + WORDBYTES <= max_len) {
v_word = load_word_unaligned(&matchptr[len]) ^
load_word_unaligned(&strptr[len]);
if (v_word != 0)
goto word_differs;
len += WORDBYTES;
}
}
while (len < max_len && matchptr[len] == strptr[len])
len++;
return len;
word_differs:
if (CPU_IS_LITTLE_ENDIAN())
len += (bsfw(v_word) >> 3);
else
len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
return len;
}
#endif /* LIB_MATCHFINDER_COMMON_H */
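/*
 * Toy demo (not part of libdeflate): lz_hash() in action. Hash every 4-byte
 * window of a short string into 2^8 buckets; repeated prefixes ("abca" at
 * positions 0 and 3) land in the same bucket, which is the only property
 * the matchfinders rely on.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	const unsigned char s[] = "abcabcab";
	size_t i;

	for (i = 0; i + 4 <= sizeof(s) - 1; i++) {
		uint32_t seq = (uint32_t)s[i] |
			       ((uint32_t)s[i + 1] << 8) |
			       ((uint32_t)s[i + 2] << 16) |
			       ((uint32_t)s[i + 3] << 24);
		printf("pos %zu: bucket %u\n",
		       i, (uint32_t)(seq * 0x1E35A7BD) >> (32 - 8));
	}
	return 0;
}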


@ -0,0 +1,83 @@
/*
* zlib_compress.c - compress with a zlib wrapper
*
* Copyright 2016 Eric Biggers
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "deflate_compress.h"
#include "zlib_constants.h"
#include "adler32.h"
LIBDEFLATEAPI size_t
libdeflate_zlib_compress(struct libdeflate_compressor *c,
const void *in, size_t in_nbytes,
void *out, size_t out_nbytes_avail)
{
u8 *out_next = out;
u16 hdr;
unsigned compression_level;
unsigned level_hint;
size_t deflate_size;
if (out_nbytes_avail <= ZLIB_MIN_OVERHEAD)
return 0;
/* 2 byte header: CMF and FLG */
hdr = (ZLIB_CM_DEFLATE << 8) | (ZLIB_CINFO_32K_WINDOW << 12);
compression_level = libdeflate_get_compression_level(c);
if (compression_level < 2)
level_hint = ZLIB_FASTEST_COMPRESSION;
else if (compression_level < 6)
level_hint = ZLIB_FAST_COMPRESSION;
else if (compression_level < 8)
level_hint = ZLIB_DEFAULT_COMPRESSION;
else
level_hint = ZLIB_SLOWEST_COMPRESSION;
hdr |= level_hint << 6;
hdr |= 31 - (hdr % 31);
put_unaligned_be16(hdr, out_next);
out_next += 2;
/* Compressed data */
deflate_size = libdeflate_deflate_compress(c, in, in_nbytes, out_next,
out_nbytes_avail - ZLIB_MIN_OVERHEAD);
if (deflate_size == 0)
return 0;
out_next += deflate_size;
/* ADLER32 */
put_unaligned_be32(libdeflate_adler32(1, in, in_nbytes), out_next);
out_next += 4;
return out_next - (u8 *)out;
}
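/*
 * Worked example (not from the source): for the default level 6, the header
 * math above gives hdr = 0x7800 (CM=8, CINFO=7) | (2 << 6) = 0x7880; since
 * 0x7880 % 31 == 3, the FCHECK step adds 31 - 3 = 28, yielding 0x789C -- the
 * familiar first two bytes of most zlib streams.
 */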
/*LIBDEFLATEAPI*/ static size_t
libdeflate_zlib_compress_bound(struct libdeflate_compressor *c,
size_t in_nbytes)
{
return ZLIB_MIN_OVERHEAD +
libdeflate_deflate_compress_bound(c, in_nbytes);
}


@ -0,0 +1,21 @@
/*
* zlib_constants.h - constants for the zlib wrapper format
*/
#ifndef LIB_ZLIB_CONSTANTS_H
#define LIB_ZLIB_CONSTANTS_H
#define ZLIB_MIN_HEADER_SIZE 2
#define ZLIB_FOOTER_SIZE 4
#define ZLIB_MIN_OVERHEAD (ZLIB_MIN_HEADER_SIZE + ZLIB_FOOTER_SIZE)
#define ZLIB_CM_DEFLATE 8
#define ZLIB_CINFO_32K_WINDOW 7
#define ZLIB_FASTEST_COMPRESSION 0
#define ZLIB_FAST_COMPRESSION 1
#define ZLIB_DEFAULT_COMPRESSION 2
#define ZLIB_SLOWEST_COMPRESSION 3
#endif /* LIB_ZLIB_CONSTANTS_H */

packages/wasm/lib/mtcute.wasm Executable file

Binary file not shown.

packages/wasm/lib/utils.c Normal file

@ -0,0 +1,137 @@
/*
* utils.c - utility functions for libdeflate
*
* Copyright 2016 Eric Biggers
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "lib_common.h"
extern unsigned char __heap_base;
static size_t __heap_tail = (size_t) &__heap_base;
static size_t __heap_mark = (size_t) &__heap_base;
#define memory_size() __builtin_wasm_memory_size(0)
#define memory_grow(delta) __builtin_wasm_memory_grow(0, delta)
enum {
_mem_flag_used = 0xbf82583a,
_mem_flag_free = 0xab34d705
};
__attribute__((visibility("default"))) void* __malloc(size_t n) {
    n += (8 - (n % 4)) % 4; // round the size up to a multiple of 4 bytes
    // grow the linear memory if the allocation would not fit
size_t total = __heap_tail + n + 3 * sizeof(size_t);
size_t size = memory_size() << 16;
if (total > size) {
memory_grow((total >> 16) - (size >> 16) + 1);
}
unsigned int r = __heap_tail;
*((size_t*) r) = n;
r += sizeof(size_t);
    *((size_t*) r) = _mem_flag_used;
r += sizeof(size_t);
__heap_tail = r + n;
*((size_t*) __heap_tail) = n;
__heap_tail += sizeof(size_t);
return (void*) r;
}
__attribute__((visibility("default"))) void __free(void* p) {
size_t n;
// null case
if (!p) return;
size_t r=(size_t)p;
r -= sizeof(size_t);
// already free
if (*((size_t*) r) != _mem_flag_used) {
return;
}
// mark it as free
size_t flag = _mem_flag_free;
*((size_t*) r) = flag;
// calc ptr_tail
r -= sizeof(size_t);
n = *(size_t*) r; // size of current block
size_t ptr_tail = ((size_t) p) + n + sizeof(size_t);
// if not at tail return without moving __heap_tail
if (__heap_tail != ptr_tail) {
return;
}
__heap_tail = r;
while (r > (size_t) &__heap_base) {
r -= sizeof(size_t);
n = *(size_t*) r; // size of previous block
r -= n;
r -= sizeof(size_t);
flag = *((size_t*) r);
if (flag != _mem_flag_free) break;
r -= sizeof(size_t);
n = *(size_t*) r; // size of current block
__heap_tail = r;
}
}
void *
libdeflate_aligned_malloc(size_t alignment, size_t size)
{
void *ptr = __malloc(sizeof(void *) + alignment - 1 + size);
if (ptr) {
void *orig_ptr = ptr;
ptr = (void *)ALIGN((uintptr_t)ptr + sizeof(void *), alignment);
((void **)ptr)[-1] = orig_ptr;
}
return ptr;
}
void
libdeflate_aligned_free(void *ptr)
{
__free((((void **)ptr)[-1]));
}
#ifdef LOGGING
char* __debug_log = 0;
int __debug_log_pos = 0;
__attribute__((visibility("default"))) char* __get_debug_log() {
return __debug_log;
}
void __debug(char* str) {
if (!__debug_log) {
__debug_log = __malloc(1024);
}
int i = 0;
while (str[i] != '\0') {
__debug_log[__debug_log_pos++] = str[i++];
}
__debug_log[__debug_log_pos++] = '\n';
__debug_log[__debug_log_pos] = '\0';
}
#endif
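/*
 * Block layout used by __malloc()/__free() above, reconstructed from the
 * code (not an authoritative comment from the source):
 *
 *   [size n][flag used/free][ ...n bytes of user data... ][size n]
 *                            ^- pointer returned to the caller
 *
 * The trailing size makes it possible to walk backwards from __heap_tail.
 * Only a free at the tail shrinks the heap; an interior free just flips the
 * flag and is reclaimed once everything after it has been freed as well.
 */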


@ -0,0 +1,29 @@
{
"name": "@mtcute/wasm",
"private": true,
"version": "0.1.0",
"description": "WASM implementation of common algorithms used in Telegram",
"author": "Alina Sireneva <alina@tei.su>",
"license": "MIT",
"main": "src/index.ts",
"type": "module",
"scripts": {
"test": "mocha \"tests/**/*.spec.ts\"",
"docs": "typedoc",
"build": "pnpm run -w build-package wasm",
"build:wasm": "docker build --output=lib --target=binaries lib"
},
"browser": {
"./cjs/init.js": "./cjs/init.web.js",
"./esm/init.js": "./esm/init.web.js"
},
"distOnlyFields": {
"exports": {
".": {
"import": "./esm/index.js",
"require": "./cjs/index.js"
},
"./mtcute.wasm": "./mtcute.wasm"
}
}
}

packages/wasm/src/index.ts Normal file

@ -0,0 +1,213 @@
import { loadWasmBinary } from './init.js'
import { InitInput, MtcuteWasmModule, SyncInitInput } from './types.js'
export * from './types.js'
let wasm!: MtcuteWasmModule
let compressor!: number
let decompressor!: number
let cachedUint8Memory: Uint8Array | null = null
function initCommon() {
compressor = wasm.libdeflate_alloc_compressor(6)
decompressor = wasm.libdeflate_alloc_decompressor()
}
function getUint8Memory() {
if (cachedUint8Memory === null || cachedUint8Memory.byteLength === 0) {
cachedUint8Memory = new Uint8Array(wasm.memory.buffer)
}
return cachedUint8Memory
}
/**
* Init the WASM blob synchronously (e.g. by passing a `WebAssembly.Module` instance)
*/
export function initSync(module: SyncInitInput): void {
if (wasm !== undefined) return
if (!(module instanceof WebAssembly.Module)) {
module = new WebAssembly.Module(module)
}
const instance = new WebAssembly.Instance(module)
wasm = instance.exports as unknown as MtcuteWasmModule
initCommon()
}
/**
* Init the WASM blob asynchronously (e.g. by passing a URL to the WASM file)
*
* By default, will try to determine the best way to load the WASM file automatically.
*/
export async function initAsync(input?: InitInput): Promise<void> {
if (wasm !== undefined) return
const instance = await loadWasmBinary(input)
wasm = instance.exports as unknown as MtcuteWasmModule
initCommon()
}
/**
* Deflate some data with zlib headers and max output size
*
* @returns null if the compressed data is larger than `size`, otherwise the compressed data
*/
export function deflateMaxSize(bytes: Uint8Array, size: number): Uint8Array | null {
const outputPtr = wasm.__malloc(size)
const inputPtr = wasm.__malloc(bytes.length)
getUint8Memory().set(bytes, inputPtr)
const written = wasm.libdeflate_zlib_compress(compressor, inputPtr, bytes.length, outputPtr, size)
wasm.__free(inputPtr)
if (written === 0) {
wasm.__free(outputPtr)
return null
}
const result = getUint8Memory().slice(outputPtr, outputPtr + written)
wasm.__free(outputPtr)
return result
}
/**
 * Try to decompress some data with gzip headers
 *
 * @throws Error if the data is invalid
*/
export function gunzip(bytes: Uint8Array): Uint8Array {
const inputPtr = wasm.__malloc(bytes.length)
getUint8Memory().set(bytes, inputPtr)
const size = wasm.libdeflate_gzip_get_output_size(inputPtr, bytes.length)
const outputPtr = wasm.__malloc(size)
const ret = wasm.libdeflate_gzip_decompress(decompressor, inputPtr, bytes.length, outputPtr, size)
if (ret === -1) throw new Error('gunzip error -- bad data')
if (ret === -2) throw new Error('gunzip error -- short output')
if (ret === -3) throw new Error('gunzip error -- short input') // should never happen
const result = getUint8Memory().slice(outputPtr, outputPtr + size)
wasm.__free(inputPtr)
wasm.__free(outputPtr)
return result
}
/**
 * Perform AES-IGE-256 encryption
*
* @param data data to encrypt (must be a multiple of 16 bytes)
* @param key encryption key (32 bytes)
* @param iv initialization vector (32 bytes)
*/
export function ige256Encrypt(data: Uint8Array, key: Uint8Array, iv: Uint8Array): Uint8Array {
const ptr = wasm.__malloc(key.length + iv.length + data.length + data.length)
const keyPtr = ptr
const ivPtr = ptr + key.length
const inputPtr = ivPtr + iv.length
const outputPtr = inputPtr + data.length
const mem = getUint8Memory()
mem.set(data, inputPtr)
mem.set(key, keyPtr)
mem.set(iv, ivPtr)
wasm.ige256_encrypt(inputPtr, data.length, keyPtr, ivPtr, outputPtr)
const result = getUint8Memory().slice(outputPtr, outputPtr + data.length)
wasm.__free(ptr)
return result
}
/**
 * Perform AES-IGE-256 decryption
*
* @param data data to decrypt (must be a multiple of 16 bytes)
* @param key encryption key (32 bytes)
* @param iv initialization vector (32 bytes)
*/
export function ige256Decrypt(data: Uint8Array, key: Uint8Array, iv: Uint8Array): Uint8Array {
const ptr = wasm.__malloc(key.length + iv.length + data.length + data.length)
const keyPtr = ptr
const ivPtr = ptr + key.length
const inputPtr = ivPtr + iv.length
const outputPtr = inputPtr + data.length
const mem = getUint8Memory()
mem.set(data, inputPtr)
mem.set(key, keyPtr)
mem.set(iv, ivPtr)
wasm.ige256_decrypt(inputPtr, data.length, keyPtr, ivPtr, outputPtr)
const result = getUint8Memory().slice(outputPtr, outputPtr + data.length)
wasm.__free(ptr)
return result
}
/**
* Create a context for AES-CTR-256 en/decryption
*
* > **Note**: `freeCtr256` must be called on the returned context when it's no longer needed
*/
export function createCtr256(key: Uint8Array, iv: Uint8Array) {
const keyPtr = wasm.__malloc(key.length)
const ivPtr = wasm.__malloc(iv.length)
getUint8Memory().set(key, keyPtr)
getUint8Memory().set(iv, ivPtr)
const ctx = wasm.ctr256_alloc(keyPtr, ivPtr)
// pointers are "moved" and will be handled by c code
return ctx
}
/**
* Release a context for AES-CTR-256 en/decryption
*/
export function freeCtr256(ctx: number) {
wasm.ctr256_free(ctx)
}
/**
 * Perform AES-CTR-256 en/decryption
 *
 * @param ctx context returned by `createCtr256`
 * @param data data to en/decrypt (unlike IGE, does not need to be a multiple of 16 bytes)
*/
export function ctr256(ctx: number, data: Uint8Array): Uint8Array {
const { __malloc, __free } = wasm
const inputPtr = __malloc(data.length)
const outputPtr = __malloc(data.length)
const mem = getUint8Memory()
mem.set(data, inputPtr)
wasm.ctr256(ctx, inputPtr, data.length, outputPtr)
const result = mem.slice(outputPtr, outputPtr + data.length)
    __free(outputPtr)
    __free(inputPtr)
return result
}
/**
* Get the WASM module instance.
*
* For debugging and testing purposes only
*/
export function __getWasm(): MtcuteWasmModule {
return wasm
}
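// Usage sketch (not part of this module; `key`, `iv`, `chunk1`, `chunk2` are
// placeholders), assuming the WASM blob was already loaded via initAsync():
//
//   const ctx = createCtr256(key /* 32 bytes */, iv /* 16 bytes */)
//   const encrypted = ctr256(ctx, chunk1)
//   const more = ctr256(ctx, chunk2) // keystream state carries over
//   freeCtr256(ctx) // required -- contexts are not garbage-collected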

packages/wasm/src/init.ts Normal file

@ -0,0 +1,24 @@
/* eslint-disable no-restricted-imports */
import { readFile } from 'fs/promises'
import { join } from 'path'
import { InitInput } from './types.js'
// @only-if-esm
const __dirname = new URL('.', import.meta.url).pathname
// @/only-if-esm
export async function loadWasmBinary(input?: InitInput): Promise<WebAssembly.Instance> {
if (typeof input === 'undefined') {
input = join(__dirname, '../lib/mtcute.wasm')
}
if (typeof input !== 'string') {
throw new Error('Invalid input, for Node.js pass path to wasm blob')
}
const module = new WebAssembly.Module(await readFile(input))
const instance = new WebAssembly.Instance(module)
return instance
}


@ -0,0 +1,42 @@
import { InitInput } from './types.js'
export async function loadWasmBinary(input?: InitInput): Promise<WebAssembly.Instance> {
if (typeof input === 'undefined') {
input = new URL('../mtcute.wasm', import.meta.url)
}
if (
typeof input === 'string' ||
(typeof Request === 'function' && input instanceof Request) ||
(typeof URL === 'function' && input instanceof URL)
) {
input = await fetch(input)
}
if (typeof Response === 'function' && input instanceof Response) {
if (typeof WebAssembly.instantiateStreaming === 'function') {
try {
const { instance } = await WebAssembly.instantiateStreaming(input)
return instance
} catch (e) {
if (input.headers.get('Content-Type') !== 'application/wasm') {
console.warn(
'`WebAssembly.instantiateStreaming` failed because your server does not serve wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. Original error:\n',
e,
)
} else {
throw e
}
}
}
const bytes = await input.arrayBuffer()
const { instance } = await WebAssembly.instantiate(bytes)
return instance
}
    if (input instanceof WebAssembly.Module) {
        return await WebAssembly.instantiate(input)
    }

    // for a BufferSource, this overload resolves to { module, instance }
    const { instance } = await WebAssembly.instantiate(input)

    return instance
}
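// Usage sketch (not part of this module): any InitInput shape is accepted;
// the CDN URL below is purely illustrative.
//
//   await loadWasmBinary()                                      // default URL
//   await loadWasmBinary('https://cdn.example.com/mtcute.wasm') // fetched
//   await loadWasmBinary(await fetch('/mtcute.wasm'))           // Response
//   await loadWasmBinary(compiledModule)                        // WebAssembly.Module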


@ -0,0 +1,24 @@
export interface MtcuteWasmModule {
memory: WebAssembly.Memory
__malloc: (size: number) => number
__free: (ptr: number) => void
libdeflate_alloc_decompressor: () => number
libdeflate_alloc_compressor: (level: number) => number
    /** @returns non-zero on error */
libdeflate_gzip_decompress: (ctx: number, src: number, srcLen: number, dst: number, dstLen: number) => number
libdeflate_gzip_get_output_size: (src: number, srcLen: number) => number
libdeflate_zlib_compress: (ctx: number, src: number, srcLen: number, dst: number, dstLen: number) => number
ige256_encrypt: (data: number, dataLen: number, key: number, iv: number, out: number) => void
ige256_decrypt: (data: number, dataLen: number, key: number, iv: number, out: number) => void
ctr256_alloc: (key: number, iv: number) => number
ctr256_free: (ctx: number) => void
ctr256: (ctx: number, data: number, dataLen: number, out: number) => number
}
export type SyncInitInput = BufferSource | WebAssembly.Module
export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module


@ -0,0 +1,21 @@
import { expect } from 'chai'
import { __getWasm, initAsync } from '../src/index.js'
before(async () => {
await initAsync()
})
describe('allocator', () => {
it('should not leak memory', () => {
const wasm = __getWasm()
const memUsage = wasm.memory.buffer.byteLength
for (let i = 0; i < 1024; i++) {
const ptr = wasm.__malloc(1024)
wasm.__free(ptr)
}
expect(wasm.memory.buffer.byteLength).to.equal(memUsage)
})
})


@ -0,0 +1,149 @@
/* eslint-disable no-restricted-globals */
import { expect } from 'chai'
import { before, describe } from 'mocha'
import { __getWasm, createCtr256, ctr256, freeCtr256, initAsync } from '../src/index.js'
before(async () => {
await initAsync()
})
describe('aes-ctr', () => {
const key = Buffer.from('603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4', 'hex')
const iv = Buffer.from('F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF', 'hex')
describe('NIST', () => {
// https://csrc.nist.gov/CSRC/media/Projects/Cryptographic-Standards-and-Guidelines/documents/examples/AES_CTR.pdf
const data = Buffer.from(
`6BC1BEE2 2E409F96 E93D7E11 7393172A
AE2D8A57 1E03AC9C 9EB76FAC 45AF8E51
30C81C46 A35CE411 E5FBC119 1A0A52EF
F69F2445 DF4F9B17 AD2B417B E66C3710`.replace(/\s/g, ''),
'hex',
)
const dataEnc = Buffer.from(
`601EC313 775789A5 B7A7F504 BBF3D228
F443E3CA 4D62B59A CA84E990 CACAF5C5
2B0930DA A23DE94C E87017BA 2D84988D
DFC9C58D B67AADA6 13C2DD08 457941A6`.replace(/\s/g, ''),
'hex',
)
it('should correctly encrypt', () => {
const ctr = createCtr256(key, iv)
const res = ctr256(ctr, data)
freeCtr256(ctr)
expect(Buffer.from(res).toString('hex')).to.equal(dataEnc.toString('hex'))
})
it('should correctly decrypt', () => {
const ctr = createCtr256(key, iv)
const res = ctr256(ctr, dataEnc)
freeCtr256(ctr)
expect(Buffer.from(res).toString('hex')).to.equal(data.toString('hex'))
})
})
describe('stream', () => {
const data = Buffer.from('6BC1BEE22E409F96E93D7E117393172A', 'hex')
const dataEnc1 = Buffer.from('601ec313775789a5b7a7f504bbf3d228', 'hex')
const dataEnc2 = Buffer.from('31afd77f7d218690bd0ef82dfcf66cbe', 'hex')
const dataEnc3 = Buffer.from('7000927e2f2192cbe4b6a8b2441ddd48', 'hex')
it('should correctly encrypt', () => {
const ctr = createCtr256(key, iv)
const res1 = ctr256(ctr, data)
const res2 = ctr256(ctr, data)
const res3 = ctr256(ctr, data)
freeCtr256(ctr)
expect(Buffer.from(res1).toString('hex')).to.equal(dataEnc1.toString('hex'))
expect(Buffer.from(res2).toString('hex')).to.equal(dataEnc2.toString('hex'))
expect(Buffer.from(res3).toString('hex')).to.equal(dataEnc3.toString('hex'))
})
it('should correctly decrypt', () => {
const ctr = createCtr256(key, iv)
const res1 = ctr256(ctr, dataEnc1)
const res2 = ctr256(ctr, dataEnc2)
const res3 = ctr256(ctr, dataEnc3)
freeCtr256(ctr)
expect(Buffer.from(res1).toString('hex')).to.equal(data.toString('hex'))
expect(Buffer.from(res2).toString('hex')).to.equal(data.toString('hex'))
expect(Buffer.from(res3).toString('hex')).to.equal(data.toString('hex'))
})
})
describe('stream (unaligned)', () => {
const data = Buffer.from('6BC1BEE22E40', 'hex')
const dataEnc1 = Buffer.from('601ec3137757', 'hex')
const dataEnc2 = Buffer.from('7df2e078a555', 'hex')
const dataEnc3 = Buffer.from('a3a17be0742e', 'hex')
const dataEnc4 = Buffer.from('025ced833746', 'hex')
const dataEnc5 = Buffer.from('3ff238dea125', 'hex')
const dataEnc6 = Buffer.from('1055a52302dc', 'hex')
it('should correctly encrypt', () => {
const ctr = createCtr256(key, iv)
const res1 = ctr256(ctr, data)
const res2 = ctr256(ctr, data)
const res3 = ctr256(ctr, data)
const res4 = ctr256(ctr, data)
const res5 = ctr256(ctr, data)
const res6 = ctr256(ctr, data)
freeCtr256(ctr)
expect(Buffer.from(res1).toString('hex')).to.equal(dataEnc1.toString('hex'))
expect(Buffer.from(res2).toString('hex')).to.equal(dataEnc2.toString('hex'))
expect(Buffer.from(res3).toString('hex')).to.equal(dataEnc3.toString('hex'))
expect(Buffer.from(res4).toString('hex')).to.equal(dataEnc4.toString('hex'))
expect(Buffer.from(res5).toString('hex')).to.equal(dataEnc5.toString('hex'))
expect(Buffer.from(res6).toString('hex')).to.equal(dataEnc6.toString('hex'))
})
it('should correctly decrypt', () => {
const ctr = createCtr256(key, iv)
const res1 = ctr256(ctr, dataEnc1)
const res2 = ctr256(ctr, dataEnc2)
const res3 = ctr256(ctr, dataEnc3)
const res4 = ctr256(ctr, dataEnc4)
const res5 = ctr256(ctr, dataEnc5)
const res6 = ctr256(ctr, dataEnc6)
freeCtr256(ctr)
expect(Buffer.from(res1).toString('hex')).to.equal(data.toString('hex'))
expect(Buffer.from(res2).toString('hex')).to.equal(data.toString('hex'))
expect(Buffer.from(res3).toString('hex')).to.equal(data.toString('hex'))
expect(Buffer.from(res4).toString('hex')).to.equal(data.toString('hex'))
expect(Buffer.from(res5).toString('hex')).to.equal(data.toString('hex'))
expect(Buffer.from(res6).toString('hex')).to.equal(data.toString('hex'))
})
})
it('should not leak memory', () => {
const data = Buffer.from('6BC1BEE22E409F96E93D7E117393172A', 'hex')
const mem = __getWasm().memory.buffer
const memSize = mem.byteLength
for (let i = 0; i < 100; i++) {
const ctrEnc = createCtr256(key, iv)
const ctrDec = createCtr256(key, iv)
for (let j = 0; j < 100; j++) {
ctr256(ctrDec, ctr256(ctrEnc, data))
}
freeCtr256(ctrEnc)
freeCtr256(ctrDec)
}
expect(mem.byteLength).to.equal(memSize)
})
})
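As a cross-check on the streaming semantics above (the ctx keeps its counter position across calls), the same vectors can be reproduced with node:crypto, which likewise advances the counter across update() calls — a sketch using the key/iv and the 16-byte data block from the stream tests:

import { createCipheriv } from 'node:crypto'

const ref = createCipheriv('aes-256-ctr', key, iv)
// two sequential update() calls continue one keystream,
// exactly like two ctr256() calls on a single ctx
const refEnc = Buffer.concat([ref.update(data), ref.update(data)])
// refEnc.subarray(0, 16) matches dataEnc1, refEnc.subarray(16) matches dataEnc2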

View file

@ -0,0 +1,46 @@
/* eslint-disable no-restricted-globals */
import { expect } from 'chai'
import { before, describe } from 'mocha'
import { gzipSync } from 'zlib'
import { __getWasm, gunzip, initAsync } from '../src/index.js'
before(async () => {
await initAsync()
})
describe('gunzip', () => {
it('should correctly read the gzip output size', () => {
const wasm = __getWasm()
const data = gzipSync(Buffer.from('hello world'))
const inputPtr = wasm.__malloc(data.length)
new Uint8Array(wasm.memory.buffer).set(data, inputPtr)
const outputSize = wasm.libdeflate_gzip_get_output_size(inputPtr, data.length)
wasm.__free(inputPtr)
expect(outputSize).to.equal(11)
})
it('should correctly inflate', () => {
const data = Array.from({ length: 1000 }, () => 'a').join('')
const res = gzipSync(Buffer.from(data))
expect(res.length).to.be.lessThan(100)
expect(gunzip(res)).to.deep.equal(new Uint8Array(Buffer.from(data)))
})
it('should not leak memory', () => {
const memSize = __getWasm().memory.buffer.byteLength
for (let i = 0; i < 100; i++) {
const data = Array.from({ length: 1000 }, () => 'a').join('')
const deflated = gzipSync(Buffer.from(data))
const res = gunzip(deflated)
expect(Buffer.from(res).toString()).to.equal(data)
}
expect(__getWasm().memory.buffer.byteLength).to.equal(memSize)
})
})

View file

@ -0,0 +1,40 @@
/* eslint-disable no-restricted-globals */
import { expect } from 'chai'
import { before, describe } from 'mocha'
import { __getWasm, ige256Decrypt, ige256Encrypt, initAsync } from '../src/index.js'
before(async () => {
await initAsync()
})
describe('aes-ige', () => {
const key = Buffer.from('5468697320697320616E20696D706C655468697320697320616E20696D706C65', 'hex')
const iv = Buffer.from('6D656E746174696F6E206F6620494745206D6F646520666F72204F70656E5353', 'hex')
const data = Buffer.from('99706487a1cde613bc6de0b6f24b1c7aa448c8b9c3403e3467a8cad89340f53b', 'hex')
const dataEnc = Buffer.from('792ea8ae577b1a66cb3bd92679b8030ca54ee631976bd3a04547fdcb4639fa69', 'hex')
it('should correctly encrypt', () => {
const aes = ige256Encrypt(data, key, iv)
expect(Buffer.from(aes).toString('hex')).to.equal(dataEnc.toString('hex'))
})
it('should correctly decrypt', () => {
const aes = ige256Decrypt(dataEnc, key, iv)
expect(Buffer.from(aes).toString('hex')).to.equal(data.toString('hex'))
})
it('should not leak memory', () => {
const mem = __getWasm().memory.buffer
const memSize = mem.byteLength
for (let i = 0; i < 10000; i++) {
ige256Decrypt(ige256Encrypt(data, key, iv), key, iv)
}
expect(mem.byteLength).to.equal(memSize)
})
})

View file

@ -0,0 +1,9 @@
{
"extends": "../../../tsconfig.json",
"include": [
"."
],
"references": [
{ "path": "../" }
]
}

View file

@ -0,0 +1,49 @@
/* eslint-disable no-restricted-globals */
import { expect } from 'chai'
import { before, describe } from 'mocha'
import { inflateSync } from 'zlib'
import { __getWasm, deflateMaxSize, initAsync } from '../src/index.js'
before(async () => {
await initAsync()
})
describe('zlib deflate', () => {
it('should add zlib headers', () => {
const res = deflateMaxSize(Buffer.from('hello world'), 100)
expect(res).not.to.be.null
expect(res!.slice(0, 2)).to.deep.equal(Buffer.from([0x78, 0x9c]))
})
it('should return null if compressed data is larger than size', () => {
const res = deflateMaxSize(Buffer.from('hello world'), 1)
expect(res).to.be.null
})
it('should correctly deflate', () => {
const data = Array.from({ length: 1000 }, () => 'a').join('')
const res = deflateMaxSize(Buffer.from(data), 100)
expect(res).not.to.be.null
expect(res!.length).to.be.lessThan(100)
expect(inflateSync(res!)).to.deep.equal(Buffer.from(data))
})
it('should not leak memory', () => {
const memSize = __getWasm().memory.buffer.byteLength
for (let i = 0; i < 100; i++) {
const data = Array.from({ length: 1000 }, () => 'a').join('')
const deflated = deflateMaxSize(Buffer.from(data), 100)
const res = inflateSync(deflated!)
expect(Buffer.from(res).toString()).to.equal(data)
}
expect(__getWasm().memory.buffer.byteLength).to.equal(memSize)
})
})
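The null-on-overflow contract makes the call-site trivial: keep the compressed payload only when it actually fits under the limit. A hedged sketch of that intended usage (the helper name is hypothetical):

function packMaybeCompressed(payload: Uint8Array, limit: number): { data: Uint8Array; compressed: boolean } {
    const deflated = deflateMaxSize(payload, limit)
    if (deflated !== null) return { data: deflated, compressed: true }

    // compression didn't help enough — send the raw bytes instead
    return { data: payload, compressed: false }
}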

View file

@ -0,0 +1,10 @@
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"outDir": "./dist/esm",
"rootDir": "./src"
},
"include": [
"./src",
]
}

View file

@ -0,0 +1,4 @@
module.exports = {
extends: ['../../typedoc.base.cjs'],
entryPoints: ['./src/index.ts'],
}

View file

@ -126,6 +126,9 @@ importers:
'@mtcute/tl-runtime':
specifier: workspace:^
version: link:../tl-runtime
'@mtcute/wasm':
specifier: workspace:^
version: link:../wasm
'@types/events':
specifier: 3.0.0
version: 3.0.0
@ -139,9 +142,6 @@ importers:
specifier: 5.2.3
version: 5.2.3
devDependencies:
'@cryptography/aes':
specifier: ^0.1.1
version: 0.1.1
'@types/ws':
specifier: 8.5.4
version: 8.5.4
@ -312,13 +312,6 @@ importers:
long:
specifier: 5.2.3
version: 5.2.3
pako:
specifier: 2.1.0
version: 2.1.0
devDependencies:
'@types/pako':
specifier: 2.0.0
version: 2.0.0
packages/tl-utils:
dependencies:
@ -330,6 +323,8 @@ importers:
specifier: workspace:^
version: link:../tl-runtime
packages/wasm: {}
packages:
/@aashutoshrathi/word-wrap@1.2.6:
@ -702,10 +697,6 @@ packages:
chalk: 4.1.2
dev: true
/@cryptography/aes@0.1.1:
resolution: {integrity: sha512-PcYz4FDGblO6tM2kSC+VzhhK62vml6k6/YAkiWtyPvrgJVfnDRoHGDtKn5UiaRRUrvUTTocBpvc2rRgTCqxjsg==}
dev: true
/@cspotcode/source-map-support@0.8.1:
resolution: {integrity: sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==}
engines: {node: '>=12'}
@ -968,10 +959,6 @@ packages:
resolution: {integrity: sha512-Gj7cI7z+98M282Tqmp2K5EIsoouUEzbBJhQQzDE3jSIRk6r9gsz0oUokqIUR4u1R3dMHo0pDHM7sNOHyhulypw==}
dev: true
/@types/pako@2.0.0:
resolution: {integrity: sha512-10+iaz93qR5WYxTo+PMifD5TSxiOtdRaxBf7INGGXMQgTCu8Z/7GYWYFUOS3q/G0nE5boj1r4FEB+WSy7s5gbA==}
dev: true
/@types/semver@7.5.0:
resolution: {integrity: sha512-G8hZ6XJiHnuhQKR7ZmysCeJWE08o8T0AXtk5darsCaTVsYZhhgUrq53jizaR2FvsoeCwJhlmwTjkXBY5Pn/ZHw==}
dev: true
@ -4311,10 +4298,6 @@ packages:
release-zalgo: 1.0.0
dev: true
/pako@2.1.0:
resolution: {integrity: sha512-w+eufiZ1WuJYgPXbV/PO3NCMEc3xqylkKHzp8bxp1uW4qaSNQUkwmLLEc3kKsfz8lpV1F8Ht3U1Cm+9Srog2ug==}
dev: false
/parent-module@1.0.1:
resolution: {integrity: sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==}
engines: {node: '>=6'}

View file

@ -50,14 +50,10 @@ const buildConfig = {
})
}
console.log(config)
return config
})(),
}
console.log(buildConfig)
function buildPackageJson() {
const pkgJson = JSON.parse(fs.readFileSync(path.join(packageDir, 'package.json'), 'utf-8'))