mtcute/packages/wasm/lib/libdeflate/ht_matchfinder.h
2023-11-04 06:44:18 +03:00

234 lines
6.9 KiB
C

/*
* ht_matchfinder.h - Lempel-Ziv matchfinding with a hash table
*
* Copyright 2022 Eric Biggers
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* ---------------------------------------------------------------------------
*
* This is a Hash Table (ht) matchfinder.
*
* This is a variant of the Hash Chains (hc) matchfinder that is optimized for
* very fast compression. The ht_matchfinder stores the hash chains inline in
* the hash table, whereas the hc_matchfinder stores them in a separate array.
* Storing the hash chains inline is the faster method when max_search_depth
* (the maximum chain length) is very small. It is not appropriate when
* max_search_depth is larger, as then it uses too much memory.
*
* Due to its focus on speed, the ht_matchfinder doesn't support length 3
* matches. It also doesn't allow max_search_depth to vary at runtime; it is
* fixed at build time as HT_MATCHFINDER_BUCKET_SIZE.
*
* See hc_matchfinder.h for more information.
*/
#ifndef LIB_HT_MATCHFINDER_H
#define LIB_HT_MATCHFINDER_H
#include "matchfinder_common.h"
/*
* log2 of the number of buckets: hash_tab has
* (1 << HT_MATCHFINDER_HASH_ORDER) buckets, and this is also the order passed
* to lz_hash() when computing a bucket index.
*/
#define HT_MATCHFINDER_HASH_ORDER 15
/*
* Number of positions kept per bucket, i.e. the maximum number of candidates
* examined per search. ht_matchfinder_longest_match() has hand-unrolled code
* paths for the values 1 and 2 and a generic loop for larger values.
*/
#define HT_MATCHFINDER_BUCKET_SIZE 2
/*
* Candidates are validated by comparing 4-byte sequences, so 4 is the shortest
* match length considered. ht_matchfinder_longest_match() statically asserts
* this value.
*/
#define HT_MATCHFINDER_MIN_MATCH_LEN 4
/*
* Minimum value of max_len for ht_matchfinder_longest_match(). It is 5
* because that function hashes the 4 bytes starting at in_next + 1.
*/
#define HT_MATCHFINDER_REQUIRED_NBYTES 5
/*
* The matchfinder state: a hash table with (1 << HT_MATCHFINDER_HASH_ORDER)
* buckets of HT_MATCHFINDER_BUCKET_SIZE positions each. The functions in this
* file store the most recently inserted position in slot 0 of a bucket and
* move older positions toward higher slots.
*
* The struct holds nothing but mf_pos_t entries; ht_matchfinder_init() and
* ht_matchfinder_slide_window() rely on that when they pass it to the common
* helpers as a flat mf_pos_t array of sizeof(struct ht_matchfinder) bytes.
*/
struct MATCHFINDER_ALIGNED ht_matchfinder {
mf_pos_t hash_tab[1UL << HT_MATCHFINDER_HASH_ORDER]
[HT_MATCHFINDER_BUCKET_SIZE];
};
static void
ht_matchfinder_init(struct ht_matchfinder *mf)
{
STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
matchfinder_init((mf_pos_t *)mf, sizeof(*mf));
}
static void
ht_matchfinder_slide_window(struct ht_matchfinder *mf)
{
matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
}
/*
* Search the hash bucket of the current position for the longest match, and
* insert the current position into that bucket.
*
* mf: the matchfinder state.
* in_base_p: in/out. *in_base_p is the pointer that stored positions are
*     relative to; it is advanced by MATCHFINDER_WINDOW_SIZE when the
*     window is slid.
* in_next: the current position in the input.
* max_len: length limit passed to lz_extend().
*     Note: max_len must be >= HT_MATCHFINDER_REQUIRED_NBYTES.
* nice_len: once a match of at least this length has been found, no further
*     candidates are examined.
* next_hash: in/out. On entry, the bucket index for in_next (computed by
*     the previous call to this function or to
*     ht_matchfinder_skip_bytes()). On return, the bucket index for
*     in_next + 1.
* offset_ret: out. Set to the distance from the best match to in_next, or to
*     0 if no match was found.
*
* Returns the length of the best match found, or 0 if no match was found.
*/
static u32
ht_matchfinder_longest_match(struct ht_matchfinder * const mf,
const u8 ** const in_base_p,
const u8 * const in_next,
const u32 max_len,
const u32 nice_len,
u32 * const next_hash,
u32 * const offset_ret)
{
/* "No match" state: length 0, and a match pointer that yields offset 0. */
u32 best_len = 0;
const u8 *best_matchptr = in_next;
u32 cur_pos = in_next - *in_base_p;
const u8 *in_base;
mf_pos_t cutoff;
u32 hash;
u32 seq;
mf_pos_t cur_node;
const u8 *matchptr;
#if HT_MATCHFINDER_BUCKET_SIZE > 1
mf_pos_t to_insert;
u32 len;
#endif
#if HT_MATCHFINDER_BUCKET_SIZE > 2
int i;
#endif
/* This is assumed throughout this function. */
STATIC_ASSERT(HT_MATCHFINDER_MIN_MATCH_LEN == 4);
/*
* When the current position reaches the end of the window, rebase the stored
* positions and move the base pointer forward so that the current position
* becomes 0.
*/
if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
ht_matchfinder_slide_window(mf);
*in_base_p += MATCHFINDER_WINDOW_SIZE;
cur_pos = 0;
}
in_base = *in_base_p;
/*
* Stored positions <= cutoff are rejected below, and rejecting one ends the
* search. NOTE(review): presumably this covers both positions that are a
* full window or more behind cur_pos and slots that still hold the initial
* value written by matchfinder_init() -- confirm in matchfinder_common.h.
*/
cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
/* The bucket index for this position was computed one position earlier. */
hash = *next_hash;
/*
* Compute the bucket index for the next position now. This reads the 4
* bytes at in_next + 1, i.e. 5 bytes starting at in_next, which is where
* HT_MATCHFINDER_REQUIRED_NBYTES comes from.
*/
STATIC_ASSERT(HT_MATCHFINDER_REQUIRED_NBYTES == 5);
*next_hash = lz_hash(get_unaligned_le32(in_next + 1),
HT_MATCHFINDER_HASH_ORDER);
/* The first 4 bytes at the current position; every candidate must match them. */
seq = load_u32_unaligned(in_next);
/* Request the next position's bucket early (prefetchw() is defined elsewhere). */
prefetchw(&mf->hash_tab[*next_hash]);
#if HT_MATCHFINDER_BUCKET_SIZE == 1
/* Hand-unrolled version for BUCKET_SIZE == 1 */
/* Replace the single stored position with the current one, then test it. */
cur_node = mf->hash_tab[hash][0];
mf->hash_tab[hash][0] = cur_pos;
if (cur_node <= cutoff)
goto out;
matchptr = &in_base[cur_node];
if (load_u32_unaligned(matchptr) == seq) {
best_len = lz_extend(in_next, matchptr, 4, max_len);
best_matchptr = matchptr;
}
#elif HT_MATCHFINDER_BUCKET_SIZE == 2
/*
* Hand-unrolled version for BUCKET_SIZE == 2. The logic here also
* differs slightly in that it copies the first entry to the second even
* if nice_len is reached on the first, as this can be slightly faster.
*/
cur_node = mf->hash_tab[hash][0];
mf->hash_tab[hash][0] = cur_pos;
/* If the first entry is rejected, the second slot is neither read nor written. */
if (cur_node <= cutoff)
goto out;
matchptr = &in_base[cur_node];
/* Move the old first entry into the second slot, keeping the old second entry as the next candidate. */
to_insert = cur_node;
cur_node = mf->hash_tab[hash][1];
mf->hash_tab[hash][1] = to_insert;
if (load_u32_unaligned(matchptr) == seq) {
/* First candidate matches; measure it. */
best_len = lz_extend(in_next, matchptr, 4, max_len);
best_matchptr = matchptr;
if (cur_node <= cutoff || best_len >= nice_len)
goto out;
matchptr = &in_base[cur_node];
/*
* Quick rejection before the full extension: the second candidate can only
* beat best_len if, besides the first 4 bytes, the 4 bytes ending at index
* best_len also match.
* NOTE(review): this reads the byte at index best_len of both sequences, so
* it relies on best_len < max_len here; that holds if callers keep
* nice_len <= max_len -- confirm against the callers.
*/
if (load_u32_unaligned(matchptr) == seq &&
load_u32_unaligned(matchptr + best_len - 3) ==
load_u32_unaligned(in_next + best_len - 3)) {
len = lz_extend(in_next, matchptr, 4, max_len);
if (len > best_len) {
best_len = len;
best_matchptr = matchptr;
}
}
} else {
/* First candidate did not match; the second is the only remaining one. */
if (cur_node <= cutoff)
goto out;
matchptr = &in_base[cur_node];
if (load_u32_unaligned(matchptr) == seq) {
best_len = lz_extend(in_next, matchptr, 4, max_len);
best_matchptr = matchptr;
}
}
#else
/* Generic version for HT_MATCHFINDER_BUCKET_SIZE > 2 */
/*
* Walk the bucket from slot 0 upward. Each slot receives the value that was
* previously in the slot before it (slot 0 receives the current position),
* and the value it held is tested as a candidate. The walk, and with it the
* shifting, stops at the first rejected entry or once nice_len is reached.
*/
to_insert = cur_pos;
for (i = 0; i < HT_MATCHFINDER_BUCKET_SIZE; i++) {
cur_node = mf->hash_tab[hash][i];
mf->hash_tab[hash][i] = to_insert;
if (cur_node <= cutoff)
goto out;
matchptr = &in_base[cur_node];
if (load_u32_unaligned(matchptr) == seq) {
len = lz_extend(in_next, matchptr, 4, max_len);
if (len > best_len) {
best_len = len;
best_matchptr = matchptr;
if (best_len >= nice_len)
goto out;
}
}
to_insert = cur_node;
}
#endif
out:
/* With no match, best_matchptr still equals in_next, so the offset is 0. */
*offset_ret = in_next - best_matchptr;
return best_len;
}
/*
 * Insert 'count' consecutive positions, starting at in_next, into the hash
 * table without searching for matches.
 *
 * mf: the matchfinder state.
 * in_base_p: in/out.  *in_base_p is the pointer that stored positions are
 *	relative to; it is advanced by MATCHFINDER_WINDOW_SIZE when the
 *	window is slid.
 * in_next: the first position to insert.
 * in_end: the end of the input.
 * count: the number of positions to insert.  It must be nonzero: the loop
 *	below runs its body before it tests the counter.
 * next_hash: in/out.  On entry, the bucket index for in_next.  On return,
 *	the bucket index for in_next + count.
 *
 * If fewer than count + HT_MATCHFINDER_REQUIRED_NBYTES bytes remain before
 * in_end, the function returns without touching the table, *in_base_p, or
 * *next_hash.
 */
static void
ht_matchfinder_skip_bytes(struct ht_matchfinder * const mf,
			  const u8 ** const in_base_p,
			  const u8 *in_next,
			  const u8 * const in_end,
			  const u32 count,
			  u32 * const next_hash)
{
	s32 pos = in_next - *in_base_p;
	u32 left = count;
	u32 bucket_idx;
	int slot;

	/* Each iteration hashes 4 bytes one position ahead; bail out near the end. */
	if (unlikely(count + HT_MATCHFINDER_REQUIRED_NBYTES > in_end - in_next))
		return;

	/*
	 * Slide up front if the last skipped position would land at or past
	 * the window size.  'pos' may become negative as a result, which is
	 * why it is a signed variable.
	 */
	if (pos + count - 1 >= MATCHFINDER_WINDOW_SIZE) {
		ht_matchfinder_slide_window(mf);
		*in_base_p += MATCHFINDER_WINDOW_SIZE;
		pos -= MATCHFINDER_WINDOW_SIZE;
	}

	bucket_idx = *next_hash;
	do {
		mf_pos_t * const bucket = mf->hash_tab[bucket_idx];

		/* Shift older entries up one slot; the newest goes in slot 0. */
		for (slot = HT_MATCHFINDER_BUCKET_SIZE - 1; slot > 0; slot--)
			bucket[slot] = bucket[slot - 1];
		bucket[0] = pos;
		pos++;

		/* Move on and compute the bucket index for the next position. */
		in_next++;
		bucket_idx = lz_hash(get_unaligned_le32(in_next),
				     HT_MATCHFINDER_HASH_ORDER);
		left--;
	} while (left != 0);

	/* Request the bucket that the following call will use. */
	prefetchw(&mf->hash_tab[bucket_idx]);
	*next_hash = bucket_idx;
}
#endif /* LIB_HT_MATCHFINDER_H */