/* mtcute/packages/wasm/lib/common_defs.h */
/*
* common_defs.h
*
* Copyright 2016 Eric Biggers
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef COMMON_DEFS_H
#define COMMON_DEFS_H
#include "libdeflate.h"
#include <stdbool.h>
#include <stddef.h> /* for size_t */
#include <stdint.h>
#ifdef _MSC_VER
# include <intrin.h> /* for _BitScan*() and other intrinsics */
# include <stdlib.h> /* for _byteswap_*() */
/* Disable MSVC warnings that are expected. */
/* /W2 */
# pragma warning(disable : 4146) /* unary minus on unsigned type */
/* /W3 */
# pragma warning(disable : 4018) /* signed/unsigned mismatch */
# pragma warning(disable : 4244) /* possible loss of data */
# pragma warning(disable : 4267) /* possible loss of precision */
# pragma warning(disable : 4310) /* cast truncates constant value */
/* /W4 */
# pragma warning(disable : 4100) /* unreferenced formal parameter */
# pragma warning(disable : 4127) /* conditional expression is constant */
# pragma warning(disable : 4189) /* local variable initialized but not referenced */
# pragma warning(disable : 4232) /* nonstandard extension used */
# pragma warning(disable : 4245) /* conversion from 'int' to 'unsigned int' */
# pragma warning(disable : 4295) /* array too small to include terminating null */
#endif
/* ========================================================================== */
/* Target architecture */
/* ========================================================================== */
/* If possible, define a compiler-independent ARCH_* macro. */
#undef ARCH_X86_64
#undef ARCH_X86_32
#undef ARCH_ARM64
#undef ARCH_ARM32
#ifdef _MSC_VER
# if defined(_M_X64)
# define ARCH_X86_64
# elif defined(_M_IX86)
# define ARCH_X86_32
# elif defined(_M_ARM64)
# define ARCH_ARM64
# elif defined(_M_ARM)
# define ARCH_ARM32
# endif
#else
# if defined(__x86_64__)
# define ARCH_X86_64
# elif defined(__i386__)
# define ARCH_X86_32
# elif defined(__aarch64__)
# define ARCH_ARM64
# elif defined(__arm__)
# define ARCH_ARM32
# endif
#endif
/* ========================================================================== */
/* Type definitions */
/* ========================================================================== */
/* Fixed-width integer types */
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int8_t s8;
typedef int16_t s16;
typedef int32_t s32;
typedef int64_t s64;
/* ssize_t, if not available in <sys/types.h> */
#ifdef _MSC_VER
# ifdef _WIN64
typedef long long ssize_t;
# else
typedef long ssize_t;
# endif
#endif
/*
* Word type of the target architecture. Use 'size_t' instead of
* 'unsigned long' to account for platforms such as Windows that use 32-bit
* 'unsigned long' on 64-bit architectures.
*/
typedef size_t machine_word_t;
/* Number of bytes in a word */
#define WORDBYTES ((int)sizeof(machine_word_t))
/* Number of bits in a word */
#define WORDBITS (8 * WORDBYTES)
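/*
 * For illustration (not part of the original header): on a 32-bit target such
 * as wasm32, sizeof(size_t) == 4, so WORDBYTES == 4 and WORDBITS == 32; on a
 * 64-bit target, WORDBYTES == 8 and WORDBITS == 64.
 */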
/* ========================================================================== */
/* Optional compiler features */
/* ========================================================================== */
/* Compiler version checks. Only use when absolutely necessary. */
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
# define GCC_PREREQ(major, minor) \
        (__GNUC__ > (major) || \
         (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#else
# define GCC_PREREQ(major, minor) 0
#endif
#ifdef __clang__
# ifdef __apple_build_version__
# define CLANG_PREREQ(major, minor, apple_version) \
        (__apple_build_version__ >= (apple_version))
# else
# define CLANG_PREREQ(major, minor, apple_version) \
        (__clang_major__ > (major) || \
         (__clang_major__ == (major) && __clang_minor__ >= (minor)))
# endif
#else
# define CLANG_PREREQ(major, minor, apple_version) 0
#endif
/*
* Macros to check for compiler support for attributes and builtins. clang
* implements these macros, but gcc doesn't, so generally any use of one of
* these macros must also be combined with a gcc version check.
*/
#ifndef __has_attribute
# define __has_attribute(attribute) 0
#endif
#ifndef __has_builtin
# define __has_builtin(builtin) 0
#endif
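/*
 * Illustrative usage: a feature test is typically paired with a gcc version
 * check so that it works on both gcc and clang; for example, the bswap16()
 * definition later in this header is guarded with
 *
 *      #if GCC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16)
 */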
/*
* restrict - hint that writes only occur through the given pointer.
*
* Don't use MSVC's __restrict, since it has nonstandard behavior.
* Standard restrict is okay, if it is supported.
*/
#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L)
# if defined(__GNUC__) || defined(__clang__)
# define restrict __restrict__
# else
# define restrict
# endif
#endif /* else assume 'restrict' is usable as-is */
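/*
 * Example (illustrative, hypothetical prototype): with 'restrict' the
 * compiler may assume the two pointers do not alias, e.g.
 *
 *      void copy_u32s(u32 *restrict dst, const u32 *restrict src, size_t n);
 */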
/* likely(expr) - hint that an expression is usually true */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
# define likely(expr) __builtin_expect(!!(expr), 1)
#else
# define likely(expr) (expr)
#endif
/* unlikely(expr) - hint that an expression is usually false */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
# define unlikely(expr) __builtin_expect(!!(expr), 0)
#else
# define unlikely(expr) (expr)
#endif
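/*
 * Illustrative usage (hypothetical check): annotate a rarely-taken branch,
 * e.g.
 *
 *      if (unlikely(in_nbytes == 0))
 *              return 0;
 */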
/* prefetchr(addr) - prefetch into L1 cache for read */
#undef prefetchr
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
# define prefetchr(addr) __builtin_prefetch((addr), 0)
#elif defined(_MSC_VER)
# if defined(ARCH_X86_32) || defined(ARCH_X86_64)
# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
# elif defined(ARCH_ARM64)
# define prefetchr(addr) __prefetch2((addr), 0x00 /* prfop=PLDL1KEEP */)
# elif defined(ARCH_ARM32)
# define prefetchr(addr) __prefetch(addr)
# endif
#endif
#ifndef prefetchr
# define prefetchr(addr)
#endif
/* prefetchw(addr) - prefetch into L1 cache for write */
#undef prefetchw
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
# define prefetchw(addr) __builtin_prefetch((addr), 1)
#elif defined(_MSC_VER)
# if defined(ARCH_X86_32) || defined(ARCH_X86_64)
# define prefetchw(addr) _m_prefetchw(addr)
# elif defined(ARCH_ARM64)
# define prefetchw(addr) __prefetch2((addr), 0x10 /* prfop=PSTL1KEEP */)
# elif defined(ARCH_ARM32)
# define prefetchw(addr) __prefetchw(addr)
# endif
#endif
#ifndef prefetchw
# define prefetchw(addr)
#endif
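/*
 * Illustrative usage (hypothetical loop): prefetch data that will be needed a
 * few iterations from now, e.g.
 *
 *      prefetchr(&src[i + 64]);
 *      prefetchw(&dst[i + 64]);
 */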
/*
* _aligned_attribute(n) - declare that the annotated variable, or variables of
* the annotated type, must be aligned on n-byte boundaries.
*/
#undef _aligned_attribute
#if defined(__GNUC__) || __has_attribute(aligned)
# define _aligned_attribute(n) __attribute__((aligned(n)))
#elif defined(_MSC_VER)
# define _aligned_attribute(n) __declspec(align(n))
#endif
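/*
 * Illustrative usage (gcc/clang attribute placement): request 64-byte
 * alignment for a variable, e.g.
 *
 *      static u8 scratch[128] _aligned_attribute(64);
 *
 * Note that MSVC's __declspec(align(n)) form must instead precede the
 * declaration.
 */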
/*
* _target_attribute(attrs) - override the compilation target for a function.
*
* This accepts one or more comma-separated suffixes to the -m prefix jointly
* forming the name of a machine-dependent option. On gcc-like compilers, this
* enables codegen for the given targets, including arbitrary compiler-generated
* code as well as the corresponding intrinsics. On other compilers this macro
* expands to nothing, though MSVC allows intrinsics to be used anywhere anyway.
*/
#if GCC_PREREQ(4, 4) || __has_attribute(target)
# define _target_attribute(attrs) __attribute__((target(attrs)))
# define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 1
#else
# define _target_attribute(attrs)
# define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 0
#endif
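/*
 * Illustrative usage (hypothetical function): build a single function for a
 * more specific target than the rest of the translation unit, e.g.
 *
 *      static u32 _target_attribute("avx2")
 *      sum_avx2(const u32 *p, size_t n);
 */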
/* ========================================================================== */
/* Miscellaneous macros */
/* ========================================================================== */
#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
#define MIN(a, b) ((a) <= (b) ? (a) : (b))
#define MAX(a, b) ((a) >= (b) ? (a) : (b))
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
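/*
 * Worked examples (illustrative): DIV_ROUND_UP(10, 4) == 3,
 * ROUND_UP(10, 4) == 12, and ALIGN(13, 8) == 16 (ALIGN requires 'a' to be a
 * power of 2).
 */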
/* ========================================================================== */
/* Endianness handling */
/* ========================================================================== */
/*
* CPU_IS_LITTLE_ENDIAN() - 1 if the CPU is little endian, or 0 if it is big
* endian. When possible this is a compile-time macro that can be used in
* preprocessor conditionals. As a fallback, a generic method is used that
* can't be used in preprocessor conditionals but should still be optimized out.
*/
#if defined(__BYTE_ORDER__) /* gcc v4.6+ and clang */
# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#elif defined(_MSC_VER)
# define CPU_IS_LITTLE_ENDIAN() true
#else
static bool CPU_IS_LITTLE_ENDIAN(void)
{
        union {
                u32 w;
                u8 b;
        } u;
        u.w = 1;
        return u.b;
}
#endif
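/*
 * Note (illustrative): clang defines __BYTE_ORDER__, and WebAssembly memory is
 * little-endian, so for the wasm build this resolves at compile time to
 * CPU_IS_LITTLE_ENDIAN() == 1.
 */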
/* bswap16(v) - swap the bytes of a 16-bit integer */
static u16 bswap16(u16 v)
{
#if GCC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16)
        return __builtin_bswap16(v);
#elif defined(_MSC_VER)
        return _byteswap_ushort(v);
#else
        return (v << 8) | (v >> 8);
#endif
}
/* bswap32(v) - swap the bytes of a 32-bit integer */
static u32 bswap32(u32 v)
{
#if GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap32)
        return __builtin_bswap32(v);
#elif defined(_MSC_VER)
        return _byteswap_ulong(v);
#else
        return ((v & 0x000000FF) << 24) |
               ((v & 0x0000FF00) << 8) |
               ((v & 0x00FF0000) >> 8) |
               ((v & 0xFF000000) >> 24);
#endif
}
/* bswap64(v) - swap the bytes of a 64-bit integer */
static u64 bswap64(u64 v)
{
#if GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap64)
        return __builtin_bswap64(v);
#elif defined(_MSC_VER)
        return _byteswap_uint64(v);
#else
        return ((v & 0x00000000000000FF) << 56) |
               ((v & 0x000000000000FF00) << 40) |
               ((v & 0x0000000000FF0000) << 24) |
               ((v & 0x00000000FF000000) << 8) |
               ((v & 0x000000FF00000000) >> 8) |
               ((v & 0x0000FF0000000000) >> 24) |
               ((v & 0x00FF000000000000) >> 40) |
               ((v & 0xFF00000000000000) >> 56);
#endif
}
#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
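/*
 * Example (illustrative): on a little-endian CPU, le32_bswap(v) is just 'v',
 * while be32_bswap(0x11223344) evaluates to 0x44332211.
 */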
/* ========================================================================== */
/* Unaligned memory accesses */
/* ========================================================================== */
/*
* UNALIGNED_ACCESS_IS_FAST() - 1 if unaligned memory accesses can be performed
* efficiently on the target platform, otherwise 0.
*/
#if (defined(__GNUC__) || defined(__clang__)) && \
        (defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
         defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
         /*
          * For all compilation purposes, WebAssembly behaves like any other
          * CPU instruction set. Even though a WebAssembly engine might itself
          * be running on any of several actual CPU architectures, the
          * WebAssembly spec permits unaligned access; it is fast on most of
          * those platforms and simulated at the engine level on the others,
          * so it is worth treating WebAssembly as a CPU architecture with
          * fast unaligned access.
          */ defined(__wasm__))
# define UNALIGNED_ACCESS_IS_FAST 1
#elif defined(_MSC_VER)
# define UNALIGNED_ACCESS_IS_FAST 1
#else
# define UNALIGNED_ACCESS_IS_FAST 0
#endif
/*
* Implementing unaligned memory accesses using memcpy() is portable, and it
* usually gets optimized appropriately by modern compilers. I.e., each
* memcpy() of 1, 2, 4, or WORDBYTES bytes gets compiled to a load or store
* instruction, not to an actual function call.
*
* We no longer use the "packed struct" approach to unaligned accesses, as that
* is nonstandard, has unclear semantics, and doesn't receive enough testing
* (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94994).
*
* arm32 with __ARM_FEATURE_UNALIGNED in gcc 5 and earlier is a known exception
* where memcpy() generates inefficient code
* (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67366). However, we no longer
* consider that one case important enough to maintain different code for.
* If you run into it, please just use a newer version of gcc (or use clang).
*/
/* Unaligned loads and stores without endianness conversion */
#define DEFINE_UNALIGNED_TYPE(type) \
static type \
load_##type##_unaligned(const void *p) \
{ \
        type v; \
 \
        __builtin_memcpy(&v, p, sizeof(v)); \
        return v; \
} \
 \
static void \
store_##type##_unaligned(type v, void *p) \
{ \
        __builtin_memcpy(p, &v, sizeof(v)); \
}
DEFINE_UNALIGNED_TYPE(u16)
DEFINE_UNALIGNED_TYPE(u32)
DEFINE_UNALIGNED_TYPE(u64)
DEFINE_UNALIGNED_TYPE(machine_word_t)
#define load_word_unaligned load_machine_word_t_unaligned
#define store_word_unaligned store_machine_word_t_unaligned
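/*
 * Illustrative usage: load and store without any alignment assumptions, e.g.
 * copying one machine word from 'src' to 'dst' (hypothetical pointers):
 *
 *      store_word_unaligned(load_word_unaligned(src), dst);
 */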
/* Unaligned loads with endianness conversion */
static u16
get_unaligned_le16(const u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST)
                return le16_bswap(load_u16_unaligned(p));
        else
                return ((u16)p[1] << 8) | p[0];
}
static u16
get_unaligned_be16(const u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST)
                return be16_bswap(load_u16_unaligned(p));
        else
                return ((u16)p[0] << 8) | p[1];
}
static u32
get_unaligned_le32(const u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST)
                return le32_bswap(load_u32_unaligned(p));
        else
                return ((u32)p[3] << 24) | ((u32)p[2] << 16) |
                       ((u32)p[1] << 8) | p[0];
}
static u32
get_unaligned_be32(const u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST)
                return be32_bswap(load_u32_unaligned(p));
        else
                return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
                       ((u32)p[2] << 8) | p[3];
}
static u64
get_unaligned_le64(const u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST)
                return le64_bswap(load_u64_unaligned(p));
        else
                return ((u64)p[7] << 56) | ((u64)p[6] << 48) |
                       ((u64)p[5] << 40) | ((u64)p[4] << 32) |
                       ((u64)p[3] << 24) | ((u64)p[2] << 16) |
                       ((u64)p[1] << 8) | p[0];
}
static machine_word_t
get_unaligned_leword(const u8 *p)
{
        STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
        if (WORDBITS == 32)
                return get_unaligned_le32(p);
        else
                return get_unaligned_le64(p);
}
/* Unaligned stores with endianness conversion */
static void
put_unaligned_le16(u16 v, u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST) {
                store_u16_unaligned(le16_bswap(v), p);
        } else {
                p[0] = (u8)(v >> 0);
                p[1] = (u8)(v >> 8);
        }
}
static void
put_unaligned_be16(u16 v, u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST) {
                store_u16_unaligned(be16_bswap(v), p);
        } else {
                p[0] = (u8)(v >> 8);
                p[1] = (u8)(v >> 0);
        }
}
static void
put_unaligned_le32(u32 v, u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST) {
                store_u32_unaligned(le32_bswap(v), p);
        } else {
                p[0] = (u8)(v >> 0);
                p[1] = (u8)(v >> 8);
                p[2] = (u8)(v >> 16);
                p[3] = (u8)(v >> 24);
        }
}
static void
put_unaligned_be32(u32 v, u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST) {
                store_u32_unaligned(be32_bswap(v), p);
        } else {
                p[0] = (u8)(v >> 24);
                p[1] = (u8)(v >> 16);
                p[2] = (u8)(v >> 8);
                p[3] = (u8)(v >> 0);
        }
}
static void
put_unaligned_le64(u64 v, u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST) {
                store_u64_unaligned(le64_bswap(v), p);
        } else {
                p[0] = (u8)(v >> 0);
                p[1] = (u8)(v >> 8);
                p[2] = (u8)(v >> 16);
                p[3] = (u8)(v >> 24);
                p[4] = (u8)(v >> 32);
                p[5] = (u8)(v >> 40);
                p[6] = (u8)(v >> 48);
                p[7] = (u8)(v >> 56);
        }
}
static void
put_unaligned_leword(machine_word_t v, u8 *p)
{
        STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
        if (WORDBITS == 32)
                put_unaligned_le32(v, p);
        else
                put_unaligned_le64(v, p);
}
/* ========================================================================== */
/* Bit manipulation functions */
/* ========================================================================== */
/*
* Bit Scan Reverse (BSR) - find the 0-based index (relative to the least
* significant end) of the *most* significant 1 bit in the input value. The
* input value must be nonzero!
*/
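/*
 * Worked examples (illustrative): bsr32(1) == 0 and bsr32(0x80000000) == 31;
 * in general bsr32(v) == floor(log2(v)) for nonzero v.
 */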
static unsigned
bsr32(u32 v)
{
#if defined(__GNUC__) || __has_builtin(__builtin_clz)
        return 31 - __builtin_clz(v);
#elif defined(_MSC_VER)
        unsigned long i;
        _BitScanReverse(&i, v);
        return i;
#else
        unsigned i = 0;
        while ((v >>= 1) != 0)
                i++;
        return i;
#endif
}
static unsigned
bsr64(u64 v)
{
#if defined(__GNUC__) || __has_builtin(__builtin_clzll)
        return 63 - __builtin_clzll(v);
#elif defined(_MSC_VER) && defined(_WIN64)
        unsigned long i;
        _BitScanReverse64(&i, v);
        return i;
#else
        unsigned i = 0;
        while ((v >>= 1) != 0)
                i++;
        return i;
#endif
}
static unsigned
bsrw(machine_word_t v)
{
        STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
        if (WORDBITS == 32)
                return bsr32(v);
        else
                return bsr64(v);
}
/*
* Bit Scan Forward (BSF) - find the 0-based index (relative to the least
* significant end) of the *least* significant 1 bit in the input value. The
* input value must be nonzero!
*/
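/*
 * Worked examples (illustrative): bsf32(8) == 3 and bsf32(1) == 0; bsf32(v) is
 * the number of trailing zero bits of a nonzero v.
 */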
static unsigned
bsf32(u32 v)
{
#if defined(__GNUC__) || __has_builtin(__builtin_ctz)
        return __builtin_ctz(v);
#elif defined(_MSC_VER)
        unsigned long i;
        _BitScanForward(&i, v);
        return i;
#else
        unsigned i = 0;
        for (; (v & 1) == 0; v >>= 1)
                i++;
        return i;
#endif
}
static unsigned
bsf64(u64 v)
{
#if defined(__GNUC__) || __has_builtin(__builtin_ctzll)
        return __builtin_ctzll(v);
#elif defined(_MSC_VER) && defined(_WIN64)
        unsigned long i;
        _BitScanForward64(&i, v);
        return i;
#else
        unsigned i = 0;
        for (; (v & 1) == 0; v >>= 1)
                i++;
        return i;
#endif
}
static unsigned
bsfw(machine_word_t v)
{
        STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
        if (WORDBITS == 32)
                return bsf32(v);
        else
                return bsf64(v);
}
/*
* rbit32(v): reverse the bits in a 32-bit integer. This doesn't have a
* fallback implementation; use '#ifdef rbit32' to check if this is available.
*/
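/*
 * Illustrative usage: since there is no fallback implementation, callers are
 * expected to guard uses, e.g. (hypothetical variable):
 *
 *      #ifdef rbit32
 *      v = rbit32(v);
 *      #endif
 */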
#undef rbit32
#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
        (__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
static u32
rbit32(u32 v)
{
        __asm__("rbit %0, %1" : "=r" (v) : "r" (v));
        return v;
}
#define rbit32 rbit32
#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
static u32
rbit32(u32 v)
{
        __asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
        return v;
}
#define rbit32 rbit32
#endif
#endif /* COMMON_DEFS_H */