/*
 * common_defs.h
 *
 * Copyright 2016 Eric Biggers
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef COMMON_DEFS_H
#define COMMON_DEFS_H

#include <stdbool.h>
#include <stddef.h>  /* for size_t */
#include <stdint.h>
#include <string.h>  /* for memcpy() */
#ifdef _MSC_VER
#  include <intrin.h>  /* for _BitScan*() and other intrinsics */
#  include <stdlib.h>  /* for _byteswap_*() */
/* Disable MSVC warnings that are expected. */
/* /W2 */
#  pragma warning(disable : 4146) /* unary minus on unsigned type */
/* /W3 */
#  pragma warning(disable : 4018) /* signed/unsigned mismatch */
#  pragma warning(disable : 4244) /* possible loss of data */
#  pragma warning(disable : 4267) /* possible loss of precision */
#  pragma warning(disable : 4310) /* cast truncates constant value */
/* /W4 */
#  pragma warning(disable : 4100) /* unreferenced formal parameter */
#  pragma warning(disable : 4127) /* conditional expression is constant */
#  pragma warning(disable : 4189) /* local variable initialized but not referenced */
#  pragma warning(disable : 4232) /* nonstandard extension used */
#  pragma warning(disable : 4245) /* conversion from 'int' to 'unsigned int' */
#  pragma warning(disable : 4295) /* array too small to include terminating null */
#endif

/* ========================================================================== */
/*                            Target architecture                             */
/* ========================================================================== */

/* If possible, define a compiler-independent ARCH_* macro. */
#undef ARCH_X86_64
#undef ARCH_X86_32
#undef ARCH_ARM64
#undef ARCH_ARM32
#ifdef _MSC_VER
#  if defined(_M_X64)
#    define ARCH_X86_64
#  elif defined(_M_IX86)
#    define ARCH_X86_32
#  elif defined(_M_ARM64)
#    define ARCH_ARM64
#  elif defined(_M_ARM)
#    define ARCH_ARM32
#  endif
#else
#  if defined(__x86_64__)
#    define ARCH_X86_64
#  elif defined(__i386__)
#    define ARCH_X86_32
#  elif defined(__aarch64__)
#    define ARCH_ARM64
#  elif defined(__arm__)
#    define ARCH_ARM32
#  endif
#endif

/* ========================================================================== */
/*                              Type definitions                              */
/* ========================================================================== */

/* Fixed-width integer types */
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int8_t s8;
typedef int16_t s16;
typedef int32_t s32;
typedef int64_t s64;

/* ssize_t, if not available in <sys/types.h> */
#ifdef _MSC_VER
#  ifdef _WIN64
typedef long long ssize_t;
#  else
typedef long ssize_t;
#  endif
#endif

/*
 * Word type of the target architecture.  Use 'size_t' instead of
 * 'unsigned long' to account for platforms such as Windows that use 32-bit
 * 'unsigned long' on 64-bit architectures.
 */
typedef size_t machine_word_t;

/* Number of bytes in a word */
#define WORDBYTES ((int)sizeof(machine_word_t))

/* Number of bits in a word */
#define WORDBITS (8 * WORDBYTES)

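/*
 * For illustration (not part of the original header): on typical 64-bit
 * targets sizeof(size_t) == 8, so WORDBYTES == 8 and WORDBITS == 64; on
 * 32-bit targets WORDBYTES == 4 and WORDBITS == 32.
 */
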
/* ========================================================================== */
/*                          Optional compiler features                        */
/* ========================================================================== */

/* Compiler version checks.  Only use when absolutely necessary. */
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#  define GCC_PREREQ(major, minor)                      \
        (__GNUC__ > (major) ||                          \
         (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#else
#  define GCC_PREREQ(major, minor) 0
#endif
#ifdef __clang__
#  ifdef __apple_build_version__
#    define CLANG_PREREQ(major, minor, apple_version)   \
        (__apple_build_version__ >= (apple_version))
#  else
#    define CLANG_PREREQ(major, minor, apple_version)   \
        (__clang_major__ > (major) ||                   \
         (__clang_major__ == (major) && __clang_minor__ >= (minor)))
#  endif
#else
#  define CLANG_PREREQ(major, minor, apple_version) 0
#endif

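/*
 * Usage note (added for illustration): these macros are meant for guards
 * such as
 *
 *      #if GCC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16)
 *
 * as in bswap16() below.  CLANG_PREREQ() takes a separate apple_version
 * because Apple's clang (which defines __apple_build_version__) uses its own
 * version numbering that does not match upstream clang releases.
 */
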
/*
 * Macros to check for compiler support for attributes and builtins.  clang
 * implements these macros, but gcc doesn't, so generally any use of one of
 * these macros must also be combined with a gcc version check.
 */
#ifndef __has_attribute
#  define __has_attribute(attribute) 0
#endif
#ifndef __has_builtin
#  define __has_builtin(builtin) 0
#endif

/*
 * restrict - hint that writes only occur through the given pointer.
 *
 * Don't use MSVC's __restrict, since it has nonstandard behavior.
 * Standard restrict is okay, if it is supported.
 */
#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L)
#  if defined(__GNUC__) || defined(__clang__)
#    define restrict __restrict__
#  else
#    define restrict
#  endif
#endif /* else assume 'restrict' is usable as-is */

/* likely(expr) - hint that an expression is usually true */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define likely(expr) __builtin_expect(!!(expr), 1)
#else
#  define likely(expr) (expr)
#endif

/* unlikely(expr) - hint that an expression is usually false */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define unlikely(expr) __builtin_expect(!!(expr), 0)
#else
#  define unlikely(expr) (expr)
#endif

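/*
 * Example (added for illustration): annotate rarely-taken error paths so the
 * compiler keeps the hot path straight-line:
 *
 *      if (unlikely(remaining < needed))       // hypothetical names
 *              return -1;
 *
 * The !!(expr) in the definitions normalizes any nonzero value to exactly 1,
 * which is the value __builtin_expect() is told to expect.
 */
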
/* prefetchr(addr) - prefetch into L1 cache for read */
#undef prefetchr
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchr(addr) __builtin_prefetch((addr), 0)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
#  elif defined(ARCH_ARM64)
#    define prefetchr(addr) __prefetch2((addr), 0x00 /* prfop=PLDL1KEEP */)
#  elif defined(ARCH_ARM32)
#    define prefetchr(addr) __prefetch(addr)
#  endif
#endif
#ifndef prefetchr
#  define prefetchr(addr)
#endif

/* prefetchw(addr) - prefetch into L1 cache for write */
#undef prefetchw
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchw(addr) __builtin_prefetch((addr), 1)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchw(addr) _m_prefetchw(addr)
#  elif defined(ARCH_ARM64)
#    define prefetchw(addr) __prefetch2((addr), 0x10 /* prfop=PSTL1KEEP */)
#  elif defined(ARCH_ARM32)
#    define prefetchw(addr) __prefetchw(addr)
#  endif
#endif
#ifndef prefetchw
#  define prefetchw(addr)
#endif

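/*
 * Usage sketch (added for illustration): prefetches are typically issued a
 * fixed distance ahead of the current position in a sequential loop:
 *
 *      for (i = 0; i < n; i++) {
 *              prefetchr(&src[i + 256]);       // distance is a tuning guess
 *              dst[i] = process(src[i]);       // hypothetical helper
 *      }
 *
 * On targets with no known prefetch instruction the macros expand to nothing,
 * so such calls are always safe to leave in.
 */
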
/*
 * _aligned_attribute(n) - declare that the annotated variable, or variables of
 * the annotated type, must be aligned on n-byte boundaries.
 */
#undef _aligned_attribute
#if defined(__GNUC__) || __has_attribute(aligned)
#  define _aligned_attribute(n) __attribute__((aligned(n)))
#elif defined(_MSC_VER)
#  define _aligned_attribute(n) __declspec(align(n))
#endif

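/*
 * Example (added for illustration): placing the attribute among the
 * declaration specifiers works with both the gcc/clang and the MSVC form:
 *
 *      static _aligned_attribute(64) u8 buffer[4096];
 *
 * Note that _aligned_attribute stays undefined on compilers that support
 * neither form, so portable uses should be guarded with
 * '#ifdef _aligned_attribute'.
 */
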
/*
 * _target_attribute(attrs) - override the compilation target for a function.
 *
 * This accepts one or more comma-separated suffixes to the -m prefix jointly
 * forming the name of a machine-dependent option.  On gcc-like compilers, this
 * enables codegen for the given targets, including arbitrary compiler-generated
 * code as well as the corresponding intrinsics.  On other compilers this macro
 * expands to nothing, though MSVC allows intrinsics to be used anywhere anyway.
 */
#if GCC_PREREQ(4, 4) || __has_attribute(target)
#  define _target_attribute(attrs) __attribute__((target(attrs)))
#  define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 1
#else
#  define _target_attribute(attrs)
#  define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 0
#endif

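/*
 * Example (added for illustration): compile a single function as if -mavx2
 * had been passed, typically paired with a runtime CPU feature check before
 * calling it:
 *
 *      #if COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE
 *      static void _target_attribute("avx2")
 *      sum_avx2(u32 *dst, const u32 *src, size_t n);   // hypothetical
 *      #endif
 */
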
/* ========================================================================== */
/*                            Miscellaneous macros                            */
/* ========================================================================== */

#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
#define MIN(a, b) ((a) <= (b) ? (a) : (b))
#define MAX(a, b) ((a) >= (b) ? (a) : (b))
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))

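/*
 * Note (added for illustration): STATIC_ASSERT() works by forming an array
 * type whose length is 1 when 'expr' is true and -1 when it is false; the
 * negative length makes compilation fail.  E.g. STATIC_ASSERT(WORDBITS == 64)
 * expands to ((void)sizeof(char[1 - 2 * !(WORDBITS == 64)])).  ALIGN()
 * assumes 'a' is a power of two; DIV_ROUND_UP() and ROUND_UP() assume 'd' is
 * nonzero.
 */
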
/* ========================================================================== */
/*                            Endianness handling                             */
/* ========================================================================== */

/*
 * CPU_IS_LITTLE_ENDIAN() - 1 if the CPU is little endian, or 0 if it is big
 * endian.  When possible this is a compile-time macro that can be used in
 * preprocessor conditionals.  As a fallback, a generic method is used that
 * can't be used in preprocessor conditionals but should still be optimized out.
 */
#if defined(__BYTE_ORDER__) /* gcc v4.6+ and clang */
#  define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#elif defined(_MSC_VER)
#  define CPU_IS_LITTLE_ENDIAN() true
#else
static bool CPU_IS_LITTLE_ENDIAN(void)
{
        union {
                u32 w;
                u8 b;
        } u;

        u.w = 1;
        return u.b;
}
#endif

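/*
 * Note (added for illustration): the fallback stores the 32-bit value 1 and
 * reads back the lowest-addressed byte through the union; that byte is 1
 * exactly on little endian machines.  Compilers typically fold this to a
 * constant, so code like 'if (CPU_IS_LITTLE_ENDIAN()) ... else ...' loses
 * the branch entirely.
 */
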
/* bswap16(v) - swap the bytes of a 16-bit integer */
static u16 bswap16(u16 v)
{
#if GCC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16)
        return __builtin_bswap16(v);
#elif defined(_MSC_VER)
        return _byteswap_ushort(v);
#else
        return (v << 8) | (v >> 8);
#endif
}

/* bswap32(v) - swap the bytes of a 32-bit integer */
static u32 bswap32(u32 v)
{
#if GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap32)
        return __builtin_bswap32(v);
#elif defined(_MSC_VER)
        return _byteswap_ulong(v);
#else
        return ((v & 0x000000FF) << 24) |
               ((v & 0x0000FF00) << 8) |
               ((v & 0x00FF0000) >> 8) |
               ((v & 0xFF000000) >> 24);
#endif
}

/* bswap64(v) - swap the bytes of a 64-bit integer */
static u64 bswap64(u64 v)
{
#if GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap64)
        return __builtin_bswap64(v);
#elif defined(_MSC_VER)
        return _byteswap_uint64(v);
#else
        return ((v & 0x00000000000000FF) << 56) |
               ((v & 0x000000000000FF00) << 40) |
               ((v & 0x0000000000FF0000) << 24) |
               ((v & 0x00000000FF000000) << 8) |
               ((v & 0x000000FF00000000) >> 8) |
               ((v & 0x0000FF0000000000) >> 24) |
               ((v & 0x00FF000000000000) >> 40) |
               ((v & 0xFF00000000000000) >> 56);
#endif
}

#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))

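/*
 * Note (added for illustration): leXX_bswap() converts between little endian
 * and the CPU's native byte order (the macro is its own inverse), and
 * beXX_bswap() does the same for big endian.  E.g. on a big endian CPU,
 * le32_bswap(0x12345678) == 0x78563412, while on a little endian CPU it
 * returns the value unchanged.
 */
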
/* ========================================================================== */
/*                          Unaligned memory accesses                         */
/* ========================================================================== */

/*
 * UNALIGNED_ACCESS_IS_FAST() - 1 if unaligned memory accesses can be performed
 * efficiently on the target platform, otherwise 0.
 */
#if (defined(__GNUC__) || defined(__clang__)) && \
        (defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
         defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
         /*
          * For all compilation purposes, WebAssembly behaves like any other
          * CPU instruction set.  Even though a WebAssembly engine might be
          * running on top of any of several actual CPU architectures, the
          * WebAssembly spec itself permits unaligned access, which will be
          * fast on most of those platforms and simulated at the engine level
          * on the others.  So it's worth treating WebAssembly as a CPU
          * architecture with fast unaligned access.
          */ defined(__wasm__))
#  define UNALIGNED_ACCESS_IS_FAST 1
#elif defined(_MSC_VER)
#  define UNALIGNED_ACCESS_IS_FAST 1
#else
#  define UNALIGNED_ACCESS_IS_FAST 0
#endif

/*
 * Implementing unaligned memory accesses using memcpy() is portable, and it
 * usually gets optimized appropriately by modern compilers.  I.e., each
 * memcpy() of 1, 2, 4, or WORDBYTES bytes gets compiled to a load or store
 * instruction, not to an actual function call.
 *
 * We no longer use the "packed struct" approach to unaligned accesses, as that
 * is nonstandard, has unclear semantics, and doesn't receive enough testing
 * (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94994).
 *
 * arm32 with __ARM_FEATURE_UNALIGNED in gcc 5 and earlier is a known exception
 * where memcpy() generates inefficient code
 * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67366).  However, we no longer
 * consider that one case important enough to maintain different code for.
 * If you run into it, please just use a newer version of gcc (or use clang).
 */

/* Unaligned loads and stores without endianness conversion */

#define DEFINE_UNALIGNED_TYPE(type)                     \
static type                                             \
load_##type##_unaligned(const void *p)                  \
{                                                       \
        type v;                                         \
                                                        \
        memcpy(&v, p, sizeof(v));                       \
        return v;                                       \
}                                                       \
                                                        \
static void                                             \
store_##type##_unaligned(type v, void *p)               \
{                                                       \
        memcpy(p, &v, sizeof(v));                       \
}

DEFINE_UNALIGNED_TYPE(u16)
DEFINE_UNALIGNED_TYPE(u32)
DEFINE_UNALIGNED_TYPE(u64)
DEFINE_UNALIGNED_TYPE(machine_word_t)

#define load_word_unaligned load_machine_word_t_unaligned
#define store_word_unaligned store_machine_word_t_unaligned

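/*
 * Note (added for illustration): DEFINE_UNALIGNED_TYPE(u32) expands to a
 * pair of helpers equivalent to
 *
 *      static u32 load_u32_unaligned(const void *p)
 *      { u32 v; memcpy(&v, p, sizeof(v)); return v; }
 *
 *      static void store_u32_unaligned(u32 v, void *p)
 *      { memcpy(p, &v, sizeof(v)); }
 *
 * which, per the comment above, modern compilers reduce to single (possibly
 * unaligned) load and store instructions rather than real memcpy() calls.
 */
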
/* Unaligned loads with endianness conversion */

static u16
get_unaligned_le16(const u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST)
                return le16_bswap(load_u16_unaligned(p));
        else
                return ((u16)p[1] << 8) | p[0];
}

static u16
get_unaligned_be16(const u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST)
                return be16_bswap(load_u16_unaligned(p));
        else
                return ((u16)p[0] << 8) | p[1];
}

static u32
get_unaligned_le32(const u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST)
                return le32_bswap(load_u32_unaligned(p));
        else
                return ((u32)p[3] << 24) | ((u32)p[2] << 16) |
                       ((u32)p[1] << 8) | p[0];
}

static u32
get_unaligned_be32(const u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST)
                return be32_bswap(load_u32_unaligned(p));
        else
                return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
                       ((u32)p[2] << 8) | p[3];
}

static u64
get_unaligned_le64(const u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST)
                return le64_bswap(load_u64_unaligned(p));
        else
                return ((u64)p[7] << 56) | ((u64)p[6] << 48) |
                       ((u64)p[5] << 40) | ((u64)p[4] << 32) |
                       ((u64)p[3] << 24) | ((u64)p[2] << 16) |
                       ((u64)p[1] << 8) | p[0];
}

static machine_word_t
get_unaligned_leword(const u8 *p)
{
        STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
        if (WORDBITS == 32)
                return get_unaligned_le32(p);
        else
                return get_unaligned_le64(p);
}

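/*
 * Example (added for illustration): parsing a little endian container field,
 * e.g. a gzip-style 32-bit size stored at the end of a buffer:
 *
 *      u32 isize = get_unaligned_le32(&buf[len - 4]);  // hypothetical names
 *
 * This is correct regardless of the CPU's endianness and of the alignment of
 * 'buf', and on UNALIGNED_ACCESS_IS_FAST targets it compiles to one load.
 */
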
/* Unaligned stores with endianness conversion */

static void
put_unaligned_le16(u16 v, u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST) {
                store_u16_unaligned(le16_bswap(v), p);
        } else {
                p[0] = (u8)(v >> 0);
                p[1] = (u8)(v >> 8);
        }
}

static void
put_unaligned_be16(u16 v, u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST) {
                store_u16_unaligned(be16_bswap(v), p);
        } else {
                p[0] = (u8)(v >> 8);
                p[1] = (u8)(v >> 0);
        }
}

static void
put_unaligned_le32(u32 v, u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST) {
                store_u32_unaligned(le32_bswap(v), p);
        } else {
                p[0] = (u8)(v >> 0);
                p[1] = (u8)(v >> 8);
                p[2] = (u8)(v >> 16);
                p[3] = (u8)(v >> 24);
        }
}

static void
put_unaligned_be32(u32 v, u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST) {
                store_u32_unaligned(be32_bswap(v), p);
        } else {
                p[0] = (u8)(v >> 24);
                p[1] = (u8)(v >> 16);
                p[2] = (u8)(v >> 8);
                p[3] = (u8)(v >> 0);
        }
}

static void
put_unaligned_le64(u64 v, u8 *p)
{
        if (UNALIGNED_ACCESS_IS_FAST) {
                store_u64_unaligned(le64_bswap(v), p);
        } else {
                p[0] = (u8)(v >> 0);
                p[1] = (u8)(v >> 8);
                p[2] = (u8)(v >> 16);
                p[3] = (u8)(v >> 24);
                p[4] = (u8)(v >> 32);
                p[5] = (u8)(v >> 40);
                p[6] = (u8)(v >> 48);
                p[7] = (u8)(v >> 56);
        }
}

static void
put_unaligned_leword(machine_word_t v, u8 *p)
{
        STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
        if (WORDBITS == 32)
                put_unaligned_le32(v, p);
        else
                put_unaligned_le64(v, p);
}

/* ========================================================================== */
/*                         Bit manipulation functions                         */
/* ========================================================================== */

/*
 * Bit Scan Reverse (BSR) - find the 0-based index (relative to the least
 * significant end) of the *most* significant 1 bit in the input value.  The
 * input value must be nonzero!
 */

static unsigned
bsr32(u32 v)
{
#if defined(__GNUC__) || __has_builtin(__builtin_clz)
        return 31 - __builtin_clz(v);
#elif defined(_MSC_VER)
        unsigned long i;

        _BitScanReverse(&i, v);
        return i;
#else
        unsigned i = 0;

        while ((v >>= 1) != 0)
                i++;
        return i;
#endif
}

static unsigned
bsr64(u64 v)
{
#if defined(__GNUC__) || __has_builtin(__builtin_clzll)
        return 63 - __builtin_clzll(v);
#elif defined(_MSC_VER) && defined(_WIN64)
        unsigned long i;

        _BitScanReverse64(&i, v);
        return i;
#else
        unsigned i = 0;

        while ((v >>= 1) != 0)
                i++;
        return i;
#endif
}

static unsigned
bsrw(machine_word_t v)
{
        STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
        if (WORDBITS == 32)
                return bsr32(v);
        else
                return bsr64(v);
}

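/*
 * Note (added for illustration): bsr32(v) computes floor(log2(v)) for nonzero
 * v, e.g. bsr32(1) == 0, bsr32(255) == 7, bsr32(0x80000000) == 31.  The
 * nonzero requirement is real: __builtin_clz(0) is undefined behavior, and
 * _BitScanReverse() leaves its output unset when given 0.
 */
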
/*
 * Bit Scan Forward (BSF) - find the 0-based index (relative to the least
 * significant end) of the *least* significant 1 bit in the input value.  The
 * input value must be nonzero!
 */

static unsigned
bsf32(u32 v)
{
#if defined(__GNUC__) || __has_builtin(__builtin_ctz)
        return __builtin_ctz(v);
#elif defined(_MSC_VER)
        unsigned long i;

        _BitScanForward(&i, v);
        return i;
#else
        unsigned i = 0;

        for (; (v & 1) == 0; v >>= 1)
                i++;
        return i;
#endif
}

static unsigned
bsf64(u64 v)
{
#if defined(__GNUC__) || __has_builtin(__builtin_ctzll)
        return __builtin_ctzll(v);
#elif defined(_MSC_VER) && defined(_WIN64)
        unsigned long i;

        _BitScanForward64(&i, v);
        return i;
#else
        unsigned i = 0;

        for (; (v & 1) == 0; v >>= 1)
                i++;
        return i;
#endif
}

static unsigned
bsfw(machine_word_t v)
{
        STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
        if (WORDBITS == 32)
                return bsf32(v);
        else
                return bsf64(v);
}

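/*
 * Note (added for illustration): bsf32(v) is the number of trailing zero bits
 * in v, e.g. bsf32(1) == 0, bsf32(8) == 3, bsf32(12) == 2.  As with bsr*(),
 * v must be nonzero: __builtin_ctz(0) is undefined, and the generic loop
 * would never terminate for v == 0.
 */
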
/*
 * rbit32(v): reverse the bits in a 32-bit integer.  This doesn't have a
 * fallback implementation; use '#ifdef rbit32' to check if this is available.
 */
#undef rbit32
#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
        (__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
static u32
rbit32(u32 v)
{
        __asm__("rbit %0, %1" : "=r" (v) : "r" (v));
        return v;
}
#define rbit32 rbit32
#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
static u32
rbit32(u32 v)
{
        __asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
        return v;
}
#define rbit32 rbit32
#endif

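/*
 * Usage sketch (added for illustration): since there is no fallback, callers
 * must branch on availability, e.g. to reverse the low 'len' bits of a value:
 *
 *      #ifdef rbit32
 *              code = rbit32(code) >> (32 - len);      // hypothetical use
 *      #else
 *              // generic bit-reversal loop here
 *      #endif
 */
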
#endif /* COMMON_DEFS_H */