asset compression

This commit is contained in:
Jorijn van der Graaf 2026-05-11 18:37:30 +02:00
commit 30a283c1b3
57 changed files with 13237 additions and 8 deletions

View file

@ -0,0 +1,338 @@
/*
* common_defs.h
*
* Copyright 2016 Eric Biggers
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef COMMON_COMMON_DEFS_H
#define COMMON_COMMON_DEFS_H
#ifdef __GNUC__
# include "compiler_gcc.h"
#elif defined(_MSC_VER)
# include "compiler_msc.h"
#else
# pragma message("Unrecognized compiler. Please add a header file for your compiler. Compilation will proceed, but performance may suffer!")
#endif
/* ========================================================================== */
/* Type definitions */
/* ========================================================================== */
#include <stddef.h> /* size_t */
#ifndef __bool_true_false_are_defined
# include <stdbool.h> /* bool */
#endif
/* Fixed-width integer types */
#include <stdint.h>
/* Short aliases for the <stdint.h> types, paired up by width:
 * uN is the unsigned N-bit type, sN the signed N-bit type. */
typedef uint8_t u8;
typedef int8_t s8;
typedef uint16_t u16;
typedef int16_t s16;
typedef uint32_t u32;
typedef int32_t s32;
typedef uint64_t u64;
typedef int64_t s64;
/*
 * Token pasting helpers.  CONCAT2() pastes its two arguments exactly as
 * written.  CONCAT() adds one level of indirection so that arguments which are
 * themselves macros get expanded before being pasted.
 */
#define CONCAT2(a,b) a##b
#define CONCAT(a,b) CONCAT2(a,b)
/*
 * machine_word_t - the native word type of the target.  'size_t' is used
 * rather than 'unsigned long' because some platforms (Windows, for example)
 * keep 'unsigned long' at 32 bits even on 64-bit architectures.
 */
typedef size_t machine_word_t;
/* WORDBYTES - size of a machine word in bytes, as an 'int' */
#define WORDBYTES ((int)sizeof(machine_word_t))
/* WORDBITS - size of a machine word in bits (8 bits per byte) */
#define WORDBITS (WORDBYTES * 8)
/* ========================================================================== */
/* Optional compiler features */
/* ========================================================================== */
/*
 * Each macro below is only defined if the compiler-specific header has not
 * already provided it; the fallbacks are all harmless no-ops.
 */

/* LIBEXPORT - mark a function as exported from a shared library */
#if !defined(LIBEXPORT)
# define LIBEXPORT
#endif
/* inline - hint that a function should be inlined */
#if !defined(inline)
# define inline
#endif
/* forceinline - request inlining whenever possible */
#if !defined(forceinline)
# define forceinline inline
#endif
/* restrict - qualifier for a pointer that is not aliased */
#if !defined(restrict)
# define restrict
#endif
/* likely(expr) - hint that 'expr' is true most of the time */
#if !defined(likely)
# define likely(expr) (expr)
#endif
/* unlikely(expr) - hint that 'expr' is false most of the time */
#if !defined(unlikely)
# define unlikely(expr) (expr)
#endif
/* prefetchr(addr) - prefetch 'addr' into L1 cache ahead of a read */
#if !defined(prefetchr)
# define prefetchr(addr)
#endif
/* prefetchw(addr) - prefetch 'addr' into L1 cache ahead of a write */
#if !defined(prefetchw)
# define prefetchw(addr)
#endif
/*
 * Capability flags.  Anything the compiler-specific header left undefined is
 * treated as "not supported" and defaults to 0.
 */

/* Is the 'target' function attribute available at all? */
#if !defined(COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE)
# define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 0
#endif

/* Instruction sets that can be selected through the 'target' attribute */
#if !defined(COMPILER_SUPPORTS_BMI2_TARGET)
# define COMPILER_SUPPORTS_BMI2_TARGET 0
#endif
#if !defined(COMPILER_SUPPORTS_AVX_TARGET)
# define COMPILER_SUPPORTS_AVX_TARGET 0
#endif
#if !defined(COMPILER_SUPPORTS_AVX512BW_TARGET)
# define COMPILER_SUPPORTS_AVX512BW_TARGET 0
#endif

/*
 * Instruction sets that can be selected through the 'target' attribute AND
 * whose intrinsics are usable inside such 'target'-ed functions
 */
#if !defined(COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS)
# define COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS 0
#endif
#if !defined(COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS)
# define COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS 0
#endif
#if !defined(COMPILER_SUPPORTS_AVX2_TARGET_INTRINSICS)
# define COMPILER_SUPPORTS_AVX2_TARGET_INTRINSICS 0
#endif
#if !defined(COMPILER_SUPPORTS_AVX512BW_TARGET_INTRINSICS)
# define COMPILER_SUPPORTS_AVX512BW_TARGET_INTRINSICS 0
#endif
#if !defined(COMPILER_SUPPORTS_NEON_TARGET_INTRINSICS)
# define COMPILER_SUPPORTS_NEON_TARGET_INTRINSICS 0
#endif
#if !defined(COMPILER_SUPPORTS_PMULL_TARGET_INTRINSICS)
# define COMPILER_SUPPORTS_PMULL_TARGET_INTRINSICS 0
#endif
#if !defined(COMPILER_SUPPORTS_CRC32_TARGET_INTRINSICS)
# define COMPILER_SUPPORTS_CRC32_TARGET_INTRINSICS 0
#endif
/* _aligned_attribute(n) - declare that the annotated variable, or variables of
* the annotated type, are to be aligned on n-byte boundaries */
#ifndef _aligned_attribute
/* No fallback is defined here: when the compiler-specific header does not
 * provide _aligned_attribute, the macro simply stays undefined.
 * NOTE(review): presumably users check '#ifdef _aligned_attribute' before
 * relying on it - confirm against the callers. */
#endif
/* ========================================================================== */
/* Miscellaneous macros */
/* ========================================================================== */
/* ARRAY_LEN(A) - number of elements in the array 'A' */
#define ARRAY_LEN(A) (sizeof(A) / sizeof(*(A)))
/* MIN(x, y) / MAX(x, y) - smaller / larger of the two arguments.  The selected
 * argument is evaluated twice, so avoid arguments with side effects. */
#define MIN(x, y) ((x) <= (y) ? (x) : (y))
#define MAX(x, y) ((x) >= (y) ? (x) : (y))
/* DIV_ROUND_UP(n, d) - n divided by d, rounded up */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
/* STATIC_ASSERT(expr) - compile-time check: the array size becomes negative,
 * and compilation fails, when the constant expression 'expr' is false */
#define STATIC_ASSERT(expr) ((void)sizeof(char[(expr) ? 1 : -1]))
/* ALIGN(n, a) - round n up to a multiple of a; the mask arithmetic only works
 * when a is a power of two */
#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
/* ========================================================================== */
/* Endianness handling */
/* ========================================================================== */
/*
 * CPU_IS_LITTLE_ENDIAN() - evaluates to 1 on a little endian CPU and to 0 on a
 * big endian CPU.  Ideally the compiler-specific header defines it so that it
 * folds to a constant at compile time; the function below is the fallback used
 * when no such definition exists.
 */
#ifndef CPU_IS_LITTLE_ENDIAN
static forceinline int CPU_IS_LITTLE_ENDIAN(void)
{
	/* Store 1 in an 'unsigned int' and read back the byte that shares its
	 * lowest address: it is 1 only when the low-order byte comes first. */
	union {
		unsigned int word;
		unsigned char first_byte;
	} probe;

	probe.word = 1;
	return probe.first_byte;
}
#endif
/* bswap16(n) - return 'n' with its two bytes exchanged (portable fallback) */
#ifndef bswap16
static forceinline u16 bswap16(u16 n)
{
	const unsigned low_byte = n & 0xFF;
	const unsigned high_byte = n >> 8;

	return (u16)((low_byte << 8) | high_byte);
}
#endif
/* bswap32(n) - return 'n' with its four bytes reversed (portable fallback) */
#ifndef bswap32
static forceinline u32 bswap32(u32 n)
{
	/* Pull out the four bytes, least significant first ... */
	const u32 b0 = n & 0xFF;
	const u32 b1 = (n >> 8) & 0xFF;
	const u32 b2 = (n >> 16) & 0xFF;
	const u32 b3 = n >> 24;

	/* ... and reassemble them in the opposite order. */
	return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
#endif
/* bswap64(n) - return 'n' with its eight bytes reversed (portable fallback) */
#ifndef bswap64
static forceinline u64 bswap64(u64 n)
{
	u64 swapped = 0;
	int byte;

	/* Peel bytes off the low end of n and push them onto the low end of
	 * the result, so the first byte taken ends up most significant. */
	for (byte = 0; byte < 8; byte++) {
		swapped = (swapped << 8) | (n & 0xFF);
		n >>= 8;
	}
	return swapped;
}
#endif
/*
 * leN_bswap(n) - byte-swap 'n' only when the CPU is NOT little endian.
 * beN_bswap(n) - byte-swap 'n' only when the CPU IS little endian.
 */
#define le16_bswap(n) (!CPU_IS_LITTLE_ENDIAN() ? bswap16(n) : (n))
#define le32_bswap(n) (!CPU_IS_LITTLE_ENDIAN() ? bswap32(n) : (n))
#define le64_bswap(n) (!CPU_IS_LITTLE_ENDIAN() ? bswap64(n) : (n))
#define be16_bswap(n) (!CPU_IS_LITTLE_ENDIAN() ? (n) : bswap16(n))
#define be32_bswap(n) (!CPU_IS_LITTLE_ENDIAN() ? (n) : bswap32(n))
#define be64_bswap(n) (!CPU_IS_LITTLE_ENDIAN() ? (n) : bswap64(n))
/* ========================================================================== */
/* Unaligned memory accesses */
/* ========================================================================== */
/*
 * UNALIGNED_ACCESS_IS_FAST - 1 when the target platform performs unaligned
 * memory accesses efficiently.  Defaults to 0 unless the compiler-specific
 * header has already defined it.
 */
#if !defined(UNALIGNED_ACCESS_IS_FAST)
# define UNALIGNED_ACCESS_IS_FAST 0
#endif
/* ========================================================================== */
/* Bit scan functions */
/* ========================================================================== */
/*
* Bit Scan Reverse (BSR) - find the 0-based index (relative to the least
* significant end) of the *most* significant 1 bit in the input value. The
* input value must be nonzero!
*/
#ifndef bsr32
/* Portable fallback: index of the highest set bit of a 32-bit value */
static forceinline unsigned
bsr32(u32 n)
{
	unsigned index;

	/* Every halving that still leaves more than the lowest bit moves the
	 * top bit one position down, so count the halvings. */
	for (index = 0; n > 1; n >>= 1)
		index++;
	return index;
}
#endif
#ifndef bsr64
/* Portable fallback: index of the highest set bit of a 64-bit value */
static forceinline unsigned
bsr64(u64 n)
{
	unsigned index;

	/* Same halving count as the 32-bit version, on a 64-bit value. */
	for (index = 0; n > 1; n >>= 1)
		index++;
	return index;
}
#endif
/* bsrw(n) - Bit Scan Reverse on a machine word: dispatches to the 32-bit or
 * 64-bit variant according to the word size. */
static forceinline unsigned
bsrw(machine_word_t n)
{
	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
	/* WORDBITS is a constant, so only one branch can ever be taken. */
	if (WORDBITS != 32)
		return bsr64(n);
	return bsr32(n);
}
/*
* Bit Scan Forward (BSF) - find the 0-based index (relative to the least
* significant end) of the *least* significant 1 bit in the input value. The
* input value must be nonzero!
*/
#ifndef bsf32
/* Portable fallback: index of the lowest set bit of a 32-bit value */
static forceinline unsigned
bsf32(u32 n)
{
	unsigned index;

	/* Shift right until a 1 reaches bit 0.  This never terminates for
	 * n == 0, which is why the input must be nonzero. */
	for (index = 0; !(n & 1); index++)
		n >>= 1;
	return index;
}
#endif
#ifndef bsf64
/* Portable fallback: index of the lowest set bit of a 64-bit value */
static forceinline unsigned
bsf64(u64 n)
{
	unsigned index;

	/* Shift right until a 1 reaches bit 0.  This never terminates for
	 * n == 0, which is why the input must be nonzero. */
	for (index = 0; !(n & 1); index++)
		n >>= 1;
	return index;
}
#endif
/* bsfw(n) - Bit Scan Forward on a machine word: dispatches to the 32-bit or
 * 64-bit variant according to the word size. */
static forceinline unsigned
bsfw(machine_word_t n)
{
	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
	/* WORDBITS is a constant, so only one branch can ever be taken. */
	if (WORDBITS != 32)
		return bsf64(n);
	return bsf32(n);
}
#endif /* COMMON_COMMON_DEFS_H */

View file

@ -0,0 +1,217 @@
/*
* compiler_gcc.h - definitions for the GNU C Compiler. This also handles clang
* and the Intel C Compiler (icc).
*
* TODO: icc is not well tested, so some things are currently disabled even
* though they could probably be enabled on some icc versions.
*/
/*
 * GCC_PREREQ(major, minor) - nonzero when compiling with gcc itself at version
 * major.minor or newer.  It is always 0 under clang and icc, whose __GNUC__
 * values are not consulted.
 */
#if defined(__clang__) || defined(__INTEL_COMPILER)
# define GCC_PREREQ(major, minor) 0
#else
# define GCC_PREREQ(major, minor) \
	(__GNUC__ > (major) || \
	 (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#endif
/*
 * CLANG_PREREQ(major, minor, apple_version) - nonzero when compiling with
 * clang of at least the given version; always 0 for other compilers.
 *
 * Note: only check the clang version when absolutely necessary!  "Vendors"
 * such as Apple can use different version numbers, which is why Apple builds
 * are compared against 'apple_version' instead of major.minor.
 */
#if defined(__clang__) && defined(__apple_build_version__)
# define CLANG_PREREQ(major, minor, apple_version) \
	(__apple_build_version__ >= (apple_version))
#elif defined(__clang__)
# define CLANG_PREREQ(major, minor, apple_version) \
	(__clang_major__ > (major) || \
	 (__clang_major__ == (major) && __clang_minor__ >= (minor)))
#else
# define CLANG_PREREQ(major, minor, apple_version) 0
#endif
/*
 * Feature-test operators.  Compilers that lack one of them get a stand-in
 * that answers 0 ("not available") to every query.
 */
#if !defined(__has_attribute)
# define __has_attribute(attribute) 0
#endif
#if !defined(__has_feature)
# define __has_feature(feature) 0
#endif
#if !defined(__has_builtin)
# define __has_builtin(builtin) 0
#endif
/* LIBEXPORT - export a function from a shared library: a DLL export on
 * Windows, default symbol visibility everywhere else */
#ifdef _WIN32
# define LIBEXPORT __declspec(dllexport)
#else
# define LIBEXPORT __attribute__((visibility("default")))
#endif
/* Defining 'inline' as a macro (expanding to itself) makes the
 * '#ifndef inline' fallback in common_defs.h inactive, so the keyword is
 * kept as is */
#define inline inline
/* forceinline - inline plus the always_inline attribute */
#define forceinline inline __attribute__((always_inline))
/* restrict - spelled with the __restrict__ keyword */
#define restrict __restrict__
/* likely()/unlikely() - branch hints; '!!' reduces the expression to 0 or 1
 * before it is compared against the expected value */
#define likely(expr) __builtin_expect(!!(expr), 1)
#define unlikely(expr) __builtin_expect(!!(expr), 0)
/* prefetchr()/prefetchw() - prefetch for read (second argument 0) or for
 * write (second argument 1) */
#define prefetchr(addr) __builtin_prefetch((addr), 0)
#define prefetchw(addr) __builtin_prefetch((addr), 1)
/* _aligned_attribute(n) - align the annotated variable or type to n bytes */
#define _aligned_attribute(n) __attribute__((aligned(n)))
/*
 * Detection of the 'target' function attribute and of the instruction sets
 * that can be used with it.  Everything below is derived from GCC_PREREQ(),
 * CLANG_PREREQ() and the __has_*() operators defined earlier in this header;
 * any macro left undefined here receives a 0 default in common_defs.h, with
 * the exception of COMPILER_SUPPORTS_PCLMUL_TARGET and
 * COMPILER_SUPPORTS_AVX2_TARGET, which are only referenced by the
 * *_TARGET_INTRINSICS definitions in this block.
 */
#define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE \
(GCC_PREREQ(4, 4) || __has_attribute(target))
#if COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE
/* ---- x86 (32-bit and 64-bit) ---- */
# if defined(__i386__) || defined(__x86_64__)
/* Each target is accepted from a minimum gcc version, or when the compiler
 * reports a representative builtin of that instruction set. */
# define COMPILER_SUPPORTS_PCLMUL_TARGET \
(GCC_PREREQ(4, 4) || __has_builtin(__builtin_ia32_pclmulqdq128))
# define COMPILER_SUPPORTS_AVX_TARGET \
(GCC_PREREQ(4, 6) || __has_builtin(__builtin_ia32_maxps256))
# define COMPILER_SUPPORTS_BMI2_TARGET \
(GCC_PREREQ(4, 7) || __has_builtin(__builtin_ia32_pdep_di))
# define COMPILER_SUPPORTS_AVX2_TARGET \
(GCC_PREREQ(4, 7) || __has_builtin(__builtin_ia32_psadbw256))
# define COMPILER_SUPPORTS_AVX512BW_TARGET \
(GCC_PREREQ(5, 1) || __has_builtin(__builtin_ia32_psadbw512))
/*
* Prior to gcc 4.9 (r200349) and clang 3.8 (r239883), x86 intrinsics
* not available in the main target could not be used in 'target'
* attribute functions. Unfortunately clang has no feature test macro
* for this so we have to check its version.
*/
# if GCC_PREREQ(4, 9) || CLANG_PREREQ(3, 8, 7030000)
# define COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS 1
# define COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS \
COMPILER_SUPPORTS_PCLMUL_TARGET
# define COMPILER_SUPPORTS_AVX2_TARGET_INTRINSICS \
COMPILER_SUPPORTS_AVX2_TARGET
# define COMPILER_SUPPORTS_AVX512BW_TARGET_INTRINSICS \
COMPILER_SUPPORTS_AVX512BW_TARGET
# endif
/* ---- ARM (32-bit and 64-bit) ---- */
# elif defined(__arm__) || defined(__aarch64__)
/*
* Determine whether NEON and crypto intrinsics are supported.
*
* With gcc prior to 6.1, (r230411 for arm32, r226563 for arm64), neither
* was available unless enabled in the main target.
*
* But even after that, to include <arm_neon.h> (which contains both the
* basic NEON intrinsics and the crypto intrinsics) the main target still
* needs to have:
* - gcc: hardware floating point support
* - clang: NEON support (but not necessarily crypto support)
*/
# if (GCC_PREREQ(6, 1) && defined(__ARM_FP)) || \
(defined(__clang__) && defined(__ARM_NEON))
# define COMPILER_SUPPORTS_NEON_TARGET_INTRINSICS 1
/*
* The crypto intrinsics are broken on arm32 with clang, even when using
* -mfpu=crypto-neon-fp-armv8, because clang's <arm_neon.h> puts them
* behind __aarch64__. Undefine __ARM_FEATURE_CRYPTO in that case...
*/
# if defined(__clang__) && defined(__arm__)
# undef __ARM_FEATURE_CRYPTO
/* Otherwise PMULL is enabled for every non-clang compiler reaching this
 * point, and for clang only when it reports the vmull_p64 builtin. */
# elif __has_builtin(__builtin_neon_vmull_p64) || !defined(__clang__)
# define COMPILER_SUPPORTS_PMULL_TARGET_INTRINSICS 1
# endif
# endif
/*
* Determine whether CRC32 intrinsics are supported.
*
* With gcc r274827 or later (gcc 10.1+, 9.3+, or 8.4+), or with clang,
* they work as expected. (Well, not quite. There's still a bug, but we
* have to work around it later when including arm_acle.h.)
*/
# if GCC_PREREQ(10, 1) || \
(GCC_PREREQ(9, 3) && !GCC_PREREQ(10, 0)) || \
(GCC_PREREQ(8, 4) && !GCC_PREREQ(9, 0)) || \
(defined(__clang__) && __has_builtin(__builtin_arm_crc32b))
# define COMPILER_SUPPORTS_CRC32_TARGET_INTRINSICS 1
# endif
# endif /* __arm__ || __aarch64__ */
#endif /* COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE */
/*
* Prior to gcc 5.1 and clang 3.9, emmintrin.h only defined vectors of signed
* integers (e.g. __v4si), not vectors of unsigned integers (e.g. __v4su). But
* we need the unsigned ones in order to avoid signed integer overflow, which is
* undefined behavior. Add the missing definitions for the unsigned ones if
* needed.
*/
#if (GCC_PREREQ(4, 0) && !GCC_PREREQ(5, 1)) || \
(defined(__clang__) && !CLANG_PREREQ(3, 9, 8020000)) || \
defined(__INTEL_COMPILER)
/* 16-byte vectors of unsigned 64-, 32-, 16- and 8-bit elements */
typedef unsigned long long __v2du __attribute__((__vector_size__(16)));
typedef unsigned int __v4su __attribute__((__vector_size__(16)));
typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
/* 32-byte vectors of unsigned 64-, 32-, 16- and 8-bit elements */
typedef unsigned long long __v4du __attribute__((__vector_size__(32)));
typedef unsigned int __v8su __attribute__((__vector_size__(32)));
typedef unsigned short __v16hu __attribute__((__vector_size__(32)));
typedef unsigned char __v32qu __attribute__((__vector_size__(32)));
#endif
/* For icc, additionally define 64-byte vectors of int, short and char
 * elements.  NOTE(review): presumably icc's headers do not provide these
 * names - confirm. */
#ifdef __INTEL_COMPILER
typedef int __v16si __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef char __v64qi __attribute__((__vector_size__(64)));
#endif
/*
 * Endianness: newer gcc provides __BYTE_ORDER__ (older gcc does not).  When it
 * is available, CPU_IS_LITTLE_ENDIAN() becomes a compile-time comparison.
 */
#if defined(__BYTE_ORDER__)
# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#endif
/*
 * Byte swaps: use the compiler builtin when the compiler reports it, or when
 * gcc is new enough (4.8 for the 16-bit builtin, 4.3 for the others).
 */
#if __has_builtin(__builtin_bswap16) || GCC_PREREQ(4, 8)
# define bswap16 __builtin_bswap16
#endif
#if __has_builtin(__builtin_bswap32) || GCC_PREREQ(4, 3)
# define bswap32 __builtin_bswap32
#endif
#if __has_builtin(__builtin_bswap64) || GCC_PREREQ(4, 3)
# define bswap64 __builtin_bswap64
#endif
/*
 * Targets on which unaligned memory accesses are treated as fast.
 *
 * WebAssembly is on the list as well: for all compilation purposes it behaves
 * like any other CPU instruction set.  Even though a WebAssembly engine might
 * be running on top of different actual CPU architectures, the WebAssembly
 * spec itself permits unaligned access, and it will be fast on most of those
 * platforms and simulated at the engine level on others, so it is worth
 * treating it as a CPU architecture with fast unaligned access.
 */
#if defined(__x86_64__) || defined(__i386__) || \
	defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
	defined(__wasm__)
# define UNALIGNED_ACCESS_IS_FAST 1
#endif
/*
 * Bit scans built on the count-leading-zeros / count-trailing-zeros builtins.
 * bsrN(v): index of the most significant set bit, i.e. (N - 1) minus the
 *          number of leading zero bits.
 * bsfN(v): index of the least significant set bit, i.e. the number of
 *          trailing zero bits.
 * As with the builtins themselves, 'v' must be nonzero.
 */
#define bsr32(v) (31 - __builtin_clz(v))
#define bsr64(v) (63 - __builtin_clzll(v))
#define bsf32(v) __builtin_ctz(v)
#define bsf64(v) __builtin_ctzll(v)
/*
 * Set up rotate-right macros similar to the MSVC intrinsics _rotr16(), _rotr()
 * and _rotr64().  This shift/or pattern should be recognized by compilers and
 * turned into a rotate instruction.
 *
 * x - value to rotate; it is converted to the unsigned type of the rotation
 *     width (16-bit unsigned short, 32-bit unsigned int, 64-bit unsigned long
 *     long are assumed), so no bits outside that width leak into the result.
 * n - rotation count; it is reduced modulo the width, so a count of 0 or of a
 *     full width returns x unchanged instead of shifting by the whole width,
 *     which would be undefined behavior.
 *
 * Both arguments are fully parenthesized so that expressions such as
 * _rotr(a | b, i + 1) work as expected.  Note that each argument is evaluated
 * twice, so avoid arguments with side effects.
 */
#ifndef _rotr16
#define _rotr16(x,n) \
	((unsigned short)( \
		((unsigned)(unsigned short)(x) >> ((n) & 15)) | \
		((unsigned)(unsigned short)(x) << ((16 - ((n) & 15)) & 15))))
#endif
#ifndef _rotr
#define _rotr(x,n) \
	(((unsigned)(x) >> ((n) & 31)) | \
	 ((unsigned)(x) << ((32 - ((n) & 31)) & 31)))
#endif
#ifndef _rotr64
#define _rotr64(x,n) \
	(((unsigned long long)(x) >> ((n) & 63)) | \
	 ((unsigned long long)(x) << ((64 - ((n) & 63)) & 63)))
#endif

View file

@ -0,0 +1,80 @@
/*
* compiler_msc.h - definitions for the Microsoft C Compiler
*/
#include <intrin.h> /* for _BitScan*() */
#include <stdint.h>
#include <stdlib.h> /* for _byteswap_*() */
/* LIBEXPORT - export a function from a DLL */
#define LIBEXPORT __declspec(dllexport)
/*
* Old versions (e.g. VS2010) of MSC don't have the C99 header stdbool.h.
* Beware: the below replacement isn't fully standard, since normally any value
* != 0 should be implicitly cast to a bool with value 1... but that doesn't
* happen if bool is really just an 'int'.
*/
typedef int bool;
#define true 1
#define false 0
/* Defining this macro (the one <stdbool.h> itself defines) makes the
 * '#ifndef __bool_true_false_are_defined' check in common_defs.h skip its
 * '#include <stdbool.h>' */
#define __bool_true_false_are_defined 1
/* ssize_t - signed size type: 'long long' on 64-bit Windows, 'int' otherwise */
#ifndef _WIN64
typedef int ssize_t;
#else
typedef long long ssize_t;
#endif
/* Assume a little endian architecture with fast unaligned access */
/* (both values are hard-coded; nothing is detected at compile time) */
#define CPU_IS_LITTLE_ENDIAN() 1
#define UNALIGNED_ACCESS_IS_FAST 1
/* __restrict has nonstandard behavior; don't use it */
/* ('restrict' therefore expands to nothing) */
#define restrict
/* ... but we can use __inline and __forceinline */
#define inline __inline
#define forceinline __forceinline
/* Byte swap functions */
/* (mapped onto the _byteswap_*() routines declared in <stdlib.h>; being
 * defined as macros, they disable the '#ifndef bswapN' fallbacks in
 * common_defs.h) */
#define bswap16 _byteswap_ushort
#define bswap32 _byteswap_ulong
#define bswap64 _byteswap_uint64
/* Bit scan functions (32-bit) */
/*
 * bsr32(n) - index of the most significant set bit of the nonzero value 'n'.
 *
 * _BitScanReverse() stores the index through an 'unsigned long *'.  A
 * dedicated 'unsigned long' receives it, because the address of the uint32_t
 * argument has an incompatible pointer type.
 */
static forceinline unsigned
bsr32(uint32_t n)
{
	unsigned long index;

	_BitScanReverse(&index, n);
	return (unsigned)index;
}
/* Defined as a macro too, so that '#ifndef bsr32' sees this version */
#define bsr32 bsr32
/*
 * bsf32(n) - index of the least significant set bit of the nonzero value 'n'.
 *
 * _BitScanForward() stores the index through an 'unsigned long *'.  A
 * dedicated 'unsigned long' receives it, because the address of the uint32_t
 * argument has an incompatible pointer type.
 */
static forceinline unsigned
bsf32(uint32_t n)
{
	unsigned long index;

	_BitScanForward(&index, n);
	return (unsigned)index;
}
/* Defined as a macro too, so that '#ifndef bsf32' sees this version */
#define bsf32 bsf32
#ifdef _M_X64 /* Bit scan functions (64-bit) */
/*
 * bsr64(n) - index of the most significant set bit of the nonzero value 'n'.
 *
 * _BitScanReverse64() takes a 64-bit mask but still stores the index through
 * an 'unsigned long *'.  Passing the address of the 64-bit argument would
 * overwrite only part of it, so a dedicated 'unsigned long' receives the
 * index.
 */
static forceinline unsigned
bsr64(uint64_t n)
{
	unsigned long index;

	_BitScanReverse64(&index, n);
	return (unsigned)index;
}
/* Defined as a macro too, so that '#ifndef bsr64' sees this version */
#define bsr64 bsr64
/*
 * bsf64(n) - index of the least significant set bit of the nonzero value 'n'.
 * The index is received in a dedicated 'unsigned long' for the same reason as
 * in bsr64().
 */
static forceinline unsigned
bsf64(uint64_t n)
{
	unsigned long index;

	_BitScanForward64(&index, n);
	return (unsigned)index;
}
/* Defined as a macro too, so that '#ifndef bsf64' sees this version */
#define bsf64 bsf64
#endif /* _M_X64 */