asset compression
This commit is contained in:
parent
b9b9ecb84c
commit
30a283c1b3
57 changed files with 13237 additions and 8 deletions
136
implementations/Crafter.Asset-Compression.cpp
Normal file
136
implementations/Crafter.Asset-Compression.cpp
Normal file
|
|
@ -0,0 +1,136 @@
|
||||||
|
/*
|
||||||
|
Crafter®.Asset
|
||||||
|
Copyright (C) 2026 Catcrafts®
|
||||||
|
catcrafts.net
|
||||||
|
|
||||||
|
This library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License version 3.0 as published by the Free Software Foundation;
|
||||||
|
|
||||||
|
This library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with this library; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
module;
|
||||||
|
// Vendored GDeflate (Microsoft DirectStorage reference, Apache-2.0). Headers
|
||||||
|
// pull libdeflate (MIT) via -Ilib/gdeflate/libdeflate. The C++ wrappers are
|
||||||
|
// pulled inline because Crafter.Build's cFiles only handles .c TUs — folding
|
||||||
|
// them into this module impl avoids adding a parallel cppFiles channel.
|
||||||
|
#include "../lib/gdeflate/GDeflate.h"
|
||||||
|
#include "../lib/gdeflate/GDeflateCompress.cpp"
|
||||||
|
#include "../lib/gdeflate/GDeflateDecompress.cpp"
|
||||||
|
|
||||||
|
module Crafter.Asset;
|
||||||
|
import std;
|
||||||
|
|
||||||
|
namespace Crafter::Compression {
|
||||||
|
CompressedBlob CompressStreams(std::span<const std::span<const std::byte>> streams) {
|
||||||
|
CompressedBlob blob;
|
||||||
|
blob.regions.reserve(streams.size());
|
||||||
|
|
||||||
|
// First pass: compress each stream into its own buffer so we know the
|
||||||
|
// exact final size. GDeflate::CompressBound is an upper bound; we'd
|
||||||
|
// waste capacity if we appended the bound directly into a single
|
||||||
|
// shared buffer.
|
||||||
|
std::vector<std::vector<std::byte>> compressed;
|
||||||
|
compressed.reserve(streams.size());
|
||||||
|
|
||||||
|
std::uint64_t totalSize = 0;
|
||||||
|
for (const std::span<const std::byte>& stream : streams) {
|
||||||
|
if (stream.empty()) {
|
||||||
|
compressed.emplace_back();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
std::size_t boundSize = GDeflate::CompressBound(stream.size());
|
||||||
|
std::vector<std::byte> out(boundSize);
|
||||||
|
std::size_t actualSize = boundSize;
|
||||||
|
bool ok = GDeflate::Compress(
|
||||||
|
reinterpret_cast<std::uint8_t*>(out.data()),
|
||||||
|
&actualSize,
|
||||||
|
reinterpret_cast<const std::uint8_t*>(stream.data()),
|
||||||
|
stream.size(),
|
||||||
|
GDeflate::MaximumCompressionLevel,
|
||||||
|
0);
|
||||||
|
if (!ok) {
|
||||||
|
throw std::runtime_error("GDeflate::Compress failed");
|
||||||
|
}
|
||||||
|
out.resize(actualSize);
|
||||||
|
totalSize += actualSize;
|
||||||
|
compressed.push_back(std::move(out));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Second pass: concatenate and build the region table.
|
||||||
|
blob.bytes.reserve(totalSize);
|
||||||
|
for (std::size_t i = 0; i < streams.size(); ++i) {
|
||||||
|
RegionMeta r {
|
||||||
|
.srcOffset = blob.bytes.size(),
|
||||||
|
.compressedSize = compressed[i].size(),
|
||||||
|
.decompressedSize = streams[i].size(),
|
||||||
|
};
|
||||||
|
blob.regions.push_back(r);
|
||||||
|
blob.bytes.insert(blob.bytes.end(), compressed[i].begin(), compressed[i].end());
|
||||||
|
}
|
||||||
|
return blob;
|
||||||
|
}
|
||||||
|
|
||||||
|
void WriteBlob(std::ostream& file, const CompressedBlob& blob) {
|
||||||
|
std::uint32_t regionCount = static_cast<std::uint32_t>(blob.regions.size());
|
||||||
|
file.write(reinterpret_cast<const char*>(®ionCount), sizeof(regionCount));
|
||||||
|
if (regionCount > 0) {
|
||||||
|
file.write(reinterpret_cast<const char*>(blob.regions.data()),
|
||||||
|
regionCount * sizeof(RegionMeta));
|
||||||
|
}
|
||||||
|
std::uint64_t payloadSize = blob.bytes.size();
|
||||||
|
file.write(reinterpret_cast<const char*>(&payloadSize), sizeof(payloadSize));
|
||||||
|
if (payloadSize > 0) {
|
||||||
|
file.write(reinterpret_cast<const char*>(blob.bytes.data()), payloadSize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CompressedBlob ReadBlob(std::istream& file) {
|
||||||
|
CompressedBlob blob;
|
||||||
|
std::uint32_t regionCount = 0;
|
||||||
|
file.read(reinterpret_cast<char*>(®ionCount), sizeof(regionCount));
|
||||||
|
blob.regions.resize(regionCount);
|
||||||
|
if (regionCount > 0) {
|
||||||
|
file.read(reinterpret_cast<char*>(blob.regions.data()),
|
||||||
|
regionCount * sizeof(RegionMeta));
|
||||||
|
}
|
||||||
|
std::uint64_t payloadSize = 0;
|
||||||
|
file.read(reinterpret_cast<char*>(&payloadSize), sizeof(payloadSize));
|
||||||
|
blob.bytes.resize(payloadSize);
|
||||||
|
if (payloadSize > 0) {
|
||||||
|
file.read(reinterpret_cast<char*>(blob.bytes.data()), payloadSize);
|
||||||
|
}
|
||||||
|
return blob;
|
||||||
|
}
|
||||||
|
|
||||||
|
void DecompressCPU(const CompressedBlob& blob, std::span<const std::span<std::byte>> outputs) {
|
||||||
|
if (outputs.size() != blob.regions.size()) {
|
||||||
|
throw std::runtime_error("DecompressCPU: outputs.size() != regions.size()");
|
||||||
|
}
|
||||||
|
for (std::size_t i = 0; i < blob.regions.size(); ++i) {
|
||||||
|
const RegionMeta& r = blob.regions[i];
|
||||||
|
const std::span<std::byte>& out = outputs[i];
|
||||||
|
if (out.size() != r.decompressedSize) {
|
||||||
|
throw std::runtime_error("DecompressCPU: output size mismatch");
|
||||||
|
}
|
||||||
|
if (r.decompressedSize == 0) continue;
|
||||||
|
bool ok = GDeflate::Decompress(
|
||||||
|
reinterpret_cast<std::uint8_t*>(out.data()),
|
||||||
|
r.decompressedSize,
|
||||||
|
reinterpret_cast<const std::uint8_t*>(blob.bytes.data() + r.srcOffset),
|
||||||
|
r.compressedSize,
|
||||||
|
/*numWorkers=*/1);
|
||||||
|
if (!ok) {
|
||||||
|
throw std::runtime_error("GDeflate::Decompress failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -19,6 +19,35 @@ import std;
|
||||||
using namespace Crafter;
|
using namespace Crafter;
|
||||||
namespace fs = std::filesystem;
|
namespace fs = std::filesystem;
|
||||||
|
|
||||||
|
// CPU GDeflate roundtrip sanity test across the size boundaries from the
|
||||||
|
// implementation plan. Returns 0 on pass, 1 on first byte-mismatch.
|
||||||
|
static int RunCompressionRoundtrip() {
|
||||||
|
const std::array<std::size_t, 5> sizes = { 1, 65535, 65536, 65537, 16 * 1024 * 1024 };
|
||||||
|
std::mt19937_64 rng(0xC0FFEEu);
|
||||||
|
for (std::size_t n : sizes) {
|
||||||
|
std::vector<std::byte> input(n);
|
||||||
|
for (std::size_t i = 0; i < n; ++i) {
|
||||||
|
// Mix random bytes with a deterministic pattern so the codec is
|
||||||
|
// exercised on both compressible and noisy regions.
|
||||||
|
input[i] = static_cast<std::byte>((i * 0x9E3779B97F4A7C15ULL ^ rng()) & 0xFF);
|
||||||
|
}
|
||||||
|
std::array<std::span<const std::byte>, 1> streams = { std::span(input) };
|
||||||
|
Compression::CompressedBlob blob = Compression::CompressStreams(streams);
|
||||||
|
std::vector<std::byte> output(n);
|
||||||
|
std::array<std::span<std::byte>, 1> outputs = { std::span(output) };
|
||||||
|
Compression::DecompressCPU(blob, outputs);
|
||||||
|
if (output != input) {
|
||||||
|
std::cerr << "[FAIL] roundtrip size=" << n << "\n";
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
std::cout << "[ok] size=" << n
|
||||||
|
<< " compressed=" << blob.bytes.size()
|
||||||
|
<< " ratio=" << (double(blob.bytes.size()) / double(n)) << "\n";
|
||||||
|
}
|
||||||
|
std::cout << "All roundtrips passed.\n";
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
// Parse arguments: crafter-asset <input_file> [output_file] [--format u8|f16]
|
// Parse arguments: crafter-asset <input_file> [output_file] [--format u8|f16]
|
||||||
fs::path inputPath;
|
fs::path inputPath;
|
||||||
|
|
@ -29,6 +58,9 @@ int main(int argc, char** argv) {
|
||||||
std::vector<std::string> positional;
|
std::vector<std::string> positional;
|
||||||
for (int i = 1; i < argc; ++i) {
|
for (int i = 1; i < argc; ++i) {
|
||||||
std::string arg = argv[i];
|
std::string arg = argv[i];
|
||||||
|
if (arg == "--test-compression") {
|
||||||
|
return RunCompressionRoundtrip();
|
||||||
|
}
|
||||||
if (arg == "--format" || arg == "-f") {
|
if (arg == "--format" || arg == "-f") {
|
||||||
if (i + 1 >= argc) {
|
if (i + 1 >= argc) {
|
||||||
std::cerr << "Error: --format requires a value (u8 or f16)\n";
|
std::cerr << "Error: --format requires a value (u8 or f16)\n";
|
||||||
|
|
|
||||||
59
interfaces/Crafter.Asset-Compression.cppm
Normal file
59
interfaces/Crafter.Asset-Compression.cppm
Normal file
|
|
@ -0,0 +1,59 @@
|
||||||
|
/*
|
||||||
|
Crafter®.Asset
|
||||||
|
Copyright (C) 2026 Catcrafts®
|
||||||
|
catcrafts.net
|
||||||
|
|
||||||
|
This library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License version 3.0 as published by the Free Software Foundation;
|
||||||
|
|
||||||
|
This library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with this library; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
export module Crafter.Asset:Compression;
|
||||||
|
import std;
|
||||||
|
|
||||||
|
export namespace Crafter::Compression {
|
||||||
|
// One independently-decompressable GDeflate stream inside CompressedBlob::bytes.
|
||||||
|
// Layout matches what VK_EXT_memory_decompression's VkDecompressMemoryRegionEXT
|
||||||
|
// expects, minus the device addresses (which the consumer fills in).
|
||||||
|
struct RegionMeta {
|
||||||
|
std::uint64_t srcOffset;
|
||||||
|
std::uint64_t compressedSize;
|
||||||
|
std::uint64_t decompressedSize;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CompressedBlob {
|
||||||
|
std::vector<std::byte> bytes;
|
||||||
|
std::vector<RegionMeta> regions;
|
||||||
|
|
||||||
|
std::uint64_t TotalDecompressedSize() const noexcept {
|
||||||
|
std::uint64_t sum = 0;
|
||||||
|
for (const RegionMeta& r : regions) sum += r.decompressedSize;
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Compresses each input span as its own GDeflate tile-stream; concatenates
|
||||||
|
// them into one byte buffer with a parallel region table. Streams are
|
||||||
|
// independent and can be addressed individually by VkDecompressMemoryRegionEXT
|
||||||
|
// entries on the GPU path.
|
||||||
|
CompressedBlob CompressStreams(std::span<const std::span<const std::byte>> streams);
|
||||||
|
|
||||||
|
// CPU fallback decoder. outputs.size() must equal blob.regions.size();
|
||||||
|
// outputs[i].size() must equal blob.regions[i].decompressedSize.
|
||||||
|
void DecompressCPU(const CompressedBlob& blob, std::span<const std::span<std::byte>> outputs);
|
||||||
|
|
||||||
|
// Length-prefixed serialization of a CompressedBlob. Used by per-asset
|
||||||
|
// SaveCompressed/LoadCompressed implementations after they've written
|
||||||
|
// their own type-specific header.
|
||||||
|
void WriteBlob(std::ostream& file, const CompressedBlob& blob);
|
||||||
|
CompressedBlob ReadBlob(std::istream& file);
|
||||||
|
}
|
||||||
|
|
@ -18,6 +18,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
export module Crafter.Asset:Mesh;
|
export module Crafter.Asset:Mesh;
|
||||||
|
import :Compression;
|
||||||
import Crafter.Math;
|
import Crafter.Math;
|
||||||
import std;
|
import std;
|
||||||
namespace fs = std::filesystem;
|
namespace fs = std::filesystem;
|
||||||
|
|
@ -35,6 +36,44 @@ export namespace Crafter {
|
||||||
Vector<float, 2, 0> uv;
|
Vector<float, 2, 0> uv;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// GDeflate-compressed counterpart of MeshAsset<T>::Save output. Three
|
||||||
|
// regions: [vertex, index, data]. dataCount==0 leaves the data region
|
||||||
|
// empty (zero compressedSize/decompressedSize). dataStride records sizeof(T)
|
||||||
|
// at compress time so consumers can validate.
|
||||||
|
struct CompressedMeshAsset {
|
||||||
|
std::uint32_t vertexCount = 0;
|
||||||
|
std::uint32_t indexCount = 0;
|
||||||
|
std::uint32_t dataCount = 0;
|
||||||
|
std::uint32_t dataStride = 0;
|
||||||
|
Compression::CompressedBlob blob;
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace MeshAssetFormat {
|
||||||
|
inline constexpr char magic[4] = {'C', 'G', 'D', 'M'};
|
||||||
|
inline constexpr std::uint32_t version = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline CompressedMeshAsset LoadCompressedMesh(fs::path path) {
|
||||||
|
std::ifstream file(path, std::ios::binary);
|
||||||
|
char magic[4];
|
||||||
|
file.read(magic, 4);
|
||||||
|
if (std::memcmp(magic, MeshAssetFormat::magic, 4) != 0) {
|
||||||
|
throw std::runtime_error("LoadCompressedMesh: bad magic on " + path.string());
|
||||||
|
}
|
||||||
|
std::uint32_t version = 0;
|
||||||
|
file.read(reinterpret_cast<char*>(&version), sizeof(version));
|
||||||
|
if (version != MeshAssetFormat::version) {
|
||||||
|
throw std::runtime_error("LoadCompressedMesh: unsupported version on " + path.string());
|
||||||
|
}
|
||||||
|
CompressedMeshAsset out;
|
||||||
|
file.read(reinterpret_cast<char*>(&out.vertexCount), sizeof(out.vertexCount));
|
||||||
|
file.read(reinterpret_cast<char*>(&out.indexCount), sizeof(out.indexCount));
|
||||||
|
file.read(reinterpret_cast<char*>(&out.dataCount), sizeof(out.dataCount));
|
||||||
|
file.read(reinterpret_cast<char*>(&out.dataStride), sizeof(out.dataStride));
|
||||||
|
out.blob = Compression::ReadBlob(file);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
struct MeshAsset {
|
struct MeshAsset {
|
||||||
std::vector<Vector<float, 3, 3>> vertexes;
|
std::vector<Vector<float, 3, 3>> vertexes;
|
||||||
|
|
@ -57,6 +96,29 @@ export namespace Crafter {
|
||||||
file.write(reinterpret_cast<char*>(datas.data()), dataCount * sizeof(T));
|
file.write(reinterpret_cast<char*>(datas.data()), dataCount * sizeof(T));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SaveCompressed(fs::path path) const {
|
||||||
|
std::array<std::span<const std::byte>, 3> streams = {
|
||||||
|
std::as_bytes(std::span(vertexes)),
|
||||||
|
std::as_bytes(std::span(indexes)),
|
||||||
|
std::as_bytes(std::span(datas)),
|
||||||
|
};
|
||||||
|
Compression::CompressedBlob blob = Compression::CompressStreams(streams);
|
||||||
|
|
||||||
|
std::ofstream file(path, std::ios::binary);
|
||||||
|
file.write(MeshAssetFormat::magic, 4);
|
||||||
|
std::uint32_t version = MeshAssetFormat::version;
|
||||||
|
std::uint32_t vc = static_cast<std::uint32_t>(vertexes.size());
|
||||||
|
std::uint32_t ic = static_cast<std::uint32_t>(indexes.size());
|
||||||
|
std::uint32_t dc = static_cast<std::uint32_t>(datas.size());
|
||||||
|
std::uint32_t stride = static_cast<std::uint32_t>(sizeof(T));
|
||||||
|
file.write(reinterpret_cast<const char*>(&version), sizeof(version));
|
||||||
|
file.write(reinterpret_cast<const char*>(&vc), sizeof(vc));
|
||||||
|
file.write(reinterpret_cast<const char*>(&ic), sizeof(ic));
|
||||||
|
file.write(reinterpret_cast<const char*>(&dc), sizeof(dc));
|
||||||
|
file.write(reinterpret_cast<const char*>(&stride), sizeof(stride));
|
||||||
|
Compression::WriteBlob(file, blob);
|
||||||
|
}
|
||||||
|
|
||||||
static MeshAsset<T> Load(fs::path path) {
|
static MeshAsset<T> Load(fs::path path) {
|
||||||
MeshAsset<T> mesh;
|
MeshAsset<T> mesh;
|
||||||
|
|
||||||
|
|
@ -196,6 +258,32 @@ export namespace Crafter {
|
||||||
file.write(reinterpret_cast<char*>(vertexes.data()), vertexCount * sizeof(Vector<float, 3, 3>));
|
file.write(reinterpret_cast<char*>(vertexes.data()), vertexCount * sizeof(Vector<float, 3, 3>));
|
||||||
file.write(reinterpret_cast<char*>(indexes.data()), indexCount * sizeof(std::uint32_t));
|
file.write(reinterpret_cast<char*>(indexes.data()), indexCount * sizeof(std::uint32_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SaveCompressed(fs::path path) const {
|
||||||
|
// Three regions to keep file format identical to the templated
|
||||||
|
// variant; the data region is empty (skipped on the GPU path).
|
||||||
|
std::array<std::span<const std::byte>, 3> streams = {
|
||||||
|
std::as_bytes(std::span(vertexes)),
|
||||||
|
std::as_bytes(std::span(indexes)),
|
||||||
|
std::span<const std::byte>{},
|
||||||
|
};
|
||||||
|
Compression::CompressedBlob blob = Compression::CompressStreams(streams);
|
||||||
|
|
||||||
|
std::ofstream file(path, std::ios::binary);
|
||||||
|
file.write(MeshAssetFormat::magic, 4);
|
||||||
|
std::uint32_t version = MeshAssetFormat::version;
|
||||||
|
std::uint32_t vc = static_cast<std::uint32_t>(vertexes.size());
|
||||||
|
std::uint32_t ic = static_cast<std::uint32_t>(indexes.size());
|
||||||
|
std::uint32_t dc = 0;
|
||||||
|
std::uint32_t stride = 0;
|
||||||
|
file.write(reinterpret_cast<const char*>(&version), sizeof(version));
|
||||||
|
file.write(reinterpret_cast<const char*>(&vc), sizeof(vc));
|
||||||
|
file.write(reinterpret_cast<const char*>(&ic), sizeof(ic));
|
||||||
|
file.write(reinterpret_cast<const char*>(&dc), sizeof(dc));
|
||||||
|
file.write(reinterpret_cast<const char*>(&stride), sizeof(stride));
|
||||||
|
Compression::WriteBlob(file, blob);
|
||||||
|
}
|
||||||
|
|
||||||
static MeshAsset<void> Load(fs::path path) {
|
static MeshAsset<void> Load(fs::path path) {
|
||||||
MeshAsset<void> mesh;
|
MeshAsset<void> mesh;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@ module;
|
||||||
#define STB_IMAGE_IMPLEMENTATION
|
#define STB_IMAGE_IMPLEMENTATION
|
||||||
#include "../lib/stb_image.h"
|
#include "../lib/stb_image.h"
|
||||||
export module Crafter.Asset:Texture;
|
export module Crafter.Asset:Texture;
|
||||||
|
import :Compression;
|
||||||
import std;
|
import std;
|
||||||
import Crafter.Math;
|
import Crafter.Math;
|
||||||
namespace fs = std::filesystem;
|
namespace fs = std::filesystem;
|
||||||
|
|
@ -38,6 +39,42 @@ export namespace Crafter {
|
||||||
OpaqueType opaque;
|
OpaqueType opaque;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// GDeflate-compressed counterpart of TextureAsset<T>::Save output.
|
||||||
|
// Single-region blob: the pixel array as one stream.
|
||||||
|
struct CompressedTextureAsset {
|
||||||
|
std::uint16_t sizeX = 0;
|
||||||
|
std::uint16_t sizeY = 0;
|
||||||
|
OpaqueType opaque = OpaqueType::FullyOpaque;
|
||||||
|
std::uint32_t pixelStride = 0;
|
||||||
|
Compression::CompressedBlob blob;
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace TextureAssetFormat {
|
||||||
|
inline constexpr char magic[4] = {'C', 'G', 'D', 'T'};
|
||||||
|
inline constexpr std::uint32_t version = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline CompressedTextureAsset LoadCompressedTexture(fs::path path) {
|
||||||
|
std::ifstream file(path, std::ios::binary);
|
||||||
|
char magic[4];
|
||||||
|
file.read(magic, 4);
|
||||||
|
if (std::memcmp(magic, TextureAssetFormat::magic, 4) != 0) {
|
||||||
|
throw std::runtime_error("LoadCompressedTexture: bad magic on " + path.string());
|
||||||
|
}
|
||||||
|
std::uint32_t version = 0;
|
||||||
|
file.read(reinterpret_cast<char*>(&version), sizeof(version));
|
||||||
|
if (version != TextureAssetFormat::version) {
|
||||||
|
throw std::runtime_error("LoadCompressedTexture: unsupported version on " + path.string());
|
||||||
|
}
|
||||||
|
CompressedTextureAsset out;
|
||||||
|
file.read(reinterpret_cast<char*>(&out.sizeX), sizeof(out.sizeX));
|
||||||
|
file.read(reinterpret_cast<char*>(&out.sizeY), sizeof(out.sizeY));
|
||||||
|
file.read(reinterpret_cast<char*>(&out.opaque), sizeof(out.opaque));
|
||||||
|
file.read(reinterpret_cast<char*>(&out.pixelStride), sizeof(out.pixelStride));
|
||||||
|
out.blob = Compression::ReadBlob(file);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
struct TextureAsset {
|
struct TextureAsset {
|
||||||
std::uint16_t sizeX;
|
std::uint16_t sizeX;
|
||||||
|
|
@ -54,6 +91,24 @@ export namespace Crafter {
|
||||||
file.write(reinterpret_cast<char*>(pixels.data()), pixels.size() * sizeof(T));
|
file.write(reinterpret_cast<char*>(pixels.data()), pixels.size() * sizeof(T));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SaveCompressed(fs::path path) const {
|
||||||
|
std::array<std::span<const std::byte>, 1> streams = {
|
||||||
|
std::as_bytes(std::span(pixels)),
|
||||||
|
};
|
||||||
|
Compression::CompressedBlob blob = Compression::CompressStreams(streams);
|
||||||
|
|
||||||
|
std::ofstream file(path, std::ios::binary);
|
||||||
|
file.write(TextureAssetFormat::magic, 4);
|
||||||
|
std::uint32_t version = TextureAssetFormat::version;
|
||||||
|
std::uint32_t stride = static_cast<std::uint32_t>(sizeof(T));
|
||||||
|
file.write(reinterpret_cast<const char*>(&version), sizeof(version));
|
||||||
|
file.write(reinterpret_cast<const char*>(&sizeX), sizeof(sizeX));
|
||||||
|
file.write(reinterpret_cast<const char*>(&sizeY), sizeof(sizeY));
|
||||||
|
file.write(reinterpret_cast<const char*>(&opaque), sizeof(opaque));
|
||||||
|
file.write(reinterpret_cast<const char*>(&stride), sizeof(stride));
|
||||||
|
Compression::WriteBlob(file, blob);
|
||||||
|
}
|
||||||
|
|
||||||
static TextureAsset<T> Load(fs::path path) {
|
static TextureAsset<T> Load(fs::path path) {
|
||||||
TextureAsset<T> tex;
|
TextureAsset<T> tex;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -19,5 +19,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
|
||||||
|
|
||||||
export module Crafter.Asset;
|
export module Crafter.Asset;
|
||||||
|
export import :Compression;
|
||||||
export import :Mesh;
|
export import :Mesh;
|
||||||
export import :Texture;
|
export import :Texture;
|
||||||
46
lib/gdeflate/GDeflate.h
Normal file
46
lib/gdeflate/GDeflate.h
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
/*
|
||||||
|
* SPDX-FileCopyrightText: Copyright (c) 2020, 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||||
|
* SPDX-FileCopyrightText: Copyright (c) Microsoft Corportaion. All rights reserved.
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
namespace GDeflate
|
||||||
|
{
|
||||||
|
// See README.MD in libdeflate_1_8 for details on Compression Levels
|
||||||
|
static const uint32_t MinimumCompressionLevel = 1;
|
||||||
|
static const uint32_t MaximumCompressionLevel = 12;
|
||||||
|
|
||||||
|
enum Flags
|
||||||
|
{
|
||||||
|
COMPRESS_SINGLE_THREAD = 0x200, /*!< Force compression using a single thread. */
|
||||||
|
};
|
||||||
|
|
||||||
|
size_t CompressBound(size_t size);
|
||||||
|
|
||||||
|
bool Compress(
|
||||||
|
uint8_t* output,
|
||||||
|
size_t* outputSize,
|
||||||
|
const uint8_t* in,
|
||||||
|
size_t inSize,
|
||||||
|
uint32_t level,
|
||||||
|
uint32_t flags);
|
||||||
|
|
||||||
|
bool Decompress(uint8_t* output, size_t outputSize, const uint8_t* in, size_t inSize, uint32_t numWorkers);
|
||||||
|
|
||||||
|
} // namespace GDeflate
|
||||||
270
lib/gdeflate/GDeflateCompress.cpp
Normal file
270
lib/gdeflate/GDeflateCompress.cpp
Normal file
|
|
@ -0,0 +1,270 @@
|
||||||
|
/*
|
||||||
|
* SPDX-FileCopyrightText: Copyright (c) 2020, 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||||
|
* SPDX-FileCopyrightText: Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "GDeflate.h"
|
||||||
|
#include "TileStream.h"
|
||||||
|
#include "Utils.h"
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include "libdeflate/libdeflate.h"
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <atomic>
|
||||||
|
#include <thread>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct std::default_delete<libdeflate_gdeflate_compressor>
|
||||||
|
{
|
||||||
|
void operator()(libdeflate_gdeflate_compressor* p) const
|
||||||
|
{
|
||||||
|
libdeflate_free_gdeflate_compressor(p);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace GDeflate
|
||||||
|
{
|
||||||
|
static constexpr uint32_t kMaxWorkers = 31;
|
||||||
|
static constexpr uint32_t kMinTilesPerWorker = 64;
|
||||||
|
|
||||||
|
struct OutputStreamWrapper
|
||||||
|
{
|
||||||
|
uint8_t* ptr = nullptr;
|
||||||
|
size_t size = 0;
|
||||||
|
size_t pos = 0;
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
void StreamOut(T* d, size_t n = 1)
|
||||||
|
{
|
||||||
|
const size_t dataSize = sizeof(T) * n;
|
||||||
|
|
||||||
|
if (pos + dataSize > size)
|
||||||
|
{
|
||||||
|
printf("Fatal: stream overrun!\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(ptr + pos, d, dataSize);
|
||||||
|
pos += dataSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SetPos(size_t n)
|
||||||
|
{
|
||||||
|
if (n > size)
|
||||||
|
{
|
||||||
|
printf("Fatal: stream overrun!\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
pos = n;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CompressionContext
|
||||||
|
{
|
||||||
|
const uint8_t* inputPtr;
|
||||||
|
size_t inputSize;
|
||||||
|
|
||||||
|
struct Tile
|
||||||
|
{
|
||||||
|
std::vector<uint8_t> data;
|
||||||
|
size_t uncompressedSize = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<Tile> tiles;
|
||||||
|
|
||||||
|
std::atomic_uint32_t globalIndex;
|
||||||
|
uint32_t numItems;
|
||||||
|
|
||||||
|
std::atomic_bool failed;
|
||||||
|
};
|
||||||
|
|
||||||
|
static bool DoCompress(
|
||||||
|
uint8_t* output,
|
||||||
|
size_t* outputSize,
|
||||||
|
const uint8_t* in,
|
||||||
|
size_t inSize,
|
||||||
|
uint32_t level,
|
||||||
|
uint32_t flags)
|
||||||
|
{
|
||||||
|
if (outputSize == nullptr || output == nullptr || in == nullptr || inSize == 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (inSize > kDefaultTileSize * TileStream::kMaxTiles)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
CompressionContext context{};
|
||||||
|
|
||||||
|
context.inputPtr = in;
|
||||||
|
context.inputSize = inSize;
|
||||||
|
context.numItems = static_cast<uint32_t>((inSize + kDefaultTileSize - 1) / kDefaultTileSize);
|
||||||
|
context.tiles.resize(context.numItems);
|
||||||
|
context.failed = false;
|
||||||
|
|
||||||
|
auto TileCompressionJob = [&context, level]()
|
||||||
|
{
|
||||||
|
size_t pageCount = 0;
|
||||||
|
const size_t scratchSize = libdeflate_gdeflate_compress_bound(nullptr, kDefaultTileSize, &pageCount);
|
||||||
|
assert(pageCount == 1);
|
||||||
|
|
||||||
|
void* scratch = alloca(scratchSize);
|
||||||
|
|
||||||
|
std::unique_ptr<libdeflate_gdeflate_compressor> compressor(libdeflate_alloc_gdeflate_compressor(level));
|
||||||
|
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
const uint32_t tileIndex = context.globalIndex.fetch_add(1, std::memory_order_relaxed);
|
||||||
|
|
||||||
|
if (tileIndex >= context.numItems)
|
||||||
|
break;
|
||||||
|
|
||||||
|
const size_t tilePos = tileIndex * kDefaultTileSize;
|
||||||
|
|
||||||
|
size_t remaining = context.inputSize - tilePos;
|
||||||
|
size_t uncompressedSize = std::min<size_t>(remaining, kDefaultTileSize);
|
||||||
|
|
||||||
|
auto& tile = context.tiles[tileIndex];
|
||||||
|
tile.uncompressedSize = uncompressedSize;
|
||||||
|
|
||||||
|
libdeflate_gdeflate_out_page compressedPage{scratch, scratchSize};
|
||||||
|
|
||||||
|
size_t result = libdeflate_gdeflate_compress(
|
||||||
|
compressor.get(),
|
||||||
|
context.inputPtr + tilePos,
|
||||||
|
uncompressedSize,
|
||||||
|
&compressedPage,
|
||||||
|
1);
|
||||||
|
|
||||||
|
if (result == 0)
|
||||||
|
{
|
||||||
|
context.failed = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
tile.data.resize(compressedPage.nbytes);
|
||||||
|
memcpy(tile.data.data(), compressedPage.data, compressedPage.nbytes);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
std::thread workers[kMaxWorkers + 1];
|
||||||
|
const uint32_t maxWorkers = std::min(kMaxWorkers, std::thread::hardware_concurrency());
|
||||||
|
|
||||||
|
uint32_t numWorkersLeft =
|
||||||
|
std::min(maxWorkers, (context.numItems + kMinTilesPerWorker - 1) / kMinTilesPerWorker);
|
||||||
|
|
||||||
|
if (flags & COMPRESS_SINGLE_THREAD)
|
||||||
|
numWorkersLeft = 0;
|
||||||
|
|
||||||
|
for (auto& worker : workers)
|
||||||
|
{
|
||||||
|
if (numWorkersLeft == 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
worker = std::thread([TileCompressionJob]() { TileCompressionJob(); });
|
||||||
|
|
||||||
|
--numWorkersLeft;
|
||||||
|
}
|
||||||
|
|
||||||
|
TileCompressionJob();
|
||||||
|
|
||||||
|
for (auto& worker : workers)
|
||||||
|
{
|
||||||
|
if (worker.joinable())
|
||||||
|
worker.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compression failed
|
||||||
|
if (context.failed)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Compression is done. Prepare the output stream.
|
||||||
|
|
||||||
|
std::vector<uint32_t> tilePtrs;
|
||||||
|
size_t dataPos = 0;
|
||||||
|
|
||||||
|
for (auto const& tile : context.tiles)
|
||||||
|
{
|
||||||
|
tilePtrs.push_back(static_cast<uint32_t>(dataPos));
|
||||||
|
dataPos += tile.data.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
// tilePtrs[0] is used to store the size of the last tile; all the other
|
||||||
|
// elements are offsets to the tile data.
|
||||||
|
tilePtrs[0] = static_cast<uint32_t>(context.tiles.back().data.size());
|
||||||
|
|
||||||
|
assert(tilePtrs.size() <= TileStream::kMaxTiles);
|
||||||
|
assert(tilePtrs.size() == context.tiles.size());
|
||||||
|
assert(tilePtrs.size() == context.numItems);
|
||||||
|
|
||||||
|
OutputStreamWrapper outputStream;
|
||||||
|
outputStream.ptr = output;
|
||||||
|
outputStream.size = *outputSize;
|
||||||
|
|
||||||
|
// calculate uncompressed size
|
||||||
|
size_t uncompressedSize = tilePtrs.size() * kDefaultTileSize;
|
||||||
|
size_t tailSize = context.inputSize - (tilePtrs.size() - 1) * kDefaultTileSize;
|
||||||
|
if (tailSize < kDefaultTileSize)
|
||||||
|
uncompressedSize -= kDefaultTileSize - tailSize;
|
||||||
|
|
||||||
|
assert(uncompressedSize == inSize);
|
||||||
|
|
||||||
|
TileStream header(uncompressedSize);
|
||||||
|
|
||||||
|
assert(tilePtrs.size() == header.numTiles);
|
||||||
|
|
||||||
|
outputStream.StreamOut(&header);
|
||||||
|
outputStream.StreamOut(tilePtrs.data(), tilePtrs.size());
|
||||||
|
|
||||||
|
size_t dataOffset = outputStream.pos;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < tilePtrs.size(); ++i)
|
||||||
|
{
|
||||||
|
CompressionContext::Tile const& tile = context.tiles[i];
|
||||||
|
uint32_t tileOffset = (i == 0) ? 0 : tilePtrs[i];
|
||||||
|
|
||||||
|
outputStream.SetPos(dataOffset + tileOffset);
|
||||||
|
outputStream.StreamOut(tile.data.data(), tile.data.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
*outputSize = outputStream.pos;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t CompressBound(size_t size)
|
||||||
|
{
|
||||||
|
size_t numTiles = std::min(size_t(TileStream::kMaxTiles), (size + kDefaultTileSize - 1) / kDefaultTileSize);
|
||||||
|
numTiles = std::max(size_t(1), numTiles);
|
||||||
|
|
||||||
|
const size_t tileSize = kDefaultTileSize +
|
||||||
|
// Tile header. Ideally need to make it a part of compressor API.
|
||||||
|
(sizeof(uint32_t) + 4 * 208 + 4 * 8);
|
||||||
|
|
||||||
|
return numTiles * tileSize + sizeof(TileStream) + sizeof(uint64_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Compress(uint8_t* output, size_t* outputSize, const uint8_t* in, size_t inSize, uint32_t level, uint32_t flags)
|
||||||
|
{
|
||||||
|
return DoCompress(output, outputSize, in, inSize, level, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace GDeflate
|
||||||
170
lib/gdeflate/GDeflateDecompress.cpp
Normal file
170
lib/gdeflate/GDeflateDecompress.cpp
Normal file
|
|
@ -0,0 +1,170 @@
|
||||||
|
/*
|
||||||
|
* SPDX-FileCopyrightText: Copyright (c) 2020, 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||||
|
* SPDX-FileCopyrightText: Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "TileStream.h"
|
||||||
|
#include "Utils.h"
|
||||||
|
|
||||||
|
#include "libdeflate/libdeflate.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <atomic>
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct std::default_delete<libdeflate_gdeflate_decompressor>
|
||||||
|
{
|
||||||
|
void operator()(libdeflate_gdeflate_decompressor* p) const
|
||||||
|
{
|
||||||
|
libdeflate_free_gdeflate_decompressor(p);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace GDeflate
|
||||||
|
{
|
||||||
|
static constexpr uint32_t kMaxDecompressWorkers = 31;
|
||||||
|
|
||||||
|
static bool ValidateStream(const TileStream* header)
|
||||||
|
{
|
||||||
|
if (!header->IsValid())
|
||||||
|
{
|
||||||
|
printf("Malformed stream encountered.\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (header->id != kGDeflateId)
|
||||||
|
{
|
||||||
|
printf("Unknown stream format: %d\n", header->id);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct DecompressionContext
|
||||||
|
{
|
||||||
|
const uint8_t* inputPtr;
|
||||||
|
size_t inputSize;
|
||||||
|
|
||||||
|
uint8_t* outputPtr;
|
||||||
|
size_t outputSize;
|
||||||
|
|
||||||
|
std::atomic_uint32_t globalIndex;
|
||||||
|
uint32_t numItems;
|
||||||
|
|
||||||
|
std::atomic_bool failed;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void TileDecompressionJob(DecompressionContext& context, uint32_t compressorId)
|
||||||
|
{
|
||||||
|
std::unique_ptr<libdeflate_gdeflate_decompressor> decompressor(libdeflate_alloc_gdeflate_decompressor());
|
||||||
|
|
||||||
|
const uint32_t* tileOffsets = reinterpret_cast<const uint32_t*>(context.inputPtr + sizeof(TileStream));
|
||||||
|
const uint8_t* inDataPtr = reinterpret_cast<const uint8_t*>(tileOffsets + context.numItems);
|
||||||
|
|
||||||
|
libdeflate_result decompressResult = LIBDEFLATE_SUCCESS;
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
const uint32_t tileIndex = context.globalIndex.fetch_add(1, std::memory_order_relaxed);
|
||||||
|
|
||||||
|
if (tileIndex >= context.numItems)
|
||||||
|
break;
|
||||||
|
|
||||||
|
const size_t tileOffset = tileIndex > 0 ? tileOffsets[tileIndex] : 0;
|
||||||
|
libdeflate_gdeflate_in_page compressedPage{};
|
||||||
|
compressedPage.data = inDataPtr + tileOffset;
|
||||||
|
compressedPage.nbytes =
|
||||||
|
tileIndex < context.numItems - 1 ? tileOffsets[tileIndex + 1] - tileOffset : tileOffsets[0];
|
||||||
|
|
||||||
|
auto outputOffset = tileIndex * kDefaultTileSize;
|
||||||
|
|
||||||
|
decompressResult = libdeflate_gdeflate_decompress(
|
||||||
|
decompressor.get(),
|
||||||
|
&compressedPage,
|
||||||
|
1,
|
||||||
|
context.outputPtr + outputOffset,
|
||||||
|
static_cast<size_t>(kDefaultTileSize),
|
||||||
|
nullptr);
|
||||||
|
|
||||||
|
if ( decompressResult != LIBDEFLATE_SUCCESS)
|
||||||
|
{
|
||||||
|
context.failed = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Decompress(uint8_t* output, size_t outputSize, const uint8_t* in, size_t inSize, uint32_t numWorkers)
|
||||||
|
{
|
||||||
|
if (nullptr == output || nullptr == in || 0 == outputSize || 0 == inSize)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
numWorkers = std::min(kMaxDecompressWorkers, numWorkers);
|
||||||
|
numWorkers = std::max(1u, numWorkers);
|
||||||
|
|
||||||
|
auto header = reinterpret_cast<const TileStream*>(in);
|
||||||
|
|
||||||
|
if (!ValidateStream(header))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
std::thread workers[kMaxDecompressWorkers];
|
||||||
|
|
||||||
|
// Run a tile per thread
|
||||||
|
header = reinterpret_cast<const TileStream*>(in);
|
||||||
|
|
||||||
|
if (!ValidateStream(header))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
DecompressionContext context{};
|
||||||
|
|
||||||
|
context.inputPtr = in;
|
||||||
|
context.inputSize = inSize;
|
||||||
|
|
||||||
|
context.outputPtr = output;
|
||||||
|
context.outputSize = header->GetUncompressedSize();
|
||||||
|
|
||||||
|
context.globalIndex = 0;
|
||||||
|
context.numItems = header->numTiles;
|
||||||
|
|
||||||
|
context.failed = false;
|
||||||
|
|
||||||
|
uint32_t numWorkersLeft = context.numItems > (2 * numWorkers) ? numWorkers : 1;
|
||||||
|
const uint32_t compressorId = header->id;
|
||||||
|
|
||||||
|
for (auto& worker : workers)
|
||||||
|
{
|
||||||
|
if (numWorkersLeft == 1)
|
||||||
|
break;
|
||||||
|
|
||||||
|
worker = std::thread([&context, compressorId]() { TileDecompressionJob(context, compressorId); });
|
||||||
|
|
||||||
|
--numWorkersLeft;
|
||||||
|
}
|
||||||
|
|
||||||
|
TileDecompressionJob(context, compressorId);
|
||||||
|
|
||||||
|
for (auto& worker : workers)
|
||||||
|
{
|
||||||
|
if (worker.joinable())
|
||||||
|
worker.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
return (!context.failed);
|
||||||
|
}
|
||||||
|
} // namespace GDeflate
|
||||||
202
lib/gdeflate/LICENSE
Normal file
202
lib/gdeflate/LICENSE
Normal file
|
|
@ -0,0 +1,202 @@
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright (c) 2020, 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||||
|
Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
83
lib/gdeflate/TileStream.h
Normal file
83
lib/gdeflate/TileStream.h
Normal file
|
|
@ -0,0 +1,83 @@
|
||||||
|
/*
|
||||||
|
* SPDX-FileCopyrightText: Copyright (c) 2020, 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||||
|
* SPDX-FileCopyrightText: Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Utils.h"
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace GDeflate
|
||||||
|
{
|
||||||
|
#pragma pack(push, 1)
|
||||||
|
struct TileStream
|
||||||
|
{
|
||||||
|
static constexpr uint32_t kMaxTiles = (1 << 16) - 1;
|
||||||
|
|
||||||
|
uint8_t id;
|
||||||
|
uint8_t magic;
|
||||||
|
|
||||||
|
uint16_t numTiles;
|
||||||
|
|
||||||
|
uint32_t tileSizeIdx : 2; // this must be set to 1
|
||||||
|
uint32_t lastTileSize : 18;
|
||||||
|
uint32_t reserved1 : 12;
|
||||||
|
|
||||||
|
TileStream(size_t uncompressedSize)
|
||||||
|
{
|
||||||
|
memset(this, 0, sizeof(*this));
|
||||||
|
tileSizeIdx = 1;
|
||||||
|
SetCodecId(kGDeflateId);
|
||||||
|
SetUncompressedSize(uncompressedSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsValid() const
|
||||||
|
{
|
||||||
|
return id == (magic ^ 0xff);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t GetUncompressedSize() const
|
||||||
|
{
|
||||||
|
return numTiles * kDefaultTileSize - (lastTileSize == 0 ? 0 : kDefaultTileSize - lastTileSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void SetCodecId(uint8_t inId)
|
||||||
|
{
|
||||||
|
id = inId;
|
||||||
|
magic = inId ^ 0xff;
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetUncompressedSize(size_t size)
|
||||||
|
{
|
||||||
|
numTiles = static_cast<uint16_t>(size / kDefaultTileSize);
|
||||||
|
lastTileSize = static_cast<uint32_t>(size - numTiles * kDefaultTileSize);
|
||||||
|
|
||||||
|
numTiles += lastTileSize != 0 ? 1 : 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#pragma pack(pop)
|
||||||
|
|
||||||
|
static_assert(sizeof(TileStream) == 8, "Tile stream header size overrun!");
|
||||||
|
|
||||||
|
} // namespace GDeflate
|
||||||
82
lib/gdeflate/Utils.h
Normal file
82
lib/gdeflate/Utils.h
Normal file
|
|
@ -0,0 +1,82 @@
|
||||||
|
/*
|
||||||
|
* SPDX-FileCopyrightText: Copyright (c) 2020, 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||||
|
* SPDX-FileCopyrightText: Copyright (c) Microsoft Corportaion. All rights reserved.
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <atomic>
|
||||||
|
#include <chrono>
|
||||||
|
#include <cstring>
|
||||||
|
#include <limits>
|
||||||
|
|
||||||
|
namespace GDeflate
|
||||||
|
{
|
||||||
|
template<int N, typename T>
|
||||||
|
static inline T _align(T a)
|
||||||
|
{
|
||||||
|
return (a + T(N) - 1) & ~(T(N) - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static inline T _divRoundup(T a, T b)
|
||||||
|
{
|
||||||
|
return (a + b - 1) / b;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static inline uint32_t _lzCount(T a)
|
||||||
|
{
|
||||||
|
uint32_t n = 0;
|
||||||
|
|
||||||
|
while (0 == (a & 1) && n < sizeof(T) * 8)
|
||||||
|
{
|
||||||
|
a >>= 1;
|
||||||
|
++n;
|
||||||
|
}
|
||||||
|
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static inline T GetBits(uint32_t*& in, uint32_t& offset, uint32_t numBitsToRead)
|
||||||
|
{
|
||||||
|
constexpr uint32_t kBitsPerBucket = sizeof(*in) * 8;
|
||||||
|
|
||||||
|
T bits = 0;
|
||||||
|
uint32_t numBitsConsumed = 0;
|
||||||
|
while (numBitsConsumed < numBitsToRead)
|
||||||
|
{
|
||||||
|
const uint32_t numBits =
|
||||||
|
std::min(numBitsToRead - numBitsConsumed, kBitsPerBucket - (offset % kBitsPerBucket));
|
||||||
|
|
||||||
|
const T mask = std::numeric_limits<T>().max() >> (sizeof(T) * 8 - numBits);
|
||||||
|
|
||||||
|
bits |= (T(*in >> (offset % kBitsPerBucket)) & mask) << numBitsConsumed;
|
||||||
|
|
||||||
|
offset += numBits;
|
||||||
|
numBitsConsumed += numBits;
|
||||||
|
|
||||||
|
if (0 == offset % kBitsPerBucket)
|
||||||
|
in++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return bits;
|
||||||
|
}
|
||||||
|
} // namespace GDeflate
|
||||||
29
lib/gdeflate/config.h
Normal file
29
lib/gdeflate/config.h
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
/*
|
||||||
|
* SPDX-FileCopyrightText: Copyright (c) 2020, 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||||
|
* SPDX-FileCopyrightText: Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
namespace GDeflate
|
||||||
|
{
|
||||||
|
static constexpr uint8_t kGDeflateId = 4;
|
||||||
|
|
||||||
|
static const size_t kDefaultTileSize = 64 * 1024; /*!< Default tile size */
|
||||||
|
} // namespace GDeflate
|
||||||
36
lib/gdeflate/libdeflate/COPYING
Normal file
36
lib/gdeflate/libdeflate/COPYING
Normal file
|
|
@ -0,0 +1,36 @@
|
||||||
|
Copyright 2016 Eric Biggers
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person
|
||||||
|
obtaining a copy of this software and associated documentation files
|
||||||
|
(the "Software"), to deal in the Software without restriction,
|
||||||
|
including without limitation the rights to use, copy, modify, merge,
|
||||||
|
publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||||
|
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||||
|
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
|
||||||
|
SPDX-FileCopyrightText: Copyright (c) 2020, 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||||
|
SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
338
lib/gdeflate/libdeflate/common/common_defs.h
Normal file
338
lib/gdeflate/libdeflate/common/common_defs.h
Normal file
|
|
@ -0,0 +1,338 @@
|
||||||
|
/*
|
||||||
|
* common_defs.h
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef COMMON_COMMON_DEFS_H
|
||||||
|
#define COMMON_COMMON_DEFS_H
|
||||||
|
|
||||||
|
#ifdef __GNUC__
|
||||||
|
# include "compiler_gcc.h"
|
||||||
|
#elif defined(_MSC_VER)
|
||||||
|
# include "compiler_msc.h"
|
||||||
|
#else
|
||||||
|
# pragma message("Unrecognized compiler. Please add a header file for your compiler. Compilation will proceed, but performance may suffer!")
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* Type definitions */
|
||||||
|
/* ========================================================================== */
|
||||||
|
|
||||||
|
#include <stddef.h> /* size_t */
|
||||||
|
|
||||||
|
#ifndef __bool_true_false_are_defined
|
||||||
|
# include <stdbool.h> /* bool */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Fixed-width integer types */
|
||||||
|
#include <stdint.h>
|
||||||
|
typedef uint8_t u8;
|
||||||
|
typedef uint16_t u16;
|
||||||
|
typedef uint32_t u32;
|
||||||
|
typedef uint64_t u64;
|
||||||
|
typedef int8_t s8;
|
||||||
|
typedef int16_t s16;
|
||||||
|
typedef int32_t s32;
|
||||||
|
typedef int64_t s64;
|
||||||
|
|
||||||
|
/* Concatenation macros */
|
||||||
|
#define CONCAT2(x,y) x##y
|
||||||
|
#define CONCAT(x,y) CONCAT2(x,y)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Word type of the target architecture. Use 'size_t' instead of 'unsigned
|
||||||
|
* long' to account for platforms such as Windows that use 32-bit 'unsigned
|
||||||
|
* long' on 64-bit architectures.
|
||||||
|
*/
|
||||||
|
typedef size_t machine_word_t;
|
||||||
|
|
||||||
|
/* Number of bytes in a word */
|
||||||
|
#define WORDBYTES ((int)sizeof(machine_word_t))
|
||||||
|
|
||||||
|
/* Number of bits in a word */
|
||||||
|
#define WORDBITS (8 * WORDBYTES)
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* Optional compiler features */
|
||||||
|
/* ========================================================================== */
|
||||||
|
|
||||||
|
/* LIBEXPORT - export a function from a shared library */
|
||||||
|
#ifndef LIBEXPORT
|
||||||
|
# define LIBEXPORT
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* inline - suggest that a function be inlined */
|
||||||
|
#ifndef inline
|
||||||
|
# define inline
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* forceinline - force a function to be inlined, if possible */
|
||||||
|
#ifndef forceinline
|
||||||
|
# define forceinline inline
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* restrict - annotate a non-aliased pointer */
|
||||||
|
#ifndef restrict
|
||||||
|
# define restrict
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* likely(expr) - hint that an expression is usually true */
|
||||||
|
#ifndef likely
|
||||||
|
# define likely(expr) (expr)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* unlikely(expr) - hint that an expression is usually false */
|
||||||
|
#ifndef unlikely
|
||||||
|
# define unlikely(expr) (expr)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* prefetchr(addr) - prefetch into L1 cache for read */
|
||||||
|
#ifndef prefetchr
|
||||||
|
# define prefetchr(addr)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* prefetchw(addr) - prefetch into L1 cache for write */
|
||||||
|
#ifndef prefetchw
|
||||||
|
# define prefetchw(addr)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Does the compiler support the 'target' function attribute? */
|
||||||
|
#ifndef COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE
|
||||||
|
# define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Which targets are supported with the 'target' function attribute? */
|
||||||
|
#ifndef COMPILER_SUPPORTS_BMI2_TARGET
|
||||||
|
# define COMPILER_SUPPORTS_BMI2_TARGET 0
|
||||||
|
#endif
|
||||||
|
#ifndef COMPILER_SUPPORTS_AVX_TARGET
|
||||||
|
# define COMPILER_SUPPORTS_AVX_TARGET 0
|
||||||
|
#endif
|
||||||
|
#ifndef COMPILER_SUPPORTS_AVX512BW_TARGET
|
||||||
|
# define COMPILER_SUPPORTS_AVX512BW_TARGET 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Which targets are supported with the 'target' function attribute and have
|
||||||
|
* intrinsics that work within 'target'-ed functions?
|
||||||
|
*/
|
||||||
|
#ifndef COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS
|
||||||
|
# define COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS 0
|
||||||
|
#endif
|
||||||
|
#ifndef COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS
|
||||||
|
# define COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS 0
|
||||||
|
#endif
|
||||||
|
#ifndef COMPILER_SUPPORTS_AVX2_TARGET_INTRINSICS
|
||||||
|
# define COMPILER_SUPPORTS_AVX2_TARGET_INTRINSICS 0
|
||||||
|
#endif
|
||||||
|
#ifndef COMPILER_SUPPORTS_AVX512BW_TARGET_INTRINSICS
|
||||||
|
# define COMPILER_SUPPORTS_AVX512BW_TARGET_INTRINSICS 0
|
||||||
|
#endif
|
||||||
|
#ifndef COMPILER_SUPPORTS_NEON_TARGET_INTRINSICS
|
||||||
|
# define COMPILER_SUPPORTS_NEON_TARGET_INTRINSICS 0
|
||||||
|
#endif
|
||||||
|
#ifndef COMPILER_SUPPORTS_PMULL_TARGET_INTRINSICS
|
||||||
|
# define COMPILER_SUPPORTS_PMULL_TARGET_INTRINSICS 0
|
||||||
|
#endif
|
||||||
|
#ifndef COMPILER_SUPPORTS_CRC32_TARGET_INTRINSICS
|
||||||
|
# define COMPILER_SUPPORTS_CRC32_TARGET_INTRINSICS 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* _aligned_attribute(n) - declare that the annotated variable, or variables of
|
||||||
|
* the annotated type, are to be aligned on n-byte boundaries */
|
||||||
|
#ifndef _aligned_attribute
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* Miscellaneous macros */
|
||||||
|
/* ========================================================================== */
|
||||||
|
|
||||||
|
#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
|
||||||
|
#define MIN(a, b) ((a) <= (b) ? (a) : (b))
|
||||||
|
#define MAX(a, b) ((a) >= (b) ? (a) : (b))
|
||||||
|
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
|
||||||
|
#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
|
||||||
|
#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* Endianness handling */
|
||||||
|
/* ========================================================================== */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* CPU_IS_LITTLE_ENDIAN() - a macro which evaluates to 1 if the CPU is little
|
||||||
|
* endian or 0 if it is big endian. The macro should be defined in a way such
|
||||||
|
* that the compiler can evaluate it at compilation time. If not defined, a
|
||||||
|
* fallback is used.
|
||||||
|
*/
|
||||||
|
#ifndef CPU_IS_LITTLE_ENDIAN
|
||||||
|
static forceinline int CPU_IS_LITTLE_ENDIAN(void)
|
||||||
|
{
|
||||||
|
union {
|
||||||
|
unsigned int v;
|
||||||
|
unsigned char b;
|
||||||
|
} u;
|
||||||
|
u.v = 1;
|
||||||
|
return u.b;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* bswap16(n) - swap the bytes of a 16-bit integer */
|
||||||
|
#ifndef bswap16
|
||||||
|
static forceinline u16 bswap16(u16 n)
|
||||||
|
{
|
||||||
|
return (n << 8) | (n >> 8);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* bswap32(n) - swap the bytes of a 32-bit integer */
|
||||||
|
#ifndef bswap32
|
||||||
|
static forceinline u32 bswap32(u32 n)
|
||||||
|
{
|
||||||
|
return ((n & 0x000000FF) << 24) |
|
||||||
|
((n & 0x0000FF00) << 8) |
|
||||||
|
((n & 0x00FF0000) >> 8) |
|
||||||
|
((n & 0xFF000000) >> 24);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* bswap64(n) - swap the bytes of a 64-bit integer */
|
||||||
|
#ifndef bswap64
|
||||||
|
static forceinline u64 bswap64(u64 n)
|
||||||
|
{
|
||||||
|
return ((n & 0x00000000000000FF) << 56) |
|
||||||
|
((n & 0x000000000000FF00) << 40) |
|
||||||
|
((n & 0x0000000000FF0000) << 24) |
|
||||||
|
((n & 0x00000000FF000000) << 8) |
|
||||||
|
((n & 0x000000FF00000000) >> 8) |
|
||||||
|
((n & 0x0000FF0000000000) >> 24) |
|
||||||
|
((n & 0x00FF000000000000) >> 40) |
|
||||||
|
((n & 0xFF00000000000000) >> 56);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define le16_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? (n) : bswap16(n))
|
||||||
|
#define le32_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? (n) : bswap32(n))
|
||||||
|
#define le64_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? (n) : bswap64(n))
|
||||||
|
#define be16_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? bswap16(n) : (n))
|
||||||
|
#define be32_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? bswap32(n) : (n))
|
||||||
|
#define be64_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? bswap64(n) : (n))
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* Unaligned memory accesses */
|
||||||
|
/* ========================================================================== */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* UNALIGNED_ACCESS_IS_FAST should be defined to 1 if unaligned memory accesses
|
||||||
|
* can be performed efficiently on the target platform.
|
||||||
|
*/
|
||||||
|
#ifndef UNALIGNED_ACCESS_IS_FAST
|
||||||
|
# define UNALIGNED_ACCESS_IS_FAST 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* Bit scan functions */
|
||||||
|
/* ========================================================================== */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Bit Scan Reverse (BSR) - find the 0-based index (relative to the least
|
||||||
|
* significant end) of the *most* significant 1 bit in the input value. The
|
||||||
|
* input value must be nonzero!
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef bsr32
|
||||||
|
static forceinline unsigned
|
||||||
|
bsr32(u32 n)
|
||||||
|
{
|
||||||
|
unsigned i = 0;
|
||||||
|
while ((n >>= 1) != 0)
|
||||||
|
i++;
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef bsr64
|
||||||
|
static forceinline unsigned
|
||||||
|
bsr64(u64 n)
|
||||||
|
{
|
||||||
|
unsigned i = 0;
|
||||||
|
while ((n >>= 1) != 0)
|
||||||
|
i++;
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static forceinline unsigned
|
||||||
|
bsrw(machine_word_t n)
|
||||||
|
{
|
||||||
|
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
|
||||||
|
if (WORDBITS == 32)
|
||||||
|
return bsr32(n);
|
||||||
|
else
|
||||||
|
return bsr64(n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Bit Scan Forward (BSF) - find the 0-based index (relative to the least
|
||||||
|
* significant end) of the *least* significant 1 bit in the input value. The
|
||||||
|
* input value must be nonzero!
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef bsf32
|
||||||
|
static forceinline unsigned
|
||||||
|
bsf32(u32 n)
|
||||||
|
{
|
||||||
|
unsigned i = 0;
|
||||||
|
while ((n & 1) == 0) {
|
||||||
|
i++;
|
||||||
|
n >>= 1;
|
||||||
|
}
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef bsf64
|
||||||
|
static forceinline unsigned
|
||||||
|
bsf64(u64 n)
|
||||||
|
{
|
||||||
|
unsigned i = 0;
|
||||||
|
while ((n & 1) == 0) {
|
||||||
|
i++;
|
||||||
|
n >>= 1;
|
||||||
|
}
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static forceinline unsigned
|
||||||
|
bsfw(machine_word_t n)
|
||||||
|
{
|
||||||
|
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
|
||||||
|
if (WORDBITS == 32)
|
||||||
|
return bsf32(n);
|
||||||
|
else
|
||||||
|
return bsf64(n);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* COMMON_COMMON_DEFS_H */
|
||||||
217
lib/gdeflate/libdeflate/common/compiler_gcc.h
Normal file
217
lib/gdeflate/libdeflate/common/compiler_gcc.h
Normal file
|
|
@ -0,0 +1,217 @@
|
||||||
|
/*
|
||||||
|
* compiler_gcc.h - definitions for the GNU C Compiler. This also handles clang
|
||||||
|
* and the Intel C Compiler (icc).
|
||||||
|
*
|
||||||
|
* TODO: icc is not well tested, so some things are currently disabled even
|
||||||
|
* though they maybe can be enabled on some icc versions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(__clang__) && !defined(__INTEL_COMPILER)
|
||||||
|
# define GCC_PREREQ(major, minor) \
|
||||||
|
(__GNUC__ > (major) || \
|
||||||
|
(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
|
||||||
|
#else
|
||||||
|
# define GCC_PREREQ(major, minor) 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Note: only check the clang version when absolutely necessary!
|
||||||
|
* "Vendors" such as Apple can use different version numbers. */
|
||||||
|
#ifdef __clang__
|
||||||
|
# ifdef __apple_build_version__
|
||||||
|
# define CLANG_PREREQ(major, minor, apple_version) \
|
||||||
|
(__apple_build_version__ >= (apple_version))
|
||||||
|
# else
|
||||||
|
# define CLANG_PREREQ(major, minor, apple_version) \
|
||||||
|
(__clang_major__ > (major) || \
|
||||||
|
(__clang_major__ == (major) && __clang_minor__ >= (minor)))
|
||||||
|
# endif
|
||||||
|
#else
|
||||||
|
# define CLANG_PREREQ(major, minor, apple_version) 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef __has_attribute
|
||||||
|
# define __has_attribute(attribute) 0
|
||||||
|
#endif
|
||||||
|
#ifndef __has_feature
|
||||||
|
# define __has_feature(feature) 0
|
||||||
|
#endif
|
||||||
|
#ifndef __has_builtin
|
||||||
|
# define __has_builtin(builtin) 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
# define LIBEXPORT __declspec(dllexport)
|
||||||
|
#else
|
||||||
|
# define LIBEXPORT __attribute__((visibility("default")))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define inline inline
|
||||||
|
#define forceinline inline __attribute__((always_inline))
|
||||||
|
#define restrict __restrict__
|
||||||
|
#define likely(expr) __builtin_expect(!!(expr), 1)
|
||||||
|
#define unlikely(expr) __builtin_expect(!!(expr), 0)
|
||||||
|
#define prefetchr(addr) __builtin_prefetch((addr), 0)
|
||||||
|
#define prefetchw(addr) __builtin_prefetch((addr), 1)
|
||||||
|
#define _aligned_attribute(n) __attribute__((aligned(n)))
|
||||||
|
|
||||||
|
#define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE \
|
||||||
|
(GCC_PREREQ(4, 4) || __has_attribute(target))
|
||||||
|
|
||||||
|
#if COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE
|
||||||
|
|
||||||
|
# if defined(__i386__) || defined(__x86_64__)
|
||||||
|
|
||||||
|
# define COMPILER_SUPPORTS_PCLMUL_TARGET \
|
||||||
|
(GCC_PREREQ(4, 4) || __has_builtin(__builtin_ia32_pclmulqdq128))
|
||||||
|
|
||||||
|
# define COMPILER_SUPPORTS_AVX_TARGET \
|
||||||
|
(GCC_PREREQ(4, 6) || __has_builtin(__builtin_ia32_maxps256))
|
||||||
|
|
||||||
|
# define COMPILER_SUPPORTS_BMI2_TARGET \
|
||||||
|
(GCC_PREREQ(4, 7) || __has_builtin(__builtin_ia32_pdep_di))
|
||||||
|
|
||||||
|
# define COMPILER_SUPPORTS_AVX2_TARGET \
|
||||||
|
(GCC_PREREQ(4, 7) || __has_builtin(__builtin_ia32_psadbw256))
|
||||||
|
|
||||||
|
# define COMPILER_SUPPORTS_AVX512BW_TARGET \
|
||||||
|
(GCC_PREREQ(5, 1) || __has_builtin(__builtin_ia32_psadbw512))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Prior to gcc 4.9 (r200349) and clang 3.8 (r239883), x86 intrinsics
|
||||||
|
* not available in the main target could not be used in 'target'
|
||||||
|
* attribute functions. Unfortunately clang has no feature test macro
|
||||||
|
* for this so we have to check its version.
|
||||||
|
*/
|
||||||
|
# if GCC_PREREQ(4, 9) || CLANG_PREREQ(3, 8, 7030000)
|
||||||
|
# define COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS 1
|
||||||
|
# define COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS \
|
||||||
|
COMPILER_SUPPORTS_PCLMUL_TARGET
|
||||||
|
# define COMPILER_SUPPORTS_AVX2_TARGET_INTRINSICS \
|
||||||
|
COMPILER_SUPPORTS_AVX2_TARGET
|
||||||
|
# define COMPILER_SUPPORTS_AVX512BW_TARGET_INTRINSICS \
|
||||||
|
COMPILER_SUPPORTS_AVX512BW_TARGET
|
||||||
|
# endif
|
||||||
|
|
||||||
|
# elif defined(__arm__) || defined(__aarch64__)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Determine whether NEON and crypto intrinsics are supported.
|
||||||
|
*
|
||||||
|
* With gcc prior to 6.1, (r230411 for arm32, r226563 for arm64), neither
|
||||||
|
* was available unless enabled in the main target.
|
||||||
|
*
|
||||||
|
* But even after that, to include <arm_neon.h> (which contains both the
|
||||||
|
* basic NEON intrinsics and the crypto intrinsics) the main target still
|
||||||
|
* needs to have:
|
||||||
|
* - gcc: hardware floating point support
|
||||||
|
* - clang: NEON support (but not necessarily crypto support)
|
||||||
|
*/
|
||||||
|
# if (GCC_PREREQ(6, 1) && defined(__ARM_FP)) || \
|
||||||
|
(defined(__clang__) && defined(__ARM_NEON))
|
||||||
|
# define COMPILER_SUPPORTS_NEON_TARGET_INTRINSICS 1
|
||||||
|
/*
|
||||||
|
* The crypto intrinsics are broken on arm32 with clang, even when using
|
||||||
|
* -mfpu=crypto-neon-fp-armv8, because clang's <arm_neon.h> puts them
|
||||||
|
* behind __aarch64__. Undefine __ARM_FEATURE_CRYPTO in that case...
|
||||||
|
*/
|
||||||
|
# if defined(__clang__) && defined(__arm__)
|
||||||
|
# undef __ARM_FEATURE_CRYPTO
|
||||||
|
# elif __has_builtin(__builtin_neon_vmull_p64) || !defined(__clang__)
|
||||||
|
# define COMPILER_SUPPORTS_PMULL_TARGET_INTRINSICS 1
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Determine whether CRC32 intrinsics are supported.
|
||||||
|
*
|
||||||
|
* With gcc r274827 or later (gcc 10.1+, 9.3+, or 8.4+), or with clang,
|
||||||
|
* they work as expected. (Well, not quite. There's still a bug, but we
|
||||||
|
* have to work around it later when including arm_acle.h.)
|
||||||
|
*/
|
||||||
|
# if GCC_PREREQ(10, 1) || \
|
||||||
|
(GCC_PREREQ(9, 3) && !GCC_PREREQ(10, 0)) || \
|
||||||
|
(GCC_PREREQ(8, 4) && !GCC_PREREQ(9, 0)) || \
|
||||||
|
(defined(__clang__) && __has_builtin(__builtin_arm_crc32b))
|
||||||
|
# define COMPILER_SUPPORTS_CRC32_TARGET_INTRINSICS 1
|
||||||
|
# endif
|
||||||
|
|
||||||
|
# endif /* __arm__ || __aarch64__ */
|
||||||
|
|
||||||
|
#endif /* COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Prior to gcc 5.1 and clang 3.9, emmintrin.h only defined vectors of signed
|
||||||
|
* integers (e.g. __v4si), not vectors of unsigned integers (e.g. __v4su). But
|
||||||
|
* we need the unsigned ones in order to avoid signed integer overflow, which is
|
||||||
|
* undefined behavior. Add the missing definitions for the unsigned ones if
|
||||||
|
* needed.
|
||||||
|
*/
|
||||||
|
#if (GCC_PREREQ(4, 0) && !GCC_PREREQ(5, 1)) || \
|
||||||
|
(defined(__clang__) && !CLANG_PREREQ(3, 9, 8020000)) || \
|
||||||
|
defined(__INTEL_COMPILER)
|
||||||
|
typedef unsigned long long __v2du __attribute__((__vector_size__(16)));
|
||||||
|
typedef unsigned int __v4su __attribute__((__vector_size__(16)));
|
||||||
|
typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
|
||||||
|
typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
|
||||||
|
typedef unsigned long long __v4du __attribute__((__vector_size__(32)));
|
||||||
|
typedef unsigned int __v8su __attribute__((__vector_size__(32)));
|
||||||
|
typedef unsigned short __v16hu __attribute__((__vector_size__(32)));
|
||||||
|
typedef unsigned char __v32qu __attribute__((__vector_size__(32)));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __INTEL_COMPILER
|
||||||
|
typedef int __v16si __attribute__((__vector_size__(64)));
|
||||||
|
typedef short __v32hi __attribute__((__vector_size__(64)));
|
||||||
|
typedef char __v64qi __attribute__((__vector_size__(64)));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Newer gcc supports __BYTE_ORDER__. Older gcc doesn't. */
|
||||||
|
#ifdef __BYTE_ORDER__
|
||||||
|
# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if GCC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16)
|
||||||
|
# define bswap16 __builtin_bswap16
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap32)
|
||||||
|
# define bswap32 __builtin_bswap32
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap64)
|
||||||
|
# define bswap64 __builtin_bswap64
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__x86_64__) || defined(__i386__) || \
|
||||||
|
defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
|
||||||
|
/*
|
||||||
|
* For all compilation purposes, WebAssembly behaves like any other CPU
|
||||||
|
* instruction set. Even though WebAssembly engine might be running on top
|
||||||
|
* of different actual CPU architectures, the WebAssembly spec itself
|
||||||
|
* permits unaligned access and it will be fast on most of those platforms,
|
||||||
|
* and simulated at the engine level on others, so it's worth treating it
|
||||||
|
* as a CPU architecture with fast unaligned access.
|
||||||
|
*/ defined(__wasm__)
|
||||||
|
# define UNALIGNED_ACCESS_IS_FAST 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define bsr32(n) (31 - __builtin_clz(n))
|
||||||
|
#define bsr64(n) (63 - __builtin_clzll(n))
|
||||||
|
#define bsf32(n) __builtin_ctz(n)
|
||||||
|
#define bsf64(n) __builtin_ctzll(n)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Setup rotation macros similar to MSVS intrinsics.
|
||||||
|
* These should recognized by compilers.
|
||||||
|
*/
|
||||||
|
#ifndef _rotr16
|
||||||
|
#define _rotr16(x,n) ((x>>n) + (x<<(16-n)))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef _rotr
|
||||||
|
#define _rotr(x,n) ((x>>n) + (x<<(32-n)))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef _rotr64
|
||||||
|
#define _rotr64(x,n) ((x>>n) + (x<<(64-n)))
|
||||||
|
#endif
|
||||||
80
lib/gdeflate/libdeflate/common/compiler_msc.h
Normal file
80
lib/gdeflate/libdeflate/common/compiler_msc.h
Normal file
|
|
@ -0,0 +1,80 @@
|
||||||
|
/*
|
||||||
|
* compiler_msc.h - definitions for the Microsoft C Compiler
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h> /* for _byteswap_*() */
|
||||||
|
|
||||||
|
#define LIBEXPORT __declspec(dllexport)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Old versions (e.g. VS2010) of MSC don't have the C99 header stdbool.h.
|
||||||
|
* Beware: the below replacement isn't fully standard, since normally any value
|
||||||
|
* != 0 should be implicitly cast to a bool with value 1... but that doesn't
|
||||||
|
* happen if bool is really just an 'int'.
|
||||||
|
*/
|
||||||
|
typedef int bool;
|
||||||
|
#define true 1
|
||||||
|
#define false 0
|
||||||
|
#define __bool_true_false_are_defined 1
|
||||||
|
|
||||||
|
/* Define ssize_t */
|
||||||
|
#ifdef _WIN64
|
||||||
|
typedef long long ssize_t;
|
||||||
|
#else
|
||||||
|
typedef int ssize_t;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Assume a little endian architecture with fast unaligned access */
|
||||||
|
#define CPU_IS_LITTLE_ENDIAN() 1
|
||||||
|
#define UNALIGNED_ACCESS_IS_FAST 1
|
||||||
|
|
||||||
|
/* __restrict has nonstandard behavior; don't use it */
|
||||||
|
#define restrict
|
||||||
|
|
||||||
|
/* ... but we can use __inline and __forceinline */
|
||||||
|
#define inline __inline
|
||||||
|
#define forceinline __forceinline
|
||||||
|
|
||||||
|
/* Byte swap functions */
|
||||||
|
#define bswap16 _byteswap_ushort
|
||||||
|
#define bswap32 _byteswap_ulong
|
||||||
|
#define bswap64 _byteswap_uint64
|
||||||
|
|
||||||
|
/* Bit scan functions (32-bit) */
|
||||||
|
|
||||||
|
static forceinline unsigned
|
||||||
|
bsr32(uint32_t n)
|
||||||
|
{
|
||||||
|
_BitScanReverse(&n, n);
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
#define bsr32 bsr32
|
||||||
|
|
||||||
|
static forceinline unsigned
|
||||||
|
bsf32(uint32_t n)
|
||||||
|
{
|
||||||
|
_BitScanForward(&n, n);
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
#define bsf32 bsf32
|
||||||
|
|
||||||
|
#ifdef _M_X64 /* Bit scan functions (64-bit) */
|
||||||
|
|
||||||
|
static forceinline unsigned
|
||||||
|
bsr64(uint64_t n)
|
||||||
|
{
|
||||||
|
_BitScanReverse64(&n, n);
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
#define bsr64 bsr64
|
||||||
|
|
||||||
|
static forceinline unsigned
|
||||||
|
bsf64(uint64_t n)
|
||||||
|
{
|
||||||
|
_BitScanForward64(&n, n);
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
#define bsf64 bsf64
|
||||||
|
|
||||||
|
#endif /* _M_X64 */
|
||||||
130
lib/gdeflate/libdeflate/lib/adler32.c
Normal file
130
lib/gdeflate/libdeflate/lib/adler32.c
Normal file
|
|
@ -0,0 +1,130 @@
|
||||||
|
/*
|
||||||
|
* adler32.c - Adler-32 checksum algorithm
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "lib_common.h"
|
||||||
|
#include "libdeflate.h"
|
||||||
|
|
||||||
|
/* The Adler-32 divisor, or "base", value. */
|
||||||
|
#define DIVISOR 65521
|
||||||
|
|
||||||
|
/*
|
||||||
|
* MAX_CHUNK_SIZE is the most bytes that can be processed without the
|
||||||
|
* possibility of s2 overflowing when it is represented as an unsigned 32-bit
|
||||||
|
* integer. This value was computed using the following Python script:
|
||||||
|
*
|
||||||
|
* divisor = 65521
|
||||||
|
* count = 0
|
||||||
|
* s1 = divisor - 1
|
||||||
|
* s2 = divisor - 1
|
||||||
|
* while True:
|
||||||
|
* s1 += 0xFF
|
||||||
|
* s2 += s1
|
||||||
|
* if s2 > 0xFFFFFFFF:
|
||||||
|
* break
|
||||||
|
* count += 1
|
||||||
|
* print(count)
|
||||||
|
*
|
||||||
|
* Note that to get the correct worst-case value, we must assume that every byte
|
||||||
|
* has value 0xFF and that s1 and s2 started with the highest possible values
|
||||||
|
* modulo the divisor.
|
||||||
|
*/
|
||||||
|
#define MAX_CHUNK_SIZE 5552
|
||||||
|
|
||||||
|
typedef u32 (*adler32_func_t)(u32, const u8 *, size_t);
|
||||||
|
|
||||||
|
/* Include architecture-specific implementations if available */
|
||||||
|
#undef DEFAULT_IMPL
|
||||||
|
#undef DISPATCH
|
||||||
|
#if defined(__arm__) || defined(__aarch64__)
|
||||||
|
# include "arm/adler32_impl.h"
|
||||||
|
#elif defined(__i386__) || defined(__x86_64__)
|
||||||
|
# include "x86/adler32_impl.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Define a generic implementation if needed */
|
||||||
|
#ifndef DEFAULT_IMPL
|
||||||
|
#define DEFAULT_IMPL adler32_generic
|
||||||
|
static u32 adler32_generic(u32 adler, const u8 *p, size_t size)
|
||||||
|
{
|
||||||
|
u32 s1 = adler & 0xFFFF;
|
||||||
|
u32 s2 = adler >> 16;
|
||||||
|
const u8 * const end = p + size;
|
||||||
|
|
||||||
|
while (p != end) {
|
||||||
|
size_t chunk_size = MIN(end - p, MAX_CHUNK_SIZE);
|
||||||
|
const u8 *chunk_end = p + chunk_size;
|
||||||
|
size_t num_unrolled_iterations = chunk_size / 4;
|
||||||
|
|
||||||
|
while (num_unrolled_iterations--) {
|
||||||
|
s1 += *p++;
|
||||||
|
s2 += s1;
|
||||||
|
s1 += *p++;
|
||||||
|
s2 += s1;
|
||||||
|
s1 += *p++;
|
||||||
|
s2 += s1;
|
||||||
|
s1 += *p++;
|
||||||
|
s2 += s1;
|
||||||
|
}
|
||||||
|
while (p != chunk_end) {
|
||||||
|
s1 += *p++;
|
||||||
|
s2 += s1;
|
||||||
|
}
|
||||||
|
s1 %= DIVISOR;
|
||||||
|
s2 %= DIVISOR;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (s2 << 16) | s1;
|
||||||
|
}
|
||||||
|
#endif /* !DEFAULT_IMPL */
|
||||||
|
|
||||||
|
#ifdef DISPATCH
|
||||||
|
static u32 dispatch(u32, const u8 *, size_t);
|
||||||
|
|
||||||
|
static volatile adler32_func_t adler32_impl = dispatch;
|
||||||
|
|
||||||
|
/* Choose the fastest implementation at runtime */
|
||||||
|
static u32 dispatch(u32 adler, const u8 *buffer, size_t size)
|
||||||
|
{
|
||||||
|
adler32_func_t f = arch_select_adler32_func();
|
||||||
|
|
||||||
|
if (f == NULL)
|
||||||
|
f = DEFAULT_IMPL;
|
||||||
|
|
||||||
|
adler32_impl = f;
|
||||||
|
return adler32_impl(adler, buffer, size);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
# define adler32_impl DEFAULT_IMPL /* only one implementation, use it */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
LIBDEFLATEEXPORT u32 LIBDEFLATEAPI
|
||||||
|
libdeflate_adler32(u32 adler, const void *buffer, size_t size)
|
||||||
|
{
|
||||||
|
if (buffer == NULL) /* return initial value */
|
||||||
|
return 1;
|
||||||
|
return adler32_impl(adler, buffer, size);
|
||||||
|
}
|
||||||
124
lib/gdeflate/libdeflate/lib/adler32_vec_template.h
Normal file
124
lib/gdeflate/libdeflate/lib/adler32_vec_template.h
Normal file
|
|
@ -0,0 +1,124 @@
|
||||||
|
/*
|
||||||
|
* adler32_vec_template.h - template for vectorized Adler-32 implementations
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This file contains a template for vectorized Adler-32 implementations.
|
||||||
|
*
|
||||||
|
* The inner loop between reductions modulo 65521 of an unvectorized Adler-32
|
||||||
|
* implementation looks something like this:
|
||||||
|
*
|
||||||
|
* do {
|
||||||
|
* s1 += *p;
|
||||||
|
* s2 += s1;
|
||||||
|
* } while (++p != chunk_end);
|
||||||
|
*
|
||||||
|
* For vectorized calculation of s1, we only need to sum the input bytes. They
|
||||||
|
* can be accumulated into multiple counters which are eventually summed
|
||||||
|
* together.
|
||||||
|
*
|
||||||
|
* For vectorized calculation of s2, the basic idea is that for each iteration
|
||||||
|
* that processes N bytes, we can perform the following vectorizable
|
||||||
|
* calculation:
|
||||||
|
*
|
||||||
|
* s2 += N*byte_1 + (N-1)*byte_2 + (N-2)*byte_3 + ... + 1*byte_N
|
||||||
|
*
|
||||||
|
* Or, equivalently, we can sum the byte_1...byte_N for each iteration into N
|
||||||
|
* separate counters, then do the multiplications by N...1 just once at the end
|
||||||
|
* rather than once per iteration.
|
||||||
|
*
|
||||||
|
* Also, we must account for how previous bytes will affect s2 by doing the
|
||||||
|
* following at beginning of each iteration:
|
||||||
|
*
|
||||||
|
* s2 += s1 * N
|
||||||
|
*
|
||||||
|
* Furthermore, like s1, "s2" can actually be multiple counters which are
|
||||||
|
* eventually summed together.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static u32 ATTRIBUTES
|
||||||
|
FUNCNAME(u32 adler, const u8 *p, size_t size)
|
||||||
|
{
|
||||||
|
u32 s1 = adler & 0xFFFF;
|
||||||
|
u32 s2 = adler >> 16;
|
||||||
|
const u8 * const end = p + size;
|
||||||
|
const u8 *vend;
|
||||||
|
const size_t max_chunk_size =
|
||||||
|
MIN(MAX_CHUNK_SIZE, IMPL_MAX_CHUNK_SIZE) -
|
||||||
|
(MIN(MAX_CHUNK_SIZE, IMPL_MAX_CHUNK_SIZE) %
|
||||||
|
IMPL_SEGMENT_SIZE);
|
||||||
|
|
||||||
|
/* Process a byte at a time until the needed alignment is reached */
|
||||||
|
if (p != end && (uintptr_t)p % IMPL_ALIGNMENT) {
|
||||||
|
do {
|
||||||
|
s1 += *p++;
|
||||||
|
s2 += s1;
|
||||||
|
} while (p != end && (uintptr_t)p % IMPL_ALIGNMENT);
|
||||||
|
s1 %= DIVISOR;
|
||||||
|
s2 %= DIVISOR;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Process "chunks" of bytes using vector instructions. Chunk sizes are
|
||||||
|
* limited to MAX_CHUNK_SIZE, which guarantees that s1 and s2 never
|
||||||
|
* overflow before being reduced modulo DIVISOR. For vector processing,
|
||||||
|
* chunk sizes are also made evenly divisible by IMPL_SEGMENT_SIZE and
|
||||||
|
* may be further limited to IMPL_MAX_CHUNK_SIZE.
|
||||||
|
*/
|
||||||
|
STATIC_ASSERT(IMPL_SEGMENT_SIZE % IMPL_ALIGNMENT == 0);
|
||||||
|
vend = end - ((size_t)(end - p) % IMPL_SEGMENT_SIZE);
|
||||||
|
while (p != vend) {
|
||||||
|
size_t chunk_size = MIN((size_t)(vend - p), max_chunk_size);
|
||||||
|
|
||||||
|
s2 += s1 * chunk_size;
|
||||||
|
|
||||||
|
FUNCNAME_CHUNK((const void *)p, (const void *)(p + chunk_size),
|
||||||
|
&s1, &s2);
|
||||||
|
|
||||||
|
p += chunk_size;
|
||||||
|
s1 %= DIVISOR;
|
||||||
|
s2 %= DIVISOR;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Process any remaining bytes */
|
||||||
|
if (p != end) {
|
||||||
|
do {
|
||||||
|
s1 += *p++;
|
||||||
|
s2 += s1;
|
||||||
|
} while (p != end);
|
||||||
|
s1 %= DIVISOR;
|
||||||
|
s2 %= DIVISOR;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (s2 << 16) | s1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef FUNCNAME
|
||||||
|
#undef FUNCNAME_CHUNK
|
||||||
|
#undef ATTRIBUTES
|
||||||
|
#undef IMPL_ALIGNMENT
|
||||||
|
#undef IMPL_SEGMENT_SIZE
|
||||||
|
#undef IMPL_MAX_CHUNK_SIZE
|
||||||
125
lib/gdeflate/libdeflate/lib/arm/adler32_impl.h
Normal file
125
lib/gdeflate/libdeflate/lib/arm/adler32_impl.h
Normal file
|
|
@ -0,0 +1,125 @@
|
||||||
|
/*
|
||||||
|
* arm/adler32_impl.h - ARM implementations of Adler-32 checksum algorithm
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIB_ARM_ADLER32_IMPL_H
|
||||||
|
#define LIB_ARM_ADLER32_IMPL_H
|
||||||
|
|
||||||
|
#include "cpu_features.h"
|
||||||
|
|
||||||
|
/* NEON implementation */
|
||||||
|
#undef DISPATCH_NEON
|
||||||
|
#if !defined(DEFAULT_IMPL) && \
|
||||||
|
(defined(__ARM_NEON) || (ARM_CPU_FEATURES_ENABLED && \
|
||||||
|
COMPILER_SUPPORTS_NEON_TARGET_INTRINSICS))
|
||||||
|
# define FUNCNAME adler32_neon
|
||||||
|
# define FUNCNAME_CHUNK adler32_neon_chunk
|
||||||
|
# define IMPL_ALIGNMENT 16
|
||||||
|
# define IMPL_SEGMENT_SIZE 32
|
||||||
|
/* Prevent unsigned overflow of the 16-bit precision byte counters */
|
||||||
|
# define IMPL_MAX_CHUNK_SIZE (32 * (0xFFFF / 0xFF))
|
||||||
|
# ifdef __ARM_NEON
|
||||||
|
# define ATTRIBUTES
|
||||||
|
# define DEFAULT_IMPL adler32_neon
|
||||||
|
# else
|
||||||
|
# ifdef __arm__
|
||||||
|
# define ATTRIBUTES __attribute__((target("fpu=neon")))
|
||||||
|
# else
|
||||||
|
# define ATTRIBUTES __attribute__((target("+simd")))
|
||||||
|
# endif
|
||||||
|
# define DISPATCH 1
|
||||||
|
# define DISPATCH_NEON 1
|
||||||
|
# endif
|
||||||
|
# include <arm_neon.h>
|
||||||
|
static forceinline ATTRIBUTES void
|
||||||
|
adler32_neon_chunk(const uint8x16_t *p, const uint8x16_t * const end,
|
||||||
|
u32 *s1, u32 *s2)
|
||||||
|
{
|
||||||
|
uint32x4_t v_s1 = (uint32x4_t) { 0, 0, 0, 0 };
|
||||||
|
uint32x4_t v_s2 = (uint32x4_t) { 0, 0, 0, 0 };
|
||||||
|
uint16x8_t v_byte_sums_a = (uint16x8_t) { 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||||
|
uint16x8_t v_byte_sums_b = (uint16x8_t) { 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||||
|
uint16x8_t v_byte_sums_c = (uint16x8_t) { 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||||
|
uint16x8_t v_byte_sums_d = (uint16x8_t) { 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||||
|
|
||||||
|
do {
|
||||||
|
const uint8x16_t bytes1 = *p++;
|
||||||
|
const uint8x16_t bytes2 = *p++;
|
||||||
|
uint16x8_t tmp;
|
||||||
|
|
||||||
|
v_s2 += v_s1;
|
||||||
|
|
||||||
|
/* Vector Pairwise Add Long (u8 => u16) */
|
||||||
|
tmp = vpaddlq_u8(bytes1);
|
||||||
|
|
||||||
|
/* Vector Pairwise Add and Accumulate Long (u8 => u16) */
|
||||||
|
tmp = vpadalq_u8(tmp, bytes2);
|
||||||
|
|
||||||
|
/* Vector Pairwise Add and Accumulate Long (u16 => u32) */
|
||||||
|
v_s1 = vpadalq_u16(v_s1, tmp);
|
||||||
|
|
||||||
|
/* Vector Add Wide (u8 => u16) */
|
||||||
|
v_byte_sums_a = vaddw_u8(v_byte_sums_a, vget_low_u8(bytes1));
|
||||||
|
v_byte_sums_b = vaddw_u8(v_byte_sums_b, vget_high_u8(bytes1));
|
||||||
|
v_byte_sums_c = vaddw_u8(v_byte_sums_c, vget_low_u8(bytes2));
|
||||||
|
v_byte_sums_d = vaddw_u8(v_byte_sums_d, vget_high_u8(bytes2));
|
||||||
|
|
||||||
|
} while (p != end);
|
||||||
|
|
||||||
|
/* Vector Shift Left (u32) */
|
||||||
|
v_s2 = vqshlq_n_u32(v_s2, 5);
|
||||||
|
|
||||||
|
/* Vector Multiply Accumulate Long (u16 => u32) */
|
||||||
|
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_a), (uint16x4_t) { 32, 31, 30, 29 });
|
||||||
|
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_byte_sums_a), (uint16x4_t) { 28, 27, 26, 25 });
|
||||||
|
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_b), (uint16x4_t) { 24, 23, 22, 21 });
|
||||||
|
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_byte_sums_b), (uint16x4_t) { 20, 19, 18, 17 });
|
||||||
|
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_c), (uint16x4_t) { 16, 15, 14, 13 });
|
||||||
|
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_byte_sums_c), (uint16x4_t) { 12, 11, 10, 9 });
|
||||||
|
v_s2 = vmlal_u16(v_s2, vget_low_u16 (v_byte_sums_d), (uint16x4_t) { 8, 7, 6, 5 });
|
||||||
|
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_byte_sums_d), (uint16x4_t) { 4, 3, 2, 1 });
|
||||||
|
|
||||||
|
*s1 += v_s1[0] + v_s1[1] + v_s1[2] + v_s1[3];
|
||||||
|
*s2 += v_s2[0] + v_s2[1] + v_s2[2] + v_s2[3];
|
||||||
|
}
|
||||||
|
# include "../adler32_vec_template.h"
|
||||||
|
#endif /* NEON implementation */
|
||||||
|
|
||||||
|
#ifdef DISPATCH
|
||||||
|
static inline adler32_func_t
|
||||||
|
arch_select_adler32_func(void)
|
||||||
|
{
|
||||||
|
u32 features = get_cpu_features();
|
||||||
|
|
||||||
|
#ifdef DISPATCH_NEON
|
||||||
|
if (features & ARM_CPU_FEATURE_NEON)
|
||||||
|
return adler32_neon;
|
||||||
|
#endif
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
#endif /* DISPATCH */
|
||||||
|
|
||||||
|
#endif /* LIB_ARM_ADLER32_IMPL_H */
|
||||||
133
lib/gdeflate/libdeflate/lib/arm/cpu_features.c
Normal file
133
lib/gdeflate/libdeflate/lib/arm/cpu_features.c
Normal file
|
|
@ -0,0 +1,133 @@
|
||||||
|
/*
|
||||||
|
* arm/cpu_features.c - feature detection for ARM processors
|
||||||
|
*
|
||||||
|
* Copyright 2018 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ARM processors don't have a standard way for unprivileged programs to detect
|
||||||
|
* processor features. But, on Linux we can read the AT_HWCAP and AT_HWCAP2
|
||||||
|
* values from /proc/self/auxv.
|
||||||
|
*
|
||||||
|
* Ideally we'd use the C library function getauxval(), but it's not guaranteed
|
||||||
|
* to be available: it was only added to glibc in 2.16, and in Android it was
|
||||||
|
* added to API level 18 for ARM and level 21 for AArch64.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "../cpu_features_common.h" /* must be included first */
|
||||||
|
#include "cpu_features.h"
|
||||||
|
|
||||||
|
#if ARM_CPU_FEATURES_ENABLED
|
||||||
|
|
||||||
|
#include <errno.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#define AT_HWCAP 16
|
||||||
|
#define AT_HWCAP2 26
|
||||||
|
|
||||||
|
volatile u32 _cpu_features = 0;
|
||||||
|
|
||||||
|
static void scan_auxv(unsigned long *hwcap, unsigned long *hwcap2)
|
||||||
|
{
|
||||||
|
int fd;
|
||||||
|
unsigned long auxbuf[32];
|
||||||
|
int filled = 0;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
fd = open("/proc/self/auxv", O_RDONLY);
|
||||||
|
if (fd < 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
do {
|
||||||
|
int ret = read(fd, &((char *)auxbuf)[filled],
|
||||||
|
sizeof(auxbuf) - filled);
|
||||||
|
if (ret <= 0) {
|
||||||
|
if (ret < 0 && errno == EINTR)
|
||||||
|
continue;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
filled += ret;
|
||||||
|
} while (filled < 2 * sizeof(long));
|
||||||
|
|
||||||
|
i = 0;
|
||||||
|
do {
|
||||||
|
unsigned long type = auxbuf[i];
|
||||||
|
unsigned long value = auxbuf[i + 1];
|
||||||
|
|
||||||
|
if (type == AT_HWCAP)
|
||||||
|
*hwcap = value;
|
||||||
|
else if (type == AT_HWCAP2)
|
||||||
|
*hwcap2 = value;
|
||||||
|
i += 2;
|
||||||
|
filled -= 2 * sizeof(long);
|
||||||
|
} while (filled >= 2 * sizeof(long));
|
||||||
|
|
||||||
|
memmove(auxbuf, &auxbuf[i], filled);
|
||||||
|
}
|
||||||
|
out:
|
||||||
|
close(fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct cpu_feature arm_cpu_feature_table[] = {
|
||||||
|
{ARM_CPU_FEATURE_NEON, "neon"},
|
||||||
|
{ARM_CPU_FEATURE_PMULL, "pmull"},
|
||||||
|
{ARM_CPU_FEATURE_CRC32, "crc32"},
|
||||||
|
};
|
||||||
|
|
||||||
|
void setup_cpu_features(void)
|
||||||
|
{
|
||||||
|
u32 features = 0;
|
||||||
|
unsigned long hwcap = 0;
|
||||||
|
unsigned long hwcap2 = 0;
|
||||||
|
|
||||||
|
scan_auxv(&hwcap, &hwcap2);
|
||||||
|
|
||||||
|
#ifdef __arm__
|
||||||
|
STATIC_ASSERT(sizeof(long) == 4);
|
||||||
|
if (hwcap & (1 << 12)) /* HWCAP_NEON */
|
||||||
|
features |= ARM_CPU_FEATURE_NEON;
|
||||||
|
if (hwcap2 & (1 << 1)) /* HWCAP2_PMULL */
|
||||||
|
features |= ARM_CPU_FEATURE_PMULL;
|
||||||
|
if (hwcap2 & (1 << 4)) /* HWCAP2_CRC32 */
|
||||||
|
features |= ARM_CPU_FEATURE_CRC32;
|
||||||
|
#else
|
||||||
|
STATIC_ASSERT(sizeof(long) == 8);
|
||||||
|
if (hwcap & (1 << 1)) /* HWCAP_ASIMD */
|
||||||
|
features |= ARM_CPU_FEATURE_NEON;
|
||||||
|
if (hwcap & (1 << 4)) /* HWCAP_PMULL */
|
||||||
|
features |= ARM_CPU_FEATURE_PMULL;
|
||||||
|
if (hwcap & (1 << 7)) /* HWCAP_CRC32 */
|
||||||
|
features |= ARM_CPU_FEATURE_CRC32;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
disable_cpu_features_for_testing(&features, arm_cpu_feature_table,
|
||||||
|
ARRAY_LEN(arm_cpu_feature_table));
|
||||||
|
|
||||||
|
_cpu_features = features | ARM_CPU_FEATURES_KNOWN;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* ARM_CPU_FEATURES_ENABLED */
|
||||||
40
lib/gdeflate/libdeflate/lib/arm/cpu_features.h
Normal file
40
lib/gdeflate/libdeflate/lib/arm/cpu_features.h
Normal file
|
|
@ -0,0 +1,40 @@
|
||||||
|
/*
|
||||||
|
* arm/cpu_features.h - feature detection for ARM processors
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIB_ARM_CPU_FEATURES_H
|
||||||
|
#define LIB_ARM_CPU_FEATURES_H
|
||||||
|
|
||||||
|
#include "../lib_common.h"
|
||||||
|
|
||||||
|
#if (defined(__arm__) || defined(__aarch64__)) && \
|
||||||
|
defined(__linux__) && \
|
||||||
|
COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE && \
|
||||||
|
!defined(FREESTANDING)
|
||||||
|
# define ARM_CPU_FEATURES_ENABLED 1
|
||||||
|
#else
|
||||||
|
# define ARM_CPU_FEATURES_ENABLED 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if ARM_CPU_FEATURES_ENABLED
|
||||||
|
|
||||||
|
#define ARM_CPU_FEATURE_NEON 0x00000001
|
||||||
|
#define ARM_CPU_FEATURE_PMULL 0x00000002
|
||||||
|
#define ARM_CPU_FEATURE_CRC32 0x00000004
|
||||||
|
|
||||||
|
#define ARM_CPU_FEATURES_KNOWN 0x80000000
|
||||||
|
|
||||||
|
extern volatile u32 _cpu_features;
|
||||||
|
|
||||||
|
void setup_cpu_features(void);
|
||||||
|
|
||||||
|
static inline u32 get_cpu_features(void)
|
||||||
|
{
|
||||||
|
if (_cpu_features == 0)
|
||||||
|
setup_cpu_features();
|
||||||
|
return _cpu_features;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* ARM_CPU_FEATURES_ENABLED */
|
||||||
|
|
||||||
|
#endif /* LIB_ARM_CPU_FEATURES_H */
|
||||||
247
lib/gdeflate/libdeflate/lib/arm/crc32_impl.h
Normal file
247
lib/gdeflate/libdeflate/lib/arm/crc32_impl.h
Normal file
|
|
@ -0,0 +1,247 @@
|
||||||
|
/*
|
||||||
|
* arm/crc32_impl.h
|
||||||
|
*
|
||||||
|
* Copyright 2017 Jun He <jun.he@linaro.org>
|
||||||
|
* Copyright 2018 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIB_ARM_CRC32_IMPL_H
|
||||||
|
#define LIB_ARM_CRC32_IMPL_H
|
||||||
|
|
||||||
|
#include "cpu_features.h"
|
||||||
|
|
||||||
|
/* Implementation using ARM CRC32 instructions */
|
||||||
|
#undef DISPATCH_ARM
|
||||||
|
#if !defined(DEFAULT_IMPL) && \
|
||||||
|
(defined(__ARM_FEATURE_CRC32) || \
|
||||||
|
(ARM_CPU_FEATURES_ENABLED && COMPILER_SUPPORTS_CRC32_TARGET_INTRINSICS))
|
||||||
|
# ifdef __ARM_FEATURE_CRC32
|
||||||
|
# define ATTRIBUTES
|
||||||
|
# define DEFAULT_IMPL crc32_arm
|
||||||
|
# else
|
||||||
|
# ifdef __arm__
|
||||||
|
# ifdef __clang__
|
||||||
|
# define ATTRIBUTES __attribute__((target("armv8-a,crc")))
|
||||||
|
# else
|
||||||
|
# define ATTRIBUTES __attribute__((target("arch=armv8-a+crc")))
|
||||||
|
# endif
|
||||||
|
# else
|
||||||
|
# ifdef __clang__
|
||||||
|
# define ATTRIBUTES __attribute__((target("crc")))
|
||||||
|
# else
|
||||||
|
# define ATTRIBUTES __attribute__((target("+crc")))
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
|
# define DISPATCH 1
|
||||||
|
# define DISPATCH_ARM 1
|
||||||
|
# endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* gcc's (as of 10.1) version of arm_acle.h for arm32, and clang's (as of
|
||||||
|
* 10.0.1) version of arm_acle.h for both arm32 and arm64, have a bug where they
|
||||||
|
* only define the CRC32 functions like __crc32b() when __ARM_FEATURE_CRC32 is
|
||||||
|
* defined. That prevents them from being used via __attribute__((target)) when
|
||||||
|
* the main target doesn't have CRC32 support enabled. The actual built-ins
|
||||||
|
* like __builtin_arm_crc32b() are available and work, however; it's just the
|
||||||
|
* wrappers in arm_acle.h like __crc32b() that erroneously don't get defined.
|
||||||
|
* Work around this by manually defining __ARM_FEATURE_CRC32.
|
||||||
|
*/
|
||||||
|
#ifndef __ARM_FEATURE_CRC32
|
||||||
|
# define __ARM_FEATURE_CRC32 1
|
||||||
|
#endif
|
||||||
|
#include <arm_acle.h>
|
||||||
|
|
||||||
|
static u32 ATTRIBUTES
|
||||||
|
crc32_arm(u32 remainder, const u8 *p, size_t size)
|
||||||
|
{
|
||||||
|
while (size != 0 && (uintptr_t)p & 7) {
|
||||||
|
remainder = __crc32b(remainder, *p++);
|
||||||
|
size--;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (size >= 32) {
|
||||||
|
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 0)));
|
||||||
|
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 1)));
|
||||||
|
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 2)));
|
||||||
|
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 3)));
|
||||||
|
p += 32;
|
||||||
|
size -= 32;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (size >= 8) {
|
||||||
|
remainder = __crc32d(remainder, le64_bswap(*(u64 *)p));
|
||||||
|
p += 8;
|
||||||
|
size -= 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (size != 0) {
|
||||||
|
remainder = __crc32b(remainder, *p++);
|
||||||
|
size--;
|
||||||
|
}
|
||||||
|
|
||||||
|
return remainder;
|
||||||
|
}
|
||||||
|
#undef ATTRIBUTES
|
||||||
|
#endif /* Implementation using ARM CRC32 instructions */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* CRC-32 folding with ARM Crypto extension-PMULL
|
||||||
|
*
|
||||||
|
* This works the same way as the x86 PCLMUL version.
|
||||||
|
* See x86/crc32_pclmul_template.h for an explanation.
|
||||||
|
*/
|
||||||
|
#undef DISPATCH_PMULL
|
||||||
|
#if !defined(DEFAULT_IMPL) && \
|
||||||
|
(defined(__ARM_FEATURE_CRYPTO) || \
|
||||||
|
(ARM_CPU_FEATURES_ENABLED && \
|
||||||
|
COMPILER_SUPPORTS_PMULL_TARGET_INTRINSICS)) && \
|
||||||
|
/* not yet tested on big endian, probably needs changes to work there */ \
|
||||||
|
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||||
|
# define FUNCNAME crc32_pmull
|
||||||
|
# define FUNCNAME_ALIGNED crc32_pmull_aligned
|
||||||
|
# ifdef __ARM_FEATURE_CRYPTO
|
||||||
|
# define ATTRIBUTES
|
||||||
|
# define DEFAULT_IMPL crc32_pmull
|
||||||
|
# else
|
||||||
|
# ifdef __arm__
|
||||||
|
# define ATTRIBUTES __attribute__((target("fpu=crypto-neon-fp-armv8")))
|
||||||
|
# else
|
||||||
|
# ifdef __clang__
|
||||||
|
# define ATTRIBUTES __attribute__((target("crypto")))
|
||||||
|
# else
|
||||||
|
# define ATTRIBUTES __attribute__((target("+crypto")))
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
|
# define DISPATCH 1
|
||||||
|
# define DISPATCH_PMULL 1
|
||||||
|
# endif
|
||||||
|
|
||||||
|
#include <arm_neon.h>
|
||||||
|
|
||||||
|
static forceinline ATTRIBUTES uint8x16_t
|
||||||
|
clmul_00(uint8x16_t a, uint8x16_t b)
|
||||||
|
{
|
||||||
|
return (uint8x16_t)vmull_p64((poly64_t)vget_low_u8(a),
|
||||||
|
(poly64_t)vget_low_u8(b));
|
||||||
|
}
|
||||||
|
|
||||||
|
static forceinline ATTRIBUTES uint8x16_t
|
||||||
|
clmul_10(uint8x16_t a, uint8x16_t b)
|
||||||
|
{
|
||||||
|
return (uint8x16_t)vmull_p64((poly64_t)vget_low_u8(a),
|
||||||
|
(poly64_t)vget_high_u8(b));
|
||||||
|
}
|
||||||
|
|
||||||
|
static forceinline ATTRIBUTES uint8x16_t
|
||||||
|
clmul_11(uint8x16_t a, uint8x16_t b)
|
||||||
|
{
|
||||||
|
return (uint8x16_t)vmull_high_p64((poly64x2_t)a, (poly64x2_t)b);
|
||||||
|
}
|
||||||
|
|
||||||
|
static forceinline ATTRIBUTES uint8x16_t
|
||||||
|
fold_128b(uint8x16_t dst, uint8x16_t src, uint8x16_t multipliers)
|
||||||
|
{
|
||||||
|
return dst ^ clmul_00(src, multipliers) ^ clmul_11(src, multipliers);
|
||||||
|
}
|
||||||
|
|
||||||
|
static forceinline ATTRIBUTES u32
|
||||||
|
crc32_pmull_aligned(u32 remainder, const uint8x16_t *p, size_t nr_segs)
|
||||||
|
{
|
||||||
|
/* Constants precomputed by gen_crc32_multipliers.c. Do not edit! */
|
||||||
|
const uint8x16_t multipliers_4 =
|
||||||
|
(uint8x16_t)(uint64x2_t){ 0x8F352D95, 0x1D9513D7 };
|
||||||
|
const uint8x16_t multipliers_1 =
|
||||||
|
(uint8x16_t)(uint64x2_t){ 0xAE689191, 0xCCAA009E };
|
||||||
|
const uint8x16_t final_multiplier =
|
||||||
|
(uint8x16_t)(uint64x2_t){ 0xB8BC6765 };
|
||||||
|
const uint8x16_t mask32 = (uint8x16_t)(uint32x4_t){ 0xFFFFFFFF };
|
||||||
|
const uint8x16_t barrett_reduction_constants =
|
||||||
|
(uint8x16_t)(uint64x2_t){ 0x00000001F7011641,
|
||||||
|
0x00000001DB710641 };
|
||||||
|
const uint8x16_t zeroes = (uint8x16_t){ 0 };
|
||||||
|
|
||||||
|
const uint8x16_t * const end = p + nr_segs;
|
||||||
|
const uint8x16_t * const end512 = p + (nr_segs & ~3);
|
||||||
|
uint8x16_t x0, x1, x2, x3;
|
||||||
|
|
||||||
|
x0 = *p++ ^ (uint8x16_t)(uint32x4_t){ remainder };
|
||||||
|
if (nr_segs >= 4) {
|
||||||
|
x1 = *p++;
|
||||||
|
x2 = *p++;
|
||||||
|
x3 = *p++;
|
||||||
|
|
||||||
|
/* Fold 512 bits at a time */
|
||||||
|
while (p != end512) {
|
||||||
|
x0 = fold_128b(*p++, x0, multipliers_4);
|
||||||
|
x1 = fold_128b(*p++, x1, multipliers_4);
|
||||||
|
x2 = fold_128b(*p++, x2, multipliers_4);
|
||||||
|
x3 = fold_128b(*p++, x3, multipliers_4);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fold 512 bits => 128 bits */
|
||||||
|
x1 = fold_128b(x1, x0, multipliers_1);
|
||||||
|
x2 = fold_128b(x2, x1, multipliers_1);
|
||||||
|
x0 = fold_128b(x3, x2, multipliers_1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fold 128 bits at a time */
|
||||||
|
while (p != end)
|
||||||
|
x0 = fold_128b(*p++, x0, multipliers_1);
|
||||||
|
|
||||||
|
/* Fold 128 => 96 bits, implicitly appending 32 zeroes */
|
||||||
|
x0 = vextq_u8(x0, zeroes, 8) ^ clmul_10(x0, multipliers_1);
|
||||||
|
|
||||||
|
/* Fold 96 => 64 bits */
|
||||||
|
x0 = vextq_u8(x0, zeroes, 4) ^ clmul_00(x0 & mask32, final_multiplier);
|
||||||
|
|
||||||
|
/* Reduce 64 => 32 bits using Barrett reduction */
|
||||||
|
x1 = x0;
|
||||||
|
x0 = clmul_00(x0 & mask32, barrett_reduction_constants);
|
||||||
|
x0 = clmul_10(x0 & mask32, barrett_reduction_constants);
|
||||||
|
return vgetq_lane_u32((uint32x4_t)(x0 ^ x1), 1);
|
||||||
|
}
|
||||||
|
#define IMPL_ALIGNMENT 16
|
||||||
|
#define IMPL_SEGMENT_SIZE 16
|
||||||
|
#include "../crc32_vec_template.h"
|
||||||
|
#endif /* PMULL implementation */
|
||||||
|
|
||||||
|
#ifdef DISPATCH
|
||||||
|
static inline crc32_func_t
|
||||||
|
arch_select_crc32_func(void)
|
||||||
|
{
|
||||||
|
u32 features = get_cpu_features();
|
||||||
|
|
||||||
|
#ifdef DISPATCH_ARM
|
||||||
|
if (features & ARM_CPU_FEATURE_CRC32)
|
||||||
|
return crc32_arm;
|
||||||
|
#endif
|
||||||
|
#ifdef DISPATCH_PMULL
|
||||||
|
if (features & ARM_CPU_FEATURE_PMULL)
|
||||||
|
return crc32_pmull;
|
||||||
|
#endif
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
#endif /* DISPATCH */
|
||||||
|
|
||||||
|
#endif /* LIB_ARM_CRC32_IMPL_H */
|
||||||
86
lib/gdeflate/libdeflate/lib/arm/matchfinder_impl.h
Normal file
86
lib/gdeflate/libdeflate/lib/arm/matchfinder_impl.h
Normal file
|
|
@ -0,0 +1,86 @@
|
||||||
|
/*
|
||||||
|
* arm/matchfinder_impl.h - ARM implementations of matchfinder functions
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIB_ARM_MATCHFINDER_IMPL_H
|
||||||
|
#define LIB_ARM_MATCHFINDER_IMPL_H
|
||||||
|
|
||||||
|
#ifdef __ARM_NEON
|
||||||
|
# include <arm_neon.h>
|
||||||
|
static forceinline void
|
||||||
|
matchfinder_init_neon(mf_pos_t *data, size_t size)
|
||||||
|
{
|
||||||
|
int16x8_t *p = (int16x8_t *)data;
|
||||||
|
int16x8_t v = (int16x8_t) {
|
||||||
|
MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
|
||||||
|
MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
|
||||||
|
MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
|
||||||
|
};
|
||||||
|
|
||||||
|
STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
|
||||||
|
STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
|
||||||
|
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
||||||
|
|
||||||
|
do {
|
||||||
|
p[0] = v;
|
||||||
|
p[1] = v;
|
||||||
|
p[2] = v;
|
||||||
|
p[3] = v;
|
||||||
|
p += 4;
|
||||||
|
size -= 4 * sizeof(*p);
|
||||||
|
} while (size != 0);
|
||||||
|
}
|
||||||
|
#define matchfinder_init matchfinder_init_neon
|
||||||
|
|
||||||
|
static forceinline void
|
||||||
|
matchfinder_rebase_neon(mf_pos_t *data, size_t size)
|
||||||
|
{
|
||||||
|
int16x8_t *p = (int16x8_t *)data;
|
||||||
|
int16x8_t v = (int16x8_t) {
|
||||||
|
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
||||||
|
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
||||||
|
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
||||||
|
(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
|
||||||
|
};
|
||||||
|
|
||||||
|
STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
|
||||||
|
STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
|
||||||
|
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
||||||
|
|
||||||
|
do {
|
||||||
|
p[0] = vqaddq_s16(p[0], v);
|
||||||
|
p[1] = vqaddq_s16(p[1], v);
|
||||||
|
p[2] = vqaddq_s16(p[2], v);
|
||||||
|
p[3] = vqaddq_s16(p[3], v);
|
||||||
|
p += 4;
|
||||||
|
size -= 4 * sizeof(*p);
|
||||||
|
} while (size != 0);
|
||||||
|
}
|
||||||
|
#define matchfinder_rebase matchfinder_rebase_neon
|
||||||
|
|
||||||
|
#endif /* __ARM_NEON */
|
||||||
|
|
||||||
|
#endif /* LIB_ARM_MATCHFINDER_IMPL_H */
|
||||||
363
lib/gdeflate/libdeflate/lib/bt_matchfinder.h
Normal file
363
lib/gdeflate/libdeflate/lib/bt_matchfinder.h
Normal file
|
|
@ -0,0 +1,363 @@
|
||||||
|
/*
|
||||||
|
* bt_matchfinder.h - Lempel-Ziv matchfinding with a hash table of binary trees
|
||||||
|
*
|
||||||
|
* Originally public domain; changes after 2016-09-07 are copyrighted.
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
* ----------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* This is a Binary Trees (bt) based matchfinder.
|
||||||
|
*
|
||||||
|
* The main data structure is a hash table where each hash bucket contains a
|
||||||
|
* binary tree of sequences whose first 4 bytes share the same hash code. Each
|
||||||
|
* sequence is identified by its starting position in the input buffer. Each
|
||||||
|
* binary tree is always sorted such that each left child represents a sequence
|
||||||
|
* lexicographically lesser than its parent and each right child represents a
|
||||||
|
* sequence lexicographically greater than its parent.
|
||||||
|
*
|
||||||
|
* The algorithm processes the input buffer sequentially. At each byte
|
||||||
|
* position, the hash code of the first 4 bytes of the sequence beginning at
|
||||||
|
* that position (the sequence being matched against) is computed. This
|
||||||
|
* identifies the hash bucket to use for that position. Then, a new binary tree
|
||||||
|
* node is created to represent the current sequence. Then, in a single tree
|
||||||
|
* traversal, the hash bucket's binary tree is searched for matches and is
|
||||||
|
* re-rooted at the new node.
|
||||||
|
*
|
||||||
|
* Compared to the simpler algorithm that uses linked lists instead of binary
|
||||||
|
* trees (see hc_matchfinder.h), the binary tree version gains more information
|
||||||
|
* at each node visitation. Ideally, the binary tree version will examine only
|
||||||
|
* 'log(n)' nodes to find the same matches that the linked list version will
|
||||||
|
* find by examining 'n' nodes. In addition, the binary tree version can
|
||||||
|
* examine fewer bytes at each node by taking advantage of the common prefixes
|
||||||
|
* that result from the sort order, whereas the linked list version may have to
|
||||||
|
* examine up to the full length of the match at each node.
|
||||||
|
*
|
||||||
|
* However, it is not always best to use the binary tree version. It requires
|
||||||
|
* nearly twice as much memory as the linked list version, and it takes time to
|
||||||
|
* keep the binary trees sorted, even at positions where the compressor does not
|
||||||
|
* need matches. Generally, when doing fast compression on small buffers,
|
||||||
|
* binary trees are the wrong approach. They are best suited for thorough
|
||||||
|
* compression and/or large buffers.
|
||||||
|
*
|
||||||
|
* ----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIB_BT_MATCHFINDER_H
|
||||||
|
#define LIB_BT_MATCHFINDER_H
|
||||||
|
|
||||||
|
#include "matchfinder_common.h"
|
||||||
|
|
||||||
|
#define BT_MATCHFINDER_HASH3_ORDER 16
|
||||||
|
#define BT_MATCHFINDER_HASH3_WAYS 2
|
||||||
|
#define BT_MATCHFINDER_HASH4_ORDER 16
|
||||||
|
|
||||||
|
#define BT_MATCHFINDER_TOTAL_HASH_SIZE \
|
||||||
|
(((1UL << BT_MATCHFINDER_HASH3_ORDER) * BT_MATCHFINDER_HASH3_WAYS + \
|
||||||
|
(1UL << BT_MATCHFINDER_HASH4_ORDER)) * sizeof(mf_pos_t))
|
||||||
|
|
||||||
|
/* Representation of a match found by the bt_matchfinder */
|
||||||
|
struct lz_match {
|
||||||
|
|
||||||
|
/* The number of bytes matched. */
|
||||||
|
u16 length;
|
||||||
|
|
||||||
|
/* The offset back from the current position that was matched. */
|
||||||
|
u16 offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct bt_matchfinder {
|
||||||
|
|
||||||
|
/* The hash table for finding length 3 matches */
|
||||||
|
mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER][BT_MATCHFINDER_HASH3_WAYS];
|
||||||
|
|
||||||
|
/* The hash table which contains the roots of the binary trees for
|
||||||
|
* finding length 4+ matches */
|
||||||
|
mf_pos_t hash4_tab[1UL << BT_MATCHFINDER_HASH4_ORDER];
|
||||||
|
|
||||||
|
/* The child node references for the binary trees. The left and right
|
||||||
|
* children of the node for the sequence with position 'pos' are
|
||||||
|
* 'child_tab[pos * 2]' and 'child_tab[pos * 2 + 1]', respectively. */
|
||||||
|
mf_pos_t child_tab[2UL * MATCHFINDER_WINDOW_SIZE];
|
||||||
|
|
||||||
|
}
|
||||||
|
#ifdef _aligned_attribute
|
||||||
|
_aligned_attribute(MATCHFINDER_MEM_ALIGNMENT)
|
||||||
|
#endif
|
||||||
|
;
|
||||||
|
|
||||||
|
/* Prepare the matchfinder for a new input buffer. */
|
||||||
|
static forceinline void
|
||||||
|
bt_matchfinder_init(struct bt_matchfinder *mf)
|
||||||
|
{
|
||||||
|
STATIC_ASSERT(BT_MATCHFINDER_TOTAL_HASH_SIZE %
|
||||||
|
MATCHFINDER_SIZE_ALIGNMENT == 0);
|
||||||
|
|
||||||
|
matchfinder_init((mf_pos_t *)mf, BT_MATCHFINDER_TOTAL_HASH_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
static forceinline void
|
||||||
|
bt_matchfinder_slide_window(struct bt_matchfinder *mf)
|
||||||
|
{
|
||||||
|
STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
|
||||||
|
|
||||||
|
matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
|
||||||
|
}
|
||||||
|
|
||||||
|
static forceinline mf_pos_t *
|
||||||
|
bt_left_child(struct bt_matchfinder *mf, s32 node)
|
||||||
|
{
|
||||||
|
return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 0];
|
||||||
|
}
|
||||||
|
|
||||||
|
static forceinline mf_pos_t *
|
||||||
|
bt_right_child(struct bt_matchfinder *mf, s32 node)
|
||||||
|
{
|
||||||
|
return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The minimum permissible value of 'max_len' for bt_matchfinder_get_matches()
|
||||||
|
* and bt_matchfinder_skip_position(). There must be sufficiently many bytes
|
||||||
|
* remaining to load a 32-bit integer from the *next* position. */
|
||||||
|
#define BT_MATCHFINDER_REQUIRED_NBYTES 5
|
||||||
|
|
||||||
|
/* Advance the binary tree matchfinder by one byte, optionally recording
|
||||||
|
* matches. @record_matches should be a compile-time constant. */
|
||||||
|
static forceinline struct lz_match *
|
||||||
|
bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf,
|
||||||
|
const u8 * const restrict in_base,
|
||||||
|
const ptrdiff_t cur_pos,
|
||||||
|
const u32 max_len,
|
||||||
|
const u32 nice_len,
|
||||||
|
const u32 max_search_depth,
|
||||||
|
u32 * const restrict next_hashes,
|
||||||
|
u32 * const restrict best_len_ret,
|
||||||
|
struct lz_match * restrict lz_matchptr,
|
||||||
|
const bool record_matches)
|
||||||
|
{
|
||||||
|
const u8 *in_next = in_base + cur_pos;
|
||||||
|
u32 depth_remaining = max_search_depth;
|
||||||
|
const s32 cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
|
||||||
|
u32 next_hashseq;
|
||||||
|
u32 hash3;
|
||||||
|
u32 hash4;
|
||||||
|
s32 cur_node;
|
||||||
|
#if BT_MATCHFINDER_HASH3_WAYS >= 2
|
||||||
|
s32 cur_node_2;
|
||||||
|
#endif
|
||||||
|
const u8 *matchptr;
|
||||||
|
mf_pos_t *pending_lt_ptr, *pending_gt_ptr;
|
||||||
|
u32 best_lt_len, best_gt_len;
|
||||||
|
u32 len;
|
||||||
|
u32 best_len = 3;
|
||||||
|
|
||||||
|
STATIC_ASSERT(BT_MATCHFINDER_HASH3_WAYS >= 1 &&
|
||||||
|
BT_MATCHFINDER_HASH3_WAYS <= 2);
|
||||||
|
|
||||||
|
next_hashseq = get_unaligned_le32(in_next + 1);
|
||||||
|
|
||||||
|
hash3 = next_hashes[0];
|
||||||
|
hash4 = next_hashes[1];
|
||||||
|
|
||||||
|
next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, BT_MATCHFINDER_HASH3_ORDER);
|
||||||
|
next_hashes[1] = lz_hash(next_hashseq, BT_MATCHFINDER_HASH4_ORDER);
|
||||||
|
prefetchw(&mf->hash3_tab[next_hashes[0]]);
|
||||||
|
prefetchw(&mf->hash4_tab[next_hashes[1]]);
|
||||||
|
|
||||||
|
cur_node = mf->hash3_tab[hash3][0];
|
||||||
|
mf->hash3_tab[hash3][0] = cur_pos;
|
||||||
|
#if BT_MATCHFINDER_HASH3_WAYS >= 2
|
||||||
|
cur_node_2 = mf->hash3_tab[hash3][1];
|
||||||
|
mf->hash3_tab[hash3][1] = cur_node;
|
||||||
|
#endif
|
||||||
|
if (record_matches && cur_node > cutoff) {
|
||||||
|
u32 seq3 = load_u24_unaligned(in_next);
|
||||||
|
if (seq3 == load_u24_unaligned(&in_base[cur_node])) {
|
||||||
|
lz_matchptr->length = 3;
|
||||||
|
lz_matchptr->offset = in_next - &in_base[cur_node];
|
||||||
|
lz_matchptr++;
|
||||||
|
}
|
||||||
|
#if BT_MATCHFINDER_HASH3_WAYS >= 2
|
||||||
|
else if (cur_node_2 > cutoff &&
|
||||||
|
seq3 == load_u24_unaligned(&in_base[cur_node_2]))
|
||||||
|
{
|
||||||
|
lz_matchptr->length = 3;
|
||||||
|
lz_matchptr->offset = in_next - &in_base[cur_node_2];
|
||||||
|
lz_matchptr++;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
cur_node = mf->hash4_tab[hash4];
|
||||||
|
mf->hash4_tab[hash4] = cur_pos;
|
||||||
|
|
||||||
|
pending_lt_ptr = bt_left_child(mf, cur_pos);
|
||||||
|
pending_gt_ptr = bt_right_child(mf, cur_pos);
|
||||||
|
|
||||||
|
if (cur_node <= cutoff) {
|
||||||
|
*pending_lt_ptr = MATCHFINDER_INITVAL;
|
||||||
|
*pending_gt_ptr = MATCHFINDER_INITVAL;
|
||||||
|
*best_len_ret = best_len;
|
||||||
|
return lz_matchptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
best_lt_len = 0;
|
||||||
|
best_gt_len = 0;
|
||||||
|
len = 0;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
matchptr = &in_base[cur_node];
|
||||||
|
|
||||||
|
if (matchptr[len] == in_next[len]) {
|
||||||
|
len = lz_extend(in_next, matchptr, len + 1, max_len);
|
||||||
|
if (!record_matches || len > best_len) {
|
||||||
|
if (record_matches) {
|
||||||
|
best_len = len;
|
||||||
|
lz_matchptr->length = len;
|
||||||
|
lz_matchptr->offset = in_next - matchptr;
|
||||||
|
lz_matchptr++;
|
||||||
|
}
|
||||||
|
if (len >= nice_len) {
|
||||||
|
*pending_lt_ptr = *bt_left_child(mf, cur_node);
|
||||||
|
*pending_gt_ptr = *bt_right_child(mf, cur_node);
|
||||||
|
*best_len_ret = best_len;
|
||||||
|
return lz_matchptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (matchptr[len] < in_next[len]) {
|
||||||
|
*pending_lt_ptr = cur_node;
|
||||||
|
pending_lt_ptr = bt_right_child(mf, cur_node);
|
||||||
|
cur_node = *pending_lt_ptr;
|
||||||
|
best_lt_len = len;
|
||||||
|
if (best_gt_len < len)
|
||||||
|
len = best_gt_len;
|
||||||
|
} else {
|
||||||
|
*pending_gt_ptr = cur_node;
|
||||||
|
pending_gt_ptr = bt_left_child(mf, cur_node);
|
||||||
|
cur_node = *pending_gt_ptr;
|
||||||
|
best_gt_len = len;
|
||||||
|
if (best_lt_len < len)
|
||||||
|
len = best_lt_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cur_node <= cutoff || !--depth_remaining) {
|
||||||
|
*pending_lt_ptr = MATCHFINDER_INITVAL;
|
||||||
|
*pending_gt_ptr = MATCHFINDER_INITVAL;
|
||||||
|
*best_len_ret = best_len;
|
||||||
|
return lz_matchptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Retrieve a list of matches with the current position.
|
||||||
|
*
|
||||||
|
* @mf
|
||||||
|
* The matchfinder structure.
|
||||||
|
* @in_base
|
||||||
|
* Pointer to the next byte in the input buffer to process _at the last
|
||||||
|
* time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
|
||||||
|
* @cur_pos
|
||||||
|
* The current position in the input buffer relative to @in_base (the
|
||||||
|
* position of the sequence being matched against).
|
||||||
|
* @max_len
|
||||||
|
* The maximum permissible match length at this position. Must be >=
|
||||||
|
* BT_MATCHFINDER_REQUIRED_NBYTES.
|
||||||
|
* @nice_len
|
||||||
|
* Stop searching if a match of at least this length is found.
|
||||||
|
* Must be <= @max_len.
|
||||||
|
* @max_search_depth
|
||||||
|
* Limit on the number of potential matches to consider. Must be >= 1.
|
||||||
|
* @next_hashes
|
||||||
|
* The precomputed hash codes for the sequence beginning at @in_next.
|
||||||
|
* These will be used and then updated with the precomputed hashcodes for
|
||||||
|
* the sequence beginning at @in_next + 1.
|
||||||
|
* @best_len_ret
|
||||||
|
* If a match of length >= 4 was found, then the length of the longest such
|
||||||
|
* match is written here; otherwise 3 is written here. (Note: this is
|
||||||
|
* redundant with the 'struct lz_match' array, but this is easier for the
|
||||||
|
* compiler to optimize when inlined and the caller immediately does a
|
||||||
|
* check against 'best_len'.)
|
||||||
|
* @lz_matchptr
|
||||||
|
* An array in which this function will record the matches. The recorded
|
||||||
|
* matches will be sorted by strictly increasing length and (non-strictly)
|
||||||
|
* increasing offset. The maximum number of matches that may be found is
|
||||||
|
* 'nice_len - 2'.
|
||||||
|
*
|
||||||
|
* The return value is a pointer to the next available slot in the @lz_matchptr
|
||||||
|
* array. (If no matches were found, this will be the same as @lz_matchptr.)
|
||||||
|
*/
|
||||||
|
static forceinline struct lz_match *
|
||||||
|
bt_matchfinder_get_matches(struct bt_matchfinder *mf,
|
||||||
|
const u8 *in_base,
|
||||||
|
ptrdiff_t cur_pos,
|
||||||
|
u32 max_len,
|
||||||
|
u32 nice_len,
|
||||||
|
u32 max_search_depth,
|
||||||
|
u32 next_hashes[2],
|
||||||
|
u32 *best_len_ret,
|
||||||
|
struct lz_match *lz_matchptr)
|
||||||
|
{
|
||||||
|
return bt_matchfinder_advance_one_byte(mf,
|
||||||
|
in_base,
|
||||||
|
cur_pos,
|
||||||
|
max_len,
|
||||||
|
nice_len,
|
||||||
|
max_search_depth,
|
||||||
|
next_hashes,
|
||||||
|
best_len_ret,
|
||||||
|
lz_matchptr,
|
||||||
|
true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Advance the matchfinder, but don't record any matches.
|
||||||
|
*
|
||||||
|
* This is very similar to bt_matchfinder_get_matches() because both functions
|
||||||
|
* must do hashing and tree re-rooting.
|
||||||
|
*/
|
||||||
|
static forceinline void
|
||||||
|
bt_matchfinder_skip_position(struct bt_matchfinder *mf,
|
||||||
|
const u8 *in_base,
|
||||||
|
ptrdiff_t cur_pos,
|
||||||
|
u32 nice_len,
|
||||||
|
u32 max_search_depth,
|
||||||
|
u32 next_hashes[2])
|
||||||
|
{
|
||||||
|
u32 best_len;
|
||||||
|
bt_matchfinder_advance_one_byte(mf,
|
||||||
|
in_base,
|
||||||
|
cur_pos,
|
||||||
|
nice_len,
|
||||||
|
nice_len,
|
||||||
|
max_search_depth,
|
||||||
|
next_hashes,
|
||||||
|
&best_len,
|
||||||
|
NULL,
|
||||||
|
false);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* LIB_BT_MATCHFINDER_H */
|
||||||
88
lib/gdeflate/libdeflate/lib/cpu_features_common.h
Normal file
88
lib/gdeflate/libdeflate/lib/cpu_features_common.h
Normal file
|
|
@ -0,0 +1,88 @@
|
||||||
|
/*
|
||||||
|
* cpu_features_common.h - code shared by all lib/$arch/cpu_features.c
|
||||||
|
*
|
||||||
|
* Copyright 2020 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIB_CPU_FEATURES_COMMON_H
|
||||||
|
#define LIB_CPU_FEATURES_COMMON_H
|
||||||
|
|
||||||
|
#if defined(TEST_SUPPORT__DO_NOT_USE) && !defined(FREESTANDING)
|
||||||
|
# define _GNU_SOURCE 1 /* for strdup() and strtok_r() */
|
||||||
|
# include <stdio.h>
|
||||||
|
# include <stdlib.h>
|
||||||
|
# include <string.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "lib_common.h"
|
||||||
|
|
||||||
|
struct cpu_feature {
|
||||||
|
u32 bit;
|
||||||
|
const char *name;
|
||||||
|
};
|
||||||
|
|
||||||
|
#if defined(TEST_SUPPORT__DO_NOT_USE) && !defined(FREESTANDING)
|
||||||
|
/* Disable any features that are listed in $LIBDEFLATE_DISABLE_CPU_FEATURES. */
|
||||||
|
static inline void
|
||||||
|
disable_cpu_features_for_testing(u32 *features,
|
||||||
|
const struct cpu_feature *feature_table,
|
||||||
|
size_t feature_table_length)
|
||||||
|
{
|
||||||
|
char *env_value, *strbuf, *p, *saveptr = NULL;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
env_value = getenv("LIBDEFLATE_DISABLE_CPU_FEATURES");
|
||||||
|
if (!env_value)
|
||||||
|
return;
|
||||||
|
strbuf = strdup(env_value);
|
||||||
|
if (!strbuf)
|
||||||
|
abort();
|
||||||
|
p = strtok_r(strbuf, ",", &saveptr);
|
||||||
|
while (p) {
|
||||||
|
for (i = 0; i < feature_table_length; i++) {
|
||||||
|
if (strcmp(p, feature_table[i].name) == 0) {
|
||||||
|
*features &= ~feature_table[i].bit;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (i == feature_table_length) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"unrecognized feature in LIBDEFLATE_DISABLE_CPU_FEATURES: \"%s\"\n",
|
||||||
|
p);
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
p = strtok_r(NULL, ",", &saveptr);
|
||||||
|
}
|
||||||
|
free(strbuf);
|
||||||
|
}
|
||||||
|
#else /* TEST_SUPPORT__DO_NOT_USE */
|
||||||
|
static inline void
|
||||||
|
disable_cpu_features_for_testing(u32 *features,
|
||||||
|
const struct cpu_feature *feature_table,
|
||||||
|
size_t feature_table_length)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
#endif /* !TEST_SUPPORT__DO_NOT_USE */
|
||||||
|
|
||||||
|
#endif /* LIB_CPU_FEATURES_COMMON_H */
|
||||||
313
lib/gdeflate/libdeflate/lib/crc32.c
Normal file
313
lib/gdeflate/libdeflate/lib/crc32.c
Normal file
|
|
@ -0,0 +1,313 @@
|
||||||
|
/*
|
||||||
|
* crc32.c - CRC-32 checksum algorithm for the gzip format
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* High-level description of CRC
|
||||||
|
* =============================
|
||||||
|
*
|
||||||
|
* Consider a bit sequence 'bits[1...len]'. Interpret 'bits' as the "message"
|
||||||
|
* polynomial M(x) with coefficients in GF(2) (the field of integers modulo 2),
|
||||||
|
* where the coefficient of 'x^i' is 'bits[len - i]'. Then, compute:
|
||||||
|
*
|
||||||
|
* R(x) = M(x)*x^n mod G(x)
|
||||||
|
*
|
||||||
|
* where G(x) is a selected "generator" polynomial of degree 'n'. The remainder
|
||||||
|
* R(x) is a polynomial of max degree 'n - 1'. The CRC of 'bits' is R(x)
|
||||||
|
* interpreted as a bitstring of length 'n'.
|
||||||
|
*
|
||||||
|
* CRC used in gzip
|
||||||
|
* ================
|
||||||
|
*
|
||||||
|
* In the gzip format (RFC 1952):
|
||||||
|
*
|
||||||
|
* - The bitstring to checksum is formed from the bytes of the uncompressed
|
||||||
|
* data by concatenating the bits from the bytes in order, proceeding
|
||||||
|
* from the low-order bit to the high-order bit within each byte.
|
||||||
|
*
|
||||||
|
* - The generator polynomial G(x) is: x^32 + x^26 + x^23 + x^22 + x^16 +
|
||||||
|
* x^12 + x^11 + x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1.
|
||||||
|
* Consequently, the CRC length is 32 bits ("CRC-32").
|
||||||
|
*
|
||||||
|
* - The highest order 32 coefficients of M(x)*x^n are inverted.
|
||||||
|
*
|
||||||
|
* - All 32 coefficients of R(x) are inverted.
|
||||||
|
*
|
||||||
|
* The two inversions cause added leading and trailing zero bits to affect the
|
||||||
|
* resulting CRC, whereas with a regular CRC such bits would have no effect on
|
||||||
|
* the CRC.
|
||||||
|
*
|
||||||
|
* Computation and optimizations
|
||||||
|
* =============================
|
||||||
|
*
|
||||||
|
* We can compute R(x) through "long division", maintaining only 32 bits of
|
||||||
|
* state at any given time. Multiplication by 'x' can be implemented as
|
||||||
|
* right-shifting by 1 (assuming the polynomial<=>bitstring mapping where the
|
||||||
|
* highest order bit represents the coefficient of x^0), and both addition and
|
||||||
|
* subtraction can be implemented as bitwise exclusive OR (since we are working
|
||||||
|
* in GF(2)). Here is an unoptimized implementation:
|
||||||
|
*
|
||||||
|
* static u32 crc32_gzip(const u8 *buffer, size_t size)
|
||||||
|
* {
|
||||||
|
* u32 remainder = 0;
|
||||||
|
* const u32 divisor = 0xEDB88320;
|
||||||
|
*
|
||||||
|
* for (size_t i = 0; i < size * 8 + 32; i++) {
|
||||||
|
* int bit;
|
||||||
|
* u32 multiple;
|
||||||
|
*
|
||||||
|
* if (i < size * 8)
|
||||||
|
* bit = (buffer[i / 8] >> (i % 8)) & 1;
|
||||||
|
* else
|
||||||
|
* bit = 0; // one of the 32 appended 0 bits
|
||||||
|
*
|
||||||
|
* if (i < 32) // the first 32 bits are inverted
|
||||||
|
* bit ^= 1;
|
||||||
|
*
|
||||||
|
* if (remainder & 1)
|
||||||
|
* multiple = divisor;
|
||||||
|
* else
|
||||||
|
* multiple = 0;
|
||||||
|
*
|
||||||
|
* remainder >>= 1;
|
||||||
|
* remainder |= (u32)bit << 31;
|
||||||
|
* remainder ^= multiple;
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* return ~remainder;
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* In this implementation, the 32-bit integer 'remainder' maintains the
|
||||||
|
* remainder of the currently processed portion of the message (with 32 zero
|
||||||
|
* bits appended) when divided by the generator polynomial. 'remainder' is the
|
||||||
|
* representation of R(x), and 'divisor' is the representation of G(x) excluding
|
||||||
|
* the x^32 coefficient. For each bit to process, we multiply R(x) by 'x^1',
|
||||||
|
* then add 'x^0' if the new bit is a 1. If this causes R(x) to gain a nonzero
|
||||||
|
* x^32 term, then we subtract G(x) from R(x).
|
||||||
|
*
|
||||||
|
* We can speed this up by taking advantage of the fact that XOR is commutative
|
||||||
|
* and associative, so the order in which we combine the inputs into 'remainder'
|
||||||
|
* is unimportant. And since each message bit we add doesn't affect the choice
|
||||||
|
* of 'multiple' until 32 bits later, we need not actually add each message bit
|
||||||
|
* until that point:
|
||||||
|
*
|
||||||
|
* static u32 crc32_gzip(const u8 *buffer, size_t size)
|
||||||
|
* {
|
||||||
|
* u32 remainder = ~0;
|
||||||
|
* const u32 divisor = 0xEDB88320;
|
||||||
|
*
|
||||||
|
* for (size_t i = 0; i < size * 8; i++) {
|
||||||
|
* int bit;
|
||||||
|
* u32 multiple;
|
||||||
|
*
|
||||||
|
* bit = (buffer[i / 8] >> (i % 8)) & 1;
|
||||||
|
* remainder ^= bit;
|
||||||
|
* if (remainder & 1)
|
||||||
|
* multiple = divisor;
|
||||||
|
* else
|
||||||
|
* multiple = 0;
|
||||||
|
* remainder >>= 1;
|
||||||
|
* remainder ^= multiple;
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* return ~remainder;
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* With the above implementation we get the effect of 32 appended 0 bits for
|
||||||
|
* free; they never affect the choice of a divisor, nor would they change the
|
||||||
|
* value of 'remainder' if they were to be actually XOR'ed in. And by starting
|
||||||
|
* with a remainder of all 1 bits, we get the effect of complementing the first
|
||||||
|
* 32 message bits.
|
||||||
|
*
|
||||||
|
* The next optimization is to process the input in multi-bit units. Suppose
|
||||||
|
* that we insert the next 'n' message bits into the remainder. Then we get an
|
||||||
|
* intermediate remainder of length '32 + n' bits, and the CRC of the extra 'n'
|
||||||
|
* bits is the amount by which the low 32 bits of the remainder will change as a
|
||||||
|
* result of cancelling out those 'n' bits. Taking n=8 (one byte) and
|
||||||
|
* precomputing a table containing the CRC of each possible byte, we get
|
||||||
|
* crc32_slice1() defined below.
|
||||||
|
*
|
||||||
|
* As a further optimization, we could increase the multi-bit unit size to 16.
|
||||||
|
* However, that is inefficient because the table size explodes from 256 entries
|
||||||
|
* (1024 bytes) to 65536 entries (262144 bytes), which wastes memory and won't
|
||||||
|
* fit in L1 cache on typical processors.
|
||||||
|
*
|
||||||
|
* However, we can actually process 4 bytes at a time using 4 different tables
|
||||||
|
* with 256 entries each. Logically, we form a 64-bit intermediate remainder
|
||||||
|
* and cancel out the high 32 bits in 8-bit chunks. Bits 32-39 are cancelled
|
||||||
|
* out by the CRC of those bits, whereas bits 40-47 are be cancelled out by the
|
||||||
|
* CRC of those bits with 8 zero bits appended, and so on. This method is
|
||||||
|
* implemented in crc32_slice4(), defined below.
|
||||||
|
*
|
||||||
|
* In crc32_slice8(), this method is extended to 8 bytes at a time. The
|
||||||
|
* intermediate remainder (which we never actually store explicitly) is 96 bits.
|
||||||
|
*
|
||||||
|
* On CPUs that support fast carryless multiplication, CRCs can be computed even
|
||||||
|
* more quickly via "folding". See e.g. the x86 PCLMUL implementation.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "lib_common.h"
|
||||||
|
#include "libdeflate.h"
|
||||||
|
|
||||||
|
typedef u32 (*crc32_func_t)(u32, const u8 *, size_t);
|
||||||
|
|
||||||
|
/* Include architecture-specific implementations if available */
|
||||||
|
#undef CRC32_SLICE1
|
||||||
|
#undef CRC32_SLICE4
|
||||||
|
#undef CRC32_SLICE8
|
||||||
|
#undef DEFAULT_IMPL
|
||||||
|
#undef DISPATCH
|
||||||
|
#if defined(__arm__) || defined(__aarch64__)
|
||||||
|
# include "arm/crc32_impl.h"
|
||||||
|
#elif defined(__i386__) || defined(__x86_64__)
|
||||||
|
# include "x86/crc32_impl.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Define a generic implementation (crc32_slice8()) if needed. crc32_slice1()
|
||||||
|
* may also be needed as a fallback for architecture-specific implementations.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef DEFAULT_IMPL
|
||||||
|
# define CRC32_SLICE8 1
|
||||||
|
# define DEFAULT_IMPL crc32_slice8
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(CRC32_SLICE1) || defined(CRC32_SLICE4) || defined(CRC32_SLICE8)
|
||||||
|
#include "crc32_table.h"
|
||||||
|
static forceinline u32
|
||||||
|
crc32_update_byte(u32 remainder, u8 next_byte)
|
||||||
|
{
|
||||||
|
return (remainder >> 8) ^ crc32_table[(u8)remainder ^ next_byte];
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CRC32_SLICE1
|
||||||
|
static u32
|
||||||
|
crc32_slice1(u32 remainder, const u8 *buffer, size_t size)
|
||||||
|
{
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
STATIC_ASSERT(ARRAY_LEN(crc32_table) >= 0x100);
|
||||||
|
|
||||||
|
for (i = 0; i < size; i++)
|
||||||
|
remainder = crc32_update_byte(remainder, buffer[i]);
|
||||||
|
return remainder;
|
||||||
|
}
|
||||||
|
#endif /* CRC32_SLICE1 */
|
||||||
|
|
||||||
|
#ifdef CRC32_SLICE4
|
||||||
|
static u32
|
||||||
|
crc32_slice4(u32 remainder, const u8 *buffer, size_t size)
|
||||||
|
{
|
||||||
|
const u8 *p = buffer;
|
||||||
|
const u8 *end = buffer + size;
|
||||||
|
const u8 *end32;
|
||||||
|
|
||||||
|
STATIC_ASSERT(ARRAY_LEN(crc32_table) >= 0x400);
|
||||||
|
|
||||||
|
for (; ((uintptr_t)p & 3) && p != end; p++)
|
||||||
|
remainder = crc32_update_byte(remainder, *p);
|
||||||
|
|
||||||
|
end32 = p + ((end - p) & ~3);
|
||||||
|
for (; p != end32; p += 4) {
|
||||||
|
u32 v = le32_bswap(*(const u32 *)p);
|
||||||
|
remainder =
|
||||||
|
crc32_table[0x300 + (u8)((remainder ^ v) >> 0)] ^
|
||||||
|
crc32_table[0x200 + (u8)((remainder ^ v) >> 8)] ^
|
||||||
|
crc32_table[0x100 + (u8)((remainder ^ v) >> 16)] ^
|
||||||
|
crc32_table[0x000 + (u8)((remainder ^ v) >> 24)];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; p != end; p++)
|
||||||
|
remainder = crc32_update_byte(remainder, *p);
|
||||||
|
|
||||||
|
return remainder;
|
||||||
|
}
|
||||||
|
#endif /* CRC32_SLICE4 */
|
||||||
|
|
||||||
|
#ifdef CRC32_SLICE8
|
||||||
|
static u32
|
||||||
|
crc32_slice8(u32 remainder, const u8 *buffer, size_t size)
|
||||||
|
{
|
||||||
|
const u8 *p = buffer;
|
||||||
|
const u8 *end = buffer + size;
|
||||||
|
const u8 *end64;
|
||||||
|
|
||||||
|
STATIC_ASSERT(ARRAY_LEN(crc32_table) >= 0x800);
|
||||||
|
|
||||||
|
for (; ((uintptr_t)p & 7) && p != end; p++)
|
||||||
|
remainder = crc32_update_byte(remainder, *p);
|
||||||
|
|
||||||
|
end64 = p + ((end - p) & ~7);
|
||||||
|
for (; p != end64; p += 8) {
|
||||||
|
u32 v1 = le32_bswap(*(const u32 *)(p + 0));
|
||||||
|
u32 v2 = le32_bswap(*(const u32 *)(p + 4));
|
||||||
|
remainder =
|
||||||
|
crc32_table[0x700 + (u8)((remainder ^ v1) >> 0)] ^
|
||||||
|
crc32_table[0x600 + (u8)((remainder ^ v1) >> 8)] ^
|
||||||
|
crc32_table[0x500 + (u8)((remainder ^ v1) >> 16)] ^
|
||||||
|
crc32_table[0x400 + (u8)((remainder ^ v1) >> 24)] ^
|
||||||
|
crc32_table[0x300 + (u8)(v2 >> 0)] ^
|
||||||
|
crc32_table[0x200 + (u8)(v2 >> 8)] ^
|
||||||
|
crc32_table[0x100 + (u8)(v2 >> 16)] ^
|
||||||
|
crc32_table[0x000 + (u8)(v2 >> 24)];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; p != end; p++)
|
||||||
|
remainder = crc32_update_byte(remainder, *p);
|
||||||
|
|
||||||
|
return remainder;
|
||||||
|
}
|
||||||
|
#endif /* CRC32_SLICE8 */
|
||||||
|
|
||||||
|
#ifdef DISPATCH
|
||||||
|
static u32 dispatch(u32, const u8 *, size_t);
|
||||||
|
|
||||||
|
static volatile crc32_func_t crc32_impl = dispatch;
|
||||||
|
|
||||||
|
/* Choose the fastest implementation at runtime */
|
||||||
|
static u32 dispatch(u32 remainder, const u8 *buffer, size_t size)
|
||||||
|
{
|
||||||
|
crc32_func_t f = arch_select_crc32_func();
|
||||||
|
|
||||||
|
if (f == NULL)
|
||||||
|
f = DEFAULT_IMPL;
|
||||||
|
|
||||||
|
crc32_impl = f;
|
||||||
|
return crc32_impl(remainder, buffer, size);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
# define crc32_impl DEFAULT_IMPL /* only one implementation, use it */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
LIBDEFLATEEXPORT u32 LIBDEFLATEAPI
|
||||||
|
libdeflate_crc32(u32 remainder, const void *buffer, size_t size)
|
||||||
|
{
|
||||||
|
if (buffer == NULL) /* return initial value */
|
||||||
|
return 0;
|
||||||
|
return ~crc32_impl(~remainder, buffer, size);
|
||||||
|
}
|
||||||
526
lib/gdeflate/libdeflate/lib/crc32_table.h
Normal file
526
lib/gdeflate/libdeflate/lib/crc32_table.h
Normal file
|
|
@ -0,0 +1,526 @@
|
||||||
|
/*
|
||||||
|
* crc32_table.h - data table to accelerate CRC-32 computation
|
||||||
|
*
|
||||||
|
* THIS FILE WAS AUTOMATICALLY GENERATED BY gen_crc32_table.c. DO NOT EDIT.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
static const uint32_t crc32_table[] = {
|
||||||
|
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
|
||||||
|
0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
|
||||||
|
0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
|
||||||
|
0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
|
||||||
|
0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
|
||||||
|
0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
|
||||||
|
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
|
||||||
|
0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
|
||||||
|
0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
|
||||||
|
0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
|
||||||
|
0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
|
||||||
|
0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
|
||||||
|
0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
|
||||||
|
0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
|
||||||
|
0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
|
||||||
|
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
|
||||||
|
0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
|
||||||
|
0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
|
||||||
|
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
|
||||||
|
0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
|
||||||
|
0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
|
||||||
|
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
|
||||||
|
0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
|
||||||
|
0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
|
||||||
|
0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
|
||||||
|
0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
|
||||||
|
0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
|
||||||
|
0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
|
||||||
|
0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
|
||||||
|
0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
|
||||||
|
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
|
||||||
|
0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
|
||||||
|
0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
|
||||||
|
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
|
||||||
|
0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
|
||||||
|
0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
|
||||||
|
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
|
||||||
|
0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
|
||||||
|
0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
|
||||||
|
0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
|
||||||
|
0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
|
||||||
|
0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
|
||||||
|
0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
|
||||||
|
0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
|
||||||
|
0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
|
||||||
|
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
|
||||||
|
0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
|
||||||
|
0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
|
||||||
|
0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
|
||||||
|
0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
|
||||||
|
0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
|
||||||
|
0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
|
||||||
|
0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
|
||||||
|
0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
|
||||||
|
0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
|
||||||
|
0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
|
||||||
|
0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
|
||||||
|
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
|
||||||
|
0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
|
||||||
|
0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
|
||||||
|
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
|
||||||
|
0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
|
||||||
|
0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
|
||||||
|
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,
|
||||||
|
#if defined(CRC32_SLICE4) || defined(CRC32_SLICE8)
|
||||||
|
0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3,
|
||||||
|
0x646cc504, 0x7d77f445, 0x565aa786, 0x4f4196c7,
|
||||||
|
0xc8d98a08, 0xd1c2bb49, 0xfaefe88a, 0xe3f4d9cb,
|
||||||
|
0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, 0x87981ccf,
|
||||||
|
0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192,
|
||||||
|
0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496,
|
||||||
|
0x821b9859, 0x9b00a918, 0xb02dfadb, 0xa936cb9a,
|
||||||
|
0xe6775d5d, 0xff6c6c1c, 0xd4413fdf, 0xcd5a0e9e,
|
||||||
|
0x958424a2, 0x8c9f15e3, 0xa7b24620, 0xbea97761,
|
||||||
|
0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265,
|
||||||
|
0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69,
|
||||||
|
0x39316bae, 0x202a5aef, 0x0b07092c, 0x121c386d,
|
||||||
|
0xdf4636f3, 0xc65d07b2, 0xed705471, 0xf46b6530,
|
||||||
|
0xbb2af3f7, 0xa231c2b6, 0x891c9175, 0x9007a034,
|
||||||
|
0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38,
|
||||||
|
0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c,
|
||||||
|
0xf0794f05, 0xe9627e44, 0xc24f2d87, 0xdb541cc6,
|
||||||
|
0x94158a01, 0x8d0ebb40, 0xa623e883, 0xbf38d9c2,
|
||||||
|
0x38a0c50d, 0x21bbf44c, 0x0a96a78f, 0x138d96ce,
|
||||||
|
0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca,
|
||||||
|
0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97,
|
||||||
|
0xded79850, 0xc7cca911, 0xece1fad2, 0xf5facb93,
|
||||||
|
0x7262d75c, 0x6b79e61d, 0x4054b5de, 0x594f849f,
|
||||||
|
0x160e1258, 0x0f152319, 0x243870da, 0x3d23419b,
|
||||||
|
0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864,
|
||||||
|
0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60,
|
||||||
|
0xad24e1af, 0xb43fd0ee, 0x9f12832d, 0x8609b26c,
|
||||||
|
0xc94824ab, 0xd05315ea, 0xfb7e4629, 0xe2657768,
|
||||||
|
0x2f3f79f6, 0x362448b7, 0x1d091b74, 0x04122a35,
|
||||||
|
0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31,
|
||||||
|
0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d,
|
||||||
|
0x838a36fa, 0x9a9107bb, 0xb1bc5478, 0xa8a76539,
|
||||||
|
0x3b83984b, 0x2298a90a, 0x09b5fac9, 0x10aecb88,
|
||||||
|
0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, 0x74c20e8c,
|
||||||
|
0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180,
|
||||||
|
0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484,
|
||||||
|
0x71418a1a, 0x685abb5b, 0x4377e898, 0x5a6cd9d9,
|
||||||
|
0x152d4f1e, 0x0c367e5f, 0x271b2d9c, 0x3e001cdd,
|
||||||
|
0xb9980012, 0xa0833153, 0x8bae6290, 0x92b553d1,
|
||||||
|
0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5,
|
||||||
|
0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a,
|
||||||
|
0xca6b79ed, 0xd37048ac, 0xf85d1b6f, 0xe1462a2e,
|
||||||
|
0x66de36e1, 0x7fc507a0, 0x54e85463, 0x4df36522,
|
||||||
|
0x02b2f3e5, 0x1ba9c2a4, 0x30849167, 0x299fa026,
|
||||||
|
0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b,
|
||||||
|
0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f,
|
||||||
|
0x2c1c24b0, 0x350715f1, 0x1e2a4632, 0x07317773,
|
||||||
|
0x4870e1b4, 0x516bd0f5, 0x7a468336, 0x635db277,
|
||||||
|
0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, 0xe0d7848d,
|
||||||
|
0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189,
|
||||||
|
0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85,
|
||||||
|
0x674f9842, 0x7e54a903, 0x5579fac0, 0x4c62cb81,
|
||||||
|
0x8138c51f, 0x9823f45e, 0xb30ea79d, 0xaa1596dc,
|
||||||
|
0xe554001b, 0xfc4f315a, 0xd7626299, 0xce7953d8,
|
||||||
|
0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4,
|
||||||
|
0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0,
|
||||||
|
0x5e7ef3ec, 0x4765c2ad, 0x6c48916e, 0x7553a02f,
|
||||||
|
0x3a1236e8, 0x230907a9, 0x0824546a, 0x113f652b,
|
||||||
|
0x96a779e4, 0x8fbc48a5, 0xa4911b66, 0xbd8a2a27,
|
||||||
|
0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23,
|
||||||
|
0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e,
|
||||||
|
0x70d024b9, 0x69cb15f8, 0x42e6463b, 0x5bfd777a,
|
||||||
|
0xdc656bb5, 0xc57e5af4, 0xee530937, 0xf7483876,
|
||||||
|
0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, 0x9324fd72,
|
||||||
|
0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59,
|
||||||
|
0x0709a8dc, 0x06cbc2eb, 0x048d7cb2, 0x054f1685,
|
||||||
|
0x0e1351b8, 0x0fd13b8f, 0x0d9785d6, 0x0c55efe1,
|
||||||
|
0x091af964, 0x08d89353, 0x0a9e2d0a, 0x0b5c473d,
|
||||||
|
0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29,
|
||||||
|
0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5,
|
||||||
|
0x1235f2c8, 0x13f798ff, 0x11b126a6, 0x10734c91,
|
||||||
|
0x153c5a14, 0x14fe3023, 0x16b88e7a, 0x177ae44d,
|
||||||
|
0x384d46e0, 0x398f2cd7, 0x3bc9928e, 0x3a0bf8b9,
|
||||||
|
0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065,
|
||||||
|
0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901,
|
||||||
|
0x3157bf84, 0x3095d5b3, 0x32d36bea, 0x331101dd,
|
||||||
|
0x246be590, 0x25a98fa7, 0x27ef31fe, 0x262d5bc9,
|
||||||
|
0x23624d4c, 0x22a0277b, 0x20e69922, 0x2124f315,
|
||||||
|
0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71,
|
||||||
|
0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad,
|
||||||
|
0x709a8dc0, 0x7158e7f7, 0x731e59ae, 0x72dc3399,
|
||||||
|
0x7793251c, 0x76514f2b, 0x7417f172, 0x75d59b45,
|
||||||
|
0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, 0x7ccf6221,
|
||||||
|
0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd,
|
||||||
|
0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9,
|
||||||
|
0x6bb5866c, 0x6a77ec5b, 0x68315202, 0x69f33835,
|
||||||
|
0x62af7f08, 0x636d153f, 0x612bab66, 0x60e9c151,
|
||||||
|
0x65a6d7d4, 0x6464bde3, 0x662203ba, 0x67e0698d,
|
||||||
|
0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579,
|
||||||
|
0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5,
|
||||||
|
0x46c49a98, 0x4706f0af, 0x45404ef6, 0x448224c1,
|
||||||
|
0x41cd3244, 0x400f5873, 0x4249e62a, 0x438b8c1d,
|
||||||
|
0x54f16850, 0x55330267, 0x5775bc3e, 0x56b7d609,
|
||||||
|
0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5,
|
||||||
|
0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1,
|
||||||
|
0x5deb9134, 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d,
|
||||||
|
0xe1351b80, 0xe0f771b7, 0xe2b1cfee, 0xe373a5d9,
|
||||||
|
0xe63cb35c, 0xe7fed96b, 0xe5b86732, 0xe47a0d05,
|
||||||
|
0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461,
|
||||||
|
0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd,
|
||||||
|
0xfd13b8f0, 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9,
|
||||||
|
0xfa1a102c, 0xfbd87a1b, 0xf99ec442, 0xf85cae75,
|
||||||
|
0xf300e948, 0xf2c2837f, 0xf0843d26, 0xf1465711,
|
||||||
|
0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd,
|
||||||
|
0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339,
|
||||||
|
0xde71f5bc, 0xdfb39f8b, 0xddf521d2, 0xdc374be5,
|
||||||
|
0xd76b0cd8, 0xd6a966ef, 0xd4efd8b6, 0xd52db281,
|
||||||
|
0xd062a404, 0xd1a0ce33, 0xd3e6706a, 0xd2241a5d,
|
||||||
|
0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049,
|
||||||
|
0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895,
|
||||||
|
0xcb4dafa8, 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1,
|
||||||
|
0xcc440774, 0xcd866d43, 0xcfc0d31a, 0xce02b92d,
|
||||||
|
0x91af9640, 0x906dfc77, 0x922b422e, 0x93e92819,
|
||||||
|
0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5,
|
||||||
|
0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1,
|
||||||
|
0x98b56f24, 0x99770513, 0x9b31bb4a, 0x9af3d17d,
|
||||||
|
0x8d893530, 0x8c4b5f07, 0x8e0de15e, 0x8fcf8b69,
|
||||||
|
0x8a809dec, 0x8b42f7db, 0x89044982, 0x88c623b5,
|
||||||
|
0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1,
|
||||||
|
0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d,
|
||||||
|
0xa9e2d0a0, 0xa820ba97, 0xaa6604ce, 0xaba46ef9,
|
||||||
|
0xaeeb787c, 0xaf29124b, 0xad6fac12, 0xacadc625,
|
||||||
|
0xa7f18118, 0xa633eb2f, 0xa4755576, 0xa5b73f41,
|
||||||
|
0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d,
|
||||||
|
0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89,
|
||||||
|
0xb2cddb0c, 0xb30fb13b, 0xb1490f62, 0xb08b6555,
|
||||||
|
0xbbd72268, 0xba15485f, 0xb853f606, 0xb9919c31,
|
||||||
|
0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, 0xbe9834ed,
|
||||||
|
0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee,
|
||||||
|
0x8f629757, 0x37def032, 0x256b5fdc, 0x9dd738b9,
|
||||||
|
0xc5b428ef, 0x7d084f8a, 0x6fbde064, 0xd7018701,
|
||||||
|
0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, 0x58631056,
|
||||||
|
0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871,
|
||||||
|
0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26,
|
||||||
|
0x95ad7f70, 0x2d111815, 0x3fa4b7fb, 0x8718d09e,
|
||||||
|
0x1acfe827, 0xa2738f42, 0xb0c620ac, 0x087a47c9,
|
||||||
|
0xa032af3e, 0x188ec85b, 0x0a3b67b5, 0xb28700d0,
|
||||||
|
0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787,
|
||||||
|
0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f,
|
||||||
|
0xeae41086, 0x525877e3, 0x40edd80d, 0xf851bf68,
|
||||||
|
0xf02bf8a1, 0x48979fc4, 0x5a22302a, 0xe29e574f,
|
||||||
|
0x7f496ff6, 0xc7f50893, 0xd540a77d, 0x6dfcc018,
|
||||||
|
0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0,
|
||||||
|
0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7,
|
||||||
|
0x9b14583d, 0x23a83f58, 0x311d90b6, 0x89a1f7d3,
|
||||||
|
0x1476cf6a, 0xaccaa80f, 0xbe7f07e1, 0x06c36084,
|
||||||
|
0x5ea070d2, 0xe61c17b7, 0xf4a9b859, 0x4c15df3c,
|
||||||
|
0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b,
|
||||||
|
0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c,
|
||||||
|
0x446f98f5, 0xfcd3ff90, 0xee66507e, 0x56da371b,
|
||||||
|
0x0eb9274d, 0xb6054028, 0xa4b0efc6, 0x1c0c88a3,
|
||||||
|
0x81dbb01a, 0x3967d77f, 0x2bd27891, 0x936e1ff4,
|
||||||
|
0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed,
|
||||||
|
0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba,
|
||||||
|
0xfe92dfec, 0x462eb889, 0x549b1767, 0xec277002,
|
||||||
|
0x71f048bb, 0xc94c2fde, 0xdbf98030, 0x6345e755,
|
||||||
|
0x6b3fa09c, 0xd383c7f9, 0xc1366817, 0x798a0f72,
|
||||||
|
0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825,
|
||||||
|
0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d,
|
||||||
|
0x21e91f24, 0x99557841, 0x8be0d7af, 0x335cb0ca,
|
||||||
|
0xed59b63b, 0x55e5d15e, 0x47507eb0, 0xffec19d5,
|
||||||
|
0x623b216c, 0xda874609, 0xc832e9e7, 0x708e8e82,
|
||||||
|
0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a,
|
||||||
|
0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d,
|
||||||
|
0xbd40e1a4, 0x05fc86c1, 0x1749292f, 0xaff54e4a,
|
||||||
|
0x322276f3, 0x8a9e1196, 0x982bbe78, 0x2097d91d,
|
||||||
|
0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, 0x6a4166a5,
|
||||||
|
0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2,
|
||||||
|
0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb,
|
||||||
|
0xc2098e52, 0x7ab5e937, 0x680046d9, 0xd0bc21bc,
|
||||||
|
0x88df31ea, 0x3063568f, 0x22d6f961, 0x9a6a9e04,
|
||||||
|
0x07bda6bd, 0xbf01c1d8, 0xadb46e36, 0x15080953,
|
||||||
|
0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174,
|
||||||
|
0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623,
|
||||||
|
0xd8c66675, 0x607a0110, 0x72cfaefe, 0xca73c99b,
|
||||||
|
0x57a4f122, 0xef189647, 0xfdad39a9, 0x45115ecc,
|
||||||
|
0x764dee06, 0xcef18963, 0xdc44268d, 0x64f841e8,
|
||||||
|
0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf,
|
||||||
|
0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907,
|
||||||
|
0x3c9b51be, 0x842736db, 0x96929935, 0x2e2efe50,
|
||||||
|
0x2654b999, 0x9ee8defc, 0x8c5d7112, 0x34e11677,
|
||||||
|
0xa9362ece, 0x118a49ab, 0x033fe645, 0xbb838120,
|
||||||
|
0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98,
|
||||||
|
0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf,
|
||||||
|
0xd67f4138, 0x6ec3265d, 0x7c7689b3, 0xc4caeed6,
|
||||||
|
0x591dd66f, 0xe1a1b10a, 0xf3141ee4, 0x4ba87981,
|
||||||
|
0x13cb69d7, 0xab770eb2, 0xb9c2a15c, 0x017ec639,
|
||||||
|
0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e,
|
||||||
|
0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949,
|
||||||
|
0x090481f0, 0xb1b8e695, 0xa30d497b, 0x1bb12e1e,
|
||||||
|
0x43d23e48, 0xfb6e592d, 0xe9dbf6c3, 0x516791a6,
|
||||||
|
0xccb0a91f, 0x740cce7a, 0x66b96194, 0xde0506f1,
|
||||||
|
#endif /* CRC32_SLICE4 || CRC32_SLICE8 */
|
||||||
|
#if defined(CRC32_SLICE8)
|
||||||
|
0x00000000, 0x3d6029b0, 0x7ac05360, 0x47a07ad0,
|
||||||
|
0xf580a6c0, 0xc8e08f70, 0x8f40f5a0, 0xb220dc10,
|
||||||
|
0x30704bc1, 0x0d106271, 0x4ab018a1, 0x77d03111,
|
||||||
|
0xc5f0ed01, 0xf890c4b1, 0xbf30be61, 0x825097d1,
|
||||||
|
0x60e09782, 0x5d80be32, 0x1a20c4e2, 0x2740ed52,
|
||||||
|
0x95603142, 0xa80018f2, 0xefa06222, 0xd2c04b92,
|
||||||
|
0x5090dc43, 0x6df0f5f3, 0x2a508f23, 0x1730a693,
|
||||||
|
0xa5107a83, 0x98705333, 0xdfd029e3, 0xe2b00053,
|
||||||
|
0xc1c12f04, 0xfca106b4, 0xbb017c64, 0x866155d4,
|
||||||
|
0x344189c4, 0x0921a074, 0x4e81daa4, 0x73e1f314,
|
||||||
|
0xf1b164c5, 0xccd14d75, 0x8b7137a5, 0xb6111e15,
|
||||||
|
0x0431c205, 0x3951ebb5, 0x7ef19165, 0x4391b8d5,
|
||||||
|
0xa121b886, 0x9c419136, 0xdbe1ebe6, 0xe681c256,
|
||||||
|
0x54a11e46, 0x69c137f6, 0x2e614d26, 0x13016496,
|
||||||
|
0x9151f347, 0xac31daf7, 0xeb91a027, 0xd6f18997,
|
||||||
|
0x64d15587, 0x59b17c37, 0x1e1106e7, 0x23712f57,
|
||||||
|
0x58f35849, 0x659371f9, 0x22330b29, 0x1f532299,
|
||||||
|
0xad73fe89, 0x9013d739, 0xd7b3ade9, 0xead38459,
|
||||||
|
0x68831388, 0x55e33a38, 0x124340e8, 0x2f236958,
|
||||||
|
0x9d03b548, 0xa0639cf8, 0xe7c3e628, 0xdaa3cf98,
|
||||||
|
0x3813cfcb, 0x0573e67b, 0x42d39cab, 0x7fb3b51b,
|
||||||
|
0xcd93690b, 0xf0f340bb, 0xb7533a6b, 0x8a3313db,
|
||||||
|
0x0863840a, 0x3503adba, 0x72a3d76a, 0x4fc3feda,
|
||||||
|
0xfde322ca, 0xc0830b7a, 0x872371aa, 0xba43581a,
|
||||||
|
0x9932774d, 0xa4525efd, 0xe3f2242d, 0xde920d9d,
|
||||||
|
0x6cb2d18d, 0x51d2f83d, 0x167282ed, 0x2b12ab5d,
|
||||||
|
0xa9423c8c, 0x9422153c, 0xd3826fec, 0xeee2465c,
|
||||||
|
0x5cc29a4c, 0x61a2b3fc, 0x2602c92c, 0x1b62e09c,
|
||||||
|
0xf9d2e0cf, 0xc4b2c97f, 0x8312b3af, 0xbe729a1f,
|
||||||
|
0x0c52460f, 0x31326fbf, 0x7692156f, 0x4bf23cdf,
|
||||||
|
0xc9a2ab0e, 0xf4c282be, 0xb362f86e, 0x8e02d1de,
|
||||||
|
0x3c220dce, 0x0142247e, 0x46e25eae, 0x7b82771e,
|
||||||
|
0xb1e6b092, 0x8c869922, 0xcb26e3f2, 0xf646ca42,
|
||||||
|
0x44661652, 0x79063fe2, 0x3ea64532, 0x03c66c82,
|
||||||
|
0x8196fb53, 0xbcf6d2e3, 0xfb56a833, 0xc6368183,
|
||||||
|
0x74165d93, 0x49767423, 0x0ed60ef3, 0x33b62743,
|
||||||
|
0xd1062710, 0xec660ea0, 0xabc67470, 0x96a65dc0,
|
||||||
|
0x248681d0, 0x19e6a860, 0x5e46d2b0, 0x6326fb00,
|
||||||
|
0xe1766cd1, 0xdc164561, 0x9bb63fb1, 0xa6d61601,
|
||||||
|
0x14f6ca11, 0x2996e3a1, 0x6e369971, 0x5356b0c1,
|
||||||
|
0x70279f96, 0x4d47b626, 0x0ae7ccf6, 0x3787e546,
|
||||||
|
0x85a73956, 0xb8c710e6, 0xff676a36, 0xc2074386,
|
||||||
|
0x4057d457, 0x7d37fde7, 0x3a978737, 0x07f7ae87,
|
||||||
|
0xb5d77297, 0x88b75b27, 0xcf1721f7, 0xf2770847,
|
||||||
|
0x10c70814, 0x2da721a4, 0x6a075b74, 0x576772c4,
|
||||||
|
0xe547aed4, 0xd8278764, 0x9f87fdb4, 0xa2e7d404,
|
||||||
|
0x20b743d5, 0x1dd76a65, 0x5a7710b5, 0x67173905,
|
||||||
|
0xd537e515, 0xe857cca5, 0xaff7b675, 0x92979fc5,
|
||||||
|
0xe915e8db, 0xd475c16b, 0x93d5bbbb, 0xaeb5920b,
|
||||||
|
0x1c954e1b, 0x21f567ab, 0x66551d7b, 0x5b3534cb,
|
||||||
|
0xd965a31a, 0xe4058aaa, 0xa3a5f07a, 0x9ec5d9ca,
|
||||||
|
0x2ce505da, 0x11852c6a, 0x562556ba, 0x6b457f0a,
|
||||||
|
0x89f57f59, 0xb49556e9, 0xf3352c39, 0xce550589,
|
||||||
|
0x7c75d999, 0x4115f029, 0x06b58af9, 0x3bd5a349,
|
||||||
|
0xb9853498, 0x84e51d28, 0xc34567f8, 0xfe254e48,
|
||||||
|
0x4c059258, 0x7165bbe8, 0x36c5c138, 0x0ba5e888,
|
||||||
|
0x28d4c7df, 0x15b4ee6f, 0x521494bf, 0x6f74bd0f,
|
||||||
|
0xdd54611f, 0xe03448af, 0xa794327f, 0x9af41bcf,
|
||||||
|
0x18a48c1e, 0x25c4a5ae, 0x6264df7e, 0x5f04f6ce,
|
||||||
|
0xed242ade, 0xd044036e, 0x97e479be, 0xaa84500e,
|
||||||
|
0x4834505d, 0x755479ed, 0x32f4033d, 0x0f942a8d,
|
||||||
|
0xbdb4f69d, 0x80d4df2d, 0xc774a5fd, 0xfa148c4d,
|
||||||
|
0x78441b9c, 0x4524322c, 0x028448fc, 0x3fe4614c,
|
||||||
|
0x8dc4bd5c, 0xb0a494ec, 0xf704ee3c, 0xca64c78c,
|
||||||
|
0x00000000, 0xcb5cd3a5, 0x4dc8a10b, 0x869472ae,
|
||||||
|
0x9b914216, 0x50cd91b3, 0xd659e31d, 0x1d0530b8,
|
||||||
|
0xec53826d, 0x270f51c8, 0xa19b2366, 0x6ac7f0c3,
|
||||||
|
0x77c2c07b, 0xbc9e13de, 0x3a0a6170, 0xf156b2d5,
|
||||||
|
0x03d6029b, 0xc88ad13e, 0x4e1ea390, 0x85427035,
|
||||||
|
0x9847408d, 0x531b9328, 0xd58fe186, 0x1ed33223,
|
||||||
|
0xef8580f6, 0x24d95353, 0xa24d21fd, 0x6911f258,
|
||||||
|
0x7414c2e0, 0xbf481145, 0x39dc63eb, 0xf280b04e,
|
||||||
|
0x07ac0536, 0xccf0d693, 0x4a64a43d, 0x81387798,
|
||||||
|
0x9c3d4720, 0x57619485, 0xd1f5e62b, 0x1aa9358e,
|
||||||
|
0xebff875b, 0x20a354fe, 0xa6372650, 0x6d6bf5f5,
|
||||||
|
0x706ec54d, 0xbb3216e8, 0x3da66446, 0xf6fab7e3,
|
||||||
|
0x047a07ad, 0xcf26d408, 0x49b2a6a6, 0x82ee7503,
|
||||||
|
0x9feb45bb, 0x54b7961e, 0xd223e4b0, 0x197f3715,
|
||||||
|
0xe82985c0, 0x23755665, 0xa5e124cb, 0x6ebdf76e,
|
||||||
|
0x73b8c7d6, 0xb8e41473, 0x3e7066dd, 0xf52cb578,
|
||||||
|
0x0f580a6c, 0xc404d9c9, 0x4290ab67, 0x89cc78c2,
|
||||||
|
0x94c9487a, 0x5f959bdf, 0xd901e971, 0x125d3ad4,
|
||||||
|
0xe30b8801, 0x28575ba4, 0xaec3290a, 0x659ffaaf,
|
||||||
|
0x789aca17, 0xb3c619b2, 0x35526b1c, 0xfe0eb8b9,
|
||||||
|
0x0c8e08f7, 0xc7d2db52, 0x4146a9fc, 0x8a1a7a59,
|
||||||
|
0x971f4ae1, 0x5c439944, 0xdad7ebea, 0x118b384f,
|
||||||
|
0xe0dd8a9a, 0x2b81593f, 0xad152b91, 0x6649f834,
|
||||||
|
0x7b4cc88c, 0xb0101b29, 0x36846987, 0xfdd8ba22,
|
||||||
|
0x08f40f5a, 0xc3a8dcff, 0x453cae51, 0x8e607df4,
|
||||||
|
0x93654d4c, 0x58399ee9, 0xdeadec47, 0x15f13fe2,
|
||||||
|
0xe4a78d37, 0x2ffb5e92, 0xa96f2c3c, 0x6233ff99,
|
||||||
|
0x7f36cf21, 0xb46a1c84, 0x32fe6e2a, 0xf9a2bd8f,
|
||||||
|
0x0b220dc1, 0xc07ede64, 0x46eaacca, 0x8db67f6f,
|
||||||
|
0x90b34fd7, 0x5bef9c72, 0xdd7beedc, 0x16273d79,
|
||||||
|
0xe7718fac, 0x2c2d5c09, 0xaab92ea7, 0x61e5fd02,
|
||||||
|
0x7ce0cdba, 0xb7bc1e1f, 0x31286cb1, 0xfa74bf14,
|
||||||
|
0x1eb014d8, 0xd5ecc77d, 0x5378b5d3, 0x98246676,
|
||||||
|
0x852156ce, 0x4e7d856b, 0xc8e9f7c5, 0x03b52460,
|
||||||
|
0xf2e396b5, 0x39bf4510, 0xbf2b37be, 0x7477e41b,
|
||||||
|
0x6972d4a3, 0xa22e0706, 0x24ba75a8, 0xefe6a60d,
|
||||||
|
0x1d661643, 0xd63ac5e6, 0x50aeb748, 0x9bf264ed,
|
||||||
|
0x86f75455, 0x4dab87f0, 0xcb3ff55e, 0x006326fb,
|
||||||
|
0xf135942e, 0x3a69478b, 0xbcfd3525, 0x77a1e680,
|
||||||
|
0x6aa4d638, 0xa1f8059d, 0x276c7733, 0xec30a496,
|
||||||
|
0x191c11ee, 0xd240c24b, 0x54d4b0e5, 0x9f886340,
|
||||||
|
0x828d53f8, 0x49d1805d, 0xcf45f2f3, 0x04192156,
|
||||||
|
0xf54f9383, 0x3e134026, 0xb8873288, 0x73dbe12d,
|
||||||
|
0x6eded195, 0xa5820230, 0x2316709e, 0xe84aa33b,
|
||||||
|
0x1aca1375, 0xd196c0d0, 0x5702b27e, 0x9c5e61db,
|
||||||
|
0x815b5163, 0x4a0782c6, 0xcc93f068, 0x07cf23cd,
|
||||||
|
0xf6999118, 0x3dc542bd, 0xbb513013, 0x700de3b6,
|
||||||
|
0x6d08d30e, 0xa65400ab, 0x20c07205, 0xeb9ca1a0,
|
||||||
|
0x11e81eb4, 0xdab4cd11, 0x5c20bfbf, 0x977c6c1a,
|
||||||
|
0x8a795ca2, 0x41258f07, 0xc7b1fda9, 0x0ced2e0c,
|
||||||
|
0xfdbb9cd9, 0x36e74f7c, 0xb0733dd2, 0x7b2fee77,
|
||||||
|
0x662adecf, 0xad760d6a, 0x2be27fc4, 0xe0beac61,
|
||||||
|
0x123e1c2f, 0xd962cf8a, 0x5ff6bd24, 0x94aa6e81,
|
||||||
|
0x89af5e39, 0x42f38d9c, 0xc467ff32, 0x0f3b2c97,
|
||||||
|
0xfe6d9e42, 0x35314de7, 0xb3a53f49, 0x78f9ecec,
|
||||||
|
0x65fcdc54, 0xaea00ff1, 0x28347d5f, 0xe368aefa,
|
||||||
|
0x16441b82, 0xdd18c827, 0x5b8cba89, 0x90d0692c,
|
||||||
|
0x8dd55994, 0x46898a31, 0xc01df89f, 0x0b412b3a,
|
||||||
|
0xfa1799ef, 0x314b4a4a, 0xb7df38e4, 0x7c83eb41,
|
||||||
|
0x6186dbf9, 0xaada085c, 0x2c4e7af2, 0xe712a957,
|
||||||
|
0x15921919, 0xdececabc, 0x585ab812, 0x93066bb7,
|
||||||
|
0x8e035b0f, 0x455f88aa, 0xc3cbfa04, 0x089729a1,
|
||||||
|
0xf9c19b74, 0x329d48d1, 0xb4093a7f, 0x7f55e9da,
|
||||||
|
0x6250d962, 0xa90c0ac7, 0x2f987869, 0xe4c4abcc,
|
||||||
|
0x00000000, 0xa6770bb4, 0x979f1129, 0x31e81a9d,
|
||||||
|
0xf44f2413, 0x52382fa7, 0x63d0353a, 0xc5a73e8e,
|
||||||
|
0x33ef4e67, 0x959845d3, 0xa4705f4e, 0x020754fa,
|
||||||
|
0xc7a06a74, 0x61d761c0, 0x503f7b5d, 0xf64870e9,
|
||||||
|
0x67de9cce, 0xc1a9977a, 0xf0418de7, 0x56368653,
|
||||||
|
0x9391b8dd, 0x35e6b369, 0x040ea9f4, 0xa279a240,
|
||||||
|
0x5431d2a9, 0xf246d91d, 0xc3aec380, 0x65d9c834,
|
||||||
|
0xa07ef6ba, 0x0609fd0e, 0x37e1e793, 0x9196ec27,
|
||||||
|
0xcfbd399c, 0x69ca3228, 0x582228b5, 0xfe552301,
|
||||||
|
0x3bf21d8f, 0x9d85163b, 0xac6d0ca6, 0x0a1a0712,
|
||||||
|
0xfc5277fb, 0x5a257c4f, 0x6bcd66d2, 0xcdba6d66,
|
||||||
|
0x081d53e8, 0xae6a585c, 0x9f8242c1, 0x39f54975,
|
||||||
|
0xa863a552, 0x0e14aee6, 0x3ffcb47b, 0x998bbfcf,
|
||||||
|
0x5c2c8141, 0xfa5b8af5, 0xcbb39068, 0x6dc49bdc,
|
||||||
|
0x9b8ceb35, 0x3dfbe081, 0x0c13fa1c, 0xaa64f1a8,
|
||||||
|
0x6fc3cf26, 0xc9b4c492, 0xf85cde0f, 0x5e2bd5bb,
|
||||||
|
0x440b7579, 0xe27c7ecd, 0xd3946450, 0x75e36fe4,
|
||||||
|
0xb044516a, 0x16335ade, 0x27db4043, 0x81ac4bf7,
|
||||||
|
0x77e43b1e, 0xd19330aa, 0xe07b2a37, 0x460c2183,
|
||||||
|
0x83ab1f0d, 0x25dc14b9, 0x14340e24, 0xb2430590,
|
||||||
|
0x23d5e9b7, 0x85a2e203, 0xb44af89e, 0x123df32a,
|
||||||
|
0xd79acda4, 0x71edc610, 0x4005dc8d, 0xe672d739,
|
||||||
|
0x103aa7d0, 0xb64dac64, 0x87a5b6f9, 0x21d2bd4d,
|
||||||
|
0xe47583c3, 0x42028877, 0x73ea92ea, 0xd59d995e,
|
||||||
|
0x8bb64ce5, 0x2dc14751, 0x1c295dcc, 0xba5e5678,
|
||||||
|
0x7ff968f6, 0xd98e6342, 0xe86679df, 0x4e11726b,
|
||||||
|
0xb8590282, 0x1e2e0936, 0x2fc613ab, 0x89b1181f,
|
||||||
|
0x4c162691, 0xea612d25, 0xdb8937b8, 0x7dfe3c0c,
|
||||||
|
0xec68d02b, 0x4a1fdb9f, 0x7bf7c102, 0xdd80cab6,
|
||||||
|
0x1827f438, 0xbe50ff8c, 0x8fb8e511, 0x29cfeea5,
|
||||||
|
0xdf879e4c, 0x79f095f8, 0x48188f65, 0xee6f84d1,
|
||||||
|
0x2bc8ba5f, 0x8dbfb1eb, 0xbc57ab76, 0x1a20a0c2,
|
||||||
|
0x8816eaf2, 0x2e61e146, 0x1f89fbdb, 0xb9fef06f,
|
||||||
|
0x7c59cee1, 0xda2ec555, 0xebc6dfc8, 0x4db1d47c,
|
||||||
|
0xbbf9a495, 0x1d8eaf21, 0x2c66b5bc, 0x8a11be08,
|
||||||
|
0x4fb68086, 0xe9c18b32, 0xd82991af, 0x7e5e9a1b,
|
||||||
|
0xefc8763c, 0x49bf7d88, 0x78576715, 0xde206ca1,
|
||||||
|
0x1b87522f, 0xbdf0599b, 0x8c184306, 0x2a6f48b2,
|
||||||
|
0xdc27385b, 0x7a5033ef, 0x4bb82972, 0xedcf22c6,
|
||||||
|
0x28681c48, 0x8e1f17fc, 0xbff70d61, 0x198006d5,
|
||||||
|
0x47abd36e, 0xe1dcd8da, 0xd034c247, 0x7643c9f3,
|
||||||
|
0xb3e4f77d, 0x1593fcc9, 0x247be654, 0x820cede0,
|
||||||
|
0x74449d09, 0xd23396bd, 0xe3db8c20, 0x45ac8794,
|
||||||
|
0x800bb91a, 0x267cb2ae, 0x1794a833, 0xb1e3a387,
|
||||||
|
0x20754fa0, 0x86024414, 0xb7ea5e89, 0x119d553d,
|
||||||
|
0xd43a6bb3, 0x724d6007, 0x43a57a9a, 0xe5d2712e,
|
||||||
|
0x139a01c7, 0xb5ed0a73, 0x840510ee, 0x22721b5a,
|
||||||
|
0xe7d525d4, 0x41a22e60, 0x704a34fd, 0xd63d3f49,
|
||||||
|
0xcc1d9f8b, 0x6a6a943f, 0x5b828ea2, 0xfdf58516,
|
||||||
|
0x3852bb98, 0x9e25b02c, 0xafcdaab1, 0x09baa105,
|
||||||
|
0xfff2d1ec, 0x5985da58, 0x686dc0c5, 0xce1acb71,
|
||||||
|
0x0bbdf5ff, 0xadcafe4b, 0x9c22e4d6, 0x3a55ef62,
|
||||||
|
0xabc30345, 0x0db408f1, 0x3c5c126c, 0x9a2b19d8,
|
||||||
|
0x5f8c2756, 0xf9fb2ce2, 0xc813367f, 0x6e643dcb,
|
||||||
|
0x982c4d22, 0x3e5b4696, 0x0fb35c0b, 0xa9c457bf,
|
||||||
|
0x6c636931, 0xca146285, 0xfbfc7818, 0x5d8b73ac,
|
||||||
|
0x03a0a617, 0xa5d7ada3, 0x943fb73e, 0x3248bc8a,
|
||||||
|
0xf7ef8204, 0x519889b0, 0x6070932d, 0xc6079899,
|
||||||
|
0x304fe870, 0x9638e3c4, 0xa7d0f959, 0x01a7f2ed,
|
||||||
|
0xc400cc63, 0x6277c7d7, 0x539fdd4a, 0xf5e8d6fe,
|
||||||
|
0x647e3ad9, 0xc209316d, 0xf3e12bf0, 0x55962044,
|
||||||
|
0x90311eca, 0x3646157e, 0x07ae0fe3, 0xa1d90457,
|
||||||
|
0x579174be, 0xf1e67f0a, 0xc00e6597, 0x66796e23,
|
||||||
|
0xa3de50ad, 0x05a95b19, 0x34414184, 0x92364a30,
|
||||||
|
0x00000000, 0xccaa009e, 0x4225077d, 0x8e8f07e3,
|
||||||
|
0x844a0efa, 0x48e00e64, 0xc66f0987, 0x0ac50919,
|
||||||
|
0xd3e51bb5, 0x1f4f1b2b, 0x91c01cc8, 0x5d6a1c56,
|
||||||
|
0x57af154f, 0x9b0515d1, 0x158a1232, 0xd92012ac,
|
||||||
|
0x7cbb312b, 0xb01131b5, 0x3e9e3656, 0xf23436c8,
|
||||||
|
0xf8f13fd1, 0x345b3f4f, 0xbad438ac, 0x767e3832,
|
||||||
|
0xaf5e2a9e, 0x63f42a00, 0xed7b2de3, 0x21d12d7d,
|
||||||
|
0x2b142464, 0xe7be24fa, 0x69312319, 0xa59b2387,
|
||||||
|
0xf9766256, 0x35dc62c8, 0xbb53652b, 0x77f965b5,
|
||||||
|
0x7d3c6cac, 0xb1966c32, 0x3f196bd1, 0xf3b36b4f,
|
||||||
|
0x2a9379e3, 0xe639797d, 0x68b67e9e, 0xa41c7e00,
|
||||||
|
0xaed97719, 0x62737787, 0xecfc7064, 0x205670fa,
|
||||||
|
0x85cd537d, 0x496753e3, 0xc7e85400, 0x0b42549e,
|
||||||
|
0x01875d87, 0xcd2d5d19, 0x43a25afa, 0x8f085a64,
|
||||||
|
0x562848c8, 0x9a824856, 0x140d4fb5, 0xd8a74f2b,
|
||||||
|
0xd2624632, 0x1ec846ac, 0x9047414f, 0x5ced41d1,
|
||||||
|
0x299dc2ed, 0xe537c273, 0x6bb8c590, 0xa712c50e,
|
||||||
|
0xadd7cc17, 0x617dcc89, 0xeff2cb6a, 0x2358cbf4,
|
||||||
|
0xfa78d958, 0x36d2d9c6, 0xb85dde25, 0x74f7debb,
|
||||||
|
0x7e32d7a2, 0xb298d73c, 0x3c17d0df, 0xf0bdd041,
|
||||||
|
0x5526f3c6, 0x998cf358, 0x1703f4bb, 0xdba9f425,
|
||||||
|
0xd16cfd3c, 0x1dc6fda2, 0x9349fa41, 0x5fe3fadf,
|
||||||
|
0x86c3e873, 0x4a69e8ed, 0xc4e6ef0e, 0x084cef90,
|
||||||
|
0x0289e689, 0xce23e617, 0x40ace1f4, 0x8c06e16a,
|
||||||
|
0xd0eba0bb, 0x1c41a025, 0x92cea7c6, 0x5e64a758,
|
||||||
|
0x54a1ae41, 0x980baedf, 0x1684a93c, 0xda2ea9a2,
|
||||||
|
0x030ebb0e, 0xcfa4bb90, 0x412bbc73, 0x8d81bced,
|
||||||
|
0x8744b5f4, 0x4beeb56a, 0xc561b289, 0x09cbb217,
|
||||||
|
0xac509190, 0x60fa910e, 0xee7596ed, 0x22df9673,
|
||||||
|
0x281a9f6a, 0xe4b09ff4, 0x6a3f9817, 0xa6959889,
|
||||||
|
0x7fb58a25, 0xb31f8abb, 0x3d908d58, 0xf13a8dc6,
|
||||||
|
0xfbff84df, 0x37558441, 0xb9da83a2, 0x7570833c,
|
||||||
|
0x533b85da, 0x9f918544, 0x111e82a7, 0xddb48239,
|
||||||
|
0xd7718b20, 0x1bdb8bbe, 0x95548c5d, 0x59fe8cc3,
|
||||||
|
0x80de9e6f, 0x4c749ef1, 0xc2fb9912, 0x0e51998c,
|
||||||
|
0x04949095, 0xc83e900b, 0x46b197e8, 0x8a1b9776,
|
||||||
|
0x2f80b4f1, 0xe32ab46f, 0x6da5b38c, 0xa10fb312,
|
||||||
|
0xabcaba0b, 0x6760ba95, 0xe9efbd76, 0x2545bde8,
|
||||||
|
0xfc65af44, 0x30cfafda, 0xbe40a839, 0x72eaa8a7,
|
||||||
|
0x782fa1be, 0xb485a120, 0x3a0aa6c3, 0xf6a0a65d,
|
||||||
|
0xaa4de78c, 0x66e7e712, 0xe868e0f1, 0x24c2e06f,
|
||||||
|
0x2e07e976, 0xe2ade9e8, 0x6c22ee0b, 0xa088ee95,
|
||||||
|
0x79a8fc39, 0xb502fca7, 0x3b8dfb44, 0xf727fbda,
|
||||||
|
0xfde2f2c3, 0x3148f25d, 0xbfc7f5be, 0x736df520,
|
||||||
|
0xd6f6d6a7, 0x1a5cd639, 0x94d3d1da, 0x5879d144,
|
||||||
|
0x52bcd85d, 0x9e16d8c3, 0x1099df20, 0xdc33dfbe,
|
||||||
|
0x0513cd12, 0xc9b9cd8c, 0x4736ca6f, 0x8b9ccaf1,
|
||||||
|
0x8159c3e8, 0x4df3c376, 0xc37cc495, 0x0fd6c40b,
|
||||||
|
0x7aa64737, 0xb60c47a9, 0x3883404a, 0xf42940d4,
|
||||||
|
0xfeec49cd, 0x32464953, 0xbcc94eb0, 0x70634e2e,
|
||||||
|
0xa9435c82, 0x65e95c1c, 0xeb665bff, 0x27cc5b61,
|
||||||
|
0x2d095278, 0xe1a352e6, 0x6f2c5505, 0xa386559b,
|
||||||
|
0x061d761c, 0xcab77682, 0x44387161, 0x889271ff,
|
||||||
|
0x825778e6, 0x4efd7878, 0xc0727f9b, 0x0cd87f05,
|
||||||
|
0xd5f86da9, 0x19526d37, 0x97dd6ad4, 0x5b776a4a,
|
||||||
|
0x51b26353, 0x9d1863cd, 0x1397642e, 0xdf3d64b0,
|
||||||
|
0x83d02561, 0x4f7a25ff, 0xc1f5221c, 0x0d5f2282,
|
||||||
|
0x079a2b9b, 0xcb302b05, 0x45bf2ce6, 0x89152c78,
|
||||||
|
0x50353ed4, 0x9c9f3e4a, 0x121039a9, 0xdeba3937,
|
||||||
|
0xd47f302e, 0x18d530b0, 0x965a3753, 0x5af037cd,
|
||||||
|
0xff6b144a, 0x33c114d4, 0xbd4e1337, 0x71e413a9,
|
||||||
|
0x7b211ab0, 0xb78b1a2e, 0x39041dcd, 0xf5ae1d53,
|
||||||
|
0x2c8e0fff, 0xe0240f61, 0x6eab0882, 0xa201081c,
|
||||||
|
0xa8c40105, 0x646e019b, 0xeae10678, 0x264b06e6,
|
||||||
|
#endif /* CRC32_SLICE8 */
|
||||||
|
};
|
||||||
61
lib/gdeflate/libdeflate/lib/crc32_vec_template.h
Normal file
61
lib/gdeflate/libdeflate/lib/crc32_vec_template.h
Normal file
|
|
@ -0,0 +1,61 @@
|
||||||
|
/*
|
||||||
|
* crc32_vec_template.h - template for vectorized CRC-32 implementations
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define CRC32_SLICE1 1
|
||||||
|
static u32 crc32_slice1(u32, const u8 *, size_t);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Template for vectorized CRC-32 implementations.
|
||||||
|
*
|
||||||
|
* Note: on unaligned ends of the buffer, we fall back to crc32_slice1() instead
|
||||||
|
* of crc32_slice8() because only a few bytes need to be processed, so a smaller
|
||||||
|
* table is preferable.
|
||||||
|
*/
|
||||||
|
static u32 ATTRIBUTES
|
||||||
|
FUNCNAME(u32 remainder, const u8 *p, size_t size)
|
||||||
|
{
|
||||||
|
if ((uintptr_t)p % IMPL_ALIGNMENT) {
|
||||||
|
size_t n = MIN(size, -(uintptr_t)p % IMPL_ALIGNMENT);
|
||||||
|
|
||||||
|
remainder = crc32_slice1(remainder, p, n);
|
||||||
|
p += n;
|
||||||
|
size -= n;
|
||||||
|
}
|
||||||
|
if (size >= IMPL_SEGMENT_SIZE) {
|
||||||
|
remainder = FUNCNAME_ALIGNED(remainder, (const void *)p,
|
||||||
|
size / IMPL_SEGMENT_SIZE);
|
||||||
|
p += size - (size % IMPL_SEGMENT_SIZE);
|
||||||
|
size %= IMPL_SEGMENT_SIZE;
|
||||||
|
}
|
||||||
|
return crc32_slice1(remainder, p, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef FUNCNAME
|
||||||
|
#undef FUNCNAME_ALIGNED
|
||||||
|
#undef ATTRIBUTES
|
||||||
|
#undef IMPL_ALIGNMENT
|
||||||
|
#undef IMPL_SEGMENT_SIZE
|
||||||
421
lib/gdeflate/libdeflate/lib/decompress_template.h
Normal file
421
lib/gdeflate/libdeflate/lib/decompress_template.h
Normal file
|
|
@ -0,0 +1,421 @@
|
||||||
|
/*
|
||||||
|
* decompress_template.h
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is the actual DEFLATE decompression routine, lifted out of
|
||||||
|
* deflate_decompress.c so that it can be compiled multiple times with different
|
||||||
|
* target instruction sets.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static enum libdeflate_result ATTRIBUTES
|
||||||
|
FUNCNAME(struct libdeflate_decompressor * restrict d,
|
||||||
|
const void * restrict in, size_t in_nbytes,
|
||||||
|
void * restrict out, size_t out_nbytes_avail,
|
||||||
|
size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret)
|
||||||
|
{
|
||||||
|
u8 *out_next = out;
|
||||||
|
u8 * const out_end = out_next + out_nbytes_avail;
|
||||||
|
const u8 *in_next = in;
|
||||||
|
const u8 * const in_end = in_next + in_nbytes;
|
||||||
|
bitbuf_t bitbuf = 0;
|
||||||
|
unsigned bitsleft = 0;
|
||||||
|
size_t overrun_count = 0;
|
||||||
|
unsigned i;
|
||||||
|
unsigned is_final_block;
|
||||||
|
unsigned block_type;
|
||||||
|
u16 len;
|
||||||
|
u16 nlen;
|
||||||
|
unsigned num_litlen_syms;
|
||||||
|
unsigned num_offset_syms;
|
||||||
|
u16 tmp16;
|
||||||
|
u32 tmp32;
|
||||||
|
|
||||||
|
next_block:
|
||||||
|
/* Starting to read the next block. */
|
||||||
|
;
|
||||||
|
|
||||||
|
STATIC_ASSERT(CAN_ENSURE(1 + 2 + 5 + 5 + 4));
|
||||||
|
ENSURE_BITS(1 + 2 + 5 + 5 + 4);
|
||||||
|
|
||||||
|
/* BFINAL: 1 bit */
|
||||||
|
is_final_block = POP_BITS(1);
|
||||||
|
|
||||||
|
/* BTYPE: 2 bits */
|
||||||
|
block_type = POP_BITS(2);
|
||||||
|
|
||||||
|
if (block_type == DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN) {
|
||||||
|
|
||||||
|
/* Dynamic Huffman block. */
|
||||||
|
|
||||||
|
/* The order in which precode lengths are stored. */
|
||||||
|
static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = {
|
||||||
|
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
|
||||||
|
};
|
||||||
|
|
||||||
|
unsigned num_explicit_precode_lens;
|
||||||
|
|
||||||
|
/* Read the codeword length counts. */
|
||||||
|
|
||||||
|
STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == ((1 << 5) - 1) + 257);
|
||||||
|
num_litlen_syms = POP_BITS(5) + 257;
|
||||||
|
|
||||||
|
STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == ((1 << 5) - 1) + 1);
|
||||||
|
num_offset_syms = POP_BITS(5) + 1;
|
||||||
|
|
||||||
|
STATIC_ASSERT(DEFLATE_NUM_PRECODE_SYMS == ((1 << 4) - 1) + 4);
|
||||||
|
num_explicit_precode_lens = POP_BITS(4) + 4;
|
||||||
|
|
||||||
|
d->static_codes_loaded = false;
|
||||||
|
|
||||||
|
/* Read the precode codeword lengths. */
|
||||||
|
STATIC_ASSERT(DEFLATE_MAX_PRE_CODEWORD_LEN == (1 << 3) - 1);
|
||||||
|
for (i = 0; i < num_explicit_precode_lens; i++) {
|
||||||
|
ENSURE_BITS(3);
|
||||||
|
d->u.precode_lens[deflate_precode_lens_permutation[i]] = POP_BITS(3);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; i < DEFLATE_NUM_PRECODE_SYMS; i++)
|
||||||
|
d->u.precode_lens[deflate_precode_lens_permutation[i]] = 0;
|
||||||
|
|
||||||
|
/* Build the decode table for the precode. */
|
||||||
|
SAFETY_CHECK(build_precode_decode_table(d));
|
||||||
|
|
||||||
|
/* Expand the literal/length and offset codeword lengths. */
|
||||||
|
for (i = 0; i < num_litlen_syms + num_offset_syms; ) {
|
||||||
|
u32 entry;
|
||||||
|
unsigned presym;
|
||||||
|
u8 rep_val;
|
||||||
|
unsigned rep_count;
|
||||||
|
|
||||||
|
ENSURE_BITS(DEFLATE_MAX_PRE_CODEWORD_LEN + 7);
|
||||||
|
|
||||||
|
/* (The code below assumes that the precode decode table
|
||||||
|
* does not have any subtables.) */
|
||||||
|
STATIC_ASSERT(PRECODE_TABLEBITS == DEFLATE_MAX_PRE_CODEWORD_LEN);
|
||||||
|
|
||||||
|
/* Read the next precode symbol. */
|
||||||
|
entry = d->u.l.precode_decode_table[BITS(DEFLATE_MAX_PRE_CODEWORD_LEN)];
|
||||||
|
REMOVE_BITS(entry & HUFFDEC_LENGTH_MASK);
|
||||||
|
presym = entry >> HUFFDEC_RESULT_SHIFT;
|
||||||
|
|
||||||
|
if (presym < 16) {
|
||||||
|
/* Explicit codeword length */
|
||||||
|
d->u.l.lens[i++] = presym;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Run-length encoded codeword lengths */
|
||||||
|
|
||||||
|
/* Note: we don't need verify that the repeat count
|
||||||
|
* doesn't overflow the number of elements, since we
|
||||||
|
* have enough extra spaces to allow for the worst-case
|
||||||
|
* overflow (138 zeroes when only 1 length was
|
||||||
|
* remaining).
|
||||||
|
*
|
||||||
|
* In the case of the small repeat counts (presyms 16
|
||||||
|
* and 17), it is fastest to always write the maximum
|
||||||
|
* number of entries. That gets rid of branches that
|
||||||
|
* would otherwise be required.
|
||||||
|
*
|
||||||
|
* It is not just because of the numerical order that
|
||||||
|
* our checks go in the order 'presym < 16', 'presym ==
|
||||||
|
* 16', and 'presym == 17'. For typical data this is
|
||||||
|
* ordered from most frequent to least frequent case.
|
||||||
|
*/
|
||||||
|
STATIC_ASSERT(DEFLATE_MAX_LENS_OVERRUN == 138 - 1);
|
||||||
|
|
||||||
|
if (presym == 16) {
|
||||||
|
/* Repeat the previous length 3 - 6 times */
|
||||||
|
SAFETY_CHECK(i != 0);
|
||||||
|
rep_val = d->u.l.lens[i - 1];
|
||||||
|
STATIC_ASSERT(3 + ((1 << 2) - 1) == 6);
|
||||||
|
rep_count = 3 + POP_BITS(2);
|
||||||
|
d->u.l.lens[i + 0] = rep_val;
|
||||||
|
d->u.l.lens[i + 1] = rep_val;
|
||||||
|
d->u.l.lens[i + 2] = rep_val;
|
||||||
|
d->u.l.lens[i + 3] = rep_val;
|
||||||
|
d->u.l.lens[i + 4] = rep_val;
|
||||||
|
d->u.l.lens[i + 5] = rep_val;
|
||||||
|
i += rep_count;
|
||||||
|
} else if (presym == 17) {
|
||||||
|
/* Repeat zero 3 - 10 times */
|
||||||
|
STATIC_ASSERT(3 + ((1 << 3) - 1) == 10);
|
||||||
|
rep_count = 3 + POP_BITS(3);
|
||||||
|
d->u.l.lens[i + 0] = 0;
|
||||||
|
d->u.l.lens[i + 1] = 0;
|
||||||
|
d->u.l.lens[i + 2] = 0;
|
||||||
|
d->u.l.lens[i + 3] = 0;
|
||||||
|
d->u.l.lens[i + 4] = 0;
|
||||||
|
d->u.l.lens[i + 5] = 0;
|
||||||
|
d->u.l.lens[i + 6] = 0;
|
||||||
|
d->u.l.lens[i + 7] = 0;
|
||||||
|
d->u.l.lens[i + 8] = 0;
|
||||||
|
d->u.l.lens[i + 9] = 0;
|
||||||
|
i += rep_count;
|
||||||
|
} else {
|
||||||
|
/* Repeat zero 11 - 138 times */
|
||||||
|
STATIC_ASSERT(11 + ((1 << 7) - 1) == 138);
|
||||||
|
rep_count = 11 + POP_BITS(7);
|
||||||
|
memset(&d->u.l.lens[i], 0,
|
||||||
|
rep_count * sizeof(d->u.l.lens[i]));
|
||||||
|
i += rep_count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (block_type == DEFLATE_BLOCKTYPE_UNCOMPRESSED) {
|
||||||
|
|
||||||
|
/* Uncompressed block: copy 'len' bytes literally from the input
|
||||||
|
* buffer to the output buffer. */
|
||||||
|
|
||||||
|
ALIGN_INPUT();
|
||||||
|
|
||||||
|
SAFETY_CHECK(in_end - in_next >= 4);
|
||||||
|
|
||||||
|
len = READ_U16();
|
||||||
|
nlen = READ_U16();
|
||||||
|
|
||||||
|
SAFETY_CHECK(len == (u16)~nlen);
|
||||||
|
if (unlikely(len > out_end - out_next))
|
||||||
|
return LIBDEFLATE_INSUFFICIENT_SPACE;
|
||||||
|
SAFETY_CHECK(len <= in_end - in_next);
|
||||||
|
|
||||||
|
memcpy(out_next, in_next, len);
|
||||||
|
in_next += len;
|
||||||
|
out_next += len;
|
||||||
|
|
||||||
|
goto block_done;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
SAFETY_CHECK(block_type == DEFLATE_BLOCKTYPE_STATIC_HUFFMAN);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Static Huffman block: build the decode tables for the static
|
||||||
|
* codes. Skip doing so if the tables are already set up from
|
||||||
|
* an earlier static block; this speeds up decompression of
|
||||||
|
* degenerate input of many empty or very short static blocks.
|
||||||
|
*
|
||||||
|
* Afterwards, the remainder is the same as decompressing a
|
||||||
|
* dynamic Huffman block.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (d->static_codes_loaded)
|
||||||
|
goto have_decode_tables;
|
||||||
|
|
||||||
|
d->static_codes_loaded = true;
|
||||||
|
|
||||||
|
STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 288);
|
||||||
|
STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 32);
|
||||||
|
|
||||||
|
for (i = 0; i < 144; i++)
|
||||||
|
d->u.l.lens[i] = 8;
|
||||||
|
for (; i < 256; i++)
|
||||||
|
d->u.l.lens[i] = 9;
|
||||||
|
for (; i < 280; i++)
|
||||||
|
d->u.l.lens[i] = 7;
|
||||||
|
for (; i < 288; i++)
|
||||||
|
d->u.l.lens[i] = 8;
|
||||||
|
|
||||||
|
for (; i < 288 + 32; i++)
|
||||||
|
d->u.l.lens[i] = 5;
|
||||||
|
|
||||||
|
num_litlen_syms = 288;
|
||||||
|
num_offset_syms = 32;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Decompressing a Huffman block (either dynamic or static) */
|
||||||
|
|
||||||
|
SAFETY_CHECK(build_offset_decode_table(d, num_litlen_syms, num_offset_syms));
|
||||||
|
SAFETY_CHECK(build_litlen_decode_table(d, num_litlen_syms, num_offset_syms));
|
||||||
|
have_decode_tables:
|
||||||
|
|
||||||
|
/* The main DEFLATE decode loop */
|
||||||
|
for (;;) {
|
||||||
|
u32 entry;
|
||||||
|
u32 length;
|
||||||
|
u32 offset;
|
||||||
|
const u8 *src;
|
||||||
|
u8 *dst;
|
||||||
|
|
||||||
|
/* Decode a litlen symbol. */
|
||||||
|
ENSURE_BITS(DEFLATE_MAX_LITLEN_CODEWORD_LEN);
|
||||||
|
entry = d->u.litlen_decode_table[BITS(LITLEN_TABLEBITS)];
|
||||||
|
if (entry & HUFFDEC_SUBTABLE_POINTER) {
|
||||||
|
/* Litlen subtable required (uncommon case) */
|
||||||
|
REMOVE_BITS(LITLEN_TABLEBITS);
|
||||||
|
entry = d->u.litlen_decode_table[
|
||||||
|
((entry >> HUFFDEC_RESULT_SHIFT) & 0xFFFF) +
|
||||||
|
BITS(entry & HUFFDEC_LENGTH_MASK)];
|
||||||
|
}
|
||||||
|
REMOVE_BITS(entry & HUFFDEC_LENGTH_MASK);
|
||||||
|
if (entry & HUFFDEC_LITERAL) {
|
||||||
|
/* Literal */
|
||||||
|
if (unlikely(out_next == out_end))
|
||||||
|
return LIBDEFLATE_INSUFFICIENT_SPACE;
|
||||||
|
*out_next++ = (u8)(entry >> HUFFDEC_RESULT_SHIFT);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Match or end-of-block */
|
||||||
|
|
||||||
|
entry >>= HUFFDEC_RESULT_SHIFT;
|
||||||
|
ENSURE_BITS(MAX_ENSURE);
|
||||||
|
|
||||||
|
/* Pop the extra length bits and add them to the length base to
|
||||||
|
* produce the full length. */
|
||||||
|
length = (entry >> HUFFDEC_LENGTH_BASE_SHIFT) +
|
||||||
|
POP_BITS(entry & HUFFDEC_EXTRA_LENGTH_BITS_MASK);
|
||||||
|
|
||||||
|
/* The match destination must not end after the end of the
|
||||||
|
* output buffer. For efficiency, combine this check with the
|
||||||
|
* end-of-block check. We're using 0 for the special
|
||||||
|
* end-of-block length, so subtract 1 and it turn it into
|
||||||
|
* SIZE_MAX. */
|
||||||
|
STATIC_ASSERT(HUFFDEC_END_OF_BLOCK_LENGTH == 0);
|
||||||
|
if (unlikely((size_t)length - 1 >= out_end - out_next)) {
|
||||||
|
if (unlikely(length != HUFFDEC_END_OF_BLOCK_LENGTH))
|
||||||
|
return LIBDEFLATE_INSUFFICIENT_SPACE;
|
||||||
|
goto block_done;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Decode the match offset. */
|
||||||
|
|
||||||
|
entry = d->offset_decode_table[BITS(OFFSET_TABLEBITS)];
|
||||||
|
if (entry & HUFFDEC_SUBTABLE_POINTER) {
|
||||||
|
/* Offset subtable required (uncommon case) */
|
||||||
|
REMOVE_BITS(OFFSET_TABLEBITS);
|
||||||
|
entry = d->offset_decode_table[
|
||||||
|
((entry >> HUFFDEC_RESULT_SHIFT) & 0xFFFF) +
|
||||||
|
BITS(entry & HUFFDEC_LENGTH_MASK)];
|
||||||
|
}
|
||||||
|
REMOVE_BITS(entry & HUFFDEC_LENGTH_MASK);
|
||||||
|
entry >>= HUFFDEC_RESULT_SHIFT;
|
||||||
|
|
||||||
|
STATIC_ASSERT(CAN_ENSURE(DEFLATE_MAX_EXTRA_LENGTH_BITS +
|
||||||
|
DEFLATE_MAX_OFFSET_CODEWORD_LEN) &&
|
||||||
|
CAN_ENSURE(DEFLATE_MAX_EXTRA_OFFSET_BITS));
|
||||||
|
if (!CAN_ENSURE(DEFLATE_MAX_EXTRA_LENGTH_BITS +
|
||||||
|
DEFLATE_MAX_OFFSET_CODEWORD_LEN +
|
||||||
|
DEFLATE_MAX_EXTRA_OFFSET_BITS))
|
||||||
|
ENSURE_BITS(DEFLATE_MAX_EXTRA_OFFSET_BITS);
|
||||||
|
|
||||||
|
/* Pop the extra offset bits and add them to the offset base to
|
||||||
|
* produce the full offset. */
|
||||||
|
offset = (entry & HUFFDEC_OFFSET_BASE_MASK) +
|
||||||
|
POP_BITS(entry >> HUFFDEC_EXTRA_OFFSET_BITS_SHIFT);
|
||||||
|
|
||||||
|
/* The match source must not begin before the beginning of the
|
||||||
|
* output buffer. */
|
||||||
|
SAFETY_CHECK(offset <= out_next - (const u8 *)out);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copy the match: 'length' bytes at 'out_next - offset' to
|
||||||
|
* 'out_next', possibly overlapping. If the match doesn't end
|
||||||
|
* too close to the end of the buffer and offset >= WORDBYTES ||
|
||||||
|
* offset == 1, take a fast path which copies a word at a time
|
||||||
|
* -- potentially more than the length of the match, but that's
|
||||||
|
* fine as long as we check for enough extra space.
|
||||||
|
*
|
||||||
|
* The remaining cases are not performance-critical so are
|
||||||
|
* handled by a simple byte-by-byte copy.
|
||||||
|
*/
|
||||||
|
|
||||||
|
src = out_next - offset;
|
||||||
|
dst = out_next;
|
||||||
|
out_next += length;
|
||||||
|
|
||||||
|
if (UNALIGNED_ACCESS_IS_FAST &&
|
||||||
|
/* max overrun is writing 3 words for a min length match */
|
||||||
|
likely(out_end - out_next >=
|
||||||
|
3 * WORDBYTES - DEFLATE_MIN_MATCH_LEN)) {
|
||||||
|
if (offset >= WORDBYTES) { /* words don't overlap? */
|
||||||
|
copy_word_unaligned(src, dst);
|
||||||
|
src += WORDBYTES;
|
||||||
|
dst += WORDBYTES;
|
||||||
|
copy_word_unaligned(src, dst);
|
||||||
|
src += WORDBYTES;
|
||||||
|
dst += WORDBYTES;
|
||||||
|
do {
|
||||||
|
copy_word_unaligned(src, dst);
|
||||||
|
src += WORDBYTES;
|
||||||
|
dst += WORDBYTES;
|
||||||
|
} while (dst < out_next);
|
||||||
|
} else if (offset == 1) {
|
||||||
|
/* RLE encoding of previous byte, common if the
|
||||||
|
* data contains many repeated bytes */
|
||||||
|
machine_word_t v = repeat_byte(*src);
|
||||||
|
|
||||||
|
store_word_unaligned(v, dst);
|
||||||
|
dst += WORDBYTES;
|
||||||
|
store_word_unaligned(v, dst);
|
||||||
|
dst += WORDBYTES;
|
||||||
|
do {
|
||||||
|
store_word_unaligned(v, dst);
|
||||||
|
dst += WORDBYTES;
|
||||||
|
} while (dst < out_next);
|
||||||
|
} else {
|
||||||
|
*dst++ = *src++;
|
||||||
|
*dst++ = *src++;
|
||||||
|
do {
|
||||||
|
*dst++ = *src++;
|
||||||
|
} while (dst < out_next);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
STATIC_ASSERT(DEFLATE_MIN_MATCH_LEN == 3);
|
||||||
|
*dst++ = *src++;
|
||||||
|
*dst++ = *src++;
|
||||||
|
do {
|
||||||
|
*dst++ = *src++;
|
||||||
|
} while (dst < out_next);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
block_done:
|
||||||
|
/* Finished decoding a block. */
|
||||||
|
|
||||||
|
if (!is_final_block)
|
||||||
|
goto next_block;
|
||||||
|
|
||||||
|
/* That was the last block. */
|
||||||
|
|
||||||
|
/* Discard any readahead bits and check for excessive overread */
|
||||||
|
ALIGN_INPUT();
|
||||||
|
|
||||||
|
/* Optionally return the actual number of bytes read */
|
||||||
|
if (actual_in_nbytes_ret)
|
||||||
|
*actual_in_nbytes_ret = in_next - (u8 *)in;
|
||||||
|
|
||||||
|
/* Optionally return the actual number of bytes written */
|
||||||
|
if (actual_out_nbytes_ret) {
|
||||||
|
*actual_out_nbytes_ret = out_next - (u8 *)out;
|
||||||
|
} else {
|
||||||
|
if (out_next != out_end)
|
||||||
|
return LIBDEFLATE_SHORT_OUTPUT;
|
||||||
|
}
|
||||||
|
return LIBDEFLATE_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef FUNCNAME
|
||||||
|
#undef ATTRIBUTES
|
||||||
3360
lib/gdeflate/libdeflate/lib/deflate_compress.c
Normal file
3360
lib/gdeflate/libdeflate/lib/deflate_compress.c
Normal file
File diff suppressed because it is too large
Load diff
13
lib/gdeflate/libdeflate/lib/deflate_compress.h
Normal file
13
lib/gdeflate/libdeflate/lib/deflate_compress.h
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
#ifndef LIB_DEFLATE_COMPRESS_H
|
||||||
|
#define LIB_DEFLATE_COMPRESS_H
|
||||||
|
|
||||||
|
#include "lib_common.h"
|
||||||
|
|
||||||
|
/* DEFLATE compression is private to deflate_compress.c, but we do need to be
|
||||||
|
* able to query the compression level for zlib and gzip header generation. */
|
||||||
|
|
||||||
|
struct libdeflate_compressor;
|
||||||
|
|
||||||
|
unsigned int deflate_get_compression_level(struct libdeflate_compressor *c);
|
||||||
|
|
||||||
|
#endif /* LIB_DEFLATE_COMPRESS_H */
|
||||||
102
lib/gdeflate/libdeflate/lib/deflate_constants.h
Normal file
102
lib/gdeflate/libdeflate/lib/deflate_constants.h
Normal file
|
|
@ -0,0 +1,102 @@
|
||||||
|
/*
|
||||||
|
* deflate_constants.h - constants for the DEFLATE compression format
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIB_DEFLATE_CONSTANTS_H
|
||||||
|
#define LIB_DEFLATE_CONSTANTS_H
|
||||||
|
|
||||||
|
#ifdef WITH_GDEFLATE
|
||||||
|
|
||||||
|
/* Enable GDeflate mode. */
|
||||||
|
#define GDEFLATE
|
||||||
|
|
||||||
|
/* GDeflate is deflate64-based. */
|
||||||
|
#define DEFLATE64
|
||||||
|
|
||||||
|
/* Number of GDeflate streams. */
|
||||||
|
#define NUM_STREAMS 32
|
||||||
|
|
||||||
|
/* The number of bits to keep in input buffer. */
|
||||||
|
#define LOW_WATERMARK_BITS 32
|
||||||
|
|
||||||
|
/* Number of bits per GDeflate bit-packet. */
|
||||||
|
#define BITS_PER_PACKET 32
|
||||||
|
|
||||||
|
/* GDeflate page size. */
|
||||||
|
#define GDEFLATE_PAGE_SIZE 65536
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Valid block types */
|
||||||
|
#define DEFLATE_BLOCKTYPE_UNCOMPRESSED 0
|
||||||
|
#define DEFLATE_BLOCKTYPE_STATIC_HUFFMAN 1
|
||||||
|
#define DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN 2
|
||||||
|
|
||||||
|
/* Minimum and maximum supported match lengths (in bytes) */
|
||||||
|
#define DEFLATE_MIN_MATCH_LEN 3
|
||||||
|
#ifndef DEFLATE64
|
||||||
|
#define DEFLATE_MAX_MATCH_LEN 258
|
||||||
|
#else
|
||||||
|
/* The maximum length of a deflate64 match is 65538, however the bt_matcher
|
||||||
|
* uses 16-bits to store match lengths so to have the bt_matcher to
|
||||||
|
* work without overflows the maximum length either needs to be reduced
|
||||||
|
* to fit into 16-bits, or length storage type changed to a wider type. */
|
||||||
|
#define DEFLATE_MAX_MATCH_LEN 65535
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Minimum and maximum supported match offsets (in bytes) */
|
||||||
|
#define DEFLATE_MIN_MATCH_OFFSET 1
|
||||||
|
#ifdef DEFLATE64
|
||||||
|
#define DEFLATE_MAX_MATCH_OFFSET (32768*2)
|
||||||
|
#define DEFLATE_MAX_WINDOW_SIZE (32768*2)
|
||||||
|
#else
|
||||||
|
#define DEFLATE_MAX_MATCH_OFFSET 32768
|
||||||
|
#define DEFLATE_MAX_WINDOW_SIZE 32768
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Number of symbols in each Huffman code. Note: for the literal/length
|
||||||
|
* and offset codes, these are actually the maximum values; a given block
|
||||||
|
* might use fewer symbols. */
|
||||||
|
#define DEFLATE_NUM_PRECODE_SYMS 19
|
||||||
|
#define DEFLATE_NUM_LITLEN_SYMS 288
|
||||||
|
#define DEFLATE_NUM_OFFSET_SYMS 32
|
||||||
|
|
||||||
|
/* The maximum number of symbols across all codes */
|
||||||
|
#define DEFLATE_MAX_NUM_SYMS 288
|
||||||
|
|
||||||
|
/* Division of symbols in the literal/length code */
|
||||||
|
#define DEFLATE_NUM_LITERALS 256
|
||||||
|
#define DEFLATE_END_OF_BLOCK 256
|
||||||
|
#define DEFLATE_NUM_LEN_SYMS 31
|
||||||
|
|
||||||
|
/* Maximum codeword length, in bits, within each Huffman code */
|
||||||
|
#define DEFLATE_MAX_PRE_CODEWORD_LEN 7
|
||||||
|
#define DEFLATE_MAX_LITLEN_CODEWORD_LEN 15
|
||||||
|
#define DEFLATE_MAX_OFFSET_CODEWORD_LEN 15
|
||||||
|
|
||||||
|
/* The maximum codeword length across all codes */
|
||||||
|
#define DEFLATE_MAX_CODEWORD_LEN 15
|
||||||
|
|
||||||
|
/* Maximum possible overrun when decoding codeword lengths */
|
||||||
|
#define DEFLATE_MAX_LENS_OVERRUN 137
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Maximum number of extra bits that may be required to represent a match
|
||||||
|
* length or offset.
|
||||||
|
*/
|
||||||
|
#ifdef DEFLATE64
|
||||||
|
#define DEFLATE_MAX_EXTRA_LENGTH_BITS 16
|
||||||
|
#define DEFLATE_MAX_EXTRA_OFFSET_BITS 14
|
||||||
|
#else
|
||||||
|
#define DEFLATE_MAX_EXTRA_LENGTH_BITS 5
|
||||||
|
#define DEFLATE_MAX_EXTRA_OFFSET_BITS 14
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* The maximum number of bits in which a match can be represented. This
|
||||||
|
* is the absolute worst case, which assumes the longest possible Huffman
|
||||||
|
* codewords and the maximum numbers of extra bits. */
|
||||||
|
#define DEFLATE_MAX_MATCH_BITS \
|
||||||
|
(DEFLATE_MAX_LITLEN_CODEWORD_LEN + DEFLATE_MAX_EXTRA_LENGTH_BITS + \
|
||||||
|
DEFLATE_MAX_OFFSET_CODEWORD_LEN + DEFLATE_MAX_EXTRA_OFFSET_BITS)
|
||||||
|
|
||||||
|
#endif /* LIB_DEFLATE_CONSTANTS_H */
|
||||||
1021
lib/gdeflate/libdeflate/lib/deflate_decompress.c
Normal file
1021
lib/gdeflate/libdeflate/lib/deflate_decompress.c
Normal file
File diff suppressed because it is too large
Load diff
246
lib/gdeflate/libdeflate/lib/gdeflate_compress.c
Normal file
246
lib/gdeflate/libdeflate/lib/gdeflate_compress.c
Normal file
|
|
@ -0,0 +1,246 @@
|
||||||
|
/*
|
||||||
|
* gdeflate_compress.c - a compressor for GDEFLATE
|
||||||
|
*
|
||||||
|
* Originally public domain; changes after 2016-09-07 are copyrighted.
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
* SPDX-FileCopyrightText: Copyright (c) 2020, 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define WITH_GDEFLATE
|
||||||
|
#define HIDE_INTERFACE
|
||||||
|
|
||||||
|
#define libdeflate_compressor libdeflate_gdeflate_compressor
|
||||||
|
|
||||||
|
#include "deflate_compress.c"
|
||||||
|
|
||||||
|
LIBDEFLATEEXPORT struct libdeflate_compressor * LIBDEFLATEAPI
|
||||||
|
libdeflate_alloc_gdeflate_compressor(int compression_level)
|
||||||
|
{
|
||||||
|
struct libdeflate_compressor *c;
|
||||||
|
size_t size = offsetof(struct libdeflate_compressor, p);
|
||||||
|
|
||||||
|
if (compression_level < 0 || compression_level > 12)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
#if SUPPORT_NEAR_OPTIMAL_PARSING
|
||||||
|
if (compression_level >= 8)
|
||||||
|
size += sizeof(c->p.n);
|
||||||
|
else if (compression_level >= 1)
|
||||||
|
size += sizeof(c->p.g);
|
||||||
|
#else
|
||||||
|
if (compression_level >= 1)
|
||||||
|
size += sizeof(c->p.g);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
c = libdeflate_aligned_malloc(MATCHFINDER_MEM_ALIGNMENT, size);
|
||||||
|
if (!c)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
c->compression_level = compression_level;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The higher the compression level, the more we should bother trying to
|
||||||
|
* compress very small inputs.
|
||||||
|
*/
|
||||||
|
c->min_size_to_compress = 56 - (compression_level * 4);
|
||||||
|
|
||||||
|
switch (compression_level) {
|
||||||
|
case 0:
|
||||||
|
c->impl = deflate_compress_none;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
c->impl = deflate_compress_greedy;
|
||||||
|
c->max_search_depth = 2;
|
||||||
|
c->nice_match_length = 8;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
c->impl = deflate_compress_greedy;
|
||||||
|
c->max_search_depth = 6;
|
||||||
|
c->nice_match_length = 10;
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
c->impl = deflate_compress_greedy;
|
||||||
|
c->max_search_depth = 12;
|
||||||
|
c->nice_match_length = 14;
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
c->impl = deflate_compress_greedy;
|
||||||
|
c->max_search_depth = 24;
|
||||||
|
c->nice_match_length = 24;
|
||||||
|
break;
|
||||||
|
case 5:
|
||||||
|
c->impl = deflate_compress_lazy;
|
||||||
|
c->max_search_depth = 20;
|
||||||
|
c->nice_match_length = 30;
|
||||||
|
break;
|
||||||
|
case 6:
|
||||||
|
c->impl = deflate_compress_lazy;
|
||||||
|
c->max_search_depth = 40;
|
||||||
|
c->nice_match_length = 65;
|
||||||
|
break;
|
||||||
|
case 7:
|
||||||
|
c->impl = deflate_compress_lazy;
|
||||||
|
c->max_search_depth = 100;
|
||||||
|
c->nice_match_length = 130;
|
||||||
|
break;
|
||||||
|
#if SUPPORT_NEAR_OPTIMAL_PARSING
|
||||||
|
case 8:
|
||||||
|
c->impl = deflate_compress_near_optimal;
|
||||||
|
c->max_search_depth = 12;
|
||||||
|
c->nice_match_length = 20;
|
||||||
|
c->p.n.num_optim_passes = 1;
|
||||||
|
break;
|
||||||
|
case 9:
|
||||||
|
c->impl = deflate_compress_near_optimal;
|
||||||
|
c->max_search_depth = 16;
|
||||||
|
c->nice_match_length = 26;
|
||||||
|
c->p.n.num_optim_passes = 2;
|
||||||
|
break;
|
||||||
|
case 10:
|
||||||
|
c->impl = deflate_compress_near_optimal;
|
||||||
|
c->max_search_depth = 30;
|
||||||
|
c->nice_match_length = 50;
|
||||||
|
c->p.n.num_optim_passes = 2;
|
||||||
|
break;
|
||||||
|
case 11:
|
||||||
|
c->impl = deflate_compress_near_optimal;
|
||||||
|
c->max_search_depth = 60;
|
||||||
|
c->nice_match_length = 80;
|
||||||
|
c->p.n.num_optim_passes = 3;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
c->impl = deflate_compress_near_optimal;
|
||||||
|
c->max_search_depth = 100;
|
||||||
|
c->nice_match_length = 133;
|
||||||
|
c->p.n.num_optim_passes = 4;
|
||||||
|
break;
|
||||||
|
#else
|
||||||
|
case 8:
|
||||||
|
c->impl = deflate_compress_lazy;
|
||||||
|
c->max_search_depth = 150;
|
||||||
|
c->nice_match_length = 200;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
c->impl = deflate_compress_lazy;
|
||||||
|
c->max_search_depth = 200;
|
||||||
|
c->nice_match_length = DEFLATE_MAX_MATCH_LEN;
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
deflate_init_offset_slot_fast(c);
|
||||||
|
deflate_init_static_codes(c);
|
||||||
|
deflate_init_length_slot();
|
||||||
|
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
LIBDEFLATEEXPORT size_t LIBDEFLATEAPI
|
||||||
|
libdeflate_gdeflate_compress(struct libdeflate_compressor *c,
|
||||||
|
const void *in, size_t in_nbytes,
|
||||||
|
struct libdeflate_gdeflate_out_page* out_pages,
|
||||||
|
size_t out_npages)
|
||||||
|
{
|
||||||
|
const u8 * in_bytes = in;
|
||||||
|
size_t out_nbytes = 0;
|
||||||
|
size_t npages;
|
||||||
|
size_t upper_bound = libdeflate_gdeflate_compress_bound(c,
|
||||||
|
in_nbytes, &npages);
|
||||||
|
size_t page_upper_bound = upper_bound / npages;
|
||||||
|
|
||||||
|
if (unlikely(out_pages == NULL || out_npages != npages))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
for (size_t page = 0; page < npages; page++) {
|
||||||
|
size_t comp_page_nbytes;
|
||||||
|
const size_t page_nbytes = in_nbytes > GDEFLATE_PAGE_SIZE ?
|
||||||
|
GDEFLATE_PAGE_SIZE : in_nbytes;
|
||||||
|
|
||||||
|
if (unlikely(out_pages[page].nbytes < page_upper_bound)) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
comp_page_nbytes = (*c->impl)(c, in_bytes, page_nbytes,
|
||||||
|
out_pages[page].data, page_upper_bound);
|
||||||
|
|
||||||
|
out_pages[page].nbytes = comp_page_nbytes;
|
||||||
|
|
||||||
|
/* Page did not fit - bail out. */
|
||||||
|
if (unlikely(comp_page_nbytes == 0))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
in_bytes += page_nbytes;
|
||||||
|
in_nbytes -= page_nbytes;
|
||||||
|
|
||||||
|
out_nbytes += comp_page_nbytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
return out_nbytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
LIBDEFLATEEXPORT size_t LIBDEFLATEAPI
|
||||||
|
libdeflate_gdeflate_compress_ex(struct libdeflate_compressor *c,
|
||||||
|
const struct libdeflate_gdeflate_in_page* in_pages,
|
||||||
|
struct libdeflate_gdeflate_out_page* out_pages, size_t npages)
|
||||||
|
{
|
||||||
|
if (unlikely(out_pages == NULL || in_pages == NULL))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
for (size_t page = 0; page < npages; page++) {
|
||||||
|
size_t comp_page_nbytes;
|
||||||
|
|
||||||
|
comp_page_nbytes = (*c->impl)(c, in_pages[page].data, in_pages[page].nbytes,
|
||||||
|
out_pages[page].data, out_pages[page].nbytes);
|
||||||
|
|
||||||
|
out_pages[page].nbytes = comp_page_nbytes;
|
||||||
|
|
||||||
|
/* Page did not fit - bail out. */
|
||||||
|
if (unlikely(comp_page_nbytes == 0))
|
||||||
|
return page;
|
||||||
|
}
|
||||||
|
|
||||||
|
return npages;
|
||||||
|
}
|
||||||
|
|
||||||
|
LIBDEFLATEEXPORT void LIBDEFLATEAPI
|
||||||
|
libdeflate_free_gdeflate_compressor(struct libdeflate_compressor *c)
|
||||||
|
{
|
||||||
|
libdeflate_aligned_free(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
LIBDEFLATEEXPORT size_t LIBDEFLATEAPI
|
||||||
|
libdeflate_gdeflate_compress_bound(struct libdeflate_compressor *c,
|
||||||
|
size_t in_nbytes, size_t *out_npages)
|
||||||
|
{
|
||||||
|
const size_t page_bound = libdeflate_deflate_compress_bound(c, GDEFLATE_PAGE_SIZE);
|
||||||
|
const size_t npages = DIV_ROUND_UP(in_nbytes, GDEFLATE_PAGE_SIZE);
|
||||||
|
|
||||||
|
if (out_npages)
|
||||||
|
*out_npages = npages;
|
||||||
|
|
||||||
|
return (page_bound + (NUM_STREAMS * BITS_PER_PACKET) / 8) * npages;
|
||||||
|
}
|
||||||
134
lib/gdeflate/libdeflate/lib/gdeflate_decompress.c
Normal file
134
lib/gdeflate/libdeflate/lib/gdeflate_decompress.c
Normal file
|
|
@ -0,0 +1,134 @@
|
||||||
|
/*
|
||||||
|
* gdeflate_decompress.c - a decompressor for GDEFLATE
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
* SPDX-FileCopyrightText: Copyright (c) 2020, 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define WITH_GDEFLATE
|
||||||
|
#define HIDE_INTERFACE
|
||||||
|
|
||||||
|
#define libdeflate_decompressor libdeflate_gdeflate_decompressor
|
||||||
|
|
||||||
|
#include "deflate_decompress.c"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is the main GDEFLATE decompression routine. See libdeflate.h for the
|
||||||
|
* documentation.
|
||||||
|
*
|
||||||
|
* Note that the real code is in gdeflate_decompress_template.h. The part here
|
||||||
|
* just handles calling the appropriate implementation depending on the CPU
|
||||||
|
* features at runtime.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
|
||||||
|
libdeflate_gdeflate_decompress(struct libdeflate_decompressor * restrict d,
|
||||||
|
struct libdeflate_gdeflate_in_page *in_pages,
|
||||||
|
size_t in_npages, void * restrict out,
|
||||||
|
size_t out_nbytes_avail,
|
||||||
|
size_t *actual_out_nbytes_ret)
|
||||||
|
{
|
||||||
|
u8 * restrict out_bytes = out;
|
||||||
|
|
||||||
|
if (unlikely(in_pages == NULL || in_npages == 0))
|
||||||
|
return LIBDEFLATE_BAD_DATA;
|
||||||
|
|
||||||
|
for (size_t npage = 0; npage < in_npages; npage++) {
|
||||||
|
size_t page_out_nbytes_ret, page_in_nbytes_ret;
|
||||||
|
enum libdeflate_result res;
|
||||||
|
|
||||||
|
res = decompress_impl(d, in_pages[npage].data,
|
||||||
|
in_pages[npage].nbytes, out_bytes,
|
||||||
|
out_nbytes_avail, &page_in_nbytes_ret,
|
||||||
|
&page_out_nbytes_ret);
|
||||||
|
|
||||||
|
if (unlikely(res != LIBDEFLATE_SUCCESS))
|
||||||
|
return res;
|
||||||
|
|
||||||
|
out_bytes += page_out_nbytes_ret;
|
||||||
|
out_nbytes_avail -= page_out_nbytes_ret;
|
||||||
|
|
||||||
|
if (actual_out_nbytes_ret)
|
||||||
|
*actual_out_nbytes_ret += page_out_nbytes_ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
return LIBDEFLATE_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
|
||||||
|
libdeflate_gdeflate_decompress_ex(struct libdeflate_gdeflate_decompressor * restrict d,
|
||||||
|
struct libdeflate_gdeflate_in_page * restrict in_pages,
|
||||||
|
struct libdeflate_gdeflate_out_page * restrict out_pages, size_t npages)
|
||||||
|
{
|
||||||
|
if (unlikely(in_pages == NULL || out_pages == NULL || npages == 0))
|
||||||
|
return LIBDEFLATE_BAD_DATA;
|
||||||
|
|
||||||
|
for (size_t npage = 0; npage < npages; npage++) {
|
||||||
|
size_t page_out_nbytes_ret, page_in_nbytes_ret;
|
||||||
|
enum libdeflate_result res;
|
||||||
|
|
||||||
|
res = decompress_impl(d, in_pages[npage].data,
|
||||||
|
in_pages[npage].nbytes, out_pages[npage].data,
|
||||||
|
out_pages[npage].nbytes, &page_in_nbytes_ret,
|
||||||
|
&page_out_nbytes_ret);
|
||||||
|
|
||||||
|
if (unlikely(res != LIBDEFLATE_SUCCESS))
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
return LIBDEFLATE_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
LIBDEFLATEEXPORT struct libdeflate_decompressor * LIBDEFLATEAPI
|
||||||
|
libdeflate_alloc_gdeflate_decompressor(void)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Note that only certain parts of the decompressor actually must be
|
||||||
|
* initialized here:
|
||||||
|
*
|
||||||
|
* - 'static_codes_loaded' must be initialized to false.
|
||||||
|
*
|
||||||
|
* - The first half of the main portion of each decode table must be
|
||||||
|
* initialized to any value, to avoid reading from uninitialized
|
||||||
|
* memory during table expansion in build_decode_table(). (Although,
|
||||||
|
* this is really just to avoid warnings with dynamic tools like
|
||||||
|
* valgrind, since build_decode_table() is guaranteed to initialize
|
||||||
|
* all entries eventually anyway.)
|
||||||
|
*
|
||||||
|
* But for simplicity, we currently just zero the whole decompressor.
|
||||||
|
*/
|
||||||
|
struct libdeflate_decompressor *d = libdeflate_malloc(sizeof(*d));
|
||||||
|
|
||||||
|
if (d == NULL)
|
||||||
|
return NULL;
|
||||||
|
memset(d, 0, sizeof(*d));
|
||||||
|
return d;
|
||||||
|
}
|
||||||
|
|
||||||
|
LIBDEFLATEEXPORT void LIBDEFLATEAPI
|
||||||
|
libdeflate_free_gdeflate_decompressor(struct libdeflate_decompressor *d)
|
||||||
|
{
|
||||||
|
libdeflate_free(d);
|
||||||
|
}
|
||||||
611
lib/gdeflate/libdeflate/lib/gdeflate_decompress_template.h
Normal file
611
lib/gdeflate/libdeflate/lib/gdeflate_decompress_template.h
Normal file
|
|
@ -0,0 +1,611 @@
|
||||||
|
/*
|
||||||
|
* gdeflate_decompress_template.h
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
* SPDX-FileCopyrightText: Copyright (c) 2020, 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is the actual GDEFLATE decompression routine, lifted out of
|
||||||
|
* gdeflate_decompress.c so that it can be compiled multiple times with
|
||||||
|
* different target instruction sets.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Does the bitbuffer variable currently contain at least 'n' bits?
|
||||||
|
*/
|
||||||
|
#undef HAVE_BITS
|
||||||
|
#define HAVE_BITS(n) (s->bitsleft[s->idx] >= (n))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Load more bits from the input buffer until the specified number of bits is
|
||||||
|
* present in the bitbuffer variable. 'n' cannot be too large; see MAX_ENSURE
|
||||||
|
* and CAN_ENSURE().
|
||||||
|
*/
|
||||||
|
#undef ENSURE_BITS
|
||||||
|
#define ENSURE_BITS(n) \
|
||||||
|
if (!HAVE_BITS(n)) { \
|
||||||
|
s->bitbuf[s->idx] |= (bitbuf_t)(get_unaligned_le32(in_next)) << \
|
||||||
|
s->bitsleft[s->idx]; \
|
||||||
|
in_next += BITS_PER_PACKET/8; \
|
||||||
|
s->bitsleft[s->idx] += BITS_PER_PACKET; \
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return the next 'n' bits from the bitbuffer variable without removing them.
|
||||||
|
*/
|
||||||
|
#undef BITS
|
||||||
|
#define BITS(n) ((u32)s->bitbuf[s->idx] & (((u32)1 << (n)) - 1))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Remove the next 'n' bits from the bitbuffer variable.
|
||||||
|
*/
|
||||||
|
#undef REMOVE_BITS
|
||||||
|
#define REMOVE_BITS(n) (s->bitbuf[s->idx] >>= (n), s->bitsleft[s->idx] -= (n))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Setup copy advance method depending on a number of streams used.
|
||||||
|
*/
|
||||||
|
#if (NUM_STREAMS == 32)
|
||||||
|
#define ADVANCE_COPIES() is_copy = _rotr(is_copy, 1)
|
||||||
|
#else
|
||||||
|
# pragma message("Invalid number of GDeflate streams used!")
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Reset GDeflate stream index.
|
||||||
|
*/
|
||||||
|
#define RESET() s->idx = 0
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Advance GDeflate stream index. Refill bits if necessary.
|
||||||
|
*/
|
||||||
|
#define ADVANCE() do { \
|
||||||
|
ENSURE_BITS(LOW_WATERMARK_BITS); \
|
||||||
|
s->idx = (s->idx + 1)%NUM_STREAMS; \
|
||||||
|
ADVANCE_COPIES(); \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Tells if current GDeflate stream has a deferred copy.
|
||||||
|
*/
|
||||||
|
#define IS_COPY() (is_copy&1)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Stores a deferred copy in current GDeflate stream.
|
||||||
|
*/
|
||||||
|
#define STORE_COPY(len, out) do { \
|
||||||
|
s->copies[s->idx].length = len; \
|
||||||
|
s->copies[s->idx].out_next = out; \
|
||||||
|
is_copy |= 1; \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Marks a copy in current current GDeflate stream as complete.
|
||||||
|
*/
|
||||||
|
#define COPY_COMPLETE() (is_copy &= ~1)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Prevent multiple type declarations.
|
||||||
|
*/
|
||||||
|
#ifndef GDEFLATE_TYPES_DECLARED
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Setup is_copy type depending on a number of streams used.
|
||||||
|
*/
|
||||||
|
#if (NUM_STREAMS == 32)
|
||||||
|
typedef u32 is_copy_t;
|
||||||
|
#else
|
||||||
|
# pragma message("Invalid number of GDeflate streams used!")
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* GDeflate deferred copy state structure.
|
||||||
|
*/
|
||||||
|
struct gdeflate_deferred_copy {
|
||||||
|
unsigned length;
|
||||||
|
u8 * out_next;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* GDeflate state structure.
|
||||||
|
*/
|
||||||
|
struct gdeflate_state {
|
||||||
|
bitbuf_t bitbuf[NUM_STREAMS];
|
||||||
|
unsigned bitsleft[NUM_STREAMS];
|
||||||
|
struct gdeflate_deferred_copy copies[NUM_STREAMS];
|
||||||
|
unsigned idx;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Prevent multiple type declarations.
|
||||||
|
*/
|
||||||
|
#define GDEFLATE_TYPES_DECLARED
|
||||||
|
|
||||||
|
#endif /* GDEFLATE_TYPES_DECLARED */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Specialize gdeflate_do_copy function name.
|
||||||
|
*/
|
||||||
|
#define DO_COPY CONCAT(FUNCNAME, _gdeflate_do_copy)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Perform a deferred GDeflate copy.
|
||||||
|
*/
|
||||||
|
static forceinline enum libdeflate_result ATTRIBUTES
|
||||||
|
DO_COPY(struct libdeflate_decompressor * restrict d,
|
||||||
|
struct gdeflate_state * s, u8 * out, u8 * const out_end)
|
||||||
|
{
|
||||||
|
u32 entry;
|
||||||
|
u32 offset;
|
||||||
|
const u8 *src;
|
||||||
|
u8 *dst;
|
||||||
|
u32 tmp32;
|
||||||
|
|
||||||
|
/* Pop match params. */
|
||||||
|
u32 length = s->copies[s->idx].length;
|
||||||
|
u8 * out_next = s->copies[s->idx].out_next;
|
||||||
|
|
||||||
|
/* Decode the match offset. */
|
||||||
|
|
||||||
|
entry = d->offset_decode_table[BITS(OFFSET_TABLEBITS)];
|
||||||
|
if (entry & HUFFDEC_SUBTABLE_POINTER) {
|
||||||
|
/* Offset subtable required (uncommon case) */
|
||||||
|
REMOVE_BITS(OFFSET_TABLEBITS);
|
||||||
|
entry = d->offset_decode_table[
|
||||||
|
((entry >> HUFFDEC_RESULT_SHIFT) & 0xFFFF) +
|
||||||
|
BITS(entry & HUFFDEC_LENGTH_MASK)];
|
||||||
|
}
|
||||||
|
REMOVE_BITS(entry & HUFFDEC_LENGTH_MASK);
|
||||||
|
|
||||||
|
entry >>= HUFFDEC_RESULT_SHIFT;
|
||||||
|
|
||||||
|
/* Pop the extra offset bits and add them to the offset base to
|
||||||
|
* produce the full offset. */
|
||||||
|
offset = (entry & HUFFDEC_OFFSET_BASE_MASK) +
|
||||||
|
POP_BITS(entry >> HUFFDEC_EXTRA_OFFSET_BITS_SHIFT);
|
||||||
|
|
||||||
|
/* The match source must not begin before the beginning of the
|
||||||
|
* output buffer. */
|
||||||
|
SAFETY_CHECK(offset <= out_next - (const u8 *)out);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copy the match: 'length' bytes at 'out_next - offset' to
|
||||||
|
* 'out_next', possibly overlapping. If the match doesn't end
|
||||||
|
* too close to the end of the buffer and offset >= WORDBYTES ||
|
||||||
|
* offset == 1, take a fast path which copies a word at a time
|
||||||
|
* -- potentially more than the length of the match, but that's
|
||||||
|
* fine as long as we check for enough extra space.
|
||||||
|
*
|
||||||
|
* The remaining cases are not performance-critical so are
|
||||||
|
* handled by a simple byte-by-byte copy.
|
||||||
|
*/
|
||||||
|
src = out_next - offset;
|
||||||
|
dst = out_next;
|
||||||
|
out_next += length;
|
||||||
|
|
||||||
|
if (UNALIGNED_ACCESS_IS_FAST &&
|
||||||
|
likely(out_end - out_next >= WORDBYTES && length >= WORDBYTES)) {
|
||||||
|
if (offset >= WORDBYTES) { /* words don't overlap? */
|
||||||
|
while (dst < out_next - WORDBYTES) {
|
||||||
|
copy_word_unaligned(src, dst);
|
||||||
|
src += WORDBYTES;
|
||||||
|
dst += WORDBYTES;
|
||||||
|
}
|
||||||
|
/* Tail. */
|
||||||
|
while (dst < out_next) *dst++ = *src++;
|
||||||
|
} else if (offset == 1) {
|
||||||
|
/* RLE encoding of previous byte, common if the
|
||||||
|
* data contains many repeated bytes */
|
||||||
|
machine_word_t v = repeat_byte(*src);
|
||||||
|
|
||||||
|
while (dst < out_next - WORDBYTES) {
|
||||||
|
store_word_unaligned(v, dst);
|
||||||
|
dst += WORDBYTES;
|
||||||
|
}
|
||||||
|
/* Tail. */
|
||||||
|
while (dst < out_next) *dst++ = (u8)v;
|
||||||
|
} else {
|
||||||
|
*dst++ = *src++;
|
||||||
|
*dst++ = *src++;
|
||||||
|
do {
|
||||||
|
*dst++ = *src++;
|
||||||
|
} while (dst < out_next);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
STATIC_ASSERT(DEFLATE_MIN_MATCH_LEN == 3);
|
||||||
|
*dst++ = *src++;
|
||||||
|
*dst++ = *src++;
|
||||||
|
do {
|
||||||
|
*dst++ = *src++;
|
||||||
|
} while (dst < out_next);
|
||||||
|
}
|
||||||
|
|
||||||
|
return LIBDEFLATE_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static enum libdeflate_result ATTRIBUTES
|
||||||
|
FUNCNAME(struct libdeflate_decompressor * restrict d,
|
||||||
|
const void * restrict in, size_t in_nbytes,
|
||||||
|
void * restrict out, size_t out_nbytes_avail,
|
||||||
|
size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret)
|
||||||
|
{
|
||||||
|
u8 *out_next = out;
|
||||||
|
u8 * const out_end = out_next + out_nbytes_avail;
|
||||||
|
const u8 *in_next = in;
|
||||||
|
const u8 * const in_end = in_next + in_nbytes;
|
||||||
|
struct gdeflate_state state;
|
||||||
|
struct gdeflate_state * s = &state;
|
||||||
|
unsigned i;
|
||||||
|
unsigned is_final_block;
|
||||||
|
unsigned block_type;
|
||||||
|
u16 len;
|
||||||
|
unsigned num_litlen_syms;
|
||||||
|
unsigned num_offset_syms;
|
||||||
|
u32 tmp32;
|
||||||
|
is_copy_t is_copy = 0;
|
||||||
|
|
||||||
|
/* Starting to read GDeflate stream. */
|
||||||
|
RESET();
|
||||||
|
for (unsigned n = 0; n < NUM_STREAMS; n++) {
|
||||||
|
s->bitbuf[n] = 0;
|
||||||
|
s->bitsleft[n] = 0;
|
||||||
|
s->copies[n].length = 0;
|
||||||
|
ADVANCE();
|
||||||
|
}
|
||||||
|
|
||||||
|
next_block:
|
||||||
|
/* Starting to read the next block. */
|
||||||
|
RESET();
|
||||||
|
|
||||||
|
/* BFINAL: 1 bit */
|
||||||
|
is_final_block = POP_BITS(1);
|
||||||
|
|
||||||
|
/* BTYPE: 2 bits */
|
||||||
|
block_type = POP_BITS(2);
|
||||||
|
|
||||||
|
ENSURE_BITS(LOW_WATERMARK_BITS);
|
||||||
|
|
||||||
|
if (block_type == DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN) {
|
||||||
|
|
||||||
|
/* Dynamic Huffman block. */
|
||||||
|
|
||||||
|
/* The order in which precode lengths are stored. */
|
||||||
|
static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = {
|
||||||
|
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
|
||||||
|
};
|
||||||
|
|
||||||
|
unsigned num_explicit_precode_lens;
|
||||||
|
|
||||||
|
/* Read the codeword length counts. */
|
||||||
|
|
||||||
|
STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == ((1 << 5) - 1) + 257);
|
||||||
|
num_litlen_syms = POP_BITS(5) + 257;
|
||||||
|
|
||||||
|
STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == ((1 << 5) - 1) + 1);
|
||||||
|
num_offset_syms = POP_BITS(5) + 1;
|
||||||
|
|
||||||
|
STATIC_ASSERT(DEFLATE_NUM_PRECODE_SYMS == ((1 << 4) - 1) + 4);
|
||||||
|
num_explicit_precode_lens = POP_BITS(4) + 4;
|
||||||
|
|
||||||
|
d->static_codes_loaded = false;
|
||||||
|
|
||||||
|
ENSURE_BITS(LOW_WATERMARK_BITS);
|
||||||
|
|
||||||
|
/* Read the precode codeword lengths. */
|
||||||
|
STATIC_ASSERT(DEFLATE_MAX_PRE_CODEWORD_LEN == (1 << 3) - 1);
|
||||||
|
for (i = 0; i < num_explicit_precode_lens; i++) {
|
||||||
|
d->u.precode_lens[deflate_precode_lens_permutation[i]] = POP_BITS(3);
|
||||||
|
|
||||||
|
ADVANCE();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; i < DEFLATE_NUM_PRECODE_SYMS; i++)
|
||||||
|
d->u.precode_lens[deflate_precode_lens_permutation[i]] = 0;
|
||||||
|
|
||||||
|
/* Build the decode table for the precode. */
|
||||||
|
SAFETY_CHECK(build_precode_decode_table(d));
|
||||||
|
|
||||||
|
RESET();
|
||||||
|
|
||||||
|
/* Expand the literal/length and offset codeword lengths. */
|
||||||
|
for (i = 0; i < num_litlen_syms + num_offset_syms; ) {
|
||||||
|
u32 entry;
|
||||||
|
unsigned presym;
|
||||||
|
u8 rep_val;
|
||||||
|
unsigned rep_count;
|
||||||
|
|
||||||
|
/* (The code below assumes that the precode decode table
|
||||||
|
* does not have any subtables.) */
|
||||||
|
STATIC_ASSERT(PRECODE_TABLEBITS == DEFLATE_MAX_PRE_CODEWORD_LEN);
|
||||||
|
|
||||||
|
/* Read the next precode symbol. */
|
||||||
|
entry = d->u.l.precode_decode_table[BITS(DEFLATE_MAX_PRE_CODEWORD_LEN)];
|
||||||
|
REMOVE_BITS(entry & HUFFDEC_LENGTH_MASK);
|
||||||
|
|
||||||
|
presym = entry >> HUFFDEC_RESULT_SHIFT;
|
||||||
|
|
||||||
|
if (presym < 16) {
|
||||||
|
/* Explicit codeword length */
|
||||||
|
d->u.l.lens[i++] = presym;
|
||||||
|
ADVANCE();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Run-length encoded codeword lengths */
|
||||||
|
|
||||||
|
/* Note: we don't need verify that the repeat count
|
||||||
|
* doesn't overflow the number of elements, since we
|
||||||
|
* have enough extra spaces to allow for the worst-case
|
||||||
|
* overflow (138 zeroes when only 1 length was
|
||||||
|
* remaining).
|
||||||
|
*
|
||||||
|
* In the case of the small repeat counts (presyms 16
|
||||||
|
* and 17), it is fastest to always write the maximum
|
||||||
|
* number of entries. That gets rid of branches that
|
||||||
|
* would otherwise be required.
|
||||||
|
*
|
||||||
|
* It is not just because of the numerical order that
|
||||||
|
* our checks go in the order 'presym < 16', 'presym ==
|
||||||
|
* 16', and 'presym == 17'. For typical data this is
|
||||||
|
* ordered from most frequent to least frequent case.
|
||||||
|
*/
|
||||||
|
STATIC_ASSERT(DEFLATE_MAX_LENS_OVERRUN == 138 - 1);
|
||||||
|
|
||||||
|
if (presym == 16) {
|
||||||
|
/* Repeat the previous length 3 - 6 times */
|
||||||
|
SAFETY_CHECK(i != 0);
|
||||||
|
rep_val = d->u.l.lens[i - 1];
|
||||||
|
STATIC_ASSERT(3 + ((1 << 2) - 1) == 6);
|
||||||
|
rep_count = 3 + POP_BITS(2);
|
||||||
|
d->u.l.lens[i + 0] = rep_val;
|
||||||
|
d->u.l.lens[i + 1] = rep_val;
|
||||||
|
d->u.l.lens[i + 2] = rep_val;
|
||||||
|
d->u.l.lens[i + 3] = rep_val;
|
||||||
|
d->u.l.lens[i + 4] = rep_val;
|
||||||
|
d->u.l.lens[i + 5] = rep_val;
|
||||||
|
i += rep_count;
|
||||||
|
} else if (presym == 17) {
|
||||||
|
/* Repeat zero 3 - 10 times */
|
||||||
|
STATIC_ASSERT(3 + ((1 << 3) - 1) == 10);
|
||||||
|
rep_count = 3 + POP_BITS(3);
|
||||||
|
d->u.l.lens[i + 0] = 0;
|
||||||
|
d->u.l.lens[i + 1] = 0;
|
||||||
|
d->u.l.lens[i + 2] = 0;
|
||||||
|
d->u.l.lens[i + 3] = 0;
|
||||||
|
d->u.l.lens[i + 4] = 0;
|
||||||
|
d->u.l.lens[i + 5] = 0;
|
||||||
|
d->u.l.lens[i + 6] = 0;
|
||||||
|
d->u.l.lens[i + 7] = 0;
|
||||||
|
d->u.l.lens[i + 8] = 0;
|
||||||
|
d->u.l.lens[i + 9] = 0;
|
||||||
|
i += rep_count;
|
||||||
|
} else {
|
||||||
|
/* Repeat zero 11 - 138 times */
|
||||||
|
STATIC_ASSERT(11 + ((1 << 7) - 1) == 138);
|
||||||
|
rep_count = 11 + POP_BITS(7);
|
||||||
|
memset(&d->u.l.lens[i], 0,
|
||||||
|
rep_count * sizeof(d->u.l.lens[i]));
|
||||||
|
i += rep_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
ADVANCE();
|
||||||
|
}
|
||||||
|
} else if (block_type == DEFLATE_BLOCKTYPE_UNCOMPRESSED) {
|
||||||
|
|
||||||
|
/* Uncompressed block: copy 'len' bytes literally from the input
|
||||||
|
* buffer to the output buffer. */
|
||||||
|
|
||||||
|
/* Count bits in the bit buffers. */
|
||||||
|
u32 num_buffered_bits = 0;
|
||||||
|
for (u32 n = 0; n < NUM_STREAMS; n++)
|
||||||
|
num_buffered_bits += s->bitsleft[n];
|
||||||
|
|
||||||
|
SAFETY_CHECK(in_end - in_next + (num_buffered_bits + 7)/8 >= 2);
|
||||||
|
|
||||||
|
len = POP_BITS(16);
|
||||||
|
|
||||||
|
if (unlikely(len > out_end - out_next))
|
||||||
|
return LIBDEFLATE_INSUFFICIENT_SPACE;
|
||||||
|
SAFETY_CHECK(len <= in_end - in_next + (num_buffered_bits + 7)/8);
|
||||||
|
|
||||||
|
while (len) {
|
||||||
|
*out_next++ = POP_BITS(8);
|
||||||
|
len--;
|
||||||
|
ADVANCE();
|
||||||
|
}
|
||||||
|
|
||||||
|
goto block_done;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
SAFETY_CHECK(block_type == DEFLATE_BLOCKTYPE_STATIC_HUFFMAN);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Static Huffman block: build the decode tables for the static
|
||||||
|
* codes. Skip doing so if the tables are already set up from
|
||||||
|
* an earlier static block; this speeds up decompression of
|
||||||
|
* degenerate input of many empty or very short static blocks.
|
||||||
|
*
|
||||||
|
* Afterwards, the remainder is the same as decompressing a
|
||||||
|
* dynamic Huffman block.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (d->static_codes_loaded)
|
||||||
|
goto have_decode_tables;
|
||||||
|
|
||||||
|
d->static_codes_loaded = true;
|
||||||
|
|
||||||
|
STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 288);
|
||||||
|
STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 32);
|
||||||
|
|
||||||
|
for (i = 0; i < 144; i++)
|
||||||
|
d->u.l.lens[i] = 8;
|
||||||
|
for (; i < 256; i++)
|
||||||
|
d->u.l.lens[i] = 9;
|
||||||
|
for (; i < 280; i++)
|
||||||
|
d->u.l.lens[i] = 7;
|
||||||
|
for (; i < 288; i++)
|
||||||
|
d->u.l.lens[i] = 8;
|
||||||
|
|
||||||
|
for (; i < 288 + 32; i++)
|
||||||
|
d->u.l.lens[i] = 5;
|
||||||
|
|
||||||
|
num_litlen_syms = 288;
|
||||||
|
num_offset_syms = 32;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Decompressing a Huffman block (either dynamic or static) */
|
||||||
|
|
||||||
|
SAFETY_CHECK(build_offset_decode_table(d, num_litlen_syms, num_offset_syms));
|
||||||
|
SAFETY_CHECK(build_litlen_decode_table(d, num_litlen_syms, num_offset_syms));
|
||||||
|
have_decode_tables:
|
||||||
|
|
||||||
|
RESET();
|
||||||
|
|
||||||
|
/* The main GDEFLATE decode loop */
|
||||||
|
for (;;) {
|
||||||
|
u32 entry;
|
||||||
|
u32 length;
|
||||||
|
|
||||||
|
if (likely(!IS_COPY())) {
|
||||||
|
/* Decode a litlen symbol. */
|
||||||
|
entry = d->u.litlen_decode_table[BITS(LITLEN_TABLEBITS)];
|
||||||
|
if (entry & HUFFDEC_SUBTABLE_POINTER) {
|
||||||
|
/* Litlen subtable required (uncommon case) */
|
||||||
|
REMOVE_BITS(LITLEN_TABLEBITS);
|
||||||
|
entry = d->u.litlen_decode_table[
|
||||||
|
((entry >> HUFFDEC_RESULT_SHIFT) & 0xFFFF) +
|
||||||
|
BITS(entry & HUFFDEC_LENGTH_MASK)];
|
||||||
|
}
|
||||||
|
REMOVE_BITS(entry & HUFFDEC_LENGTH_MASK);
|
||||||
|
if (entry & HUFFDEC_LITERAL) {
|
||||||
|
/* Literal */
|
||||||
|
if (unlikely(out_next == out_end))
|
||||||
|
return LIBDEFLATE_INSUFFICIENT_SPACE;
|
||||||
|
*out_next++ = (u8)(entry >> HUFFDEC_RESULT_SHIFT);
|
||||||
|
ADVANCE();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Match or end-of-block */
|
||||||
|
|
||||||
|
entry >>= HUFFDEC_RESULT_SHIFT;
|
||||||
|
|
||||||
|
/* Pop the extra length bits and add them to the length base to
|
||||||
|
* produce the full length. */
|
||||||
|
length = (entry >> HUFFDEC_LENGTH_BASE_SHIFT) +
|
||||||
|
POP_BITS(entry & HUFFDEC_EXTRA_LENGTH_BITS_MASK);
|
||||||
|
|
||||||
|
/* The match destination must not end after the end of the
|
||||||
|
* output buffer. For efficiency, combine this check with the
|
||||||
|
* end-of-block check. We're using 0 for the special
|
||||||
|
* end-of-block length, so subtract 1 and it turn it into
|
||||||
|
* SIZE_MAX. */
|
||||||
|
STATIC_ASSERT(HUFFDEC_END_OF_BLOCK_LENGTH == 0);
|
||||||
|
if (unlikely((size_t)length - 1 >= out_end - out_next)) {
|
||||||
|
if (unlikely(length != HUFFDEC_END_OF_BLOCK_LENGTH))
|
||||||
|
return LIBDEFLATE_INSUFFICIENT_SPACE;
|
||||||
|
goto block_done;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Store copy for use later. */
|
||||||
|
STORE_COPY(length, out_next);
|
||||||
|
|
||||||
|
/* Advance output stream. */
|
||||||
|
out_next += length;
|
||||||
|
} else {
|
||||||
|
enum libdeflate_result res =
|
||||||
|
DO_COPY(d, s, out, out_end);
|
||||||
|
|
||||||
|
if (unlikely(res))
|
||||||
|
return res;
|
||||||
|
|
||||||
|
COPY_COMPLETE();
|
||||||
|
}
|
||||||
|
|
||||||
|
ADVANCE();
|
||||||
|
}
|
||||||
|
|
||||||
|
block_done:
|
||||||
|
|
||||||
|
/* Run the outstanding deferred copies. */
|
||||||
|
|
||||||
|
for (unsigned n = 0; n < NUM_STREAMS; n++) {
|
||||||
|
if (IS_COPY()) {
|
||||||
|
enum libdeflate_result res =
|
||||||
|
DO_COPY(d, s, out, out_end);
|
||||||
|
|
||||||
|
if (unlikely(res))
|
||||||
|
return res;
|
||||||
|
|
||||||
|
COPY_COMPLETE();
|
||||||
|
}
|
||||||
|
|
||||||
|
ADVANCE();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Finished decoding a block. */
|
||||||
|
|
||||||
|
if (!is_final_block)
|
||||||
|
goto next_block;
|
||||||
|
|
||||||
|
/* That was the last block. */
|
||||||
|
|
||||||
|
/* Optionally return the actual number of bytes read */
|
||||||
|
if (actual_in_nbytes_ret)
|
||||||
|
*actual_in_nbytes_ret = in_next - (u8 *)in;
|
||||||
|
|
||||||
|
/* Optionally return the actual number of bytes written */
|
||||||
|
if (actual_out_nbytes_ret) {
|
||||||
|
*actual_out_nbytes_ret = out_next - (u8 *)out;
|
||||||
|
} else {
|
||||||
|
if (out_next != out_end)
|
||||||
|
return LIBDEFLATE_SHORT_OUTPUT;
|
||||||
|
}
|
||||||
|
return LIBDEFLATE_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef FUNCNAME
|
||||||
|
#undef ATTRIBUTES
|
||||||
|
#undef IS_COPY
|
||||||
|
#undef DO_COPY
|
||||||
|
#undef ADVANCE_COPIES
|
||||||
|
#undef STORE_COPY
|
||||||
|
#undef COPY_COMPLETE
|
||||||
|
#undef RESET
|
||||||
|
#undef ADVANCE
|
||||||
95
lib/gdeflate/libdeflate/lib/gzip_compress.c
Normal file
95
lib/gdeflate/libdeflate/lib/gzip_compress.c
Normal file
|
|
@ -0,0 +1,95 @@
|
||||||
|
/*
|
||||||
|
* gzip_compress.c - compress with a gzip wrapper
|
||||||
|
*
|
||||||
|
* Originally public domain; changes after 2016-09-07 are copyrighted.
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "deflate_compress.h"
|
||||||
|
#include "gzip_constants.h"
|
||||||
|
#include "unaligned.h"
|
||||||
|
|
||||||
|
#include "libdeflate.h"
|
||||||
|
|
||||||
|
LIBDEFLATEEXPORT size_t LIBDEFLATEAPI
|
||||||
|
libdeflate_gzip_compress(struct libdeflate_compressor *c,
|
||||||
|
const void *in, size_t in_nbytes,
|
||||||
|
void *out, size_t out_nbytes_avail)
|
||||||
|
{
|
||||||
|
u8 *out_next = out;
|
||||||
|
unsigned compression_level;
|
||||||
|
u8 xfl;
|
||||||
|
size_t deflate_size;
|
||||||
|
|
||||||
|
if (out_nbytes_avail <= GZIP_MIN_OVERHEAD)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* ID1 */
|
||||||
|
*out_next++ = GZIP_ID1;
|
||||||
|
/* ID2 */
|
||||||
|
*out_next++ = GZIP_ID2;
|
||||||
|
/* CM */
|
||||||
|
*out_next++ = GZIP_CM_DEFLATE;
|
||||||
|
/* FLG */
|
||||||
|
*out_next++ = 0;
|
||||||
|
/* MTIME */
|
||||||
|
put_unaligned_le32(GZIP_MTIME_UNAVAILABLE, out_next);
|
||||||
|
out_next += 4;
|
||||||
|
/* XFL */
|
||||||
|
xfl = 0;
|
||||||
|
compression_level = deflate_get_compression_level(c);
|
||||||
|
if (compression_level < 2)
|
||||||
|
xfl |= GZIP_XFL_FASTEST_COMPRESSION;
|
||||||
|
else if (compression_level >= 8)
|
||||||
|
xfl |= GZIP_XFL_SLOWEST_COMPRESSION;
|
||||||
|
*out_next++ = xfl;
|
||||||
|
/* OS */
|
||||||
|
*out_next++ = GZIP_OS_UNKNOWN; /* OS */
|
||||||
|
|
||||||
|
/* Compressed data */
|
||||||
|
deflate_size = libdeflate_deflate_compress(c, in, in_nbytes, out_next,
|
||||||
|
out_nbytes_avail - GZIP_MIN_OVERHEAD);
|
||||||
|
if (deflate_size == 0)
|
||||||
|
return 0;
|
||||||
|
out_next += deflate_size;
|
||||||
|
|
||||||
|
/* CRC32 */
|
||||||
|
put_unaligned_le32(libdeflate_crc32(0, in, in_nbytes), out_next);
|
||||||
|
out_next += 4;
|
||||||
|
|
||||||
|
/* ISIZE */
|
||||||
|
put_unaligned_le32((u32)in_nbytes, out_next);
|
||||||
|
out_next += 4;
|
||||||
|
|
||||||
|
return out_next - (u8 *)out;
|
||||||
|
}
|
||||||
|
|
||||||
|
LIBDEFLATEEXPORT size_t LIBDEFLATEAPI
|
||||||
|
libdeflate_gzip_compress_bound(struct libdeflate_compressor *c,
|
||||||
|
size_t in_nbytes)
|
||||||
|
{
|
||||||
|
return GZIP_MIN_OVERHEAD +
|
||||||
|
libdeflate_deflate_compress_bound(c, in_nbytes);
|
||||||
|
}
|
||||||
45
lib/gdeflate/libdeflate/lib/gzip_constants.h
Normal file
45
lib/gdeflate/libdeflate/lib/gzip_constants.h
Normal file
|
|
@ -0,0 +1,45 @@
|
||||||
|
/*
|
||||||
|
* gzip_constants.h - constants for the gzip wrapper format
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIB_GZIP_CONSTANTS_H
|
||||||
|
#define LIB_GZIP_CONSTANTS_H
|
||||||
|
|
||||||
|
#define GZIP_MIN_HEADER_SIZE 10
|
||||||
|
#define GZIP_FOOTER_SIZE 8
|
||||||
|
#define GZIP_MIN_OVERHEAD (GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE)
|
||||||
|
|
||||||
|
#define GZIP_ID1 0x1F
|
||||||
|
#define GZIP_ID2 0x8B
|
||||||
|
|
||||||
|
#define GZIP_CM_DEFLATE 8
|
||||||
|
|
||||||
|
#define GZIP_FTEXT 0x01
|
||||||
|
#define GZIP_FHCRC 0x02
|
||||||
|
#define GZIP_FEXTRA 0x04
|
||||||
|
#define GZIP_FNAME 0x08
|
||||||
|
#define GZIP_FCOMMENT 0x10
|
||||||
|
#define GZIP_FRESERVED 0xE0
|
||||||
|
|
||||||
|
#define GZIP_MTIME_UNAVAILABLE 0
|
||||||
|
|
||||||
|
#define GZIP_XFL_SLOWEST_COMPRESSION 0x02
|
||||||
|
#define GZIP_XFL_FASTEST_COMPRESSION 0x04
|
||||||
|
|
||||||
|
#define GZIP_OS_FAT 0
|
||||||
|
#define GZIP_OS_AMIGA 1
|
||||||
|
#define GZIP_OS_VMS 2
|
||||||
|
#define GZIP_OS_UNIX 3
|
||||||
|
#define GZIP_OS_VM_CMS 4
|
||||||
|
#define GZIP_OS_ATARI_TOS 5
|
||||||
|
#define GZIP_OS_HPFS 6
|
||||||
|
#define GZIP_OS_MACINTOSH 7
|
||||||
|
#define GZIP_OS_Z_SYSTEM 8
|
||||||
|
#define GZIP_OS_CP_M 9
|
||||||
|
#define GZIP_OS_TOPS_20 10
|
||||||
|
#define GZIP_OS_NTFS 11
|
||||||
|
#define GZIP_OS_QDOS 12
|
||||||
|
#define GZIP_OS_RISCOS 13
|
||||||
|
#define GZIP_OS_UNKNOWN 255
|
||||||
|
|
||||||
|
#endif /* LIB_GZIP_CONSTANTS_H */
|
||||||
148
lib/gdeflate/libdeflate/lib/gzip_decompress.c
Normal file
148
lib/gdeflate/libdeflate/lib/gzip_decompress.c
Normal file
|
|
@ -0,0 +1,148 @@
|
||||||
|
/*
|
||||||
|
* gzip_decompress.c - decompress with a gzip wrapper
|
||||||
|
*
|
||||||
|
* Originally public domain; changes after 2016-09-07 are copyrighted.
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "gzip_constants.h"
|
||||||
|
#include "unaligned.h"
|
||||||
|
|
||||||
|
#include "libdeflate.h"
|
||||||
|
|
||||||
|
LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
|
||||||
|
libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *d,
|
||||||
|
const void *in, size_t in_nbytes,
|
||||||
|
void *out, size_t out_nbytes_avail,
|
||||||
|
size_t *actual_in_nbytes_ret,
|
||||||
|
size_t *actual_out_nbytes_ret)
|
||||||
|
{
|
||||||
|
const u8 *in_next = in;
|
||||||
|
const u8 * const in_end = in_next + in_nbytes;
|
||||||
|
u8 flg;
|
||||||
|
size_t actual_in_nbytes;
|
||||||
|
size_t actual_out_nbytes;
|
||||||
|
enum libdeflate_result result;
|
||||||
|
|
||||||
|
if (in_nbytes < GZIP_MIN_OVERHEAD)
|
||||||
|
return LIBDEFLATE_BAD_DATA;
|
||||||
|
|
||||||
|
/* ID1 */
|
||||||
|
if (*in_next++ != GZIP_ID1)
|
||||||
|
return LIBDEFLATE_BAD_DATA;
|
||||||
|
/* ID2 */
|
||||||
|
if (*in_next++ != GZIP_ID2)
|
||||||
|
return LIBDEFLATE_BAD_DATA;
|
||||||
|
/* CM */
|
||||||
|
if (*in_next++ != GZIP_CM_DEFLATE)
|
||||||
|
return LIBDEFLATE_BAD_DATA;
|
||||||
|
flg = *in_next++;
|
||||||
|
/* MTIME */
|
||||||
|
in_next += 4;
|
||||||
|
/* XFL */
|
||||||
|
in_next += 1;
|
||||||
|
/* OS */
|
||||||
|
in_next += 1;
|
||||||
|
|
||||||
|
if (flg & GZIP_FRESERVED)
|
||||||
|
return LIBDEFLATE_BAD_DATA;
|
||||||
|
|
||||||
|
/* Extra field */
|
||||||
|
if (flg & GZIP_FEXTRA) {
|
||||||
|
u16 xlen = get_unaligned_le16(in_next);
|
||||||
|
in_next += 2;
|
||||||
|
|
||||||
|
if (in_end - in_next < (u32)xlen + GZIP_FOOTER_SIZE)
|
||||||
|
return LIBDEFLATE_BAD_DATA;
|
||||||
|
|
||||||
|
in_next += xlen;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Original file name (zero terminated) */
|
||||||
|
if (flg & GZIP_FNAME) {
|
||||||
|
while (*in_next++ != 0 && in_next != in_end)
|
||||||
|
;
|
||||||
|
if (in_end - in_next < GZIP_FOOTER_SIZE)
|
||||||
|
return LIBDEFLATE_BAD_DATA;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* File comment (zero terminated) */
|
||||||
|
if (flg & GZIP_FCOMMENT) {
|
||||||
|
while (*in_next++ != 0 && in_next != in_end)
|
||||||
|
;
|
||||||
|
if (in_end - in_next < GZIP_FOOTER_SIZE)
|
||||||
|
return LIBDEFLATE_BAD_DATA;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* CRC16 for gzip header */
|
||||||
|
if (flg & GZIP_FHCRC) {
|
||||||
|
in_next += 2;
|
||||||
|
if (in_end - in_next < GZIP_FOOTER_SIZE)
|
||||||
|
return LIBDEFLATE_BAD_DATA;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compressed data */
|
||||||
|
result = libdeflate_deflate_decompress_ex(d, in_next,
|
||||||
|
in_end - GZIP_FOOTER_SIZE - in_next,
|
||||||
|
out, out_nbytes_avail,
|
||||||
|
&actual_in_nbytes,
|
||||||
|
actual_out_nbytes_ret);
|
||||||
|
if (result != LIBDEFLATE_SUCCESS)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
if (actual_out_nbytes_ret)
|
||||||
|
actual_out_nbytes = *actual_out_nbytes_ret;
|
||||||
|
else
|
||||||
|
actual_out_nbytes = out_nbytes_avail;
|
||||||
|
|
||||||
|
in_next += actual_in_nbytes;
|
||||||
|
|
||||||
|
/* CRC32 */
|
||||||
|
if (libdeflate_crc32(0, out, actual_out_nbytes) !=
|
||||||
|
get_unaligned_le32(in_next))
|
||||||
|
return LIBDEFLATE_BAD_DATA;
|
||||||
|
in_next += 4;
|
||||||
|
|
||||||
|
/* ISIZE */
|
||||||
|
if ((u32)actual_out_nbytes != get_unaligned_le32(in_next))
|
||||||
|
return LIBDEFLATE_BAD_DATA;
|
||||||
|
in_next += 4;
|
||||||
|
|
||||||
|
if (actual_in_nbytes_ret)
|
||||||
|
*actual_in_nbytes_ret = in_next - (u8 *)in;
|
||||||
|
|
||||||
|
return LIBDEFLATE_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
|
||||||
|
libdeflate_gzip_decompress(struct libdeflate_decompressor *d,
|
||||||
|
const void *in, size_t in_nbytes,
|
||||||
|
void *out, size_t out_nbytes_avail,
|
||||||
|
size_t *actual_out_nbytes_ret)
|
||||||
|
{
|
||||||
|
return libdeflate_gzip_decompress_ex(d, in, in_nbytes,
|
||||||
|
out, out_nbytes_avail,
|
||||||
|
NULL, actual_out_nbytes_ret);
|
||||||
|
}
|
||||||
412
lib/gdeflate/libdeflate/lib/hc_matchfinder.h
Normal file
412
lib/gdeflate/libdeflate/lib/hc_matchfinder.h
Normal file
|
|
@ -0,0 +1,412 @@
|
||||||
|
/*
|
||||||
|
* hc_matchfinder.h - Lempel-Ziv matchfinding with a hash table of linked lists
|
||||||
|
*
|
||||||
|
* Originally public domain; changes after 2016-09-07 are copyrighted.
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
* ---------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* Algorithm
|
||||||
|
*
|
||||||
|
* This is a Hash Chains (hc) based matchfinder.
|
||||||
|
*
|
||||||
|
* The main data structure is a hash table where each hash bucket contains a
|
||||||
|
* linked list (or "chain") of sequences whose first 4 bytes share the same hash
|
||||||
|
* code. Each sequence is identified by its starting position in the input
|
||||||
|
* buffer.
|
||||||
|
*
|
||||||
|
* The algorithm processes the input buffer sequentially. At each byte
|
||||||
|
* position, the hash code of the first 4 bytes of the sequence beginning at
|
||||||
|
* that position (the sequence being matched against) is computed. This
|
||||||
|
* identifies the hash bucket to use for that position. Then, this hash
|
||||||
|
* bucket's linked list is searched for matches. Then, a new linked list node
|
||||||
|
* is created to represent the current sequence and is prepended to the list.
|
||||||
|
*
|
||||||
|
* This algorithm has several useful properties:
|
||||||
|
*
|
||||||
|
* - It only finds true Lempel-Ziv matches; i.e., those where the matching
|
||||||
|
* sequence occurs prior to the sequence being matched against.
|
||||||
|
*
|
||||||
|
* - The sequences in each linked list are always sorted by decreasing starting
|
||||||
|
* position. Therefore, the closest (smallest offset) matches are found
|
||||||
|
* first, which in many compression formats tend to be the cheapest to encode.
|
||||||
|
*
|
||||||
|
* - Although fast running time is not guaranteed due to the possibility of the
|
||||||
|
* lists getting very long, the worst degenerate behavior can be easily
|
||||||
|
* prevented by capping the number of nodes searched at each position.
|
||||||
|
*
|
||||||
|
* - If the compressor decides not to search for matches at a certain position,
|
||||||
|
* then that position can be quickly inserted without searching the list.
|
||||||
|
*
|
||||||
|
* - The algorithm is adaptable to sliding windows: just store the positions
|
||||||
|
* relative to a "base" value that is updated from time to time, and stop
|
||||||
|
* searching each list when the sequences get too far away.
|
||||||
|
*
|
||||||
|
* ----------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* Optimizations
|
||||||
|
*
|
||||||
|
* The main hash table and chains handle length 4+ matches. Length 3 matches
|
||||||
|
* are handled by a separate hash table with no chains. This works well for
|
||||||
|
* typical "greedy" or "lazy"-style compressors, where length 3 matches are
|
||||||
|
* often only helpful if they have small offsets. Instead of searching a full
|
||||||
|
* chain for length 3+ matches, the algorithm just checks for one close length 3
|
||||||
|
* match, then focuses on finding length 4+ matches.
|
||||||
|
*
|
||||||
|
* The longest_match() and skip_positions() functions are inlined into the
|
||||||
|
* compressors that use them. This isn't just about saving the overhead of a
|
||||||
|
* function call. These functions are intended to be called from the inner
|
||||||
|
* loops of compressors, where giving the compiler more control over register
|
||||||
|
* allocation is very helpful. There is also significant benefit to be gained
|
||||||
|
* from allowing the CPU to predict branches independently at each call site.
|
||||||
|
* For example, "lazy"-style compressors can be written with two calls to
|
||||||
|
* longest_match(), each of which starts with a different 'best_len' and
|
||||||
|
* therefore has significantly different performance characteristics.
|
||||||
|
*
|
||||||
|
* Although any hash function can be used, a multiplicative hash is fast and
|
||||||
|
* works well.
|
||||||
|
*
|
||||||
|
* On some processors, it is significantly faster to extend matches by whole
|
||||||
|
* words (32 or 64 bits) instead of by individual bytes. For this to be the
|
||||||
|
* case, the processor must implement unaligned memory accesses efficiently and
|
||||||
|
* must have either a fast "find first set bit" instruction or a fast "find last
|
||||||
|
* set bit" instruction, depending on the processor's endianness.
|
||||||
|
*
|
||||||
|
* The code uses one loop for finding the first match and one loop for finding a
|
||||||
|
* longer match. Each of these loops is tuned for its respective task and in
|
||||||
|
* combination are faster than a single generalized loop that handles both
|
||||||
|
* tasks.
|
||||||
|
*
|
||||||
|
* The code also uses a tight inner loop that only compares the last and first
|
||||||
|
* bytes of a potential match. It is only when these bytes match that a full
|
||||||
|
* match extension is attempted.
|
||||||
|
*
|
||||||
|
* ----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIB_HC_MATCHFINDER_H
|
||||||
|
#define LIB_HC_MATCHFINDER_H
|
||||||
|
|
||||||
|
#include "matchfinder_common.h"
|
||||||
|
|
||||||
|
#define HC_MATCHFINDER_HASH3_ORDER 15
|
||||||
|
#define HC_MATCHFINDER_HASH4_ORDER 16
|
||||||
|
|
||||||
|
#define HC_MATCHFINDER_TOTAL_HASH_SIZE \
|
||||||
|
(((1UL << HC_MATCHFINDER_HASH3_ORDER) + \
|
||||||
|
(1UL << HC_MATCHFINDER_HASH4_ORDER)) * sizeof(mf_pos_t))
|
||||||
|
|
||||||
|
struct hc_matchfinder {
|
||||||
|
|
||||||
|
/* The hash table for finding length 3 matches */
|
||||||
|
mf_pos_t hash3_tab[1UL << HC_MATCHFINDER_HASH3_ORDER];
|
||||||
|
|
||||||
|
/* The hash table which contains the first nodes of the linked lists for
|
||||||
|
* finding length 4+ matches */
|
||||||
|
mf_pos_t hash4_tab[1UL << HC_MATCHFINDER_HASH4_ORDER];
|
||||||
|
|
||||||
|
/* The "next node" references for the linked lists. The "next node" of
|
||||||
|
* the node for the sequence with position 'pos' is 'next_tab[pos]'. */
|
||||||
|
mf_pos_t next_tab[MATCHFINDER_WINDOW_SIZE];
|
||||||
|
|
||||||
|
}
|
||||||
|
#ifdef _aligned_attribute
|
||||||
|
_aligned_attribute(MATCHFINDER_MEM_ALIGNMENT)
|
||||||
|
#endif
|
||||||
|
;
|
||||||
|
|
||||||
|
/* Prepare the matchfinder for a new input buffer. */
|
||||||
|
static forceinline void
|
||||||
|
hc_matchfinder_init(struct hc_matchfinder *mf)
|
||||||
|
{
|
||||||
|
STATIC_ASSERT(HC_MATCHFINDER_TOTAL_HASH_SIZE %
|
||||||
|
MATCHFINDER_SIZE_ALIGNMENT == 0);
|
||||||
|
|
||||||
|
matchfinder_init((mf_pos_t *)mf, HC_MATCHFINDER_TOTAL_HASH_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
static forceinline void
|
||||||
|
hc_matchfinder_slide_window(struct hc_matchfinder *mf)
|
||||||
|
{
|
||||||
|
STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
|
||||||
|
|
||||||
|
matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Find the longest match longer than 'best_len' bytes.
|
||||||
|
*
|
||||||
|
* @mf
|
||||||
|
* The matchfinder structure.
|
||||||
|
* @in_base_p
|
||||||
|
* Location of a pointer which points to the place in the input data the
|
||||||
|
* matchfinder currently stores positions relative to. This may be updated
|
||||||
|
* by this function.
|
||||||
|
* @cur_pos
|
||||||
|
* The current position in the input buffer relative to @in_base (the
|
||||||
|
* position of the sequence being matched against).
|
||||||
|
* @best_len
|
||||||
|
* Require a match longer than this length.
|
||||||
|
* @max_len
|
||||||
|
* The maximum permissible match length at this position.
|
||||||
|
* @nice_len
|
||||||
|
* Stop searching if a match of at least this length is found.
|
||||||
|
* Must be <= @max_len.
|
||||||
|
* @max_search_depth
|
||||||
|
* Limit on the number of potential matches to consider. Must be >= 1.
|
||||||
|
* @next_hashes
|
||||||
|
* The precomputed hash codes for the sequence beginning at @in_next.
|
||||||
|
* These will be used and then updated with the precomputed hashcodes for
|
||||||
|
* the sequence beginning at @in_next + 1.
|
||||||
|
* @offset_ret
|
||||||
|
* If a match is found, its offset is returned in this location.
|
||||||
|
*
|
||||||
|
* Return the length of the match found, or 'best_len' if no match longer than
|
||||||
|
* 'best_len' was found.
|
||||||
|
*/
|
||||||
|
static forceinline u32
|
||||||
|
hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
|
||||||
|
const u8 ** const restrict in_base_p,
|
||||||
|
const u8 * const restrict in_next,
|
||||||
|
u32 best_len,
|
||||||
|
const u32 max_len,
|
||||||
|
const u32 nice_len,
|
||||||
|
const u32 max_search_depth,
|
||||||
|
u32 * const restrict next_hashes,
|
||||||
|
u32 * const restrict offset_ret)
|
||||||
|
{
|
||||||
|
u32 depth_remaining = max_search_depth;
|
||||||
|
const u8 *best_matchptr = in_next;
|
||||||
|
mf_pos_t cur_node3, cur_node4;
|
||||||
|
u32 hash3, hash4;
|
||||||
|
u32 next_hashseq;
|
||||||
|
u32 seq4;
|
||||||
|
const u8 *matchptr;
|
||||||
|
u32 len;
|
||||||
|
u32 cur_pos = in_next - *in_base_p;
|
||||||
|
const u8 *in_base;
|
||||||
|
mf_pos_t cutoff;
|
||||||
|
|
||||||
|
if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
|
||||||
|
hc_matchfinder_slide_window(mf);
|
||||||
|
*in_base_p += MATCHFINDER_WINDOW_SIZE;
|
||||||
|
cur_pos = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
in_base = *in_base_p;
|
||||||
|
cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
|
||||||
|
|
||||||
|
if (unlikely(max_len < 5)) /* can we read 4 bytes from 'in_next + 1'? */
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
/* Get the precomputed hash codes. */
|
||||||
|
hash3 = next_hashes[0];
|
||||||
|
hash4 = next_hashes[1];
|
||||||
|
|
||||||
|
/* From the hash buckets, get the first node of each linked list. */
|
||||||
|
cur_node3 = mf->hash3_tab[hash3];
|
||||||
|
cur_node4 = mf->hash4_tab[hash4];
|
||||||
|
|
||||||
|
/* Update for length 3 matches. This replaces the singleton node in the
|
||||||
|
* 'hash3' bucket with the node for the current sequence. */
|
||||||
|
mf->hash3_tab[hash3] = cur_pos;
|
||||||
|
|
||||||
|
/* Update for length 4 matches. This prepends the node for the current
|
||||||
|
* sequence to the linked list in the 'hash4' bucket. */
|
||||||
|
mf->hash4_tab[hash4] = cur_pos;
|
||||||
|
mf->next_tab[cur_pos] = cur_node4;
|
||||||
|
|
||||||
|
/* Compute the next hash codes. */
|
||||||
|
next_hashseq = get_unaligned_le32(in_next + 1);
|
||||||
|
next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER);
|
||||||
|
next_hashes[1] = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER);
|
||||||
|
prefetchw(&mf->hash3_tab[next_hashes[0]]);
|
||||||
|
prefetchw(&mf->hash4_tab[next_hashes[1]]);
|
||||||
|
|
||||||
|
if (best_len < 4) { /* No match of length >= 4 found yet? */
|
||||||
|
|
||||||
|
/* Check for a length 3 match if needed. */
|
||||||
|
|
||||||
|
if (cur_node3 <= cutoff)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
seq4 = load_u32_unaligned(in_next);
|
||||||
|
|
||||||
|
if (best_len < 3) {
|
||||||
|
matchptr = &in_base[cur_node3];
|
||||||
|
if (load_u24_unaligned(matchptr) == loaded_u32_to_u24(seq4)) {
|
||||||
|
best_len = 3;
|
||||||
|
best_matchptr = matchptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check for a length 4 match. */
|
||||||
|
|
||||||
|
if (cur_node4 <= cutoff)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
/* No length 4 match found yet. Check the first 4 bytes. */
|
||||||
|
matchptr = &in_base[cur_node4];
|
||||||
|
|
||||||
|
if (load_u32_unaligned(matchptr) == seq4)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* The first 4 bytes did not match. Keep trying. */
|
||||||
|
cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
|
||||||
|
if (cur_node4 <= cutoff || !--depth_remaining)
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Found a match of length >= 4. Extend it to its full length. */
|
||||||
|
best_matchptr = matchptr;
|
||||||
|
best_len = lz_extend(in_next, best_matchptr, 4, max_len);
|
||||||
|
if (best_len >= nice_len)
|
||||||
|
goto out;
|
||||||
|
cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
|
||||||
|
if (cur_node4 <= cutoff || !--depth_remaining)
|
||||||
|
goto out;
|
||||||
|
} else {
|
||||||
|
if (cur_node4 <= cutoff || best_len >= nice_len)
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check for matches of length >= 5. */
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
for (;;) {
|
||||||
|
matchptr = &in_base[cur_node4];
|
||||||
|
|
||||||
|
/* Already found a length 4 match. Try for a longer
|
||||||
|
* match; start by checking either the last 4 bytes and
|
||||||
|
* the first 4 bytes, or the last byte. (The last byte,
|
||||||
|
* the one which would extend the match length by 1, is
|
||||||
|
* the most important.) */
|
||||||
|
#if UNALIGNED_ACCESS_IS_FAST
|
||||||
|
if ((load_u32_unaligned(matchptr + best_len - 3) ==
|
||||||
|
load_u32_unaligned(in_next + best_len - 3)) &&
|
||||||
|
(load_u32_unaligned(matchptr) ==
|
||||||
|
load_u32_unaligned(in_next)))
|
||||||
|
#else
|
||||||
|
if (matchptr[best_len] == in_next[best_len])
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Continue to the next node in the list. */
|
||||||
|
cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
|
||||||
|
if (cur_node4 <= cutoff || !--depth_remaining)
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if UNALIGNED_ACCESS_IS_FAST
|
||||||
|
len = 4;
|
||||||
|
#else
|
||||||
|
len = 0;
|
||||||
|
#endif
|
||||||
|
len = lz_extend(in_next, matchptr, len, max_len);
|
||||||
|
if (len > best_len) {
|
||||||
|
/* This is the new longest match. */
|
||||||
|
best_len = len;
|
||||||
|
best_matchptr = matchptr;
|
||||||
|
if (best_len >= nice_len)
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Continue to the next node in the list. */
|
||||||
|
cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
|
||||||
|
if (cur_node4 <= cutoff || !--depth_remaining)
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
out:
|
||||||
|
*offset_ret = in_next - best_matchptr;
|
||||||
|
return best_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Advance the matchfinder, but don't search for matches.
|
||||||
|
*
|
||||||
|
* @mf
|
||||||
|
* The matchfinder structure.
|
||||||
|
* @in_base_p
|
||||||
|
* Location of a pointer which points to the place in the input data the
|
||||||
|
* matchfinder currently stores positions relative to. This may be updated
|
||||||
|
* by this function.
|
||||||
|
* @cur_pos
|
||||||
|
* The current position in the input buffer relative to @in_base.
|
||||||
|
* @end_pos
|
||||||
|
* The end position of the input buffer, relative to @in_base.
|
||||||
|
* @next_hashes
|
||||||
|
* The precomputed hash codes for the sequence beginning at @in_next.
|
||||||
|
* These will be used and then updated with the precomputed hashcodes for
|
||||||
|
* the sequence beginning at @in_next + @count.
|
||||||
|
* @count
|
||||||
|
* The number of bytes to advance. Must be > 0.
|
||||||
|
*
|
||||||
|
* Returns @in_next + @count.
|
||||||
|
*/
|
||||||
|
static forceinline const u8 *
|
||||||
|
hc_matchfinder_skip_positions(struct hc_matchfinder * const restrict mf,
|
||||||
|
const u8 ** const restrict in_base_p,
|
||||||
|
const u8 *in_next,
|
||||||
|
const u8 * const in_end,
|
||||||
|
const u32 count,
|
||||||
|
u32 * const restrict next_hashes)
|
||||||
|
{
|
||||||
|
u32 cur_pos;
|
||||||
|
u32 hash3, hash4;
|
||||||
|
u32 next_hashseq;
|
||||||
|
u32 remaining = count;
|
||||||
|
|
||||||
|
if (unlikely(count + 5 > in_end - in_next))
|
||||||
|
return &in_next[count];
|
||||||
|
|
||||||
|
cur_pos = in_next - *in_base_p;
|
||||||
|
hash3 = next_hashes[0];
|
||||||
|
hash4 = next_hashes[1];
|
||||||
|
do {
|
||||||
|
if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
|
||||||
|
hc_matchfinder_slide_window(mf);
|
||||||
|
*in_base_p += MATCHFINDER_WINDOW_SIZE;
|
||||||
|
cur_pos = 0;
|
||||||
|
}
|
||||||
|
mf->hash3_tab[hash3] = cur_pos;
|
||||||
|
mf->next_tab[cur_pos] = mf->hash4_tab[hash4];
|
||||||
|
mf->hash4_tab[hash4] = cur_pos;
|
||||||
|
|
||||||
|
next_hashseq = get_unaligned_le32(++in_next);
|
||||||
|
hash3 = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER);
|
||||||
|
hash4 = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER);
|
||||||
|
cur_pos++;
|
||||||
|
} while (--remaining);
|
||||||
|
|
||||||
|
prefetchw(&mf->hash3_tab[hash3]);
|
||||||
|
prefetchw(&mf->hash4_tab[hash4]);
|
||||||
|
next_hashes[0] = hash3;
|
||||||
|
next_hashes[1] = hash4;
|
||||||
|
|
||||||
|
return in_next;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* LIB_HC_MATCHFINDER_H */
|
||||||
67
lib/gdeflate/libdeflate/lib/lib_common.h
Normal file
67
lib/gdeflate/libdeflate/lib/lib_common.h
Normal file
|
|
@ -0,0 +1,67 @@
|
||||||
|
/*
|
||||||
|
* lib_common.h - internal header included by all library code
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIB_LIB_COMMON_H
|
||||||
|
#define LIB_LIB_COMMON_H
|
||||||
|
|
||||||
|
#ifdef LIBDEFLATE_H
|
||||||
|
# error "lib_common.h must always be included before libdeflate.h"
|
||||||
|
/* because BUILDING_LIBDEFLATE must be set first */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define BUILDING_LIBDEFLATE
|
||||||
|
|
||||||
|
#include "../common/common_defs.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Prefix with "_libdeflate_" all global symbols which are not part of the API
|
||||||
|
* and don't already have a "libdeflate" prefix. This avoids exposing overly
|
||||||
|
* generic names when libdeflate is built as a static library.
|
||||||
|
*
|
||||||
|
* Note that the chosen prefix is not really important and can be changed
|
||||||
|
* without breaking library users. It was just chosen so that the resulting
|
||||||
|
* symbol names are unlikely to conflict with those from any other software.
|
||||||
|
* Also note that this fixup has no useful effect when libdeflate is built as a
|
||||||
|
* shared library, since these symbols are not exported.
|
||||||
|
*/
|
||||||
|
#define SYM_FIXUP(sym) _libdeflate_##sym
|
||||||
|
#define deflate_get_compression_level SYM_FIXUP(deflate_get_compression_level)
|
||||||
|
#define _cpu_features SYM_FIXUP(_cpu_features)
|
||||||
|
#define setup_cpu_features SYM_FIXUP(setup_cpu_features)
|
||||||
|
|
||||||
|
void *libdeflate_malloc(size_t size);
|
||||||
|
void libdeflate_free(void *ptr);
|
||||||
|
|
||||||
|
void *libdeflate_aligned_malloc(size_t alignment, size_t size);
|
||||||
|
void libdeflate_aligned_free(void *ptr);
|
||||||
|
|
||||||
|
#ifdef FREESTANDING
|
||||||
|
/*
|
||||||
|
* With -ffreestanding, <string.h> may be missing, and we must provide
|
||||||
|
* implementations of memset(), memcpy(), memmove(), and memcmp().
|
||||||
|
* See https://gcc.gnu.org/onlinedocs/gcc/Standards.html
|
||||||
|
*
|
||||||
|
* Also, -ffreestanding disables interpreting calls to these functions as
|
||||||
|
* built-ins. E.g., calling memcpy(&v, p, WORDBYTES) will make a function call,
|
||||||
|
* not be optimized to a single load instruction. For performance reasons we
|
||||||
|
* don't want that. So, declare these functions as macros that expand to the
|
||||||
|
* corresponding built-ins. This approach is recommended in the gcc man page.
|
||||||
|
* We still need the actual function definitions in case gcc calls them.
|
||||||
|
*/
|
||||||
|
void *memset(void *s, int c, size_t n);
|
||||||
|
#define memset(s, c, n) __builtin_memset((s), (c), (n))
|
||||||
|
|
||||||
|
void *memcpy(void *dest, const void *src, size_t n);
|
||||||
|
#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
|
||||||
|
|
||||||
|
void *memmove(void *dest, const void *src, size_t n);
|
||||||
|
#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
|
||||||
|
|
||||||
|
int memcmp(const void *s1, const void *s2, size_t n);
|
||||||
|
#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
|
||||||
|
#else
|
||||||
|
#include <string.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* LIB_LIB_COMMON_H */
|
||||||
180
lib/gdeflate/libdeflate/lib/matchfinder_common.h
Normal file
180
lib/gdeflate/libdeflate/lib/matchfinder_common.h
Normal file
|
|
@ -0,0 +1,180 @@
|
||||||
|
/*
|
||||||
|
* matchfinder_common.h - common code for Lempel-Ziv matchfinding
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIB_MATCHFINDER_COMMON_H
|
||||||
|
#define LIB_MATCHFINDER_COMMON_H
|
||||||
|
|
||||||
|
#include "lib_common.h"
|
||||||
|
#include "unaligned.h"
|
||||||
|
|
||||||
|
#ifndef MATCHFINDER_WINDOW_ORDER
|
||||||
|
# error "MATCHFINDER_WINDOW_ORDER must be defined!"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)
|
||||||
|
|
||||||
|
#ifdef DEFLATE64
|
||||||
|
typedef s32 mf_pos_t;
|
||||||
|
#else
|
||||||
|
typedef s16 mf_pos_t;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Required alignment of the matchfinder buffer pointer and size. The values
|
||||||
|
* here come from the AVX-2 implementation, which is the worst case.
|
||||||
|
*/
|
||||||
|
#define MATCHFINDER_MEM_ALIGNMENT 32
|
||||||
|
#define MATCHFINDER_SIZE_ALIGNMENT 128
|
||||||
|
|
||||||
|
#undef matchfinder_init
|
||||||
|
#undef matchfinder_rebase
|
||||||
|
#if defined(_aligned_attribute) && !defined(DEFLATE64)
|
||||||
|
# if defined(__arm__) || defined(__aarch64__)
|
||||||
|
# include "arm/matchfinder_impl.h"
|
||||||
|
# elif defined(__i386__) || defined(__x86_64__)
|
||||||
|
# include "x86/matchfinder_impl.h"
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize the hash table portion of the matchfinder.
|
||||||
|
*
|
||||||
|
* Essentially, this is an optimized memset().
|
||||||
|
*
|
||||||
|
* 'data' must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and
|
||||||
|
* 'size' must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
|
||||||
|
*/
|
||||||
|
#ifndef matchfinder_init
|
||||||
|
static forceinline void
|
||||||
|
matchfinder_init(mf_pos_t *data, size_t size)
|
||||||
|
{
|
||||||
|
size_t num_entries = size / sizeof(*data);
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
for (i = 0; i < num_entries; i++)
|
||||||
|
data[i] = MATCHFINDER_INITVAL;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Slide the matchfinder by WINDOW_SIZE bytes.
|
||||||
|
*
|
||||||
|
* This must be called just after each WINDOW_SIZE bytes have been run through
|
||||||
|
* the matchfinder.
|
||||||
|
*
|
||||||
|
* This will subtract WINDOW_SIZE bytes from each entry in the array specified.
|
||||||
|
* The effect is that all entries are updated to be relative to the current
|
||||||
|
* position, rather than the position WINDOW_SIZE bytes prior.
|
||||||
|
*
|
||||||
|
* Underflow is detected and replaced with signed saturation. This ensures that
|
||||||
|
* once the sliding window has passed over a position, that position forever
|
||||||
|
* remains out of bounds.
|
||||||
|
*
|
||||||
|
* The array passed in must contain all matchfinder data that is
|
||||||
|
* position-relative. Concretely, this will include the hash table as well as
|
||||||
|
* the table of positions that is used to link together the sequences in each
|
||||||
|
* hash bucket. Note that in the latter table, the links are 1-ary in the case
|
||||||
|
* of "hash chains", and 2-ary in the case of "binary trees". In either case,
|
||||||
|
* the links need to be rebased in the same way.
|
||||||
|
*
|
||||||
|
* 'data' must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and
|
||||||
|
* 'size' must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
|
||||||
|
*/
|
||||||
|
#ifndef matchfinder_rebase
|
||||||
|
static forceinline void
|
||||||
|
matchfinder_rebase(mf_pos_t *data, size_t size)
|
||||||
|
{
|
||||||
|
size_t num_entries = size / sizeof(*data);
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
if (MATCHFINDER_WINDOW_SIZE == 32768 && sizeof(mf_pos_t) == 2) {
|
||||||
|
/* Branchless version for 32768 byte windows. If the value was
|
||||||
|
* already negative, clear all bits except the sign bit; this
|
||||||
|
* changes the value to -32768. Otherwise, set the sign bit;
|
||||||
|
* this is equivalent to subtracting 32768. */
|
||||||
|
for (i = 0; i < num_entries; i++) {
|
||||||
|
u16 v = data[i];
|
||||||
|
u16 sign_bit = v & 0x8000;
|
||||||
|
v &= sign_bit - ((sign_bit >> 15) ^ 1);
|
||||||
|
v |= 0x8000;
|
||||||
|
data[i] = v;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < num_entries; i++) {
|
||||||
|
if (data[i] >= 0)
|
||||||
|
data[i] -= (mf_pos_t)MATCHFINDER_WINDOW_SIZE;
|
||||||
|
else
|
||||||
|
data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The hash function: given a sequence prefix held in the low-order bits of a
|
||||||
|
* 32-bit value, multiply by a carefully-chosen large constant. Discard any
|
||||||
|
* bits of the product that don't fit in a 32-bit value, but take the
|
||||||
|
* next-highest @num_bits bits of the product as the hash value, as those have
|
||||||
|
* the most randomness.
|
||||||
|
*/
|
||||||
|
static forceinline u32
|
||||||
|
lz_hash(u32 seq, unsigned num_bits)
|
||||||
|
{
|
||||||
|
return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return the number of bytes at @matchptr that match the bytes at @strptr, up
|
||||||
|
* to a maximum of @max_len. Initially, @start_len bytes are matched.
|
||||||
|
*/
|
||||||
|
static forceinline unsigned
|
||||||
|
lz_extend(const u8 * const strptr, const u8 * const matchptr,
|
||||||
|
const unsigned start_len, const unsigned max_len)
|
||||||
|
{
|
||||||
|
unsigned len = start_len;
|
||||||
|
machine_word_t v_word;
|
||||||
|
|
||||||
|
if (UNALIGNED_ACCESS_IS_FAST) {
|
||||||
|
|
||||||
|
if (likely(max_len - len >= 4 * WORDBYTES)) {
|
||||||
|
|
||||||
|
#define COMPARE_WORD_STEP \
|
||||||
|
v_word = load_word_unaligned(&matchptr[len]) ^ \
|
||||||
|
load_word_unaligned(&strptr[len]); \
|
||||||
|
if (v_word != 0) \
|
||||||
|
goto word_differs; \
|
||||||
|
len += WORDBYTES; \
|
||||||
|
|
||||||
|
COMPARE_WORD_STEP
|
||||||
|
COMPARE_WORD_STEP
|
||||||
|
COMPARE_WORD_STEP
|
||||||
|
COMPARE_WORD_STEP
|
||||||
|
#undef COMPARE_WORD_STEP
|
||||||
|
}
|
||||||
|
|
||||||
|
while (len + WORDBYTES <= max_len) {
|
||||||
|
v_word = load_word_unaligned(&matchptr[len]) ^
|
||||||
|
load_word_unaligned(&strptr[len]);
|
||||||
|
if (v_word != 0)
|
||||||
|
goto word_differs;
|
||||||
|
len += WORDBYTES;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
while (len < max_len && matchptr[len] == strptr[len])
|
||||||
|
len++;
|
||||||
|
return len;
|
||||||
|
|
||||||
|
word_differs:
|
||||||
|
if (CPU_IS_LITTLE_ENDIAN())
|
||||||
|
len += (bsfw(v_word) >> 3);
|
||||||
|
else
|
||||||
|
len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* LIB_MATCHFINDER_COMMON_H */
|
||||||
228
lib/gdeflate/libdeflate/lib/unaligned.h
Normal file
228
lib/gdeflate/libdeflate/lib/unaligned.h
Normal file
|
|
@ -0,0 +1,228 @@
|
||||||
|
/*
|
||||||
|
* unaligned.h - inline functions for unaligned memory accesses
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIB_UNALIGNED_H
|
||||||
|
#define LIB_UNALIGNED_H
|
||||||
|
|
||||||
|
#include "lib_common.h"
|
||||||
|
|
||||||
|
/***** Unaligned loads and stores without endianness conversion *****/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* memcpy() is portable, and it usually gets optimized appropriately by modern
|
||||||
|
* compilers. I.e., each memcpy() of 1, 2, 4, or WORDBYTES bytes gets compiled
|
||||||
|
* to a load or store instruction, not to an actual function call.
|
||||||
|
*
|
||||||
|
* We no longer use the "packed struct" approach, as that is nonstandard, has
|
||||||
|
* unclear semantics, and doesn't receive enough testing
|
||||||
|
* (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94994).
|
||||||
|
*
|
||||||
|
* arm32 with __ARM_FEATURE_UNALIGNED in gcc 5 and earlier is a known exception
|
||||||
|
* where memcpy() generates inefficient code
|
||||||
|
* (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67366). However, we no longer
|
||||||
|
* consider that one case important enough to maintain different code for.
|
||||||
|
* If you run into it, please just use a newer version of gcc (or use clang).
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define DEFINE_UNALIGNED_TYPE(type) \
|
||||||
|
static forceinline type \
|
||||||
|
load_##type##_unaligned(const void *p) \
|
||||||
|
{ \
|
||||||
|
type v; \
|
||||||
|
memcpy(&v, p, sizeof(v)); \
|
||||||
|
return v; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
static forceinline void \
|
||||||
|
store_##type##_unaligned(type v, void *p) \
|
||||||
|
{ \
|
||||||
|
memcpy(p, &v, sizeof(v)); \
|
||||||
|
}
|
||||||
|
|
||||||
|
DEFINE_UNALIGNED_TYPE(u16)
|
||||||
|
DEFINE_UNALIGNED_TYPE(u32)
|
||||||
|
DEFINE_UNALIGNED_TYPE(u64)
|
||||||
|
DEFINE_UNALIGNED_TYPE(machine_word_t)
|
||||||
|
|
||||||
|
#define load_word_unaligned load_machine_word_t_unaligned
|
||||||
|
#define store_word_unaligned store_machine_word_t_unaligned
|
||||||
|
|
||||||
|
/***** Unaligned loads with endianness conversion *****/
|
||||||
|
|
||||||
|
static forceinline u16
|
||||||
|
get_unaligned_le16(const u8 *p)
|
||||||
|
{
|
||||||
|
if (UNALIGNED_ACCESS_IS_FAST)
|
||||||
|
return le16_bswap(load_u16_unaligned(p));
|
||||||
|
else
|
||||||
|
return ((u16)p[1] << 8) | p[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
static forceinline u16
|
||||||
|
get_unaligned_be16(const u8 *p)
|
||||||
|
{
|
||||||
|
if (UNALIGNED_ACCESS_IS_FAST)
|
||||||
|
return be16_bswap(load_u16_unaligned(p));
|
||||||
|
else
|
||||||
|
return ((u16)p[0] << 8) | p[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
static forceinline u32
|
||||||
|
get_unaligned_le32(const u8 *p)
|
||||||
|
{
|
||||||
|
if (UNALIGNED_ACCESS_IS_FAST)
|
||||||
|
return le32_bswap(load_u32_unaligned(p));
|
||||||
|
else
|
||||||
|
return ((u32)p[3] << 24) | ((u32)p[2] << 16) |
|
||||||
|
((u32)p[1] << 8) | p[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
static forceinline u32
|
||||||
|
get_unaligned_be32(const u8 *p)
|
||||||
|
{
|
||||||
|
if (UNALIGNED_ACCESS_IS_FAST)
|
||||||
|
return be32_bswap(load_u32_unaligned(p));
|
||||||
|
else
|
||||||
|
return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
|
||||||
|
((u32)p[2] << 8) | p[3];
|
||||||
|
}
|
||||||
|
|
||||||
|
static forceinline u64
|
||||||
|
get_unaligned_le64(const u8 *p)
|
||||||
|
{
|
||||||
|
if (UNALIGNED_ACCESS_IS_FAST)
|
||||||
|
return le64_bswap(load_u64_unaligned(p));
|
||||||
|
else
|
||||||
|
return ((u64)p[7] << 56) | ((u64)p[6] << 48) |
|
||||||
|
((u64)p[5] << 40) | ((u64)p[4] << 32) |
|
||||||
|
((u64)p[3] << 24) | ((u64)p[2] << 16) |
|
||||||
|
((u64)p[1] << 8) | p[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
static forceinline machine_word_t
|
||||||
|
get_unaligned_leword(const u8 *p)
|
||||||
|
{
|
||||||
|
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
|
||||||
|
if (WORDBITS == 32)
|
||||||
|
return get_unaligned_le32(p);
|
||||||
|
else
|
||||||
|
return get_unaligned_le64(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
/***** Unaligned stores with endianness conversion *****/
|
||||||
|
|
||||||
|
static forceinline void
|
||||||
|
put_unaligned_le16(u16 v, u8 *p)
|
||||||
|
{
|
||||||
|
if (UNALIGNED_ACCESS_IS_FAST) {
|
||||||
|
store_u16_unaligned(le16_bswap(v), p);
|
||||||
|
} else {
|
||||||
|
p[0] = (u8)(v >> 0);
|
||||||
|
p[1] = (u8)(v >> 8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static forceinline void
|
||||||
|
put_unaligned_be16(u16 v, u8 *p)
|
||||||
|
{
|
||||||
|
if (UNALIGNED_ACCESS_IS_FAST) {
|
||||||
|
store_u16_unaligned(be16_bswap(v), p);
|
||||||
|
} else {
|
||||||
|
p[0] = (u8)(v >> 8);
|
||||||
|
p[1] = (u8)(v >> 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static forceinline void
|
||||||
|
put_unaligned_le32(u32 v, u8 *p)
|
||||||
|
{
|
||||||
|
if (UNALIGNED_ACCESS_IS_FAST) {
|
||||||
|
store_u32_unaligned(le32_bswap(v), p);
|
||||||
|
} else {
|
||||||
|
p[0] = (u8)(v >> 0);
|
||||||
|
p[1] = (u8)(v >> 8);
|
||||||
|
p[2] = (u8)(v >> 16);
|
||||||
|
p[3] = (u8)(v >> 24);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static forceinline void
|
||||||
|
put_unaligned_be32(u32 v, u8 *p)
|
||||||
|
{
|
||||||
|
if (UNALIGNED_ACCESS_IS_FAST) {
|
||||||
|
store_u32_unaligned(be32_bswap(v), p);
|
||||||
|
} else {
|
||||||
|
p[0] = (u8)(v >> 24);
|
||||||
|
p[1] = (u8)(v >> 16);
|
||||||
|
p[2] = (u8)(v >> 8);
|
||||||
|
p[3] = (u8)(v >> 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static forceinline void
|
||||||
|
put_unaligned_le64(u64 v, u8 *p)
|
||||||
|
{
|
||||||
|
if (UNALIGNED_ACCESS_IS_FAST) {
|
||||||
|
store_u64_unaligned(le64_bswap(v), p);
|
||||||
|
} else {
|
||||||
|
p[0] = (u8)(v >> 0);
|
||||||
|
p[1] = (u8)(v >> 8);
|
||||||
|
p[2] = (u8)(v >> 16);
|
||||||
|
p[3] = (u8)(v >> 24);
|
||||||
|
p[4] = (u8)(v >> 32);
|
||||||
|
p[5] = (u8)(v >> 40);
|
||||||
|
p[6] = (u8)(v >> 48);
|
||||||
|
p[7] = (u8)(v >> 56);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static forceinline void
|
||||||
|
put_unaligned_leword(machine_word_t v, u8 *p)
|
||||||
|
{
|
||||||
|
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
|
||||||
|
if (WORDBITS == 32)
|
||||||
|
put_unaligned_le32(v, p);
|
||||||
|
else
|
||||||
|
put_unaligned_le64(v, p);
|
||||||
|
}
|
||||||
|
|
||||||
|
/***** 24-bit loads *****/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Given a 32-bit value that was loaded with the platform's native endianness,
|
||||||
|
* return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24
|
||||||
|
* bits contain the first 3 bytes, arranged in octets in a platform-dependent
|
||||||
|
* order, at the memory location from which the input 32-bit value was loaded.
|
||||||
|
*/
|
||||||
|
static forceinline u32
|
||||||
|
loaded_u32_to_u24(u32 v)
|
||||||
|
{
|
||||||
|
if (CPU_IS_LITTLE_ENDIAN())
|
||||||
|
return v & 0xFFFFFF;
|
||||||
|
else
|
||||||
|
return v >> 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Load the next 3 bytes from the memory location @p into the 24 low-order bits
|
||||||
|
* of a 32-bit value. The order in which the 3 bytes will be arranged as octets
|
||||||
|
* in the 24 bits is platform-dependent. At least LOAD_U24_REQUIRED_NBYTES
|
||||||
|
* bytes must be available at @p; note that this may be more than 3.
|
||||||
|
*/
|
||||||
|
static forceinline u32
|
||||||
|
load_u24_unaligned(const u8 *p)
|
||||||
|
{
|
||||||
|
#if UNALIGNED_ACCESS_IS_FAST
|
||||||
|
# define LOAD_U24_REQUIRED_NBYTES 4
|
||||||
|
return loaded_u32_to_u24(load_u32_unaligned(p));
|
||||||
|
#else
|
||||||
|
# define LOAD_U24_REQUIRED_NBYTES 3
|
||||||
|
if (CPU_IS_LITTLE_ENDIAN())
|
||||||
|
return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
|
||||||
|
else
|
||||||
|
return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* LIB_UNALIGNED_H */
|
||||||
142
lib/gdeflate/libdeflate/lib/utils.c
Normal file
142
lib/gdeflate/libdeflate/lib/utils.c
Normal file
|
|
@ -0,0 +1,142 @@
|
||||||
|
/*
|
||||||
|
* utils.c - utility functions for libdeflate
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "lib_common.h"
|
||||||
|
|
||||||
|
#include "libdeflate.h"
|
||||||
|
|
||||||
|
#ifdef FREESTANDING
|
||||||
|
# define malloc NULL
|
||||||
|
# define free NULL
|
||||||
|
#else
|
||||||
|
# include <stdlib.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static void *(*libdeflate_malloc_func)(size_t) = malloc;
|
||||||
|
static void (*libdeflate_free_func)(void *) = free;
|
||||||
|
|
||||||
|
void *
|
||||||
|
libdeflate_malloc(size_t size)
|
||||||
|
{
|
||||||
|
return (*libdeflate_malloc_func)(size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
libdeflate_free(void *ptr)
|
||||||
|
{
|
||||||
|
(*libdeflate_free_func)(ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void *
|
||||||
|
libdeflate_aligned_malloc(size_t alignment, size_t size)
|
||||||
|
{
|
||||||
|
void *ptr = libdeflate_malloc(sizeof(void *) + alignment - 1 + size);
|
||||||
|
if (ptr) {
|
||||||
|
void *orig_ptr = ptr;
|
||||||
|
ptr = (void *)ALIGN((uintptr_t)ptr + sizeof(void *), alignment);
|
||||||
|
((void **)ptr)[-1] = orig_ptr;
|
||||||
|
}
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
libdeflate_aligned_free(void *ptr)
|
||||||
|
{
|
||||||
|
if (ptr)
|
||||||
|
libdeflate_free(((void **)ptr)[-1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
LIBDEFLATEEXPORT void LIBDEFLATEAPI
|
||||||
|
libdeflate_set_memory_allocator(void *(*malloc_func)(size_t),
|
||||||
|
void (*free_func)(void *))
|
||||||
|
{
|
||||||
|
libdeflate_malloc_func = malloc_func;
|
||||||
|
libdeflate_free_func = free_func;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Implementations of libc functions for freestanding library builds.
|
||||||
|
* Normal library builds don't use these. Not optimized yet; usually the
|
||||||
|
* compiler expands these functions and doesn't actually call them anyway.
|
||||||
|
*/
|
||||||
|
#ifdef FREESTANDING
|
||||||
|
#undef memset
|
||||||
|
void * __attribute__((weak))
|
||||||
|
memset(void *s, int c, size_t n)
|
||||||
|
{
|
||||||
|
u8 *p = s;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
for (i = 0; i < n; i++)
|
||||||
|
p[i] = c;
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef memcpy
|
||||||
|
void * __attribute__((weak))
|
||||||
|
memcpy(void *dest, const void *src, size_t n)
|
||||||
|
{
|
||||||
|
u8 *d = dest;
|
||||||
|
const u8 *s = src;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
for (i = 0; i < n; i++)
|
||||||
|
d[i] = s[i];
|
||||||
|
return dest;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef memmove
|
||||||
|
void * __attribute__((weak))
|
||||||
|
memmove(void *dest, const void *src, size_t n)
|
||||||
|
{
|
||||||
|
u8 *d = dest;
|
||||||
|
const u8 *s = src;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
if (d <= s)
|
||||||
|
return memcpy(d, s, n);
|
||||||
|
|
||||||
|
for (i = n; i > 0; i--)
|
||||||
|
d[i - 1] = s[i - 1];
|
||||||
|
return dest;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef memcmp
|
||||||
|
int __attribute__((weak))
|
||||||
|
memcmp(const void *s1, const void *s2, size_t n)
|
||||||
|
{
|
||||||
|
const u8 *p1 = s1;
|
||||||
|
const u8 *p2 = s2;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
for (i = 0; i < n; i++) {
|
||||||
|
if (p1[i] != p2[i])
|
||||||
|
return (int)p1[i] - (int)p2[i];
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif /* FREESTANDING */
|
||||||
337
lib/gdeflate/libdeflate/lib/x86/adler32_impl.h
Normal file
337
lib/gdeflate/libdeflate/lib/x86/adler32_impl.h
Normal file
|
|
@ -0,0 +1,337 @@
|
||||||
|
/*
|
||||||
|
* x86/adler32_impl.h - x86 implementations of Adler-32 checksum algorithm
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIB_X86_ADLER32_IMPL_H
|
||||||
|
#define LIB_X86_ADLER32_IMPL_H
|
||||||
|
|
||||||
|
#include "cpu_features.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The following macros horizontally sum the s1 counters and add them to the
|
||||||
|
* real s1, and likewise for s2. They do this via a series of reductions, each
|
||||||
|
* of which halves the vector length, until just one counter remains.
|
||||||
|
*
|
||||||
|
* The s1 reductions don't depend on the s2 reductions and vice versa, so for
|
||||||
|
* efficiency they are interleaved. Also, every other s1 counter is 0 due to
|
||||||
|
* the 'psadbw' instruction (_mm_sad_epu8) summing groups of 8 bytes rather than
|
||||||
|
* 4; hence, one of the s1 reductions is skipped when going from 128 => 32 bits.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define ADLER32_FINISH_VEC_CHUNK_128(s1, s2, v_s1, v_s2) \
|
||||||
|
{ \
|
||||||
|
__v4su s1_last = (v_s1), s2_last = (v_s2); \
|
||||||
|
\
|
||||||
|
/* 128 => 32 bits */ \
|
||||||
|
s2_last += (__v4su)_mm_shuffle_epi32((__m128i)s2_last, 0x31); \
|
||||||
|
s1_last += (__v4su)_mm_shuffle_epi32((__m128i)s1_last, 0x02); \
|
||||||
|
s2_last += (__v4su)_mm_shuffle_epi32((__m128i)s2_last, 0x02); \
|
||||||
|
\
|
||||||
|
*(s1) += (u32)_mm_cvtsi128_si32((__m128i)s1_last); \
|
||||||
|
*(s2) += (u32)_mm_cvtsi128_si32((__m128i)s2_last); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define ADLER32_FINISH_VEC_CHUNK_256(s1, s2, v_s1, v_s2) \
|
||||||
|
{ \
|
||||||
|
__v4su s1_128bit, s2_128bit; \
|
||||||
|
\
|
||||||
|
/* 256 => 128 bits */ \
|
||||||
|
s1_128bit = (__v4su)_mm256_extracti128_si256((__m256i)(v_s1), 0) + \
|
||||||
|
(__v4su)_mm256_extracti128_si256((__m256i)(v_s1), 1); \
|
||||||
|
s2_128bit = (__v4su)_mm256_extracti128_si256((__m256i)(v_s2), 0) + \
|
||||||
|
(__v4su)_mm256_extracti128_si256((__m256i)(v_s2), 1); \
|
||||||
|
\
|
||||||
|
ADLER32_FINISH_VEC_CHUNK_128((s1), (s2), s1_128bit, s2_128bit); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define ADLER32_FINISH_VEC_CHUNK_512(s1, s2, v_s1, v_s2) \
|
||||||
|
{ \
|
||||||
|
__v8su s1_256bit, s2_256bit; \
|
||||||
|
\
|
||||||
|
/* 512 => 256 bits */ \
|
||||||
|
s1_256bit = (__v8su)_mm512_extracti64x4_epi64((__m512i)(v_s1), 0) + \
|
||||||
|
(__v8su)_mm512_extracti64x4_epi64((__m512i)(v_s1), 1); \
|
||||||
|
s2_256bit = (__v8su)_mm512_extracti64x4_epi64((__m512i)(v_s2), 0) + \
|
||||||
|
(__v8su)_mm512_extracti64x4_epi64((__m512i)(v_s2), 1); \
|
||||||
|
\
|
||||||
|
ADLER32_FINISH_VEC_CHUNK_256((s1), (s2), s1_256bit, s2_256bit); \
|
||||||
|
}
|
||||||
|
|
||||||
|
/* AVX-512BW implementation: like the AVX2 one, but does 64 bytes at a time */
|
||||||
|
#undef DISPATCH_AVX512BW
|
||||||
|
#if !defined(DEFAULT_IMPL) && \
|
||||||
|
/*
|
||||||
|
* clang before v3.9 is missing some AVX-512BW intrinsics including
|
||||||
|
* _mm512_sad_epu8(), a.k.a. __builtin_ia32_psadbw512. So just make using
|
||||||
|
* AVX-512BW, even when __AVX512BW__ is defined, conditional on
|
||||||
|
* COMPILER_SUPPORTS_AVX512BW_TARGET where we check for that builtin.
|
||||||
|
*/ \
|
||||||
|
COMPILER_SUPPORTS_AVX512BW_TARGET && \
|
||||||
|
(defined(__AVX512BW__) || (X86_CPU_FEATURES_ENABLED && \
|
||||||
|
COMPILER_SUPPORTS_AVX512BW_TARGET_INTRINSICS))
|
||||||
|
# define FUNCNAME adler32_avx512bw
|
||||||
|
# define FUNCNAME_CHUNK adler32_avx512bw_chunk
|
||||||
|
# define IMPL_ALIGNMENT 64
|
||||||
|
# define IMPL_SEGMENT_SIZE 64
|
||||||
|
# define IMPL_MAX_CHUNK_SIZE MAX_CHUNK_SIZE
|
||||||
|
# ifdef __AVX512BW__
|
||||||
|
# define ATTRIBUTES
|
||||||
|
# define DEFAULT_IMPL adler32_avx512bw
|
||||||
|
# else
|
||||||
|
# define ATTRIBUTES __attribute__((target("avx512bw")))
|
||||||
|
# define DISPATCH 1
|
||||||
|
# define DISPATCH_AVX512BW 1
|
||||||
|
# endif
|
||||||
|
# include <immintrin.h>
|
||||||
|
static forceinline ATTRIBUTES void
|
||||||
|
adler32_avx512bw_chunk(const __m512i *p, const __m512i *const end,
|
||||||
|
u32 *s1, u32 *s2)
|
||||||
|
{
|
||||||
|
const __m512i zeroes = _mm512_setzero_si512();
|
||||||
|
const __v64qi multipliers = (__v64qi){
|
||||||
|
64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49,
|
||||||
|
48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33,
|
||||||
|
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
|
||||||
|
16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
|
||||||
|
};
|
||||||
|
const __v32hi ones = (__v32hi)_mm512_set1_epi16(1);
|
||||||
|
__v16si v_s1 = (__v16si)zeroes;
|
||||||
|
__v16si v_s1_sums = (__v16si)zeroes;
|
||||||
|
__v16si v_s2 = (__v16si)zeroes;
|
||||||
|
|
||||||
|
do {
|
||||||
|
/* Load the next 64-byte segment */
|
||||||
|
__m512i bytes = *p++;
|
||||||
|
|
||||||
|
/* Multiply the bytes by 64...1 (the number of times they need
|
||||||
|
* to be added to s2) and add adjacent products */
|
||||||
|
__v32hi sums = (__v32hi)_mm512_maddubs_epi16(
|
||||||
|
bytes, (__m512i)multipliers);
|
||||||
|
|
||||||
|
/* Keep sum of all previous s1 counters, for adding to s2 later.
|
||||||
|
* This allows delaying the multiplication by 64 to the end. */
|
||||||
|
v_s1_sums += v_s1;
|
||||||
|
|
||||||
|
/* Add the sum of each group of 8 bytes to the corresponding s1
|
||||||
|
* counter */
|
||||||
|
v_s1 += (__v16si)_mm512_sad_epu8(bytes, zeroes);
|
||||||
|
|
||||||
|
/* Add the sum of each group of 4 products of the bytes by
|
||||||
|
* 64...1 to the corresponding s2 counter */
|
||||||
|
v_s2 += (__v16si)_mm512_madd_epi16((__m512i)sums,
|
||||||
|
(__m512i)ones);
|
||||||
|
} while (p != end);
|
||||||
|
|
||||||
|
/* Finish the s2 counters by adding the sum of the s1 values at the
|
||||||
|
* beginning of each segment, multiplied by the segment size (64) */
|
||||||
|
v_s2 += (__v16si)_mm512_slli_epi32((__m512i)v_s1_sums, 6);
|
||||||
|
|
||||||
|
/* Add the counters to the real s1 and s2 */
|
||||||
|
ADLER32_FINISH_VEC_CHUNK_512(s1, s2, v_s1, v_s2);
|
||||||
|
}
|
||||||
|
# include "../adler32_vec_template.h"
|
||||||
|
#endif /* AVX-512BW implementation */
|
||||||
|
|
||||||
|
/* AVX2 implementation: like the AVX-512BW one, but does 32 bytes at a time */
|
||||||
|
#undef DISPATCH_AVX2
|
||||||
|
#if !defined(DEFAULT_IMPL) && \
|
||||||
|
(defined(__AVX2__) || (X86_CPU_FEATURES_ENABLED && \
|
||||||
|
COMPILER_SUPPORTS_AVX2_TARGET_INTRINSICS))
|
||||||
|
# define FUNCNAME adler32_avx2
|
||||||
|
# define FUNCNAME_CHUNK adler32_avx2_chunk
|
||||||
|
# define IMPL_ALIGNMENT 32
|
||||||
|
# define IMPL_SEGMENT_SIZE 32
|
||||||
|
# define IMPL_MAX_CHUNK_SIZE MAX_CHUNK_SIZE
|
||||||
|
# ifdef __AVX2__
|
||||||
|
# define ATTRIBUTES
|
||||||
|
# define DEFAULT_IMPL adler32_avx2
|
||||||
|
# else
|
||||||
|
# define ATTRIBUTES __attribute__((target("avx2")))
|
||||||
|
# define DISPATCH 1
|
||||||
|
# define DISPATCH_AVX2 1
|
||||||
|
# endif
|
||||||
|
# include <immintrin.h>
|
||||||
|
static forceinline ATTRIBUTES void
|
||||||
|
adler32_avx2_chunk(const __m256i *p, const __m256i *const end, u32 *s1, u32 *s2)
|
||||||
|
{
|
||||||
|
const __m256i zeroes = _mm256_setzero_si256();
|
||||||
|
const __v32qu multipliers = (__v32qu){
|
||||||
|
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
|
||||||
|
16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
|
||||||
|
};
|
||||||
|
const __v16hu ones = (__v16hu)_mm256_set1_epi16(1);
|
||||||
|
__v8su v_s1 = (__v8su)zeroes;
|
||||||
|
__v8su v_s1_sums = (__v8su)zeroes;
|
||||||
|
__v8su v_s2 = (__v8su)zeroes;
|
||||||
|
|
||||||
|
do {
|
||||||
|
/* Load the next 32-byte segment */
|
||||||
|
__m256i bytes = *p++;
|
||||||
|
|
||||||
|
/* Multiply the bytes by 32...1 (the number of times they need
|
||||||
|
* to be added to s2) and add adjacent products */
|
||||||
|
__v16hu sums = (__v16hu)_mm256_maddubs_epi16(
|
||||||
|
bytes, (__m256i)multipliers);
|
||||||
|
|
||||||
|
/* Keep sum of all previous s1 counters, for adding to s2 later.
|
||||||
|
* This allows delaying the multiplication by 32 to the end. */
|
||||||
|
v_s1_sums += v_s1;
|
||||||
|
|
||||||
|
/* Add the sum of each group of 8 bytes to the corresponding s1
|
||||||
|
* counter */
|
||||||
|
v_s1 += (__v8su)_mm256_sad_epu8(bytes, zeroes);
|
||||||
|
|
||||||
|
/* Add the sum of each group of 4 products of the bytes by
|
||||||
|
* 32...1 to the corresponding s2 counter */
|
||||||
|
v_s2 += (__v8su)_mm256_madd_epi16((__m256i)sums, (__m256i)ones);
|
||||||
|
} while (p != end);
|
||||||
|
|
||||||
|
/* Finish the s2 counters by adding the sum of the s1 values at the
|
||||||
|
* beginning of each segment, multiplied by the segment size (32) */
|
||||||
|
v_s2 += (__v8su)_mm256_slli_epi32((__m256i)v_s1_sums, 5);
|
||||||
|
|
||||||
|
/* Add the counters to the real s1 and s2 */
|
||||||
|
ADLER32_FINISH_VEC_CHUNK_256(s1, s2, v_s1, v_s2);
|
||||||
|
}
|
||||||
|
# include "../adler32_vec_template.h"
|
||||||
|
#endif /* AVX2 implementation */
|
||||||
|
|
||||||
|
/* SSE2 implementation */
|
||||||
|
#undef DISPATCH_SSE2
|
||||||
|
#if !defined(DEFAULT_IMPL) && \
|
||||||
|
(defined(__SSE2__) || (X86_CPU_FEATURES_ENABLED && \
|
||||||
|
COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS))
|
||||||
|
# define FUNCNAME adler32_sse2
|
||||||
|
# define FUNCNAME_CHUNK adler32_sse2_chunk
|
||||||
|
# define IMPL_ALIGNMENT 16
|
||||||
|
# define IMPL_SEGMENT_SIZE 32
|
||||||
|
/*
|
||||||
|
* The 16-bit precision byte counters must not be allowed to undergo *signed*
|
||||||
|
* overflow, otherwise the signed multiplications at the end (_mm_madd_epi16)
|
||||||
|
* would behave incorrectly.
|
||||||
|
*/
|
||||||
|
# define IMPL_MAX_CHUNK_SIZE (32 * (0x7FFF / 0xFF))
|
||||||
|
# ifdef __SSE2__
|
||||||
|
# define ATTRIBUTES
|
||||||
|
# define DEFAULT_IMPL adler32_sse2
|
||||||
|
# else
|
||||||
|
# define ATTRIBUTES __attribute__((target("sse2")))
|
||||||
|
# define DISPATCH 1
|
||||||
|
# define DISPATCH_SSE2 1
|
||||||
|
# endif
|
||||||
|
# include <emmintrin.h>
|
||||||
|
static forceinline ATTRIBUTES void
|
||||||
|
adler32_sse2_chunk(const __m128i *p, const __m128i *const end, u32 *s1, u32 *s2)
|
||||||
|
{
|
||||||
|
const __m128i zeroes = _mm_setzero_si128();
|
||||||
|
|
||||||
|
/* s1 counters: 32-bit, sum of bytes */
|
||||||
|
__v4su v_s1 = (__v4su)zeroes;
|
||||||
|
|
||||||
|
/* s2 counters: 32-bit, sum of s1 values */
|
||||||
|
__v4su v_s2 = (__v4su)zeroes;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Thirty-two 16-bit counters for byte sums. Each accumulates the bytes
|
||||||
|
* that eventually need to be multiplied by a number 32...1 for addition
|
||||||
|
* into s2.
|
||||||
|
*/
|
||||||
|
__v8hu v_byte_sums_a = (__v8hu)zeroes;
|
||||||
|
__v8hu v_byte_sums_b = (__v8hu)zeroes;
|
||||||
|
__v8hu v_byte_sums_c = (__v8hu)zeroes;
|
||||||
|
__v8hu v_byte_sums_d = (__v8hu)zeroes;
|
||||||
|
|
||||||
|
do {
|
||||||
|
/* Load the next 32 bytes */
|
||||||
|
const __m128i bytes1 = *p++;
|
||||||
|
const __m128i bytes2 = *p++;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Accumulate the previous s1 counters into the s2 counters.
|
||||||
|
* Logically, this really should be v_s2 += v_s1 * 32, but we
|
||||||
|
* can do the multiplication (or left shift) later.
|
||||||
|
*/
|
||||||
|
v_s2 += v_s1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* s1 update: use "Packed Sum of Absolute Differences" to add
|
||||||
|
* the bytes horizontally with 8 bytes per sum. Then add the
|
||||||
|
* sums to the s1 counters.
|
||||||
|
*/
|
||||||
|
v_s1 += (__v4su)_mm_sad_epu8(bytes1, zeroes);
|
||||||
|
v_s1 += (__v4su)_mm_sad_epu8(bytes2, zeroes);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Also accumulate the bytes into 32 separate counters that have
|
||||||
|
* 16-bit precision.
|
||||||
|
*/
|
||||||
|
v_byte_sums_a += (__v8hu)_mm_unpacklo_epi8(bytes1, zeroes);
|
||||||
|
v_byte_sums_b += (__v8hu)_mm_unpackhi_epi8(bytes1, zeroes);
|
||||||
|
v_byte_sums_c += (__v8hu)_mm_unpacklo_epi8(bytes2, zeroes);
|
||||||
|
v_byte_sums_d += (__v8hu)_mm_unpackhi_epi8(bytes2, zeroes);
|
||||||
|
|
||||||
|
} while (p != end);
|
||||||
|
|
||||||
|
/* Finish calculating the s2 counters */
|
||||||
|
v_s2 = (__v4su)_mm_slli_epi32((__m128i)v_s2, 5);
|
||||||
|
v_s2 += (__v4su)_mm_madd_epi16((__m128i)v_byte_sums_a,
|
||||||
|
(__m128i)(__v8hu){ 32, 31, 30, 29, 28, 27, 26, 25 });
|
||||||
|
v_s2 += (__v4su)_mm_madd_epi16((__m128i)v_byte_sums_b,
|
||||||
|
(__m128i)(__v8hu){ 24, 23, 22, 21, 20, 19, 18, 17 });
|
||||||
|
v_s2 += (__v4su)_mm_madd_epi16((__m128i)v_byte_sums_c,
|
||||||
|
(__m128i)(__v8hu){ 16, 15, 14, 13, 12, 11, 10, 9 });
|
||||||
|
v_s2 += (__v4su)_mm_madd_epi16((__m128i)v_byte_sums_d,
|
||||||
|
(__m128i)(__v8hu){ 8, 7, 6, 5, 4, 3, 2, 1 });
|
||||||
|
|
||||||
|
/* Add the counters to the real s1 and s2 */
|
||||||
|
ADLER32_FINISH_VEC_CHUNK_128(s1, s2, v_s1, v_s2);
|
||||||
|
}
|
||||||
|
# include "../adler32_vec_template.h"
|
||||||
|
#endif /* SSE2 implementation */
|
||||||
|
|
||||||
|
#ifdef DISPATCH
|
||||||
|
static inline adler32_func_t
|
||||||
|
arch_select_adler32_func(void)
|
||||||
|
{
|
||||||
|
u32 features = get_cpu_features();
|
||||||
|
|
||||||
|
#ifdef DISPATCH_AVX512BW
|
||||||
|
if (features & X86_CPU_FEATURE_AVX512BW)
|
||||||
|
return adler32_avx512bw;
|
||||||
|
#endif
|
||||||
|
#ifdef DISPATCH_AVX2
|
||||||
|
if (features & X86_CPU_FEATURE_AVX2)
|
||||||
|
return adler32_avx2;
|
||||||
|
#endif
|
||||||
|
#ifdef DISPATCH_SSE2
|
||||||
|
if (features & X86_CPU_FEATURE_SSE2)
|
||||||
|
return adler32_sse2;
|
||||||
|
#endif
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
#endif /* DISPATCH */
|
||||||
|
|
||||||
|
#endif /* LIB_X86_ADLER32_IMPL_H */
|
||||||
152
lib/gdeflate/libdeflate/lib/x86/cpu_features.c
Normal file
152
lib/gdeflate/libdeflate/lib/x86/cpu_features.c
Normal file
|
|
@ -0,0 +1,152 @@
|
||||||
|
/*
|
||||||
|
* x86/cpu_features.c - feature detection for x86 processors
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "../cpu_features_common.h" /* must be included first */
|
||||||
|
#include "cpu_features.h"
|
||||||
|
|
||||||
|
#if X86_CPU_FEATURES_ENABLED
|
||||||
|
|
||||||
|
volatile u32 _cpu_features = 0;
|
||||||
|
|
||||||
|
/* With old GCC versions we have to manually save and restore the x86_32 PIC
|
||||||
|
* register (ebx). See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602 */
|
||||||
|
#if defined(__i386__) && defined(__PIC__)
|
||||||
|
# define EBX_CONSTRAINT "=&r"
|
||||||
|
#else
|
||||||
|
# define EBX_CONSTRAINT "=b"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Execute the CPUID instruction. */
|
||||||
|
static inline void
|
||||||
|
cpuid(u32 leaf, u32 subleaf, u32 *a, u32 *b, u32 *c, u32 *d)
|
||||||
|
{
|
||||||
|
__asm__(".ifnc %%ebx, %1; mov %%ebx, %1; .endif\n"
|
||||||
|
"cpuid \n"
|
||||||
|
".ifnc %%ebx, %1; xchg %%ebx, %1; .endif\n"
|
||||||
|
: "=a" (*a), EBX_CONSTRAINT (*b), "=c" (*c), "=d" (*d)
|
||||||
|
: "a" (leaf), "c" (subleaf));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Read an extended control register. */
|
||||||
|
static inline u64
|
||||||
|
read_xcr(u32 index)
|
||||||
|
{
|
||||||
|
u32 edx, eax;
|
||||||
|
|
||||||
|
/* Execute the "xgetbv" instruction. Old versions of binutils do not
|
||||||
|
* recognize this instruction, so list the raw bytes instead. */
|
||||||
|
__asm__ (".byte 0x0f, 0x01, 0xd0" : "=d" (edx), "=a" (eax) : "c" (index));
|
||||||
|
|
||||||
|
return ((u64)edx << 32) | eax;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef BIT
|
||||||
|
#define BIT(nr) (1UL << (nr))
|
||||||
|
|
||||||
|
#define XCR0_BIT_SSE BIT(1)
|
||||||
|
#define XCR0_BIT_AVX BIT(2)
|
||||||
|
#define XCR0_BIT_OPMASK BIT(5)
|
||||||
|
#define XCR0_BIT_ZMM_HI256 BIT(6)
|
||||||
|
#define XCR0_BIT_HI16_ZMM BIT(7)
|
||||||
|
|
||||||
|
#define IS_SET(reg, nr) ((reg) & BIT(nr))
|
||||||
|
#define IS_ALL_SET(reg, mask) (((reg) & (mask)) == (mask))
|
||||||
|
|
||||||
|
static const struct cpu_feature x86_cpu_feature_table[] = {
|
||||||
|
{X86_CPU_FEATURE_SSE2, "sse2"},
|
||||||
|
{X86_CPU_FEATURE_PCLMUL, "pclmul"},
|
||||||
|
{X86_CPU_FEATURE_AVX, "avx"},
|
||||||
|
{X86_CPU_FEATURE_AVX2, "avx2"},
|
||||||
|
{X86_CPU_FEATURE_BMI2, "bmi2"},
|
||||||
|
{X86_CPU_FEATURE_AVX512BW, "avx512bw"},
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Initialize _cpu_features with bits for interesting processor features. */
|
||||||
|
void setup_cpu_features(void)
|
||||||
|
{
|
||||||
|
u32 features = 0;
|
||||||
|
u32 dummy1, dummy2, dummy3, dummy4;
|
||||||
|
u32 max_function;
|
||||||
|
u32 features_1, features_2, features_3, features_4;
|
||||||
|
bool os_avx_support = false;
|
||||||
|
bool os_avx512_support = false;
|
||||||
|
|
||||||
|
/* Get maximum supported function */
|
||||||
|
cpuid(0, 0, &max_function, &dummy2, &dummy3, &dummy4);
|
||||||
|
if (max_function < 1)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
/* Standard feature flags */
|
||||||
|
cpuid(1, 0, &dummy1, &dummy2, &features_2, &features_1);
|
||||||
|
|
||||||
|
if (IS_SET(features_1, 26))
|
||||||
|
features |= X86_CPU_FEATURE_SSE2;
|
||||||
|
|
||||||
|
if (IS_SET(features_2, 1))
|
||||||
|
features |= X86_CPU_FEATURE_PCLMUL;
|
||||||
|
|
||||||
|
if (IS_SET(features_2, 27)) { /* OSXSAVE set? */
|
||||||
|
u64 xcr0 = read_xcr(0);
|
||||||
|
|
||||||
|
os_avx_support = IS_ALL_SET(xcr0,
|
||||||
|
XCR0_BIT_SSE |
|
||||||
|
XCR0_BIT_AVX);
|
||||||
|
|
||||||
|
os_avx512_support = IS_ALL_SET(xcr0,
|
||||||
|
XCR0_BIT_SSE |
|
||||||
|
XCR0_BIT_AVX |
|
||||||
|
XCR0_BIT_OPMASK |
|
||||||
|
XCR0_BIT_ZMM_HI256 |
|
||||||
|
XCR0_BIT_HI16_ZMM);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (os_avx_support && IS_SET(features_2, 28))
|
||||||
|
features |= X86_CPU_FEATURE_AVX;
|
||||||
|
|
||||||
|
if (max_function < 7)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
/* Extended feature flags */
|
||||||
|
cpuid(7, 0, &dummy1, &features_3, &features_4, &dummy4);
|
||||||
|
|
||||||
|
if (os_avx_support && IS_SET(features_3, 5))
|
||||||
|
features |= X86_CPU_FEATURE_AVX2;
|
||||||
|
|
||||||
|
if (IS_SET(features_3, 8))
|
||||||
|
features |= X86_CPU_FEATURE_BMI2;
|
||||||
|
|
||||||
|
if (os_avx512_support && IS_SET(features_3, 30))
|
||||||
|
features |= X86_CPU_FEATURE_AVX512BW;
|
||||||
|
|
||||||
|
out:
|
||||||
|
disable_cpu_features_for_testing(&features, x86_cpu_feature_table,
|
||||||
|
ARRAY_LEN(x86_cpu_feature_table));
|
||||||
|
|
||||||
|
_cpu_features = features | X86_CPU_FEATURES_KNOWN;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* X86_CPU_FEATURES_ENABLED */
|
||||||
41
lib/gdeflate/libdeflate/lib/x86/cpu_features.h
Normal file
41
lib/gdeflate/libdeflate/lib/x86/cpu_features.h
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
/*
|
||||||
|
* x86/cpu_features.h - feature detection for x86 processors
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIB_X86_CPU_FEATURES_H
|
||||||
|
#define LIB_X86_CPU_FEATURES_H
|
||||||
|
|
||||||
|
#include "../lib_common.h"
|
||||||
|
|
||||||
|
#if (defined(__i386__) || defined(__x86_64__)) && \
|
||||||
|
COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE
|
||||||
|
# define X86_CPU_FEATURES_ENABLED 1
|
||||||
|
#else
|
||||||
|
# define X86_CPU_FEATURES_ENABLED 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if X86_CPU_FEATURES_ENABLED
|
||||||
|
|
||||||
|
#define X86_CPU_FEATURE_SSE2 0x00000001
|
||||||
|
#define X86_CPU_FEATURE_PCLMUL 0x00000002
|
||||||
|
#define X86_CPU_FEATURE_AVX 0x00000004
|
||||||
|
#define X86_CPU_FEATURE_AVX2 0x00000008
|
||||||
|
#define X86_CPU_FEATURE_BMI2 0x00000010
|
||||||
|
#define X86_CPU_FEATURE_AVX512BW 0x00000020
|
||||||
|
|
||||||
|
#define X86_CPU_FEATURES_KNOWN 0x80000000
|
||||||
|
|
||||||
|
extern volatile u32 _cpu_features;
|
||||||
|
|
||||||
|
void setup_cpu_features(void);
|
||||||
|
|
||||||
|
static inline u32 get_cpu_features(void)
|
||||||
|
{
|
||||||
|
if (_cpu_features == 0)
|
||||||
|
setup_cpu_features();
|
||||||
|
return _cpu_features;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* X86_CPU_FEATURES_ENABLED */
|
||||||
|
|
||||||
|
#endif /* LIB_X86_CPU_FEATURES_H */
|
||||||
92
lib/gdeflate/libdeflate/lib/x86/crc32_impl.h
Normal file
92
lib/gdeflate/libdeflate/lib/x86/crc32_impl.h
Normal file
|
|
@ -0,0 +1,92 @@
|
||||||
|
/*
|
||||||
|
* x86/crc32_impl.h - x86 implementations of CRC-32 checksum algorithm
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIB_X86_CRC32_IMPL_H
|
||||||
|
#define LIB_X86_CRC32_IMPL_H
|
||||||
|
|
||||||
|
#include "cpu_features.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Include the PCLMUL/AVX implementation? Although our PCLMUL-optimized CRC-32
|
||||||
|
* function doesn't use any AVX intrinsics specifically, it can benefit a lot
|
||||||
|
* from being compiled for an AVX target: on Skylake, ~16700 MB/s vs. ~10100
|
||||||
|
* MB/s. I expect this is related to the PCLMULQDQ instructions being assembled
|
||||||
|
* in the newer three-operand form rather than the older two-operand form.
|
||||||
|
*
|
||||||
|
* Note: this is only needed if __AVX__ is *not* defined, since otherwise the
|
||||||
|
* "regular" PCLMUL implementation would already be AVX enabled.
|
||||||
|
*/
|
||||||
|
#undef DISPATCH_PCLMUL_AVX
|
||||||
|
#if !defined(DEFAULT_IMPL) && !defined(__AVX__) && \
|
||||||
|
X86_CPU_FEATURES_ENABLED && COMPILER_SUPPORTS_AVX_TARGET && \
|
||||||
|
(defined(__PCLMUL__) || COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS)
|
||||||
|
# define FUNCNAME crc32_pclmul_avx
|
||||||
|
# define FUNCNAME_ALIGNED crc32_pclmul_avx_aligned
|
||||||
|
# define ATTRIBUTES __attribute__((target("pclmul,avx")))
|
||||||
|
# define DISPATCH 1
|
||||||
|
# define DISPATCH_PCLMUL_AVX 1
|
||||||
|
# include "crc32_pclmul_template.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* PCLMUL implementation */
|
||||||
|
#undef DISPATCH_PCLMUL
|
||||||
|
#if !defined(DEFAULT_IMPL) && \
|
||||||
|
(defined(__PCLMUL__) || (X86_CPU_FEATURES_ENABLED && \
|
||||||
|
COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS))
|
||||||
|
# define FUNCNAME crc32_pclmul
|
||||||
|
# define FUNCNAME_ALIGNED crc32_pclmul_aligned
|
||||||
|
# ifdef __PCLMUL__
|
||||||
|
# define ATTRIBUTES
|
||||||
|
# define DEFAULT_IMPL crc32_pclmul
|
||||||
|
# else
|
||||||
|
# define ATTRIBUTES __attribute__((target("pclmul")))
|
||||||
|
# define DISPATCH 1
|
||||||
|
# define DISPATCH_PCLMUL 1
|
||||||
|
# endif
|
||||||
|
# include "crc32_pclmul_template.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef DISPATCH
|
||||||
|
static inline crc32_func_t
|
||||||
|
arch_select_crc32_func(void)
|
||||||
|
{
|
||||||
|
u32 features = get_cpu_features();
|
||||||
|
|
||||||
|
#ifdef DISPATCH_PCLMUL_AVX
|
||||||
|
if ((features & X86_CPU_FEATURE_PCLMUL) &&
|
||||||
|
(features & X86_CPU_FEATURE_AVX))
|
||||||
|
return crc32_pclmul_avx;
|
||||||
|
#endif
|
||||||
|
#ifdef DISPATCH_PCLMUL
|
||||||
|
if (features & X86_CPU_FEATURE_PCLMUL)
|
||||||
|
return crc32_pclmul;
|
||||||
|
#endif
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
#endif /* DISPATCH */
|
||||||
|
|
||||||
|
#endif /* LIB_X86_CRC32_IMPL_H */
|
||||||
262
lib/gdeflate/libdeflate/lib/x86/crc32_pclmul_template.h
Normal file
262
lib/gdeflate/libdeflate/lib/x86/crc32_pclmul_template.h
Normal file
|
|
@ -0,0 +1,262 @@
|
||||||
|
/*
|
||||||
|
* x86/crc32_pclmul_template.h
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <immintrin.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* CRC-32 folding with PCLMULQDQ.
|
||||||
|
*
|
||||||
|
* The basic idea is to repeatedly "fold" each 512 bits into the next 512 bits,
|
||||||
|
* producing an abbreviated message which is congruent the original message
|
||||||
|
* modulo the generator polynomial G(x).
|
||||||
|
*
|
||||||
|
* Folding each 512 bits is implemented as eight 64-bit folds, each of which
|
||||||
|
* uses one carryless multiplication instruction. It's expected that CPUs may
|
||||||
|
* be able to execute some of these multiplications in parallel.
|
||||||
|
*
|
||||||
|
* Explanation of "folding": let A(x) be 64 bits from the message, and let B(x)
|
||||||
|
* be 95 bits from a constant distance D later in the message. The relevant
|
||||||
|
* portion of the message can be written as:
|
||||||
|
*
|
||||||
|
* M(x) = A(x)*x^D + B(x)
|
||||||
|
*
|
||||||
|
* ... where + and * represent addition and multiplication, respectively, of
|
||||||
|
* polynomials over GF(2). Note that when implemented on a computer, these
|
||||||
|
* operations are equivalent to XOR and carryless multiplication, respectively.
|
||||||
|
*
|
||||||
|
* For the purpose of CRC calculation, only the remainder modulo the generator
|
||||||
|
* polynomial G(x) matters:
|
||||||
|
*
|
||||||
|
* M(x) mod G(x) = (A(x)*x^D + B(x)) mod G(x)
|
||||||
|
*
|
||||||
|
* Since the modulo operation can be applied anywhere in a sequence of additions
|
||||||
|
* and multiplications without affecting the result, this is equivalent to:
|
||||||
|
*
|
||||||
|
* M(x) mod G(x) = (A(x)*(x^D mod G(x)) + B(x)) mod G(x)
|
||||||
|
*
|
||||||
|
* For any D, 'x^D mod G(x)' will be a polynomial with maximum degree 31, i.e.
|
||||||
|
* a 32-bit quantity. So 'A(x) * (x^D mod G(x))' is equivalent to a carryless
|
||||||
|
* multiplication of a 64-bit quantity by a 32-bit quantity, producing a 95-bit
|
||||||
|
* product. Then, adding (XOR-ing) the product to B(x) produces a polynomial
|
||||||
|
* with the same length as B(x) but with the same remainder as 'A(x)*x^D +
|
||||||
|
* B(x)'. This is the basic fold operation with 64 bits.
|
||||||
|
*
|
||||||
|
* Note that the carryless multiplication instruction PCLMULQDQ actually takes
|
||||||
|
* two 64-bit inputs and produces a 127-bit product in the low-order bits of a
|
||||||
|
* 128-bit XMM register. This works fine, but care must be taken to account for
|
||||||
|
* "bit endianness". With the CRC version implemented here, bits are always
|
||||||
|
* ordered such that the lowest-order bit represents the coefficient of highest
|
||||||
|
* power of x and the highest-order bit represents the coefficient of the lowest
|
||||||
|
* power of x. This is backwards from the more intuitive order. Still,
|
||||||
|
* carryless multiplication works essentially the same either way. It just must
|
||||||
|
* be accounted for that when we XOR the 95-bit product in the low-order 95 bits
|
||||||
|
* of a 128-bit XMM register into 128-bits of later data held in another XMM
|
||||||
|
* register, we'll really be XOR-ing the product into the mathematically higher
|
||||||
|
* degree end of those later bits, not the lower degree end as may be expected.
|
||||||
|
*
|
||||||
|
* So given that caveat and the fact that we process 512 bits per iteration, the
|
||||||
|
* 'D' values we need for the two 64-bit halves of each 128 bits of data are:
|
||||||
|
*
|
||||||
|
* D = (512 + 95) - 64 for the higher-degree half of each 128 bits,
|
||||||
|
* i.e. the lower order bits in the XMM register
|
||||||
|
*
|
||||||
|
* D = (512 + 95) - 128 for the lower-degree half of each 128 bits,
|
||||||
|
* i.e. the higher order bits in the XMM register
|
||||||
|
*
|
||||||
|
* The required 'x^D mod G(x)' values were precomputed.
|
||||||
|
*
|
||||||
|
* When <= 512 bits remain in the message, we finish up by folding across
|
||||||
|
* smaller distances. This works similarly; the distance D is just different,
|
||||||
|
* so different constant multipliers must be used. Finally, once the remaining
|
||||||
|
* message is just 64 bits, it is reduced to the CRC-32 using Barrett reduction
|
||||||
|
* (explained later).
|
||||||
|
*
|
||||||
|
* For more information see the original paper from Intel:
|
||||||
|
* "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
|
||||||
|
* December 2009
|
||||||
|
* http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
|
||||||
|
*/
|
||||||
|
static u32 ATTRIBUTES
|
||||||
|
FUNCNAME_ALIGNED(u32 remainder, const __m128i *p, size_t nr_segs)
|
||||||
|
{
|
||||||
|
/* Constants precomputed by gen_crc32_multipliers.c. Do not edit! */
|
||||||
|
const __v2di multipliers_4 = (__v2di){ 0x8F352D95, 0x1D9513D7 };
|
||||||
|
const __v2di multipliers_2 = (__v2di){ 0xF1DA05AA, 0x81256527 };
|
||||||
|
const __v2di multipliers_1 = (__v2di){ 0xAE689191, 0xCCAA009E };
|
||||||
|
const __v2di final_multiplier = (__v2di){ 0xB8BC6765 };
|
||||||
|
const __m128i mask32 = (__m128i)(__v4si){ 0xFFFFFFFF };
|
||||||
|
const __v2di barrett_reduction_constants =
|
||||||
|
(__v2di){ 0x00000001F7011641, 0x00000001DB710641 };
|
||||||
|
|
||||||
|
const __m128i * const end = p + nr_segs;
|
||||||
|
const __m128i * const end512 = p + (nr_segs & ~3);
|
||||||
|
__m128i x0, x1, x2, x3;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Account for the current 'remainder', i.e. the CRC of the part of the
|
||||||
|
* message already processed. Explanation: rewrite the message
|
||||||
|
* polynomial M(x) in terms of the first part A(x), the second part
|
||||||
|
* B(x), and the length of the second part in bits |B(x)| >= 32:
|
||||||
|
*
|
||||||
|
* M(x) = A(x)*x^|B(x)| + B(x)
|
||||||
|
*
|
||||||
|
* Then the CRC of M(x) is:
|
||||||
|
*
|
||||||
|
* CRC(M(x)) = CRC(A(x)*x^|B(x)| + B(x))
|
||||||
|
* = CRC(A(x)*x^32*x^(|B(x)| - 32) + B(x))
|
||||||
|
* = CRC(CRC(A(x))*x^(|B(x)| - 32) + B(x))
|
||||||
|
*
|
||||||
|
* Note: all arithmetic is modulo G(x), the generator polynomial; that's
|
||||||
|
* why A(x)*x^32 can be replaced with CRC(A(x)) = A(x)*x^32 mod G(x).
|
||||||
|
*
|
||||||
|
* So the CRC of the full message is the CRC of the second part of the
|
||||||
|
* message where the first 32 bits of the second part of the message
|
||||||
|
* have been XOR'ed with the CRC of the first part of the message.
|
||||||
|
*/
|
||||||
|
x0 = *p++;
|
||||||
|
x0 ^= (__m128i)(__v4si){ remainder };
|
||||||
|
|
||||||
|
if (p > end512) /* only 128, 256, or 384 bits of input? */
|
||||||
|
goto _128_bits_at_a_time;
|
||||||
|
x1 = *p++;
|
||||||
|
x2 = *p++;
|
||||||
|
x3 = *p++;
|
||||||
|
|
||||||
|
/* Fold 512 bits at a time */
|
||||||
|
for (; p != end512; p += 4) {
|
||||||
|
__m128i y0, y1, y2, y3;
|
||||||
|
|
||||||
|
y0 = p[0];
|
||||||
|
y1 = p[1];
|
||||||
|
y2 = p[2];
|
||||||
|
y3 = p[3];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note: the immediate constant for PCLMULQDQ specifies which
|
||||||
|
* 64-bit halves of the 128-bit vectors to multiply:
|
||||||
|
*
|
||||||
|
* 0x00 means low halves (higher degree polynomial terms for us)
|
||||||
|
* 0x11 means high halves (lower degree polynomial terms for us)
|
||||||
|
*/
|
||||||
|
y0 ^= _mm_clmulepi64_si128(x0, multipliers_4, 0x00);
|
||||||
|
y1 ^= _mm_clmulepi64_si128(x1, multipliers_4, 0x00);
|
||||||
|
y2 ^= _mm_clmulepi64_si128(x2, multipliers_4, 0x00);
|
||||||
|
y3 ^= _mm_clmulepi64_si128(x3, multipliers_4, 0x00);
|
||||||
|
y0 ^= _mm_clmulepi64_si128(x0, multipliers_4, 0x11);
|
||||||
|
y1 ^= _mm_clmulepi64_si128(x1, multipliers_4, 0x11);
|
||||||
|
y2 ^= _mm_clmulepi64_si128(x2, multipliers_4, 0x11);
|
||||||
|
y3 ^= _mm_clmulepi64_si128(x3, multipliers_4, 0x11);
|
||||||
|
|
||||||
|
x0 = y0;
|
||||||
|
x1 = y1;
|
||||||
|
x2 = y2;
|
||||||
|
x3 = y3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fold 512 bits => 128 bits */
|
||||||
|
x2 ^= _mm_clmulepi64_si128(x0, multipliers_2, 0x00);
|
||||||
|
x3 ^= _mm_clmulepi64_si128(x1, multipliers_2, 0x00);
|
||||||
|
x2 ^= _mm_clmulepi64_si128(x0, multipliers_2, 0x11);
|
||||||
|
x3 ^= _mm_clmulepi64_si128(x1, multipliers_2, 0x11);
|
||||||
|
x3 ^= _mm_clmulepi64_si128(x2, multipliers_1, 0x00);
|
||||||
|
x3 ^= _mm_clmulepi64_si128(x2, multipliers_1, 0x11);
|
||||||
|
x0 = x3;
|
||||||
|
|
||||||
|
_128_bits_at_a_time:
|
||||||
|
while (p != end) {
|
||||||
|
/* Fold 128 bits into next 128 bits */
|
||||||
|
x1 = *p++;
|
||||||
|
x1 ^= _mm_clmulepi64_si128(x0, multipliers_1, 0x00);
|
||||||
|
x1 ^= _mm_clmulepi64_si128(x0, multipliers_1, 0x11);
|
||||||
|
x0 = x1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now there are just 128 bits left, stored in 'x0'. */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fold 128 => 96 bits. This also implicitly appends 32 zero bits,
|
||||||
|
* which is equivalent to multiplying by x^32. This is needed because
|
||||||
|
* the CRC is defined as M(x)*x^32 mod G(x), not just M(x) mod G(x).
|
||||||
|
*/
|
||||||
|
x0 = _mm_srli_si128(x0, 8) ^
|
||||||
|
_mm_clmulepi64_si128(x0, multipliers_1, 0x10);
|
||||||
|
|
||||||
|
/* Fold 96 => 64 bits */
|
||||||
|
x0 = _mm_srli_si128(x0, 4) ^
|
||||||
|
_mm_clmulepi64_si128(x0 & mask32, final_multiplier, 0x00);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Finally, reduce 64 => 32 bits using Barrett reduction.
|
||||||
|
*
|
||||||
|
* Let M(x) = A(x)*x^32 + B(x) be the remaining message. The goal is to
|
||||||
|
* compute R(x) = M(x) mod G(x). Since degree(B(x)) < degree(G(x)):
|
||||||
|
*
|
||||||
|
* R(x) = (A(x)*x^32 + B(x)) mod G(x)
|
||||||
|
* = (A(x)*x^32) mod G(x) + B(x)
|
||||||
|
*
|
||||||
|
* Then, by the Division Algorithm there exists a unique q(x) such that:
|
||||||
|
*
|
||||||
|
* A(x)*x^32 mod G(x) = A(x)*x^32 - q(x)*G(x)
|
||||||
|
*
|
||||||
|
* Since the left-hand side is of maximum degree 31, the right-hand side
|
||||||
|
* must be too. This implies that we can apply 'mod x^32' to the
|
||||||
|
* right-hand side without changing its value:
|
||||||
|
*
|
||||||
|
* (A(x)*x^32 - q(x)*G(x)) mod x^32 = q(x)*G(x) mod x^32
|
||||||
|
*
|
||||||
|
* Note that '+' is equivalent to '-' in polynomials over GF(2).
|
||||||
|
*
|
||||||
|
* We also know that:
|
||||||
|
*
|
||||||
|
* / A(x)*x^32 \
|
||||||
|
* q(x) = floor ( --------- )
|
||||||
|
* \ G(x) /
|
||||||
|
*
|
||||||
|
* To compute this efficiently, we can multiply the top and bottom by
|
||||||
|
* x^32 and move the division by G(x) to the top:
|
||||||
|
*
|
||||||
|
* / A(x) * floor(x^64 / G(x)) \
|
||||||
|
* q(x) = floor ( ------------------------- )
|
||||||
|
* \ x^32 /
|
||||||
|
*
|
||||||
|
* Note that floor(x^64 / G(x)) is a constant.
|
||||||
|
*
|
||||||
|
* So finally we have:
|
||||||
|
*
|
||||||
|
* / A(x) * floor(x^64 / G(x)) \
|
||||||
|
* R(x) = B(x) + G(x)*floor ( ------------------------- )
|
||||||
|
* \ x^32 /
|
||||||
|
*/
|
||||||
|
x1 = x0;
|
||||||
|
x0 = _mm_clmulepi64_si128(x0 & mask32, barrett_reduction_constants, 0x00);
|
||||||
|
x0 = _mm_clmulepi64_si128(x0 & mask32, barrett_reduction_constants, 0x10);
|
||||||
|
return _mm_cvtsi128_si32(_mm_srli_si128(x0 ^ x1, 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
#define IMPL_ALIGNMENT 16
|
||||||
|
#define IMPL_SEGMENT_SIZE 16
|
||||||
|
#include "../crc32_vec_template.h"
|
||||||
35
lib/gdeflate/libdeflate/lib/x86/decompress_impl.h
Normal file
35
lib/gdeflate/libdeflate/lib/x86/decompress_impl.h
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
#ifndef LIB_X86_DECOMPRESS_IMPL_H
|
||||||
|
#define LIB_X86_DECOMPRESS_IMPL_H
|
||||||
|
|
||||||
|
#include "cpu_features.h"
|
||||||
|
|
||||||
|
/* Include the BMI2-optimized version? */
|
||||||
|
#undef DISPATCH_BMI2
|
||||||
|
#if !defined(__BMI2__) && X86_CPU_FEATURES_ENABLED && \
|
||||||
|
COMPILER_SUPPORTS_BMI2_TARGET
|
||||||
|
# define FUNCNAME deflate_decompress_bmi2
|
||||||
|
# define ATTRIBUTES __attribute__((target("bmi2")))
|
||||||
|
# define DISPATCH 1
|
||||||
|
# define DISPATCH_BMI2 1
|
||||||
|
#ifdef GDEFLATE
|
||||||
|
# include "../gdeflate_decompress_template.h"
|
||||||
|
#else
|
||||||
|
# include "../decompress_template.h"
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef DISPATCH
|
||||||
|
static inline decompress_func_t
|
||||||
|
arch_select_decompress_func(void)
|
||||||
|
{
|
||||||
|
u32 features = get_cpu_features();
|
||||||
|
|
||||||
|
#ifdef DISPATCH_BMI2
|
||||||
|
if (features & X86_CPU_FEATURE_BMI2)
|
||||||
|
return deflate_decompress_bmi2;
|
||||||
|
#endif
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
#endif /* DISPATCH */
|
||||||
|
|
||||||
|
#endif /* LIB_X86_DECOMPRESS_IMPL_H */
|
||||||
122
lib/gdeflate/libdeflate/lib/x86/matchfinder_impl.h
Normal file
122
lib/gdeflate/libdeflate/lib/x86/matchfinder_impl.h
Normal file
|
|
@ -0,0 +1,122 @@
|
||||||
|
/*
|
||||||
|
* x86/matchfinder_impl.h - x86 implementations of matchfinder functions
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIB_X86_MATCHFINDER_IMPL_H
|
||||||
|
#define LIB_X86_MATCHFINDER_IMPL_H
|
||||||
|
|
||||||
|
#ifdef __AVX2__
|
||||||
|
# include <immintrin.h>
|
||||||
|
static forceinline void
|
||||||
|
matchfinder_init_avx2(mf_pos_t *data, size_t size)
|
||||||
|
{
|
||||||
|
__m256i *p = (__m256i *)data;
|
||||||
|
__m256i v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
|
||||||
|
|
||||||
|
STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
|
||||||
|
STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
|
||||||
|
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
||||||
|
|
||||||
|
do {
|
||||||
|
p[0] = v;
|
||||||
|
p[1] = v;
|
||||||
|
p[2] = v;
|
||||||
|
p[3] = v;
|
||||||
|
p += 4;
|
||||||
|
size -= 4 * sizeof(*p);
|
||||||
|
} while (size != 0);
|
||||||
|
}
|
||||||
|
#define matchfinder_init matchfinder_init_avx2
|
||||||
|
|
||||||
|
static forceinline void
|
||||||
|
matchfinder_rebase_avx2(mf_pos_t *data, size_t size)
|
||||||
|
{
|
||||||
|
__m256i *p = (__m256i *)data;
|
||||||
|
__m256i v = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
|
||||||
|
|
||||||
|
STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
|
||||||
|
STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
|
||||||
|
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
||||||
|
|
||||||
|
do {
|
||||||
|
/* PADDSW: Add Packed Signed Integers With Signed Saturation */
|
||||||
|
p[0] = _mm256_adds_epi16(p[0], v);
|
||||||
|
p[1] = _mm256_adds_epi16(p[1], v);
|
||||||
|
p[2] = _mm256_adds_epi16(p[2], v);
|
||||||
|
p[3] = _mm256_adds_epi16(p[3], v);
|
||||||
|
p += 4;
|
||||||
|
size -= 4 * sizeof(*p);
|
||||||
|
} while (size != 0);
|
||||||
|
}
|
||||||
|
#define matchfinder_rebase matchfinder_rebase_avx2
|
||||||
|
|
||||||
|
#elif defined(__SSE2__)
|
||||||
|
# include <emmintrin.h>
|
||||||
|
static forceinline void
|
||||||
|
matchfinder_init_sse2(mf_pos_t *data, size_t size)
|
||||||
|
{
|
||||||
|
__m128i *p = (__m128i *)data;
|
||||||
|
__m128i v = _mm_set1_epi16(MATCHFINDER_INITVAL);
|
||||||
|
|
||||||
|
STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
|
||||||
|
STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
|
||||||
|
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
||||||
|
|
||||||
|
do {
|
||||||
|
p[0] = v;
|
||||||
|
p[1] = v;
|
||||||
|
p[2] = v;
|
||||||
|
p[3] = v;
|
||||||
|
p += 4;
|
||||||
|
size -= 4 * sizeof(*p);
|
||||||
|
} while (size != 0);
|
||||||
|
}
|
||||||
|
#define matchfinder_init matchfinder_init_sse2
|
||||||
|
|
||||||
|
static forceinline void
|
||||||
|
matchfinder_rebase_sse2(mf_pos_t *data, size_t size)
|
||||||
|
{
|
||||||
|
__m128i *p = (__m128i *)data;
|
||||||
|
__m128i v = _mm_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
|
||||||
|
|
||||||
|
STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
|
||||||
|
STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
|
||||||
|
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
||||||
|
|
||||||
|
do {
|
||||||
|
/* PADDSW: Add Packed Signed Integers With Signed Saturation */
|
||||||
|
p[0] = _mm_adds_epi16(p[0], v);
|
||||||
|
p[1] = _mm_adds_epi16(p[1], v);
|
||||||
|
p[2] = _mm_adds_epi16(p[2], v);
|
||||||
|
p[3] = _mm_adds_epi16(p[3], v);
|
||||||
|
p += 4;
|
||||||
|
size -= 4 * sizeof(*p);
|
||||||
|
} while (size != 0);
|
||||||
|
}
|
||||||
|
#define matchfinder_rebase matchfinder_rebase_sse2
|
||||||
|
#endif /* __SSE2__ */
|
||||||
|
|
||||||
|
#endif /* LIB_X86_MATCHFINDER_IMPL_H */
|
||||||
87
lib/gdeflate/libdeflate/lib/zlib_compress.c
Normal file
87
lib/gdeflate/libdeflate/lib/zlib_compress.c
Normal file
|
|
@ -0,0 +1,87 @@
|
||||||
|
/*
|
||||||
|
* zlib_compress.c - compress with a zlib wrapper
|
||||||
|
*
|
||||||
|
* Originally public domain; changes after 2016-09-07 are copyrighted.
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "deflate_compress.h"
|
||||||
|
#include "unaligned.h"
|
||||||
|
#include "zlib_constants.h"
|
||||||
|
|
||||||
|
#include "libdeflate.h"
|
||||||
|
|
||||||
|
LIBDEFLATEEXPORT size_t LIBDEFLATEAPI
|
||||||
|
libdeflate_zlib_compress(struct libdeflate_compressor *c,
|
||||||
|
const void *in, size_t in_nbytes,
|
||||||
|
void *out, size_t out_nbytes_avail)
|
||||||
|
{
|
||||||
|
u8 *out_next = out;
|
||||||
|
u16 hdr;
|
||||||
|
unsigned compression_level;
|
||||||
|
unsigned level_hint;
|
||||||
|
size_t deflate_size;
|
||||||
|
|
||||||
|
if (out_nbytes_avail <= ZLIB_MIN_OVERHEAD)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* 2 byte header: CMF and FLG */
|
||||||
|
hdr = (ZLIB_CM_DEFLATE << 8) | (ZLIB_CINFO_32K_WINDOW << 12);
|
||||||
|
compression_level = deflate_get_compression_level(c);
|
||||||
|
if (compression_level < 2)
|
||||||
|
level_hint = ZLIB_FASTEST_COMPRESSION;
|
||||||
|
else if (compression_level < 6)
|
||||||
|
level_hint = ZLIB_FAST_COMPRESSION;
|
||||||
|
else if (compression_level < 8)
|
||||||
|
level_hint = ZLIB_DEFAULT_COMPRESSION;
|
||||||
|
else
|
||||||
|
level_hint = ZLIB_SLOWEST_COMPRESSION;
|
||||||
|
hdr |= level_hint << 6;
|
||||||
|
hdr |= 31 - (hdr % 31);
|
||||||
|
|
||||||
|
put_unaligned_be16(hdr, out_next);
|
||||||
|
out_next += 2;
|
||||||
|
|
||||||
|
/* Compressed data */
|
||||||
|
deflate_size = libdeflate_deflate_compress(c, in, in_nbytes, out_next,
|
||||||
|
out_nbytes_avail - ZLIB_MIN_OVERHEAD);
|
||||||
|
if (deflate_size == 0)
|
||||||
|
return 0;
|
||||||
|
out_next += deflate_size;
|
||||||
|
|
||||||
|
/* ADLER32 */
|
||||||
|
put_unaligned_be32(libdeflate_adler32(1, in, in_nbytes), out_next);
|
||||||
|
out_next += 4;
|
||||||
|
|
||||||
|
return out_next - (u8 *)out;
|
||||||
|
}
|
||||||
|
|
||||||
|
LIBDEFLATEEXPORT size_t LIBDEFLATEAPI
|
||||||
|
libdeflate_zlib_compress_bound(struct libdeflate_compressor *c,
|
||||||
|
size_t in_nbytes)
|
||||||
|
{
|
||||||
|
return ZLIB_MIN_OVERHEAD +
|
||||||
|
libdeflate_deflate_compress_bound(c, in_nbytes);
|
||||||
|
}
|
||||||
21
lib/gdeflate/libdeflate/lib/zlib_constants.h
Normal file
21
lib/gdeflate/libdeflate/lib/zlib_constants.h
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
/*
|
||||||
|
* zlib_constants.h - constants for the zlib wrapper format
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIB_ZLIB_CONSTANTS_H
|
||||||
|
#define LIB_ZLIB_CONSTANTS_H
|
||||||
|
|
||||||
|
#define ZLIB_MIN_HEADER_SIZE 2
|
||||||
|
#define ZLIB_FOOTER_SIZE 4
|
||||||
|
#define ZLIB_MIN_OVERHEAD (ZLIB_MIN_HEADER_SIZE + ZLIB_FOOTER_SIZE)
|
||||||
|
|
||||||
|
#define ZLIB_CM_DEFLATE 8
|
||||||
|
|
||||||
|
#define ZLIB_CINFO_32K_WINDOW 7
|
||||||
|
|
||||||
|
#define ZLIB_FASTEST_COMPRESSION 0
|
||||||
|
#define ZLIB_FAST_COMPRESSION 1
|
||||||
|
#define ZLIB_DEFAULT_COMPRESSION 2
|
||||||
|
#define ZLIB_SLOWEST_COMPRESSION 3
|
||||||
|
|
||||||
|
#endif /* LIB_ZLIB_CONSTANTS_H */
|
||||||
108
lib/gdeflate/libdeflate/lib/zlib_decompress.c
Normal file
108
lib/gdeflate/libdeflate/lib/zlib_decompress.c
Normal file
|
|
@ -0,0 +1,108 @@
|
||||||
|
/*
|
||||||
|
* zlib_decompress.c - decompress with a zlib wrapper
|
||||||
|
*
|
||||||
|
* Originally public domain; changes after 2016-09-07 are copyrighted.
|
||||||
|
*
|
||||||
|
* Copyright 2016 Eric Biggers
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "unaligned.h"
|
||||||
|
#include "zlib_constants.h"
|
||||||
|
|
||||||
|
#include "libdeflate.h"
|
||||||
|
|
||||||
|
LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
|
||||||
|
libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *d,
|
||||||
|
const void *in, size_t in_nbytes,
|
||||||
|
void *out, size_t out_nbytes_avail,
|
||||||
|
size_t *actual_in_nbytes_ret,
|
||||||
|
size_t *actual_out_nbytes_ret)
|
||||||
|
{
|
||||||
|
const u8 *in_next = in;
|
||||||
|
const u8 * const in_end = in_next + in_nbytes;
|
||||||
|
u16 hdr;
|
||||||
|
size_t actual_in_nbytes;
|
||||||
|
size_t actual_out_nbytes;
|
||||||
|
enum libdeflate_result result;
|
||||||
|
|
||||||
|
if (in_nbytes < ZLIB_MIN_OVERHEAD)
|
||||||
|
return LIBDEFLATE_BAD_DATA;
|
||||||
|
|
||||||
|
/* 2 byte header: CMF and FLG */
|
||||||
|
hdr = get_unaligned_be16(in_next);
|
||||||
|
in_next += 2;
|
||||||
|
|
||||||
|
/* FCHECK */
|
||||||
|
if ((hdr % 31) != 0)
|
||||||
|
return LIBDEFLATE_BAD_DATA;
|
||||||
|
|
||||||
|
/* CM */
|
||||||
|
if (((hdr >> 8) & 0xF) != ZLIB_CM_DEFLATE)
|
||||||
|
return LIBDEFLATE_BAD_DATA;
|
||||||
|
|
||||||
|
/* CINFO */
|
||||||
|
if ((hdr >> 12) > ZLIB_CINFO_32K_WINDOW)
|
||||||
|
return LIBDEFLATE_BAD_DATA;
|
||||||
|
|
||||||
|
/* FDICT */
|
||||||
|
if ((hdr >> 5) & 1)
|
||||||
|
return LIBDEFLATE_BAD_DATA;
|
||||||
|
|
||||||
|
/* Compressed data */
|
||||||
|
result = libdeflate_deflate_decompress_ex(d, in_next,
|
||||||
|
in_end - ZLIB_FOOTER_SIZE - in_next,
|
||||||
|
out, out_nbytes_avail,
|
||||||
|
&actual_in_nbytes, actual_out_nbytes_ret);
|
||||||
|
if (result != LIBDEFLATE_SUCCESS)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
if (actual_out_nbytes_ret)
|
||||||
|
actual_out_nbytes = *actual_out_nbytes_ret;
|
||||||
|
else
|
||||||
|
actual_out_nbytes = out_nbytes_avail;
|
||||||
|
|
||||||
|
in_next += actual_in_nbytes;
|
||||||
|
|
||||||
|
/* ADLER32 */
|
||||||
|
if (libdeflate_adler32(1, out, actual_out_nbytes) !=
|
||||||
|
get_unaligned_be32(in_next))
|
||||||
|
return LIBDEFLATE_BAD_DATA;
|
||||||
|
in_next += 4;
|
||||||
|
|
||||||
|
if (actual_in_nbytes_ret)
|
||||||
|
*actual_in_nbytes_ret = in_next - (u8 *)in;
|
||||||
|
|
||||||
|
return LIBDEFLATE_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
|
||||||
|
libdeflate_zlib_decompress(struct libdeflate_decompressor *d,
|
||||||
|
const void *in, size_t in_nbytes,
|
||||||
|
void *out, size_t out_nbytes_avail,
|
||||||
|
size_t *actual_out_nbytes_ret)
|
||||||
|
{
|
||||||
|
return libdeflate_zlib_decompress_ex(d, in, in_nbytes,
|
||||||
|
out, out_nbytes_avail,
|
||||||
|
NULL, actual_out_nbytes_ret);
|
||||||
|
}
|
||||||
540
lib/gdeflate/libdeflate/libdeflate.h
Normal file
540
lib/gdeflate/libdeflate/libdeflate.h
Normal file
|
|
@ -0,0 +1,540 @@
|
||||||
|
/*
|
||||||
|
* libdeflate.h - public header for libdeflate
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIBDEFLATE_H
|
||||||
|
#define LIBDEFLATE_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define LIBDEFLATE_VERSION_MAJOR 1
|
||||||
|
#define LIBDEFLATE_VERSION_MINOR 8
|
||||||
|
#define LIBDEFLATE_VERSION_STRING "1.8"
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* On Windows, if you want to link to the DLL version of libdeflate, then
|
||||||
|
* #define LIBDEFLATE_DLL. Note that the calling convention is "stdcall".
|
||||||
|
*/
|
||||||
|
#ifdef LIBDEFLATE_DLL
|
||||||
|
# ifdef BUILDING_LIBDEFLATE
|
||||||
|
# define LIBDEFLATEEXPORT LIBEXPORT
|
||||||
|
# elif defined(_WIN32) || defined(__CYGWIN__)
|
||||||
|
# define LIBDEFLATEEXPORT __declspec(dllimport)
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
#ifndef LIBDEFLATEEXPORT
|
||||||
|
# define LIBDEFLATEEXPORT
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(_WIN32) && !defined(_WIN64)
|
||||||
|
# define LIBDEFLATEAPI_ABI __stdcall
|
||||||
|
#else
|
||||||
|
# define LIBDEFLATEAPI_ABI
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(BUILDING_LIBDEFLATE) && defined(__GNUC__) && \
|
||||||
|
defined(_WIN32) && !defined(_WIN64)
|
||||||
|
/*
|
||||||
|
* On 32-bit Windows, gcc assumes 16-byte stack alignment but MSVC only 4.
|
||||||
|
* Realign the stack when entering libdeflate to avoid crashing in SSE/AVX
|
||||||
|
* code when called from an MSVC-compiled application.
|
||||||
|
*/
|
||||||
|
# define LIBDEFLATEAPI_STACKALIGN __attribute__((force_align_arg_pointer))
|
||||||
|
#else
|
||||||
|
# define LIBDEFLATEAPI_STACKALIGN
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define LIBDEFLATEAPI LIBDEFLATEAPI_ABI LIBDEFLATEAPI_STACKALIGN
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* Compression */
|
||||||
|
/* ========================================================================== */
|
||||||
|
|
||||||
|
struct libdeflate_compressor;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* libdeflate_alloc_compressor() allocates a new compressor that supports
|
||||||
|
* DEFLATE, zlib, and gzip compression. 'compression_level' is the compression
|
||||||
|
* level on a zlib-like scale but with a higher maximum value (1 = fastest, 6 =
|
||||||
|
* medium/default, 9 = slow, 12 = slowest). Level 0 is also supported and means
|
||||||
|
* "no compression", specifically "create a valid stream, but only emit
|
||||||
|
* uncompressed blocks" (this will expand the data slightly).
|
||||||
|
*
|
||||||
|
* The return value is a pointer to the new compressor, or NULL if out of memory
|
||||||
|
* or if the compression level is invalid (i.e. outside the range [0, 12]).
|
||||||
|
*
|
||||||
|
* Note: for compression, the sliding window size is defined at compilation time
|
||||||
|
* to 32768, the largest size permissible in the DEFLATE format. It cannot be
|
||||||
|
* changed at runtime.
|
||||||
|
*
|
||||||
|
* A single compressor is not safe to use by multiple threads concurrently.
|
||||||
|
* However, different threads may use different compressors concurrently.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT struct libdeflate_compressor * LIBDEFLATEAPI
|
||||||
|
libdeflate_alloc_compressor(int compression_level);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* libdeflate_deflate_compress() performs raw DEFLATE compression on a buffer of
|
||||||
|
* data. The function attempts to compress 'in_nbytes' bytes of data located at
|
||||||
|
* 'in' and write the results to 'out', which has space for 'out_nbytes_avail'
|
||||||
|
* bytes. The return value is the compressed size in bytes, or 0 if the data
|
||||||
|
* could not be compressed to 'out_nbytes_avail' bytes or fewer.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT size_t LIBDEFLATEAPI
|
||||||
|
libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
|
||||||
|
const void *in, size_t in_nbytes,
|
||||||
|
void *out, size_t out_nbytes_avail);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* libdeflate_deflate_compress_bound() returns a worst-case upper bound on the
|
||||||
|
* number of bytes of compressed data that may be produced by compressing any
|
||||||
|
* buffer of length less than or equal to 'in_nbytes' using
|
||||||
|
* libdeflate_deflate_compress() with the specified compressor. Mathematically,
|
||||||
|
* this bound will necessarily be a number greater than or equal to 'in_nbytes'.
|
||||||
|
* It may be an overestimate of the true upper bound. The return value is
|
||||||
|
* guaranteed to be the same for all invocations with the same compressor and
|
||||||
|
* same 'in_nbytes'.
|
||||||
|
*
|
||||||
|
* As a special case, 'compressor' may be NULL. This causes the bound to be
|
||||||
|
* taken across *any* libdeflate_compressor that could ever be allocated with
|
||||||
|
* this build of the library, with any options.
|
||||||
|
*
|
||||||
|
* Note that this function is not necessary in many applications. With
|
||||||
|
* block-based compression, it is usually preferable to separately store the
|
||||||
|
* uncompressed size of each block and to store any blocks that did not compress
|
||||||
|
* to less than their original size uncompressed. In that scenario, there is no
|
||||||
|
* need to know the worst-case compressed size, since the maximum number of
|
||||||
|
* bytes of compressed data that may be used would always be one less than the
|
||||||
|
* input length. You can just pass a buffer of that size to
|
||||||
|
* libdeflate_deflate_compress() and store the data uncompressed if
|
||||||
|
* libdeflate_deflate_compress() returns 0, indicating that the compressed data
|
||||||
|
* did not fit into the provided output buffer.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT size_t LIBDEFLATEAPI
|
||||||
|
libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
|
||||||
|
size_t in_nbytes);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Like libdeflate_deflate_compress(), but stores the data in the zlib wrapper
|
||||||
|
* format.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT size_t LIBDEFLATEAPI
|
||||||
|
libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
|
||||||
|
const void *in, size_t in_nbytes,
|
||||||
|
void *out, size_t out_nbytes_avail);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Like libdeflate_deflate_compress_bound(), but assumes the data will be
|
||||||
|
* compressed with libdeflate_zlib_compress() rather than with
|
||||||
|
* libdeflate_deflate_compress().
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT size_t LIBDEFLATEAPI
|
||||||
|
libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
|
||||||
|
size_t in_nbytes);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Like libdeflate_deflate_compress(), but stores the data in the gzip wrapper
|
||||||
|
* format.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT size_t LIBDEFLATEAPI
|
||||||
|
libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
|
||||||
|
const void *in, size_t in_nbytes,
|
||||||
|
void *out, size_t out_nbytes_avail);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Like libdeflate_deflate_compress_bound(), but assumes the data will be
|
||||||
|
* compressed with libdeflate_gzip_compress() rather than with
|
||||||
|
* libdeflate_deflate_compress().
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT size_t LIBDEFLATEAPI
|
||||||
|
libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
|
||||||
|
size_t in_nbytes);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* libdeflate_free_compressor() frees a compressor that was allocated with
|
||||||
|
* libdeflate_alloc_compressor(). If a NULL pointer is passed in, no action is
|
||||||
|
* taken.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT void LIBDEFLATEAPI
|
||||||
|
libdeflate_free_compressor(struct libdeflate_compressor *compressor);
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* Decompression */
|
||||||
|
/* ========================================================================== */
|
||||||
|
|
||||||
|
struct libdeflate_decompressor;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* libdeflate_alloc_decompressor() allocates a new decompressor that can be used
|
||||||
|
* for DEFLATE, zlib, and gzip decompression. The return value is a pointer to
|
||||||
|
* the new decompressor, or NULL if out of memory.
|
||||||
|
*
|
||||||
|
* This function takes no parameters, and the returned decompressor is valid for
|
||||||
|
* decompressing data that was compressed at any compression level and with any
|
||||||
|
* sliding window size.
|
||||||
|
*
|
||||||
|
* A single decompressor is not safe to use by multiple threads concurrently.
|
||||||
|
* However, different threads may use different decompressors concurrently.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT struct libdeflate_decompressor * LIBDEFLATEAPI
|
||||||
|
libdeflate_alloc_decompressor(void);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Result of a call to libdeflate_deflate_decompress(),
|
||||||
|
* libdeflate_zlib_decompress(), or libdeflate_gzip_decompress().
|
||||||
|
*/
|
||||||
|
enum libdeflate_result {
|
||||||
|
/* Decompression was successful. */
|
||||||
|
LIBDEFLATE_SUCCESS = 0,
|
||||||
|
|
||||||
|
/* Decompressed failed because the compressed data was invalid, corrupt,
|
||||||
|
* or otherwise unsupported. */
|
||||||
|
LIBDEFLATE_BAD_DATA = 1,
|
||||||
|
|
||||||
|
/* A NULL 'actual_out_nbytes_ret' was provided, but the data would have
|
||||||
|
* decompressed to fewer than 'out_nbytes_avail' bytes. */
|
||||||
|
LIBDEFLATE_SHORT_OUTPUT = 2,
|
||||||
|
|
||||||
|
/* The data would have decompressed to more than 'out_nbytes_avail'
|
||||||
|
* bytes. */
|
||||||
|
LIBDEFLATE_INSUFFICIENT_SPACE = 3,
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* libdeflate_deflate_decompress() decompresses the DEFLATE-compressed stream
|
||||||
|
* from the buffer 'in' with compressed size up to 'in_nbytes' bytes. The
|
||||||
|
* uncompressed data is written to 'out', a buffer with size 'out_nbytes_avail'
|
||||||
|
* bytes. If decompression succeeds, then 0 (LIBDEFLATE_SUCCESS) is returned.
|
||||||
|
* Otherwise, a nonzero result code such as LIBDEFLATE_BAD_DATA is returned. If
|
||||||
|
* a nonzero result code is returned, then the contents of the output buffer are
|
||||||
|
* undefined.
|
||||||
|
*
|
||||||
|
* Decompression stops at the end of the DEFLATE stream (as indicated by the
|
||||||
|
* BFINAL flag), even if it is actually shorter than 'in_nbytes' bytes.
|
||||||
|
*
|
||||||
|
* libdeflate_deflate_decompress() can be used in cases where the actual
|
||||||
|
* uncompressed size is known (recommended) or unknown (not recommended):
|
||||||
|
*
|
||||||
|
* - If the actual uncompressed size is known, then pass the actual
|
||||||
|
* uncompressed size as 'out_nbytes_avail' and pass NULL for
|
||||||
|
* 'actual_out_nbytes_ret'. This makes libdeflate_deflate_decompress() fail
|
||||||
|
* with LIBDEFLATE_SHORT_OUTPUT if the data decompressed to fewer than the
|
||||||
|
* specified number of bytes.
|
||||||
|
*
|
||||||
|
* - If the actual uncompressed size is unknown, then provide a non-NULL
|
||||||
|
* 'actual_out_nbytes_ret' and provide a buffer with some size
|
||||||
|
* 'out_nbytes_avail' that you think is large enough to hold all the
|
||||||
|
* uncompressed data. In this case, if the data decompresses to less than
|
||||||
|
* or equal to 'out_nbytes_avail' bytes, then
|
||||||
|
* libdeflate_deflate_decompress() will write the actual uncompressed size
|
||||||
|
* to *actual_out_nbytes_ret and return 0 (LIBDEFLATE_SUCCESS). Otherwise,
|
||||||
|
* it will return LIBDEFLATE_INSUFFICIENT_SPACE if the provided buffer was
|
||||||
|
* not large enough but no other problems were encountered, or another
|
||||||
|
* nonzero result code if decompression failed for another reason.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
|
||||||
|
libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
|
||||||
|
const void *in, size_t in_nbytes,
|
||||||
|
void *out, size_t out_nbytes_avail,
|
||||||
|
size_t *actual_out_nbytes_ret);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Like libdeflate_deflate_decompress(), but adds the 'actual_in_nbytes_ret'
|
||||||
|
* argument. If decompression succeeds and 'actual_in_nbytes_ret' is not NULL,
|
||||||
|
* then the actual compressed size of the DEFLATE stream (aligned to the next
|
||||||
|
* byte boundary) is written to *actual_in_nbytes_ret.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
|
||||||
|
libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
|
||||||
|
const void *in, size_t in_nbytes,
|
||||||
|
void *out, size_t out_nbytes_avail,
|
||||||
|
size_t *actual_in_nbytes_ret,
|
||||||
|
size_t *actual_out_nbytes_ret);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Like libdeflate_deflate_decompress(), but assumes the zlib wrapper format
|
||||||
|
* instead of raw DEFLATE.
|
||||||
|
*
|
||||||
|
* Decompression will stop at the end of the zlib stream, even if it is shorter
|
||||||
|
* than 'in_nbytes'. If you need to know exactly where the zlib stream ended,
|
||||||
|
* use libdeflate_zlib_decompress_ex().
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
|
||||||
|
libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
|
||||||
|
const void *in, size_t in_nbytes,
|
||||||
|
void *out, size_t out_nbytes_avail,
|
||||||
|
size_t *actual_out_nbytes_ret);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Like libdeflate_zlib_decompress(), but adds the 'actual_in_nbytes_ret'
|
||||||
|
* argument. If 'actual_in_nbytes_ret' is not NULL and the decompression
|
||||||
|
* succeeds (indicating that the first zlib-compressed stream in the input
|
||||||
|
* buffer was decompressed), then the actual number of input bytes consumed is
|
||||||
|
* written to *actual_in_nbytes_ret.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
|
||||||
|
libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
|
||||||
|
const void *in, size_t in_nbytes,
|
||||||
|
void *out, size_t out_nbytes_avail,
|
||||||
|
size_t *actual_in_nbytes_ret,
|
||||||
|
size_t *actual_out_nbytes_ret);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Like libdeflate_deflate_decompress(), but assumes the gzip wrapper format
|
||||||
|
* instead of raw DEFLATE.
|
||||||
|
*
|
||||||
|
* If multiple gzip-compressed members are concatenated, then only the first
|
||||||
|
* will be decompressed. Use libdeflate_gzip_decompress_ex() if you need
|
||||||
|
* multi-member support.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
|
||||||
|
libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
|
||||||
|
const void *in, size_t in_nbytes,
|
||||||
|
void *out, size_t out_nbytes_avail,
|
||||||
|
size_t *actual_out_nbytes_ret);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Like libdeflate_gzip_decompress(), but adds the 'actual_in_nbytes_ret'
|
||||||
|
* argument. If 'actual_in_nbytes_ret' is not NULL and the decompression
|
||||||
|
* succeeds (indicating that the first gzip-compressed member in the input
|
||||||
|
* buffer was decompressed), then the actual number of input bytes consumed is
|
||||||
|
* written to *actual_in_nbytes_ret.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
|
||||||
|
libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
|
||||||
|
const void *in, size_t in_nbytes,
|
||||||
|
void *out, size_t out_nbytes_avail,
|
||||||
|
size_t *actual_in_nbytes_ret,
|
||||||
|
size_t *actual_out_nbytes_ret);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* libdeflate_free_decompressor() frees a decompressor that was allocated with
|
||||||
|
* libdeflate_alloc_decompressor(). If a NULL pointer is passed in, no action
|
||||||
|
* is taken.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT void LIBDEFLATEAPI
|
||||||
|
libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* Checksums */
|
||||||
|
/* ========================================================================== */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* libdeflate_adler32() updates a running Adler-32 checksum with 'len' bytes of
|
||||||
|
* data and returns the updated checksum. When starting a new checksum, the
|
||||||
|
* required initial value for 'adler' is 1. This value is also returned when
|
||||||
|
* 'buffer' is specified as NULL.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT uint32_t LIBDEFLATEAPI
|
||||||
|
libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* libdeflate_crc32() updates a running CRC-32 checksum with 'len' bytes of data
|
||||||
|
* and returns the updated checksum. When starting a new checksum, the required
|
||||||
|
* initial value for 'crc' is 0. This value is also returned when 'buffer' is
|
||||||
|
* specified as NULL.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT uint32_t LIBDEFLATEAPI
|
||||||
|
libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* Custom memory allocator */
|
||||||
|
/* ========================================================================== */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Install a custom memory allocator which libdeflate will use for all memory
|
||||||
|
* allocations. 'malloc_func' is a function that must behave like malloc(), and
|
||||||
|
* 'free_func' is a function that must behave like free().
|
||||||
|
*
|
||||||
|
* There must not be any libdeflate_compressor or libdeflate_decompressor
|
||||||
|
* structures in existence when calling this function.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT void LIBDEFLATEAPI
|
||||||
|
libdeflate_set_memory_allocator(void *(*malloc_func)(size_t),
|
||||||
|
void (*free_func)(void *));
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* NVIDIA GDEFLATE-related API */
|
||||||
|
/* ========================================================================== */
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* Compression */
|
||||||
|
/* ========================================================================== */
|
||||||
|
|
||||||
|
struct libdeflate_gdeflate_compressor;
|
||||||
|
|
||||||
|
struct libdeflate_gdeflate_out_page {
|
||||||
|
/* Buffer for compressed GDEFLATE page data. */
|
||||||
|
void *data;
|
||||||
|
/* Buffer size in bytes. If compression succeeded this field will
|
||||||
|
* contain the size of compressed GDEFLATE page.
|
||||||
|
*/
|
||||||
|
size_t nbytes;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct libdeflate_gdeflate_in_page {
|
||||||
|
/* Compressed GDEFLATE page data. */
|
||||||
|
const void* data;
|
||||||
|
/* Size in bytes of compressed GDEFLATE page. */
|
||||||
|
size_t nbytes;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* libdeflate_alloc_gdeflate_compressor() allocates a new compressor that
|
||||||
|
* supports GDEFLATE compression. 'compression_level' is the compression
|
||||||
|
* level on a zlib-like scale but with a higher maximum value (1 = fastest, 6 =
|
||||||
|
* medium/default, 9 = slow, 12 = slowest). Level 0 is also supported and means
|
||||||
|
* "no compression", specifically "create a valid stream, but only emit
|
||||||
|
* uncompressed blocks" (this will expand the data slightly).
|
||||||
|
*
|
||||||
|
* The return value is a pointer to the new compressor, or NULL if out of memory
|
||||||
|
* or if the compression level is invalid (i.e. outside the range [0, 12]).
|
||||||
|
*
|
||||||
|
* Note: for compression, the sliding window size is defined at compilation time
|
||||||
|
* to 65536, the largest size permissible in the GDEFLATE format. It cannot be
|
||||||
|
* changed at runtime.
|
||||||
|
*
|
||||||
|
* A single compressor is not safe to use by multiple threads concurrently.
|
||||||
|
* However, different threads may use different compressors concurrently.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT struct libdeflate_gdeflate_compressor * LIBDEFLATEAPI
|
||||||
|
libdeflate_alloc_gdeflate_compressor(int compression_level);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* libdeflate_gdeflate_compress() performs raw GDEFLATE compression on a buffer
|
||||||
|
* of data. The function attempts to compress 'in_nbytes' bytes of data located
|
||||||
|
* at 'in' and writes page results to 'out_pages', which has preallocated 'data'
|
||||||
|
* with 'nbytes' space for each page. To determine the number of pages
|
||||||
|
* 'out_npages' the input data will be split into and the upper bound on
|
||||||
|
* compressed data use the libdeflate_gdeflate_compress_bound() function. The
|
||||||
|
* return value is the compressed size in bytes, or 0 if the data could not be
|
||||||
|
* compressed. If compression succeeded the size of each compressed page will
|
||||||
|
* be written to 'nbytes' field of 'out_pages'.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT size_t LIBDEFLATEAPI
|
||||||
|
libdeflate_gdeflate_compress(struct libdeflate_gdeflate_compressor *compressor,
|
||||||
|
const void *in, size_t in_nbytes,
|
||||||
|
struct libdeflate_gdeflate_out_page *out_pages,
|
||||||
|
size_t out_npages);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* libdeflate_gdeflate_compress_ex() performs raw GDEFLATE compression on
|
||||||
|
* set of pages. The function attempts to compress 'npages' from 'in_pages'
|
||||||
|
* and writes page results to 'out_pages', which has preallocated 'data'
|
||||||
|
* with 'nbytes' space for each page. To determine the number of pages
|
||||||
|
* 'out_npages' the input data will be split into and the upper bound on
|
||||||
|
* compressed data use the libdeflate_gdeflate_compress_bound() function. The
|
||||||
|
* return value is the number of processed pages. If compression succeeded
|
||||||
|
* the size of each compressed page will be written to 'nbytes' field of 'out_pages'.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT size_t LIBDEFLATEAPI
|
||||||
|
libdeflate_gdeflate_compress_ex(struct libdeflate_gdeflate_compressor *compressor,
|
||||||
|
const struct libdeflate_gdeflate_in_page* in_pages,
|
||||||
|
struct libdeflate_gdeflate_out_page* out_pages, size_t npages);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* libdeflate_gdeflate_compress_bound() returns a worst-case upper bound on the
|
||||||
|
* number of bytes of compressed data that may be produced by compressing any
|
||||||
|
* buffer of length less than or equal to 'in_nbytes' using
|
||||||
|
* libdeflate_gdeflate_compress() with the specified compressor. Mathematically,
|
||||||
|
* this bound will necessarily be a number greater than or equal to 'in_nbytes'.
|
||||||
|
* It may be an overestimate of the true upper bound. The return value is
|
||||||
|
* guaranteed to be the same for all invocations with the same compressor and
|
||||||
|
* same 'in_nbytes'. The 'out_npages' will contain the number of GDEFLATE pages
|
||||||
|
* the input data will be split into. This number should be used to preallocate
|
||||||
|
* the page array for libdeflate_gdeflate_compress(). The upper bound on the
|
||||||
|
* number of compressed data in a page can be found by dividing the function
|
||||||
|
* result by the 'out_npages' value.
|
||||||
|
*
|
||||||
|
* As a special case, 'compressor' may be NULL. This causes the bound to be
|
||||||
|
* taken across *any* libdeflate_compressor that could ever be allocated with
|
||||||
|
* this build of the library, with any options.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT size_t LIBDEFLATEAPI
|
||||||
|
libdeflate_gdeflate_compress_bound(struct libdeflate_gdeflate_compressor *comp,
|
||||||
|
size_t in_nbytes, size_t *out_npages);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* libdeflate_free_gdeflate_compressor() frees a compressor that was allocated
|
||||||
|
* with libdeflate_alloc_gdeflate_compressor(). If a NULL pointer is passed in,
|
||||||
|
* no action is taken.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT void LIBDEFLATEAPI
|
||||||
|
libdeflate_free_gdeflate_compressor(struct libdeflate_gdeflate_compressor *comp);
|
||||||
|
|
||||||
|
/* ========================================================================== */
|
||||||
|
/* Decompression */
|
||||||
|
/* ========================================================================== */
|
||||||
|
|
||||||
|
struct libdeflate_gdeflate_decompressor;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* libdeflate_alloc_gdeflate_decompressor() allocates a new decompressor that
|
||||||
|
* can be used for GDEFLATE decompression. The return value is a pointer to
|
||||||
|
* the new decompressor, or NULL if out of memory.
|
||||||
|
*
|
||||||
|
* This function takes no parameters, and the returned decompressor is valid for
|
||||||
|
* decompressing data that was compressed at any compression level and with any
|
||||||
|
* sliding window size.
|
||||||
|
*
|
||||||
|
* A single decompressor is not safe to use by multiple threads concurrently.
|
||||||
|
* However, different threads may use different decompressors concurrently.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT struct libdeflate_gdeflate_decompressor * LIBDEFLATEAPI
|
||||||
|
libdeflate_alloc_gdeflate_decompressor(void);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* libdeflate_gdeflate_decompress() decompresses the GDEFLATE-compressed pages
|
||||||
|
* from the 'in_pages' array with 'in_npages' members. The uncompressed data is
|
||||||
|
* written to 'out', a buffer with size 'out_nbytes_avail' bytes.
|
||||||
|
* If decompression succeeds, then 0 (LIBDEFLATE_SUCCESS) is returned.
|
||||||
|
* Otherwise, a nonzero result code such as LIBDEFLATE_BAD_DATA is returned. If
|
||||||
|
* a nonzero result code is returned, then the contents of the output buffer are
|
||||||
|
* undefined.
|
||||||
|
*
|
||||||
|
* libdeflate_gdeflate_decompress() can be used only in cases where the actual
|
||||||
|
* uncompressed size is known.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
|
||||||
|
libdeflate_gdeflate_decompress(struct libdeflate_gdeflate_decompressor *decomp,
|
||||||
|
struct libdeflate_gdeflate_in_page *in_pages,
|
||||||
|
size_t in_npages, void *out,
|
||||||
|
size_t out_nbytes_avail,
|
||||||
|
size_t *actual_out_nbytes_ret);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* libdeflate_gdeflate_decompress_ex() decompresses the GDEFLATE-compressed pages
|
||||||
|
* from the 'in_pages' array with 'npages' members. The uncompressed data is
|
||||||
|
* written to 'out_pages'.
|
||||||
|
* If decompression succeeds, then 0 (LIBDEFLATE_SUCCESS) is returned.
|
||||||
|
* Otherwise, a nonzero result code such as LIBDEFLATE_BAD_DATA is returned. If
|
||||||
|
* a nonzero result code is returned, then the contents of the output buffer are
|
||||||
|
* undefined.
|
||||||
|
*
|
||||||
|
* libdeflate_gdeflate_decompress_ex() can be used only in cases where the actual
|
||||||
|
* uncompressed size is known.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
|
||||||
|
libdeflate_gdeflate_decompress_ex(struct libdeflate_gdeflate_decompressor *decomp,
|
||||||
|
struct libdeflate_gdeflate_in_page *in_pages,
|
||||||
|
struct libdeflate_gdeflate_out_page* out_pages, size_t npages);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* libdeflate_free_gdeflate_decompressor() frees a decompressor that was
|
||||||
|
* allocated with libdeflate_alloc_gdeflate_decompressor(). If a NULL pointer
|
||||||
|
* is passed in, no action is taken.
|
||||||
|
*/
|
||||||
|
LIBDEFLATEEXPORT void LIBDEFLATEAPI
|
||||||
|
libdeflate_free_gdeflate_decompressor(struct libdeflate_gdeflate_decompressor *decomp);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* LIBDEFLATE_H */
|
||||||
63
project.cpp
63
project.cpp
|
|
@ -4,29 +4,76 @@ namespace fs = std::filesystem;
|
||||||
using namespace Crafter;
|
using namespace Crafter;
|
||||||
|
|
||||||
extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> args) {
|
extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> args) {
|
||||||
GitProjectSpec mathSpec{
|
std::vector<std::string> depArgs(args.begin(), args.end());
|
||||||
.source = { .url = "https://forgejo.catcrafts.net/Catcrafts/Crafter.Math.git" },
|
|
||||||
.args = std::vector<std::string>(args.begin(), args.end()),
|
bool useLocal = false;
|
||||||
};
|
for (std::string_view a : args) {
|
||||||
Configuration* math = GitProject(mathSpec);
|
if (a == "--local") { useLocal = true; break; }
|
||||||
|
}
|
||||||
|
|
||||||
|
Configuration* math = useLocal
|
||||||
|
? LocalProject({
|
||||||
|
.projectFile = "../Crafter.Math/project.cpp",
|
||||||
|
.args = depArgs,
|
||||||
|
})
|
||||||
|
: GitProject({
|
||||||
|
.source = { .url = "https://forgejo.catcrafts.net/Catcrafts/Crafter.Math.git" },
|
||||||
|
.args = depArgs,
|
||||||
|
});
|
||||||
|
|
||||||
Configuration cfg;
|
Configuration cfg;
|
||||||
cfg.path = "./";
|
cfg.path = "./";
|
||||||
cfg.name = "Crafter.Asset";
|
cfg.name = "Crafter.Asset";
|
||||||
|
// Default to library so consumers (Crafter.Graphics, examples) link against
|
||||||
|
// libCrafter.Asset.a — the Compression module's non-template entry points
|
||||||
|
// need to be in a real archive, not just PCM-inline. `--exe` (via the
|
||||||
|
// example's main.cpp) flips us to Executable to produce crafter-asset.
|
||||||
|
cfg.type = ConfigurationType::LibraryStatic;
|
||||||
ApplyStandardArgs(cfg, args);
|
ApplyStandardArgs(cfg, args);
|
||||||
|
bool wantExe = false;
|
||||||
|
for (std::string_view a : args) {
|
||||||
|
if (a == "--exe") { wantExe = true; break; }
|
||||||
|
}
|
||||||
|
if (wantExe) cfg.type = ConfigurationType::Executable;
|
||||||
cfg.outputName = (cfg.type == ConfigurationType::Executable) ? "crafter-asset" : "Crafter.Asset";
|
cfg.outputName = (cfg.type == ConfigurationType::Executable) ? "crafter-asset" : "Crafter.Asset";
|
||||||
cfg.dependencies = { math };
|
cfg.dependencies = { math };
|
||||||
|
|
||||||
std::array<fs::path, 3> ifaces = {
|
// Vendored GDeflate (Apache-2.0, Microsoft DirectStorage reference) +
|
||||||
|
// libdeflate (MIT, NVIDIA fork). The C++ wrappers live inline in the
|
||||||
|
// Compression module impl; libdeflate's .c sources are compiled here.
|
||||||
|
cfg.compileFlags.push_back("-Ilib/gdeflate/libdeflate");
|
||||||
|
const std::array<fs::path, 12> libdeflateSources = {
|
||||||
|
"lib/gdeflate/libdeflate/lib/adler32",
|
||||||
|
"lib/gdeflate/libdeflate/lib/crc32",
|
||||||
|
"lib/gdeflate/libdeflate/lib/deflate_compress",
|
||||||
|
"lib/gdeflate/libdeflate/lib/deflate_decompress",
|
||||||
|
"lib/gdeflate/libdeflate/lib/gdeflate_compress",
|
||||||
|
"lib/gdeflate/libdeflate/lib/gdeflate_decompress",
|
||||||
|
"lib/gdeflate/libdeflate/lib/gzip_compress",
|
||||||
|
"lib/gdeflate/libdeflate/lib/gzip_decompress",
|
||||||
|
"lib/gdeflate/libdeflate/lib/utils",
|
||||||
|
"lib/gdeflate/libdeflate/lib/zlib_compress",
|
||||||
|
"lib/gdeflate/libdeflate/lib/zlib_decompress",
|
||||||
|
"lib/gdeflate/libdeflate/lib/x86/cpu_features",
|
||||||
|
};
|
||||||
|
for (const fs::path& p : libdeflateSources) cfg.cFiles.push_back(p);
|
||||||
|
|
||||||
|
std::array<fs::path, 4> ifaces = {
|
||||||
"interfaces/Crafter.Asset",
|
"interfaces/Crafter.Asset",
|
||||||
|
"interfaces/Crafter.Asset-Compression",
|
||||||
"interfaces/Crafter.Asset-Texture",
|
"interfaces/Crafter.Asset-Texture",
|
||||||
"interfaces/Crafter.Asset-Mesh",
|
"interfaces/Crafter.Asset-Mesh",
|
||||||
};
|
};
|
||||||
if (cfg.type == ConfigurationType::Executable) {
|
if (cfg.type == ConfigurationType::Executable) {
|
||||||
std::array<fs::path, 1> impls = { "implementations/main" };
|
std::array<fs::path, 2> impls = {
|
||||||
|
"implementations/Crafter.Asset-Compression",
|
||||||
|
"implementations/main",
|
||||||
|
};
|
||||||
cfg.GetInterfacesAndImplementations(ifaces, impls);
|
cfg.GetInterfacesAndImplementations(ifaces, impls);
|
||||||
} else {
|
} else {
|
||||||
std::array<fs::path, 0> impls = {};
|
std::array<fs::path, 1> impls = {
|
||||||
|
"implementations/Crafter.Asset-Compression",
|
||||||
|
};
|
||||||
cfg.GetInterfacesAndImplementations(ifaces, impls);
|
cfg.GetInterfacesAndImplementations(ifaces, impls);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue