Libraries: integrate xxHash library for fast hashing

`xxHash` is a fast non-cryptographic hashing library. It significantly outperforms
md5 which we use in some places currently while also having great collision
resistance if not attacked explicitly.

The library is added to `extern` because that was the easiest way to do it and has
the least impact on others. I expect this library to become a required dependency
instead of an optional one. It's licence is `BSD 2-Clause` which seems to be the
first of its kind in Blender (there is `BSD 3-Clause` a couple of times).

For now, I used the library only for data deduplication when baking geometry nodes
where the same geometry is generated for each frame. The bake time in my test
goes down from >6s to <1s (note that this includes more than just the hashing time).

Pull Request: https://projects.blender.org/blender/blender/pulls/120139
This commit is contained in:
Jacques Lucke 2024-04-03 10:22:53 +02:00
parent 8252208955
commit e2d170f685
10 changed files with 6897 additions and 16 deletions

View File

@ -32,6 +32,7 @@ endif()
add_subdirectory(rangetree)
add_subdirectory(nanosvg)
add_subdirectory(wcwidth)
add_subdirectory(xxhash)
if(WITH_BULLET)
if(NOT WITH_SYSTEM_BULLET)

21
extern/xxhash/CMakeLists.txt vendored Normal file
View File

@ -0,0 +1,21 @@
# SPDX-FileCopyrightText: 2024 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
set(INC
PUBLIC .
)
set(INC_SYS
)
set(SRC
xxhash.c
xxhash.h
)
set(LIB
)
blender_add_lib(extern_xxhash "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
add_library(bf::extern::xxhash ALIAS extern_xxhash)

26
extern/xxhash/LICENSE vendored Normal file
View File

@ -0,0 +1,26 @@
xxHash Library
Copyright (c) 2012-2021 Yann Collet
All rights reserved.
BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

6
extern/xxhash/README.blender vendored Normal file
View File

@ -0,0 +1,6 @@
Project: xxHash
URL: https://xxhash.com/
License: BSD 2-Clause
Upstream version: v0.8.2 (2023-07-21)
Local modifications:
* None

43
extern/xxhash/xxhash.c vendored Normal file
View File

@ -0,0 +1,43 @@
/*
* xxHash - Extremely Fast Hash algorithm
* Copyright (C) 2012-2021 Yann Collet
*
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* You can contact the author at:
* - xxHash homepage: https://www.xxhash.com
* - xxHash source repository: https://github.com/Cyan4973/xxHash
*/
/*
* xxhash.c instantiates functions defined in xxhash.h
*/
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
#define XXH_IMPLEMENTATION /* access definitions */
#include "xxhash.h"

6773
extern/xxhash/xxhash.h vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,23 @@
The 2-Clause BSD License
------------------------
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -89,23 +89,11 @@ class BlobWriteSharing : NonCopyable, NonMovable {
*/
Map<const ImplicitSharingInfo *, StoredByRuntimeValue> stored_by_runtime_;
struct SliceHash {
uint64_t a;
uint64_t b;
BLI_STRUCT_EQUALITY_OPERATORS_2(SliceHash, a, b)
uint64_t hash() const
{
return get_default_hash(this->a, this->b);
}
};
/**
* Remembers where data was stored based on the hash of the data. This allows us to skip writing
* the same array again if it has the same hash.
*/
Map<SliceHash, BlobSlice> slice_by_content_hash_;
Map<uint64_t, BlobSlice> slice_by_content_hash_;
public:
~BlobWriteSharing();

View File

@ -579,6 +579,7 @@ set(LIB
bf_shader_fx
bf_simulation
PRIVATE bf::extern::fmtlib
PRIVATE bf::extern::xxhash
PRIVATE bf::intern::atomic
# For `vfontdata_freetype.c`.
${FREETYPE_LIBRARIES} ${BROTLI_LIBRARIES}

View File

@ -14,7 +14,6 @@
#include "BLI_endian_defines.h"
#include "BLI_endian_switch.h"
#include "BLI_hash_md5.hh"
#include "BLI_math_matrix_types.hh"
#include "BLI_path_util.h"
@ -26,6 +25,7 @@
#include <fmt/format.h>
#include <sstream>
#include <xxhash.h>
#ifdef WITH_OPENVDB
# include <openvdb/io/Stream.h>
@ -192,8 +192,7 @@ DictionaryValuePtr BlobWriteSharing::write_implicitly_shared(
std::shared_ptr<io::serialize::DictionaryValue> BlobWriteSharing::write_deduplicated(
BlobWriter &writer, const void *data, const int64_t size_in_bytes)
{
SliceHash content_hash;
BLI_hash_md5_buffer(static_cast<const char *>(data), size_in_bytes, &content_hash);
const uint64_t content_hash = XXH3_64bits(data, size_in_bytes);
const BlobSlice slice = slice_by_content_hash_.lookup_or_add_cb(
content_hash, [&]() { return writer.write(data, size_in_bytes); });
return slice.serialize();