Skip to content

Commit

Permalink
Updated crill
Browse files Browse the repository at this point in the history
  • Loading branch information
drowaudio committed Jan 12, 2025
1 parent d7e6fe6 commit 2cbbfda
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 22 deletions.
91 changes: 91 additions & 0 deletions modules/3rd_party/crill/bytewise_atomic_memcpy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// crill - the Cross-platform Real-time, I/O, and Low-Latency Library
// Copyright (c) 2022 - Timur Doumler and Fabian Renn-Giles
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.md or copy at http://boost.org/LICENSE_1_0.txt)

#ifndef CRILL_BYTEWISE_ATOMIC_MEMCPY_H
#define CRILL_BYTEWISE_ATOMIC_MEMCPY_H

#include <atomic>
#include "contracts.h"
#include "platform.h"

namespace crill {
// These are implementations of the corresponding functions
// atomic_load/store_per_byte_memcpy from the Concurrency TS 2.
// They behave as if the source and dest bytes respectively
// were individual atomic objects.
// The implementations provided below are portable, but slow.
// PRs with platform-optimised versions are welcome :)
// The implementations provided below are also *technically*
// UB because C++ does not let us loop over the bytes of
// an object representation, but that is a known wording bug that
// will be fixed by P1839; the technique should work on any
// major compiler.

// Preconditions:
// - order is std::memory_order::acquire or std::memory_order::relaxed
// - (char*)dest + [0, count) and (const char*)source + [0, count)
//   are valid ranges that do not overlap
// Effects:
//   Copies count consecutive bytes pointed to by source into consecutive
//   bytes pointed to by dest. Each individual load operation from a source
//   byte is atomic with memory order order. These individual loads are
//   unsequenced with respect to each other.
// Returns: dest (mirroring std::memcpy).
inline void* atomic_load_per_byte_memcpy
  (void* dest, const void* source, std::size_t count, std::memory_order order)
{
    CRILL_PRE(order == std::memory_order_acquire || order == std::memory_order_relaxed);

    char* dest_bytes = reinterpret_cast<char*>(dest);
    const char* src_bytes = reinterpret_cast<const char*>(source);

    for (std::size_t i = 0; i < count; ++i) {
      #if __cpp_lib_atomic_ref
        dest_bytes[i] = std::atomic_ref<char>(src_bytes[i]).load(std::memory_order_relaxed);
      #elif CRILL_CLANG || CRILL_GCC
        dest_bytes[i] = __atomic_load_n(src_bytes + i, __ATOMIC_RELAXED);
      #else
        // No atomic_ref or equivalent functionality available on this platform!
        // NOTE(review): on such platforms the copy is silently skipped and
        // dest is left unmodified - consider making this a hard compile-time
        // error so the failure is not deferred to runtime.
      #endif
    }

    // Upgrade the relaxed per-byte loads to the caller-requested ordering
    // (acquire or relaxed) with a single fence after the copy.
    std::atomic_thread_fence(order);

    return dest;
}

// Preconditions:
// - order is std::memory_order::release or std::memory_order::relaxed
// - (char*)dest + [0, count) and (const char*)source + [0, count)
//   are valid ranges that do not overlap
// Effects:
//   Copies count consecutive bytes pointed to by source into consecutive
//   bytes pointed to by dest. Each individual store operation to a
//   destination byte is atomic with memory order order. These individual
//   stores are unsequenced with respect to each other.
// Returns: dest (mirroring std::memcpy).
inline void* atomic_store_per_byte_memcpy
  (void* dest, const void* source, std::size_t count, std::memory_order order)
{
    CRILL_PRE(order == std::memory_order_release || order == std::memory_order_relaxed);

    // The fence precedes the stores: a release fence followed by relaxed
    // stores provides the caller-requested (release or relaxed) ordering.
    std::atomic_thread_fence(order);

    char* dest_bytes = reinterpret_cast<char*>(dest);
    const char* src_bytes = reinterpret_cast<const char*>(source);

    for (std::size_t i = 0; i < count; ++i) {
      #if __cpp_lib_atomic_ref
        std::atomic_ref<char>(dest_bytes[i]).store(src_bytes[i], std::memory_order_relaxed);
      #elif CRILL_CLANG || CRILL_GCC
        __atomic_store_n(dest_bytes + i, src_bytes[i], __ATOMIC_RELAXED);
      #else
        // No atomic_ref or equivalent functionality available on this platform!
        // NOTE(review): on such platforms the copy is silently skipped and
        // dest is left unmodified - consider making this a hard compile-time
        // error so the failure is not deferred to runtime.
      #endif
    }

    return dest;
}
}

#endif //CRILL_BYTEWISE_ATOMIC_MEMCPY_H
15 changes: 15 additions & 0 deletions modules/3rd_party/crill/contracts.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// crill - the Cross-platform Real-time, I/O, and Low-Latency Library
// Copyright (c) 2022 - Timur Doumler and Fabian Renn-Giles
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.md or copy at http://boost.org/LICENSE_1_0.txt)

#ifndef CRILL_CONTRACTS_H
#define CRILL_CONTRACTS_H

#include <cassert>

// This will eventually be a proper macro-based Contracts facility
// but at the moment is just an alias for C assert.
// Variadic so that condition expressions containing top-level commas
// (e.g. template argument lists) expand correctly, and parenthesised so
// the whole condition is passed to assert as a single operand.
#define CRILL_PRE(...) assert((__VA_ARGS__))

#endif //CRILL_CONTRACTS_H
13 changes: 12 additions & 1 deletion modules/3rd_party/crill/platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@
#ifndef CRILL_PLATFORM_H
#define CRILL_PLATFORM_H

// Macros to query target hardware architecture
#if defined (__arm__)
#define CRILL_ARM 1
#define CRILL_32BIT 1
#define CRILL_ARM_32BIT 1
#elif defined (__arm64__) || defined (__aarch64__)
#elif defined (__arm64__)
#define CRILL_ARM 1
#define CRILL_64BIT 1
#define CRILL_ARM_64BIT 1
Expand All @@ -24,4 +25,14 @@
#define CRILL_INTEL_64BIT 1
#endif

// Macros to query current compiler.
// Exactly one of CRILL_CLANG / CRILL_GCC / CRILL_MSVC is defined (or none,
// on an unrecognised compiler).
#if defined(__clang__)
// Clang is tested first: it also defines __GNUC__ (and _MSC_VER in
// clang-cl mode), so the later branches would otherwise misdetect it.
#define CRILL_CLANG 1
#elif defined(__GNUC__) || defined(__GNUG__)
#define CRILL_GCC 1
#elif defined(_MSC_VER)
#define CRILL_MSVC 1
#endif


#endif //CRILL_PLATFORM_H
27 changes: 6 additions & 21 deletions modules/3rd_party/crill/seqlock_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
#ifndef CRILL_SEQLOCK_OBJECT_H
#define CRILL_SEQLOCK_OBJECT_H

#include <cstring>
#include <array>
#include <atomic>
#include "bytewise_atomic_memcpy.h"

namespace crill {

Expand Down Expand Up @@ -51,51 +52,35 @@ class seqlock_object
// Non-blocking guarantees: wait-free.
bool try_load(T& t) const noexcept
{
std::size_t buffer[buffer_size];

std::size_t seq1 = seq.load(std::memory_order_acquire);
if (seq1 % 2 != 0)
return false;

for (std::size_t i = 0; i < buffer_size; ++i)
buffer[i] = data[i].load(std::memory_order_relaxed);

std::atomic_thread_fence(std::memory_order_acquire);
crill::atomic_load_per_byte_memcpy(&t, &data, sizeof(data), std::memory_order_acquire);

std::size_t seq2 = seq.load(std::memory_order_relaxed);
if (seq1 != seq2)
return false;

std::memcpy(&t, buffer, sizeof(T));
return true;
}

// Updates the current value to the value passed in.
// Non-blocking guarantees: wait-free.
void store(T t) noexcept
{
std::size_t buffer[buffer_size];
if constexpr (sizeof(T) % sizeof(std::size_t) != 0)
buffer[buffer_size - 1] = 0;

std::memcpy(&buffer, &t, sizeof(T));

// Note: load + store usually has better performance characteristics than fetch_add(1)
std::size_t old_seq = seq.load(std::memory_order_relaxed);
seq.store(old_seq + 1, std::memory_order_relaxed);

std::atomic_thread_fence(std::memory_order_release);

for (std::size_t i = 0; i < buffer_size; ++i)
data[i].store(buffer[i], std::memory_order_relaxed);
crill::atomic_store_per_byte_memcpy(&data, &t, sizeof(data), std::memory_order_release);

seq.store(old_seq + 2, std::memory_order_release);
}

private:
    // Raw byte storage for the current value; accessed only through
    // crill::atomic_load/store_per_byte_memcpy.
    // NOTE(review): the rendered diff showed the removed pre-commit members
    // (buffer_size and the std::atomic<std::size_t> array) alongside this
    // replacement; this is the reconstructed post-commit member list.
    char data[sizeof(T)];
    std::atomic<std::size_t> seq = 0;

    // The seqlock protocol requires a lock-free sequence counter.
    static_assert(decltype(seq)::is_always_lock_free);
};

Expand Down

0 comments on commit 2cbbfda

Please sign in to comment.