diff --git a/libc/config/config.json b/libc/config/config.json index d53b2936edb07..fe077d16342de 100644 --- a/libc/config/config.json +++ b/libc/config/config.json @@ -119,6 +119,12 @@ "doc": "Force the size of time_t to 64 bits, even on platforms where compatibility considerations would otherwise make it 32-bit." } }, + "malloc": { + "LIBC_CONF_GPU_MALLOC_DISABLE_SLAB_RECLAIMING": { + "value": false, + "doc": "Disable returning unused slabs to system memory in the GPU malloc implementation." + } + }, "general": { "LIBC_ADD_NULL_CHECKS": { "value": true, diff --git a/libc/config/gpu/amdgpu/config.json b/libc/config/gpu/amdgpu/config.json index 30ae10e2cfd61..d3918eff8a59d 100644 --- a/libc/config/gpu/amdgpu/config.json +++ b/libc/config/gpu/amdgpu/config.json @@ -36,5 +36,10 @@ "LIBC_CONF_MATH_OPTIMIZATIONS": { "value": "(LIBC_MATH_SKIP_ACCURATE_PASS | LIBC_MATH_SMALL_TABLES | LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)" } + }, + "malloc": { + "LIBC_CONF_GPU_MALLOC_DISABLE_SLAB_RECLAIMING": { + "value": false + } } } diff --git a/libc/config/gpu/nvptx/config.json b/libc/config/gpu/nvptx/config.json index 30ae10e2cfd61..d3918eff8a59d 100644 --- a/libc/config/gpu/nvptx/config.json +++ b/libc/config/gpu/nvptx/config.json @@ -36,5 +36,10 @@ "LIBC_CONF_MATH_OPTIMIZATIONS": { "value": "(LIBC_MATH_SKIP_ACCURATE_PASS | LIBC_MATH_SMALL_TABLES | LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)" } + }, + "malloc": { + "LIBC_CONF_GPU_MALLOC_DISABLE_SLAB_RECLAIMING": { + "value": false + } } } diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst index 109412225634f..4b005bb177da1 100644 --- a/libc/docs/configure.rst +++ b/libc/docs/configure.rst @@ -32,6 +32,8 @@ to learn about the defaults for your platform and target. 
- ``LIBC_CONF_ERRNO_MODE``: The implementation used for errno, acceptable values are LIBC_ERRNO_MODE_DEFAULT, LIBC_ERRNO_MODE_UNDEFINED, LIBC_ERRNO_MODE_THREAD_LOCAL, LIBC_ERRNO_MODE_SHARED, LIBC_ERRNO_MODE_EXTERNAL, LIBC_ERRNO_MODE_SYSTEM, and LIBC_ERRNO_MODE_SYSTEM_INLINE. * **"general" options** - ``LIBC_ADD_NULL_CHECKS``: Add nullptr checks in the library's implementations to some functions for which passing nullptr is undefined behavior. +* **"malloc" options** + - ``LIBC_CONF_GPU_MALLOC_DISABLE_SLAB_RECLAIMING``: Disable returning unused slabs to system memory in the GPU malloc implementation. * **"math" options** - ``LIBC_CONF_FREXP_INF_NAN_EXPONENT``: The value written back to the second parameter when calling frexp/frexpf/frexpl` with `+/-Inf`/`NaN` is unspecified. Configure an explicit exp value for Inf/NaN inputs. - ``LIBC_CONF_MATH_OPTIMIZATIONS``: Configures optimizations for math functions. Values accepted are LIBC_MATH_SKIP_ACCURATE_PASS, LIBC_MATH_SMALL_TABLES, LIBC_MATH_NO_ERRNO, LIBC_MATH_NO_EXCEPT, and LIBC_MATH_FAST. diff --git a/libc/src/__support/GPU/CMakeLists.txt b/libc/src/__support/GPU/CMakeLists.txt index f8fdfeb9da9df..e6884a31b6a5b 100644 --- a/libc/src/__support/GPU/CMakeLists.txt +++ b/libc/src/__support/GPU/CMakeLists.txt @@ -3,6 +3,11 @@ if(NOT LIBC_TARGET_OS_IS_GPU) return() endif() +# Translate the malloc configuration options into compile definitions. +if(LIBC_CONF_GPU_MALLOC_DISABLE_SLAB_RECLAIMING) + list(APPEND malloc_config_copts "-DLIBC_CONF_MALLOC_DISABLE_SLAB_RECLAIMING") +endif() + add_header_library( utils HDRS @@ -23,4 +28,6 @@ add_object_library( libc.src.__support.CPP.bit libc.src.__support.CPP.new .utils + COMPILE_OPTIONS + ${malloc_config_copts} ) diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp index bd0a55cb938fb..a55ff6b967546 100644 --- a/libc/src/__support/GPU/allocator.cpp +++ b/libc/src/__support/GPU/allocator.cpp @@ -39,6 +39,13 @@ constexpr static uint32_t MIN_ALIGNMENT = MIN_SIZE - 1; // The number of times to attempt claiming an in-progress slab allocation. 
constexpr static uint32_t MAX_TRIES = 1024; +// Controls whether unused slabs are returned to system memory (reclaimed). +#ifdef LIBC_CONF_MALLOC_DISABLE_SLAB_RECLAIMING +constexpr static bool RECLAIM = false; +#else +constexpr static bool RECLAIM = true; +#endif + static_assert(!(ARRAY_SIZE & (ARRAY_SIZE - 1)), "Must be a power of two"); namespace impl { @@ -368,7 +375,7 @@ struct GuardPtr { // and obtain exclusive rights to deconstruct it. If the CAS failed either // another thread resurrected the counter and we quit, or a parallel read // helped us invalidating it. For the latter, claim that flag and return. - if (counter.fetch_sub(n, cpp::MemoryOrder::RELAXED) == n) { + if (counter.fetch_sub(n, cpp::MemoryOrder::RELAXED) == n && RECLAIM) { uint32_t expected = 0; if (counter.compare_exchange_strong(expected, INVALID, cpp::MemoryOrder::RELAXED, @@ -386,8 +393,9 @@ struct GuardPtr { // thread. uint64_t read() { auto val = counter.load(cpp::MemoryOrder::RELAXED); - if (val == 0 && counter.compare_exchange_strong( - val, INVALID | HELPED, cpp::MemoryOrder::RELAXED)) + if (val == 0 && RECLAIM && + counter.compare_exchange_strong(val, INVALID | HELPED, + cpp::MemoryOrder::RELAXED)) return 0; return (val & INVALID) ? 0 : val; } @@ -421,7 +429,7 @@ struct GuardPtr { return nullptr; cpp::atomic_thread_fence(cpp::MemoryOrder::ACQUIRE); - return RECLAIM ? ptr.load(cpp::MemoryOrder::RELAXED) : expected; } // Finalize the associated memory and signal that it is ready to use by