Skip to content

[libc] Add a config option to disable slab reclaiming #151599

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

jhuber6
Copy link
Contributor

@jhuber6 jhuber6 commented Jul 31, 2025

Summary:
Without slab reclaiming this interface is much simpler and it can speed
up cases with a lot of churn. In effect, it trades memory for performance.

Summary:
Without slab reclaiming this interface is much simpler and it can speed
up cases with a lot of churn. In effect, it trades memory for performance.
@llvmbot
Copy link
Member

llvmbot commented Jul 31, 2025

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-libc

Author: Joseph Huber (jhuber6)

Changes

Summary:
Without slab reclaiming this interface is much simpler and it can speed
up cases with a lot of churn. In effect, it trades memory for performance.


Full diff: https://github.com/llvm/llvm-project/pull/151599.diff

6 Files Affected:

  • (modified) libc/config/config.json (+6)
  • (modified) libc/config/gpu/amdgpu/config.json (+5)
  • (modified) libc/config/gpu/nvptx/config.json (+5)
  • (modified) libc/docs/configure.rst (+2)
  • (modified) libc/src/__support/GPU/CMakeLists.txt (+5)
  • (modified) libc/src/__support/GPU/allocator.cpp (+12-4)
diff --git a/libc/config/config.json b/libc/config/config.json
index d53b2936edb07..fe077d16342de 100644
--- a/libc/config/config.json
+++ b/libc/config/config.json
@@ -119,6 +119,12 @@
       "doc": "Force the size of time_t to 64 bits, even on platforms where compatibility considerations would otherwise make it 32-bit."
     }
   },
+  "malloc": {
+    "LIBC_CONF_GPU_MALLOC_DISABLE_SLAB_RECLAIMING": {
+      "value": false,
+      "doc": "Disable returning unused slabs to system memory in the GPU malloc implementation; when false, unused slabs are reclaimed."
+    }
+  },
   "general": {
     "LIBC_ADD_NULL_CHECKS": {
       "value": true,
diff --git a/libc/config/gpu/amdgpu/config.json b/libc/config/gpu/amdgpu/config.json
index 30ae10e2cfd61..d3918eff8a59d 100644
--- a/libc/config/gpu/amdgpu/config.json
+++ b/libc/config/gpu/amdgpu/config.json
@@ -36,5 +36,10 @@
     "LIBC_CONF_MATH_OPTIMIZATIONS": {
       "value": "(LIBC_MATH_SKIP_ACCURATE_PASS | LIBC_MATH_SMALL_TABLES | LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)"
     }
+  },
+  "malloc": {
+    "LIBC_CONF_GPU_MALLOC_DISABLE_SLAB_RECLAIMING": {
+      "value": false
+    }
   }
 }
diff --git a/libc/config/gpu/nvptx/config.json b/libc/config/gpu/nvptx/config.json
index 30ae10e2cfd61..d3918eff8a59d 100644
--- a/libc/config/gpu/nvptx/config.json
+++ b/libc/config/gpu/nvptx/config.json
@@ -36,5 +36,10 @@
     "LIBC_CONF_MATH_OPTIMIZATIONS": {
       "value": "(LIBC_MATH_SKIP_ACCURATE_PASS | LIBC_MATH_SMALL_TABLES | LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)"
     }
+  },
+  "malloc": {
+    "LIBC_CONF_GPU_MALLOC_DISABLE_SLAB_RECLAIMING": {
+      "value": false
+    }
   }
 }
diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst
index 109412225634f..4b005bb177da1 100644
--- a/libc/docs/configure.rst
+++ b/libc/docs/configure.rst
@@ -32,6 +32,8 @@ to learn about the defaults for your platform and target.
     - ``LIBC_CONF_ERRNO_MODE``: The implementation used for errno, acceptable values are LIBC_ERRNO_MODE_DEFAULT, LIBC_ERRNO_MODE_UNDEFINED, LIBC_ERRNO_MODE_THREAD_LOCAL, LIBC_ERRNO_MODE_SHARED, LIBC_ERRNO_MODE_EXTERNAL, LIBC_ERRNO_MODE_SYSTEM, and LIBC_ERRNO_MODE_SYSTEM_INLINE.
 * **"general" options**
     - ``LIBC_ADD_NULL_CHECKS``: Add nullptr checks in the library's implementations to some functions for which passing nullptr is undefined behavior.
+* **"malloc" options**
+    - ``LIBC_CONF_GPU_MALLOC_DISABLE_SLAB_RECLAIMING``: Disable returning unused slabs to system memory in the GPU malloc implementation; when false, unused slabs are reclaimed.
 * **"math" options**
     - ``LIBC_CONF_FREXP_INF_NAN_EXPONENT``: The value written back to the second parameter when calling frexp/frexpf/frexpl` with `+/-Inf`/`NaN` is unspecified.  Configure an explicit exp value for Inf/NaN inputs.
     - ``LIBC_CONF_MATH_OPTIMIZATIONS``: Configures optimizations for math functions. Values accepted are LIBC_MATH_SKIP_ACCURATE_PASS, LIBC_MATH_SMALL_TABLES, LIBC_MATH_NO_ERRNO, LIBC_MATH_NO_EXCEPT, and LIBC_MATH_FAST.
diff --git a/libc/src/__support/GPU/CMakeLists.txt b/libc/src/__support/GPU/CMakeLists.txt
index f8fdfeb9da9df..e6884a31b6a5b 100644
--- a/libc/src/__support/GPU/CMakeLists.txt
+++ b/libc/src/__support/GPU/CMakeLists.txt
@@ -3,6 +3,10 @@ if(NOT LIBC_TARGET_OS_IS_GPU)
   return()
 endif()
 
+if(LIBC_CONF_GPU_MALLOC_DISABLE_SLAB_RECLAIMING) # Must match the key declared in config.json.
+  list(APPEND malloc_config_copts "-DLIBC_CONF_MALLOC_DISABLE_SLAB_RECLAIMING")
+endif()
+
 add_header_library(
   utils
   HDRS
@@ -23,4 +27,5 @@ add_object_library(
     libc.src.__support.CPP.bit
     libc.src.__support.CPP.new
     .utils
+  ${malloc_config_copts}
 )
diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp
index bd0a55cb938fb..a55ff6b967546 100644
--- a/libc/src/__support/GPU/allocator.cpp
+++ b/libc/src/__support/GPU/allocator.cpp
@@ -39,6 +39,13 @@ constexpr static uint32_t MIN_ALIGNMENT = MIN_SIZE - 1;
 // The number of times to attempt claiming an in-progress slab allocation.
 constexpr static uint32_t MAX_TRIES = 1024;
 
+// Configuration for whether or not we will return unused slabs to memory.
+#ifdef LIBC_CONF_MALLOC_DISABLE_SLAB_RECLAIMING
+constexpr static bool RECLAIM = false;
+#else
+constexpr static bool RECLAIM = true;
+#endif
+
 static_assert(!(ARRAY_SIZE & (ARRAY_SIZE - 1)), "Must be a power of two");
 
 namespace impl {
@@ -368,7 +375,7 @@ struct GuardPtr {
       // and obtain exclusive rights to deconstruct it. If the CAS failed either
       // another thread resurrected the counter and we quit, or a parallel read
       // helped us invalidating it. For the latter, claim that flag and return.
-      if (counter.fetch_sub(n, cpp::MemoryOrder::RELAXED) == n) {
+      if (counter.fetch_sub(n, cpp::MemoryOrder::RELAXED) == n && RECLAIM) {
         uint32_t expected = 0;
         if (counter.compare_exchange_strong(expected, INVALID,
                                             cpp::MemoryOrder::RELAXED,
@@ -386,8 +393,9 @@ struct GuardPtr {
     // thread.
     uint64_t read() {
       auto val = counter.load(cpp::MemoryOrder::RELAXED);
-      if (val == 0 && counter.compare_exchange_strong(
-                          val, INVALID | HELPED, cpp::MemoryOrder::RELAXED))
+      if (val == 0 && RECLAIM &&
+          counter.compare_exchange_strong(val, INVALID | HELPED,
+                                          cpp::MemoryOrder::RELAXED))
         return 0;
       return (val & INVALID) ? 0 : val;
     }
@@ -421,7 +429,7 @@ struct GuardPtr {
       return nullptr;
 
     cpp::atomic_thread_fence(cpp::MemoryOrder::ACQUIRE);
-    return ptr.load(cpp::MemoryOrder::RELAXED);
+    return RECLAIM ? ptr.load(cpp::MemoryOrder::RELAXED) : expected;
   }
 
   // Finalize the associated memory and signal that it is ready to use by

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants