-
Notifications
You must be signed in to change notification settings - Fork 60
Open
Description
Apologies, this seems to have become rather long. It seemed like such a simple idea at the start!
There are four main parts to this idea:
- Allow `DeviceBox<[T]>`, making `DeviceBuffer` just an alias that could be deprecated in future.
- Make the interface safer by using `MaybeUninit` for uninitialized/zeroed allocations on the device.
- Add an `Alloc` generic parameter to `DeviceBox`, allowing for various new types of allocation.
- Bonus: Add support for async allocations.
I think this proposal is entirely backwards compatible, though it does introduce some methods that are very similar to existing ones, e.g. `new_uninit` vs. `uninitialized`, `new_zeroed` vs. `zeroed`.
DeviceAllocator
// new `alloc` module
/// A source of device memory that `DeviceBox` can be parameterised over.
///
/// Each allocator picks its own pointer representation through the
/// associated type `Ptr`.
pub trait DeviceAllocator {
    /// Pointer/handle type produced by `allocate` and consumed by `deallocate`.
    type Ptr;

    /// Allocates `size` units of memory (presumably bytes — TODO confirm).
    fn allocate(&self, size: usize) -> CudaResult<Self::Ptr>;

    /// Allocates zeroed memory.
    ///
    /// This is a separate method rather than "allocate, then zero" because
    /// it allows for asynchronous zeroing.
    fn allocate_zeroed(&self, size: usize) -> CudaResult<Self::Ptr>;

    /// Hands `ptr` back to the allocator.
    fn deallocate(&self, ptr: Self::Ptr) -> CudaResult<()>;
}
/// Allocator used by `DeviceBox` when no other allocator is named.
///
/// Uses `cudaMalloc`, `cudaFree`.
pub struct Global; // TODO better name?
// `Global` hands out untyped (`u8`) device pointers; the method bodies are
// elided in this sketch.
impl DeviceAllocator for Global {
type Ptr = DevicePointer<u8>;
...
}
// Other allocators might include:
// `Unified`, `HostPinned`, `Pitched`, `Async`, `MemoryPool`, etc.pub struct DeviceBox<T, A: DeviceAllocator = Global> {
ptr: A::Ptr,
alloc: A,
}
// The `DeviceAllocator` bound is required because `DeviceBox` itself
// declares `A: DeviceAllocator`.
impl<T, A: DeviceAllocator> DeviceBox<T, A> {
    /// Creates a box holding `x`, using the supplied allocator `alloc`.
    pub fn new_in(x: T, alloc: A) -> DeviceBox<T, A>;
}

MaybeUninit
// Constructors that use the `Global` allocator.
impl<T> DeviceBox<T, Global> {
...
// Note that these methods are safe.
// Both return the box typed as `MaybeUninit<T>`; turning it into a
// `DeviceBox<T, _>` goes through the `unsafe` `assume_init`.
pub fn new_uninit() -> DeviceBox<MaybeUninit<T>, Global>;
pub fn new_zeroed() -> DeviceBox<MaybeUninit<T>, Global>;
}
impl<T, A> DeviceBox<T, A> {
...
pub fn new_uninit_in(alloc: A) -> DeviceBox<MaybeUninit<T>, A>;
pub fn new_zeroed_in(alloc: A) -> DeviceBox<MaybeUninit<T>, A>;
}
// Operations available only while the contents are still `MaybeUninit<T>`.
impl<T, A: DeviceAllocator> DeviceBox<MaybeUninit<T>, A> {
    /// Converts the box to `DeviceBox<T, A>`, consuming `self`.
    ///
    /// # Safety
    ///
    /// Presumably the caller must guarantee the device memory holds a valid
    /// `T`, mirroring `MaybeUninit::assume_init` — TODO confirm the exact
    /// contract.
    pub unsafe fn assume_init(self) -> DeviceBox<T, A>;

    /// Returns a `DevicePointer<T>` to the (possibly uninitialized) contents.
    ///
    /// Use this for kernel outputs, then `assume_init` after the kernel is
    /// complete.
    ///
    /// # Safety
    ///
    /// The returned pointer is typed as `T` although the contents may not be
    /// initialized; presumably the caller must not read through it before
    /// the memory has been written — TODO confirm.
    pub unsafe fn as_uninit_device_pointer(&mut self) -> DevicePointer<T>;
}

DeviceBox<[T]>
// Slice constructors that use the `Global` allocator.
impl<T> DeviceBox<[T], Global> {
// Builds a boxed slice from anything that can be viewed as `&[T]`.
pub fn new(x: &impl AsRef<[T]>) -> DeviceBox<[T], Global>;
// NOTE(review): neither slice constructor below takes a length; presumably
// a `len: usize` parameter is intended, as in std's
// `Box::new_uninit_slice(len)` — confirm with the author.
pub fn new_uninit_slice() -> DeviceBox<[MaybeUninit<T>], Global>;
pub fn new_zeroed_slice() -> DeviceBox<[MaybeUninit<T>], Global>;
}
impl<T, A> DeviceBox<[T], A> {
pub fn new_in(x: &impl AsRef<[T]>, alloc: A) -> DeviceBox<[T], A>;
pub fn new_uninit_slice_in(alloc: A) -> DeviceBox<[MaybeUninit<T>], A>;
pub fn new_zeroed_slice_in(alloc: A) -> DeviceBox<[MaybeUninit<T>], A>;
}
// Slice counterpart of the `MaybeUninit` operations on `DeviceBox<MaybeUninit<T>, A>`.
impl<T, A: DeviceAllocator> DeviceBox<[MaybeUninit<T>], A> {
    /// Converts the box to `DeviceBox<[T], A>`, consuming `self`.
    ///
    /// # Safety
    ///
    /// Presumably the caller must guarantee every element holds a valid `T`
    /// — TODO confirm the exact contract.
    pub unsafe fn assume_init(self) -> DeviceBox<[T], A>;

    /// Returns a `DevicePointer<T>` to the (possibly uninitialized) slice.
    ///
    /// # Safety
    ///
    /// The returned pointer is typed as `T` although the contents may not be
    /// initialized; presumably the caller must not read through it before
    /// the memory has been written — TODO confirm.
    pub unsafe fn as_uninit_device_pointer(&mut self) -> DevicePointer<T>;
}

Async
/// Allocator bound to a borrowed stream.
///
/// Uses `cudaMallocAsync`, `cudaFreeAsync`.
pub struct Async<'a> {
// Stream borrowed for the lifetime `'a` of this allocator.
stream: &'a Stream,
}
impl Async<'_> {
pub fn on(stream: &'a Stream) -> Async<'a>;
}
// `Async` hands out untyped (`u8`) `DevicePointerAsync` values tied to the
// same lifetime `'a` as the allocator's stream borrow; the method bodies
// are elided in this sketch.
impl<'a> DeviceAllocator for Async<'a> {
type Ptr = DevicePointerAsync<'a, u8>;
...
}
/// Pointer type used by the `Async` allocator: a device pointer bundled with
/// a stream reference and an event.
pub struct DevicePointerAsync<'a, T> {
// The underlying device pointer.
ptr: DevicePointer<T>,
// Borrowed stream (presumably the one the allocation was issued on —
// TODO confirm).
stream: &'a Stream,
// Event that `DeviceBox` blocks or waits on before handing the pointer
// to another stream or converting to a `Global` box.
is_allocated: Event,
}
// Methods available when the allocator's pointer type is `DevicePointerAsync`.
//
// The constraint is written as an associated-type binding on the trait
// bound, and the lifetime is named explicitly as `'a`.
//
// NOTE(review): `Async<'a>` declares `Ptr = DevicePointerAsync<'a, u8>`, so
// this bound only matches it when `T = u8`; presumably the binding or the
// allocator's `Ptr` needs adjusting — confirm with the author.
impl<'a, T, A> DeviceBox<T, A>
where
    A: DeviceAllocator<Ptr = DevicePointerAsync<'a, T>>,
{
    /// Returns the device pointer for use on `stream`.
    ///
    /// If the stream matches the async pointer, return it immediately.
    /// Otherwise, block `stream` on `is_allocated` event.
    pub fn as_device_pointer_on(&mut self, stream: &Stream) -> DevicePointer<T>;

    /// Returns the device pointer without the stream check performed by
    /// `as_device_pointer_on`.
    ///
    /// # Safety
    ///
    /// Presumably the caller must ensure the allocation has completed (or
    /// that the pointer is only used on the allocating stream) — TODO
    /// confirm the exact contract.
    pub unsafe fn as_device_pointer_unchecked(&mut self) -> DevicePointer<T>;
}
// Conversion from an `Async`-allocated box to a `Global` one.
impl<T> DeviceBox<T, Async<'_>> {
    /// Consumes the box and returns it as a `DeviceBox<T, Global>`.
    ///
    /// Wait for `is_allocated` event.
    pub fn synchronize(self) -> DeviceBox<T, Global>;
}

Metadata
Metadata
Assignees
Labels
No labels