Skip to content

Commit

Permalink
Adding iree_hal_buffer_placement_t info to allocated HAL buffers.
Browse files Browse the repository at this point in the history
This allows users to query for the device, queue affinity, and origin
flags of an allocated buffer so that they can decide whether to copy
buffers when moving across devices/queues, perform synchronous or
asynchronous deallocations, and select queues to perform such operations.

The base `iree_hal_buffer_t` structure was cleaned up a bit and, though
still ugly, is better prepared for removal of the device allocator
back-reference. #19159 tracks removing the allocator reference, which is
currently used only by the caching allocator due to our lack of
dynamic casts.
  • Loading branch information
benvanik committed Dec 10, 2024
1 parent b8ab7a2 commit e767e0d
Show file tree
Hide file tree
Showing 45 changed files with 491 additions and 417 deletions.
8 changes: 6 additions & 2 deletions experimental/web/sample_webgpu/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -666,15 +666,19 @@ static iree_status_t allocate_mappable_device_buffer(
"unable to allocate buffer of size %" PRIdsz,
data_length);
}
const iree_hal_buffer_placement_t placement = {
.device = device,
.queue_affinity = IREE_HAL_QUEUE_AFFINITY_ANY,
};
const iree_hal_buffer_params_t target_params = {
.usage = IREE_HAL_BUFFER_USAGE_TRANSFER | IREE_HAL_BUFFER_USAGE_MAPPING,
.type =
IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
.access = IREE_HAL_MEMORY_ACCESS_ALL,
};
return iree_hal_webgpu_buffer_wrap(
device, iree_hal_device_allocator(device), target_params.type,
target_params.access, target_params.usage, data_length,
origin, target_params.type, target_params.access, target_params.usage,
data_length,
/*byte_offset=*/0,
/*byte_length=*/data_length, device_buffer_handle,
iree_allocator_system(), out_buffer);
Expand Down
24 changes: 11 additions & 13 deletions experimental/webgpu/buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

// WebGPU HAL buffer: the common iree_hal_buffer_t state plus the WGPUBuffer
// handle stored by iree_hal_webgpu_buffer_wrap.
// NOTE(review): this listing looks like a diff rendered without +/- markers;
// `device` and `host_allocator` are presumably the before/after forms of a
// single field rather than two coexisting members - confirm against the
// actual file before relying on the layout shown here.
typedef struct iree_hal_webgpu_buffer_t {
// Common HAL buffer state, initialized via iree_hal_buffer_initialize.
iree_hal_buffer_t base;
iree_hal_device_t* device; // unowned
// Host allocator used to allocate this struct; read back by the destroy
// function.
iree_allocator_t host_allocator;
// WebGPU buffer handle supplied by the caller of iree_hal_webgpu_buffer_wrap.
WGPUBuffer handle;
// Checked by iree_hal_webgpu_buffer_destroy at the start of teardown.
bool is_mapped;
} iree_hal_webgpu_buffer_t;
Expand All @@ -33,14 +33,12 @@ static iree_hal_webgpu_buffer_t* iree_hal_webgpu_buffer_cast(
}

iree_status_t iree_hal_webgpu_buffer_wrap(
iree_hal_device_t* device, iree_hal_allocator_t* device_allocator,
iree_hal_memory_type_t memory_type, iree_hal_memory_access_t allowed_access,
iree_hal_buffer_placement_t placement, iree_hal_memory_type_t memory_type,
iree_hal_memory_access_t allowed_access,
iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
iree_device_size_t byte_offset, iree_device_size_t byte_length,
WGPUBuffer handle, iree_allocator_t host_allocator,
iree_hal_buffer_t** out_buffer) {
IREE_ASSERT_ARGUMENT(device);
IREE_ASSERT_ARGUMENT(device_allocator);
IREE_ASSERT_ARGUMENT(handle);
IREE_ASSERT_ARGUMENT(out_buffer);
*out_buffer = NULL;
Expand All @@ -50,11 +48,11 @@ iree_status_t iree_hal_webgpu_buffer_wrap(
iree_status_t status =
iree_allocator_malloc(host_allocator, sizeof(*buffer), (void**)&buffer);
if (iree_status_is_ok(status)) {
iree_hal_buffer_initialize(host_allocator, device_allocator, &buffer->base,
allocation_size, byte_offset, byte_length,
memory_type, allowed_access, allowed_usage,
iree_hal_buffer_initialize(placement, &buffer->base, allocation_size,
byte_offset, byte_length, memory_type,
allowed_access, allowed_usage,
&iree_hal_webgpu_buffer_vtable, &buffer->base);
buffer->device = device;
buffer->host_allocator = host_allocator;
buffer->handle = handle;
*out_buffer = &buffer->base;
}
Expand All @@ -65,7 +63,7 @@ iree_status_t iree_hal_webgpu_buffer_wrap(

static void iree_hal_webgpu_buffer_destroy(iree_hal_buffer_t* base_buffer) {
iree_hal_webgpu_buffer_t* buffer = iree_hal_webgpu_buffer_cast(base_buffer);
iree_allocator_t host_allocator = base_buffer->host_allocator;
iree_allocator_t host_allocator = buffer->host_allocator;
IREE_TRACE_ZONE_BEGIN(z0);

if (buffer->is_mapped) {
Expand Down Expand Up @@ -99,7 +97,7 @@ static iree_status_t iree_hal_webgpu_buffer_map_range(
// Use wgpuBufferMapAsync directly to avoid this emulation.
iree_hal_webgpu_buffer_t* buffer = iree_hal_webgpu_buffer_cast(base_buffer);
return iree_hal_buffer_emulated_map_range(
buffer->device, base_buffer, mapping_mode, memory_access,
buffer->placement.device, base_buffer, mapping_mode, memory_access,
local_byte_offset, local_byte_length, mapping);
}

Expand All @@ -109,8 +107,8 @@ static iree_status_t iree_hal_webgpu_buffer_unmap_range(
// WebGPU does not allow for synchronous buffer mapping.
// Use wgpuBufferMapAsync directly to avoid this emulation.
iree_hal_webgpu_buffer_t* buffer = iree_hal_webgpu_buffer_cast(base_buffer);
return iree_hal_buffer_emulated_unmap_range(buffer->device, base_buffer,
local_byte_offset,
return iree_hal_buffer_emulated_unmap_range(buffer->placement.device,
base_buffer, local_byte_offset,
local_byte_length, mapping);
}

Expand Down
4 changes: 2 additions & 2 deletions experimental/webgpu/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ extern "C" {
// we start to support pooling.

iree_status_t iree_hal_webgpu_buffer_wrap(
iree_hal_device_t* device, iree_hal_allocator_t* device_allocator,
iree_hal_memory_type_t memory_type, iree_hal_memory_access_t allowed_access,
iree_hal_buffer_placement_t placement, iree_hal_memory_type_t memory_type,
iree_hal_memory_access_t allowed_access,
iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
iree_device_size_t byte_offset, iree_device_size_t byte_length,
WGPUBuffer handle, iree_allocator_t host_allocator,
Expand Down
9 changes: 7 additions & 2 deletions experimental/webgpu/simple_allocator.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,9 +195,14 @@ static iree_status_t iree_hal_webgpu_simple_allocator_allocate_buffer(
allocation_size);
}

const iree_hal_buffer_placement_t placement = {
.device = allocator->device,
.queue_affinity = params->queue_affinity ? params->queue_affinity
: IREE_HAL_QUEUE_AFFINITY_ANY,
.flags = IREE_HAL_BUFFER_PLACEMENT_FLAG_NONE,
};
iree_status_t status = iree_hal_webgpu_buffer_wrap(
allocator->device, base_allocator, params->type, params->access,
params->usage, allocation_size,
placement, params->type, params->access, params->usage, allocation_size,
/*byte_offset=*/0,
/*byte_length=*/allocation_size, buffer_handle, allocator->host_allocator,
out_buffer);
Expand Down
2 changes: 2 additions & 0 deletions runtime/src/iree/hal/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,8 @@ IREE_HAL_ASSERT_VTABLE_LAYOUT(iree_hal_allocator_vtable_t);
IREE_API_EXPORT void iree_hal_allocator_destroy(
iree_hal_allocator_t* IREE_RESTRICT allocator);

// TODO(#19159): remove iree_hal_allocator_deallocate_buffer when pooling no
// longer requires the pooling_allocator on iree_hal_buffer_t.
IREE_API_EXPORT void iree_hal_allocator_deallocate_buffer(
iree_hal_allocator_t* IREE_RESTRICT allocator,
iree_hal_buffer_t* IREE_RESTRICT buffer);
Expand Down
21 changes: 15 additions & 6 deletions runtime/src/iree/hal/allocator_heap.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,8 @@ static iree_status_t iree_hal_heap_allocator_allocate_buffer(
IREE_STATISTICS(statistics = &allocator->statistics);
iree_hal_buffer_t* buffer = NULL;
IREE_RETURN_IF_ERROR(iree_hal_heap_buffer_create(
base_allocator, statistics, &compat_params, allocation_size,
allocator->data_allocator, allocator->host_allocator, &buffer));
statistics, &compat_params, allocation_size, allocator->data_allocator,
allocator->host_allocator, &buffer));

*out_buffer = buffer;
return iree_ok_status();
Expand All @@ -219,6 +219,9 @@ static iree_status_t iree_hal_heap_allocator_import_buffer(
iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
iree_hal_buffer_release_callback_t release_callback,
iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
iree_hal_heap_allocator_t* allocator =
iree_hal_heap_allocator_cast(base_allocator);

// Coerce options into those required for use by heap-based devices.
iree_hal_buffer_params_t compat_params = *params;
iree_device_size_t allocation_size = external_buffer->size;
Expand All @@ -243,11 +246,17 @@ static iree_status_t iree_hal_heap_allocator_import_buffer(
"external buffer type not supported");
}

const iree_hal_buffer_placement_t placement = {
.device = NULL,
.queue_affinity = compat_params.queue_affinity
? compat_params.queue_affinity
: IREE_HAL_QUEUE_AFFINITY_ANY,
.flags = IREE_HAL_BUFFER_PLACEMENT_FLAG_NONE,
};
return iree_hal_heap_buffer_wrap(
base_allocator, compat_params.type, compat_params.access,
compat_params.usage, external_buffer->size,
iree_make_byte_span(ptr, external_buffer->size), release_callback,
out_buffer);
placement, compat_params.type, compat_params.access, compat_params.usage,
external_buffer->size, iree_make_byte_span(ptr, external_buffer->size),
release_callback, allocator->host_allocator, out_buffer);
}

static iree_status_t iree_hal_heap_allocator_export_buffer(
Expand Down
Loading

0 comments on commit e767e0d

Please sign in to comment.