Skip to content

Commit

Permalink
Merge pull request #5501 from google/benvanik-hal-submit-and-wait
Browse files Browse the repository at this point in the history
Cleaning up HAL timeouts and tunneling iree_hal_device_submit_and_wait through the stack.
  • Loading branch information
benvanik authored Apr 19, 2021
2 parents 74be346 + 4d25359 commit 3c275fc
Show file tree
Hide file tree
Showing 32 changed files with 403 additions and 339 deletions.
100 changes: 93 additions & 7 deletions iree/base/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -771,20 +771,28 @@ iree_api_version_check(iree_api_version_t expected_version,
// iree_time_t and iree_duration_t
//===----------------------------------------------------------------------===//

// Like absl::Time, represented as nanoseconds since unix epoch.
// A point in time represented as nanoseconds since unix epoch.
// TODO(benvanik): pick something easy to get into/outof time_t/etc.
typedef int64_t iree_time_t;
// Like absl::InfinitePast.
// A time in the infinite past used to indicate "already happened".
// This forces APIs that wait for a point in time to act as a poll and always
// return IREE_STATUS_DEADLINE_EXCEEDED instead of blocking the caller.
#define IREE_TIME_INFINITE_PAST INT64_MIN
// Like absl::InfiniteFuture.
// A time in the infinite future used to indicate "never".
// This causes APIs that wait for a point in time to wait however long is needed
// to satisfy the wait condition.
#define IREE_TIME_INFINITE_FUTURE INT64_MAX

// Like absl::Duration, represented as relative nanoseconds.
// A duration represented as relative nanoseconds.
typedef int64_t iree_duration_t;
// Like absl::InfiniteDuration.
#define IREE_DURATION_INFINITE INT64_MAX
// Like absl::ZeroDuration.
// A zero-length duration.
// Like IREE_TIME_INFINITE_PAST this forces APIs that would wait to instead
// return IREE_STATUS_DEADLINE_EXCEEDED immediately.
#define IREE_DURATION_ZERO 0
// An infinite-length duration.
// Like IREE_TIME_INFINITE_FUTURE this causes APIs that wait to do so until
// their wait condition is satisfied without returning early.
#define IREE_DURATION_INFINITE INT64_MAX

// Returns the current system time in unix nanoseconds.
// Depending on the system architecture and power mode this time may have a
Expand All @@ -806,6 +814,84 @@ iree_relative_timeout_to_deadline_ns(iree_duration_t timeout_ns);
IREE_API_EXPORT iree_duration_t
iree_absolute_deadline_to_timeout_ns(iree_time_t deadline_ns);

// Selects how the `nanos` value of an iree_timeout_t is interpreted.
typedef enum {
// Timeout is defined by an absolute value `deadline_ns`.
// This is the type produced by iree_make_deadline.
IREE_TIMEOUT_ABSOLUTE = 0,
// Timeout is defined by a relative value `timeout_ns`.
// This is the type produced by iree_make_timeout.
IREE_TIMEOUT_RELATIVE = 1,
} iree_timeout_type_t;

// A timeout defined either by an absolute or relative value.
// Use iree_make_deadline/iree_make_timeout (or the immediate/infinite helpers)
// to construct one instead of populating the fields by hand.
typedef struct {
// Discriminator describing how `nanos` must be interpreted.
iree_timeout_type_t type;
// Nanosecond value: an absolute deadline when `type` is IREE_TIMEOUT_ABSOLUTE
// or a relative duration when `type` is IREE_TIMEOUT_RELATIVE.
iree_time_t nanos;
} iree_timeout_t;

// Returns a timeout that will be exceeded immediately.
// This can be used with APIs that would otherwise wait to cause them to poll.
//
// Example:
//   status = iree_wait_for_signal_or_timeout(&obj, iree_immediate_timeout());
//   if (iree_status_is_deadline_exceeded(status)) {
//     // The signal has not occurred yet. Had the timeout not been immediate
//     // the call would have blocked the caller here.
//   }
static inline iree_timeout_t iree_immediate_timeout(void) {
  // Expressed as an absolute deadline in the infinite past.
  iree_timeout_t timeout = {IREE_TIMEOUT_ABSOLUTE, IREE_TIME_INFINITE_PAST};
  return timeout;
}

// Returns a timeout that will never be reached.
// This can be used with APIs that can wait to disable the early
// deadline-exceeded returns when a condition is not met. It should be used with
// care as it can complicate program state and make termination more prone to
// hangs. On the other hand, it's really useful to not bother with actual
// deadlines. YMMV.
static inline iree_timeout_t iree_infinite_timeout(void) {
  // Expressed as an absolute deadline in the infinite future.
  iree_timeout_t timeout = {IREE_TIMEOUT_ABSOLUTE, IREE_TIME_INFINITE_FUTURE};
  return timeout;
}

// Builds an absolute timeout (a deadline) from |deadline_ns|, a point in time
// expressed in nanoseconds.
static inline iree_timeout_t iree_make_deadline(iree_time_t deadline_ns) {
  iree_timeout_t result;
  result.type = IREE_TIMEOUT_ABSOLUTE;
  result.nanos = deadline_ns;
  return result;
}

// Builds a relative timeout from |timeout_ns|, a duration expressed in
// nanoseconds.
static inline iree_timeout_t iree_make_timeout(iree_duration_t timeout_ns) {
  iree_timeout_t result;
  result.type = IREE_TIMEOUT_RELATIVE;
  result.nanos = timeout_ns;
  return result;
}

// Converts |timeout| in-place from relative to absolute; timeouts that are
// already absolute are left untouched.
//
// Absolute timeouts (deadlines) are better for long-running tasks or when
// making calls that may complete in stages, as relative ones tend to skew:
// if a wait is performed with a relative timeout of 10ms but it takes 5ms to
// get from the origin of the call to the actual wait using the timeout then
// the total latency of the call may be 15ms (5ms to prepare + 10ms on the
// wait). With an absolute deadline the caller can instead bound the total time
// spent in the operation regardless of any intervening work.
//
// For this reason IREE internal APIs try to convert to absolute times and users
// may be able to reduce overhead by populating the times as absolute to start
// with via iree_make_deadline.
static inline void iree_convert_timeout_to_absolute(iree_timeout_t* timeout) {
  // Nothing to do unless the value is currently a relative duration.
  if (timeout->type != IREE_TIMEOUT_RELATIVE) return;
  const iree_time_t deadline_ns =
      iree_relative_timeout_to_deadline_ns(timeout->nanos);
  timeout->nanos = deadline_ns;
  timeout->type = IREE_TIMEOUT_ABSOLUTE;
}

// Returns the absolute deadline in nanoseconds described by |timeout|.
// Absolute timeouts are returned as-is; any other timeout is converted with
// iree_relative_timeout_to_deadline_ns. The argument itself is not modified.
static inline iree_time_t iree_timeout_as_deadline_ns(iree_timeout_t timeout) {
  if (timeout.type == IREE_TIMEOUT_ABSOLUTE) {
    return timeout.nanos;
  }
  return iree_relative_timeout_to_deadline_ns(timeout.nanos);
}

//===----------------------------------------------------------------------===//
// iree_allocator_t (std::allocator-like interface)
//===----------------------------------------------------------------------===//
Expand Down
4 changes: 2 additions & 2 deletions iree/hal/cts/cts_test_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ class CtsTestBase : public ::testing::TestWithParam<std::string> {
/*queue_affinity=*/0,
/*batch_count=*/1, &submission_batch);
if (iree_status_is_ok(status)) {
status = iree_hal_semaphore_wait_with_deadline(signal_semaphore, 1ull,
IREE_TIME_INFINITE_FUTURE);
status = iree_hal_semaphore_wait(signal_semaphore, 1ull,
iree_infinite_timeout());
}

iree_hal_semaphore_release(signal_semaphore);
Expand Down
4 changes: 2 additions & 2 deletions iree/hal/cts/event_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ TEST_P(EventTest, SubmitWithChainedCommandBuffers) {
iree_hal_device_queue_submit(device_, IREE_HAL_COMMAND_CATEGORY_DISPATCH,
/*queue_affinity=*/0,
/*batch_count=*/1, &submission_batch));
IREE_ASSERT_OK(iree_hal_semaphore_wait_with_deadline(
signal_semaphore, 1ull, IREE_TIME_INFINITE_FUTURE));
IREE_ASSERT_OK(
iree_hal_semaphore_wait(signal_semaphore, 1ull, iree_infinite_timeout()));

iree_hal_command_buffer_release(command_buffer_1);
iree_hal_command_buffer_release(command_buffer_2);
Expand Down
16 changes: 8 additions & 8 deletions iree/hal/cts/semaphore_submission_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ TEST_P(SemaphoreSubmissionTest, SubmitWithNoCommandBuffers) {
iree_hal_device_queue_submit(device_, IREE_HAL_COMMAND_CATEGORY_DISPATCH,
/*queue_affinity=*/0,
/*batch_count=*/1, &submission_batch));
IREE_ASSERT_OK(iree_hal_semaphore_wait_with_deadline(
signal_semaphore, 1ull, IREE_TIME_INFINITE_FUTURE));
IREE_ASSERT_OK(
iree_hal_semaphore_wait(signal_semaphore, 1ull, iree_infinite_timeout()));

iree_hal_semaphore_release(signal_semaphore);
}
Expand Down Expand Up @@ -83,8 +83,8 @@ TEST_P(SemaphoreSubmissionTest, SubmitAndSignal) {
iree_hal_device_queue_submit(device_, IREE_HAL_COMMAND_CATEGORY_DISPATCH,
/*queue_affinity=*/0,
/*batch_count=*/1, &submission_batch));
IREE_ASSERT_OK(iree_hal_semaphore_wait_with_deadline(
signal_semaphore, 1ull, IREE_TIME_INFINITE_FUTURE));
IREE_ASSERT_OK(
iree_hal_semaphore_wait(signal_semaphore, 1ull, iree_infinite_timeout()));

iree_hal_command_buffer_release(command_buffer);
iree_hal_semaphore_release(signal_semaphore);
Expand Down Expand Up @@ -132,8 +132,8 @@ TEST_P(SemaphoreSubmissionTest, SubmitWithWait) {

// Signal the wait semaphore, work should begin and complete.
IREE_ASSERT_OK(iree_hal_semaphore_signal(wait_semaphore, 1ull));
IREE_ASSERT_OK(iree_hal_semaphore_wait_with_deadline(
signal_semaphore, 101ull, IREE_TIME_INFINITE_FUTURE));
IREE_ASSERT_OK(iree_hal_semaphore_wait(signal_semaphore, 101ull,
iree_infinite_timeout()));

iree_hal_command_buffer_release(command_buffer);
iree_hal_semaphore_release(wait_semaphore);
Expand Down Expand Up @@ -196,9 +196,9 @@ TEST_P(SemaphoreSubmissionTest, SubmitWithMultipleSemaphores) {
signal_semaphore_list.semaphores = signal_semaphore_ptrs;
uint64_t payload_values[] = {1ull, 1ull};
signal_semaphore_list.payload_values = payload_values;
IREE_ASSERT_OK(iree_hal_device_wait_semaphores_with_deadline(
IREE_ASSERT_OK(iree_hal_device_wait_semaphores(
device_, IREE_HAL_WAIT_MODE_ALL, &signal_semaphore_list,
IREE_TIME_INFINITE_FUTURE));
iree_infinite_timeout()));

iree_hal_command_buffer_release(command_buffer);
iree_hal_semaphore_release(wait_semaphore_1);
Expand Down
66 changes: 35 additions & 31 deletions iree/hal/cts/semaphore_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,19 @@ TEST_P(SemaphoreTest, Failure) {

// Tests waiting on no semaphores.
TEST_P(SemaphoreTest, EmptyWait) {
IREE_ASSERT_OK(iree_hal_device_wait_semaphores_with_deadline(
device_, IREE_HAL_WAIT_MODE_ANY, NULL, IREE_TIME_INFINITE_FUTURE));
IREE_ASSERT_OK(iree_hal_device_wait_semaphores_with_deadline(
device_, IREE_HAL_WAIT_MODE_ALL, NULL, IREE_TIME_INFINITE_FUTURE));

IREE_ASSERT_OK(iree_hal_device_wait_semaphores_with_timeout(
device_, IREE_HAL_WAIT_MODE_ANY, NULL, IREE_DURATION_INFINITE));
IREE_ASSERT_OK(iree_hal_device_wait_semaphores_with_timeout(
device_, IREE_HAL_WAIT_MODE_ALL, NULL, IREE_DURATION_INFINITE));
IREE_ASSERT_OK(iree_hal_device_wait_semaphores(
device_, IREE_HAL_WAIT_MODE_ANY, NULL,
iree_make_deadline(IREE_TIME_INFINITE_FUTURE)));
IREE_ASSERT_OK(iree_hal_device_wait_semaphores(
device_, IREE_HAL_WAIT_MODE_ALL, NULL,
iree_make_deadline(IREE_TIME_INFINITE_FUTURE)));

IREE_ASSERT_OK(iree_hal_device_wait_semaphores(
device_, IREE_HAL_WAIT_MODE_ANY, NULL,
iree_make_timeout(IREE_DURATION_INFINITE)));
IREE_ASSERT_OK(iree_hal_device_wait_semaphores(
device_, IREE_HAL_WAIT_MODE_ALL, NULL,
iree_make_timeout(IREE_DURATION_INFINITE)));
}

// Tests waiting on a semaphore that has already been signaled.
Expand All @@ -103,15 +107,15 @@ TEST_P(SemaphoreTest, DISABLED_WaitAlreadySignaled) {
IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 2ull, &semaphore));

// Test both previous and current values.
IREE_ASSERT_OK(iree_hal_semaphore_wait_with_deadline(
semaphore, 1ull, IREE_TIME_INFINITE_FUTURE));
IREE_ASSERT_OK(iree_hal_semaphore_wait_with_deadline(
semaphore, 2ull, IREE_TIME_INFINITE_FUTURE));
IREE_ASSERT_OK(iree_hal_semaphore_wait(
semaphore, 1ull, iree_make_deadline(IREE_TIME_INFINITE_FUTURE)));
IREE_ASSERT_OK(iree_hal_semaphore_wait(
semaphore, 2ull, iree_make_deadline(IREE_TIME_INFINITE_FUTURE)));

IREE_ASSERT_OK(iree_hal_semaphore_wait_with_timeout(semaphore, 1ull,
IREE_DURATION_INFINITE));
IREE_ASSERT_OK(iree_hal_semaphore_wait_with_timeout(semaphore, 2ull,
IREE_DURATION_INFINITE));
IREE_ASSERT_OK(iree_hal_semaphore_wait(
semaphore, 1ull, iree_make_timeout(IREE_DURATION_INFINITE)));
IREE_ASSERT_OK(iree_hal_semaphore_wait(
semaphore, 2ull, iree_make_timeout(IREE_DURATION_INFINITE)));

iree_hal_semaphore_release(semaphore);
}
Expand All @@ -124,8 +128,8 @@ TEST_P(SemaphoreTest, WaitUnsignaled) {
// NOTE: we don't actually block here because otherwise we'd lock up.
// Result status is undefined - some backends may return DeadlineExceededError
// while others may return success.
IREE_IGNORE_ERROR(iree_hal_semaphore_wait_with_deadline(
semaphore, 3ull, IREE_TIME_INFINITE_PAST));
IREE_IGNORE_ERROR(iree_hal_semaphore_wait(
semaphore, 3ull, iree_make_deadline(IREE_TIME_INFINITE_PAST)));

iree_hal_semaphore_release(semaphore);
}
Expand All @@ -150,9 +154,9 @@ TEST_P(SemaphoreTest, WaitAllButNotAllSignaled) {
// NOTE: we don't actually block here because otherwise we'd lock up.
// Result status is undefined - some backends may return DeadlineExceededError
// while others may return success.
IREE_IGNORE_ERROR(iree_hal_device_wait_semaphores_with_deadline(
IREE_IGNORE_ERROR(iree_hal_device_wait_semaphores(
device_, IREE_HAL_WAIT_MODE_ALL, &semaphore_list,
IREE_TIME_INFINITE_PAST));
iree_make_deadline(IREE_TIME_INFINITE_PAST)));

iree_hal_semaphore_release(semaphore_a);
iree_hal_semaphore_release(semaphore_b);
Expand All @@ -175,9 +179,9 @@ TEST_P(SemaphoreTest, WaitAllAndAllSignaled) {
// NOTE: we don't actually block here because otherwise we'd lock up.
// Result status is undefined - some backends may return DeadlineExceededError
// while others may return success.
IREE_IGNORE_ERROR(iree_hal_device_wait_semaphores_with_deadline(
IREE_IGNORE_ERROR(iree_hal_device_wait_semaphores(
device_, IREE_HAL_WAIT_MODE_ALL, &semaphore_list,
IREE_TIME_INFINITE_FUTURE));
iree_make_deadline(IREE_TIME_INFINITE_FUTURE)));

iree_hal_semaphore_release(semaphore_a);
iree_hal_semaphore_release(semaphore_b);
Expand All @@ -198,9 +202,9 @@ TEST_P(SemaphoreTest, DISABLED_WaitAny) {
uint64_t payload_values[] = {1ull, 1ull};
semaphore_list.payload_values = payload_values;

IREE_ASSERT_OK(iree_hal_device_wait_semaphores_with_deadline(
IREE_ASSERT_OK(iree_hal_device_wait_semaphores(
device_, IREE_HAL_WAIT_MODE_ANY, &semaphore_list,
IREE_TIME_INFINITE_FUTURE));
iree_make_deadline(IREE_TIME_INFINITE_FUTURE)));

iree_hal_semaphore_release(semaphore_a);
iree_hal_semaphore_release(semaphore_b);
Expand All @@ -215,16 +219,16 @@ TEST_P(SemaphoreTest, PingPong) {
IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &b2a));
std::thread thread([&]() {
// Should advance right past this because the value is already set.
IREE_ASSERT_OK(iree_hal_semaphore_wait_with_deadline(
a2b, 0ull, IREE_TIME_INFINITE_FUTURE));
IREE_ASSERT_OK(iree_hal_semaphore_wait(
a2b, 0ull, iree_make_deadline(IREE_TIME_INFINITE_FUTURE)));
IREE_ASSERT_OK(iree_hal_semaphore_signal(b2a, 1ull));
// Jump ahead (blocking at first).
IREE_ASSERT_OK(iree_hal_semaphore_wait_with_deadline(
a2b, 4ull, IREE_TIME_INFINITE_FUTURE));
IREE_ASSERT_OK(iree_hal_semaphore_wait(
a2b, 4ull, iree_make_deadline(IREE_TIME_INFINITE_FUTURE)));
});
// Block until thread signals.
IREE_ASSERT_OK(iree_hal_semaphore_wait_with_deadline(
b2a, 1ull, IREE_TIME_INFINITE_FUTURE));
IREE_ASSERT_OK(iree_hal_semaphore_wait(
b2a, 1ull, iree_make_deadline(IREE_TIME_INFINITE_FUTURE)));
IREE_ASSERT_OK(iree_hal_semaphore_signal(a2b, 4ull));
thread.join();

Expand Down
42 changes: 20 additions & 22 deletions iree/hal/cuda/cuda_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -251,23 +251,30 @@ static iree_status_t iree_hal_cuda_device_queue_submit(
return iree_ok_status();
}

static iree_status_t iree_hal_cuda_device_wait_semaphores_with_timeout(
iree_hal_device_t* base_device, iree_hal_wait_mode_t wait_mode,
const iree_hal_semaphore_list_t* semaphore_list,
iree_duration_t timeout_ns) {
return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
"semaphore not implemented");
// Submits |batches| through iree_hal_cuda_device_queue_submit and then waits
// on |wait_semaphore| for |wait_value| with the given |timeout|.
//
// The submit arguments (|command_categories|, |queue_affinity|, |batch_count|,
// and |batches|) are forwarded unchanged to the queue submit call. The returned
// status is that of the semaphore wait when the wait is reached.
static iree_status_t iree_hal_cuda_device_submit_and_wait(
iree_hal_device_t* base_device,
iree_hal_command_category_t command_categories,
iree_hal_queue_affinity_t queue_affinity, iree_host_size_t batch_count,
const iree_hal_submission_batch_t* batches,
iree_hal_semaphore_t* wait_semaphore, uint64_t wait_value,
iree_timeout_t timeout) {
// Submit...
// NOTE(review): IREE_RETURN_IF_ERROR is defined elsewhere; presumably it
// returns early with the failing status so the wait is skipped - confirm.
IREE_RETURN_IF_ERROR(iree_hal_cuda_device_queue_submit(
base_device, command_categories, queue_affinity, batch_count, batches));

// ...and wait.
return iree_hal_semaphore_wait(wait_semaphore, wait_value, timeout);
}

static iree_status_t iree_hal_cuda_device_wait_semaphores_with_deadline(
static iree_status_t iree_hal_cuda_device_wait_semaphores(
iree_hal_device_t* base_device, iree_hal_wait_mode_t wait_mode,
const iree_hal_semaphore_list_t* semaphore_list, iree_time_t deadline_ns) {
const iree_hal_semaphore_list_t* semaphore_list, iree_timeout_t timeout) {
return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
"semaphore not implemented");
}

static iree_status_t iree_hal_cuda_device_wait_idle_with_deadline(
iree_hal_device_t* base_device, iree_time_t deadline_ns) {
static iree_status_t iree_hal_cuda_device_wait_idle(
iree_hal_device_t* base_device, iree_timeout_t timeout) {
iree_hal_cuda_device_t* device = iree_hal_cuda_device_cast(base_device);
// Wait until the stream is done.
// TODO(thomasraoux): CUDA doesn't support a deadline for wait, figure out how
Expand All @@ -278,12 +285,6 @@ static iree_status_t iree_hal_cuda_device_wait_idle_with_deadline(
return iree_ok_status();
}

static iree_status_t iree_hal_cuda_device_wait_idle_with_timeout(
iree_hal_device_t* base_device, iree_duration_t timeout_ns) {
return iree_hal_cuda_device_wait_idle_with_deadline(
base_device, iree_relative_timeout_to_deadline_ns(timeout_ns));
}

const iree_hal_device_vtable_t iree_hal_cuda_device_vtable = {
.destroy = iree_hal_cuda_device_destroy,
.id = iree_hal_cuda_device_id,
Expand All @@ -299,10 +300,7 @@ const iree_hal_device_vtable_t iree_hal_cuda_device_vtable = {
.create_executable_layout = iree_hal_cuda_device_create_executable_layout,
.create_semaphore = iree_hal_cuda_device_create_semaphore,
.queue_submit = iree_hal_cuda_device_queue_submit,
.wait_semaphores_with_deadline =
iree_hal_cuda_device_wait_semaphores_with_deadline,
.wait_semaphores_with_timeout =
iree_hal_cuda_device_wait_semaphores_with_timeout,
.wait_idle_with_deadline = iree_hal_cuda_device_wait_idle_with_deadline,
.wait_idle_with_timeout = iree_hal_cuda_device_wait_idle_with_timeout,
.submit_and_wait = iree_hal_cuda_device_submit_and_wait,
.wait_semaphores = iree_hal_cuda_device_wait_semaphores,
.wait_idle = iree_hal_cuda_device_wait_idle,
};
Loading

0 comments on commit 3c275fc

Please sign in to comment.