Skip to content

Commit

Permalink
Merge pull request #771 from CHIP-SPV/dynamic-event-pools
Browse files Browse the repository at this point in the history
Dynamic event pools
  • Loading branch information
pvelesko authored Feb 20, 2024
2 parents c6c22d2 + 6e1382d commit d1be342
Show file tree
Hide file tree
Showing 8 changed files with 271 additions and 287 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,6 @@ int main() {

// Wait until all the threads finish their execution
for (int i = 0; i < numThreads; i++) {
std::cout << "Joining Tid#" << i << "\n";
T[i].join();
}

Expand Down
67 changes: 44 additions & 23 deletions scripts/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import argparse
import subprocess
import hashlib
import time


parser = argparse.ArgumentParser(
Expand All @@ -14,13 +15,17 @@
parser.add_argument('device_type', type=str, choices=['cpu', 'igpu', 'dgpu'], help='Device type')
parser.add_argument('backend', type=str, choices=['opencl', 'level0-reg', 'level0-imm', 'pocl'], help='Backend to use')
parser.add_argument('--num-threads', type=int, nargs='?', default=os.cpu_count(), help='Number of threads to use (default: number of cores on the system)')
parser.add_argument('--num-tries', type=int, nargs='?', default=1, help='Number of tries (default: 1)')
parser.add_argument('--timeout', type=int, nargs='?', default=200, help='Timeout in seconds (default: 200)')
parser.add_argument('-m', '--modules', type=str, choices=['on', 'off'], default="off", help='load modulefiles automatically (default: off)')
parser.add_argument('-v', '--verbose', action='store_true', help='verbose output')
parser.add_argument('-d', '--dry-run', '-N', action='store_true', help='dry run')
parser.add_argument('-c', '--categories', action='store_true', help='run tests by categories, including running a set of tests in a single thread')

# --total-runtime cannot be used with --num-tries
group = parser.add_mutually_exclusive_group()
group.add_argument('--total-runtime', type=int, nargs='?', default=0, help='Set --num-tries such that the total runtime is approximately this value in minutes')
group.add_argument('--num-tries', type=int, nargs='?', default=1, help='Number of tries (default: 1)')

args = parser.parse_args()

# execute a command and return the output along with the return code
Expand Down Expand Up @@ -110,28 +115,44 @@ def run_cmd(cmd):
else:
texture_cmd = ""



if args.categories:
cmd_deviceFunc = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R deviceFunc -O checkpy_{args.device_type}_{args.backend}_device.txt"
cmd_graph = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R \"[Gg]raph\" -O checkpy_{args.device_type}_{args.backend}_graph.txt"
cmd_single = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 1 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R \"`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_single.txt"
cmd_other = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j {args.num_threads} -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}|deviceFunc|[Gg]raph|`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_other.txt"

res_deviceFunc, err = run_cmd(cmd_deviceFunc)
res_graph, err = run_cmd(cmd_graph)
res_single, err = run_cmd(cmd_single)
res_other, err = run_cmd(cmd_other)

if "0 tests failed" in res_deviceFunc and "0 tests failed" in res_graph and "0 tests failed" in res_single and "0 tests failed" in res_other:
exit(0)
def run_tests(num_tries):
if args.categories:
cmd_deviceFunc = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R deviceFunc -O checkpy_{args.device_type}_{args.backend}_device.txt"
cmd_graph = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R \"[Gg]raph\" -O checkpy_{args.device_type}_{args.backend}_graph.txt"
cmd_single = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j 1 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R \"`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_single.txt"
cmd_other = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j {args.num_threads} -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}|deviceFunc|[Gg]raph|`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_other.txt"

res_deviceFunc, err = run_cmd(cmd_deviceFunc)
res_graph, err = run_cmd(cmd_graph)
res_single, err = run_cmd(cmd_single)
res_other, err = run_cmd(cmd_other)

if "0 tests failed" in res_deviceFunc and "0 tests failed" in res_graph and "0 tests failed" in res_single and "0 tests failed" in res_other:
exit(0)
else:
exit(1)
else:
exit(1)
cmd = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j {args.num_threads} -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}\" -O checkpy_{args.device_type}_{args.backend}.txt"
res, err = run_cmd(cmd)
return res, err


# if --total-runtime is set, calculate the number of tries by running run_tests and checking the time
num_tries = 1
if args.total_runtime:
t_start = time.time()
run_tests(1)
t_end = time.time()
# calculate the total time
total_time = t_end - t_start
# calculate the number of tries
num_tries = int(args.total_runtime * 60 / total_time)
print(f"Running tests {num_tries} times to get a total runtime of {args.total_runtime} minutes")
else:
cmd = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j {args.num_threads} -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}\" -O checkpy_{args.device_type}_{args.backend}.txt"
num_tries = args.num_tries

res, err = run_cmd(cmd)
if "0 tests failed" in res:
exit(0)
else:
exit(1)
res, err = run_tests(num_tries)
if "0 tests failed" in res:
exit(0)
else:
exit(1)
57 changes: 27 additions & 30 deletions src/CHIPBackend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,20 @@ chipstar::Event::Event(chipstar::Context *Ctx, chipstar::EventFlags Flags)
: EventStatus_(EVENT_STATUS_INIT), Flags_(Flags), ChipContext_(Ctx),
Msg("") {}

void chipstar::Event::addDependency(
const std::shared_ptr<chipstar::Event> &Event) {
isDeletedSanityCheck();
logDebug("Event {} Msg {} now depends on event {} msg:{}", (void *)this, Msg,
(void *)Event.get(), Event->Msg);
DependsOnList.push_back(Event);
}

void chipstar::Event::releaseDependencies() {
assert(!Deleted_ && "chipstar::Event use after delete!");
isDeletedSanityCheck();
LOCK(EventMtx); // chipstar::Event::DependsOnList
for (auto &Dep : DependsOnList)
logDebug("Event {} msg: {} no longer depends on event {}", (void *)this,
Msg, (void *)Dep.get(), Dep->Msg);
DependsOnList.clear();
}

Expand Down Expand Up @@ -1173,9 +1184,7 @@ void chipstar::Backend::trackEvent(
const std::shared_ptr<chipstar::Event> &Event) {
LOCK(::Backend->EventsMtx); // trackImpl Backend::Events
LOCK(Event->EventMtx); // writing bool chipstar::Event::TrackCalled_
// assert(!isUserEvent() && "Attemped to track a user event!");
// assert(!Deleted_ && "chipstar::Event use after delete!");
// assert(!TrackCalled_ && "chipstar::Event already tracked!");
Event->isDeletedSanityCheck();

logDebug("Tracking chipstar::Event {} in Backend::Events", (void *)this);
assert(!Event->isTrackCalled());
Expand Down Expand Up @@ -1483,8 +1492,11 @@ chipstar::Queue::getSyncQueuesLastEvents() {
LOCK(Dev->DeviceMtx); // chipstar::Device::ChipQueues_ via getQueuesNoLock()

std::vector<std::shared_ptr<chipstar::Event>> EventsToWaitOn;
if (this->getLastEvent())
EventsToWaitOn.push_back(this->getLastEvent());
auto thisLastEvent = this->getLastEvent();
if (thisLastEvent) {
thisLastEvent->isDeletedSanityCheck();
EventsToWaitOn.push_back(thisLastEvent);
}

// If this stream is default legacy stream, sync with all other streams on
// this device
Expand Down Expand Up @@ -1585,13 +1597,9 @@ void chipstar::Queue::memCopyAsync2D(void *Dst, size_t DPitch, const void *Src,

// perform the copy
for (size_t i = 0; i < Height; ++i) {
// capture the event on last iteration
if (i == Height - 1) {
ChipEvent = memCopyAsyncImpl(Dst, Src, Width);
ChipEvent->Msg = "memCopyAsync2D";
} else {
memCopyAsyncImpl(Dst, Src, Width);
}
ChipEvent = memCopyAsyncImpl(Dst, Src, Width);
ChipEvent->Msg = "memCopyAsync2D";
::Backend->trackEvent(ChipEvent);
Src = (char *)Src + SPitch;
Dst = (char *)Dst + DPitch;
}
Expand All @@ -1600,9 +1608,6 @@ void chipstar::Queue::memCopyAsync2D(void *Dst, size_t DPitch, const void *Src,
this->MemMap(AllocInfoDst, chipstar::Queue::MEM_MAP_TYPE::HOST_READ_WRITE);
if (AllocInfoSrc && AllocInfoSrc->MemoryType == hipMemoryTypeHost)
this->MemMap(AllocInfoSrc, chipstar::Queue::MEM_MAP_TYPE::HOST_READ_WRITE);

if (ChipEvent)
::Backend->trackEvent(ChipEvent);
}

void chipstar::Queue::memFill(void *Dst, size_t Size, const void *Pattern,
Expand Down Expand Up @@ -1637,13 +1642,9 @@ void chipstar::Queue::memFillAsync2D(void *Dst, size_t Pitch, int Value,
for (size_t i = 0; i < Height; i++) {
auto Offset = Pitch * i;
char *DstP = (char *)Dst;
// capture the returned event on last iteration, otherwise don't
if (i == Height - 1) {
ChipEvent = memFillAsyncImpl(DstP + Offset, SizeBytes, &Value, 1);
ChipEvent->Msg = "memFillAsync2D";
::Backend->trackEvent(ChipEvent);
} else
memFillAsyncImpl(DstP + Offset, SizeBytes, &Value, 1);
ChipEvent = memFillAsyncImpl(DstP + Offset, SizeBytes, &Value, 1);
ChipEvent->Msg = "memFillAsync2D";
::Backend->trackEvent(ChipEvent);
}
}

Expand All @@ -1663,13 +1664,9 @@ void chipstar::Queue::memFillAsync3D(hipPitchedPtr PitchedDevPtr, int Value,
size_t SizeBytes = Width;
auto Offset = i * (Pitch * PitchedDevPtr.ysize) + j * Pitch;
char *DstP = (char *)Dst;
// capture the returned event on last iteration, otherwise don't
if (i == Depth - 1 && j == Height - 1) {
ChipEvent = memFillAsyncImpl(DstP + Offset, SizeBytes, &Value, 1);
ChipEvent->Msg = "memFillAsync3D";
::Backend->trackEvent(ChipEvent);
} else
memFillAsync(DstP + Offset, SizeBytes, &Value, 1);
ChipEvent = memFillAsyncImpl(DstP + Offset, SizeBytes, &Value, 1);
ChipEvent->Msg = "memFillAsync3D";
::Backend->trackEvent(ChipEvent);
}
}

Expand Down
43 changes: 22 additions & 21 deletions src/CHIPBackend.hh
Original file line number Diff line number Diff line change
Expand Up @@ -639,10 +639,7 @@ protected:
chipstar::EventFlags Flags_;
std::vector<std::shared_ptr<chipstar::Event>> DependsOnList;

#ifndef NDEBUG
// A debug flag for cathing use-after-delete.
bool Deleted_ = false;
#endif

/**
* @brief Events are always created with a context
Expand All @@ -660,18 +657,19 @@ protected:

public:
void setRecording() {
assert(!Deleted_ && "chipstar::Event use after delete!");
isDeletedSanityCheck();
EventStatus_ = EVENT_STATUS_RECORDING;
}
void markTracked() { TrackCalled_ = true; }
bool isTrackCalled() { return TrackCalled_; }
void setTrackCalled(bool Val) { TrackCalled_ = Val; }
bool isUserEvent() { return UserEvent_; }
void setUserEvent(bool Val) { UserEvent_ = Val; }
void addDependency(const std::shared_ptr<chipstar::Event> &Event) {
assert(!Deleted_ && "Event use after delete!");
DependsOnList.push_back(Event);
}
/// @brief Add an event on which this event depends, preventing that event
/// from getting recycled
/// @param Event
void addDependency(const std::shared_ptr<chipstar::Event> &Event);
/// @brief Release dependencies, allowing them to be recycled
void releaseDependencies();
chipstar::EventFlags getFlags() { return Flags_; }
std::mutex EventMtx;
Expand All @@ -690,7 +688,7 @@ public:
* @return Context* pointer to context on which this event was created
*/
chipstar::Context *getContext() {
assert(!Deleted_ && "chipstar::Event use after delete!");
isDeletedSanityCheck();
return ChipContext_;
}

Expand All @@ -711,7 +709,7 @@ public:
* @return false event is in init or invalid state
*/
bool isRecordingOrRecorded() {
assert(!Deleted_ && "chipstar::Event use after delete!");
isDeletedSanityCheck();
return EventStatus_ >= EVENT_STATUS_RECORDING;
}

Expand All @@ -722,7 +720,7 @@ public:
* @return false not recorded
*/
bool isFinished() {
assert(!Deleted_ && "chipstar::Event use after delete!");
isDeletedSanityCheck();
return (EventStatus_ == EVENT_STATUS_RECORDED);
}

Expand Down Expand Up @@ -769,16 +767,19 @@ public:
*/
virtual void hostSignal() = 0;

#ifndef NDEBUG
void markDeleted(bool State = true) {
LOCK(EventMtx); // Deleted_
#ifndef NDEBUG
Deleted_ = State;
#endif
}
bool isDeleted() {
LOCK(EventMtx); // Deleted_
return Deleted_;
}
void isDeletedSanityCheck() {
#ifndef NDEBUG
if (Deleted_) {
logError("chipstar::Event use after delete!");
std::abort();
}
#endif
}
};

class Program {
Expand Down Expand Up @@ -1772,8 +1773,7 @@ public:
*/
class Backend {
protected:
chipstar::EventMonitor *CallbackEventMonitor_ = nullptr;
chipstar::EventMonitor *StaleEventMonitor_ = nullptr;
chipstar::EventMonitor *EventMonitor_ = nullptr;

int MinQueuePriority_;
int MaxQueuePriority_ = 0;
Expand Down Expand Up @@ -2023,8 +2023,7 @@ public:
createCallbackData(hipStreamCallback_t Callback, void *UserData,
chipstar::Queue *ChipQ) = 0;

virtual chipstar::EventMonitor *createCallbackEventMonitor_() = 0;
virtual chipstar::EventMonitor *createStaleEventMonitor_() = 0;
virtual chipstar::EventMonitor *createEventMonitor_() = 0;

/* event interop */
virtual hipEvent_t getHipEvent(void *NativeEvent) = 0;
Expand Down Expand Up @@ -2127,6 +2126,8 @@ public:

virtual std::shared_ptr<chipstar::Event> getLastEvent() {
LOCK(LastEventMtx); // Queue::LastEvent_
if (LastEvent_)
LastEvent_->isDeletedSanityCheck();
return LastEvent_;
}

Expand Down
Loading

0 comments on commit d1be342

Please sign in to comment.