Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dynamic event pools #771

Merged
merged 19 commits into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,6 @@ int main() {

// Wait until all the threads finish their execution
for (int i = 0; i < numThreads; i++) {
std::cout << "Joining Tid#" << i << "\n";
T[i].join();
}

Expand Down
67 changes: 44 additions & 23 deletions scripts/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import argparse
import subprocess
import hashlib
import time


parser = argparse.ArgumentParser(
Expand All @@ -14,13 +15,17 @@
parser.add_argument('device_type', type=str, choices=['cpu', 'igpu', 'dgpu'], help='Device type')
parser.add_argument('backend', type=str, choices=['opencl', 'level0-reg', 'level0-imm', 'pocl'], help='Backend to use')
parser.add_argument('--num-threads', type=int, nargs='?', default=os.cpu_count(), help='Number of threads to use (default: number of cores on the system)')
parser.add_argument('--num-tries', type=int, nargs='?', default=1, help='Number of tries (default: 1)')
parser.add_argument('--timeout', type=int, nargs='?', default=200, help='Timeout in seconds (default: 200)')
parser.add_argument('-m', '--modules', type=str, choices=['on', 'off'], default="off", help='load modulefiles automatically (default: off)')
parser.add_argument('-v', '--verbose', action='store_true', help='verbose output')
parser.add_argument('-d', '--dry-run', '-N', action='store_true', help='dry run')
parser.add_argument('-c', '--categories', action='store_true', help='run tests by categories, including running a set of tests in a single thread')

# --total-runtime cannot be used with --num-tries
group = parser.add_mutually_exclusive_group()
group.add_argument('--total-runtime', type=int, nargs='?', default=0, help='Set --num-tries such that the total runtime is approximately this value in minutes')
group.add_argument('--num-tries', type=int, nargs='?', default=1, help='Number of tries (default: 1)')

args = parser.parse_args()

# execute a command and return the output along with the return code
Expand Down Expand Up @@ -110,28 +115,44 @@ def run_cmd(cmd):
else:
texture_cmd = ""



if args.categories:
cmd_deviceFunc = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R deviceFunc -O checkpy_{args.device_type}_{args.backend}_device.txt"
cmd_graph = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R \"[Gg]raph\" -O checkpy_{args.device_type}_{args.backend}_graph.txt"
cmd_single = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 1 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R \"`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_single.txt"
cmd_other = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j {args.num_threads} -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}|deviceFunc|[Gg]raph|`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_other.txt"

res_deviceFunc, err = run_cmd(cmd_deviceFunc)
res_graph, err = run_cmd(cmd_graph)
res_single, err = run_cmd(cmd_single)
res_other, err = run_cmd(cmd_other)

if "0 tests failed" in res_deviceFunc and "0 tests failed" in res_graph and "0 tests failed" in res_single and "0 tests failed" in res_other:
exit(0)
def run_tests(num_tries):
if args.categories:
cmd_deviceFunc = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R deviceFunc -O checkpy_{args.device_type}_{args.backend}_device.txt"
cmd_graph = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R \"[Gg]raph\" -O checkpy_{args.device_type}_{args.backend}_graph.txt"
cmd_single = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j 1 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R \"`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_single.txt"
cmd_other = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j {args.num_threads} -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}|deviceFunc|[Gg]raph|`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_other.txt"

res_deviceFunc, err = run_cmd(cmd_deviceFunc)
res_graph, err = run_cmd(cmd_graph)
res_single, err = run_cmd(cmd_single)
res_other, err = run_cmd(cmd_other)

if "0 tests failed" in res_deviceFunc and "0 tests failed" in res_graph and "0 tests failed" in res_single and "0 tests failed" in res_other:
exit(0)
else:
exit(1)
else:
exit(1)
cmd = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j {args.num_threads} -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}\" -O checkpy_{args.device_type}_{args.backend}.txt"
res, err = run_cmd(cmd)
return res, err


# if --total-runtime is set, calculate the number of tries by running run_tests and checking the time
num_tries = 1
if args.total_runtime:
t_start = time.time()
run_tests(1)
t_end = time.time()
# calculate the total time
total_time = t_end - t_start
# calculate the number of tries
num_tries = int(args.total_runtime * 60 / total_time)
print(f"Running tests {num_tries} times to get a total runtime of {args.total_runtime} minutes")
else:
cmd = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j {args.num_threads} -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}\" -O checkpy_{args.device_type}_{args.backend}.txt"
num_tries = args.num_tries

res, err = run_cmd(cmd)
if "0 tests failed" in res:
exit(0)
else:
exit(1)
res, err = run_tests(num_tries)
if "0 tests failed" in res:
exit(0)
else:
exit(1)
57 changes: 27 additions & 30 deletions src/CHIPBackend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,20 @@ chipstar::Event::Event(chipstar::Context *Ctx, chipstar::EventFlags Flags)
: EventStatus_(EVENT_STATUS_INIT), Flags_(Flags), ChipContext_(Ctx),
Msg("") {}

void chipstar::Event::addDependency(
const std::shared_ptr<chipstar::Event> &Event) {
isDeletedSanityCheck();
logDebug("Event {} Msg {} now depends on event {} msg:{}", (void *)this, Msg,
(void *)Event.get(), Event->Msg);
DependsOnList.push_back(Event);
}

void chipstar::Event::releaseDependencies() {
assert(!Deleted_ && "chipstar::Event use after delete!");
isDeletedSanityCheck();
LOCK(EventMtx); // chipstar::Event::DependsOnList
for (auto &Dep : DependsOnList)
logDebug("Event {} msg: {} no longer depends on event {}", (void *)this,
Msg, (void *)Dep.get(), Dep->Msg);
DependsOnList.clear();
}

Expand Down Expand Up @@ -1173,9 +1184,7 @@ void chipstar::Backend::trackEvent(
const std::shared_ptr<chipstar::Event> &Event) {
LOCK(::Backend->EventsMtx); // trackImpl Backend::Events
LOCK(Event->EventMtx); // writing bool chipstar::Event::TrackCalled_
// assert(!isUserEvent() && "Attemped to track a user event!");
// assert(!Deleted_ && "chipstar::Event use after delete!");
// assert(!TrackCalled_ && "chipstar::Event already tracked!");
Event->isDeletedSanityCheck();

logDebug("Tracking chipstar::Event {} in Backend::Events", (void *)this);
assert(!Event->isTrackCalled());
Expand Down Expand Up @@ -1483,8 +1492,11 @@ chipstar::Queue::getSyncQueuesLastEvents() {
LOCK(Dev->DeviceMtx); // chipstar::Device::ChipQueues_ via getQueuesNoLock()

std::vector<std::shared_ptr<chipstar::Event>> EventsToWaitOn;
if (this->getLastEvent())
EventsToWaitOn.push_back(this->getLastEvent());
auto thisLastEvent = this->getLastEvent();
if (thisLastEvent) {
thisLastEvent->isDeletedSanityCheck();
EventsToWaitOn.push_back(thisLastEvent);
}

// If this stream is default legacy stream, sync with all other streams on
// this device
Expand Down Expand Up @@ -1585,13 +1597,9 @@ void chipstar::Queue::memCopyAsync2D(void *Dst, size_t DPitch, const void *Src,

// perform the copy
for (size_t i = 0; i < Height; ++i) {
// capture the event on last iteration
if (i == Height - 1) {
ChipEvent = memCopyAsyncImpl(Dst, Src, Width);
ChipEvent->Msg = "memCopyAsync2D";
} else {
memCopyAsyncImpl(Dst, Src, Width);
}
ChipEvent = memCopyAsyncImpl(Dst, Src, Width);
ChipEvent->Msg = "memCopyAsync2D";
::Backend->trackEvent(ChipEvent);
Src = (char *)Src + SPitch;
Dst = (char *)Dst + DPitch;
}
Expand All @@ -1600,9 +1608,6 @@ void chipstar::Queue::memCopyAsync2D(void *Dst, size_t DPitch, const void *Src,
this->MemMap(AllocInfoDst, chipstar::Queue::MEM_MAP_TYPE::HOST_READ_WRITE);
if (AllocInfoSrc && AllocInfoSrc->MemoryType == hipMemoryTypeHost)
this->MemMap(AllocInfoSrc, chipstar::Queue::MEM_MAP_TYPE::HOST_READ_WRITE);

if (ChipEvent)
::Backend->trackEvent(ChipEvent);
}

void chipstar::Queue::memFill(void *Dst, size_t Size, const void *Pattern,
Expand Down Expand Up @@ -1637,13 +1642,9 @@ void chipstar::Queue::memFillAsync2D(void *Dst, size_t Pitch, int Value,
for (size_t i = 0; i < Height; i++) {
auto Offset = Pitch * i;
char *DstP = (char *)Dst;
// capture the returned event on last iteration, otherwise don't
if (i == Height - 1) {
ChipEvent = memFillAsyncImpl(DstP + Offset, SizeBytes, &Value, 1);
ChipEvent->Msg = "memFillAsync2D";
::Backend->trackEvent(ChipEvent);
} else
memFillAsyncImpl(DstP + Offset, SizeBytes, &Value, 1);
ChipEvent = memFillAsyncImpl(DstP + Offset, SizeBytes, &Value, 1);
ChipEvent->Msg = "memFillAsync2D";
::Backend->trackEvent(ChipEvent);
}
}

Expand All @@ -1663,13 +1664,9 @@ void chipstar::Queue::memFillAsync3D(hipPitchedPtr PitchedDevPtr, int Value,
size_t SizeBytes = Width;
auto Offset = i * (Pitch * PitchedDevPtr.ysize) + j * Pitch;
char *DstP = (char *)Dst;
// capture the returned event on last iteration, otherwise don't
if (i == Depth - 1 && j == Height - 1) {
ChipEvent = memFillAsyncImpl(DstP + Offset, SizeBytes, &Value, 1);
ChipEvent->Msg = "memFillAsync3D";
::Backend->trackEvent(ChipEvent);
} else
memFillAsync(DstP + Offset, SizeBytes, &Value, 1);
ChipEvent = memFillAsyncImpl(DstP + Offset, SizeBytes, &Value, 1);
ChipEvent->Msg = "memFillAsync3D";
::Backend->trackEvent(ChipEvent);
}
}

Expand Down
43 changes: 22 additions & 21 deletions src/CHIPBackend.hh
Original file line number Diff line number Diff line change
Expand Up @@ -639,10 +639,7 @@ protected:
chipstar::EventFlags Flags_;
std::vector<std::shared_ptr<chipstar::Event>> DependsOnList;

#ifndef NDEBUG
// A debug flag for cathing use-after-delete.
bool Deleted_ = false;
#endif

/**
* @brief Events are always created with a context
Expand All @@ -660,18 +657,19 @@ protected:

public:
void setRecording() {
assert(!Deleted_ && "chipstar::Event use after delete!");
isDeletedSanityCheck();
EventStatus_ = EVENT_STATUS_RECORDING;
}
void markTracked() { TrackCalled_ = true; }
bool isTrackCalled() { return TrackCalled_; }
void setTrackCalled(bool Val) { TrackCalled_ = Val; }
bool isUserEvent() { return UserEvent_; }
void setUserEvent(bool Val) { UserEvent_ = Val; }
void addDependency(const std::shared_ptr<chipstar::Event> &Event) {
assert(!Deleted_ && "Event use after delete!");
DependsOnList.push_back(Event);
}
/// @brief Add an event on which this event depends, preventing that event
/// from getting recycled
/// @param Event
void addDependency(const std::shared_ptr<chipstar::Event> &Event);
/// @brief Release dependencies, allowing them to be recycled
void releaseDependencies();
chipstar::EventFlags getFlags() { return Flags_; }
std::mutex EventMtx;
Expand All @@ -690,7 +688,7 @@ public:
* @return Context* pointer to context on which this event was created
*/
chipstar::Context *getContext() {
assert(!Deleted_ && "chipstar::Event use after delete!");
isDeletedSanityCheck();
return ChipContext_;
}

Expand All @@ -711,7 +709,7 @@ public:
* @return false event is in init or invalid state
*/
bool isRecordingOrRecorded() {
assert(!Deleted_ && "chipstar::Event use after delete!");
isDeletedSanityCheck();
return EventStatus_ >= EVENT_STATUS_RECORDING;
}

Expand All @@ -722,7 +720,7 @@ public:
* @return false not recorded
*/
bool isFinished() {
assert(!Deleted_ && "chipstar::Event use after delete!");
isDeletedSanityCheck();
return (EventStatus_ == EVENT_STATUS_RECORDED);
}

Expand Down Expand Up @@ -769,16 +767,19 @@ public:
*/
virtual void hostSignal() = 0;

#ifndef NDEBUG
void markDeleted(bool State = true) {
LOCK(EventMtx); // Deleted_
#ifndef NDEBUG
Deleted_ = State;
#endif
}
bool isDeleted() {
LOCK(EventMtx); // Deleted_
return Deleted_;
}
void isDeletedSanityCheck() {
#ifndef NDEBUG
if (Deleted_) {
logError("chipstar::Event use after delete!");
std::abort();
}
#endif
}
};

class Program {
Expand Down Expand Up @@ -1772,8 +1773,7 @@ public:
*/
class Backend {
protected:
chipstar::EventMonitor *CallbackEventMonitor_ = nullptr;
chipstar::EventMonitor *StaleEventMonitor_ = nullptr;
chipstar::EventMonitor *EventMonitor_ = nullptr;

int MinQueuePriority_;
int MaxQueuePriority_ = 0;
Expand Down Expand Up @@ -2023,8 +2023,7 @@ public:
createCallbackData(hipStreamCallback_t Callback, void *UserData,
chipstar::Queue *ChipQ) = 0;

virtual chipstar::EventMonitor *createCallbackEventMonitor_() = 0;
virtual chipstar::EventMonitor *createStaleEventMonitor_() = 0;
virtual chipstar::EventMonitor *createEventMonitor_() = 0;

/* event interop */
virtual hipEvent_t getHipEvent(void *NativeEvent) = 0;
Expand Down Expand Up @@ -2127,6 +2126,8 @@ public:

virtual std::shared_ptr<chipstar::Event> getLastEvent() {
LOCK(LastEventMtx); // Queue::LastEvent_
if (LastEvent_)
LastEvent_->isDeletedSanityCheck();
return LastEvent_;
}

Expand Down
Loading
Loading