-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add USDTs for the task runtime #43453
Conversation
Maybe some of what @kpamnany did in #36870 makes sense as USDTs too? Also, it'd be great if we can record what's going on in the multi-queue. I don't know if it's the correct way to do it, but maybe something like this? JL_UV_UNLOCK();
diff --git a/src/partr.c b/src/partr.c
index 048a841158..75eb367b09 100644
--- a/src/partr.c
+++ b/src/partr.c
@@ -142,6 +142,7 @@ static inline int multiq_insert(jl_task_t *task, int16_t priority)
if (jl_atomic_load_relaxed(&heaps[rn].ntasks) >= tasks_per_heap) {
uv_mutex_unlock(&heaps[rn].lock);
// multiq insertion failed, increase #tasks per heap
+ JL_PROBE_RT_MULTIQ_INSERT_FULL(ptls->tid);
return -1;
}
@@ -154,6 +155,7 @@ static inline int multiq_insert(jl_task_t *task, int16_t priority)
jl_atomic_store_relaxed(&heaps[rn].prio, task->prio);
uv_mutex_unlock(&heaps[rn].lock);
+ JL_PROBE_RT_MULTIQ_INSERT_SUCCESS(ptls->tid);
return 0;
}
@@ -179,17 +181,22 @@ static inline jl_task_t *multiq_deletemin(void)
else if (prio1 == prio2 && prio1 == INT16_MAX)
continue;
if (uv_mutex_trylock(&heaps[rn1].lock) == 0) {
- if (prio1 == jl_atomic_load_relaxed(&heaps[rn1].prio))
+ if (prio1 == jl_atomic_load_relaxed(&heaps[rn1].prio)) {
+ JL_PROBE_RT_MULTIQ_DELETEMIN_TRYLOCK_SUCCESS(ptls->tid, i);
break;
+ }
uv_mutex_unlock(&heaps[rn1].lock);
}
}
- if (i == heap_p)
+ if (i == heap_p) {
+ JL_PROBE_RT_MULTIQ_DELETEMIN_EMPTYISH(ptls->tid);
return NULL;
+ }
task = heaps[rn1].tasks[0];
if (!jl_set_task_tid(task, ptls->tid)) {
uv_mutex_unlock(&heaps[rn1].lock);
+ JL_PROBE_RT_MULTIQ_DELETEMIN_TASKLOCK_FAILED(ptls->tid);
goto retry;
}
int32_t ntasks = jl_atomic_load_relaxed(&heaps[rn1].ntasks) - 1;
@@ -204,6 +211,7 @@ static inline jl_task_t *multiq_deletemin(void)
jl_atomic_store_relaxed(&heaps[rn1].prio, prio1);
uv_mutex_unlock(&heaps[rn1].lock);
+ JL_PROBE_RT_MULTIQ_DELETEMIN_SUCCESS(ptls->tid, task);
return task;
} I also wondered if it makes sense to record diff --git a/src/jl_uv.c b/src/jl_uv.c
index 0f616cdebb..eff4c9a040 100644
--- a/src/jl_uv.c
+++ b/src/jl_uv.c
@@ -205,6 +205,7 @@ JL_DLLEXPORT int jl_process_events(void)
jl_gc_safepoint_(ct->ptls);
if (loop && (jl_atomic_load_relaxed(&_threadedregion) || jl_atomic_load_relaxed(&ct->tid) == 0)) {
if (jl_atomic_load(&jl_uv_n_waiters) == 0 && jl_mutex_trylock(&jl_uv_mutex)) {
+ JL_PROBE_RT_PROCESS_EVENTS(ct);
loop->stop_flag = 0;
int r = uv_run(loop, UV_RUN_NOWAIT);
JL_UV_UNLOCK(); But I've never succeeded in setting up bpftrace and so I don't know how much of it makes sense :) |
Yeah, I'll have to give that a try! Marking this as draft since I need to think longer on how all this will interact with things like user stack introspection; I'll probably split the |
|
f729150
to
7d8cba0
Compare
1f3c389
to
e8e8fbb
Compare
Can you look at |
Yes we can. Sadly |
e8e8fbb
to
7ec1f00
Compare
If we forget about |
0a38d0c
to
c5f2ebb
Compare
@tkf how would you briefly describe the |
For queue-agnostic trace points, maybe something like this? diff --git a/src/partr.c b/src/partr.c
index 0ba4f08627..4f1958159f 100644
--- a/src/partr.c
+++ b/src/partr.c
@@ -419,15 +419,21 @@ JL_DLLEXPORT void jl_wakeup_thread(int16_t tid)
// get the next runnable task from the multiq
static jl_task_t *get_next_task(jl_value_t *trypoptask, jl_value_t *q)
{
+ jl_task_t *ct = jl_current_task;
+ JL_PROBE_RT_GET_NEXT_TASK_BEGIN(ct);
jl_gc_safepoint();
jl_value_t *args[2] = { trypoptask, q };
jl_task_t *task = (jl_task_t*)jl_apply(args, 2);
if (jl_typeis(task, jl_task_type)) {
int self = jl_atomic_load_relaxed(&jl_current_task->tid);
jl_set_task_tid(task, self);
+ JL_PROBE_RT_POP_STICKY_SUCCESS(ct, task);
return task;
}
- return multiq_deletemin();
+ JL_PROBE_RT_POP_STICKY_FAILED(ct);
+ task = multiq_deletemin();
+ JL_PROBE_RT_GET_NEXT_TASK_END(ct, task);
+ return task;
}
static int may_sleep(jl_ptls_t ptls) JL_NOTSAFEPOINT ...or |
Or maybe we also want to put the trace points in the task pop loop in Also, diff --git a/src/partr.c b/src/partr.c
index 0ba4f08627..e7e50b336b 100644
--- a/src/partr.c
+++ b/src/partr.c
@@ -445,9 +445,11 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
jl_task_t *ct = jl_current_task;
uint64_t start_cycles = 0;
+ JL_PROBE_RT_TASK_GET_NEXT_BEGIN(ct);
while (1) {
jl_task_t *task = get_next_task(trypoptask, q);
if (task)
+ JL_PROBE_RT_TASK_GET_NEXT_END(ct, start_cycles);
return task;
// quick, race-y check to see if there seems to be any stuff in there
@@ -472,6 +474,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
if (task) {
if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping)
jl_atomic_store(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
+ JL_PROBE_RT_TASK_GET_NEXT_END(ct, start_cycles);
return task;
}
@@ -534,6 +537,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
}
// the other threads will just wait for on signal to resume
+ JL_PROBE_RT_SLEEP_BEGIN(ct);
JULIA_DEBUG_SLEEPWAKE( ptls->sleep_enter = cycleclock() );
int8_t gc_state = jl_gc_safe_enter(ptls);
uv_mutex_lock(&sleep_locks[ptls->tid]);
@@ -544,6 +548,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping);
uv_mutex_unlock(&sleep_locks[ptls->tid]);
JULIA_DEBUG_SLEEPWAKE( ptls->sleep_leave = cycleclock() );
+ JL_PROBE_RT_SLEEP_END(ct);
jl_gc_safe_leave(ptls, gc_state); // contains jl_gc_safepoint
start_cycles = 0;
} |
b80872f
to
a3d87d1
Compare
3af9192
to
d628f62
Compare
Note that if you try it out on a recent Gtk.jl version you'll need to have a Gtk window open to enable the event loop. Also note that any keyboard (like a ctrl-c) or mouse input will unblock the 5 second idle timeout, and that it doesn't happen during profiling. It's a tricky one to investigate! |
Here I think. Edit to add: we want one here anyway so we can track task lifetimes in general. |
I think we can use the hooks here to insert |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you rebase?
I think the events related to task lifetime are
I think looking at the interval of 3--6 is more important than 1--6 since (shortly before) 3 is where the stack is allocated. That is to say, it lets us know how long a task holds onto its stack, which is a rather expensive resource. So, maybe it makes sense to add yet another trace point in (I'm not sure how useful 2 is though. It's roughly the same as 1 unless the user throws away the created |
Co-authored-by: Takafumi Arakaki <[email protected]>
1f98662
to
3ca21ab
Compare
jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us | ||
JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@kpamnany should this get its own probe? It was introduced during a rebase, so I just copy-pasta'd the probe from below.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, in general, every state transition should get its own probe so that we can distinguish them. I'm not actually sure what's going on here in particular though so I'm not sure what to call this probe. Maybe SLEEP_CHECK_TASK_YIELD_WAKE
? @vtjnash any better suggestion?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Most of these transitions are the same, the code structure just makes them copy the same structure frequently. In particular, all events between a given pair of fences (or locks) are indistinguishable. In practice, that also means JL_PROBE_RT_SLEEP_CHECK_WAKE
can be indistinguishable from this event, since there is no fence from here until the next time we re-enter this loop and set sleep_check_state to sleeping.
@tkf |
Yeah, I think it's better to differentiate "start" and "re-schedule" (post-yield) |
This still sounds good to me. Someone could identify in post-analysis the difference between start and resume and pause (they are always a state toggle), but separating them early seems better. |
From @tkf's list:
This is now at the end of
This is missing and should be added here (maybe
This is done here and the rest in the list are there too. Since adding 2 above gives us @jpsamaroo demonstrated using |
More info sounds good to me! |
Co-authored-by: Takafumi Arakaki <[email protected]>
Co-authored-by: Takafumi Arakaki <[email protected]>
Co-authored-by: Takafumi Arakaki <[email protected]>
These probes may prove useful for determining task boundaries (when a Julia thread is executing a given task), to be paired with other information, such as GC initiation.
@vchuravy do we want any other USDTs in this PR? I'd like to cover all cases where a task begins/finishes executing.
Todo:
process_events probes
get_next_task probes