-
Notifications
You must be signed in to change notification settings - Fork 6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Core] Add task_name, task_function_name and actor_name in Structured Logging #48703
Changes from 9 commits
c522245
eec0a8e
673c745
3ca9d0e
17f19a3
40b550b
bc08318
2a5200b
e70cbfa
3f7b0a0
ae80ee1
5bf148b
799a021
2f484db
64387c7
eaca9d4
36160b7
506560b
7c984cc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,6 +41,9 @@ class LogKey(str, Enum): | |
NODE_ID = "node_id" | ||
ACTOR_ID = "actor_id" | ||
TASK_ID = "task_id" | ||
ACTOR_NAME = "actor_name" | ||
TASK_NAME = "task_name" | ||
TASK_FUNCTION = "task_function" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: task_function_name. If you think it's too long, we can also do task_func_name There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Synced offline. Changing to function name will implicitly indicate the parameter type and will be helpful to understand the field. |
||
|
||
# Logger built-in context | ||
ASCTIME = "asctime" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,4 +20,13 @@ def filter(self, record): | |
task_id = runtime_context.get_task_id() | ||
if task_id is not None: | ||
setattr(record, LogKey.TASK_ID.value, task_id) | ||
task_name = runtime_context.get_task_name() | ||
if task_name is not None: | ||
setattr(record, LogKey.TASK_NAME.value, task_name) | ||
task_function = runtime_context.get_task_function() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
if task_function is not None: | ||
setattr(record, LogKey.TASK_FUNCTION.value, task_function) | ||
actor_name = runtime_context.get_actor_name() | ||
if actor_name is not None: | ||
setattr(record, LogKey.ACTOR_NAME.value, actor_name) | ||
return True |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -261,6 +261,8 @@ cdef optional[ObjectIDIndexType] NULL_PUT_INDEX = nullopt | |
# https://docs.python.org/3/library/contextvars.html#contextvars.ContextVar | ||
# It is thread-safe. | ||
async_task_id = contextvars.ContextVar('async_task_id', default=None) | ||
async_task_name = contextvars.ContextVar('async_task_name', default=None) | ||
async_task_function = contextvars.ContextVar('async_task_function', default=None) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. task_function_name |
||
|
||
|
||
class DynamicObjectRefGenerator: | ||
|
@@ -1800,7 +1802,8 @@ cdef void execute_task( | |
return core_worker.run_async_func_or_coro_in_event_loop( | ||
async_function, function_descriptor, | ||
name_of_concurrency_group_to_execute, task_id=task_id, | ||
func_args=(actor, *arguments), func_kwargs=kwarguments) | ||
task_name=task_name, func_args=(actor, *arguments), | ||
func_kwargs=kwarguments) | ||
|
||
return function(actor, *arguments, **kwarguments) | ||
|
||
|
@@ -1912,7 +1915,8 @@ cdef void execute_task( | |
execute_streaming_generator_async(context), | ||
function_descriptor, | ||
name_of_concurrency_group_to_execute, | ||
task_id=task_id) | ||
task_id=task_id, | ||
task_name=task_name) | ||
else: | ||
execute_streaming_generator_sync(context) | ||
|
||
|
@@ -3400,6 +3404,48 @@ cdef class CoreWorker: | |
with nogil: | ||
CCoreWorkerProcess.GetCoreWorker().Exit(c_exit_type, detail, null_ptr) | ||
|
||
def get_current_task_name(self) -> str: | ||
"""Return the current task name. | ||
|
||
If it is a normal task, it returns the task name from the main thread. | ||
If it is a threaded actor, it returns the task name for the current thread. | ||
If it is async actor, it returns the task name stored in contextVar for | ||
the current asyncio task. | ||
""" | ||
# We can only obtain the correct task name within asyncio task | ||
# via async_task_name contextvar. We try this first. | ||
# It is needed because the core Worker's GetCurrentTask API | ||
MengjinYan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# doesn't have asyncio context, thus it cannot return the | ||
# correct task name. | ||
task_name = async_task_name.get() | ||
if task_name is None: | ||
# if it is not within asyncio context, fallback to TaskName | ||
# obtainable from core worker. | ||
task_name = CCoreWorkerProcess.GetCoreWorker().GetCurrentTaskName() \ | ||
.decode("utf-8") | ||
return task_name | ||
|
||
def get_current_task_function(self) -> str: | ||
"""Return the current task function. | ||
|
||
If it is a normal task, it returns the task function from the main thread. | ||
If it is a threaded actor, it returns the task function for the current thread. | ||
If it is async actor, it returns the task function stored in contextVar for | ||
the current asyncio task. | ||
""" | ||
# We can only obtain the correct task function within asyncio task | ||
# via async_task_function contextvar. We try this first. | ||
# It is needed because the core Worker's GetCurrentTask API | ||
# doesn't have asyncio context, thus it cannot return the | ||
# correct task function. | ||
task_function = async_task_function.get() | ||
if task_function is None: | ||
# if it is not within asyncio context, fallback to TaskName | ||
# obtainable from core worker. | ||
task_function = CCoreWorkerProcess.GetCoreWorker() \ | ||
.GetCurrentTaskFunction().decode("utf-8") | ||
return task_function | ||
|
||
def get_current_task_id(self) -> TaskID: | ||
"""Return the current task ID. | ||
|
||
|
@@ -4796,6 +4842,7 @@ cdef class CoreWorker: | |
specified_cgname: str, | ||
*, | ||
task_id: Optional[TaskID] = None, | ||
task_name: Optional[str] = None, | ||
func_args: Optional[Tuple] = None, | ||
func_kwargs: Optional[Dict] = None, | ||
): | ||
|
@@ -4842,6 +4889,9 @@ cdef class CoreWorker: | |
try: | ||
if task_id: | ||
async_task_id.set(task_id) | ||
if task_name: | ||
MengjinYan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
async_task_name.set(task_name) | ||
async_task_function.set(function_descriptor.repr) | ||
|
||
if inspect.isawaitable(func_or_coro): | ||
coroutine = func_or_coro | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -124,7 +124,7 @@ def get_worker_id(self) -> str: | |
@property | ||
@Deprecated(message="Use get_task_id() instead", warning=True) | ||
def task_id(self): | ||
"""Get current task ID for this worker or driver. | ||
"""Get current task ID for this worker. | ||
|
||
Task ID is the id of a Ray task. | ||
This shouldn't be used in a driver process. | ||
|
@@ -155,7 +155,7 @@ def f(): | |
Returns: | ||
The current worker's task id. None if there's no task id. | ||
""" | ||
# only worker mode has actor_id | ||
# only worker mode has task_id | ||
assert ( | ||
self.worker.mode == ray._private.worker.WORKER_MODE | ||
), f"This method is only available when the process is a\ | ||
|
@@ -165,7 +165,7 @@ def f(): | |
return task_id if not task_id.is_nil() else None | ||
|
||
def get_task_id(self) -> Optional[str]: | ||
"""Get current task ID for this worker or driver. | ||
"""Get current task ID for this worker. | ||
|
||
Task ID is the id of a Ray task. The ID will be in hex format. | ||
This shouldn't be used in a driver process. | ||
|
@@ -201,7 +201,7 @@ def get_task_id(): | |
Returns: | ||
The current worker's task id in hex. None if there's no task id. | ||
""" | ||
# only worker mode has actor_id | ||
# only worker mode has task_id | ||
if self.worker.mode != ray._private.worker.WORKER_MODE: | ||
logger.warning( | ||
"This method is only available when the process is a " | ||
|
@@ -212,12 +212,98 @@ def get_task_id(): | |
return task_id.hex() if not task_id.is_nil() else None | ||
|
||
def _get_current_task_id(self) -> TaskID: | ||
async_task_id = ray._raylet.async_task_id.get() | ||
MengjinYan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if async_task_id is None: | ||
task_id = self.worker.current_task_id | ||
else: | ||
task_id = async_task_id | ||
return task_id | ||
return self.worker.current_task_id | ||
|
||
def get_task_name(self) -> Optional[str]: | ||
"""Get current task name for this worker. | ||
|
||
Task name by default is the task's funciton call string. It can also be | ||
specified in options when triggering a task. | ||
|
||
Example: | ||
|
||
.. testcode:: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add an example for the async case? |
||
|
||
import ray | ||
|
||
@ray.remote | ||
class Actor: | ||
def get_task_name(self): | ||
return ray.get_runtime_context().get_task_name() | ||
|
||
@ray.remote | ||
def get_task_name(): | ||
return ray.get_runtime_context().get_task_name() | ||
|
||
a = Actor.remote() | ||
# Task names are available for actor tasks. | ||
print(ray.get(a.get_task_name.remote())) | ||
# Task names are available for normal tasks. | ||
# Get default task name | ||
print(ray.get(get_task_name.remote())) | ||
# Get specified task name | ||
print(ray.get(get_task_name.options(name="task_name").remote())) | ||
|
||
.. testoutput:: | ||
:options: +MOCK | ||
|
||
Actor.get_task_name | ||
get_task_name | ||
task_nams | ||
|
||
Returns: | ||
The current worker's task name | ||
""" | ||
# only worker mode has task_name | ||
if self.worker.mode != ray._private.worker.WORKER_MODE: | ||
logger.warning( | ||
"This method is only available when the process is a " | ||
f"worker. Current mode: {self.worker.mode}" | ||
) | ||
return None | ||
return self.worker.current_task_name | ||
|
||
def get_task_function(self) -> Optional[str]: | ||
"""Get current task function string for this worker. | ||
|
||
Example: | ||
|
||
.. testcode:: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add an example for the async case? |
||
|
||
import ray | ||
|
||
@ray.remote | ||
class Actor: | ||
def get_task_function(self): | ||
return ray.get_runtime_context().get_task_function() | ||
|
||
@ray.remote | ||
def get_task_function(): | ||
return ray.get_runtime_context().get_task_function() | ||
|
||
a = Actor.remote() | ||
# Task functions are available for actor tasks. | ||
print(ray.get(a.get_task_function.remote())) | ||
# Task functions are available for normal tasks. | ||
print(ray.get(get_task_function.remote())) | ||
|
||
.. testoutput:: | ||
:options: +MOCK | ||
|
||
[python modual name].Actor.get_task_name | ||
[python modual name].get_task_name | ||
|
||
Returns: | ||
The current worker's task function call string | ||
""" | ||
# only worker mode has task_function | ||
if self.worker.mode != ray._private.worker.WORKER_MODE: | ||
logger.warning( | ||
"This method is only available when the process is a " | ||
f"worker. Current mode: {self.worker.mode}" | ||
) | ||
return None | ||
return self.worker.current_task_function | ||
|
||
@property | ||
@Deprecated(message="Use get_actor_id() instead", warning=True) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need task_function? I thought we only need task_name which should be function name by default?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are right. By default, in general, we the task name as the function name.
I still see 2 differences between task_name & task_function. That's why I kept them both.
name
in options, but task function will always be the function call nameThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@alanwguo do we need function name or task name is enough for now. I feel having task_name is enough.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've seen task name can explode in cardinality but function name is a bit more stable. Having both could be useful where we can decide which to use depending on performance characteristics. For example if task_name is too high cardinality, we can choose to use func name exclusively.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok