opensearch-project · IanHoang · Nov 15, 2024 · Nov 19, 2024 · Nov 19, 2024 · Nov 19, 2024
@@ -38,6 +38,7 @@
     test_execution_orchestrator, results_publisher, \
         metrics, workload, exceptions, log
 from osbenchmark.builder import provision_config, builder
+from osbenchmark.worker_coordinator import worker_coordinator
 from osbenchmark.workload_generator import workload_generator
 from osbenchmark.utils import io, convert, process, console, net, opts, versions
 from osbenchmark import aggregator
@@ -581,9 +582,15 @@ def add_workload_source(subparser):
         action="store_true")
     test_execution_parser.add_argument(
         "--enable-worker-coordinator-profiling",
-        help="Enables a profiler for analyzing the performance of calls in Benchmark's worker coordinator (default: false).",
+        help="Enables a profiler for analyzing the performance of calls in OSB's worker coordinator (default: false). "
+            "Outputs to ~/.benchmark/logs/ as profile.log",
         default=False,
         action="store_true")
+    test_execution_parser.add_argument(
+        "--profiling-sort-type",
+        help=f"Sort profile.log by sort types (column names). Only applies if --enable-worker-coordinator-profiling is provided. "
+            f"Available sort types: {worker_coordinator.AsyncProfiler.SORT_TYPES}. Default is None.",
+        default=None)
     test_execution_parser.add_argument(
         "--enable-assertions",
         help="Enables assertion checks for tasks (default: false).",
@@ -912,6 +919,7 @@ def configure_test(arg_parser, args, cfg):
     cfg.add(config.Scope.applicationOverride, "test_execution", "pipeline", args.pipeline)
     cfg.add(config.Scope.applicationOverride, "test_execution", "user.tag", args.user_tag)
     cfg.add(config.Scope.applicationOverride, "worker_coordinator", "profiling", args.enable_worker_coordinator_profiling)
+    cfg.add(config.Scope.applicationOverride, "worker_coordinator", "profiling_sort_type", args.profiling_sort_type)
     cfg.add(config.Scope.applicationOverride, "worker_coordinator", "assertions", args.enable_assertions)
     cfg.add(config.Scope.applicationOverride, "worker_coordinator", "on.error", args.on_error)
     cfg.add(

@@ -1064,6 +1064,7 @@ class Query(Runner):
     def __init__(self):
         super().__init__()
         self._extractor = SearchAfterExtractor()
+        self.logger = logging.getLogger(__name__)  # logger named after this module; NOTE(review): no use of self.logger is visible in this hunk - confirm it is needed
 
     async def __call__(self, opensearch, params):
         request_params, headers = self._transport_request_params(params)

@@ -465,6 +465,7 @@ def receiveMsg_PrepareWorkload(self, msg, sender):
         # the workload might have been loaded on a different machine (the coordinator machine) so we force a workload
         # update to ensure we use the latest version of plugins.
         load_workload(self.cfg)
+        self.logger.info("Preparing plugins, param sources, runners, and components for workload now")
         load_workload_plugins(self.cfg, self.workload.name, register_workload_processor=tpr.register_workload_processor,
                            force_update=True)
         # we expect on_prepare_workload can take a long time. seed a queue of tasks and delegate to child workers
@@ -1019,6 +1020,7 @@ def calculate_worker_assignments(host_configs, client_count):
 class ClientAllocations:
     def __init__(self):
         self.allocations = []
+        self.logger = logging.getLogger(__name__)  # logger named after this module; NOTE(review): no use of self.logger is visible in this hunk - confirm it is needed
 
     def add(self, client_id, tasks):
         self.allocations.append({
@@ -1467,6 +1469,7 @@ def __init__(self, cfg, workload, task_allocations, sampler, cancel, complete, a
         self.complete = complete
         self.abort_on_error = abort_on_error
         self.profiling_enabled = self.cfg.opts("worker_coordinator", "profiling")
+        self.profiling_sort_type = self.cfg.opts("worker_coordinator", "profiling_sort_type")
         self.assertions_enabled = self.cfg.opts("worker_coordinator", "assertions")
         self.debug_event_loop = self.cfg.opts("system", "async.debug", mandatory=False, default_value=False)
         self.logger = logging.getLogger(__name__)
@@ -1526,7 +1529,9 @@ def os_clients(all_hosts, all_client_options):
             async_executor = AsyncExecutor(
                 client_id, task, schedule, opensearch, self.sampler, self.cancel, self.complete,
                 task.error_behavior(self.abort_on_error), self.cfg)
-            final_executor = AsyncProfiler(async_executor) if self.profiling_enabled else async_executor
+            final_executor = AsyncProfiler(
+                async_executor, client_id,
+                task, self.profiling_sort_type) if self.profiling_enabled else async_executor
             aws.append(final_executor())
         run_start = time.perf_counter()
         try:
@@ -1544,12 +1549,21 @@ def os_clients(all_hosts, all_client_options):
 
 
 class AsyncProfiler:
-    def __init__(self, target):
+    SORT_TYPES = ["ncall", "tsub", "ttot", "tavg"]
+
+    def __init__(self, target, client_id, task, sort_type):
         """
         :param target: The actual executor which should be profiled.
+        :param client_id: The client that is being profiled.
+        :param task: The task in the workload that is being profiled.
+        :param sort_type: If not None, the column to sort profiled results on. If None, output is not sorted
         """
         self.target = target
+        self.client_id = client_id
+        self.task = task
+        self.sort_type = sort_type
         self.profile_logger = logging.getLogger("benchmark.profile")
+        self.logger = logging.getLogger(__name__)
 
     async def __call__(self, *args, **kwargs):
         # initialize lazily, we don't need it in the majority of cases
@@ -1562,17 +1576,32 @@ async def __call__(self, *args, **kwargs):
         finally:
             yappi.stop()
             s = python_io.StringIO()
-            yappi.get_func_stats().print_all(out=s, columns={
+
+            stats = yappi.get_func_stats()
+
+            if self.sort_type:
+                if self.sort_type not in self.SORT_TYPES:
+                    raise exceptions.SystemSetupError(
+                        f"{self.sort_type} is an invalid sort type. "
+                        f"Available sort types in Async Profiler are: {self.SORT_TYPES}"
+                    )
+
+                self.logger.info("Using Async Profiler with sort type: %s", self.sort_type)
+                stats.sort(sort_type=self.sort_type, sort_order='desc')
+            else:
+                self.logger.info("Using Async Profiler without sort type")
+
+            stats.print_all(out=s, columns={
                 0: ("name", 140),
                 1: ("ncall", 8),
                 2: ("tsub", 8),
                 3: ("ttot", 8),
                 4: ("tavg", 8)
             })
 
-            profile = "\n=== Profile START ===\n"
+            profile = f"\n=== Profile start for client id [{self.client_id}] and task [{self.task}] ===\n"
             profile += s.getvalue()
-            profile += "=== Profile END ==="
+            profile += "\n=== Profile END ===\n"
             self.profile_logger.info(profile)
 
 
@@ -1827,6 +1856,7 @@ class Allocator:
 
     def __init__(self, schedule):
         self.schedule = schedule
+        self.logger = logging.getLogger(__name__)  # logger named after this module; NOTE(review): no use of self.logger is visible in this hunk - confirm it is needed
 
     @property
     def allocations(self):

@@ -542,6 +542,7 @@ def __init__(self, workload, params, **kwargs):
 class SearchParamSource(ParamSource):
     def __init__(self, workload, params, **kwargs):
         super().__init__(workload, params, **kwargs)
+        self.logger = logging.getLogger(__name__)
         target_name = get_target(workload, params)
         type_name = params.get("type")
         if params.get("data-stream") and type_name:
@@ -1085,7 +1086,7 @@ def __init__(self, workloads, params, query_params, **kwargs):
 
         self.filter_type = self.query_params.get(self.PARAMS_NAME_FILTER_TYPE)
         self.filter_body = self.query_params.get(self.PARAMS_NAME_FILTER_BODY)
-
+        self.logger = logging.getLogger(__name__)
 
         if self.PARAMS_NAME_FILTER in params:
             self.query_params.update({

@@ -1680,7 +1680,10 @@ async def f(x):
             await asyncio.sleep(x)
             return x * 2
 
-        profiler = worker_coordinator.AsyncProfiler(f)
+        client_id = 2
+        task = "queries"
+        sort_type = None
+        profiler = worker_coordinator.AsyncProfiler(f, client_id, task, sort_type)
         start = time.perf_counter()
         # this should take roughly 1 second and should return something
         return_value = await profiler(1)