Release v1.0.4 #166

Merged
merged 14 commits into from
Aug 25, 2024
8 changes: 4 additions & 4 deletions CMakeLists.txt
@@ -14,7 +14,7 @@ set(DFTRACER_PACKAGE_VERSION_MINOR "${DFTRACER_VERSION_PATCH}")
set(DFTRACER_PACKAGE_STRING "${DFTRACER_PACKAGE_NAME} ${DFTRACER_PACKAGE_VERSION}")
set(DFTRACER_PACKAGE_TARNAME "${DFTRACER_PACKAGE}")

-set(DFTRACER_VERSION "(1, 0, 3)")
+set(DFTRACER_VERSION "(1, 0, 4)")

project(dftracer LANGUAGES C CXX)

@@ -331,18 +331,18 @@ if (DFTRACER_BUILD_PYTHON_BINDINGS)
. ${CMAKE_BINARY_DIR}/symlink.sh
\")")
endif()
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/script/dftracer_compact.sh ${EXECUTABLE_OUTPUT_PATH}/dftracer_compact COPYONLY)
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/script/dftracer_compact ${EXECUTABLE_OUTPUT_PATH}/dftracer_compact COPYONLY)
install(
FILES
${EXECUTABLE_OUTPUT_PATH}/dftracer_compact
DESTINATION
bin
)

-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/script/merge_pfw.sh ${EXECUTABLE_OUTPUT_PATH}/merge_pfw COPYONLY)
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/script/dftracer_merge ${EXECUTABLE_OUTPUT_PATH}/dftracer_merge COPYONLY)
install(
FILES
-  ${EXECUTABLE_OUTPUT_PATH}/merge_pfw
+  ${EXECUTABLE_OUTPUT_PATH}/dftracer_merge
DESTINATION
bin
)
1 change: 1 addition & 0 deletions MANIFEST.in
@@ -2,6 +2,7 @@ graft src
graft cmake
graft dependency
graft include
+graft script

include CMakeLists.txt
include LICENSE
40 changes: 39 additions & 1 deletion README.md
@@ -2,7 +2,7 @@
[![Coverage Status](https://coveralls.io/repos/github/hariharan-devarajan/dftracer/badge.svg?branch=feature/apis)](https://coveralls.io/github/hariharan-devarajan/dftracer?branch=dev)
[![Documentation Status](https://readthedocs.org/projects/dftracer/badge/?version=latest)](https://dftracer.readthedocs.io/en/latest/?badge=latest)

-# DFTracer v1.0.3
+# DFTracer v1.0.4
A multi-level profiler for capturing application functions and low-level system I/O calls from deep learning workloads.

Requirements for profiler
@@ -114,3 +114,41 @@ DFTRACER_ENABLE=1
```

For more examples, check [Examples](https://dftracer.readthedocs.io/en/latest/examples.html).

## Citation and Reference
The original SC'24 paper describes the design and implementation of DFTracer. Please cite this paper and the code if you use DFTracer in your research.

```
@inproceedings{devarajan_dftracer_2024,
address = {Atlanta, GA},
title = {{DFTracer}: {An} {Analysis}-{Friendly} {Data} {Flow} {Tracer} for {AI}-{Driven} {Workflows}},
shorttitle = {{DFTracer}},
urldate = {2024-07-31},
booktitle = {{SC24}: {International} {Conference} for {High} {Performance} {Computing}, {Networking}, {Storage} and {Analysis}},
publisher = {IEEE},
author = {Devarajan, Hariharan and Pottier, Loic and Velusamy, Kaushik and Zheng, Huihuo and Yildirim, Izzet and Kogiou, Olga and Yu, Weikuan and Kougkas, Anthony and Sun, Xian-He and Yeom, Jae Seung and Mohror, Kathryn},
month = nov,
year = {2024},
}

@misc{devarajan_dftracer_code_2024,
type = {Github},
title = {Github {DFTracer}},
shorttitle = {{DFTracer}},
url = {https://github.com/hariharan-devarajan/dftracer.git},
urldate = {2024-07-31},
journal = {DFTracer: A multi-level dataflow tracer for capturing I/O calls from workflows.},
author = {Devarajan, Hariharan and Pottier, Loic and Velusamy, Kaushik and Zheng, Huihuo and Yildirim, Izzet and Kogiou, Olga and Yu, Weikuan and Kougkas, Anthony and Sun, Xian-He and Yeom, Jae Seung and Mohror, Kathryn},
month = jun,
year = {2024},
}
```

## Acknowledgments

This work was performed under the auspices of the U.S. Department of Energy by Lawrence Livermore National Laboratory under Contract DE-AC52-07NA27344, and under the auspices of the National Cancer Institute (NCI) by Frederick National Laboratory for Cancer Research (FNLCR) under Contract 75N91019D00024. This research used resources of the Argonne Leadership Computing Facility, a U.S. Department of Energy (DOE) Office of Science user facility at Argonne National Laboratory, and is based on research supported by the U.S. DOE Office of Science, Advanced Scientific Computing Research Program, under Contract No. DE-AC02-06CH11357, and by the Office of Advanced Scientific Computing Research under the DOE Early Career Research Program. This material is also based upon work partially supported by LLNL LDRD 23-ERD-045 and 24-SI-005. LLNL-CONF-857447.


## License

MIT License [LICENSE](./LICENSE)
16 changes: 16 additions & 0 deletions dfanalyzer/main.py
@@ -456,6 +456,12 @@ def _calculate_time(self):
        grouped_df = self.events.groupby(["trange", "pid", "tid"]) \
            .agg({"compute_time": sum, "io_time": sum, "app_io_time": sum}) \
            .groupby(["trange"]).max()
+        # check if the max io_time exceeds the configured time_granularity
+        max_io_time = grouped_df.max().compute()['io_time']
+        if max_io_time > self.conf.time_granularity:
+            # warn that the analyzer is running with too small a time granularity
+            logging.warning(f"The max io_time {max_io_time} exceeds the time_granularity {self.conf.time_granularity}. "
+                            f"Please adjust the time_granularity to {int(2 * max_io_time / 1e6)}e6 and rerun the analyzer.")
        grouped_df["io_time"] = grouped_df["io_time"].fillna(0)
        grouped_df["compute_time"] = grouped_df["compute_time"].fillna(0)
        grouped_df["app_io_time"] = grouped_df["app_io_time"].fillna(0)
@@ -536,6 +542,14 @@ def _remove_numbers(self, string_items):
logging.info(f"List after removing numbers {list(item_sets)}")
return list(item_sets)

+    def _check_hosts_time_skew(self):
+        # check if there is time skew across nodes
+        hosts_ts_df = self.events.groupby('hostname').agg({'ts': 'min'}).compute()
+        # warn if the spread of minimum timestamps across hosts exceeds 30 seconds
+        max_time_skew = 30e6
+        if np.std(hosts_ts_df['ts']) > max_time_skew:
+            logging.warning(f"The time skew exceeds {max_time_skew // 1e6} sec across hosts {hosts_ts_df.index.tolist()}")

def summary(self):
num_events = len(self.events)
logging.info(f"Total number of events in the workload are {num_events}")
@@ -554,6 +568,8 @@ def summary(self):

hosts_used = hosts_used.to_list()
#hosts_used_regex_str = self._create_host_intervals(hosts_used)
+        if len(hosts_used) > 1:
+            self._check_hosts_time_skew()

filenames_accessed = filenames_accessed.to_list()
#filename_basename_regex_str = self._remove_numbers(filenames_accessed)
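The new warning asks the user to raise `time_granularity` and rerun. A minimal sketch of acting on it, assuming `DFAnalyzer` and `update_dft_configuration` are importable from `dfanalyzer.main` as used in the example notebook (the values and trace path are illustrative):

```python
# Assumed import path; the example notebook uses these names directly.
from dfanalyzer.main import DFAnalyzer, update_dft_configuration

# Suppose the analyzer warned:
#   "The max io_time 160000000.0 exceeds the time_granularity 80000000.0.
#    Please adjust the time_granularity to 320e6 and rerun the analyzer."
conf = update_dft_configuration(time_granularity=320e6)  # 2 * max_io_time, per the warning
analyzer = DFAnalyzer("<LOCATION OF YOUR TRACE FILES>/*.pfw.gz")
items = analyzer.summary()
```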
2 changes: 2 additions & 0 deletions docs/bash_utilities.rst
@@ -35,12 +35,14 @@ Once the uncompressed data is parsed. The JSON utility `jq` can be used to parse
In each case we have to remove the first `[` which has been added to support perfetto ui.

For uncompressed files

.. code-block:: bash

cat *.pfw | grep -i "[^#[]" | jq -c '.'


For compressed files

.. code-block:: bash

gzip -c -d `echo *.gz` | grep -i "[^#[]" | jq -c '.'
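The `jq` pipelines above treat each non-`[`, non-`#` line as a standalone JSON event. Under that same assumption, here is a Python sketch that loads compressed traces into a list of dicts:

```python
import glob
import gzip
import json

events = []
for path in glob.glob("*.pfw.gz"):
    with gzip.open(path, "rt") as f:
        for line in f:
            line = line.strip().rstrip(",")
            # skip the perfetto '[' header and '#'-prefixed comment lines
            if not line or line.startswith("[") or line.startswith("#"):
                continue
            events.append(json.loads(line))
print(f"loaded {len(events)} events")
```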
2 changes: 1 addition & 1 deletion docs/build.rst
@@ -36,7 +36,7 @@ From Github

.. code-block:: Bash

-   DFT_VERSION=v1.0.3
+   DFT_VERSION=v1.0.4
pip install git+https://github.com/hariharan-devarajan/dftracer.git@${DFT_VERSION}

.. attention::
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -26,7 +26,7 @@
# The short X.Y version
version = u'0.0'
# The full version, including alpha/beta/rc tags
-release = u'1.0.3'
+release = u'1.0.4'


# -- General configuration ---------------------------------------------------
4 changes: 1 addition & 3 deletions docs/dfanalyzer_build.rst
@@ -27,11 +27,9 @@ Getting Started with DFAnalyzer
The most user-friendly way to utilize DFAnalyzer to analyze traces from DFTracer is to use Jupyter Notebooks.
To run the notebook you will have to install Jupyter. We have a simple requirement.txt file for that as well.


.. code-block:: Bash

cd dftracer
pip install -r examples/dfanalyzer/requirements.txt


-A simple example of loading DFAnalyzer and quick recommended queries are available on Navigate to :code:`<dftracer>/examples/dfanalyzer/dfanalyzer_distributed.ipynb` and run your notebook.
+A simple example of loading DFAnalyzer with quick recommended queries is available at :code:`<dftracer>/examples/dfanalyzer/dfanalyzer_distributed.ipynb`; open that notebook and run it.
144 changes: 144 additions & 0 deletions docs/dfanalyzer_overlap_analysis.rst
@@ -0,0 +1,144 @@
===========================
Overlap Analysis
===========================

This section describes how to run I/O and computation overlap analysis with DFAnalyzer.

----------

Make sure you have already completed the steps to build :code:`dfanalyzer`, described :doc:`here <dfanalyzer_build>`.

----------------------------------------
Overlap vs Non-overlap Analysis
----------------------------------------

When we build our model and use a dataloader with multiple workers
and prefetching, we hope that data loading is hidden behind the computation.

We call this the "best case," because the I/O and computation are overlapped.

In some cases, however, the I/O and computation are not overlapped, because the computation is faster than the I/O (e.g., the filesystem bandwidth is not fully utilized).

For such cases, we can analyze the overlap between I/O and computation using :code:`dfanalyzer`.

This page will guide you through overlap analysis on top of :code:`dftracer` and :code:`dfanalyzer`.
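To make "overlap" concrete, here is a small illustrative sketch (not part of :code:`dfanalyzer`) of how unoverlapped I/O time can be computed from compute and I/O intervals:

.. code-block:: python

   # Illustrative only: how much I/O time is NOT hidden behind computation?
   def overlap(a, b):
       # length of the intersection of two [start, end) intervals
       return max(0.0, min(a[1], b[1]) - max(a[0], b[0]))

   compute_intervals = [(0.0, 4.0), (6.0, 10.0)]  # seconds spent computing
   io_intervals = [(3.0, 7.0)]                    # seconds spent in I/O

   total_io = sum(end - start for start, end in io_intervals)
   overlapped = sum(overlap(c, i) for c in compute_intervals for i in io_intervals)
   print(f"unoverlapped I/O: {total_io - overlapped:.1f}s")  # -> 2.0s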

.. note::

   This tutorial assumes you use :code:`PyTorch`; however, the technique can be applied to other frameworks as well.

----------------------------------------
Annotating using DFTracer
----------------------------------------

Assume we have a training loop similar to this:

.. code-block:: python

   def forward(model: torch.nn.Module, x: torch.Tensor):
       ...  # additional actions here
       output = model(x)
       ...
       return output

   def training(model, optimizer, dataloader, num_epoch):
       for epoch in range(num_epoch):
           for batch in dataloader:
               optimizer.zero_grad()
               out = forward(model, batch)
               loss = calculate_loss(out)
               loss.backward()
               optimizer.step()

Now we need to modify the code a bit to annotate computation and I/O using :code:`dftracer`:

.. code-block:: python

   from dftracer.logger import dft_fn as Profile

   dlp_computation = Profile("computation")
   #                          ^
   #                          mapped to: "cat"
   dlp_io = Profile("IO", "real_IO")
   #                 ^       ^
   #                 "cat"   "name"

   @dlp_computation.log  # annotate this function as computation
   def forward(model: torch.nn.Module, x: torch.Tensor):
       ...
       output = model(x)
       ...
       return output

   def training(model, optimizer, dataloader, num_epoch):
       for epoch in range(num_epoch):
           for batch in dlp_io.iter(dataloader):  # annotate the dataloader iteration as I/O
               optimizer.zero_grad()
               out = forward(model, batch)
               loss = calculate_loss(out)
               loss.backward()
               optimizer.step()

   # or, even better, annotate __getitem__ (or the iterator) in your dataset:

   # dlp_dataset = Profile("MyDataset")
   # class MyDataset(torch.utils.data.Dataset):
   #     def __len__(self):
   #         return len(self.data)
   #
   #     @dlp_dataset.log
   #     def __getitem__(self, idx: int):
   #         ...
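For example, here is a minimal, hypothetical sketch of that approach; :code:`RandomDataset` and its random data are stand-ins for your own dataset, and we use :code:`"IO"` as the cat so these events match the application-I/O condition defined in the next section:

.. code-block:: python

   import numpy as np
   import torch
   from dftracer.logger import dft_fn as Profile

   dlp_dataset = Profile("IO", "dataset_getitem")  # cat "IO", name "dataset_getitem"

   class RandomDataset(torch.utils.data.Dataset):
       """Hypothetical dataset; replace the random data with real loading logic."""

       def __init__(self, n: int = 1024, dim: int = 16):
           self.data = np.random.rand(n, dim).astype(np.float32)

       def __len__(self):
           return len(self.data)

       @dlp_dataset.log  # every __getitem__ call is traced as an "IO" event
       def __getitem__(self, idx: int):
           return torch.from_numpy(self.data[idx])

   loader = torch.utils.data.DataLoader(RandomDataset(), batch_size=32, num_workers=2)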

----------------------------------------
Analysis using DFAnalyzer
----------------------------------------

Now, let's open :code:`dfanalyzer_distributed.ipynb` in the :code:`examples/dfanalyzer` directory.

Let's tweak the function :code:`get_conditions_stormer` and rename it to :code:`get_conditions_<MY_MODEL>`. Let's assume :code:`<MY_MODEL>` is "cool_ml" in this example.

.. code-block:: python

   def get_conditions_cool_ml(json_object):
       app_io_cond = (
           "IO" in json_object["cat"]  # <----- events whose "cat" contains "IO" are application-level I/O
       )
       compute_cond = "compute" in json_object["cat"]  # <----- events whose "cat" contains "compute" are computation
       io_cond = json_object["cat"] in ["POSIX", "STDIO"]  # <---- leave as default; dftracer assigns these cats for you
       return app_io_cond, compute_cond, io_cond

Scroll down a bit in the same notebook file, and replace :code:`get_conditions_stormer` with :code:`get_conditions_cool_ml`:

.. code-block:: python

   conf = update_dft_configuration(
       dask_scheduler=dask_scheduler,
       verbose=True,
       workers=4,
       time_granularity=80e6,
       log_file=f"./df_{os.getenv('USER')}.log",
       conditions=get_conditions_cool_ml,  # <---- replace this
   )

Now, scroll down and modify the :code:`analyzer` variable in the same notebook file:

.. code-block:: python

   analyzer = DFAnalyzer("<LOCATION OF YOUR TRACE FILES>/*.pfw.gz")  # <-- it supports globbing!

After that, execute the whole notebook and pay particular attention to the cell where we call:

.. code-block:: python

   items = analyzer.summary()
   items

If it runs successfully, you should see the summary of the overlap analysis. Below is an example:

.. image:: images/dfanalyzer/overlap-analysis/overlap-analysis-summary.png
:width: 800
:alt: Overlap Analysis Summary

Here, we can see the **Unoverlapped App I/O** and **Unoverlapped I/O** entries, which tell you how much time your application-level and low-level I/O, respectively, spends not overlapped with computation.
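If you want to pull those numbers out programmatically, a rough sketch follows; it assumes :code:`analyzer.summary()` returns a pandas-style object indexed by the metric names shown above, so check the actual structure in your notebook first:

.. code-block:: python

   # Assumption: `items` is pandas-like and keyed by the metric names above.
   unoverlapped_io = items.loc["Unoverlapped I/O"]
   unoverlapped_app_io = items.loc["Unoverlapped App I/O"]
   print(f"Unoverlapped I/O: {unoverlapped_io}")
   print(f"Unoverlapped App I/O: {unoverlapped_app_io}")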
Binary file added docs/images/perfetto/perfetto-interface.png
Binary file added docs/images/perfetto/perfetto-sql-textbox-2.png
Binary file added docs/images/perfetto/perfetto-sql-textbox.png
Binary file added docs/images/perfetto/perfetto-ui-confirm.png
Binary file added docs/images/perfetto/perfetto-viz.png
5 changes: 3 additions & 2 deletions docs/index.rst
@@ -26,14 +26,16 @@ DFTracer: is a library for profiling I/O calls and application functions.
dfanalyzer_distributed
dfanalyzer_conf
dfanalyzer_alcf_polaris
+dfanalyzer_overlap_analysis

.. toctree::
:maxdepth: 2
:caption: Utilities

utilities
bash_utilities

+perfetto

.. toctree::
:maxdepth: 2
:caption: Reference
@@ -63,4 +65,3 @@ Indices and tables
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
